Handle hugetlbfs file mapping
Hugetlbfs file mappings are handled differently than regular files:
- pager_req_create will tell us the file is in a hugetlbfs
- memory is allocated upfront; we need to fail if there is not enough memory
- the memory needs to be handed out again if another process maps the same file

This implementation still has some hacks; in particular, the memory needs to
be freed once all mappings are gone and the file has been deleted/closed by
all processes. We cannot easily know when the file is closed/unlinked, so the
memory is cleaned up when all processes have exited.

To test, install libhugetlbfs and link a program with the additional
LDFLAGS += -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align
Then run with HUGETLB_ELFMAP=RW set; you can check this works with
HUGETLB_DEBUG=1 HUGETLB_VERBOSE=2

Change-Id: I327920ff06efd82e91b319b27319f41912169af1
Committed by: Masamichi Takagi
Parent: 3e3ccf377c
Commit: 39f9d7fdff
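For reference, a minimal sketch of the build-and-run steps described in the
commit message, assuming libhugetlbfs is installed under
/usr/share/libhugetlbfs and a hugetlbfs mount is available at /dev/hugepages;
the program name myprog is only a placeholder, adjust to the local setup:

    # link so that the ELF data segments can be remapped onto hugetlbfs-backed pages
    gcc -Wall -o myprog myprog.c -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align
    # run with read/write segment remapping enabled and verbose diagnostics
    HUGETLB_ELFMAP=RW HUGETLB_DEBUG=1 HUGETLB_VERBOSE=2 ./myprog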
@@ -229,6 +229,9 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
		unsigned long size,
		struct zap_details *details);

struct inode_operations *mcctrl_hugetlbfs_inode_operations;


static int symbols_init(void)
{
	mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount");

@@ -263,6 +266,11 @@ static int symbols_init(void)
	if (WARN_ON(!mcctrl_zap_page_range))
		return -EFAULT;

	mcctrl_hugetlbfs_inode_operations =
		(void *) kallsyms_lookup_name("hugetlbfs_inode_operations");
	if (WARN_ON(!mcctrl_hugetlbfs_inode_operations))
		return -EFAULT;

	return arch_symbols_init();
}


@@ -428,6 +428,7 @@ extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
		unsigned long start,
		unsigned long size,
		struct zap_details *details);
extern struct inode_operations *mcctrl_hugetlbfs_inode_operations;

/* syscall.c */
void pager_add_process(void);

@@ -1165,6 +1165,7 @@ enum {
	MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
	MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
	MF_SHM = 0x40000,
	MF_HUGETLBFS = 0x100000,
};

static int pager_get_path(struct file *file, char *path) {
@@ -1254,6 +1255,17 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
		goto out;
	}

	if (inode->i_op == mcctrl_hugetlbfs_inode_operations) {
		mf_flags = MF_HUGETLBFS;
		/* pager is used as handle id on mckernel side, use inode */
		pager = (void *)st.ino;
		/* retrofit blksize in resp as well through st.size field;
		 * the actual file size is not used
		 */
		st.size = st.blksize;
		goto out_reply;
	}

	for (;;) {
		spin_lock_irqsave(&pager_lock, irqflags);

@@ -1322,6 +1334,7 @@ found:
	}
	spin_unlock_irqrestore(&pager_lock, irqflags);

out_reply:
	phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp));
	resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
	if (!resp) {

@@ -6,7 +6,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
OBJS += rbtree.o
OBJS += rbtree.o hugefileobj.o
OBJS += pager.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
DEPSRCS=$(wildcard $(SRC)/*.c)

@@ -215,6 +215,10 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
		goto out;
	}

	if (result.flags & MF_HUGETLBFS) {
		return hugefileobj_pre_create(&result, objp, maxprotp);
	}

	mcs_lock_lock(&fileobj_list_lock, &node);
	obj = obj_list_lookup(result.handle);
	if (obj)

kernel/hugefileobj.c (new file, 303 lines)
@@ -0,0 +1,303 @@
#include <memobj.h>
#include <ihk/mm.h>
#include <kmsg.h>
#include <kmalloc.h>
#include <string.h>
#include <debug.h>

#if DEBUG_HUGEFILEOBJ
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif

struct hugefilechunk {
	struct list_head list;
	off_t pgoff;
	int npages;
	void *mem;
};

struct hugefileobj {
	struct memobj memobj;
	size_t pgsize;
	uintptr_t handle;
	unsigned int pgshift;
	struct list_head chunk_list;
	ihk_spinlock_t chunk_lock;
	struct list_head obj_list;
};

static ihk_spinlock_t hugefileobj_list_lock;
static LIST_HEAD(hugefileobj_list);

static struct hugefileobj *to_hugefileobj(struct memobj *memobj)
{
	return (struct hugefileobj *)memobj;
}

static struct memobj *to_memobj(struct hugefileobj *obj)
{
	return &obj->memobj;
}

static struct hugefileobj *hugefileobj_lookup(uintptr_t handle)
{
	struct hugefileobj *p;

	list_for_each_entry(p, &hugefileobj_list, obj_list) {
		if (p->handle == handle) {
			/* for the interval between last put and fileobj_free
			 * taking list_lock
			 */
			if (memobj_ref(&p->memobj) <= 1) {
				ihk_atomic_dec(&p->memobj.refcnt);
				continue;
			}
			return p;
		}
	}

	return NULL;
}

static int hugefileobj_get_page(struct memobj *memobj, off_t off,
				int p2align, uintptr_t *physp,
				unsigned long *pflag, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk;
	off_t pgoff;

	if (p2align != obj->pgshift - PTL1_SHIFT) {
		kprintf("%s: p2align %d but expected %d\n",
			__func__, p2align, obj->pgshift - PTL1_SHIFT);
		return -ENOMEM;
	}

	pgoff = off >> obj->pgshift;
	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	list_for_each_entry(chunk, &obj->chunk_list, list) {
		if (pgoff >= chunk->pgoff + chunk->npages)
			continue;
		if (pgoff >= chunk->pgoff)
			break;
		kprintf("%s: no segment found for pgoff %lx (obj %p)\n",
			__func__, pgoff, obj);
		chunk = NULL;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);
	if (!chunk)
		return -EIO;

	*physp = virt_to_phys(chunk->mem + (off - chunk->pgoff * PAGE_SIZE));

	return 0;
}

static void hugefileobj_free(struct memobj *memobj)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk, *next;

	dkprintf("Destroying hugefileobj %p\n", memobj);

	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
	list_del(&obj->obj_list);
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	kfree(memobj->path);
	/* don't bother with chunk_lock, memobj refcounting makes this safe */
	list_for_each_entry_safe(chunk, next, &obj->chunk_list, list) {
		ihk_mc_free_pages_user(chunk->mem, chunk->npages);
		kfree(chunk);
	}
	kfree(memobj);
}

struct memobj_ops hugefileobj_ops = {
	.free = hugefileobj_free,
	.get_page = hugefileobj_get_page,

};

void hugefileobj_cleanup(void)
{
	struct hugefileobj *obj;
	int refcnt;

	while (true) {
		ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
		if (list_empty(&hugefileobj_list)) {
			ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
			break;
		}
		obj = list_first_entry(&hugefileobj_list, struct hugefileobj,
				       obj_list);
		ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

		if ((refcnt = memobj_unref(to_memobj(obj))) != 0) {
			kprintf("%s: obj %p had refcnt %ld > 1, destroying anyway\n",
				__func__, obj, refcnt + 1);
			hugefileobj_free(to_memobj(obj));
		}
	}
}

int hugefileobj_pre_create(struct pager_create_result *result,
			   struct memobj **objp, int *maxprotp)
{
	struct hugefileobj *obj;

	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
	obj = hugefileobj_lookup(result->handle);
	if (obj)
		goto out_unlock;

	obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
	if (!obj)
		return -ENOMEM;

	obj->handle = result->handle;
	obj->pgsize = result->size;
	obj->pgshift = 0;
	INIT_LIST_HEAD(&obj->chunk_list);
	ihk_mc_spinlock_init(&obj->chunk_lock);
	obj->memobj.flags = result->flags;
	obj->memobj.status = MEMOBJ_TO_BE_PREFETCHED;
	obj->memobj.ops = &hugefileobj_ops;
	/* keep mapping around when process is gone */
	ihk_atomic_set(&obj->memobj.refcnt, 2);
	if (result->path[0]) {
		obj->memobj.path = kmalloc(PATH_MAX, IHK_MC_AP_NOWAIT);
		if (!obj->memobj.path) {
			kfree(obj);
			return -ENOMEM;
		}
		strncpy(obj->memobj.path, result->path, PATH_MAX);
	}

	list_add(&obj->obj_list, &hugefileobj_list);
out_unlock:
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	*maxprotp = result->maxprot;
	*objp = to_memobj(obj);

	return 0;
}

int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
		       int *pgshiftp, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk = NULL, *old_chunk = NULL;
	int p2align;
	unsigned int pgshift;
	int npages, npages_left;
	void *v;
	off_t pgoff, next_pgoff;
	int error;

	error = arch_get_smaller_page_size(NULL, obj->pgsize + 1, NULL,
					   &p2align);
	if (error)
		return error;
	pgshift = p2align + PTL1_SHIFT;
	if (1 << pgshift != obj->pgsize) {
		dkprintf("invalid hugefileobj pagesize: %d\n",
			 obj->pgsize);
		return -EINVAL;
	}

	if (len & ((1 << pgshift) - 1)) {
		dkprintf("invalid hugetlbfs mmap size %d (pagesize %d)\n",
			 len, 1 << pgshift);
		obj->pgshift = 0;
		return -EINVAL;
	}
	if (off & ((1 << pgshift) - 1)) {
		dkprintf("invalid hugetlbfs mmap offset %d (pagesize %d)\n",
			 off, 1 << pgshift);
		obj->pgshift = 0;
		return -EINVAL;
	}


	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	if (obj->pgshift && obj->pgshift != pgshift) {
		kprintf("pgshift changed between two calls on same inode?! had %d now %d\n",
			obj->pgshift, pgshift);
		ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);
		return -EINVAL;
	}
	obj->pgshift = pgshift;

	/* Prealloc upfront, we need to fail here if not enough memory. */
	if (!list_empty(&obj->chunk_list))
		old_chunk = list_first_entry(&obj->chunk_list,
					     struct hugefilechunk, list);
	pgoff = off >> PAGE_SHIFT;
	npages_left = len >> PAGE_SHIFT;
	npages = npages_left;
	while (npages_left) {
		while (old_chunk &&
		       pgoff >= old_chunk->pgoff + old_chunk->npages) {
			if (list_is_last(&old_chunk->list, &obj->chunk_list)) {
				old_chunk = NULL;
				break;
			}
			old_chunk = list_entry(old_chunk->list.next,
					       struct hugefilechunk, list);
		}
		if (old_chunk) {
			next_pgoff = old_chunk->pgoff + old_chunk->npages;
			if (pgoff >= old_chunk->pgoff && pgoff < next_pgoff) {
				npages_left -= next_pgoff - pgoff;
				pgoff = next_pgoff;
				continue;
			}
		}
		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk), IHK_MC_AP_NOWAIT);
		}
		if (!chunk) {
			kprintf("could not allocate hugefileobj chunk\n");
			return -ENOMEM;
		}
		if (npages > npages_left)
			npages = npages_left;
		v = ihk_mc_alloc_aligned_pages_user(npages, p2align,
				IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, virt_addr);
		if (!v) {
			if (npages == 1) {
				dkprintf("could not allocate more pages wth pgshift %d\n",
					 pgshift);
				kfree(chunk);
				/* caller will cleanup the rest */
				return -ENOMEM;
			}
			/* exponential backoff, try less aggressive? */
			npages /= 2;
			continue;
		}
		memset(v, 0, npages * PAGE_SIZE);
		chunk->npages = npages;
		chunk->mem = v;
		chunk->pgoff = pgoff;
		/* ordered list: insert before next (bigger) element */
		if (old_chunk)
			list_add(&chunk->list, old_chunk->list.prev);
		else
			list_add(&chunk->list, obj->chunk_list.prev);
		pgoff += npages;
		npages_left -= npages;
	}
	obj->memobj.size = len;

	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	*pgshiftp = pgshift;

	return 0;
}
@@ -19,6 +19,7 @@
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <pager.h>

#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#else /* POSTK_DEBUG_ARCH_DEP_18 */
@@ -44,6 +45,7 @@ enum {
	MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
	MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
	MF_SHM = 0x40000,
	MF_HUGETLBFS = 0x100000,
};

#define MEMOBJ_READY 0
@@ -83,11 +85,15 @@ static inline int memobj_ref(struct memobj *obj)
	return ihk_atomic_inc_return(&obj->refcnt);
}

static inline void memobj_unref(struct memobj *obj)
static inline int memobj_unref(struct memobj *obj)
{
	if (ihk_atomic_dec_return(&obj->refcnt) == 0) {
	int cnt;

	if ((cnt = ihk_atomic_dec_return(&obj->refcnt)) == 0) {
		(*obj->ops->free)(obj);
	}

	return cnt;
}

static inline int memobj_get_page(struct memobj *obj, off_t off,
@@ -150,5 +156,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
		  int prot, int populate_flags);
int hugefileobj_pre_create(struct pager_create_result *result,
			   struct memobj **objp, int *maxprotp);
int hugefileobj_create(struct memobj *obj, size_t len, off_t off,
		       int *pgshiftp, uintptr_t virt_addr);
void hugefileobj_cleanup(void);

#endif /* HEADER_MEMOBJ_H */

@@ -10,6 +10,7 @@
#include <rusage.h>
#include <ihk/ihk_monitor.h>
#include <arch_rusage.h>
#include <debug.h>

#ifdef ENABLE_RUSAGE

@@ -118,7 +119,8 @@ static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys,
	struct page *page = phys_to_page(phys);

	/* Is It file map and cow page? */
	if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) &&
	if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE |
				     MF_HUGETLBFS)) &&
	    !page) {
		//kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys);
		memory_stat_rss_add(size, pgsize);

@@ -977,9 +977,15 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
	if (range->memobj) {
		memobj_ref(range->memobj);
	}
	error = ihk_mc_pt_free_range(vm->address_space->page_table, vm,
				     (void *)start, (void *)end,
				     (range->flag & VR_PRIVATE)? NULL: range->memobj);
	if (range->memobj && range->memobj->flags & MF_HUGETLBFS) {
		error = ihk_mc_pt_clear_range(vm->address_space->page_table,
					      vm, (void *)start, (void *)end);
	} else {
		error = ihk_mc_pt_free_range(vm->address_space->page_table,
					     vm, (void *)start, (void *)end,
					     (range->flag & VR_PRIVATE) ? NULL :
					     range->memobj);
	}
	if (range->memobj) {
		memobj_unref(range->memobj);
	}
@@ -1271,7 +1277,7 @@ int add_process_memory_range(struct process_vm *vm,
	if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING))
	    && ((flag & VR_PROT_MASK) != VR_PROT_NONE)) {
#if 1
		memset((void*)phys_to_virt(phys), 0, end - start);
		memset((void *)phys_to_virt(phys), 0, end - start);
#else
		if (end - start < (1024*1024)) {
			memset((void*)phys_to_virt(phys), 0, end - start);
@@ -1451,7 +1457,8 @@ int change_prot_process_memory_range(struct process_vm *vm,
	 * We need to keep the page table read-only to trigger a page
	 * fault for copy-on-write later on
	 */
	if (range->memobj && (range->flag & VR_PRIVATE)) {
	if (range->memobj && (range->flag & VR_PRIVATE) &&
	    !(range->memobj->flags & MF_HUGETLBFS)) {
		setattr &= ~PTATTR_WRITABLE;
		if (!clrattr && !setattr) {
			range->flag = newflag;
@@ -2502,6 +2509,13 @@ release_process(struct process *proc)
#endif // PROFILE_ENABLE
	free_thread_pages(proc->main_thread);
	kfree(proc);

	/* no process left */
	mcs_rwlock_reader_lock(&rset->pid1->children_lock, &lock);
	if (list_empty(&rset->pid1->children_list)) {
		hugefileobj_cleanup();
	}
	mcs_rwlock_reader_unlock(&rset->pid1->children_lock, &lock);
}

void

@@ -1614,7 +1614,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
	int p2align;
	void *p = NULL;
	int vrflags;
	intptr_t phys;
	uintptr_t phys;
	struct memobj *memobj = NULL;
	int maxprot;
	int denied;
@@ -1688,7 +1688,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
			goto out;
		}
	}
	else {
	else if (flags & MAP_ANONYMOUS) {
		/* Obtain mapping address */
		error = search_free_space(len, PAGE_SHIFT + p2align, &addr);
		if (error) {
@@ -1722,7 +1722,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
		populated_mapping = 0;
	}

	if (!(prot & PROT_WRITE)) {
	if ((flags & MAP_ANONYMOUS) && !(prot & PROT_WRITE)) {
		error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, 1/* holding memory_range_lock */);
		if (error) {
			kprintf("do_mmap:set_host_vma failed. %d\n", error);
@@ -1774,15 +1774,56 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
#ifdef PROFILE_ENABLE
			profile_event_add(PROFILE_mmap_device_file, len);
#endif // PROFILE_ENABLE
			dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n",
				 __FUNCTION__, fd, off, addr, addr + len);
		}
	}
	if (error) {
		kprintf("%s: error: file mapping failed, fd: %d, error: %d\n",
			__FUNCTION__, error);
			__func__, fd, error);
		goto out;
	}

	/* hugetlbfs files are pre-created in fileobj_create, but
	 * need extra processing
	 */
	if (memobj && (memobj->flags & MF_HUGETLBFS)) {
		error = hugefileobj_create(memobj, len, off, &pgshift,
					   addr0);
		if (error) {
			memobj->ops->free(memobj);
			kprintf("%s: error creating hugetlbfs memobj, fd: %d, error: %d\n",
				__func__, fd, error);
			goto out;
		}
		p2align = pgshift - PAGE_SHIFT;
	}

	/* Obtain mapping address - delayed to use proper p2align */
	if (!(flags & MAP_FIXED))
		error = search_free_space(len, PAGE_SHIFT + p2align,
					  &addr);
	if (error) {
		ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n",
			 len, region->map_end, p2align, error);
		goto out;
	}
	if (!(prot & PROT_WRITE)) {
		error = set_host_vma(addr, len, PROT_READ | PROT_EXEC,
				     1/* holding memory_range_lock */);
		if (error) {
			kprintf("do_mmap:set_host_vma failed. %d\n",
				error);
			goto out;
		}

		ro_vma_mapped = 1;
	}
	if (memobj->flags & MF_HUGETLBFS) {
		dkprintf("Created hugefileobj %p (%d:%x %llx-%llx, fd %d, pgshift %d)\n",
			 memobj, len, off, addr, addr+len, fd, pgshift);
	} else if (memobj->flags & MF_DEV_FILE) {
		dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n",
			 __func__, fd, off, addr, addr + len);
	}
}
/* Prepopulated ANONYMOUS mapping */
else if (!(vrflags & VR_DEMAND_PAGING)

test/issues/1203/C1203.sh (new file, 16 lines)
@@ -0,0 +1,16 @@
#!/bin/bash

. ../../common.sh

if "$MCEXEC" ./C1203T01; then
	echo "*** C1203T01: OK"
else
	echo "*** C1203T01: NG"
fi

# to run as user, chmod 1777 /dev/hugepages
if sudo HUGETLB_VERBOSE=2 HUGETLB_ELFMAP=RW HUGETLB_DEBUG=1 "$MCEXEC" ./C1203T02; then
	echo "*** C1203T02: OK"
else
	echo "*** C1203T02: NG"
fi
test/issues/1203/C1203T01.c (new file, 101 lines)
@@ -0,0 +1,101 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define MAP_SIZE (2 * (2 * 1024 * 1024))

int main(int argc, char *argv[])
{
	int fd;
	long int *addr;
	pid_t pid;


	if ((fd = open("/dev/hugepages/foo", O_CREAT|O_RDWR, 0600)) < 0) {
		perror("open");
		return -1;
	}
	unlink("/dev/hugepages/foo");
	if ((pid = fork()) == 0) {
		if ((addr = mmap(NULL, MAP_SIZE, PROT_READ|PROT_WRITE,
				 MAP_SHARED, fd, 0)) == MAP_FAILED) {
			perror("mmap");
			return -1;
		}
		for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) {
			if (addr[i] != 0) {
				fprintf(stderr,
					"memory wasn't zeroed at offset %lx\n",
					i * sizeof(long int));
				return -1;
			}
		}
		addr[42] = 12;
		if (munmap(addr, MAP_SIZE) < 0) {
			perror("munmap");
			return -1;
		}
		return 0;
	}
	if (pid < 0) {
		perror("fork");
		return -1;
	}

	if (waitpid(pid, NULL, 0) <= 0) {
		perror("waitpid");
		return -1;
	}

	/* bigger extent: check what was set is still here and rest is zero */
	if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
			 fd, 0)) == MAP_FAILED) {
		perror("mmap, 2");
		return -1;
	}
	if (addr[42] != 12) {
		perror("unexpected content");
		return -1;
	}
	for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) {
		if (addr[MAP_SIZE / sizeof(long int) + i] != 0) {
			fprintf(stderr, "memory wasn't zeroed at offset %lx\n",
				MAP_SIZE + i * sizeof(long int));
			return -1;
		}
	}
	addr[MAP_SIZE / sizeof(long int) + 17] = 42;
	if (munmap(addr, MAP_SIZE) < 0) {
		perror("munmap, 2");
		return -1;
	}

	/* same with offset */
	if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_EXEC,
			 MAP_PRIVATE|MAP_NORESERVE, fd, MAP_SIZE))
	    == MAP_FAILED) {
		perror("mmap, 2");
		return -1;
	}
	if (addr[17] != 42) {
		perror("unexpected content (2)");
		return -1;
	}
	for (int i = 0; i < MAP_SIZE / sizeof(long int); i++) {
		if (addr[MAP_SIZE / sizeof(long int) + i] != 0) {
			fprintf(stderr, "memory wasn't zeroed at offset %lx\n",
				2 * MAP_SIZE + i * sizeof(long int));
			return -1;
		}
	}
	if (munmap(addr, MAP_SIZE) < 0) {
		perror("munmap, 3");
		return -1;
	}

	return 0;
}
test/issues/1203/C1203T02.c (new file, 13 lines)
@@ -0,0 +1,13 @@
#include <unistd.h>

#define __unused __attribute__((unused))

static __unused int data[1024*1024] = { 1, 0 };
static __unused int data_zero[1024*1024] = { 0 };
static __unused int const data_ro[1024*1024] = { 1, 0 };
static __unused int const data_ro_zero[1024*1024] = { 0 };

int main(int argc, char *argv[])
{
	return 0;
}
test/issues/1203/Makefile (new file, 12 lines)
@@ -0,0 +1,12 @@
TARGET = C1203T01 C1203T02
SCRIPT = ./C1203.sh
C1203T02: LDFLAGS = -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align
CFLAGS = -Wall

all: $(TARGET)

test: all
	bash $(SCRIPT)

clean:
	rm -f $(TARGET) *.o