Handle hugetlbfs file mapping

Hugetlbfs file mappings are handled differently from regular files:
 - pager_req_create tells us when the file lives on a hugetlbfs mount
 - memory is allocated upfront, because mmap needs to fail if there is
   not enough memory
 - the same memory needs to be handed out again if another process maps
   the same file

This implementation still has some hacks, in particular, the memory
needs to be freed when all mappings are done and the file has been
deleted/closed by all processes.
We cannot know when the file is closed/unlinked easily, so clean up
memory when all processes have exited.

To test, install libhugetlbfs and link a program with the additional
LDFLAGS += -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align

Then run with HUGETLB_ELFMAP=RW set; you can check that this works by
also setting HUGETLB_DEBUG=1 HUGETLB_VERBOSE=2

Change-Id: I327920ff06efd82e91b319b27319f41912169af1
This commit is contained in:
Dominique Martinet
2018-09-21 10:50:31 +09:00
committed by Masamichi Takagi
parent 3e3ccf377c
commit 39f9d7fdff
14 changed files with 555 additions and 16 deletions

View File

@ -229,6 +229,9 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long size, unsigned long size,
struct zap_details *details); struct zap_details *details);
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void) static int symbols_init(void)
{ {
mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount"); mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount");
@ -263,6 +266,11 @@ static int symbols_init(void)
if (WARN_ON(!mcctrl_zap_page_range)) if (WARN_ON(!mcctrl_zap_page_range))
return -EFAULT; return -EFAULT;
mcctrl_hugetlbfs_inode_operations =
(void *) kallsyms_lookup_name("hugetlbfs_inode_operations");
if (WARN_ON(!mcctrl_hugetlbfs_inode_operations))
return -EFAULT;
return arch_symbols_init(); return arch_symbols_init();
} }

View File

@ -428,6 +428,7 @@ extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start, unsigned long start,
unsigned long size, unsigned long size,
struct zap_details *details); struct zap_details *details);
extern struct inode_operations *mcctrl_hugetlbfs_inode_operations;
/* syscall.c */ /* syscall.c */
void pager_add_process(void); void pager_add_process(void);

View File

@ -1165,6 +1165,7 @@ enum {
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
MF_SHM = 0x40000, MF_SHM = 0x40000,
MF_HUGETLBFS = 0x100000,
}; };
static int pager_get_path(struct file *file, char *path) { static int pager_get_path(struct file *file, char *path) {
@ -1254,6 +1255,17 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
goto out; goto out;
} }
if (inode->i_op == mcctrl_hugetlbfs_inode_operations) {
mf_flags = MF_HUGETLBFS;
/* pager is used as handle id on mckernel side, use inode */
pager = (void *)st.ino;
/* retrofit blksize in resp as well through st.size field;
* the actual file size is not used
*/
st.size = st.blksize;
goto out_reply;
}
for (;;) { for (;;) {
spin_lock_irqsave(&pager_lock, irqflags); spin_lock_irqsave(&pager_lock, irqflags);
@ -1322,6 +1334,7 @@ found:
} }
spin_unlock_irqrestore(&pager_lock, irqflags); spin_unlock_irqrestore(&pager_lock, irqflags);
out_reply:
phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp)); phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp));
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0); resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
if (!resp) { if (!resp) {

View File

@ -6,7 +6,7 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
OBJS += rbtree.o OBJS += rbtree.o hugefileobj.o
OBJS += pager.o OBJS += pager.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. # POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
DEPSRCS=$(wildcard $(SRC)/*.c) DEPSRCS=$(wildcard $(SRC)/*.c)

View File

@ -215,6 +215,10 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp, uintptr_t virt_a
goto out; goto out;
} }
if (result.flags & MF_HUGETLBFS) {
return hugefileobj_pre_create(&result, objp, maxprotp);
}
mcs_lock_lock(&fileobj_list_lock, &node); mcs_lock_lock(&fileobj_list_lock, &node);
obj = obj_list_lookup(result.handle); obj = obj_list_lookup(result.handle);
if (obj) if (obj)

303
kernel/hugefileobj.c Normal file
View File

@ -0,0 +1,303 @@
#include <memobj.h>
#include <ihk/mm.h>
#include <kmsg.h>
#include <kmalloc.h>
#include <string.h>
#include <debug.h>
#if DEBUG_HUGEFILEOBJ
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*
 * One physically allocated piece of a hugetlbfs file.
 * Chunks are linked on hugefileobj.chunk_list, which hugefileobj_create()
 * keeps ordered by pgoff.
 */
struct hugefilechunk {
	/* link in hugefileobj.chunk_list */
	struct list_head list;
	/* file offset of the chunk, in PAGE_SIZE units (off >> PAGE_SHIFT) */
	off_t pgoff;
	/* chunk length in PAGE_SIZE pages, as passed to the page allocator */
	int npages;
	/* address returned by ihk_mc_alloc_aligned_pages_user() */
	void *mem;
};
/*
 * Memory object backing one hugetlbfs file.
 * memobj must remain the first member: to_hugefileobj() converts a
 * struct memobj pointer with a plain cast.
 */
struct hugefileobj {
	struct memobj memobj;
	/* huge page size in bytes, taken from pager_create_result.size */
	size_t pgsize;
	/* identifier received from the host in pager_create_result.handle */
	uintptr_t handle;
	/* log2 of the huge page size; 0 until hugefileobj_create() sets it */
	unsigned int pgshift;
	/* list of struct hugefilechunk, protected by chunk_lock */
	struct list_head chunk_list;
	ihk_spinlock_t chunk_lock;
	/* link in the global hugefileobj_list */
	struct list_head obj_list;
};
/* All live hugefileobj instances; hugefileobj_list_lock guards the list. */
static ihk_spinlock_t hugefileobj_list_lock;
static LIST_HEAD(hugefileobj_list);
/*
 * Convert a generic memobj pointer to the hugefileobj that embeds it.
 * This is a plain cast, so it relies on memobj being the first member of
 * struct hugefileobj.
 */
static struct hugefileobj *to_hugefileobj(struct memobj *memobj)
{
	struct hugefileobj *hobj = (struct hugefileobj *)memobj;

	return hobj;
}
/* Return the generic memobj embedded in a hugefileobj. */
static struct memobj *to_memobj(struct hugefileobj *obj)
{
	struct memobj *base = &obj->memobj;

	return base;
}
/*
 * Look up the hugefileobj registered for @handle on hugefileobj_list and
 * take a reference on it.
 *
 * hugefileobj_pre_create() calls this with hugefileobj_list_lock held.
 *
 * Returns the object with its refcount incremented, or NULL when no usable
 * object exists for @handle.
 */
static struct hugefileobj *hugefileobj_lookup(uintptr_t handle)
{
	struct hugefileobj *cur;

	list_for_each_entry(cur, &hugefileobj_list, obj_list) {
		if (cur->handle != handle)
			continue;

		/*
		 * A count of 1 after our increment means the last reference
		 * was already dropped and the object is only waiting for its
		 * free callback to take the list lock: undo the increment
		 * and keep searching.
		 */
		if (memobj_ref(&cur->memobj) > 1)
			return cur;

		ihk_atomic_dec(&cur->memobj.refcnt);
	}

	return NULL;
}
/*
 * memobj get_page callback: translate file offset @off into the physical
 * address of the preallocated memory backing it.
 *
 * @memobj:    hugefileobj to look into
 * @off:       byte offset in the file
 * @p2align:   requested page order; must match the object's huge page size
 * @physp:     receives the physical address on success
 * @pflag:     unused here
 * @virt_addr: unused here
 *
 * Returns 0 on success, -ENOMEM if @p2align does not match the object's
 * page size, -EIO if no chunk covers @off.
 */
static int hugefileobj_get_page(struct memobj *memobj, off_t off,
				int p2align, uintptr_t *physp,
				unsigned long *pflag, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk;
	struct hugefilechunk *found = NULL;
	off_t pgoff;

	if (p2align != obj->pgshift - PTL1_SHIFT) {
		kprintf("%s: p2align %d but expected %d\n",
			__func__, p2align, obj->pgshift - PTL1_SHIFT);
		return -ENOMEM;
	}

	/*
	 * chunk->pgoff and chunk->npages are expressed in PAGE_SIZE units
	 * (see hugefileobj_create()), so the lookup key must be as well.
	 */
	pgoff = off >> PAGE_SHIFT;

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);
	list_for_each_entry(chunk, &obj->chunk_list, list) {
		/* chunk ends before pgoff: keep walking the ordered list */
		if (pgoff >= chunk->pgoff + chunk->npages)
			continue;
		/* either this chunk contains pgoff, or pgoff is in a hole */
		if (pgoff >= chunk->pgoff)
			found = chunk;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);

	/*
	 * Use a dedicated result pointer: when list_for_each_entry() runs
	 * off the end of the list its cursor is not NULL, it points at the
	 * container of the list head.
	 */
	if (!found) {
		kprintf("%s: no segment found for pgoff %lx (obj %p)\n",
			__func__, pgoff, obj);
		return -EIO;
	}

	*physp = virt_to_phys(found->mem + (off - found->pgoff * PAGE_SIZE));
	return 0;
}
/*
 * memobj free callback: unlink the object from hugefileobj_list and release
 * its chunks, its path string and the object itself.
 */
static void hugefileobj_free(struct memobj *memobj)
{
	struct hugefileobj *hobj = to_hugefileobj(memobj);
	struct hugefilechunk *cur, *tmp;

	dkprintf("Destroying hugefileobj %p\n", memobj);

	/* make the object unreachable for lookups first */
	ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
	list_del(&hobj->obj_list);
	ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

	/* don't bother with chunk_lock, memobj refcounting makes this safe */
	list_for_each_entry_safe(cur, tmp, &hobj->chunk_list, list) {
		ihk_mc_free_pages_user(cur->mem, cur->npages);
		kfree(cur);
	}

	kfree(memobj->path);
	kfree(memobj);
}
/* memobj callbacks implemented by hugetlbfs file objects */
struct memobj_ops hugefileobj_ops = {
	.get_page = hugefileobj_get_page,
	.free = hugefileobj_free,
};
/*
 * Destroy every hugefileobj still registered on hugefileobj_list.
 *
 * Each object is created with one extra reference that keeps its memory
 * around after the mapping process is gone; this drops that reference.
 * Objects that still have other references afterwards are reported and
 * freed anyway.
 */
void hugefileobj_cleanup(void)
{
	struct hugefileobj *obj;
	int refcnt;

	for (;;) {
		ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
		if (list_empty(&hugefileobj_list)) {
			ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
			break;
		}
		obj = list_first_entry(&hugefileobj_list, struct hugefileobj,
				       obj_list);
		ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);

		/*
		 * memobj_unref() already ran the free callback (which
		 * unlinks the object) when it returns 0.
		 */
		refcnt = memobj_unref(to_memobj(obj));
		if (refcnt != 0) {
			/* refcnt is an int: print it with %d, not %ld */
			kprintf("%s: obj %p had refcnt %d > 1, destroying anyway\n",
				__func__, obj, refcnt + 1);
			hugefileobj_free(to_memobj(obj));
		}
	}
}
int hugefileobj_pre_create(struct pager_create_result *result,
struct memobj **objp, int *maxprotp)
{
struct hugefileobj *obj;
ihk_mc_spinlock_lock_noirq(&hugefileobj_list_lock);
obj = hugefileobj_lookup(result->handle);
if (obj)
goto out_unlock;
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
if (!obj)
return -ENOMEM;
obj->handle = result->handle;
obj->pgsize = result->size;
obj->pgshift = 0;
INIT_LIST_HEAD(&obj->chunk_list);
ihk_mc_spinlock_init(&obj->chunk_lock);
obj->memobj.flags = result->flags;
obj->memobj.status = MEMOBJ_TO_BE_PREFETCHED;
obj->memobj.ops = &hugefileobj_ops;
/* keep mapping around when process is gone */
ihk_atomic_set(&obj->memobj.refcnt, 2);
if (result->path[0]) {
obj->memobj.path = kmalloc(PATH_MAX, IHK_MC_AP_NOWAIT);
if (!obj->memobj.path) {
kfree(obj);
return -ENOMEM;
}
strncpy(obj->memobj.path, result->path, PATH_MAX);
}
list_add(&obj->obj_list, &hugefileobj_list);
out_unlock:
ihk_mc_spinlock_unlock_noirq(&hugefileobj_list_lock);
*maxprotp = result->maxprot;
*objp = to_memobj(obj);
return 0;
}
/*
 * Reserve memory for a mapping of a hugetlbfs file.
 *
 * @memobj:    object returned by hugefileobj_pre_create()
 * @len:       mapping length in bytes, must be a multiple of the huge page
 *             size
 * @off:       file offset in bytes, must be a multiple of the huge page size
 * @pgshiftp:  receives log2 of the huge page size on success
 * @virt_addr: passed through to the page allocator
 *
 * The range [off, off + len) is walked against the ordered chunk list.
 * Parts already backed by a chunk are reused as is, holes are filled with
 * newly allocated, zeroed chunks. Everything is allocated upfront so that
 * the mapping fails here when there is not enough memory.
 *
 * Returns 0 on success, -EINVAL on bad page size / length / offset,
 * -ENOMEM when memory runs out, or the error returned by
 * arch_get_smaller_page_size(). Chunks added before a failure stay on the
 * list; the caller is expected to clean up.
 */
int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
		       int *pgshiftp, uintptr_t virt_addr)
{
	struct hugefileobj *obj = to_hugefileobj(memobj);
	struct hugefilechunk *chunk = NULL, *old_chunk = NULL;
	int p2align;
	unsigned int pgshift;
	unsigned long pgsize;
	int npages, npages_left, want, hpage_npages;
	void *v;
	off_t pgoff, next_pgoff;
	int error;

	error = arch_get_smaller_page_size(NULL, obj->pgsize + 1, NULL,
					   &p2align);
	if (error)
		return error;

	pgshift = p2align + PTL1_SHIFT;
	/* do the size arithmetic in unsigned long, not int */
	pgsize = 1UL << pgshift;
	if (pgsize != obj->pgsize) {
		dkprintf("invalid hugefileobj pagesize: %lu\n",
			 (unsigned long)obj->pgsize);
		return -EINVAL;
	}

	/*
	 * Argument errors leave obj->pgshift alone: it is shared with other
	 * mappings of the same file and must only change under chunk_lock.
	 */
	if (len & (pgsize - 1)) {
		dkprintf("invalid hugetlbfs mmap size %lu (pagesize %lu)\n",
			 (unsigned long)len, pgsize);
		return -EINVAL;
	}
	if (off & (pgsize - 1)) {
		dkprintf("invalid hugetlbfs mmap offset %lx (pagesize %lu)\n",
			 (unsigned long)off, pgsize);
		return -EINVAL;
	}

	ihk_mc_spinlock_lock_noirq(&obj->chunk_lock);

	if (obj->pgshift && obj->pgshift != pgshift) {
		kprintf("pgshift changed between two calls on same inode?! had %u now %u\n",
			obj->pgshift, pgshift);
		error = -EINVAL;
		goto out_unlock;
	}
	obj->pgshift = pgshift;

	/* Prealloc upfront, we need to fail here if not enough memory. */
	if (!list_empty(&obj->chunk_list))
		old_chunk = list_first_entry(&obj->chunk_list,
					     struct hugefilechunk, list);

	/* number of PAGE_SIZE pages in one huge page */
	hpage_npages = 1 << p2align;
	pgoff = off >> PAGE_SHIFT;
	npages_left = len >> PAGE_SHIFT;
	npages = npages_left;

	/*
	 * "> 0": an existing chunk may extend past the end of the requested
	 * range, which drives npages_left negative.
	 */
	while (npages_left > 0) {
		/* skip existing chunks that end at or before pgoff */
		while (old_chunk &&
		       pgoff >= old_chunk->pgoff + old_chunk->npages) {
			if (list_is_last(&old_chunk->list, &obj->chunk_list)) {
				old_chunk = NULL;
				break;
			}
			old_chunk = list_entry(old_chunk->list.next,
					       struct hugefilechunk, list);
		}

		/* pgoff is already backed: reuse and jump past the chunk */
		if (old_chunk) {
			next_pgoff = old_chunk->pgoff + old_chunk->npages;
			if (pgoff >= old_chunk->pgoff && pgoff < next_pgoff) {
				npages_left -= next_pgoff - pgoff;
				pgoff = next_pgoff;
				continue;
			}
		}

		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk), IHK_MC_AP_NOWAIT);
			if (!chunk) {
				kprintf("could not allocate hugefileobj chunk\n");
				error = -ENOMEM;
				goto out_unlock;
			}
		}

		/*
		 * Size of this allocation: bounded by the backoff limit, by
		 * what is left to map, and by the start of the next existing
		 * chunk so that chunks never overlap.
		 */
		want = npages;
		if (want > npages_left)
			want = npages_left;
		if (old_chunk && (off_t)want > old_chunk->pgoff - pgoff)
			want = old_chunk->pgoff - pgoff;

		v = ihk_mc_alloc_aligned_pages_user(want, p2align,
				IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, virt_addr);
		if (!v) {
			/* a chunk must hold at least one whole huge page */
			if (want <= hpage_npages) {
				dkprintf("could not allocate more pages wth pgshift %d\n",
					 pgshift);
				kfree(chunk);
				/* caller will cleanup the rest */
				error = -ENOMEM;
				goto out_unlock;
			}
			/*
			 * exponential backoff, staying on a multiple of the
			 * huge page size
			 */
			npages = (want / 2) & ~(hpage_npages - 1);
			continue;
		}

		memset(v, 0, (size_t)want * PAGE_SIZE);
		chunk->npages = want;
		chunk->mem = v;
		chunk->pgoff = pgoff;
		/* ordered list: insert before next (bigger) element */
		if (old_chunk)
			list_add(&chunk->list, old_chunk->list.prev);
		else
			list_add(&chunk->list, obj->chunk_list.prev);
		/* the list owns the node now; allocate a new one next time */
		chunk = NULL;

		pgoff += want;
		npages_left -= want;
	}

	obj->memobj.size = len;
	*pgshiftp = pgshift;

out_unlock:
	ihk_mc_spinlock_unlock_noirq(&obj->chunk_lock);
	return error;
}

View File

@ -19,6 +19,7 @@
#include <ihk/lock.h> #include <ihk/lock.h>
#include <errno.h> #include <errno.h>
#include <list.h> #include <list.h>
#include <pager.h>
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ #ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#else /* POSTK_DEBUG_ARCH_DEP_18 */ #else /* POSTK_DEBUG_ARCH_DEP_18 */
@ -44,6 +45,7 @@ enum {
MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */ MF_XPMEM = 0x10000, /* To identify XPMEM attachment pages for rusage accounting */
MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */ MF_ZEROOBJ = 0x20000, /* To identify pages of anonymous, on-demand paging ranges for rusage accounting */
MF_SHM = 0x40000, MF_SHM = 0x40000,
MF_HUGETLBFS = 0x100000,
}; };
#define MEMOBJ_READY 0 #define MEMOBJ_READY 0
@ -83,11 +85,15 @@ static inline int memobj_ref(struct memobj *obj)
return ihk_atomic_inc_return(&obj->refcnt); return ihk_atomic_inc_return(&obj->refcnt);
} }
static inline void memobj_unref(struct memobj *obj) static inline int memobj_unref(struct memobj *obj)
{ {
if (ihk_atomic_dec_return(&obj->refcnt) == 0) { int cnt;
if ((cnt = ihk_atomic_dec_return(&obj->refcnt)) == 0) {
(*obj->ops->free)(obj); (*obj->ops->free)(obj);
} }
return cnt;
} }
static inline int memobj_get_page(struct memobj *obj, off_t off, static inline int memobj_get_page(struct memobj *obj, off_t off,
@ -150,5 +156,10 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp); int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp, int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
int prot, int populate_flags); int prot, int populate_flags);
int hugefileobj_pre_create(struct pager_create_result *result,
struct memobj **objp, int *maxprotp);
int hugefileobj_create(struct memobj *obj, size_t len, off_t off,
int *pgshiftp, uintptr_t virt_addr);
void hugefileobj_cleanup(void);
#endif /* HEADER_MEMOBJ_H */ #endif /* HEADER_MEMOBJ_H */

View File

@ -10,6 +10,7 @@
#include <rusage.h> #include <rusage.h>
#include <ihk/ihk_monitor.h> #include <ihk/ihk_monitor.h>
#include <arch_rusage.h> #include <arch_rusage.h>
#include <debug.h>
#ifdef ENABLE_RUSAGE #ifdef ENABLE_RUSAGE
@ -118,7 +119,8 @@ static inline int rusage_memory_stat_add(struct vm_range *range, uintptr_t phys,
struct page *page = phys_to_page(phys); struct page *page = phys_to_page(phys);
/* Is It file map and cow page? */ /* Is It file map and cow page? */
if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE)) && if ((range->memobj->flags & (MF_DEV_FILE | MF_REG_FILE |
MF_HUGETLBFS)) &&
!page) { !page) {
//kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys); //kprintf("%s: cow,phys=%lx\n", __FUNCTION__, phys);
memory_stat_rss_add(size, pgsize); memory_stat_rss_add(size, pgsize);

View File

@ -977,9 +977,15 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
if (range->memobj) { if (range->memobj) {
memobj_ref(range->memobj); memobj_ref(range->memobj);
} }
error = ihk_mc_pt_free_range(vm->address_space->page_table, vm, if (range->memobj && range->memobj->flags & MF_HUGETLBFS) {
(void *)start, (void *)end, error = ihk_mc_pt_clear_range(vm->address_space->page_table,
(range->flag & VR_PRIVATE)? NULL: range->memobj); vm, (void *)start, (void *)end);
} else {
error = ihk_mc_pt_free_range(vm->address_space->page_table,
vm, (void *)start, (void *)end,
(range->flag & VR_PRIVATE) ? NULL :
range->memobj);
}
if (range->memobj) { if (range->memobj) {
memobj_unref(range->memobj); memobj_unref(range->memobj);
} }
@ -1271,7 +1277,7 @@ int add_process_memory_range(struct process_vm *vm,
if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING)) if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING))
&& ((flag & VR_PROT_MASK) != VR_PROT_NONE)) { && ((flag & VR_PROT_MASK) != VR_PROT_NONE)) {
#if 1 #if 1
memset((void*)phys_to_virt(phys), 0, end - start); memset((void *)phys_to_virt(phys), 0, end - start);
#else #else
if (end - start < (1024*1024)) { if (end - start < (1024*1024)) {
memset((void*)phys_to_virt(phys), 0, end - start); memset((void*)phys_to_virt(phys), 0, end - start);
@ -1451,7 +1457,8 @@ int change_prot_process_memory_range(struct process_vm *vm,
* We need to keep the page table read-only to trigger a page * We need to keep the page table read-only to trigger a page
* fault for copy-on-write later on * fault for copy-on-write later on
*/ */
if (range->memobj && (range->flag & VR_PRIVATE)) { if (range->memobj && (range->flag & VR_PRIVATE) &&
!(range->memobj->flags & MF_HUGETLBFS)) {
setattr &= ~PTATTR_WRITABLE; setattr &= ~PTATTR_WRITABLE;
if (!clrattr && !setattr) { if (!clrattr && !setattr) {
range->flag = newflag; range->flag = newflag;
@ -2502,6 +2509,13 @@ release_process(struct process *proc)
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
free_thread_pages(proc->main_thread); free_thread_pages(proc->main_thread);
kfree(proc); kfree(proc);
/* no process left */
mcs_rwlock_reader_lock(&rset->pid1->children_lock, &lock);
if (list_empty(&rset->pid1->children_list)) {
hugefileobj_cleanup();
}
mcs_rwlock_reader_unlock(&rset->pid1->children_lock, &lock);
} }
void void

View File

@ -1614,7 +1614,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
int p2align; int p2align;
void *p = NULL; void *p = NULL;
int vrflags; int vrflags;
intptr_t phys; uintptr_t phys;
struct memobj *memobj = NULL; struct memobj *memobj = NULL;
int maxprot; int maxprot;
int denied; int denied;
@ -1688,7 +1688,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
goto out; goto out;
} }
} }
else { else if (flags & MAP_ANONYMOUS) {
/* Obtain mapping address */ /* Obtain mapping address */
error = search_free_space(len, PAGE_SHIFT + p2align, &addr); error = search_free_space(len, PAGE_SHIFT + p2align, &addr);
if (error) { if (error) {
@ -1722,7 +1722,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
populated_mapping = 0; populated_mapping = 0;
} }
if (!(prot & PROT_WRITE)) { if ((flags & MAP_ANONYMOUS) && !(prot & PROT_WRITE)) {
error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, 1/* holding memory_range_lock */); error = set_host_vma(addr, len, PROT_READ | PROT_EXEC, 1/* holding memory_range_lock */);
if (error) { if (error) {
kprintf("do_mmap:set_host_vma failed. %d\n", error); kprintf("do_mmap:set_host_vma failed. %d\n", error);
@ -1774,15 +1774,56 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
profile_event_add(PROFILE_mmap_device_file, len); profile_event_add(PROFILE_mmap_device_file, len);
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n",
__FUNCTION__, fd, off, addr, addr + len);
} }
} }
if (error) { if (error) {
kprintf("%s: error: file mapping failed, fd: %d, error: %d\n", kprintf("%s: error: file mapping failed, fd: %d, error: %d\n",
__FUNCTION__, error); __func__, fd, error);
goto out; goto out;
} }
/* hugetlbfs files are pre-created in fileobj_create, but
* need extra processing
*/
if (memobj && (memobj->flags & MF_HUGETLBFS)) {
error = hugefileobj_create(memobj, len, off, &pgshift,
addr0);
if (error) {
memobj->ops->free(memobj);
kprintf("%s: error creating hugetlbfs memobj, fd: %d, error: %d\n",
__func__, fd, error);
goto out;
}
p2align = pgshift - PAGE_SHIFT;
}
/* Obtain mapping address - delayed to use proper p2align */
if (!(flags & MAP_FIXED))
error = search_free_space(len, PAGE_SHIFT + p2align,
&addr);
if (error) {
ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n",
len, region->map_end, p2align, error);
goto out;
}
if (!(prot & PROT_WRITE)) {
error = set_host_vma(addr, len, PROT_READ | PROT_EXEC,
1/* holding memory_range_lock */);
if (error) {
kprintf("do_mmap:set_host_vma failed. %d\n",
error);
goto out;
}
ro_vma_mapped = 1;
}
if (memobj->flags & MF_HUGETLBFS) {
dkprintf("Created hugefileobj %p (%d:%x %llx-%llx, fd %d, pgshift %d)\n",
memobj, len, off, addr, addr+len, fd, pgshift);
} else if (memobj->flags & MF_DEV_FILE) {
dkprintf("%s: device fd: %d off: %lu mapping at %p - %p\n",
__func__, fd, off, addr, addr + len);
}
} }
/* Prepopulated ANONYMOUS mapping */ /* Prepopulated ANONYMOUS mapping */
else if (!(vrflags & VR_DEMAND_PAGING) else if (!(vrflags & VR_DEMAND_PAGING)

16
test/issues/1203/C1203.sh Normal file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# Runs both C1203 test programs through $MCEXEC and prints one
# "*** <name>: OK" or "*** <name>: NG" line per program.
# $MCEXEC is presumably provided by common.sh -- confirm there.

. ../../common.sh

# run_case NAME COMMAND [ARGS...]
# Run COMMAND and report NAME as OK when it exits with status 0, NG otherwise.
run_case() {
	local name=$1
	shift

	if "$@"; then
		echo "*** ${name}: OK"
	else
		echo "*** ${name}: NG"
	fi
}

run_case C1203T01 "$MCEXEC" ./C1203T01

# to run as user, chmod 1777 /dev/hugepages
run_case C1203T02 sudo HUGETLB_VERBOSE=2 HUGETLB_ELFMAP=RW HUGETLB_DEBUG=1 \
	"$MCEXEC" ./C1203T02

101
test/issues/1203/C1203T01.c Normal file
View File

@ -0,0 +1,101 @@
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#define MAP_SIZE (2 * (2 * 1024 * 1024))
/*
 * Test for shared hugetlbfs file mappings across processes.
 *
 *  1. A child maps MAP_SIZE bytes of an unlinked file in /dev/hugepages,
 *     checks that it reads as zero and writes a marker.
 *  2. The parent maps twice that size, checks the child's marker is still
 *     there and that the new second half is zero, then writes a marker
 *     into the second half.
 *  3. The parent maps the file again privately at offset MAP_SIZE, checks
 *     the second marker and that the extended part is zero.
 *
 * Returns 0 on success, -1 on the first failure.
 */
int main(int argc, char *argv[])
{
	int fd;
	int status;
	long int *addr;
	pid_t pid;

	if ((fd = open("/dev/hugepages/foo", O_CREAT|O_RDWR, 0600)) < 0) {
		perror("open");
		return -1;
	}
	/* the open descriptor keeps the file usable after the unlink */
	unlink("/dev/hugepages/foo");

	if ((pid = fork()) == 0) {
		if ((addr = mmap(NULL, MAP_SIZE, PROT_READ|PROT_WRITE,
				 MAP_SHARED, fd, 0)) == MAP_FAILED) {
			perror("mmap");
			return -1;
		}
		for (size_t i = 0; i < MAP_SIZE / sizeof(long int); i++) {
			if (addr[i] != 0) {
				fprintf(stderr,
					"memory wasn't zeroed at offset %zx\n",
					i * sizeof(long int));
				return -1;
			}
		}
		addr[42] = 12;
		if (munmap(addr, MAP_SIZE) < 0) {
			perror("munmap");
			return -1;
		}
		return 0;
	}
	if (pid < 0) {
		perror("fork");
		return -1;
	}
	if (waitpid(pid, &status, 0) <= 0) {
		perror("waitpid");
		return -1;
	}
	/* a failed check in the child must fail the whole test */
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		fprintf(stderr, "child failed, wait status %#x\n",
			(unsigned int)status);
		return -1;
	}

	/* bigger extent: check what was set is still here and rest is zero */
	if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
			 fd, 0)) == MAP_FAILED) {
		perror("mmap, 2");
		return -1;
	}
	if (addr[42] != 12) {
		/* errno is not meaningful here, so do not use perror() */
		fprintf(stderr, "unexpected content\n");
		return -1;
	}
	for (size_t i = 0; i < MAP_SIZE / sizeof(long int); i++) {
		if (addr[MAP_SIZE / sizeof(long int) + i] != 0) {
			fprintf(stderr, "memory wasn't zeroed at offset %zx\n",
				MAP_SIZE + i * sizeof(long int));
			return -1;
		}
	}
	addr[MAP_SIZE / sizeof(long int) + 17] = 42;
	/*
	 * NOTE(review): only the first MAP_SIZE bytes of the 2 * MAP_SIZE
	 * mapping are unmapped here and below -- kept as in the original,
	 * confirm whether the partial unmap is intended.
	 */
	if (munmap(addr, MAP_SIZE) < 0) {
		perror("munmap, 2");
		return -1;
	}

	/* same with offset */
	if ((addr = mmap(NULL, 2 * MAP_SIZE, PROT_READ|PROT_EXEC,
			 MAP_PRIVATE|MAP_NORESERVE, fd, MAP_SIZE))
	    == MAP_FAILED) {
		perror("mmap, 3");
		return -1;
	}
	if (addr[17] != 42) {
		fprintf(stderr, "unexpected content (2)\n");
		return -1;
	}
	for (size_t i = 0; i < MAP_SIZE / sizeof(long int); i++) {
		if (addr[MAP_SIZE / sizeof(long int) + i] != 0) {
			fprintf(stderr, "memory wasn't zeroed at offset %zx\n",
				2 * MAP_SIZE + i * sizeof(long int));
			return -1;
		}
	}
	if (munmap(addr, MAP_SIZE) < 0) {
		perror("munmap, 3");
		return -1;
	}

	return 0;
}

View File

@ -0,0 +1,13 @@
#include <unistd.h>
/*
 * Identifiers starting with two underscores are reserved for the
 * implementation (glibc headers use __unused as a member name), so the
 * attribute shorthand gets a name of its own.
 */
#define MAYBE_UNUSED __attribute__((unused))

/*
 * 4 MiB arrays (with 4-byte int), initialized / zero-initialized and
 * writable / const, that the program never accesses.
 * NOTE(review): presumably these exist so that the program has large
 * data segments for libhugetlbfs to remap -- confirm against the
 * HUGETLB_ELFMAP usage in C1203.sh.
 */
static MAYBE_UNUSED int data[1024*1024] = { 1, 0 };
static MAYBE_UNUSED int data_zero[1024*1024] = { 0 };
static MAYBE_UNUSED int const data_ro[1024*1024] = { 1, 0 };
static MAYBE_UNUSED int const data_ro_zero[1024*1024] = { 0 };

/* Nothing to do at run time: getting here at all is the test. */
int main(int argc, char *argv[])
{
	return 0;
}

12
test/issues/1203/Makefile Normal file
View File

@ -0,0 +1,12 @@
# Builds and runs the C1203 tests.
TARGET = C1203T01 C1203T02
SCRIPT = ./C1203.sh

CFLAGS = -Wall
# Only C1203T02 is linked with the libhugetlbfs linker options.
C1203T02: LDFLAGS = -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align

# None of these names is a file; keep make from being confused by a stray
# file called "all", "test" or "clean".
.PHONY: all test clean

all: $(TARGET)

test: all
	bash $(SCRIPT)

clean:
	rm -f $(TARGET) *.o