diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 9d75267d..065aeaf4 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -814,6 +814,8 @@ static int split_large_page(pte_t *ptep) struct clear_range_args { int free_physical; + uint8_t padding[4]; + struct memobj *memobj; }; static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, @@ -822,13 +824,18 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, struct clear_range_args *args = args0; uint64_t phys; struct page *page; + pte_t old; if (*ptep == PTE_NULL) { return -ENOENT; } phys = *ptep & PT_PHYSMASK; - *ptep = PTE_NULL; + old = xchg(ptep, PTE_NULL); + + if ((old & PFL1_DIRTY) && args->memobj) { + memobj_flush_page(args->memobj, phys, PTL1_SIZE); + } if (args->free_physical) { page = phys_to_page(phys); @@ -848,6 +855,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, struct page_table *pt; int error; struct page *page; + pte_t old; if (*ptep == PTE_NULL) { return -ENOENT; @@ -869,7 +877,11 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, if (*ptep & PFL2_SIZE) { phys = *ptep & PT_PHYSMASK; - *ptep = PTE_NULL; + old = xchg(ptep, PTE_NULL); + + if ((old & PFL2_DIRTY) && args->memobj) { + memobj_flush_page(args->memobj, phys, PTL2_SIZE); + } if (args->free_physical) { page = phys_to_page(phys); @@ -922,7 +934,7 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base, } static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end, - int free_physical) + int free_physical, struct memobj *memobj) { int error; struct clear_range_args args; @@ -935,6 +947,8 @@ static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end, } args.free_physical = free_physical; + args.memobj = memobj; + error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args); return error; } @@ -943,14 +957,14 @@ int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end) { #define 
KEEP_PHYSICAL 0 return clear_range(pt, (uintptr_t)start, (uintptr_t)end, - KEEP_PHYSICAL); + KEEP_PHYSICAL, NULL); } -int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end) +int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end, struct memobj *memobj) { #define FREE_PHYSICAL 1 return clear_range(pt, (uintptr_t)start, (uintptr_t)end, - FREE_PHYSICAL); + FREE_PHYSICAL, memobj); } struct change_attr_args { @@ -1284,7 +1298,7 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, error = -EBUSY; ekprintf("set_range_l1(%lx,%lx,%lx):page exists. %d %lx\n", base, start, end, error, *ptep); - (void)clear_range(args->pt, start, base, KEEP_PHYSICAL); + (void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL); goto out; } @@ -1332,7 +1346,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, "__alloc_new_pt failed. %d %lx\n", base, start, end, error, *ptep); (void)clear_range(args->pt, start, base, - KEEP_PHYSICAL); + KEEP_PHYSICAL, NULL); goto out; } @@ -1343,7 +1357,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, ekprintf("set_range_l2(%lx,%lx,%lx):" "page exists. %d %lx\n", base, start, end, error, *ptep); - (void)clear_range(args->pt, start, base, KEEP_PHYSICAL); + (void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL); goto out; } else { @@ -1400,7 +1414,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, "__alloc_new_pt failed. %d %lx\n", base, start, end, error, *ptep); (void)clear_range(args->pt, start, base, - KEEP_PHYSICAL); + KEEP_PHYSICAL, NULL); goto out; } *ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR; @@ -1410,7 +1424,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, ekprintf("set_range_l3(%lx,%lx,%lx):" "page exists. 
%d %lx\n", base, start, end, error, *ptep); - (void)clear_range(args->pt, start, base, KEEP_PHYSICAL); + (void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL); goto out; } else { @@ -1449,7 +1463,7 @@ int set_range_l4(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, "__alloc_new_pt failed. %d %lx\n", base, start, end, error, *ptep); (void)clear_range(args->pt, start, base, - KEEP_PHYSICAL); + KEEP_PHYSICAL, NULL); goto out; } *ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR; diff --git a/arch/x86/tools/mcreboot-attached-mic.sh.in b/arch/x86/tools/mcreboot-attached-mic.sh.in index 442a787d..4556dbf6 100755 --- a/arch/x86/tools/mcreboot-attached-mic.sh.in +++ b/arch/x86/tools/mcreboot-attached-mic.sh.in @@ -43,7 +43,7 @@ if [ "$1" == "-u" ]; then exit fi -wait_time=50 +wait_time=20 if [ "$modules_were_loaded" == "1" ]; then echo "waiting for ${wait_time} seconds: " >&2 diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index 5115ce71..2dabd983 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -885,6 +885,84 @@ out: return ss; } +static int pager_req_write(ihk_os_t os, uintptr_t handle, off_t off, size_t size, uintptr_t rpa) +{ + ssize_t ss; + struct pager *pager; + struct file *file = NULL; + uintptr_t phys = -1; + ihk_device_t dev = ihk_os_to_dev(os); + void *buf = NULL; + mm_segment_t fs; + loff_t pos; + loff_t fsize; + size_t len; + + dprintk("pager_req_write(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa); + + ss = down_interruptible(&pager_sem); + if (ss) { + printk("pager_req_write(%lx,%lx,%lx,%lx): signaled. %ld\n", handle, off, size, rpa, ss); + goto out; + } + + list_for_each_entry(pager, &pager_list, list) { + if ((uintptr_t)pager == handle) { + file = pager->rwfile; + break; + } + } + if (file) { + get_file(file); + } + up(&pager_sem); + + if (!file) { + ss = -EBADF; + printk("pager_req_write(%lx,%lx,%lx,%lx):pager not found. 
%ld\n", handle, off, size, rpa, ss); + goto out; + } + + /* + * XXX: Is there a way to stay at roughly the vfs_write layer + * while avoiding the file size update? + */ + fsize = i_size_read(file->f_mapping->host); + if (off >= fsize) { + ss = 0; + goto out; + } + + phys = ihk_device_map_memory(dev, rpa, size); + buf = ihk_device_map_virtual(dev, phys, size, NULL, 0); + fs = get_fs(); + set_fs(KERNEL_DS); + pos = off; + len = size; + if ((off + size) > fsize) { + len = fsize - off; + } + ss = vfs_write(file, buf, len, &pos); + set_fs(fs); + if (ss < 0) { + printk("pager_req_write(%lx,%lx,%lx,%lx):pwrite failed. %ld\n", handle, off, size, rpa, ss); + goto out; + } + +out: + if (buf) { + ihk_device_unmap_virtual(dev, buf, size); + } + if (phys != (uintptr_t)-1) { + ihk_device_unmap_memory(dev, phys, size); + } + if (file) { + fput(file); + } + dprintk("pager_req_write(%lx,%lx,%lx,%lx): %ld\n", handle, off, size, rpa, ss); + return ss; +} + static long pager_call(ihk_os_t os, struct syscall_request *req) { long ret; @@ -894,6 +972,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) #define PAGER_REQ_CREATE 0x0001 #define PAGER_REQ_RELEASE 0x0002 #define PAGER_REQ_READ 0x0003 +#define PAGER_REQ_WRITE 0x0004 case PAGER_REQ_CREATE: ret = pager_req_create(os, req->args[1], req->args[2]); break; @@ -906,6 +985,10 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) ret = pager_req_read(os, req->args[1], req->args[2], req->args[3], req->args[4]); break; + case PAGER_REQ_WRITE: + ret = pager_req_write(os, req->args[1], req->args[2], req->args[3], req->args[4]); + break; + default: ret = -ENOSYS; break; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 939735fa..2400dbfb 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 9f906d2a..54197cb4 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -33,12 +33,14 @@ static 
memobj_release_func_t fileobj_release; static memobj_ref_func_t fileobj_ref; static memobj_get_page_func_t fileobj_get_page; static memobj_copy_page_func_t fileobj_copy_page; +static memobj_flush_page_func_t fileobj_flush_page; static struct memobj_ops fileobj_ops = { .release = &fileobj_release, .ref = &fileobj_ref, .get_page = &fileobj_get_page, .copy_page = &fileobj_copy_page, + .flush_page = &fileobj_flush_page, }; static struct fileobj *to_fileobj(struct memobj *memobj) @@ -528,3 +530,31 @@ out: memobj, orgpa, p2align, newpa); return newpa; } + +static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, + size_t pgsize) +{ + struct fileobj *obj = to_fileobj(memobj); + struct page *page; + ihk_mc_user_context_t ctx; + ssize_t ss; + + page = phys_to_page(phys); + memobj_unlock(&obj->memobj); + + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_WRITE; + ihk_mc_syscall_arg1(&ctx) = obj->handle; + ihk_mc_syscall_arg2(&ctx) = page->offset; + ihk_mc_syscall_arg3(&ctx) = pgsize; + ihk_mc_syscall_arg4(&ctx) = phys; + + ss = syscall_generic_forwarding(__NR_mmap, &ctx); + if (ss != pgsize) { + dkprintf("fileobj_flush_page(%p,%lx,%lx): %ld (%lx)\n", + memobj, phys, pgsize, ss, ss); + /* through */ + } + + memobj_lock(&obj->memobj); + return 0; +} diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index eab0f7f9..9dc4fe83 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -15,12 +15,14 @@ typedef void memobj_release_func_t(struct memobj *obj); typedef void memobj_ref_func_t(struct memobj *obj); typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp); typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align); +typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize); struct memobj_ops { memobj_release_func_t * release; memobj_ref_func_t * ref; memobj_get_page_func_t * get_page; memobj_copy_page_func_t * copy_page; + 
memobj_flush_page_func_t * flush_page; }; static inline void memobj_release(struct memobj *obj) @@ -45,6 +47,11 @@ static inline uintptr_t memobj_copy_page(struct memobj *obj, return (*obj->ops->copy_page)(obj, orgphys, p2align); } +static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t pgsize) +{ + return (*obj->ops->flush_page)(obj, phys, pgsize); +} + static inline void memobj_lock(struct memobj *obj) { ihk_mc_spinlock_lock_noirq(&obj->lock); diff --git a/kernel/include/pager.h b/kernel/include/pager.h index d595dc84..0648fb44 100644 --- a/kernel/include/pager.h +++ b/kernel/include/pager.h @@ -7,6 +7,7 @@ enum pager_op { PAGER_REQ_CREATE = 0x0001, PAGER_REQ_RELEASE = 0x0002, PAGER_REQ_READ = 0x0003, + PAGER_REQ_WRITE = 0x0004, }; /* diff --git a/kernel/process.c b/kernel/process.c index 5632ec4c..1774f152 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -324,15 +324,15 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) memobj_lock(range->memobj); } error = ihk_mc_pt_free_range(vm->page_table, - (void *)start, (void *)end); + (void *)start, (void *)end, range->memobj); if (range->memobj) { memobj_unlock(range->memobj); } ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error && (error != -ENOENT)) { ekprintf("free_process_memory_range(%p,%lx-%lx):" - "ihk_mc_pt_free_range(%lx-%lx) failed. %d\n", - vm, start0, end0, start, end, error); + "ihk_mc_pt_free_range(%lx-%lx,%p) failed. 
%d\n", + vm, start0, end0, start, end, range->memobj, error); /* through */ } } diff --git a/kernel/syscall.c b/kernel/syscall.c index d39209ab..9cf1f6ac 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -464,14 +464,6 @@ SYSCALL_DECLARE(mmap) goto out2; } - if ((flags & MAP_SHARED) && !(flags & MAP_ANONYMOUS)) { - ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):NYI:shared mapped file%lx\n", - addr0, len0, prot, flags, fd, off, - (flags & ~(supported_flags | ignored_flags))); - error = -EINVAL; - goto out2; - } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); if (flags & MAP_FIXED) { diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index ac844a12..1a099bc1 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -4,6 +4,8 @@ #include #include +struct memobj; + enum ihk_mc_gma_type { IHK_MC_GMA_MAP_START, IHK_MC_GMA_MAP_END, @@ -96,7 +98,7 @@ int ihk_mc_pt_change_page(page_table_t pt, void *virt, int ihk_mc_pt_clear_page(page_table_t pt, void *virt); int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt); int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end); -int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end); +int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end, struct memobj *memobj); int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end, enum ihk_mc_pt_attribute clrattr, enum ihk_mc_pt_attribute setattr);