diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 653b2d23..8aa7d02f 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -746,6 +746,17 @@ static struct list_head pager_list = LIST_HEAD_INIT(pager_list); struct pager_create_result { uintptr_t handle; int maxprot; + uint32_t flags; +}; + +enum { + /* for memobj.flags */ + MF_HAS_PAGER = 0x0001, + MF_SHMDT_OK = 0x0002, + MF_IS_REMOVABLE = 0x0004, + MF_PREFETCH = 0x0008, + MF_ZEROFILL = 0x0010, + MF_END }; static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) @@ -760,6 +771,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa) struct pager *newpager = NULL; uintptr_t phys; struct kstat st; + int mf_flags = 0; dprintk("pager_req_create(%d,%lx)\n", fd, (long)result_pa); @@ -856,6 +868,7 @@ found: resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0); resp->handle = (uintptr_t)pager; resp->maxprot = maxprot; + resp->flags = mf_flags; ihk_device_unmap_virtual(dev, resp, sizeof(*resp)); ihk_device_unmap_memory(dev, phys, sizeof(*resp)); diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 5027b7d5..649fca19 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -29,22 +29,26 @@ #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) kprintf(__VA_ARGS__) -static ihk_spinlock_t fileobj_list_lock = SPIN_LOCK_UNLOCKED; +mcs_lock_node_t fileobj_list_lock = {0, NULL}; static LIST_HEAD(fileobj_list); +#define FILEOBJ_PAGE_HASH_SHIFT 9 +#define FILEOBJ_PAGE_HASH_SIZE (1 << FILEOBJ_PAGE_HASH_SHIFT) +#define FILEOBJ_PAGE_HASH_MASK (FILEOBJ_PAGE_HASH_SIZE - 1) + struct fileobj { - struct memobj memobj; /* must be first */ - long sref; - long cref; - uintptr_t handle; - struct list_head page_list; - struct list_head list; + struct memobj memobj; /* must be first */ + long sref; + long cref; + uintptr_t handle; + struct list_head list; + struct list_head page_hash[FILEOBJ_PAGE_HASH_SIZE]; + mcs_lock_node_t page_hash_locks[FILEOBJ_PAGE_HASH_SIZE]; }; static memobj_release_func_t fileobj_release; static memobj_ref_func_t fileobj_ref; static memobj_get_page_func_t fileobj_get_page; -static memobj_copy_page_func_t fileobj_copy_page; static memobj_flush_page_func_t fileobj_flush_page; static memobj_invalidate_page_func_t fileobj_invalidate_page; static memobj_lookup_page_func_t fileobj_lookup_page; @@ -53,7 +57,7 @@ static struct memobj_ops fileobj_ops = { .release = &fileobj_release, .ref = &fileobj_ref, .get_page = &fileobj_get_page, - .copy_page = &fileobj_copy_page, + .copy_page = NULL, .flush_page = &fileobj_flush_page, .invalidate_page = &fileobj_invalidate_page, .lookup_page = &fileobj_lookup_page, @@ -72,28 +76,36 @@ static struct memobj *to_memobj(struct fileobj *fileobj) /*********************************************************************** * page_list */ -static void page_list_init(struct fileobj *obj) +static void fileobj_page_hash_init(struct fileobj *obj) { - INIT_LIST_HEAD(&obj->page_list); + int i; + for (i = 0; i < FILEOBJ_PAGE_HASH_SIZE; ++i) { + mcs_lock_init(&obj->page_hash_locks[i]); + INIT_LIST_HEAD(&obj->page_hash[i]); + } return; } -static void page_list_insert(struct fileobj *obj, struct page *page) +/* NOTE: caller must hold page_hash_locks[hash] */ +static void __fileobj_page_hash_insert(struct fileobj *obj, + struct page *page, int hash) { - list_add(&page->list, &obj->page_list); - return; + list_add(&page->list, &obj->page_hash[hash]); } -static void page_list_remove(struct fileobj *obj, struct page *page) +/* NOTE: caller must hold page_hash_locks[hash] */ +static void __fileobj_page_hash_remove(struct page *page) { list_del(&page->list); } -static struct page *page_list_lookup(struct fileobj *obj, off_t off) +/* NOTE: caller must hold page_hash_locks[hash] */ +static struct page *__fileobj_page_hash_lookup(struct fileobj *obj, + int hash, off_t off) { struct page *page; - list_for_each_entry(page, &obj->page_list, list) { + list_for_each_entry(page, &obj->page_hash[hash], list) { if ((page->mode != PM_WILL_PAGEIO) && (page->mode != PM_PAGEIO) && (page->mode != PM_DONE_PAGEIO) @@ -104,6 +116,7 @@ static struct page *page_list_lookup(struct fileobj *obj, off_t off) obj, off, page->mode); panic("page_list_lookup:invalid obj page"); } + if (page->offset == off) { goto out; } @@ -114,13 +127,22 @@ out: return page; } -static struct page *page_list_first(struct fileobj *obj) +static struct page *fileobj_page_hash_first(struct fileobj *obj) { - if (list_empty(&obj->page_list)) { - return NULL; + int i; + + for (i = 0; i < FILEOBJ_PAGE_HASH_SIZE; ++i) { + if (!list_empty(&obj->page_hash[i])) { + break; + } } - return list_first_entry(&obj->page_list, struct page, list); + if (i != FILEOBJ_PAGE_HASH_SIZE) { + return list_first_entry(&obj->page_hash[i], struct page, list); + } + else { + return NULL; + } } /*********************************************************************** @@ -163,10 +185,11 @@ static struct fileobj *obj_list_lookup(uintptr_t handle) int fileobj_create(int fd, struct memobj **objp, int *maxprotp) { ihk_mc_user_context_t ctx; - struct pager_create_result result; // XXX: assumes contiguous physical + struct pager_create_result result __attribute__((aligned(64))); int error; struct fileobj *newobj = NULL; struct fileobj *obj; + mcs_lock_node_t node; dkprintf("fileobj_create(%d)\n", fd); newobj = kmalloc(sizeof(*newobj), IHK_MC_AP_NOWAIT); @@ -179,6 +202,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE; ihk_mc_syscall_arg1(&ctx) = fd; ihk_mc_syscall_arg2(&ctx) = virt_to_phys(&result); + memset(&result, 0, sizeof(result)); error = syscall_generic_forwarding(__NR_mmap, &ctx); if (error) { @@ -192,14 +216,15 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) newobj->handle = result.handle; newobj->sref = 1; newobj->cref = 1; - page_list_init(newobj); + fileobj_page_hash_init(newobj); ihk_mc_spinlock_init(&newobj->memobj.lock); - ihk_mc_spinlock_lock_noirq(&fileobj_list_lock); + mcs_lock_lock_noirq(&fileobj_list_lock, &node); obj = obj_list_lookup(result.handle); if (!obj) { obj_list_insert(newobj); obj = newobj; + to_memobj(obj)->flags |= result.flags; newobj = NULL; } else { @@ -208,7 +233,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp) memobj_unlock(&obj->memobj); /* locked by obj_list_lookup() */ } - ihk_mc_spinlock_unlock_noirq(&fileobj_list_lock); + mcs_lock_unlock_noirq(&fileobj_list_lock, &node); error = 0; *objp = to_memobj(obj); @@ -239,6 +264,7 @@ static void fileobj_release(struct memobj *memobj) long free_sref = 0; uintptr_t free_handle; struct fileobj *free_obj = NULL; + mcs_lock_node_t node; dkprintf("fileobj_release(%p %lx)\n", obj, obj->handle); @@ -254,17 +280,17 @@ static void fileobj_release(struct memobj *memobj) memobj_unlock(&obj->memobj); if (free_obj) { - ihk_mc_spinlock_lock_noirq(&fileobj_list_lock); + mcs_lock_lock_noirq(&fileobj_list_lock, &node); /* zap page_list */ for (;;) { struct page *page; void *page_va; - page = page_list_first(obj); + page = fileobj_page_hash_first(obj); if (!page) { break; } - page_list_remove(obj, page); + __fileobj_page_hash_remove(page); page_va = phys_to_virt(page_to_phys(page)); if (ihk_atomic_read(&page->count) != 1) { @@ -295,7 +321,7 @@ static void fileobj_release(struct memobj *memobj) #endif } obj_list_remove(free_obj); - ihk_mc_spinlock_unlock_noirq(&fileobj_list_lock); + mcs_lock_unlock_noirq(&fileobj_list_lock, &node); kfree(free_obj); } @@ -341,83 +367,101 @@ static void fileobj_do_pageio(void *args0) struct page *page; ihk_mc_user_context_t ctx; ssize_t ss; + mcs_lock_node_t mcs_node; + int hash = (off >> PAGE_SHIFT) & FILEOBJ_PAGE_HASH_MASK; - memobj_lock(&obj->memobj); - page = page_list_lookup(obj, off); + mcs_lock_lock_noirq(&obj->page_hash_locks[hash], + &mcs_node); + page = __fileobj_page_hash_lookup(obj, hash, off); if (!page) { goto out; } while (page->mode == PM_PAGEIO) { - memobj_unlock(&obj->memobj); + mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], + &mcs_node); cpu_pause(); - memobj_lock(&obj->memobj); + mcs_lock_lock_noirq(&obj->page_hash_locks[hash], + &mcs_node); } if (page->mode == PM_WILL_PAGEIO) { - page->mode = PM_PAGEIO; - memobj_unlock(&obj->memobj); - - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_READ; - ihk_mc_syscall_arg1(&ctx) = obj->handle; - ihk_mc_syscall_arg2(&ctx) = off; - ihk_mc_syscall_arg3(&ctx) = pgsize; - ihk_mc_syscall_arg4(&ctx) = page_to_phys(page); - - ss = syscall_generic_forwarding(__NR_mmap, &ctx); - - memobj_lock(&obj->memobj); - if (page->mode != PM_PAGEIO) { - kprintf("fileobj_do_pageio(%p,%lx,%lx):" - "invalid mode %x\n", - obj, off, pgsize, page->mode); - panic("fileobj_do_pageio:invalid page mode"); + if (to_memobj(obj)->flags & MF_ZEROFILL) { + void *virt = phys_to_virt(page_to_phys(page)); + memset(virt, 0, PAGE_SIZE); } + else { + page->mode = PM_PAGEIO; + mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], + &mcs_node); - if (ss == 0) { - dkprintf("fileobj_do_pageio(%p,%lx,%lx):EOF? %ld\n", - obj, off, pgsize, ss); - page->mode = PM_PAGEIO_EOF; - goto out; - } - else if (ss != pgsize) { - kprintf("fileobj_do_pageio(%p,%lx,%lx):" - "read failed. %ld\n", - obj, off, pgsize, ss); - page->mode = PM_PAGEIO_ERROR; - goto out; + ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_READ; + ihk_mc_syscall_arg1(&ctx) = obj->handle; + ihk_mc_syscall_arg2(&ctx) = off; + ihk_mc_syscall_arg3(&ctx) = pgsize; + ihk_mc_syscall_arg4(&ctx) = page_to_phys(page); + + dkprintf("%s: __NR_mmap for handle 0x%lx\n", + __FUNCTION__, obj->handle); + ss = syscall_generic_forwarding(__NR_mmap, &ctx); + + mcs_lock_lock_noirq(&obj->page_hash_locks[hash], + &mcs_node); + if (page->mode != PM_PAGEIO) { + kprintf("fileobj_do_pageio(%p,%lx,%lx):" + "invalid mode %x\n", + obj, off, pgsize, page->mode); + panic("fileobj_do_pageio:invalid page mode"); + } + + if (ss == 0) { + dkprintf("fileobj_do_pageio(%p,%lx,%lx):EOF? %ld\n", + obj, off, pgsize, ss); + page->mode = PM_PAGEIO_EOF; + goto out; + } + else if (ss != pgsize) { + kprintf("fileobj_do_pageio(%p,%lx,%lx):" + "read failed. %ld\n", + obj, off, pgsize, ss); + page->mode = PM_PAGEIO_ERROR; + goto out; + } } page->mode = PM_DONE_PAGEIO; } out: - memobj_unlock(&obj->memobj); + mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], + &mcs_node); fileobj_release(&obj->memobj); /* got fileobj_get_page() */ kfree(args0); dkprintf("fileobj_do_pageio(%p,%lx,%lx):\n", obj, off, pgsize); return; } -static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag) +static int fileobj_get_page(struct memobj *memobj, off_t off, + int p2align, uintptr_t *physp, unsigned long *pflag) { struct thread *proc = cpu_local_var(current); struct fileobj *obj = to_fileobj(memobj); - int error; + int error = -1; void *virt = NULL; int npages; uintptr_t phys = -1; struct page *page; struct pageio_args *args = NULL; + mcs_lock_node_t mcs_node; + int hash = (off >> PAGE_SHIFT) & FILEOBJ_PAGE_HASH_MASK; dkprintf("fileobj_get_page(%p,%lx,%x,%p)\n", obj, off, p2align, physp); - - memobj_lock(&obj->memobj); if (p2align != PAGE_P2ALIGN) { - error = -ENOMEM; - goto out; + return -ENOMEM; } - page = page_list_lookup(obj, off); + mcs_lock_lock_noirq(&obj->page_hash_locks[hash], + &mcs_node); + page = __fileobj_page_hash_lookup(obj, hash, off); if (!page || (page->mode == PM_WILL_PAGEIO) || (page->mode == PM_PAGEIO)) { args = kmalloc(sizeof(*args), IHK_MC_AP_NOWAIT); @@ -445,13 +489,15 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintp if (page->mode != PM_NONE) { panic("fileobj_get_page:invalid new page"); } - page->mode = PM_WILL_PAGEIO; page->offset = off; ihk_atomic_set(&page->count, 1); - page_list_insert(obj, page); + __fileobj_page_hash_insert(obj, page, hash); + page->mode = PM_WILL_PAGEIO; } + memobj_lock(&obj->memobj); ++obj->cref; /* for fileobj_do_pageio() */ + memobj_unlock(&obj->memobj); args->fileobj = obj; args->objoff = off; @@ -483,7 +529,8 @@ static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintp *physp = page_to_phys(page); virt = NULL; out: - memobj_unlock(&obj->memobj); + mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], + &mcs_node); if (virt) { ihk_mc_free_pages(virt, npages); } @@ -495,78 +542,6 @@ out: return error; } -static uintptr_t fileobj_copy_page( - struct memobj *memobj, uintptr_t orgpa, int p2align) -{ - struct page *orgpage = phys_to_page(orgpa); - size_t pgsize = PAGE_SIZE << p2align; - int npages = 1 << p2align; - void *newkva = NULL; - uintptr_t newpa = -1; - void *orgkva; - int count; - - dkprintf("fileobj_copy_page(%p,%lx,%d)\n", memobj, orgpa, p2align); - if (p2align != PAGE_P2ALIGN) { - panic("p2align"); - } - - memobj_lock(memobj); - for (;;) { - if (!orgpage || orgpage->mode != PM_MAPPED) { - kprintf("fileobj_copy_page(%p,%lx,%d):" - "invalid cow page. %x\n", - memobj, orgpa, p2align, orgpage ? orgpage->mode : 0); - panic("fileobj_copy_page:invalid cow page"); - } - count = ihk_atomic_read(&orgpage->count); - if (count == 2) { // XXX: private only - list_del(&orgpage->list); - ihk_atomic_dec(&orgpage->count); - orgpage->mode = PM_NONE; - newpa = orgpa; - break; - } - if (count <= 0) { - kprintf("fileobj_copy_page(%p,%lx,%d):" - "orgpage count corrupted. %x\n", - memobj, orgpa, p2align, count); - panic("fileobj_copy_page:orgpage count corrupted"); - } - if (newkva) { - orgkva = phys_to_virt(orgpa); - memcpy(newkva, orgkva, pgsize); - ihk_atomic_dec(&orgpage->count); - newpa = virt_to_phys(newkva); - if (phys_to_page(newpa)) { - page_map(phys_to_page(newpa)); - } - newkva = NULL; /* avoid ihk_mc_free_pages() */ - break; - } - - memobj_unlock(memobj); - newkva = ihk_mc_alloc_aligned_pages(npages, p2align, - IHK_MC_AP_NOWAIT); - if (!newkva) { - kprintf("fileobj_copy_page(%p,%lx,%d):" - "alloc page failed\n", - memobj, orgpa, p2align); - goto out; - } - memobj_lock(memobj); - } - memobj_unlock(memobj); - -out: - if (newkva) { - ihk_mc_free_pages(newkva, npages); - } - dkprintf("fileobj_copy_page(%p,%lx,%d): %lx\n", - memobj, orgpa, p2align, newpa); - return newpa; -} - static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, size_t pgsize) { @@ -575,6 +550,10 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, ihk_mc_user_context_t ctx; ssize_t ss; + if (to_memobj(obj)->flags & MF_ZEROFILL) { + return 0; + } + page = phys_to_page(phys); if (!page) { kprintf("%s: warning: tried to flush non-existing page for phys addr: 0x%lx\n", @@ -603,63 +582,48 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys, static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys, size_t pgsize) { - struct fileobj *obj = to_fileobj(memobj); - int error; - struct page *page; - dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize); - if (!(page = phys_to_page(phys)) - || !(page = page_list_lookup(obj, page->offset))) { - error = 0; - goto out; - } - - if (ihk_atomic_read(&page->count) == 1) { - if (page_unmap(page)) { - ihk_mc_free_pages(phys_to_virt(phys), - pgsize/PAGE_SIZE); - } - } - - error = 0; -out: - dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n", - memobj, phys, pgsize, error); - return error; + /* TODO: keep track of reverse mappings so that invalidation + * can be performed */ + kprintf("%s: WARNING: file mapping invalidation not supported\n", + __FUNCTION__); + return 0; } -static int fileobj_lookup_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag) +static int fileobj_lookup_page(struct memobj *memobj, off_t off, + int p2align, uintptr_t *physp, unsigned long *pflag) { struct fileobj *obj = to_fileobj(memobj); - int error; - uintptr_t phys = -1; + int error = -1; struct page *page; + mcs_lock_node_t mcs_node; + int hash = (off >> PAGE_SHIFT) & FILEOBJ_PAGE_HASH_MASK; dkprintf("fileobj_lookup_page(%p,%lx,%x,%p)\n", obj, off, p2align, physp); - memobj_lock(&obj->memobj); if (p2align != PAGE_P2ALIGN) { - error = -ENOMEM; - goto out; + return -ENOMEM; } - page = page_list_lookup(obj, off); + mcs_lock_lock_noirq(&obj->page_hash_locks[hash], + &mcs_node); + + page = __fileobj_page_hash_lookup(obj, hash, off); if (!page) { - error = -ENOENT; - dkprintf("fileobj_lookup_page(%p,%lx,%x,%p): page not found. %d\n", obj, off, p2align, physp, error); goto out; } - phys = page_to_phys(page); + *physp = page_to_phys(page); error = 0; - if (physp) { - *physp = phys; - } + out: - memobj_unlock(&obj->memobj); - dkprintf("fileobj_lookup_page(%p,%lx,%x,%p): %d %lx\n", - obj, off, p2align, physp, error, phys); + mcs_lock_unlock_noirq(&obj->page_hash_locks[hash], + &mcs_node); + + dkprintf("fileobj_lookup_page(%p,%lx,%x,%p): %d \n", + obj, off, p2align, physp, error); return error; } + diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index 78fd7876..79ac70fc 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -32,6 +32,9 @@ enum { MF_HAS_PAGER = 0x0001, MF_SHMDT_OK = 0x0002, MF_IS_REMOVABLE = 0x0004, + MF_PREFETCH = 0x0008, + MF_ZEROFILL = 0x0010, + MF_END }; struct memobj { diff --git a/kernel/include/pager.h b/kernel/include/pager.h index 48f33e12..95f6d38a 100644 --- a/kernel/include/pager.h +++ b/kernel/include/pager.h @@ -30,7 +30,7 @@ enum pager_op { struct pager_create_result { uintptr_t handle; int maxprot; - int8_t padding[4]; + uint32_t flags; }; /* diff --git a/kernel/process.c b/kernel/process.c index d8ed9105..413aab4f 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1384,6 +1384,11 @@ static int sync_one_page(void *arg0, page_table_t pt, pte_t *ptep, flush_tlb_single((uintptr_t)pgaddr); /* XXX: TLB flush */ phys = pte_get_phys(ptep); + if (args->memobj->flags & MF_ZEROFILL) { + error = 0; + goto out; + } + error = memobj_flush_page(args->memobj, phys, pgsize); if (error) { ekprintf("sync_one_page(%p,%p,%p %#lx,%p,%d):" @@ -1411,11 +1416,19 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, args.memobj = range->memobj; ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - memobj_lock(range->memobj); + + if (!(range->memobj->flags & MF_ZEROFILL)) { + memobj_lock(range->memobj); + } + error = visit_pte_range(vm->address_space->page_table, (void *)start, - (void *)end, range->pgshift, VPTEF_SKIP_NULL, - &sync_one_page, &args); - memobj_unlock(range->memobj); + (void *)end, range->pgshift, VPTEF_SKIP_NULL, + &sync_one_page, &args); + + if (!(range->memobj->flags & MF_ZEROFILL)) { + memobj_unlock(range->memobj); + } + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { ekprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):" diff --git a/kernel/syscall.c b/kernel/syscall.c index c13ec73d..63315f3b 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1363,6 +1363,10 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, goto out; } + if (memobj->flags & MF_PREFETCH) { + populated_mapping = 1; + } + error = 0; p = NULL; memobj = NULL;