diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index cd36a7ba..a649f950 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -289,11 +289,6 @@ void init_syscall(void) void init_cpu(void) { - asm volatile ( - "mov %%cr0,%%rax;" - "or $0x10000,%%rax;" - "mov %%rax,%%cr0" - ::: "%rax"); init_fpu(); init_lapic(); init_syscall(); @@ -499,7 +494,7 @@ int ihk_mc_unregister_interrupt_handler(int vector, extern unsigned long __page_fault_handler_address; -void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, unsigned long, void *)) +void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, void *)) { __page_fault_handler_address = (unsigned long)h; } diff --git a/arch/x86/kernel/include/registers.h b/arch/x86/kernel/include/registers.h index d06e4bed..80716dcb 100644 --- a/arch/x86/kernel/include/registers.h +++ b/arch/x86/kernel/include/registers.h @@ -128,8 +128,6 @@ struct x86_regs { unsigned long error, rip, cs, rflags, rsp, ss; }; -#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp) - /* * Page fault error code bits: * diff --git a/arch/x86/kernel/interrupt.S b/arch/x86/kernel/interrupt.S index 7745af1d..88b2d3b4 100644 --- a/arch/x86/kernel/interrupt.S +++ b/arch/x86/kernel/interrupt.S @@ -66,8 +66,8 @@ page_fault: cld PUSH_ALL_REGS movq %cr2, %rdi - movq 80(%rsp),%rsi - movq %rsp, %rdx + movq %rsp, %rsi + movq %rbp, %rdx movq __page_fault_handler_address(%rip), %rax andq %rax, %rax jz 1f diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 3aa0045f..0c8cfc4e 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -7,7 +7,6 @@ #include #include #include -#include #define ekprintf(...) kprintf(__VA_ARGS__) @@ -797,20 +796,15 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t star { struct clear_range_args *args = args0; uint64_t phys; - struct page *page; if (*ptep == PTE_NULL) { return -ENOENT; } phys = *ptep & PT_PHYSMASK; - *ptep = PTE_NULL; + *ptep = 0; if (args->free_physical) { - page = phys_to_page(phys); - if (page && (page->mode == PM_MAPPED) && !page_unmap(page)) { - return 0; - } ihk_mc_free_pages(phys_to_virt(phys), 1); } @@ -823,7 +817,6 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star uint64_t phys; struct page_table *pt; int error; - struct page *page; if (*ptep == PTE_NULL) { return -ENOENT; @@ -848,15 +841,6 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t star *ptep = PTE_NULL; if (args->free_physical) { - page = phys_to_page(phys); - if (page && (page->mode == PM_MAPPED)) { - if (--page->count > 0) { - /* other mapping exists */ - return 0; - } - list_del(&page->list); - page->mode = PM_NONE; - } ihk_mc_free_pages(phys_to_virt(phys), LARGE_PAGE_SIZE/PAGE_SIZE); } @@ -1149,369 +1133,6 @@ int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end, &alloc_range_l4, &attr); } -static int lookup_pte(struct page_table *pt, uintptr_t virt, pte_t **ptepp, - uintptr_t *pgbasep, size_t *pgsizep) -{ - int l4idx, l3idx, l2idx, l1idx; - - GET_VIRT_INDICES(virt, l4idx, l3idx, l2idx, l1idx); - - if (!(pt->entry[l4idx] & PFL4_PRESENT)) { - return -ENOENT; - } - - pt = phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK); - if (!(pt->entry[l3idx] & PFL3_PRESENT)) { - return -ENOENT; - } - - pt = phys_to_virt(pt->entry[l3idx] & PT_PHYSMASK); - if ((pt->entry[l2idx] == PTE_NULL) - || (pt->entry[l2idx] & PFL2_SIZE)) { - *ptepp = &pt->entry[l2idx]; - *pgbasep = GET_INDICES_VIRT(l4idx, l3idx, l2idx, 0); - *pgsizep = PTL2_SIZE; - return 0; - } - - pt = phys_to_virt(pt->entry[l2idx] & PT_PHYSMASK); - *ptepp = &pt->entry[l1idx]; - *pgbasep = GET_INDICES_VIRT(l4idx, l3idx, l2idx, l1idx); - *pgsizep = PTL1_SIZE; - - return 0; -} - -int ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, pte_t **ptepp, void **pgbasep, size_t *pgsizep) -{ - int error; - pte_t *ptep = NULL; - uintptr_t pgbase = 0; - size_t pgsize = 0; - - kprintf("ihk_mc_pt_lookup_pte(%p,%p)\n", pt, virt); - error = lookup_pte(pt, (uintptr_t)virt, &ptep, &pgbase, &pgsize); - if (error) { - kprintf("ihk_mc_pt_lookup_pte(%p,%p):lookup failed. %d\n", pt, virt, error); - goto out; - } - - error = 0; - *ptepp = ptep; - *pgbasep = (void *)pgbase; - *pgsizep = pgsize; - -out: - kprintf("ihk_mc_pt_lookup_pte(%p,%p): %d %p %lx %lx\n", pt, virt, error, ptep, pgbase, pgsize); - return error; -} - -static int page_p2align_list[] = { - LARGE_PAGE_P2ALIGN, - PAGE_P2ALIGN, - -1, -}; - -int ihk_mc_pt_choose_pagesize(page_table_t pt, void *start0, void *end0, - void *fault_addr0, size_t maxpgsize, void **pgaddrp, - size_t *pgsizep, int *p2alignp) -{ - const uintptr_t start = (uintptr_t)start0; - const uintptr_t end = (uintptr_t)end0; - const uintptr_t fault_addr = (uintptr_t)fault_addr0; - int ix; - int p2align; - size_t pgsize; - uintptr_t pgbase; - pte_t *ptep; - int error; - uintptr_t pga; - size_t pgs; - - kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p)\n", - pt, start0, end0, fault_addr0, maxpgsize, pgaddrp, - pgsizep, p2alignp); - - if ((fault_addr < start) || (end <= fault_addr)) { - kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):" - "out of range\n", - pt, start0, end0, fault_addr0, maxpgsize, - pgaddrp, pgsizep, p2alignp); - panic("ihk_mc_pt_choose_pagesize:out of range"); - } - - pgs = 0; - for (ix = 0; page_p2align_list[ix] >= 0; ++ix) { - p2align = page_p2align_list[ix]; - pgsize = PAGE_SIZE << p2align; - pgbase = fault_addr & ~(pgsize - 1); - if ((maxpgsize != 0) && (pgsize > maxpgsize)) { - continue; - } - if ((pgbase < start) || (end < (pgbase + pgsize))) { - continue; - } - if (pgs == 0) { - error = lookup_pte(pt, fault_addr, &ptep, &pga, &pgs); - if (error == -ENOENT) { - error = 0; - pgs = LARGE_PAGE_SIZE; - pga = fault_addr & LARGE_PAGE_MASK; - } - else if (error) { - kprintf("ihk_mc_pt_choose_pagesize(" - "%p,%p,%p,%p,%lx,%p,%p,%p):" - "lookup pte failed. %d\n", - pt, start0, end0, fault_addr0, - maxpgsize, pgaddrp, pgsizep, - p2alignp, error); - goto out; - } - } - if (pgs < pgsize) { - continue; - } - - error = 0; - *pgaddrp = (void *)pgbase; - *pgsizep = pgsize; - *p2alignp = p2align; - goto out; - } - - kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):" - "not reached\n", - pt, start0, end0, fault_addr0, maxpgsize, pgaddrp, - pgsizep, p2alignp); - panic("ihk_mc_pt_choose_pagesize:not reached"); - -out: - kprintf("ihk_mc_pt_choose_pagesize(%p,%p,%p,%p,%lx,%p,%p,%p):" - " %d %p %lx %d\n", - pt, start0, end0, fault_addr0, maxpgsize, pgaddrp, - pgsizep, p2alignp, error, *pgaddrp, *pgsizep, *p2alignp); - return error; -} - -struct set_range_args { - uintptr_t phys; - enum ihk_mc_pt_attribute attr; - int padding; - uintptr_t diff; -}; - -int set_range_l1(void *args0, pte_t *ptep, uint64_t base, uint64_t start, - uint64_t end) -{ - struct set_range_args *args = args0; - int error; - uintptr_t phys; - - kprintf("set_range_l1(%p,%p,%lx,%lx,%lx)\n", - args0, ptep, base, start, end); - - if (*ptep != PTE_NULL) { - kprintf("set_range_l1(%p,%p,%lx,%lx,%lx):page exists\n", - args0, ptep, base, start, end); - error = -EBUSY; - goto out; - } - - phys = args->phys + (base - start); - *ptep = phys | attr_to_l1attr(args->attr); - - error = 0; -out: - kprintf("set_range_l1(%p,%p,%lx,%lx,%lx): %d\n", - args0, ptep, base, start, end, error); - return error; -} - -int set_range_l2(void *args0, pte_t *ptep, uint64_t base, uint64_t start, - uint64_t end) -{ - struct set_range_args *args = args0; - uintptr_t phys; - int error; - struct page_table *pt; - - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx)\n", - args0, ptep, base, start, end); - - if (*ptep == PTE_NULL) { - if ((start <= base) && ((base + PTL2_SIZE) <= end) - && ((args->diff & (PTL2_SIZE - 1)) == 0)) { - phys = args->phys + (base - start); - *ptep = phys | attr_to_l2attr(args->attr|PTATTR_LARGEPAGE); - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):" - "large page\n", - args0, ptep, base, start, end); - error = 0; - goto out; - } - - pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); - if (pt == NULL) { - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):" - "__alloc_new_pt failed\n", - args0, ptep, base, start, end); - error = -ENOMEM; - goto out; - } - - *ptep = virt_to_phys(pt) | PFL2_PDIR_ATTR; - } - else if (*ptep & PFL2_SIZE) { - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):" - "page exists\n", - args0, ptep, base, start, end); - error = -EBUSY; - goto out; - } - else { - pt = phys_to_virt(*ptep & PT_PHYSMASK); - } - - error = walk_pte_l1(pt, base, start, end, &set_range_l1, args0); - if (error) { - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx):" - "walk_pte_l1 failed. %d\n", - args0, ptep, base, start, end, error); - goto out; - } - - error = 0; -out: - kprintf("set_range_l2(%p,%p,%lx,%lx,%lx): %d\n", - args0, ptep, base, start, end, error); - return error; -} - -int set_range_l3(void *args0, pte_t *ptep, uint64_t base, uint64_t start, - uint64_t end) -{ - struct page_table *pt; - int error; - - kprintf("set_range_l3(%p,%p,%lx,%lx,%lx)\n", - args0, ptep, base, start, end); - - if (*ptep == PTE_NULL) { - pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); - if (pt == NULL) { - kprintf("set_range_l3(%p,%p,%lx,%lx,%lx):" - "__alloc_new_pt failed\n", - args0, ptep, base, start, end); - return -ENOMEM; - } - *ptep = virt_to_phys(pt) | PFL3_PDIR_ATTR; - } - else { - pt = phys_to_virt(*ptep & PT_PHYSMASK); - } - - error = walk_pte_l2(pt, base, start, end, &set_range_l2, args0); - if (error) { - kprintf("set_range_l3(%p,%p,%lx,%lx,%lx):" - "walk_pte_l2 failed. %d\n", - args0, ptep, base, start, end, error); - goto out; - } - - error = 0; -out: - kprintf("set_range_l3(%p,%p,%lx,%lx,%lx): %d\n", - args0, ptep, base, start, end, error); - return error; -} - -int set_range_l4(void *args0, pte_t *ptep, uint64_t base, uint64_t start, - uint64_t end) -{ - struct page_table *pt; - int error; - - kprintf("set_range_l4(%p,%p,%lx,%lx,%lx)\n", - args0, ptep, base, start, end); - - if (*ptep == PTE_NULL) { - pt = __alloc_new_pt(IHK_MC_AP_NOWAIT); - if (pt == NULL) { - kprintf("set_range_l4(%p,%p,%lx,%lx,%lx):" - "__alloc_new_pt failed\n", - args0, ptep, base, start, end); - return -ENOMEM; - } - *ptep = virt_to_phys(pt) | PFL4_PDIR_ATTR; - } - else { - pt = phys_to_virt(*ptep & PT_PHYSMASK); - } - - error = walk_pte_l3(pt, base, start, end, &set_range_l3, args0); - if (error) { - kprintf("set_range_l4(%p,%p,%lx,%lx,%lx):" - "walk_pte_l3 failed. %d\n", - args0, ptep, base, start, end, error); - goto out; - } - - error = 0; -out: - kprintf("set_range_l4(%p,%p,%lx,%lx,%lx): %d\n", - args0, ptep, base, start, end, error); - return error; -} - -int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end, - uintptr_t phys, enum ihk_mc_pt_attribute attr) -{ - int error; - struct set_range_args args; - - kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x)\n", - pt, start, end, phys, attr); - - args.phys = phys; - args.attr = attr; - args.diff = (uintptr_t)start ^ phys; - - error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end, - &set_range_l4, &args); - if (error) { - kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x):" - "walk_pte_l4 failed. %d\n", - pt, start, end, phys, attr, error); - goto out; - } - - error = 0; -out: - kprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x): %d\n", - pt, start, end, phys, attr, error); - return error; -} - -int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, uintptr_t phys, size_t pgsize, enum ihk_mc_pt_attribute attr) -{ - kprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):\n", - pt, ptep, phys, pgsize, attr); - switch (pgsize) { - case PTL1_SIZE: - *ptep = phys | attr_to_l1attr(attr); - break; - case PTL2_SIZE: - *ptep = phys | attr_to_l2attr(attr | PTATTR_LARGEPAGE); - break; - default: - kprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):\n", - pt, ptep, phys, pgsize, attr); - panic("ihk_mc_pt_set_pte"); - break; - } - return 0; -} - void load_page_table(struct page_table *pt) { unsigned long pt_addr; diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index e96a96e6..67c2fff1 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -71,8 +71,6 @@ struct syscall_load_desc { struct syscall_response { unsigned long status; long ret; - unsigned long fault_address; - unsigned long fault_reason; }; struct syscall_ret_desc { diff --git a/executer/kernel/control.c b/executer/kernel/control.c index 0889ecec..60df3cda 100644 --- a/executer/kernel/control.c +++ b/executer/kernel/control.c @@ -217,9 +217,9 @@ int mcexec_syscall(struct mcctrl_channel *c, unsigned long arg) return 0; } +#ifndef DO_USER_MODE int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc); -#ifndef DO_USER_MODE // static int remaining_job, base_cpu, job_pos; #endif @@ -243,24 +243,10 @@ int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req) if(swd.cpu >= usrdata->num_channels)return -EINVAL; c = usrdata->channels + swd.cpu; - if ((usrdata->channelowners[swd.cpu] != NULL) - && (usrdata->channelowners[swd.cpu] != current)) { - printk("mcexec_wait_syscall:double wait %p %p\n", - usrdata->channelowners[swd.cpu], - current); - return -EBUSY; - } #ifdef DO_USER_MODE -retry: - if (wait_event_interruptible(c->wq_syscall, c->req)) { - return -EINTR; - } + wait_event_interruptible(c->wq_syscall, c->req); c->req = 0; - if (!c->param.request_va->valid) { -printk("mcexec_wait_syscall:stray wakeup\n"); - goto retry; - } #else while (1) { c = usrdata->channels + swd.cpu; @@ -299,28 +285,22 @@ printk("mcexec_wait_syscall:stray wakeup\n"); } if (c->param.request_va && c->param.request_va->valid) { -#endif c->param.request_va->valid = 0; /* ack */ dprintk("SC #%lx, %lx\n", c->param.request_va->number, c->param.request_va->args[0]); - usrdata->channelowners[swd.cpu] = current; - if (__do_in_kernel_syscall(os, c, c->param.request_va)) { - if (copy_to_user(&req->sr, c->param.request_va, - sizeof(struct syscall_request))) { - usrdata->channelowners[swd.cpu] = NULL; - return -EFAULT; - } - return 0; - } - usrdata->channelowners[swd.cpu] = NULL; -#ifdef DO_USER_MODE - goto retry; + if (__do_in_kernel_syscall(os, c, c->param.request_va)) { #endif + if (copy_to_user(&req->sr, c->param.request_va, + sizeof(struct syscall_request))) { + return -EFAULT; + } #ifndef DO_USER_MODE - if (usrdata->mcctrl_dma_abort) { - return -2; - } + return 0; + } + if (usrdata->mcctrl_dma_abort) { + return -2; + } } } usrdata->remaining_job = 0; @@ -458,13 +438,6 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) { return -EFAULT; } - if (usrdata->channelowners[ret.cpu] != current) { - printk("mcexec_ret_syscall:owner mismatch: %p %p\n", - usrdata->channelowners[ret.cpu], - current); - return -EBUSY; - } - usrdata->channelowners[ret.cpu] = NULL; mc = usrdata->channels + ret.cpu; if (!mc) { return -EINVAL; @@ -515,15 +488,6 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg) } else { mc->param.response_va->status = 1; } -#if 1 - { - extern struct vm_area_struct *rus_vma; - - if (zap_vma_ptes(rus_vma, rus_vma->vm_start, rus_vma->vm_end - rus_vma->vm_start)) { - printk("zap_vma_ptes failed\n"); - } - } -#endif return 0; } diff --git a/executer/kernel/ikc.c b/executer/kernel/ikc.c index 5597eb5f..4f1dbed1 100644 --- a/executer/kernel/ikc.c +++ b/executer/kernel/ikc.c @@ -235,11 +235,6 @@ int prepare_ikc_channels(ihk_os_t os) printk("Error: cannot allocate channels.\n"); return -ENOMEM; } - usrdata->channelowners = kzalloc(sizeof(void *) * usrdata->num_channels, GFP_KERNEL); - if (usrdata->channelowners == NULL) { - printk("Error: cannot allocate channelowners.\n"); - return -ENOMEM; - } usrdata->os = os; init_waitqueue_head(&usrdata->wq_prepare); diff --git a/executer/kernel/mcctrl.h b/executer/kernel/mcctrl.h index 64fb7cad..88d7a3ff 100644 --- a/executer/kernel/mcctrl.h +++ b/executer/kernel/mcctrl.h @@ -80,7 +80,6 @@ struct mcctrl_usrdata { unsigned long last_thread_exec; wait_queue_head_t wq_prepare; unsigned long rpgtable; /* per process, not per OS */ - void **channelowners; }; int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp); diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index 5313146e..5be7a396 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -38,7 +37,7 @@ static void print_dma_lastreq(void) #endif #if 1 /* x86 depend, host OS side */ -unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned fflags) +unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva) { unsigned long rpa; int offsh; @@ -64,13 +63,6 @@ unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long return -EFAULT; } -#define PTE_RW 0x002 - if ((fflags & FAULT_FLAG_WRITE) && !(pt[ix] & PTE_RW)) { - ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); - ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); - return -EFAULT; - } - #define PTE_PS 0x080 if (pt[ix] & PTE_PS) { rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << offsh) - 1); @@ -92,64 +84,6 @@ out: } #endif -static int pager_call(ihk_os_t os, struct syscall_request *req); -static int remote_page_fault(struct mcctrl_usrdata *usrdata, struct vm_fault *vmf) -{ - int cpu; - struct mcctrl_channel *channel; - volatile struct syscall_request *req; - volatile struct syscall_response *resp; - - printk("remote_page_fault(%p,%p %x)\n", usrdata, vmf->virtual_address, vmf->flags); - /* get peer cpu */ - for (cpu = 0; cpu < usrdata->num_channels; ++cpu) { - if (usrdata->channelowners[cpu] == current) { - break; - } - } - if (cpu >= usrdata->num_channels) { - printk("cpu not found\n"); - return -ENOENT; - } - - channel = &usrdata->channels[cpu]; - req = channel->param.request_va; - resp = channel->param.response_va; - - /* request page fault */ - resp->ret = -EFAULT; - resp->fault_address = (unsigned long)vmf->virtual_address; - resp->fault_reason = (vmf->flags & FAULT_FLAG_WRITE)? 1: 0; - - req->valid = 0; - resp->status = 3; - -retry: - /* wait for response */ - while (req->valid == 0) { - schedule(); - } - req->valid = 0; - - /* check result */ - if (req->number != __NR_mmap) { - printk("remote_page_fault:invalid response. %lx %lx\n", - req->number, req->args[0]); - return -EIO; - } - else if (req->args[0] != 0x0101) { - resp->ret = pager_call(usrdata->os, (void *)req); - resp->status = 1; - goto retry; - } - else if (req->args[1] != 0) { - printk("remote_page_fault:response %d\n", (int)req->args[1]); - return (int)req->args[1]; - } - printk("remote_page_fault(%p,%p %x): 0\n", usrdata, vmf->virtual_address, vmf->flags); - return 0; -} - static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct mcctrl_usrdata * usrdata = vma->vm_file->private_data; @@ -157,26 +91,12 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long rpa; unsigned long phys; int error; - int try; dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); - for (try = 1; ; ++try) { - rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable, - (unsigned long)vmf->virtual_address, - vmf->flags); -#define NTRIES 2 - if (((long)rpa >= 0) || (try >= NTRIES)) { - break; - } - - error = remote_page_fault(usrdata, vmf); - if (error) { - printk("forward_page_fault failed. %d\n", error); - break; - } - } + rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable, + (unsigned long)vmf->virtual_address); if ((long)rpa < 0) { printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); @@ -210,7 +130,6 @@ static struct file_operations rus_fops = { .mmap = &rus_mmap, }; -struct vm_area_struct *rus_vma = NULL; int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp) { struct file *file; @@ -233,7 +152,6 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un } start = do_mmap_pgoff(file, 0, end, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0); - vma = find_vma(current->mm, 0); up_write(¤t->mm->mmap_sem); fput(file); if (IS_ERR_VALUE(start)) { @@ -241,7 +159,6 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un return start; } - rus_vma = vma; *startp = start; *endp = end; return 0; @@ -333,6 +250,12 @@ static void clear_wait(unsigned char *p, int size) p[size] = 0; } +static void __return_syscall(struct mcctrl_channel *c, int ret) +{ + c->param.response_va->ret = ret; + c->param.response_va->status = 1; +} + static unsigned long translate_remote_va(struct mcctrl_channel *c, unsigned long rva) { @@ -359,7 +282,6 @@ static unsigned long translate_remote_va(struct mcctrl_channel *c, //extern struct mcctrl_channel *channels; -#if 0 int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc) { @@ -475,328 +397,4 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, } } } -#endif #endif /* !DO_USER_MODE */ - -static void __return_syscall(struct mcctrl_channel *c, long ret) -{ - c->param.response_va->ret = ret; - c->param.response_va->status = 1; -} - -struct pager { - struct list_head list; - struct inode * inode; - void * handle; -}; - -/* - * for linux v2.6.35 or prior - */ -#ifndef DEFINE_SEMAPHORE -#define DEFINE_SEMAPHORE(...) DECLARE_MUTEX(__VA_ARGS__) -#endif - -static DEFINE_SEMAPHORE(pager_sem); -static struct list_head pager_list = LIST_HEAD_INIT(pager_list); - -struct pager_create_result { - uintptr_t handle; - int maxprot; -}; - -static int pager_req_create(ihk_os_t os, int fd, int flags, int prot, uintptr_t result_pa) -{ - const int ignore_flags = MAP_FIXED | MAP_DENYWRITE; - const int ok_flags = MAP_PRIVATE; - ihk_device_t dev = ihk_os_to_dev(os); - int error; - void *handle = NULL; - struct pager_create_result *resp; - int maxprot = -1; - struct file *file = NULL; - struct inode *inode; - struct pager *pager; - uintptr_t phys; - - printk("pager_req_create(%d,%x,%x,%lx)\n", fd, flags, prot, (long)result_pa); - - if (flags & ~(ignore_flags | ok_flags)) { - printk("pager_req_create(%d,%x,%x,%lx):not supported flags %x\n", - fd, flags, prot, (long)result_pa, - flags & ~(ignore_flags | ok_flags)); - error = -EINVAL; - goto out; - } - - file = fget(fd); - if (file == NULL) { - error = -EBADF; - printk("pager_req_create(%d,%x,%x,%lx):file not found. %d\n", fd, flags, prot, (long)result_pa, error); - goto out; - } - - inode = file->f_path.dentry->d_inode; - if (inode == NULL) { - error = -EBADF; - printk("pager_req_create(%d,%x,%x,%lx):inode not found. %d\n", fd, flags, prot, (long)result_pa, error); - goto out; - } - - if (!(file->f_mode & (FMODE_READ | FMODE_WRITE))) { - maxprot = PROT_NONE; - } - else { - maxprot = 0; - if (file->f_mode & FMODE_READ) { - maxprot |= PROT_READ; - maxprot |= PROT_EXEC; - } - if (file->f_mode & FMODE_WRITE) { - maxprot |= PROT_WRITE; - } - } - - error = down_interruptible(&pager_sem); - if (error) { - error = -EINTR; - printk("pager_req_create(%d,%x,%x,%lx):signaled. %d\n", fd, flags, prot, (long)result_pa, error); - goto out; - } - - list_for_each_entry(pager, &pager_list, list) { - if (pager->inode == inode) { - handle = pager->handle; - error = -EALREADY; - up(&pager_sem); - goto found; - } - } - - pager = kzalloc(sizeof(*pager), GFP_KERNEL); - if (pager == NULL) { - error = -ENOMEM; - printk("pager_req_create(%d,%x,%x,%lx):kzalloc failed. %d\n", fd, flags, prot, (long)result_pa, error); - up(&pager_sem); - goto out; - } - - down_write(¤t->mm->mmap_sem); - handle = (void *)do_mmap_pgoff(file, 0, PAGE_SIZE, prot, (flags & ok_flags), 0); - up_write(¤t->mm->mmap_sem); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - printk("pager_req_create(%d,%x,%x,%lx):mmap failed. %d\n", - fd, flags, prot, (long)result_pa, error); - kfree(pager); - up(&pager_sem); - goto out; - } - - pager->inode = inode; - pager->handle = handle; - list_add(&pager->list, &pager_list); - up(&pager_sem); - - error = 0; -found: - phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp)); - resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0); - resp->handle = (uintptr_t)handle; - resp->maxprot = maxprot; - ihk_device_unmap_virtual(dev, resp, sizeof(*resp)); - ihk_device_unmap_memory(dev, phys, sizeof(*resp)); - -out: - if (file != NULL) { - fput(file); - } - printk("pager_req_create(%d,%x,%x,%lx): %d %p %x\n", - fd, flags, prot, (long)result_pa, error, handle, maxprot); - return error; -} - -static int pager_req_release(ihk_os_t os, uintptr_t handle) -{ - struct vm_area_struct *vma; - int error; - struct pager *pager; - struct pager *next; - - printk("pager_req_relase(%p,%lx)\n", os, handle); - - error = down_interruptible(&pager_sem); - if (error) { - printk("pager_req_relase(%p,%lx):signaled. %d\n", os, handle, error); - down_write(¤t->mm->mmap_sem); - goto out; - } - - list_for_each_entry_safe(pager, next, &pager_list, list) { - if ((uintptr_t)pager->handle == handle) { - list_del(&pager->list); - up(&pager_sem); - kfree(pager); - goto found; - } - } - up(&pager_sem); - - error = -EBADF; - printk("pager_req_relase(%p,%lx):pager not found. %d\n", os, handle, error); - down_write(¤t->mm->mmap_sem); - goto out; - -found: - down_write(¤t->mm->mmap_sem); - vma = find_vma(current->mm, handle); - if (vma == 0) { - error = -EBADF; - printk("pager_req_relase(%p,%lx):vma not found. %d\n", os, handle, error); - goto out; - } - if ((vma->vm_start != handle) || (vma->vm_end != (handle + PAGE_SIZE))) { - error = -EBADF; - printk("pager_req_relase(%p,%lx):invalid vma. %d\n", os, handle, error); - goto out; - } - if (vma->vm_file == NULL) { - error = -EBADF; - printk("pager_req_relase(%p,%lx):file not found. %d\n", os, handle, error); - goto out; - } - - error = do_munmap(current->mm, handle, PAGE_SIZE); - if (error) { - printk("pager_req_relase(%p,%lx):do_munmap failed. %d\n", os, handle, error); - goto out; - } - - error = 0; -out: - up_write(¤t->mm->mmap_sem); - printk("pager_req_relase(%p,%lx): %d\n", os, handle, error); - return error; -} - -static int pager_req_read(ihk_os_t os, uintptr_t handle, off_t off, size_t size, uintptr_t rpa) -{ - ihk_device_t dev = ihk_os_to_dev(os); - struct vm_area_struct *vma; - int error; - struct file *file; - uintptr_t phys; - void *buf; - mm_segment_t fs; - loff_t pos; - ssize_t ss; - - printk("pager_req_read(%lx,%lx,%lx,%lx)\n", handle, off, size, rpa); - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, handle); - if (vma == 0) { - error = -EBADF; - printk("pager_req_read(%lx,%lx,%lx,%lx):vma not found. %d\n", handle, off, size, rpa, error); - up_read(¤t->mm->mmap_sem); - goto out; - } - if ((vma->vm_start != handle) || (vma->vm_end != (handle + PAGE_SIZE))) { - error = -EBADF; - printk("pager_req_read(%lx,%lx,%lx,%lx):invalid vma. %d\n", handle, off, size, rpa, error); - up_read(¤t->mm->mmap_sem); - goto out; - } - file = vma->vm_file; - if (file == NULL) { - error = -EBADF; - printk("pager_req_read(%lx,%lx,%lx,%lx):file not found. %d\n", handle, off, size, rpa, error); - up_read(¤t->mm->mmap_sem); - goto out; - } - get_file(file); - up_read(¤t->mm->mmap_sem); - - phys = ihk_device_map_memory(dev, rpa, size); - buf = ihk_device_map_virtual(dev, phys, size, NULL, 0); - fs = get_fs(); - set_fs(KERNEL_DS); - pos = off; - ss = vfs_read(file, buf, size, &pos); - if ((ss >= 0) && (ss != size)) { - if (clear_user(buf+ss, size-ss) == 0) { - ss = size; - } - else { - ss = -EIO; - } - } - set_fs(fs); - ihk_device_unmap_virtual(dev, buf, size); - ihk_device_unmap_memory(dev, phys, size); - fput(file); - if (ss < 0) { - error = ss; - printk("pager_req_read(%lx,%lx,%lx,%lx):pread failed. %d\n", handle, off, size, rpa, error); - goto out; - } - error = 0; -out: - printk("pager_req_read(%lx,%lx,%lx,%lx): %d\n", handle, off, size, rpa, error); - return error; -} - -static int pager_call(ihk_os_t os, struct syscall_request *req) -{ - int error; - - printk("pager_call(%p %#lx)\n", req, req->args[0]); - switch (req->args[0]) { -#define PAGER_REQ_CREATE 0x0001 -#define PAGER_REQ_RELEASE 0x0002 -#define PAGER_REQ_READ 0x0003 - case PAGER_REQ_CREATE: - error = pager_req_create(os, req->args[1], req->args[2], req->args[3], req->args[4]); - break; - - case PAGER_REQ_RELEASE: - error = pager_req_release(os, req->args[1]); - break; - - case PAGER_REQ_READ: - error = pager_req_read(os, req->args[1], req->args[2], req->args[3], req->args[4]); - break; - - default: - error = -ENOSYS; - break; - } - - printk("pager_call(%p %#lx): %d\n", req, req->args[0], error); - return error; -} - -int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc) -{ - int error; - long ret; - - printk("__do_in_kernel_syscall(%p,%p,%p %ld)\n", os, c, sc, sc->number); - switch (sc->number) { - case __NR_mmap: - ret = pager_call(os, sc); - break; - - default: - error = -ENOSYS; - goto out; - break; - } - - __return_syscall(c, ret); - - error = 0; -out: - printk("__do_in_kernel_syscall(%p,%p,%p %ld): %d\n", os, c, sc, sc->number, error); - return error; -} diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 4ed7edf5..c0ff8cc9 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -635,12 +635,33 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) pthread_mutex_unlock(lock); return w.sr.args[0]; - case __NR_mmap: - case __NR_munmap: - case __NR_mprotect: - /* reserved for internal use */ - do_syscall_return(fd, cpu, -ENOSYS, 0, 0, 0, 0); - break; + case __NR_mmap: { + // w.sr.args[0] is converted to MIC physical address + __dprintf("mcexec.c,mmap,MIC-paddr=%lx,len=%lx,prot=%lx,flags=%lx,fd=%lx,offset=%lx\n", + w.sr.args[0], w.sr.args[1], w.sr.args[2], w.sr.args[3], w.sr.args[4], w.sr.args[5]); + off_t old_off = lseek(w.sr.args[4], 0, SEEK_CUR); + if(old_off == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; } + off_t rlseek = lseek(w.sr.args[4], w.sr.args[5], SEEK_SET); + if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; goto mmap_out; } + ssize_t toread = w.sr.args[1]; + ret = 0; + while(toread > 0) { + __dprintf("mcexec.c,mmap,read,addr=%lx,len=%lx\n", (long int)((void *)dma_buf + w.sr.args[1] - toread), toread); + ssize_t rread = read(w.sr.args[4], (void *)dma_buf + w.sr.args[1] - toread, toread); + if(rread == 0) { + __dprint("mcexec.c,mmap,read==0\n"); + goto mmap_zero_out; + } else if(rread < 0) { + __dprint("mcexec.c,mmap,read failed\n"); ret = -errno; break; + } + toread -= rread; + } + mmap_zero_out: + rlseek = lseek(w.sr.args[4], old_off, SEEK_SET); + if(rlseek == -1) { __dprint("mcexec.c,mmap,lseek failed\n"); ret = -errno; } + mmap_out: + do_syscall_return(fd, cpu, ret, 1, (unsigned long)dma_buf, w.sr.args[0], w.sr.args[1]); + break; } #ifdef USE_SYSCALL_MOD_CALL case 303:{ diff --git a/kernel/Makefile.build b/kernel/Makefile.build index 8c0629cf..36bf10b2 100644 --- a/kernel/Makefile.build +++ b/kernel/Makefile.build @@ -1,6 +1,6 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o -OBJS += process.o copy.o waitq.o futex.o timer.o plist.o memobj.o +OBJS += process.o copy.o waitq.o futex.o timer.o plist.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ diff --git a/kernel/Makefile.build.dcfa b/kernel/Makefile.build.dcfa index db8f89a7..580eeb89 100644 --- a/kernel/Makefile.build.dcfa +++ b/kernel/Makefile.build.dcfa @@ -1,6 +1,6 @@ IHKDIR=$(IHKBASE)/$(TARGETDIR) OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o -OBJS += process.o copy.o waitq.o futex.o timer.o plist.o memobj.o +OBJS += process.o copy.o waitq.o futex.o timer.o plist.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ diff --git a/kernel/host.c b/kernel/host.c index 0327ac36..08ea5809 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -92,13 +92,12 @@ static int process_msg_prepare_process(unsigned long rphys) range_npages = (e - s) >> PAGE_SHIFT; flags = VR_NONE; flags |= PROT_TO_VR_FLAG(pn->sections[i].prot); - flags |= VRFLAG_PROT_TO_MAXPROT(flags); if((up_v = ihk_mc_alloc_pages(range_npages, IHK_MC_AP_NOWAIT)) == NULL){ goto err; } up = virt_to_phys(up_v); - if(add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0){ + if(add_process_memory_range(proc, s, e, up, flags) != 0){ ihk_mc_free_pages(up_v, range_npages); goto err; } @@ -169,32 +168,29 @@ static int process_msg_prepare_process(unsigned long rphys) /* Map system call stuffs */ flags = VR_RESERVED | VR_PROT_READ | VR_PROT_WRITE; - flags |= VRFLAG_PROT_TO_MAXPROT(flags); addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT; e = addr + PAGE_SIZE * DOORBELL_PAGE_COUNT; if(add_process_memory_range(proc, addr, e, cpu_local_var(scp).doorbell_pa, - VR_REMOTE | flags, NULL, 0) != 0){ + VR_REMOTE | flags) != 0){ goto err; } addr = e; e = addr + PAGE_SIZE * REQUEST_PAGE_COUNT; if(add_process_memory_range(proc, addr, e, cpu_local_var(scp).request_pa, - VR_REMOTE | flags, NULL, 0) != 0){ + VR_REMOTE | flags) != 0){ goto err; } addr = e; e = addr + PAGE_SIZE * RESPONSE_PAGE_COUNT; if(add_process_memory_range(proc, addr, e, cpu_local_var(scp).response_pa, - flags, NULL, 0) != 0){ + flags) != 0){ goto err; } /* Map, copy and update args and envs */ - flags = VR_PROT_READ | VR_PROT_WRITE; - flags |= VRFLAG_PROT_TO_MAXPROT(flags); addr = e; e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT; @@ -204,7 +200,7 @@ static int process_msg_prepare_process(unsigned long rphys) args_envs_p = virt_to_phys(args_envs); if(add_process_memory_range(proc, addr, e, args_envs_p, - flags, NULL, 0) != 0){ + VR_PROT_READ|VR_PROT_WRITE) != 0){ ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT); goto err; } diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h deleted file mode 100644 index f4e15b4f..00000000 --- a/kernel/include/memobj.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef HEADER_MEMOBJ_H -#define HEADER_MEMOBJ_H - -#include -#include -#include -#include - -struct memobj { - struct list_head list; - ihk_atomic_t ref; - uintptr_t handle; - struct list_head page_list; - ihk_spinlock_t page_list_lock; -}; - -int memobj_create(int fd, int flags, int prot, struct memobj **objp, int *maxprotp); -void memobj_ref(struct memobj *obj); -void memobj_release(struct memobj *obj); -int memobj_get_page(struct memobj *obj, off_t off, size_t pgsize, uintptr_t *physp); - -#endif /* HEADER_MEMOBJ_H */ diff --git a/kernel/include/page.h b/kernel/include/page.h index d06d4d48..6d5aaaaa 100644 --- a/kernel/include/page.h +++ b/kernel/include/page.h @@ -2,25 +2,16 @@ #define __HEADER_PAGE_H struct page { - struct list_head list; - uint8_t mode; - uint8_t padding[3]; - int32_t count; - off_t offset; + struct list_head list; + uint64_t flags; + int64_t count; }; -/* mode */ -enum page_mode { - PM_NONE = 0x00, - PM_PENDING_FREE = 0x01, - PM_PAGEIO = 0x02, - PM_MAPPED = 0x03, - PM_ANON_COW = 0x04, -}; +/* flags */ +#define PAGE_IN_LIST 0x0001UL struct page *phys_to_page(uintptr_t phys); uintptr_t page_to_phys(struct page *page); -int page_unmap(struct page *page); void *allocate_pages(int npages, enum ihk_mc_ap_flag flag); void free_pages(void *va, int npages); diff --git a/kernel/include/pager.h b/kernel/include/pager.h deleted file mode 100644 index 840edcda..00000000 --- a/kernel/include/pager.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef HEADER_PAGER_H -#define HEADER_PAGER_H - -#include - -enum pager_op { - PAGER_REQ_CREATE = 0x0001, - PAGER_REQ_RELEASE = 0x0002, - PAGER_REQ_READ = 0x0003, -}; - -/* - * int pager_req_create(int fd, int flags, int prot, uintptr_t result_rpa); - */ -struct pager_create_result { - uintptr_t handle; - int maxprot; -}; - -/* - * int pager_req_release(uintptr_t handle); - */ -/* - * int pager_req_read(uintptr_t handle, off_t off, size_t size, uintptr_t buf_rpa); - */ -#endif /* HEADER_PAGER_H */ diff --git a/kernel/include/process.h b/kernel/include/process.h index ad0f43bc..5788f840 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -7,7 +7,6 @@ #include #include #include -#include #define VR_NONE 0x0 #define VR_STACK 0x1 @@ -15,21 +14,13 @@ #define VR_IO_NOCACHE 0x100 #define VR_REMOTE 0x200 #define VR_DEMAND_PAGING 0x1000 -#define VR_PRIVATE 0x2000 #define VR_PROT_NONE 0x00000000 #define VR_PROT_READ 0x00010000 #define VR_PROT_WRITE 0x00020000 #define VR_PROT_EXEC 0x00040000 #define VR_PROT_MASK 0x00070000 -#define VR_MAXPROT_NONE 0x00000000 -#define VR_MAXPROT_READ 0x00100000 -#define VR_MAXPROT_WRITE 0x00200000 -#define VR_MAXPROT_EXEC 0x00400000 -#define VR_MAXPROT_MASK 0x00700000 #define PROT_TO_VR_FLAG(prot) (((unsigned long)(prot) << 16) & VR_PROT_MASK) -#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4) -#define VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4) #define PS_RUNNING 0x1 #define PS_INTERRUPTIBLE 0x2 @@ -50,8 +41,6 @@ struct vm_range { struct list_head list; unsigned long start, end; unsigned long flag; - struct memobj *memobj; - off_t objoff; }; struct vm_regions { @@ -117,6 +106,7 @@ struct process_vm { // is protected by its own lock (see ihk/manycore/generic/page_alloc.c) }; + struct process *create_process(unsigned long user_pc); struct process *clone_process(struct process *org, unsigned long pc, unsigned long sp); @@ -124,12 +114,10 @@ void destroy_process(struct process *proc); void hold_process(struct process *proc); void free_process(struct process *proc); void free_process_memory(struct process *proc); -void flush_process_memory(struct process *proc); int add_process_memory_range(struct process *process, unsigned long start, unsigned long end, - unsigned long phys, unsigned long flag, - struct memobj *memobj, off_t objoff); + unsigned long phys, unsigned long flag); int remove_process_memory_range( struct process *process, unsigned long start, unsigned long end); int split_process_memory_range(struct process *process, @@ -145,7 +133,6 @@ struct vm_range *next_process_memory_range( struct process_vm *vm, struct vm_range *range); struct vm_range *previous_process_memory_range( struct process_vm *vm, struct vm_range *range); -int page_fault_process_memory_range(struct process *proc, struct vm_range *range, uintptr_t fault_addr, uint64_t reason); int remove_process_region(struct process *proc, unsigned long start, unsigned long end); struct program_load_desc; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 7a3b76a5..2701489c 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -126,8 +126,6 @@ struct syscall_request { struct syscall_response { unsigned long status; long ret; - unsigned long fault_address; - unsigned long fault_reason; }; struct syscall_post { @@ -192,7 +190,6 @@ struct syscall_params { extern int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx); extern int obtain_clone_cpuid(); -extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx); #define DECLARATOR(number,name) __NR_##name = number, #define SYSCALL_HANDLED(number,name) DECLARATOR(number,name) diff --git a/kernel/mem.c b/kernel/mem.c index 37be9be1..401d38ee 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -69,12 +68,12 @@ void free_pages(void *va, int npages) struct list_head *pendings = &cpu_local_var(pending_free_pages); struct page *page; - page = phys_to_page(virt_to_phys(va)); - if (page->mode != PM_NONE) { - panic("free_pages:not PM_NONE"); - } if (pendings->next != NULL) { - page->mode = PM_PENDING_FREE; + page = phys_to_page(virt_to_phys(va)); + if (page->flags & PAGE_IN_LIST) { + panic("free_pages"); + } + page->flags |= PAGE_IN_LIST; page->count = npages; list_add_tail(&page->list, pendings); return; @@ -104,10 +103,10 @@ void finish_free_pages_pending(void) } list_for_each_entry_safe(page, next, pendings, list) { - if (page->mode != PM_PENDING_FREE) { - panic("free_pending_pages:not PM_PENDING_FREE"); + if (!(page->flags & PAGE_IN_LIST)) { + panic("free_pending_pages"); } - page->mode = PM_NONE; + page->flags &= ~PAGE_IN_LIST; list_del(&page->list); ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->count); } @@ -144,39 +143,72 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = { void sigsegv(void *); -static void unhandled_page_fault(struct process *proc, unsigned long address, void *regs) +static void page_fault_handler(unsigned long address, void *regs, + unsigned long rbp) { - struct process_vm *vm = proc->vm; - struct vm_range *range; - char found; + struct vm_range *range, *next; + char found = 0; int irqflags; unsigned long error = ((struct x86_regs *)regs)->error; irqflags = kprintf_lock(); - __kprintf("[%d] Page fault for 0x%lX\n", - ihk_mc_get_processor_id(), address); - __kprintf("%s for %s access in %s mode (reserved bit %s set), " - "it %s an instruction fetch\n", - (error & PF_PROT ? "protection fault" : "no page found"), - (error & PF_WRITE ? "write" : "read"), - (error & PF_USER ? "user" : "kernel"), - (error & PF_RSVD ? "was" : "wasn't"), - (error & PF_INSTR ? "was" : "wasn't")); + __kprintf("[%d] Page fault for 0x%lX, (rbp: 0x%lX)\n", + ihk_mc_get_processor_id(), address, rbp); - found = 0; - list_for_each_entry(range, &vm->vm_range_list, list) { + __kprintf("%s for %s access in %s mode (reserved bit %s set), it %s an instruction fetch\n", + (error & PF_PROT ? "protection fault" : "no page found"), + (error & PF_WRITE ? "write" : "read"), + (error & PF_USER ? "user" : "kernel"), + (error & PF_RSVD ? "was" : "wasn't"), + (error & PF_INSTR ? "was" : "wasn't")); + + list_for_each_entry_safe(range, next, + &cpu_local_var(current)->vm->vm_range_list, + list) { + if (range->start <= address && range->end > address) { + __kprintf("address is in range, flag: 0x%X! \n", range->flag); + if(range->flag & VR_DEMAND_PAGING){ + //allocate page for demand paging + __kprintf("demand paging\n"); + void* pa = allocate_pages(1, IHK_MC_AP_CRITICAL); + if(!pa){ + kprintf_unlock(irqflags); + panic("allocate_pages failed"); + } + __kprintf("physical memory area obtained %lx\n", virt_to_phys(pa)); + + { + enum ihk_mc_pt_attribute flag = 0; + struct process *process = cpu_local_var(current); + unsigned long flags = ihk_mc_spinlock_lock(&process->vm->page_table_lock); + const enum ihk_mc_pt_attribute attr = flag | PTATTR_WRITABLE | PTATTR_USER | PTATTR_FOR_USER; + + int rc = ihk_mc_pt_set_page(process->vm->page_table, (void*)(address & PAGE_MASK), virt_to_phys(pa), attr); + if(rc != 0) { + ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags); + __kprintf("ihk_mc_pt_set_page failed,rc=%d,%p,%lx,%08x\n", rc, (void*)(address & PAGE_MASK), virt_to_phys(pa), attr); + ihk_mc_pt_print_pte(process->vm->page_table, (void*)address); + goto fn_fail; + } + ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags); + __kprintf("update_process_page_table success\n"); + } + kprintf_unlock(irqflags); + memset(pa, 0, PAGE_SIZE); + return; + } found = 1; - __kprintf("address is in range, flag: 0x%X! \n", - range->flag); - ihk_mc_pt_print_pte(vm->page_table, (void*)address); + ihk_mc_pt_print_pte(cpu_local_var(current)->vm->page_table, + (void*)address); break; } } - if (!found) { + + if (!found) __kprintf("address is out of range! \n"); - } + fn_fail: kprintf_unlock(irqflags); /* TODO */ @@ -184,72 +216,19 @@ static void unhandled_page_fault(struct process *proc, unsigned long address, vo #ifdef DEBUG_PRINT_MEM { - uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs); - - kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n", - sp[0], sp[1], sp[2], sp[3]); + const struct x86_regs *_regs = regs; + dkprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n", + *((unsigned long*)_regs->rsp), + *((unsigned long*)_regs->rsp+8), + *((unsigned long*)_regs->rsp+16), + *((unsigned long*)_regs->rsp+24) + ); } #endif -#if 0 - panic("mem fault"); -#endif sigsegv(regs); - return; -} -static void page_fault_handler(unsigned long address, unsigned long reason, void *regs) -{ - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; - struct vm_range *range; - unsigned long vrflag; - unsigned long denied; - int error; - - kprintf("[%d]page_fault_handler(%lx,%lx,%p)\n", - ihk_mc_get_processor_id(), address, reason, regs); - - ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - range = lookup_process_memory_range(vm, address, address+1); - if (range == NULL) { - kprintf("page_fault_handler(%lx,%lx,%p):out of range\n", - address, reason, regs); - unhandled_page_fault(proc, address, regs); - goto out; - } - - if (reason & PF_WRITE) { - vrflag = VR_PROT_WRITE; - } - else if (reason & PF_INSTR) { - vrflag = VR_PROT_EXEC; - } - else { - vrflag = VR_PROT_READ; - } - - denied = vrflag & ~range->flag; - if (denied) { - kprintf("page_fault_handler(%lx,%lx,%p):access denied. %lx\n", - address, reason, regs, denied); - unhandled_page_fault(proc, address, regs); - goto out; - } - - error = page_fault_process_memory_range(proc, range, address, reason); - if (error) { - kprintf("page_fault_handler(%lx,%lx,%p):fault range failed. %d\n", - address, reason, regs, error); - unhandled_page_fault(proc, address, regs); - goto out; - } - -out: - ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); - kprintf("[%d]page_fault_handler(%lx,%lx,%p):\n", - ihk_mc_get_processor_id(), address, reason, regs); - return; + //panic("mem fault"); } static void page_allocator_init(void) @@ -310,7 +289,6 @@ static void page_allocator_init(void) &query_free_mem_handler); } -#if 1 struct page *phys_to_page(uintptr_t phys) { int64_t ix; @@ -338,26 +316,6 @@ uintptr_t page_to_phys(struct page *page) return phys; } -int page_unmap(struct page *page) -{ - kprintf("page_unmap(%p %x %d)\n", page, page->mode, page->count); - if (page->mode != PM_MAPPED) { - panic("page_unmap:not PM_MAPPED"); - } - - if (--page->count > 0) { - /* other mapping exist */ - kprintf("page_unmap(%p %x %d): 0\n", page, page->mode, page->count); - return 0; - } - - /* no mapping exist */ - list_del(&page->list); - page->mode = PM_NONE; - kprintf("page_unmap(%p %x %d): 1\n", page, page->mode, page->count); - return 1; -} - static void page_init(void) { size_t npages; @@ -372,7 +330,6 @@ static void page_init(void) memset(pa_pages, 0, allocsize); return; } -#endif void register_kmalloc(void) { diff --git a/kernel/memobj.c b/kernel/memobj.c deleted file mode 100644 index 824059cb..00000000 --- a/kernel/memobj.c +++ /dev/null @@ -1,221 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define dkprintf(...) kprintf(__VA_ARGS__) -#define ekprintf(...) kprintf(__VA_ARGS__) - -static ihk_spinlock_t memobj_list_lock = SPIN_LOCK_UNLOCKED; -static LIST_HEAD(memobj_list); - -int memobj_create(int fd, int flags, int prot, struct memobj **objpp, int *maxprotp) -{ - ihk_mc_user_context_t ctx; - struct pager_create_result result; - int error; - struct memobj *memobj = NULL; - struct memobj *obj; - - kprintf("memobj_create(%d,%x,%x)\n", fd, flags, prot); - memobj = kmalloc(sizeof(*memobj), IHK_MC_AP_NOWAIT); - if (memobj == NULL) { - error = -ENOMEM; - kprintf("memobj_create(%d,%x,%x):kmalloc failed. %d\n", fd, flags, prot, error); - goto out; - } - -retry: - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_CREATE; - ihk_mc_syscall_arg1(&ctx) = fd; - ihk_mc_syscall_arg2(&ctx) = flags; - ihk_mc_syscall_arg3(&ctx) = prot; - ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result); - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error == -EALREADY) { - kprintf("memobj_create(%d,%x,%x,%p):create failed. %d\n", - fd, flags, prot, objpp, error); - ihk_mc_spinlock_lock_noirq(&memobj_list_lock); - list_for_each_entry(obj, &memobj_list, list) { - if (obj->handle == result.handle) { - memobj_ref(obj); - ihk_mc_spinlock_unlock_noirq(&memobj_list_lock); - kfree(memobj); - memobj = obj; - goto found; - } - } - ihk_mc_spinlock_unlock_noirq(&memobj_list_lock); - goto retry; - } - else if (error) { - kprintf("memobj_create(%d,%x,%x,%p):create failed. %d\n", - fd, flags, prot, objpp, error); - goto out; - } - - memset(memobj, 0, sizeof(*memobj)); - ihk_atomic_set(&memobj->ref, 1); - memobj->handle = result.handle; - INIT_LIST_HEAD(&memobj->page_list); - ihk_mc_spinlock_init(&memobj->page_list_lock); - - ihk_mc_spinlock_lock_noirq(&memobj_list_lock); - list_add(&memobj->list, &memobj_list); - ihk_mc_spinlock_unlock_noirq(&memobj_list_lock); - -found: - error = 0; - *objpp = memobj; - *maxprotp = result.maxprot; - memobj = NULL; - -out: - kprintf("memobj_create(%d,%x,%x):%d %p %x\n", fd, flags, prot, error, *objpp, *maxprotp); - return error; -} - -void memobj_ref(struct memobj *obj) -{ - kprintf("memobj_ref(%p):\n", obj); - ihk_atomic_inc(&obj->ref); - return; -} - -void memobj_release(struct memobj *obj) -{ - ihk_mc_user_context_t ctx; - int error; - - kprintf("memobj_release(%p)\n", obj); - ihk_mc_spinlock_lock_noirq(&memobj_list_lock); - if (!ihk_atomic_dec_and_test(&obj->ref)) { - ihk_mc_spinlock_unlock_noirq(&memobj_list_lock); - kprintf("memobj_release(%p):keep\n", obj); - return; - } - list_del(&obj->list); - ihk_mc_spinlock_unlock_noirq(&memobj_list_lock); - - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_RELEASE; - ihk_mc_syscall_arg1(&ctx) = obj->handle; - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("memobj_release(%p):release failed. %d\n", obj, error); - /* through */ - } - - kfree(obj); - kprintf("memobj_release(%p):free\n", obj); - return; -} - -int memobj_get_page(struct memobj *obj, off_t off, size_t pgsize, uintptr_t *physp) -{ - int error; - void *virt = NULL; - uintptr_t phys = -1; - ihk_mc_user_context_t ctx; - struct page *page; - - kprintf("memobj_get_page(%p,%lx,%lx,%p)\n", obj, off, pgsize, physp); - if (pgsize != PAGE_SIZE) { - error = -ENOMEM; - goto out; - } - -retry: - for (;;) { - ihk_mc_spinlock_lock_noirq(&obj->page_list_lock); - list_for_each_entry(page, &obj->page_list, list) { - if ((page->mode != PM_PAGEIO) && (page->mode != PM_MAPPED)) { - panic("memobj_get_page:invalid obj page"); - } - if (page->offset == off) { - if (page->mode == PM_PAGEIO) { - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - goto retry; - } - ++page->count; - phys = page_to_phys(page); - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - goto found; - } - } - - if (virt != NULL) { - page = phys_to_page(phys); - break; - } - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - - virt = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); - if (virt == NULL) { - error = -ENOMEM; - goto out; - } - phys = virt_to_phys(virt); - } - - if (page->mode != PM_NONE) { - panic("memobj_get_page:invalid new page"); - } - page->mode = PM_PAGEIO; - page->offset = off; - list_add(&page->list, &obj->page_list); - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - - ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_READ; - ihk_mc_syscall_arg1(&ctx) = obj->handle; - ihk_mc_syscall_arg2(&ctx) = off; - ihk_mc_syscall_arg3(&ctx) = pgsize; - ihk_mc_syscall_arg4(&ctx) = phys; - - error = syscall_generic_forwarding(__NR_mmap, &ctx); - if (error) { - kprintf("memobj_get_page(%p,%lx,%lx,%p):read failed. %d\n", - obj, off, pgsize, physp, error); - ihk_mc_spinlock_lock_noirq(&obj->page_list_lock); - if (page->mode != PM_PAGEIO) { - panic("memobj_get_page:invalid io page"); - } - list_del(&page->list); - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - page->mode = PM_NONE; - goto out; - } - - ihk_mc_spinlock_lock_noirq(&obj->page_list_lock); - if (page->mode != PM_PAGEIO) { - panic("memobj_get_page:invalid io page"); - } - page->mode = PM_MAPPED; - page->count = 1; - ihk_mc_spinlock_unlock_noirq(&obj->page_list_lock); - virt = NULL; - -found: - error = 0; - *physp = phys; - -out: - if (virt != NULL) { - ihk_mc_free_pages(virt, 1); - } - kprintf("memobj_get_page(%p,%lx,%lx,%p): %d %lx\n", - obj, off, pgsize, physp, error, phys); - return error; -} diff --git a/kernel/process.c b/kernel/process.c index 5ba11ade..e5ca9486 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -211,16 +211,6 @@ int split_process_memory_range(struct process *proc, struct vm_range *range, newrange->end = range->end; newrange->flag = range->flag; - if (range->memobj != NULL) { - memobj_ref(range->memobj); - newrange->memobj = range->memobj; - newrange->objoff = range->objoff + (addr - range->start); - } - else { - newrange->memobj = NULL; - newrange->objoff = 0; - } - range->end = addr; list_add(&newrange->list, &range->list); @@ -248,27 +238,13 @@ int join_process_memory_range(struct process *proc, merging->start, merging->end); if ((surviving->end != merging->start) - || (surviving->flag != merging->flag) - || (surviving->memobj != merging->memobj)) { + || (surviving->flag != merging->flag)) { error = -EINVAL; goto out; } - if (surviving->memobj != NULL) { - size_t len; - off_t endoff; - - len = surviving->end - surviving->start; - endoff = surviving->objoff + len; - if (endoff != merging->objoff) { - return -EINVAL; - } - } surviving->end = merging->end; - if (merging->memobj != NULL) { - memobj_release(merging->memobj); - } list_del(&merging->list); ihk_mc_free(merging); @@ -292,7 +268,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) intptr_t lpend; #endif /* USE_LARGE_PAGES */ - kprintf("free_process_memory_range(%p,%lx-%lx)\n", + dkprintf("free_process_memory_range(%p,%lx-%lx)\n", vm, start0, end0); start = range->start; @@ -318,17 +294,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } #endif /* USE_LARGE_PAGES */ - if (range->memobj != NULL) { - ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock); - } - ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); error = ihk_mc_pt_free_range(vm->page_table, (void *)start, (void *)end); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); - if (range->memobj != NULL) { - ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock); - } if (error && (error != -ENOENT)) { ekprintf("free_process_memory_range(%p,%lx-%lx):" "ihk_mc_pt_free_range(%lx-%lx) failed. %d\n", @@ -349,13 +318,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } } - if (range->memobj != NULL) { - memobj_release(range->memobj); - } list_del(&range->list); ihk_mc_free(range); - kprintf("free_process_memory_range(%p,%lx-%lx): 0\n", + dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n", vm, start0, end0); return 0; } @@ -467,8 +433,7 @@ enum ihk_mc_pt_attribute vrflag_to_ptattr(unsigned long flag) int add_process_memory_range(struct process *process, unsigned long start, unsigned long end, - unsigned long phys, unsigned long flag, - struct memobj *memobj, off_t offset) + unsigned long phys, unsigned long flag) { struct vm_range *range; int rc; @@ -493,8 +458,6 @@ int add_process_memory_range(struct process *process, range->start = start; range->end = end; range->flag = flag; - range->memobj = memobj; - range->objoff = offset; if(range->flag & VR_DEMAND_PAGING) { dkprintf("range: 0x%lX - 0x%lX => physicall memory area is allocated on demand (%ld) [%lx]\n", @@ -673,321 +636,6 @@ out: return error; } -static int pf_anon_page_not_present(struct process *proc, struct vm_range *range, uintptr_t fault_addr) -{ - int error; - int npages; - void *virt = NULL; - void *ptepgaddr; - size_t ptepgsize; - void *pgaddr; - size_t pgsize; - int p2align; - uintptr_t phys; - enum ihk_mc_pt_attribute attr; - size_t maxpgsize; - pte_t *ptep; - - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr); - - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); - error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &ptepgaddr, &ptepgsize); - if (error && (error != -ENOENT)) { - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):lookup pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - if (!error && (*ptep != PTE_NULL)) { - if (!(*ptep & PF_PRESENT)) { - error = -EFAULT; - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):disabled page. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - error = 0; - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):already mapped. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - flush_tlb(); - goto out; - } - - if (error) { - error = 0; - ptepgsize = LARGE_PAGE_SIZE; - ptepgaddr = (void *)-1; - } - maxpgsize = ptepgsize; -#ifndef USE_LARGE_PAGES - if (maxpgsize > PAGE_SIZE) { - maxpgsize = PAGE_SIZE; - } -#endif - for (;;) { - error = ihk_mc_pt_choose_pagesize(proc->vm->page_table, (void *)range->start, (void *)range->end, (void *)fault_addr, maxpgsize, &pgaddr, &pgsize, &p2align); - if (error) { - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):choose pagesize failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - npages = pgsize / PAGE_SIZE; - virt = ihk_mc_alloc_aligned_pages(npages, p2align, IHK_MC_AP_NOWAIT); - if (virt) { - phys = virt_to_phys(virt); - memset(virt, 0, pgsize); - break; - } - - if (pgsize <= PAGE_SIZE) { - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):alloc pages failed\n", proc, range->start, range->end, range->flag, fault_addr); - error = -ENOMEM; - goto out; - } - - maxpgsize = pgsize - 1; - } - - attr = vrflag_to_ptattr(range->flag); - if ((ptepgaddr == pgaddr) && (ptepgsize == pgsize)) { -kprintf("HIT\n"); - error = ihk_mc_pt_set_pte(proc->vm->page_table, ptep, phys, pgsize, attr); - if (error) { - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):set pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - } - else { - error = ihk_mc_pt_set_range(proc->vm->page_table, pgaddr, pgaddr+pgsize, phys, attr); - if (error) { - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx):set range failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - } - virt = NULL; - - error = 0; -out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); - if (virt != NULL) { - ihk_mc_free_pages(virt, npages); - } - kprintf("pf_anon_page_not_present(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - return error; -} - -static int pf_obj_page_not_present(struct process *proc, struct vm_range *range, uintptr_t fault_addr) -{ - int error; - int npages; - struct page *page = NULL; - void *pgaddr; - size_t pgsize; - int p2align; - uintptr_t phys; - enum ihk_mc_pt_attribute attr; - size_t maxpgsize; - off_t off; - pte_t *ptep; - - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr); - - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); - error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &pgaddr, &pgsize); - if (error == -ENOENT) { - maxpgsize = LARGE_PAGE_SIZE; - } - else if (error) { - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):lookup pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - else if (*ptep != PTE_NULL) { - if (!*ptep & PF_PRESENT) { - error = -EFAULT; - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):disabled page. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):already mapped. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - flush_tlb(); - error = 0; - goto out; - } - else { - maxpgsize = pgsize; - } - -#ifndef USE_LARGE_PAGES - maxpgsize = PAGE_SIZE; -#else - /* temporary? restriction */ - maxpgsize = PAGE_SIZE; -#endif - do { - error = ihk_mc_pt_choose_pagesize(proc->vm->page_table, (void *)range->start, (void *)range->end, (void *)fault_addr, maxpgsize, &pgaddr, &pgsize, &p2align); - if (error) { - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):choose pagesize failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - off = range->objoff + ((uintptr_t)pgaddr - range->start); - error = memobj_get_page(range->memobj, off, pgsize, &phys); - if (error) { - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):get page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - - } - npages = pgsize / PAGE_SIZE; - page = phys_to_page(phys); - } while (0); - - attr = vrflag_to_ptattr(range->flag); - if ((range->flag & VR_PRIVATE) && (range->flag & VR_PROT_WRITE)) { - /* for copy-on-write */ - attr &= ~PTATTR_WRITABLE; - } - - error = ihk_mc_pt_set_range(proc->vm->page_table, pgaddr, pgaddr+pgsize, phys, attr); - if (error) { - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx):set range failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - error = 0; - page = NULL; /* avoid page_unmap() */ - -out: - if ((page != NULL) && page_unmap(page)) { - ihk_mc_free_pages(phys_to_virt(page_to_phys(page)), npages); - } - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); - kprintf("pf_obj_page_not_present(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - return error; -} - -static int pf_obj_cow_page(struct process *proc, struct vm_range *range, uintptr_t fault_addr) -{ - int error; - pte_t *ptep; - void *pgaddr; - size_t pgsize; - uintptr_t oldpa; - void *oldva; - void *newva; - uintptr_t newpa; - struct page *oldpage; - enum ihk_mc_pt_attribute attr; - - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx)\n", proc, range->start, range->end, range->flag, fault_addr); - - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); - error = ihk_mc_pt_lookup_pte(proc->vm->page_table, (void *)fault_addr, &ptep, &pgaddr, &pgsize); - if (error) { - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):pte not found. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - flush_tlb(); - error = 0; - goto out; - } - if (pgsize != PAGE_SIZE) { - panic("pf_obj_cow_page:NYI:cow large page"); - } - - oldpa = *ptep & PT_PHYSMASK; - oldva = phys_to_virt(oldpa); - oldpage = phys_to_page(oldpa); - - if (oldpage) { - newva = NULL; - ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock); - for (;;) { - if (oldpage->mode != PM_MAPPED) { - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):invalid cow page. %p %x\n", proc, range->start, range->end, range->flag, fault_addr, range->memobj, oldpage->mode); - panic("page_fault_process_meory_range:invalid cow page"); - } - if (oldpage->count == 1) { - if (newva) { - ihk_mc_free_pages(newva, 1); - } - list_del(&oldpage->list); - oldpage->mode = PM_NONE; - newpa = oldpa; - newva = oldva; - break; - } - if (oldpage->count <= 0) { - panic("pf_obj_cow_page:oldpage count corrupted"); - } - if (newva) { - memcpy(newva, oldva, pgsize); - --oldpage->count; - break; - } - ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock); - newva = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); - if (!newva) { - error = -ENOMEM; - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):alloc page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - ihk_mc_spinlock_lock_noirq(&range->memobj->page_list_lock); - } - ihk_mc_spinlock_unlock_noirq(&range->memobj->page_list_lock); - } - else { - newva = ihk_mc_alloc_pages(1, IHK_MC_AP_NOWAIT); - if (newva == NULL) { - error = -ENOMEM; - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):alloc page failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - goto out; - } - - memcpy(newva, oldva, pgsize); - } - - newpa = virt_to_phys(newva); - attr = vrflag_to_ptattr(range->flag); - error = ihk_mc_pt_set_pte(proc->vm->page_table, ptep, newpa, pgsize, attr); - if (error) { - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx):set pte failed. %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - ihk_mc_free_pages(newva, 1); - goto out; - } - - error = 0; -out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); - kprintf("pf_obj_cow_page(%p,%lx-%lx %lx,%lx): %d\n", proc, range->start, range->end, range->flag, fault_addr, error); - return error; -} - -int page_fault_process_memory_range(struct process *proc, - struct vm_range *range, uintptr_t fault_addr, uint64_t reason) -{ - int error; - - kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n", - proc, range->start, range->end, range->flag, - fault_addr, reason); - - if (!(reason & PF_PROT) && !range->memobj) { - error = pf_anon_page_not_present(proc, range, fault_addr); - } - else if (!(reason & PF_PROT) && range->memobj) { - error = pf_obj_page_not_present(proc, range, fault_addr); - } - else if ((reason & PF_PROT) && (reason & PF_WRITE) && (range->flag & VR_PROT_WRITE) && range->memobj) { - error = pf_obj_cow_page(proc, range, fault_addr); - } - else { - error = -EFAULT; - kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):" - "unknown fault. %d\n", - proc, range->start, range->end, range->flag, - fault_addr, reason, error); - } - - kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx): %d\n", - proc, range->start, range->end, range->flag, - fault_addr, reason, error); - return error; -} - int init_process_stack(struct process *process, struct program_load_desc *pn, int argc, char **argv, int envc, char **env) @@ -1000,18 +648,14 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, unsigned long end = process->vm->region.user_end; unsigned long start = end - size; int rc; - unsigned long vrflag; if(stack == NULL) return -ENOMEM; memset(stack, 0, size); - vrflag = VR_STACK; - vrflag |= VR_PROT_READ | VR_PROT_WRITE | VR_PROT_EXEC; - vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag); if ((rc = add_process_memory_range(process, start, end, virt_to_phys(stack), - vrflag, NULL, 0)) != 0) { + VR_STACK|VR_PROT_READ|VR_PROT_WRITE)) != 0) { ihk_mc_free_pages(stack, USER_STACK_NR_PAGES); return rc; } @@ -1139,7 +783,7 @@ unsigned long extend_process_region(struct process *proc, } } if((rc = add_process_memory_range(proc, aligned_end, aligned_new_end, - (p==0?0:virt_to_phys(p)), flag, NULL, 0)) != 0){ + (p==0?0:virt_to_phys(p)), flag)) != 0){ free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); return end; } @@ -1164,24 +808,6 @@ int remove_process_region(struct process *proc, return 0; } -void flush_process_memory(struct process *proc) -{ - struct process_vm *vm = proc->vm; - struct vm_range *range; - - kprintf("flush_process_memory(%p)\n", proc); - ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - list_for_each_entry(range, &vm->vm_range_list, list) { - if (range->memobj != NULL) { - memobj_release(range->memobj); - range->memobj = NULL; - } - } - ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); - kprintf("flush_process_memory(%p):\n", proc); - return; -} - void free_process_memory(struct process *proc) { struct vm_range *range, *next; diff --git a/kernel/syscall.c b/kernel/syscall.c index f6c31864..68b2d5cb 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -119,36 +119,10 @@ int do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx) ihk_mc_get_processor_id(), req->number); -#define STATUS_IN_PROGRESS 0 -#define STATUS_COMPLETED 1 -#define STATUS_PAGE_FAULT 3 - while (res->status != STATUS_COMPLETED) { - while (res->status == STATUS_IN_PROGRESS) { - cpu_pause(); - } - - if (res->status == STATUS_PAGE_FAULT) { - volatile struct syscall_request *req = cpu_local_var(scp).request_va; - int error; - uint8_t u8; - - /* do page fault */ - u8 = *(volatile uint8_t *)res->fault_address; // XXX: - if (res->fault_reason) { - *(uint8_t *)res->fault_address = u8; // XXX: - } - error = 0; - - /* send result */ - req->number = __NR_mmap; - req->args[0] = 0x101; - req->args[1] = error; - - res->status = STATUS_IN_PROGRESS; - req->valid = 1; - } + while (!res->status) { + cpu_pause(); } - + dkprintf("SC(%d)[%3d] got host reply: %d \n", ihk_mc_get_processor_id(), req->number, res->ret); @@ -188,7 +162,6 @@ terminate(int rc, int sig, ihk_mc_user_context_t *ctx) /* XXX: send SIGKILL to all threads in this process */ - flush_process_memory(proc); /* temporary hack */ do_syscall(&request, ctx); #define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ @@ -336,9 +309,6 @@ SYSCALL_DECLARE(mmap) void *p; int vrflags; intptr_t phys; - struct memobj *memobj; - int maxprot; - int denied; dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n", ihk_mc_get_processor_id(), @@ -418,7 +388,6 @@ SYSCALL_DECLARE(mmap) /* do the map */ vrflags = VR_NONE; vrflags |= PROT_TO_VR_FLAG(prot); - vrflags |= (flags & MAP_PRIVATE)? VR_PRIVATE: 0; if (flags & MAP_ANONYMOUS) { if (0) { /* dummy */ @@ -432,28 +401,11 @@ SYSCALL_DECLARE(mmap) else if ((len == 64*1024*1024) || (len == 128*1024*1024)) { vrflags |= VR_DEMAND_PAGING; } -#if 1 - vrflags |= VR_DEMAND_PAGING; -#endif - } - else { - /* mapped file */ - vrflags |= VR_DEMAND_PAGING; } p = NULL; phys = 0; - memobj = NULL; - maxprot = PROT_READ | PROT_WRITE | PROT_EXEC; - if (!(flags & MAP_ANONYMOUS)) { - error = memobj_create(fd, flags, prot, &memobj, &maxprot); - if (error) { - ekprintf("sys_mmap:memobj_create failed. %d\n", error); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); - goto out; - } - } - else if (!(vrflags & VR_DEMAND_PAGING) + if (!(vrflags & VR_DEMAND_PAGING) && ((vrflags & VR_PROT_MASK) != VR_PROT_NONE)) { npages = len >> PAGE_SHIFT; p2align = PAGE_P2ALIGN; @@ -474,22 +426,7 @@ SYSCALL_DECLARE(mmap) phys = virt_to_phys(p); } - if ((flags & MAP_PRIVATE) && (maxprot & PROT_READ)) { - maxprot = PROT_READ | PROT_WRITE | PROT_EXEC; - } - denied = prot & ~maxprot; - if (denied) { - ekprintf("sys_mmap:denied %x. %x %x\n", denied, prot, maxprot); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); - if (p != NULL) { - ihk_mc_free_pages(p, npages); - } - error = -EACCES; - goto out; - } - vrflags |= VRFLAG_PROT_TO_MAXPROT(PROT_TO_VR_FLAG(maxprot)); - - error = add_process_memory_range(proc, addr, addr+len, phys, vrflags, memobj, off); + error = add_process_memory_range(proc, addr, addr+len, phys, vrflags); if (error) { ekprintf("sys_mmap:add_process_memory_range" "(%p,%lx,%lx,%lx,%lx) failed %d\n", @@ -503,6 +440,32 @@ SYSCALL_DECLARE(mmap) } ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + + /* read page with pread64() */ + if (!(flags & MAP_ANONYMOUS)) { + ihk_mc_user_context_t ctx2; + ssize_t ss; + + ihk_mc_syscall_arg0(&ctx2) = fd; + ihk_mc_syscall_arg1(&ctx2) = addr; + ihk_mc_syscall_arg2(&ctx2) = len; + ihk_mc_syscall_arg3(&ctx2) = off; + + ss = syscall_generic_forwarding(__NR_pread64, &ctx2); + if (ss < 0) { + ekprintf("sys_mmap:pread(%d,%lx,%lx,%lx) failed %ld\n", + fd, addr, len, off, (long)ss); + error = do_munmap((void *)addr, len); + if (error) { + ekprintf("sys_mmap:do_munmap(%lx,%lx) failed. %d\n", + addr, len, error); + /* through */ + } + error = ss; + goto out; + } + } + error = 0; out: dkprintf("[%d]sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", @@ -544,7 +507,6 @@ SYSCALL_DECLARE(mprotect) int error; struct vm_range *changed; const unsigned long protflags = PROT_TO_VR_FLAG(prot); - unsigned long denied; dkprintf("[%d]sys_mprotect(%lx,%lx,%x)\n", ihk_mc_get_processor_id(), start, len0, prot); @@ -596,14 +558,6 @@ SYSCALL_DECLARE(mprotect) error = -EINVAL; goto out; } - - denied = protflags & ~VRFLAG_MAXPROT_TO_PROT(range->flag); - if (denied) { - ekprintf("sys_mprotect(%lx,%lx,%x):denied %lx. %lx %lx\n", - start, len0, prot, denied, protflags, range->flag); - error = -EACCES; - goto out; - } } /* do the mprotect */ @@ -675,7 +629,6 @@ SYSCALL_DECLARE(brk) unsigned long address = ihk_mc_syscall_arg0(ctx); struct vm_regions *region = &cpu_local_var(current)->vm->region; unsigned long r; - unsigned long vrflag; dkprintf("SC(%d)[sys_brk] brk_start=%lx,end=%lx\n", ihk_mc_get_processor_id(), region->brk_start, region->brk_end); @@ -693,8 +646,6 @@ SYSCALL_DECLARE(brk) } /* try to extend memory region */ - vrflag = VR_PROT_READ | VR_PROT_WRITE; - vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag); ihk_mc_spinlock_lock_noirq(&cpu_local_var(current)->vm->memory_range_lock); region->brk_end = extend_process_region(cpu_local_var(current), region->brk_start, region->brk_end, address, diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 3c6e140e..fee5f442 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -1,7 +1,6 @@ #ifndef __HEADER_GENERIC_IHK_MM_H #define __HEADER_GENERIC_IHK_MM_H -#include #include enum ihk_mc_gma_type { @@ -59,7 +58,7 @@ struct ihk_mc_pa_ops { }; void ihk_mc_set_page_allocator(struct ihk_mc_pa_ops *); -void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, unsigned long, void *)); +void ihk_mc_set_page_fault_handler(void (*h)(unsigned long, void *, unsigned long)); unsigned long ihk_mc_map_memory(void *os, unsigned long phys, unsigned long size); @@ -101,13 +100,6 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end, enum ihk_mc_pt_attribute setattr); int ihk_mc_pt_alloc_range(page_table_t pt, void *start, void *end, enum ihk_mc_pt_attribute attr); -int ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, pte_t **ptepp, void **pgbasep, size_t *pgsizep); -int ihk_mc_pt_choose_pagesize(page_table_t pt, void *start, void *end, - void *fault_addr, size_t maxpgsize, void **pgbasep, - size_t *pgsizep, int *p2alignp); -int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end, - uintptr_t phys, enum ihk_mc_pt_attribute attr); -int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, uintptr_t phys, size_t pgsize, enum ihk_mc_pt_attribute attr); int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size, enum ihk_mc_pt_prepare_flag);