diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h
index 761507d5..f7b6fea2 100644
--- a/executer/include/uprotocol.h
+++ b/executer/include/uprotocol.h
@@ -31,7 +31,10 @@ struct program_load_desc {
 	int cpu;
 	int pid;
 	unsigned long entry;
+	unsigned long user_start;	/* start of the range reserved by reserve_user_space() */
+	unsigned long user_end;		/* end of the range reserved by reserve_user_space() */
 	unsigned long rprocess;
+	unsigned long rpgtable;		/* physical address of the remote process's page table */
 	unsigned long at_phdr;
 	unsigned long at_phent;
 	unsigned long at_phnum;
diff --git a/executer/kernel/control.c b/executer/kernel/control.c
index 07f2a37e..c68015ad 100644
--- a/executer/kernel/control.c
+++ b/executer/kernel/control.c
@@ -50,6 +50,13 @@ static long mcexec_prepare_image(ihk_os_t os,
 
 	pdesc->pid = task_tgid_vnr(current);
 
+	/* Hand back reserve_user_space()'s own errno instead of a blanket -ENOMEM. */
+	ret = reserve_user_space(usrdata, &pdesc->user_start, &pdesc->user_end);
+	if (ret) {
+		kfree(pdesc);
+		return ret;
+	}
+
 	args = kmalloc(pdesc->args_len, GFP_KERNEL);
 	if (copy_from_user(args, pdesc->args, pdesc->args_len)) {
 		kfree(args);
@@ -82,6 +89,8 @@ static long mcexec_prepare_image(ihk_os_t os,
 
 	wait_event_interruptible(usrdata->wq_prepare, pdesc->status);
 
+	/* Remember the remote page table; rus_vm_fault() walks it on each fault. */
+	usrdata->rpgtable = pdesc->rpgtable;
 	if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
 	                 sizeof(struct program_image_section) * desc.num_sections)) {
 		ret = -EFAULT;
diff --git a/executer/kernel/mcctrl.h b/executer/kernel/mcctrl.h
index 8969c0ec..0708f354 100644
--- a/executer/kernel/mcctrl.h
+++ b/executer/kernel/mcctrl.h
@@ -77,9 +77,13 @@ struct mcctrl_usrdata {
 	int mcctrl_dma_abort;
 	unsigned long last_thread_exec;
 	wait_queue_head_t wq_prepare;
+	unsigned long rpgtable; /* per process, not per OS */
 };
 
 int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
 int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
 int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
+int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
+		unsigned long *endp);
+
 #endif
diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c
index 0f155e1f..c7684550
100644
--- a/executer/kernel/syscall.c
+++ b/executer/kernel/syscall.c
@@ -5,6 +5,9 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -13,6 +16,13 @@
 
 #define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
 //#define SC_DEBUG
+
+#ifdef SC_DEBUG
+#define dprintk(...) printk(__VA_ARGS__)
+#else
+#define dprintk(...)
+#endif
+
 #ifdef SC_DEBUG
 //static struct ihk_dma_request last_request;
 
@@ -26,6 +36,172 @@ static void print_dma_lastreq(void)
 }
 #endif
 
+#if 1	/* x86 depend, host OS side */
+#define PTE_P		0x001	/* entry is present */
+#define PTE_PS		0x080	/* entry maps a large page directly */
+#define PTE_ADDR_MASK	((1UL << 52) - 1)	/* drops the bits above bit 51 */
+
+/*
+ * Translate remote virtual address rva to a remote physical address by
+ * walking the four table levels (PML4, PDPT, PDT, PT) rooted at remote
+ * physical address rpt.  Each table page is mapped only long enough to
+ * read the one entry that is needed.
+ *
+ * Returns the physical address, or -EFAULT stored in the unsigned long
+ * return value when an entry on the path is not present; callers must
+ * test the result with IS_ERR_VALUE().
+ */
+unsigned long translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva)
+{
+	unsigned long rpa = rpt;
+	unsigned long phys;
+	unsigned long *pt;
+	unsigned long pte;
+	int offsh = 39;
+	int i;
+	int ix;
+
+	/* i = 0: PML4, 1: PDPT, 2: PDT, 3: PT */
+	for (i = 0; i < 4; ++i) {
+		ix = (rva >> offsh) & 0x1FF;
+		phys = ihk_device_map_memory(ihk_os_to_dev(os), rpa, PAGE_SIZE);
+		pt = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
+		/* Copy the entry so the table is unmapped in exactly one place. */
+		pte = pt[ix];
+		dprintk("rpa %#lx offsh %d ix %#x phys %#lx pt %p pt[ix] %#lx\n",
+				rpa, offsh, ix, phys, pt, pte);
+		ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
+		ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
+
+		if (!(pte & PTE_P)) {
+			return -EFAULT;
+		}
+
+		if (pte & PTE_PS) {
+			/* Large page: the low offsh bits of rva are the offset. */
+			rpa = pte & PTE_ADDR_MASK & ~((1UL << offsh) - 1);
+			rpa |= rva & ((1UL << offsh) - 1);
+			goto out;
+		}
+
+		rpa = pte & PTE_ADDR_MASK & ~((1UL << 12) - 1);
+		offsh -= 9;
+	}
+	rpa |= rva & ((1UL << 12) - 1);
+out:
+	dprintk("translate_rva_to_rpa: rva %#lx --> rpa %#lx\n", rva, rpa);
+	return rpa;
+}
+#endif
+
+/*
+ * Fault handler for the VMA set up by rus_mmap(): translate the faulting
+ * address through the remote page table in usrdata->rpgtable and insert
+ * the resulting page frame into the VMA.
+ *
+ * Returns VM_FAULT_NOPAGE on success, VM_FAULT_SIGBUS when the address
+ * cannot be translated or vm_insert_pfn() fails.
+ */
+static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct mcctrl_usrdata *usrdata = vma->vm_file->private_data;
+	ihk_device_t dev = ihk_os_to_dev(usrdata->os);
+	unsigned long rpa;
+	unsigned long phys;
+	int error;
+
+	dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
+			vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
+
+	rpa = translate_rva_to_rpa(usrdata->os, usrdata->rpgtable,
+			(unsigned long)vmf->virtual_address);
+	/* A failed walk returns -EFAULT in rpa; never insert that as a PFN. */
+	if (IS_ERR_VALUE(rpa)) {
+		printk("mcctrl:page fault:translation failed. va %p\n",
+				vmf->virtual_address);
+		return VM_FAULT_SIGBUS;
+	}
+
+	phys = ihk_device_map_memory(dev, rpa, PAGE_SIZE);
+	error = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, phys>>PAGE_SHIFT);
+	ihk_device_unmap_memory(dev, phys, PAGE_SIZE);
+	if (error) {
+		printk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n",
+				vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page);
+		return VM_FAULT_SIGBUS;
+	}
+
+	return VM_FAULT_NOPAGE;
+}
+
+static struct vm_operations_struct rus_vmops = {
+	.fault = &rus_vm_fault,
+};
+
+/* mmap handler: set VM_IO/VM_RESERVED/VM_DONTEXPAND/VM_PFNMAP and install rus_vmops. */
+static int rus_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_DONTEXPAND | VM_PFNMAP;
+	vma->vm_ops = &rus_vmops;
+	return 0;
+}
+
+static struct file_operations rus_fops = {
+	.mmap = &rus_mmap,
+};
+
+#define DESIRED_USER_END	0x800000000000UL
+#define GAP_FOR_MCEXEC		0x008000000000UL
+
+/*
+ * Reserve the low part of the calling process's address space by mapping
+ * an anonymous-inode file (rus_fops) at address 0 with MAP_FIXED.
+ *
+ * The mapping ends at DESIRED_USER_END, or, when find_vma(current->mm, 0)
+ * returns a VMA, GAP_FOR_MCEXEC below that VMA's start rounded down to a
+ * GAP_FOR_MCEXEC boundary.
+ *
+ * On success stores the mapped range in *startp and *endp and returns 0;
+ * on failure returns a negative errno and leaves both untouched.
+ */
+int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
+{
+	struct file *file;
+	struct vm_area_struct *vma;
+	unsigned long start;
+	unsigned long end;
+
+	file = anon_inode_getfile("[mckernel]", &rus_fops, usrdata, O_RDWR);
+	if (IS_ERR(file)) {
+		return PTR_ERR(file);
+	}
+
+	end = DESIRED_USER_END;
+	down_write(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, 0);
+	if (vma) {
+		if (vma->vm_start < 2 * GAP_FOR_MCEXEC) {
+			/* The subtraction would wrap or round down to an empty range. */
+			start = -ENOMEM;
+			goto unlock;
+		}
+		end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
+	}
+	start = do_mmap_pgoff(file, 0, end,
+			PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
+unlock:
+	up_write(&current->mm->mmap_sem);
+	fput(file);
+	if (IS_ERR_VALUE(start)) {
+		printk("mcctrl:user space reservation failed.\n");
+		return start;
+	}
+
+	*startp = start;
+	*endp = end;
+	return 0;
+}
+
//unsigned long last_thread_exec = 0; #ifndef DO_USER_MODE diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index 254da320..72d3d447 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -1,6 +1,6 @@ CC=@CC@ BINDIR=@BINDIR@ -CFLAGS=-Wall -O +CFLAGS=-Wall -O -fPIE -pie TARGET=mcexec all: $(TARGET) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 41573d2b..68f62394 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -524,12 +524,31 @@ void do_syscall_load(int fd, int cpu, unsigned long dest, unsigned long src, } } +static long +do_generic_syscall( + struct syscall_wait_desc *w) +{ + long ret; + + __dprintf("do_generic_syscall(%ld)\n", w->sr.number); + + errno = 0; + ret = syscall(w->sr.number, w->sr.args[0], w->sr.args[1], w->sr.args[2], + w->sr.args[3], w->sr.args[4], w->sr.args[5]); + if (errno != 0) { + ret = -errno; + } + + __dprintf("do_generic_syscall(%ld):%ld (%#lx)\n", w->sr.number, ret, ret); + return ret; +} + #define SET_ERR(ret) if (ret == -1) ret = -errno int main_loop(int fd, int cpu, pthread_mutex_t *lock) { struct syscall_wait_desc w; - int ret; + long ret; w.cpu = cpu; @@ -837,7 +856,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) } #endif default: - __dprintf("Unhandled system calls: %ld\n", w.sr.number); + ret = do_generic_syscall(&w); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; } diff --git a/kernel/host.c b/kernel/host.c index 64a97603..b0514123 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -64,6 +64,8 @@ static void process_msg_prepare_process(unsigned long rphys) proc = create_process(p->entry); proc->pid = 1024; + proc->vm->region.user_start = pn->user_start; + proc->vm->region.user_end = pn->user_end; /* TODO: Clear it at the proper timing */ cpu_local_var(scp).post_idx = 0; @@ -254,6 +256,7 @@ static void process_msg_prepare_process(unsigned long rphys) dkprintf("env OK\n"); p->rprocess = (unsigned long)proc; + p->rpgtable = 
virt_to_phys(proc->vm->page_table); init_process_stack(proc, pn, argc, argv, envc, env); dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid, diff --git a/kernel/include/process.h b/kernel/include/process.h index 489897c6..f57ff567 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -40,6 +40,7 @@ struct vm_regions { unsigned long brk_start, brk_end; unsigned long map_start, map_end; unsigned long stack_start, stack_end; + unsigned long user_start, user_end; }; struct process_vm; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index b514a5b8..a63e2b2f 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -89,7 +89,10 @@ struct program_load_desc { int cpu; int pid; unsigned long entry; + unsigned long user_start; + unsigned long user_end; unsigned long rprocess; + unsigned long rpgtable; unsigned long at_phdr; unsigned long at_phent; unsigned long at_phnum; diff --git a/kernel/process.c b/kernel/process.c index 77de5310..2b334d6a 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -140,6 +140,14 @@ int add_process_large_range(struct process *process, int npages_allocated = 0; void *virt; + if ((start < process->vm->region.user_start) + || (process->vm->region.user_end < end)) { + kprintf("large range(%#lx - %#lx) is not in user avail(%#lx - %#lx)\n", + start, end, process->vm->region.user_start, + process->vm->region.user_end); + return -EINVAL; + } + range = kmalloc(sizeof(struct vm_range), 0); if (!range) { return -ENOMEM; @@ -194,6 +202,14 @@ int add_process_memory_range(struct process *process, { struct vm_range *range; + if ((start < process->vm->region.user_start) + || (process->vm->region.user_end < end)) { + kprintf("range(%#lx - %#lx) is not in user avail(%#lx - %#lx)\n", + start, end, process->vm->region.user_start, + process->vm->region.user_end); + return -EINVAL; + } + range = kmalloc(sizeof(struct vm_range), 0); if (!range) { return -ENOMEM; @@ -239,16 +255,15 @@ void 
init_process_stack(struct process *process, struct program_load_desc *pn, { int s_ind = 0; int arg_ind; + unsigned long size = USER_STACK_NR_PAGES * PAGE_SIZE; char *stack = ihk_mc_alloc_pages(USER_STACK_NR_PAGES, 0); - unsigned long *p = (unsigned long *)(stack + - (USER_STACK_NR_PAGES * PAGE_SIZE)); + unsigned long *p = (unsigned long *)(stack + size); + unsigned long end = process->vm->region.user_end; + unsigned long start = end - size; - memset(stack, 0, USER_STACK_NR_PAGES * PAGE_SIZE); + memset(stack, 0, size); - add_process_memory_range(process, USER_END - - (USER_STACK_NR_PAGES * PAGE_SIZE), - USER_END, - virt_to_phys(stack), VR_STACK); + add_process_memory_range(process, start, end, virt_to_phys(stack), VR_STACK); s_ind = -1; p[s_ind--] = 0; /* AT_NULL */ @@ -274,10 +289,9 @@ void init_process_stack(struct process *process, struct program_load_desc *pn, p[s_ind] = argc; ihk_mc_modify_user_context(process->uctx, IHK_UCR_STACK_POINTER, - USER_END + sizeof(unsigned long) * s_ind); - process->vm->region.stack_end = USER_END; - process->vm->region.stack_start = USER_END - - (USER_STACK_NR_PAGES * PAGE_SIZE); + end + sizeof(unsigned long) * s_ind); + process->vm->region.stack_end = end; + process->vm->region.stack_start = start; } diff --git a/kernel/syscall.c b/kernel/syscall.c index 611847fa..eedfdab9 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1291,6 +1291,14 @@ static int clone_init(void) #endif +long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx) +{ + SYSCALL_HEADER; + dkprintf("syscall_generic_forwarding(%d)\n", n); + SYSCALL_ARGS_6(D,D,D,D,D,D); + SYSCALL_FOOTER; +} + long syscall(int num, ihk_mc_user_context_t *ctx) { long l; @@ -1324,7 +1332,8 @@ long syscall(int num, ihk_mc_user_context_t *ctx) dkprintf("\n"); - if (syscall_table[num]) { + if ((0 <= num) && (num < sizeof(syscall_table)/sizeof(syscall_table[0])) + && (syscall_table[num] != NULL)) { l = syscall_table[num](num, ctx); dkprintf("SC(%d)[%3d] ret: %d\n", @@ 
-1335,8 +1344,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx) ihk_mc_syscall_arg2(ctx), ihk_mc_syscall_arg3(ctx), ihk_mc_syscall_arg4(ctx), ihk_mc_syscall_pc(ctx), ihk_mc_syscall_sp(ctx)); - //while(1); - l = -ENOSYS; + l = syscall_generic_forwarding(num, ctx); } return l;