From 10190e9ed69f7dff7a943954e405830b425d3b15 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Fri, 25 Jul 2014 09:17:43 +0900 Subject: [PATCH 1/8] prepare for a condition variable for each process. (Redmine#193) --- executer/kernel/control.c | 7 ++++--- executer/kernel/mcctrl.h | 2 +- executer/kernel/syscall.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/executer/kernel/control.c b/executer/kernel/control.c index 4b0c837d..3a3bd6ae 100644 --- a/executer/kernel/control.c +++ b/executer/kernel/control.c @@ -330,12 +330,13 @@ retry_alloc: } wqhln->pid = pid; + wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); list_add_tail(&wqhln->list, &c->wq_list); } ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags); - c->req = 1; + wqhln->req = 1; wake_up(&wqhln->wq_syscall); return 0; @@ -388,6 +389,7 @@ retry_alloc: } wqhln->pid = swd.pid; + wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); @@ -403,7 +405,7 @@ retry_alloc: list_add_tail(&wqhln->list, &c->wq_list); ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags); - ret = wait_event_interruptible(wqhln->wq_syscall, c->req); + ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); /* Remove per-process wait queue head */ irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); @@ -427,7 +429,6 @@ retry_alloc: return -EINTR; } - c->req = 0; #if 1 mb(); if (!c->param.request_va->valid) { diff --git a/executer/kernel/mcctrl.h b/executer/kernel/mcctrl.h index 26c5adcb..0266068f 100644 --- a/executer/kernel/mcctrl.h +++ b/executer/kernel/mcctrl.h @@ -93,6 +93,7 @@ struct wait_queue_head_list_node { struct list_head list; wait_queue_head_t wq_syscall; int pid; + int req; }; struct mcctrl_channel { @@ -101,7 +102,6 @@ struct mcctrl_channel { struct ikc_scd_init_param init; void *dma_buf; - int req; struct list_head wq_list; ihk_spinlock_t wq_list_lock; }; diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c index 
a748fdf2..e35f1e0e 100644 --- a/executer/kernel/syscall.c +++ b/executer/kernel/syscall.c @@ -255,6 +255,7 @@ retry_alloc: /* Prepare per-process wait queue head */ wqhln->pid = current->tgid; + wqhln->req = 0; init_waitqueue_head(&wqhln->wq_syscall); irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); @@ -271,7 +272,7 @@ retry_alloc: ihk_ikc_spinlock_unlock(&channel->wq_list_lock, irqflags); /* wait for response */ - error = wait_event_interruptible(wqhln->wq_syscall, channel->req); + error = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); /* Remove per-process wait queue head */ irqflags = ihk_ikc_spinlock_lock(&channel->wq_list_lock); @@ -283,7 +284,6 @@ retry_alloc: printk("remote_page_fault:interrupted. %d\n", error); goto out; } - channel->req = 0; if (!req->valid) { printk("remote_page_fault:not valid\n"); } From 17730617e1e4cd5719237d8d27c77bd658797ade Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Fri, 25 Jul 2014 10:54:30 +0900 Subject: [PATCH 2/8] when a wakeup for a syscall and a received signal occur at the same time, the program freezes. 
--- executer/kernel/control.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/executer/kernel/control.c b/executer/kernel/control.c index 3a3bd6ae..e6c5fb92 100644 --- a/executer/kernel/control.c +++ b/executer/kernel/control.c @@ -407,6 +407,10 @@ retry_alloc: ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req); + if (ret) { + return -EINTR; + } + /* Remove per-process wait queue head */ irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock); list_del(&wqhln->list); @@ -425,10 +429,6 @@ retry_alloc: return -EINTR; } - if (ret) { - return -EINTR; - } - #if 1 mb(); if (!c->param.request_va->valid) { From 07b07476b75e917613d50a7260a066bdab49e2fd Mon Sep 17 00:00:00 2001 From: "Balazs Gerofi bgerofi@riken.jp" Date: Sun, 27 Jul 2014 13:11:41 +0900 Subject: [PATCH 3/8] remote TLB invalidation: follow migrated threads by affinity calls --- kernel/process.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/process.c b/kernel/process.c index 78f52d2b..aacd0856 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1781,6 +1781,7 @@ static void do_migrate(void) ihk_mc_spinlock_lock_noirq(&cur_v->migq_lock); list_for_each_entry_safe(req, tmp, &cur_v->migq, list) { int cpu_id; + int old_cpu_id; struct cpu_local_var *v; /* 0. 
check if migration is necessary */ @@ -1802,9 +1803,17 @@ static void do_migrate(void) double_rq_lock(cur_v, v); list_del(&req->proc->sched_list); cur_v->runq_len -= 1; + old_cpu_id = req->proc->cpu_id; req->proc->cpu_id = cpu_id; list_add_tail(&req->proc->sched_list, &v->runq); v->runq_len += 1; + + /* update cpu_set of the VM for remote TLB invalidation */ + cpu_clear(old_cpu_id, &req->proc->vm->cpu_set, + &req->proc->vm->cpu_set_lock); + cpu_set(cpu_id, &req->proc->vm->cpu_set, + &req->proc->vm->cpu_set_lock); + if (v->runq_len == 1) ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1); double_rq_unlock(cur_v, v); From c83e80ad9126429f7feada70237c41d9a79ae7b6 Mon Sep 17 00:00:00 2001 From: "Balazs Gerofi bgerofi@riken.jp" Date: Mon, 28 Jul 2014 15:34:58 +0900 Subject: [PATCH 4/8] execve(): clear host user-space PTEs before context switching --- executer/user/mcexec.c | 53 ++++++++++++++++++++++++++++++++++------ kernel/include/process.h | 3 +++ kernel/process.c | 3 --- kernel/syscall.c | 24 +++++++++++++++++- 4 files changed, 72 insertions(+), 11 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 04d57a34..0e24d0fa 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1217,18 +1217,57 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) int ret = -1; struct program_load_desc *desc; struct remote_transfer trans; - + FILE *fp; + int status; + char path[2048]; + char *filename; + /* Load descriptor phase */ case 1: - if (load_elf_desc((char *)w.sr.args[1], &desc) != 0) { + + filename = (char *)w.sr.args[1]; + + /* Is filename without path? */ + if (0 && strncmp(filename, "/", 1) + //&& strncmp(filename, ".", 1) + ) { + + char *PATH = getenv("PATH"); + fprintf(stderr, "PATH: %s\n", PATH); + + /* Open command for reading. 
*/ + sprintf(path, "/usr/bin/which %s", filename); + fp = popen(path, "r"); + if (fp == NULL) { + fprintf(stderr, "execve(): failed to run which\n" ); + goto return_execve1; + } + + /* Read the output a line at a time - output it. */ + if (fgets(path, sizeof(path)-1, fp) == NULL) { + fprintf(stderr, "execve(): failed to read which\n" ); + pclose(fp); + goto return_execve1; + } + + /* close */ + pclose(fp); + } + else { + sprintf(path, "%s", filename); + } + + __dprintf("execve: filename: %s\n", filename); + __dprintf("execve: LD_LIBRARY_PATH: %s\n", getenv("LD_LIBRARY_PATH") ? getenv("LD_LIBRARY_PATH") : "(empty)"); + + if (load_elf_desc(path, &desc) != 0) { fprintf(stderr, - "execve(): error loading ELF for file %s\n", - (char *)w.sr.args[1]); + "execve(): error loading ELF for file %s\n", path); goto return_execve1; } __dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n", - w.sr.args[1], desc->num_sections); + path, desc->num_sections); /* Copy descriptor to co-kernel side */ trans.userp = (void*)desc; @@ -1246,7 +1285,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) } __dprintf("execve(): load_elf_desc() for %s OK\n", - w.sr.args[1]); + path); /* We can't be sure next phase will succeed */ /* TODO: what shall we do with fp in desc?? 
*/ @@ -1279,7 +1318,7 @@ return_execve1: goto return_execve1; } - printf("execve(): transfer ELF desc OK\n"); + __dprintf("execve(): transfer ELF desc OK\n"); transfer_image(fd, desc); __dprintf("execve(): image transferred\n"); diff --git a/kernel/include/process.h b/kernel/include/process.h index adf74ecf..066b5e96 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -84,6 +84,9 @@ //#define USE_LARGE_PAGES #endif +#define USER_STACK_NR_PAGES 8192 +#define KERNEL_STACK_NR_PAGES 25 + #include #include #include diff --git a/kernel/process.c b/kernel/process.c index aacd0856..13f820fb 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -43,9 +43,6 @@ #endif -#define USER_STACK_NR_PAGES 8192 -#define KERNEL_STACK_NR_PAGES 25 - extern long do_arch_prctl(unsigned long code, unsigned long address); static void insert_vm_range_list(struct process_vm *vm, struct vm_range *newrange); diff --git a/kernel/syscall.c b/kernel/syscall.c index 67280657..aa453800 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1228,6 +1228,11 @@ SYSCALL_DECLARE(execve) /* Unmap all memory areas of the process, userspace will be gone */ free_process_memory_ranges(cpu_local_var(current)); + ihk_mc_init_user_process(&cpu_local_var(current)->ctx, + &cpu_local_var(current)->uctx, + ((char *)cpu_local_var(current)) + + KERNEL_STACK_NR_PAGES * PAGE_SIZE, desc->entry, 0); + /* Create virtual memory ranges and update args/envs */ if (prepare_process_ranges_args_envs(cpu_local_var(current), desc, desc, PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER, @@ -1235,6 +1240,18 @@ SYSCALL_DECLARE(execve) kprintf("execve(): PANIC: preparing ranges, args, envs, stack\n"); panic(""); } + + /* Clear host user space PTEs */ + request.number = __NR_munmap; + request.args[0] = cpu_local_var(current)->vm->region.user_start; + request.args[1] = cpu_local_var(current)->vm->region.user_end - + cpu_local_var(current)->vm->region.user_start; + dkprintf("execve(): requesting host PTE 
clear\n"); + + if (do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)) { + kprintf("execve(): ERROR: clearing PTEs in host process\n"); + panic(""); + } /* Request host to transfer ELF image */ request.number = __NR_execve; @@ -1250,8 +1267,13 @@ SYSCALL_DECLARE(execve) panic(""); } - dkprintf("execve(): returning to new process\n"); + /* Switch to new execution context */ + dkprintf("execve(): switching to new process\n"); + + ihk_mc_switch_context(NULL, &cpu_local_var(current)->ctx, + cpu_local_var(current)); + /* Never reach here */ return 0; } From 8f78b3085c2968f91fdacca2c4f6b1e3340b88ed Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Tue, 29 Jul 2014 11:04:46 +0900 Subject: [PATCH 5/8] Sending SIGKILL/SIGTERM resulting in SIGSEGV delivered (Redmine#194) --- arch/x86/kernel/syscall.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index 38c1ea42..2a1ba8cc 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -160,10 +160,6 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); - if(sig == SIGKILL || sig == SIGTERM) - terminate(0, sig, (ihk_mc_user_context_t *)regs->rsp); - - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); if(regs == NULL){ /* call from syscall */ asm("movq %%gs:132, %0" : "=r" (regs)); --regs; @@ -171,6 +167,11 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin else{ rc = regs->rax; } + + if(sig == SIGKILL || sig == SIGTERM) + terminate(0, sig, (ihk_mc_user_context_t *)regs->rsp); + + irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); k = proc->sighandler->action + sig - 1; if(k->sa.sa_handler == (void *)1){ From e5d3407d8e6862330dac90629f0120def6085828 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Tue, 29 Jul 2014 11:10:11 +0900 Subject: [PATCH 6/8] child mcexec don't print signal 
terminate message --- executer/user/mcexec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 0e24d0fa..e4b8c1cd 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -116,6 +116,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock); static int fd; static char *altroot; static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK"; +static int ischild; pid_t gettid(void) { @@ -1106,8 +1107,10 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) sig = w.sr.args[0] & 0x7f; term = (w.sr.args[0] & 0xff00) >> 8; if(isatty(2)){ - if(sig) - fprintf(stderr, "Terminate by signal %d\n", sig); + if(sig){ + if(!ischild) + fprintf(stderr, "Terminate by signal %d\n", sig); + } else if(term) __dprintf("Exit status: %d\n", term); } @@ -1163,6 +1166,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) case 0: { int i; + ischild = 1; /* Reopen device fd */ close(fd); fd = open(dev, O_RDWR); From f1e39f5fb20fed40d3754432909d16c7f1c6d03c Mon Sep 17 00:00:00 2001 From: "bgerofi@riken.jp" Date: Mon, 28 Jul 2014 22:09:54 +0900 Subject: [PATCH 7/8] execve(): find executable first in COKERNEL_PATH and then in PATH if executable name is not absolute --- executer/user/mcexec.c | 68 ++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index e4b8c1cd..63c6e7f7 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1221,8 +1221,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) int ret = -1; struct program_load_desc *desc; struct remote_transfer trans; - FILE *fp; - int status; + int error; + int found = 0; char path[2048]; char *filename; @@ -1232,37 +1232,59 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) filename = (char *)w.sr.args[1]; /* Is filename without path? 
*/ - if (0 && strncmp(filename, "/", 1) - //&& strncmp(filename, ".", 1) - ) { + if (strncmp(filename, "/", 1) + && strncmp(filename, ".", 1)) { + + char *token, *string, *tofree; + char *PATH = getenv("COKERNEL_PATH"); + if (!PATH) { + PATH = getenv("PATH"); + } + + __dprintf("PATH: %s\n", PATH); - char *PATH = getenv("PATH"); - fprintf(stderr, "PATH: %s\n", PATH); - - /* Open command for reading. */ - sprintf(path, "/usr/bin/which %s", filename); - fp = popen(path, "r"); - if (fp == NULL) { - fprintf(stderr, "execve(): failed to run which\n" ); + /* strsep() modifies string! */ + tofree = string = strdup(PATH); + if (string == NULL) { + printf("error: copying PATH, not enough memory?\n"); goto return_execve1; } - /* Read the output a line at a time - output it. */ - if (fgets(path, sizeof(path)-1, fp) == NULL) { - fprintf(stderr, "execve(): failed to read which\n" ); - pclose(fp); - goto return_execve1; + while ((token = strsep(&string, ":")) != NULL) { + + error = snprintf(path, sizeof(path), + "%s/%s", token, filename); + if (error < 0 || error >= sizeof(path)) { + fprintf(stderr, "execve(): array too small?\n"); + continue; + } + + error = access(path, X_OK); + if (!error) { + found = 1; + break; + } } - /* close */ - pclose(fp); + free(tofree); } else { - sprintf(path, "%s", filename); + error = snprintf(path, sizeof(path), "%s", filename); + if (error < 0 || error >= sizeof(path)) { + fprintf(stderr, "execve(): array too small?\n"); + goto return_execve1; + } + + found = 1; + } + + if (!found) { + fprintf(stderr, + "execve(): error finding file %s\n", path); + goto return_execve1; } - __dprintf("execve: filename: %s\n", filename); - __dprintf("execve: LD_LIBRARY_PATH: %s\n", getenv("LD_LIBRARY_PATH") ? 
getenv("LD_LIBRARY_PATH") : "(empty)"); + __dprintf("execve(): path to binary: %s\n", path); if (load_elf_desc(path, &desc) != 0) { fprintf(stderr, From 27bfe37b80139b7f92f8a4593b1de0954f32a6d3 Mon Sep 17 00:00:00 2001 From: "Balazs Gerofi bgerofi@riken.jp" Date: Tue, 29 Jul 2014 15:48:14 +0900 Subject: [PATCH 8/8] execve(): fix looking up relative path and checking executable permission --- executer/user/mcexec.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 63c6e7f7..96525e5a 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -342,6 +342,12 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p) FILE *interp = NULL; char *interp_path; struct program_load_desc *desc; + int ret = 0; + + if ((ret = access(filename, X_OK)) != 0) { + fprintf(stderr, "Error: %s is not an executable?\n", filename); + return ret; + } fp = fopen(filename, "rb"); if (!fp) { @@ -1218,22 +1224,24 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) /* Execve phase */ switch (w.sr.args[0]) { - int ret = -1; struct program_load_desc *desc; struct remote_transfer trans; int error; - int found = 0; + int found; char path[2048]; char *filename; + int ret; /* Load descriptor phase */ case 1: - + + ret = -1; + found = 0; filename = (char *)w.sr.args[1]; - /* Is filename without path? */ + /* Is filename a single component without path? 
*/ if (strncmp(filename, "/", 1) - && strncmp(filename, ".", 1)) { + && !strchr(filename, '/')) { char *token, *string, *tofree; char *PATH = getenv("COKERNEL_PATH"); @@ -1260,7 +1268,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) } error = access(path, X_OK); - if (!error) { + if (error == 0) { found = 1; break; } @@ -1280,13 +1288,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) if (!found) { fprintf(stderr, - "execve(): error finding file %s\n", path); + "execve(): error finding file %s\n", filename); goto return_execve1; } __dprintf("execve(): path to binary: %s\n", path); - if (load_elf_desc(path, &desc) != 0) { + if ((ret = load_elf_desc(path, &desc)) != 0) { fprintf(stderr, "execve(): error loading ELF for file %s\n", path); goto return_execve1; @@ -1325,6 +1333,7 @@ return_execve1: /* Copy program image phase */ case 2: + ret = -1; /* Alloc descriptor */ desc = malloc(w.sr.args[2]); if (!desc) {