From be4d84c0c1305e205b22c1a91b011ca113255005 Mon Sep 17 00:00:00 2001 From: Masamichi Takagi Date: Tue, 26 Sep 2017 16:49:40 +0900 Subject: [PATCH] mcexec: Add --stack-premap=PREMAP_SIZE[,MAX] PREMAP_SIZE of stack is pre-mapped on creating a process, and the max size of the stack is set to MAX. This replaces MCKERNEL_RLIMIT_STACK=PREMAP_SIZE,MAX. --- executer/include/uprotocol.h | 1 + executer/user/mcexec.c | 65 ++++++++++++++++++++++++++++++------ kernel/host.c | 5 +++ kernel/include/syscall.h | 1 + kernel/process.c | 11 +++--- 5 files changed, 67 insertions(+), 16 deletions(-) diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 434bb63b..e4150f82 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -140,6 +140,7 @@ struct program_load_desc { unsigned long mpol_flags; unsigned long mpol_threshold; unsigned long heap_extension; + long stack_premap; int nr_processes; char shell_path[SHELL_PATH_MAX_LEN]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index f9f06e68..2b723f30 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -214,6 +214,9 @@ static unsigned long mpol_threshold = 0; static unsigned long heap_extension = (4*1024); static int profile = 0; static int disable_sched_yield = 0; +static long stack_premap = (2ULL << 20); +static long stack_max = -1; +static struct rlimit rlim_stack; /* Partitioned execution (e.g., for MPI) */ static int nr_processes = 0; @@ -1211,7 +1214,7 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[]) struct rlimit new_rlim; /* save original value to environment variable */ - n = snprintf(newval, sizeof(newval), "%#lx,%#lx", + n = snprintf(newval, sizeof(newval), "%ld,%ld", (unsigned long)orig_rlim->rlim_cur, (unsigned long)orig_rlim->rlim_max); if (n >= sizeof(newval)) { @@ -1651,13 +1654,16 @@ static struct option mcexec_options[] = { .flag = NULL, .val = 'h', }, + { + .name = "stack-premap", + .has_arg = required_argument, + .flag = NULL,
+ .val = 's', + }, /* end */ { NULL, 0, NULL, 0, }, }; -#define MCEXEC_DEF_CUR_STACK_SIZE (2 * 1024 * 1024) /* 2 MiB */ -#define MCEXEC_DEF_MAX_STACK_SIZE (64 * 1024 * 1024) /* 64 MiB */ - #ifdef ENABLE_MCOVERLAYFS void bind_mount_recursive(const char *root, char *prefix) { @@ -1762,7 +1768,7 @@ static void ld_preload_init() if (disable_sched_yield) { sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR); - __dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path); + __dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf); if (setenv("LD_PRELOAD", envbuf, 1) < 0) { printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n", __FUNCTION__); @@ -1796,7 +1802,6 @@ int main(int argc, char **argv) char *p; int i; int error; - struct rlimit rlim_stack; unsigned long lcur; unsigned long lmax; int target_core = 0; @@ -1846,10 +1851,15 @@ int main(int argc, char **argv) CHKANDJUMP(error == -1, 1, "unsetenv failed"); } - rlim_stack.rlim_cur = MCEXEC_DEF_CUR_STACK_SIZE; - rlim_stack.rlim_max = MCEXEC_DEF_MAX_STACK_SIZE; + /* Inherit ulimit settings to McKernel process */ + if (getrlimit(RLIMIT_STACK, &rlim_stack)) { + fprintf(stderr, "getrlimit failed\n"); + return 1; + } + __dprintf("rlim_stack=%ld,%ld\n", rlim_stack.rlim_cur, rlim_stack.rlim_max); -#define MCEXEC_MAX_STACK_SIZE (1024 * 1024 * 1024) /* 1 GiB */ + /* Shrink mcexec stack if it leaves too small room for McKernel process */ +#define MCEXEC_MAX_STACK_SIZE (16 * 1024 * 1024) /* 16 MiB */ if (rlim_stack.rlim_cur > MCEXEC_MAX_STACK_SIZE) { /* need to call reduce_stack() before modifying the argv[] */ (void)reduce_stack(&rlim_stack, argv); /* no return, unless failure */ @@ -1859,9 +1869,9 @@ int main(int argc, char **argv) /* Parse options ("+" denotes stop at the first non-option) */ #ifdef ADD_ENVS_OPTION - while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:", mcexec_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:s:", mcexec_options, NULL)) != -1) { #else /*
ADD_ENVS_OPTION */ - while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:", mcexec_options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:s:", mcexec_options, NULL)) != -1) { #endif /* ADD_ENVS_OPTION */ switch (opt) { char *tmp; @@ -1903,6 +1913,23 @@ int main(int argc, char **argv) add_env_list(&extra_env, optarg); break; #endif /* ADD_ENVS_OPTION */ + + case 's': { + char *token, *dup, *line; + dup = strdup(optarg); + line = dup; + token = strsep(&line, ","); + if (token != NULL && *token != 0) { + stack_premap = atobytes(token); + } + token = strsep(&line, ","); + if (token != NULL && *token != 0) { + stack_max = atobytes(token); + } + free(dup); + __dprintf("stack_premap=%ld,stack_max=%ld\n", stack_premap, stack_max); + break; } + case 0: /* long opt */ break; @@ -2111,6 +2138,7 @@ int main(int argc, char **argv) desc->cpu = target_core; desc->enable_vdso = enable_vdso; + /* Restore the stack size when mcexec stack was shrunk */ p = getenv(rlimit_stack_envname); if (p) { char *saveptr; @@ -2155,8 +2183,19 @@ int main(int argc, char **argv) rlim_stack.rlim_cur = lcur; } } + + /* Overwrite the max with MAX of "--stack-premap PREMAP_SIZE,MAX" */ + if (stack_max != -1) { + rlim_stack.rlim_cur = stack_max; + if (rlim_stack.rlim_max != -1 && rlim_stack.rlim_max < rlim_stack.rlim_cur) { + rlim_stack.rlim_max = rlim_stack.rlim_cur; + } + } + desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur; desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max; + desc->stack_premap = stack_premap; + __dprintf("desc->rlimit[MCK_RLIMIT_STACK]=%ld,%ld\n", desc->rlimit[MCK_RLIMIT_STACK].rlim_cur, desc->rlimit[MCK_RLIMIT_STACK].rlim_max); ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0); if(ncpu == -1){ @@ -3632,6 +3671,10 @@ fork_err: __dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n", path, desc->num_sections); + desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur; + desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max; +
desc->stack_premap = stack_premap; + /* Copy descriptor to co-kernel side */ trans.userp = (void*)desc; trans.rphys = w.sr.args[2]; diff --git a/kernel/host.c b/kernel/host.c index 9e77d226..0c756d7c 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -497,6 +497,11 @@ static int process_msg_prepare_process(unsigned long rphys) } vm->region.map_end = vm->region.map_start; memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); + dkprintf("%s: rlim_cur: %ld, rlim_max: %ld, stack_premap: %ld\n", + __FUNCTION__, + proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, + proc->rlimit[MCK_RLIMIT_STACK].rlim_max, + pn->stack_premap); if (prepare_process_ranges_args_envs(thread, pn, p, attr, NULL, 0, NULL, 0) != 0) { diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 89e73d51..0d64226f 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -197,6 +197,7 @@ struct program_load_desc { unsigned long mpol_flags; unsigned long mpol_threshold; unsigned long heap_extension; + long stack_premap; int nr_processes; char shell_path[SHELL_PATH_MAX_LEN]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; diff --git a/kernel/process.c b/kernel/process.c index e58026db..f0eb8bd8 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2089,15 +2089,16 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, #ifdef POSTK_DEBUG_ARCH_DEP_80 /* user stack prepage size fix */ minsz = LARGE_PAGE_SIZE; #else /* POSTK_DEBUG_ARCH_DEP_80 */ - minsz = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur + minsz = (pn->stack_premap + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; #endif /* POSTK_DEBUG_ARCH_DEP_80 */ - size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_max + size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; - dkprintf("%s: rlim_max: %lu, rlim_cur: %lu\n", + dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n", __FUNCTION__, - proc->rlimit[MCK_RLIMIT_STACK].rlim_max, - 
proc->rlimit[MCK_RLIMIT_STACK].rlim_cur); + pn->stack_premap, + proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, + minsz, size); if (size > (USER_END / 2)) { size = USER_END / 2; }