mcexec: Add --stack-premap=<premap_size>[,<max>]

<premap_size> of stack is pre-mapped on creating a process.
And its max size of stack is set to <max>.
This replaces MCKERNEL_RLIMIT_STACK=<premap_size>,<max>.
This commit is contained in:
Masamichi Takagi
2017-09-26 16:49:40 +09:00
parent c43c1b640a
commit be4d84c0c1
5 changed files with 67 additions and 16 deletions

View File

@ -140,6 +140,7 @@ struct program_load_desc {
unsigned long mpol_flags; unsigned long mpol_flags;
unsigned long mpol_threshold; unsigned long mpol_threshold;
unsigned long heap_extension; unsigned long heap_extension;
long stack_premap;
int nr_processes; int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN]; char shell_path[SHELL_PATH_MAX_LEN];
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@ -214,6 +214,9 @@ static unsigned long mpol_threshold = 0;
static unsigned long heap_extension = (4*1024); static unsigned long heap_extension = (4*1024);
static int profile = 0; static int profile = 0;
static int disable_sched_yield = 0; static int disable_sched_yield = 0;
static long stack_premap = (2ULL << 20);
static long stack_max = -1;
static struct rlimit rlim_stack;
/* Partitioned execution (e.g., for MPI) */ /* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0; static int nr_processes = 0;
@ -1211,7 +1214,7 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
struct rlimit new_rlim; struct rlimit new_rlim;
/* save original value to environment variable */ /* save original value to environment variable */
n = snprintf(newval, sizeof(newval), "%#lx,%#lx", n = snprintf(newval, sizeof(newval), "%ld,%ld",
(unsigned long)orig_rlim->rlim_cur, (unsigned long)orig_rlim->rlim_cur,
(unsigned long)orig_rlim->rlim_max); (unsigned long)orig_rlim->rlim_max);
if (n >= sizeof(newval)) { if (n >= sizeof(newval)) {
@ -1651,13 +1654,16 @@ static struct option mcexec_options[] = {
.flag = NULL, .flag = NULL,
.val = 'h', .val = 'h',
}, },
{
.name = "stack-premap",
.has_arg = required_argument,
.flag = NULL,
.val = 's',
},
/* end */ /* end */
{ NULL, 0, NULL, 0, }, { NULL, 0, NULL, 0, },
}; };
#define MCEXEC_DEF_CUR_STACK_SIZE (2 * 1024 * 1024) /* 2 MiB */
#define MCEXEC_DEF_MAX_STACK_SIZE (64 * 1024 * 1024) /* 64 MiB */
#ifdef ENABLE_MCOVERLAYFS #ifdef ENABLE_MCOVERLAYFS
void bind_mount_recursive(const char *root, char *prefix) void bind_mount_recursive(const char *root, char *prefix)
{ {
@ -1762,7 +1768,7 @@ static void ld_preload_init()
if (disable_sched_yield) { if (disable_sched_yield) {
sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR); sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR);
__dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path); __dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf);
if (setenv("LD_PRELOAD", envbuf, 1) < 0) { if (setenv("LD_PRELOAD", envbuf, 1) < 0) {
printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n", printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n",
__FUNCTION__); __FUNCTION__);
@ -1796,7 +1802,6 @@ int main(int argc, char **argv)
char *p; char *p;
int i; int i;
int error; int error;
struct rlimit rlim_stack;
unsigned long lcur; unsigned long lcur;
unsigned long lmax; unsigned long lmax;
int target_core = 0; int target_core = 0;
@ -1846,10 +1851,15 @@ int main(int argc, char **argv)
CHKANDJUMP(error == -1, 1, "unsetenv failed"); CHKANDJUMP(error == -1, 1, "unsetenv failed");
} }
rlim_stack.rlim_cur = MCEXEC_DEF_CUR_STACK_SIZE; /* Inherit ulimit settings to McKernel process */
rlim_stack.rlim_max = MCEXEC_DEF_MAX_STACK_SIZE; if (getrlimit(RLIMIT_STACK, &rlim_stack)) {
fprintf(stderr, "getrlimit failed\n");
return 1;
}
__dprintf("rlim_stack=%ld,%ld\n", rlim_stack.rlim_cur, rlim_stack.rlim_max);
#define MCEXEC_MAX_STACK_SIZE (1024 * 1024 * 1024) /* 1 GiB */ /* Shrink mcexec stack if it leaves too small room for McKernel process */
#define MCEXEC_MAX_STACK_SIZE (16 * 1024 * 1024) /* 1 GiB */
if (rlim_stack.rlim_cur > MCEXEC_MAX_STACK_SIZE) { if (rlim_stack.rlim_cur > MCEXEC_MAX_STACK_SIZE) {
/* need to call reduce_stack() before modifying the argv[] */ /* need to call reduce_stack() before modifying the argv[] */
(void)reduce_stack(&rlim_stack, argv); /* no return, unless failure */ (void)reduce_stack(&rlim_stack, argv); /* no return, unless failure */
@ -1859,9 +1869,9 @@ int main(int argc, char **argv)
/* Parse options ("+" denotes stop at the first non-option) */ /* Parse options ("+" denotes stop at the first non-option) */
#ifdef ADD_ENVS_OPTION #ifdef ADD_ENVS_OPTION
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:", mcexec_options, NULL)) != -1) { while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:s:", mcexec_options, NULL)) != -1) {
#else /* ADD_ENVS_OPTION */ #else /* ADD_ENVS_OPTION */
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:", mcexec_options, NULL)) != -1) { while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:s:", mcexec_options, NULL)) != -1) {
#endif /* ADD_ENVS_OPTION */ #endif /* ADD_ENVS_OPTION */
switch (opt) { switch (opt) {
char *tmp; char *tmp;
@ -1903,6 +1913,23 @@ int main(int argc, char **argv)
add_env_list(&extra_env, optarg); add_env_list(&extra_env, optarg);
break; break;
#endif /* ADD_ENVS_OPTION */ #endif /* ADD_ENVS_OPTION */
case 's': {
char *token, *dup, *line;
dup = strdup(optarg);
line = dup;
token = strsep(&line, ",");
if (token != NULL && *token != 0) {
stack_premap = atobytes(token);
}
token = strsep(&line, ",");
if (token != NULL && *token != 0) {
stack_max = atobytes(token);
}
free(dup);
__dprintf("stack_premap=%ld,stack_max=%ld\n", stack_premap, stack_max);
break; }
case 0: /* long opt */ case 0: /* long opt */
break; break;
@ -2111,6 +2138,7 @@ int main(int argc, char **argv)
desc->cpu = target_core; desc->cpu = target_core;
desc->enable_vdso = enable_vdso; desc->enable_vdso = enable_vdso;
/* Restore the stack size when mcexec stack was shrinked */
p = getenv(rlimit_stack_envname); p = getenv(rlimit_stack_envname);
if (p) { if (p) {
char *saveptr; char *saveptr;
@ -2155,8 +2183,19 @@ int main(int argc, char **argv)
rlim_stack.rlim_cur = lcur; rlim_stack.rlim_cur = lcur;
} }
} }
/* Overwrite the max with <max> of "--stack-premap <premap>,<max>" */
if (stack_max != -1) {
rlim_stack.rlim_cur = stack_max;
if (rlim_stack.rlim_max != -1 && rlim_stack.rlim_max < rlim_stack.rlim_cur) {
rlim_stack.rlim_max = rlim_stack.rlim_cur;
}
}
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur; desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max; desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
desc->stack_premap = stack_premap;
__dprintf("desc->rlimit[MCK_RLIMIT_STACK]=%ld,%ld\n", desc->rlimit[MCK_RLIMIT_STACK].rlim_cur, desc->rlimit[MCK_RLIMIT_STACK].rlim_max);
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0); ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
if(ncpu == -1){ if(ncpu == -1){
@ -3632,6 +3671,10 @@ fork_err:
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n", __dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
path, desc->num_sections); path, desc->num_sections);
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
desc->stack_premap = stack_premap;
/* Copy descriptor to co-kernel side */ /* Copy descriptor to co-kernel side */
trans.userp = (void*)desc; trans.userp = (void*)desc;
trans.rphys = w.sr.args[2]; trans.rphys = w.sr.args[2];

View File

@ -497,6 +497,11 @@ static int process_msg_prepare_process(unsigned long rphys)
} }
vm->region.map_end = vm->region.map_start; vm->region.map_end = vm->region.map_start;
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
dkprintf("%s: rlim_cur: %ld, rlim_max: %ld, stack_premap: %ld\n",
__FUNCTION__,
proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
proc->rlimit[MCK_RLIMIT_STACK].rlim_max,
pn->stack_premap);
if (prepare_process_ranges_args_envs(thread, pn, p, attr, if (prepare_process_ranges_args_envs(thread, pn, p, attr,
NULL, 0, NULL, 0) != 0) { NULL, 0, NULL, 0) != 0) {

View File

@ -197,6 +197,7 @@ struct program_load_desc {
unsigned long mpol_flags; unsigned long mpol_flags;
unsigned long mpol_threshold; unsigned long mpol_threshold;
unsigned long heap_extension; unsigned long heap_extension;
long stack_premap;
int nr_processes; int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN]; char shell_path[SHELL_PATH_MAX_LEN];
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@ -2089,15 +2089,16 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
#ifdef POSTK_DEBUG_ARCH_DEP_80 /* user stack prepage size fix */ #ifdef POSTK_DEBUG_ARCH_DEP_80 /* user stack prepage size fix */
minsz = LARGE_PAGE_SIZE; minsz = LARGE_PAGE_SIZE;
#else /* POSTK_DEBUG_ARCH_DEP_80 */ #else /* POSTK_DEBUG_ARCH_DEP_80 */
minsz = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur minsz = (pn->stack_premap
+ LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
#endif /* POSTK_DEBUG_ARCH_DEP_80 */ #endif /* POSTK_DEBUG_ARCH_DEP_80 */
size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_max size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur
+ LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
dkprintf("%s: rlim_max: %lu, rlim_cur: %lu\n", dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n",
__FUNCTION__, __FUNCTION__,
proc->rlimit[MCK_RLIMIT_STACK].rlim_max, pn->stack_premap,
proc->rlimit[MCK_RLIMIT_STACK].rlim_cur); proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
minsz, size);
if (size > (USER_END / 2)) { if (size > (USER_END / 2)) {
size = USER_END / 2; size = USER_END / 2;
} }