/*
 * Patch summary: per-process opt-out flags for the user memory allocation
 * policy.
 *
 * mcexec (executer/user/mcexec.c)
 *   - Adds the long options mpol-no-heap, mpol-no-stack and mpol-no-bss; each
 *     one sets a static int to 1 through the option table flag and val fields.
 *   - main() clears desc->mpol_flags and ORs in MPOL_NO_HEAP (0x01),
 *     MPOL_NO_STACK (0x02) and MPOL_NO_BSS (0x04) for the options that were
 *     given, ahead of the MCEXEC_UP_PREPARE_IMAGE ioctl.
 *
 * Shared descriptor (executer/include/uprotocol.h, kernel/include/syscall.h)
 *   - Both headers gain the same three MPOL_NO_ defines and an
 *     unsigned long mpol_flags member placed right after interp_align in
 *     struct program_load_desc.
 *     NOTE(review): the two struct definitions look like mirrored copies of
 *     one layout; confirm no other declaration of program_load_desc exists.
 *
 * Kernel
 *   - struct process gets an unsigned long mpol_flags member.
 *   - process_msg_prepare_process() copies pn->mpol_flags into proc, and
 *     init_process() copies the value from the parent process.
 *   - prepare_process_ranges_args_envs(): sections with index >= 1 and
 *     len >= AP_USER_THRESHOLD get IHK_MC_AP_USER only when MPOL_NO_BSS is
 *     clear. NOTE(review): the test is on the section index, so the flag
 *     covers every section after the first, not only BSS; confirm intent.
 *   - init_process_stack(): ap_flag is IHK_MC_AP_USER only when
 *     size >= AP_USER_THRESHOLD and MPOL_NO_STACK is clear; VR_AP_USER is
 *     now derived from (ap_flag & IHK_MC_AP_USER).
 *   - extend_process_region(): IHK_MC_AP_USER is dropped from the allocation
 *     flags when MPOL_NO_HEAP is set; IHK_MC_AP_NOWAIT stays.
 *   - do_mmap(): mappings without MAP_STACK use IHK_MC_AP_USER when
 *     len >= AP_USER_THRESHOLD; MAP_STACK mappings use it regardless of
 *     length unless MPOL_NO_STACK is set.
 *
 * NOTE(review): the hunks below are stored with their line breaks collapsed;
 * restore one diff line per text line before feeding this to a patch tool.
 */
diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 765aace0..77d81988 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -92,6 +92,10 @@ struct get_cpu_set_arg { typedef unsigned long __cpu_set_unit; #define PLD_CPU_SET_SIZE (PLD_CPU_SET_MAX_CPUS / (8 * sizeof(__cpu_set_unit))) +#define MPOL_NO_HEAP 0x01 +#define MPOL_NO_STACK 0x02 +#define MPOL_NO_BSS 0x04 + struct program_load_desc { int num_sections; int status; @@ -120,6 +124,7 @@ struct program_load_desc { unsigned long envs_len; struct rlimit rlimit[MCK_RLIM_MAX]; unsigned long interp_align; + unsigned long mpol_flags; char shell_path[SHELL_PATH_MAX_LEN]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; struct program_image_section sections[0]; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index d47a6f3e..0bf1522e 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -154,6 +154,9 @@ static char *altroot; static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK"; static int ischild; static int enable_vdso = 1; +static int mpol_no_heap = 0; +static int mpol_no_stack = 0; +static int mpol_no_bss = 0; /* Partitioned execution (e.g., for MPI) */ static int nr_processes = 0; @@ -1279,7 +1282,24 @@ static struct option mcexec_options[] = { .flag = &enable_vdso, .val = 1, }, - + { + .name = "mpol-no-heap", + .has_arg = no_argument, + .flag = &mpol_no_heap, + .val = 1, + }, + { + .name = "mpol-no-stack", + .has_arg = no_argument, + .flag = &mpol_no_stack, + .val = 1, + }, + { + .name = "mpol-no-bss", + .has_arg = no_argument, + .flag = &mpol_no_bss, + .val = 1, + }, /* end */ { NULL, 0, NULL, 0, }, }; @@ -1669,6 +1689,19 @@ int main(int argc, char **argv) #endif } + desc->mpol_flags = 0; + if (mpol_no_heap) { + desc->mpol_flags |= MPOL_NO_HEAP; + } + + if (mpol_no_stack) { + desc->mpol_flags |= MPOL_NO_STACK; + } + + if (mpol_no_bss) { + desc->mpol_flags |= MPOL_NO_BSS; + } + if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned 
long)desc) != 0) { perror("prepare"); close(fd); diff --git a/kernel/host.c b/kernel/host.c index 89cdd548..5e758d72 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -127,8 +127,10 @@ int prepare_process_ranges_args_envs(struct thread *thread, flags |= VRFLAG_PROT_TO_MAXPROT(flags); flags |= VR_DEMAND_PAGING; - /* Non-TEXT sections that are large respect user allocation policy */ - if (i >= 1 && pn->sections[i].len >= AP_USER_THRESHOLD) { + /* Non-TEXT sections that are large respect user allocation policy + * unless user explicitly requests otherwise */ + if (i >= 1 && pn->sections[i].len >= AP_USER_THRESHOLD && + !(pn->mpol_flags & MPOL_NO_BSS)) { dkprintf("%s: section: %d size: %d pages -> IHK_MC_AP_USER\n", __FUNCTION__, i, range_npages); ap_flags = IHK_MC_AP_USER; @@ -426,6 +428,7 @@ static int process_msg_prepare_process(unsigned long rphys) proc->sgid = pn->cred[6]; proc->fsgid = pn->cred[7]; proc->termsig = SIGCHLD; + proc->mpol_flags = pn->mpol_flags; vm->region.user_start = pn->user_start; vm->region.user_end = pn->user_end; diff --git a/kernel/include/process.h b/kernel/include/process.h index 40a0d383..92c1b50c 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -553,6 +553,8 @@ struct process { long maxrss; long maxrss_children; + /* Memory policy flags */ + unsigned long mpol_flags; // perf_event int perf_status; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index e54bf783..b998dc18 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -160,6 +160,10 @@ struct program_image_section { typedef unsigned long __cpu_set_unit; #define PLD_CPU_SET_SIZE (PLD_CPU_SET_MAX_CPUS / (8 * sizeof(__cpu_set_unit))) +#define MPOL_NO_HEAP 0x01 +#define MPOL_NO_STACK 0x02 +#define MPOL_NO_BSS 0x04 + struct program_load_desc { int num_sections; int status; @@ -188,6 +192,7 @@ struct program_load_desc { unsigned long envs_len; struct rlimit rlimit[MCK_RLIM_MAX]; unsigned long interp_align; + unsigned long 
mpol_flags; char shell_path[SHELL_PATH_MAX_LEN]; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; struct program_image_section sections[0]; diff --git a/kernel/process.c b/kernel/process.c index 49ddd2e4..be2bbdd1 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -92,6 +92,7 @@ init_process(struct process *proc, struct process *parent) proc->egid = parent->egid; proc->sgid = parent->sgid; proc->fsgid = parent->fsgid; + proc->mpol_flags = parent->mpol_flags; memcpy(proc->rlimit, parent->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); } @@ -1927,7 +1928,8 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, /* Apply user allocation policy to stacks */ /* TODO: make threshold kernel or mcexec argument */ - ap_flag = (size >= AP_USER_THRESHOLD) ? IHK_MC_AP_USER : 0; + ap_flag = (size >= AP_USER_THRESHOLD && + !(proc->mpol_flags & MPOL_NO_STACK)) ? IHK_MC_AP_USER : 0; dkprintf("%s: size: %lu %s\n", __FUNCTION__, size, ap_flag ? "(IHK_MC_AP_USER)" : ""); @@ -1943,7 +1945,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, memset(stack, 0, minsz); vrflag = VR_STACK | VR_DEMAND_PAGING; - vrflag |= (ap_flag ? VR_AP_USER : 0); + vrflag |= ((ap_flag & IHK_MC_AP_USER) ? VR_AP_USER : 0); vrflag |= PROT_TO_VR_FLAG(pn->stack_prot); vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC; #define NOPHYS ((uintptr_t)-1) @@ -2070,7 +2072,8 @@ unsigned long extend_process_region(struct process_vm *vm, } else { p = ihk_mc_alloc_aligned_pages((aligned_new_end - aligned_end) >> PAGE_SHIFT, - LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | IHK_MC_AP_USER); + LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | + (!(vm->proc->mpol_flags & MPOL_NO_HEAP) ? 
IHK_MC_AP_USER : 0)); if (!p) { return end; diff --git a/kernel/syscall.c b/kernel/syscall.c index 3880daa9..25773d11 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1430,7 +1430,8 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot, /* Small allocations mostly benefit from closest RAM, * otherwise follow user requested policy */ unsigned long ap_flag = - (len >= AP_USER_THRESHOLD || flags & MAP_STACK) ? + (!(flags & MAP_STACK) && len >= AP_USER_THRESHOLD) || + ((flags & MAP_STACK) && !(thread->proc->mpol_flags & MPOL_NO_STACK)) ? IHK_MC_AP_USER : 0; if (ap_flag) {