stack: grow on page fault

The steps of the technique to replace the stack with a hugetlbfs map are as
follows:

(1) Prepare a hugetlbfs map whose size is rlim_cur (the RLIMIT_STACK soft limit).
(2) Copy the active region of the stack to the hugetlbfs map.
    The range to copy is determined by reading /proc/[pid]/maps.
(3) Replace the stack map with the hugetlbfs map.

Step (2) would copy a huge region if McKernel did not grow the stack at
run-time, because the entire pre-mapped stack would show up as the active
region in /proc/[pid]/maps. This commit therefore pre-maps only the minimum
stack size and grows the stack on page fault. A rough userspace sketch of
steps (1)-(3) follows.
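
The sketch below is for illustration only and is not the code added by this
commit: it assumes a hugetlbfs mount at /dev/hugepages, a file name and
function names made up for the example, an rlim_cur and stack-top address
that are already aligned to the huge page size, and that it runs on a stack
other than the one being replaced (e.g. from a helper process or an
alternate signal stack).

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>

/* Step (2) helper: find the [stack] range by scanning /proc/self/maps
 * (the commit text says /proc/[pid]/maps; "self" is used here for brevity). */
static int find_stack_range(unsigned long *start, unsigned long *end)
{
        FILE *fp = fopen("/proc/self/maps", "r");
        char line[256];

        if (!fp)
                return -1;
        while (fgets(line, sizeof(line), fp)) {
                if (strstr(line, "[stack]")) {
                        sscanf(line, "%lx-%lx", start, end);
                        fclose(fp);
                        return 0;
                }
        }
        fclose(fp);
        return -1;
}

/* Steps (1)-(3): create a hugetlbfs map of rlim_cur bytes, copy the active
 * stack region into its tail, then map the file MAP_FIXED over the old
 * stack range.  Must not run on the stack that is being replaced, and
 * alignment of rlim_cur and the stack top to the huge page size is
 * glossed over. */
int replace_stack_with_hugetlbfs(void)
{
        struct rlimit rl;
        unsigned long start, end;
        void *tmp;
        int fd;

        if (getrlimit(RLIMIT_STACK, &rl) || find_stack_range(&start, &end))
                return -1;

        /* (1) hugetlbfs map with the size of rlim_cur */
        fd = open("/dev/hugepages/stack", O_CREAT | O_RDWR, 0600);
        if (fd < 0)
                return -1;
        tmp = mmap(NULL, rl.rlim_cur, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, 0);
        if (tmp == MAP_FAILED)
                return -1;

        /* (2) copy the active region; the stack grows down and ends at
         * "end", so the data lands at the tail of the new map */
        memcpy((char *)tmp + rl.rlim_cur - (end - start), (void *)start,
                        end - start);
        munmap(tmp, rl.rlim_cur);

        /* (3) replace the stack map with the hugetlbfs map */
        if (mmap((void *)(end - rl.rlim_cur), rl.rlim_cur,
                        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
                        fd, 0) == MAP_FAILED)
                return -1;
        close(fd);
        return 0;
}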

Change-Id: I5858c35b5c26dd0a42cccf9e3cc4c64b1a81f160
Authored by Masamichi Takagi, 2020-07-16 16:58:23 +09:00
Committed by Masamichi Takagi
parent a20e1acf01
commit 40f8091fab
2 changed files with 67 additions and 17 deletions

@@ -2211,13 +2211,62 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
{
        int error;
        const uintptr_t fault_addr = (uintptr_t)fault_addr0;
        struct vm_range *range;
        struct vm_range *range = NULL;
        struct thread *thread = cpu_local_var(current);
        int locked = 0;
        dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
                ihk_mc_get_processor_id(), vm, fault_addr0, reason);
        /* grow stack */
        if (fault_addr >= thread->vm->region.stack_start &&
            fault_addr < thread->vm->region.stack_end) {
                range = lookup_process_memory_range(vm,
                                thread->vm->region.stack_end - 1,
                                thread->vm->region.stack_end);
                if (range == NULL) {
                        error = -EFAULT;
                        ekprintf("%s: vm: %p, addr: %p, reason: %lx):"
                                 "stack not found: %d\n",
                                 __func__, vm, fault_addr0, reason, error);
                        goto out;
                }
                /* don't grow if replaced with hugetlbfs */
                if (range->memobj) {
                        goto skip;
                }
                if (fault_addr >= range->start) {
                        goto skip;
                }
                if (thread->vm->is_memory_range_lock_taken == -1 ||
                    thread->vm->is_memory_range_lock_taken !=
                    ihk_mc_get_processor_id()) {
                        ihk_rwspinlock_write_lock_noirq(&vm->memory_range_lock);
                        locked = 1;
                }
                if (range->pgshift) {
                        range->start = fault_addr &
                                ~((1UL << range->pgshift) - 1);
                } else {
                        range->start = fault_addr & PAGE_MASK;
                }
                if (locked) {
                        ihk_rwspinlock_write_unlock_noirq(&vm->memory_range_lock);
                        locked = 0;
                }
                dkprintf("%s: addr: %lx, reason: %lx, range: %lx-%lx:"
                        "stack found\n",
                        __func__, (unsigned long)fault_addr, reason,
                        range->start, range->end);
        }
skip:
        if (thread->vm->is_memory_range_lock_taken == -1 ||
            thread->vm->is_memory_range_lock_taken != ihk_mc_get_processor_id()) {
                ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
@@ -2232,13 +2281,16 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui
                goto out;
        }
        range = lookup_process_memory_range(vm, fault_addr, fault_addr+1);
        if (range == NULL) {
                error = -EFAULT;
                dkprintf("do_page_fault_process_vm(): vm: %p, addr: %p, reason: %lx):"
                        "out of range: %d\n",
                        vm, fault_addr0, reason, error);
                goto out;
        if (!range) {
                range = lookup_process_memory_range(vm, fault_addr,
                                fault_addr+1);
                if (range == NULL) {
                        error = -EFAULT;
                        dkprintf("%s: vm: %p, addr: %p, reason: %lx):"
                                "out of range: %d\n",
                                __func__, vm, fault_addr0, reason, error);
                        goto out;
                }
        }
        if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)
@@ -2402,16 +2454,15 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
                size = minsz;
        }
        size = (size + USER_STACK_PREPAGE_SIZE - 1) & USER_STACK_PAGE_MASK;
        dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n",
                __FUNCTION__,
                pn->stack_premap,
                proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
                minsz, size);
        start = (end - size) & USER_STACK_PAGE_MASK;
        dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu, maxsz: %lx\n",
                __func__, pn->stack_premap,
                proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
                minsz, size, maxsz);
        start = (end - minsz) & USER_STACK_PAGE_MASK;
        /* Apply user allocation policy to stacks */
        /* TODO: make threshold kernel or mcexec argument */
        ap_flag = (size >= proc->mpol_threshold &&
        ap_flag = (minsz >= proc->mpol_threshold &&
                !(proc->mpol_flags & MPOL_NO_STACK)) ? IHK_MC_AP_USER : 0;
        dkprintf("%s: max size: %lu, mapped size: %lu %s\n",
                __FUNCTION__, size, minsz,
@@ -2543,7 +2594,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
        ihk_mc_modify_user_context(thread->uctx, IHK_UCR_STACK_POINTER,
                end + sizeof(unsigned long) * s_ind);
        thread->vm->region.stack_end = end;
        thread->vm->region.stack_start = start;
        thread->vm->region.stack_start = (end - size) & USER_STACK_PAGE_MASK;
        return 0;
}
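
As a quick reference for the kernel-side half, the grow-on-fault decision
added in the first hunk boils down to the sketch below. The struct, constants
and function name are simplified stand-ins for this note, not the actual
McKernel declarations.

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

struct memobj;                          /* opaque here */
struct vm_range {                       /* reduced to the fields used */
        unsigned long start;
        unsigned long end;
        int pgshift;                    /* 0 means the default page size */
        struct memobj *memobj;
};

/* Grow the stack range down to the faulting address, unless the range was
 * replaced with a hugetlbfs map (range->memobj != NULL) or the address is
 * already covered. */
static void grow_stack_on_fault(struct vm_range *range, unsigned long fault_addr,
                unsigned long stack_start, unsigned long stack_end)
{
        unsigned long mask;

        if (fault_addr < stack_start || fault_addr >= stack_end)
                return;                 /* not a fault in the stack window */
        if (range->memobj)
                return;                 /* don't grow if replaced with hugetlbfs */
        if (fault_addr >= range->start)
                return;                 /* already mapped */

        mask = range->pgshift ? ~((1UL << range->pgshift) - 1) : PAGE_MASK;
        range->start = fault_addr & mask;
}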