diff --git arch/arm64/kernel/memory.c arch/arm64/kernel/memory.c
index 2361c6f8..18c131eb 100644
--- arch/arm64/kernel/memory.c
+++ arch/arm64/kernel/memory.c
@@ -2691,6 +2691,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
 	ptl1_set(ptep, pte);
 	error = 0;
+
+	if (args->attr[0] & PTE_CONT &&
+	    __page_offset(base, PTL1_CONT_SIZE) == 0) {
+		kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			__func__, base, PTL1_CONT_SIZE, phys);
+	}
+
 	// call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
@@ -2783,6 +2790,17 @@ retry:
 	ptl_set(ptep, phys | args->attr[level-1], level);
 	error = 0;
+
+	if (args->attr[level-1] & PTE_CONT) {
+		if (__page_offset(base, tbl.cont_pgsize) == 0) {
+			kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+				__func__, base, tbl.cont_pgsize, phys);
+		}
+	} else {
+		kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			__func__, base, tbl.pgsize, phys);
+	}
+
 	dkprintf("set_range_middle(%lx,%lx,%lx,%d):"
 		 "large page. %d %lx\n",
 		 base, start, end, level, error, *ptep);
diff --git arch/x86_64/kernel/memory.c arch/x86_64/kernel/memory.c
index 751b892c..48d7f6b1 100644
--- arch/x86_64/kernel/memory.c
+++ arch/x86_64/kernel/memory.c
@@ -2034,6 +2034,10 @@ retry:
 	dkprintf("set_range_l2(%lx,%lx,%lx):"
 		 "2MiB page. %d %lx\n", base, start, end, error, *ptep);
+
+	kprintf("%s: large page allocation, addr: %016lx, size: %lu\n",
+		__func__, base, PTL2_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
 			phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
@@ -2123,6 +2127,9 @@ retry:
 		 "1GiB page. %d %lx\n",
 		 base, start, end, error, *ptep);
+	kprintf("%s: large page allocation, addr: %016lx, size: %lu\n",
+		__func__, base, PTL3_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
 			phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
diff --git kernel/host.c kernel/host.c
index 8ccac6ef..5d408eaa 100644
--- kernel/host.c
+++ kernel/host.c
@@ -96,11 +96,35 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 	unsigned long ap_flags;
 	enum ihk_mc_pt_attribute ptattr;
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
+	unsigned long section_page_size;
+	unsigned long section_page_mask;
+	unsigned long section_page_shift;
+	int section_page_p2align;
+
 	n = p->num_sections;
 	vm->region.data_start = ULONG_MAX;
 	aout_base = (pn->reloc)?
 		vm->region.map_end: 0;
 	for (i = 0; i < n; i++) {
+		if (stack_page_size > PAGE_SIZE &&
+		    pn->sections[i].len >= stack_page_size &&
+		    !(pn->sections[i].len & ~stack_page_mask)) {
+			section_page_size = stack_page_size;
+			section_page_mask = stack_page_mask;
+			section_page_shift = stack_page_shift;
+			section_page_p2align = stack_page_p2align;
+		} else {
+			section_page_size = PAGE_SIZE;
+			section_page_mask = PAGE_MASK;
+			section_page_shift = PAGE_SHIFT;
+			section_page_p2align = PAGE_P2ALIGN;
+		}
+
 		ap_flags = 0;
 		if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
 			interp_obase = pn->sections[i].vaddr;
@@ -119,11 +143,22 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			pn->sections[i].vaddr += aout_base;
 			p->sections[i].vaddr = pn->sections[i].vaddr;
 		}
-		s = (pn->sections[i].vaddr) & PAGE_MASK;
+		s = (pn->sections[i].vaddr & section_page_mask);
 		e = (pn->sections[i].vaddr + pn->sections[i].len
-			+ PAGE_SIZE - 1) & PAGE_MASK;
-		range_npages = ((pn->sections[i].vaddr - s) +
-			pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+			+ section_page_size - 1) & section_page_mask;
+
+		if (section_page_size > PAGE_SIZE) {
+			/* Pre-map .bss as well */
+			range_npages = (((pn->sections[i].vaddr - s) +
+				pn->sections[i].len + section_page_size - 1) >>
+				section_page_shift) <<
+				section_page_p2align;
+		} else {
+			range_npages = ((pn->sections[i].vaddr - s) +
+				pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		}
+
 		flags = VR_NONE;
 		flags |= PROT_TO_VR_FLAG(pn->sections[i].prot);
 		flags |= VRFLAG_PROT_TO_MAXPROT(flags);
@@ -139,18 +174,37 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			flags |= VR_AP_USER;
 		}
-		if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
-				pn->sections[i].len > LARGE_PAGE_SIZE ?
-				LARGE_PAGE_SHIFT : PAGE_SHIFT,
-				&range) != 0) {
-			kprintf("ERROR: adding memory range for ELF section %i\n", i);
-			goto err;
+		if (section_page_size > PAGE_SIZE) {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					section_page_shift,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					pn->sections[i].len > LARGE_PAGE_SIZE ?
+					LARGE_PAGE_SHIFT : PAGE_SHIFT,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
 		}
-		if ((up_v = ihk_mc_alloc_pages_user(range_npages,
-					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
-			kprintf("ERROR: alloc pages for ELF section %i\n", i);
-			goto err;
+
+		if (section_page_size > PAGE_SIZE) {
+			if ((up_v = ihk_mc_alloc_aligned_pages_user(range_npages,
+					section_page_p2align,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if ((up_v = ihk_mc_alloc_pages_user(range_npages,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
 		}
 		up = virt_to_phys(up_v);
@@ -210,8 +264,19 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 	vm->region.map_start = vm->region.map_end = TASK_UNMAPPED_BASE;
+	unsigned long heap_page_size;
+	unsigned long heap_page_mask;
+
+	if (proc->heap_extension > PAGE_SIZE) {
+		heap_page_size = proc->heap_extension;
+		heap_page_mask = ~(heap_page_size - 1);
+	} else {
+		heap_page_size = PAGE_SIZE;
+		heap_page_mask = PAGE_MASK;
+	}
+
 	vm->region.brk_start = vm->region.brk_end =
-		(vm->region.data_end + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
+		(vm->region.data_end + heap_page_size - 1) & heap_page_mask;
 	if (vm->region.brk_start >= vm->region.map_start) {
 		kprintf("%s: ERROR: data section is too large (end addr: %lx)\n",
diff --git kernel/include/rusage_private.h kernel/include/rusage_private.h
index cfa10faa..735a9eda 100644
--- kernel/include/rusage_private.h
+++ kernel/include/rusage_private.h
@@ -12,7 +12,7 @@
 #include 
 #include 
-#ifdef ENABLE_RUSAGE
+#if 0 /* def ENABLE_RUSAGE */
 #define RUSAGE_OOM_MARGIN (2 * 1024 * 1024) // 2MB
diff --git kernel/process.c kernel/process.c
index bcaf8a72..1dfc7562 100644
--- kernel/process.c
+++ kernel/process.c
@@ -2033,6 +2033,12 @@ retry:
 	}
 	dkprintf("%s: attr=%x\n", __FUNCTION__, attr);
+
+	if (pgsize > PAGE_SIZE) {
+		kprintf("large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			pgaddr, pgsize, phys);
+	}
+
 	error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep,
 				  pgsize, phys, attr);
 	if (error) {
@@ -2260,10 +2266,15 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	int stack_populated_size = 0;
 	int stack_align_padding = 0;
+
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
 	/* Create stack range */
-	end = STACK_TOP(&thread->vm->region) & USER_STACK_PAGE_MASK;
-	minsz = (pn->stack_premap + USER_STACK_PREPAGE_SIZE - 1) &
-		USER_STACK_PAGE_MASK;
+	end = STACK_TOP(&thread->vm->region) & stack_page_mask;
+	minsz = stack_page_size;
 	maxsz = (end - thread->vm->region.map_start) / 2;
 	size = proc->rlimit[MCK_RLIMIT_STACK].rlim_cur;
 	if (size > maxsz) {
@@ -2272,13 +2283,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	else if (size < minsz) {
 		size = minsz;
 	}
-	size = (size + USER_STACK_PREPAGE_SIZE - 1) & USER_STACK_PAGE_MASK;
+	size = (size + stack_page_size - 1) & stack_page_mask;
+
 	dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n",
 		__FUNCTION__, pn->stack_premap,
 		proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, minsz, size);
-	start = (end - size) & USER_STACK_PAGE_MASK;
+	start = (end - size) & stack_page_mask;
 	/* Apply user allocation policy to stacks */
 	/* TODO: make threshold kernel or mcexec argument */
@@ -2289,9 +2301,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 		ap_flag ? "(IHK_MC_AP_USER)" : "");
 	stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
-			USER_STACK_PAGE_P2ALIGN,
-			IHK_MC_AP_NOWAIT | ap_flag,
-			start);
+			stack_page_p2align, IHK_MC_AP_NOWAIT | ap_flag, start);
 	if (!stack) {
 		kprintf("%s: error: couldn't allocate initial stack\n",
@@ -2307,7 +2317,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC;
 #define NOPHYS ((uintptr_t)-1)
 	if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
-			vrflag, NULL, 0, USER_STACK_PAGE_SHIFT, &range)) != 0) {
+			vrflag, NULL, 0, stack_page_shift, &range)) != 0) {
 		ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
 		kprintf("%s: error addding process memory range: %d\n", rc);
 		return rc;
@@ -2319,7 +2329,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 			(void *)end, virt_to_phys(stack),
 			arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
-			USER_STACK_PAGE_SHIFT, range, 0);
+			stack_page_shift, range, 0);
 	if (error) {
 		kprintf("init_process_stack:"
 			"set range %lx-%lx %lx failed. %d\n",
@@ -2432,14 +2442,15 @@ unsigned long extend_process_region(struct process_vm *vm,
 	void *p;
 	int rc;
-	size_t align_size = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_SIZE : PAGE_SIZE;
-	unsigned long align_mask = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_MASK : PAGE_MASK;
-	unsigned long align_p2align = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_P2ALIGN : PAGE_P2ALIGN;
-	int align_shift = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_SHIFT : PAGE_SHIFT;
+	unsigned long heap_page_size = vm->proc->heap_extension;
+	unsigned long heap_page_mask = ~(heap_page_size - 1);
+	unsigned long heap_page_shift = 63 - __builtin_clzl(heap_page_size);
+	unsigned long heap_page_p2align = heap_page_shift - PAGE_SHIFT;
+
+	size_t align_size = heap_page_size;
+	unsigned long align_mask = heap_page_mask;
+	unsigned long align_p2align = heap_page_p2align;
+	int align_shift = heap_page_shift;
 	new_end_allocated = (address + (PAGE_SIZE - 1)) & PAGE_MASK;
 	if ((new_end_allocated - end_allocated) < vm->proc->heap_extension) {
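
Note: the hunks above repeatedly derive a page shift and allocation order from a
power-of-two mapping size (pn->stack_premap, proc->heap_extension) via
63 - __builtin_clzl(size) and shift - PAGE_SHIFT. The following is a minimal
standalone sketch of that idiom, not part of the patch; it assumes a 4 KiB base
page (PAGE_SHIFT 12) and a non-zero power-of-two size, which is what the premap
and heap-extension values are expected to be.

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL			/* assumed 4 KiB base page */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long size = 2UL * 1024 * 1024;	/* e.g. a 2 MiB stack_premap */
	unsigned long mask, shift, p2align;

	/* The idiom relies on size being a non-zero power of two >= PAGE_SIZE. */
	assert(size >= PAGE_SIZE && (size & (size - 1)) == 0);

	mask = ~(size - 1);			/* alignment mask */
	shift = 63 - __builtin_clzl(size);	/* log2(size) */
	p2align = shift - PAGE_SHIFT;		/* allocation order in base pages */

	printf("size=%lu shift=%lu p2align=%lu mask=%016lx\n",
	       size, shift, p2align, mask);
	return 0;
}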