diff --git arch/arm64/kernel/memory.c arch/arm64/kernel/memory.c
index ceca343..5e6c137 100644
--- arch/arm64/kernel/memory.c
+++ arch/arm64/kernel/memory.c
@@ -2701,6 +2701,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
 	ptl1_set(ptep, pte);
 	error = 0;
+
+	if (args->attr[0] & PTE_CONT &&
+	    __page_offset(base, PTL1_CONT_SIZE) == 0) {
+		kprintf("%s: large page allocation, addr: %016lx, size: %ld, phys: %lx\n",
+			__func__, base, PTL1_CONT_SIZE, phys);
+	}
+
 	// call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL1_SIZE, PTL1_SIZE);
@@ -2791,6 +2798,17 @@ retry:
 	ptl_set(ptep, phys | args->attr[level-1], level);
 	error = 0;
+
+	if (args->attr[level-1] & PTE_CONT) {
+		if (__page_offset(base, tbl.cont_pgsize) == 0) {
+			kprintf("%s: large page allocation, addr: %016lx, size: %ld, phys: %lx\n",
+				__func__, base, tbl.cont_pgsize, phys);
+		}
+	} else {
+		kprintf("%s: large page allocation, addr: %016lx, size: %ld, phys: %lx\n",
+			__func__, base, tbl.pgsize, phys);
+	}
+
 	dkprintf("set_range_middle(%lx,%lx,%lx,%d):"
 		 "large page. %d %lx\n",
 		 base, start, end, level, error, *ptep);
diff --git arch/x86_64/kernel/memory.c arch/x86_64/kernel/memory.c
index 980d935..4fea4b0 100644
--- arch/x86_64/kernel/memory.c
+++ arch/x86_64/kernel/memory.c
@@ -2030,6 +2030,10 @@ retry:
 	dkprintf("set_range_l2(%lx,%lx,%lx):"
 		 "2MiB page. %d %lx\n",
 		 base, start, end, error, *ptep);
+
+	kprintf("%s: large page allocation, addr: %016lx, size: %ld\n",
+		__func__, base, PTL2_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
@@ -2119,6 +2123,9 @@ retry:
 		 "1GiB page. %d %lx\n",
 		 base, start, end, error, *ptep);
+	kprintf("%s: large page allocation, addr: %016lx, size: %ld\n",
+		__func__, base, PTL3_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
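The arm64 hunks above deliberately log a contiguous-PTE ("cont") mapping only once per run: the message is emitted only for the entry at the start of the naturally aligned block. A minimal standalone sketch of that check, assuming __page_offset(addr, size) reduces to addr & (size - 1); the granule geometry and names below are illustrative, not McKernel's definitions:

#include <stdio.h>

#define BASE_PAGE_SIZE	(64UL * 1024)		/* 64KiB granule (assumed) */
#define CONT_SIZE	(16 * BASE_PAGE_SIZE)	/* 16 contiguous PTEs */

/* Illustrative stand-in for __page_offset(): offset of addr within a
 * naturally aligned block of the given power-of-two size. */
static unsigned long page_offset(unsigned long addr, unsigned long size)
{
	return addr & (size - 1);
}

int main(void)
{
	unsigned long base;

	/* Walking a cont-sized run PTE by PTE logs exactly once, at the
	 * block-aligned first entry. */
	for (base = 0x400000000UL; base < 0x400000000UL + CONT_SIZE;
	     base += BASE_PAGE_SIZE) {
		if (page_offset(base, CONT_SIZE) == 0)
			printf("large page allocation, addr: %016lx, size: %lu\n",
			       base, CONT_SIZE);
	}
	return 0;
}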
diff --git kernel/host.c kernel/host.c
index 0de0a08..6fb906e 100644
--- kernel/host.c
+++ kernel/host.c
@@ -96,10 +96,34 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 	unsigned long ap_flags;
 	enum ihk_mc_pt_attribute ptattr;
 
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
+	unsigned long section_page_size;
+	unsigned long section_page_mask;
+	unsigned long section_page_shift;
+	int section_page_p2align;
+
 	n = p->num_sections;
 	aout_base = (pn->reloc)? vm->region.map_end: 0;
 	for (i = 0; i < n; i++) {
+		if (stack_page_size > PAGE_SIZE &&
+		    pn->sections[i].len >= stack_page_size &&
+		    !(pn->sections[i].len & ~stack_page_mask)) {
+			section_page_size = stack_page_size;
+			section_page_mask = stack_page_mask;
+			section_page_shift = stack_page_shift;
+			section_page_p2align = stack_page_p2align;
+		} else {
+			section_page_size = PAGE_SIZE;
+			section_page_mask = PAGE_MASK;
+			section_page_shift = PAGE_SHIFT;
+			section_page_p2align = PAGE_P2ALIGN;
+		}
+
 		ap_flags = 0;
 		if (pn->sections[i].interp
 				&& (interp_nbase == (uintptr_t)-1)) {
 			interp_obase = pn->sections[i].vaddr;
@@ -118,11 +142,22 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			pn->sections[i].vaddr += aout_base;
 			p->sections[i].vaddr = pn->sections[i].vaddr;
 		}
-		s = (pn->sections[i].vaddr) & PAGE_MASK;
+		s = (pn->sections[i].vaddr & section_page_mask);
 		e = (pn->sections[i].vaddr + pn->sections[i].len
-				+ PAGE_SIZE - 1) & PAGE_MASK;
-		range_npages = ((pn->sections[i].vaddr - s) +
-				pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+				+ section_page_size - 1) & section_page_mask;
+
+		if (section_page_size > PAGE_SIZE) {
+			/* Pre-map .bss as well */
+			range_npages = (((pn->sections[i].vaddr - s) +
+					pn->sections[i].len + section_page_size - 1) >>
+					section_page_shift) <<
+					section_page_p2align;
+		} else {
+			range_npages = ((pn->sections[i].vaddr - s) +
+					pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		}
+
 		flags = VR_NONE;
 		flags |= PROT_TO_VR_FLAG(pn->sections[i].prot);
 		flags |= VRFLAG_PROT_TO_MAXPROT(flags);
@@ -138,18 +173,37 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			flags |= VR_AP_USER;
 		}
-		if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
-				pn->sections[i].len > LARGE_PAGE_SIZE ?
-				LARGE_PAGE_SHIFT : PAGE_SHIFT,
-				&range) != 0) {
-			kprintf("ERROR: adding memory range for ELF section %i\n", i);
-			goto err;
+		if (section_page_size > PAGE_SIZE) {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					section_page_shift,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					pn->sections[i].len > LARGE_PAGE_SIZE ?
+					LARGE_PAGE_SHIFT : PAGE_SHIFT,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
 		}
-		if ((up_v = ihk_mc_alloc_pages_user(range_npages,
-				IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
-			kprintf("ERROR: alloc pages for ELF section %i\n", i);
-			goto err;
+
+		if (section_page_size > PAGE_SIZE) {
+			if ((up_v = ihk_mc_alloc_aligned_pages_user(range_npages,
+					section_page_p2align,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if ((up_v = ihk_mc_alloc_pages_user(range_npages,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
 		}
 		up = virt_to_phys(up_v);
@@ -212,8 +266,19 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 		pn->at_entry += aout_base;
 	}
 
+	unsigned long heap_page_size;
+	unsigned long heap_page_mask;
+
+	if (proc->heap_extension > PAGE_SIZE) {
+		heap_page_size = proc->heap_extension;
+		heap_page_mask = ~(heap_page_size - 1);
+	} else {
+		heap_page_size = PAGE_SIZE;
+		heap_page_mask = PAGE_MASK;
+	}
+
 	vm->region.brk_start = vm->region.brk_end =
-		(vm->region.data_end + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
+		(vm->region.data_end + heap_page_size - 1) & heap_page_mask;
 
 #if 0
 	{
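kernel/host.c now picks a per-section mapping size: the stack_premap size is used only when it exceeds the base page size, the ELF segment is at least that large, and the segment length is an exact multiple of it (otherwise the mapping falls back to base pages). A self-contained sketch of that predicate; the constants and function name are illustrative, not McKernel's:

#include <stdio.h>

#define PAGE_SIZE	4096UL

/* Mirror of the selection above: ~(premap_size - 1) is the alignment
 * mask, so (len & ~mask) == 0 means len is a multiple of premap_size. */
static unsigned long pick_section_page_size(unsigned long premap_size,
					    unsigned long section_len)
{
	unsigned long mask = ~(premap_size - 1);

	if (premap_size > PAGE_SIZE &&
	    section_len >= premap_size &&
	    !(section_len & ~mask))
		return premap_size;
	return PAGE_SIZE;
}

int main(void)
{
	/* 6MiB section with a 2MiB premap size: use large pages. */
	printf("%lu\n", pick_section_page_size(2UL << 20, 6UL << 20));
	/* 5MiB is not a multiple of 2MiB: fall back to base pages. */
	printf("%lu\n", pick_section_page_size(2UL << 20, 5UL << 20));
	return 0;
}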
diff --git kernel/include/rusage_private.h kernel/include/rusage_private.h
index 7da7728..14253b2 100644
--- kernel/include/rusage_private.h
+++ kernel/include/rusage_private.h
@@ -12,7 +12,7 @@
 #include
 #include
 
-#ifdef ENABLE_RUSAGE
+#if 0 /* def ENABLE_RUSAGE */
 
 #define RUSAGE_OOM_MARGIN (2 * 1024 * 1024) // 2MB
diff --git kernel/process.c kernel/process.c
index 32a8ef6..24f262a 100644
--- kernel/process.c
+++ kernel/process.c
@@ -2030,6 +2030,12 @@ retry:
 #endif /* POSTK_DEBUG_TEMP_FIX_86 */
 	dkprintf("%s: attr=%x\n", __FUNCTION__, attr);
+
+	if (pgsize > PAGE_SIZE) {
+		kprintf("large page allocation, addr: %016lx, size: %ld, phys: %lx\n",
+			pgaddr, pgsize, phys);
+	}
+
 	error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep,
 				  pgsize, phys, attr);
 	if (error) {
@@ -2256,15 +2262,20 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	int stack_populated_size = 0;
 	int stack_align_padding = 0;
+
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
 	/* Create stack range */
 #ifdef POSTK_DEBUG_ARCH_DEP_104 /* user stack prepage size fix */
-	end = STACK_TOP(&thread->vm->region) & USER_STACK_PAGE_MASK;
-	minsz = (pn->stack_premap + USER_STACK_PREPAGE_SIZE - 1) &
-		USER_STACK_PAGE_MASK;
+	end = STACK_TOP(&thread->vm->region) & stack_page_mask;
+	minsz = stack_page_size;
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
-	end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK;
+	end = STACK_TOP(&thread->vm->region) & stack_page_mask;
 	minsz = (pn->stack_premap
-		 + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
+		 + stack_page_size - 1) & stack_page_mask;
 #endif /* POSTK_DEBUG_ARCH_DEP_104 */
 	maxsz = (end - thread->vm->region.map_start) / 2;
 	size = proc->rlimit[MCK_RLIMIT_STACK].rlim_cur;
@@ -2275,9 +2286,9 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 		size = minsz;
 	}
 #ifdef POSTK_DEBUG_ARCH_DEP_104 /* user stack prepage size fix */
-	size = (size + USER_STACK_PREPAGE_SIZE - 1) & USER_STACK_PAGE_MASK;
+	size = (size + stack_page_size - 1) & stack_page_mask;
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
-	size = (size + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
+	size = (size + stack_page_size - 1) & stack_page_mask;
 #endif /* POSTK_DEBUG_ARCH_DEP_104 */
 	dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n",
 		 __FUNCTION__,
@@ -2285,9 +2296,9 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 		 proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, minsz, size);
 #ifdef POSTK_DEBUG_ARCH_DEP_104 /* user stack prepage size fix */
-	start = (end - size) & USER_STACK_PAGE_MASK;
+	start = (end - size) & stack_page_mask;
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
-	start = (end - size) & LARGE_PAGE_MASK;
+	start = (end - size) & stack_page_mask;
 #endif /* POSTK_DEBUG_ARCH_DEP_104 */
 
 	/* Apply user allocation policy to stacks */
@@ -2300,10 +2311,10 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 #ifdef POSTK_DEBUG_ARCH_DEP_104 /* user stack prepage size fix */
 	stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
-			USER_STACK_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | ap_flag, start);
+			stack_page_p2align, IHK_MC_AP_NOWAIT | ap_flag, start);
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
 	stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
-			LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | ap_flag, start);
+			stack_page_p2align, IHK_MC_AP_NOWAIT | ap_flag, start);
 #endif /* POSTK_DEBUG_ARCH_DEP_104 */
 
 	if (!stack) {
@@ -2321,13 +2332,13 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 #define NOPHYS ((uintptr_t)-1)
 #ifdef POSTK_DEBUG_ARCH_DEP_104 /* user stack prepage size fix */
 	if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
-			vrflag, NULL, 0, USER_STACK_PAGE_SHIFT, &range)) != 0) {
+			vrflag, NULL, 0, stack_page_shift, &range)) != 0) {
 		ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
 		return rc;
 	}
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
 	if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
-			vrflag, NULL, 0, LARGE_PAGE_SHIFT, &range)) != 0) {
+			vrflag, NULL, 0, stack_page_shift, &range)) != 0) {
 		ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
 		kprintf("%s: error addding process memory range: %d\n", rc);
 		return rc;
 	}
@@ -2340,14 +2351,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 			thread->vm, (void *)(end - minsz),
 			(void *)end, virt_to_phys(stack),
 			arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
-			USER_STACK_PAGE_SHIFT, range, 0
+			stack_page_shift, range, 0
 			);
 #else /* POSTK_DEBUG_ARCH_DEP_104 */
 	error = ihk_mc_pt_set_range(thread->vm->address_space->page_table,
 			thread->vm, (void *)(end - minsz),
 			(void *)end, virt_to_phys(stack),
 			arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
-			LARGE_PAGE_SHIFT, range, 0
+			stack_page_shift, range, 0
 			);
 #endif /* POSTK_DEBUG_ARCH_DEP_104 */
@@ -2464,14 +2475,15 @@ unsigned long extend_process_region(struct process_vm *vm,
 	void *p;
 	int rc;
 
-	size_t align_size = vm->proc->heap_extension > PAGE_SIZE ?
-			LARGE_PAGE_SIZE : PAGE_SIZE;
-	unsigned long align_mask = vm->proc->heap_extension > PAGE_SIZE ?
-			LARGE_PAGE_MASK : PAGE_MASK;
-	unsigned long align_p2align = vm->proc->heap_extension > PAGE_SIZE ?
-			LARGE_PAGE_P2ALIGN : PAGE_P2ALIGN;
-	int align_shift = vm->proc->heap_extension > PAGE_SIZE ?
-			LARGE_PAGE_SHIFT : PAGE_SHIFT;
+	unsigned long heap_page_size = vm->proc->heap_extension;
+	unsigned long heap_page_mask = ~(heap_page_size - 1);
+	unsigned long heap_page_shift = 63 - __builtin_clzl(heap_page_size);
+	unsigned long heap_page_p2align = heap_page_shift - PAGE_SHIFT;
+
+	size_t align_size = heap_page_size;
+	unsigned long align_mask = heap_page_mask;
+	unsigned long align_p2align = heap_page_p2align;
+	int align_shift = heap_page_shift;
 
 	new_end_allocated = (address + (PAGE_SIZE - 1)) & PAGE_MASK;
 	if ((new_end_allocated - end_allocated) < vm->proc->heap_extension) {
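Both init_process_stack() and extend_process_region() now derive mask/shift/p2align directly from the configured size: 63 - __builtin_clzl(size) is floor(log2(size)) of a non-zero 64-bit value. Note that this arithmetic, like the ~(size - 1) mask, is only meaningful when stack_premap and heap_extension are powers of two, whereas the replaced ternaries clamped arbitrary values to either LARGE_PAGE_SIZE or PAGE_SIZE. A sketch of the derivation and its power-of-two assumption:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12UL	/* 4KiB base pages, as on x86_64 */

int main(void)
{
	unsigned long size = 2UL << 20;	/* e.g. heap_extension = 2MiB */
	unsigned long mask, shift, p2align;

	/* __builtin_clzl(0) is undefined, and for a non-power-of-two size
	 * ~(size - 1) is not an alignment mask. */
	assert(size != 0 && (size & (size - 1)) == 0);

	mask = ~(size - 1);			/* round-down mask */
	shift = 63 - __builtin_clzl(size);	/* log2(size) = 21 */
	p2align = shift - PAGE_SHIFT;		/* order in base pages = 9 */

	printf("mask=%016lx shift=%lu p2align=%lu\n", mask, shift, p2align);
	return 0;
}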
diff --git kernel/syscall.c kernel/syscall.c
index 449c931..10a9039 100644
--- kernel/syscall.c
+++ kernel/syscall.c
@@ -5228,9 +5228,22 @@ int do_shmget(const key_t key, const size_t size, const int shmflg)
 		return -ENOSPC;
 	}
 
-	pgshift = PAGE_SHIFT;
 	if (shmflg & SHM_HUGETLB) {
 		pgshift = (shmflg >> SHM_HUGE_SHIFT) & 0x3F;
+	} else {
+		size_t pgsize;
+
+		if (size > PAGE_SIZE) {
+			error = arch_get_smaller_page_size(NULL, size + 1, &pgsize, NULL);
+			if (error) {
+				ekprintf("%s: arch_get_smaller_page_size failed. %d\n", __func__, error);
+				return error;
+			}
+
+			pgshift = 63 - __builtin_clzl(pgsize);
+		} else {
+			pgshift = PAGE_SHIFT;
+		}
 	}
 
 	memset(&ads, 0, sizeof(ads));
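For the non-SHM_HUGETLB path in do_shmget(), the patch asks the architecture for the largest supported page size not exceeding the requested segment size: arch_get_smaller_page_size() returns the next size strictly smaller than its bound, so passing size + 1 turns it into a <=-lookup, and the result is converted to a shift. A sketch of that selection against an illustrative x86_64-style size table (the table and function name below are stand-ins, not the kernel's API):

#include <stdio.h>

#define PAGE_SHIFT	12

/* Illustrative stand-in for the arch page-size table. */
static const unsigned long pagesizes[] = {
	1UL << 30,	/* 1GiB */
	1UL << 21,	/* 2MiB */
	1UL << 12,	/* 4KiB */
};

static int pgshift_for(unsigned long size)
{
	int i;

	if (size <= (1UL << PAGE_SHIFT))
		return PAGE_SHIFT;

	/* Largest supported size that is < size + 1, i.e. <= size. */
	for (i = 0; i < 3; i++)
		if (pagesizes[i] < size + 1)
			return 63 - __builtin_clzl(pagesizes[i]);

	return PAGE_SHIFT;
}

int main(void)
{
	printf("%d\n", pgshift_for(4096UL));	/* 12 */
	printf("%d\n", pgshift_for(3UL << 20));	/* 21: a 2MiB page fits */
	printf("%d\n", pgshift_for(2UL << 30));	/* 30: a 1GiB page fits */
	return 0;
}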