diff --git arch/arm64/kernel/memory.c arch/arm64/kernel/memory.c
index 2361c6f8..18c131eb 100644
--- arch/arm64/kernel/memory.c
+++ arch/arm64/kernel/memory.c
@@ -2691,6 +2691,13 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
 	ptl1_set(ptep, pte);
 	error = 0;
+
+	if (args->attr[0] & PTE_CONT &&
+	    __page_offset(base, PTL1_CONT_SIZE) == 0) {
+		kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			__func__, base, PTL1_CONT_SIZE, phys);
+	}
+
 	// call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL1_SIZE, PTL1_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
@@ -2783,6 +2790,17 @@ retry:
 	ptl_set(ptep, phys | args->attr[level-1], level);
 	error = 0;
+
+	if (args->attr[level-1] & PTE_CONT) {
+		if (__page_offset(base, tbl.cont_pgsize) == 0) {
+			kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+				__func__, base, tbl.cont_pgsize, phys);
+		}
+	} else {
+		kprintf("%s: large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			__func__, base, tbl.pgsize, phys);
+	}
+
 	dkprintf("set_range_middle(%lx,%lx,%lx,%d):"
 		 "large page. %d %lx\n",
 		 base, start, end, level, error, *ptep);
diff --git arch/x86_64/kernel/memory.c arch/x86_64/kernel/memory.c
index 751b892c..48d7f6b1 100644
--- arch/x86_64/kernel/memory.c
+++ arch/x86_64/kernel/memory.c
@@ -2034,6 +2034,10 @@ retry:
 	dkprintf("set_range_l2(%lx,%lx,%lx):"
 		 "2MiB page. %d %lx\n", base, start, end, error, *ptep);
+
+	kprintf("%s: large page allocation, addr: %016lx, size: %lu\n",
+		__func__, base, PTL2_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
 			phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE);
@@ -2123,6 +2127,9 @@ retry:
 		 "1GiB page. %d %lx\n",
 		 base, start, end, error, *ptep);
+	kprintf("%s: large page allocation, addr: %016lx, size: %lu\n",
+		__func__, base, PTL3_SIZE);
+
 	// Call memory_stat_rss_add() here because pgshift is resolved here
 	if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) {
 		dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n",
 			phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE);
diff --git kernel/host.c kernel/host.c
index 8ccac6ef..5d408eaa 100644
--- kernel/host.c
+++ kernel/host.c
@@ -96,11 +96,35 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 	unsigned long ap_flags;
 	enum ihk_mc_pt_attribute ptattr;
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
+	unsigned long section_page_size;
+	unsigned long section_page_mask;
+	unsigned long section_page_shift;
+	int section_page_p2align;
+
 	n = p->num_sections;
 	vm->region.data_start = ULONG_MAX;
 	aout_base = (pn->reloc)?
 		vm->region.map_end: 0;
 	for (i = 0; i < n; i++) {
+		if (stack_page_size > PAGE_SIZE &&
+		    pn->sections[i].len >= stack_page_size &&
+		    !(pn->sections[i].len & ~stack_page_mask)) {
+			section_page_size = stack_page_size;
+			section_page_mask = stack_page_mask;
+			section_page_shift = stack_page_shift;
+			section_page_p2align = stack_page_p2align;
+		} else {
+			section_page_size = PAGE_SIZE;
+			section_page_mask = PAGE_MASK;
+			section_page_shift = PAGE_SHIFT;
+			section_page_p2align = PAGE_P2ALIGN;
+		}
+
 		ap_flags = 0;
 		if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
 			interp_obase = pn->sections[i].vaddr;
@@ -119,11 +143,22 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			pn->sections[i].vaddr += aout_base;
 			p->sections[i].vaddr = pn->sections[i].vaddr;
 		}
-		s = (pn->sections[i].vaddr) & PAGE_MASK;
+		s = (pn->sections[i].vaddr & section_page_mask);
 		e = (pn->sections[i].vaddr + pn->sections[i].len
-			+ PAGE_SIZE - 1) & PAGE_MASK;
-		range_npages = ((pn->sections[i].vaddr - s) +
-			pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+			+ section_page_size - 1) & section_page_mask;
+
+		if (section_page_size > PAGE_SIZE) {
+			/* Pre-map .bss as well */
+			range_npages = (((pn->sections[i].vaddr - s) +
+				pn->sections[i].len + section_page_size - 1) >>
+				section_page_shift) <<
+				section_page_p2align;
+		} else {
+			range_npages = ((pn->sections[i].vaddr - s) +
+				pn->sections[i].filesz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		}
+
 		flags = VR_NONE;
 		flags |= PROT_TO_VR_FLAG(pn->sections[i].prot);
 		flags |= VRFLAG_PROT_TO_MAXPROT(flags);
@@ -139,18 +174,37 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 			flags |= VR_AP_USER;
 		}
-		if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
-				pn->sections[i].len > LARGE_PAGE_SIZE ?
-				LARGE_PAGE_SHIFT : PAGE_SHIFT,
-				&range) != 0) {
-			kprintf("ERROR: adding memory range for ELF section %i\n", i);
-			goto err;
+		if (section_page_size > PAGE_SIZE) {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					section_page_shift,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if (add_process_memory_range(vm, s, e, NOPHYS, flags, NULL, 0,
+					pn->sections[i].len > LARGE_PAGE_SIZE ?
+					LARGE_PAGE_SHIFT : PAGE_SHIFT,
+					&range) != 0) {
+				kprintf("ERROR: adding memory range for ELF section %i\n", i);
+				goto err;
+			}
 		}
-		if ((up_v = ihk_mc_alloc_pages_user(range_npages,
-					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
-			kprintf("ERROR: alloc pages for ELF section %i\n", i);
-			goto err;
+
+		if (section_page_size > PAGE_SIZE) {
+			if ((up_v = ihk_mc_alloc_aligned_pages_user(range_npages,
+					section_page_p2align,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
+		} else {
+			if ((up_v = ihk_mc_alloc_pages_user(range_npages,
+					IHK_MC_AP_NOWAIT | ap_flags, s)) == NULL) {
+				kprintf("ERROR: alloc pages for ELF section %i\n", i);
+				goto err;
+			}
 		}
 		up = virt_to_phys(up_v);
@@ -210,8 +264,19 @@ int prepare_process_ranges_args_envs(struct thread *thread,
 	vm->region.map_start = vm->region.map_end = TASK_UNMAPPED_BASE;
+	unsigned long heap_page_size;
+	unsigned long heap_page_mask;
+
+	if (proc->heap_extension > PAGE_SIZE) {
+		heap_page_size = proc->heap_extension;
+		heap_page_mask = ~(heap_page_size - 1);
+	} else {
+		heap_page_size = PAGE_SIZE;
+		heap_page_mask = PAGE_MASK;
+	}
+
 	vm->region.brk_start = vm->region.brk_end =
-		(vm->region.data_end + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
+		(vm->region.data_end + heap_page_size - 1) & heap_page_mask;
 	if (vm->region.brk_start >= vm->region.map_start) {
 		kprintf("%s: ERROR: data section is too large (end addr: %lx)\n",
diff --git kernel/include/rusage_private.h kernel/include/rusage_private.h
index cfa10faa..735a9eda 100644
--- kernel/include/rusage_private.h
+++ kernel/include/rusage_private.h
@@ -12,7 +12,7 @@
 #include 
 #include 
-#ifdef ENABLE_RUSAGE
+#if 0 /* def ENABLE_RUSAGE */
 #define RUSAGE_OOM_MARGIN (2 * 1024 * 1024) // 2MB
diff --git kernel/process.c kernel/process.c
index bcaf8a72..1dfc7562 100644
--- kernel/process.c
+++ kernel/process.c
@@ -2033,6 +2033,12 @@ retry:
 	}
 	dkprintf("%s: attr=%x\n", __FUNCTION__, attr);
+
+	if (pgsize > PAGE_SIZE) {
+		kprintf("large page allocation, addr: %016lx, size: %lu, phys: %lx\n",
+			pgaddr, pgsize, phys);
+	}
+
 	error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep,
 				  pgsize, phys, attr);
 	if (error) {
@@ -2260,10 +2266,15 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	int stack_populated_size = 0;
 	int stack_align_padding = 0;
+
+	unsigned long stack_page_size = pn->stack_premap;
+	unsigned long stack_page_mask = ~(stack_page_size - 1);
+	unsigned long stack_page_shift = 63 - __builtin_clzl(stack_page_size);
+	unsigned long stack_page_p2align = stack_page_shift - PAGE_SHIFT;
+
 	/* Create stack range */
-	end = STACK_TOP(&thread->vm->region) & USER_STACK_PAGE_MASK;
-	minsz = (pn->stack_premap + USER_STACK_PREPAGE_SIZE - 1) &
-		USER_STACK_PAGE_MASK;
+	end = STACK_TOP(&thread->vm->region) & stack_page_mask;
+	minsz = stack_page_size;
 	maxsz = (end - thread->vm->region.map_start) / 2;
 	size = proc->rlimit[MCK_RLIMIT_STACK].rlim_cur;
 	if (size > maxsz) {
@@ -2272,13 +2283,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	else if (size < minsz) {
 		size = minsz;
 	}
-	size = (size + USER_STACK_PREPAGE_SIZE - 1) & USER_STACK_PAGE_MASK;
+	size = (size + stack_page_size - 1) & stack_page_mask;
+
 	dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n",
 		__FUNCTION__, pn->stack_premap,
 		proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, minsz, size);
-	start = (end - size) & USER_STACK_PAGE_MASK;
+	start = (end - size) & stack_page_mask;
 	/* Apply user allocation policy to stacks */
 	/* TODO: make threshold kernel or mcexec argument */
@@ -2289,9 +2301,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 		ap_flag ? "(IHK_MC_AP_USER)" : "");
 	stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
-			USER_STACK_PAGE_P2ALIGN,
-			IHK_MC_AP_NOWAIT | ap_flag,
-			start);
+			stack_page_p2align, IHK_MC_AP_NOWAIT | ap_flag, start);
 	if (!stack) {
 		kprintf("%s: error: couldn't allocate initial stack\n",
@@ -2307,7 +2317,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 	vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC;
 #define NOPHYS ((uintptr_t)-1)
 	if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS,
-			vrflag, NULL, 0, USER_STACK_PAGE_SHIFT, &range)) != 0) {
+			vrflag, NULL, 0, stack_page_shift, &range)) != 0) {
 		ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT);
 		kprintf("%s: error addding process memory range: %d\n", rc);
 		return rc;
@@ -2319,7 +2329,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
 			(void *)end, virt_to_phys(stack),
 			arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
-			USER_STACK_PAGE_SHIFT, range, 0);
+			stack_page_shift, range, 0);
 	if (error) {
 		kprintf("init_process_stack:"
 			"set range %lx-%lx %lx failed. %d\n",
@@ -2432,14 +2442,15 @@ unsigned long extend_process_region(struct process_vm *vm,
 	void *p;
 	int rc;
-	size_t align_size = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_SIZE : PAGE_SIZE;
-	unsigned long align_mask = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_MASK : PAGE_MASK;
-	unsigned long align_p2align = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_P2ALIGN : PAGE_P2ALIGN;
-	int align_shift = vm->proc->heap_extension > PAGE_SIZE ?
-		LARGE_PAGE_SHIFT : PAGE_SHIFT;
+	unsigned long heap_page_size = vm->proc->heap_extension;
+	unsigned long heap_page_mask = ~(heap_page_size - 1);
+	unsigned long heap_page_shift = 63 - __builtin_clzl(heap_page_size);
+	unsigned long heap_page_p2align = heap_page_shift - PAGE_SHIFT;
+
+	size_t align_size = heap_page_size;
+	unsigned long align_mask = heap_page_mask;
+	unsigned long align_p2align = heap_page_p2align;
+	int align_shift = heap_page_shift;
 	new_end_allocated = (address + (PAGE_SIZE - 1)) & PAGE_MASK;
 	if ((new_end_allocated - end_allocated) < vm->proc->heap_extension) {
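
Note: the hunks above repeatedly derive a page shift and allocation order from a
power-of-two mapping size (pn->stack_premap, proc->heap_extension) via
63 - __builtin_clzl(size) and shift - PAGE_SHIFT. The following is a minimal
standalone sketch of that idiom, not part of the patch; it assumes a 4 KiB base
page (PAGE_SHIFT 12) and a non-zero power-of-two size, which is what the premap
and heap-extension values are expected to be.

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL			/* assumed 4 KiB base page */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long size = 2UL * 1024 * 1024;	/* e.g. a 2 MiB stack_premap */
	unsigned long mask, shift, p2align;

	/* The idiom relies on size being a non-zero power of two >= PAGE_SIZE. */
	assert(size >= PAGE_SIZE && (size & (size - 1)) == 0);

	mask = ~(size - 1);			/* alignment mask */
	shift = 63 - __builtin_clzl(size);	/* log2(size) */
	p2align = shift - PAGE_SHIFT;		/* allocation order in base pages */

	printf("size=%lu shift=%lu p2align=%lu mask=%016lx\n",
	       size, shift, p2align, mask);
	return 0;
}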