diff --git a/arch/arm64/kernel/include/prctl.h b/arch/arm64/kernel/include/prctl.h index d345e38e..8ffe9553 100644 --- a/arch/arm64/kernel/include/prctl.h +++ b/arch/arm64/kernel/include/prctl.h @@ -2,6 +2,9 @@ #ifndef __HEADER_ARM64_COMMON_PRCTL_H #define __HEADER_ARM64_COMMON_PRCTL_H +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + /* arm64 Scalable Vector Extension controls */ #define PR_SVE_SET_VL 48 /* set task vector length */ #define PR_SVE_SET_VL_THREAD (1 << 1) /* set just this thread */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index d72e07b3..da101a43 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -161,7 +161,12 @@ fault: SYSCALL_DECLARE(prctl) { + struct process *proc = cpu_local_var(current)->proc; int option = (int)ihk_mc_syscall_arg0(ctx); + unsigned long arg2 = (unsigned long)ihk_mc_syscall_arg1(ctx); + unsigned long arg3 = (unsigned long)ihk_mc_syscall_arg2(ctx); + unsigned long arg4 = (unsigned long)ihk_mc_syscall_arg3(ctx); + unsigned long arg5 = (unsigned long)ihk_mc_syscall_arg4(ctx); long error; switch (option) { @@ -172,6 +177,19 @@ SYSCALL_DECLARE(prctl) case PR_SVE_GET_VL: error = SVE_GET_VL(cpu_local_var(current)); break; + case PR_SET_THP_DISABLE: + if (arg3 || arg4 || arg5) { + return -EINVAL; + } + proc->thp_disable = arg2; + error = 0; + break; + case PR_GET_THP_DISABLE: + if (arg2 || arg3 || arg4 || arg5) { + return -EINVAL; + } + error = proc->thp_disable; + break; default: error = syscall_generic_forwarding(__NR_prctl, ctx); break; diff --git a/arch/x86_64/kernel/include/prctl.h b/arch/x86_64/kernel/include/prctl.h index c3de4631..2a16ca20 100644 --- a/arch/x86_64/kernel/include/prctl.h +++ b/arch/x86_64/kernel/include/prctl.h @@ -9,6 +9,9 @@ #ifndef __ARCH_PRCTL_H #define __ARCH_PRCTL_H +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + #define ARCH_SET_GS 0x1001 #define ARCH_SET_FS 0x1002 #define ARCH_GET_FS 0x1003 diff --git 
a/arch/x86_64/kernel/include/syscall_list.h b/arch/x86_64/kernel/include/syscall_list.h index ce28d52e..875ed6e5 100644 --- a/arch/x86_64/kernel/include/syscall_list.h +++ b/arch/x86_64/kernel/include/syscall_list.h @@ -109,6 +109,7 @@ SYSCALL_HANDLED(149, mlock) SYSCALL_HANDLED(150, munlock) SYSCALL_HANDLED(151, mlockall) SYSCALL_HANDLED(152, munlockall) +SYSCALL_HANDLED(157, prctl) SYSCALL_HANDLED(158, arch_prctl) SYSCALL_HANDLED(160, setrlimit) SYSCALL_HANDLED(164, settimeofday) diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index a090448f..d474e683 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c @@ -169,6 +169,38 @@ fault: return -EFAULT; } +SYSCALL_DECLARE(prctl) /* handles PR_{SET,GET}_THP_DISABLE locally; every other option goes to syscall_generic_forwarding() */ +{ + struct process *proc = cpu_local_var(current)->proc; + int option = (int)ihk_mc_syscall_arg0(ctx); + unsigned long arg2 = (unsigned long)ihk_mc_syscall_arg1(ctx); + unsigned long arg3 = (unsigned long)ihk_mc_syscall_arg2(ctx); + unsigned long arg4 = (unsigned long)ihk_mc_syscall_arg3(ctx); + unsigned long arg5 = (unsigned long)ihk_mc_syscall_arg4(ctx); + long ret = 0; /* long, like the arm64 handler, so the forwarded result is not narrowed */ + + switch (option) { + case PR_SET_THP_DISABLE: + if (arg3 || arg4 || arg5) { /* unused arguments must be zero */ + return -EINVAL; + } + proc->thp_disable = !!arg2; /* any non-zero arg2 disables THP; store 0/1 so the int field cannot truncate it */ + ret = 0; + break; + case PR_GET_THP_DISABLE: + if (arg2 || arg3 || arg4 || arg5) { /* unused arguments must be zero */ + return -EINVAL; + } + ret = proc->thp_disable; + break; + default: + ret = syscall_generic_forwarding(__NR_prctl, ctx); + break; + } + + return ret; +} + struct sigsp { unsigned long flags; void *link; diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index d2b44310..e0506cb0 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -142,6 +142,7 @@ struct program_load_desc { unsigned long heap_extension; long stack_premap; unsigned long mpol_bind_mask; + int thp_disable; int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */ int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int 
nr_processes; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 6e911d88..dbf44fd7 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -2060,6 +2060,21 @@ static void ld_preload_init() } } +static int get_thp_disable(void) /* returns this process's PR_GET_THP_DISABLE value, or 0 when the query fails */ +{ + int ret = 0; + + ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0); + + /* PR_GET_THP_DISABLE supported since Linux 3.15 */ + if (ret < 0) { + /* query failed (e.g. option not supported): report 0 so THP stays enabled */ + ret = 0; + } + + return ret; +} + int main(int argc, char **argv) { int ret = 0; @@ -2699,6 +2714,7 @@ int main(int argc, char **argv) desc->uti_thread_rank = uti_thread_rank; desc->uti_use_last_cpu = uti_use_last_cpu; + desc->thp_disable = get_thp_disable(); /* user_start and user_end are set by this call */ if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) { diff --git a/kernel/host.c b/kernel/host.c index c5ce9d25..3b876661 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -484,6 +484,7 @@ static int process_msg_prepare_process(unsigned long rphys) proc->termsig = SIGCHLD; proc->mpol_flags = pn->mpol_flags; proc->mpol_threshold = pn->mpol_threshold; + proc->thp_disable = pn->thp_disable; proc->nr_processes = pn->nr_processes; proc->process_rank = pn->process_rank; proc->heap_extension = pn->heap_extension; diff --git a/kernel/include/process.h b/kernel/include/process.h index 6f34ee2e..bece5737 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -556,6 +556,7 @@ struct process { int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */ int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int clone_count; + int thp_disable; // perf_event int perf_status; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index c5c34526..6f4822b9 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -86,6 +86,10 @@ #define SCD_MSG_FUTEX_WAKE 0x60 +/* For prctl() */ +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + /* Cloning 
flags. */ # define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */ # define CLONE_VM 0x00000100 /* Set if VM shared between processes. */ @@ -201,6 +205,7 @@ struct program_load_desc { unsigned long heap_extension; long stack_premap; unsigned long mpol_bind_mask; + int thp_disable; int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */ int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int nr_processes; diff --git a/kernel/process.c b/kernel/process.c index e326dffd..bc0609be 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -114,6 +114,7 @@ init_process(struct process *proc, struct process *parent) proc->fsgid = parent->fsgid; proc->mpol_flags = parent->mpol_flags; proc->mpol_threshold = parent->mpol_threshold; + proc->thp_disable = parent->thp_disable; memcpy(proc->rlimit, parent->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); memcpy(&proc->cpu_set, &parent->cpu_set, diff --git a/kernel/syscall.c b/kernel/syscall.c index fbca0a5e..986f9ba1 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -1658,7 +1658,8 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot, pgshift = (flags >> MAP_HUGE_SHIFT) & 0x3F; p2align = pgshift - PAGE_SHIFT; } - else if ((flags & MAP_PRIVATE) && (flags & MAP_ANONYMOUS)) { + else if ((flags & MAP_PRIVATE) && (flags & MAP_ANONYMOUS) + && !proc->thp_disable) { pgshift = 0; /* transparent huge page */ p2align = PAGE_P2ALIGN; @@ -5198,7 +5199,10 @@ int do_shmget(const key_t key, const size_t size, const int shmflg) if (shmflg & SHM_HUGETLB) { pgshift = (shmflg >> SHM_HUGE_SHIFT) & 0x3F; + } else if (proc->thp_disable) { + pgshift = PAGE_SHIFT; } else { + /* transparent huge page */ size_t pgsize; int p2align; diff --git a/test/issues/1181/C1181.sh b/test/issues/1181/C1181.sh new file mode 100644 index 00000000..9a788d86 --- /dev/null +++ b/test/issues/1181/C1181.sh @@ -0,0 +1,199 @@ +#!/bin/sh + +USELTP=1 +USEOSTEST=0 + +if [ $# -lt 1 ]; then + echo "usage: C1181.sh " + 
exit 1 +fi + +. ../../common.sh + +if [ -f "./$1_config" ]; then + . ./$1_config +else + echo "$1 is unexpected arch" + exit 1 +fi + +tid=001 +echo "*** CT$tid start *******************************" +echo "** case: THP_DISABLED" +./set_thp_and_exec 1 ${MCEXEC} ./check_thp 1 | tee ./CT${tid}.txt +echo "** case: THP_ENABLED" +./set_thp_and_exec 0 ${MCEXEC} ./check_thp 0 | tee -a ./CT${tid}.txt +ok=`grep "\[ OK \]" CT${tid}.txt | wc -l` +ng=`grep "\[ NG \]" CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi + +LARGE_PAGE_STR="large page allocation" +echo "" +tid=002 +echo "*** CT$tid start *******************************" +echo "** case: THP_DISABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 1 ${MCEXEC} ./mmap_large +${IHKOSCTL} 0 kmsg > ./CT${tid}_01.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_01.txt > /dev/null 2>&1 ; then + echo "[ NG ] THP is Working" | tee ./CT${tid}.txt +else + echo "[ OK ] THP is NOT Working" | tee ./CT${tid}.txt +fi + +echo "** case: THP_ENABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 0 ${MCEXEC} ./mmap_large +${IHKOSCTL} 0 kmsg > ./CT${tid}_02.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_02.txt > /dev/null 2>&1 ; then + pgsize_allocated=`grep "large page allocation" ./CT${tid}_02.txt | tail -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${MMAP_LARGE}" ]; then + echo "[ OK ] THP is Working well" | tee -a ./CT${tid}.txt + else + echo "[ NG ] THP is Working, but pgsize is INVALID" | tee -a ./CT${tid}.txt + fi +else + echo "[ NG ] when THP is ENABLED, NOT Working" | tee -a ./CT${tid}.txt +fi + +ok=`grep "\[ OK \]" CT${tid}.txt | wc -l` +ng=`grep "\[ NG \]" CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi + +echo "" +tid=003 +echo "*** CT$tid start *******************************" 
+echo "** case: THP_DISABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 1 ${MCEXEC} ./shm_large +${IHKOSCTL} 0 kmsg > ./CT${tid}_01.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_01.txt > /dev/null 2>&1 ; then + echo "[ NG ] THP is Working" | tee ./CT${tid}.txt +else + echo "[ OK ] THP is NOT Working" | tee ./CT${tid}.txt +fi + +echo "** case: THP_ENABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 0 ${MCEXEC} ./shm_large +${IHKOSCTL} 0 kmsg > ./CT${tid}_02.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_02.txt > /dev/null 2>&1 ; then + pgsize_allocated=`grep "large page allocation" ./CT${tid}_02.txt | tail -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${SHM_LARGE}" ]; then + echo "[ OK ] THP is Working well" | tee -a ./CT${tid}.txt + else + echo "[ NG ] THP is Working, but pgsize is INVALID" | tee -a ./CT${tid}.txt + fi +else + echo "[ NG ] large page is NOT Working" | tee -a ./CT${tid}.txt +fi + +ok=`grep "\[ OK \]" CT${tid}.txt | wc -l` +ng=`grep "\[ NG \]" CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi + +# init for hugetlbfs test +sudo mkdir -p /mnt/hugetlbfs-2M +sudo mount -t hugetlbfs -o mode=777,pagesize=2m none /mnt/hugetlbfs-2M + +sudo mkdir -p /mnt/hugetlbfs-1G +sudo mount -t hugetlbfs -o mode=777,pagesize=1g none /mnt/hugetlbfs-1G +echo "" +tid=004 +echo "*** CT$tid start *******************************" +echo "** case: THP_DISABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 1 ${MCEXEC} ./mmap_hugetlbfs +${IHKOSCTL} 0 kmsg > ./CT${tid}_01.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_01.txt > /dev/null 2>&1 ; then + pgsize_allocated=`grep "large page allocation" ./CT${tid}_01.txt | head -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${SIZE_2M}" ]; then + echo "[ OK ] hugetlbfs-2M is Working well" | tee 
./CT${tid}.txt + else + echo "[ NG ] pgsize is INVALID" | tee ./CT${tid}.txt + fi + + pgsize_allocated=`grep "large page allocation" ./CT${tid}_01.txt | tail -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${SIZE_1G}" ]; then + echo "[ OK ] hugetlbfs-1G is Working well" | tee -a ./CT${tid}.txt + else + echo "[ NG ] pgsize is INVALID" | tee -a ./CT${tid}.txt + fi +else + echo "[ NG ] hugetlbfs-1G is NOT Working" | tee ./CT${tid}.txt +fi + +echo "** case: THP_ENABLED" +${IHKOSCTL} 0 clear_kmsg +./set_thp_and_exec 0 ${MCEXEC} ./mmap_hugetlbfs +${IHKOSCTL} 0 kmsg > ./CT${tid}_02.txt +if grep "${LARGE_PAGE_STR}" ./CT${tid}_02.txt > /dev/null 2>&1 ; then + pgsize_allocated=`grep "large page allocation" ./CT${tid}_02.txt | head -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${SIZE_2M}" ]; then + echo "[ OK ] hugetlbfs-2M is Working well" | tee -a ./CT${tid}.txt + else + echo "[ NG ] pgsize is INVALID" | tee -a ./CT${tid}.txt + fi + + pgsize_allocated=`grep "large page allocation" ./CT${tid}_02.txt | tail -1 | grep -oE 'size: \w*' | sed 's/size: //'` + echo "pgsize_allocated: ${pgsize_allocated}" + if [ "$pgsize_allocated" = "${SIZE_1G}" ]; then + echo "[ OK ] hugetlbfs-1G is Working well" | tee -a ./CT${tid}.txt + else + echo "[ NG ] pgsize is INVALID" | tee -a ./CT${tid}.txt + fi +else + echo "[ NG ] hugetlbfs-1G is NOT Working" | tee -a ./CT${tid}.txt +fi + +ok=`grep "\[ OK \]" CT${tid}.txt | wc -l` +ng=`grep "\[ NG \]" CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi +# fini for hugetlbfs test +sudo umount /mnt/hugetlbfs-2M +sudo umount /mnt/hugetlbfs-1G + +echo "" +tid=005 +echo "*** CT$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/prctl01 2>&1 | tee ./CT${tid}.txt +ok=`grep TPASS CT${tid}.txt | wc -l` 
+ng=`grep TFAIL CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi + +echo "" +tid=006 +echo "*** CT$tid start *******************************" +sudo ${MCEXEC} ${LTPBIN}/prctl02 2>&1 | tee ./CT${tid}.txt +ok=`grep TPASS CT${tid}.txt | wc -l` +ng=`grep TFAIL CT${tid}.txt | wc -l` +if [ $ng = 0 ]; then + echo "*** CT$tid: PASSED (ok:$ok, ng:$ng)" +else + echo "*** CT$tid: FAILED (ok:$ok, ng:$ng)" +fi diff --git a/test/issues/1181/Makefile b/test/issues/1181/Makefile new file mode 100644 index 00000000..b03644e6 --- /dev/null +++ b/test/issues/1181/Makefile @@ -0,0 +1,30 @@ +include $(HOME)/.mck_test_config.mk +CC = gcc +TARGET=set_thp_and_exec check_thp mmap_large shm_large mmap_hugetlbfs + +CPPFLAGS = +LDFLAGS = + +all: $(TARGET) + +set_thp_and_exec: set_thp_and_exec.c + $(CC) -o $@ $^ $(LDFLAGS) + +check_thp: check_thp.c + $(CC) -o $@ $^ $(LDFLAGS) + +mmap_large: mmap_large.c + $(CC) -o $@ $^ $(LDFLAGS) + +shm_large: shm_large.c + $(CC) -o $@ $^ $(LDFLAGS) + +mmap_hugetlbfs: mmap_hugetlbfs.c + $(CC) -o $@ $^ $(LDFLAGS) + +test: all + @sh ./C1181.sh $(ARCH) + +clean: + rm -f $(TARGET) *.o CT*.txt + diff --git a/test/issues/1181/README b/test/issues/1181/README new file mode 100644 index 00000000..b0220ed9 --- /dev/null +++ b/test/issues/1181/README @@ -0,0 +1,36 @@ +【Issue#1181 動作確認】 +□前提 +本テストはprctlのPR_SET_THP_DISABLE, PR_GET_THP_DISABLE オプションを +使用するため、Linux 3.15.0 以降の環境で実行する必要がある + +□ テスト内容 +1. prctlによるTHP制御が実現されていることの確認 +CT001: + mcexecのTHP設定がmckernelプロセスに反映されていることを確認 + +CT002: + mckernelプロセスのmmapがTHP設定どおりの動作をすることを確認 + +CT003: + mckernelプロセスのshmgetがTHP設定どおりの動作をすることを確認 + +CT004: + hugetlbfsが利用できることを確認 + +2. 
既存のprctl機能に影響がないことをLTPを用いて確認 +CT005: ltp-prctl01 +CT006: ltp-prctl02 + +□ 実行手順 +(1) cd && patch -p0 < /test/issues/1181/large_page.patch +(2) McKernelをビルドする +(3) cd /test/issues/1181/ && make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1181/arm64_config b/test/issues/1181/arm64_config new file mode 100644 index 00000000..ff5c59f3 --- /dev/null +++ b/test/issues/1181/arm64_config @@ -0,0 +1,6 @@ +SIZE_2M="2097152" +SIZE_1G="1073741824" + +MMAP_LARGE=${SIZE_2M} +SHM_LARGE=${SIZE_2M} +SHM_HUGE=${SIZE_1G} diff --git a/test/issues/1181/check_thp.c b/test/issues/1181/check_thp.c new file mode 100644 index 00000000..e2f7ad4e --- /dev/null +++ b/test/issues/1181/check_thp.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + int rc = 0, ret = 0; + int expected_thp = 0; + + if (argc < 2) { + printf("err: too few arguments\n"); + return -1; + } + + expected_thp = atoi(argv[1]); + + rc = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0); + if (rc < 0) { + perror("err: PR_GET_THP_DISABLE"); + } + + if (rc == expected_thp) { + printf("[ OK ] get thp_disable: %d\n", rc); + ret = 0; + } + else { + printf("[ NG ] get thp_disable: %d (expected %d)\n", + rc, expected_thp); + ret = -1; + goto out; + } + + out: + return ret; +} diff --git a/test/issues/1181/large_page.patch b/test/issues/1181/large_page.patch new file mode 100644 index 00000000..efe04cc2 --- /dev/null +++ b/test/issues/1181/large_page.patch @@ -0,0 +1,109 @@ +diff --git arch/arm64/kernel/memory.c arch/arm64/kernel/memory.c +index 4cefc9f..d8fbb8c 100644 +--- arch/arm64/kernel/memory.c ++++ arch/arm64/kernel/memory.c +@@ -2356,6 +2356,16 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, + ptl1_set(ptep, pte); + + error = 0; ++ ++ if (args->attr[0] & PTE_CONT && ++ 
__page_offset(base, PTL1_CONT_SIZE) == 0) { ++ if (base >= cpu_local_var(current)->vm->region.user_start && ++ base < cpu_local_var(current)->vm->region.stack_start) { ++ kprintf("%s: large page allocation, addr: %016lx, size: %d, phys: %lx\n", ++ __func__, base, PTL1_CONT_SIZE, phys); ++ } ++ } ++ + out: + dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n", + base, start, end, error, *ptep); +@@ -2436,6 +2446,23 @@ retry: + ptl_set(ptep, phys | args->attr[level-1], + level); + error = 0; ++ ++ if (args->attr[level-1] & PTE_CONT) { ++ if (__page_offset(base, tbl.cont_pgsize) == 0) { ++ if (base >= cpu_local_var(current)->vm->region.user_start && ++ base < cpu_local_var(current)->vm->region.stack_start) { ++ kprintf("%s: large page allocation, addr: %016lx, size: %d, phys: %lx\n", ++ __func__, base, tbl.cont_pgsize, phys); ++ } ++ } ++ } else { ++ if (base >= cpu_local_var(current)->vm->region.user_start && ++ base < cpu_local_var(current)->vm->region.user_end) { ++ kprintf("%s: large page allocation, addr: %016lx, size: %d, phys: %lx\n", ++ __func__, base, tbl.pgsize, phys); ++ } ++ } ++ + dkprintf("set_range_middle(%lx,%lx,%lx,%d):" + "large page. %d %lx\n", + base, start, end, level, error, *ptep); +diff --git arch/x86_64/kernel/memory.c arch/x86_64/kernel/memory.c +index cf7cac4..ff29cff 100644 +--- arch/x86_64/kernel/memory.c ++++ arch/x86_64/kernel/memory.c +@@ -2027,6 +2027,13 @@ retry: + dkprintf("set_range_l2(%lx,%lx,%lx):" + "2MiB page. 
%d %lx\n", + base, start, end, error, *ptep); ++ ++ if (base >= cpu_local_var(current)->vm->region.user_start && ++ base < cpu_local_var(current)->vm->region.stack_start) { ++ kprintf("%s: large page allocation, addr: %016lx, size: %d\n", ++ __func__, base, PTL2_SIZE); ++ } ++ + // Call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL2_SIZE, PTL2_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL2_SIZE, PTL2_SIZE); +@@ -2116,6 +2123,12 @@ retry: + "1GiB page. %d %lx\n", + base, start, end, error, *ptep); + ++ if (base >= cpu_local_var(current)->vm->region.user_start && ++ base < cpu_local_var(current)->vm->region.stack_start) { ++ kprintf("%s: large page allocation, addr: %016lx, size: %d\n", ++ __func__, base, PTL3_SIZE); ++ } ++ + // Call memory_stat_rss_add() here because pgshift is resolved here + if (rusage_memory_stat_add(args->range, phys, PTL3_SIZE, PTL3_SIZE)) { + dkprintf("%lx+,%s: calling memory_stat_rss_add(),base=%lx,phys=%lx,size=%ld,pgsize=%ld\n", phys, __FUNCTION__, base, phys, PTL3_SIZE, PTL3_SIZE); +diff --git kernel/include/rusage_private.h kernel/include/rusage_private.h +index 7da7728..14253b2 100644 +--- kernel/include/rusage_private.h ++++ kernel/include/rusage_private.h +@@ -12,7 +12,7 @@ + #include + #include + +-#ifdef ENABLE_RUSAGE ++#if 0 /* def ENABLE_RUSAGE */ + + #define RUSAGE_OOM_MARGIN (2 * 1024 * 1024) // 2MB + +diff --git kernel/process.c kernel/process.c +index bc0609b..cfa7d5e 100644 +--- kernel/process.c ++++ kernel/process.c +@@ -2009,6 +2009,15 @@ retry: + } + + dkprintf("%s: attr=%x\n", __FUNCTION__, attr); ++ ++ if (pgsize > PAGE_SIZE) { ++ if ((unsigned long)pgaddr >= cpu_local_var(current)->vm->region.user_start && ++ (unsigned long)pgaddr < cpu_local_var(current)->vm->region.stack_start) { ++ kprintf("large page allocation, addr: %016lx, size: %d, phys: %lx\n", ++ pgaddr, 
pgsize, phys); ++ } ++ } ++ + error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep, + pgsize, phys, attr); + if (error) { diff --git a/test/issues/1181/mmap_hugetlbfs.c b/test/issues/1181/mmap_hugetlbfs.c new file mode 100644 index 00000000..a7d9ccd4 --- /dev/null +++ b/test/issues/1181/mmap_hugetlbfs.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PAGE_SHIFT_2M 21 +#define PAGE_SIZE_2M (1UL << PAGE_SHIFT_2M) +char fn_2M[] = "/mnt/hugetlbfs-2M/tmp"; + +#define PAGE_SHIFT_1G 30 +#define PAGE_SIZE_1G (1UL << PAGE_SHIFT_1G) +char fn_1G[] = "/mnt/hugetlbfs-1G/tmp"; + +int trial_num; + +int mmap_hugetlbfs(char *fn, size_t page_size) +{ + int fd; + char *addr_mmap; + + fd = open(fn, O_CREAT | O_RDWR, 0755); + if (fd == -1) { + printf("open failed, fn:%s\n"); + goto fn_fail; + } + + addr_mmap = mmap(0, page_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0); + if (addr_mmap == (void *)-1) { + printf("mmap failed\n"); + goto fn_fail; + } + addr_mmap[0] = 'z'; + + printf("large page request, addr: %016lx, size: %ld\n", + (unsigned long)addr_mmap, page_size); + + munmap(addr_mmap, page_size); + close(fd); + unlink(fn); + + return 0; + fn_fail: + return 1; +} + +int main(int argc, char **argv) +{ + int ret; + + ret = mmap_hugetlbfs(fn_2M, PAGE_SIZE_2M); + if (ret != 0) { + printf("mmap_hugetlbfs failed, fn: %s", fn_2M); + goto fn_fail; + } + + ret = mmap_hugetlbfs(fn_1G, PAGE_SIZE_1G); + if (ret != 0) { + printf("mmap_hugetlbfs failed, fn: %s", fn_1G); + goto fn_fail; + } + + return 0; + fn_fail: + return 1; +} diff --git a/test/issues/1181/mmap_large.c b/test/issues/1181/mmap_large.c new file mode 100644 index 00000000..4ff245e8 --- /dev/null +++ b/test/issues/1181/mmap_large.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include + +#define MAP_SIZE (16 << 20) + +int main(int argc, char **argv) +{ + int ret = 0; + void *mem = NULL; + + mem = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + 
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED) { /* mmap() signals failure with MAP_FAILED, never NULL */ + printf("mmap failed\n"); + ret = -1; + goto out; + } + + printf("mmap to %p, size: %dM\n", mem, MAP_SIZE / 1024 / 1024); + + memset(mem, 0, MAP_SIZE); /* touch the whole range so pages get allocated */ + + out: + if (mem != MAP_FAILED) { /* only unmap a mapping that was actually created */ + munmap(mem, MAP_SIZE); + } + return ret; +} diff --git a/test/issues/1181/result.log b/test/issues/1181/result.log new file mode 100644 index 00000000..d182faee --- /dev/null +++ b/test/issues/1181/result.log @@ -0,0 +1,67 @@ +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 ... done +*** CT001 start ******************************* +** case: THP_DISABLED +[ OK ] get thp_disable: 1 +** case: THP_ENABLED +[ OK ] get thp_disable: 0 +*** CT001: PASSED (ok:2, ng:0) + +*** CT002 start ******************************* +** case: THP_DISABLED +set thp_disable: 1 +exec: /home/satoken/ppos/bin/mcexec ./mmap_large +mmap to 0x2aaaab00d000, size: 16M +[ OK ] THP is NOT Working +** case: THP_ENABLED +set thp_disable: 0 +exec: /home/satoken/ppos/bin/mcexec ./mmap_large +mmap to 0x2aaaab200000, size: 16M +pgsize_allocated: 2097152 +[ OK ] THP is Working well +*** CT002: PASSED (ok:2, ng:0) + +*** CT003 start ******************************* +** case: THP_DISABLED +set thp_disable: 1 +exec: /home/satoken/ppos/bin/mcexec ./shm_large +shmat to 0x2aaaab00d000, size: 16M +[ OK ] THP is NOT Working +** case: THP_ENABLED +set thp_disable: 0 +exec: /home/satoken/ppos/bin/mcexec ./shm_large +shmat to 0x2aaaab200000, size: 16M +pgsize_allocated: 2097152 +[ OK ] THP is Working well +*** CT003: PASSED (ok:2, ng:0) + +*** CT004 start ******************************* +** case: THP_DISABLED +set thp_disable: 1 +exec: /home/satoken/ppos/bin/mcexec ./mmap_hugetlbfs +large page request, addr: 00002aaaab200000, size: 2097152 +large page request, addr: 00002aaac0000000, size: 1073741824 +pgsize_allocated: 2097152 +[ OK ] hugetlbfs-2M is Working well +pgsize_allocated: 1073741824 +[ OK ] hugetlbfs-1G is Working well 
+** case: THP_ENABLED +set thp_disable: 0 +exec: /home/satoken/ppos/bin/mcexec ./mmap_hugetlbfs +large page request, addr: 00002aaaab200000, size: 2097152 +large page request, addr: 00002aaac0000000, size: 1073741824 +pgsize_allocated: 2097152 +[ OK ] hugetlbfs-2M is Working well +pgsize_allocated: 1073741824 +[ OK ] hugetlbfs-1G is Working well +*** CT004: PASSED (ok:4, ng:0) + +*** CT005 start ******************************* +prctl01 1 TPASS : Test Passed +prctl01 2 TPASS : Test Passed +*** CT005: PASSED (ok:2, ng:0) + +*** CT006 start ******************************* +prctl02 1 TPASS : Test Passed +prctl02 2 TPASS : Test Passed +*** CT006: PASSED (ok:2, ng:0) diff --git a/test/issues/1181/set_thp_and_exec.c b/test/issues/1181/set_thp_and_exec.c new file mode 100644 index 00000000..1c1049a0 --- /dev/null +++ b/test/issues/1181/set_thp_and_exec.c @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include + +#define EXARG_MAX 64 +#define CMD_MAX_LEN 1024 + +int main(int argc, char *argv[]) /* usage: set_thp_and_exec <thp_disable> <command> [args...] */ +{ + int rc = 0, i; + int thp_disable = 0; + char *exargv[EXARG_MAX] = {}; + char *exenvp[1] = {NULL}; + char execcmd[CMD_MAX_LEN] = {}; + + if (argc < 3) { + printf("err: too few arguments\n"); + return -1; + } + + if (argc > EXARG_MAX + 1) { + printf("err: too many arguments\n"); + return -1; + } + + thp_disable = atoi(argv[1]); + + rc = prctl(PR_SET_THP_DISABLE, thp_disable, 0, 0, 0); + if (rc < 0) { + perror("err: PR_SET_THP_DISABLE"); + } + printf("set thp_disable: %d\n", thp_disable); + + for (i = 2; i < argc; i++) { /* argv[2] onward is the command line to exec */ + exargv[i - 2] = argv[i]; + } + + for (i = 0; i < EXARG_MAX; i++) { + if (!exargv[i]) { + break; + } + if (i != 0) { + strncat(execcmd, " ", CMD_MAX_LEN - strlen(execcmd) - 1); /* bound by the space left, as for the argument below */ + } + strncat(execcmd, exargv[i], CMD_MAX_LEN - strlen(execcmd) - 1); + } + + printf("exec: %s\n", execcmd); + execve(exargv[0], exargv, exenvp); + + /* can't reach here */ + printf("err: execve failed\n"); + return -1; +} diff --git a/test/issues/1181/shm_large.c b/test/issues/1181/shm_large.c 
new file mode 100644 index 00000000..a20f8c29 --- /dev/null +++ b/test/issues/1181/shm_large.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include + +#define SHM_SIZE (16 << 20) + +int main(int argc, char **argv) +{ + void *shm = NULL; + key_t key = ftok(argv[0], 0); + int shmid; + struct shmid_ds buf; + + shmid = shmget(key, SHM_SIZE, IPC_CREAT | 0660); + if (shmid < 0) { + perror("shmget: "); + return -1; + } + + shm = shmat(shmid, NULL, 0); + if (shm == (void *)-1) { /* shmat() returns (void *)-1 on failure, not NULL */ + perror("shmat: "); + return -1; + } + + printf("shmat to %p, size: %dM\n", shm, SHM_SIZE / 1024 / 1024); + memset(shm, 0, SHM_SIZE); /* touch the whole segment so pages get allocated */ + + shmdt(shm); + + shmctl(shmid, IPC_RMID, &buf); + + return 0; +} diff --git a/test/issues/1181/x86_64_config b/test/issues/1181/x86_64_config new file mode 100644 index 00000000..ff5c59f3 --- /dev/null +++ b/test/issues/1181/x86_64_config @@ -0,0 +1,6 @@ +SIZE_2M="2097152" +SIZE_1G="1073741824" + +MMAP_LARGE=${SIZE_2M} +SHM_LARGE=${SIZE_2M} +SHM_HUGE=${SIZE_1G}