From 93dafc5f79bb453bb68553dcf0ac06c47141c9aa Mon Sep 17 00:00:00 2001 From: Ken Sato Date: Fri, 11 Dec 2020 13:25:57 +0900 Subject: [PATCH] migrate: Don't migrate on in-kernel interrupt Change-Id: I9c07e0d633687ce232ec3cd0c80439ca2e856293 Refs: #1555 --- arch/arm64/kernel/irq-gic-v3.c | 11 +- arch/x86_64/kernel/cpu.c | 12 +- test/issues/1555/C1555.c | 271 ++++++++++++++++++++++++++++ test/issues/1555/C1555.sh | 103 +++++++++++ test/issues/1555/Makefile | 18 ++ test/issues/1555/README | 38 ++++ test/issues/1555/aarch64_result.log | 7 + test/issues/1555/ltp_list | 21 +++ test/issues/1555/test_1555.patch | 74 ++++++++ test/issues/1555/x86_64_result.log | 8 + 10 files changed, 555 insertions(+), 8 deletions(-) create mode 100644 test/issues/1555/C1555.c create mode 100644 test/issues/1555/C1555.sh create mode 100644 test/issues/1555/Makefile create mode 100644 test/issues/1555/README create mode 100644 test/issues/1555/aarch64_result.log create mode 100644 test/issues/1555/ltp_list create mode 100644 test/issues/1555/test_1555.patch create mode 100644 test/issues/1555/x86_64_result.log diff --git a/arch/arm64/kernel/irq-gic-v3.c b/arch/arm64/kernel/irq-gic-v3.c index 113ec4bc..14333695 100644 --- a/arch/arm64/kernel/irq-gic-v3.c +++ b/arch/arm64/kernel/irq-gic-v3.c @@ -344,10 +344,13 @@ void handle_interrupt_gicv3(struct pt_regs *regs) //irqflags = ihk_mc_spinlock_lock(&v->runq_lock); /* For migration by IPI or by timesharing */ - if (v->flags & - (CPU_FLAG_NEED_MIGRATE | CPU_FLAG_NEED_RESCHED)) { - v->flags &= ~CPU_FLAG_NEED_RESCHED; - do_check = 1; + if (v->flags & CPU_FLAG_NEED_RESCHED) { + if (v->flags & CPU_FLAG_NEED_MIGRATE && !from_user) { + // Don't migrate on K2K schedule + } else { + v->flags &= ~CPU_FLAG_NEED_RESCHED; + do_check = 1; + } } //ihk_mc_spinlock_unlock(&v->runq_lock, irqflags); diff --git a/arch/x86_64/kernel/cpu.c b/arch/x86_64/kernel/cpu.c index 8d9ea707..7bf6fbff 100644 --- a/arch/x86_64/kernel/cpu.c +++ b/arch/x86_64/kernel/cpu.c @@ -932,11 
+932,12 @@ void handle_interrupt(int vector, struct x86_user_context *regs) { struct ihk_mc_interrupt_handler *h; struct cpu_local_var *v = get_this_cpu_local_var(); + int from_user = interrupt_from_user(regs); lapic_ack(); ++v->in_interrupt; - set_cputime(interrupt_from_user(regs) ? + set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN); dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n", @@ -1054,15 +1055,18 @@ void handle_interrupt(int vector, struct x86_user_context *regs) } interrupt_exit(regs); - set_cputime(interrupt_from_user(regs) ? + set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT); --v->in_interrupt; /* for migration by IPI */ if (v->flags & CPU_FLAG_NEED_MIGRATE) { - schedule(); - check_signal(0, regs, 0); + // Don't migrate on K2K schedule + if (from_user) { + schedule(); + check_signal(0, regs, 0); + } } } diff --git a/test/issues/1555/C1555.c b/test/issues/1555/C1555.c new file mode 100644 index 00000000..15c3c965 --- /dev/null +++ b/test/issues/1555/C1555.c @@ -0,0 +1,271 @@ +/* 1400_arm64.c COPYRIGHT FUJITSU LIMITED 2020 */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#define POINT_ORDER_NUM 2 +#define SLEEP_SEC 2 + +#ifdef __x86_64__ +#define PAUSE_INST "pause" +#elif defined(__aarch64__) +#define PAUSE_INST "yield" +#else +#error "unexpected archtecture." 
+#endif + +#define cpu_pause() \ + ({ \ + __asm__ __volatile__(PAUSE_INST ::: "memory"); \ + }) + +static int *sync1 = MAP_FAILED; +static int *parent_core = MAP_FAILED; +static int *point_order = MAP_FAILED; +static int *od = MAP_FAILED; + +int main(int argc, char *argv[]) +{ + pid_t pid = -1; + pid_t ret_pid = -1; + int status = 0; + int i = 0, rc; + int *resp; + int result = -1; + int ret = -1; + int failed = 0; + + /* map anonymous shared memory visible to both parent and child */ + sync1 = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + parent_core = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + point_order = (int *)mmap(NULL, sizeof(int) * POINT_ORDER_NUM, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + od = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + /* bail out if any of the mappings failed */ + if (sync1 == MAP_FAILED || + parent_core == MAP_FAILED || + point_order == MAP_FAILED || + od == MAP_FAILED) { + printf("mmap() Failed.\n"); + goto out; + } + + for (i = 0; i < POINT_ORDER_NUM; i++) { + point_order[i] = 0; + } + *od = 0; + *sync1 = 0; + + /* create child process */ + pid = fork(); + + switch (pid) { + case -1: + /* error */ + printf("fork() Failed.\n"); + goto out; + + case 0: { + /* child */ + /* record the core we run on before being migrated */ + int old_mycore = sched_getcpu(); + + printf("[child:%d] running core %d\n", getpid(), old_mycore); + + /* tell the parent that the child is running */ + *sync1 = 1; + +#ifdef MIGRATE_ON_OFFLOAD + int sec = SLEEP_SEC; + resp = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + /* debug syscall 888: expected to return sec and to store sec in *resp */ + rc = syscall(888, sec, resp, 0, 0, 0, 0); + if (rc != sec || *resp != sec) { + printf("[child:%d] debug_syscall failed\n", getpid()); + _exit(-1); + } +#endif + + /* spin until sched_getcpu() reports a different core */ + while (sched_getcpu() == old_mycore) { + cpu_pause(); + } + point_order[0] = ++(*od); + + _exit(0); + break; + } + + default: { + /* 
parent */ + cpu_set_t cpuset; + + /* wait until the child has signalled that it is running */ + while (*sync1 != 1) { + cpu_pause(); + } + + /* record the core the parent is running on */ + *parent_core = sched_getcpu(); + + /* bind the child to the parent's core so that it migrates here */ + printf("[parent:%d] running core %d\n", getpid(), *parent_core); + printf("[parent] child process(pid=%d) " + "migrate/bind to core %d\n", + pid, *parent_core); + + CPU_ZERO(&cpuset); + CPU_SET(*parent_core, &cpuset); + + /* short delay before calling sched_setaffinity() */ + usleep(10000); + + result = sched_setaffinity(pid, sizeof(cpuset), &cpuset); + if (result == -1) { + printf("errno = %d\n", errno); + printf("child migrate/bind " + "sched_setaffinity failed.\n"); + } + + /* bind the parent itself to the same core */ + printf("[parent] parent process bind to core %d\n", + *parent_core); + result = sched_setaffinity(0, sizeof(cpuset), &cpuset); + if (result == -1) { + printf("errno = %d\n", errno); + printf("parent bind sched_setaffinity failed.\n"); + } + +#ifdef MIGRATE_ON_OFFLOAD + /* sleep slightly longer than SLEEP_SEC, presumably until the child has woken up */ + usleep(SLEEP_SEC * 1000000 + 1000); +#endif + + /* yield the CPU; the child is expected to run before the parent resumes */ + printf("[parent] send sched_yield.\n"); + + result = 0; + + result = sched_yield(); + + point_order[1] = ++(*od); + + break; + } + } + + if (result == -1) { + printf("sched_yield failed.\n"); + } + + /* child process status check. 
*/ + ret_pid = wait(&status); + if (ret_pid == pid) { + if (WIFEXITED(status)) { + if (WEXITSTATUS(status)) { + printf("TP failed, child migrate fail.\n"); + } + else { + goto wait_ok; + } + } + else { + printf("TP failed, child is not exited.\n"); + } + + if (WIFSIGNALED(status)) { + printf("TP failed, child signaled by %d.\n", + WTERMSIG(status)); + + if (WCOREDUMP(status)) { + printf("coredumped.\n"); + } + } + else { + printf("TP failed, child is not signaled.\n"); + } + + + if (WIFSTOPPED(status)) { + printf("TP failed, child is stopped by signal %d.\n", + WSTOPSIG(status)); + } + else { + printf("TP failed, child is not stopped.\n"); + } + + if (WIFCONTINUED(status)) { + printf("TP failed, child is continued.\n"); + } + else { + printf("TP failed, child is not continued.\n"); + } + + for (i = 0; i < POINT_ORDER_NUM; i++) { + printf("point_order[%d] = %d\n", i, point_order[i]); + } + goto out; + } + else { + printf("TP failed, child process wait() fail.\n"); + + for (i = 0; i < POINT_ORDER_NUM; i++) { + printf("point_order[%d] = %d\n", i, point_order[i]); + } + goto out; + } + +wait_ok: + for (i = 0; i < POINT_ORDER_NUM; i++) { + printf("point_order[%d] = %d\n", i, point_order[i]); + + if (point_order[i] == 0) { + failed = 1; + } + } + + if (failed != 0) { + printf("TP failed, parent or child process is not running.\n"); + goto out; + } + + if (result != -1) { + if (point_order[0] < point_order[1]) { + ret = 0; + } + else { + printf("TP failed, out of order.\n"); + } + } + +out: + /* unmap the shared-memory regions that were successfully mapped */ + if (od != MAP_FAILED) { + munmap(od, sizeof(int)); + } + + if (point_order != MAP_FAILED) { + munmap(point_order, sizeof(int) * POINT_ORDER_NUM); + } + + if (parent_core != MAP_FAILED) { + munmap(parent_core, sizeof(int)); + } + + if (sync1 != MAP_FAILED) { + munmap(sync1, sizeof(int)); + } + return ret; +} diff --git a/test/issues/1555/C1555.sh b/test/issues/1555/C1555.sh new file mode 100644 index 00000000..4eebb6a7 --- /dev/null +++ 
b/test/issues/1555/C1555.sh @@ -0,0 +1,103 @@ +#/bin/sh + +USELTP=1 +USEOSTEST=0 + +MCREBOOT=0 +. ../../common.sh +BOOTPARAM="${BOOTPARAM} -e anon_on_demand" +mcreboot + +PWD=`pwd` +STOPFILE="./1555_stop" +LOGFILE="${PWD}/1555_log" +LTPLIST="${PWD}/ltp_list" +TESTTIME=43200 # 12 hours + +issue="1555" +echo "start-time: `date`" +stime=`date "+%s"` +failed=0 +loops=0 + +while : +do + sudo ${MCEXEC} ./C1555T01 > ${LOGFILE} 2>&1 + if [ $? -ne 0 ]; then + echo "C1555T01 failed." + failed=1 + break + fi + + ${IHKOSCTL} 0 clear_kmsg + sudo ${MCEXEC} ./C1555T02 > ${LOGFILE} 2>&1 + if [ $? -ne 0 ]; then + echo "C1555T02 failed." + failed=1 + break + fi + + dbg_cnt=`${IHKOSCTL} 0 kmsg | grep "ISSUE_1555" | wc -l` + if [ ${dbg_cnt} -eq 0 ]; then + echo "C1555T02 failed. Did not migrate in offload." + failed=1 + break + fi + + pushd ${LTPBIN} > /dev/null + + while read line + do + ${MCEXEC} ./${line} > ${LOGFILE} 2>&1 + if [ $? -ne 0 ]; then + echo "${line} failed." + failed=1 + break + fi + + ng=`grep FAIL ${LOGFILE} | wc -l` + if [ $ng -ne 0 ]; then + echo "${line} failed." + cat ${LOGFILE} + failed=1 + break + fi + done < ${LTPLIST} + popd > /dev/null + + let loops++ + + if [ -e ${STOPFILE} ]; then + rm -f ${STOPFILE} + break + fi + + etime=`date "+%s"` + run_time=$((${etime} - ${stime})) + if [ ${TESTTIME} -le ${run_time} ]; then + break; + fi + + if [ ${failed} -eq 1 ]; then + break + fi +done + +echo "end-time: `date`" +etime=`date "+%s"` +run_time=$((${etime} - ${stime})) + +if [ ${TESTTIME} -le ${run_time} ]; then + if [ ${failed} -eq 0 ]; then + echo "Issue#${issue} test OK." + echo "Test cases run ${loops} times." + rm -f ${LOGFILE} + else + echo "Issue#${issue} test NG." + echo "Test cases run ${loops} times." + fi +else + echo "Issue#${issue} test NG." + echo "Test cases run ${loops} times." 
+fi + diff --git a/test/issues/1555/Makefile b/test/issues/1555/Makefile new file mode 100644 index 00000000..07adcbec --- /dev/null +++ b/test/issues/1555/Makefile @@ -0,0 +1,18 @@ +CC=gcc +CFLAGS=-g +LDFLAGS= + +TARGET=C1555T01 C1555T02 + +all: $(TARGET) + +C1555T01: C1555.c + $(CC) -o $@ $^ + +C1555T02: C1555.c + $(CC) -DMIGRATE_ON_OFFLOAD -o $@ $^ + +test: all + sh ./C1555.sh +clean: + rm -f $(TARGET) *.o *.txt diff --git a/test/issues/1555/README b/test/issues/1555/README new file mode 100644 index 00000000..73fe24bb --- /dev/null +++ b/test/issues/1555/README @@ -0,0 +1,38 @@ +【Issue#1555 動作確認】 +□ テスト内容 +Issueにて報告された症状はmigrate指示のタイミングによって発生の有無が +変化するため、下記のテストを12時間連続実行して、症状が発生しないことを確認する。 + +1. 下記のテストプログラムを実行し、症状が発生しないことを確認する +C1555T01: (Issue#1400 のテストプログラム 1400_arm64.c を流用) + 親プロセスが子プロセスと自身を同一CPUにバインドしてsched_yield()した場合、 + 子プロセス、親プロセスの順序で実行されることを確認する。 + +C1555T02: + C1555T01 のテストケースにおいて、子プロセスがmigrate指示を受ける際に + システムコールのオフロードの中であり、その最中にRemote page faultが発生した場合にも + 子プロセス、親プロセスの順序で実行されることを確認する。 + +2. 以下のLTPを用いて既存機能に影響が無いことを確認 + - sched_yield01 + - signal01,02,03,04,05 + - rt_sigaction01,02,03 + - rt_sigprocmask01,02 + - rt_sigsuspend01 + - rt_tgsigqueueinfo01 + - futex_wait01,02,03,04 + - futex_wake01 + - futex_wait_bitset01 + - execveat02 + +□ 実行手順 +$ make test + +McKernelのインストール先や、OSTEST, LTPの配置場所は、 +$HOME/.mck_test_config を参照している +.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを +$HOMEにコピーし、適宜編集する + +□ 実行結果 +x86_64_result.log aarch64_result.log 参照。 +すべての項目をPASSしていることを確認。 diff --git a/test/issues/1555/aarch64_result.log b/test/issues/1555/aarch64_result.log new file mode 100644 index 00000000..b70fe793 --- /dev/null +++ b/test/issues/1555/aarch64_result.log @@ -0,0 +1,7 @@ +sh ./C1555.sh +mcstop+release.sh ... done +mcreboot.sh -c 37-43,49-55 -m 2G@2,2G@3 -r 37-43:36+49-55:48 -O -e anon_on_demand ... done +start-time: 2021年 1月 27日 水曜日 14:41:56 JST +end-time: 2021年 1月 28日 木曜日 02:42:01 JST +Issue#1555 test OK. +Test cases run 4426 times. 
diff --git a/test/issues/1555/ltp_list b/test/issues/1555/ltp_list new file mode 100644 index 00000000..e50f44c6 --- /dev/null +++ b/test/issues/1555/ltp_list @@ -0,0 +1,21 @@ +sched_yield01 +signal01 +signal02 +signal03 +signal04 +signal05 +rt_sigaction01 +rt_sigaction02 +rt_sigaction03 +rt_sigprocmask01 +rt_sigprocmask02 +rt_sigsuspend01 +rt_tgsigqueueinfo01 +futex_wait01 +futex_wait02 +futex_wait03 +futex_wait04 +futex_wake01 +futex_wait_bitset01 +execveat02 +sigsuspend01 diff --git a/test/issues/1555/test_1555.patch b/test/issues/1555/test_1555.patch new file mode 100644 index 00000000..ffabad88 --- /dev/null +++ b/test/issues/1555/test_1555.patch @@ -0,0 +1,74 @@ +diff --git a/arch/arm64/kernel/include/syscall_list.h b/arch/arm64/kernel/include/syscall_list.h +index 28e99eb..1d9f052 100644 +--- a/arch/arm64/kernel/include/syscall_list.h ++++ b/arch/arm64/kernel/include/syscall_list.h +@@ -137,6 +137,7 @@ SYSCALL_HANDLED(802, linux_mlock) + SYSCALL_HANDLED(803, suspend_threads) + SYSCALL_HANDLED(804, resume_threads) + SYSCALL_HANDLED(811, linux_spawn) ++SYSCALL_DELEGATED(888, dbg_sleep) + + SYSCALL_DELEGATED(1024, open) + SYSCALL_DELEGATED(1035, readlink) +diff --git a/arch/x86_64/kernel/include/syscall_list.h b/arch/x86_64/kernel/include/syscall_list.h +index 17a1d65..8010d3e 100644 +--- a/arch/x86_64/kernel/include/syscall_list.h ++++ b/arch/x86_64/kernel/include/syscall_list.h +@@ -181,6 +181,7 @@ SYSCALL_HANDLED(802, linux_mlock) + SYSCALL_HANDLED(803, suspend_threads) + SYSCALL_HANDLED(804, resume_threads) + SYSCALL_HANDLED(811, linux_spawn) ++SYSCALL_DELEGATED(888, dbg_sleep) + + /* Do not edit the lines including this comment and + * EOF just after it because those are used as a +diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c +index c48e245..118de75 100644 +--- a/executer/user/mcexec.c ++++ b/executer/user/mcexec.c +@@ -5039,6 +5039,15 @@ return_linux_spawn: + break; + #endif + ++ case 888: { // dbg_sleep ++ int sec = (int)w.sr.args[0]; ++ 
int *resp = (int *)w.sr.args[1]; ++ sleep(sec); ++ *resp = sec; ++ do_syscall_return(fd, cpu, sec, 0, 0, 0, 0); ++ break; ++ } ++ + default: + ret = do_generic_syscall(&w); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); +diff --git a/kernel/syscall.c b/kernel/syscall.c +index 8a919e1..0b0fbc3 100644 +--- a/kernel/syscall.c ++++ b/kernel/syscall.c +@@ -181,6 +181,7 @@ long do_syscall(struct syscall_request *req, int cpu) + struct thread *thread = cpu_local_var(current); + struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor); + int mstatus = 0; ++ int orig_cpu; + + #ifdef PROFILE_ENABLE + /* We cannot use thread->profile_start_ts here because the +@@ -231,6 +232,7 @@ long do_syscall(struct syscall_request *req, int cpu) + #ifdef ENABLE_TOFU + res.pde_data = NULL; + #endif ++ orig_cpu = ihk_mc_get_processor_id(); + send_syscall(req, cpu, &res); + + if (req->rtid == -1) { +@@ -386,6 +388,9 @@ long do_syscall(struct syscall_request *req, int cpu) + preempt_enable(); + } + ++ if (orig_cpu != ihk_mc_get_processor_id()) { ++ kprintf("ISSUE_1555 migrated during syscall_offload\n"); ++ } + dkprintf("%s: syscall num: %d got host reply: %d \n", + __FUNCTION__, req->number, res.ret); + diff --git a/test/issues/1555/x86_64_result.log b/test/issues/1555/x86_64_result.log new file mode 100644 index 00000000..9c6a7054 --- /dev/null +++ b/test/issues/1555/x86_64_result.log @@ -0,0 +1,8 @@ +sh ./C1555.sh +mcstop+release.sh ... done +mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 -O -e anon_on_demand ... done +start-time: Wed Jan 27 14:48:14 JST 2021 +execveat02 failed. +end-time: Thu Jan 28 02:48:18 JST 2021 +Issue#1555 test NG. +Test cases run 4855 times.