migrate: Don't migrate on in-kernel interrupt

Change-Id: I9c07e0d633687ce232ec3cd0c80439ca2e856293
Refs: #1555
This commit is contained in:
Ken Sato
2020-12-11 13:25:57 +09:00
committed by Masamichi Takagi
parent 583319125a
commit 93dafc5f79
10 changed files with 555 additions and 8 deletions

View File

@ -344,10 +344,13 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
//irqflags = ihk_mc_spinlock_lock(&v->runq_lock);
/* For migration by IPI or by timesharing */
if (v->flags &
(CPU_FLAG_NEED_MIGRATE | CPU_FLAG_NEED_RESCHED)) {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
do_check = 1;
if (v->flags & CPU_FLAG_NEED_RESCHED) {
if (v->flags & CPU_FLAG_NEED_MIGRATE && !from_user) {
// Don't migrate on K2K schedule
} else {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
do_check = 1;
}
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);

View File

@ -932,11 +932,12 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
{
struct ihk_mc_interrupt_handler *h;
struct cpu_local_var *v = get_this_cpu_local_var();
int from_user = interrupt_from_user(regs);
lapic_ack();
++v->in_interrupt;
set_cputime(interrupt_from_user(regs) ?
set_cputime(from_user ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
@ -1054,15 +1055,18 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
}
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs) ?
set_cputime(from_user ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
--v->in_interrupt;
/* for migration by IPI */
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
// Don't migrate on K2K schedule
if (from_user) {
schedule();
check_signal(0, regs, 0);
}
}
}

271
test/issues/1555/C1555.c Normal file
View File

@ -0,0 +1,271 @@
/* 1400_arm64.c COPYRIGHT FUJITSU LIMITED 2020 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/syscall.h>
/*
 * Number of slots in point_order[]: slot 0 is stamped by the child,
 * slot 1 by the parent.
 */
#define POINT_ORDER_NUM 2
/*
 * Seconds handed to the debug syscall in the MIGRATE_ON_OFFLOAD build;
 * the parent sleeps slightly longer than this before yielding.
 */
#define SLEEP_SEC 2
/* Pick the spin-wait hint instruction for the target architecture. */
#ifdef __x86_64__
#define PAUSE_INST "pause"
#elif defined(__aarch64__)
#define PAUSE_INST "yield"
#else
#error "unexpected archtecture."
#endif
/*
 * Execute one spin-wait hint instruction. The "memory" clobber also makes
 * the statement a compiler barrier, so values polled in a busy-wait loop
 * are re-read from memory on every iteration.
 */
#define cpu_pause() \
({ \
__asm__ __volatile__(PAUSE_INST ::: "memory"); \
})
/*
 * Pointers to MAP_SHARED anonymous mappings created in main() before
 * fork(), so both processes see the same values. MAP_FAILED means
 * "not mapped (yet)".
 */
/* Handshake flag: the child sets it to 1, the parent spins until then. */
static int *sync1 = MAP_FAILED;
/* Core number the parent is running on; both processes get bound to it. */
static int *parent_core = MAP_FAILED;
/* Order stamps: [0] written by the child, [1] written by the parent. */
static int *point_order = MAP_FAILED;
/* Shared counter incremented to produce the next order stamp. */
static int *od = MAP_FAILED;
int main(int argc, char *argv[])
{
pid_t pid = -1;
pid_t ret_pid = -1;
int status = 0;
int i = 0, rc;
int *resp;
int result = -1;
int ret = -1;
int failed = 0;
/* get shared memory */
sync1 = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
parent_core = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
point_order = (int *)mmap(NULL, sizeof(int) * POINT_ORDER_NUM,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
od = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
/* mmap check */
if (sync1 == MAP_FAILED ||
parent_core == MAP_FAILED ||
point_order == MAP_FAILED ||
od == MAP_FAILED) {
printf("mmap() Failed.\n");
goto out;
}
for (i = 0; i < POINT_ORDER_NUM; i++) {
point_order[i] = 0;
}
*od = 0;
*sync1 = 0;
/* create child process */
pid = fork();
switch (pid) {
case -1:
/* error */
printf("fork() Failed.\n");
goto out;
case 0: {
/* child */
/* before migrate, get cpunum */
int old_mycore = sched_getcpu();
printf("[child:%d] running core %d\n", getpid(), old_mycore);
/* sync parent */
*sync1 = 1;
#ifdef MIGRATE_ON_OFFLOAD
int sec = SLEEP_SEC;
resp = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
/* debug syscall */
rc = syscall(888, sec, resp, 0, 0, 0, 0);
if (rc != sec || *resp != sec) {
printf("[child:%d] debug_syscall failed\n", getpid());
_exit(-1);
}
#endif
/* wait until migrated */
while (sched_getcpu() == old_mycore) {
cpu_pause();
}
point_order[0] = ++(*od);
_exit(0);
break;
}
default: {
/* parent */
cpu_set_t cpuset;
/* sync child */
while (*sync1 != 1) {
cpu_pause();
}
/* parent corenum get */
*parent_core = sched_getcpu();
/* child process to migrate parent core */
printf("[parent:%d] running core %d\n", getpid(), *parent_core);
printf("[parent] child process(pid=%d) "
"migrate/bind to core %d\n",
pid, *parent_core);
CPU_ZERO(&cpuset);
CPU_SET(*parent_core, &cpuset);
/* sched_setaffinity interval */
usleep(10000);
result = sched_setaffinity(pid, sizeof(cpuset), &cpuset);
if (result == -1) {
printf("errno = %d\n", errno);
printf("child migrate/bind "
"sched_setaffinity failed.\n");
}
/* parent core bind */
printf("[parent] parent process bind to core %d\n",
*parent_core);
result = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (result == -1) {
printf("errno = %d\n", errno);
printf("parent bind sched_setaffinity failed.\n");
}
#ifdef MIGRATE_ON_OFFLOAD
/* wait for child woken up */
usleep(SLEEP_SEC * 1000000 + 1000);
#endif
/* sync child, switch to child process */
printf("[parent] send sched_yield.\n");
result = 0;
result = sched_yield();
point_order[1] = ++(*od);
break;
}
}
if (result == -1) {
printf("sched_yield failed.\n");
}
/* child process status check. */
ret_pid = wait(&status);
if (ret_pid == pid) {
if (WIFEXITED(status)) {
if (WEXITSTATUS(status)) {
printf("TP failed, child migrate fail.\n");
}
else {
goto wait_ok;
}
}
else {
printf("TP failed, child is not exited.\n");
}
if (WIFSIGNALED(status)) {
printf("TP failed, child signaled by %d.\n",
WTERMSIG(status));
if (WCOREDUMP(status)) {
printf("coredumped.\n");
}
}
else {
printf("TP failed, child is not signaled.\n");
}
if (WIFSTOPPED(status)) {
printf("TP failed, child is stopped by signal %d.\n",
WSTOPSIG(status));
}
else {
printf("TP failed, child is not stopped.\n");
}
if (WIFCONTINUED(status)) {
printf("TP failed, child is continued.\n");
}
else {
printf("TP failed, child is not continued.\n");
}
for (i = 0; i < POINT_ORDER_NUM; i++) {
printf("point_order[%d] = %d\n", i, point_order[i]);
}
goto out;
}
else {
printf("TP failed, child process wait() fail.\n");
for (i = 0; i < POINT_ORDER_NUM; i++) {
printf("point_order[%d] = %d\n", i, point_order[i]);
}
goto out;
}
wait_ok:
for (i = 0; i < POINT_ORDER_NUM; i++) {
printf("point_order[%d] = %d\n", i, point_order[i]);
if (point_order[i] == 0) {
failed = 1;
}
}
if (failed != 0) {
printf("TP failed, parent or child process is not running.\n");
goto out;
}
if (result != -1) {
if (point_order[0] < point_order[1]) {
ret = 0;
}
else {
printf("TP failed, out of order.\n");
}
}
out:
/* unmap semaphore memory */
if (od != MAP_FAILED) {
munmap(od, sizeof(int));
}
if (point_order != MAP_FAILED) {
munmap(point_order, sizeof(int) * POINT_ORDER_NUM);
}
if (parent_core != MAP_FAILED) {
munmap(parent_core, sizeof(int));
}
if (sync1 != MAP_FAILED) {
munmap(sync1, sizeof(int));
}
return ret;
}

103
test/issues/1555/C1555.sh Normal file
View File

@ -0,0 +1,103 @@
#!/bin/sh
# Soak test for issue #1555.
# Repeats C1555T01, C1555T02 and the LTP cases listed in ltp_list until
# TESTTIME seconds have elapsed, a case fails, or ${STOPFILE} appears.
# The run is reported OK only when the full TESTTIME elapsed without failure.

USELTP=1
USEOSTEST=0
MCREBOOT=0
. ../../common.sh

BOOTPARAM="${BOOTPARAM} -e anon_on_demand"
mcreboot

# Remember where the test files live; the LTP loop changes directory.
TESTDIR=`pwd`
STOPFILE="./1555_stop"
LOGFILE="${TESTDIR}/1555_log"
LTPLIST="${TESTDIR}/ltp_list"
TESTTIME=43200 # 12 hours
issue="1555"

echo "start-time: `date`"
stime=`date "+%s"`
failed=0
loops=0

while :
do
	sudo ${MCEXEC} ./C1555T01 > "${LOGFILE}" 2>&1
	if [ $? -ne 0 ]; then
		echo "C1555T01 failed."
		failed=1
		break
	fi

	# Start from an empty kernel log so that only messages produced by
	# C1555T02 are counted below.
	${IHKOSCTL} 0 clear_kmsg
	sudo ${MCEXEC} ./C1555T02 > "${LOGFILE}" 2>&1
	if [ $? -ne 0 ]; then
		echo "C1555T02 failed."
		failed=1
		break
	fi

	# The debug patch logs "ISSUE_1555" when the CPU changed during a
	# syscall offload; without it the scenario was not exercised.
	dbg_cnt=`${IHKOSCTL} 0 kmsg | grep -c "ISSUE_1555"`
	if [ ${dbg_cnt} -eq 0 ]; then
		echo "C1555T02 failed. Did not migrate in offload."
		failed=1
		break
	fi

	# Regression check with LTP, run from the LTP binary directory.
	if ! cd "${LTPBIN}"; then
		echo "cd ${LTPBIN} failed."
		failed=1
		break
	fi
	while read line
	do
		${MCEXEC} ./${line} > "${LOGFILE}" 2>&1
		if [ $? -ne 0 ]; then
			echo "${line} failed."
			failed=1
			break
		fi

		ng=`grep -c FAIL "${LOGFILE}"`
		if [ ${ng} -ne 0 ]; then
			echo "${line} failed."
			cat "${LOGFILE}"
			failed=1
			break
		fi
	done < "${LTPLIST}"
	cd "${TESTDIR}"

	loops=$((loops + 1))

	# Manual stop request.
	if [ -e "${STOPFILE}" ]; then
		rm -f "${STOPFILE}"
		break
	fi

	etime=`date "+%s"`
	run_time=$((etime - stime))
	if [ ${TESTTIME} -le ${run_time} ]; then
		break
	fi

	if [ ${failed} -eq 1 ]; then
		break
	fi
done

echo "end-time: `date`"
etime=`date "+%s"`
run_time=$((etime - stime))

# A run that ended before TESTTIME (failure or manual stop) is always NG.
if [ ${TESTTIME} -le ${run_time} ] && [ ${failed} -eq 0 ]; then
	echo "Issue#${issue} test OK."
	echo "Test cases run ${loops} times."
	rm -f "${LOGFILE}"
else
	echo "Issue#${issue} test NG."
	echo "Test cases run ${loops} times."
fi

18
test/issues/1555/Makefile Normal file
View File

@ -0,0 +1,18 @@
# Builds the two variants of the issue #1555 test program from C1555.c.
CC=gcc
CFLAGS=-g
LDFLAGS=

TARGET=C1555T01 C1555T02

# These targets do not produce files of the same name.
.PHONY: all test clean

all: $(TARGET)

# Plain variant: the child spins in user space until it is migrated.
C1555T01: C1555.c
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

# Offload variant: the child first issues the delegated debug syscall.
C1555T02: C1555.c
	$(CC) $(CFLAGS) -DMIGRATE_ON_OFFLOAD $(LDFLAGS) -o $@ $^

test: all
	sh ./C1555.sh

clean:
	rm -f $(TARGET) *.o *.txt

38
test/issues/1555/README Normal file
View File

@ -0,0 +1,38 @@
【Issue#1555 動作確認】
□ テスト内容
Issueにて報告された症状はmigrate指示のタイミングによって発生の有無が
変化するため、下記のテストを12時間(C1555.sh の TESTTIME=43200秒)連続実行して、症状が発生しないことを確認する。
1. 下記のテストプログラムを実行し、症状が発生しないことを確認する
C1555T01: (Issue#1400 のテストプログラム 1400_arm64.c を流用)
親プロセスが子プロセスと自身を同一CPUにバインドしてsched_yield()した場合、
子プロセス、親プロセスの順序で実行されることを確認する。
C1555T02:
C1555T01 のテストケースにおいて、子プロセスがmigrate指示を受ける際に
システムコールのオフロードの中であり、その最中にRemote page faultが発生した場合にも
子プロセス、親プロセスの順序で実行されることを確認する。
2. 以下のLTPを用いて既存機能に影響が無いことを確認
- sched_yield01
- signal01,02,03,04,05
- rt_sigaction01,02,03
- rt_sigprocmask01,02
- rt_sigsuspend01
- rt_tgsigqueueinfo01
- futex_wait01,02,03,04
- futex_wake01
- futex_wait_bitset01
- execveat02
- sigsuspend01
□ 実行手順
$ make test
McKernelのインストール先や、OSTEST, LTPの配置場所は、
$HOME/.mck_test_config を参照している
.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを
$HOMEにコピーし、適宜編集する
□ 実行結果
x86_64_result.log aarch64_result.log 参照。
すべての項目をPASSしていることを確認。

View File

@ -0,0 +1,7 @@
sh ./C1555.sh
mcstop+release.sh ... done
mcreboot.sh -c 37-43,49-55 -m 2G@2,2G@3 -r 37-43:36+49-55:48 -O -e anon_on_demand ... done
start-time: 2021年 1月 27日 水曜日 14:41:56 JST
end-time: 2021年 1月 28日 木曜日 02:42:01 JST
Issue#1555 test OK.
Test cases run 4426 times.

21
test/issues/1555/ltp_list Normal file
View File

@ -0,0 +1,21 @@
sched_yield01
signal01
signal02
signal03
signal04
signal05
rt_sigaction01
rt_sigaction02
rt_sigaction03
rt_sigprocmask01
rt_sigprocmask02
rt_sigsuspend01
rt_tgsigqueueinfo01
futex_wait01
futex_wait02
futex_wait03
futex_wait04
futex_wake01
futex_wait_bitset01
execveat02
sigsuspend01

View File

@ -0,0 +1,74 @@
diff --git a/arch/arm64/kernel/include/syscall_list.h b/arch/arm64/kernel/include/syscall_list.h
index 28e99eb..1d9f052 100644
--- a/arch/arm64/kernel/include/syscall_list.h
+++ b/arch/arm64/kernel/include/syscall_list.h
@@ -137,6 +137,7 @@ SYSCALL_HANDLED(802, linux_mlock)
SYSCALL_HANDLED(803, suspend_threads)
SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
+SYSCALL_DELEGATED(888, dbg_sleep)
SYSCALL_DELEGATED(1024, open)
SYSCALL_DELEGATED(1035, readlink)
diff --git a/arch/x86_64/kernel/include/syscall_list.h b/arch/x86_64/kernel/include/syscall_list.h
index 17a1d65..8010d3e 100644
--- a/arch/x86_64/kernel/include/syscall_list.h
+++ b/arch/x86_64/kernel/include/syscall_list.h
@@ -181,6 +181,7 @@ SYSCALL_HANDLED(802, linux_mlock)
SYSCALL_HANDLED(803, suspend_threads)
SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
+SYSCALL_DELEGATED(888, dbg_sleep)
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c
index c48e245..118de75 100644
--- a/executer/user/mcexec.c
+++ b/executer/user/mcexec.c
@@ -5039,6 +5039,15 @@ return_linux_spawn:
break;
#endif
+ case 888: { // dbg_sleep
+ int sec = (int)w.sr.args[0];
+ int *resp = (int *)w.sr.args[1];
+ sleep(sec);
+ *resp = sec;
+ do_syscall_return(fd, cpu, sec, 0, 0, 0, 0);
+ break;
+ }
+
default:
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 8a919e1..0b0fbc3 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -181,6 +181,7 @@ long do_syscall(struct syscall_request *req, int cpu)
struct thread *thread = cpu_local_var(current);
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
int mstatus = 0;
+ int orig_cpu;
#ifdef PROFILE_ENABLE
/* We cannot use thread->profile_start_ts here because the
@@ -231,6 +232,7 @@ long do_syscall(struct syscall_request *req, int cpu)
#ifdef ENABLE_TOFU
res.pde_data = NULL;
#endif
+ orig_cpu = ihk_mc_get_processor_id();
send_syscall(req, cpu, &res);
if (req->rtid == -1) {
@@ -386,6 +388,9 @@ long do_syscall(struct syscall_request *req, int cpu)
preempt_enable();
}
+ if (orig_cpu != ihk_mc_get_processor_id()) {
+ kprintf("ISSUE_1555 migrated during syscall_offload\n");
+ }
dkprintf("%s: syscall num: %d got host reply: %d \n",
__FUNCTION__, req->number, res.ret);

View File

@ -0,0 +1,8 @@
sh ./C1555.sh
mcstop+release.sh ... done
mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 -O -e anon_on_demand ... done
start-time: Wed Jan 27 14:48:14 JST 2021
execveat02 failed.
end-time: Thu Jan 28 02:48:18 JST 2021
Issue#1555 test NG.
Test cases run 4855 times.