test: runq_lock and over-scheduling fix.

Change-Id: I236ab585403076d716be350c8b51e8d352122f2b
Refs: #1400
This commit is contained in:
Shiratori, Takehiro
2020-03-05 15:57:57 +09:00
parent 67f5a1d4e0
commit 93581cb142
4 changed files with 386 additions and 0 deletions

68
test/issues/1400/arm64/1400.sh Executable file
View File

@ -0,0 +1,68 @@
#!/bin/sh
# 1400.sh COPYRIGHT FUJITSU LIMITED 2020
#
# Long-running regression driver for issue #1400.
#
# Repeats one "set" (the 1400_arm64 test program followed by every LTP
# test named in ltp_list.txt) until either a test fails or the stop file
# ./1400_stop is found at the end of a set.  The final verdict is OK only
# when nothing failed AND the loop ran for at least MIN_RUN_SEC seconds.
#
# NOTE(review): MCEXEC, LTPBIN and mcstop are not defined here; they are
# presumably provided by $HOME/.mck_test_config and common.sh, which also
# presumably consume BOOTPARAM and USELTP -- confirm against common.sh.
. $HOME/.mck_test_config

BOOTPARAM="-c 12-15 -m 1G@4 -O"
USELTP=1
. ../../../common.sh

# Remember the test directory under its own name; PWD is maintained by the
# shell itself and changes on every cd, so it must not be used for this.
TESTDIR=`pwd`
LOGFILE="${TESTDIR}/1400_log"
STOPFILE="./1400_stop"
LTPLIST="${TESTDIR}/ltp_list.txt"

# Minimum wall-clock run time required for an OK verdict: 12 hours.
MIN_RUN_SEC=43200

echo "issue-1400 test run."
echo "start-time: `date`"
stime=`date "+%s"`
failed=0

while :
do
	# The log is overwritten by every run, so after a failure it holds
	# the output of the failing program only.
	${MCEXEC} ./1400_arm64 > "${LOGFILE}" 2>&1
	if [ $? -ne 0 ]; then
		echo "1400_arm64 failed."
		failed=1
		break
	fi

	# Plain cd instead of pushd/popd: those are bash builtins and are
	# not available when /bin/sh is a strict POSIX shell.
	if ! cd "${LTPBIN}"; then
		echo "cd ${LTPBIN} failed."
		failed=1
		break
	fi

	while read line
	do
		# stdin comes from /dev/null so that the test program cannot
		# consume the remaining lines of the list feeding this loop.
		${MCEXEC} ./${line} > "${LOGFILE}" 2>&1 < /dev/null
		if [ $? -ne 0 ]; then
			echo "${line} failed."
			failed=1
			break
		fi
	done < "${LTPLIST}"

	# Back to the test directory: STOPFILE is a relative path.
	cd "${TESTDIR}"

	if [ -e ${STOPFILE} ]; then
		rm -f ${STOPFILE}
		break
	fi

	if [ ${failed} -eq 1 ]; then
		break
	fi
done

echo "end-time: `date`"
etime=`date "+%s"`
run_time=$((${etime} - ${stime}))

# A run shorter than MIN_RUN_SEC is NG even when no test failed.
if [ ${MIN_RUN_SEC} -le ${run_time} ]; then
	if [ ${failed} -eq 0 ]; then
		echo "issue-1400 test OK."
		rm -f "${LOGFILE}"
	else
		echo "issue-1400 test NG."
	fi
else
	echo "issue-1400 test NG."
fi

mcstop

View File

@ -0,0 +1,238 @@
/* 1400_arm64.c COPYRIGHT FUJITSU LIMITED 2020 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/types.h>
/* Number of recorded execution points: [0] is the child, [1] the parent. */
#define POINT_ORDER_NUM 2

/*
 * Pointers to anonymous shared mappings created in main() and shared
 * between the parent and the forked child.  MAP_FAILED means "not mapped".
 */

/* Set to 1 by the child once it has started; the parent spins on it. */
static int *sync1 = MAP_FAILED;

/* CPU number the parent is running on, as returned by sched_getcpu(). */
static int *parent_core = MAP_FAILED;

/* Value of the *od counter at the moment each process passed its point. */
static int *point_order = MAP_FAILED;

/* Shared counter incremented by each process when it records its point. */
static int *od = MAP_FAILED;
/*
 * Test for issue #1400.
 *
 * The parent forks a child, binds both the child and itself to the CPU the
 * parent is running on, and then calls sched_yield().  The child spins
 * until sched_getcpu() reports that it has been migrated and then records
 * its execution order; the parent records its own order after
 * sched_yield() returns.  The test passes only when the child recorded its
 * point before the parent.
 *
 * argc and argv are unused.
 *
 * Returns 0 when the test passes and -1 on any failure (mmap, fork,
 * sched_setaffinity, sched_yield, abnormal child termination, or wrong
 * execution order).
 */
int main(int argc, char *argv[])
{
	pid_t pid = -1;
	pid_t ret_pid = -1;
	int status = 0;
	int i = 0;
	int result = -1;
	int ret = -1;
	int failed = 0;
	int bind_failed = 0;

	/* get shared memory */
	sync1 = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	parent_core = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
				  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	point_order = (int *)mmap(NULL, sizeof(int) * POINT_ORDER_NUM,
				  PROT_READ | PROT_WRITE,
				  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	od = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	/* mmap check */
	if (sync1 == MAP_FAILED ||
	    parent_core == MAP_FAILED ||
	    point_order == MAP_FAILED ||
	    od == MAP_FAILED) {
		printf("mmap() Failed.\n");
		goto out;
	}

	for (i = 0; i < POINT_ORDER_NUM; i++) {
		point_order[i] = 0;
	}
	*od = 0;
	*sync1 = 0;

	/* create child process */
	pid = fork();
	switch (pid) {
	case -1:
		/* error */
		printf("fork() Failed.\n");
		goto out;

	case 0: {
		/* child */
		/* before migrate, get cpunum */
		int old_mycore = sched_getcpu();

		printf("[child:%d] running core %d\n", getpid(), old_mycore);
		/*
		 * The child leaves through _exit(), which does not flush
		 * stdio buffers.  Flush now, before the parent is released,
		 * so the message is not lost when stdout is a file or pipe.
		 */
		fflush(stdout);

		/* sync parent */
		*sync1 = 1;

		/* wait until migrated */
		while (sched_getcpu() == old_mycore) {
			__asm__ __volatile__("yield" ::: "memory");
		}
		point_order[0] = ++(*od);
		_exit(0);
	}

	default: {
		/* parent */
		cpu_set_t cpuset;

		/* sync child */
		while (*sync1 != 1) {
			__asm__ __volatile__("yield" ::: "memory");
		}

		/* parent corenum get */
		*parent_core = sched_getcpu();

		/* child process to migrate parent core */
		printf("[parent:%d] running core %d\n", getpid(), *parent_core);
		printf("[parent] child process(pid=%d) "
		       "migrate/bind to core %d\n",
		       pid, *parent_core);
		CPU_ZERO(&cpuset);
		CPU_SET(*parent_core, &cpuset);
		result = sched_setaffinity(pid, sizeof(cpuset), &cpuset);
		if (result == -1) {
			printf("errno = %d\n", errno);
			printf("child migrate/bind "
			       "sched_setaffinity failed.\n");
			/*
			 * The child only leaves its spin loop after being
			 * migrated, so waiting for it here would block
			 * forever.  Terminate and reap it, then fail.
			 */
			kill(pid, SIGKILL);
			(void)wait(&status);
			goto out;
		}

		/* parent core bind */
		printf("[parent] parent process bind to core %d\n",
		       *parent_core);
		result = sched_setaffinity(0, sizeof(cpuset), &cpuset);
		if (result == -1) {
			printf("errno = %d\n", errno);
			printf("parent bind sched_setaffinity failed.\n");
			/*
			 * Keep going so the child is reaped normally, but
			 * remember the failure: result is overwritten by
			 * sched_yield() below.
			 */
			bind_failed = 1;
		}

		/* sched_setaffinity interval */
		usleep(10000);

		/* sync child, switch to child process */
		printf("[parent] send sched_yield.\n");
		result = sched_yield();
		point_order[1] = ++(*od);
		break;
	}
	}

	if (result == -1) {
		printf("sched_yield failed.\n");
	}

	/* child process status check. */
	ret_pid = wait(&status);
	if (ret_pid == pid) {
		if (WIFEXITED(status)) {
			if (WEXITSTATUS(status)) {
				printf("TP failed, child migrate fail.\n");
			}
			else {
				goto wait_ok;
			}
		}
		else {
			printf("TP failed, child is not exited.\n");
		}

		/* Abnormal end: dump every wait-status attribute. */
		if (WIFSIGNALED(status)) {
			printf("TP failed, child signaled by %d.\n",
			       WTERMSIG(status));
			if (WCOREDUMP(status)) {
				printf("coredumped.\n");
			}
		}
		else {
			printf("TP failed, child is not signaled.\n");
		}

		if (WIFSTOPPED(status)) {
			printf("TP failed, child is stopped by signal %d.\n",
			       WSTOPSIG(status));
		}
		else {
			printf("TP failed, child is not stopped.\n");
		}

		if (WIFCONTINUED(status)) {
			printf("TP failed, child is continued.\n");
		}
		else {
			printf("TP failed, child is not continued.\n");
		}

		for (i = 0; i < POINT_ORDER_NUM; i++) {
			printf("point_order[%d] = %d\n", i, point_order[i]);
		}
		goto out;
	}
	else {
		printf("TP failed, child process wait() fail.\n");
		for (i = 0; i < POINT_ORDER_NUM; i++) {
			printf("point_order[%d] = %d\n", i, point_order[i]);
		}
		goto out;
	}

wait_ok:
	/* A zero entry means that process never recorded its point. */
	for (i = 0; i < POINT_ORDER_NUM; i++) {
		printf("point_order[%d] = %d\n", i, point_order[i]);
		if (point_order[i] == 0) {
			failed = 1;
		}
	}

	if (failed != 0) {
		printf("TP failed, parent or child process is not running.\n");
		goto out;
	}

	/* Without the parent bound to the core the order proves nothing. */
	if (bind_failed != 0) {
		printf("TP failed, parent process is not bound.\n");
		goto out;
	}

	if (result != -1) {
		/* The child must have recorded its point before the parent. */
		if (point_order[0] < point_order[1]) {
			ret = 0;
		}
		else {
			printf("TP failed, out of order.\n");
		}
	}

out:
	/* unmap shared memory */
	if (od != MAP_FAILED) {
		munmap(od, sizeof(int));
	}

	if (point_order != MAP_FAILED) {
		munmap(point_order, sizeof(int) * POINT_ORDER_NUM);
	}

	if (parent_core != MAP_FAILED) {
		munmap(parent_core, sizeof(int));
	}

	if (sync1 != MAP_FAILED) {
		munmap(sync1, sizeof(int));
	}
	return ret;
}

View File

@ -0,0 +1,57 @@
/* README COPYRIGHT FUJITSU LIMITED 2020 */
ostest-sched_yield.000: 親プロセスが子プロセスと自身を同一CPUにバインドしてsched_yield()しても子プロセスがスケジュールされない
https://postpeta.pccluster.org/redmine/issues/1400
テストセットREADME
(0) 事前準備
1. $HOME/.mck_test_configを用意する
(1) テスト実行方法
1. make test
(2) テスト終了方法
1. touch ./1400_stop
テスト項目全項目を1セットとして行うため、終了までにタイムラグがあることがある。
(3) テスト項目詳細
1400_arm64.c
親プロセスが子プロセスと自身を同一CPUにバインドしてsched_yield()した場合、
子プロセス、親プロセスの順序で実行されることを確認する。
本修正が既存機能へのレベルダウンとなっていないか確認するため、
下記のLTP項目についても実施対象とする。
- sched_yield01
- signal01,02,03,04,05
- rt_sigaction01,02,03
- rt_sigprocmask01,02
- rt_sigsuspend01
- rt_tgsigqueueinfo01
- futex_wait01,02,03,04
- futex_wake01,03
- futex_wait_bitset01,02
- execveat02
現象はタイミングによって発生有無が変化するため、テストは無限ループによる
長時間実行を行う。
12時間以上の実行継続、テスト成功を目安とし、合否判定とする。
なお、本テストは別障害の影響により、NGとなることがある。
別障害は https://postpeta.pccluster.org/redmine/issues/1454 で管理。
また、本修正により明らかになった上記とは別の障害により以下のLTP項目が
 FAILになる。
- msgrcv05
- msgsnd05
- semctl01
- semop05
こちらの別障害については、
https://postpeta.pccluster.org/redmine/issues/1455
にて管理する。
以上。

View File

@ -0,0 +1,23 @@
sched_yield01
signal01
signal02
signal03
signal04
signal05
rt_sigaction01
rt_sigaction02
rt_sigaction03
rt_sigprocmask01
rt_sigprocmask02
rt_sigsuspend01
rt_tgsigqueueinfo01
futex_wait01
futex_wait02
futex_wait03
futex_wait04
futex_wake01
futex_wake03
futex_wait_bitset01
futex_wait_bitset02
execveat02
sigsuspend01