mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0ce1b2d48cda10713920cb88692e107b8c4d3bab Refs: #929
This commit is contained in:
committed by
Masamichi Takagi
parent
45bc6a617a
commit
d1d93d90cc
145
test/issues/929/C929.sh
Executable file
145
test/issues/929/C929.sh
Executable file
@ -0,0 +1,145 @@
|
||||
#!/bin/bash
# Regression test for issue #929: checks how mpirun and mcexec behave for
# different combinations of `mpirun -ppn` and `mcexec -n`.
#
# bash is named explicitly because the script relies on the `&>` redirection,
# which a strictly POSIX sh would parse as "run in background, then redirect".

USELTP=0
USEOSTEST=0

# Shared test setup. MCEXEC and mcreboot are used below but not defined in
# this file -- presumably they come from common.sh (confirm there).
. ../../common.sh

issue="929"
tid=01

# --- T01: -ppn == -n. All 5 ranks must start and mpirun must succeed. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
TEST_CMD="mpirun -f ./hostfile -ppn 5 ${MCEXEC} -n 5 ./test_prog.sh"
echo ${TEST_CMD}
# TEST_CMD is expanded unquoted on purpose: word splitting builds the argv.
${TEST_CMD} &> ${tname}.txt
mpi_ret=$?

cat ./${tname}.txt
started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)

if [ ${mpi_ret} -eq 0 ] && [ ${started_num} -eq 5 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""

# --- T02: -ppn > -n. Exactly 3 ranks must start and mpirun must fail. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
TEST_CMD="mpirun -f ./hostfile -ppn 5 ${MCEXEC} -n 3 ./test_prog.sh"
echo ${TEST_CMD}
${TEST_CMD} &> ${tname}.txt
mpi_ret=$?

cat ./${tname}.txt
started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)

if [ ${mpi_ret} -ne 0 ] && [ ${started_num} -eq 3 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""

# --- T03: -ppn < -n. No rank may start and mpirun must fail. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
TEST_CMD="mpirun -f ./hostfile -ppn 3 ${MCEXEC} -n 5 ./test_prog.sh"
echo ${TEST_CMD}
${TEST_CMD} &> ${tname}.txt
mpi_ret=$?

cat ./${tname}.txt
started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)

if [ ${mpi_ret} -ne 0 ] && [ ${started_num} -eq 0 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""

# --- T04: -ppn is an integer multiple of -n. Exactly 3 ranks must start
#     and mpirun must fail. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
TEST_CMD="mpirun -f ./hostfile -ppn 6 ${MCEXEC} -n 3 ./test_prog.sh"
echo ${TEST_CMD}
${TEST_CMD} &> ${tname}.txt
mpi_ret=$?

cat ./${tname}.txt
started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)

if [ ${mpi_ret} -ne 0 ] && [ ${started_num} -eq 3 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""

# --- T05: -ppn == -n == 250. No rank may start and mpirun must fail. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
TEST_CMD="mpirun -f ./hostfile -ppn 250 ${MCEXEC} -n 250 ./test_prog.sh"
echo ${TEST_CMD}
${TEST_CMD} &> ${tname}.txt
mpi_ret=$?

# Only the first 10 lines of the captured output are shown.
head -n 10 ./${tname}.txt
echo "..."
started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)

if [ ${mpi_ret} -ne 0 ] && [ ${started_num} -eq 0 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""

# --- T06: 20 consecutive -ppn == -n runs must all succeed, and every
#     pe_list_len value logged by mcexec_get_cpuset must be within 0..5. ---
tname=$(printf "C${issue}T%02d" ${tid})
echo "*** ${tname} start *******************************"
ng=0
TEST_CMD="mpirun -f ./hostfile -ppn 5 ${MCEXEC} -n 5 ./test_prog.sh"
echo "** reboot mcrernel for check pe_list_len"
mcreboot
echo "** enable debug message in mcexec_get_cpuset"
sudo sh -c "echo -n 'func mcexec_get_cpuset +p' > /sys/kernel/debug/dynamic_debug/control"
echo ${TEST_CMD}
for i in $(seq 1 20)
do
	${TEST_CMD} &> ${tname}.txt
	mpi_ret=$?
	started_num=$(grep 'test_prog is started' ./${tname}.txt | wc -l)
	if [ ${mpi_ret} -eq 0 ] && [ ${started_num} -eq 5 ]; then
		echo "[OK] exec: $i"
	else
		echo "[NG] exec: $i"
		ng=$((ng + 1))
	fi
done
echo "** check pe_list_len"
dmesg --notime | grep "mcexec_get_cpuset: pe_list" | tail -n 20 | cut -f 2-3 -d ':' > ./pe_list_len.txt
# Feed the file through a redirection rather than `cat ... | while`: a
# pipeline would run the loop in a subshell and the ng increments below
# would never reach the final verdict.
while read -r line
do
	len=$(echo ${line} | cut -f 2 -d ':')
	if [ ${len} -ge 0 ] && [ ${len} -le 5 ]; then
		echo "[OK] ${line}"
	else
		echo "[NG] ${line}"
		ng=$((ng + 1))
	fi
done < ./pe_list_len.txt
echo "** disable debug message in mcexec_get_cpuset"
sudo sh -c "echo -n 'func mcexec_get_cpuset -p' > /sys/kernel/debug/dynamic_debug/control"

if [ ${ng} -eq 0 ]; then
	echo "*** ${tname} PASSED ******************************"
else
	echo "*** ${tname} FAILED ******************************"
fi
tid=$((tid + 1))
echo ""
|
||||
|
||||
11
test/issues/929/Makefile
Normal file
11
test/issues/929/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
# Build/run glue for the issue #929 regression test.
CFLAGS=-g
LDFLAGS=

# Nothing is compiled for this test; TARGET is intentionally empty.
TARGET=

# These targets are commands, not files: keep them runnable even if a file
# named "all", "test" or "clean" appears in this directory.
.PHONY: all test clean

all: $(TARGET)

# Run the test script.
test: all
	./C929.sh

# Remove build products and the *.txt files in this directory.
clean:
	rm -f $(TARGET) *.o *.txt
|
||||
36
test/issues/929/README
Normal file
36
test/issues/929/README
Normal file
@ -0,0 +1,36 @@
|
||||
【Issue#929 動作確認】
|
||||
□ テスト内容
|
||||
1. mpirunで指定する-ppnと、mcexecで指定する-n の指定状況ごとに
|
||||
想定どおりの動作となることを確認
|
||||
C929T01:
|
||||
-ppn == -n の場合に、プログラムが実行され、mpirunが成功する
|
||||
|
||||
C929T02:
|
||||
-ppn > -n の場合に、プログラムの一部が実行され、mpirunが失敗する
|
||||
|
||||
C929T03:
|
||||
-ppn < -n の場合に、プログラムが実行されず、mpirunが失敗する
|
||||
|
||||
C929T04:
|
||||
-ppn が -n の整数倍である場合に、プログラムの一部が実行され、mpirunが失敗する
|
||||
|
||||
C929T05:
|
||||
-ppn と -n がMcKernelに割り当てたCPU数よりも大きい場合に、
|
||||
プログラムが実行されず、mpirunが失敗する
|
||||
|
||||
C929T06:
|
||||
-ppn == -n での正常実行を20回連続で行った場合に、
|
||||
プログラムが実行され、mpirunが成功する
|
||||
また、mcctrlで管理しているpart_exec_list の要素数が5を超えない
|
||||
|
||||
□ 実行手順
|
||||
$ make test
|
||||
|
||||
McKernelのインストール先や、OSTEST, LTPの配置場所は、
|
||||
$HOME/.mck_test_config を参照している
|
||||
.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを
|
||||
$HOMEにコピーし、適宜編集する
|
||||
|
||||
□ 実行結果
|
||||
x86_64_result.log aarch64_result.log 参照。
|
||||
すべての項目をPASSしていることを確認。
|
||||
99
test/issues/929/aarch64_result.log
Normal file
99
test/issues/929/aarch64_result.log
Normal file
@ -0,0 +1,99 @@
|
||||
*** C929T01 start *******************************
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T01 PASSED ******************************
|
||||
|
||||
*** C929T02 start *******************************
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 3 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T02 PASSED ******************************
|
||||
|
||||
*** C929T03 start *******************************
|
||||
mpirun -f ./hostfile -ppn 3 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
*** C929T03 PASSED ******************************
|
||||
|
||||
*** C929T04 start *******************************
|
||||
mpirun -f ./hostfile -ppn 6 /home/satoken/ihk+mckernel/bin/mcexec -n 3 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T04 PASSED ******************************
|
||||
|
||||
*** C929T05 start *******************************
|
||||
mpirun -f ./hostfile -ppn 250 /home/satoken/ihk+mckernel/bin/mcexec -n 250 ./test_prog.sh
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
...
|
||||
*** C929T05 PASSED ******************************
|
||||
|
||||
*** C929T06 start *******************************
|
||||
** reboot mcrernel for check pe_list_len
|
||||
mcreboot.sh -c 1-6,29-34 -m 50G@0,50G@1 -r 1-6:0+29-34:28 -O ... done
|
||||
** enable debug message in mcexec_get_cpuset
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
[OK] exec: 1
|
||||
[OK] exec: 2
|
||||
[OK] exec: 3
|
||||
[OK] exec: 4
|
||||
[OK] exec: 5
|
||||
[OK] exec: 6
|
||||
[OK] exec: 7
|
||||
[OK] exec: 8
|
||||
[OK] exec: 9
|
||||
[OK] exec: 10
|
||||
[OK] exec: 11
|
||||
[OK] exec: 12
|
||||
[OK] exec: 13
|
||||
[OK] exec: 14
|
||||
[OK] exec: 15
|
||||
[OK] exec: 16
|
||||
[OK] exec: 17
|
||||
[OK] exec: 18
|
||||
[OK] exec: 19
|
||||
[OK] exec: 20
|
||||
** check pe_list_len
|
||||
[OK] pe_list_len:0
|
||||
[OK] pe_list_len:1
|
||||
[OK] pe_list_len:2
|
||||
[OK] pe_list_len:3
|
||||
[OK] pe_list_len:4
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
[OK] pe_list_len:5
|
||||
** disable debug message in mcexec_get_cpuset
|
||||
*** C929T06 PASSED ******************************
|
||||
|
||||
1
test/issues/929/hostfile
Normal file
1
test/issues/929/hostfile
Normal file
@ -0,0 +1 @@
|
||||
localhost
|
||||
3
test/issues/929/test_prog.sh
Executable file
3
test/issues/929/test_prog.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Test payload: prints a single fixed marker line on stdout.

printf '%s\n' "test_prog is started."
|
||||
74
test/issues/929/x86_64_result.log
Normal file
74
test/issues/929/x86_64_result.log
Normal file
@ -0,0 +1,74 @@
|
||||
*** C929T01 start *******************************
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T01 PASSED ******************************
|
||||
|
||||
*** C929T02 start *******************************
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 3 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T02 PASSED ******************************
|
||||
|
||||
*** C929T03 start *******************************
|
||||
mpirun -f ./hostfile -ppn 3 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
getting CPU set for partitioned execution: Connection timed out
|
||||
*** C929T03 PASSED ******************************
|
||||
|
||||
*** C929T04 start *******************************
|
||||
mpirun -f ./hostfile -ppn 6 /home/satoken/ihk+mckernel/bin/mcexec -n 3 ./test_prog.sh
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
getting CPU set for partitioned execution: Invalid argument
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
test_prog is started.
|
||||
*** C929T04 PASSED ******************************
|
||||
|
||||
*** C929T05 start *******************************
|
||||
mpirun -f ./hostfile -ppn 250 /home/satoken/ihk+mckernel/bin/mcexec -n 250 ./test_prog.sh
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
error: nr_processes can't exceed nr. of CPUs
|
||||
...
|
||||
*** C929T05 PASSED ******************************
|
||||
|
||||
*** C929T06 start *******************************
|
||||
mpirun -f ./hostfile -ppn 5 /home/satoken/ihk+mckernel/bin/mcexec -n 5 ./test_prog.sh
|
||||
[OK] exec: 1
|
||||
[OK] exec: 2
|
||||
[OK] exec: 3
|
||||
[OK] exec: 4
|
||||
[OK] exec: 5
|
||||
[OK] exec: 6
|
||||
[OK] exec: 7
|
||||
[OK] exec: 8
|
||||
[OK] exec: 9
|
||||
[OK] exec: 10
|
||||
[OK] exec: 11
|
||||
[OK] exec: 12
|
||||
[OK] exec: 13
|
||||
[OK] exec: 14
|
||||
[OK] exec: 15
|
||||
[OK] exec: 16
|
||||
[OK] exec: 17
|
||||
[OK] exec: 18
|
||||
[OK] exec: 19
|
||||
[OK] exec: 20
|
||||
*** C929T06 PASSED ******************************
|
||||
|
||||
Reference in New Issue
Block a user