From 236a072311c0a3695884e2181ebbb63b1840185d Mon Sep 17 00:00:00 2001 From: Yutaka Ishikawa Date: Sun, 23 Jul 2017 21:19:15 +0900 Subject: [PATCH] Add qlmpi and swap to mckernel (This is rebase commit for merging to development) --- Makefile.in | 3 + arch/x86/kernel/include/syscall_list.h | 7 + config.h.in | 15 + configure | 274 ++++- configure.ac | 103 ++ executer/include/md5.h | 91 ++ executer/include/pmi.h | 473 ++++++++ executer/include/qlmpi.h | 58 + executer/include/qlmpilib.h | 10 + executer/kernel/mcctrl/syscall.c | 82 +- executer/user/Makefile.in | 44 +- executer/user/libqlfort.c | 101 ++ executer/user/mcexec.c | 243 +++- executer/user/md5.c | 381 ++++++ executer/user/ql_mpiexec_start.c | 1073 +++++++++++++++++ executer/user/ql_server.c | 597 +++++++++ executer/user/ql_talker.c | 101 ++ executer/user/qlmpilib.c | 320 +++++ kernel/Makefile.build.in | 1 + kernel/include/process.h | 3 + kernel/include/swapfmt.h.in | 34 + kernel/pager.c | 837 +++++++++++++ kernel/syscall.c | 96 ++ test/qlmpi/dump-pages.c | 127 ++ test/qlmpi/qlmpi_sample.c | 78 ++ test/qlmpi/qlmpi_testsuite/CT20.f | 73 ++ test/qlmpi/qlmpi_testsuite/CT20.sh | 27 + test/qlmpi/qlmpi_testsuite/CT21.f | 62 + test/qlmpi/qlmpi_testsuite/CT21.sh | 27 + test/qlmpi/qlmpi_testsuite/CT22.f | 27 + test/qlmpi/qlmpi_testsuite/CT22.sh | 27 + test/qlmpi/qlmpi_testsuite/Makefile | 57 + test/qlmpi/qlmpi_testsuite/config | 9 + test/qlmpi/qlmpi_testsuite/gendata.c | 40 + test/qlmpi/qlmpi_testsuite/go_ql_test.sh | 10 + test/qlmpi/qlmpi_testsuite/hostfile20 | 2 + test/qlmpi/qlmpi_testsuite/hostfile21 | 1 + test/qlmpi/qlmpi_testsuite/hostfile22 | 2 + test/qlmpi/qlmpi_testsuite/ql_irreg.sh | 210 ++++ test/qlmpi/qlmpi_testsuite/ql_normal.sh | 348 ++++++ test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt | 230 ++++ .../qlmpi/qlmpi_testsuite/test_cases/CT01.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT02.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT03.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT04.txt | 5 + 
.../qlmpi/qlmpi_testsuite/test_cases/CT05.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT06.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT07.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT08.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT09.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT10.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT11.txt | 5 + .../qlmpi/qlmpi_testsuite/test_cases/CT12.txt | 5 + .../qlmpi_testsuite/test_cases/ECT91.txt | 5 + test/qlmpi/qlmpi_testsuite/usr_prg_A.c | 59 + test/qlmpi/qlmpi_testsuite/usr_prg_B.c | 45 + test/qlmpi/qlmpi_testsuite/usr_prg_C.c | 45 + test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c | 56 + test/qlmpi/qlmpi_testsuite/util/mpiexec | 5 + test/qlmpi/qlmpi_testsuite/util/wrong_mfile | 2 + test/qlmpi/swaptest.c | 81 ++ 61 files changed, 6638 insertions(+), 24 deletions(-) create mode 100644 executer/include/md5.h create mode 100644 executer/include/pmi.h create mode 100644 executer/include/qlmpi.h create mode 100644 executer/include/qlmpilib.h create mode 100644 executer/user/libqlfort.c create mode 100644 executer/user/md5.c create mode 100644 executer/user/ql_mpiexec_start.c create mode 100644 executer/user/ql_server.c create mode 100644 executer/user/ql_talker.c create mode 100644 executer/user/qlmpilib.c create mode 100644 kernel/include/swapfmt.h.in create mode 100644 kernel/pager.c create mode 100644 test/qlmpi/dump-pages.c create mode 100644 test/qlmpi/qlmpi_sample.c create mode 100644 test/qlmpi/qlmpi_testsuite/CT20.f create mode 100755 test/qlmpi/qlmpi_testsuite/CT20.sh create mode 100644 test/qlmpi/qlmpi_testsuite/CT21.f create mode 100755 test/qlmpi/qlmpi_testsuite/CT21.sh create mode 100644 test/qlmpi/qlmpi_testsuite/CT22.f create mode 100755 test/qlmpi/qlmpi_testsuite/CT22.sh create mode 100644 test/qlmpi/qlmpi_testsuite/Makefile create mode 100644 test/qlmpi/qlmpi_testsuite/config create mode 100644 test/qlmpi/qlmpi_testsuite/gendata.c create mode 100755 test/qlmpi/qlmpi_testsuite/go_ql_test.sh create 
mode 100644 test/qlmpi/qlmpi_testsuite/hostfile20 create mode 100644 test/qlmpi/qlmpi_testsuite/hostfile21 create mode 100644 test/qlmpi/qlmpi_testsuite/hostfile22 create mode 100755 test/qlmpi/qlmpi_testsuite/ql_irreg.sh create mode 100755 test/qlmpi/qlmpi_testsuite/ql_normal.sh create mode 100644 test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt create mode 100644 test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt create mode 100644 test/qlmpi/qlmpi_testsuite/usr_prg_A.c create mode 100644 test/qlmpi/qlmpi_testsuite/usr_prg_B.c create mode 100644 test/qlmpi/qlmpi_testsuite/usr_prg_C.c create mode 100644 test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c create mode 100755 test/qlmpi/qlmpi_testsuite/util/mpiexec create mode 100644 test/qlmpi/qlmpi_testsuite/util/wrong_mfile create mode 100644 test/qlmpi/swaptest.c diff --git a/Makefile.in b/Makefile.in index c79a58c3..ac9bf784 100755 --- a/Makefile.in +++ b/Makefile.in @@ -1,5 +1,6 @@ TARGET = @TARGET@ SBINDIR = @SBINDIR@ +INCDIR = @INCDIR@ ETCDIR = @ETCDIR@ MANDIR = @MANDIR@ @@ -53,6 +54,8 @@ install:: mkdir -p -m 755 $(ETCDIR); \ install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \ install -m 644 
arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \ + mkdir -p -m 755 $(INCDIR); \ + install -m 644 kernel/include/swapfmt.h $(INCDIR); \ mkdir -p -m 755 $(MANDIR)/man1; \ install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \ ;; \ diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 19affc00..c8fe6e0f 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -68,6 +68,7 @@ SYSCALL_DELEGATED(69, msgsnd) SYSCALL_DELEGATED(70, msgrcv) SYSCALL_HANDLED(72, fcntl) SYSCALL_DELEGATED(79, getcwd) +SYSCALL_DELEGATED(87, unlink) SYSCALL_DELEGATED(89, readlink) SYSCALL_HANDLED(96, gettimeofday) SYSCALL_HANDLED(97, getrlimit) @@ -157,4 +158,10 @@ SYSCALL_HANDLED(730, util_migrate_inter_kernel) SYSCALL_HANDLED(731, util_indicate_clone) SYSCALL_HANDLED(732, get_system) +/* McKernel Specific */ +SYSCALL_HANDLED(801, swapout) +SYSCALL_HANDLED(802, linux_mlock) +SYSCALL_HANDLED(803, suspend_threads) +SYSCALL_HANDLED(804, resume_threads) +SYSCALL_HANDLED(811, linux_spawn) /**** End of File ****/ diff --git a/config.h.in b/config.h.in index 6954ee91..71e51236 100644 --- a/config.h.in +++ b/config.h.in @@ -6,6 +6,9 @@ /* whether memdump feature is enabled */ #undef ENABLE_MEMDUMP +/* whether qlmpi is enabled */ +#undef ENABLE_QLMPI + /* whether rusage is enabled */ #undef ENABLE_RUSAGE @@ -72,9 +75,15 @@ /* Define to address of kernel symbol vdso_start, or 0 if exported */ #undef MCCTRL_KSYM_vdso_start +/* Define to address of kernel symbol walk_page_range, or 0 if exported */ +#undef MCCTRL_KSYM_walk_page_range + /* Define to address of kernel symbol zap_page_range, or 0 if exported */ #undef MCCTRL_KSYM_zap_page_range +/* McKernel specific headers */ +#undef MCKERNEL_INCDIR + /* McKernel specific libraries */ #undef MCKERNEL_LIBDIR @@ -101,3 +110,9 @@ /* Define to 1 if you have the ANSI C header files. 
*/ #undef STDC_HEADERS + +/* install directory for system binary. */ +#undef SBINDIR + +/* install directory for binary. */ +#undef BINDIR diff --git a/configure b/configure index d66ff111..5991a132 100755 --- a/configure +++ b/configure @@ -628,13 +628,16 @@ IHK_RELEASE_DATE DCFA_VERSION MCKERNEL_VERSION IHK_VERSION +ENABLE_QLMPI ENABLE_RUSAGE ENABLE_MCOVERLAYFS MANDIR KERNDIR KMODDIR ETCDIR +INCDIR MCKERNEL_LIBDIR +MCKERNEL_INCDIR SBINDIR BINDIR TARGET @@ -642,6 +645,7 @@ UNAME_R KDIR ARCH XCC +FGREP EGREP GREP CPP @@ -693,6 +697,9 @@ SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking +with_mpi +with_mpi_include +with_mpi_lib with_kernelsrc with_target with_system_map @@ -700,6 +707,7 @@ enable_dcfa enable_memdump enable_mcoverlayfs enable_rusage +enable_qlmpi with_uname_r ' ac_precious_vars='build_alias @@ -1324,10 +1332,16 @@ Optional Features: --enable-memdump enable dumping memory and analyzing a dump --enable-mcoverlayfs enable mcoverlayfs implementation --enable-rusage enable rusage implementation + --enable-qlmpi enable qlmpi implementation Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-mpi=PATH specify path where mpi include directory and lib + directory can be found + --with-mpi-include=PATH specify path where mpi include directory can be + found + --with-mpi-lib=PATH specify path where mpi lib directory can be found --with-kernelsrc=path Path to 'kernel src', default is /lib/modules/uname_r/build --with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86} @@ -2060,6 +2074,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + IHK_VERSION=0.9.0 MCKERNEL_VERSION=0.9.0 DCFA_VERSION=0.9.0 @@ -3309,6 +3327,187 @@ if test "x$numa_lib_found" != "xyes"; then : as_fn_error $? "Unable to find NUMA library, missing numactl-devel?" "$LINENO" 5 fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +$as_echo_n "checking for fgrep... 
" >&6; } +if ${ac_cv_path_FGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in fgrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. + # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? 
"no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +$as_echo "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + + + +# Check whether --with-mpi was given. +if test "${with_mpi+set}" = set; then : + withval=$with_mpi; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-mpi=PATH expects a valid PATH" >&2;} + with_mpi="" ;; #( + *) : + ;; +esac +else + with_mpi= +fi + + +# Check whether --with-mpi-include was given. +if test "${with_mpi_include+set}" = set; then : + withval=$with_mpi_include; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi-include=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-mpi-include=PATH expects a valid PATH" >&2;} + with_mpi_include="" ;; #( + *) : + ;; +esac +fi + + +# Check whether --with-mpi-lib was given. +if test "${with_mpi_lib+set}" = set; then : + withval=$with_mpi_lib; case "$withval" in #( + yes|no|'') : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi-lib=PATH expects a valid PATH" >&5 +$as_echo "$as_me: WARNING: --without-mpi-lib=PATH expects a valid PATH" >&2;} + with_mpi_lib="" ;; #( + *) : + ;; +esac +fi + + + # The args have been sanitized into empty/non-empty values above. 
+ # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options + # taking priority + + if test -n "${with_mpi_include}"; then : + + + if echo "$CPPFLAGS" | $FGREP -e "\<-I${with_mpi_include}\>" >/dev/null 2>&1; then : + echo "CPPFLAGS(='$CPPFLAGS') contains '-I${with_mpi_include}', not appending" >&5 +else + echo "CPPFLAGS(='$CPPFLAGS') does not contain '-I${with_mpi_include}', appending" >&5 + CPPFLAGS="$CPPFLAGS -I${with_mpi_include}" + +fi + +else + if test -n "${with_mpi}"; then : + + + if echo "$CPPFLAGS" | $FGREP -e "\<-I${with_mpi}/include\>" >/dev/null 2>&1; then : + echo "CPPFLAGS(='$CPPFLAGS') contains '-I${with_mpi}/include', not appending" >&5 +else + echo "CPPFLAGS(='$CPPFLAGS') does not contain '-I${with_mpi}/include', appending" >&5 + CPPFLAGS="$CPPFLAGS -I${with_mpi}/include" + +fi + +fi +fi + + if test -n "${with_mpi_lib}"; then : + + + if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi_lib}\>" >/dev/null 2>&1; then : + echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi_lib}', not appending" >&5 +else + echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi_lib}', appending" >&5 + LDFLAGS="$LDFLAGS -L${with_mpi_lib}" + +fi + +else + if test -n "${with_mpi}"; then : + + + if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi}/lib\>" >/dev/null 2>&1; then : + echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi}/lib', not appending" >&5 +else + echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi}/lib', appending" >&5 + LDFLAGS="$LDFLAGS -L${with_mpi}/lib" + +fi + + if test -d "${with_mpi}/lib64"; then : + + + if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi}/lib64\>" >/dev/null 2>&1; then : + echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi}/lib64', not appending" >&5 +else + echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi}/lib64', appending" >&5 + LDFLAGS="$LDFLAGS -L${with_mpi}/lib64" + +fi + +fi + +fi + +fi + + # Check whether --with-kernelsrc was given. 
if test "${with_kernelsrc+set}" = set; then : @@ -3368,6 +3567,14 @@ else fi +# Check whether --enable-qlmpi was given. +if test "${enable_qlmpi+set}" = set; then : + enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval +else + ENABLE_QLMPI=no +fi + + # Check whether --with-uname_r was given. if test "${with_uname_r+set}" = set; then : @@ -4059,9 +4266,15 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$MCKERNEL_INCDIR" = X; then + MCKERNEL_INCDIR="$prefix/include" + fi if test "X$MCKERNEL_LIBDIR" = X; then MCKERNEL_LIBDIR="$prefix/lib" fi + if test "X$INCDIR" = X; then + INCDIR="$prefix/include" + fi if test "X$ETCDIR" = X; then ETCDIR="$prefix/etc" fi @@ -4423,6 +4636,31 @@ _ACEOF fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol walk_page_range" >&5 +$as_echo_n "checking System.map for symbol walk_page_range... " >&6; } + mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " walk_page_range\$" | cut -d\ -f1` + if test -z $mcctrl_addr; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +$as_echo "not found" >&6; } + else + mcctrl_result=$mcctrl_addr + mcctrl_addr="0x$mcctrl_addr" + + if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_walk_page_range\$" >/dev/null`; then + mcctrl_result="exported" + mcctrl_addr="0" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 +$as_echo "$mcctrl_result" >&6; } + +cat >>confdefs.h <<_ACEOF +#define MCCTRL_KSYM_walk_page_range $mcctrl_addr +_ACEOF + + fi + + case $ENABLE_MEMDUMP in yes|no|auto) ;; @@ -4526,6 +4764,17 @@ else $as_echo "$as_me: mcoverlayfs is disabled" >&6;} fi +if test "x$ENABLE_QLMPI" = "xyes" ; then + +$as_echo "#define ENABLE_QLMPI 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: qlmpi is enabled" >&5 +$as_echo "$as_me: qlmpi is enabled" >&6;} +else + { $as_echo "$as_me:${as_lineno-$LINENO}: qlmpi is disabled" >&5 +$as_echo "$as_me: qlmpi is disabled" >&6;} +fi + case $ENABLE_RUSAGE in 
yes|no) ;; @@ -4548,6 +4797,14 @@ else $as_echo "$as_me: rusage is disabled" >&6;} fi +if test "x$MCKERNEL_INCDIR" != "x" ; then + +cat >>confdefs.h <<_ACEOF +#define MCKERNEL_INCDIR "$MCKERNEL_INCDIR" +_ACEOF + +fi + if test "x$MCKERNEL_LIBDIR" != "x" ; then cat >>confdefs.h <<_ACEOF @@ -4557,6 +4814,20 @@ _ACEOF fi +cat >>confdefs.h <<_ACEOF +#define BINDIR "$BINDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define SBINDIR "$SBINDIR" +_ACEOF + + + + + + @@ -4583,7 +4854,7 @@ fi ac_config_headers="$ac_config_headers config.h" -ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in" +ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh 
arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in" if test "x$enable_dcfa" = xyes; then : @@ -5293,6 +5564,7 @@ do "executer/kernel/mcoverlayfs/linux-4.6.7/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/linux-4.6.7/Makefile" ;; "kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;; "kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;; + "kernel/include/swapfmt.h") CONFIG_FILES="$CONFIG_FILES kernel/include/swapfmt.h" ;; "arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;; "arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;; "arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;; diff --git a/configure.ac b/configure.ac index dde5f50b..22193e4a 100644 --- a/configure.ac +++ b/configure.ac @@ -8,6 +8,76 @@ m4_define([DCFA_RELEASE_DATE_m4],[2013-11-18])dnl AC_INIT([mckernel], MCKERNEL_VERSION_m4) +dnl *** PAC_APPEND_FLAG is copied from aclocal_util.m4 of mpich-3.3a2 *** +dnl Usage: PAC_APPEND_FLAG([-O2], [CFLAGS]) +dnl appends the given argument to the specified shell variable unless the +dnl argument is already present in the variable +AC_DEFUN([PAC_APPEND_FLAG],[ + AC_REQUIRE([AC_PROG_FGREP]) + AS_IF( + [echo "$$2" | $FGREP -e "\<$1\>" >/dev/null 2>&1], + [echo "$2(='$$2') contains '$1', not appending" >&AS_MESSAGE_LOG_FD], + [echo "$2(='$$2') does not contain '$1', appending" >&AS_MESSAGE_LOG_FD + $2="$$2 $1"] + ) +]) + +dnl *** PAC_SET_HEADER_LIB_PATH is copied from aclocal_libs.m4 of mpich-3.3a2 *** +dnl PAC_SET_HEADER_LIB_PATH(with_option,[default_path]) +dnl This macro looks for the --with-xxx=, --with-xxx-include and --with-xxx-lib= +dnl options and sets the library 
and include paths. +dnl +dnl TODO as written, this macro cannot handle a "with_option" arg that has "-" +dnl characters in it. Use AS_TR_SH (and possibly AS_VAR_* macros) to handle +dnl this case if it ever arises. +AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[ + AC_ARG_WITH([$1], + [AC_HELP_STRING([--with-$1=PATH], + [specify path where $1 include directory and lib directory can be found])], + + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1=PATH expects a valid PATH]) + with_$1=""])], + [with_$1=$2]) + AC_ARG_WITH([$1-include], + [AC_HELP_STRING([--with-$1-include=PATH], + [specify path where $1 include directory can be found])], + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1-include=PATH expects a valid PATH]) + with_$1_include=""])], + []) + AC_ARG_WITH([$1-lib], + [AC_HELP_STRING([--with-$1-lib=PATH], + [specify path where $1 lib directory can be found])], + [AS_CASE(["$withval"], + [yes|no|''], + [AC_MSG_WARN([--with[out]-$1-lib=PATH expects a valid PATH]) + with_$1_lib=""])], + []) + + # The args have been sanitized into empty/non-empty values above. + # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options + # taking priority + + AS_IF([test -n "${with_$1_include}"], + [PAC_APPEND_FLAG([-I${with_$1_include}],[CPPFLAGS])], + [AS_IF([test -n "${with_$1}"], + [PAC_APPEND_FLAG([-I${with_$1}/include],[CPPFLAGS])])]) + + AS_IF([test -n "${with_$1_lib}"], + [PAC_APPEND_FLAG([-L${with_$1_lib}],[LDFLAGS])], + [AS_IF([test -n "${with_$1}"], + dnl is adding lib64 by default really the right thing to do? What if + dnl we are on a 32-bit host that happens to have both lib dirs available? 
+ [PAC_APPEND_FLAG([-L${with_$1}/lib],[LDFLAGS]) + AS_IF([test -d "${with_$1}/lib64"], + [PAC_APPEND_FLAG([-L${with_$1}/lib64],[LDFLAGS])]) + ]) + ]) +]) + IHK_VERSION=IHK_VERSION_m4 MCKERNEL_VERSION=MCKERNEL_VERSION_m4 DCFA_VERSION=DCFA_VERSION_m4 @@ -24,6 +94,8 @@ AC_CHECK_LIB([numa],[numa_run_on_node],[numa_lib_found=yes]) AS_IF([test "x$numa_lib_found" != "xyes"], [AC_MSG_ERROR([Unable to find NUMA library, missing numactl-devel?])]) +PAC_SET_HEADER_LIB_PATH([mpi]) + AC_ARG_WITH([kernelsrc], AC_HELP_STRING( [--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]), @@ -61,6 +133,12 @@ AC_ARG_ENABLE([rusage], [ENABLE_RUSAGE=$enableval], [ENABLE_RUSAGE=yes]) +AC_ARG_ENABLE([qlmpi], + AC_HELP_STRING([--enable-qlmpi], + [enable qlmpi implementation]), + [ENABLE_QLMPI=$enableval], + [ENABLE_QLMPI=no]) + AC_ARG_WITH([uname_r], AC_HELP_STRING( [--with-uname_r=uname_r],[Value of '`uname -r`' on the target platform, default is local value]), @@ -172,9 +250,15 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$MCKERNEL_INCDIR" = X; then + MCKERNEL_INCDIR="$prefix/include" + fi if test "X$MCKERNEL_LIBDIR" = X; then MCKERNEL_LIBDIR="$prefix/lib" fi + if test "X$INCDIR" = X; then + INCDIR="$prefix/include" + fi if test "X$ETCDIR" = X; then ETCDIR="$prefix/etc" fi @@ -262,6 +346,7 @@ MCCTRL_FIND_KSYM([__vvar_page]) MCCTRL_FIND_KSYM([hpet_address]) MCCTRL_FIND_KSYM([hv_clock]) MCCTRL_FIND_KSYM([sys_readlink]) +MCCTRL_FIND_KSYM([walk_page_range]) case $ENABLE_MEMDUMP in yes|no|auto) @@ -305,6 +390,13 @@ else AC_MSG_NOTICE([mcoverlayfs is disabled]) fi +if test "x$ENABLE_QLMPI" = "xyes" ; then + AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled]) + AC_MSG_NOTICE([qlmpi is enabled]) +else + AC_MSG_NOTICE([qlmpi is disabled]) +fi + case $ENABLE_RUSAGE in yes|no) ;; @@ -323,10 +415,17 @@ else AC_MSG_NOTICE([rusage is disabled]) fi +if test "x$MCKERNEL_INCDIR" != "x" ; then + 
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers]) +fi + if test "x$MCKERNEL_LIBDIR" != "x" ; then AC_DEFINE_UNQUOTED(MCKERNEL_LIBDIR,"$MCKERNEL_LIBDIR",[McKernel specific libraries]) fi +AC_DEFINE_UNQUOTED(BINDIR,"$BINDIR",[Path of install directory for binary]) +AC_DEFINE_UNQUOTED(SBINDIR,"$SBINDIR",[Path of install directory for system binary]) + AC_SUBST(CC) AC_SUBST(XCC) AC_SUBST(ARCH) @@ -335,7 +434,9 @@ AC_SUBST(UNAME_R) AC_SUBST(TARGET) AC_SUBST(BINDIR) AC_SUBST(SBINDIR) +AC_SUBST(MCKERNEL_INCDIR) AC_SUBST(MCKERNEL_LIBDIR) +AC_SUBST(INCDIR) AC_SUBST(ETCDIR) AC_SUBST(KMODDIR) AC_SUBST(KERNDIR) @@ -343,6 +444,7 @@ AC_SUBST(MANDIR) AC_SUBST(CFLAGS) AC_SUBST(ENABLE_MCOVERLAYFS) AC_SUBST(ENABLE_RUSAGE) +AC_SUBST(ENABLE_QLMPI) AC_SUBST(IHK_VERSION) AC_SUBST(MCKERNEL_VERSION) @@ -365,6 +467,7 @@ AC_CONFIG_FILES([ executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build + kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh diff --git a/executer/include/md5.h b/executer/include/md5.h new file mode 100644 index 00000000..698c995d --- /dev/null +++ b/executer/include/md5.h @@ -0,0 +1,91 @@ +/* + Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + + */ +/* $Id: md5.h,v 1.4 2002/04/13 19:20:28 lpd Exp $ */ +/* + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321, whose + text is available at + http://www.ietf.org/rfc/rfc1321.txt + The code is derived from the text of the RFC, including the test suite + (section A.5) but excluding the rest of Appendix A. It does not include + any code or documentation that is identified in the RFC as being + copyrighted. + + The original and principal author of md5.h is L. Peter Deutsch + <ghost@aladdin.com>. Other authors are noted in the change history + that follows (in reverse chronological order): + + 2002-04-13 lpd Removed support for non-ANSI compilers; removed + references to Ghostscript; clarified derivation from RFC 1321; + now handles byte order either statically or dynamically. + 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. + 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); + added conditionalization for C++ compilation from Martin + Purschke <purschke@bnl.gov>. + 1999-05-03 lpd Original version. + */ + +#ifndef md5_INCLUDED +# define md5_INCLUDED + +/* + * This package supports both compile-time and run-time determination of CPU + * byte order. If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be + * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is + * defined as non-zero, the code will be compiled to run only on big-endian + * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to + * run on either big- or little-endian CPUs, but will run slightly less + * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined. 
+ */ + +typedef unsigned char md5_byte_t; /* 8-bit byte */ +typedef unsigned int md5_word_t; /* 32-bit word */ + +/* Define the state of the MD5 Algorithm. */ +typedef struct md5_state_s { + md5_word_t count[2]; /* message length in bits, lsw first */ + md5_word_t abcd[4]; /* digest buffer */ + md5_byte_t buf[64]; /* accumulate block */ +} md5_state_t; + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Initialize the algorithm. */ +void md5_init(md5_state_t *pms); + +/* Append a string to the message. */ +void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes); + +/* Finish the message and return the digest. */ +void md5_finish(md5_state_t *pms, md5_byte_t digest[16]); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif + +#endif /* md5_INCLUDED */ diff --git a/executer/include/pmi.h b/executer/include/pmi.h new file mode 100644 index 00000000..eeb1f8e7 --- /dev/null +++ b/executer/include/pmi.h @@ -0,0 +1,473 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ +/* + * (C) 2001 by Argonne National Laboratory. + * See COPYRIGHT in top-level directory. + */ + +#ifndef PMI_H_INCLUDED +#define PMI_H_INCLUDED + +#ifdef USE_PMI2_API +#error This header file defines the PMI v1 API, but PMI2 was selected +#endif + +/* prototypes for the PMI interface in MPICH */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/*D +PMI_CONSTANTS - PMI definitions + +Error Codes: ++ PMI_SUCCESS - operation completed successfully +. PMI_FAIL - operation failed +. PMI_ERR_NOMEM - input buffer not large enough +. PMI_ERR_INIT - PMI not initialized +. PMI_ERR_INVALID_ARG - invalid argument +. PMI_ERR_INVALID_KEY - invalid key argument +. PMI_ERR_INVALID_KEY_LENGTH - invalid key length argument +. PMI_ERR_INVALID_VAL - invalid val argument +. PMI_ERR_INVALID_VAL_LENGTH - invalid val length argument +. PMI_ERR_INVALID_LENGTH - invalid length argument +. PMI_ERR_INVALID_NUM_ARGS - invalid number of arguments +. PMI_ERR_INVALID_ARGS - invalid args argument +. 
PMI_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument +. PMI_ERR_INVALID_KEYVALP - invalid keyvalp argument +- PMI_ERR_INVALID_SIZE - invalid size argument + +Booleans: ++ PMI_TRUE - true +- PMI_FALSE - false + +D*/ +#define PMI_SUCCESS 0 +#define PMI_FAIL -1 +#define PMI_ERR_INIT 1 +#define PMI_ERR_NOMEM 2 +#define PMI_ERR_INVALID_ARG 3 +#define PMI_ERR_INVALID_KEY 4 +#define PMI_ERR_INVALID_KEY_LENGTH 5 +#define PMI_ERR_INVALID_VAL 6 +#define PMI_ERR_INVALID_VAL_LENGTH 7 +#define PMI_ERR_INVALID_LENGTH 8 +#define PMI_ERR_INVALID_NUM_ARGS 9 +#define PMI_ERR_INVALID_ARGS 10 +#define PMI_ERR_INVALID_NUM_PARSED 11 +#define PMI_ERR_INVALID_KEYVALP 12 +#define PMI_ERR_INVALID_SIZE 13 + +/* PMI Group functions */ + +/*@ +PMI_Init - initialize the Process Manager Interface + +Output Parameter: +. spawned - spawned flag + +Return values: ++ PMI_SUCCESS - initialization completed successfully +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - initialization failed + +Notes: +Initialize PMI for this process group. The value of spawned indicates whether +this process was created by 'PMI_Spawn_multiple'. 'spawned' will be 'PMI_TRUE' if +this process group has a parent and 'PMI_FALSE' if it does not. + +@*/ +int PMI_Init( int *spawned ); + +/*@ +PMI_Initialized - check if PMI has been initialized + +Output Parameter: +. initialized - boolean value + +Return values: ++ PMI_SUCCESS - initialized successfully set +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to set the variable + +Notes: +On successful output, initialized will either be 'PMI_TRUE' or 'PMI_FALSE'. + ++ PMI_TRUE - initialize has been called. +- PMI_FALSE - initialize has not been called or previously failed. + +@*/ +int PMI_Initialized( int *initialized ); + +/*@ +PMI_Finalize - finalize the Process Manager Interface + +Return values: ++ PMI_SUCCESS - finalization completed successfully +- PMI_FAIL - finalization failed + +Notes: + Finalize PMI for this process group. 
+ +@*/ +int PMI_Finalize( void ); + +/*@ +PMI_Get_size - obtain the size of the process group + +Output Parameters: +. size - pointer to an integer that receives the size of the process group + +Return values: ++ PMI_SUCCESS - size successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to return the size + +Notes: +This function returns the size of the process group to which the local process +belongs. + +@*/ +int PMI_Get_size( int *size ); + +/*@ +PMI_Get_rank - obtain the rank of the local process in the process group + +Output Parameters: +. rank - pointer to an integer that receives the rank in the process group + +Return values: ++ PMI_SUCCESS - rank successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to return the rank + +Notes: +This function returns the rank of the local process in its process group. + +@*/ +int PMI_Get_rank( int *rank ); + +/*@ +PMI_Get_universe_size - obtain the universe size + +Output Parameters: +. size - pointer to an integer that receives the size + +Return values: ++ PMI_SUCCESS - size successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to return the size + + +@*/ +int PMI_Get_universe_size( int *size ); + +/*@ +PMI_Get_appnum - obtain the application number + +Output parameters: +. appnum - pointer to an integer that receives the appnum + +Return values: ++ PMI_SUCCESS - appnum successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to return the size + + +@*/ +int PMI_Get_appnum( int *appnum ); + +/*@ +PMI_Publish_name - publish a name + +Input parameters: +. service_name - string representing the service being published +. port - string representing the port on which to contact the service + +Return values: ++ PMI_SUCCESS - port for service successfully published +. 
PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to publish service + + +@*/ +int PMI_Publish_name( const char service_name[], const char port[] ); + +/*@ +PMI_Unpublish_name - unpublish a name + +Input parameters: +. service_name - string representing the service being unpublished + +Return values: ++ PMI_SUCCESS - port for service successfully published +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to unpublish service + + +@*/ +int PMI_Unpublish_name( const char service_name[] ); + +/*@ +PMI_Lookup_name - lookup a service by name + +Input parameters: +. service_name - string representing the service being published + +Output parameters: +. port - string representing the port on which to contact the service + +Return values: ++ PMI_SUCCESS - port for service successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to lookup service + + +@*/ +int PMI_Lookup_name( const char service_name[], char port[] ); + +/*@ +PMI_Barrier - barrier across the process group + +Return values: ++ PMI_SUCCESS - barrier successfully finished +- PMI_FAIL - barrier failed + +Notes: +This function is a collective call across all processes in the process group +the local process belongs to. It will not return until all the processes +have called 'PMI_Barrier()'. + +@*/ +int PMI_Barrier( void ); + +/*@ +PMI_Abort - abort the process group associated with this process + +Input Parameters: ++ exit_code - exit code to be returned by this process +- error_msg - error message to be printed + +Return values: +. none - this function should not return +@*/ +int PMI_Abort(int exit_code, const char error_msg[]); + +/* PMI Keymap functions */ +/*@ +PMI_KVS_Get_my_name - obtain the name of the keyval space the local process group has access to + +Input Parameters: +. length - length of the kvsname character array + +Output Parameters: +. 
kvsname - a string that receives the keyval space name + +Return values: ++ PMI_SUCCESS - kvsname successfully obtained +. PMI_ERR_INVALID_ARG - invalid argument +. PMI_ERR_INVALID_LENGTH - invalid length argument +- PMI_FAIL - unable to return the kvsname + +Notes: +This function returns the name of the keyval space that this process and all +other processes in the process group have access to. The output parameter, +kvsname, must be at least as long as the value returned by +'PMI_KVS_Get_name_length_max()'. + +@*/ +int PMI_KVS_Get_my_name( char kvsname[], int length ); + +/*@ +PMI_KVS_Get_name_length_max - obtain the length necessary to store a kvsname + +Output Parameter: +. length - maximum length required to hold a keyval space name + +Return values: ++ PMI_SUCCESS - length successfully set +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to set the length + +Notes: +This function returns the string length required to store a keyval space name. + +A routine is used rather than setting a maximum value in 'pmi.h' to allow +different implementations of PMI to be used with the same executable. These +different implementations may allow different maximum lengths; by using a +routine here, we can interface with a variety of implementations of PMI. + +@*/ +int PMI_KVS_Get_name_length_max( int *length ); + +/*@ +PMI_KVS_Get_key_length_max - obtain the length necessary to store a key + +Output Parameter: +. length - maximum length required to hold a key string. + +Return values: ++ PMI_SUCCESS - length successfully set +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to set the length + +Notes: +This function returns the string length required to store a key. + +@*/ +int PMI_KVS_Get_key_length_max( int *length ); + +/*@ +PMI_KVS_Get_value_length_max - obtain the length necessary to store a value + +Output Parameter: +. length - maximum length required to hold a keyval space value + +Return values: ++ PMI_SUCCESS - length successfully set +. 
PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - unable to set the length + +Notes: +This function returns the string length required to store a value from a +keyval space. + +@*/ +int PMI_KVS_Get_value_length_max( int *length ); + +/*@ +PMI_KVS_Put - put a key/value pair in a keyval space + +Input Parameters: ++ kvsname - keyval space name +. key - key +- value - value + +Return values: ++ PMI_SUCCESS - keyval pair successfully put in keyval space +. PMI_ERR_INVALID_KVS - invalid kvsname argument +. PMI_ERR_INVALID_KEY - invalid key argument +. PMI_ERR_INVALID_VAL - invalid val argument +- PMI_FAIL - put failed + +Notes: +This function puts the key/value pair in the specified keyval space. The +value is not visible to other processes until 'PMI_KVS_Commit()' is called. +The function may complete locally. After 'PMI_KVS_Commit()' is called, the +value may be retrieved by calling 'PMI_KVS_Get()'. All keys put to a keyval +space must be unique to the keyval space. You may not put more than once +with the same key. + +@*/ +int PMI_KVS_Put( const char kvsname[], const char key[], const char value[]); + +/*@ +PMI_KVS_Commit - commit all previous puts to the keyval space + +Input Parameters: +. kvsname - keyval space name + +Return values: ++ PMI_SUCCESS - commit succeeded +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - commit failed + +Notes: +This function commits all previous puts since the last 'PMI_KVS_Commit()' into +the specified keyval space. It is a process local operation. + +@*/ +int PMI_KVS_Commit( const char kvsname[] ); + +/*@ +PMI_KVS_Get - get a key/value pair from a keyval space + +Input Parameters: ++ kvsname - keyval space name +. key - key +- length - length of value character array + +Output Parameters: +. value - value + +Return values: ++ PMI_SUCCESS - get succeeded +. PMI_ERR_INVALID_KVS - invalid kvsname argument +. PMI_ERR_INVALID_KEY - invalid key argument +. PMI_ERR_INVALID_VAL - invalid val argument +. 
PMI_ERR_INVALID_LENGTH - invalid length argument +- PMI_FAIL - get failed + +Notes: +This function gets the value of the specified key in the keyval space. + +@*/ +int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int length); + +/* PMI Process Creation functions */ + +/*S +PMI_keyval_t - keyval structure used by PMI_Spawn_mulitiple + +Fields: ++ key - name of the key +- val - value of the key + +S*/ +typedef struct PMI_keyval_t +{ + const char * key; + char * val; +} PMI_keyval_t; + +/*@ +PMI_Spawn_multiple - spawn a new set of processes + +Input Parameters: ++ count - count of commands +. cmds - array of command strings +. argvs - array of argv arrays for each command string +. maxprocs - array of maximum processes to spawn for each command string +. info_keyval_sizes - array giving the number of elements in each of the + 'info_keyval_vectors' +. info_keyval_vectors - array of keyval vector arrays +. preput_keyval_size - Number of elements in 'preput_keyval_vector' +- preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space + +Output Parameter: +. errors - array of errors for each command + +Return values: ++ PMI_SUCCESS - spawn successful +. PMI_ERR_INVALID_ARG - invalid argument +- PMI_FAIL - spawn failed + +Notes: +This function spawns a set of processes into a new process group. The 'count' +field refers to the size of the array parameters - 'cmd', 'argvs', 'maxprocs', +'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers +to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector' +contains keyval pairs that will be put in the keyval space of the newly +created process group before the processes are started. The 'maxprocs' array +specifies the desired number of processes to create for each 'cmd' string. +The actual number of processes may be less than the numbers specified in +maxprocs. 
/*
 * qlmpi.h - constants and declarations shared by the QLMPI tools
 * (ql_server, ql_mpiexec_start / ql_mpiexec_finalize, ql_talker) and
 * mcexec.
 */
#ifndef __HEADER_QLMPI_H
#define __HEADER_QLMPI_H

/*
 * One-character command / reply codes exchanged over the ql_server socket.
 */
/*
 * User program finished executing.  mcexec's swapout path sends this to
 * ql_server formatted as "%c %04x %s": the code, the hexadecimal length
 * of the QL_NAME value, and the QL_NAME value itself.
 */
#define QL_EXEC_END   'E'
/* ql_mpiexec_finalize */
#define QL_RET_FINAL  'F'
/*
 * Resume the user program.  mcexec's swapout path expects this as the
 * first byte of ql_server's reply and treats anything else as an error.
 */
#define QL_RET_RESUME 'R'
/* Connection from ql_mpiexec_start / ql_mpiexec_finalize */
#define QL_COM_CONN   'N'
/* Abnormal end */
#define QL_AB_END     'A'

/* Client kinds (stored in struct client_fd.client) */
/* mpiexec monitor program */
#define QL_MONITOR     1
/* mcexec */
#define QL_MCEXEC_PRO  2
/* ql_mpiexec_start / ql_mpiexec_finalize */
#define QL_MPEXEC      3


/* presumably the file name of the ql_server UNIX-domain socket -- TODO confirm */
#define QL_SOCK "ql_sock"

/* Default paths and limits; the *_ENV macros below name the environment
 * variables that presumably override them -- verify against the tools. */
#define QL_MAX_PATH 4096
#define QL_PARAM_PATH "./"
#define QL_PARAM_EXTE ".param"
#define QL_SWAP_PATH "/tmp"
#define QL_SOCKT_PATH "/run/user"

/* Environment variable names.  mcexec reads the QL_NAME variable with
 * getenv() when it reports QL_EXEC_END. */
#define QL_NAME "QL_NAME"
#define QL_SWAP_ENV "QL_SWAP_PATH"
#define QL_PARAM_ENV "QL_PARAM_PATH"
/* NOTE(review): mcexec's swapout path reads "QL_SOCKET_FILE", not this
 * variable -- presumably the launcher derives one from the other; confirm. */
#define QL_SOCK_ENV "QL_SOCKET_PATH"

/* Size of the message buffer mcexec uses for the ql_server exchange. */
#define QL_BUF_MAX 256


/* Per-connection bookkeeping record. */
struct client_fd {
	int fd;		/* socket file descriptor */
	int client;	/* client kind (QL_MONITOR, QL_MCEXEC_PRO, QL_MPEXEC) */
	char *name;	/* QL_NAME value identifying the job */
	int status;	/* execution status */
};

/* Receive a message on fd; the implementation is not part of this header. */
int ql_recv(int fd,char ** buf);

/* Send `command` followed by `buf` on fd; the implementation is not part
 * of this header. */
int ql_send(int fd,int command,char *buf);


/* Record type tags: command, argument, environment entry.
 * NOTE(review): presumably the tags used in the ".param" files -- confirm. */
#define QL_COMMAND '0'
#define QL_ARG     '1'
#define QL_ENV     '2'

/* Define QL_DEBUG to enable the informational messages guarded by
 * #ifdef QL_DEBUG (e.g. in mcexec's swapout path). */
//#define QL_DEBUG
#endif
b/executer/include/qlmpilib.h new file mode 100644 index 00000000..8cb8f7fd --- /dev/null +++ b/executer/include/qlmpilib.h @@ -0,0 +1,10 @@ + +#ifndef __HEADER_QLMPILIB_H +#define __HEADER_QLMPILIB_H + +int ql_client(int *argc, char ***argv); + +#define QL_CONTINUE 1 +#define QL_EXIT 0 + +#endif diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 5fd22936..167a7906 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -890,9 +890,11 @@ static struct vm_operations_struct rus_vmops = { static int rus_mmap(struct file *file, struct vm_area_struct *vma) { #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP; +// vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP; + vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP; #else - vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP; +// vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP; + vma->vm_flags |= VM_DONTDUMP | VM_MIXEDMAP; #endif vma->vm_ops = &rus_vmops; return 0; @@ -1713,6 +1715,75 @@ out: return error; } +#ifdef MCCTRL_KSYM_walk_page_range +static void +(*mcctrl_walk_page_range)(unsigned long addr, unsigned long end, struct mm_walk *walk) +#if MCCTRL_KSYM_walk_page_range + = (void *)MCCTRL_KSYM_walk_page_range; +#else + = &walk_page_range; +#endif +#endif + +static int mywalk(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) +{ + unsigned long pfn; + struct page *page; + + if (pte == NULL) { + kprintf("mywalk: ptr(%p)\n", pte); + return 0; + } + pfn = pte_pfn(*pte); + page = pfn_to_page(pfn); + if (page == NULL) { + kprintf("mywalk: pte(%p) page is null\n", pte); + return 0; + } + if (PageLocked(page)) { + kprintf("mywalk: MLOCK (%p)\n", (void*) addr); + } + if (addr > 0x700000 && addr < 0x705000) { + kprintf("mywalk: %p(%lx)\n", (void*) addr, page->flags); + } + return 0; +} + +static long pager_req_mlock_list(ihk_os_t os, unsigned long start, + 
unsigned long end, void *addr, int nent) +{ + struct addrpair { + unsigned long start; + unsigned long end; + unsigned long flag; + } *addrpair = (struct addrpair *) addr; + int cnt = 0; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + kprintf("pager_req_mlock_list: addr(%p)\n", addr); + vma = find_vma(current->mm, 0x7010a0); + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + if (vma->vm_start < start || vma->vm_start > end) continue; + kprintf("\t%p: %p -- %p\t%lx\n", vma, + (void*)vma->vm_start, (void*)vma->vm_end, + vma->vm_flags & VM_LOCKED); + if (vma->vm_flags & VM_LOCKED) { + kprintf("\t locked\n"); + if (++cnt >= nent) { /* last entry is a marker */ + addrpair->start = (unsigned long) -1; + goto full; + } + addrpair->start = vma->vm_start; + addrpair->end = vma->vm_end; + addrpair->flag = vma->vm_flags; + addrpair++; + } + } +full: + return cnt; +} + static long pager_call(ihk_os_t os, struct syscall_request *req) { long ret; @@ -1726,6 +1797,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) #define PAGER_REQ_MAP 0x0005 #define PAGER_REQ_PFN 0x0006 #define PAGER_REQ_UNMAP 0x0007 +#define PAGER_REQ_MLOCK_LIST 0x0008 case PAGER_REQ_CREATE: ret = pager_req_create(os, req->args[1], req->args[2]); break; @@ -1754,7 +1826,11 @@ static long pager_call(ihk_os_t os, struct syscall_request *req) case PAGER_REQ_UNMAP: ret = pager_req_unmap(os, req->args[1]); break; - + case PAGER_REQ_MLOCK_LIST: + ret = pager_req_mlock_list(os, (unsigned long) req->args[1], + (unsigned long) req->args[2], + (void*) req->args[3], (int) req->args[4]); + break; default: ret = -ENOSYS; printk("pager_call(%#lx):unknown req %ld\n", req->args[0], ret); diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index c1a5a885..658ae443 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -1,22 +1,33 @@ CC=@CC@ +MCC=mpicc BINDIR=@BINDIR@ +SBINDIR=@SBINDIR@ prefix=@prefix@ exec_prefix=@exec_prefix@ LIBDIR=@libdir@ 
+MCKERNEL_INCDIR=@MCKERNEL_INCDIR@ MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@ KDIR ?= @KDIR@ CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} +LDFLAGS=@LDFLAGS@ VPATH=@abs_srcdir@ TARGET=mcexec libsched_yield @uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair LIBS=@LIBS@ ARCH=@ARCH@ IHKDIR ?= $(VPATH)/../../../ihk/linux/include/ +MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread +ENABLE_QLMPI=@ENABLE_QLMPI@ + +ifeq ($(ENABLE_QLMPI),yes) + MCEXEC_LIBS += -lmpi + TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so +endif all: $(TARGET) mcexec: mcexec.c libmcexec.a - $(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. -lmcexec -lrt -lnuma -pthread -o $@ $^ $(EXTRA_OBJS) + $(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) $(LDFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. $(MCEXEC_LIBS) -o $@ $^ $(EXTRA_OBJS) eclair: eclair.c $(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) @@ -27,6 +38,27 @@ libsched_yield: libsched_yield.c libmcexec.a:: (cd arch/${ARCH}; make) +libqlmpi.so: qlmpilib.c + $(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $< + +libqlfort.so: libqlfort.c + $(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $< -ldl + +ql_server: ql_server.c + $(CC) $(CFLAGS) -o $@ $^ + +ql_mpiexec_start: ql_mpiexec_start.o md5.o + $(CC) $^ $(CFLAGS) -pthread -o $@ + +ql_mpiexec_finalize.o: ql_mpiexec_start.c + $(CC) $(CFLAGS) -DQL_MPIEXEC_FINALIZE -c -o $@ $< + +ql_mpiexec_finalize: ql_mpiexec_finalize.o md5.o + $(CC) $^ $(CFLAGS) -pthread -o $@ + +ql_talker: ql_talker.o + $(CC) $^ $(CFLAGS) -o $@ + clean:: (cd arch/${ARCH}; make clean) $(RM) $(TARGET) *.o @@ -39,5 +71,13 @@ install:: install -m 755 mcexec $(BINDIR) mkdir -p -m 755 $(MCKERNEL_LIBDIR) install -m 755 libsched_yield.so.1.0.0 $(MCKERNEL_LIBDIR) +ifeq ($(ENABLE_QLMPI),yes) + install -m 644 ../include/qlmpilib.h $(MCKERNEL_INCDIR) + install -m 755 libqlmpi.so $(MCKERNEL_LIBDIR) + install -m 755 libqlfort.so $(MCKERNEL_LIBDIR) + install -m 755 ql_server $(SBINDIR) + install -m 755 
ql_mpiexec_start $(BINDIR) + install -m 755 ql_mpiexec_finalize $(BINDIR) + install -m 755 ql_talker $(SBINDIR) +endif @uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR) - diff --git a/executer/user/libqlfort.c b/executer/user/libqlfort.c new file mode 100644 index 00000000..2f822c8b --- /dev/null +++ b/executer/user/libqlfort.c @@ -0,0 +1,101 @@ +#define _GNU_SOURCE +#include +#include +#include +#include + +static int *mck_ql_argc; +static char ***mck_ql_argv; +static int (*intel_iargc)(); +static int (*intel_getarg)(int *, char *, int, int); +static int (*gfortran_iargc)(); +static int (*gfortran_getarg)(int *, char *, int); +static void (*mpi_init)(int *); +static int dl_init_flag; + +static inline void +init() +{ + if (dl_init_flag) + return; + + mck_ql_argc = dlsym(RTLD_NEXT, "mck_ql_argc"); + mck_ql_argv = dlsym(RTLD_NEXT, "mck_ql_argv"); + intel_iargc = dlsym(RTLD_NEXT, "for_iargc"); + intel_getarg = dlsym(RTLD_NEXT, "for_getarg"); + gfortran_iargc = dlsym(RTLD_NEXT, "_gfortran_iargc"); + gfortran_getarg = dlsym(RTLD_NEXT, "_gfortran_getarg_i4"); + mpi_init = dlsym(RTLD_NEXT, "mpi_init_"); + dl_init_flag = 1; +} + +// for GNU Fortran +int +_gfortran_iargc() +{ + init(); + + if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) + return *mck_ql_argc - 1; + if (gfortran_iargc) + return gfortran_iargc(); + return 0; +} + +void +_gfortran_getarg_i4(int *n, char *arg, int arg_len) +{ + int l; + + init(); + if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) { + memset(arg, ' ', arg_len); + if (*n < 0 || *n > *mck_ql_argc) + return; + l = strlen((*mck_ql_argv)[*n]); + if (l > arg_len) + l = arg_len; + strncpy(arg, (*mck_ql_argv)[*n], l); + return; + } + if (gfortran_getarg) { + gfortran_getarg(n, arg, arg_len); + return; + } + return; +} + +// for Intel Fortran +int +for_iargc() +{ + init(); + if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) + return *mck_ql_argc - 1; + if (intel_iargc) + return intel_iargc(); + return 0; +} + +void +for_getarg(int *n, char *arg, 
int dmy1, int arg_len) +{ + int l; + + init(); + if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) { + memset(arg, ' ', arg_len); + if (*n < 0 || *n > *mck_ql_argc) + return; + l = strlen((*mck_ql_argv)[*n]); + if (l > arg_len) + l = arg_len; + strncpy(arg, (*mck_ql_argv)[*n], l); + return; + } + if (intel_getarg) { + intel_getarg(n, arg, dmy1, arg_len); + return; + } + return; +} diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index b447b530..8c1ce0e6 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -73,7 +73,12 @@ #include "../../config.h" #include #include +#include #include +#include +#include +#include "../include/pmi.h" +#include "../include/qlmpi.h" //#define DEBUG @@ -1568,6 +1573,37 @@ opendev() return fd; } +static void ld_preload_init() +{ + char envbuf[PATH_MAX]; + char *old_ld_preload; + + if (disable_sched_yield) { + sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR); + __dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path); + if (setenv("LD_PRELOAD", envbuf, 1) < 0) { + printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n", + __FUNCTION__); + } + } + /* Set LD_PRELOAD to McKernel specific value */ + else if (getenv(ld_preload_envname)) { + if (setenv("LD_PRELOAD", getenv(ld_preload_envname), 1) < 0) { + printf("%s: warning: failed to set LD_PRELOAD environment variable\n", + __FUNCTION__); + } + unsetenv(ld_preload_envname); + } + +#ifdef ENABLE_QLMPI + sprintf(envbuf, "%s/libqlfort.so", MCKERNEL_LIBDIR); + if ((old_ld_preload = getenv("LD_PRELOAD"))) { + sprintf(strchr(envbuf, '\0'), " %s", old_ld_preload); + } + setenv("LD_PRELOAD", envbuf, 1); +#endif +} + int main(int argc, char **argv) { int ret = 0; @@ -1683,24 +1719,7 @@ int main(int argc, char **argv) if (opendev() == -1) exit(EXIT_FAILURE); - if (disable_sched_yield) { - char sched_yield_lib_path[PATH_MAX]; - sprintf(sched_yield_lib_path, "%s/libsched_yield.so.1.0.0", - MCKERNEL_LIBDIR); - __dprintf("%s: %s\n", __FUNCTION__, 
sched_yield_lib_path); - if (setenv("LD_PRELOAD", sched_yield_lib_path, 1) < 0) { - printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n", - __FUNCTION__); - } - } - /* Set LD_PRELOAD to McKernel specific value */ - else if (getenv(ld_preload_envname)) { - if (setenv("LD_PRELOAD", getenv(ld_preload_envname), 1) < 0) { - printf("%s: warning: failed to set LD_PRELOAD environment variable\n", - __FUNCTION__); - } - unsetenv(ld_preload_envname); - } + ld_preload_init(); /* Collect environment variables */ envs_len = flatten_strings(-1, NULL, environ, &envs); @@ -3416,6 +3435,194 @@ return_execve2: } do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; + case 801: {// swapout +#ifdef ENABLE_QLMPI + int rc; + int spawned; + int rank; + int ql_fd = -1; + int len; + struct sockaddr_un unix_addr; + char msg_buf[QL_BUF_MAX]; + char *ql_name; + + rc = PMI_Init(&spawned); + if (rc != 0) { + fprintf(stderr, "swapout(): ERROR: failed to init PMI\n"); + ret = -1; + goto return_swapout; + } + rc = PMI_Get_rank(&rank); + if (rc != 0) { + fprintf(stderr, "swapout(): ERROR: failed to get Rank\n"); + ret = -1; + goto return_swapout; + } + + // swap synchronization + rc = PMI_Barrier(); + + if (rank == 0) { + // tell ql_server what calculation is done. 
+ ql_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (ql_fd < 0) { + fprintf(stderr, "swapout(): ERROR: failed to open socket\n"); + ret = -1; + goto return_swapout; + } + + unix_addr.sun_family = AF_UNIX; + strcpy(unix_addr.sun_path, getenv("QL_SOCKET_FILE")); + len = sizeof(unix_addr.sun_family) + strlen(unix_addr.sun_path) + 1; + rc = connect(ql_fd, (struct sockaddr*)&unix_addr, len); + if (rc < 0) { + fprintf(stderr, "swapout(): ERROR: failed to connect ql_server\n"); + ret = -1; + goto return_swapout; + } + + ql_name = getenv(QL_NAME); + sprintf(msg_buf, "%c %04x %s", + QL_EXEC_END, (unsigned int)strlen(ql_name), ql_name); + rc = send(ql_fd, msg_buf, strlen(msg_buf) + 1, 0); + if (rc < 0) { + fprintf(stderr, "swapout(): ERROR: failed to send QL_EXEC_END\n"); + ret = -1; + goto return_swapout; + } + + // wait resume-req from ql_server. +#ifdef QL_DEBUG + fprintf(stdout, "INFO: waiting resume-req ...\n"); +#endif + rc = recv(ql_fd, msg_buf, strlen(msg_buf) + 1, 0); + + if (rc < 0) { + fprintf(stderr, "swapout(): ERROR: failed to recieve\n"); + ret = -1; + goto return_swapout; + } + + // parse message + if (msg_buf[0] == QL_RET_RESUME) { +#ifdef QL_DEBUG + fprintf(stdout, "INFO: recieved resume-req\n"); +#endif + } + else { + fprintf(stderr, "swapout(): ERROR: recieved unexpected requsest from ql_server\n"); + ret = -1; + goto return_swapout; + } + + // resume-req synchronization + rc = PMI_Barrier(); + } + else { + // resume-req synchronization + rc = PMI_Barrier(); + } + + ret = 0; + +return_swapout: + if (ql_fd >= 0) { + close(ql_fd); + } + + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); +#else + printf("mcexec has not been compiled with ENABLE_QLMPI\n"); + ret = -1; + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); +#endif // ENABLE_QLMPI + break; + } + case 802: /* debugging purpose */ + printf("linux mlock(%p, %ld)\n", + (void *)w.sr.args[0], w.sr.args[1]); + printf("str(%p)=%s", (void*)w.sr.args[0], (char*)w.sr.args[0]); + ret = mlock((void *)w.sr.args[0], 
w.sr.args[1]); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + +#ifndef ARG_MAX +#define ARG_MAX 256 +#endif + case 811: { // linux_spawn + int rc, i; + pid_t pid; + size_t slen; + char *exec_path = NULL; + char* argv[ARG_MAX]; + char** spawn_args = (char**)w.sr.args[1]; + + if (!w.sr.args[0] || ! spawn_args) { + fprintf(stderr, "linux_spawn(): ERROR: invalid argument \n"); + ret = -1; + goto return_linux_spawn; + } + + // copy exec_path + slen = strlen((char*)w.sr.args[0]) + 1; + if (slen <= 0 || slen >= PATH_MAX) { + fprintf(stderr, "linux_spawn(): ERROR: invalid exec_path \n"); + ret = -1; + goto return_linux_spawn; + } + exec_path = malloc(slen); + if (!exec_path) { + fprintf(stderr, "linux_spawn(): ERROR: failed to allocating exec_path\n"); + ret = -1; + goto return_linux_spawn; + } + memset(exec_path, '\0', slen); + + rc = do_strncpy_from_user(fd, exec_path, (void *)w.sr.args[0], slen); + if (rc < 0) { + fprintf(stderr, "linux_spawn(): ERROR: failed to strncpy from user\n"); + ret = -1; + goto return_linux_spawn; + } + + // copy args to argv[] + for (i = 0; spawn_args[i] != NULL; i++) { + slen = strlen(spawn_args[i]) + 1; + argv[i] = malloc(slen); + if (!argv[i]) { + fprintf(stderr, "linux_spawn(): ERROR: failed to allocating argv[%d]\n", i); + ret = -1; + goto return_linux_spawn; + } + memset(argv[i], '\0', slen); + rc = do_strncpy_from_user(fd, argv[i], spawn_args[i], slen); + if (rc < 0) { + fprintf(stderr, "linux_spawn(): ERROR: failed to strncpy from user\n"); + ret = -1; + goto return_linux_spawn; + } + } + + rc = posix_spawn(&pid, exec_path, NULL, NULL, argv, NULL); + if (rc != 0) { + fprintf(stderr, "linux_spawn(): ERROR: posix_spawn returned %d\n", rc); + ret = -1; + goto return_linux_spawn; + } + + ret = 0; +return_linux_spawn: + // free allocated memory + if (exec_path) { + free(exec_path); + } + for (i = 0; argv[i] != NULL; i++) { + free(argv[i]); + } + + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } default: ret = 
do_generic_syscall(&w); diff --git a/executer/user/md5.c b/executer/user/md5.c new file mode 100644 index 00000000..d7ae49bf --- /dev/null +++ b/executer/user/md5.c @@ -0,0 +1,381 @@ +/* + Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + + */ +/* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */ +/* + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321, whose + text is available at + http://www.ietf.org/rfc/rfc1321.txt + The code is derived from the text of the RFC, including the test suite + (section A.5) but excluding the rest of Appendix A. It does not include + any code or documentation that is identified in the RFC as being + copyrighted. + + The original and principal author of md5.c is L. Peter Deutsch + . Other authors are noted in the change history + that follows (in reverse chronological order): + + 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order + either statically or dynamically; added missing #include + in library. 
/*
 * Byte-order selection for md5_process().
 *
 * Any BYTE_ORDER already defined (for example by a system header) is
 * discarded and replaced by this file's own three-valued convention:
 *    1  big-endian, chosen at compile time,
 *   -1  little-endian, chosen at compile time,
 *    0  unknown; md5_process() then tests the byte order at run time.
 * A build can select one of the static variants by defining
 * ARCH_IS_BIG_ENDIAN to 1 or 0; when it is not defined the run-time
 * detection is used.
 */
#undef BYTE_ORDER	/* 1 = big-endian, -1 = little-endian, 0 = unknown */
#ifdef ARCH_IS_BIG_ENDIAN
#  define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1)
#else
#  define BYTE_ORDER 0
#endif

/*
 * T1..T64 are the per-step additive constants T[i] of the MD5 rounds
 * (the "T[i]" in "a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s)").
 *
 * Constants whose top bit is set are not written as plain literals.  They
 * are written as T_MASK ^ <bitwise complement>, with the real value kept
 * in the adjacent comment; this is the change the file history describes
 * as eliminating the "constant is unsigned in ANSI C, signed in
 * traditional" compiler warnings.  T_MASK is an all-ones md5_word_t, so
 * the XOR yields exactly the commented value.
 *
 * Do not edit these values: every one of them takes part in the digest.
 */
#define T_MASK ((md5_word_t)~0)
#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
#define T3    0x242070db
#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
#define T6    0x4787c62a
#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
#define T9    0x698098d8
#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
#define T13    0x6b901122
#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
#define T16    0x49b40821
#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
#define T19    0x265e5a51
#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
#define T22    0x02441453
#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
#define T25    0x21e1cde6
#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
#define T28    0x455a14ed
#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
#define T31    0x676f02d9
#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
#define T35    0x6d9d6122
#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
#define T38    0x4bdecfa9
#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
#define T41    0x289b7ec6
#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
#define T44    0x04881d05
#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
#define T47    0x1fa27cf8
#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
#define T50    0x432aff97
#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
#define T53    0x655b59c3
#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
#define T57    0x6fa87e4f
#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
#define T60    0x4e0811a1
#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
#define T63    0x2ad7d2bb
#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
*/ + md5_word_t xbuf[16]; + const md5_word_t *X; +#endif + + { +#if BYTE_ORDER == 0 + /* + * Determine dynamically whether this is a big-endian or + * little-endian machine, since we can use a more efficient + * algorithm on the latter. + */ + static const int w = 1; + + if (*((const md5_byte_t *)&w)) /* dynamic little-endian */ +#endif +#if BYTE_ORDER <= 0 /* little-endian */ + { + /* + * On little-endian machines, we can process properly aligned + * data without copying it. + */ + if (!((data - (const md5_byte_t *)0) & 3)) { + /* data are properly aligned */ + X = (const md5_word_t *)data; + } else { + /* not aligned */ + memcpy(xbuf, data, 64); + X = xbuf; + } + } +#endif +#if BYTE_ORDER == 0 + else /* dynamic big-endian */ +#endif +#if BYTE_ORDER >= 0 /* big-endian */ + { + /* + * On big-endian machines, we must arrange the bytes in the + * right order. + */ + const md5_byte_t *xp = data; + int i; + +# if BYTE_ORDER == 0 + X = xbuf; /* (dynamic only) */ +# else +# define xbuf X /* (static only) */ +# endif + for (i = 0; i < 16; ++i, xp += 4) + xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); + } +#endif + } + +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + + /* Round 1. */ + /* Let [abcd k s i] denote the operation + a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ +#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + F(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 0, 7, T1); + SET(d, a, b, c, 1, 12, T2); + SET(c, d, a, b, 2, 17, T3); + SET(b, c, d, a, 3, 22, T4); + SET(a, b, c, d, 4, 7, T5); + SET(d, a, b, c, 5, 12, T6); + SET(c, d, a, b, 6, 17, T7); + SET(b, c, d, a, 7, 22, T8); + SET(a, b, c, d, 8, 7, T9); + SET(d, a, b, c, 9, 12, T10); + SET(c, d, a, b, 10, 17, T11); + SET(b, c, d, a, 11, 22, T12); + SET(a, b, c, d, 12, 7, T13); + SET(d, a, b, c, 13, 12, T14); + SET(c, d, a, b, 14, 17, T15); + SET(b, c, d, a, 15, 22, T16); +#undef SET + + /* Round 2. */ + /* Let [abcd k s i] denote the operation + a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ +#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + G(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 1, 5, T17); + SET(d, a, b, c, 6, 9, T18); + SET(c, d, a, b, 11, 14, T19); + SET(b, c, d, a, 0, 20, T20); + SET(a, b, c, d, 5, 5, T21); + SET(d, a, b, c, 10, 9, T22); + SET(c, d, a, b, 15, 14, T23); + SET(b, c, d, a, 4, 20, T24); + SET(a, b, c, d, 9, 5, T25); + SET(d, a, b, c, 14, 9, T26); + SET(c, d, a, b, 3, 14, T27); + SET(b, c, d, a, 8, 20, T28); + SET(a, b, c, d, 13, 5, T29); + SET(d, a, b, c, 2, 9, T30); + SET(c, d, a, b, 7, 14, T31); + SET(b, c, d, a, 12, 20, T32); +#undef SET + + /* Round 3. */ + /* Let [abcd k s t] denote the operation + a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + H(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 5, 4, T33); + SET(d, a, b, c, 8, 11, T34); + SET(c, d, a, b, 11, 16, T35); + SET(b, c, d, a, 14, 23, T36); + SET(a, b, c, d, 1, 4, T37); + SET(d, a, b, c, 4, 11, T38); + SET(c, d, a, b, 7, 16, T39); + SET(b, c, d, a, 10, 23, T40); + SET(a, b, c, d, 13, 4, T41); + SET(d, a, b, c, 0, 11, T42); + SET(c, d, a, b, 3, 16, T43); + SET(b, c, d, a, 6, 23, T44); + SET(a, b, c, d, 9, 4, T45); + SET(d, a, b, c, 12, 11, T46); + SET(c, d, a, b, 15, 16, T47); + SET(b, c, d, a, 2, 23, T48); +#undef SET + + /* Round 4. */ + /* Let [abcd k s t] denote the operation + a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ +#define I(x, y, z) ((y) ^ ((x) | ~(z))) +#define SET(a, b, c, d, k, s, Ti)\ + t = a + I(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 6, T49); + SET(d, a, b, c, 7, 10, T50); + SET(c, d, a, b, 14, 15, T51); + SET(b, c, d, a, 5, 21, T52); + SET(a, b, c, d, 12, 6, T53); + SET(d, a, b, c, 3, 10, T54); + SET(c, d, a, b, 10, 15, T55); + SET(b, c, d, a, 1, 21, T56); + SET(a, b, c, d, 8, 6, T57); + SET(d, a, b, c, 15, 10, T58); + SET(c, d, a, b, 6, 15, T59); + SET(b, c, d, a, 13, 21, T60); + SET(a, b, c, d, 4, 6, T61); + SET(d, a, b, c, 11, 10, T62); + SET(c, d, a, b, 2, 15, T63); + SET(b, c, d, a, 9, 21, T64); +#undef SET + + /* Then perform the following additions. (That is increment each + of the four registers by the value it had before this block + was started.) 
*/ + pms->abcd[0] += a; + pms->abcd[1] += b; + pms->abcd[2] += c; + pms->abcd[3] += d; +} + +void +md5_init(md5_state_t *pms) +{ + pms->count[0] = pms->count[1] = 0; + pms->abcd[0] = 0x67452301; + pms->abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; + pms->abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; + pms->abcd[3] = 0x10325476; +} + +void +md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes) +{ + const md5_byte_t *p = data; + int left = nbytes; + int offset = (pms->count[0] >> 3) & 63; + md5_word_t nbits = (md5_word_t)(nbytes << 3); + + if (nbytes <= 0) + return; + + /* Update the message length. */ + pms->count[1] += nbytes >> 29; + pms->count[0] += nbits; + if (pms->count[0] < nbits) + pms->count[1]++; + + /* Process an initial partial block. */ + if (offset) { + int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); + + memcpy(pms->buf + offset, p, copy); + if (offset + copy < 64) + return; + p += copy; + left -= copy; + md5_process(pms, pms->buf); + } + + /* Process full blocks. */ + for (; left >= 64; p += 64, left -= 64) + md5_process(pms, p); + + /* Process a final partial block. */ + if (left) + memcpy(pms->buf, p, left); +} + +void +md5_finish(md5_state_t *pms, md5_byte_t digest[16]) +{ + static const md5_byte_t pad[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + md5_byte_t data[8]; + int i; + + /* Save the length before padding. */ + for (i = 0; i < 8; ++i) + data[i] = (md5_byte_t)(pms->count[i >> 2] >> ((i & 3) << 3)); + /* Pad to 56 bytes mod 64. */ + md5_append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1); + /* Append the length. 
*/ + md5_append(pms, data, 8); + for (i = 0; i < 16; ++i) + digest[i] = (md5_byte_t)(pms->abcd[i >> 2] >> ((i & 3) << 3)); +} diff --git a/executer/user/ql_mpiexec_start.c b/executer/user/ql_mpiexec_start.c new file mode 100644 index 00000000..f60a9f69 --- /dev/null +++ b/executer/user/ql_mpiexec_start.c @@ -0,0 +1,1073 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../config.h" + +#include "../include/qlmpi.h" +#include "../include/md5.h" + +#define MCEXEC "mcexec" +#define QL_PIPE_PATH "/tmp/" +#define QL_PIPE_IN_EXTENTION ".in" +#define QL_PIPE_OUT_EXTENTION ".out" +#define QL_SERVER_EXECUTION SBINDIR "/ql_server" +#define QL_TALKER_EXECUTION SBINDIR "/ql_talker" + +extern char **environ; + +struct mpi_opt { + const char *opt; + int n; + int flags; +#define HOSTFILE_OPT 1 +#define HOSTLIST_OPT 2 +#define NODE_OPT 4 +#define UNSUPP_OPT 8 +#define ENVLIST_OPT 16 +#define ENVNONE_OPT 32 +#define ENVALL_OPT 64 +#define ENV_OPT 128 +}; + +const struct mpi_opt opts[] = { + {"help", 0, 0}, + {"h", 0, 0}, + {"genv", -1, ENV_OPT}, + {"genvlist", 1, ENVLIST_OPT}, + {"genvnone", 0, ENVNONE_OPT}, + {"genvall", 0, ENVALL_OPT}, + {"f", 1, HOSTFILE_OPT}, + {"hostfile", 1, HOSTFILE_OPT}, + {"machinefile", 1, HOSTFILE_OPT}, + {"machine", 1, HOSTLIST_OPT}, + {"machines", 1, HOSTLIST_OPT}, + {"machinelist", 1, HOSTLIST_OPT}, + {"host", 1, HOSTLIST_OPT}, + {"hosts", 1, HOSTLIST_OPT}, + {"hostlist", 1, HOSTLIST_OPT}, + {"ppn", 1, 0}, + {"profile", 0, 0}, + {"prepend-rank", 0, 0}, + {"l", 0, 0}, + {"prepend-pattern", 1, 0}, + {"outfile-pattern", 1, 0}, + {"outfile", 1, 0}, + {"errfile-pattern", 1, 0}, + {"errfile", 1, 0}, + {"wdir", 1, 0}, + {"configfile", 1, 0}, + {"env", -1, ENV_OPT}, + {"envlist", 1, ENVLIST_OPT}, + {"envnone", 0, ENVNONE_OPT}, + {"envall", 0, ENVALL_OPT}, + {"n", 1, NODE_OPT}, + {"np", 1, NODE_OPT}, + {"launcher", 1, 0}, + 
{"launcher-exec", 1, 0}, + {"bootstrap", 1, 0}, + {"bootstrap-exec", 1, 0}, + {"enable-x", 0, 0}, + {"disable-x", 0, 0}, + {"rmk", 1, 0}, + {"bind-to", 1, 0}, + {"binding", 1, 0}, + {"map-by", 1, 0}, + {"membind", 1, 0}, + {"topolib", 1, 0}, + {"ckpoint-interval", 1, 0}, + {"ckpoint-prefix", 1, 0}, + {"ckpoint-num", 1, 0}, + {"ckpointlib", 1, 0}, + {"demux", 1, 0}, + {"verbose", 0, 0}, + {"v", 0, 0}, + {"debug", 0, 0}, + {"info", 0, 0}, + {"version", 0, 0}, + {"print-all-exitcodes", 0, 0}, + {"iface", 1, 0}, + {"nameserver", 1, 0}, + {"disable-auto-cleanup", 0, 0}, + {"dac", 0, 0}, + {"enable-auto-cleanup", 0, 0}, + {"disable-hostname-propagation", 0, 0}, + {"enable-hostname-propagation", 0, 0}, + {"order-nodes", 1, 0}, + {"localhost", 1, 0}, + {"usize", 1, 0}, + {NULL, 0, 0} +}; + +char **mpi_opt_top; +char **usr_opt_top; + +int fdstdin = -1; +int fdstdout = -1; +int fdstderr = -1; +char ql_name[33] = ""; +char ql_sock_file[1024] = ""; +char target_host[256] = ""; +struct sockaddr_un wsock; + +char * +trim(char *buf) +{ + char *p; + char *q; + + for(p = buf; *p && (isspace(*p)); p++); + if(!*p) + return p; + for(q = strchr(p, '\0') - 1; isspace(*q); q--) + *q = '\0'; + return p; +} + +void +esc_put(FILE *fp, char type, const char *buf) +{ + const char *t; + + fprintf(fp, "%c %ld ", type, strlen(buf)); + for (t = buf; *t; t++) { + if (*t == '%' || *t < ' ') + fprintf(fp, "%%%02x", *t); + else + fputc(*t, fp); + } + fputc('\n', fp); +} + +static void +ql_setenv(char **env, char *k, char *v) +{ + char **e; + char *w; + char *t; + int l; + + if (!*k || *k == '=') + return; + + l = strlen(k); + if (v) + l += strlen(v) + 1; + else if (!strchr(k, '=')) + l++; + w = malloc(l + 1); + strcpy(w, k); + if (!(t = strchr(w, '='))) + strcat(w, "="); + else + t[1] = '\0'; + l = strlen(w); + for (e = env; *e; e++) + if (!strncmp(w, *e, l)) + break; + if (v) + strcat(w, v); + else if (t) + strcpy(w, k); + if (!*e) + e[1] = NULL; + *e = w; +} + +static void +ql_envlist(char **env, 
char *list) +{ + char *w = strdup(list); + char *p = w; + + for (;;) { + char *q = strchr(p, ','); + + if (q) { + *q = '\0'; + ql_setenv(env, p, NULL); + p = q + 1; + } + else { + ql_setenv(env, p, NULL); + break; + } + } + + free(w); +} + +static int +sendfd(int sock, int fd) +{ + struct msghdr msg; + struct iovec iov; + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (struct cmsghdr*)cmsgbuf; + char c; + + iov.iov_base = &c; + iov.iov_len = 1; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + *((int *)CMSG_DATA(cmsg)) = fd; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_flags = 0; + if (sendmsg(sock, &msg, 0) == -1) { + return -1; + } + + return 0; +} + +#ifndef QL_MPIEXEC_FINALIZE +static int +recvfd(int sock) +{ + struct msghdr msg; + struct iovec iov; + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (struct cmsghdr *)cmsgbuf; + char c; + + iov.iov_base = &c; + iov.iov_len = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_flags = MSG_WAITALL; + if (recvmsg(sock, &msg, 0) == -1) { + return -1; + } + + return *(int *)CMSG_DATA(cmsg); +} + +int eventfds[2]; + +static void +chld(int s) +{ + write(eventfds[1], "Q", 1); +} + +static void +term_server() +{ + char buf[1024]; + + sprintf(buf,"ssh %s %s %c %s %s %s", target_host, QL_TALKER_EXECUTION, + QL_RET_FINAL, "-n", ql_name ,ql_sock_file); + system(buf); +} + +#define RBUFSIZE 65536 +struct rbuf { + int rpos; + int wpos; + char buf[RBUFSIZE]; +}; + +struct rbuf * +buf_new() +{ + struct rbuf *rbuf = malloc(sizeof(struct rbuf)); + + rbuf->rpos = 0; + rbuf->wpos = 0; + return rbuf; +} + +int +buf_empty(struct rbuf *bp) +{ + return bp->rpos == bp->wpos; +} 
+ +int +buf_full(struct rbuf *bp) +{ + if (bp->wpos) + return bp->rpos == bp->wpos - 1; + return bp->rpos == RBUFSIZE - 1; +} + +int +buf_read(int fd, struct rbuf *bp) +{ + struct iovec iov[2]; + int iovlen = 1; + int rc; + + iov[0].iov_base = bp->buf + bp->rpos; + if (bp->rpos >= bp->wpos) { + if (bp->wpos == 0) + iov[0].iov_len = RBUFSIZE - bp->rpos - 1; + else { + iovlen = 2; + iov[0].iov_len = RBUFSIZE - bp->rpos; + iov[1].iov_base = bp->buf; + iov[1].iov_len = bp->wpos - 1; + } + } + else + iov[0].iov_len = bp->wpos - bp->rpos - 1; + rc = readv(fd, iov, iovlen); + if (rc <= 0) + return rc; + bp->rpos += rc; + if (bp->rpos >= RBUFSIZE) + bp->rpos -= RBUFSIZE; + return rc; +} + +int +buf_write(int fd, struct rbuf *bp) +{ + struct iovec iov[2]; + int iovlen = 1; + int rc; + + iov[0].iov_base = bp->buf + bp->wpos; + if (bp->wpos > bp->rpos) { + iov[0].iov_len = sizeof(bp->buf) - bp->wpos; + iov[1].iov_base = bp->buf; + if ((iov[1].iov_len = bp->rpos)) + iovlen = 2; + } + else + iov[0].iov_len = bp->rpos - bp->wpos; + rc = writev(fd, iov, iovlen); + if (rc <= 0) + return rc; + bp->wpos += rc; + if (bp->wpos >= RBUFSIZE) + bp->wpos -= RBUFSIZE; + return rc; +} + +struct fds { + struct fds *next; + int in_fd; + int out_fd; + struct rbuf *buf; +}; + +struct fds * +fds_new(int in_fd, int out_fd) +{ + struct fds *fdp; + + fdp = malloc(sizeof(struct fds)); + + fdp->next = NULL; + fdp->buf = buf_new(); + fdp->in_fd = in_fd; + fdp->out_fd = out_fd; + return fdp; +} + +static void +ql_wrapper(char **args, int afd) +{ + pid_t pid; + char c; + int pfds0[2]; + int pfds1[2]; + int pfds2[2]; + int pfds3[2]; + int rc; + int maxfd = afd; + fd_set readfds; + fd_set writefds; + int first = 1; + int cfd = -1; + int nflg = 0; + struct fds *fds0; + struct fds *fds1; + struct fds *fds2; + struct fds *fdtop; + struct fds *fdp; + int exitcode = 1; + + pipe(eventfds); + if (eventfds[0] > maxfd) + maxfd = eventfds[0]; + + pipe(pfds0); + pipe(pfds1); + pipe(pfds2); + + fds0 = fds_new(-1, 
pfds0[1]); + fds1 = fds_new(pfds1[0], -1); + fds2 = fds_new(pfds2[0], -1); + fdtop = fds0; + fds0->next = fds1; + fds1->next = fds2; + + socketpair(AF_UNIX, SOCK_STREAM, 0, pfds3); + fcntl(pfds3[1], F_SETFD, FD_CLOEXEC); + pid = fork(); + if (pid == 0) { + close(afd); + close(pfds0[1]); + if (pfds0[0] != 0) { + dup2(pfds0[0], 0); + close(pfds0[0]); + } + close(pfds1[0]); + if (pfds1[1] != 1) { + dup2(pfds1[1], 1); + close(pfds1[1]); + } + close(pfds2[0]); + if (pfds2[1] != 1) { + dup2(pfds2[1], 2); + close(pfds2[1]); + } + close(pfds3[0]); + + // wait for client + while ((rc = read(pfds3[1], &c, 1)) == -1 && errno == EINTR); + if (rc != 1) //client is already terminated + exit(0); + execvp("mpiexec", args); + + // exec fail + rc = errno; + write(pfds3[1], &rc, sizeof rc); + exit(1); + } + close(pfds0[0]); + close(pfds1[1]); + close(pfds2[1]); + close(pfds3[1]); + if (pfds0[1] > maxfd) + maxfd = pfds0[1]; + if (pfds1[0] > maxfd) + maxfd = pfds1[0]; + if (pfds2[0] > maxfd) + maxfd = pfds2[0]; + + signal(SIGPIPE, SIG_IGN); + signal(SIGCHLD, chld); + + for (;;) { + if (afd == -1 && + (fds1->out_fd == -1 || buf_empty(fds1->buf)) && + (fds2->out_fd == -1 || buf_empty(fds2->buf))) + break; + FD_ZERO(&readfds); + FD_ZERO(&writefds); + FD_SET(eventfds[0], &readfds); + if (cfd == -1) + FD_SET(afd, &readfds); + else + FD_SET(cfd, &readfds); + + for (fdp = fdtop; fdp; fdp = fdp->next) { + if (fdp->out_fd != -1 && !buf_empty(fdp->buf)) + FD_SET(fdp->out_fd, &writefds); + if (fdp->in_fd != -1 && !buf_full(fdp->buf)) + FD_SET(fdp->in_fd, &readfds); + } + + rc = select(maxfd + 1, &readfds, &writefds, NULL, NULL); + if (rc == -1 && errno == EINTR) + continue; + if (rc == 0) + continue; + + if (FD_ISSET(eventfds[0], &readfds)) { // child die + int c; + int st; + + read(eventfds[0], &c, 1); + while (waitpid(pid, &st, 0) == -1 && errno == EINTR); + term_server(); + if (cfd != -1) { + write(cfd, "X", 1); + write(cfd, &st, 4); + } + exitcode = 0; + close(afd); + afd = -1; + } + + if 
(FD_ISSET(afd, &readfds)) { + struct sockaddr_un sock; + socklen_t len; + + len = sizeof sock; + cfd = accept(afd, (struct sockaddr *)&sock, &len); + if (cfd == -1) { + goto end; + } + if (cfd > maxfd) + maxfd = cfd; + } + if (FD_ISSET(cfd, &readfds)) { + int fd; + + rc = read(cfd, &c, 1); + if (rc == 0) { + close(cfd); + cfd = -1; + if (!nflg) { // abormally terminated + kill(pid, SIGINT); + term_server(); + close(afd); + afd = -1; + } + } + else if (c == 'C') { + nflg = 0; + } + else if (c == '0') { + fd = recvfd(cfd); + if (fd > maxfd) + maxfd = fd; + fds0->out_fd = fd; + } + else if (c == '1') { + fd = recvfd(cfd); + if (fd > maxfd) + maxfd = fd; + fds1->out_fd = fd; + } + else if (c == '2') { + fd = recvfd(cfd); + if (fd > maxfd) + maxfd = fd; + fds2->out_fd = fd; + if (first) { + char buf[256]; + int e = 0; + + first = 0; + write(pfds3[0], " ", 1); + rc = read(pfds3[0], &e, sizeof e); + if (rc == sizeof e) { // failed to exec + sprintf(buf, "mpiexec: exec(%s)" + "\n", strerror(e)); + write(fd, buf, strlen(buf)); + goto end; + } + else if (rc == -1) { + sprintf(buf, "mpiexec: read(%s)" + "\n", strerror(errno)); + write(fd, buf, strlen(buf)); + } + close(pfds3[0]); + } + } + else if (c == 'E') { + nflg = 1; + } + else if (c == 'F') { + nflg = 1; + } + if (c != 'F') + write(cfd, " ", 1); + } + for (fdp = fdtop; fdp; fdp = fdp->next) { + if (fdp->out_fd != -1 && + FD_ISSET(fdp->out_fd, &writefds)) { + rc = buf_write(fdp->out_fd, fdp->buf); + if (fdp->in_fd == -1 && + buf_empty(fdp->buf)) { + close(fdp->out_fd); + fdp->out_fd = -1; + } + } + if (fdp->in_fd != -1 && + FD_ISSET(fdp->in_fd, &readfds)) { + rc = buf_read(fdp->in_fd, fdp->buf); + if (rc == 0) { + close(fdp->in_fd); + fdp->in_fd = -1; + if (buf_empty(fdp->buf)) { + close(fdp->out_fd); + fdp->out_fd = -1; + } + } + } + } + } +end: + unlink(wsock.sun_path); + exit(exitcode); +} +#endif + +int ql_check_directory( char * path,char * file ,char *filep){ + struct stat st; + int rc; + + 
sprintf(filep,"%s/%s",path,file); + + rc = stat(filep,&st); + if (rc == 0) { + /* file exist */ + return 1; + } + else { + rc = stat(path,&st); + /* file or directory exist */ + if ( rc == 0) { + mode_t m = st.st_mode; + if (S_ISDIR(m)) { + /* directory exist */ + return 1; + } + return 0; + } + else { + mode_t m = st.st_mode; + if (S_ISDIR(m)) { + /* directory exist */ + return rc; + } + else { + if (mkdir(path, (S_IRUSR | S_IWUSR | S_IRWXU | + S_IRGRP | S_IWGRP | S_IRWXG | + S_IROTH | S_IWOTH | S_IRWXO)) == 0) { + return 1; + } + return 0; /* mkdir error */ + } + } + } +} + + + + + +/* ex: ql_mpiexec_start -machinefile file_name -n 4 mcexec a.out arg1 arg2 */ +// stdin, stdout, stderr +#define PIPE_HANDLE_NUM 3 +int main(int argc, char *argv[]) +{ + char *machinefile = NULL; + char ql_param_file[1024] = ""; /* */ + char ql_sock_path[1024] = ""; + char ql_file[1024] = ""; + char *exe_name =NULL; + char tmp[4096]; + struct stat st; /* for file check */ + int fd; + int i; + md5_state_t state; + md5_byte_t digest[16]; + FILE *fp; + char base[1024]; + char *ptr; + char **a; + char **b; + int rc; + char **env; + int n; + int uid; + char *pt; + int wfd; + socklen_t wlen; + int exitcode = 0; + char c; + +#ifndef QL_MPIEXEC_FINALIZE + int f_flg = 0; +#endif + + for (a = environ, n = 0; *a; a++, n++); + for (a = argv; *a; a++) { + if (!strcmp(*a, "-genv") || + !strcmp(*a, "-env")) { + n++; + } + else if ((!strcmp(*a, "-genvlist") || + !strcmp(*a, "-envlist")) && + a[1]) { + char *t; + + n++; + for (t = a[1]; *a; t++) + if (*t == ',') + n++; + } + } + env = malloc(sizeof(char *) * (n + 2)); + for (a = environ, b = env; (*b = *a); a++, b++); + + md5_init(&state); + mpi_opt_top = argv + 1; + for (a = mpi_opt_top; *a; a++) { + char *opt; + const struct mpi_opt *o; + int i; + + if ((*a)[0] != '-') + break; + opt = (*a) + 1; + for (o = opts; o->opt; o++) { + if (!strcmp(opt, o->opt)) + break; + } + if (!o->opt) { + fprintf(stderr, "unknown option: %s\n", *a); + exit(1); + } + if 
(o->n < 0) { // -genv, -env + a++; + if (!*a) { + fprintf(stderr, "bad option: -%s\n", o->opt); + exit(1); + } + if (!strchr(*a, '=')) { + char *k = *a; + a++; + if (!*a) { + fprintf(stderr, "bad option: -%s\n", + o->opt); + exit(1); + } + if (o->flags & ENV_OPT) + ql_setenv(env, k, *a); + } + else { + if (o->flags & ENV_OPT) + ql_setenv(env, *a, NULL); + } + } + else { + for (i = 0; i < o->n; i++) { + a++; + if (!*a) { + fprintf(stderr, "bad option: -%s\n", + o->opt); + exit(1); + } + } + if (o->flags & UNSUPP_OPT) { + fprintf(stderr, "unsupported option: " + "-%s\n", o->opt); + exit(1); + } + if (o->flags & HOSTFILE_OPT) + machinefile = *a; + if (o->flags & NODE_OPT) { + md5_append(&state, (const md5_byte_t *)*a, + strlen(*a)); + } + if (o->flags & ENVNONE_OPT) { + env[0] = NULL; + } + if (o->flags & ENVALL_OPT) { + for (a = environ, b = env; (*b = *a); a++, b++); + } + if (o->flags & ENVLIST_OPT) { + ql_envlist(env, *a); + } + } + } + usr_opt_top = a; + if (!*a) { + fprintf(stderr, "no user program\n"); + exit(1); + } + exe_name = *a; + md5_append(&state, (const md5_byte_t *)exe_name, strlen(exe_name)); + + for (; *a; a++) + if (!strcmp(*a, ":")) { + fprintf(stderr, "':' is unsupported\n"); + exit(1); + } + + if (machinefile) { + /* get target_host from -machinefile */ + if (!stat(machinefile, &st)) { /* file exist*/ + char *b; + size_t siz; + FILE *f; + char line[65536]; + + siz = st.st_size; + fd = open(machinefile, O_RDONLY); + b = mmap(NULL, siz, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (b == (void *)-1) { + fprintf(stderr, "unable to read hostfile(%s): %s\n", machinefile, strerror(errno)); + exit(1); + } + md5_append(&state, (const md5_byte_t *)b, siz); + munmap(b, siz); + + if (!(f = fopen(machinefile, "r"))) { + fprintf(stderr, "could not open hostfile(%s): %s\n", machinefile, strerror(errno)); + exit(1); + } + while (fgets(line, sizeof line, f)) { + char *w; + char *t; + if ((w = strchr(line, '#'))) + *w = '\0'; + if ((w = strchr(line, ':'))) + 
*w = '\0'; + if ((w = strchr(line, ','))) + *w = '\0'; + t = trim(line); + if ((w = strchr(line, ' '))) + *w = '\0'; + if ((w = strchr(line, '\t'))) + *w = '\0'; + if (t[0] == '\0') + continue; + strcpy(target_host, t); + break; + } + fclose(f); +#ifdef QL_DEBUG + printf(" target_host %s\n", target_host); +#endif + } + else { + fprintf(stderr, "-machinefile not exist\n"); + exit(1); + } + } + else { + fprintf(stderr, "specify -machinefile option\n"); + exit(1); + } + + md5_finish(&state, digest); + for (i = 0; i < 16; i++) { + sprintf(ql_name + i * 2, "%02x", digest[i]); + } + if ((ptr = getenv(QL_PARAM_ENV)) == NULL) { + sprintf(base, "%s", getenv("HOME")); + } + else{ + sprintf(base, "%s", ptr); + } + + setenv("QL_NAME", ql_name, 1); + ql_setenv(env, "QL_NAME", ql_name); + + uid = (int)getuid(); + if ((pt = getenv(QL_SOCK_ENV)) != NULL) { + sprintf(ql_sock_path,"%s/%s",pt,QL_SOCK); + } + else { + sprintf(ql_sock_path,"%s/%d/%s",QL_SOCKT_PATH,uid,QL_SOCK); + } + sprintf(ql_file,"%s.%d", QL_SOCK, uid); + + if(!ql_check_directory(ql_sock_path,ql_file,ql_sock_file)) { + fprintf(stderr, "socket directory not exist\n"); + exit(1); + } + + setenv("QL_SOCKET_FILE", ql_sock_file, 1); +#ifdef QL_DEBUG + printf(" socket path %s\n", ql_sock_file); +#endif + +#ifndef QL_MPIEXEC_FINALIZE + sprintf(tmp, "ssh %s ""%s %s %s""", target_host, QL_SERVER_EXECUTION ,ql_sock_path ,ql_file); + +#ifdef QL_DEBUG + printf(" system %s\n", tmp); +#endif + if((rc = system(tmp)) == -1){ + fprintf(stderr, "ql_server not execution %s", strerror(errno)); + exit(-1); + } +#endif + + memset(&wsock, '\0', sizeof wsock); + wsock.sun_family = AF_UNIX; + sprintf(wsock.sun_path, "%s/%s.s", ql_sock_path, ql_name); + wlen = sizeof wsock.sun_family + strlen(wsock.sun_path) + 1; + + if(stat(wsock.sun_path, &st)){ /* socket file not exist */ +#ifdef QL_MPIEXEC_FINALIZE + fprintf(stderr,"not found mpi process\n"); + exit(1); +#else + pid_t pid; + int wst; + + f_flg = 1; + + if ((wfd = socket(PF_UNIX, 
SOCK_STREAM, 0)) == -1) { + fprintf(stderr, "ql_mpiexec_start: socket(%s)\n", + strerror(errno)); + exit(1); + } + if (bind(wfd, (struct sockaddr *)&wsock, wlen) == -1) { + fprintf(stderr, "ql_mpiexec_start: bind(%s)\n", + strerror(errno)); + exit(1); + } + if (listen(wfd, 5) == -1) { + fprintf(stderr, "ql_mpiexec_start: listen(%s)\n", + strerror(errno)); + exit(1); + } + + if ((pid = fork()) == 0) { + int i; + char **args; + char **b; + + if (fork()) + exit(0); + + setsid(); + if (wfd < 3) { + dup2(wfd, 3); + wfd = 3; + } + for (i = 0; i < 4096; i++) + if (i != wfd) + close(i); + open("/dev/null", O_RDONLY); + open("/dev/null", O_WRONLY); + open("/dev/null", O_WRONLY); + + args = (char **)malloc(sizeof(char *) * (argc + 2)); + *args = "mpiexec"; + for (a = mpi_opt_top, b = args + 1; a != usr_opt_top; + a++) + *(b++) = *a; + *(b++) = BINDIR "/mcexec"; + for (; *a; a++) + *(b++) = *a; + *b = NULL; + ql_wrapper(args, wfd); + exit(-1); /*not reach */ + } + close(wfd); + while(waitpid(pid, &wst, 0) == -1 && errno == EINTR); +#endif + } + else{ + int env_n; + int arg_n; + + for (arg_n = 0, a = usr_opt_top; *a; a++, arg_n++); + for (env_n = 0, a = env; *a; a++, env_n++); + + /* param file output */ + sprintf(ql_param_file, "%s/%s%s", base, ql_name, QL_PARAM_EXTE); + fp = fopen(ql_param_file, "w"); +#ifdef QL_MPIEXEC_FINALIZE + fprintf(fp, "%c COM=%c\n", QL_COMMAND, QL_RET_FINAL); +#else + fprintf(fp, "%c COM=%c %d %d\n", QL_COMMAND, QL_RET_RESUME, + arg_n, env_n); + + for (a = usr_opt_top; *a; a++) + esc_put(fp, QL_ARG, *a); + for (a = env; *a; a++) + esc_put(fp, QL_ENV, *a); +#endif + + fclose(fp); + } + + if ((wfd = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) { + fprintf(stderr, "ql_mpiexec_start: socket(%s)\n", + strerror(errno)); + exit(1); + } + rc = connect(wfd, (struct sockaddr *)&wsock, wlen); + if (rc == -1) { + fprintf(stderr, "ql_mpiexec_start: connect(%s)\n", + strerror(errno)); + exit(1); + } + write(wfd, "C", 1); + if ((rc = read(wfd, &c, 1)) <= 0) + exit(1); + 
write(wfd, "0", 1); + sendfd(wfd, 0); + if ((rc = read(wfd, &c, 1)) <= 0) + exit(1); + write(wfd, "1", 1); + sendfd(wfd, 1); + if ((rc = read(wfd, &c, 1)) <= 0) + exit(1); + write(wfd, "2", 1); + sendfd(wfd, 2); + if ((rc = read(wfd, &c, 1)) <= 0) + exit(1); + +#ifdef QL_MPIEXEC_FINALIZE + sprintf(tmp,"ssh %s %s %c %s %s %s", + target_host, QL_TALKER_EXECUTION, QL_RET_RESUME, "-n", ql_name , ql_sock_file); + rc = system(tmp); + write(wfd, "F", 1); +#else + if (f_flg == 1) { + sprintf(tmp,"ssh %s %s %c %c %s %s", + target_host, QL_TALKER_EXECUTION, QL_COM_CONN, + QL_EXEC_END, ql_name ,ql_sock_file); + rc = system(tmp); + /* send N and recv E */ + } + else{ + sprintf(tmp,"ssh %s %s %c %c %s %s", + target_host, QL_TALKER_EXECUTION, QL_RET_RESUME, + QL_EXEC_END, ql_name , ql_sock_file); + rc = system(tmp); + /* send R and recv E */ + } + write(wfd, "E", 1); +#endif + + if ((rc = read(wfd, &c, 1)) <= 0) + goto end; + if (c == 'X') { + int wst; + + read(wfd, &wst, sizeof st); + if (WIFSIGNALED(wst)) { + int sig = WTERMSIG(wst); + signal(sig, SIG_DFL); + kill(getpid(), sig); + pause(); + } + exitcode = WEXITSTATUS(wst); + } + close(wfd); + +end: + unlink(ql_param_file); + exit(exitcode); +} diff --git a/executer/user/ql_server.c b/executer/user/ql_server.c new file mode 100644 index 00000000..95b90720 --- /dev/null +++ b/executer/user/ql_server.c @@ -0,0 +1,597 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../include/qlmpi.h" + +#define NALLOC 10 +#define NOLOG + +#ifndef NOLOG +#define LOGFILE "ql_server.log" +int log_open(char *f_name); +int log_close(); +void log_printf(const char *format, ...); +void log_dump(struct client_fd *fd_list,int fd_size); + +FILE * log_fp; +#endif + +int listen_fd = -1; +char file_path[1024]; + +int check_ql_server( char * path,char * file ,char *filep){ + struct stat st; + int rc; + + 
sprintf(filep,"%s/%s",path,file); + + rc = stat(filep,&st); + if (rc == 0) { + fprintf(stderr,"socket file exests. %s\n",filep); + return rc; + } + else { + rc = stat(path,&st); + if ( rc == 0) { + fprintf(stderr,"dir(file) exests. %s %d\n",path,rc); + return 1; + } + else { + mode_t m = st.st_mode; + if (S_ISDIR(m)) { + fprintf(stderr,"dir exests. %s %d\n",path,rc); + return rc; /* dir exist */ + } + else { + if (mkdir(path, (S_IRUSR | S_IWUSR | S_IRWXU | + S_IRGRP | S_IWGRP | S_IRWXG | + S_IROTH | S_IWOTH | S_IRWXO)) == 0) { + fprintf(stderr,"dir create. %s %d\n",path,rc); + return 1; + } + fprintf(stderr,"mkdir error. %s %d\n",path,rc); + return 0; /* mkdir error */ + } + } + } +} + +void terminate(int rc){ + + if (listen_fd >= 0) { + shutdown(listen_fd, 2); + close(listen_fd); + unlink(file_path); + } +#ifndef NOLOG + log_close(); +#endif + exit(rc); +} + +int s_fd_list(char * p_name,int client_type , + struct client_fd *fd_list,int fd_size){ + int i; + for (i = 0; fd_size > i; i++) { + if ((fd_list[i].client == client_type) && + (!strcmp(fd_list[i].name,p_name)) && + (fd_list[i].fd != -1)) { + break; + } + } + return i; +} + +int main( int argc, char *argv[]){ + int i,j, fd, rc = 0, len, maxfd; + int fd_size ; + struct client_fd *fd_list; + fd_set rset, allset; + struct sockaddr_un unix_addr; + char *buf; + int s_indx; +#ifndef NOLOG + int e_no; /*errno copy*/ +#endif + char * null_buff = ""; + + if (argc < 3 ) { + fprintf(stderr," few args \n"); + exit(-1); + } + + for (i = 0; i < 4096; i++) + close(i); + open("/dev/null", O_RDONLY); + open("/dev/null", O_WRONLY); + open("/dev/null", O_WRONLY); + + if (!check_ql_server(argv[1], argv[2] ,file_path)) { + fprintf(stderr,"ql_server already exists.\n"); + exit(-1); + } + signal(SIGINT, terminate); + signal(SIGTERM, terminate); + + listen_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (listen_fd < 0) { + fprintf(stderr,"listen error.\n"); + terminate(rc); + } + +#ifndef NOLOG + log_open(argv[1]); +#endif + 
unix_addr.sun_family = AF_UNIX; + strcpy(unix_addr.sun_path, file_path); +#ifndef NOLOG + log_printf("file_path =%s \n",file_path); +#endif + len = sizeof(unix_addr.sun_family) + strlen(unix_addr.sun_path) + 1; + rc = bind(listen_fd, (struct sockaddr *)&unix_addr, len); + + if (rc < 0) { +#ifndef NOLOG + log_printf("bind error \n",file_path); +#endif + terminate(rc); + } + + // become a daemon + if (fork()) + exit(0); + if (fork()) + exit(0); + setsid(); + + rc = listen(listen_fd, 5); + if (rc < 0) { +#ifndef NOLOG + log_printf("listen error \n"); +#endif + terminate(rc); + } + + FD_ZERO(&allset); + FD_SET(listen_fd, &allset); + maxfd = listen_fd; + fd_size = NALLOC; + fd_list = malloc(sizeof(struct client_fd)*fd_size); + for (i = 0; i < fd_size; i++) { + fd_list[i].fd = -1; + } + +#ifndef NOLOG + log_printf("loop_start \n"); +#endif + for (;;) { + memcpy(&rset, &allset, sizeof(rset)); + rc = select(maxfd + 1, &rset, NULL, NULL, NULL); + if (rc == -1) { +#ifndef NOLOG + e_no = errno; + log_printf("server:select error.\n"); + log_printf("select error string by strerror: %s\n", + strerror(e_no)); + log_printf("select error code: %d\n", e_no); +#endif + terminate(rc); + } +#ifndef NOLOG + log_printf("server:select.\n"); +#endif + + if (FD_ISSET(listen_fd, &rset)) { + len = sizeof(unix_addr); + fd = accept(listen_fd, (struct sockaddr *)&unix_addr, + (socklen_t*)&len); + if (fd < 0) { +#ifndef NOLOG + log_printf("server:accept error.\n"); +#endif + terminate(fd); + } +#ifndef NOLOG + log_printf("server:accept (%d).\n", fd); +#endif + for (i = 0; fd_size > i; i++) { + if (fd_list[i].fd == -1) { + fd_list[i].fd = fd; + break; + } + } + if (i >= fd_size) { + fd_list = realloc(fd_list, + sizeof(int)*(fd_size+NALLOC)); + for (i = fd_size; i < (fd_size + NALLOC); i++) { + fd_list[i].fd = -1; + } + fd_list[fd_size].fd = fd; + fd_size += NALLOC; + } + FD_SET(fd, &allset); + if (fd > maxfd) { + maxfd = fd; + } + } + + for (i = 0; i < fd_size; i++) { + if (fd_list[i].fd == -1) + 
continue; + fd = fd_list[i].fd; + if (!FD_ISSET(fd, &rset)) + continue; + rc = ql_recv(fd, &buf); +#ifndef NOLOG + log_printf("ql_recv (%d) index = %d fd = %d \n", rc,i,fd); +#endif + if(rc < 0){ +#ifndef NOLOG + log_printf("server:recv (%d) error.\n", fd); +#endif + terminate(rc); + } + if (rc == 0) { +#ifndef NOLOG + log_printf("server:closed (%d).\n", fd); +#endif + fd_list[i].fd = -1; + if (strcmp(fd_list[i].name,null_buff)) { + free(fd_list[i].name); + fd_list[i].name = null_buff; + } + FD_CLR(fd, &allset); + maxfd = -1; + for (j = 0; fd_size > j ; j++) { + if (fd > maxfd) { + maxfd = fd; + } + } + close(fd); +#ifndef NOLOG + log_printf("index = %d\n",i); + log_dump(fd_list,fd_size); +#endif + if (maxfd == -1) { + terminate(rc); + } + continue; + } + + if (rc == QL_EXEC_END){ /* swapout from mcexec */ + fd_list[i].client = QL_MCEXEC_PRO; + fd_list[i].name = buf; + fd_list[i].status = QL_EXEC_END; +#ifndef NOLOG + log_printf("index = %d\n",i); + log_dump(fd_list,fd_size); +#endif +/* send E command to ql_talker */ + if ((s_indx = s_fd_list(fd_list[i].name, + QL_MPEXEC,fd_list, + fd_size)) < fd_size) { +#ifndef NOLOG + log_printf("E command to talker %d \n",s_indx); +#endif + rc = ql_send(fd_list[s_indx].fd, + QL_EXEC_END,NULL); +/* fd close for ql_talker */ + FD_CLR(fd_list[s_indx].fd, &allset); + maxfd = -1; + close(fd_list[s_indx].fd); + free(fd_list[s_indx].name); + fd_list[s_indx].fd = -1; + fd_list[s_indx].name = null_buff; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + if (maxfd == -1) terminate(0); + } + else{ + /* ql_talker not found */ +#ifndef NOLOG + log_printf("ql_talker not found\n",i); +#endif + /* send I command to mcexec and param_file put A command*/ + } +#ifndef NOLOG + log_printf("index = %d\n",i); + log_dump(fd_list,fd_size); +#endif + } + else if (rc == QL_RET_RESUME) { + /* recv R command from ql_talker */ + fd_list[i].client = QL_MPEXEC; + fd_list[i].name = buf; + fd_list[i].status = 
QL_RET_RESUME; +#ifndef NOLOG + log_printf("index = %d,fd_size=%d\n", + i,fd_size); + log_dump(fd_list,fd_size); +#endif + /* send R command to mcexec */ + if (((s_indx = s_fd_list(fd_list[i].name, + QL_MCEXEC_PRO , + fd_list,fd_size)) < fd_size) && + fd_list[s_indx].status == QL_EXEC_END) { +#ifndef NOLOG + log_printf("R command to mcexec %d \n",s_indx); + log_dump(fd_list,fd_size); +#endif + rc = ql_send(fd_list[s_indx].fd, + QL_RET_RESUME,NULL); + fd_list[s_indx].status = QL_RET_RESUME; + FD_CLR(fd_list[s_indx].fd, &allset); + close(fd_list[s_indx].fd); + free(fd_list[s_indx].name); + fd_list[s_indx].fd = -1; + fd_list[s_indx].name = null_buff; + maxfd = -1; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + if (maxfd == -1) terminate(0); + } + else{ +/* mcexec not found */ +/* send A command to ql_talker */ +#ifndef NOLOG + log_printf("send A command index = %d,fd_size=%d\n", + i,fd_size); + log_dump(fd_list,fd_size); +#endif + rc = ql_send(fd_list[i].fd, + QL_AB_END,NULL); +/* fd close for ql_talker */ + FD_CLR(fd_list[i].fd, &allset); + close(fd_list[i].fd); + free(fd_list[i].name); + fd_list[i].fd = -1; +// fd_list[i].name = NULL; + fd_list[i].name = null_buff; + maxfd = -1; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + if (maxfd == -1) terminate(0); + } +#ifndef NOLOG + log_printf("index = %d,s_indx=%d\n", + i,s_indx); + log_dump(fd_list,fd_size); +#endif + } + else if (rc == QL_COM_CONN) { + /* connect from ql_mpiexec_* */ + fd_list[i].client = QL_MPEXEC; + fd_list[i].name = buf; + fd_list[i].status = QL_COM_CONN; +#ifndef NOLOG + log_printf("N command index = %d,fd_size=%d\n", + i,fd_size); + log_dump(fd_list,fd_size); +#endif + if ((s_indx = s_fd_list(fd_list[i].name, + QL_MCEXEC_PRO,fd_list, + fd_size)) < fd_size) { + rc = ql_send(fd_list[i].fd, + QL_EXEC_END,NULL); +/* fd close for ql_talker */ + FD_CLR(fd_list[i].fd, &allset); + maxfd = -1; + 
close(fd_list[i].fd); + free(fd_list[i].name); + fd_list[i].fd = -1; + fd_list[i].name = null_buff; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + // if (maxfd == -1) terminate(0); + } +#ifndef NOLOG + log_dump(fd_list,fd_size); +#endif + } + else if(rc == QL_RET_FINAL) { + /* F command from Monitor Process */ + fd_list[i].client = QL_MONITOR; + fd_list[i].name = buf; + fd_list[i].status = QL_RET_FINAL; +#ifndef NOLOG + log_printf("F command index = %d,fd_size=%d\n", + i,fd_size); + log_dump(fd_list,fd_size); +#endif + /* search ql_mpiexec_start process */ + if ((s_indx = s_fd_list(fd_list[i].name, + QL_MPEXEC,fd_list, + fd_size)) < fd_size) { + /* send A command */ + rc = ql_send(fd_list[s_indx].fd, + QL_AB_END,NULL); + /* table clear */ + FD_CLR(fd_list[s_indx].fd, &allset); + maxfd = -1; + close(fd_list[s_indx].fd); + free(fd_list[s_indx].name); + fd_list[s_indx].fd = -1; + fd_list[s_indx].name = null_buff; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + } + /* search mcexec process */ + if ((s_indx = s_fd_list(fd_list[i].name, + QL_MCEXEC_PRO,fd_list, + fd_size)) < fd_size) { + /* table clear */ + FD_CLR(fd_list[s_indx].fd, &allset); + maxfd = -1; + close(fd_list[s_indx].fd); + free(fd_list[s_indx].name); + fd_list[s_indx].fd = -1; + fd_list[s_indx].name = null_buff; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } + } + FD_CLR(fd_list[i].fd, &allset); + close(fd_list[i].fd); + free(fd_list[i].name); + fd_list[i].fd = -1; + fd_list[i].name = null_buff; + maxfd = -1; + for (j = 0; fd_size > j ; j++) { + if (fd_list[j].fd > maxfd) { + maxfd = fd_list[j].fd; + } + } +#ifndef NOLOG + log_printf("F command end index = %d,fd_size=%d\n", + i,fd_size); + log_dump(fd_list,fd_size); +#endif + if (maxfd == -1) + terminate(0); + } + else { +#ifndef NOLOG + log_printf("server:unknwon commond %d (%d).\n", + rc, fd); 
+#endif
+			}
+#ifndef NOLOG
+			log_printf("server:recv (%d) .\n", fd);
+#endif
+		}
+	}
+	terminate(0);
+}
+
+#ifndef NOLOG
+/*
+ * Open LOGFILE inside directory f_path for writing.
+ * Falls back to stderr when the file cannot be opened.  Always returns 0.
+ */
+int log_open(char *f_path){
+	char f_name[1024];
+	/* bounded: an over-long directory name is truncated, not overflowed */
+	snprintf(f_name,sizeof(f_name),"%s/%s",f_path,LOGFILE);
+	if ((log_fp = fopen(f_name,"w")) == NULL) {
+		log_fp = stderr;
+	}
+	return 0;
+}
+
+/*
+ * Close the log stream unless it is one of the standard streams
+ * (log_open() falls back to stderr, which must stay open).
+ */
+int log_close(){
+	if (log_fp != NULL && log_fp != stdout && log_fp != stderr) {
+		fclose(log_fp);
+	}
+	return 0;
+}
+
+/*
+ * printf-style logging to log_fp; a newline is appended and the stream is
+ * flushed after every message.  Messages longer than the local buffer are
+ * truncated.
+ */
+void log_printf(const char *format, ...){
+	va_list arglist;
+	char log[1024];
+
+	va_start(arglist, format);
+	vsnprintf(log, sizeof(log), format, arglist);
+	fprintf(log_fp, "%s\n", log);
+	va_end(arglist);
+	fflush(log_fp);
+}
+
+/*
+ * Dump the client table: one row per slot, unused slots (fd == -1) are
+ * printed with blank columns.
+ */
+void log_dump(struct client_fd *fd_list,int fd_size){
+	int i;
+	for (i = 0; fd_size > i; i++) {
+		if (fd_list[i].fd != -1) {
+			log_printf("|%4d|%4d|%c|%s|\n",fd_list[i].fd,
+				   fd_list[i].client,(char)fd_list[i].status,
+				   fd_list[i].name);
+		}
+		else{
+			log_printf("|%4d|0000| | |\n",fd_list[i].fd);
+		}
+	}
+	log_printf("-----------------------\n");
+}
+#endif
+
+/*
+ * Receive one message from fd and split it into command and payload.
+ *
+ * The message layout is the one ql_send() produces: command character,
+ * blank, four hex digits of payload length, blank, payload -- so the
+ * payload starts at byte offset 7.
+ *
+ * Returns the command character, or the recv() result when it is <= 0,
+ * or -1 when the payload copy cannot be allocated.  When a payload is
+ * present *buf receives a malloc()ed NUL-terminated copy owned by the
+ * caller; otherwise *buf is left untouched.
+ */
+int ql_recv(int fd,char ** buf){
+	char l_buf[QL_BUF_MAX];
+	char comm = 0;
+	unsigned int size = 0;
+	int avail;
+	int rc;
+	int ret;
+
+	/* keep one byte spare so l_buf can always be NUL-terminated */
+	rc = recv(fd, l_buf, QL_BUF_MAX - 1, 0);
+	if (rc <= 0) {
+#ifndef NOLOG
+		log_printf("rc = %d,l_buf=%s\n",rc,"");
+#endif
+		return rc;
+	}
+	l_buf[rc] = '\0';
+#ifndef NOLOG
+	log_printf("rc = %d,l_buf=%s\n",rc,l_buf);
+#endif
+
+	sscanf(l_buf, "%c %x", &comm, &size);
+	ret = (int)(comm);
+#ifndef NOLOG
+	log_printf("COMM=%c size = %x rc= %d\n", ret, size, rc);
+#endif
+	/* never copy more payload than was actually received */
+	avail = (rc > 7) ? rc - 7 : 0;
+	if (size > (unsigned int)avail) {
+		size = avail;
+	}
+	if (size > 0) {
+		*buf = malloc(size+1);
+		if (*buf == NULL) {
+			return -1;
+		}
+		memcpy(*buf, &l_buf[7], size);
+		/* terminate the copy itself, not the caller's pointer slot */
+		(*buf)[size] = 0x00;
+#ifndef NOLOG
+		log_printf("COMM=%c size = %x *buf= %s\n",ret,size,*buf);
+#endif
+	}
+#ifndef NOLOG
+	log_printf("ret = %d\n", ret);
+#endif
+	return ret;
+}
+
+int ql_send(int fd,int command,char *buf){
+	char *lbuf;
+	int size;
+	int rc;
+
+	if (buf != NULL) {
+		size = strlen(buf);
+		lbuf = alloca(size+7+1);
+		sprintf(lbuf,"%c %04x %s",command,size,buf);
+	}
+	else{
+		size = 0;
+		lbuf = alloca(6+1);
+		sprintf(lbuf,"%c 0000",command);
+	}
+#ifndef NOLOG
+	log_printf("send lbuf=%s",lbuf);
+#endif
+	rc=send(fd,lbuf,strlen(lbuf),0);
+	
return rc;
+}
+
diff --git a/executer/user/ql_talker.c b/executer/user/ql_talker.c
new file mode 100644
index 00000000..1e6b64d7
--- /dev/null
+++ b/executer/user/ql_talker.c
@@ -0,0 +1,101 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../include/qlmpi.h"
+
+int fd = -1;
+
+#define BUF_MAX 256
+
+/*
+ * Shut down and close the server connection (if any) and exit with rc.
+ * Also installed as the SIGINT/SIGTERM handler, in which case rc is the
+ * signal number.
+ */
+void terminate(int rc)
+{
+	if(fd >= 0){
+		shutdown(fd, 2);
+		close(fd);
+	}
+	exit(rc);
+}
+
+/*
+ * ql_talker: send one command to ql_server and optionally wait for a reply.
+ *
+ *   argv[1]  command to send (nothing is sent when it is empty)
+ *   argv[2]  expected reply command, or "-n" for "do not wait"
+ *   argv[3]  payload
+ *   argv[4]  path of the server's UNIX domain socket
+ *
+ * Exits with 0 on success (or when the expected reply arrives), with -2
+ * when the server answers QL_AB_END, and with a negative value on errors.
+ */
+int main(int argc, char* argv[])
+{
+	int rc=-1, len, n;
+	struct sockaddr_un unix_addr;
+	char buf[BUF_MAX];
+
+	signal(SIGINT, terminate);
+	signal(SIGTERM, terminate);
+
+	if (argc < 5) {
+#ifdef QL_DEBUG
+		printf("too few arguments\n");
+#endif
+		return rc;
+	}
+	/* sun_path is a fixed-size array; refuse a path that does not fit */
+	if (strlen(argv[4]) >= sizeof(unix_addr.sun_path)) {
+#ifdef QL_DEBUG
+		printf("socket path too long\n");
+#endif
+		return rc;
+	}
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0) {
+#ifdef QL_DEBUG
+		printf("client:socket error.\n");
+#endif
+		terminate(rc);
+	}
+#ifdef QL_DEBUG
+	printf("client:socket.\n");
+#endif
+	unix_addr.sun_family = AF_UNIX;
+	strcpy(unix_addr.sun_path, argv[4]);
+#ifdef QL_DEBUG
+	printf("socket_path %s\n",argv[4]);
+#endif
+	len = sizeof(unix_addr.sun_family)+strlen(unix_addr.sun_path) + 1;
+	rc = connect(fd, (struct sockaddr*)&unix_addr, len);
+	if (rc < 0) {
+#ifdef QL_DEBUG
+		printf("client:connect error.\n");
+		printf("%s %s\n", unix_addr.sun_path, strerror(errno));
+#endif
+		terminate(rc);
+	}
+
+	if (argv[1][0]) {
+		n = snprintf(buf,sizeof(buf),"%s %04x %s",argv[1],
+			     (unsigned int)strlen(argv[3]),argv[3]);
+		if (n < 0 || n >= (int)sizeof(buf)) {
+			/* message does not fit; do not send a truncated one */
+#ifdef QL_DEBUG
+			printf("message too long\n");
+#endif
+			terminate(-1);
+		}
+		rc = send(fd, buf, strlen(buf) + 1, 0);
+		if (rc < 0) {
+#ifdef QL_DEBUG
+			printf("send error.\n");
+#endif
+			terminate(rc);
+		}
+	}
+	if (strcmp(argv[2],"-n")) {
+#ifdef QL_DEBUG
+		printf("waiting reply message from ql_server ...\n");
+#endif
+		/* leave room for the terminator added below */
+		rc = recv(fd, buf, BUF_MAX - 1, 0);
+		if (rc < 0) {
+#ifdef QL_DEBUG
+			printf("recv error\n");
+#endif
+			terminate(rc);
+		}
+		/* on rc == 0 (peer closed) this also clears the stale request */
+		buf[rc] = '\0';
+#ifdef QL_DEBUG
+		printf("%s\n",buf);
+#endif
+		if (buf[0] == argv[2][0]){
+			terminate(0);
+		}
+		if (buf[0] == QL_AB_END){
+			/* abnormal end */
+			terminate(-2);
+		}
+	}
+
+	terminate(0);
+	return rc; /*not reached */
+}
diff --git a/executer/user/qlmpilib.c b/executer/user/qlmpilib.c
new file mode 100644
index 00000000..952ac6f5
--- /dev/null
+++ b/executer/user/qlmpilib.c
@@ -0,0 +1,320 @@
+#define _GNU_SOURCE
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mpi.h"
+#include "../include/qlmpilib.h"
+#include "../include/qlmpi.h"
+#include "../include/pmi.h"
+
+
+#define BUF_SIZE (32*1024)
+#define NALLOC 10
+#define QL_SUCCESS 0
+#define QL_NORMAL 2
+
+//#define QL_DEBUG
+
+static char ql_name[33];
+static char swap_file[1024];
+static char param_file[1024];
+static int ql_mode_flg = 0;	/* 0 is normal */
+static int rank = -1;
+static char buffer[BUF_SIZE];
+static int ql_initialized;
+int mck_ql_argc = NALLOC;
+char **mck_ql_argv;
+char **mck_ql_env;
+
+/* Free a NULL-terminated vector of malloc()ed strings and the vector itself. */
+static void freev(char **v)
+{
+	char **a;
+
+	for (a = v; *a; a++)
+		free(*a);
+	free(v);
+}
+
+/*
+ * Copy in to out, replacing every "%XY" (two following characters) by the
+ * byte with hexadecimal value XY.  Non-hex characters in an escape
+ * contribute 0.  out must have room for strlen(in) + 1 bytes.
+ */
+static void esc_get(char *in, char *out)
+{
+	char *p;
+	char *q;
+	int c;
+
+	for (p = in, q = out; *p; p++) {
+		if (*p == '%' && p[1] && p[2]) {
+			int i;
+			for (i = 0, c = 0; i < 2; i++) {
+				p++;
+				c <<= 4;
+				if (*p >= '0' && *p <= '9')
+					c += *p - '0';
+				else if (*p >= 'A' && *p <= 'F')
+					c += *p - 'A' + 10;
+				else if (*p >= 'a' && *p <= 'f')
+					c += *p - 'a' + 10;
+			}
+			*(q++) = c;
+		}
+		else
+			*(q++) = *p;
+	}
+	*q = '\0';
+}
+
+/* Issue syscall number 801 with the swap file name and the work buffer. */
+static int swapout(char *fname, void *buf, size_t sz, int flag)
+{
+	int cc;
+
+	cc = syscall(801, fname, buf, sz, flag);
+
+	return cc;
+}
+
+/*
+ * Read the QL_NAME environment variable into ql_name.
+ * Returns 1 when it is set and fits, 0 otherwise (normal, non-QL mode).
+ */
+static int ql_get_option() {
+	char *env_str;
+
+	env_str = getenv(QL_NAME);
+	if (env_str == NULL) {
+		return 0;
+	}
+	else if (strlen(env_str) >= sizeof(ql_name)) {
+		/* would overflow ql_name; treat as "not a QL job" */
+		fprintf(stderr, "%s is too long\n", QL_NAME);
+		return 0;
+	}
+	else{
+		strcpy(ql_name,env_str);
+		return 1;
+	}
+
+}
+
+int ql_init() {
+	char tmp_path[1024];
+	char *env_str;
+
+	if (ql_initialized) {
+		return QL_CONTINUE;
+	}
+
+	ql_mode_flg = ql_get_option();
+#ifdef QL_DEBUG
+	printf("flg = %d \n",ql_mode_flg);
+#endif
+
+	if (ql_mode_flg) {
+		MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+		/* get param_file path */
+		env_str = 
getenv(QL_PARAM_ENV);
+		if (env_str == NULL) {
+			/* HOME may be unset too; never hand NULL to %s */
+			env_str = getenv("HOME");
+			if (env_str == NULL)
+				env_str = ".";
+		}
+		snprintf(tmp_path,sizeof(tmp_path),"%s/",env_str);
+		snprintf(param_file,sizeof(param_file),"%s%s%s",
+			 tmp_path,ql_name,QL_PARAM_EXTE);
+
+#ifdef QL_DEBUG
+		printf("param_file = %s\n",param_file);
+#endif
+
+		/* get swap_file path*/
+		env_str = getenv(QL_SWAP_ENV);
+		snprintf(tmp_path,sizeof(tmp_path),"%s",
+			 env_str ? env_str : QL_SWAP_PATH);
+		snprintf(swap_file,sizeof(swap_file),"%s/%s%d",
+			 tmp_path,ql_name,rank);
+
+#ifdef QL_DEBUG
+		printf("swap_file = %s rank=%d\n",swap_file,rank);
+#endif
+		ql_initialized = 1;
+		return QL_SUCCESS;
+	}
+
+	ql_initialized = 1;
+	return QL_NORMAL;
+}
+
+/*
+ * Decode one parameter-file line of the form "TAG LEN ESCAPED-TEXT" into
+ * a freshly malloc()ed string.  Returns NULL when the line is malformed
+ * or memory is exhausted.
+ */
+static char *ql_decode_entry(char *line)
+{
+	char *t;
+	char *out;
+	size_t need;
+	int n;
+
+	t = strchr(line, ' ');
+	if (!t)
+		return NULL;
+	n = atoi(t + 1);
+	t = strchr(t + 1, ' ');
+	if (!t)
+		return NULL;
+	t++;
+	/* esc_get() writes at most strlen(t) + 1 bytes; size the buffer
+	 * from the text itself instead of trusting the length field */
+	need = strlen(t);
+	if (n > 0 && (size_t)n > need)
+		need = n;
+	out = malloc(need + 1);
+	if (out)
+		esc_get(t, out);
+	return out;
+}
+
+/*
+ * Swap the process out and, once it is resumed, read the parameter file
+ * for the next run.
+ *
+ * On return *argc / *argv (when non-NULL) describe the new argument
+ * vector and environ points to the new environment, if the parameter
+ * file supplied them.  Returns QL_CONTINUE when the file requests a
+ * resume, QL_EXIT otherwise (including non-QL mode and all errors).
+ */
+int ql_client(int *argc,char ***argv)
+{
+	int rc;
+	int ret = QL_EXIT;
+	char buf[4096];
+	FILE *fp;
+	char **envs;
+	char **args;
+	char **a;
+	char **e;
+	int max_args = 0;	/* entries the args vector can hold */
+	int max_envs = 0;	/* entries the envs vector can hold */
+
+	if (ql_mode_flg == 0) return(QL_EXIT);
+
+	syscall(803);
+	rc = PMI_Barrier();
+
+	rc = swapout(swap_file, buffer, BUF_SIZE, 0);
+
+#ifdef QL_DEBUG
+	printf(" swapout rc=%d\n",rc);
+#endif
+	if (rc == -1) {
+		/* terminate due to swap error */
+		syscall(804);
+		return QL_EXIT;
+	}
+
+	/* param file */
+	if ((fp = fopen(param_file,"r")) == NULL) {
+		/* param file open error */
+#ifdef QL_DEBUG
+		printf("param_file open error\n");
+#endif
+		syscall(804);
+		return QL_EXIT;
+	}
+
+	a = args = NULL;
+	e = envs = NULL;
+	while ((fgets(buf, sizeof(buf), fp)) != NULL) {
+		int cmd = buf[0];
+		char *t;
+		int n;
+
+		/* strip the newline if there is one; the last line of the
+		 * file may come without it */
+		buf[strcspn(buf, "\n")] = '\0';
+		if (cmd == QL_COMMAND) {
+			t = strchr(buf, '=');
+			if (!t ||
+			    (t[1] != QL_RET_RESUME && t[1] != QL_RET_FINAL)) {
+				fprintf(stderr, "invalid file format\n");
+				exit(1);
+			}
+			t++;
+			if (*t == QL_RET_RESUME) {
+				ret = QL_CONTINUE;
+#ifdef QL_DEBUG
+				printf("COM = %c ret = %d\n", *t, ret);
+#endif
+			}
+			else {
+				ret = QL_EXIT;
+#ifdef QL_DEBUG
+				printf(" ret = %d",ret);
+#endif
+			}
+			t = strchr(t, ' ');
+			if (t) {
+				n = atoi(t + 1);
+				if (n < 0) {
+					fprintf(stderr, "invalid file format\n");
+					exit(1);
+				}
+				args = malloc(sizeof(char *) * (n + 1));
+				a = args;
+				max_args = args ? n : 0;
+				t = strchr(t + 1, ' ');
+				if (t) {
+					n = atoi(t + 1);
+					if (n < 0) {
+						fprintf(stderr, "invalid file format\n");
+						exit(1);
+					}
+					envs = malloc(sizeof(char *) * (n + 1));
+					e = envs;
+					max_envs = envs ? n : 0;
+				}
+			}
+
+		}
+		else if (cmd == QL_ARG) {
+			/* ignore entries beyond the announced count; the last
+			 * slot is reserved for the NULL terminator */
+			if (!args || a - args >= max_args)
+				continue;
+			*a = ql_decode_entry(buf);
+			if (*a)
+				a++;
+		}
+		else if (cmd == QL_ENV) {
+			if (!envs || e - envs >= max_envs)
+				continue;
+			*e = ql_decode_entry(buf);
+			if (*e)
+				e++;
+		}
+		else {
+		}
+	}
+	fclose(fp);
+
+	if (args) {
+		*a = NULL;
+		if (mck_ql_argv)
+			freev(mck_ql_argv);
+		mck_ql_argv = args;
+		if (argv)
+			*argv = args;
+		for (mck_ql_argc = 0; mck_ql_argv[mck_ql_argc]; mck_ql_argc++);
+		if (argc)
+			*argc = mck_ql_argc;
+	}
+	if (envs) {
+		*e = NULL;
+		if (mck_ql_env)
+			freev(mck_ql_env);
+		mck_ql_env = envs;
+		environ = envs;
+	}
+
+	syscall(804);
+#ifdef QL_DEBUG
+	printf(" return rtn = %d\n",ret);
+#endif
+	return ret;
+
+}
+
+/* MPI_Init wrapper: run the real PMPI_Init, then initialise qlmpi. */
+int MPI_Init(int *argc,char ***argv){
+	int rc = 0;
+
+	rc = PMPI_Init(argc,argv);
+	if (rc == MPI_SUCCESS)
+		ql_init();
+
+	return rc;
+}
+
+/* Fortran binding of the MPI_Init wrapper; pmpi_init_ is a weak symbol. */
+void
+mpi_init_(int *ierr)
+{
+	extern void pmpi_init_(int *ierr) __attribute__ ((__weak__));
+
+	if (!pmpi_init_) {
+		*ierr = MPI_ERR_OTHER;
+		return;
+	}
+
+	pmpi_init_(ierr);
+	if (*ierr == MPI_SUCCESS)
+		ql_init();
+
+	return;
+}
+
+/* Fortran binding of ql_client(); the new argument vector is discarded. */
+void ql_client_(int *ierr)
+{
+	int argc;
+	char **argv;
+
+	*ierr = ql_client(&argc, &argv);
+}
diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index 14e9ede4..f935e988 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -5,6 +5,7 @@ OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += rbtree.o +OBJS += pager.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include 
-I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions diff --git a/kernel/include/process.h b/kernel/include/process.h index 79efc9b7..495e3a26 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -50,6 +50,7 @@ #define VR_MEMTYPE_WB 0x00000000 /* write-back */ #define VR_MEMTYPE_UC 0x01000000 /* uncachable */ #define VR_MEMTYPE_MASK 0x0f000000 +#define VR_PAGEOUT 0x10000000 #define PROT_TO_VR_FLAG(prot) (((unsigned long)(prot) << 16) & VR_PROT_MASK) #define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4) @@ -248,6 +249,7 @@ struct process_vm; struct vm_regions; struct vm_range; +struct swapinfo; #define HASH_SIZE 73 @@ -711,6 +713,7 @@ struct process_vm { struct list_head vm_range_numa_policy_list; struct vm_range *range_cache[VM_RANGE_CACHE_SIZE]; int range_cache_ind; + struct swapinfo *swapinfo; }; static inline int has_cap_ipc_lock(struct thread *th) diff --git a/kernel/include/swapfmt.h.in b/kernel/include/swapfmt.h.in new file mode 100644 index 00000000..7203bc18 --- /dev/null +++ b/kernel/include/swapfmt.h.in @@ -0,0 +1,34 @@ +/* + * \file swapfmt.h + * License details are found in the file LICENSE. 
+ * \brief
+ *	swapped out file format
+ * \author Yutaka Ishikawa
+ */
+
+#define MCKERNEL_SWAP		"McKernel swap"
+#define MCKERNEL_SWAP_VERSION	"@MCKERNEL_VERSION@"
+#define SWAP_HLEN		16
+struct swap_header {
+	char	magic[SWAP_HLEN];	/* magic string, see MCKERNEL_SWAP */
+	char	version[SWAP_HLEN];	/* same as McKernel version */
+	unsigned int	count_sarea;	/* number of swapped area entries */
+	unsigned int	count_marea;	/* number of mlocked area entries */
+};
+
+struct swap_areainfo {
+	unsigned long start;	/* virtual address */
+	unsigned long end;	/* virtual address */
+	unsigned long pos;	/* swap: file position in this file
+				 * mlock: physical address */
+	unsigned long flag;	/* flag in vm_range */
+};
+
+/*
+ * File layout; the saved contents of the swapped areas follow the tables:
+ * +--------------------
+ * | swap_header
+ * +--------------------
+ * | swap_areainfo[count_sarea + count_marea]
+ * |   (presumably swapped areas first -- confirm against do_pageout)
+ */
diff --git a/kernel/pager.c b/kernel/pager.c
new file mode 100644
index 00000000..cf38e8de
--- /dev/null
+++ b/kernel/pager.c
@@ -0,0 +1,837 @@
+/*
+ * \file pager.c
+ *  License details are found in the file LICENSE.
+ * \brief
+ *	paging system
+ * \author Yutaka Ishikawa
+ */
+/*
+ * HISTORY:
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define O_RDONLY	00000000
+#define O_WRONLY	00000001
+#define O_RDWR		00000002
+#define O_CREAT		00000100
+#define O_TRUNC		00001000
+#define SEEK_SET	0	/* from include/uapi/linux/fs.h in Linux */
+#define SEEK_CUR	1	/* from include/uapi/linux/fs.h in Linux */
+#define IS_TEXT(start, region) ((start) == (region)->text_start)
+#define IS_DATA(start, region) ((start) == (region)->data_start)
+#define IS_STACK(start, region) ((start) == (region)->stack_start)
+/* arguments are parenthesized so that any expression may be passed */
+#define IS_INVALID_USERADDRESS(addr, region)	\
+	((((unsigned long) (addr)) < (region)->user_start)	\
+	 || ((unsigned long) (addr)) >= (region)->user_end)
+#define IS_INVALID_LENGTH(len, region)	\
+	((len) > ((region)->user_end - (region)->user_start))
+#define IS_READONLY(flag) (((flag)&VR_PROT_WRITE) == 0)
+#define IS_NOTUSER(flag) (((flag)&VR_AP_USER) == 0)
+
+
+//#define DEBUG_PRINT_PROCESS
+
+#ifdef DEBUG_PRINT_PROCESS
+#define dkprintf(...) kprintf(__VA_ARGS__)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#endif
+
+/*
+ * Contiguous pages are represented by the "addrpair" structure.
+ *  - The swap_area, whose type is "struct arealist", keeps swappable pages
+ *    using "areaent" structures, each of which keeps an array of
+ *    "addrpair" structures.
+ *  - The mlock_area is also a "struct arealist", keeping pages locked by
+ *    both McKernel and Linux.
+ *  - The mlock_container is a "struct mlockcntnr": a cursor (areaent and
+ *    entry count) into mlock_area.
+ */
+/*
+ * The page areas are independently managed by McKernel and Linux.
+ * Pages locked by the Linux kernel are not known by McKernel. To get the
+ * information, the mlockcntnr structure is used.
+ *   The mlockcntnr records the position in mlock_area at the time
+ *   mlockcntnr_sethead() is called (presumably so that the entries added
+ *   for the current range can be walked afterwards; the original sentence
+ *   was left unfinished).
+ */
+#define MLOCKADDRS_SIZE	128
+struct addrpair {
+	unsigned long	start;
+	unsigned long	end;
+	unsigned long	flag;
+};
+struct areaent {
+	struct areaent	*next;
+	int		count;	/* used entries in pair[] */
+	struct addrpair	pair[MLOCKADDRS_SIZE];
+};
+
+struct arealist {
+	struct areaent	*head;
+	struct areaent	*tail;
+	int		count;	/* used entries over all areaents */
+};
+
+struct mlockcntnr {
+	struct areaent	*from;
+	int		ccount;
+	struct areaent	*cur;
+};
+
+struct swapinfo {
+	struct swap_header	*swphdr;
+	struct swap_areainfo	*swap_info, *mlock_info;
+
+	struct arealist		swap_area;
+	struct arealist		mlock_area;
+	struct mlockcntnr	mlock_container;
+#define UDATA_BUFSIZE	(8*1024)
+	char	*swapfname;
+	char	*udata_buf; /* To read-store data from Linux to user space */
+
+	void	*user_buf;	/* work buffer handed to myalloc_init() */
+	size_t	ubuf_size, ubuf_alloced;
+};
+
+/* Debug print of the start/end addresses of every region in *region. */
+static void
+area_print(struct vm_regions *region)
+{
+	dkprintf("text %016lx:%016lx\n", region->text_start, region->text_end);
+	dkprintf("data %016lx:%016lx\n", region->data_start, region->data_end);
+	dkprintf("brk %016lx:%016lx\n", region->brk_start, region->brk_end);
+	dkprintf("map %016lx:%016lx\n", region->map_start, region->map_end);
+	dkprintf("stack %016lx:%016lx\n", region->stack_start, region->stack_end);
+	dkprintf("user %016lx:%016lx\n", region->user_start, region->user_end);
+}
+
+
+/*
+ * mlock the buffer [p, p + sz) and make it the arena myalloc() carves
+ * its allocations from.  Returns 0, or the negative sys_mlock() result.
+ */
+static int
+myalloc_init(struct swapinfo *si, void *p, size_t sz)
+{
+	extern SYSCALL_DECLARE(mlock);
+	ihk_mc_user_context_t ctx0;
+	int	cc;
+
+	/* pin the buffer down in McKernel side */
+	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) p;
+	ihk_mc_syscall_arg1(&ctx0) = sz;
+	cc = sys_mlock(__NR_mlock, &ctx0);
+	if (cc < 0) return cc;
+	/* init */
+	si->user_buf = p;
+	si->ubuf_size = sz;
+	si->ubuf_alloced = 0;
+	dkprintf("myalloc_init: buffer(%p) size(0x%lx)\n", si->user_buf, si->ubuf_size);
+	return 0;
+}
+
+/* munlock the arena set up by myalloc_init(). */
+void
+myalloc_finalize(struct swapinfo *si)
+{
+	extern SYSCALL_DECLARE(munlock);
+	ihk_mc_user_context_t ctx0;
+
+	/* unpindown in McKernel side */
+	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) si->user_buf;
+	ihk_mc_syscall_arg1(&ctx0) = si->ubuf_size;
+	sys_munlock(__NR_munlock, &ctx0);
+}
+
+/*
+ * Bump allocator over the arena of si: returns sz bytes, or NULL when
+ * fewer than sz + 1 bytes are left.  Nothing is ever given back.
+ */
+void *
+myalloc(struct swapinfo *si, size_t sz)
+{
+	void	*p = NULL;
+
+	/* compare against the remaining space so that a huge sz cannot
+	 * wrap the sum around and pass the check */
+	if (sz < si->ubuf_size - si->ubuf_alloced) {
+		p = (void*) &((char*)si->user_buf)[si->ubuf_alloced];
+		si->ubuf_alloced += sz;
+	}
+	return p;
+}
+
+void
+myfree()
+{
+	/* nothing so far */
+}
+
+/* Forward open(2) to Linux; fname must be addressable from the Linux side. */
+static int
+linux_open(char *fname, int flag, int mode)
+{
+	ihk_mc_user_context_t ctx0;
+	int		fd;
+
+	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) fname;
+	ihk_mc_syscall_arg1(&ctx0) = flag;
+	ihk_mc_syscall_arg2(&ctx0) = mode;
+	fd = syscall_generic_forwarding(__NR_open, &ctx0);
+	return fd;
+}
+
+/* Forward unlink(2) to Linux. */
+static int
+linux_unlink(char *fname)
+{
+	ihk_mc_user_context_t ctx0;
+
+	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) fname;
+	return syscall_generic_forwarding(__NR_unlink, &ctx0);
+}
+
+/*
+ * Forward read(2) to Linux, retrying on -EINTR and on short reads until
+ * count bytes arrived.  Returns the number of bytes read; when nothing
+ * was read, the last result (0 or a negative error) is returned instead.
+ */
+static ssize_t
+linux_read(int fd, void *buf, size_t count)
+{
+	ihk_mc_user_context_t ctx0;
+	ssize_t		sz;
+	size_t		count0 = count;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	sz = 0;
+	for (;;) {
+		ssize_t sz0;
+
+		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) buf;
+		ihk_mc_syscall_arg2(&ctx0) = count;
+		sz0 = syscall_generic_forwarding(__NR_read, &ctx0);
+		if (sz0 == -EINTR)
+			continue;
+		if (sz0 <= 0) {
+			if (sz == 0)
+				sz = sz0;
+			break;
+		}
+		sz += sz0;
+		if (sz == count0)
+			break;
+		count -= sz0;
+		buf = (char *)buf + sz0;
+	}
+	return sz;
+}
+
+/* Same contract as linux_read(), for write(2). */
+static ssize_t
+linux_write(int fd, void *buf, size_t count)
+{
+	ihk_mc_user_context_t ctx0;
+	ssize_t		sz;
+	size_t		count0 = count;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	sz = 0;
+	for (;;) {
+		ssize_t sz0;
+
+		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) buf;
+		ihk_mc_syscall_arg2(&ctx0) = count;
+		sz0 = syscall_generic_forwarding(__NR_write, &ctx0);
+		if (sz0 == -EINTR)
+			continue;
+		if (sz0 <= 0) {
+			if (sz == 0)
+				sz = sz0;
+			break;
+		}
+		sz += sz0;
+		if (sz == count0)
+			break;
+		count -= sz0;
+		buf = (char *)buf + sz0;
+	}
+	return sz;
+}
+
+/* Forward lseek(2) to Linux; returns the new offset or a negative error. */
+static off_t
+linux_lseek(int fd, off_t off, int whence)
+{
+	ihk_mc_user_context_t ctx0;
+	off_t	cc;	/* off_t, not int: offsets may exceed 2GB */
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	ihk_mc_syscall_arg1(&ctx0) = off;
+	ihk_mc_syscall_arg2(&ctx0) = whence;
+	cc = syscall_generic_forwarding(__NR_lseek, &ctx0);
+	return cc;
+}
+
+/* Forward close(2) to Linux. */
+static int
+linux_close(int fd)
+{
+	ihk_mc_user_context_t ctx0;
+	int		cc;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	cc = syscall_generic_forwarding(__NR_close, &ctx0);
+	return cc;
+}
+
+/*
+ * The munmap syscall from McKernel is handled by mccntrl module.
+ * An extra argument, flag, is to set new remote page table if not zero.
+ */
+static int
+linux_munmap(void *addr, size_t len, int flag)
+{
+	ihk_mc_user_context_t ctx0;
+	int		cc;
+
+	ihk_mc_syscall_arg0(&ctx0) = (uintptr_t) addr;
+	ihk_mc_syscall_arg1(&ctx0) = len;
+	ihk_mc_syscall_arg2(&ctx0) = flag;
+	cc = syscall_generic_forwarding(__NR_munmap, &ctx0);
+	return cc;
+}
+
+/*
+ * Open fname on the Linux side.  The name is first copied into the
+ * bounce buffer si->udata_buf; names that do not fit are rejected.
+ */
+static int
+pager_open(struct swapinfo *si, char *fname, int flag, int mode)
+{
+	int	fd;
+	if (strlen(fname) >= UDATA_BUFSIZE) return -EINVAL;
+	strcpy(si->udata_buf, fname);
+	fd = linux_open(si->udata_buf, flag, mode);
+	return fd;
+}
+
+/* Unlink fname on the Linux side, via the bounce buffer like pager_open(). */
+static int
+pager_unlink(struct swapinfo *si, char *fname)
+{
+	if (strlen(fname) >= UDATA_BUFSIZE) return -EINVAL;
+	strcpy(si->udata_buf, fname);
+	return linux_unlink(si->udata_buf);
+}
+
+/*
+ * Read size bytes from fd into user memory at start, UDATA_BUFSIZE bytes
+ * at a time through si->udata_buf.
+ * Returns size on success; a short or failed read returns the
+ * linux_read() result of the failing chunk, a failed copy -EFAULT.
+ */
+static ssize_t
+pager_read(struct swapinfo *si, int fd, void *start, size_t size)
+{
+	ssize_t	off, sz, rs;
+
+	kprintf("pager_read: %lx (%lx)\n",
+		(unsigned long) start, (unsigned long) size);
+	for (off = 0; off < size; off += sz) {
+		sz = size - off;
+		sz = (sz > UDATA_BUFSIZE) ? UDATA_BUFSIZE : sz;
+		rs = linux_read(fd, si->udata_buf, sz);
+		if (rs != sz) return rs;
+		if (copy_to_user((char *) start + off, si->udata_buf, sz))
+			return -EFAULT;
+	}
+	return off;
+}
+
+/* Write size bytes at start to fd; returns the linux_write() result. */
+static ssize_t
+pager_write(int fd, void *start, size_t size)
+{
+	ssize_t	sz;
+
+	sz = linux_write(fd, start, size);
+	return sz;
+}
+
+/*
+ * Ask the Linux side for the address pairs it has locked within
+ * [start, end); at most nent entries are stored at addr.  The request
+ * is forwarded as an mmap call whose first argument is
+ * PAGER_REQ_MLOCK_LIST.  Returns the forwarded call's result
+ * (presumably the number of entries stored -- see arealist_update()
+ * in the caller).
+ */
+static int
+mlocklist_req(unsigned long start, unsigned long end, struct addrpair *addr, int nent)
+{
+	ihk_mc_user_context_t ctx0;
+	int		cc;
+
+#define PAGER_REQ_MLOCK_LIST	0x0008
+	ihk_mc_syscall_arg0(&ctx0) = PAGER_REQ_MLOCK_LIST;
+	ihk_mc_syscall_arg1(&ctx0) = start;
+	ihk_mc_syscall_arg2(&ctx0) = end;
+	ihk_mc_syscall_arg3(&ctx0) = (unsigned long) addr;
+	ihk_mc_syscall_arg4(&ctx0) = nent;
+	cc = syscall_generic_forwarding(__NR_mmap, &ctx0);
+	return cc;
+}
+
+/*
+ * If the entry following the last used one has start == -1, more pages
+ * locked by Linux exist; its end field is then stored in *start as the
+ * address to continue from.  Returns 1 in that case, 0 otherwise.
+ */
+static int
+mlocklist_morereq(struct swapinfo *si, unsigned long *start)
+{
+	struct areaent *ent = si->mlock_area.tail;
+
+	/* pair[count] is only valid while count indexes inside the array */
+	if (ent->count < 0 || ent->count >= MLOCKADDRS_SIZE) {
+		return 0;
+	}
+	dkprintf("mlocklist_morereq: start = %ld and = %ld\n",
+		 ent->pair[ent->count].start, ent->pair[ent->count].end);
+	if (ent->pair[ent->count].start != (unsigned long) -1) {
+		return 0;
+	}
+	*start = ent->pair[ent->count].end;
+	return 1;
+}
+
+/* Give *areap one zeroed areaent as both head and tail. */
+static int
+arealist_alloc(struct swapinfo *si, struct arealist *areap)
+{
+	areap->head = areap->tail = myalloc(si, sizeof(struct areaent));
+	if (areap->head == NULL) return -ENOMEM;
+	memset(areap->head, 0, sizeof(struct areaent));
+	return 0;
+}
+
+/* Set up the swap_area and mlock_area lists of si. */
+static int
+arealist_init(struct swapinfo *si)
+{
+	int	cc;
+
+	if ((cc = arealist_alloc(si, &si->swap_area)) < 0) return cc;
+	cc = arealist_alloc(si, &si->mlock_area);
+	return cc;
+}
+
+
+/* Release every areaent of *area and reset the list to empty. */
+static void
+arealist_free(struct arealist *area)
+{
+	struct areaent *tmp, *next;
+	for (tmp = area->head; tmp != NULL; tmp = next) {
+		/* fetch the link first: tmp must not be touched once freed */
+		next = tmp->next;
+		myfree(tmp);
+	}
+	memset(area, 0, sizeof(struct arealist));
+	return;
+}
+
+/*
+ * returns the start address of addrpair and its size
+ */
+static int
+arealist_get(struct swapinfo *si, struct 
addrpair **pair, struct arealist *area)
+{
+	struct areaent *tmp;
+	struct areaent *tail = area->tail;
+	if (tail->count < MLOCKADDRS_SIZE - 1) { /* at least two entries are needed */
+		if (pair) *pair = &tail->pair[tail->count];
+		return MLOCKADDRS_SIZE - tail->count;
+	}
+	/* tail is (nearly) full: chain a fresh, zeroed areaent behind it */
+	tmp = myalloc(si, sizeof(struct areaent));
+	if (tmp == NULL) {
+		return -1;
+	}
+	memset(tmp, 0, sizeof(struct areaent));
+	area->tail->next = tmp;
+	area->tail = tmp;
+	if (pair) *pair = area->tail->pair;
+	return MLOCKADDRS_SIZE;
+};
+
+/* Account for cnt entries that were stored at the slot arealist_get() returned. */
+static void
+arealist_update(int cnt, struct arealist *area)
+{
+	area->tail->count += cnt;
+	area->count += cnt;
+}
+
+/* Append the entry (start, end, flag) to *area.  Returns 0, or -1 when out of memory. */
+static int
+arealist_add(struct swapinfo *si, unsigned long start, unsigned long end,
+	     unsigned long flag, struct arealist *area)
+{
+	int	cc;
+	struct addrpair *addr;
+
+	cc = arealist_get(si, &addr, area);
+	if (cc < 0) return -1;
+	addr->start = start; addr->end = end; addr->flag = flag;
+	arealist_update(1, area);
+	return 0;
+}
+
+/*
+ * Flatten *areap into the info[] array.
+ * With flag != 0 the pos field of each entry is its position in the file:
+ * off plus the sizes of all preceding entries.  With flag == 0 it is the
+ * physical address of the entry's start, looked up in the page table of vm.
+ * Returns the number of entries written to info[].
+ */
+static int
+arealist_preparewrite(struct arealist *areap, struct swap_areainfo *info,
+		      ssize_t off, struct process_vm *vm, int flag)
+{
+	struct areaent *ent;
+	int	count = 0;
+	ssize_t	totsz = 0;
+	struct page_table *pt = vm->address_space->page_table;
+
+	for (ent = areap->head; ent != NULL; ent = ent->next) {
+		int	i;
+		for (i = 0; i < ent->count; i++, count++) {
+			ssize_t	sz = ent->pair[i].end - ent->pair[i].start;
+			info[count].start = ent->pair[i].start;
+			info[count].end = ent->pair[i].end;
+			info[count].flag = ent->pair[i].flag;
+			if (flag) { /* position in file */
+				info[count].pos = off + totsz;
+			} else { /* physical memory */
+				if (ihk_mc_pt_virt_to_phys(pt,
+						(void*) ent->pair[i].start,
+						&info[count].pos)) {
+					kprintf("Cannot get phys\n");
+				}
+			}
+			totsz += sz;
+		}
+	}
+	return count;
+}
+
+/* Write count entries of info[] to fd.  Returns 0 when all bytes were written, else -1. */
+static ssize_t
+arealist_write(int fd, struct swap_areainfo *info, int count)
+{
+	ssize_t	sz;
+
+	sz = linux_write(fd, info, sizeof(struct swap_areainfo)*count);
+	if (sz != sizeof(struct swap_areainfo)*count) return -1;
+	return 0;
+}
+
+/* Print msg, count and every start/end pair held in *areap. */
+static void
+arealist_print(char *msg, struct arealist *areap, int count)
+{
+	struct areaent *ent;
+	kprintf("%s: %d\n", msg, count);
+	for (ent = areap->head; ent != NULL; ent = ent->next) {
+		int	i;
+		for (i = 0; i < ent->count; i++) {
+			kprintf("\t%p -- %p\n",
+				(void*) ent->pair[i].start, (void*) ent->pair[i].end);
+		}
+	}
+}
+
+/*
+ * Point the mlock container at the current end of mlock_area: entries
+ * appended to mlock_area from now on are the ones mlockcntnr_addrent()
+ * hands out.  Returns 0, or -1 when no areaent could be allocated.
+ */
+static int
+mlockcntnr_sethead(struct swapinfo *si)
+{
+	int	cnt;
+	cnt = arealist_get(si, 0, &si->mlock_area); /* Adjust arealist */
+	if (cnt < 0) return -1;
+	si->mlock_container.from = si->mlock_container.cur = si->mlock_area.tail;
+	si->mlock_container.ccount = si->mlock_area.tail->count;
+	return 0;
+}
+
+/* True when nothing was appended to mlock_area since mlockcntnr_sethead(). */
+static int
+mlockcntnr_isempty(struct swapinfo *si)
+{
+	return si->mlock_container.from == si->mlock_area.tail
+		&& si->mlock_container.ccount == si->mlock_area.tail->count;
+}
+
+/*
+ * Copy the next unread mlock_area entry to *laddr and advance the cursor.
+ * Returns 1 when an entry was delivered, 0 when none is left.
+ *
+ * ccount is the index of the next unread entry in cur, which is what
+ * mlockcntnr_sethead() stores and mlockcntnr_isempty() compares.  (The
+ * previous code read pair[ccount - 1], i.e. the entry before the first
+ * new one, or pair[-1] when the cursor started on an empty areaent.)
+ */
+static int
+mlockcntnr_addrent(struct swapinfo *si, struct addrpair *laddr)
+{
+	struct mlockcntnr *mc = &si->mlock_container;
+
+	/* move on while the current areaent has nothing unread left */
+	while (mc->ccount >= mc->cur->count) {
+		struct areaent *tmp = mc->cur->next;
+		if (tmp == 0) return 0;
+		mc->cur = tmp;
+		mc->ccount = 0;
+	}
+	*laddr = mc->cur->pair[mc->ccount];
+	mc->ccount++;
+	return 1;
+}
+
+/* Print which region the address start belongs to, together with label and sz. */
+static void
+print_area(char *label, unsigned long start, unsigned long sz,
+	   struct vm_regions *region)
+{
+	char	*type;
+
+	if (start == region->text_start) {
+		type = "text";
+	} else if (start == region->data_start) {
+		type = "data";
+	} else if (start == region->brk_start) {
+		type = "brk";
+	} else if (start == region->stack_start) {
+		type = "stack";
+	} else if (start == region->user_start) {
+		type = "user";
+	} else if (start >= region->map_start
+		   && start <= region->stack_start) {
+		type = "map";
+	} else {
+		type = "other";
+	}
+	/* %p needs a pointer argument, start is an integer */
+	kprintf("%s: %s write(%p, %ld)\n", label, type, (void *) start, sz);
+}
+
+void
+print_region(char *msg, struct process_vm *vm)
+{
+	struct 
vm_range *range, *next;
+
+	kprintf("%s:\n", msg);
+	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
+		if (range->memobj != NULL) continue;
+		kprintf("\t%016lx:%016lx (%lx)\n",
+			range->start, range->end, range->flag);
+	}
+}
+
+/* Print msg followed by the 16 bytes at p in hex. */
+static void
+debug_dump(char *msg, unsigned char *p)
+{
+	kprintf("%s-> %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
+		":%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
+		msg, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
+		p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
+}
+
+/*
+ * Read the swapped-out areas of the current process back from its swap
+ * file.  The swap file is unlinked as soon as it has been opened.
+ *
+ * Returns 0 on success, -EINVAL when the process has no swap information,
+ * the negative open result when the swap file cannot be opened, and -1
+ * on seek or read errors.  Except for the open failure, the swapinfo of
+ * the process is released before returning.
+ */
+int
+do_pagein(int flag)
+{
+	struct thread *thread = cpu_local_var(current);
+	struct process_vm *vm = thread->vm;
+	int	fd, i;
+	ssize_t	pos, sz = 0, rs;
+	struct swapinfo *si = vm->swapinfo;
+
+	dkprintf("do_pagein: flag(%d) currss(%lx)\n", flag, vm->currss);
+	if (si == NULL) {
+		/* nothing has been paged out for this process */
+		return -EINVAL;
+	}
+	fd = pager_open(si, si->swapfname, O_RDONLY, 0);
+	pager_unlink(si, si->swapfname);
+	if (fd < 0) {
+		kprintf("do_pagein: Cannot open file: %s\n", si->swapfname);
+		return fd;
+	}
+	/*
+	 * In the current implementation, the following working areas remain
+	 * in the physical memory area:
+	 *   swphdr, swap_info and mlock_info
+	 */
+	/* the page data follows the header and the two area tables */
+	pos = sizeof(struct swap_header);
+	pos += sizeof(struct swap_areainfo)*si->swphdr->count_sarea;
+	pos += sizeof(struct swap_areainfo)*si->swphdr->count_marea;
+	rs = linux_lseek(fd, pos, SEEK_SET);
+	if (rs < 0) goto err;
+	for (i = 0; i < si->swphdr->count_sarea; i++) {
+		extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
+		sz = si->swap_info[i].end - si->swap_info[i].start;
+		dkprintf("pagein: %016lx:%016lx sz(%lx)\n", si->swap_info[i].start, si->swap_info[i].end, sz);
+		rs = pager_read(si, fd, (void*) si->swap_info[i].start, sz);
+		if (rs != sz) goto err;
+		// ihk_mc_pt_print_pte(vm->address_space->page_table, (void*) si->swap_info[i].start);
+	}
+	linux_close(fd);
+	print_region("after pagin", vm);
+	kprintf("do_pagein: done, currss(%lx)\n", vm->currss);
+	vm->swapinfo = NULL;
+	kfree(si->swapfname);
+	kfree(si);
+	return 0;
+err:
+	linux_close(fd);
+	ekprintf("pagein: read error: return(%lx) size(%lx)\n", rs, sz);
+	vm->swapinfo = NULL;
+	kfree(si->swapfname);
+	kfree(si);
+	return -1;
+}
+
+int
+do_pageout(char *fname, void *buf, size_t size, int flag)
+{
+	struct thread *thread = cpu_local_var(current);
+	struct process_vm *vm = thread->vm;
+	struct vm_regions *region = &vm->region;
+	struct vm_range *range, *next;
+	struct addrpair *addr;
+	int	i, fd;
+	long	cc;
+	unsigned long start, end;
+	ssize_t	pos, sz;
+	struct swapinfo *si;
+
+	fd = -1;
+	dkprintf("do_pageout: buf(%p) size(%d) flag(%d) currss(%lx)\n",
+		 buf, size, flag, vm->currss);
+	if (IS_INVALID_USERADDRESS(fname, region)
+	    || IS_INVALID_USERADDRESS(buf, region)
+	    || IS_INVALID_LENGTH(size, region)) {
+		return -EINVAL;
+	}
+	if (!(si = kmalloc(sizeof(struct swapinfo), IHK_MC_AP_NOWAIT))) {
+		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
+		return -ENOMEM;
+	}
+	memset(si, '\0', sizeof(struct swapinfo));
+	cc = myalloc_init(si, buf, size);
+	if (cc < 0) {
+		kfree(si);
+		ekprintf("do_pageout: Cannot pin buf (%p) down\n", buf);
+		return cc;
+	}
+	si->udata_buf = myalloc(si, UDATA_BUFSIZE);
+	si->swapfname = kmalloc(strlen(fname) + 1, IHK_MC_AP_NOWAIT);
+	if (si->swapfname == NULL) {
+		kfree(si);
+		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
+		return -ENOMEM;
+	}
+	if (strcpy_from_user(si->swapfname, fname)) {
+		cc = -EFAULT;
+		goto err;
+	}
+	cc = arealist_init(si);
+	if (cc < 0) {
+		ekprintf("do_pageout: user buffer area is needed more than %d byte\n",
+			 UDATA_BUFSIZE + sizeof(struct areaent)*2);
+		goto err;
+	}
+
+	fd = linux_open(fname, O_RDWR|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0) {
+		ekprintf("do_pageout: Cannot open/create file: %s\n", fname);
+		cc = fd;
+		goto err;
+	}
+	area_print(region);
+
+	/* looking at ranges except for non anoymous, text, and data */
+	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
+		if (range->memobj != NULL) continue;
+		if (IS_TEXT(range->start, region)
+		    || 
IS_STACK(range->start, region) + || IS_INVALID_USERADDRESS(range->start, region) + || IS_READONLY(range->flag) + || IS_NOTUSER(range->flag)) continue; + if (range->flag & VR_LOCKED) { + /* this range is locked by McKernel */ + cc = arealist_add(si, range->start, range->end, + range->flag, &si->mlock_area); + if (cc < 0) goto nomem; + continue; + } + start = range->start; end = range->end; + if ((cc = mlockcntnr_sethead(si)) < 0) goto nomem; + /* Requesting mlock list in Linux Kernel. We do not know how much + * addrpair entries are needed. The Linux side stores -1 in + * the last entry of addrpair to inform more entries exist. + * the mlocklist_morereq function checks this condition. */ + do { + if ((cc = arealist_get(si, &addr, &si->mlock_area)) < 0) goto nomem; + cc = mlocklist_req(start, end, addr, cc); + arealist_update(cc, &si->mlock_area); + } while (mlocklist_morereq(si, &start)); + /* */ + if (mlockcntnr_isempty(si)) { /* whole range is going to swap */ + cc = arealist_add(si, range->start, range->end, + range->flag, &si->swap_area); + } else { /* partial range is going to swap */ + for (start = range->start; start < range->end;) { + struct addrpair laddr; + if (mlockcntnr_addrent(si, &laddr) == 0) { + /* No more entry locked by Linux */ + cc = arealist_add(si, start, range->end, + range->flag, + &si->swap_area); + if (cc < 0) goto nomem; + break; + } + if (start < laddr.start) { + /* swap range from start to laddr.start */ + cc = arealist_add(si, start, laddr.start, + range->flag, + &si->swap_area); + if (cc < 0) goto nomem; + } + start = laddr.end; + kprintf("do_pageout: start(%ld) range->end(%ld)\n", + start, range->end); + break; + } + } + } + arealist_print("SWAP", &si->swap_area, si->swap_area.count); + arealist_print("MLOCK", &si->mlock_area, si->mlock_area.count); + si->swap_info = myalloc(si, sizeof(struct swap_areainfo)* si->swap_area.count); + si->mlock_info = myalloc(si, sizeof(struct swap_areainfo)* si->mlock_area.count); + if (si->swap_info == 
NULL || si->mlock_info == NULL) goto nomem; + + /* preparing page store */ + if ((si->swphdr = myalloc(si, sizeof(struct swap_header))) == NULL) goto nomem; /* a NULL header must not reach the strncpy() calls below */ + strncpy(si->swphdr->magic, MCKERNEL_SWAP, SWAP_HLEN); + strncpy(si->swphdr->version, MCKERNEL_SWAP_VERSION, SWAP_HLEN); + si->swphdr->count_sarea = si->swap_area.count; + si->swphdr->count_marea = si->mlock_area.count; + if ((cc = pager_write(fd, si->swphdr, sizeof(struct swap_header))) + != sizeof(struct swap_header)) { + if (cc >= 0) + cc = -EIO; + goto err; + } + pos = linux_lseek(fd, 0, SEEK_CUR); + pos += sizeof(struct swap_areainfo)*(si->swap_area.count+si->mlock_area.count); + cc = arealist_preparewrite(&si->swap_area, si->swap_info, pos, vm, 1); + if (cc != si->swap_area.count) { + ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in swap_area\n", + cc, si->swap_area.count); + } + cc = arealist_preparewrite(&si->mlock_area, si->mlock_info, 0, vm, 0); + if (cc != si->mlock_area.count) { + ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in mlock_area\n", + cc, si->mlock_area.count); + } + /* arealists are stored */ + if ((cc = arealist_write(fd, si->swap_info, si->swap_area.count)) < 0) goto err; + if ((cc = arealist_write(fd, si->mlock_info, si->mlock_area.count)) < 0) goto err; + /* now pages are stored */ + for (i = 0; i < si->swap_area.count; i++) { + sz = si->swap_info[i].end - si->swap_info[i].start; + if ((cc = pager_write(fd, (void*) si->swap_info[i].start, sz)) != sz) { + if (cc >= 0) + cc = -EIO; + goto err; + } + } + if (flag & 0x04) { /* test the 0x04 bit itself; the former logical AND was true for every non-zero flag */ + kprintf("skipping physical memory removal\n"); + goto free_exit; /* NOTE(review): cc still holds the last write result here, so free_exit takes its cc != 0 branch -- confirm this is intended */ + } + kprintf("removing physical memory\n"); + for (i = 0; i < si->swap_area.count; i++) { + cc = ihk_mc_pt_free_range(vm->address_space->page_table, + vm, + (void*) si->swap_info[i].start, + (void*) si->swap_info[i].end, NULL); + if (cc < 0) { + kprintf("ihk_mc_pt_free_range returns: %d\n", cc); + } + } +#if 0 + range->flag |= VR_PAGEOUT; +#endif + cc = linux_close(fd); + fd = -1; + /* + * Unmapping 
McKernel's user virtual spaces in Linux side. + * From here to the completion of do_pagein, the nonlocking user spaces + * except TEXT, STACK, readonly pages, are not invalid. + */ + for (i = 0; i < si->swap_area.count; i++) { + sz = si->swap_info[i].end - si->swap_info[i].start; + cc = linux_munmap((void*) si->swap_info[i].start, sz, 0); + if (cc < 0) { + kprintf("do_pageout: Cannot munmap: %lx len(%lx)\n", + si->swap_info[i].start, sz); + } + } + cc = 0; + vm->swapinfo = si; + goto free_exit; +err: + ekprintf("do_pageout: write error: %d\n", cc); + goto free_exit; +nomem: + ekprintf("do_pageout: cannot allocate working memory\n"); + cc = -ENOMEM; +free_exit: + if (fd >= 0) + linux_close(fd); + dkprintf("do_pageout: done, currss(%lx)\n", vm->currss); + arealist_free(&si->mlock_area); arealist_free(&si->swap_area); + if (cc != 0) { + pager_unlink(si, si->swapfname); + kfree(si->swapfname); + kfree(si); + } + return cc; +} diff --git a/kernel/syscall.c b/kernel/syscall.c index ca738f90..873b8a07 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -9220,6 +9220,102 @@ SYSCALL_DECLARE(get_system) return 0; } +/* + * swapoout(const char *filename, void *workarea, size_t size) + */ +SYSCALL_DECLARE(swapout) +{ + extern int do_pageout(const char*, void*, size_t, int); + extern int do_pagein(int); + char *fname = (char *)ihk_mc_syscall_arg0(ctx); + char *buf = (char *)ihk_mc_syscall_arg1(ctx); + size_t size = (size_t)ihk_mc_syscall_arg2(ctx); + int flag = (int)ihk_mc_syscall_arg3(ctx); + ihk_mc_user_context_t ctx0; + int cc; + + dkprintf("[%d]swapout(%lx,%lx,%lx,%ld)\n", + ihk_mc_get_processor_id(), fname, buf, size, flag); + + if (fname == NULL || flag == 0x01) { /* for development purupse */ + kprintf("swapout: skipping real swap\n"); + cc = syscall_generic_forwarding(__NR_swapout, &ctx0); + kprintf("swapout: return from Linux\n"); + return cc; + } + /* pageout */ + cc = do_pageout(fname, buf, size, flag); + if (cc < 0) return cc; + if (flag == 0x02) { + 
kprintf("swapout: skipping calling swapout in Linux\n"); + } else { + kprintf("swapout: before calling swapout in Linux\n"); + cc = syscall_generic_forwarding(__NR_swapout, &ctx0); + kprintf("swapout: after calling swapout in Linux cc(%d)\n", cc); + } + /* Though swapout in Linux side returns error, needs to call + * pagein to recover the image */ + cc = do_pagein(flag); + kprintf("swapout: after calling do_pagein cc(%d)\n", cc); + return cc; +} + +SYSCALL_DECLARE(linux_mlock) +{ + ihk_mc_user_context_t ctx0; + const uintptr_t addr = ihk_mc_syscall_arg0(ctx); + const size_t len = ihk_mc_syscall_arg1(ctx); + int cc; + + kprintf("linux_mlock: %p %ld\n", (void*) addr, len); + ihk_mc_syscall_arg0(&ctx0) = addr; + ihk_mc_syscall_arg1(&ctx0) = len; + cc = syscall_generic_forwarding(802, &ctx0); + return cc; +} + +SYSCALL_DECLARE(linux_spawn) +{ + int rc; + + rc = syscall_generic_forwarding(__NR_linux_spawn, ctx); + return rc; +} + +SYSCALL_DECLARE(suspend_threads) +{ + struct thread *mythread = cpu_local_var(current); + struct thread *thread; + struct process *proc = mythread->proc; + + list_for_each_entry(thread, &proc->threads_list, siblings_list) { + if (thread == mythread) + continue; + do_kill(mythread, proc->pid, thread->tid, SIGSTOP, NULL, 0); + } + list_for_each_entry(thread, &proc->threads_list, siblings_list) { + if (thread == mythread) + continue; + while (thread->status != PS_STOPPED) + cpu_pause(); + } + return 0; +} + +SYSCALL_DECLARE(resume_threads) +{ + struct thread *mythread = cpu_local_var(current); + struct thread *thread; + struct process *proc = mythread->proc; + + list_for_each_entry(thread, &proc->threads_list, siblings_list) { + if (thread == mythread) + continue; + do_kill(mythread, proc->pid, thread->tid, SIGCONT, NULL, 0); + } + return 0; +} + void reset_cputime() { diff --git a/test/qlmpi/dump-pages.c b/test/qlmpi/dump-pages.c new file mode 100644 index 00000000..9dd05b60 --- /dev/null +++ b/test/qlmpi/dump-pages.c @@ -0,0 +1,127 @@ +#include 
+#include +#include +#include +#include +#include "swapfmt.h" + +struct swap_header header; +struct swap_areainfo *meminfo, *lckinfo; + +void +show(unsigned *data, int cnt) /* print cnt unsigned words in hex on one tab-indented line */ +{ + printf("\t"); + while (cnt-- > 0) { /* post-decrement so that exactly cnt words are printed; the pre-decrement form dropped the last one */ + printf("%08x ", *data++); /* *data is unsigned int, so no 'l' length modifier */ + } + printf("\n"); +} + +unsigned long +convhex(char *cp) /* parse leading hex digits of cp; stops at newline, NUL or the first non-hex character */ +{ + unsigned long val = 0; + + while (*cp != '\n' && *cp != 0) { + if (isdigit((unsigned char)*cp)) { /* ctype functions require an unsigned char value */ + val = (val<<4) + *cp - '0'; + } else if (isupper((unsigned char)*cp) && isxdigit((unsigned char)*cp)) { + val = (val<<4) + *cp - 'A' + 10; + } else if (isxdigit((unsigned char)*cp)) { + val = (val<<4) + *cp - 'a' + 10; + } else { + break; + } + cp++; + } + return val; +} + +ssize_t +findpos(unsigned long addr) /* file offset of addr inside the swapped areas, or 0 when no area contains it */ +{ + int i; + ssize_t pos = 0; + for (i = 0; i < header.count_sarea; i++) { + if (addr >= meminfo[i].start && addr < meminfo[i].end) { + pos = meminfo[i].pos; + pos += addr - meminfo[i].start; + } + } + return pos; +} + +int +main(int argc, char **argv) +{ + FILE *fp; + char *fname, *cp; + int interractive = 0; + int i; + + if (argc >= 2) { + fname = argv[1]; + if (argc >= 3) interractive = 1; + } else { + fname = "/tmp/pages"; + } + if ((fp = fopen(fname, "r")) == 0) { + fprintf(stderr, "Cannot open file: %s\n", fname); + exit(-1); + } + fread(&header, sizeof(header), 1, fp); + printf("magic : %s\n", header.magic); + printf("version : %s\n", header.version); /* version is a character field like magic (do_pageout fills both with strncpy) */ + printf("swap area count : %d\n", header.count_sarea); + printf("mlock area count: %d\n", header.count_marea); + printf("SWAP:\n"); + printf("\t start end : file position (flags)\n"); + meminfo = malloc(sizeof(struct swap_areainfo)* header.count_sarea); + lckinfo = malloc(sizeof(struct swap_areainfo)* header.count_marea); + fread(meminfo, sizeof(struct swap_areainfo), header.count_sarea, fp); + fread(lckinfo, sizeof(struct swap_areainfo), header.count_marea, fp); + + for (i = 0; i < header.count_sarea; i++) { + printf("\t%016lx -- %016lx : %010lx (%lx)\n", + meminfo[i].start, meminfo[i].end, meminfo[i].pos, meminfo[i].flag); + } + printf("MLOCK:\n"); + printf("\t 
start end : physical address (flags)\n"); + for (i = 0; i < header.count_marea; i++) { + printf("\t%016lx -- %016lx : %010lx (%lx)\n", + lckinfo[i].start, lckinfo[i].end, lckinfo[i].pos, lckinfo[i].flag); + } + + if (!interractive) goto ending; + do { + char buf1[128], buf2[128], data[8*8 + 1]; + char cmd;; + ssize_t sz; + int cc; + unsigned long addr; + ssize_t fpos; + + fprintf(stdout, "> "); fflush(stdout); + cp = fgets(buf1, 128, stdin); + if (cp == NULL) break; + cc = sscanf(buf1, "%c %s", &cmd, buf2); + if (cc != 2) continue; + addr = convhex(buf2); + fpos = findpos(addr); + if (fpos == 0) continue; + printf("%lx (fpos(%lx)):\n", addr, fpos); + fseek(fp, fpos, SEEK_SET); + if ((sz = fread(&data, 8*8, 1, fp)) != 1) goto err; + if (cmd == 's') { + data[8*8] = 0; + printf("\t%s", data); + } else { + show((unsigned*) data, 8); + } + } while (cp != NULL); +err: +ending: + fclose(fp); + return 0; +} diff --git a/test/qlmpi/qlmpi_sample.c b/test/qlmpi/qlmpi_sample.c new file mode 100644 index 00000000..2cdcd4e2 --- /dev/null +++ b/test/qlmpi/qlmpi_sample.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include +#define BUF_SIZE (16*1024) + +#include + +int data[1024*1024]; +char sym2[1024*1024] = { 10, 20, 30, 0 }; +char *sym1 = "aaaaaa"; +char buffer[BUF_SIZE]; +char *ptr1, *ptr2; + +int +swapout(char *fname, void *buf, size_t sz, int flag) +{ + int rc; + rc = syscall(801, fname, buf, sz, flag); + return rc; +} +int +linux_mlock(const void *addr, size_t len) +{ + int rc; + rc = syscall(802, addr, len); + return rc; +} + +int +main(int argc, char **argv) +{ + int rc; + int i; + + MPI_Init(&argc, &argv); + +ql_loop: + printf("***** Arguments Info ****************\n"); + printf(" argc: %d\n", argc); + for (i = 0; i < argc; i++) { + printf(" argv[%d]: %s\n", i, argv[i]); + } + printf("QL_SUCCESS:%d\n", QL_SUCCESS); + printf("************************************\n\n"); + + printf("&data = %p\n", data); + printf("&sym1 = %p\n", &sym1); + printf("&sym2 = 
%p\n", sym2); + printf("&rc = %p\n", &rc); + ptr1 = malloc(1024); + ptr2 = malloc(1024*1024); + printf("ptr1 = %p\n", ptr1); + printf("ptr1 = %p\n", ptr2); + /* + * testing mlock in mckernel side + */ + rc = mlock(data, 16*1024); + printf("McKernel mlock returns: %d\n", rc); + /* + * testing mlock in linux side + */ + sprintf((char*) data, "hello\n"); + rc = linux_mlock(data, 16*1024); + printf("linux_mlock returns: %d\n", rc); + + rc = ql_client(&argc, &argv); + + printf("ql_client returns: %d\n", rc); + if (rc == QL_CONTINUE) { + goto ql_loop; + } + + MPI_Finalize(); + printf("qlmpi_sample finished!!\n"); + return 0; +} diff --git a/test/qlmpi/qlmpi_testsuite/CT20.f b/test/qlmpi/qlmpi_testsuite/CT20.f new file mode 100644 index 00000000..5e3869fd --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT20.f @@ -0,0 +1,73 @@ +c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!! + include 'mpif.h' + integer dsize + parameter(dsize=536870912) + character val*10 + integer ival + integer ierr + integer i + integer*4 dat(dsize) + common dat + integer rank + integer size + integer st(MPI_STATUS_SIZE) + + call MPI_INIT(ierr) + 1000 continue + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + +c size check + if(size.ne.2)then + if(rank.eq.0)then + print*,'bad MPI size' + endif + call MPI_FINALIZE(ierr) + stop 1 + endif + +c read argument + iargs = iargc() + if(iargs.ne.1)then + print *,'bad argument' + call MPI_FINALIZE(ierr) + stop 1 + endif + call getarg(1, val) + read(val, '(i10)')ival + print *,'val=',ival + +c test + if(rank.eq.0)then + do 10 i=1, dsize + dat(i) = -1 + 10 continue + print *,'r1 val=',ival + call MPI_RECV(dat, dsize, MPI_INTEGER4, 1, 0, MPI_COMM_WORLD, + c st, ierr) + print *,'r2 val=',ival + do 20 i=1, dsize + if(dat(i).ne.ival)then + print *,'*** bad value idx=',i,', dat=',dat(i), + c ' , val=',ival + goto 100 + endif + 20 continue + print *,'*** MPI_Send/Recv OK *** ' + 100 
continue + else + do 30 i=1, dsize + dat(i) = ival + 30 continue + call MPI_SEND(dat, dsize, MPI_INTEGER4, 0, 0, MPI_COMM_WORLD, + c ierr) + endif + +c repeat? + call ql_client(ierr) + if(ierr.eq.1)then + print *,'repeat' + goto 1000 + endif + call MPI_FINALIZE(ierr) + end diff --git a/test/qlmpi/qlmpi_testsuite/CT20.sh b/test/qlmpi/qlmpi_testsuite/CT20.sh new file mode 100755 index 00000000..e98b09d4 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT20.sh @@ -0,0 +1,27 @@ +#!/bin/sh +PPOSDIR=/home/satoken/ppos +export PATH=${PPOSDIR}/bin:$PATH # parameter expansion; $(PPOSDIR) would try to run a command named PPOSDIR +echo CT20001 device mapping program test START +echo CT20002 program 1 START +echo CT20003 check '"MPI_Send/Recv OK"' +ql_mpiexec_start -machinefile hostfile20 ./CT20a 1 +echo CT20004 program 1 suspend +echo CT20005 program 2 START +echo CT20006 check '"MPI_Send/Recv OK"' +ql_mpiexec_start -machinefile hostfile20 ./CT20b 2 +echo CT20007 program 2 suspend +echo CT20008 program 1 resume +echo CT20009 check '"MPI_Send/Recv OK"' +ql_mpiexec_start -machinefile hostfile20 ./CT20a 3 +echo CT20010 program 1 suspend +echo CT20011 program 2 resume +echo CT20012 check '"MPI_Send/Recv OK"' +ql_mpiexec_start -machinefile hostfile20 ./CT20b 4 +echo CT20013 program 2 suspend +echo CT20014 program 1 resume +ql_mpiexec_finalize -machinefile hostfile20 ./CT20a +echo CT20015 program 1 END +echo CT20016 program 2 resume +ql_mpiexec_finalize -machinefile hostfile20 ./CT20b +echo CT20017 program 2 END +echo CT20018 device mapping program test END diff --git a/test/qlmpi/qlmpi_testsuite/CT21.f b/test/qlmpi/qlmpi_testsuite/CT21.f new file mode 100644 index 00000000..35e0dc8a --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT21.f @@ -0,0 +1,62 @@ +c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!! 
+ include 'mpif.h' + integer size + parameter(size=536870912) + character file*10 + character val*10 + integer ival + integer ierr + integer i + integer*4 dat(size) + common dat + character myname*10 + + call getarg(0, myname) + call MPI_INIT(ierr) + 1000 continue + iargs = iargc() + if(iargs.ne.2)then + print *,'bad argument' + call MPI_FINALIZE(ierr) + stop 1 + endif + call getarg(1, file) + call getarg(2, val) + read(val, '(i10)')ival + print *,' file=',file,', val=',ival + open(1, file=file, status='old', form='unformatted', + c access='stream', err=999) + do 10 i=1, size + dat(i) = -1 + 10 continue + read(1, err=998)(dat(i), i=1, size) + do 20 i=1, size + if(dat(i).ne.ival)then + print *,'*** FAIL *** BAD VALUE idx=',i,', val=',dat(i) + goto 100 + endif + 20 continue + print *,' *** data read OK ***' + 100 continue + close(1) + call ql_client(ierr) + if(ierr.eq.1)then + print *,'resume' + goto 1000 + endif + call MPI_FINALIZE(ierr) + stop 0 + + 998 continue + close(1) + print *,'read error' + goto 9999 + + 999 continue + print *,'open error' + goto 9999 + + 9999 continue + call MPI_FINALIZE(ierr) + stop 1 + end diff --git a/test/qlmpi/qlmpi_testsuite/CT21.sh b/test/qlmpi/qlmpi_testsuite/CT21.sh new file mode 100755 index 00000000..15ed44d4 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT21.sh @@ -0,0 +1,27 @@ +#!/bin/sh +export PPOSDIR=/home/satoken/ppos +export PATH=$PPOSDIR/bin:$PATH +echo CT21001 mcexec page table update test START +echo CT21002 program 1 START +echo CT21003 check '"data read OK"' +ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file1 1 +echo CT21004 program 1 suspend +echo CT21005 program 2 START +echo CT21006 check '"data read OK"' +ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file1 1 +echo CT21007 program 2 suspend +echo CT21008 program 1 resume +echo CT21009 check '"data read OK"' +ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file2 2 +echo CT21010 program 1 suspend +echo CT21011 program 2 resume +echo CT21012 
check '"data read OK"' +ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file2 2 +echo CT21013 program 2 suspend +echo CT21014 program 1 resume +ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21a +echo CT21015 program 1 END +echo CT21016 program 2 resume +ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21b +echo CT21017 program 2 END +echo CT21018 mcexec page table update test END diff --git a/test/qlmpi/qlmpi_testsuite/CT22.f b/test/qlmpi/qlmpi_testsuite/CT22.f new file mode 100644 index 00000000..b302572d --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT22.f @@ -0,0 +1,27 @@ +c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!! +!$ use omp_lib + include 'mpif.h' + integer rank + integer size + external omp_get_thread_num + external omp_get_num_threads + integer omp_get_thread_num + integer omp_get_num_threads + + call MPI_INIT(ierr) + 1000 continue + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + +!$omp parallel + print '(1h ,4hmpi=,i2,1h/,i2,6h, omp=,i2,1h/,i2)', + c rank, size, omp_get_thread_num(), omp_get_num_threads() +!$omp end parallel +c repeat? 
+ call ql_client(ierr) + if(ierr.eq.1)then + print *,'repeat' + goto 1000 + endif + call MPI_FINALIZE(ierr) + end diff --git a/test/qlmpi/qlmpi_testsuite/CT22.sh b/test/qlmpi/qlmpi_testsuite/CT22.sh new file mode 100755 index 00000000..067695d7 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/CT22.sh @@ -0,0 +1,27 @@ +#!/bin/sh +PPOSDIR=/home/satoken/ppos +export PATH=${PPOSDIR}/bin:$PATH # parameter expansion; $(PPOSDIR) would try to run a command named PPOSDIR +echo CT22001 OMP test START +echo CT22002 program 1 START +echo CT22003 check rank info +ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a +echo CT22004 program 1 suspend +echo CT22005 program 2 START +echo CT22006 check rank info +ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b +echo CT22007 program 2 suspend +echo CT22008 program 1 resume +echo CT22009 check rank info +ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a +echo CT22010 program 1 suspend +echo CT22011 program 2 resume +echo CT22012 check rank info +ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b +echo CT22013 program 2 suspend +echo CT22014 program 1 resume +ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a +echo CT22015 program 1 END +echo CT22016 program 2 resume +ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b +echo CT22017 program 2 END +echo CT22018 OMP test END diff --git a/test/qlmpi/qlmpi_testsuite/Makefile b/test/qlmpi/qlmpi_testsuite/Makefile new file mode 100644 index 00000000..8a2d63a1 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/Makefile @@ -0,0 +1,57 @@ +PPOSDIR=/home/satoken/ppos +MPIDIR=/usr/lib64/mpich-3.2 +MPIBINDIR=$(MPIDIR)/bin +MPILIBDIR=$(MPIDIR)/lib +CC=gcc + +MPIF90=$(MPIBINDIR)/mpif90 +MPICC=$(MPIBINDIR)/mpicc +LIBDIR=$(PPOSDIR)/lib +LDFLAGS=-L$(LIBDIR) -lqlmpi -Wl,-rpath=$(LIBDIR) -Wl,-rpath,$(MPILIBDIR) +CFLAGS= -I$(PPOSDIR)/include + +TARGETS= usr_prg_A usr_prg_B usr_prg_C usr_prg_irreg CT20a CT20b CT21a CT21b file1 file2 CT22a CT22b + 
+all:: $(TARGETS) + +usr_prg_A: usr_prg_A.c + $(MPICC) $(CFLAGS) $(LDFLAGS) -lqlmpi -o $@ $@.c + +usr_prg_B: usr_prg_B.c + $(MPICC) $(CFLAGS) $(LDFLAGS) -lqlmpi -o $@ $@.c + +usr_prg_C: usr_prg_C.c + $(MPICC) $(CFLAGS) $(LDFLAGS) -lqlmpi -o $@ $@.c + +usr_prg_irreg: usr_prg_irreg.c + $(MPICC) $(CFLAGS) $(LDFLAGS) -lqlmpi -o $@ $@.c + +CT20a: CT20.f + $(MPIF90) -o $@ $< $(LDFLAGS) + +CT20b: CT20.f + $(MPIF90) -o $@ $< $(LDFLAGS) + +CT21a: CT21.f + $(MPIF90) -o $@ $< $(LDFLAGS) + +CT21b: CT21.f + $(MPIF90) -o $@ $< $(LDFLAGS) + +CT22a: CT22.f + $(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS) + +CT22b: CT22.f + $(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS) + +file1: gendata + ./gendata 1 536870912 > $@ + +file2: gendata + ./gendata 2 536870912 > $@ + +gendata: gendata.c + $(CC) -o $@ $< + +clean:: + rm -f $(TARGETS) gendata diff --git a/test/qlmpi/qlmpi_testsuite/config b/test/qlmpi/qlmpi_testsuite/config new file mode 100644 index 00000000..52164055 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/config @@ -0,0 +1,9 @@ +MCMOD_DIR=/home/satoken/ppos + +START=${MCMOD_DIR}/bin/ql_mpiexec_start +FINALIZE=${MCMOD_DIR}/bin/ql_mpiexec_finalize + +USR_PRG_A=./usr_prg_A +USR_PRG_B=./usr_prg_B +USR_PRG_C=./usr_prg_C +USR_PRG_IRREG=./usr_prg_irreg diff --git a/test/qlmpi/qlmpi_testsuite/gendata.c b/test/qlmpi/qlmpi_testsuite/gendata.c new file mode 100644 index 00000000..82dc72da --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/gendata.c @@ -0,0 +1,40 @@ +#include +#include +#include +#include +int +main(int argc, char **argv) /* usage: gendata VALUE COUNT -- writes COUNT ints equal to VALUE to stdout */ +{ + int data; + long count; + long i; + int *buf; + size_t r; /* bytes still to write; an int overflows for the 536870912-int files the Makefile requests */ + char *p; + + if (argc != 3) { + fprintf(stderr, "BAD argument\n"); + exit(1); + } + data = atoi(argv[1]); + count = atol(argv[2]); + + fprintf(stderr, "data=%d count=%ld\n", data, count); + if ((buf = malloc(sizeof(int) * count)) == NULL) { perror("malloc"); exit(1); } + for (i = 0; i < count; i++) + buf[i] = data; + + for (r = sizeof(int) * count, p = (char *)buf; r;) { + ssize_t rc = write(1, p, r); + if (rc < 0 && errno == EINTR) /* write() signals interruption through errno, never as a return value */ + continue; + if (rc <= 0) { + 
fprintf(stderr, "write error: %d", errno); + exit(1); + } + r -= rc; + p += rc; + } + close(1); + exit(0); +} diff --git a/test/qlmpi/qlmpi_testsuite/go_ql_test.sh b/test/qlmpi/qlmpi_testsuite/go_ql_test.sh new file mode 100755 index 00000000..ed451fcb --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/go_ql_test.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +for test_param in `ls -1 ./test_cases/CT*.txt` +do + source ${test_param} + ./ql_normal.sh ${test_param} 2>&1 | tee ./result/${TEST_PREFIX}.log +done + +./ql_irreg.sh ./test_cases/ECT91.txt + diff --git a/test/qlmpi/qlmpi_testsuite/hostfile20 b/test/qlmpi/qlmpi_testsuite/hostfile20 new file mode 100644 index 00000000..f5198db1 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/hostfile20 @@ -0,0 +1,2 @@ +wallaby14:1 +wallaby15:1 diff --git a/test/qlmpi/qlmpi_testsuite/hostfile21 b/test/qlmpi/qlmpi_testsuite/hostfile21 new file mode 100644 index 00000000..3e5ec98d --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/hostfile21 @@ -0,0 +1 @@ +wallaby14 diff --git a/test/qlmpi/qlmpi_testsuite/hostfile22 b/test/qlmpi/qlmpi_testsuite/hostfile22 new file mode 100644 index 00000000..f5198db1 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/hostfile22 @@ -0,0 +1,2 @@ +wallaby14:1 +wallaby15:1 diff --git a/test/qlmpi/qlmpi_testsuite/ql_irreg.sh b/test/qlmpi/qlmpi_testsuite/ql_irreg.sh new file mode 100755 index 00000000..9bc95c19 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/ql_irreg.sh @@ -0,0 +1,210 @@ +#!/bin/sh + +# Functions +function ok_out() { + echo "[OK] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1" + (( TEST_NUM++ )) + TEST_CODE=`printf %03d ${TEST_NUM}` +} + +function ng_out() { + echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1" + exit 1 +} + +function ng_out_cont { + echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1" + (( TEST_NUM++ )) + TEST_CODE=`printf %03d ${TEST_NUM}` +} + +if [ $# -lt 1 ]; then + echo "too few arguments." 
+ echo "usage: `basename $0` <test_param_file>"; exit 1 +fi + +TEST_PARAM_FILE=$1 +TEST_NUM=1 +TEST_CODE=001 + +ME=`whoami` + +# read config +source ./config + +# read test param +source ${TEST_PARAM_FILE} + +# make machinefile +mkdir ./machinefiles &> /dev/null +MFILE=./machinefiles/mfile_${TEST_PREFIX} +echo ${MASTER}:${PROC_PER_NODE} > ${MFILE} +for slave in ${SLAVE} +do + echo ${slave}:${PROC_PER_NODE} >> ${MFILE} +done + +PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}` + +# read machinefile +declare -a node_arry +while read line +do + node_arry+=(${line%:*}) +done < ${MFILE} +MASTER=${node_arry[0]} + +# make result directory +RESULT_DIR=./result/${TEST_PREFIX} +mkdir -p ${RESULT_DIR} + +RANK_MAX=`expr ${PROC_NUM} - 1` + +# Log files +start_1st_A_log=${RESULT_DIR}/exec_1st_A.log +start_1st_B_log=${RESULT_DIR}/exec_1st_B.log +start_1st_C_log=${RESULT_DIR}/exec_1st_C.log + +start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log +start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log +start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log + +finalize_A_log=${RESULT_DIR}/finalize_A.log +finalize_B_log=${RESULT_DIR}/finalize_B.log +finalize_C_log=${RESULT_DIR}/finalize_C.log + +# Arguments +args_1st_A="1234 hoge 02hoge" +args_2nd_A="foo 99bar test" + +# Env +envs_1st_A="1st_exec_A" +envs_2nd_A="This_is_2nd_exec_A" + +BK_PATH=${PATH} + +### Precondition: at test start, neither ql_server nor the test MPI programs are running on any node +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"` + if [ ${cnt} -gt 0 ]; then + ng_out "ql_server is running on ${node}" + fi + + cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"` + if [ ${cnt} -gt 0 ]; then + ng_out "other MPI program is running on ${node}" + fi +done + +### machinefile is not specified +env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? 
+if [ ${rc} -eq 0 ]; then + ng_out_cont "machinefile is not specified, but ql_mpiexec_start returns 0" +else + ok_out "machinefile is not specified, so ql_mpiexec_start returns not 0. returns ${rc}" +fi + +### MPI program is not specified +env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "MPI program is not specified, but ql_mpiexec_start returns 0" +else + ok_out "MPI program is not specified, so ql_mpiexec_start returns not 0. returns ${rc}" +fi + +### specified machinefile does not exist +env QL_TEST=${envs_1st_A} ${START} -machinefile dose_not_exist -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "specified machinefile does not exist, but ql_mpiexec_start returns 0" +else + ok_out "specified machinefile does not exist, so ql_mpiexec_start returns not 0. returns ${rc}" +fi + +### specified MPI program does not exist +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} dose_not_exist ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "specified MPI program does not exist, but ql_mpiexec_start returns 0" +else + ok_out "specified MPI program does not exist, so ql_mpiexec_start returns not 0. returns ${rc}" +fi + +### mpiexec is not found +PATH="/usr/bin" +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "mpiexec is not found, but ql_mpiexec_start returns 0" +else + ok_out "mpiexec is not found, so ql_mpiexec_start returns not 0. returns ${rc}" +fi +PATH=${BK_PATH} + +### mpiexec abort +PATH="./util:/usr/bin" +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? 
+if [ ${rc} -eq 0 ]; then + ng_out_cont "mpiexec abort, but ql_mpiexec_start returns 0" +else + ok_out "mpiexec abort, so ql_mpiexec_start returns not 0. returns ${rc}" +fi +PATH=${BK_PATH} + +### machinefile is not specified +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log + +${FINALIZE} -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "machinefile is not specified, but ql_mpiexec_finalize returns 0" +else + ok_out "machinefile is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}" +fi + +### MPI program is not specified +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} >> ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "MPI program is not specified, but ql_mpiexec_finalize returns 0" +else + ok_out "MPI program is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}" +fi + +### specified machinefile is wrong +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +${FINALIZE} -machinefile ./util/wrong_mfile -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "specified machinefile is wrong, but ql_mpiexec_finalize returns 0" +else + ok_out "specified machinefile is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}" +fi + +### specified MPI program name is wrong +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} >> ${RESULT_DIR}/${TEST_CODE}.log +rc=$? 
+if [ ${rc} -eq 0 ]; then + ng_out_cont "specified MPI program name is wrong, but ql_mpiexec_finalize returns 0" +else + ok_out "specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}" +fi + +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > /dev/null + +### one of MPI process aborts +abort_rank=`expr ${PROC_NUM} - 1` +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_IRREG} 0 > ${RESULT_DIR}/${TEST_CODE}.log +rc=$? +if [ ${rc} -eq 0 ]; then + ng_out_cont "one of MPI processes aborts, but ql_mpiexec_start returns 0" +else + ok_out "one of MPI processes aborts, so ql_mpiexec_start returns not 0. returns ${rc}" +fi + diff --git a/test/qlmpi/qlmpi_testsuite/ql_normal.sh b/test/qlmpi/qlmpi_testsuite/ql_normal.sh new file mode 100755 index 00000000..056fb1e5 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/ql_normal.sh @@ -0,0 +1,348 @@ +#!/bin/bash + +# Functions: ok_out reports a pass and advances TEST_NUM/TEST_CODE; ng_out reports a failure and exits 1 (bash is required: function, (( )), arrays) +function ok_out() { + echo "[OK] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1" + (( TEST_NUM++ )) + TEST_CODE=`printf %03d ${TEST_NUM}` +} + +function ng_out() { + echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1" + exit 1 +} + +if [ $# -lt 1 ]; then + echo "too few arguments."
+ echo "usage: `basename $0` TEST_PARAM_FILE"; exit 1 +fi + +TEST_PARAM_FILE=$1 +TEST_NUM=1 +TEST_CODE=001 + +ME=`whoami` + +# read config +source ./config + +# read test param +source ${TEST_PARAM_FILE} + +# make machinefile +mkdir ./machinefiles &> /dev/null +MFILE=./machinefiles/mfile_${TEST_PREFIX} +echo ${MASTER}:${PROC_PER_NODE} > ${MFILE} +for slave in ${SLAVE} +do + echo ${slave}:${PROC_PER_NODE} >> ${MFILE} +done + +PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}` + +# read machinefile +declare -a node_arry +while read line +do + node_arry+=(${line%:*}) +done < ${MFILE} +MASTER=${node_arry[0]} + +# make result directory +RESULT_DIR=./result/${TEST_PREFIX} +mkdir -p ${RESULT_DIR} + +RANK_MAX=`expr ${PROC_NUM} - 1` + +# Log files +start_1st_A_log=${RESULT_DIR}/exec_1st_A.log +start_1st_B_log=${RESULT_DIR}/exec_1st_B.log +start_1st_C_log=${RESULT_DIR}/exec_1st_C.log + +start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log +start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log +start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log + +finalize_A_log=${RESULT_DIR}/finalize_A.log +finalize_B_log=${RESULT_DIR}/finalize_B.log +finalize_C_log=${RESULT_DIR}/finalize_C.log + +# Arguments +args_1st_A="1234 hoge 02hoge" +args_2nd_A="foo 99bar test" + +# Env +envs_1st_A="1st_exec_A" +envs_2nd_A="This_is_2nd_exec_A" + +### At test start, neither ql_server nor the test MPI programs are running on any node +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"` + if [ ${cnt} -gt 0 ]; then + ng_out "ql_server is running on ${node}" + fi + + cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"` + if [ ${cnt} -gt 0 ]; then + ng_out "other MPI program is running on ${node}" + fi +done +ok_out "ql_server and usr_prgs are not running on each node" + +### ql_mpiexec_start launching usr_prg_A returns 0 (success) +env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${start_1st_A_log} +rc=$?
+if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_start usr_prg_A (first exec) returns 0" +else + ng_out "ql_mpiexec_start usr_prg_A (first exec) returns ${rc}" +fi + +### 初回実行後、マスターノード上でql_serverが動作している +cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"` +if [ ${cnt} -ne 1 ]; then + ng_out "ql_server is not running on master node" +else + ok_out "ql_server is running on master node" +fi + +### 各ノードのusr_prg_A の引数が実行時に指定したものと一致している +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "argv="` + tgt=${line#*argv=} + if [ "X${tgt}" != "X${USR_PRG_A} ${args_1st_A}" ]; then + ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}" + fi +done +ok_out "usr_prg_A's args are correct on each node" + +### 各ノードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "QL_TEST="` + tgt=${line#*QL_TEST=} + if [ "X${tgt}" != "X${envs_1st_A}" ]; then + ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}" + fi +done +ok_out "usr_prg_A's env (QL_TEST) is correct on each node" + +### 各ノードのusr_prg_A の計算処理が完了 +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "done="` + tgt=${line#*done=} + if [ "X${tgt}" != "Xyes" ]; then + ng_out "usr_prg_A's calculation is not done on rank:${rank}" + fi +done +ok_out "usr_prg_A's calculation is done on each node" + +### ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l` + if [ ${cnt} -eq 0 ]; then + ng_out "usr_prg_A is not running on ${node}" + else + echo " ${cnt} programs is waiting on ${node}" + fi +done +ok_out "usr_prg_A is waiting for resume-req on each node" + +### usr_prg_B を実行するql_mpiexec_start の返り値が0 (成功) +${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 1 2 3 > ${start_1st_B_log} +rc=$? 
+if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_start usr_prg_B (first exec) returns 0" +else + ng_out "ql_mpiexec_start usr_prg_B (first exec) returns ${rc}" +fi + +### 各ノードのusr_prg_B の計算処理が完了 +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_1st_B_log} | grep -e "done="` + tgt=${line#*done=} + if [ "X${tgt}" != "Xyes" ]; then + ng_out "usr_prg_B's calculation is not done on rank:${rank}" + fi +done +ok_out "usr_prg_B's calculation is done on each node" + +### ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l` + if [ ${cnt} -eq 0 ]; then + ng_out "usr_prg_B is not running on ${node}" + else + echo " ${cnt} programs is waiting on ${node}" + fi +done +ok_out "usr_prg_B is waiting for resume-req on each node" + +### usr_prg_C を実行するql_mpiexec_start の返り値が0 (成功) +${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} a b c > ${start_1st_C_log} +rc=$? +if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_start usr_prg_C (first exec) returns 0" +else + ng_out "ql_mpiexec_start usr_prg_C (first exec) returns ${rc}" +fi + +### 各ノードのusr_prg_C の計算処理が完了 +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_1st_C_log} | grep -e "done="` + tgt=${line#*done=} + if [ "X${tgt}" != "Xyes" ]; then + ng_out "usr_prg_C's calculation is not done on rank:${rank}" + fi +done +ok_out "usr_prg_C's calculation is done on each node" + +### ql_mpiexec_start の完了後、usr_prg_C が再開指示待ちになっている +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_C'" | grep " exe" | wc -l` + if [ ${cnt} -eq 0 ]; then + ng_out "usr_prg_C is not running on ${node}" + else + echo " ${cnt} programs is waiting on ${node}" + fi +done +ok_out "usr_prg_C is waiting for resume-req on each node" + +### usr_prg_A を再実行するql_mpiexec_start の返り値が0 (成功) +env QL_TEST=${envs_2nd_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_2nd_A} > ${start_2nd_A_log} 
+rc=$? +if [ ${rc} -eq 0 ]; then + ok_out "(again) ql_mpiexec_start usr_prg_A returns 0" +else + ng_out "(again) ql_mpiexec_start usr_prg_A returns ${rc}" +fi + +### 各ノードのusr_prg_A の引数が再実行時に指定したものと一致している +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "argv="` + tgt=${line#*argv=} + if [ "X${tgt}" != "X${USR_PRG_A} ${args_2nd_A}" ]; then + ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}" + fi +done +ok_out "(again) usr_prg_A's args are correct on each node" + +### 各ノードのusr_prg_A テスト用に指定した環境変数が再実行時に指定したものと一致している +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "QL_TEST="` + tgt=${line#*QL_TEST=} + if [ "X${tgt}" != "X${envs_2nd_A}" ]; then + ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}" + fi +done +ok_out "(again) usr_prg_A's env (QL_TEST) is correct on each node" + +### 各ノードのusr_prg_A の計算処理が完了 +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "done="` + tgt=${line#*done=} + if [ "X${tgt}" != "Xyes" ]; then + ng_out "usr_prg_A's calculation is not done on rank:${rank}" + fi +done +ok_out "(again) usr_prg_A's calculation is done on each node" + +### ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l` + if [ ${cnt} -eq 0 ]; then + ng_out "usr_prg_A is not running on ${node}" + else + echo " ${cnt} programs is waiting on ${node}" + fi +done +ok_out "(again) usr_prg_A is waiting for resume-req on each node" + +### usr_prg_B を再実行するql_mpiexec_start の返り値が0 (成功) +${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 10 20 30 40 > ${start_2nd_B_log} +rc=$? 
+if [ ${rc} -eq 0 ]; then + ok_out "(again) ql_mpiexec_start usr_prg_B returns 0" +else + ng_out "(again) ql_mpiexec_start usr_prg_B returns ${rc}" +fi + +### 各ノードのusr_prg_B の計算処理が完了 +for rank in `seq 0 ${RANK_MAX}` +do + line=`grep -e "^${rank}:" ${start_2nd_B_log} | grep -e "done="` + tgt=${line#*done=} + if [ "X${tgt}" != "Xyes" ]; then + ng_out "usr_prg_B's calculation is not done on rank:${rank}" + fi +done +ok_out "(again) usr_prg_B's calculation is done on each node" + +### ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている +for node in ${node_arry[@]} +do + cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l` + if [ ${cnt} -eq 0 ]; then + ng_out "usr_prg_B is not running on ${node}" + else + echo " ${cnt} programs is waiting on ${node}" + fi +done +ok_out "(again) usr_prg_B is waiting for resume-req on each node" + +### usr_prg_A を終了するql_mpiexec_finalize の返り値が0 (成功) +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > ${finalize_A_log} +rc=$? +if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_finalize usr_prg_A return 0" +else + ng_out "ql_mpiexec_finalize usr_prg_A return ${rc}" +fi + +### usr_prg_B を終了するql_mpiexec_finalize の返り値が0 (成功) +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} > ${finalize_B_log} +rc=$? +if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_finalize usr_prg_B return 0" +else + ng_out "ql_mpiexec_finalize usr_prg_B return ${rc}" +fi + +### usr_prg_Bの終了後、ql_serverがマスターノード上で動作している +cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"` +if [ ${cnt} -ne 1 ]; then + ng_out "ql_server is not running on master node" +else + ok_out "ql_server is still running on master node" +fi + +### usr_prg_C を終了するql_mpiexec_finalize の返り値が0 (成功) +${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} > ${finalize_C_log} +rc=$? 
+if [ ${rc} -eq 0 ]; then + ok_out "ql_mpiexec_finalize usr_prg_C return 0" +else + ng_out "ql_mpiexec_finalize usr_prg_C return ${rc}" +fi + +### All MPI programs have been finalized, so ql_server must have exited +sleep 1 # wait before sampling so ql_server has time to exit +cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"` +if [ ${cnt} -eq 0 ]; then + ok_out "ql_server is not running on master node" +else + ng_out "ql_server is still running on master node" +fi + diff --git a/test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt b/test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt new file mode 100644 index 00000000..a72cd2cc --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt @@ -0,0 +1,230 @@ +プロセス高速起動 +結合テスト仕様 (ql_mpiexec_start/finalize) + +下記の条件を組み合わせた構成で動作を検証する +・MPIノード数 + 1, 2 +・プロセス数/ノード + 1, 2, MAX(mckernelのCPU割り当て数) +・ql_mpiexecコマンドの実行ノード + master, not-master + +組み合わせ: + MPIノード数 proc/node mpiexec実行ノード +パターン01: 1 1 master +パターン02: 1 1 not-master +パターン03: 1 2 master +パターン04: 1 2 not-master +パターン05: 1 MAX master +パターン06: 1 MAX not-master +パターン07: 2 1 master +パターン08: 2 1 not-master +パターン09: 2 2 master +パターン10: 2 2 not-master +パターン11: 2 MAX master +パターン12: 2 MAX not-master + + +CTxx 3つのMPIプログラム(A, B, C) をA, B, C, A, B の順に実行する +□ CTxx001 ql_server and usr_prgs are not running on each node + テスト開始時点でql_serverとテスト用MPIプログラムが各ノードで実行されていない +□ CTxx002 ql_mpiexec_start usr_prg_A (first exec) returns 0 + usr_prg_A を実行するql_mpiexec_start の返り値が0 (成功) +□ CTxx003 ql_server is running on master node + 初回実行後、マスターノード上でql_serverが動作している +□ CTxx004 usr_prg_A's args are correct on each node + 各ノードのusr_prg_A の引数が実行時に指定したものと一致している +□ CTxx005 usr_prg_A's env (QL_TEST) is correct on each node + 各ノードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している +□ CTxx006 usr_prg_A's calculation is done on each node + 各ノードのusr_prg_A の計算処理が完了 +□ CTxx007 usr_prg_A is waiting for resume-req on each node + ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている +□ CTxx008 ql_mpiexec_start usr_prg_B (first exec) returns 0 + usr_prg_B を実行するql_mpiexec_start の返り値が0 (成功) +□ CTxx009 usr_prg_B's
calculation is done on each node + 各ノードのusr_prg_B の計算処理が完了 +□ CTxx010 usr_prg_B is waiting for resume-req on each node + ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている +□ CTxx011 ql_mpiexec_start usr_prg_C (first exec) returns 0 + usr_prg_C を実行するql_mpiexec_start の返り値が0 (成功) +□ CTxx012 usr_prg_C's calculation is done on each node + 各ノードのusr_prg_C の計算処理が完了 +□ CTxx013 usr_prg_C is waiting for resume-req on each node + ql_mpiexec_start の完了後、usr_prg_C が再開指示待ちになっている + +□ CTxx014 (again) ql_mpiexec_start usr_prg_A returns 0 + usr_prg_A を再実行するql_mpiexec_start の返り値が0 (成功) +□ CTxx015 (again) usr_prg_A's args are correct on each node + 各ノードのusr_prg_A の引数が再実行時に指定したものと一致している +□ CTxx016 (again) usr_prg_A's env (QL_TEST) is correct on each node + 各ノードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している +□ CTxx017 (again) usr_prg_A's calculation is done on each node + 各ノードのusr_prg_A の計算処理が完了 +□ CTxx018 (again) usr_prg_A is waiting for resume-req on each node + ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている +□ CTxx019 (again) ql_mpiexec_start usr_prg_B returns 0 + usr_prg_B を再実行するql_mpiexec_start の返り値が0 (成功) +□ CTxx020 (again) usr_prg_B's calculation is done on each node + 各ノードのusr_prg_B の計算処理が完了 +□ CTxx021 (again) usr_prg_B is waiting for resume-req on each node + ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている + +□ CTxx022 ql_mpiexec_finalize usr_prg_A return 0 + usr_prg_A を終了するql_mpiexec_finalize の返り値が0 (成功) +□ CTxx023 ql_mpiexec_finalize usr_prg_B return 0 + usr_prg_B を終了するql_mpiexec_finalize の返り値が0 (成功) +□ CTxx024 ql_server is still running on master node + usr_prg_Bの終了後、ql_serverがマスターノード上で動作している +□ CTxx025 ql_mpiexec_finalize usr_prg_C return 0 + usr_prg_C を終了するql_mpiexec_finalize の返り値が0 (成功) +□ CTxx026 ql_server is not running on master node + すべてのMPIプログラムが終了したので、ql_serverが終了した + +CT20 デバイスマッピング (IBのバッファ) +IBを使用するMPIプログラムを2本作成する。 +send/recvのバッファはcommon領域に2GB使用する。 +送信バッファは送信毎に異なる内容(送信回数の値など)を設定し、 +受信側で検証できるようにする。 +デバイスマッピングに異常があると、検証で失敗する。 +尚、McKernelに割り当てるメモリ量は3GBとする。 +□ CT20001 device 
mapping program test START +□ CT20002 program 1 START + qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ノード) +□ CT20003 MPI_Send/Recv OK + 2 プロセス間でMPI_Send/Recvを実行 +□ CT20004 program 1 suspend + テストプログラム 1 が停止 +□ CT20005 program 2 START + qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ノード) +□ CT20006 MPI_Send/Recv OK + 2 プロセス間でMPI_Send/Recvを実行 +□ CT20007 program 1 suspend + テストプログラム 2 が停止 +□ CT20008 program 1 resume + qlmpi_start によってテストプログラム 1 が再開 +□ CT20009 MPI_Send/Recv OK + 2 プロセス間でMPI_Send/Recvを実行 +□ CT20010 program 1 suspend + テストプログラム 1 が停止 +□ CT20011 program 2 resume + qlmpi_start によってテストプログラム 2 が再開 +□ CT20012 MPI_Send/Recv OK + 2 プロセス間でMPI_Send/Recvを実行 +□ CT20013 program 2 suspend + テストプログラム 2 が停止 +□ CT20014 program 1 resume + qlmpi_finalize によってテストプログラム 1 が再開 +□ CT20015 program 1 END + テストプログラム1が終了 +□ CT20016 program 2 resume + qlmpi_finalize によってテストプログラム 2 が再開 +□ CT20017 program 2 END + テストプログラム2が終了 +□ CT20018 device mapping program test END + +CT21 mcexecのページテーブル更新確認 +ファイルをreadするプログラムを作成する。 +ファイル名によって、ファイル内容が確定できるようにする。 +(例えば、ファイル名が"1"のファイルは"1"で埋め尽くされているなど) +入力領域はcommon領域に2GB確保する。 +ファイルのサイズも2GBとする。 +qlmpi_start毎にファイルを切り替える(コマンドラインに渡すなど)。 +入力結果を検証する。 +もし、ページインで最初と異なる物理ページにバッファが割り当てられ、且つ、 +mcexecのページテーブルが更新されていない場合は、readによって関係無いページが +破壊される。また、ファイルの読み込み結果も検証で失敗する。 +尚、McKernelに割り当てるメモリ量は3GBとする。 +□ CT21001 mcexec page table update test START +□ CT21002 program 1 START + qlmpi_start によってテストプログラム 1 起動 (1プロセス) +□ CT21003 data read OK + ファイルを読み込んで内容確認した結果、問題なし +□ CT21004 program 1 suspend + テストプログラム 1 が停止 +□ CT21005 program 2 START + qlmpi_start によってテストプログラム 2 起動 (1プロセス) +□ CT21006 data read OK + ファイルを読み込んで内容確認した結果、問題なし +□ CT21007 program 1 suspend + テストプログラム 2 が停止 +□ CT21008 program 1 resume + qlmpi_start によってテストプログラム 1 が再開 +□ CT21009 data read OK + ファイルを読み込んで内容確認した結果、問題なし +□ CT21010 program 1 suspend + テストプログラム 1 が停止 +□ CT21011 program 2 resume + qlmpi_start によってテストプログラム 2 が再開 +□ CT21012 data read OK + ファイルを読み込んで内容確認した結果、問題なし +□ CT21013 program 2 suspend + テストプログラム 2 
が停止 +□ CT21014 program 1 resume + qlmpi_finalize によってテストプログラム 1 が再開 +□ CT21015 program 1 END + テストプログラム1が終了 +□ CT21016 program 2 resume + qlmpi_finalize によってテストプログラム 2 が再開 +□ CT21017 program 2 END + テストプログラム2が終了 +□ CT21018 mcexec page table update test END + +CT22 OMP +OMP で複数のスレッドを使用する状況のテスト。 +□ CT22001 device mapping program test START +□ CT22002 program 1 START + qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ノード) +□ CT22003 check rank info + MPIとOMPのプロセス情報が出力されていることを確認する。 + 以下のように出力されればOK(順不同)。 + mpi= 0/ 2, omp= 1/ 4 + mpi= 0/ 2, omp= 3/ 4 + mpi= 0/ 2, omp= 0/ 4 + mpi= 0/ 2, omp= 2/ 4 + mpi= 1/ 2, omp= 1/ 4 + mpi= 1/ 2, omp= 0/ 4 + mpi= 1/ 2, omp= 3/ 4 + mpi= 1/ 2, omp= 2/ 4 +□ CT22004 program 1 suspend + テストプログラム 1 が停止 +□ CT22005 program 2 START + qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ノード) +□ CT22006 check rank info + MPIとOMPのプロセス情報が出力されていることを確認する。 +□ CT22007 program 1 suspend + テストプログラム 2 が停止 +□ CT22008 program 1 resume + qlmpi_start によってテストプログラム 1 が再開 +□ CT22009 check rank info + MPIとOMPのプロセス情報が出力されていることを確認する。 +□ CT22010 program 1 suspend + テストプログラム 1 が停止 +□ CT22011 program 2 resume + qlmpi_start によってテストプログラム 2 が再開 +□ CT22012 check rank info + MPIとOMPのプロセス情報が出力されていることを確認する。 +□ CT22013 program 2 suspend + テストプログラム 2 が停止 +□ CT22014 program 1 resume + qlmpi_finalize によってテストプログラム 1 が再開 +□ CT22015 program 1 END + テストプログラム1が終了 +□ CT22016 program 2 resume + qlmpi_finalize によってテストプログラム 2 が再開 +□ CT22017 program 2 END + テストプログラム2が終了 +□ CT22018 device mapping program test END + +CT91 異常系 +□ CT91001 machinefile is not specified, so ql_mpiexec_start returns not 0 +□ CT91002 MPI program is not specified, so ql_mpiexec_start returns not 0 +□ CT91003 specified machinefile does not exist, so ql_mpiexec_start returns not 0 +□ CT91004 specified MPI program does not exist, so ql_mpiexec_start returns not 0 +□ CT91005 mpiexec is not found, so ql_mpiexec_start returns not 0 +□ CT91006 mpiexec abort, so ql_mpiexec_start returns not 0 +□ CT91007 machinefile is not specified, so 
ql_mpiexec_finalize returns not 0 +□ CT91008 MPI program is not specified, so ql_mpiexec_finalize returns not 0 +□ CT91009 specified machinefile is wrong, so ql_mpiexec_finalize returns not 0 +□ CT91010 specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0 +□ CT91011 one of MPI processes aborts, so ql_mpiexec_start returns not 0 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt new file mode 100644 index 00000000..b6476322 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT01 +MPI_NODE_NUM=1 +PROC_PER_NODE=1 +MASTER=wallaby15 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt new file mode 100644 index 00000000..24fe462c --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT02 +MPI_NODE_NUM=1 +PROC_PER_NODE=1 +MASTER=wallaby14 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt new file mode 100644 index 00000000..91bb92c8 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT03 +MPI_NODE_NUM=1 +PROC_PER_NODE=2 +MASTER=wallaby15 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt new file mode 100644 index 00000000..434bcb30 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT04 +MPI_NODE_NUM=1 +PROC_PER_NODE=2 +MASTER=wallaby14 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt new file mode 100644 index 00000000..3f4916b9 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT05 +MPI_NODE_NUM=1 +PROC_PER_NODE=8 +MASTER=wallaby15 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt 
b/test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt new file mode 100644 index 00000000..c1c9182e --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT06 +MPI_NODE_NUM=1 +PROC_PER_NODE=8 +MASTER=wallaby14 +SLAVE= diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt new file mode 100644 index 00000000..9579653f --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT07 +MPI_NODE_NUM=2 +PROC_PER_NODE=1 +MASTER=wallaby15 +SLAVE=wallaby14 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt new file mode 100644 index 00000000..2ae0a82b --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT08 +MPI_NODE_NUM=2 +PROC_PER_NODE=1 +MASTER=wallaby14 +SLAVE=wallaby15 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt new file mode 100644 index 00000000..06653e8d --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT09 +MPI_NODE_NUM=2 +PROC_PER_NODE=2 +MASTER=wallaby15 +SLAVE=wallaby14 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt new file mode 100644 index 00000000..35791d42 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT10 +MPI_NODE_NUM=2 +PROC_PER_NODE=2 +MASTER=wallaby14 +SLAVE=wallaby15 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt new file mode 100644 index 00000000..cde26a9b --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT11 +MPI_NODE_NUM=2 +PROC_PER_NODE=8 +MASTER=wallaby15 +SLAVE=wallaby14 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt b/test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt new file mode 
100644 index 00000000..621915a5 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=CT12 +MPI_NODE_NUM=2 +PROC_PER_NODE=8 +MASTER=wallaby14 +SLAVE=wallaby15 diff --git a/test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt b/test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt new file mode 100644 index 00000000..bf535747 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt @@ -0,0 +1,5 @@ +TEST_PREFIX=ECT91 +MPI_NODE_NUM=2 +PROC_PER_NODE=8 +MASTER=wallaby14 +SLAVE=wallaby15 diff --git a/test/qlmpi/qlmpi_testsuite/usr_prg_A.c b/test/qlmpi/qlmpi_testsuite/usr_prg_A.c new file mode 100644 index 00000000..6053eb11 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/usr_prg_A.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int +main(int argc, char **argv) +{ + int rc; + int i; + int num_procs, my_rank; + char hname[128]; + char argv_str[1024]; + + gethostname(hname, 128); + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + +ql_loop: + printf("INFO This is A. 
host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid()); + memset(argv_str, '\0', sizeof(argv_str)); + + printf("%d:argc=%d\n", my_rank, argc); + for (i = 0; i < argc; i++) { + if (i > 0) { + strcat(argv_str, " "); + } + strcat(argv_str, argv[i]); + } + printf("%d:argv=%s\n", my_rank, argv_str); + + printf("%d:QL_TEST=%s\n", my_rank, getenv("QL_TEST")); + + printf("%d:done=yes\n", my_rank); + fflush(stdout); + + rc = ql_client(&argc, &argv); + + //printf("ql_client returns: %d\n", rc); + if (rc == QL_CONTINUE) { + printf("%d:resume=go_back\n", my_rank); + goto ql_loop; + } + else { + printf("%d:resume=go_finalize\n", my_rank); + } + + MPI_Finalize(); + printf("%d:finish=yes\n", my_rank); + return 0; +} diff --git a/test/qlmpi/qlmpi_testsuite/usr_prg_B.c b/test/qlmpi/qlmpi_testsuite/usr_prg_B.c new file mode 100644 index 00000000..f1deed96 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/usr_prg_B.c @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int +main(int argc, char **argv) +{ + int rc; + int num_procs, my_rank; + char hname[128]; + + gethostname(hname, 128); + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + +ql_loop: + printf("INFO This is B. 
host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid()); + + printf("%d:done=yes\n", my_rank); + fflush(stdout); + + rc = ql_client(&argc, &argv); + + //printf("ql_client returns: %d\n", rc); + if (rc == QL_CONTINUE) { + printf("%d:resume=go_back\n", my_rank); + goto ql_loop; + } + else { + printf("%d:resume=go_finalize\n", my_rank); + } + + MPI_Finalize(); + printf("%d:finish=yes\n", my_rank); + return 0; +} diff --git a/test/qlmpi/qlmpi_testsuite/usr_prg_C.c b/test/qlmpi/qlmpi_testsuite/usr_prg_C.c new file mode 100644 index 00000000..68de7162 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/usr_prg_C.c @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int +main(int argc, char **argv) +{ + int rc; + int num_procs, my_rank; + char hname[128]; + + gethostname(hname, 128); + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + +ql_loop: + printf("INFO This is C. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid()); + + printf("%d:done=yes\n", my_rank); + fflush(stdout); + + rc = ql_client(&argc, &argv); + + //printf("ql_client returns: %d\n", rc); + if (rc == QL_CONTINUE) { + printf("%d:resume=go_back\n", my_rank); + goto ql_loop; + } + else { + printf("%d:resume=go_finalize\n", my_rank); + } + + MPI_Finalize(); + printf("%d:finish=yes\n", my_rank); + return 0; +} diff --git a/test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c b/test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c new file mode 100644 index 00000000..8824d42d --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int +main(int argc, char **argv) +{ + int rc; + int num_procs, my_rank; + char hname[128]; + int abort_rank = 0; + + gethostname(hname, 128); + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + +ql_loop: 
+ printf("INFO This is irreg. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid()); + if (argc >= 2) { /* argv[1] exists whenever argc >= 2; "> 2" ignored a lone rank argument */ + abort_rank = atoi(argv[1]); + } + + if (my_rank != abort_rank) { + printf("%d:done=yes\n", my_rank); + fflush(stdout); + } + else { + printf("%d:done=abort\n", my_rank); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, -1); + } + + rc = ql_client(&argc, &argv); + + //printf("ql_client returns: %d\n", rc); + if (rc == QL_CONTINUE) { + printf("%d:resume=go_back\n", my_rank); + goto ql_loop; + } + else { + printf("%d:resume=go_finalize\n", my_rank); + } + + MPI_Finalize(); + printf("%d:finish=yes\n", my_rank); + return 0; +} diff --git a/test/qlmpi/qlmpi_testsuite/util/mpiexec b/test/qlmpi/qlmpi_testsuite/util/mpiexec new file mode 100755 index 00000000..003c345b --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/util/mpiexec @@ -0,0 +1,5 @@ +#!/bin/sh + +# This is dummy mpiexec for irregular test +echo "dummy mpiexec abort!!" >&2 +exit 1 diff --git a/test/qlmpi/qlmpi_testsuite/util/wrong_mfile b/test/qlmpi/qlmpi_testsuite/util/wrong_mfile new file mode 100644 index 00000000..3bd1f0e2 --- /dev/null +++ b/test/qlmpi/qlmpi_testsuite/util/wrong_mfile @@ -0,0 +1,2 @@ +foo +bar diff --git a/test/qlmpi/swaptest.c b/test/qlmpi/swaptest.c new file mode 100644 index 00000000..7683ebef --- /dev/null +++ b/test/qlmpi/swaptest.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#define BUF_SIZE (32*1024) + +int data[1024*1024]; +char sym2[1024*1024] = { 10, 20, 30, 0 }; +char sym3[1024*1024] = { 10, 20, 30, 0 }; +char *sym1 = "aaaaaa"; +char buffer[BUF_SIZE]; +char *ptr1, *ptr2; +char fnamebuf[1024]; + +int +swapout(char *fname, void *buf, size_t sz, int flag) +{ + int cc; + cc = syscall(801, fname, buf, sz, flag); + return cc; +} +int +linux_mlock(const void *addr, size_t len) +{ + int cc; + cc = syscall(802, addr, len); + return cc; +} + + +int +main(int argc, char **argv) +{ + int cc; + int flag = 0; + + if (argc == 2) { + flag = atoi(argv[1]); + } +
switch (flag) { + case 1: + printf("skipping real paging for debugging and just calling swapout in Linux\n"); + break; + case 2: + printf("skipping calling swapout in Linux\n"); + break; + } + printf("&data = %p\n", (void *)data); + printf("&sym1 = %p\n", (void *)&sym1); + printf("&sym2 = %p\n", (void *)sym2); + printf("&sym3 = %p\n", (void *)sym3); + printf("&cc = %p\n", (void *)&cc); + ptr1 = malloc(1024); + ptr2 = malloc(1024*1024); + printf("ptr1 = %p\n", (void *)ptr1); + printf("ptr2 = %p\n", (void *)ptr2); + sprintf((char*) data, "hello\n"); + /* + * testing mlock in mckernel side + */ + cc = mlock(data, 16*1024); + printf("McKernel mlock returns: %d\n", cc); + /* + * testing mlock in linux side + */ + cc = linux_mlock(data, 16*1024); + printf("linux_mlock returns: %d\n", cc); + strcpy(sym2, "returns: %d\n"); + strcpy(sym3, "data = %s"); /* %s, not %d: the argument passed below is the text stored in data */ + + /* buf area will be used in swapout systemcall for debugging */ + strcpy(fnamebuf, "/tmp/pages"); + cc = swapout(fnamebuf, buffer, BUF_SIZE, flag); + printf("swapout returns: %d\n", cc); + printf("data = %s", (char *)data); + printf(sym2, cc); /* NOTE(review): sym2/sym3 are used as format strings, presumably to show their contents survive swapout - confirm */ + printf(sym3, (char *)data); + return 0; +}