Add qlmpi and swap to mckernel (This is rebase commit for merging to development)

2017-07-23 21:19:15 +09:00
parent 74f15783d2
commit 236a072311
61 changed files with 6638 additions and 24 deletions
--- a/Makefile.in
+++ b/Makefile.in
@ -1,5 +1,6 @@
 TARGET = @TARGET@
 SBINDIR = @SBINDIR@
+INCDIR =  @INCDIR@
 ETCDIR = @ETCDIR@
 MANDIR = @MANDIR@

@ -53,6 +54,8 @@ install::
 		mkdir -p -m 755 $(ETCDIR); \
 		install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
 		install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
+		mkdir -p -m 755 $(INCDIR); \
+		install -m 644 kernel/include/swapfmt.h $(INCDIR); \
 		mkdir -p -m 755 $(MANDIR)/man1; \
 		install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
 		;; \
--- a/arch/x86/kernel/include/syscall_list.h
+++ b/arch/x86/kernel/include/syscall_list.h
@ -68,6 +68,7 @@ SYSCALL_DELEGATED(69, msgsnd)
 SYSCALL_DELEGATED(70, msgrcv)
 SYSCALL_HANDLED(72, fcntl)
 SYSCALL_DELEGATED(79, getcwd)
+SYSCALL_DELEGATED(87, unlink)
 SYSCALL_DELEGATED(89, readlink)
 SYSCALL_HANDLED(96, gettimeofday)
 SYSCALL_HANDLED(97, getrlimit)
@ -157,4 +158,10 @@ SYSCALL_HANDLED(730, util_migrate_inter_kernel)
 SYSCALL_HANDLED(731, util_indicate_clone)
 SYSCALL_HANDLED(732, get_system)

+/* McKernel Specific */
+SYSCALL_HANDLED(801, swapout)
+SYSCALL_HANDLED(802, linux_mlock)
+SYSCALL_HANDLED(803, suspend_threads)
+SYSCALL_HANDLED(804, resume_threads)
+SYSCALL_HANDLED(811, linux_spawn)
 /**** End of File ****/
--- a/config.h.in
+++ b/config.h.in
@ -6,6 +6,9 @@
 /* whether memdump feature is enabled */
 #undef ENABLE_MEMDUMP

+/* whether qlmpi is enabled */
+#undef ENABLE_QLMPI
+
 /* whether rusage is enabled */
 #undef ENABLE_RUSAGE

@ -72,9 +75,15 @@
 /* Define to address of kernel symbol vdso_start, or 0 if exported */
 #undef MCCTRL_KSYM_vdso_start

+/* Define to address of kernel symbol walk_page_range, or 0 if exported */
+#undef MCCTRL_KSYM_walk_page_range
+
 /* Define to address of kernel symbol zap_page_range, or 0 if exported */
 #undef MCCTRL_KSYM_zap_page_range

+/* McKernel specific headers */
+#undef MCKERNEL_INCDIR
+
 /* McKernel specific libraries */
 #undef MCKERNEL_LIBDIR

@ -101,3 +110,9 @@

 /* Define to 1 if you have the ANSI C header files. */
 #undef STDC_HEADERS
+
+/* install directory for system binary. */
+#undef SBINDIR
+
+/* install directory for binary. */
+#undef BINDIR
--- a/configure
+++ b/configure
@ -628,13 +628,16 @@ IHK_RELEASE_DATE
 DCFA_VERSION
 MCKERNEL_VERSION
 IHK_VERSION
+ENABLE_QLMPI
 ENABLE_RUSAGE
 ENABLE_MCOVERLAYFS
 MANDIR
 KERNDIR
 KMODDIR
 ETCDIR
+INCDIR
 MCKERNEL_LIBDIR
+MCKERNEL_INCDIR
 SBINDIR
 BINDIR
 TARGET
@ -642,6 +645,7 @@ UNAME_R
 KDIR
 ARCH
 XCC
+FGREP
 EGREP
 GREP
 CPP
@ -693,6 +697,9 @@ SHELL'
 ac_subst_files=''
 ac_user_opts='
 enable_option_checking
+with_mpi
+with_mpi_include
+with_mpi_lib
 with_kernelsrc
 with_target
 with_system_map
@ -700,6 +707,7 @@ enable_dcfa
 enable_memdump
 enable_mcoverlayfs
 enable_rusage
+enable_qlmpi
 with_uname_r
 '
      ac_precious_vars='build_alias
@ -1324,10 +1332,16 @@ Optional Features:
  --enable-memdump        enable dumping memory and analyzing a dump
  --enable-mcoverlayfs    enable mcoverlayfs implementation
  --enable-rusage         enable rusage implementation
+  --enable-qlmpi          enable qlmpi implementation

 Optional Packages:
  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
+  --with-mpi=PATH         specify path where mpi include directory and lib
+                          directory can be found
+  --with-mpi-include=PATH specify path where mpi include directory can be
+                          found
+  --with-mpi-lib=PATH     specify path where mpi lib directory can be found
  --with-kernelsrc=path   Path to 'kernel src', default is
                          /lib/modules/uname_r/build
  --with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}
@ -2060,6 +2074,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu



+
+
+
+
 IHK_VERSION=0.9.0
 MCKERNEL_VERSION=0.9.0
 DCFA_VERSION=0.9.0
@ -3309,6 +3327,187 @@ if test "x$numa_lib_found" != "xyes"; then :
  as_fn_error $? "Unable to find NUMA library, missing numactl-devel?" "$LINENO" 5
 fi

+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
+$as_echo_n "checking for fgrep... " >&6; }
+if ${ac_cv_path_FGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
+   then ac_cv_path_FGREP="$GREP -F"
+   else
+     if test -z "$FGREP"; then
+  ac_path_FGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in fgrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_FGREP" || continue
+# Check for GNU ac_path_FGREP and select it if it is found.
+  # Check for GNU $ac_path_FGREP
+case `"$ac_path_FGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'FGREP' >> "conftest.nl"
+    "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_FGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_FGREP="$ac_path_FGREP"
+      ac_path_FGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_FGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_FGREP"; then
+    as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_FGREP=$FGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5
+$as_echo "$ac_cv_path_FGREP" >&6; }
+ FGREP="$ac_cv_path_FGREP"
+
+
+
+
+# Check whether --with-mpi was given.
+if test "${with_mpi+set}" = set; then :
+  withval=$with_mpi; case "$withval" in #(
+  yes|no|'') :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi=PATH expects a valid PATH" >&5
+$as_echo "$as_me: WARNING: --without-mpi=PATH expects a valid PATH" >&2;}
+                          with_mpi="" ;; #(
+  *) :
+     ;;
+esac
+else
+  with_mpi=
+fi
+
+
+# Check whether --with-mpi-include was given.
+if test "${with_mpi_include+set}" = set; then :
+  withval=$with_mpi_include; case "$withval" in #(
+  yes|no|'') :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi-include=PATH expects a valid PATH" >&5
+$as_echo "$as_me: WARNING: --without-mpi-include=PATH expects a valid PATH" >&2;}
+                          with_mpi_include="" ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+# Check whether --with-mpi-lib was given.
+if test "${with_mpi_lib+set}" = set; then :
+  withval=$with_mpi_lib; case "$withval" in #(
+  yes|no|'') :
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: --without-mpi-lib=PATH expects a valid PATH" >&5
+$as_echo "$as_me: WARNING: --without-mpi-lib=PATH expects a valid PATH" >&2;}
+                          with_mpi_lib="" ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+    # The args have been sanitized into empty/non-empty values above.
+    # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options
+    # taking priority
+
+    if test -n "${with_mpi_include}"; then :
+
+
+	if echo "$CPPFLAGS" | $FGREP -e "\<-I${with_mpi_include}\>" >/dev/null 2>&1; then :
+  echo "CPPFLAGS(='$CPPFLAGS') contains '-I${with_mpi_include}', not appending" >&5
+else
+  echo "CPPFLAGS(='$CPPFLAGS') does not contain '-I${with_mpi_include}', appending" >&5
+		CPPFLAGS="$CPPFLAGS -I${with_mpi_include}"
+
+fi
+
+else
+  if test -n "${with_mpi}"; then :
+
+
+	if echo "$CPPFLAGS" | $FGREP -e "\<-I${with_mpi}/include\>" >/dev/null 2>&1; then :
+  echo "CPPFLAGS(='$CPPFLAGS') contains '-I${with_mpi}/include', not appending" >&5
+else
+  echo "CPPFLAGS(='$CPPFLAGS') does not contain '-I${with_mpi}/include', appending" >&5
+		CPPFLAGS="$CPPFLAGS -I${with_mpi}/include"
+
+fi
+
+fi
+fi
+
+    if test -n "${with_mpi_lib}"; then :
+
+
+	if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi_lib}\>" >/dev/null 2>&1; then :
+  echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi_lib}', not appending" >&5
+else
+  echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi_lib}', appending" >&5
+		LDFLAGS="$LDFLAGS -L${with_mpi_lib}"
+
+fi
+
+else
+  if test -n "${with_mpi}"; then :
+
+
+	if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi}/lib\>" >/dev/null 2>&1; then :
+  echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi}/lib', not appending" >&5
+else
+  echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi}/lib', appending" >&5
+		LDFLAGS="$LDFLAGS -L${with_mpi}/lib"
+
+fi
+
+                  if test -d "${with_mpi}/lib64"; then :
+
+
+	if echo "$LDFLAGS" | $FGREP -e "\<-L${with_mpi}/lib64\>" >/dev/null 2>&1; then :
+  echo "LDFLAGS(='$LDFLAGS') contains '-L${with_mpi}/lib64', not appending" >&5
+else
+  echo "LDFLAGS(='$LDFLAGS') does not contain '-L${with_mpi}/lib64', appending" >&5
+		LDFLAGS="$LDFLAGS -L${with_mpi}/lib64"
+
+fi
+
+fi
+
+fi
+
+fi
+
+

 # Check whether --with-kernelsrc was given.
 if test "${with_kernelsrc+set}" = set; then :
@ -3368,6 +3567,14 @@ else
 fi


+# Check whether --enable-qlmpi was given.
+if test "${enable_qlmpi+set}" = set; then :
+  enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
+else
+  ENABLE_QLMPI=no
+fi
+
+

 # Check whether --with-uname_r was given.
 if test "${with_uname_r+set}" = set; then :
@ -4059,9 +4266,15 @@ case $WITH_TARGET in
 	if test "X$SBINDIR" = X; then
 		SBINDIR="$prefix/sbin"
 	fi
+	if test "X$MCKERNEL_INCDIR" = X; then
+		MCKERNEL_INCDIR="$prefix/include"
+	fi
        if test "X$MCKERNEL_LIBDIR" = X; then
                MCKERNEL_LIBDIR="$prefix/lib"
        fi
+	if test "X$INCDIR" = X; then
+		INCDIR="$prefix/include"
+	fi
 	if test "X$ETCDIR" = X; then
 		ETCDIR="$prefix/etc"
 	fi
@ -4423,6 +4636,31 @@ _ACEOF
  fi


+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol walk_page_range" >&5
+$as_echo_n "checking System.map for symbol walk_page_range... " >&6; }
+  mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " walk_page_range\$" | cut -d\  -f1`
+  if test -z $mcctrl_addr; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
+$as_echo "not found" >&6; }
+  else
+    mcctrl_result=$mcctrl_addr
+    mcctrl_addr="0x$mcctrl_addr"
+
+      if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_walk_page_range\$" >/dev/null`; then
+        mcctrl_result="exported"
+        mcctrl_addr="0"
+      fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
+$as_echo "$mcctrl_result" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define MCCTRL_KSYM_walk_page_range $mcctrl_addr
+_ACEOF
+
+  fi
+
+
 case $ENABLE_MEMDUMP in
 	yes|no|auto)
 		;;
@ -4526,6 +4764,17 @@ else
 $as_echo "$as_me: mcoverlayfs is disabled" >&6;}
 fi

+if test "x$ENABLE_QLMPI" = "xyes" ; then
+
+$as_echo "#define ENABLE_QLMPI 1" >>confdefs.h
+
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: qlmpi is enabled" >&5
+$as_echo "$as_me: qlmpi is enabled" >&6;}
+else
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: qlmpi is disabled" >&5
+$as_echo "$as_me: qlmpi is disabled" >&6;}
+fi
+
 case $ENABLE_RUSAGE in
 	yes|no)
 		;;
@ -4548,6 +4797,14 @@ else
 $as_echo "$as_me: rusage is disabled" >&6;}
 fi

+if test "x$MCKERNEL_INCDIR" != "x" ; then
+
+cat >>confdefs.h <<_ACEOF
+#define MCKERNEL_INCDIR "$MCKERNEL_INCDIR"
+_ACEOF
+
+fi
+
 if test "x$MCKERNEL_LIBDIR" != "x" ; then

 cat >>confdefs.h <<_ACEOF
@ -4557,6 +4814,20 @@ _ACEOF
 fi


+cat >>confdefs.h <<_ACEOF
+#define BINDIR "$BINDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define SBINDIR "$SBINDIR"
+_ACEOF
+
+
+
+
+
+



@ -4583,7 +4854,7 @@ fi

 ac_config_headers="$ac_config_headers config.h"

-ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"
+ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"


 if test "x$enable_dcfa" = xyes; then :
@ -5293,6 +5564,7 @@ do
    "executer/kernel/mcoverlayfs/linux-4.6.7/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/linux-4.6.7/Makefile" ;;
    "kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;;
    "kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;;
+    "kernel/include/swapfmt.h") CONFIG_FILES="$CONFIG_FILES kernel/include/swapfmt.h" ;;
    "arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;;
    "arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;;
    "arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;;
--- a/configure.ac
+++ b/configure.ac
@ -8,6 +8,76 @@ m4_define([DCFA_RELEASE_DATE_m4],[2013-11-18])dnl

 AC_INIT([mckernel], MCKERNEL_VERSION_m4)

+dnl *** PAC_APPEND_FLAG is copied from aclocal_util.m4 of mpich-3.3a2 ***
+dnl Usage: PAC_APPEND_FLAG([-O2], [CFLAGS])
+dnl appends the given argument to the specified shell variable unless the
+dnl argument is already present in the variable
+AC_DEFUN([PAC_APPEND_FLAG],[
+	AC_REQUIRE([AC_PROG_FGREP])
+	AS_IF(
+		[echo "$$2" | $FGREP -e "\<$1\>" >/dev/null 2>&1],
+		[echo "$2(='$$2') contains '$1', not appending" >&AS_MESSAGE_LOG_FD],
+		[echo "$2(='$$2') does not contain '$1', appending" >&AS_MESSAGE_LOG_FD
+		$2="$$2 $1"]
+	)
+])
+
+dnl *** PAC_SET_HEADER_LIB_PATH is copied from aclocal_libs.m4 of mpich-3.3a2 ***
+dnl PAC_SET_HEADER_LIB_PATH(with_option,[default_path])
+dnl This macro looks for the --with-xxx=, --with-xxx-include and --with-xxx-lib=
+dnl options and sets the library and include paths.
+dnl
+dnl TODO as written, this macro cannot handle a "with_option" arg that has "-"
+dnl characters in it.  Use AS_TR_SH (and possibly AS_VAR_* macros) to handle
+dnl this case if it ever arises.
+AC_DEFUN([PAC_SET_HEADER_LIB_PATH],[
+    AC_ARG_WITH([$1],
+                [AC_HELP_STRING([--with-$1=PATH],
+                                [specify path where $1 include directory and lib directory can be found])],
+
+                [AS_CASE(["$withval"],
+                         [yes|no|''],
+                         [AC_MSG_WARN([--with[out]-$1=PATH expects a valid PATH])
+                          with_$1=""])],
+                [with_$1=$2])
+    AC_ARG_WITH([$1-include],
+                [AC_HELP_STRING([--with-$1-include=PATH],
+                                [specify path where $1 include directory can be found])],
+                [AS_CASE(["$withval"],
+                         [yes|no|''],
+                         [AC_MSG_WARN([--with[out]-$1-include=PATH expects a valid PATH])
+                          with_$1_include=""])],
+                [])
+    AC_ARG_WITH([$1-lib],
+                [AC_HELP_STRING([--with-$1-lib=PATH],
+                                [specify path where $1 lib directory can be found])],
+                [AS_CASE(["$withval"],
+                         [yes|no|''],
+                         [AC_MSG_WARN([--with[out]-$1-lib=PATH expects a valid PATH])
+                          with_$1_lib=""])],
+                [])
+
+    # The args have been sanitized into empty/non-empty values above.
+    # Now append -I/-L args to CPPFLAGS/LDFLAGS, with more specific options
+    # taking priority
+
+    AS_IF([test -n "${with_$1_include}"],
+          [PAC_APPEND_FLAG([-I${with_$1_include}],[CPPFLAGS])],
+          [AS_IF([test -n "${with_$1}"],
+                 [PAC_APPEND_FLAG([-I${with_$1}/include],[CPPFLAGS])])])
+
+    AS_IF([test -n "${with_$1_lib}"],
+          [PAC_APPEND_FLAG([-L${with_$1_lib}],[LDFLAGS])],
+          [AS_IF([test -n "${with_$1}"],
+                 dnl is adding lib64 by default really the right thing to do?  What if
+                 dnl we are on a 32-bit host that happens to have both lib dirs available?
+                 [PAC_APPEND_FLAG([-L${with_$1}/lib],[LDFLAGS])
+                  AS_IF([test -d "${with_$1}/lib64"],
+		        [PAC_APPEND_FLAG([-L${with_$1}/lib64],[LDFLAGS])])
+                 ])
+          ])
+])
+
 IHK_VERSION=IHK_VERSION_m4
 MCKERNEL_VERSION=MCKERNEL_VERSION_m4
 DCFA_VERSION=DCFA_VERSION_m4
@ -24,6 +94,8 @@ AC_CHECK_LIB([numa],[numa_run_on_node],[numa_lib_found=yes])
 AS_IF([test "x$numa_lib_found" != "xyes"],
 	[AC_MSG_ERROR([Unable to find NUMA library, missing numactl-devel?])])

+PAC_SET_HEADER_LIB_PATH([mpi])
+
 AC_ARG_WITH([kernelsrc],
  AC_HELP_STRING(
    [--with-kernelsrc=path],[Path to 'kernel src', default is /lib/modules/uname_r/build]),
@ -61,6 +133,12 @@ AC_ARG_ENABLE([rusage],
 	      [ENABLE_RUSAGE=$enableval],
 	      [ENABLE_RUSAGE=yes])

+AC_ARG_ENABLE([qlmpi],
+	      AC_HELP_STRING([--enable-qlmpi],
+			     [enable qlmpi implementation]),
+	      [ENABLE_QLMPI=$enableval],
+	      [ENABLE_QLMPI=no])
+
 AC_ARG_WITH([uname_r],
  AC_HELP_STRING(
    [--with-uname_r=uname_r],[Value of '`uname -r`' on the target platform, default is local value]),
@ -172,9 +250,15 @@ case $WITH_TARGET in
 	if test "X$SBINDIR" = X; then
 		SBINDIR="$prefix/sbin"
 	fi
+	if test "X$MCKERNEL_INCDIR" = X; then
+		MCKERNEL_INCDIR="$prefix/include"
+	fi
        if test "X$MCKERNEL_LIBDIR" = X; then
                MCKERNEL_LIBDIR="$prefix/lib"
        fi
+	if test "X$INCDIR" = X; then
+		INCDIR="$prefix/include"
+	fi
 	if test "X$ETCDIR" = X; then
 		ETCDIR="$prefix/etc"
 	fi
@ -262,6 +346,7 @@ MCCTRL_FIND_KSYM([__vvar_page])
 MCCTRL_FIND_KSYM([hpet_address])
 MCCTRL_FIND_KSYM([hv_clock])
 MCCTRL_FIND_KSYM([sys_readlink])
+MCCTRL_FIND_KSYM([walk_page_range])

 case $ENABLE_MEMDUMP in
 	yes|no|auto)
@ -305,6 +390,13 @@ else
 	AC_MSG_NOTICE([mcoverlayfs is disabled])
 fi

+if test "x$ENABLE_QLMPI" = "xyes" ; then
+	AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
+	AC_MSG_NOTICE([qlmpi is enabled])
+else
+	AC_MSG_NOTICE([qlmpi is disabled])
+fi
+
 case $ENABLE_RUSAGE in
 	yes|no)
 		;;
@ -323,10 +415,17 @@ else
 	AC_MSG_NOTICE([rusage is disabled])
 fi

+if test "x$MCKERNEL_INCDIR" != "x" ; then
+   AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
+fi
+
 if test "x$MCKERNEL_LIBDIR" != "x" ; then
   AC_DEFINE_UNQUOTED(MCKERNEL_LIBDIR,"$MCKERNEL_LIBDIR",[McKernel specific libraries])
 fi

+AC_DEFINE_UNQUOTED(BINDIR,"$BINDIR",[Path of install directory for binary])
+AC_DEFINE_UNQUOTED(SBINDIR,"$SBINDIR",[Path of install directory for system binary])
+
 AC_SUBST(CC)
 AC_SUBST(XCC)
 AC_SUBST(ARCH)
@ -335,7 +434,9 @@ AC_SUBST(UNAME_R)
 AC_SUBST(TARGET)
 AC_SUBST(BINDIR)
 AC_SUBST(SBINDIR)
+AC_SUBST(MCKERNEL_INCDIR)
 AC_SUBST(MCKERNEL_LIBDIR)
+AC_SUBST(INCDIR)
 AC_SUBST(ETCDIR)
 AC_SUBST(KMODDIR)
 AC_SUBST(KERNDIR)
@ -343,6 +444,7 @@ AC_SUBST(MANDIR)
 AC_SUBST(CFLAGS)
 AC_SUBST(ENABLE_MCOVERLAYFS)
 AC_SUBST(ENABLE_RUSAGE)
+AC_SUBST(ENABLE_QLMPI)

 AC_SUBST(IHK_VERSION)
 AC_SUBST(MCKERNEL_VERSION)
@ -365,6 +467,7 @@ AC_CONFIG_FILES([
 	executer/kernel/mcoverlayfs/linux-4.6.7/Makefile
 	kernel/Makefile
 	kernel/Makefile.build
+	kernel/include/swapfmt.h
 	arch/x86/tools/mcreboot-attached-mic.sh
 	arch/x86/tools/mcshutdown-attached-mic.sh
 	arch/x86/tools/mcreboot-builtin-x86.sh
--- a/executer/include/md5.h
+++ b/executer/include/md5.h
@ -0,0 +1,91 @@
+/*
+  Copyright (C) 1999, 2002 Aladdin Enterprises.  All rights reserved.
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  L. Peter Deutsch
+  ghost@aladdin.com
+
+ */
+/* $Id: md5.h,v 1.4 2002/04/13 19:20:28 lpd Exp $ */
+/*
+  Independent implementation of MD5 (RFC 1321).
+
+  This code implements the MD5 Algorithm defined in RFC 1321, whose
+  text is available at
+	http://www.ietf.org/rfc/rfc1321.txt
+  The code is derived from the text of the RFC, including the test suite
+  (section A.5) but excluding the rest of Appendix A.  It does not include
+  any code or documentation that is identified in the RFC as being
+  copyrighted.
+
+  The original and principal author of md5.h is L. Peter Deutsch
+  <ghost@aladdin.com>.  Other authors are noted in the change history
+  that follows (in reverse chronological order):
+
+  2002-04-13 lpd Removed support for non-ANSI compilers; removed
+	references to Ghostscript; clarified derivation from RFC 1321;
+	now handles byte order either statically or dynamically.
+  1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+  1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5);
+	added conditionalization for C++ compilation from Martin
+	Purschke <purschke@bnl.gov>.
+  1999-05-03 lpd Original version.
+ */
+
+#ifndef md5_INCLUDED
+#  define md5_INCLUDED
+
+/*
+ * This package supports both compile-time and run-time determination of CPU
+ * byte order.  If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be
+ * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is
+ * defined as non-zero, the code will be compiled to run only on big-endian
+ * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to
+ * run on either big- or little-endian CPUs, but will run slightly less
+ * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined.
+ */
+
+typedef unsigned char md5_byte_t; /* 8-bit byte */
+typedef unsigned int md5_word_t; /* 32-bit word */
+
+/* Define the state of the MD5 Algorithm. */
+typedef struct md5_state_s {
+    md5_word_t count[2];	/* message length in bits, lsw first */
+    md5_word_t abcd[4];		/* digest buffer */
+    md5_byte_t buf[64];		/* accumulate block */
+} md5_state_t;
+
+#ifdef __cplusplus
+extern "C" 
+{
+#endif
+
+/* Initialize the algorithm. */
+void md5_init(md5_state_t *pms);
+
+/* Append a string to the message. */
+void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes);
+
+/* Finish the message and return the digest. */
+void md5_finish(md5_state_t *pms, md5_byte_t digest[16]);
+
+#ifdef __cplusplus
+}  /* end extern "C" */
+#endif
+
+#endif /* md5_INCLUDED */
--- a/executer/include/pmi.h
+++ b/executer/include/pmi.h
@ -0,0 +1,473 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+/*
+ *  (C) 2001 by Argonne National Laboratory.
+ *      See COPYRIGHT in top-level directory.
+ */
+
+#ifndef PMI_H_INCLUDED
+#define PMI_H_INCLUDED
+
+#ifdef USE_PMI2_API
+#error This header file defines the PMI v1 API, but PMI2 was selected
+#endif
+
+/* prototypes for the PMI interface in MPICH */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*D
+PMI_CONSTANTS - PMI definitions
+
+Error Codes:
+ PMI_SUCCESS - operation completed successfully
+. PMI_FAIL - operation failed
+. PMI_ERR_NOMEM - input buffer not large enough
+. PMI_ERR_INIT - PMI not initialized
+. PMI_ERR_INVALID_ARG - invalid argument
+. PMI_ERR_INVALID_KEY - invalid key argument
+. PMI_ERR_INVALID_KEY_LENGTH - invalid key length argument
+. PMI_ERR_INVALID_VAL - invalid val argument
+. PMI_ERR_INVALID_VAL_LENGTH - invalid val length argument
+. PMI_ERR_INVALID_LENGTH - invalid length argument
+. PMI_ERR_INVALID_NUM_ARGS - invalid number of arguments
+. PMI_ERR_INVALID_ARGS - invalid args argument
+. PMI_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument
+. PMI_ERR_INVALID_KEYVALP - invalid keyvalp argument
+- PMI_ERR_INVALID_SIZE - invalid size argument
+
+Booleans:
+ PMI_TRUE - true
+- PMI_FALSE - false
+
+D*/
+#define PMI_SUCCESS                  0
+#define PMI_FAIL                    -1
+#define PMI_ERR_INIT                 1
+#define PMI_ERR_NOMEM                2
+#define PMI_ERR_INVALID_ARG          3
+#define PMI_ERR_INVALID_KEY          4
+#define PMI_ERR_INVALID_KEY_LENGTH   5
+#define PMI_ERR_INVALID_VAL          6
+#define PMI_ERR_INVALID_VAL_LENGTH   7
+#define PMI_ERR_INVALID_LENGTH       8
+#define PMI_ERR_INVALID_NUM_ARGS     9
+#define PMI_ERR_INVALID_ARGS        10
+#define PMI_ERR_INVALID_NUM_PARSED  11
+#define PMI_ERR_INVALID_KEYVALP     12
+#define PMI_ERR_INVALID_SIZE        13
+
+/* PMI Group functions */
+
+/*@
+PMI_Init - initialize the Process Manager Interface
+
+Output Parameter:
+. spawned - spawned flag
+
+Return values:
+ PMI_SUCCESS - initialization completed successfully
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - initialization failed
+
+Notes:
+Initialize PMI for this process group. The value of spawned indicates whether
+this process was created by 'PMI_Spawn_multiple'.  'spawned' will be 'PMI_TRUE' if
+this process group has a parent and 'PMI_FALSE' if it does not.
+
+@*/
+int PMI_Init( int *spawned );
+
+/*@
+PMI_Initialized - check if PMI has been initialized
+
+Output Parameter:
+. initialized - boolean value
+
+Return values:
+ PMI_SUCCESS - initialized successfully set
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to set the variable
+
+Notes:
+On successful output, initialized will either be 'PMI_TRUE' or 'PMI_FALSE'.
+
+ PMI_TRUE - initialize has been called.
+- PMI_FALSE - initialize has not been called or previously failed.
+
+@*/
+int PMI_Initialized( int *initialized );
+
+/*@
+PMI_Finalize - finalize the Process Manager Interface
+
+Return values:
+ PMI_SUCCESS - finalization completed successfully
+- PMI_FAIL - finalization failed
+
+Notes:
+ Finalize PMI for this process group.
+
+@*/
+int PMI_Finalize( void );
+
+/*@
+PMI_Get_size - obtain the size of the process group
+
+Output Parameters:
+. size - pointer to an integer that receives the size of the process group
+
+Return values:
+ PMI_SUCCESS - size successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to return the size
+
+Notes:
+This function returns the size of the process group to which the local process
+belongs.
+
+@*/
+int PMI_Get_size( int *size );
+
+/*@
+PMI_Get_rank - obtain the rank of the local process in the process group
+
+Output Parameters:
+. rank - pointer to an integer that receives the rank in the process group
+
+Return values:
+ PMI_SUCCESS - rank successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to return the rank
+
+Notes:
+This function returns the rank of the local process in its process group.
+
+@*/
+int PMI_Get_rank( int *rank );
+
+/*@
+PMI_Get_universe_size - obtain the universe size
+
+Output Parameters:
+. size - pointer to an integer that receives the size
+
+Return values:
+ PMI_SUCCESS - size successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to return the size
+
+
+@*/
+int PMI_Get_universe_size( int *size );
+
+/*@
+PMI_Get_appnum - obtain the application number
+
+Output parameters:
+. appnum - pointer to an integer that receives the appnum
+
+Return values:
+ PMI_SUCCESS - appnum successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to return the appnum
+
+
+@*/
+int PMI_Get_appnum( int *appnum );
+
+/*@
+PMI_Publish_name - publish a name 
+
+Input parameters:
+. service_name - string representing the service being published
+. port - string representing the port on which to contact the service
+
+Return values:
+ PMI_SUCCESS - port for service successfully published
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to publish service
+
+
+@*/
+int PMI_Publish_name( const char service_name[], const char port[] );
+
+/*@
+PMI_Unpublish_name - unpublish a name
+
+Input parameters:
+. service_name - string representing the service being unpublished
+
+Return values:
+ PMI_SUCCESS - port for service successfully unpublished
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to unpublish service
+
+
+@*/
+int PMI_Unpublish_name( const char service_name[] );
+
+/*@
+PMI_Lookup_name - lookup a service by name
+
+Input parameters:
+. service_name - string representing the service being looked up
+
+Output parameters:
+. port - string representing the port on which to contact the service
+
+Return values:
+ PMI_SUCCESS - port for service successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to lookup service
+
+
+@*/
+int PMI_Lookup_name( const char service_name[], char port[] );
+
+/*@
+PMI_Barrier - barrier across the process group
+
+Return values:
+ PMI_SUCCESS - barrier successfully finished
+- PMI_FAIL - barrier failed
+
+Notes:
+This function is a collective call across all processes in the process group
+the local process belongs to.  It will not return until all the processes
+have called 'PMI_Barrier()'.
+
+@*/
+int PMI_Barrier( void );
+
+/*@
+PMI_Abort - abort the process group associated with this process
+
+Input Parameters:
+ exit_code - exit code to be returned by this process
+- error_msg - error message to be printed
+
+Return values:
+. none - this function should not return
+@*/
+int PMI_Abort(int exit_code, const char error_msg[]);
+
+/* PMI Keymap functions */
+/*@
+PMI_KVS_Get_my_name - obtain the name of the keyval space the local process group has access to
+
+Input Parameters:
+. length - length of the kvsname character array
+
+Output Parameters:
+. kvsname - a string that receives the keyval space name
+
+Return values:
+ PMI_SUCCESS - kvsname successfully obtained
+. PMI_ERR_INVALID_ARG - invalid argument
+. PMI_ERR_INVALID_LENGTH - invalid length argument
+- PMI_FAIL - unable to return the kvsname
+
+Notes:
+This function returns the name of the keyval space that this process and all
+other processes in the process group have access to.  The output parameter,
+kvsname, must be at least as long as the value returned by
+'PMI_KVS_Get_name_length_max()'.
+
+@*/
+int PMI_KVS_Get_my_name( char kvsname[], int length );
+
+/*@
+PMI_KVS_Get_name_length_max - obtain the length necessary to store a kvsname
+
+Output Parameter:
+. length - maximum length required to hold a keyval space name
+
+Return values:
+ PMI_SUCCESS - length successfully set
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to set the length
+
+Notes:
+This function returns the string length required to store a keyval space name.
+
+A routine is used rather than setting a maximum value in 'pmi.h' to allow
+different implementations of PMI to be used with the same executable.  These
+different implementations may allow different maximum lengths; by using a 
+routine here, we can interface with a variety of implementations of PMI.
+
+@*/
+int PMI_KVS_Get_name_length_max( int *length );
+
+/*@
+PMI_KVS_Get_key_length_max - obtain the length necessary to store a key
+
+Output Parameter:
+. length - maximum length required to hold a key string.
+
+Return values:
+ PMI_SUCCESS - length successfully set
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to set the length
+
+Notes:
+This function returns the string length required to store a key.
+
+@*/
+int PMI_KVS_Get_key_length_max( int *length );
+
+/*@
+PMI_KVS_Get_value_length_max - obtain the length necessary to store a value
+
+Output Parameter:
+. length - maximum length required to hold a keyval space value
+
+Return values:
+ PMI_SUCCESS - length successfully set
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - unable to set the length
+
+Notes:
+This function returns the string length required to store a value from a
+keyval space.
+
+@*/
+int PMI_KVS_Get_value_length_max( int *length );
+
+/*@
+PMI_KVS_Put - put a key/value pair in a keyval space
+
+Input Parameters:
+ kvsname - keyval space name
+. key - key
+- value - value
+
+Return values:
+ PMI_SUCCESS - keyval pair successfully put in keyval space
+. PMI_ERR_INVALID_KVS - invalid kvsname argument
+. PMI_ERR_INVALID_KEY - invalid key argument
+. PMI_ERR_INVALID_VAL - invalid val argument
+- PMI_FAIL - put failed
+
+Notes:
+This function puts the key/value pair in the specified keyval space.  The
+value is not visible to other processes until 'PMI_KVS_Commit()' is called.  
+The function may complete locally.  After 'PMI_KVS_Commit()' is called, the
+value may be retrieved by calling 'PMI_KVS_Get()'.  All keys put to a keyval
+space must be unique to the keyval space.  You may not put more than once
+with the same key.
+
+@*/
+int PMI_KVS_Put( const char kvsname[], const char key[], const char value[]);
+
+/*@
+PMI_KVS_Commit - commit all previous puts to the keyval space
+
+Input Parameters:
+. kvsname - keyval space name
+
+Return values:
+ PMI_SUCCESS - commit succeeded
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - commit failed
+
+Notes:
+This function commits all previous puts since the last 'PMI_KVS_Commit()' into
+the specified keyval space. It is a process local operation.
+
+@*/
+int PMI_KVS_Commit( const char kvsname[] );
+
+/*@
+PMI_KVS_Get - get a key/value pair from a keyval space
+
+Input Parameters:
+ kvsname - keyval space name
+. key - key
+- length - length of value character array
+
+Output Parameters:
+. value - value
+
+Return values:
+ PMI_SUCCESS - get succeeded
+. PMI_ERR_INVALID_KVS - invalid kvsname argument
+. PMI_ERR_INVALID_KEY - invalid key argument
+. PMI_ERR_INVALID_VAL - invalid val argument
+. PMI_ERR_INVALID_LENGTH - invalid length argument
+- PMI_FAIL - get failed
+
+Notes:
+This function gets the value of the specified key in the keyval space.
+
+@*/
+int PMI_KVS_Get( const char kvsname[], const char key[], char value[], int length);
+
+/* PMI Process Creation functions */
+
+/*S
+PMI_keyval_t - keyval structure used by PMI_Spawn_multiple
+
+Fields:
+ key - name of the key
+- val - value of the key
+
+S*/
+typedef struct PMI_keyval_t
+{
+    const char * key;
+    char * val;
+} PMI_keyval_t;
+
+/*@
+PMI_Spawn_multiple - spawn a new set of processes
+
+Input Parameters:
+ count - count of commands
+. cmds - array of command strings
+. argvs - array of argv arrays for each command string
+. maxprocs - array of maximum processes to spawn for each command string
+. info_keyval_sizes - array giving the number of elements in each of the 
+  'info_keyval_vectors'
+. info_keyval_vectors - array of keyval vector arrays
+. preput_keyval_size - Number of elements in 'preput_keyval_vector'
+- preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space
+
+Output Parameter:
+. errors - array of errors for each command
+
+Return values:
+ PMI_SUCCESS - spawn successful
+. PMI_ERR_INVALID_ARG - invalid argument
+- PMI_FAIL - spawn failed
+
+Notes:
+This function spawns a set of processes into a new process group.  The 'count'
+field refers to the size of the array parameters - 'cmd', 'argvs', 'maxprocs',
+'info_keyval_sizes' and 'info_keyval_vectors'.  The 'preput_keyval_size' refers
+to the size of the 'preput_keyval_vector' array.  The 'preput_keyval_vector'
+contains keyval pairs that will be put in the keyval space of the newly
+created process group before the processes are started.  The 'maxprocs' array
+specifies the desired number of processes to create for each 'cmd' string.  
+The actual number of processes may be less than the numbers specified in
+maxprocs.  The acceptable number of processes spawned may be controlled by
+``soft'' keyvals in the info arrays.  The ``soft'' option is specified by
+mpiexec in the MPI-2 standard.  Environment variables may be passed to the
+spawned processes through PMI implementation specific 'info_keyval' parameters.
+@*/
+int PMI_Spawn_multiple(int count,
+                       const char * cmds[],
+                       const char ** argvs[],
+                       const int maxprocs[],
+                       const int info_keyval_sizesp[],
+                       const PMI_keyval_t * info_keyval_vectors[],
+                       int preput_keyval_size,
+                       const PMI_keyval_t preput_keyval_vector[],
+                       int errors[]);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/executer/include/qlmpi.h
+++ b/executer/include/qlmpi.h
@ -0,0 +1,58 @@
+
+#ifndef __HEADER_QLMPI_H
+#define __HEADER_QLMPI_H
+
+/* UserProgram executed */
+#define QL_EXEC_END 'E'
+/* qlmpiexec_finalize */
+#define QL_RET_FINAL 'F'
+/* UserProgram resume */
+#define QL_RET_RESUME 'R'
+/* Connect from ql_mpiexec_start/Finalize*/
+#define QL_COM_CONN 'N'
+/* Abnormal end */
+#define QL_AB_END 'A'
+
+/* Client kind */
+/* mpiexec monitor Program */
+#define QL_MONITOR 1
+/* mcexec */
+#define QL_MCEXEC_PRO 2
+/* ql_mpiexec_start ql_mpiexec_finalize */
+#define QL_MPEXEC 3
+
+
+#define QL_SOCK "ql_sock"
+
+#define QL_MAX_PATH 4096
+#define QL_PARAM_PATH "./"
+#define QL_PARAM_EXTE ".param"
+#define QL_SWAP_PATH "/tmp"
+#define QL_SOCKT_PATH "/run/user"
+
+#define QL_NAME "QL_NAME"
+#define QL_SWAP_ENV "QL_SWAP_PATH"
+#define QL_PARAM_ENV "QL_PARAM_PATH"
+#define QL_SOCK_ENV "QL_SOCKET_PATH"
+
+#define QL_BUF_MAX 256
+
+
+struct client_fd {
+	int fd;		//FD
+	int client;	//Client Kind
+	char *name;	//QL_NAME
+	int status;	//execute status
+};
+
+int ql_recv(int fd,char ** buf);
+
+int ql_send(int fd,int command,char *buf);
+
+
+#define QL_COMMAND '0'
+#define QL_ARG '1'
+#define QL_ENV '2'
+
+//#define QL_DEBUG
+#endif
--- a/executer/include/qlmpilib.h
+++ b/executer/include/qlmpilib.h
@ -0,0 +1,10 @@
+
+#ifndef __HEADER_QLMPILIB_H
+#define __HEADER_QLMPILIB_H
+
+int ql_client(int *argc, char ***argv);
+
+#define QL_CONTINUE 1
+#define QL_EXIT 0
+
+#endif
--- a/executer/kernel/mcctrl/syscall.c
+++ b/executer/kernel/mcctrl/syscall.c
@ -890,9 +890,11 @@ static struct vm_operations_struct rus_vmops = {
 static int rus_mmap(struct file *file, struct vm_area_struct *vma)
 {
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
+//	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
+	vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP;
 #else
-	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
+//	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
+	vma->vm_flags |= VM_DONTDUMP | VM_MIXEDMAP;
 #endif
 	vma->vm_ops = &rus_vmops;
 	return 0;
@ -1713,6 +1715,75 @@ out:
 	return error;
 }

+#ifdef MCCTRL_KSYM_walk_page_range
+/*
+ * Pointer through which mcctrl reaches walk_page_range(): either the address
+ * resolved at configure time (non-zero MCCTRL_KSYM_walk_page_range) or, when
+ * the macro is 0, the exported kernel symbol itself.  The return type is int
+ * to match the kernel prototype, so that "&walk_page_range" is assigned
+ * without a function-pointer type mismatch.
+ */
+static int
+(*mcctrl_walk_page_range)(unsigned long addr, unsigned long end, struct mm_walk *walk)
+#if MCCTRL_KSYM_walk_page_range
+	= (void *)MCCTRL_KSYM_walk_page_range;
+#else
+	= &walk_page_range;
+#endif
+#endif
+
+/*
+ * Debug helper that inspects one page-table entry.
+ * Logs when the entry pointer is NULL, when the page behind it is locked,
+ * and when addr lies strictly between 0x700000 and 0x705000.  Always
+ * returns 0.
+ * NOTE(review): presumably meant as a pte callback for a page walk; no
+ * caller is visible in this hunk -- confirm.
+ */
+static int mywalk(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk)
+{
+	struct page		*pg;
+
+	if (!pte) {
+		kprintf("mywalk: ptr(%p)\n", pte);
+		return 0;
+	}
+
+	pg = pfn_to_page(pte_pfn(*pte));
+	if (!pg) {
+		kprintf("mywalk: pte(%p) page is null\n", pte);
+		return 0;
+	}
+
+	if (PageLocked(pg))
+		kprintf("mywalk: MLOCK (%p)\n", (void*) addr);
+
+	/* Extra trace for the hard-coded address window. */
+	if (0x700000 < addr && addr < 0x705000)
+		kprintf("mywalk: %p(%lx)\n", (void*) addr, pg->flags);
+
+	return 0;
+}
+
+/*
+ * Report the VM_LOCKED areas of the calling process (current->mm).
+ *
+ * Walks the VMA list and, for every area whose start address lies in
+ * [start, end] and that has VM_LOCKED set, stores its start, end and flags
+ * in the next slot of the array at addr.
+ *
+ * addr - array of {start, end, flag} triples with room for nent entries
+ * nent - number of slots in the array
+ *
+ * When the nent-th locked area is reached, its slot receives start == -1 as
+ * an "array full" marker instead of the area, and the walk stops.
+ *
+ * Returns the number of slots written, including the marker slot if one was
+ * written; returns 0 without writing anything when addr is NULL or
+ * nent <= 0.
+ *
+ * NOTE(review): addr is dereferenced directly and the VMA list is walked
+ * without any locking inside this function -- confirm both against the
+ * caller.
+ */
+static long pager_req_mlock_list(ihk_os_t os, unsigned long start,
+				 unsigned long end, void *addr, int nent)
+{
+	struct addrpair {
+		unsigned long	start;
+		unsigned long	end;
+		unsigned long	flag;
+	} *addrpair = (struct addrpair *) addr;
+	int			cnt = 0;
+	struct mm_struct	*mm = current->mm;
+	struct vm_area_struct	*vma;
+
+	kprintf("pager_req_mlock_list: addr(%p)\n", addr);
+	/* Without a slot to write to, even the marker would land out of bounds. */
+	if (addrpair == NULL || nent <= 0)
+		return 0;
+
+	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+		if (vma->vm_start < start || vma->vm_start > end) continue;
+		kprintf("\t%p: %p -- %p\t%lx\n", vma,
+			(void*)vma->vm_start, (void*)vma->vm_end,
+			vma->vm_flags & VM_LOCKED);
+		if (vma->vm_flags & VM_LOCKED) {
+			kprintf("\t locked\n");
+			if (++cnt >= nent) { /* last entry is a marker */
+				addrpair->start = (unsigned long) -1;
+				goto full;
+			}
+			addrpair->start = vma->vm_start;
+			addrpair->end = vma->vm_end;
+			addrpair->flag = vma->vm_flags;
+			addrpair++;
+		}
+	}
+full:
+	return cnt;
+}
+
 static long pager_call(ihk_os_t os, struct syscall_request *req)
 {
 	long ret;
@ -1726,6 +1797,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
 #define	PAGER_REQ_MAP		0x0005
 #define	PAGER_REQ_PFN		0x0006
 #define	PAGER_REQ_UNMAP		0x0007
+#define PAGER_REQ_MLOCK_LIST	0x0008
 	case PAGER_REQ_CREATE:
 		ret = pager_req_create(os, req->args[1], req->args[2]);
 		break;
@ -1754,7 +1826,11 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
 	case PAGER_REQ_UNMAP:
 		ret = pager_req_unmap(os, req->args[1]);
 		break;
-
+	case PAGER_REQ_MLOCK_LIST:
+		ret = pager_req_mlock_list(os, (unsigned long) req->args[1],
+					   (unsigned long) req->args[2],
+					   (void*) req->args[3], (int) req->args[4]);
+		break;
 	default:
 		ret = -ENOSYS;
 		printk("pager_call(%#lx):unknown req %ld\n", req->args[0], ret);
--- a/executer/user/Makefile.in
+++ b/executer/user/Makefile.in
@ -1,22 +1,33 @@
 CC=@CC@
+MCC=mpicc
 BINDIR=@BINDIR@
+SBINDIR=@SBINDIR@
 prefix=@prefix@
 exec_prefix=@exec_prefix@
 LIBDIR=@libdir@
+MCKERNEL_INCDIR=@MCKERNEL_INCDIR@
 MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@
 KDIR ?= @KDIR@
 CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH}
+LDFLAGS=@LDFLAGS@
 VPATH=@abs_srcdir@
 TARGET=mcexec libsched_yield
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
 LIBS=@LIBS@
 ARCH=@ARCH@
 IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
+MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread
+ENABLE_QLMPI=@ENABLE_QLMPI@
+
+ifeq ($(ENABLE_QLMPI),yes)
+	MCEXEC_LIBS += -lmpi
+	TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so
+endif

 all: $(TARGET)

 mcexec: mcexec.c libmcexec.a
-	$(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. -lmcexec -lrt -lnuma -pthread -o $@ $^ $(EXTRA_OBJS)
+	$(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) $(LDFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. $(MCEXEC_LIBS) -o $@ $^ $(EXTRA_OBJS)

 eclair: eclair.c
 	$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS)
@ -27,6 +38,27 @@ libsched_yield: libsched_yield.c
 libmcexec.a::
 	(cd arch/${ARCH}; make)

+libqlmpi.so: qlmpilib.c
+	$(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $<
+
+libqlfort.so: libqlfort.c
+	$(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $< -ldl
+
+ql_server: ql_server.c
+	$(CC) $(CFLAGS) -o $@ $^ 
+
+ql_mpiexec_start: ql_mpiexec_start.o md5.o
+	$(CC) $^ $(CFLAGS) -pthread -o $@
+
+ql_mpiexec_finalize.o: ql_mpiexec_start.c
+	$(CC) $(CFLAGS) -DQL_MPIEXEC_FINALIZE -c -o $@ $<
+
+ql_mpiexec_finalize: ql_mpiexec_finalize.o md5.o
+	$(CC) $^ $(CFLAGS) -pthread -o $@
+
+ql_talker: ql_talker.o
+	$(CC) $^ $(CFLAGS) -o $@
+
 clean::
 	(cd arch/${ARCH}; make clean)
 	$(RM) $(TARGET) *.o
@ -39,5 +71,13 @@ install::
 	install -m 755 mcexec $(BINDIR)
 	mkdir -p -m 755 $(MCKERNEL_LIBDIR)
 	install -m 755 libsched_yield.so.1.0.0 $(MCKERNEL_LIBDIR)
+ifeq ($(ENABLE_QLMPI),yes)
+	install -m 644 ../include/qlmpilib.h $(MCKERNEL_INCDIR)
+	install -m 755 libqlmpi.so $(MCKERNEL_LIBDIR)
+	install -m 755 libqlfort.so $(MCKERNEL_LIBDIR)
+	install -m 755 ql_server $(SBINDIR)
+	install -m 755 ql_mpiexec_start $(BINDIR)
+	install -m 755 ql_mpiexec_finalize $(BINDIR)
+	install -m 755 ql_talker $(SBINDIR)
+endif
 	@uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR)
-
--- a/executer/user/libqlfort.c
+++ b/executer/user/libqlfort.c
@ -0,0 +1,101 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dlfcn.h>
+
+/*
+ * Symbols looked up by init() with dlsym(RTLD_NEXT, ...).  Each pointer
+ * holds whatever dlsym() returned for its symbol.
+ */
+static int *mck_ql_argc;	/* "mck_ql_argc" */
+static char ***mck_ql_argv;	/* "mck_ql_argv" */
+static int (*intel_iargc)();	/* "for_iargc" */
+static int (*intel_getarg)(int *, char *, int, int);	/* "for_getarg" */
+static int (*gfortran_iargc)();	/* "_gfortran_iargc" */
+static int (*gfortran_getarg)(int *, char *, int);	/* "_gfortran_getarg_i4" */
+static void (*mpi_init)(int *);	/* "mpi_init_" */
+static int dl_init_flag;	/* set to 1 once init() has run */
+
+/*
+ * One-time lookup of the argument variables and of the next definitions
+ * of the Fortran runtime entry points in the library search order.
+ * Later calls do nothing.
+ */
+static inline void
+init()
+{
+	if (!dl_init_flag) {
+		/* argument count / vector published under these symbol names */
+		mck_ql_argc = dlsym(RTLD_NEXT, "mck_ql_argc");
+		mck_ql_argv = dlsym(RTLD_NEXT, "mck_ql_argv");
+
+		/* Intel Fortran runtime */
+		intel_iargc = dlsym(RTLD_NEXT, "for_iargc");
+		intel_getarg = dlsym(RTLD_NEXT, "for_getarg");
+
+		/* GNU Fortran runtime */
+		gfortran_iargc = dlsym(RTLD_NEXT, "_gfortran_iargc");
+		gfortran_getarg = dlsym(RTLD_NEXT, "_gfortran_getarg_i4");
+
+		mpi_init = dlsym(RTLD_NEXT, "mpi_init_");
+
+		dl_init_flag = 1;
+	}
+}
+
+// for GNU Fortran
+/*
+ * Replacement for GNU Fortran's iargc: number of arguments excluding the
+ * program name.  Uses the mck_ql_argc/mck_ql_argv variables when they are
+ * available, otherwise forwards to the next _gfortran_iargc, or returns 0
+ * when there is none.
+ */
+int
+_gfortran_iargc()
+{
+	init();
+
+	if (!(mck_ql_argc && mck_ql_argv && *mck_ql_argv))
+		return gfortran_iargc ? gfortran_iargc() : 0;
+
+	return *mck_ql_argc - 1;
+}
+
+/*
+ * Replacement for GNU Fortran's getarg.
+ *
+ * n       - index of the requested argument (0 is the program name)
+ * arg     - destination buffer of arg_len characters, blank padded and
+ *           not NUL-terminated
+ * arg_len - size of arg
+ *
+ * When the mck_ql_argc/mck_ql_argv variables are available the argument is
+ * taken from them; an out-of-range index leaves arg filled with blanks.
+ * Otherwise the call is forwarded to the next _gfortran_getarg_i4, if any.
+ */
+void
+_gfortran_getarg_i4(int *n, char *arg, int arg_len)
+{
+	const char *src;
+	int l;
+
+	init();
+	if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) {
+		memset(arg, ' ', arg_len);
+		/* valid indices are 0 .. argc-1; index argc is past the last one */
+		if (*n < 0 || *n >= *mck_ql_argc)
+			return;
+		src = (*mck_ql_argv)[*n];
+		l = strlen(src);
+		if (l > arg_len)
+			l = arg_len;
+		/* copy without terminator: the rest of arg stays blank */
+		memcpy(arg, src, l);
+		return;
+	}
+	if (gfortran_getarg) {
+		gfortran_getarg(n, arg, arg_len);
+		return;
+	}
+	return;
+}
+
+// for Intel Fortran
+/*
+ * Replacement for Intel Fortran's iargc: number of arguments excluding the
+ * program name.  Uses the mck_ql_argc/mck_ql_argv variables when they are
+ * available, otherwise forwards to the next for_iargc, or returns 0 when
+ * there is none.
+ */
+int
+for_iargc()
+{
+	init();
+
+	if (!(mck_ql_argc && mck_ql_argv && *mck_ql_argv))
+		return intel_iargc ? intel_iargc() : 0;
+
+	return *mck_ql_argc - 1;
+}
+
+/*
+ * Replacement for Intel Fortran's getarg.
+ *
+ * n       - index of the requested argument (0 is the program name)
+ * arg     - destination buffer of arg_len characters, blank padded and
+ *           not NUL-terminated
+ * dmy1    - not used here; passed through unchanged when forwarding
+ * arg_len - size of arg
+ *
+ * When the mck_ql_argc/mck_ql_argv variables are available the argument is
+ * taken from them; an out-of-range index leaves arg filled with blanks.
+ * Otherwise the call is forwarded to the next for_getarg, if any.
+ */
+void
+for_getarg(int *n, char *arg, int dmy1, int arg_len)
+{
+	const char *src;
+	int l;
+
+	init();
+	if (mck_ql_argc && mck_ql_argv && *mck_ql_argv) {
+		memset(arg, ' ', arg_len);
+		/* valid indices are 0 .. argc-1; index argc is past the last one */
+		if (*n < 0 || *n >= *mck_ql_argc)
+			return;
+		src = (*mck_ql_argv)[*n];
+		l = strlen(src);
+		if (l > arg_len)
+			l = arg_len;
+		/* copy without terminator: the rest of arg stays blank */
+		memcpy(arg, src, l);
+		return;
+	}
+	if (intel_getarg) {
+		intel_getarg(n, arg, dmy1, arg_len);
+		return;
+	}
+	return;
+}
--- a/executer/user/mcexec.c
+++ b/executer/user/mcexec.c
@ -73,7 +73,12 @@
 #include "../../config.h"
 #include <numa.h>
 #include <numaif.h>
+#include <spawn.h>
 #include <sys/personality.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../include/pmi.h"
+#include "../include/qlmpi.h"

 //#define DEBUG

@ -1568,6 +1573,37 @@ opendev()
 	return fd;
 }

+/*
+ * Prepare LD_PRELOAD for the environment that is collected afterwards.
+ *
+ * - With disable_sched_yield set, LD_PRELOAD becomes the libsched_yield
+ *   library under MCKERNEL_LIBDIR.
+ * - Otherwise, if the variable named by ld_preload_envname is set, its
+ *   value becomes LD_PRELOAD and that variable is removed.
+ * - With ENABLE_QLMPI, libqlfort.so under MCKERNEL_LIBDIR is put in front
+ *   of whatever LD_PRELOAD holds at that point.
+ *
+ * Failures only produce a warning; LD_PRELOAD is then left as it was.
+ */
+static void ld_preload_init()
+{
+	char envbuf[PATH_MAX];
+#ifdef ENABLE_QLMPI
+	char *old_ld_preload;
+	int len;
+#endif
+
+	if (disable_sched_yield) {
+		snprintf(envbuf, sizeof(envbuf), "%s/libsched_yield.so.1.0.0",
+			 MCKERNEL_LIBDIR);
+		__dprintf("%s: %s\n", __FUNCTION__, envbuf);
+		if (setenv("LD_PRELOAD", envbuf, 1) < 0) {
+			printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n",
+					__FUNCTION__);
+		}
+	}
+	/* Set LD_PRELOAD to McKernel specific value */
+	else if (getenv(ld_preload_envname)) {
+		if (setenv("LD_PRELOAD", getenv(ld_preload_envname), 1) < 0) {
+			printf("%s: warning: failed to set LD_PRELOAD environment variable\n",
+					__FUNCTION__);
+		}
+		unsetenv(ld_preload_envname);
+	}
+
+#ifdef ENABLE_QLMPI
+	/* Prepend libqlfort.so; the previous value may be arbitrarily long,
+	 * so the result is bounded and checked for truncation. */
+	old_ld_preload = getenv("LD_PRELOAD");
+	if (old_ld_preload) {
+		len = snprintf(envbuf, sizeof(envbuf), "%s/libqlfort.so %s",
+			       MCKERNEL_LIBDIR, old_ld_preload);
+	}
+	else {
+		len = snprintf(envbuf, sizeof(envbuf), "%s/libqlfort.so",
+			       MCKERNEL_LIBDIR);
+	}
+	if (len < 0 || (size_t)len >= sizeof(envbuf)) {
+		printf("%s: warning: LD_PRELOAD is too long, libqlfort.so is not preloaded\n",
+				__FUNCTION__);
+	}
+	else if (setenv("LD_PRELOAD", envbuf, 1) < 0) {
+		printf("%s: warning: failed to set LD_PRELOAD for libqlfort\n",
+				__FUNCTION__);
+	}
+#endif
+}
+
 int main(int argc, char **argv)
 {
 	int ret = 0;
@ -1683,24 +1719,7 @@ int main(int argc, char **argv)
 	if (opendev() == -1)
 		exit(EXIT_FAILURE);

-	if (disable_sched_yield) {
-		char sched_yield_lib_path[PATH_MAX];
-		sprintf(sched_yield_lib_path, "%s/libsched_yield.so.1.0.0",
-			MCKERNEL_LIBDIR);
-		__dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path);
-		if (setenv("LD_PRELOAD", sched_yield_lib_path, 1) < 0) {
-			printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n",
-					__FUNCTION__);
-		}
-	}
-	/* Set LD_PRELOAD to McKernel specific value */
-	else if (getenv(ld_preload_envname)) {
-		if (setenv("LD_PRELOAD", getenv(ld_preload_envname), 1) < 0) {
-			printf("%s: warning: failed to set LD_PRELOAD environment variable\n",
-					__FUNCTION__);
-		}
-		unsetenv(ld_preload_envname);
-	}
+	ld_preload_init();

 	/* Collect environment variables */
 	envs_len = flatten_strings(-1, NULL, environ, &envs);
@ -3416,6 +3435,194 @@ return_execve2:
 			}
 			do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
 			break;
+		case 801: {// swapout
+			/*
+			 * Synchronise all ranks with a PMI barrier.  Rank 0 then
+			 * reports QL_EXEC_END together with the QL_NAME value to
+			 * ql_server over the UNIX socket named by QL_SOCKET_FILE and
+			 * blocks until the server answers with QL_RET_RESUME.  A
+			 * second barrier releases the other ranks.  The syscall
+			 * returns 0 on success and -1 on any failure.
+			 *
+			 * NOTE(review): when rank 0 fails after the first barrier it
+			 * leaves without entering the second one, while the other
+			 * ranks are waiting in it -- confirm this is intended.
+			 */
+#ifdef ENABLE_QLMPI
+			int rc;
+			int spawned;
+			int rank;
+			int ql_fd = -1;
+			int len;
+			struct sockaddr_un unix_addr;
+			char msg_buf[QL_BUF_MAX];
+			char *ql_name;
+			char *sock_path;
+
+			rc = PMI_Init(&spawned);
+			if (rc != 0) {
+				fprintf(stderr, "swapout(): ERROR: failed to init PMI\n");
+				ret = -1;
+				goto return_swapout;
+			}
+			rc = PMI_Get_rank(&rank);
+			if (rc != 0) {
+				fprintf(stderr, "swapout(): ERROR: failed to get Rank\n");
+				ret = -1;
+				goto return_swapout;
+			}
+
+			// swap synchronization 
+			rc = PMI_Barrier();
+
+			if (rank == 0) {
+				// tell ql_server what calculation is done.
+				ql_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+				if (ql_fd < 0) {
+					fprintf(stderr, "swapout(): ERROR: failed to open socket\n");
+					ret = -1;
+					goto return_swapout;
+				}
+
+				// the socket path must exist and fit into sun_path
+				sock_path = getenv("QL_SOCKET_FILE");
+				if (!sock_path ||
+				    strlen(sock_path) >= sizeof(unix_addr.sun_path)) {
+					fprintf(stderr, "swapout(): ERROR: QL_SOCKET_FILE is not set or too long\n");
+					ret = -1;
+					goto return_swapout;
+				}
+
+				unix_addr.sun_family = AF_UNIX;
+				strcpy(unix_addr.sun_path, sock_path);
+				len = sizeof(unix_addr.sun_family) + strlen(unix_addr.sun_path) + 1;
+				rc = connect(ql_fd, (struct sockaddr*)&unix_addr, len);
+				if (rc < 0) {
+					fprintf(stderr, "swapout(): ERROR: failed to connect ql_server\n");
+					ret = -1;
+					goto return_swapout;
+				}
+
+				ql_name = getenv(QL_NAME);
+				if (!ql_name) {
+					fprintf(stderr, "swapout(): ERROR: %s is not set\n", QL_NAME);
+					ret = -1;
+					goto return_swapout;
+				}
+				rc = snprintf(msg_buf, sizeof(msg_buf), "%c %04x %s",
+				              QL_EXEC_END, (unsigned int)strlen(ql_name), ql_name);
+				if (rc < 0 || (size_t)rc >= sizeof(msg_buf)) {
+					fprintf(stderr, "swapout(): ERROR: %s is too long\n", QL_NAME);
+					ret = -1;
+					goto return_swapout;
+				}
+				rc = send(ql_fd, msg_buf, strlen(msg_buf) + 1, 0);
+				if (rc < 0) {
+					fprintf(stderr, "swapout(): ERROR: failed to send QL_EXEC_END\n");
+					ret = -1;
+					goto return_swapout;
+				}
+				
+				// wait resume-req from ql_server.
+#ifdef QL_DEBUG
+				fprintf(stdout, "INFO: waiting resume-req ...\n");
+#endif
+				// receive into the whole buffer; 0 means the peer closed
+				rc = recv(ql_fd, msg_buf, sizeof(msg_buf), 0);
+
+				if (rc <= 0) {
+					fprintf(stderr, "swapout(): ERROR: failed to recieve\n");
+					ret = -1;
+					goto return_swapout;
+				}
+
+				// parse message
+				if (msg_buf[0] == QL_RET_RESUME) {
+#ifdef QL_DEBUG
+					fprintf(stdout, "INFO: recieved resume-req\n");
+#endif
+				}
+				else {
+					fprintf(stderr, "swapout(): ERROR: recieved unexpected requsest from ql_server\n");
+					ret = -1;
+					goto return_swapout;
+				}
+
+				// resume-req synchronization
+				rc = PMI_Barrier();
+			}
+			else {
+				// resume-req synchronization 
+				rc = PMI_Barrier();
+			}
+			
+			ret = 0;
+
+return_swapout:
+			if (ql_fd >= 0) {
+				close(ql_fd);
+			}
+
+			do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
+#else
+			printf("mcexec has not been compiled with ENABLE_QLMPI\n");
+			ret = -1;
+			do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
+#endif // ENABLE_QLMPI
+			break;
+		}
+		/*
+		 * 802 is listed as linux_mlock in syscall_list.h.  Calls
+		 * mlock(args[0], args[1]) in this process and returns its result.
+		 * NOTE(review): the second printf reads args[0] as a
+		 * NUL-terminated string purely for debugging output.
+		 */
+		case 802: /* debugging purpose */
+			printf("linux mlock(%p, %ld)\n",
+			       (void *)w.sr.args[0], w.sr.args[1]);
+			printf("str(%p)=%s", (void*)w.sr.args[0], (char*)w.sr.args[0]);
+			ret = mlock((void *)w.sr.args[0], w.sr.args[1]);
+			do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
+			break;
+
+#ifndef ARG_MAX
+#define ARG_MAX 256
+#endif
+		case 811: { // linux_spawn
+			/*
+			 * Start a program with posix_spawn().  args[0] is the
+			 * executable path and args[1] a NULL-terminated argument
+			 * vector; both are copied with do_strncpy_from_user() first.
+			 * The syscall returns 0 on success and -1 on any failure.
+			 * At most ARG_MAX - 1 arguments are accepted so that the
+			 * copied vector keeps its terminating NULL.
+			 * NOTE(review): the spawned pid is not waited for here.
+			 */
+			int rc, i;
+			pid_t pid;
+			size_t slen;
+			char *exec_path = NULL;
+			/* zero-filled: the vector is always NULL-terminated and the
+			 * cleanup loop never reads an uninitialised slot */
+			char* argv[ARG_MAX] = { NULL };
+			char** spawn_args = (char**)w.sr.args[1];
+
+			if (!w.sr.args[0] || ! spawn_args) {
+				fprintf(stderr, "linux_spawn(): ERROR: invalid argument \n");
+				ret = -1;
+				goto return_linux_spawn;
+			}
+
+			// copy exec_path
+			slen = strlen((char*)w.sr.args[0]) + 1;
+			if (slen >= PATH_MAX) {
+				fprintf(stderr, "linux_spawn(): ERROR: invalid exec_path \n");
+				ret = -1;
+				goto return_linux_spawn;
+			}
+			exec_path = calloc(1, slen);
+			if (!exec_path) {
+				fprintf(stderr, "linux_spawn(): ERROR: failed to allocating exec_path\n");
+				ret = -1;
+				goto return_linux_spawn;
+			}
+
+			rc = do_strncpy_from_user(fd, exec_path, (void *)w.sr.args[0], slen);
+			if (rc < 0) {
+				fprintf(stderr, "linux_spawn(): ERROR: failed to strncpy from user\n");
+				ret = -1;
+				goto return_linux_spawn;
+			}
+
+			// copy args to argv[]
+			for (i = 0; spawn_args[i] != NULL; i++) {
+				// keep the last slot free for the terminating NULL
+				if (i >= ARG_MAX - 1) {
+					fprintf(stderr, "linux_spawn(): ERROR: too many arguments\n");
+					ret = -1;
+					goto return_linux_spawn;
+				}
+				slen = strlen(spawn_args[i]) + 1;
+				argv[i] = calloc(1, slen);
+				if (!argv[i]) {
+					fprintf(stderr, "linux_spawn(): ERROR: failed to allocating argv[%d]\n", i);
+					ret = -1;
+					goto return_linux_spawn;
+				}
+				rc = do_strncpy_from_user(fd, argv[i], spawn_args[i], slen);
+				if (rc < 0) {
+					fprintf(stderr, "linux_spawn(): ERROR: failed to strncpy from user\n");
+					ret = -1;
+					goto return_linux_spawn;
+				}
+			}
+
+			rc = posix_spawn(&pid, exec_path, NULL, NULL, argv, NULL);
+			if (rc != 0) {
+				fprintf(stderr, "linux_spawn(): ERROR: posix_spawn returned %d\n", rc);
+				ret = -1;
+				goto return_linux_spawn;
+			}
+
+			ret = 0;
+return_linux_spawn:
+			// free allocated memory
+			free(exec_path);
+			for (i = 0; i < ARG_MAX && argv[i] != NULL; i++) {
+				free(argv[i]);
+			}
+
+			do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
+			break;
+		}

 		default:
 			ret = do_generic_syscall(&w);
--- a/executer/user/md5.c
+++ b/executer/user/md5.c
@ -0,0 +1,381 @@
+/*
+  Copyright (C) 1999, 2000, 2002 Aladdin Enterprises.  All rights reserved.
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  L. Peter Deutsch
+  ghost@aladdin.com
+
+ */
+/* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */
+/*
+  Independent implementation of MD5 (RFC 1321).
+
+  This code implements the MD5 Algorithm defined in RFC 1321, whose
+  text is available at
+	http://www.ietf.org/rfc/rfc1321.txt
+  The code is derived from the text of the RFC, including the test suite
+  (section A.5) but excluding the rest of Appendix A.  It does not include
+  any code or documentation that is identified in the RFC as being
+  copyrighted.
+
+  The original and principal author of md5.c is L. Peter Deutsch
+  <ghost@aladdin.com>.  Other authors are noted in the change history
+  that follows (in reverse chronological order):
+
+  2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order
+	either statically or dynamically; added missing #include <string.h>
+	in library.
+  2002-03-11 lpd Corrected argument list for main(), and added int return
+	type, in test program and T value program.
+  2002-02-21 lpd Added missing #include <stdio.h> in test program.
+  2000-07-03 lpd Patched to eliminate warnings about "constant is
+	unsigned in ANSI C, signed in traditional"; made test program
+	self-checking.
+  1999-11-04 lpd Edited comments slightly for automatic TOC extraction.
+  1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5).
+  1999-05-03 lpd Original version.
+ */
+
+#include "../include/md5.h"
+#include <string.h>
+
+#undef BYTE_ORDER	/* 1 = big-endian, -1 = little-endian, 0 = unknown */
+#ifdef ARCH_IS_BIG_ENDIAN
+#  define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1)
+#else
+#  define BYTE_ORDER 0
+#endif
+
+#define T_MASK ((md5_word_t)~0)
+#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87)
+#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9)
+#define T3    0x242070db
+#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111)
+#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050)
+#define T6    0x4787c62a
+#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec)
+#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe)
+#define T9    0x698098d8
+#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850)
+#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e)
+#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841)
+#define T13    0x6b901122
+#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c)
+#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71)
+#define T16    0x49b40821
+#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d)
+#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf)
+#define T19    0x265e5a51
+#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855)
+#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2)
+#define T22    0x02441453
+#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e)
+#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437)
+#define T25    0x21e1cde6
+#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829)
+#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278)
+#define T28    0x455a14ed
+#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa)
+#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07)
+#define T31    0x676f02d9
+#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375)
+#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd)
+#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e)
+#define T35    0x6d9d6122
+#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3)
+#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb)
+#define T38    0x4bdecfa9
+#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f)
+#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f)
+#define T41    0x289b7ec6
+#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805)
+#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a)
+#define T44    0x04881d05
+#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6)
+#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a)
+#define T47    0x1fa27cf8
+#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a)
+#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb)
+#define T50    0x432aff97
+#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58)
+#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6)
+#define T53    0x655b59c3
+#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d)
+#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82)
+#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e)
+#define T57    0x6fa87e4f
+#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f)
+#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb)
+#define T60    0x4e0811a1
+#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d)
+#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca)
+#define T63    0x2ad7d2bb
+#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e)
+
+
+static void
+md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/)
+{
+    md5_word_t
+	a = pms->abcd[0], b = pms->abcd[1],
+	c = pms->abcd[2], d = pms->abcd[3];
+    md5_word_t t;
+#if BYTE_ORDER > 0
+    /* Define storage only for big-endian CPUs. */
+    md5_word_t X[16];
+#else
+    /* Define storage for little-endian or both types of CPUs. */
+    md5_word_t xbuf[16];
+    const md5_word_t *X;
+#endif
+
+    {
+#if BYTE_ORDER == 0
+	/*
+	 * Determine dynamically whether this is a big-endian or
+	 * little-endian machine, since we can use a more efficient
+	 * algorithm on the latter.
+	 */
+	static const int w = 1;
+
+	if (*((const md5_byte_t *)&w)) /* dynamic little-endian */
+#endif
+#if BYTE_ORDER <= 0		/* little-endian */
+	{
+	    /*
+	     * On little-endian machines, we can process properly aligned
+	     * data without copying it.
+	     */
+	    if (!((data - (const md5_byte_t *)0) & 3)) {
+		/* data are properly aligned */
+		X = (const md5_word_t *)data;
+	    } else {
+		/* not aligned */
+		memcpy(xbuf, data, 64);
+		X = xbuf;
+	    }
+	}
+#endif
+#if BYTE_ORDER == 0
+	else			/* dynamic big-endian */
+#endif
+#if BYTE_ORDER >= 0		/* big-endian */
+	{
+	    /*
+	     * On big-endian machines, we must arrange the bytes in the
+	     * right order.
+	     */
+	    const md5_byte_t *xp = data;
+	    int i;
+
+#  if BYTE_ORDER == 0
+	    X = xbuf;		/* (dynamic only) */
+#  else
+#    define xbuf X		/* (static only) */
+#  endif
+	    for (i = 0; i < 16; ++i, xp += 4)
+		xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24);
+	}
+#endif
+    }
+
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+
+    /* Round 1. */
+    /* Let [abcd k s i] denote the operation
+       a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
+#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + F(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+    /* Do the following 16 operations. */
+    SET(a, b, c, d,  0,  7,  T1);
+    SET(d, a, b, c,  1, 12,  T2);
+    SET(c, d, a, b,  2, 17,  T3);
+    SET(b, c, d, a,  3, 22,  T4);
+    SET(a, b, c, d,  4,  7,  T5);
+    SET(d, a, b, c,  5, 12,  T6);
+    SET(c, d, a, b,  6, 17,  T7);
+    SET(b, c, d, a,  7, 22,  T8);
+    SET(a, b, c, d,  8,  7,  T9);
+    SET(d, a, b, c,  9, 12, T10);
+    SET(c, d, a, b, 10, 17, T11);
+    SET(b, c, d, a, 11, 22, T12);
+    SET(a, b, c, d, 12,  7, T13);
+    SET(d, a, b, c, 13, 12, T14);
+    SET(c, d, a, b, 14, 17, T15);
+    SET(b, c, d, a, 15, 22, T16);
+#undef SET
+
+     /* Round 2. */
+     /* Let [abcd k s i] denote the operation
+          a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
+#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + G(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+     /* Do the following 16 operations. */
+    SET(a, b, c, d,  1,  5, T17);
+    SET(d, a, b, c,  6,  9, T18);
+    SET(c, d, a, b, 11, 14, T19);
+    SET(b, c, d, a,  0, 20, T20);
+    SET(a, b, c, d,  5,  5, T21);
+    SET(d, a, b, c, 10,  9, T22);
+    SET(c, d, a, b, 15, 14, T23);
+    SET(b, c, d, a,  4, 20, T24);
+    SET(a, b, c, d,  9,  5, T25);
+    SET(d, a, b, c, 14,  9, T26);
+    SET(c, d, a, b,  3, 14, T27);
+    SET(b, c, d, a,  8, 20, T28);
+    SET(a, b, c, d, 13,  5, T29);
+    SET(d, a, b, c,  2,  9, T30);
+    SET(c, d, a, b,  7, 14, T31);
+    SET(b, c, d, a, 12, 20, T32);
+#undef SET
+
+     /* Round 3. */
+     /* Let [abcd k s t] denote the operation
+          a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + H(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+     /* Do the following 16 operations. */
+    SET(a, b, c, d,  5,  4, T33);
+    SET(d, a, b, c,  8, 11, T34);
+    SET(c, d, a, b, 11, 16, T35);
+    SET(b, c, d, a, 14, 23, T36);
+    SET(a, b, c, d,  1,  4, T37);
+    SET(d, a, b, c,  4, 11, T38);
+    SET(c, d, a, b,  7, 16, T39);
+    SET(b, c, d, a, 10, 23, T40);
+    SET(a, b, c, d, 13,  4, T41);
+    SET(d, a, b, c,  0, 11, T42);
+    SET(c, d, a, b,  3, 16, T43);
+    SET(b, c, d, a,  6, 23, T44);
+    SET(a, b, c, d,  9,  4, T45);
+    SET(d, a, b, c, 12, 11, T46);
+    SET(c, d, a, b, 15, 16, T47);
+    SET(b, c, d, a,  2, 23, T48);
+#undef SET
+
+     /* Round 4. */
+     /* Let [abcd k s t] denote the operation
+          a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+#define SET(a, b, c, d, k, s, Ti)\
+  t = a + I(b,c,d) + X[k] + Ti;\
+  a = ROTATE_LEFT(t, s) + b
+     /* Do the following 16 operations. */
+    SET(a, b, c, d,  0,  6, T49);
+    SET(d, a, b, c,  7, 10, T50);
+    SET(c, d, a, b, 14, 15, T51);
+    SET(b, c, d, a,  5, 21, T52);
+    SET(a, b, c, d, 12,  6, T53);
+    SET(d, a, b, c,  3, 10, T54);
+    SET(c, d, a, b, 10, 15, T55);
+    SET(b, c, d, a,  1, 21, T56);
+    SET(a, b, c, d,  8,  6, T57);
+    SET(d, a, b, c, 15, 10, T58);
+    SET(c, d, a, b,  6, 15, T59);
+    SET(b, c, d, a, 13, 21, T60);
+    SET(a, b, c, d,  4,  6, T61);
+    SET(d, a, b, c, 11, 10, T62);
+    SET(c, d, a, b,  2, 15, T63);
+    SET(b, c, d, a,  9, 21, T64);
+#undef SET
+
+     /* Then perform the following additions. (That is increment each
+        of the four registers by the value it had before this block
+        was started.) */
+    pms->abcd[0] += a;
+    pms->abcd[1] += b;
+    pms->abcd[2] += c;
+    pms->abcd[3] += d;
+}
+
+/*
+ * Reset an MD5 context: zero the 64-bit message bit counter and load the
+ * four chaining words abcd[0..3] with their initial values.
+ */
+void
+md5_init(md5_state_t *pms)
+{
+    /* The middle two words are written as T_MASK ^ x; the plain value
+       each one is meant to equal is kept in the inline comment. */
+    const md5_word_t seed[4] = {
+	0x67452301,
+	/*0xefcdab89*/ T_MASK ^ 0x10325476,
+	/*0x98badcfe*/ T_MASK ^ 0x67452301,
+	0x10325476
+    };
+    int k;
+
+    pms->count[0] = 0;
+    pms->count[1] = 0;
+    for (k = 0; k < 4; ++k)
+	pms->abcd[k] = seed[k];
+}
+
+/*
+ * Feed nbytes bytes starting at data into the running MD5 computation.
+ *
+ * The 64-bit bit counter in pms->count[] is advanced, any bytes left over
+ * from a previous call (held in pms->buf) are completed to a 64-byte block
+ * first, all full 64-byte blocks are handed to md5_process(), and a
+ * trailing partial block is stashed in pms->buf for the next call.
+ * Calls with nbytes <= 0 are ignored.
+ */
+void
+md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes)
+{
+    const md5_byte_t *p = data;
+    int left = nbytes;
+    int offset = (pms->count[0] >> 3) & 63;
+    md5_word_t nbits;
+
+    if (nbytes <= 0)
+	return;
+
+    /* Convert before shifting: "nbytes << 3" on a signed int overflows
+       (undefined behaviour) once nbytes reaches 2^28. */
+    nbits = (md5_word_t)nbytes << 3;
+
+    /* Update the message length. */
+    pms->count[1] += nbytes >> 29;
+    pms->count[0] += nbits;
+    if (pms->count[0] < nbits)
+	pms->count[1]++;
+
+    /* Process an initial partial block. */
+    if (offset) {
+	/* Compare against the free space so that offset + nbytes is never
+	   formed (it could overflow int for very large nbytes). */
+	int room = 64 - offset;
+	int copy = (nbytes > room ? room : nbytes);
+
+	memcpy(pms->buf + offset, p, copy);
+	if (offset + copy < 64)
+	    return;
+	p += copy;
+	left -= copy;
+	md5_process(pms, pms->buf);
+    }
+
+    /* Process full blocks. */
+    for (; left >= 64; p += 64, left -= 64)
+	md5_process(pms, p);
+
+    /* Process a final partial block. */
+    if (left)
+	memcpy(pms->buf, p, left);
+}
+
+/*
+ * Finish the MD5 computation and store the 16-byte result in digest.
+ *
+ * The message bit count is captured first, then the padding (a 0x80 byte
+ * followed by zeros, up to 56 bytes mod 64) and the 8 length bytes are
+ * appended.  Both the length and the digest are emitted least significant
+ * byte first from each word.
+ */
+void
+md5_finish(md5_state_t *pms, md5_byte_t digest[16])
+{
+    /* 0x80 followed by 63 zero bytes. */
+    static const md5_byte_t padding[64] = { 0x80 };
+    md5_byte_t length_bytes[8];
+    int k;
+
+    /* Save the length before padding changes the counter. */
+    for (k = 0; k < 8; ++k) {
+	const int word = k >> 2;
+	const int shift = (k & 3) << 3;
+
+	length_bytes[k] = (md5_byte_t)(pms->count[word] >> shift);
+    }
+
+    /* Pad to 56 bytes mod 64, then append the saved length. */
+    md5_append(pms, padding, ((55 - (pms->count[0] >> 3)) & 63) + 1);
+    md5_append(pms, length_bytes, 8);
+
+    for (k = 0; k < 16; ++k) {
+	const int word = k >> 2;
+	const int shift = (k & 3) << 3;
+
+	digest[k] = (md5_byte_t)(pms->abcd[word] >> shift);
+    }
+}
--- a/executer/user/ql_mpiexec_start.c
+++ b/executer/user/ql_mpiexec_start.c
--- a/executer/user/ql_server.c
+++ b/executer/user/ql_server.c
@ -0,0 +1,597 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sys/un.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "../include/qlmpi.h"
+
+#define	NALLOC	10
+#define NOLOG
+
+#ifndef NOLOG
+#define LOGFILE "ql_server.log"
+int log_open(char *f_name);
+int log_close();
+void log_printf(const char *format, ...);
+void log_dump(struct client_fd *fd_list,int fd_size);
+
+FILE * log_fp;
+#endif
+
+int listen_fd = -1;
+char file_path[1024];
+
+/*
+ * Build the socket path "<path>/<file>" into filep and check whether a
+ * server can be started there.
+ *
+ * Returns 0 when the socket file already exists or when the directory
+ * could not be created (main() treats 0 as "do not start"), and 1 when
+ * the directory exists or was created successfully.
+ *
+ * NOTE(review): filep is written with an unbounded sprintf; the only
+ * caller in this file passes the 1024-byte file_path buffer.
+ */
+int check_ql_server( char * path,char * file ,char *filep){
+	struct stat st;
+	int rc;
+
+	sprintf(filep,"%s/%s",path,file);
+
+	rc = stat(filep,&st);
+	if (rc == 0) {
+		fprintf(stderr,"socket file exests. %s\n",filep);
+		return rc;
+	}
+
+	rc = stat(path,&st);
+	if (rc == 0) {
+		fprintf(stderr,"dir(file) exests. %s %d\n",path,rc);
+		return 1;
+	}
+
+	/*
+	 * stat() failed, so st holds no valid data and must not be
+	 * inspected; just try to create the directory.
+	 */
+	if (mkdir(path, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
+		fprintf(stderr,"dir create. %s %d\n",path,rc);
+		return 1;
+	}
+	fprintf(stderr,"mkdir error. %s %d\n",path,rc);
+	return 0; /* mkdir error */
+}
+
+/*
+ * Shut the server down and exit with status rc.
+ *
+ * If the listening socket is open it is shut down, closed, and its
+ * socket file (file_path) is removed.  main() also installs this
+ * function as the SIGINT/SIGTERM handler, in which case rc is the
+ * signal number.
+ */
+void terminate(int rc){
+	const int sock = listen_fd;
+
+	if (sock >= 0) {
+		shutdown(sock, 2);
+		close(sock);
+		unlink(file_path);
+	}
+#ifndef NOLOG
+	log_close();
+#endif
+	exit(rc);
+}
+
+/*
+ * Find the first entry of fd_list[0..fd_size) whose client type equals
+ * client_type, whose name equals p_name and whose fd is in use (!= -1).
+ *
+ * Returns the index of that entry, or fd_size when there is none.
+ */
+int s_fd_list(char * p_name,int client_type ,
+		struct client_fd *fd_list,int fd_size){
+	int idx = 0;
+
+	while (idx < fd_size) {
+		const struct client_fd *ent = &fd_list[idx];
+
+		if (ent->client == client_type &&
+		    strcmp(ent->name, p_name) == 0 &&
+		    ent->fd != -1)
+			return idx;
+		idx++;
+	}
+	return idx;
+}
+
+/*
+ * ql_server entry point.
+ *
+ * argv[1] is the directory and argv[2] the file name of the UNIX-domain
+ * socket to listen on.  The process detaches from its standard
+ * descriptors, binds the socket, daemonizes, and then relays commands
+ * between the clients that connect (ql_talker/ql_mpiexec_*, mcexec and
+ * the monitor process).  Each connection occupies one slot of fd_list;
+ * clients are paired by the name carried in their messages.
+ *
+ * Slots are fully initialised (fd = -1, name = "") when allocated, so
+ * that s_fd_list() and the close path never read indeterminate fields.
+ */
+int main( int argc, char *argv[]){
+	int i,j, fd, rc = 0, len, maxfd;
+	int fd_size ;
+	struct client_fd *fd_list;
+	struct client_fd *grown;
+	fd_set rset, allset;
+	struct sockaddr_un	unix_addr;
+	char	*buf;
+	int s_indx;
+#ifndef NOLOG
+	int e_no; /*errno copy*/
+#endif
+	char * null_buff = "";
+	
+	if (argc < 3 ) {
+		fprintf(stderr," few args \n");
+		exit(-1);
+	}
+
+	/* drop every inherited descriptor and point 0/1/2 at /dev/null */
+	for (i = 0; i < 4096; i++)
+		close(i);
+	open("/dev/null", O_RDONLY);
+	open("/dev/null", O_WRONLY);
+	open("/dev/null", O_WRONLY);
+
+	if (!check_ql_server(argv[1], argv[2] ,file_path)) {
+		fprintf(stderr,"ql_server already exists.\n");
+		exit(-1);
+	}
+	signal(SIGINT, terminate);
+	signal(SIGTERM, terminate);
+
+	listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (listen_fd < 0) {
+		fprintf(stderr,"listen error.\n");
+		terminate(rc);
+	}
+
+#ifndef NOLOG
+	log_open(argv[1]);
+#endif
+	/* sun_path is a small fixed-size array; refuse paths that do not fit */
+	if (strlen(file_path) >= sizeof(unix_addr.sun_path)) {
+		fprintf(stderr,"socket path too long.\n");
+		terminate(-1);
+	}
+	unix_addr.sun_family = AF_UNIX;
+	strcpy(unix_addr.sun_path, file_path);
+#ifndef NOLOG
+	log_printf("file_path =%s \n",file_path);
+#endif
+	len = sizeof(unix_addr.sun_family) + strlen(unix_addr.sun_path) + 1;
+	rc = bind(listen_fd, (struct sockaddr *)&unix_addr, len);
+
+	if (rc < 0) {
+#ifndef NOLOG
+		log_printf("bind error \n",file_path);
+#endif
+		terminate(rc);
+	}
+
+	// become a daemon
+	if (fork())
+		exit(0);
+	if (fork())
+		exit(0);
+	setsid();
+
+	rc = listen(listen_fd, 5);
+	if (rc < 0) {
+#ifndef NOLOG
+		log_printf("listen error \n");
+#endif
+		terminate(rc);
+	}
+
+	FD_ZERO(&allset);
+	FD_SET(listen_fd, &allset);
+	maxfd = listen_fd;
+	fd_size = NALLOC;
+	fd_list = malloc(sizeof(struct client_fd)*fd_size);
+	if (fd_list == NULL) {
+		terminate(-1);
+	}
+	memset(fd_list, 0, sizeof(struct client_fd)*fd_size);
+	for (i = 0; i < fd_size; i++) {
+		fd_list[i].fd = -1;
+		fd_list[i].name = null_buff;
+	}
+
+#ifndef NOLOG
+	log_printf("loop_start \n");
+#endif
+	for (;;) {
+		memcpy(&rset, &allset, sizeof(rset));
+		rc = select(maxfd + 1, &rset, NULL, NULL, NULL);
+		if (rc == -1) {
+#ifndef NOLOG
+			e_no = errno;
+			log_printf("server:select error.\n");
+			log_printf("select error string by strerror: %s\n", 
+				strerror(e_no));
+			log_printf("select error code: %d\n", e_no);
+#endif
+			terminate(rc);
+		}
+#ifndef NOLOG
+		log_printf("server:select.\n");
+#endif
+
+		/* new connection: store it in the first free slot */
+		if (FD_ISSET(listen_fd, &rset)) {
+			len = sizeof(unix_addr);
+			fd = accept(listen_fd, (struct sockaddr *)&unix_addr, 
+				(socklen_t*)&len);
+			if (fd < 0) {
+#ifndef NOLOG
+				log_printf("server:accept error.\n");
+#endif
+				terminate(fd);
+			}
+#ifndef NOLOG
+			log_printf("server:accept (%d).\n", fd);
+#endif
+			for (i = 0; fd_size > i; i++) {
+				if (fd_list[i].fd == -1) {
+					fd_list[i].fd = fd;
+					break;
+				}
+			}
+			if (i >= fd_size) {
+				/*
+				 * Table full: grow by NALLOC entries.  The
+				 * size must be counted in struct client_fd
+				 * elements, not ints.
+				 */
+				grown = realloc(fd_list,
+					sizeof(struct client_fd)*(fd_size+NALLOC));
+				if (grown == NULL) {
+					close(fd);
+					terminate(-1);
+				}
+				fd_list = grown;
+				memset(&fd_list[fd_size], 0,
+					sizeof(struct client_fd)*NALLOC);
+				for (i = fd_size; i < (fd_size + NALLOC); i++) {
+					fd_list[i].fd = -1;
+					fd_list[i].name = null_buff;
+				}
+				fd_list[fd_size].fd = fd;
+				fd_size += NALLOC;
+			}
+			FD_SET(fd, &allset);
+			if (fd > maxfd) {
+				maxfd = fd;
+			}
+		}
+
+		for (i = 0; i < fd_size; i++) {
+			if (fd_list[i].fd == -1)
+				continue;
+			fd = fd_list[i].fd;
+			if (!FD_ISSET(fd, &rset))
+				continue;
+			/* ql_recv() leaves buf untouched when there is no payload */
+			buf = NULL;
+			rc = ql_recv(fd, &buf);
+#ifndef NOLOG
+			log_printf("ql_recv (%d) index = %d fd = %d \n", rc,i,fd);
+#endif
+			if(rc < 0){
+#ifndef NOLOG
+				log_printf("server:recv (%d) error.\n", fd);
+#endif
+				terminate(rc);
+			}
+			if (rc == 0) {
+#ifndef NOLOG
+				log_printf("server:closed (%d).\n", fd);
+#endif
+				fd_list[i].fd = -1;
+				if (strcmp(fd_list[i].name,null_buff)) {
+					free(fd_list[i].name);
+					fd_list[i].name = null_buff;
+				}
+				FD_CLR(fd, &allset);
+				maxfd = -1;
+				/*
+				 * NOTE(review): unlike the other rescans in
+				 * this function, this loop compares the fd
+				 * that was just closed rather than
+				 * fd_list[j].fd, so maxfd always ends up as
+				 * fd and the terminate() below is never
+				 * reached.  Left as is because changing it
+				 * alters when the daemon exits; confirm the
+				 * intended behaviour.
+				 */
+				for (j = 0; fd_size > j ; j++) {
+					if (fd > maxfd) {
+						maxfd = fd;
+					}
+				}
+				close(fd);
+#ifndef NOLOG
+				log_printf("index = %d\n",i);
+				log_dump(fd_list,fd_size);
+#endif
+				if (maxfd == -1) {
+					terminate(rc);
+				}
+				continue;
+			}
+
+			if (buf == NULL) {
+				/*
+				 * Message without a name payload: every
+				 * command below routes on the name, so there
+				 * is nothing that can be done with it.
+				 */
+#ifndef NOLOG
+				log_printf("server:no payload %d (%d).\n",
+				           rc, fd);
+#endif
+				continue;
+			}
+
+			if (rc == QL_EXEC_END){ /* swapout from mcexec */
+				fd_list[i].client = QL_MCEXEC_PRO;
+				fd_list[i].name = buf;
+				fd_list[i].status = QL_EXEC_END;
+#ifndef NOLOG
+				log_printf("index = %d\n",i);
+				log_dump(fd_list,fd_size);
+#endif
+/* send E command to ql_talker */
+				if ((s_indx = s_fd_list(fd_list[i].name,
+					QL_MPEXEC,fd_list,
+					fd_size)) <  fd_size) {
+#ifndef NOLOG
+					log_printf("E command to talker %d \n",s_indx); 
+#endif
+					rc = ql_send(fd_list[s_indx].fd,
+						QL_EXEC_END,NULL);
+/*  fd close for ql_talker */
+					FD_CLR(fd_list[s_indx].fd, &allset);
+					maxfd = -1;
+					close(fd_list[s_indx].fd);
+					free(fd_list[s_indx].name);
+					fd_list[s_indx].fd = -1;
+					fd_list[s_indx].name = null_buff;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+					if (maxfd == -1) terminate(0);
+				}
+				else{
+					/* ql_talker not found */
+#ifndef NOLOG
+					log_printf("ql_talker not found\n",i);
+#endif
+				/* send I command to mcexec and param_file put A command*/
+				}
+#ifndef NOLOG
+				log_printf("index = %d\n",i);
+				log_dump(fd_list,fd_size);
+#endif
+			}
+			else if (rc == QL_RET_RESUME) {
+				/* recv R command from ql_talker */
+				fd_list[i].client = QL_MPEXEC;
+				fd_list[i].name = buf;
+				fd_list[i].status = QL_RET_RESUME;
+#ifndef NOLOG
+				log_printf("index = %d,fd_size=%d\n",
+					i,fd_size);
+				log_dump(fd_list,fd_size);
+#endif
+				/* send R command to mcexec */
+				if (((s_indx = s_fd_list(fd_list[i].name,
+					QL_MCEXEC_PRO ,
+					fd_list,fd_size)) <  fd_size) && 
+					fd_list[s_indx].status == QL_EXEC_END) {
+#ifndef NOLOG
+					log_printf("R command to mcexec %d \n",s_indx);
+					log_dump(fd_list,fd_size);
+#endif
+					rc = ql_send(fd_list[s_indx].fd,
+						QL_RET_RESUME,NULL);
+					fd_list[s_indx].status = QL_RET_RESUME;
+					FD_CLR(fd_list[s_indx].fd, &allset);
+					close(fd_list[s_indx].fd);
+					free(fd_list[s_indx].name);
+					fd_list[s_indx].fd = -1;
+					fd_list[s_indx].name = null_buff;
+					maxfd = -1;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+					if (maxfd == -1) terminate(0);
+				}
+				else{
+/* mcexec not found */
+/* send A command to ql_talker */
+#ifndef NOLOG
+					log_printf("send A command index = %d,fd_size=%d\n",
+						i,fd_size);
+					log_dump(fd_list,fd_size);
+#endif
+					rc = ql_send(fd_list[i].fd,
+						QL_AB_END,NULL);
+/*  fd close for ql_talker */
+					FD_CLR(fd_list[i].fd, &allset);
+					close(fd_list[i].fd);
+					free(fd_list[i].name);
+					fd_list[i].fd = -1;
+					fd_list[i].name = null_buff;
+					maxfd = -1;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+					if (maxfd == -1) terminate(0);
+				}
+#ifndef NOLOG
+				log_printf("index = %d,s_indx=%d\n",
+					i,s_indx);
+				log_dump(fd_list,fd_size);
+#endif
+			}
+			else if (rc == QL_COM_CONN) {
+				/* connect from ql_mpiexec_* */
+				fd_list[i].client = QL_MPEXEC;
+				fd_list[i].name = buf;
+				fd_list[i].status = QL_COM_CONN;
+#ifndef NOLOG
+				log_printf("N command index = %d,fd_size=%d\n",
+					i,fd_size);
+				log_dump(fd_list,fd_size);
+#endif
+				if ((s_indx = s_fd_list(fd_list[i].name,
+					QL_MCEXEC_PRO,fd_list,
+					fd_size)) <  fd_size) {
+					rc = ql_send(fd_list[i].fd,
+						QL_EXEC_END,NULL);
+/*  fd close for ql_talker */
+					FD_CLR(fd_list[i].fd, &allset);
+					maxfd = -1;
+					close(fd_list[i].fd);
+					free(fd_list[i].name);
+					fd_list[i].fd = -1;
+					fd_list[i].name = null_buff;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+				//	if (maxfd == -1) terminate(0);
+				}
+#ifndef NOLOG
+				log_dump(fd_list,fd_size);
+#endif
+			}
+			else if(rc == QL_RET_FINAL) {
+				/*  F command from Monitor Process */
+				fd_list[i].client = QL_MONITOR;
+				fd_list[i].name = buf;
+				fd_list[i].status = QL_RET_FINAL;
+#ifndef NOLOG
+				log_printf("F command index = %d,fd_size=%d\n",
+					i,fd_size);
+				log_dump(fd_list,fd_size);
+#endif
+				/* search ql_mpiexec_start process */
+				if ((s_indx = s_fd_list(fd_list[i].name,
+					QL_MPEXEC,fd_list,
+					fd_size)) <  fd_size) {
+				/* send A command */
+					rc = ql_send(fd_list[s_indx].fd,
+						QL_AB_END,NULL);
+				/* table clear */
+					FD_CLR(fd_list[s_indx].fd, &allset);
+					maxfd = -1;
+					close(fd_list[s_indx].fd);
+					free(fd_list[s_indx].name);
+					fd_list[s_indx].fd = -1;
+					fd_list[s_indx].name = null_buff;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+				}
+				/* search mcexec process */
+				if ((s_indx = s_fd_list(fd_list[i].name,
+					QL_MCEXEC_PRO,fd_list,
+					fd_size)) <  fd_size) {
+				/* table clear */
+					FD_CLR(fd_list[s_indx].fd, &allset);
+					maxfd = -1;
+					close(fd_list[s_indx].fd);
+					free(fd_list[s_indx].name);
+					fd_list[s_indx].fd = -1;
+					fd_list[s_indx].name = null_buff;
+					for (j = 0; fd_size > j ; j++) {
+						if (fd_list[j].fd > maxfd) {
+							maxfd = fd_list[j].fd;
+						}
+					}
+				}
+				FD_CLR(fd_list[i].fd, &allset);
+				close(fd_list[i].fd);
+				free(fd_list[i].name);
+				fd_list[i].fd = -1;
+				fd_list[i].name = null_buff;
+				maxfd = -1;
+				for (j = 0; fd_size > j ; j++) {
+					if (fd_list[j].fd > maxfd) {
+						maxfd = fd_list[j].fd;
+					}
+				}
+#ifndef NOLOG
+				log_printf("F command end index = %d,fd_size=%d\n",
+					i,fd_size);
+				log_dump(fd_list,fd_size);
+#endif
+				if (maxfd == -1)
+					terminate(0);
+			}
+			else {
+#ifndef NOLOG
+				log_printf("server:unknwon commond %d (%d).\n",
+				           rc, fd);
+#endif
+				/* payload was not stored in the table */
+				free(buf);
+			}
+#ifndef NOLOG
+			log_printf("server:recv (%d) .\n", fd);
+#endif
+		}
+	}
+	terminate(0);
+}
+
+#ifndef NOLOG
+/*
+ * Open "<f_path>/" LOGFILE for writing and make it the log stream.
+ * Falls back to stderr when the path does not fit the local buffer or
+ * the file cannot be opened.  Always returns 0.
+ */
+int log_open(char *f_path){
+	char f_name[1024];
+	int n;
+
+	n = snprintf(f_name,sizeof(f_name),"%s/%s",f_path,LOGFILE);
+	if (n < 0 || (size_t)n >= sizeof(f_name) ||
+	    (log_fp = fopen(f_name,"w")) == NULL) {
+		log_fp = stderr;
+	}
+	return 0;
+}
+
+/*
+ * Close the log stream.  Nothing is closed when the log was never
+ * opened (terminate() can run before log_open()) or when it is one of
+ * the standard streams; log_open() falls back to stderr.
+ * Always returns 0.
+ */
+int log_close(){
+	if (log_fp != NULL && log_fp != stdout && log_fp != stderr) {
+		fclose(log_fp);
+	}
+	return 0;
+}
+
+/*
+ * printf-style logging: format the message, write it followed by a
+ * newline to the log stream and flush.  Messages longer than the local
+ * buffer are truncated instead of overflowing it.  Does nothing when the
+ * log stream has not been opened.
+ */
+void log_printf(const char *format, ...){
+	va_list arglist;
+	char log[1024];
+
+	if (log_fp == NULL) {
+		return;
+	}
+	va_start(arglist, format);
+	vsnprintf(log, sizeof(log), format, arglist);
+	va_end(arglist);
+	fprintf(log_fp, "%s\n", log);
+	fflush(log_fp);
+}
+
+/*
+ * Write one log line per slot of fd_list[0..fd_size): fd, client type,
+ * status (as a character) and name for slots in use, a placeholder row
+ * for free slots (fd == -1), followed by a separator line.
+ */
+void log_dump(struct client_fd *fd_list,int fd_size){
+	int idx = 0;
+
+	while (idx < fd_size) {
+		const struct client_fd *ent = &fd_list[idx++];
+
+		if (ent->fd == -1) {
+			log_printf("|%4d|0000| |    |\n",ent->fd);
+			continue;
+		}
+		log_printf("|%4d|%4d|%c|%s|\n",ent->fd,
+			ent->client,(char)ent->status,
+			ent->name);
+	}
+	log_printf("-----------------------\n");
+}
+#endif
+
+/*
+ * Receive one message from fd and split it into command and payload.
+ *
+ * A message has the form "<cmd> <hex size> <payload>" (see ql_send()),
+ * i.e. the payload starts at byte offset 7.
+ *
+ * Returns the command character as an int, or the recv() result when
+ * that is <= 0 (0: peer closed, <0: error), or -1 when the payload
+ * cannot be allocated.  When the message carries a payload, *buf is set
+ * to a freshly malloc'ed, NUL-terminated copy that the caller owns;
+ * otherwise *buf is left untouched.
+ */
+int ql_recv(int fd,char ** buf){
+	/* one extra byte so the received data can always be terminated */
+	char l_buf[QL_BUF_MAX + 1];
+	const int hdr_len = 7;
+	char comm = 0;
+	unsigned int announced = 0;
+	int size = 0;
+	int avail;
+	int rc;
+	int ret;
+
+	l_buf[0] = '\0';
+	rc = recv(fd, l_buf, QL_BUF_MAX, 0);
+	if (rc > 0) {
+		l_buf[rc] = '\0';
+	}
+#ifndef NOLOG
+	log_printf("rc = %d,l_buf=%s\n",rc,l_buf);
+#endif
+	if (rc <= 0) {
+		return rc;
+	}
+	
+	sscanf(l_buf, "%c %x", &comm, &announced);
+	ret = (int)(comm);
+
+	/* never trust the announced size beyond what was actually received */
+	avail = rc - hdr_len;
+	if (avail < 0) {
+		avail = 0;
+	}
+	size = (announced > (unsigned int)avail) ? avail : (int)announced;
+#ifndef NOLOG
+	log_printf("COMM=%c size = %x rc= %d\n", ret, size, rc);
+#endif
+	if (size > 0) {
+		*buf = malloc(size+1);
+		if (*buf == NULL) {
+			return -1;
+		}
+		memcpy(*buf, &l_buf[hdr_len], size);
+		/* terminate the copy itself, not the caller's pointer slot */
+		(*buf)[size] = 0x00;
+#ifndef NOLOG
+		log_printf("COMM=%c size = %x *buf= %s\n",ret,size,*buf);
+#endif
+	}
+#ifndef NOLOG
+	log_printf("ret = %d\n", ret);
+#endif
+	return ret;
+}
+
+/*
+ * Send one message "<command> <size as 4+ hex digits> <buf>" on fd, or
+ * "<command> 0000" when buf is NULL.  The terminating NUL is not sent.
+ *
+ * Returns the send() result, or -1 when the message cannot be formatted.
+ */
+int ql_send(int fd,int command,char *buf){
+	char *lbuf;
+	unsigned int size;
+	int need;
+	int rc;
+
+	if (buf != NULL) {
+		size = (unsigned int)strlen(buf);
+		/*
+		 * Measure first: "%04x" prints more than four digits for
+		 * payloads longer than 0xffff bytes, so a fixed size+8
+		 * buffer is not always enough.
+		 */
+		need = snprintf(NULL, 0, "%c %04x %s", command, size, buf);
+		if (need < 0) {
+			return -1;
+		}
+		lbuf = alloca((size_t)need + 1);
+		snprintf(lbuf, (size_t)need + 1, "%c %04x %s",
+			command, size, buf);
+	}
+	else{
+		lbuf = alloca(6+1);
+		snprintf(lbuf, 6+1, "%c 0000", command);
+	}
+#ifndef NOLOG
+	log_printf("send lbuf=%s",lbuf);
+#endif
+	rc=send(fd,lbuf,strlen(lbuf),0);
+	return rc;
+}
+
--- a/executer/user/ql_talker.c
+++ b/executer/user/ql_talker.c
@ -0,0 +1,101 @@
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+#include "../include/qlmpi.h"
+
+int	fd = -1;
+
+#define BUF_MAX 256
+
+/*
+ * Exit with status rc, first shutting down and closing the client
+ * socket if it is open.  Also installed by main() as the SIGINT/SIGTERM
+ * handler, in which case rc is the signal number.
+ */
+void terminate(int rc)
+{
+	if (fd < 0)
+		exit(rc);
+
+	shutdown(fd, 2);
+	close(fd);
+	exit(rc);
+}
+
+/*
+ * ql_talker: send one command to ql_server and optionally wait for the
+ * reply.
+ *
+ *   argv[1]  command string to send (nothing is sent when it is empty)
+ *   argv[2]  "-n" to exit without waiting, otherwise the reply whose
+ *            first character means success
+ *   argv[3]  payload sent along with the command
+ *   argv[4]  path of the server's UNIX-domain socket
+ *
+ * Exit status: 0 on success (or when not waiting), -2 when the server
+ * answers QL_AB_END, otherwise the failing call's negative result.
+ */
+int main(int argc, char* argv[])
+{
+	int	rc=-1, len;
+	int	msg_len;
+	struct sockaddr_un	unix_addr;
+	char	buf[BUF_MAX];
+
+	signal(SIGINT, terminate);
+	signal(SIGTERM, terminate);
+
+	if (argc < 5) {
+#ifdef QL_DEBUG
+		printf("too few arguments\n");
+#endif
+		return rc;
+	}
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0) {
+#ifdef QL_DEBUG
+		printf("client:socket error.\n");
+#endif
+		terminate(rc);
+	}
+#ifdef QL_DEBUG
+	printf("client:socket.\n");
+#endif
+	/* sun_path is a small fixed-size array; refuse paths that do not fit */
+	if (strlen(argv[4]) >= sizeof(unix_addr.sun_path)) {
+#ifdef QL_DEBUG
+		printf("socket path too long.\n");
+#endif
+		terminate(rc);
+	}
+	unix_addr.sun_family = AF_UNIX;
+	strcpy(unix_addr.sun_path, argv[4]);
+#ifdef QL_DEBUG
+	printf("socket_path %s\n",argv[4]);
+#endif
+	len = sizeof(unix_addr.sun_family)+strlen(unix_addr.sun_path) + 1;
+	rc = connect(fd, (struct sockaddr*)&unix_addr, len);
+	if (rc < 0) {
+#ifdef QL_DEBUG
+		printf("client:connect error.\n");
+		printf("%s %s\n", unix_addr.sun_path, strerror(errno));
+#endif
+		terminate(rc);
+	}
+
+	if (argv[1][0]) {
+		/* bounded formatting: command and payload come from argv */
+		msg_len = snprintf(buf,sizeof(buf),"%s %04x %s",argv[1],
+					(unsigned int)strlen(argv[3]),argv[3]);
+		if (msg_len < 0 || (size_t)msg_len >= sizeof(buf)) {
+#ifdef QL_DEBUG
+			printf("message too long.\n");
+#endif
+			terminate(-1);
+		}
+		rc = send(fd, buf, strlen(buf) + 1, 0);
+		if (rc < 0) {
+#ifdef QL_DEBUG
+			printf("send error.\n");
+#endif
+			terminate(rc);
+		}
+	}
+	if (strcmp(argv[2],"-n")) {
+#ifdef QL_DEBUG
+		printf("waiting reply message from ql_server ...\n");
+#endif
+		/* keep one byte free so the reply can be terminated */
+		rc = recv(fd, buf, sizeof(buf) - 1, 0);
+		buf[rc > 0 ? rc : 0] = '\0';
+#ifdef QL_DEBUG
+		printf("%s\n",buf);
+#endif
+		if (rc < 0) {
+#ifdef QL_DEBUG
+			printf("recv error\n");
+#endif
+			terminate(rc);
+		}
+		if (buf[0] == argv[2][0]){
+			terminate(0);
+		}
+		if (buf[0] == QL_AB_END){
+			/* abnormal end */
+			terminate(-2);
+		}
+	}
+
+	terminate(0);
+	return rc; /*not reached */
+}
--- a/executer/user/qlmpilib.c
+++ b/executer/user/qlmpilib.c
@ -0,0 +1,320 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include "mpi.h"
+#include "../include/qlmpilib.h"
+#include "../include/qlmpi.h"
+#include "../include/pmi.h"
+
+
+#define BUF_SIZE        (32*1024)
+#define NALLOC 10
+#define QL_SUCCESS 0
+#define QL_NORMAL 2 
+
+//#define QL_DEBUG
+
+static char ql_name[33];
+static char swap_file[1024];
+static char param_file[1024];
+static int ql_mode_flg = 0; /* 0 is normal */
+static int rank = -1;
+static char buffer[BUF_SIZE];
+static int ql_initialized;
+int mck_ql_argc = NALLOC;
+char **mck_ql_argv;
+char **mck_ql_env;
+
+/*
+ * Free a NULL-terminated vector of malloc'ed strings: every element up
+ * to the terminating NULL, then the vector itself.
+ */
+static void freev(char **v)
+{
+	size_t k = 0;
+
+	while (v[k] != NULL) {
+		free(v[k]);
+		k++;
+	}
+	free(v);
+}
+
+/*
+ * Copy in to out, replacing every "%XY" sequence by the single byte
+ * whose value is the two hex digits XY.  A '%' that is not followed by
+ * two more characters is copied literally; a non-hex character in the
+ * XY position contributes 0 to its nibble.  out is NUL-terminated and
+ * never needs more room than strlen(in) + 1.
+ */
+static void esc_get(char *in, char *out)
+{
+	const char *src = in;
+	char *dst = out;
+
+	while (*src != '\0') {
+		if (src[0] == '%' && src[1] != '\0' && src[2] != '\0') {
+			int value = 0;
+			int k;
+
+			for (k = 1; k <= 2; k++) {
+				const char ch = src[k];
+				int nibble = 0;
+
+				if (ch >= '0' && ch <= '9')
+					nibble = ch - '0';
+				else if (ch >= 'A' && ch <= 'F')
+					nibble = ch - 'A' + 10;
+				else if (ch >= 'a' && ch <= 'f')
+					nibble = ch - 'a' + 10;
+				value = (value << 4) + nibble;
+			}
+			*dst++ = value;
+			src += 3;
+		}
+		else {
+			*dst++ = *src++;
+		}
+	}
+	*dst = '\0';
+}
+
+/*
+ * Thin wrapper around McKernel system call 801 (registered as "swapout"
+ * in syscall_list.h): passes the swap file name, a work buffer with its
+ * size, and a flag straight through and returns the call's result.
+ */
+static int swapout(char *fname, void *buf, size_t sz, int flag)
+{
+	return syscall(801, fname, buf, sz, flag);
+}
+
+/*
+ * Read the job name from the environment variable named by QL_NAME
+ * into ql_name.
+ *
+ * Returns 1 when the variable is set and fits, 0 when it is unset or
+ * too long for ql_name (a diagnostic is printed in the latter case and
+ * ql_name is left unchanged).
+ */
+static int ql_get_option() {
+	char *env_str;
+
+	env_str = getenv(QL_NAME);
+	if (env_str == NULL) {
+		return 0;
+	}
+	/* the value comes from the environment: never copy it unchecked */
+	if (strlen(env_str) >= sizeof(ql_name)) {
+		fprintf(stderr, "%s is too long\n", QL_NAME);
+		return 0;
+	}
+	strcpy(ql_name,env_str);
+	return 1;
+}
+
+/*
+ * One-time initialisation of the qlmpi client library.
+ *
+ * Returns QL_CONTINUE when already initialised, QL_NORMAL when the
+ * QL_NAME environment variable is not set (plain MPI run), and
+ * QL_SUCCESS after the MPI rank has been queried and the parameter-file
+ * and swap-file paths have been built:
+ *
+ *   param_file = <$QL_PARAM_ENV or $HOME>/<ql_name><QL_PARAM_EXTE>
+ *   swap_file  = <$QL_SWAP_ENV or QL_SWAP_PATH>/<ql_name><rank>
+ *
+ * Paths are formatted with snprintf() so over-long environment values
+ * are truncated instead of overflowing the static buffers; when neither
+ * QL_PARAM_ENV nor HOME is set the current directory (".") is used.
+ */
+int ql_init() {
+	const char *dir;
+
+	if (ql_initialized) {
+		return QL_CONTINUE;
+	}
+
+	ql_mode_flg = ql_get_option();
+#ifdef QL_DEBUG
+	printf("flg = %d \n",ql_mode_flg);
+#endif
+
+	if (ql_mode_flg) {
+		MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+		/* get param_file path */
+		dir = getenv(QL_PARAM_ENV);
+		if (dir == NULL) {
+			dir = getenv("HOME");
+		}
+		if (dir == NULL) {
+			/* passing NULL to %s is undefined */
+			dir = ".";
+		}
+		snprintf(param_file,sizeof(param_file),"%s/%s%s",
+			dir,ql_name,QL_PARAM_EXTE);
+
+#ifdef QL_DEBUG
+		printf("param_file = %s\n",param_file);
+#endif
+
+		/* get swap_file path*/
+		dir = getenv(QL_SWAP_ENV);
+		if (dir == NULL) {
+			dir = QL_SWAP_PATH;
+		}
+		snprintf(swap_file,sizeof(swap_file),"%s/%s%d",
+			dir,ql_name,rank);
+
+#ifdef QL_DEBUG
+		printf("swap_file = %s rank=%d\n",swap_file,rank);
+#endif
+		ql_initialized = 1;
+		return QL_SUCCESS;		
+	}
+
+	ql_initialized = 1;
+	return QL_NORMAL;
+}
+
+/*
+ * Swap the process out and, once it is running again, load the next
+ * invocation's arguments and environment from the parameter file.
+ *
+ * Sequence: suspend the other threads (syscall 803), PMI barrier,
+ * swapout(), read param_file, install the new argv/environment, resume
+ * the threads (syscall 804).
+ *
+ * Parameter file lines, selected by their first character:
+ *   QL_COMMAND  "...=<R|F> <nargs> <nenvs>"   next action and counts
+ *   QL_ARG      "<c> <len> <escaped text>"    one argument
+ *   QL_ENV      "<c> <len> <escaped text>"    one environment entry
+ *
+ * On success *argc / *argv (when non-NULL), mck_ql_argc / mck_ql_argv,
+ * mck_ql_env and environ are replaced by the values read.
+ *
+ * Returns QL_CONTINUE when the file requests a resume, QL_EXIT otherwise
+ * (including: not in ql mode, swapout failure, unreadable param file).
+ * Exits the process when the command line of the file is malformed.
+ */
+int ql_client(int *argc,char ***argv)
+{
+	int rc;
+	int ret = QL_EXIT;
+	char buf[4096];
+	FILE *fp;
+	char **envs;
+	char **args;
+	char **a;
+	char **e;
+	int arg_cap = 0;	/* entries args can hold besides the NULL */
+	int env_cap = 0;	/* entries envs can hold besides the NULL */
+
+	if (ql_mode_flg == 0) return(QL_EXIT);
+
+	syscall(803);
+	rc = PMI_Barrier();
+	
+	rc = swapout(swap_file, buffer, BUF_SIZE, 0);
+
+#ifdef QL_DEBUG
+	printf(" swapout rc=%d\n",rc);
+#endif
+	if (rc == -1) {
+		/* terminate due to swap error */
+		syscall(804);
+		return QL_EXIT;
+	}
+
+	/* param file */
+	if ((fp = fopen(param_file,"r")) == NULL) {
+		/* param file open error */
+#ifdef QL_DEBUG
+		printf("param_file open error\n");
+#endif
+		syscall(804);
+		return QL_EXIT;
+	}
+
+	a = args = NULL;
+	e = envs = NULL;
+	while ((fgets(buf, 4096, fp)) != NULL) {
+		int cmd = buf[0];
+		char *t;
+		int n;
+		size_t need;
+
+		/* strip the newline, if there is one (the last line may lack it) */
+		buf[strcspn(buf, "\n")] = '\0';
+		if (cmd == QL_COMMAND) {
+			t = strchr(buf, '=');
+			if (!t ||
+			    (t[1] != QL_RET_RESUME && t[1] != QL_RET_FINAL)) {
+				fprintf(stderr, "invalid file format\n");
+				exit(1);
+			}
+			t++;
+			if (*t == QL_RET_RESUME) {
+				ret = QL_CONTINUE;
+#ifdef QL_DEBUG
+				printf("COM = %c ret = %d\n", *t, ret);
+#endif
+			}
+			else {
+				ret = QL_EXIT;
+#ifdef QL_DEBUG
+				printf(" ret = %d",ret);
+#endif
+			}
+			t = strchr(t, ' ');
+			if (t) {
+				n = atoi(t + 1);
+				if (n < 0)
+					n = 0;
+				/* a repeated command line replaces the vectors */
+				if (args) {
+					*a = NULL;
+					freev(args);
+				}
+				args = malloc(sizeof(char *) * ((size_t)n + 1));
+				a = args;
+				arg_cap = args ? n : 0;
+				t = strchr(t + 1, ' ');
+				if (t) {
+					n = atoi(t + 1);
+					if (n < 0)
+						n = 0;
+					if (envs) {
+						*e = NULL;
+						freev(envs);
+					}
+					envs = malloc(sizeof(char *) * ((size_t)n + 1));
+					e = envs;
+					env_cap = envs ? n : 0;
+				}
+			}
+
+		}
+		else if (cmd == QL_ARG) {
+			if (!args)
+				continue;
+			/* more lines than announced: keep room for the NULL */
+			if (a - args >= arg_cap)
+				continue;
+			t = strchr(buf, ' ');
+			if (!t)
+				continue;
+			n = atoi(t + 1);
+			t = strchr(t + 1, ' ');
+			if (!t)
+				continue;
+			t++;
+			/* esc_get() writes at most strlen(t) + 1 bytes */
+			need = strlen(t);
+			if (n > 0 && (size_t)n > need)
+				need = (size_t)n;
+			*a = malloc(need + 1);
+			if (!*a)
+				continue;
+			esc_get(t, *a);
+			a++;
+		}
+		else if (cmd == QL_ENV) {
+			if (!envs)
+				continue;
+			if (e - envs >= env_cap)
+				continue;
+			t = strchr(buf, ' ');
+			if (!t)
+				continue;
+			n = atoi(t + 1);
+			t = strchr(t + 1, ' ');
+			if (!t)
+				continue;
+			t++;
+			need = strlen(t);
+			if (n > 0 && (size_t)n > need)
+				need = (size_t)n;
+			*e = malloc(need + 1);
+			if (!*e)
+				continue;
+			esc_get(t, *e);
+			e++;
+		}
+		else {
+		}
+	}
+	fclose(fp);
+
+	if (args) {
+		*a = NULL;
+		if (mck_ql_argv)
+			freev(mck_ql_argv);
+		mck_ql_argv = args;
+		if (argv)
+			*argv = args;
+		for (mck_ql_argc = 0; mck_ql_argv[mck_ql_argc]; mck_ql_argc++);
+		if (argc)
+			*argc = mck_ql_argc;
+	}
+	if (envs) {
+		*e = NULL;
+		if (mck_ql_env)
+			freev(mck_ql_env);
+		mck_ql_env = envs;
+		environ = envs;
+	}
+
+	syscall(804);
+#ifdef QL_DEBUG
+	printf(" return rtn = %d\n",ret);
+#endif
+	return ret;
+	
+}
+
+/*
+ * MPI_Init wrapper: forwards to PMPI_Init() and, when that succeeds,
+ * runs ql_init().  The PMPI_Init() result is returned unchanged.
+ */
+int MPI_Init(int *argc,char ***argv){
+	const int rc = PMPI_Init(argc,argv);
+
+	if (rc != MPI_SUCCESS)
+		return rc;
+
+	ql_init();
+	return rc;
+}
+
+/*
+ * Fortran-callable counterpart of the MPI_Init wrapper.
+ *
+ * pmpi_init_ is declared weak; when it is not linked in, *ierr is set
+ * to MPI_ERR_OTHER.  Otherwise it is called and ql_init() is run when it
+ * reports MPI_SUCCESS.
+ */
+void
+mpi_init_(int *ierr)
+{
+	extern void pmpi_init_(int *ierr) __attribute__ ((__weak__));
+
+	if (pmpi_init_) {
+		pmpi_init_(ierr);
+		if (*ierr == MPI_SUCCESS)
+			ql_init();
+		return;
+	}
+
+	*ierr = MPI_ERR_OTHER;
+}
+
+/*
+ * Fortran-callable entry for ql_client(): the new argument vector is
+ * received into locals and discarded; only the return code is passed
+ * back through *ierr.
+ */
+void ql_client_(int *ierr)
+{
+	char **argv;
+	int argc;
+	int status;
+
+	status = ql_client(&argc, &argv);
+	*ierr = status;
+}
--- a/kernel/Makefile.build.in
+++ b/kernel/Makefile.build.in
@ -5,6 +5,7 @@ OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
 OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
 OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
 OBJS += rbtree.o
+OBJS += pager.o
 DEPSRCS=$(wildcard $(SRC)/*.c)

 CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__  -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@ -50,6 +50,7 @@
 #define	VR_MEMTYPE_WB      0x00000000	/* write-back */
 #define	VR_MEMTYPE_UC      0x01000000	/* uncachable */
 #define	VR_MEMTYPE_MASK    0x0f000000
+#define VR_PAGEOUT	   0x10000000

 #define	PROT_TO_VR_FLAG(prot)	(((unsigned long)(prot) << 16) & VR_PROT_MASK)
 #define	VRFLAG_PROT_TO_MAXPROT(vrflag)	(((vrflag) & VR_PROT_MASK) << 4)
@ -248,6 +249,7 @@ struct process_vm;
 struct vm_regions;
 struct vm_range;

+struct swapinfo;

 #define HASH_SIZE	73

@ -711,6 +713,7 @@ struct process_vm {
 	struct list_head vm_range_numa_policy_list;
 	struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];
 	int range_cache_ind;
+	struct swapinfo *swapinfo;
 };

 static inline int has_cap_ipc_lock(struct thread *th)
--- a/kernel/include/swapfmt.h.in
+++ b/kernel/include/swapfmt.h.in
@ -0,0 +1,34 @@
+/*
+ * \file swapfmt.h
+ *  License details are found in the file LICENSE.
+ * \brief
+ *	swapped out file format
+ * \author Yutaka Ishikawa <ishikawa@riken.jp>
+ */
+
+#define MCKERNEL_SWAP	"McKernel swap"
+#define MCKERNEL_SWAP_VERSION "@MCKERNEL_VERSION@"
+#define SWAP_HLEN	16
+/* Header placed at the beginning of a swapped-out file (see layout below). */
+struct swap_header {
+	char		magic[SWAP_HLEN];	/* magic string; presumably MCKERNEL_SWAP ("McKernel swap") */
+	char		version[SWAP_HLEN];	/* same as McKernel version */
+	unsigned int	count_sarea;	/* count of swapped area info */
+	unsigned int	count_marea;	/* count of mlocked area info */
+};
+
+/*
+ * Description of one address range recorded in the swap file; used for
+ * both swapped areas and mlocked areas (the meaning of pos differs).
+ */
+struct swap_areainfo {
+	unsigned long	start;	/* virtual address */
+	unsigned long	end;	/* virtual address */
+	unsigned long	pos;	/* swap: file position in this file
+				 *  mlock: physical address */
+	unsigned long	flag;	/* flag in vm_range */
+};
+
+/*
+ *
+ *	+--------------------
+ *	|   swap_header
+ *	+--------------------
+ *	| swap_areainfo[...]
+ *	| swap_ainfo[...]
+ */
--- a/kernel/pager.c
+++ b/kernel/pager.c
@ -0,0 +1,837 @@
+/*
+ * \file pager.c
+ *  License details are found in the file LICENSE.
+ * \brief
+ *	paging system
+ * \author Yutaka Ishikawa <ishikawa@riken.jp>
+ */
+/*
+ * HISTORY:
+ */
+#include <types.h>
+#include <kmsg.h>
+#include <ihk/cpu.h>
+#include <cpulocal.h>
+#include <ihk/mm.h>
+#include <ihk/debug.h>
+#include <ihk/ikc.h>
+#include <errno.h>
+#include <cls.h>
+#include <syscall.h>
+#include <kmalloc.h>
+#include <process.h>
+#include <swapfmt.h>
+
+/* open(2) flags and lseek(2) whence values passed through to Linux */
+#define O_RDONLY	00000000
+#define O_WRONLY	00000001
+#define O_RDWR		00000002
+#define O_CREAT		00000100
+#define O_TRUNC		00001000
+#define SEEK_SET	0	 /* from include/uapi/linux/fs.h in Linux */
+#define SEEK_CUR	1	 /* from include/uapi/linux/fs.h in Linux */
+
+/* Classification of a range by its start address within vm_regions */
+#define IS_TEXT(start, region) ((start) == (region)->text_start)
+#define IS_DATA(start, region) ((start) == (region)->data_start)
+#define IS_STACK(start, region) ((start) == (region)->stack_start)
+/* True when addr lies outside [user_start, user_end).
+ * Arguments are fully parenthesised so any expression may be passed. */
+#define IS_INVALID_USERADDRESS(addr, region)	\
+	((((unsigned long) (addr)) < (region)->user_start)	\
+	|| (((unsigned long) (addr)) >= (region)->user_end))
+/* True when len exceeds the size of the whole user region */
+#define IS_INVALID_LENGTH(len, region)	\
+	((len) > ((region)->user_end - (region)->user_start))
+#define IS_READONLY(flag)	(((flag)&VR_PROT_WRITE) == 0)
+#define IS_NOTUSER(flag)	(((flag)&VR_AP_USER) == 0)
+
+
+//#define DEBUG_PRINT_PROCESS
+
+/* dkprintf is compiled but never executed unless DEBUG_PRINT_PROCESS is
+ * defined; ekprintf always prints. */
+#ifdef DEBUG_PRINT_PROCESS
+#define dkprintf(...) kprintf(__VA_ARGS__)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#endif
+
+/*
+ * Contiguous pages are represented by the "addrpair" structure.
+ * - The swap_area, whose type is "struct arealist", keeps swappable pages
+ *   using "areaent" structures, each of which holds an array of "addrpair"
+ *   structures.
+ * - The mlock_area is also a "struct arealist", keeping pages locked by
+ *   both McKernel and Linux.
+ * - The mlock_container is a "struct mlockcntnr"; it marks a position
+ *   inside the mlock_area list.
+ */
+/*
+ * The page areas are independently managed by McKernel and Linux.
+ * Pages locked by the Linux kernel are not known to McKernel. To get that
+ * information, the mlockcntnr structure is used.
+ * The mlockcntnr keeps a cursor (areaent pointer plus entry count) into the
+ * mlock_area list, set by mlockcntnr_sethead() before Linux is asked for the
+ * locked pages of a range.
+ */
+/* Number of addrpair slots held by one areaent chunk */
+#define MLOCKADDRS_SIZE	128
+/* One contiguous address range [start, end) plus the flag recorded with it
+ * (arealist_add() callers pass the vm_range flag). */
+struct addrpair {
+	unsigned long	start;
+	unsigned long	end;
+	unsigned long	flag;
+};
+/* A chunk of the singly linked area list; count is the number of pair[]
+ * slots in use. */
+struct areaent {
+	struct areaent	*next;
+	int		count;
+	struct addrpair	pair[MLOCKADDRS_SIZE];
+};
+
+/* Linked list of areaent chunks; count is the total number of entries
+ * over all chunks (see arealist_update()). */
+struct arealist {
+	struct areaent	*head;
+	struct areaent	*tail;
+	int		count;
+};
+
+/* Cursor into mlock_area: "from" and "ccount" remember where the cursor was
+ * set by mlockcntnr_sethead(); "cur" is the chunk currently being walked by
+ * mlockcntnr_addrent(). */
+struct mlockcntnr {
+	struct areaent	*from;
+	int		ccount;
+	struct areaent	*cur;
+};
+
+/* Per-process state of one swap-out; created in do_pageout() and attached
+ * to process_vm->swapinfo until do_pagein() releases it. */
+struct swapinfo {
+	struct swap_header	*swphdr;
+	struct swap_areainfo	*swap_info, *mlock_info;
+
+	struct arealist	swap_area;
+	struct arealist	mlock_area;
+	struct mlockcntnr mlock_container;
+/* Size of udata_buf, the bounce buffer used by pager_open/pager_read */
+#define UDATA_BUFSIZE	(8*1024)
+	char	*swapfname;
+	char	*udata_buf;	/* To read-store data from Linux to user space */
+
+	/* Work area supplied by the caller and handed out by myalloc() */
+	void	*user_buf;
+	size_t	ubuf_size, ubuf_alloced;
+};
+
+/* Debug helper: dump the boundaries of every region of the address space. */
+static void
+area_print(struct vm_regions *region)
+{
+	struct vm_regions	*r = region;
+
+	dkprintf("text  %016lx:%016lx\n", r->text_start, r->text_end);
+	dkprintf("data  %016lx:%016lx\n", r->data_start, r->data_end);
+	dkprintf("brk   %016lx:%016lx\n", r->brk_start, r->brk_end);
+	dkprintf("map   %016lx:%016lx\n", r->map_start, r->map_end);
+	dkprintf("stack %016lx:%016lx\n", r->stack_start, r->stack_end);
+	dkprintf("user  %016lx:%016lx\n", r->user_start, r->user_end);
+}
+
+
+/*
+ * Pin the user work area [p, p + sz) with mlock and register it in si as
+ * the arena used by myalloc(). Returns 0, or the negative mlock result.
+ */
+static int
+myalloc_init(struct swapinfo *si, void *p, size_t sz)
+{
+	extern SYSCALL_DECLARE(mlock);
+	ihk_mc_user_context_t lock_ctx;
+	int	rc;
+
+	/* pin the buffer down in McKernel side */
+	ihk_mc_syscall_arg0(&lock_ctx) = (uintptr_t) p;
+	ihk_mc_syscall_arg1(&lock_ctx) = sz;
+	rc = sys_mlock(__NR_mlock, &lock_ctx);
+	if (rc >= 0) {
+		/* start with an empty arena */
+		si->user_buf = p;
+		si->ubuf_size = sz;
+		si->ubuf_alloced = 0;
+		dkprintf("myalloc_init: buffer(%p) size(0x%lx)\n", si->user_buf, si->ubuf_size);
+		rc = 0;
+	}
+	return rc;
+}
+
+/* Undo myalloc_init(): munlock the user work area recorded in si. */
+void
+myalloc_finalize(struct swapinfo *si)
+{
+	extern SYSCALL_DECLARE(munlock);
+	ihk_mc_user_context_t unlock_ctx;
+
+	/* release the pin taken on the McKernel side */
+	ihk_mc_syscall_arg1(&unlock_ctx) = si->ubuf_size;
+	ihk_mc_syscall_arg0(&unlock_ctx) = (uintptr_t) si->user_buf;
+	sys_munlock(__NR_munlock, &unlock_ctx);
+}
+
+/*
+ * Bump allocator over the pinned user work area.
+ * Returns a pointer to sz bytes, or NULL when strictly fewer than sz + 1
+ * bytes remain (the original strict "<" capacity rule is kept).
+ * The remaining space is computed by subtraction so that a huge sz cannot
+ * wrap "ubuf_alloced + sz" around and slip past the check.
+ */
+void *
+myalloc(struct swapinfo *si, size_t sz)
+{
+	void	*p = NULL;
+	size_t	remaining;
+
+	if (si->ubuf_alloced > si->ubuf_size)
+		return NULL;
+	remaining = si->ubuf_size - si->ubuf_alloced;
+	if (sz < remaining) {
+		p = (void*) &((char*)si->user_buf)[si->ubuf_alloced];
+		si->ubuf_alloced += sz;
+	}
+	return p;
+}
+
+/*
+ * Counterpart of myalloc(). The arena is released as a whole, so freeing
+ * a single object is a no-op; the parameter is declared so that the
+ * definition agrees with the call in arealist_free().
+ */
+void
+myfree(void *p)
+{
+	(void) p;
+	/* nothing so far */
+}
+
+/* Forward open(fname, flag, mode) to Linux and return its result. */
+static int
+linux_open(char *fname, int flag, int mode)
+{
+	ihk_mc_user_context_t open_ctx;
+
+	ihk_mc_syscall_arg0(&open_ctx) = (uintptr_t) fname;
+	ihk_mc_syscall_arg1(&open_ctx) = flag;
+	ihk_mc_syscall_arg2(&open_ctx) = mode;
+	return (int) syscall_generic_forwarding(__NR_open, &open_ctx);
+}
+
+/* Forward unlink(fname) to Linux and return its result. */
+static int
+linux_unlink(char *fname)
+{
+	ihk_mc_user_context_t unlink_ctx;
+	int	rc;
+
+	ihk_mc_syscall_arg0(&unlink_ctx) = (uintptr_t) fname;
+	rc = syscall_generic_forwarding(__NR_unlink, &unlink_ctx);
+	return rc;
+}
+
+/*
+ * Read up to count bytes through forwarded read() calls, restarting on
+ * -EINTR and continuing after short reads. Returns the number of bytes
+ * read so far; if nothing was read, returns the result of the last call
+ * (0 or a negative error).
+ */
+static ssize_t
+linux_read(int fd, void *buf, size_t count)
+{
+	ihk_mc_user_context_t ctx0;
+	ssize_t		done = 0;
+	char		*cursor = buf;
+	size_t		remaining = count;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	for (;;) {
+		ssize_t	n;
+
+		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) cursor;
+		ihk_mc_syscall_arg2(&ctx0) = remaining;
+		n = syscall_generic_forwarding(__NR_read, &ctx0);
+		if (n == -EINTR)
+			continue;
+		if (n <= 0)
+			return (done == 0) ? n : done;
+		done += n;
+		if (done == count)
+			return done;
+		remaining -= n;
+		cursor += n;
+	}
+}
+
+/*
+ * Write up to count bytes through forwarded write() calls, restarting on
+ * -EINTR and continuing after short writes. Returns the number of bytes
+ * written so far; if nothing was written, returns the result of the last
+ * call (0 or a negative error).
+ */
+static ssize_t
+linux_write(int fd, void *buf, size_t count)
+{
+	ihk_mc_user_context_t ctx0;
+	ssize_t		done = 0;
+	char		*cursor = buf;
+	size_t		remaining = count;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	for (;;) {
+		ssize_t	n;
+
+		ihk_mc_syscall_arg1(&ctx0) = (uintptr_t) cursor;
+		ihk_mc_syscall_arg2(&ctx0) = remaining;
+		n = syscall_generic_forwarding(__NR_write, &ctx0);
+		if (n == -EINTR)
+			continue;
+		if (n <= 0)
+			return (done == 0) ? n : done;
+		done += n;
+		if (done == count)
+			return done;
+		remaining -= n;
+		cursor += n;
+	}
+}
+
+/*
+ * Forward lseek(fd, off, whence) to Linux.
+ * Returns the resulting file offset or a negative error. The result is
+ * kept in an off_t: an int would truncate offsets of 2 GiB and above.
+ */
+static off_t
+linux_lseek(int fd, off_t off, int whence)
+{
+	ihk_mc_user_context_t ctx0;
+	off_t		pos;
+
+	ihk_mc_syscall_arg0(&ctx0) = fd;
+	ihk_mc_syscall_arg1(&ctx0) = off;
+	ihk_mc_syscall_arg2(&ctx0) = whence;
+	pos = syscall_generic_forwarding(__NR_lseek, &ctx0);
+	return pos;
+}
+
+/* Forward close(fd) to Linux and return its result. */
+static int
+linux_close(int fd)
+{
+	ihk_mc_user_context_t close_ctx;
+
+	ihk_mc_syscall_arg0(&close_ctx) = fd;
+	return (int) syscall_generic_forwarding(__NR_close, &close_ctx);
+}
+
+/*
+ * munmap issued from McKernel is served by the mcctrl module on the Linux
+ * side. The third argument, flag, is an extension: when it is not zero a
+ * new remote page table is set.
+ */
+static int
+linux_munmap(void *addr, size_t len, int flag)
+{
+	ihk_mc_user_context_t unmap_ctx;
+
+	ihk_mc_syscall_arg0(&unmap_ctx) = (uintptr_t) addr;
+	ihk_mc_syscall_arg1(&unmap_ctx) = len;
+	ihk_mc_syscall_arg2(&unmap_ctx) = flag;
+	return (int) syscall_generic_forwarding(__NR_munmap, &unmap_ctx);
+}
+
+/*
+ * Open fname (a kernel-side string) on Linux. The name is first copied
+ * into the pinned bounce buffer si->udata_buf, whose address is what gets
+ * forwarded. Returns the descriptor, or -EINVAL when the bounce buffer is
+ * missing or the name does not fit into UDATA_BUFSIZE bytes.
+ */
+static int
+pager_open(struct swapinfo *si, char *fname, int flag, int mode)
+{
+	int	fd;
+
+	if (si->udata_buf == NULL || fname == NULL
+	    || strlen(fname) >= UDATA_BUFSIZE) {
+		return -EINVAL;
+	}
+	strcpy(si->udata_buf, fname);
+	fd = linux_open(si->udata_buf, flag, mode);
+	return fd;
+}
+
+/*
+ * Unlink fname (a kernel-side string) on Linux via the bounce buffer
+ * si->udata_buf. Returns the forwarded result, or -EINVAL when the bounce
+ * buffer is missing or the name does not fit into UDATA_BUFSIZE bytes.
+ */
+static int
+pager_unlink(struct swapinfo *si, char *fname)
+{
+	if (si->udata_buf == NULL || fname == NULL
+	    || strlen(fname) >= UDATA_BUFSIZE) {
+		return -EINVAL;
+	}
+	strcpy(si->udata_buf, fname);
+	return linux_unlink(si->udata_buf);
+}
+
+/*
+ * Read size bytes from fd into the user range beginning at start, going
+ * through the bounce buffer in chunks of at most UDATA_BUFSIZE bytes.
+ * Returns size on success, the result of the short or failed read
+ * otherwise, or -EFAULT when the copy to the destination fails.
+ */
+static ssize_t
+pager_read(struct swapinfo *si, int fd, void *start, size_t size)
+{
+	ssize_t		off, sz, rs;
+
+	kprintf("pager_read: %lx (%lx)\n", start, size);
+	for (off = 0; (size_t) off < size; off += sz) {
+		sz = size - off;
+		sz = (sz > UDATA_BUFSIZE) ? UDATA_BUFSIZE : sz;
+		rs = linux_read(fd, si->udata_buf, sz);
+		if (rs != sz) return rs;
+		/* A failed copy must not be reported as restored data */
+		if (copy_to_user((char *) start + off, si->udata_buf, sz))
+			return -EFAULT;
+	}
+	return off;
+}
+
+/* Write size bytes starting at start to fd; thin wrapper of linux_write(). */
+static ssize_t
+pager_write(int fd, void *start, size_t size)
+{
+	return linux_write(fd, start, size);
+}
+
+/*
+ * Ask Linux for the ranges it has locked inside [start, end). The request
+ * is carried by a forwarded __NR_mmap call whose first argument is
+ * PAGER_REQ_MLOCK_LIST; addr/nent describe the array to be filled.
+ * Returns the value produced by the forwarded call.
+ */
+static int
+mlocklist_req(unsigned long start, unsigned long end, struct addrpair *addr, int nent)
+{
+	ihk_mc_user_context_t req_ctx;
+
+#define PAGER_REQ_MLOCK_LIST	0x0008
+	ihk_mc_syscall_arg0(&req_ctx) = PAGER_REQ_MLOCK_LIST;
+	ihk_mc_syscall_arg1(&req_ctx) = start;
+	ihk_mc_syscall_arg2(&req_ctx) = end;
+	ihk_mc_syscall_arg3(&req_ctx) = (unsigned long) addr;
+	ihk_mc_syscall_arg4(&req_ctx) = nent;
+	return (int) syscall_generic_forwarding(__NR_mmap, &req_ctx);
+}
+
+/*
+ * If the entry following the last used addrpair has start == -1, more
+ * pages locked by Linux exist; its "end" field is then stored in *start
+ * as the address to continue from. Returns 1 in that case, 0 otherwise.
+ * When the tail chunk is completely full there is no slot left that could
+ * hold the marker, so 0 is returned instead of reading past pair[].
+ */
+static int
+mlocklist_morereq(struct swapinfo *si, unsigned long *start)
+{
+	struct areaent	*ent = si->mlock_area.tail;
+
+	if (ent->count < 0 || ent->count >= MLOCKADDRS_SIZE) {
+		return 0;
+	}
+	dkprintf("mlocklist_morereq: start = %ld and = %ld\n",
+		ent->pair[ent->count].start, ent->pair[ent->count].end);
+	if (ent->pair[ent->count].start != (unsigned long) -1) {
+		return 0;
+	}
+	*start = ent->pair[ent->count].end;
+	return 1;
+}
+
+/* Give areap its first, zeroed chunk. Returns 0 or -ENOMEM. */
+static int
+arealist_alloc(struct swapinfo *si, struct arealist *areap)
+{
+	struct areaent	*first = myalloc(si, sizeof(struct areaent));
+
+	areap->head = first;
+	areap->tail = first;
+	if (first == NULL) return -ENOMEM;
+	memset(first, 0, sizeof(struct areaent));
+	return 0;
+}
+
+/* Set up swap_area, then mlock_area. Returns the first negative result
+ * of arealist_alloc(), otherwise the result for mlock_area. */
+static int
+arealist_init(struct swapinfo *si)
+{
+	int	rc = arealist_alloc(si, &si->swap_area);
+
+	if (rc < 0) return rc;
+	return arealist_alloc(si, &si->mlock_area);
+}
+
+
+/*
+ * Release every chunk of the list and reset the list to empty.
+ * The successor is fetched before a chunk is handed to myfree() so the
+ * walk never touches a released chunk.
+ */
+static void
+arealist_free(struct arealist *area)
+{
+	struct areaent	*tmp, *next;
+
+	for (tmp = area->head; tmp != NULL; tmp = next) {
+		next = tmp->next;
+		myfree(tmp);
+	}
+	memset(area, 0, sizeof(struct arealist));
+	return;
+}
+
+/*
+ * Hand out the free part of the tail chunk: *pair (when pair is not NULL)
+ * receives the first free addrpair and the return value is the number of
+ * free slots. A new chunk is appended when the tail has fewer than two
+ * free slots. Returns -1 when no chunk can be allocated.
+ */
+static int
+arealist_get(struct swapinfo *si, struct addrpair **pair, struct arealist *area)
+{
+	struct areaent	*last = area->tail;
+	struct areaent	*fresh;
+
+	if (last->count >= MLOCKADDRS_SIZE - 1) {
+		/* fewer than two entries left: chain a new chunk */
+		fresh = myalloc(si, sizeof(struct areaent));
+		if (fresh == NULL) {
+			return -1;
+		}
+		memset(fresh, 0, sizeof(struct areaent));
+		area->tail->next = fresh;
+		area->tail = fresh;
+		if (pair) *pair = fresh->pair;
+		return MLOCKADDRS_SIZE;
+	}
+	if (pair) *pair = &last->pair[last->count];
+	return MLOCKADDRS_SIZE - last->count;
+}
+
+/* Account for cnt entries just stored in the tail chunk. */
+static void
+arealist_update(int cnt, struct arealist *area)
+{
+	struct areaent	*last = area->tail;
+
+	last->count += cnt;
+	area->count += cnt;
+}
+
+/* Append one [start, end) entry with flag to area. Returns 0, or -1 when
+ * no slot can be obtained. */
+static int
+arealist_add(struct swapinfo *si, unsigned long start, unsigned long end,
+             unsigned long flag, struct arealist *area)
+{
+	struct addrpair	*slot;
+
+	if (arealist_get(si, &slot, area) < 0) return -1;
+	slot->start = start;
+	slot->end = end;
+	slot->flag = flag;
+	arealist_update(1, area);
+	return 0;
+}
+
+/*
+ * Flatten the area list into info[], filling start/end/flag and pos.
+ *   flag != 0: pos is the file position of the area's contents, assuming
+ *              the areas are stored back to back beginning at off.
+ *   flag == 0: pos is the physical address of the area's first page.
+ * When the address translation fails a message is printed and pos is set
+ * to 0, so that no uninitialised work-area contents end up in the file.
+ * Returns the number of entries written to info[].
+ */
+static int
+arealist_preparewrite(struct arealist *areap, struct swap_areainfo *info,
+		      ssize_t off, struct process_vm *vm, int flag)
+{
+	struct areaent		*ent;
+	int			count = 0;
+	ssize_t			totsz = 0;
+	struct page_table	*pt = vm->address_space->page_table;
+
+	for (ent = areap->head; ent != NULL; ent = ent->next) {
+		int i;
+		for (i = 0; i < ent->count; i++, count++) {
+			ssize_t sz = ent->pair[i].end - ent->pair[i].start;
+			info[count].start = ent->pair[i].start;
+			info[count].end = ent->pair[i].end;
+			info[count].flag = ent->pair[i].flag;
+			if (flag) { /* position in file */
+				info[count].pos = off + totsz;
+			} else { /* physical memory */
+				if (ihk_mc_pt_virt_to_phys(pt,
+						(void*) ent->pair[i].start,
+						 &info[count].pos)) {
+					kprintf("Cannot get phys\n");
+					info[count].pos = 0;
+				}
+			}
+			totsz += sz;
+		}
+	}
+	return count;
+}
+
+/* Write count swap_areainfo records to fd. Returns 0 when all bytes were
+ * written, -1 otherwise. */
+static ssize_t
+arealist_write(int fd, struct swap_areainfo *info, int count)
+{
+	ssize_t	       written;
+
+	written = linux_write(fd, info, sizeof(struct swap_areainfo)*count);
+	return (written != sizeof(struct swap_areainfo)*count) ? -1 : 0;
+}
+
+/* Print msg and count, followed by every range stored in the list. */
+static void
+arealist_print(char *msg, struct arealist *areap, int count)
+{
+	struct areaent	*chunk = areap->head;
+	int		idx;
+
+	kprintf("%s: %d\n", msg, count);
+	while (chunk != NULL) {
+		for (idx = 0; idx < chunk->count; idx++) {
+			struct addrpair	*ap = &chunk->pair[idx];
+
+			kprintf("\t%p -- %p\n",
+				(void*) ap->start, (void*) ap->end);
+		}
+		chunk = chunk->next;
+	}
+}
+
+/*
+ * Point the mlock container at the current end of mlock_area: both "from"
+ * and "cur" refer to the tail chunk and ccount to its entry count.
+ * Returns 0, or -1 when the list cannot be extended.
+ */
+static int
+mlockcntnr_sethead(struct swapinfo *si)
+{
+	struct areaent	*last;
+
+	/* Adjust arealist */
+	if (arealist_get(si, 0, &si->mlock_area) < 0) return -1;
+	last = si->mlock_area.tail;
+	si->mlock_container.cur = last;
+	si->mlock_container.from = last;
+	si->mlock_container.ccount = last->count;
+	return 0;
+}
+
+/* Non-zero when mlock_area still ends where the container was set, i.e.
+ * same tail chunk and same entry count. */
+static int
+mlockcntnr_isempty(struct swapinfo *si)
+{
+	struct areaent	*last = si->mlock_area.tail;
+
+	if (si->mlock_container.from != last)
+		return 0;
+	return si->mlock_container.ccount == last->count;
+}
+
+/*
+ * Fetch the next entry recorded after the position set by
+ * mlockcntnr_sethead(). Copies it to *laddr and returns 1, or returns 0
+ * when no entry is left.
+ * ccount is the 0-based index of the next unread slot of "cur"; the former
+ * code read pair[ccount - 1], which yielded the entry *before* the marked
+ * position (pair[-1] when the position was 0) and never the last one.
+ */
+static int
+mlockcntnr_addrent(struct swapinfo *si, struct addrpair *laddr)
+{
+	struct mlockcntnr	*mc = &si->mlock_container;
+
+	/* Step over chunks that are exhausted (or empty) */
+	while (mc->ccount >= mc->cur->count) {
+		if (mc->cur->next == NULL) return 0;
+		mc->cur = mc->cur->next;
+		mc->ccount = 0;
+	}
+	*laddr = mc->cur->pair[mc->ccount];
+	mc->ccount++;
+	return 1;
+}
+
+/* Print label, the kind of region whose start matches "start" (checked in
+ * the order text, data, brk, stack, user, map) and the address/size. */
+static void
+print_area(char *label, unsigned long start, unsigned long sz,
+	     struct vm_regions *region)
+{
+	char *kind =
+		(start == region->text_start) ? "text" :
+		(start == region->data_start) ? "data" :
+		(start == region->brk_start) ? "brk" :
+		(start == region->stack_start) ? "stack" :
+		(start == region->user_start) ? "user" :
+		(start >= region->map_start
+		 && start <= region->stack_start) ? "map" : "other";
+
+	kprintf("%s: %s write(%p, %ld)\n", label, kind, start, sz);
+}
+
+/* Print msg, then start/end/flag of every vm_range that has no memobj. */
+void
+print_region(char *msg, struct process_vm *vm)
+{
+	struct vm_range		*cur, *nxt;
+
+	kprintf("%s:\n", msg);
+	list_for_each_entry_safe(cur, nxt, &vm->vm_range_list, list) {
+		if (cur->memobj == NULL) {
+			kprintf("\t%016lx:%016lx (%lx)\n",
+				cur->start, cur->end, cur->flag);
+		}
+	}
+}
+
+/* Print msg followed by the 16 bytes at p as colon-separated hex. */
+static void
+debug_dump(char *msg, unsigned char *p)
+{
+	unsigned char	*lo = p;	/* bytes 0..7 */
+	unsigned char	*hi = p + 8;	/* bytes 8..15 */
+
+	kprintf("%s-> %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
+		":%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
+		msg,
+		lo[0], lo[1], lo[2], lo[3], lo[4], lo[5], lo[6], lo[7],
+		hi[0], hi[1], hi[2], hi[3], hi[4], hi[5], hi[6], hi[7]);
+}
+
+/*
+ * Restore the areas written by do_pageout() from the swap file recorded
+ * in vm->swapinfo.
+ *
+ * The file is opened read-only and unlinked right away, the header and
+ * both area tables are skipped, and the contents of every swapped area
+ * are read back to their original virtual addresses.
+ *
+ * flag is only reported in the debug message.
+ * Returns 0 on success, -EINVAL when no swap information is attached to
+ * the address space, the negative open result when the file cannot be
+ * opened (the swap information is then left attached), or -1 on a seek
+ * or read error. In the 0 and -1 cases the swap information is released.
+ */
+int
+do_pagein(int flag)
+{
+	struct thread		*thread = cpu_local_var(current);
+	struct process_vm	*vm = thread->vm;
+	int		fd, i;
+	int		rc = 0;
+	ssize_t		pos, sz, rs;
+	struct swapinfo *si = vm->swapinfo;
+
+	if (si == NULL) {
+		ekprintf("do_pagein: no swap information\n");
+		return -EINVAL;
+	}
+	dkprintf("do_pagein: flag(%d) currss(%lx)\n", flag, vm->currss);
+	fd = pager_open(si, si->swapfname, O_RDONLY, 0);
+	pager_unlink(si, si->swapfname);
+	if (fd < 0) {
+		kprintf("do_pagein: Cannot open file: %s\n", si->swapfname);
+		return fd;
+	}
+	/*
+	 * In the current implementation, the following working areas remain
+	 * in the physical memory area:
+	 *	swphdr, swap_info and mlock_info
+	 */
+	pos = sizeof(struct swap_header);
+	pos += sizeof(struct swap_areainfo)*si->swphdr->count_sarea;
+	pos += sizeof(struct swap_areainfo)*si->swphdr->count_marea;
+	sz = pos;	/* reported as the expected value if the seek fails */
+	rs = linux_lseek(fd, pos, SEEK_SET);
+	if (rs != pos) goto err;
+	for (i = 0; i < si->swphdr->count_sarea; i++) {
+		sz = si->swap_info[i].end - si->swap_info[i].start;
+		dkprintf("pagein: %016lx:%016lx sz(%lx)\n", si->swap_info[i].start, si->swap_info[i].end, sz);
+		rs = pager_read(si, fd, (void*) si->swap_info[i].start, sz);
+		if (rs != sz) goto err;
+	}
+	linux_close(fd);
+	print_region("after pagin", vm);
+	kprintf("do_pagein: done, currss(%lx)\n", vm->currss);
+	goto release;
+err:
+	linux_close(fd);
+	ekprintf("pagein: read error: return(%lx) size(%lx)\n", rs, sz);
+	rc = -1;
+release:
+	/* NOTE(review): the work area pinned by myalloc_init() is not
+	 * unlocked here; confirm whether myalloc_finalize() should run. */
+	vm->swapinfo = NULL;
+	kfree(si->swapfname);
+	kfree(si);
+	return rc;
+}
+
+/*
+ * Write the swappable anonymous ranges of the current process to the file
+ * fname and release their physical memory.
+ *
+ *   fname  user-space path of the swap file (created/truncated on Linux)
+ *   buf    user-space work area; pinned and used through myalloc()
+ *   size   size of buf in bytes
+ *   flag   bit 0x04: keep the physical memory and the Linux mappings
+ *          (the file is still written and the swap state registered)
+ *
+ * Ranges with a memobj, text, stack, read-only or non-user ranges are
+ * skipped. Ranges locked by McKernel (VR_LOCKED) and pages reported as
+ * locked by Linux go to the mlock list; the rest goes to the swap list.
+ * The file receives the header, both tables and the swapped contents.
+ *
+ * Returns 0 on success, with the state attached to vm->swapinfo for
+ * do_pagein(). Returns a negative value on failure; the swap file is then
+ * removed only if this call created it.
+ *
+ * NOTE(review): strlen() is applied directly to the user pointer fname.
+ * NOTE(review): failure paths leave buf pinned (no myalloc_finalize()).
+ */
+int
+do_pageout(char *fname, void *buf, size_t size, int flag)
+{
+	struct thread		*thread = cpu_local_var(current);
+	struct process_vm	*vm = thread->vm;
+	struct vm_regions	*region = &vm->region;
+	struct vm_range		*range, *next;
+	struct addrpair		*addr;
+	int		i, fd;
+	int		created = 0;	/* swap file was created by this call */
+	long		cc;
+	unsigned long	start, end;
+	ssize_t		pos, sz;
+	struct swapinfo *si;
+
+	fd = -1;
+	dkprintf("do_pageout: buf(%p) size(%d) flag(%d) currss(%lx)\n",
+		 buf, size, flag, vm->currss);
+	if (IS_INVALID_USERADDRESS(fname, region)
+	    || IS_INVALID_USERADDRESS(buf, region)
+	    || IS_INVALID_LENGTH(size, region)) {
+		return -EINVAL;
+	}
+	if (!(si = kmalloc(sizeof(struct swapinfo), IHK_MC_AP_NOWAIT))) {
+		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
+		return -ENOMEM;
+	}
+	memset(si, '\0', sizeof(struct swapinfo));
+	cc = myalloc_init(si, buf, size);
+	if (cc < 0) {
+		kfree(si);
+		ekprintf("do_pageout: Cannot pin buf (%p) down\n", buf);
+		return cc;
+	}
+	/* The bounce buffer is needed by pager_open/pager_unlink/pager_read */
+	si->udata_buf = myalloc(si, UDATA_BUFSIZE);
+	if (si->udata_buf == NULL) {
+		ekprintf("do_pageout: user buffer area is needed more than %d byte\n",
+			 (int) (UDATA_BUFSIZE + sizeof(struct areaent)*2));
+		cc = -ENOMEM;
+		goto free_exit;
+	}
+	si->swapfname = kmalloc(strlen(fname) + 1, IHK_MC_AP_NOWAIT);
+	if (si->swapfname == NULL) {
+		ekprintf("do_pageout: Cannot allocate working memory in kmalloc\n");
+		cc = -ENOMEM;
+		goto free_exit;
+	}
+	if (strcpy_from_user(si->swapfname, fname)) {
+		cc = -EFAULT;
+		goto err;
+	}
+	cc = arealist_init(si);
+	if (cc < 0) {
+		ekprintf("do_pageout: user buffer area is needed more than %d byte\n",
+			 (int) (UDATA_BUFSIZE + sizeof(struct areaent)*2));
+		goto err;
+	}
+
+	fd = linux_open(fname, O_RDWR|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0) {
+		ekprintf("do_pageout: Cannot open/create file: %s\n", fname);
+		cc = fd;
+		goto err;
+	}
+	created = 1;
+	area_print(region);
+
+	/* looking at ranges except for non anonymous, text, and stack */
+	list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
+		if (range->memobj != NULL) continue;
+		if (IS_TEXT(range->start, region)
+		    || IS_STACK(range->start, region)
+		    || IS_INVALID_USERADDRESS(range->start, region)
+		    || IS_READONLY(range->flag)
+		    || IS_NOTUSER(range->flag)) continue;
+		if (range->flag & VR_LOCKED) {
+			/* this range is locked by McKernel */
+			cc = arealist_add(si, range->start, range->end,
+					  range->flag, &si->mlock_area);
+			if (cc < 0) goto nomem;
+			continue;
+		}
+		start = range->start; end = range->end;
+		if ((cc = mlockcntnr_sethead(si)) < 0) goto nomem;
+		/* Requesting mlock list in Linux Kernel. We do not know how many
+		 * addrpair entries are needed. The Linux side stores -1 in
+		 * the last entry of addrpair to inform more entries exist.
+		 * the mlocklist_morereq function checks this condition. */
+		do {
+			if ((cc = arealist_get(si, &addr, &si->mlock_area)) < 0) goto nomem;
+			cc = mlocklist_req(start, end, addr, cc);
+			/* a negative count must not be added to the list */
+			if (cc < 0) goto err;
+			arealist_update(cc, &si->mlock_area);
+		} while (mlocklist_morereq(si, &start));
+		/* */
+		if (mlockcntnr_isempty(si)) { /* whole range is going to swap */
+			cc = arealist_add(si, range->start, range->end,
+					  range->flag, &si->swap_area);
+			if (cc < 0) goto nomem;
+		} else { /*  partial range is going to swap */
+			for (start = range->start; start < range->end;) {
+				struct addrpair	laddr;
+				if (mlockcntnr_addrent(si, &laddr) == 0) {
+					/* No more entry locked by Linux */
+					cc = arealist_add(si, start, range->end,
+							  range->flag,
+							  &si->swap_area);
+					if (cc < 0) goto nomem;
+					break;
+				}
+				if (start < laddr.start) {
+					/* swap range from start to laddr.start */
+					cc = arealist_add(si, start, laddr.start,
+							  range->flag,
+							  &si->swap_area);
+					if (cc < 0) goto nomem;
+				}
+				start = laddr.end;
+				kprintf("do_pageout: start(%ld) range->end(%ld)\n",
+					start, range->end);
+				/* NOTE(review): this unconditional break ends
+				 * the walk after the first Linux-locked entry,
+				 * so the part of the range behind it is never
+				 * added to the swap list. Kept as is; confirm
+				 * the intended behaviour. */
+				break;
+			}
+		}
+	}
+	arealist_print("SWAP", &si->swap_area, si->swap_area.count);
+	arealist_print("MLOCK", &si->mlock_area, si->mlock_area.count);
+	si->swap_info = myalloc(si, sizeof(struct swap_areainfo)* si->swap_area.count);
+	si->mlock_info =  myalloc(si, sizeof(struct swap_areainfo)* si->mlock_area.count);
+	if (si->swap_info == NULL || si->mlock_info == NULL) goto nomem;
+
+	/* preparing page store */
+	si->swphdr = myalloc(si, sizeof(struct swap_header));
+	if (si->swphdr == NULL) goto nomem;
+	strncpy(si->swphdr->magic, MCKERNEL_SWAP, SWAP_HLEN);
+	strncpy(si->swphdr->version, MCKERNEL_SWAP_VERSION, SWAP_HLEN);
+	/* strncpy does not terminate when the source fills the field */
+	si->swphdr->magic[SWAP_HLEN - 1] = '\0';
+	si->swphdr->version[SWAP_HLEN - 1] = '\0';
+	si->swphdr->count_sarea = si->swap_area.count;
+	si->swphdr->count_marea = si->mlock_area.count;
+	if ((cc = pager_write(fd, si->swphdr, sizeof(struct swap_header)))
+	    != sizeof(struct swap_header)) {
+		if (cc >= 0)
+			cc = -EIO;
+		goto err;
+	}
+	pos = linux_lseek(fd, 0, SEEK_CUR);
+	if (pos < 0) {
+		cc = pos;
+		goto err;
+	}
+	pos += sizeof(struct swap_areainfo)*(si->swap_area.count+si->mlock_area.count);
+	cc = arealist_preparewrite(&si->swap_area, si->swap_info, pos, vm, 1);
+	if (cc != si->swap_area.count) {
+		ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in swap_area\n",
+			 cc, si->swap_area.count);
+	}
+	cc = arealist_preparewrite(&si->mlock_area, si->mlock_info, 0, vm, 0);
+	if (cc != si->mlock_area.count) {
+		ekprintf("do_pageout: ERROR file ent(%d) != list ent(%d) in swap_area\n",
+			 cc, si->mlock_area.count);
+	}
+	/* arealists are stored */
+	if ((cc = arealist_write(fd, si->swap_info, si->swap_area.count)) < 0) goto err;
+	if ((cc = arealist_write(fd, si->mlock_info, si->mlock_area.count)) < 0) goto err;
+	/* now pages are stored */
+	for (i = 0; i < si->swap_area.count; i++) {
+		sz = si->swap_info[i].end - si->swap_info[i].start;
+		if ((cc = pager_write(fd, (void*) si->swap_info[i].start, sz)) != sz) {
+			if (cc >= 0)
+				cc = -EIO;
+			goto err;
+		}
+	}
+	if (flag & 0x04) {	/* bit test; "&&" was true for any flag */
+		kprintf("skipping physical memory removal\n");
+		/* The caller runs do_pagein() next, which needs the state */
+		cc = 0;
+		vm->swapinfo = si;
+		goto free_exit;
+	}
+	kprintf("removing physical memory\n");
+	for (i = 0; i < si->swap_area.count; i++) {
+		cc = ihk_mc_pt_free_range(vm->address_space->page_table,
+					  vm,
+					  (void*) si->swap_info[i].start,
+					  (void*) si->swap_info[i].end, NULL);
+		if (cc < 0) {
+			kprintf("ihk_mc_pt_clear_range returns: %d\n", cc);
+		}
+	}
+	cc = linux_close(fd);
+	fd = -1;
+	/*
+	 * Unmapping McKernel's user virtual spaces in Linux side.
+	 * From here to the completion of do_pagein, the non-locked user
+	 * spaces, except TEXT, STACK and read-only pages, are not valid.
+	 */
+	for (i = 0; i < si->swap_area.count; i++) {
+		sz = si->swap_info[i].end - si->swap_info[i].start;
+		cc = linux_munmap((void*) si->swap_info[i].start, sz, 0);
+		if (cc < 0) {
+			kprintf("do_pageout: Cannot munmap: %lx len(%lx)\n",
+				si->swap_info[i].start, sz);
+		}
+	}
+	cc = 0;
+	vm->swapinfo = si;
+	goto free_exit;
+err:
+	ekprintf("do_pageout: write error: %d\n", (int) cc);
+	goto free_exit;
+nomem:
+	ekprintf("do_pageout: cannot allocate working memory\n");
+	cc = -ENOMEM;
+free_exit:
+	if (fd >= 0)
+		linux_close(fd);
+	dkprintf("do_pageout: done, currss(%lx)\n", vm->currss);
+	arealist_free(&si->mlock_area); arealist_free(&si->swap_area);
+	if (cc != 0) {
+		/* Only remove a file this call has created/truncated */
+		if (created)
+			pager_unlink(si, si->swapfname);
+		if (si->swapfname != NULL)
+			kfree(si->swapfname);
+		kfree(si);
+	}
+	return cc;
+}
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@ -9220,6 +9220,102 @@ SYSCALL_DECLARE(get_system)
 	return 0;
 }

+/*
+ * swapout(const char *filename, void *workarea, size_t size, int flag)
+ *
+ * Pages the process out to filename with do_pageout(), forwards the
+ * swapout request to Linux and finally pages the process in again.
+ *   filename == NULL or flag == 0x01: only forward to Linux (development)
+ *   flag == 0x02: do not forward to Linux
+ * Returns the result of do_pagein(), the error of do_pageout(), or in the
+ * development case the result of the forwarded call.
+ *
+ * The context forwarded to Linux carries no arguments; it is zeroed so
+ * that no uninitialised kernel stack contents are sent.
+ */
+SYSCALL_DECLARE(swapout)
+{
+	extern int do_pageout(char *, void *, size_t, int);
+	extern int do_pagein(int);
+	char	*fname = (char *)ihk_mc_syscall_arg0(ctx);
+	char	*buf = (char *)ihk_mc_syscall_arg1(ctx);
+	size_t	size = (size_t)ihk_mc_syscall_arg2(ctx);
+	int	flag = (int)ihk_mc_syscall_arg3(ctx);
+	ihk_mc_user_context_t ctx0;
+	int	cc;
+
+	dkprintf("[%d]swapout(%lx,%lx,%lx,%ld)\n",
+		 ihk_mc_get_processor_id(), (unsigned long) fname,
+		 (unsigned long) buf, (unsigned long) size, (long) flag);
+
+	memset(&ctx0, 0, sizeof(ctx0));
+	if (fname == NULL || flag == 0x01) { /* for development purpose */
+		kprintf("swapout: skipping real swap\n");
+		cc = syscall_generic_forwarding(__NR_swapout, &ctx0);
+		kprintf("swapout: return from Linux\n");
+		return cc;
+	}
+	/* pageout */
+	cc = do_pageout(fname, buf, size, flag);
+	if (cc < 0) return cc;
+	if (flag == 0x02) {
+		kprintf("swapout: skipping calling swapout in Linux\n");
+	} else {
+		kprintf("swapout: before calling swapout in Linux\n");
+		cc = syscall_generic_forwarding(__NR_swapout, &ctx0);
+		kprintf("swapout: after calling swapout in Linux cc(%d)\n", cc);
+	}
+	/* Even if swapout on the Linux side returns an error, pagein must
+	 * be called to recover the image */
+	cc = do_pagein(flag);
+	kprintf("swapout: after calling do_pagein cc(%d)\n", cc);
+	return cc;
+}
+
+/*
+ * linux_mlock(addr, len): forward the request to Linux under its own
+ * system call number and return the result. The number is spelled as
+ * __NR_linux_mlock (802 in syscall_list.h), as linux_spawn does.
+ */
+SYSCALL_DECLARE(linux_mlock)
+{
+	ihk_mc_user_context_t ctx0;
+	const uintptr_t addr = ihk_mc_syscall_arg0(ctx);
+	const size_t len = ihk_mc_syscall_arg1(ctx);
+	int		cc;
+
+	kprintf("linux_mlock: %p %ld\n", (void*) addr, len);
+	ihk_mc_syscall_arg0(&ctx0) = addr;
+	ihk_mc_syscall_arg1(&ctx0) = len;
+	cc = syscall_generic_forwarding(__NR_linux_mlock, &ctx0);
+	return cc;
+}
+
+/* linux_spawn: hand the caller's context to Linux unchanged. */
+SYSCALL_DECLARE(linux_spawn)
+{
+	int forwarded = syscall_generic_forwarding(__NR_linux_spawn, ctx);
+
+	return forwarded;
+}
+
+/*
+ * suspend_threads: send SIGSTOP to every other thread of the calling
+ * process, then spin until each of them reports PS_STOPPED. Returns 0.
+ */
+SYSCALL_DECLARE(suspend_threads)
+{
+	struct thread *self = cpu_local_var(current);
+	struct process *proc = self->proc;
+	struct thread *t;
+
+	/* first pass: request the stop */
+	list_for_each_entry(t, &proc->threads_list, siblings_list) {
+		if (t != self) {
+			do_kill(self, proc->pid, t->tid, SIGSTOP, NULL, 0);
+		}
+	}
+	/* second pass: wait for the stop to take effect */
+	list_for_each_entry(t, &proc->threads_list, siblings_list) {
+		if (t != self) {
+			while (t->status != PS_STOPPED)
+				cpu_pause();
+		}
+	}
+	return 0;
+}
+
+/* resume_threads: send SIGCONT to every other thread of the calling
+ * process. Returns 0. */
+SYSCALL_DECLARE(resume_threads)
+{
+	struct thread *self = cpu_local_var(current);
+	struct process *proc = self->proc;
+	struct thread *t;
+
+	list_for_each_entry(t, &proc->threads_list, siblings_list) {
+		if (t != self) {
+			do_kill(self, proc->pid, t->tid, SIGCONT, NULL, 0);
+		}
+	}
+	return 0;
+}
+
 void
 reset_cputime()
 {
--- a/test/qlmpi/dump-pages.c
+++ b/test/qlmpi/dump-pages.c
@ -0,0 +1,127 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "swapfmt.h"
+
+/* Header of the swap file, read by main() */
+struct swap_header	header;
+/* Swapped-area and mlocked-area tables read from the file by main() */
+struct swap_areainfo	*meminfo, *lckinfo;
+
+/*
+ * Print cnt 32-bit words starting at data as hexadecimal on one line.
+ * The loop formerly used "--cnt" and therefore printed one word too few;
+ * the conversion is %08x because the values are unsigned int.
+ */
+void
+show(unsigned *data, int cnt)
+{
+    printf("\t");
+    while (cnt-- > 0) {
+	printf("%08x ", *data++);
+    }
+    printf("\n");
+}
+
+/*
+ * Convert the leading hexadecimal digits of cp (no "0x" prefix) to a
+ * number. Conversion stops at a newline, at the end of the string or at
+ * the first character that is not a hex digit; 0 is returned when there
+ * is no digit at all. Characters are converted to unsigned char before
+ * they are passed to the <ctype.h> classification functions.
+ */
+unsigned long
+convhex(char *cp)
+{
+    unsigned long	val = 0;
+
+    for (; *cp != '\n' && *cp != 0; cp++) {
+	unsigned char	ch = (unsigned char) *cp;
+
+	if (isdigit(ch)) {
+	    val = (val<<4) + ch - '0';
+	} else if (isxdigit(ch) && isupper(ch)) {
+	    val = (val<<4) + ch - 'A' + 10;
+	} else if (isxdigit(ch)) {
+	    val = (val<<4) + ch - 'a' + 10;
+	} else {
+	    break;
+	}
+    }
+    return val;
+}
+
+/*
+ * Translate a virtual address into its position in the swap file using
+ * the swapped-area table. Returns 0 when no area contains addr; when
+ * several areas match, the last one wins.
+ */
+ssize_t
+findpos(unsigned long addr)
+{
+    ssize_t	found = 0;
+    int		idx = 0;
+
+    while (idx < header.count_sarea) {
+	struct swap_areainfo	*area = &meminfo[idx++];
+
+	if (addr < area->start || addr >= area->end) continue;
+	found = area->pos;
+	found += addr - area->start;
+    }
+    return found;
+}
+
+/*
+ * dump-pages [file [any-argument]]
+ *
+ * Prints the header and both area tables of a McKernel swap file
+ * (default: /tmp/pages). With a second argument an interactive prompt is
+ * started: each line is "<cmd> <hex-address>"; 64 bytes stored for that
+ * address are shown as a string when cmd is 's', as hex words otherwise.
+ *
+ * Exits with -1 when the file cannot be opened or its header and tables
+ * cannot be read; returns 0 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+    FILE	*fp;
+    char	*fname, *cp;
+    int		interractive = 0;
+    int		i;
+
+    if (argc >= 2) {
+	fname = argv[1];
+	if (argc >= 3) interractive = 1;
+    } else {
+	fname = "/tmp/pages";
+    }
+    if ((fp = fopen(fname, "r")) == 0) {
+	fprintf(stderr, "Cannot open file: %s\n", fname);
+	exit(-1);
+    }
+    if (fread(&header, sizeof(header), 1, fp) != 1) {
+	fprintf(stderr, "Cannot read swap header: %s\n", fname);
+	fclose(fp);
+	exit(-1);
+    }
+    /* magic and version are fixed-size fields that may lack a terminator */
+    printf("magic           : %.*s\n", SWAP_HLEN, header.magic);
+    printf("version         : %.*s\n", SWAP_HLEN, header.version);
+    printf("swap area count : %u\n", header.count_sarea);
+    printf("mlock area count: %u\n", header.count_marea);
+    printf("SWAP:\n");
+    printf("\t    start               end          : file position (flags)\n");
+    /* one spare element so that a zero count never yields a NULL table */
+    meminfo = calloc((size_t) header.count_sarea + 1, sizeof(struct swap_areainfo));
+    lckinfo = calloc((size_t) header.count_marea + 1, sizeof(struct swap_areainfo));
+    if (meminfo == NULL || lckinfo == NULL) {
+	fprintf(stderr, "Cannot allocate area tables\n");
+	free(meminfo); free(lckinfo);
+	fclose(fp);
+	exit(-1);
+    }
+    if (fread(meminfo, sizeof(struct swap_areainfo), header.count_sarea, fp)
+	!= header.count_sarea
+	|| fread(lckinfo, sizeof(struct swap_areainfo), header.count_marea, fp)
+	!= header.count_marea) {
+	fprintf(stderr, "Cannot read area tables: %s\n", fname);
+	free(meminfo); free(lckinfo);
+	fclose(fp);
+	exit(-1);
+    }
+
+    for (i = 0; i < header.count_sarea; i++) {
+	printf("\t%016lx -- %016lx : %010lx (%lx)\n",
+	       meminfo[i].start, meminfo[i].end, meminfo[i].pos, meminfo[i].flag);
+    }
+    printf("MLOCK:\n");
+    printf("\t    start               end          : physical address (flags)\n");
+    for (i = 0; i < header.count_marea; i++) {
+	printf("\t%016lx -- %016lx : %010lx (%lx)\n",
+	       lckinfo[i].start, lckinfo[i].end, lckinfo[i].pos, lckinfo[i].flag);
+    }
+
+    if (!interractive) goto ending;
+    do {
+	char	buf1[128], buf2[128], data[8*8 + 1];
+	char	cmd;
+	int	cc;
+	unsigned long	addr;
+	ssize_t		fpos;
+
+	fprintf(stdout, "> "); fflush(stdout);
+	cp = fgets(buf1, 128, stdin);
+	if (cp == NULL) break;
+	cc = sscanf(buf1, "%c %127s", &cmd, buf2);
+	if (cc != 2) continue;
+	addr = convhex(buf2);
+	fpos = findpos(addr);
+	if (fpos == 0) continue;
+	printf("%lx (fpos(%lx)):\n", addr, (unsigned long) fpos);
+	if (fseek(fp, fpos, SEEK_SET) != 0) goto ending;
+	if (fread(data, 8*8, 1, fp) != 1) goto ending;
+	if (cmd == 's') {
+	    data[8*8] = 0;
+	    printf("\t%s", data);
+	} else {
+	    show((unsigned*) data, 8);
+	}
+    } while (cp != NULL);
+ending:
+    free(meminfo);
+    free(lckinfo);
+    fclose(fp);
+    return 0;
+}
--- a/test/qlmpi/qlmpi_sample.c
+++ b/test/qlmpi/qlmpi_sample.c
@ -0,0 +1,78 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define BUF_SIZE (16*1024)
+
+#include <qlmpilib.h>
+
+int data[1024*1024];
+char sym2[1024*1024] = { 10, 20, 30, 0 };
+char *sym1 = "aaaaaa";
+char buffer[BUF_SIZE];
+char *ptr1, *ptr2;
+
+/* User-level wrapper of the McKernel swapout system call (number 801). */
+int
+swapout(char *fname, void *buf, size_t sz, int flag)
+{
+	return syscall(801, fname, buf, sz, flag);
+}
+/* User-level wrapper of the McKernel linux_mlock system call (number 802). */
+int
+linux_mlock(const void *addr, size_t len)
+{
+	return syscall(802, addr, len);
+}
+
+/*
+ * Sample client of qlmpi: prints its arguments and the addresses of a few
+ * objects, locks part of "data" on both the McKernel and the Linux side
+ * and then calls ql_client(). While ql_client() returns QL_CONTINUE the
+ * whole sequence is repeated from ql_loop.
+ */
+int
+main(int argc, char **argv)
+{
+	int rc;
+	int i;
+
+	MPI_Init(&argc, &argv);
+
+ql_loop:
+	printf("***** Arguments Info ****************\n");
+	printf(" argc: %d\n", argc);
+	for (i = 0; i < argc; i++) {
+		printf(" argv[%d]: %s\n", i, argv[i]);
+	}
+	printf("QL_SUCCESS:%d\n", QL_SUCCESS);
+	printf("************************************\n\n");
+
+	printf("&data = %p\n", (void *) data);
+	printf("&sym1 = %p\n", (void *) &sym1);
+	printf("&sym2 = %p\n", (void *) sym2);
+	printf("&rc = %p\n", (void *) &rc);
+	ptr1 = malloc(1024);
+	ptr2 = malloc(1024*1024);
+	printf("ptr1 = %p\n", (void *) ptr1);
+	printf("ptr2 = %p\n", (void *) ptr2);
+	/*
+	 * testing mlock in mckernel side
+	 */
+	rc = mlock(data, 16*1024);
+	printf("McKernel mlock returns: %d\n", rc);
+	/*
+	 * testing mlock in linux side
+	 */
+	sprintf((char*) data, "hello\n");
+	rc = linux_mlock(data, 16*1024);
+	printf("linux_mlock returns: %d\n", rc);
+
+	rc = ql_client(&argc, &argv);
+
+	printf("ql_client returns: %d\n", rc);
+	/* The buffers stay allocated across ql_client(); release them
+	 * afterwards so that repeated passes do not accumulate memory. */
+	free(ptr1);
+	free(ptr2);
+	ptr1 = ptr2 = NULL;
+	if (rc == QL_CONTINUE) {
+		goto ql_loop;
+	}
+
+	MPI_Finalize();
+	printf("qlmpi_sample finished!!\n");
+	return 0;
+}
--- a/test/qlmpi/qlmpi_testsuite/CT20.f
+++ b/test/qlmpi/qlmpi_testsuite/CT20.f
@ -0,0 +1,73 @@
+c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
+c     CT20: send/receive test of a large common block between 2 ranks.
+c     Usage: one integer argument (val). Rank 1 fills dat with val and
+c     sends it; rank 0 presets dat to -1, receives and verifies it.
+c     After each pass ql_client is called; the test is repeated from
+c     label 1000 while it returns 1 in ierr.
+      include 'mpif.h'
+      integer dsize
+      parameter(dsize=536870912)
+      character val*10
+      integer ival
+      integer ierr
+      integer i
+      integer*4 dat(dsize)
+      common dat
+      integer rank
+      integer size
+      integer st(MPI_STATUS_SIZE)
+
+      call MPI_INIT(ierr)
+ 1000 continue
+      call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+      call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+
+c     size check
+      if(size.ne.2)then
+        if(rank.eq.0)then
+          print*,'bad MPI size'
+        endif
+        call MPI_FINALIZE(ierr)
+        stop 1
+      endif
+
+c     read argument
+      iargs = iargc()
+      if(iargs.ne.1)then
+        print *,'bad argument'
+        call MPI_FINALIZE(ierr)
+        stop 1
+      endif
+      call getarg(1, val)
+      read(val, '(i10)')ival
+      print *,'val=',ival
+
+c     test
+c     rank 0: receiver and checker; other rank: sender
+      if(rank.eq.0)then
+        do 10 i=1, dsize
+          dat(i) = -1
+   10   continue
+      print *,'r1 val=',ival
+        call MPI_RECV(dat, dsize, MPI_INTEGER4, 1, 0, MPI_COMM_WORLD,
+     c                st, ierr)
+      print *,'r2 val=',ival
+c     every element must now hold the value given as argument
+        do 20 i=1, dsize
+          if(dat(i).ne.ival)then
+            print *,'*** bad value idx=',i,', dat=',dat(i),
+     c              ' , val=',ival
+            goto 100
+          endif
+   20   continue
+        print *,'*** MPI_Send/Recv OK *** '
+  100   continue
+      else
+        do 30 i=1, dsize
+          dat(i) = ival
+   30   continue
+        call MPI_SEND(dat, dsize, MPI_INTEGER4, 0, 0, MPI_COMM_WORLD,
+     c                ierr)
+      endif
+
+c     repeat?
+      call ql_client(ierr)
+      if(ierr.eq.1)then
+        print *,'repeat'
+        goto 1000
+      endif
+      call MPI_FINALIZE(ierr)
+      end
--- a/test/qlmpi/qlmpi_testsuite/CT20.sh
+++ b/test/qlmpi/qlmpi_testsuite/CT20.sh
@ -0,0 +1,27 @@
+#!/bin/sh
+PPOSDIR=/home/satoken/ppos
+export PATH=$(PPOSDIR)/bin:$PATH
+echo CT20001 device mapping program test START
+echo CT20002 program 1 START
+echo CT20003 check '"MPI_Send/Recv OK"'
+ql_mpiexec_start -machinefile hostfile20 ./CT20a 1
+echo CT20004 program 1 suspend
+echo CT20005 program 2 START
+echo CT20006 check '"MPI_Send/Recv OK"'
+ql_mpiexec_start -machinefile hostfile20 ./CT20b 2
+echo CT20007 program 2 suspend
+echo CT20008 program 1 resume
+echo CT20009 check '"MPI_Send/Recv OK"'
+ql_mpiexec_start -machinefile hostfile20 ./CT20a 3
+echo CT20010 program 1 suspend
+echo CT20011 program 2 resume
+echo CT20012 check '"MPI_Send/Recv OK"'
+ql_mpiexec_start -machinefile hostfile20 ./CT20b 4
+echo CT20013 program 2 suspend
+echo CT20014 program 1 resume
+ql_mpiexec_finalize -machinefile hostfile20 ./CT20a
+echo CT20015 program 1 END
+echo CT20016 program 2 resume
+ql_mpiexec_finalize -machinefile hostfile20 ./CT20b
+echo CT20017 program 2 END
+echo CT20018 device mapping program test END
--- a/test/qlmpi/qlmpi_testsuite/CT21.f
+++ b/test/qlmpi/qlmpi_testsuite/CT21.f
@ -0,0 +1,62 @@
+c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
+c     CT21: file read check that runs again whenever ql_client
+c     returns 1.  Arguments: (1) file name, (2) expected integer.
+c     The file is read as a raw stream of integer*4 values into dat
+c     and every element is compared with the expected integer.
+      include 'mpif.h'
+      integer size
+c     536870912 elements of integer*4 = 2 GiB, kept in blank common
+      parameter(size=536870912)
+c     both arguments are held in 10-character variables
+      character file*10
+      character val*10
+      integer ival
+      integer ierr
+      integer i
+      integer*4 dat(size)
+      common dat
+      character myname*10
+
+c     myname is fetched here and not referenced again in this file
+      call getarg(0, myname)
+      call MPI_INIT(ierr)
+c     re-entry point: arguments are read again on every pass
+ 1000 continue
+      iargs = iargc()
+      if(iargs.ne.2)then
+        print *,'bad argument'
+        call MPI_FINALIZE(ierr)
+        stop 1
+      endif
+      call getarg(1, file)
+      call getarg(2, val)
+      read(val, '(i10)')ival
+      print *,' file=',file,', val=',ival
+c     the file must already exist; open failure jumps to label 999
+      open(1, file=file, status='old', form='unformatted',
+     c     access='stream', err=999)
+c     poison the buffer so stale data cannot pass the check
+      do 10 i=1, size
+        dat(i) = -1
+   10 continue
+c     one read fills the whole array; read failure jumps to label 998
+      read(1, err=998)(dat(i), i=1, size)
+c     stop at the first mismatch; label 100 skips the OK message
+      do 20 i=1, size
+        if(dat(i).ne.ival)then
+          print *,'*** FAIL *** BAD VALUE idx=',i,', val=',dat(i)
+          goto 100
+        endif
+   20 continue
+      print *,' *** data read OK ***'
+  100 continue
+      close(1)
+c     ierr = 1 from ql_client sends control back to label 1000; any
+c     other value falls through to MPI_FINALIZE
+      call ql_client(ierr)
+      if(ierr.eq.1)then
+        print *,'resume'
+        goto 1000
+      endif
+      call MPI_FINALIZE(ierr)
+      stop 0
+
+c     read error: close the unit, then take the common failure exit
+  998 continue
+      close(1)
+      print *,'read error'
+      goto 9999
+
+c     open error: nothing to close
+  999 continue
+      print *,'open error'
+      goto 9999
+
+c     common failure exit
+ 9999 continue
+      call MPI_FINALIZE(ierr)
+      stop 1
+      end
--- a/test/qlmpi/qlmpi_testsuite/CT21.sh
+++ b/test/qlmpi/qlmpi_testsuite/CT21.sh
@ -0,0 +1,27 @@
+#!/bin/sh
+export PPOSDIR=/home/satoken/ppos
+export PATH=$PPOSDIR/bin:$PATH
+echo CT21001 mcexec page table update test START
+echo CT21002 program 1 START
+echo CT21003 check '"data read OK"'
+ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file1 1
+echo CT21004 program 1 suspend
+echo CT21005 program 2 START
+echo CT21006 check '"data read OK"'
+ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file1 1
+echo CT21007 program 2 suspend
+echo CT21008 program 1 resume
+echo CT21009 check '"data read OK"'
+ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21a file2 2
+echo CT21010 program 1 suspend
+echo CT21011 program 2 resume
+echo CT21012 check '"data read OK"'
+ql_mpiexec_start -machinefile hostfile21 -n 1 ./CT21b file2 2
+echo CT21013 program 2 suspend
+echo CT21014 program 1 resume
+ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21a
+echo CT21015 program 1 END
+echo CT21016 program 2 resume
+ql_mpiexec_finalize -machinefile hostfile21 -n 1 ./CT21b
+echo CT21017 program 2 END
+echo CT21018 mcexec page table update test END
--- a/test/qlmpi/qlmpi_testsuite/CT22.f
+++ b/test/qlmpi/qlmpi_testsuite/CT22.f
@ -0,0 +1,27 @@
+c---+c---1----+----2----+----3----+----4----+----5----+----6----+----7--!!!!!!!!
+c     CT22: MPI + OpenMP check that runs again whenever ql_client
+c     returns 1.  Inside a parallel region every thread prints one
+c     line of the form "mpi=<rank>/<size>, omp=<thread>/<threads>".
+c     NOTE(review): the '!$ use omp_lib' line below coexists with
+c     explicit external/integer declarations of the same two
+c     functions; confirm which of the two is actually in effect with
+c     the compiler and source form in use.
+!$ use omp_lib
+      include 'mpif.h'
+      integer rank
+      integer size
+      external omp_get_thread_num
+      external omp_get_num_threads
+      integer omp_get_thread_num
+      integer omp_get_num_threads
+
+      call MPI_INIT(ierr)
+c     re-entry point: rank and size are read again on every pass
+ 1000 continue
+      call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+      call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+
+c     the print statement is executed once by each thread of the team
+!$omp parallel
+      print '(1h ,4hmpi=,i2,1h/,i2,6h, omp=,i2,1h/,i2)',
+     c      rank, size, omp_get_thread_num(), omp_get_num_threads()
+!$omp end parallel
+c     repeat?
+c     ierr = 1 from ql_client sends control back to label 1000; any
+c     other value falls through to MPI_FINALIZE
+      call ql_client(ierr)
+      if(ierr.eq.1)then
+        print *,'repeat'
+        goto 1000
+      endif
+      call MPI_FINALIZE(ierr)
+      end
--- a/test/qlmpi/qlmpi_testsuite/CT22.sh
+++ b/test/qlmpi/qlmpi_testsuite/CT22.sh
@ -0,0 +1,27 @@
+#!/bin/sh
+PPOSDIR=/home/satoken/ppos
+export PATH=$(PPOSDIR)/bin:$PATH
+echo CT22001 OMP test START
+echo CT22002 program 1 START
+echo CT22003 check rank info
+ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
+echo CT22004 program 1 suspend
+echo CT22005 program 2 START
+echo CT22006 check rank info
+ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
+echo CT22007 program 2 suspend
+echo CT22008 program 1 resume
+echo CT22009 check rank info
+ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
+echo CT22010 program 1 suspend
+echo CT22011 program 2 resume
+echo CT22012 check rank info
+ql_mpiexec_start -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
+echo CT22013 program 2 suspend
+echo CT22014 program 1 resume
+ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22a
+echo CT22015 program 1 END
+echo CT22016 program 2 resume
+ql_mpiexec_finalize -genv OMP_NUM_THREADS 4 -machinefile hostfile22 ./CT22b
+echo CT22017 program 2 END
+echo CT22018 OMP test END
--- a/test/qlmpi/qlmpi_testsuite/Makefile
+++ b/test/qlmpi/qlmpi_testsuite/Makefile
@ -0,0 +1,57 @@
+# Builds the qlmpi test programs and the two input files read by CT21.
+# PPOSDIR (qlmpi install prefix) and MPIDIR (MPICH prefix) are site-specific;
+# override them on the make command line, e.g. `make PPOSDIR=/opt/ppos`.
+PPOSDIR=/home/satoken/ppos
+MPIDIR=/usr/lib64/mpich-3.2
+MPIBINDIR=$(MPIDIR)/bin
+MPILIBDIR=$(MPIDIR)/lib
+CC=gcc
+
+MPIF90=$(MPIBINDIR)/mpif90
+MPICC=$(MPIBINDIR)/mpicc
+LIBDIR=$(PPOSDIR)/lib
+# LDFLAGS already carries -lqlmpi, so the recipes do not repeat it.
+LDFLAGS=-L$(LIBDIR) -lqlmpi -Wl,-rpath=$(LIBDIR) -Wl,-rpath,$(MPILIBDIR)
+CFLAGS= -I$(PPOSDIR)/include
+
+TARGETS= usr_prg_A usr_prg_B usr_prg_C usr_prg_irreg CT20a CT20b CT21a CT21b file1 file2 CT22a CT22b
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all:: $(TARGETS)
+
+# C test programs. Libraries follow the source file, the same order the
+# Fortran rules use, so the link also works with linkers that resolve
+# libraries strictly left to right.
+usr_prg_A: usr_prg_A.c
+	$(MPICC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+usr_prg_B: usr_prg_B.c
+	$(MPICC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+usr_prg_C: usr_prg_C.c
+	$(MPICC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+usr_prg_irreg: usr_prg_irreg.c
+	$(MPICC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+# Each Fortran test is built twice (a/b) from the same source so the test
+# scripts can run two distinct executables.
+CT20a: CT20.f
+	$(MPIF90) -o $@ $< $(LDFLAGS)
+
+CT20b: CT20.f
+	$(MPIF90) -o $@ $< $(LDFLAGS)
+
+CT21a: CT21.f
+	$(MPIF90) -o $@ $< $(LDFLAGS)
+
+CT21b: CT21.f
+	$(MPIF90) -o $@ $< $(LDFLAGS)
+
+CT22a: CT22.f
+	$(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS)
+
+CT22b: CT22.f
+	$(MPIF90) -O -fopenmp -o $@ $< $(LDFLAGS)
+
+# CT21 input files: 536870912 ints, every one equal to the first argument.
+file1: gendata
+	./gendata 1 536870912 > $@
+
+file2: gendata
+	./gendata 2 536870912 > $@
+
+gendata: gendata.c
+	$(CC) -o $@ $<
+
+clean::
+	rm -f $(TARGETS) gendata
--- a/test/qlmpi/qlmpi_testsuite/config
+++ b/test/qlmpi/qlmpi_testsuite/config
@ -0,0 +1,9 @@
+MCMOD_DIR=/home/satoken/ppos
+
+START=${MCMOD_DIR}/bin/ql_mpiexec_start
+FINALIZE=${MCMOD_DIR}/bin/ql_mpiexec_finalize
+
+USR_PRG_A=./usr_prg_A
+USR_PRG_B=./usr_prg_B
+USR_PRG_C=./usr_prg_C
+USR_PRG_IRREG=./usr_prg_irreg
--- a/test/qlmpi/qlmpi_testsuite/gendata.c
+++ b/test/qlmpi/qlmpi_testsuite/gendata.c
@ -0,0 +1,40 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+/*
+ * gendata VALUE COUNT
+ *
+ * Writes COUNT copies of the int VALUE to standard output in the machine's
+ * native binary representation. A summary line goes to standard error.
+ * Exits 0 on success and 1 on a bad argument, allocation failure or write
+ * failure.
+ *
+ * Byte counts are kept in size_t: the Makefile asks for 536870912 ints,
+ * which is more bytes than an int can represent.
+ */
+int
+main(int argc, char **argv)
+{
+	int data;
+	long count;
+	size_t nelem;
+	size_t i;
+	int *buf;
+	size_t remain;
+	const char *p;
+
+	if (argc != 3) {
+		fprintf(stderr, "BAD argument\n");
+		exit(1);
+	}
+	data = atoi(argv[1]);
+	count = atol(argv[2]);
+	/* Reject counts that are negative or whose byte size overflows. */
+	if (count < 0 || (unsigned long)count > SIZE_MAX / sizeof(*buf)) {
+		fprintf(stderr, "BAD count: %ld\n", count);
+		exit(1);
+	}
+	nelem = (size_t)count;
+
+	fprintf(stderr, "data=%d count=%ld\n", data, count);
+	buf = malloc(sizeof(*buf) * nelem);
+	if (buf == NULL && nelem != 0) {
+		fprintf(stderr, "malloc error: %d\n", errno);
+		exit(1);
+	}
+	for (i = 0; i < nelem; i++)
+		buf[i] = data;
+
+	/* write() may transfer less than requested; loop until all is out. */
+	for (remain = sizeof(*buf) * nelem, p = (const char *)buf; remain > 0;) {
+		ssize_t rc = write(1, p, remain);
+
+		/* An interrupted call reports -1 with errno == EINTR; retry. */
+		if (rc < 0 && errno == EINTR)
+			continue;
+		if (rc <= 0) {
+			fprintf(stderr, "write error: %d\n", errno);
+			exit(1);
+		}
+		remain -= (size_t)rc;
+		p += rc;
+	}
+	free(buf);
+	close(1);
+	exit(0);
+}
--- a/test/qlmpi/qlmpi_testsuite/go_ql_test.sh
+++ b/test/qlmpi/qlmpi_testsuite/go_ql_test.sh
@ -0,0 +1,10 @@
+#!/bin/sh
+
+for test_param in `ls -1 ./test_cases/CT*.txt`
+do
+	source ${test_param}
+	./ql_normal.sh ${test_param} 2>&1 | tee ./result/${TEST_PREFIX}.log
+done
+
+./ql_irreg.sh ./test_cases/ECT91.txt
+
--- a/test/qlmpi/qlmpi_testsuite/hostfile20
+++ b/test/qlmpi/qlmpi_testsuite/hostfile20
@ -0,0 +1,2 @@
+wallaby14:1
+wallaby15:1
--- a/test/qlmpi/qlmpi_testsuite/hostfile21
+++ b/test/qlmpi/qlmpi_testsuite/hostfile21
@ -0,0 +1 @@
+wallaby14
--- a/test/qlmpi/qlmpi_testsuite/hostfile22
+++ b/test/qlmpi/qlmpi_testsuite/hostfile22
@ -0,0 +1,2 @@
+wallaby14:1
+wallaby15:1
--- a/test/qlmpi/qlmpi_testsuite/ql_irreg.sh
+++ b/test/qlmpi/qlmpi_testsuite/ql_irreg.sh
@ -0,0 +1,210 @@
+#!/bin/bash
+# bash is required: this script uses `function`, (( )), declare -a, array
+# appends and &>, none of which are POSIX sh.
+
+# Functions
+# ok_out MSG: print an [OK] line for the current test number, then advance
+# TEST_NUM and refresh the zero-padded TEST_CODE.
+function ok_out() {
+	echo "[OK] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
+	(( TEST_NUM++ ))
+	TEST_CODE=`printf %03d ${TEST_NUM}`
+}
+
+# ng_out MSG: print an [NG] line for the current test number and abort the
+# whole script with status 1.
+function ng_out() {
+	echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
+	exit 1
+}
+
+# ng_out_cont MSG: print an [NG] line but keep going, advancing TEST_NUM and
+# TEST_CODE exactly as ok_out does.
+function ng_out_cont {
+	echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
+	(( TEST_NUM++ ))
+	TEST_CODE=`printf %03d ${TEST_NUM}`
+}
+
+# Without a parameter file nothing below can work, so stop here instead of
+# falling through to `source` with an empty file name.
+if [ $# -lt 1 ]; then
+	echo "too few arguments."
+	echo "usage: `basename $0` <param_file>"
+	exit 1
+fi
+
+TEST_PARAM_FILE=$1
+TEST_NUM=1
+TEST_CODE=001
+
+ME=`whoami`
+
+# read config
+source ./config
+
+# read test param
+source ${TEST_PARAM_FILE}
+
+# make machinefile
+mkdir ./machinefiles &> /dev/null
+MFILE=./machinefiles/mfile_${TEST_PREFIX}
+echo ${MASTER}:${PROC_PER_NODE} > ${MFILE}
+for slave in ${SLAVE}
+do
+	echo ${slave}:${PROC_PER_NODE} >> ${MFILE}
+done
+
+PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}`
+
+# read machinefile
+declare -a node_arry
+while read line
+do
+	node_arry+=(${line%:*})
+done < ${MFILE}
+MASTER=${node_arry[0]}
+
+# make result directory
+RESULT_DIR=./result/${TEST_PREFIX}
+mkdir -p ${RESULT_DIR}
+
+RANK_MAX=`expr ${PROC_NUM} - 1`
+
+# Log files
+start_1st_A_log=${RESULT_DIR}/exec_1st_A.log
+start_1st_B_log=${RESULT_DIR}/exec_1st_B.log
+start_1st_C_log=${RESULT_DIR}/exec_1st_C.log
+
+start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log
+start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log
+start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log
+
+finalize_A_log=${RESULT_DIR}/finalize_A.log
+finalize_B_log=${RESULT_DIR}/finalize_B.log
+finalize_C_log=${RESULT_DIR}/finalize_C.log
+
+# Arguments
+args_1st_A="1234 hoge 02hoge"
+args_2nd_A="foo 99bar test"
+
+# Env
+envs_1st_A="1st_exec_A"
+envs_2nd_A="This_is_2nd_exec_A"
+
+BK_PATH=${PATH}
+
+### At test start, neither ql_server nor the test MPI programs are running on any node
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"`
+	if [ ${cnt} -gt 0 ]; then
+		ng_out "ql_server is running on ${node}"
+	fi
+
+	cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"`
+	if [ ${cnt} -gt 0 ]; then
+		ng_out "other MPI program is running on ${node}"
+	fi
+done
+
+### machinefile is not specified
+env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "machinefile is not specified, but ql_mpiexec_start returns 0"
+else
+	ok_out "machinefile is not specified, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+
+### MPI program is not specified
+env QL_TEST=${envs_1st_A} ${START} -n ${PROC_NUM} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "MPI program is not specified, but ql_mpiexec_start returns 0"
+else
+	ok_out "MPI program is not specified, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+
+### specified machinefile does not exist
+env QL_TEST=${envs_1st_A} ${START} -machinefile dose_not_exist -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "specified machinefile does not exist, but ql_mpiexec_start returns 0"
+else
+	ok_out "specified machinefile does not exist, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+
+### specified MPI program does not exist
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} dose_not_exist ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "specified MPI program does not exist, but ql_mpiexec_start returns 0"
+else
+	ok_out "specified MPI program does not exist, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+
+### mpiexec is not found
+PATH="/usr/bin"
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "mpiexec is not found, but ql_mpiexec_start returns 0"
+else
+	ok_out "mpiexec is not found, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+PATH=${BK_PATH}
+
+### mpiexec abort
+PATH="./util:/usr/bin"
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "mpiexec abort, but ql_mpiexec_start returns 0"
+else
+	ok_out "mpiexec abort, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+PATH=${BK_PATH}
+
+### machinefile is not specified
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+
+${FINALIZE} -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "machinefile is not specified, but ql_mpiexec_finalize returns 0"
+else
+	ok_out "machinefile is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}"
+fi
+
+### MPI program is not specified
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} >> ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "MPI program is not specified, but ql_mpiexec_finalize returns 0"
+else
+	ok_out "MPI program is not specified, so ql_mpiexec_finalize returns not 0. returns ${rc}"
+fi
+
+### specified machinefile is wrong
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+${FINALIZE} -machinefile ./util/wrong_mfile -n ${PROC_NUM} ${USR_PRG_A} >> ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "specified machinefile is wrong, but ql_mpiexec_finalize returns 0"
+else
+	ok_out "specified machinefile is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}"
+fi
+
+### specified MPI program name is wrong
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${RESULT_DIR}/${TEST_CODE}.log
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} >> ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "specified MPI program name is wrong, but ql_mpiexec_finalize returns 0"
+else
+	ok_out "specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0. returns ${rc}"
+fi
+
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > /dev/null
+
+### one of MPI process aborts
+# NOTE(review): abort_rank is computed here but never referenced afterwards;
+# the program is started with the literal argument 0 instead. Confirm which
+# rank usr_prg_irreg is meant to abort.
+abort_rank=`expr ${PROC_NUM} - 1`
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_IRREG} 0 > ${RESULT_DIR}/${TEST_CODE}.log
+rc=$?
+# A zero status from ql_mpiexec_start counts as the failure in this case.
+if [ ${rc} -eq 0 ]; then
+	ng_out_cont "one of MPI processes aborts, but ql_mpiexec_start returns  0"
+else
+	ok_out "one of MPI processes aborts, so ql_mpiexec_start returns not 0. returns ${rc}"
+fi
+
--- a/test/qlmpi/qlmpi_testsuite/ql_normal.sh
+++ b/test/qlmpi/qlmpi_testsuite/ql_normal.sh
@ -0,0 +1,348 @@
+#!/bin/bash
+# bash is required: this script uses `function`, (( )), declare -a, array
+# appends and &>, none of which are POSIX sh.
+
+# Functions
+# ok_out MSG: print an [OK] line for the current test number, then advance
+# TEST_NUM and refresh the zero-padded TEST_CODE.
+function ok_out() {
+	echo "[OK] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
+	(( TEST_NUM++ ))
+	TEST_CODE=`printf %03d ${TEST_NUM}`
+}
+
+# ng_out MSG: print an [NG] line for the current test number and abort the
+# whole script with status 1.
+function ng_out() {
+	echo "[NG] ${TEST_PREFIX}`printf %03d ${TEST_NUM}` $1"
+	exit 1
+}
+
+# Without a parameter file nothing below can work, so stop here instead of
+# falling through to `source` with an empty file name.
+if [ $# -lt 1 ]; then
+	echo "too few arguments."
+	echo "usage: `basename $0` <param_file>"
+	exit 1
+fi
+
+TEST_PARAM_FILE=$1
+TEST_NUM=1
+TEST_CODE=001
+
+ME=`whoami`
+
+# read config
+source ./config
+
+# read test param
+source ${TEST_PARAM_FILE}
+
+# make machinefile
+mkdir ./machinefiles &> /dev/null
+MFILE=./machinefiles/mfile_${TEST_PREFIX}
+echo ${MASTER}:${PROC_PER_NODE} > ${MFILE}
+for slave in ${SLAVE}
+do
+	echo ${slave}:${PROC_PER_NODE} >> ${MFILE}
+done
+
+PROC_NUM=`expr ${PROC_PER_NODE} \* ${MPI_NODE_NUM}`
+
+# read machinefile
+declare -a node_arry
+while read line
+do
+	node_arry+=(${line%:*})
+done < ${MFILE}
+MASTER=${node_arry[0]}
+
+# make result directory
+RESULT_DIR=./result/${TEST_PREFIX}
+mkdir -p ${RESULT_DIR}
+
+RANK_MAX=`expr ${PROC_NUM} - 1`
+
+# Log files
+start_1st_A_log=${RESULT_DIR}/exec_1st_A.log
+start_1st_B_log=${RESULT_DIR}/exec_1st_B.log
+start_1st_C_log=${RESULT_DIR}/exec_1st_C.log
+
+start_2nd_A_log=${RESULT_DIR}/exec_2nd_A.log
+start_2nd_B_log=${RESULT_DIR}/exec_2nd_B.log
+start_2nd_C_log=${RESULT_DIR}/exec_2nd_C.log
+
+finalize_A_log=${RESULT_DIR}/finalize_A.log
+finalize_B_log=${RESULT_DIR}/finalize_B.log
+finalize_C_log=${RESULT_DIR}/finalize_C.log
+
+# Arguments
+args_1st_A="1234 hoge 02hoge"
+args_2nd_A="foo 99bar test"
+
+# Env
+envs_1st_A="1st_exec_A"
+envs_2nd_A="This_is_2nd_exec_A"
+
+### At test start, neither ql_server nor the test MPI programs are running on any node
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -c 'ql_(server|talker)'"`
+	if [ ${cnt} -gt 0 ]; then
+		ng_out "ql_server is running on ${node}"
+	fi
+
+	cnt=`ssh $node "pgrep -u ${ME} -c 'mpiexec'"`
+	if [ ${cnt} -gt 0 ]; then
+		ng_out "other MPI program is running on ${node}"
+	fi
+done
+ok_out "ql_server and  usr_prgs are not running on each node"
+
+### ql_mpiexec_start that runs usr_prg_A returns 0 (success)
+env QL_TEST=${envs_1st_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_1st_A} > ${start_1st_A_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_start usr_prg_A (first exec) returns 0"
+else
+	ng_out "ql_mpiexec_start usr_prg_A (first exec) returns ${rc}"
+fi
+
+### After the first run, ql_server is running on the master node
+cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
+if [ ${cnt} -ne 1 ]; then
+	ng_out "ql_server is not running on master node"
+else
+	ok_out "ql_server is running on master node"
+fi
+
+### On each node, usr_prg_A's arguments match those given at launch
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "argv="`
+	tgt=${line#*argv=}
+	if [ "X${tgt}" != "X${USR_PRG_A} ${args_1st_A}" ]; then
+		ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}"
+	fi
+done
+ok_out "usr_prg_A's args are correct on each node"
+
+### On each node, usr_prg_A's test environment variable (QL_TEST) matches the value given at launch
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "QL_TEST="`
+	tgt=${line#*QL_TEST=}
+	if [ "X${tgt}" != "X${envs_1st_A}" ]; then
+		ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}"
+	fi
+done
+ok_out "usr_prg_A's env (QL_TEST) is correct on each node"
+
+### usr_prg_A's calculation has completed on each node
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_1st_A_log} | grep -e "done="`
+	tgt=${line#*done=}
+	if [ "X${tgt}" != "Xyes" ]; then
+		ng_out "usr_prg_A's calculation is not done on rank:${rank}"
+	fi
+done
+ok_out "usr_prg_A's calculation is done on each node"
+
+### After ql_mpiexec_start completes, usr_prg_A is waiting for a resume request
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l`
+	if [ ${cnt} -eq 0 ]; then
+		ng_out "usr_prg_A is not running on ${node}"
+	else
+		echo "  ${cnt} programs is waiting on ${node}"
+	fi
+done
+ok_out "usr_prg_A is waiting for resume-req on each node"
+
+### ql_mpiexec_start that runs usr_prg_B returns 0 (success)
+${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 1 2 3 > ${start_1st_B_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_start usr_prg_B (first exec) returns 0"
+else
+	ng_out "ql_mpiexec_start usr_prg_B (first exec) returns ${rc}"
+fi
+
+### usr_prg_B's calculation has completed on each node
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_1st_B_log} | grep -e "done="`
+	tgt=${line#*done=}
+	if [ "X${tgt}" != "Xyes" ]; then
+		ng_out "usr_prg_B's calculation is not done on rank:${rank}"
+	fi
+done
+ok_out "usr_prg_B's calculation is done on each node"
+
+### After ql_mpiexec_start completes, usr_prg_B is waiting for a resume request
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l`
+	if [ ${cnt} -eq 0 ]; then
+		ng_out "usr_prg_B is not running on ${node}"
+	else
+		echo "  ${cnt} programs is waiting on ${node}"
+	fi
+done
+ok_out "usr_prg_B is waiting for resume-req on each node"
+
+### ql_mpiexec_start that runs usr_prg_C returns 0 (success)
+${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} a b c > ${start_1st_C_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_start usr_prg_C (first exec) returns 0"
+else
+	ng_out "ql_mpiexec_start usr_prg_C (first exec) returns ${rc}"
+fi
+
+### usr_prg_C's calculation has completed on each node
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_1st_C_log} | grep -e "done="`
+	tgt=${line#*done=}
+	if [ "X${tgt}" != "Xyes" ]; then
+		ng_out "usr_prg_C's calculation is not done on rank:${rank}"
+	fi
+done
+ok_out "usr_prg_C's calculation is done on each node"
+
+### After ql_mpiexec_start completes, usr_prg_C is waiting for a resume request
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_C'" | grep " exe" | wc -l`
+	if [ ${cnt} -eq 0 ]; then
+		ng_out "usr_prg_C is not running on ${node}"
+	else
+		echo "  ${cnt} programs is waiting on ${node}"
+	fi
+done
+ok_out "usr_prg_C is waiting for resume-req on each node"
+
+### ql_mpiexec_start that re-runs usr_prg_A returns 0 (success)
+env QL_TEST=${envs_2nd_A} ${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} ${args_2nd_A} > ${start_2nd_A_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "(again) ql_mpiexec_start usr_prg_A returns 0"
+else
+	ng_out "(again) ql_mpiexec_start usr_prg_A returns ${rc}"
+fi
+
+### On each node, usr_prg_A's arguments match those given at re-run
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "argv="`
+	tgt=${line#*argv=}
+	if [ "X${tgt}" != "X${USR_PRG_A} ${args_2nd_A}" ]; then
+		ng_out "usr_prg_A's args is incorrect on rank:${rank}\n ${line}"
+	fi
+done
+ok_out "(again) usr_prg_A's args are correct on each node"
+
+### On each node, usr_prg_A's test environment variable (QL_TEST) matches the value given at re-run
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "QL_TEST="`
+	tgt=${line#*QL_TEST=}
+	if [ "X${tgt}" != "X${envs_2nd_A}" ]; then
+		ng_out "usr_prg_A's env (QL_TEST) is incorrect on each node:${rank}\n ${line}"
+	fi
+done
+ok_out "(again) usr_prg_A's env (QL_TEST) is correct on each node"
+
+### usr_prg_A's calculation has completed on each node
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_2nd_A_log} | grep -e "done="`
+	tgt=${line#*done=}
+	if [ "X${tgt}" != "Xyes" ]; then
+		ng_out "usr_prg_A's calculation is not done on rank:${rank}"
+	fi
+done
+ok_out "(again) usr_prg_A's calculation is done on each node"
+
+### After ql_mpiexec_start completes, usr_prg_A is waiting for a resume request
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_A'" | grep " exe" | wc -l`
+	if [ ${cnt} -eq 0 ]; then
+		ng_out "usr_prg_A is not running on ${node}"
+	else
+		echo "  ${cnt} programs is waiting on ${node}"
+	fi
+done
+ok_out "(again) usr_prg_A is waiting for resume-req on each node"
+
+### ql_mpiexec_start that re-runs usr_prg_B returns 0 (success)
+${START} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} 10 20 30 40 > ${start_2nd_B_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "(again) ql_mpiexec_start usr_prg_B returns 0"
+else
+	ng_out "(again) ql_mpiexec_start usr_prg_B returns ${rc}"
+fi
+
+### usr_prg_B's calculation has completed on each node
+for rank in `seq 0 ${RANK_MAX}`
+do
+	line=`grep -e "^${rank}:" ${start_2nd_B_log} | grep -e "done="`
+	tgt=${line#*done=}
+	if [ "X${tgt}" != "Xyes" ]; then
+		ng_out "usr_prg_B's calculation is not done on rank:${rank}"
+	fi
+done
+ok_out "(again) usr_prg_B's calculation is done on each node"
+
+### After ql_mpiexec_start completes, usr_prg_B is waiting for a resume request
+for node in ${node_arry[@]}
+do
+	cnt=`ssh $node "pgrep -u ${ME} -fl 'usr_prg_B'" | grep " exe" | wc -l`
+	if [ ${cnt} -eq 0 ]; then
+		ng_out "usr_prg_B is not running on ${node}"
+	else
+		echo "  ${cnt} programs is waiting on ${node}"
+	fi
+done
+ok_out "(again) usr_prg_B is waiting for resume-req on each node"
+
+### ql_mpiexec_finalize that terminates usr_prg_A returns 0 (success)
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_A} > ${finalize_A_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_finalize usr_prg_A return 0"
+else
+	ng_out "ql_mpiexec_finalize usr_prg_A return ${rc}"
+fi
+
+### ql_mpiexec_finalize that terminates usr_prg_B returns 0 (success)
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_B} > ${finalize_B_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_finalize usr_prg_B return 0"
+else
+	ng_out "ql_mpiexec_finalize usr_prg_B return ${rc}"
+fi
+
+### After usr_prg_B has been finalized, ql_server is still running on the master node
+cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
+if [ ${cnt} -ne 1 ]; then
+	ng_out "ql_server is not running on master node"
+else
+	ok_out "ql_server is still running on master node"
+fi
+
+### ql_mpiexec_finalize that terminates usr_prg_C returns 0 (success)
+${FINALIZE} -machinefile ${MFILE} -n ${PROC_NUM} ${USR_PRG_C} > ${finalize_C_log}
+rc=$?
+if [ ${rc} -eq 0 ]; then
+	ok_out "ql_mpiexec_finalize usr_prg_C return 0"
+else
+	ng_out "ql_mpiexec_finalize usr_prg_C return ${rc}"
+fi
+
+### All MPI programs have been finalized, so ql_server must have exited
+# Wait before sampling the process list so ql_server has time to shut down;
+# sleeping after pgrep would delay the verdict without affecting it.
+sleep 1
+cnt=`ssh ${MASTER} "pgrep -u ${ME} -c 'ql_server'"`
+if [ ${cnt} -eq 0 ]; then
+	ok_out "ql_server is not running on master node"
+else
+	ng_out "ql_server is still running on master node"
+fi
+
--- a/test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt
+++ b/test/qlmpi/qlmpi_testsuite/qlmpi_testcase.txt
@ -0,0 +1,230 @@
+プロセス高速起動
+結合テスト仕様 (ql_mpiexec_start/finalize)
+
+下記の条件を組み合わせた構成で動作を検証する
+・MPIノード数 
+   1, 2
+・プロセス数/ノード
+   1, 2, MAX(mckernelのCPU割り当て数)
+・ql_mpiexecコマンドの実行ノード
+   master, not-master
+
+組み合わせ：
+            MPIノード数    proc/node   mpiexec実行ノード 
+パターン01:       1             1           master
+パターン02:       1             1           not-master
+パターン03:       1             2           master
+パターン04:       1             2           not-master
+パターン05:       1             MAX         master
+パターン06:       1             MAX         not-master
+パターン07:       2             1           master
+パターン08:       2             1           not-master
+パターン09:       2             2           master
+パターン10:       2             2           not-master
+パターン11:       2             MAX         master
+パターン12:       2             MAX         not-master
+
+
+CTxx 3つのMPIプログラム(A, B, C) をA, B, C, A, B の順に実行する
+□ CTxx001 ql_server and  usr_prgs are not running on each node
+   テスト開始時点でql_serverとテスト用MPIプログラムが各ノードで実行されていない
+□ CTxx002 ql_mpiexec_start usr_prg_A (first exec) returns 0
+   usr_prg_A を実行するql_mpiexec_start の返り値が0 (成功)
+□ CTxx003 ql_server is running on master node
+   初回実行後、マスターノード上でql_serverが動作している
+□ CTxx004 usr_prg_A's args are correct on each node
+   各ノードのusr_prg_A の引数が実行時に指定したものと一致している
+□ CTxx005 usr_prg_A's env (QL_TEST) is correct on each node
+   各ノードのusr_prg_A テスト用に指定した環境変数が実行時に指定したものと一致している
+□ CTxx006 usr_prg_A's calculation is done on each node
+   各ノードのusr_prg_A の計算処理が完了
+□ CTxx007 usr_prg_A is waiting for resume-req on each node
+   ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
+□ CTxx008 ql_mpiexec_start usr_prg_B (first exec) returns 0
+   usr_prg_B を実行するql_mpiexec_start の返り値が0 (成功)
+□ CTxx009 usr_prg_B's calculation is done on each node
+   各ノードのusr_prg_B の計算処理が完了
+□ CTxx010 usr_prg_B is waiting for resume-req on each node
+   ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
+□ CTxx011 ql_mpiexec_start usr_prg_C (first exec) returns 0
+   usr_prg_C を実行するql_mpiexec_start の返り値が0 (成功)
+□ CTxx012 usr_prg_C's calculation is done on each node
+   各ノードのusr_prg_C の計算処理が完了
+□ CTxx013 usr_prg_C is waiting for resume-req on each node
+   ql_mpiexec_start の完了後、usr_prg_C が再開指示待ちになっている
+
+□ CTxx014 (again) ql_mpiexec_start usr_prg_A returns 0
+   usr_prg_A を再実行するql_mpiexec_start の返り値が0 (成功)
+□ CTxx015 (again) usr_prg_A's args are correct on each node
+   各ノードのusr_prg_A の引数が再実行時に指定したものと一致している
+□ CTxx016 (again) usr_prg_A's env (QL_TEST) is correct on each node
+   各ノードのusr_prg_A テスト用に指定した環境変数が再実行時に指定したものと一致している
+□ CTxx017 (again) usr_prg_A's calculation is done on each node
+   各ノードのusr_prg_A の計算処理が完了
+□ CTxx018 (again) usr_prg_A is waiting for resume-req on each node
+   ql_mpiexec_start の完了後、usr_prg_A が再開指示待ちになっている
+□ CTxx019 (again) ql_mpiexec_start usr_prg_B returns 0
+   usr_prg_B を再実行するql_mpiexec_start の返り値が0 (成功)
+□ CTxx020 (again) usr_prg_B's calculation is done on each node
+   各ノードのusr_prg_B の計算処理が完了
+□ CTxx021 (again) usr_prg_B is waiting for resume-req on each node
+   ql_mpiexec_start の完了後、usr_prg_B が再開指示待ちになっている
+
+□ CTxx022 ql_mpiexec_finalize usr_prg_A return 0
+   usr_prg_A を終了するql_mpiexec_finalize の返り値が0 (成功)
+□ CTxx023 ql_mpiexec_finalize usr_prg_B return 0
+   usr_prg_B を終了するql_mpiexec_finalize の返り値が0 (成功)
+□ CTxx024 ql_server is still running on master node
+   usr_prg_Bの終了後、ql_serverがマスターノード上で動作している
+□ CTxx025 ql_mpiexec_finalize usr_prg_C return 0
+   usr_prg_C を終了するql_mpiexec_finalize の返り値が0 (成功)
+□ CTxx026 ql_server is not running on master node
+   すべてのMPIプログラムが終了したので、ql_serverが終了した
+
+CT20 デバイスマッピング (IBのバッファ)
+IBを使用するMPIプログラムを2本作成する。
+send/recvのバッファはcommon領域に2GB使用する。
+送信バッファは送信毎に異なる内容(送信回数の値など)を設定し、
+受信側で検証できるようにする。
+デバイスマッピングに異常があると、検証で失敗する。
+尚、McKernelに割り当てるメモリ量は3GBとする。
+□ CT20001 device mapping program test START
+□ CT20002 program 1 START
+   qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ノード)
+□ CT20003 MPI_Send/Recv OK
+   2 プロセス間でMPI_Send/Recvを実行
+□ CT20004 program 1 suspend
+   テストプログラム 1 が停止
+□ CT20005 program 2 START
+   qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ノード)
+□ CT20006 MPI_Send/Recv OK
+   2 プロセス間でMPI_Send/Recvを実行
+□ CT20007 program 2 suspend
+   テストプログラム 2 が停止
+□ CT20008 program 1 resume
+   qlmpi_start によってテストプログラム 1 が再開
+□ CT20009 MPI_Send/Recv OK
+   2 プロセス間でMPI_Send/Recvを実行
+□ CT20010 program 1 suspend
+   テストプログラム 1 が停止
+□ CT20011 program 2 resume
+   qlmpi_start によってテストプログラム 2 が再開
+□ CT20012 MPI_Send/Recv OK
+   2 プロセス間でMPI_Send/Recvを実行
+□ CT20013 program 2 suspend
+   テストプログラム 2 が停止
+□ CT20014 program 1 resume
+   qlmpi_finalize によってテストプログラム 1 が再開
+□ CT20015 program 1 END
+   テストプログラム1が終了
+□ CT20016 program 2 resume
+   qlmpi_finalize によってテストプログラム 2 が再開
+□ CT20017 program 2 END
+   テストプログラム2が終了
+□ CT20018 device mapping program test END
+
+CT21 mcexecのページテーブル更新確認
+ファイルをreadするプログラムを作成する。
+ファイル名によって、ファイル内容が確定できるようにする。
+(例えば、ファイル名が"1"のファイルは"1"で埋め尽くされているなど)
+入力領域はcommon領域に2GB確保する。
+ファイルのサイズも2GBとする。
+qlmpi_start毎にファイルを切り替える(コマンドラインに渡すなど)。
+入力結果を検証する。
+もし、ページインで最初と異なる物理ページにバッファが割り当てられ、且つ、
+mcexecのページテーブルが更新されていない場合は、readによって関係無いページが
+破壊される。また、ファイルの読み込み結果も検証で失敗する。
+尚、McKernelに割り当てるメモリ量は3GBとする。
+□ CT21001 mcexec page table update test START
+□ CT21002 program 1 START
+   qlmpi_start によってテストプログラム 1 起動 (1プロセス)
+□ CT21003 data read OK
+   ファイルを読み込んで内容確認した結果、問題なし
+□ CT21004 program 1 suspend
+   テストプログラム 1 が停止
+□ CT21005 program 2 START
+   qlmpi_start によってテストプログラム 2 起動 (1プロセス)
+□ CT21006 data read OK
+   ファイルを読み込んで内容確認した結果、問題なし
+□ CT21007 program 2 suspend
+   テストプログラム 2 が停止
+□ CT21008 program 1 resume
+   qlmpi_start によってテストプログラム 1 が再開
+□ CT21009 data read OK
+   ファイルを読み込んで内容確認した結果、問題なし
+□ CT21010 program 1 suspend
+   テストプログラム 1 が停止
+□ CT21011 program 2 resume
+   qlmpi_start によってテストプログラム 2 が再開
+□ CT21012 data read OK
+   ファイルを読み込んで内容確認した結果、問題なし
+□ CT21013 program 2 suspend
+   テストプログラム 2 が停止
+□ CT21014 program 1 resume
+   qlmpi_finalize によってテストプログラム 1 が再開
+□ CT21015 program 1 END
+   テストプログラム1が終了
+□ CT21016 program 2 resume
+   qlmpi_finalize によってテストプログラム 2 が再開
+□ CT21017 program 2 END
+   テストプログラム2が終了
+□ CT21018 mcexec page table update test END
+
+CT22 OMP
+OMP で複数のスレッドを使用する状況のテスト。
+□ CT22001 OMP test START
+□ CT22002 program 1 START
+   qlmpi_start によってテストプログラム 1 起動 (1プロセス/ノード x 2ノード)
+□ CT22003 check rank info
+   MPIとOMPのプロセス情報が出力されていることを確認する。
+   以下のように出力されればOK(順不同)。
+ mpi= 0/ 2, omp= 1/ 4
+ mpi= 0/ 2, omp= 3/ 4
+ mpi= 0/ 2, omp= 0/ 4
+ mpi= 0/ 2, omp= 2/ 4
+ mpi= 1/ 2, omp= 1/ 4
+ mpi= 1/ 2, omp= 0/ 4
+ mpi= 1/ 2, omp= 3/ 4
+ mpi= 1/ 2, omp= 2/ 4
+□ CT22004 program 1 suspend
+   テストプログラム 1 が停止
+□ CT22005 program 2 START
+   qlmpi_start によってテストプログラム 2 起動 (1プロセス/ノード x 2ノード)
+□ CT22006 check rank info
+   MPIとOMPのプロセス情報が出力されていることを確認する。
+□ CT22007 program 2 suspend
+   テストプログラム 2 が停止
+□ CT22008 program 1 resume
+   qlmpi_start によってテストプログラム 1 が再開
+□ CT22009 check rank info
+   MPIとOMPのプロセス情報が出力されていることを確認する。
+□ CT22010 program 1 suspend
+   テストプログラム 1 が停止
+□ CT22011 program 2 resume
+   qlmpi_start によってテストプログラム 2 が再開
+□ CT22012 check rank info
+   MPIとOMPのプロセス情報が出力されていることを確認する。
+□ CT22013 program 2 suspend
+   テストプログラム 2 が停止
+□ CT22014 program 1 resume
+   qlmpi_finalize によってテストプログラム 1 が再開
+□ CT22015 program 1 END
+   テストプログラム1が終了
+□ CT22016 program 2 resume
+   qlmpi_finalize によってテストプログラム 2 が再開
+□ CT22017 program 2 END
+   テストプログラム2が終了
+□ CT22018 OMP test END
+
+CT91 異常系
+□ CT91001 machinefile is not specified, so ql_mpiexec_start returns not 0
+□ CT91002 MPI program is not specified, so ql_mpiexec_start returns not 0
+□ CT91003 specified machinefile does not exist, so ql_mpiexec_start returns not 0
+□ CT91004 specified MPI program does not exist, so ql_mpiexec_start returns not 0
+□ CT91005 mpiexec is not found, so ql_mpiexec_start returns not 0
+□ CT91006 mpiexec abort, so ql_mpiexec_start returns not 0
+□ CT91007 machinefile is not specified, so ql_mpiexec_finalize returns not 0
+□ CT91008 MPI program is not specified, so ql_mpiexec_finalize returns not 0
+□ CT91009 specified machinefile is wrong, so ql_mpiexec_finalize returns not 0
+□ CT91010 specified MPI program name is wrong, so ql_mpiexec_finalize returns not 0
+□ CT91011 one of MPI processes aborts, so ql_mpiexec_start returns not 0
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT01.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT01
+MPI_NODE_NUM=1
+PROC_PER_NODE=1
+MASTER=wallaby15
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT02.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT02
+MPI_NODE_NUM=1
+PROC_PER_NODE=1
+MASTER=wallaby14
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT03.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT03
+MPI_NODE_NUM=1
+PROC_PER_NODE=2
+MASTER=wallaby15
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT04.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT04
+MPI_NODE_NUM=1
+PROC_PER_NODE=2
+MASTER=wallaby14
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT05.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT05
+MPI_NODE_NUM=1
+PROC_PER_NODE=8
+MASTER=wallaby15
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT06.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT06
+MPI_NODE_NUM=1
+PROC_PER_NODE=8
+MASTER=wallaby14
+SLAVE=
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT07.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT07
+MPI_NODE_NUM=2
+PROC_PER_NODE=1
+MASTER=wallaby15
+SLAVE=wallaby14
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT08.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT08
+MPI_NODE_NUM=2
+PROC_PER_NODE=1
+MASTER=wallaby14
+SLAVE=wallaby15
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT09.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT09
+MPI_NODE_NUM=2
+PROC_PER_NODE=2
+MASTER=wallaby15
+SLAVE=wallaby14
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT10.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT10
+MPI_NODE_NUM=2
+PROC_PER_NODE=2
+MASTER=wallaby14
+SLAVE=wallaby15
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT11.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT11
+MPI_NODE_NUM=2
+PROC_PER_NODE=8
+MASTER=wallaby15
+SLAVE=wallaby14
--- a/test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/CT12.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=CT12
+MPI_NODE_NUM=2
+PROC_PER_NODE=8
+MASTER=wallaby14
+SLAVE=wallaby15
--- a/test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt
+++ b/test/qlmpi/qlmpi_testsuite/test_cases/ECT91.txt
@ -0,0 +1,5 @@
+TEST_PREFIX=ECT91
+MPI_NODE_NUM=2
+PROC_PER_NODE=8
+MASTER=wallaby14
+SLAVE=wallaby15
--- a/test/qlmpi/qlmpi_testsuite/usr_prg_A.c
+++ b/test/qlmpi/qlmpi_testsuite/usr_prg_A.c
@ -0,0 +1,59 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mpi.h>
+
+#include <qlmpilib.h>
+
+/*
+ * Test program A for the qlmpi test suite.
+ *
+ * After MPI initialisation, each pass prints the host name, rank and pid,
+ * the current argc/argv (arguments joined with single spaces) and the value
+ * of the QL_TEST environment variable, reports "done=yes" and then calls
+ * ql_client().  While ql_client() returns QL_CONTINUE the pass is repeated
+ * from ql_loop; any other return value leads to MPI_Finalize().
+ *
+ * Returns 0.
+ */
+int
+main(int argc, char **argv)
+{
+	int rc;
+	int i;
+	int n;
+	int num_procs, my_rank;
+	size_t used;
+	const char *ql_test;
+	char hname[128];
+	char argv_str[1024];
+
+	/* Keep hname a valid string even if gethostname() fails or truncates. */
+	memset(hname, '\0', sizeof(hname));
+	gethostname(hname, sizeof(hname) - 1);
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+ql_loop:
+	printf("INFO This is A. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
+	memset(argv_str, '\0', sizeof(argv_str));
+
+	printf("%d:argc=%d\n", my_rank, argc);
+	/*
+	 * Join the arguments with single spaces.  Every append is bounded by
+	 * the space still left in argv_str; when an argument does not fit, the
+	 * string is truncated at that point instead of overrunning the buffer.
+	 */
+	used = 0;
+	for (i = 0; i < argc; i++) {
+		n = snprintf(argv_str + used, sizeof(argv_str) - used, "%s%s",
+			     (i > 0) ? " " : "", argv[i]);
+		if (n < 0) {
+			/* Formatting error: drop whatever was partially written. */
+			argv_str[used] = '\0';
+			break;
+		}
+		if ((size_t)n >= sizeof(argv_str) - used) {
+			/* Truncated; snprintf has already NUL-terminated. */
+			break;
+		}
+		used += (size_t)n;
+	}
+	printf("%d:argv=%s\n", my_rank, argv_str);
+
+	/*
+	 * getenv() returns NULL when QL_TEST is unset and passing NULL to %s is
+	 * undefined.  Print the "(null)" text glibc emits in that case so the
+	 * output seen by the test scripts does not change.
+	 */
+	ql_test = getenv("QL_TEST");
+	printf("%d:QL_TEST=%s\n", my_rank, ql_test ? ql_test : "(null)");
+
+	printf("%d:done=yes\n", my_rank);
+	fflush(stdout);
+
+	/* ql_client() receives argc/argv by address, so it may update them. */
+	rc = ql_client(&argc, &argv);
+
+	if (rc == QL_CONTINUE) {
+		printf("%d:resume=go_back\n", my_rank);
+		goto ql_loop;
+	}
+	else {
+		printf("%d:resume=go_finalize\n", my_rank);
+	}
+
+	MPI_Finalize();
+	printf("%d:finish=yes\n", my_rank);
+	return 0;
+}
--- a/test/qlmpi/qlmpi_testsuite/usr_prg_B.c
+++ b/test/qlmpi/qlmpi_testsuite/usr_prg_B.c
@ -0,0 +1,45 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mpi.h>
+
+#include <qlmpilib.h>
+
+/*
+ * Test program B for the qlmpi test suite.
+ *
+ * Each pass prints the host name, rank and pid, reports "done=yes" and calls
+ * ql_client().  The pass is repeated for as long as ql_client() returns
+ * QL_CONTINUE; afterwards MPI is finalized and the program returns 0.
+ */
+int
+main(int argc, char **argv)
+{
+	char hname[128];
+	int num_procs, my_rank;
+	int rc;
+
+	gethostname(hname, sizeof(hname));
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+	for (;;) {
+		printf("INFO This is B. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
+
+		printf("%d:done=yes\n", my_rank);
+		fflush(stdout);
+
+		/* Anything other than QL_CONTINUE ends the resume loop. */
+		rc = ql_client(&argc, &argv);
+		if (rc != QL_CONTINUE) {
+			break;
+		}
+		printf("%d:resume=go_back\n", my_rank);
+	}
+	printf("%d:resume=go_finalize\n", my_rank);
+
+	MPI_Finalize();
+	printf("%d:finish=yes\n", my_rank);
+	return 0;
+}
--- a/test/qlmpi/qlmpi_testsuite/usr_prg_C.c
+++ b/test/qlmpi/qlmpi_testsuite/usr_prg_C.c
@ -0,0 +1,45 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mpi.h>
+
+#include <qlmpilib.h>
+
+/*
+ * Test program C for the qlmpi test suite.
+ *
+ * Prints the host name, rank and pid, reports "done=yes" and hands control
+ * to ql_client().  A QL_CONTINUE result starts another pass; any other
+ * result falls through to MPI_Finalize().  Returns 0.
+ */
+int
+main(int argc, char **argv)
+{
+	int num_procs, my_rank;
+	int rc;
+	int go_back;
+	char hname[128];
+
+	gethostname(hname, sizeof(hname));
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+	do {
+		printf("INFO This is C. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
+
+		printf("%d:done=yes\n", my_rank);
+		fflush(stdout);
+
+		rc = ql_client(&argc, &argv);
+		go_back = (rc == QL_CONTINUE);
+
+		if (go_back) {
+			printf("%d:resume=go_back\n", my_rank);
+		}
+		else {
+			printf("%d:resume=go_finalize\n", my_rank);
+		}
+	} while (go_back);
+
+	MPI_Finalize();
+	printf("%d:finish=yes\n", my_rank);
+	return 0;
+}
--- a/test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c
+++ b/test/qlmpi/qlmpi_testsuite/usr_prg_irreg.c
@ -0,0 +1,56 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mpi.h>
+
+#include <qlmpilib.h>
+
+/*
+ * Irregular-case test program for the qlmpi test suite.
+ *
+ * On every pass the rank equal to abort_rank (default 0, optionally taken
+ * from argv[1]) prints "done=abort" and calls MPI_Abort(); every other rank
+ * prints "done=yes".  ql_client() is then called and the pass is repeated
+ * while it returns QL_CONTINUE.  Returns 0 after MPI_Finalize().
+ */
+int
+main(int argc, char **argv)
+{
+	char hname[128];
+	int num_procs, my_rank;
+	int abort_rank = 0;
+	int rc;
+
+	gethostname(hname, sizeof(hname));
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
+	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+	for (;;) {
+		printf("INFO This is irreg. host=%s, rank:%d, pid:%d\n", hname, my_rank, getpid());
+
+		/*
+		 * Re-read on every pass because ql_client() is handed argc/argv
+		 * by address.
+		 * NOTE(review): argv[1] is only honoured when argc > 2, i.e. when
+		 * at least two arguments are given -- confirm against the test
+		 * scripts whether argc > 1 was intended.
+		 */
+		if (argc > 2) {
+			abort_rank = atoi(argv[1]);
+		}
+
+		if (my_rank == abort_rank) {
+			printf("%d:done=abort\n", my_rank);
+			fflush(stdout);
+			MPI_Abort(MPI_COMM_WORLD, -1);
+		}
+		else {
+			printf("%d:done=yes\n", my_rank);
+			fflush(stdout);
+		}
+
+		rc = ql_client(&argc, &argv);
+		if (rc != QL_CONTINUE) {
+			break;
+		}
+		printf("%d:resume=go_back\n", my_rank);
+	}
+	printf("%d:resume=go_finalize\n", my_rank);
+
+	MPI_Finalize();
+	printf("%d:finish=yes\n", my_rank);
+	return 0;
+}
--- a/test/qlmpi/qlmpi_testsuite/util/mpiexec
+++ b/test/qlmpi/qlmpi_testsuite/util/mpiexec
@ -0,0 +1,5 @@
+#!/bin/sh
+
+# This is dummy mpiexec for irregular test
+echo "dummy mpiexec abort!!" >&2
+exit 1
--- a/test/qlmpi/qlmpi_testsuite/util/wrong_mfile
+++ b/test/qlmpi/qlmpi_testsuite/util/wrong_mfile
@ -0,0 +1,2 @@
+foo
+bar
--- a/test/qlmpi/swaptest.c
+++ b/test/qlmpi/swaptest.c
@ -0,0 +1,81 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>	/* mlock() */
+#include <fcntl.h>
+#include <stdint.h>	/* uintptr_t */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>	/* syscall() */
+/* Size in bytes of `buffer`, the work area handed to swapout(). */
+#define BUF_SIZE	(32*1024)
+
+/*
+ * Global objects whose addresses are printed by main().
+ */
+/* 1M ints without an initializer; main() writes "hello\n" into it and
+ * passes its first 16 KiB to mlock() and linux_mlock(). */
+int	data[1024*1024];
+/* 1 MiB char arrays with explicit initializers; main() overwrites them with
+ * printf format strings before swapout() and uses them as formats afterwards. */
+char	sym2[1024*1024] = { 10, 20, 30, 0 };
+char	sym3[1024*1024] = { 10, 20, 30, 0 };
+/* Pointer to a string literal; main() only prints the pointer's address. */
+char	*sym1 = "aaaaaa";
+/* Buffer passed to swapout() together with BUF_SIZE. */
+char	buffer[BUF_SIZE];
+/* Results of the 1 KiB and 1 MiB malloc() calls in main(); only printed. */
+char	*ptr1, *ptr2;
+/* Holds the file name passed to swapout() ("/tmp/pages" in main()). */
+char	fnamebuf[1024];
+
+/*
+ * Invoke the McKernel-specific swapout system call (number 801).
+ *
+ * fname, buf, sz and flag are forwarded to the system call unchanged.
+ * Returns the syscall() result converted to int.
+ */
+int
+swapout(char *fname, void *buf, size_t sz, int flag)
+{
+    return (int) syscall(801, fname, buf, sz, flag);
+}
+/*
+ * Invoke the McKernel-specific linux_mlock system call (number 802).
+ *
+ * addr and len are forwarded to the system call unchanged.
+ * Returns the syscall() result converted to int.
+ */
+int
+linux_mlock(const void *addr, size_t len)
+{
+    return (int) syscall(802, addr, len);
+}
+
+
+/*
+ * Swap-out test driver.
+ *
+ * Prints the addresses of several global and local objects and of two
+ * malloc()ed areas, calls mlock() and linux_mlock() on the first 16 KiB of
+ * `data`, stores printf format strings in sym2/sym3, calls swapout() with
+ * the file name "/tmp/pages", and finally prints `data` and uses sym2/sym3
+ * as printf formats.
+ *
+ * An optional single argument is parsed with atoi() and passed to swapout()
+ * as `flag` (default 0); for the values 1 and 2 a message describing the
+ * mode is printed first.
+ *
+ * Returns 0.
+ */
+int
+main(int argc, char **argv)
+{
+    int		cc;
+    int		flag = 0;
+
+    if (argc == 2) {
+	flag = atoi(argv[1]);
+    }
+    switch (flag) {
+    case 1:
+	printf("skipping real paging for debugging and just calling swapout in Linux\n");
+	break;
+    case 2:
+	printf("skipping calling swapout in Linux\n");
+	break;
+    default:
+	break;
+    }
+    /* %p requires a void * argument, so cast every pointer explicitly. */
+    printf("&data = %p\n", (void *) data);
+    printf("&sym1 = %p\n", (void *) &sym1);
+    printf("&sym2 = %p\n", (void *) sym2);
+    printf("&sym3 = %p\n", (void *) sym3);
+    printf("&cc = %p\n", (void *) &cc);
+    /* The malloc()ed areas are never dereferenced, only their addresses
+     * are printed. */
+    ptr1 = malloc(1024);
+    ptr2 = malloc(1024*1024);
+    printf("ptr1 = %p\n", (void *) ptr1);
+    printf("ptr2 = %p\n", (void *) ptr2);
+    sprintf((char*) data, "hello\n");
+    /*
+     * testing mlock in mckernel side
+     */
+    cc = mlock(data, 16*1024);
+    printf("McKernel mlock returns: %d\n", cc);
+    /*
+     * testing mlock in linux side
+     */
+    cc = linux_mlock(data, 16*1024);
+    printf("linux_mlock returns: %d\n", cc);
+    strcpy(sym2, "returns: %d\n");
+    strcpy(sym3, "data =  %d\n");
+
+    /* buf area will be used in swapout systemcall for debugging */
+    strcpy(fnamebuf, "/tmp/pages");
+    cc = swapout(fnamebuf, buffer, BUF_SIZE, flag);
+    printf("swapout returns: %d\n", cc);
+    /* data is an int array holding the text written above; %s needs char *. */
+    printf("data = %s", (char *) data);
+    /*
+     * sym2/sym3 are deliberately used as format strings: their contents
+     * were stored before swapout() and are read back here.
+     */
+    printf(sym2, cc);
+    /*
+     * The format in sym3 expects an int, so convert the address of `data`
+     * explicitly instead of passing a pointer to %d.
+     * NOTE(review): data[0] may have been the intended value -- confirm.
+     */
+    printf(sym3, (int)(uintptr_t) data);
+    return 0;
+}