From e601248bdc52cd33de47761e4077aff38861832c Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Mon, 8 Feb 2016 09:38:27 +0900 Subject: [PATCH 01/21] procfs: fix mcos%d/PID/auxv size --- kernel/procfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/procfs.c b/kernel/procfs.c index 3ba78b5f..4cc6b2da 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -620,7 +620,7 @@ void process_procfs_request(unsigned long rarg) * mcos%d/PID/auxv */ if (strcmp(p, "auxv") == 0) { - unsigned int limit = AUXV_LEN * sizeof(int); + unsigned int limit = AUXV_LEN * sizeof(unsigned long); unsigned int len = r->count; if (r->offset < limit) { if (limit < r->offset + r->count) { From 0ce698eb1f5b8a5b85b1305ef96e65346a18a6da Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Mon, 8 Feb 2016 11:36:03 +0900 Subject: [PATCH 02/21] mcexec: support for /sys mounted by mcoverlayfs --- arch/x86/tools/mcreboot-smp-x86.sh.in | 16 ++++++++++++++++ executer/user/mcexec.c | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/x86/tools/mcreboot-smp-x86.sh.in b/arch/x86/tools/mcreboot-smp-x86.sh.in index 786254e2..4e1619cc 100644 --- a/arch/x86/tools/mcreboot-smp-x86.sh.in +++ b/arch/x86/tools/mcreboot-smp-x86.sh.in @@ -44,6 +44,7 @@ fi # Remove mcoverlay if loaded if [ "$enable_mcoverlay" != "" ]; then if [ "`lsmod | grep mcoverlay`" != "" ]; then + if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_sys`" != "" ]; then umount -l /tmp/mcos/mcos0_sys; fi if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_proc`" != "" ]; then umount -l /tmp/mcos/mcos0_proc; fi if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi @@ -123,5 +124,20 @@ if [ "$enable_mcoverlay" != "" ]; then if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi if ! 
mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then echo "error: mount /tmp/mcos/mcos0_proc"; exit; fi mount --make-rprivate /proc + if [ ! -e /tmp/mcos/mcos0_sys ]; then mkdir -p /tmp/mcos/mcos0_sys; fi + if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then mkdir -p /tmp/mcos/mcos0_sys_upper; fi + if [ ! -e /tmp/mcos/mcos0_sys_work ]; then mkdir -p /tmp/mcos/mcos0_sys_work; fi + if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then echo "error: mount /tmp/mcos/mcos0_sys"; exit; fi + mount --make-rprivate /sys + for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do + if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then + rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid + fi + done + for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do + if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then + rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid + fi + done fi diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index b4b001b1..7f69f495 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1304,6 +1304,7 @@ int main(int argc, char **argv) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) __dprintf("mcoverlay enable\n"); char mcos_procdir[PATH_MAX]; + char mcos_sysdir[PATH_MAX]; error = isunshare(); if (error == 0) { @@ -1319,6 +1320,13 @@ int main(int argc, char **argv) strerror(errno)); return 1; } + + sprintf(mcos_sysdir, "/tmp/mcos/mcos%d_sys", mcosid); + if (mount(mcos_sysdir, "/sys", NULL, MS_BIND, NULL)) { + fprintf(stderr, "Error: Failed to mount. 
(%s)\n", + strerror(errno)); + return 1; + } } else if (error == -1) { return 1; } From f214ff1b57fb1504c67a1bfaebd61df9624ceab0 Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Mon, 8 Feb 2016 16:00:52 +0900 Subject: [PATCH 03/21] mcctrl: add MCEXEC_UP_SYS_MOUNT, MCEXEC_UP_SYS_UNSHARE --- configure | 303 +++++++++++++++++++++++++---- configure.ac | 65 +++++++ executer/include/uprotocol.h | 15 ++ executer/kernel/mcctrl/config.h.in | 25 +++ executer/kernel/mcctrl/control.c | 90 +++++++++ executer/kernel/mcctrl/driver.c | 2 + 6 files changed, 460 insertions(+), 40 deletions(-) create mode 100644 executer/kernel/mcctrl/config.h.in diff --git a/configure b/configure index d340febf..5c932b69 100755 --- a/configure +++ b/configure @@ -649,6 +649,7 @@ ac_user_opts=' enable_option_checking with_kernelsrc with_target +with_system_map enable_dcfa ' ac_precious_vars='build_alias @@ -1277,6 +1278,8 @@ Optional Packages: /lib/modules/uname_r/build --with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86} target, default is attached-mic + --with-system_map=path Path to 'System.map file', default is + /boot/System.map-uname_r Some influential environment variables: CC C compiler command @@ -1784,6 +1787,15 @@ else fi + +# Check whether --with-system_map was given. +if test "${with_system_map+set}" = set; then : + withval=$with_system_map; WITH_SYSTEM_MAP=$withval +else + WITH_SYSTEM_MAP=yes +fi + + # Check whether --enable-dcfa was given. if test "${enable_dcfa+set}" = set; then : enableval=$enable_dcfa; @@ -3798,6 +3810,96 @@ esac KDIR="$WITH_KERNELSRC" TARGET="$WITH_TARGET" +MCCTRL_LINUX_SYMTAB="" +case "X$WITH_SYSTEM_MAP" in + Xyes | Xno | X) + MCCTRL_LINUX_SYMTAB="" + ;; + *) + MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for System.map" >&5 +$as_echo_n "checking for System.map... 
" >&6; } +if test -f "$MCCTRL_LINUX_SYMTAB"; then + MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB" +elif test -f "/boot/System.map-`uname -r`"; then + MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`" +elif test -f "$KDIR/System.map"; then + MCCTRL_LINUX_SYMTAB="$KDIR/System.map" +fi + +if test "$MCCTRL_LINUX_SYMTAB" == ""; then + as_fn_error $? "could not find" "$LINENO" 5 +fi + +if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then + as_fn_error $? "could not read System.map file, no read permission?" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MCCTRL_LINUX_SYMTAB" >&5 +$as_echo "$MCCTRL_LINUX_SYMTAB" >&6; } + +MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB" + +# MCCTRL_FIND_KSYM(SYMBOL) +# ------------------------------------------------------ +# Search System.map for address of the given symbol and +# do one of three things in config.h: +# If not found, leave MCCTRL_KSYM_foo undefined +# If found to be exported, "#define MCCTRL_KSYM_foo 0" +# If found not to be exported, "#define MCCTRL_KSYM_foo 0x" + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_mount" >&5 +$as_echo_n "checking System.map for symbol sys_mount... " >&6; } + mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_mount\$" | cut -d\ -f1` + if test -z $mcctrl_addr; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +$as_echo "not found" >&6; } + else + mcctrl_result=$mcctrl_addr + mcctrl_addr="0x$mcctrl_addr" + + if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_mount\$" >/dev/null`; then + mcctrl_result="exported" + mcctrl_addr="0" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 +$as_echo "$mcctrl_result" >&6; } + +cat >>confdefs.h <<_ACEOF +#define MCCTRL_KSYM_sys_mount $mcctrl_addr +_ACEOF + + fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5 +$as_echo_n "checking System.map for symbol sys_unshare... 
" >&6; } + mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1` + if test -z $mcctrl_addr; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +$as_echo "not found" >&6; } + else + mcctrl_result=$mcctrl_addr + mcctrl_addr="0x$mcctrl_addr" + + if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_unshare\$" >/dev/null`; then + mcctrl_result="exported" + mcctrl_addr="0" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 +$as_echo "$mcctrl_result" >&6; } + +cat >>confdefs.h <<_ACEOF +#define MCCTRL_KSYM_sys_unshare $mcctrl_addr +_ACEOF + + fi @@ -3816,6 +3918,10 @@ TARGET="$WITH_TARGET" + + +ac_config_headers="$ac_config_headers executer/kernel/mcctrl/config.h" + ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcoverlayfs/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" @@ -3915,43 +4021,7 @@ test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' -# Transform confdefs.h into DEFS. -# Protect against shell expansion while executing Makefile rules. -# Protect against Makefile macro expansion. -# -# If the first sed substitution is executed (which looks for macros that -# take arguments), then branch to the quote section. Otherwise, -# look for a macro that doesn't take arguments. 
-ac_script=' -:mline -/\\$/{ - N - s,\\\n,, - b mline -} -t clear -:clear -s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g -t quote -s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g -t quote -b any -:quote -s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g -s/\[/\\&/g -s/\]/\\&/g -s/\$/$$/g -H -:any -${ - g - s/^\n// - s/\n/ /g - p -} -' -DEFS=`sed -n "$ac_script" confdefs.h` - +DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= @@ -4385,11 +4455,15 @@ case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" +config_headers="$ac_config_headers" _ACEOF @@ -4410,10 +4484,15 @@ Usage: $0 [OPTION]... [TAG]... --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE Configuration files: $config_files +Configuration headers: +$config_headers + Report bugs to the package provider." _ACEOF @@ -4474,7 +4553,18 @@ do esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; - --he | --h | --help | --hel | -h ) + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? 
"ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) @@ -4530,6 +4620,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in + "executer/kernel/mcctrl/config.h") CONFIG_HEADERS="$CONFIG_HEADERS executer/kernel/mcctrl/config.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "executer/user/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/Makefile" ;; "executer/kernel/mcctrl/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/Makefile" ;; @@ -4556,6 +4647,7 @@ done # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers fi # Have a temporary directory for convenience. Make it in the build tree @@ -4743,8 +4835,116 @@ fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF -eval set X " :F $CONFIG_FILES " +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. + +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? 
"could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS " shift for ac_tag do @@ -4952,7 +5152,30 @@ which seems to be undefined. Please make sure it is defined" >&2;} esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; - + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? 
"could not create -" "$LINENO" 5 + fi + ;; esac diff --git a/configure.ac b/configure.ac index 1eb76487..f5cce81e 100644 --- a/configure.ac +++ b/configure.ac @@ -27,6 +27,11 @@ AC_ARG_WITH([target], [--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]), [WITH_TARGET=$withval],[WITH_TARGET=yes]) +AC_ARG_WITH([system_map], + AS_HELP_STRING( + [--with-system_map=path],[Path to 'System.map file', default is /boot/System.map-uname_r]), + [WITH_SYSTEM_MAP=$withval],[WITH_SYSTEM_MAP=yes]) + AC_ARG_ENABLE([dcfa], [AS_HELP_STRING( [--enable-dcfa],[Enable DCFA modules])],[],[enable_dcfa=no]) @@ -139,6 +144,65 @@ esac KDIR="$WITH_KERNELSRC" TARGET="$WITH_TARGET" +MCCTRL_LINUX_SYMTAB="" +case "X$WITH_SYSTEM_MAP" in + Xyes | Xno | X) + MCCTRL_LINUX_SYMTAB="" + ;; + *) + MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP" + ;; +esac + +AC_MSG_CHECKING([[for System.map]]) +if test -f "$MCCTRL_LINUX_SYMTAB"; then + MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB" +elif test -f "/boot/System.map-`uname -r`"; then + MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`" +elif test -f "$KDIR/System.map"; then + MCCTRL_LINUX_SYMTAB="$KDIR/System.map" +fi + +if test "$MCCTRL_LINUX_SYMTAB" == ""; then + AC_MSG_ERROR([could not find]) +fi + +if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then + AC_MSG_ERROR([could not read System.map file, no read permission?]) +fi +AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB]) + +MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB" + +# MCCTRL_FIND_KSYM(SYMBOL) +# ------------------------------------------------------ +# Search System.map for address of the given symbol and +# do one of three things in config.h: +# If not found, leave MCCTRL_KSYM_foo undefined +# If found to be exported, "#define MCCTRL_KSYM_foo 0" +# If found not to be exported, "#define MCCTRL_KSYM_foo 0x" +AC_DEFUN([MCCTRL_FIND_KSYM],[ + AC_MSG_CHECKING([[System.map for symbol $1]]) + mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d\ -f1` + if test 
-z $mcctrl_addr; then + AC_MSG_RESULT([not found]) + else + mcctrl_result=$mcctrl_addr + mcctrl_addr="0x$mcctrl_addr" + m4_ifval([$2],[],[ + if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null`; then + mcctrl_result="exported" + mcctrl_addr="0" + fi + ]) + AC_MSG_RESULT([$mcctrl_result]) + AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported]) + fi +]) + +MCCTRL_FIND_KSYM([sys_mount]) +MCCTRL_FIND_KSYM([sys_unshare]) + AC_SUBST(CC) AC_SUBST(XCC) AC_SUBST(ARCH) @@ -157,6 +221,7 @@ AC_SUBST(IHK_RELEASE_DATE) AC_SUBST(MCKERNEL_RELEASE_DATE) AC_SUBST(DCFA_RESEASE_DATE) +AC_CONFIG_HEADERS([executer/kernel/mcctrl/config.h]) AC_CONFIG_FILES([ Makefile executer/user/Makefile diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 57cda78d..6247cc7b 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -48,6 +48,9 @@ #define MCEXEC_UP_OPEN_EXEC 0x30a02912 #define MCEXEC_UP_CLOSE_EXEC 0x30a02913 +#define MCEXEC_UP_SYS_MOUNT 0x30a02914 +#define MCEXEC_UP_SYS_UNSHARE 0x30a02915 + #define MCEXEC_UP_DEBUG_LOG 0x40000000 #define MCEXEC_UP_TRANSFER_TO_REMOTE 0 @@ -167,4 +170,16 @@ struct newprocess_desc { int pid; }; +struct sys_mount_desc { + char *dev_name; + char *dir_name; + char *type; + unsigned long flags; + void *data; +}; + +struct sys_unshare_desc { + unsigned long unshare_flags; +}; + #endif diff --git a/executer/kernel/mcctrl/config.h.in b/executer/kernel/mcctrl/config.h.in new file mode 100644 index 00000000..2cd067e0 --- /dev/null +++ b/executer/kernel/mcctrl/config.h.in @@ -0,0 +1,25 @@ +/* executer/kernel/mcctrl/config.h.in. Generated from configure.ac by autoheader. 
*/ + +/* Define to address of kernel symbol sys_mount, or 0 if exported */ +#undef MCCTRL_KSYM_sys_mount + +/* Define to address of kernel symbol sys_unshare, or 0 if exported */ +#undef MCCTRL_KSYM_sys_unshare + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index f2a88b9b..434f16fb 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -36,6 +36,7 @@ #include #include #include +#include "config.h" #include "mcctrl.h" //#define DEBUG @@ -46,6 +47,28 @@ #define dprintk(...) 
#endif +#ifdef MCCTRL_KSYM_sys_unshare +#if MCCTRL_KSYM_sys_unshare +typedef int (*int_star_fn_ulong_t)(unsigned long); +int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = + (int_star_fn_ulong_t) + MCCTRL_KSYM_sys_unshare; +#else // exported +int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = NULL; +#endif +#endif + +#ifdef MCCTRL_KSYM_sys_mount +#if MCCTRL_KSYM_sys_mount +typedef int (*int_star_fn_char_char_char_ulong_void_t)(char *, char *, char *, unsigned long, void *); +int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = + (int_star_fn_char_char_char_ulong_void_t) + MCCTRL_KSYM_sys_mount; +#else // exported +int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL; +#endif +#endif + //static DECLARE_WAIT_QUEUE_HEAD(wq_prepare); //extern struct mcctrl_channel *channels; int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu); @@ -1011,6 +1034,67 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use return 0; } +long mcexec_sys_mount(struct sys_mount_desc *__user arg) +{ + struct sys_mount_desc desc; + struct cred *promoted; + const struct cred *original; + int ret; + + if (copy_from_user(&desc, arg, sizeof(desc))) { + return -EFAULT; + } + + promoted = prepare_creds(); + if (!promoted) { + return -ENOMEM; + } + cap_raise(promoted->cap_effective, CAP_SYS_ADMIN); + original = override_creds(promoted); + +#if MCCTRL_KSYM_sys_mount + ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type, + desc.flags, desc.data); +#else + ret = -EFAULT; +#endif + + revert_creds(original); + put_cred(promoted); + + return ret; +} + +long mcexec_sys_unshare(struct sys_unshare_desc *__user arg) +{ + struct sys_unshare_desc desc; + struct cred *promoted; + const struct cred *original; + int ret; + + if (copy_from_user(&desc, arg, sizeof(desc))) { + return -EFAULT; + } + + promoted = prepare_creds(); + if (!promoted) { + return -ENOMEM; + } + 
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN); + original = override_creds(promoted); + +#if MCCTRL_KSYM_sys_unshare + ret = mcctrl_sys_unshare(desc.unshare_flags); +#else + ret = -EFAULT; +#endif + + revert_creds(original); + put_cred(promoted); + + return ret; +} + long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, struct file *file) { @@ -1065,6 +1149,12 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, case MCEXEC_UP_GET_CREDV: return mcexec_getcredv((int *)arg); + case MCEXEC_UP_SYS_MOUNT: + return mcexec_sys_mount((struct sys_mount_desc *)arg); + + case MCEXEC_UP_SYS_UNSHARE: + return mcexec_sys_unshare((struct sys_unshare_desc *)arg); + case MCEXEC_UP_DEBUG_LOG: return mcexec_debug_log(os, arg); } diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index de8c84b2..60a545bb 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -68,6 +68,8 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = { { .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl }, + { .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl }, + { .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl }, { .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl }, }; From 137e0a799cff22a82eb49788f766c58f4affb538 Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Mon, 8 Feb 2016 16:27:03 +0900 Subject: [PATCH 04/21] mcexec: unshare and mount request through mcctrl --- executer/user/Makefile.in | 2 +- executer/user/mcexec.c | 90 +++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index 0bd60514..330f448e 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -16,5 +16,5 @@ clean: install: mkdir -p -m 755 $(BINDIR) - install -o root -m 4755 mcexec 
$(BINDIR) + install -m 755 mcexec $(BINDIR) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 7f69f495..9057ba2c 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1301,47 +1301,6 @@ int main(int argc, char **argv) ++optind; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) - __dprintf("mcoverlay enable\n"); - char mcos_procdir[PATH_MAX]; - char mcos_sysdir[PATH_MAX]; - - error = isunshare(); - if (error == 0) { - if (unshare(CLONE_NEWNS)) { - fprintf(stderr, "Error: Failed to unshare. (%s)\n", - strerror(errno)); - return 1; - } - - sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid); - if (mount(mcos_procdir, "/proc", NULL, MS_BIND, NULL)) { - fprintf(stderr, "Error: Failed to mount. (%s)\n", - strerror(errno)); - return 1; - } - - sprintf(mcos_sysdir, "/tmp/mcos/mcos%d_sys", mcosid); - if (mount(mcos_sysdir, "/sys", NULL, MS_BIND, NULL)) { - fprintf(stderr, "Error: Failed to mount. (%s)\n", - strerror(errno)); - return 1; - } - } else if (error == -1) { - return 1; - } -#else - __dprintf("mcoverlay disable\n"); -#endif - - __dprintf("before seteuid(): uid=%d, euid=%d\n", getuid(), geteuid()); - if (seteuid(getuid())) { - fprintf(stderr, "Error: Failed to seteuid. (%s)\n", - strerror(errno)); - return 1; - } - __dprintf("after seteuid(): uid=%d, euid=%d\n", getuid(), geteuid()); - sprintf(dev, "/dev/mcos%d", mcosid); /* No more arguments? */ @@ -1363,6 +1322,55 @@ int main(int argc, char **argv) return 1; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) + __dprintf("mcoverlay enable\n"); + char mcos_procdir[PATH_MAX]; + char mcos_sysdir[PATH_MAX]; + struct sys_unshare_desc unshare_desc; + struct sys_mount_desc mount_desc; + + error = isunshare(); + if (error == 0) { + unshare_desc.unshare_flags = CLONE_NEWNS; + if (ioctl(fd, MCEXEC_UP_SYS_UNSHARE, + (unsigned long)&unshare_desc) != 0) { + fprintf(stderr, "Error: Failed to unshare. 
(%s)\n", + strerror(errno)); + return 1; + } + + sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid); + mount_desc.dev_name = mcos_procdir; + mount_desc.dir_name = "/proc"; + mount_desc.type = NULL; + mount_desc.flags = MS_BIND; + mount_desc.data = NULL; + if (ioctl(fd, MCEXEC_UP_SYS_MOUNT, + (unsigned long)&mount_desc) != 0) { + fprintf(stderr, "Error: Failed to mount /proc. (%s)\n", + strerror(errno)); + return 1; + } + + sprintf(mcos_sysdir, "/tmp/mcos/mcos%d_sys", mcosid); + mount_desc.dev_name = mcos_sysdir; + mount_desc.dir_name = "/sys"; + mount_desc.type = NULL; + mount_desc.flags = MS_BIND; + mount_desc.data = NULL; + if (ioctl(fd, MCEXEC_UP_SYS_MOUNT, + (unsigned long)&mount_desc) != 0) { + fprintf(stderr, "Error: Failed to mount /sys. (%s)\n", + strerror(errno)); + return 1; + } + } else if (error == -1) { + return 1; + } +#else + __dprintf("mcoverlay disable\n"); +#endif + if (lookup_exec_path(argv[optind], path, sizeof(path)) != 0) { fprintf(stderr, "error: finding file: %s\n", argv[optind]); return 1; From 7f01d273d02628c362efaf2473e55ea3bfa2d5f1 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 9 Feb 2016 12:45:58 +0900 Subject: [PATCH 05/21] mcctrl: fix out-of-tree build (not finding config.h) --- executer/kernel/mcctrl/Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/executer/kernel/mcctrl/Makefile.in b/executer/kernel/mcctrl/Makefile.in index a1af82ea..8b0c293b 100644 --- a/executer/kernel/mcctrl/Makefile.in +++ b/executer/kernel/mcctrl/Makefile.in @@ -7,7 +7,7 @@ IHK_BASE=$(src)/../../../../ihk obj-m += mcctrl.o -ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" +ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ mcctrl-y := driver.o control.o 
ikc.o syscall.o procfs.o binfmt_mcexec.o mcctrl-y += sysfs.o sysfs_files.o From 14c5bc08c23a0f96315889d095e03baea6c00f18 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Tue, 9 Feb 2016 14:06:36 +0900 Subject: [PATCH 06/21] mcexec: check Linux version from actual kernel tree instead of system wide include --- executer/user/Makefile.in | 3 ++- executer/user/mcexec.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index 330f448e..e784df2e 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -1,5 +1,6 @@ CC=@CC@ BINDIR=@BINDIR@ +KDIR ?= @KDIR@ CFLAGS=-Wall -O -fPIE -pie VPATH=@abs_srcdir@ TARGET=mcexec @@ -7,7 +8,7 @@ TARGET=mcexec all: $(TARGET) mcexec: mcexec.c - $(CC) $(CFLAGS) $(EXTRA_CFLAGS) -pthread -o $@ $^ $(EXTRA_OBJS) + $(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -pthread -o $@ $^ $(EXTRA_OBJS) clean: $(RM) $(TARGET) *.o diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 9057ba2c..2da3ed03 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include "../include/uprotocol.h" //#define DEBUG From c0cc6ac6db14abe97ef3f0c4559e1a6070892dc9 Mon Sep 17 00:00:00 2001 From: Ken Sato Date: Tue, 9 Feb 2016 14:54:53 +0900 Subject: [PATCH 07/21] Add skeleton for perf_event_open. --- arch/x86/kernel/include/syscall_list.h | 5 +- executer/user/mcexec.c | 5 + kernel/include/process.h | 14 ++- kernel/process.c | 1 + kernel/syscall.c | 159 ++++++++++++++++++++----- 5 files changed, 148 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 04bc1e8f..2f955348 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -20,7 +20,7 @@ * syscall_name[] only, no handler exists. 
*/ -SYSCALL_DELEGATED(0, read) +SYSCALL_HANDLED(0, read) SYSCALL_DELEGATED(1, write) SYSCALL_DELEGATED(2, open) SYSCALL_HANDLED(3, close) @@ -35,7 +35,7 @@ SYSCALL_HANDLED(12, brk) SYSCALL_HANDLED(13, rt_sigaction) SYSCALL_HANDLED(14, rt_sigprocmask) SYSCALL_HANDLED(15, rt_sigreturn) -SYSCALL_DELEGATED(16, ioctl) +SYSCALL_HANDLED(16, ioctl) SYSCALL_DELEGATED(17, pread64) SYSCALL_DELEGATED(18, pwrite64) SYSCALL_DELEGATED(20, writev) @@ -133,6 +133,7 @@ SYSCALL_HANDLED(279, move_pages) SYSCALL_DELEGATED(281, epoll_pwait) SYSCALL_HANDLED(282, signalfd) SYSCALL_HANDLED(289, signalfd4) +SYSCALL_HANDLED(298, perf_event_open) #ifdef DCFA_KMOD SYSCALL_HANDLED(303, mod_call) #endif diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 2da3ed03..1992a449 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -2198,6 +2198,11 @@ return_execve2: do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; + case __NR_perf_event_open: + ret = open("/dev/null", O_RDONLY); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + case __NR_rt_sigaction: act_sigaction(&w); do_syscall_return(fd, cpu, 0, 0, 0, 0, 0); diff --git a/kernel/include/process.h b/kernel/include/process.h index 44d0d83b..4ad8b8d6 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -312,18 +312,22 @@ struct vm_regions { struct process_vm; -struct sigfd { - struct sigfd *next; +struct mckfd { + struct mckfd *next; int fd; - __sigset_t mask; + long data; + void *opt; + long (*read_cb)(struct mckfd *, ihk_mc_user_context_t *); + int (*ioctl_cb)(struct mckfd *, ihk_mc_user_context_t *); + int (*close_cb)(struct mckfd *, ihk_mc_user_context_t *); }; + #define SFD_CLOEXEC 02000000 #define SFD_NONBLOCK 04000 struct sig_common { ihk_spinlock_t lock; ihk_atomic_t use; - struct sigfd *sigfd; struct k_sigaction action[_NSIG]; struct list_head sigpending; }; @@ -425,6 +429,8 @@ struct process { /* Store signal sent to parent when the process terminates. 
*/ int termsig; + ihk_spinlock_t mckfd_lock; + struct mckfd *mckfd; }; void hold_thread(struct thread *ftn); diff --git a/kernel/process.c b/kernel/process.c index 2889c528..3b0e0a07 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -96,6 +96,7 @@ init_process(struct process *proc, struct process *parent) INIT_LIST_HEAD(&proc->ptraced_children_list); mcs_rwlock_init(&proc->threads_lock); mcs_rwlock_init(&proc->children_lock); + ihk_mc_spinlock_init(&proc->mckfd_lock); waitq_init(&proc->waitpid_q); ihk_atomic_set(&proc->refcount, 2); } diff --git a/kernel/syscall.c b/kernel/syscall.c index a60afd41..41e5639f 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -2359,34 +2359,85 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) return 0; } +SYSCALL_DECLARE(read) +{ + int fd = ihk_mc_syscall_arg0(ctx); + long rc; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct mckfd *fdp; + long irqstate; + + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(fdp = proc->mckfd; fdp; fdp = fdp->next) + if(fdp->fd == fd) + break; + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + + if(fdp && fdp->read_cb){ +kprintf("read: found system fd %d\n", fd); + rc = fdp->read_cb(fdp, ctx); + } + else{ + rc = syscall_generic_forwarding(__NR_read, ctx); + } + return rc; +} + +SYSCALL_DECLARE(ioctl) +{ + int fd = ihk_mc_syscall_arg0(ctx); + long rc; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct mckfd *fdp; + long irqstate; + + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(fdp = proc->mckfd; fdp; fdp = fdp->next) + if(fdp->fd == fd) + break; + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + + if(fdp && fdp->ioctl_cb){ +kprintf("ioctl: found system fd %d\n", fd); + rc = fdp->ioctl_cb(fdp, ctx); + } + else{ + rc = syscall_generic_forwarding(__NR_ioctl, ctx); + } + return rc; +} + SYSCALL_DECLARE(close) { int fd = 
ihk_mc_syscall_arg0(ctx); - int rc; + long rc; struct thread *thread = cpu_local_var(current); - struct sigfd *sfd; - struct sigfd *sb; - long irqstate; + struct process *proc = thread->proc; + struct mckfd *fdp; + struct mckfd *fdq; + long irqstate; - irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); - for(sfd = thread->sigcommon->sigfd, sb = NULL; sfd; sb = sfd, sfd = sfd->next) - if(sfd->fd == fd) + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(fdp = proc->mckfd, fdq = NULL; fdp; fdq = fdp, fdp = fdp->next) + if(fdp->fd == fd) break; - if(sfd){ - struct syscall_request request IHK_DMA_ALIGN; - if(sb) - sb->next = sfd->next; + + if(fdp){ +kprintf("close: found system fd %d pid=%d\n", fd, proc->pid); + if(fdq) + fdq->next = fdp->next; else - thread->sigcommon->sigfd = sfd->next; - ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); - request.number = __NR_signalfd4; - request.args[0] = 1; - request.args[1] = sfd->fd; - kfree(sfd); - rc = do_syscall(&request, ihk_mc_get_processor_id(), 0); + proc->mckfd = fdp->next; + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + if(fdp->close_cb) + fdp->close_cb(fdp, ctx); + kfree(fdp); + rc = syscall_generic_forwarding(__NR_close, ctx); } else{ - ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); rc = syscall_generic_forwarding(__NR_close, ctx); } return rc; @@ -2487,7 +2538,8 @@ SYSCALL_DECLARE(signalfd4) { int fd = ihk_mc_syscall_arg0(ctx); struct thread *thread = cpu_local_var(current); - struct sigfd *sfd; + struct process *proc = thread->proc; + struct mckfd *sfd; long irqstate; sigset_t *maskp = (sigset_t *)ihk_mc_syscall_arg1(ctx);; __sigset_t mask; @@ -2501,10 +2553,9 @@ SYSCALL_DECLARE(signalfd4) if(flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) return -EINVAL; - irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); if(fd == -1){ struct syscall_request request IHK_DMA_ALIGN; - 
ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); + request.number = __NR_signalfd4; request.args[0] = 0; request.args[1] = flags; @@ -2512,25 +2563,73 @@ SYSCALL_DECLARE(signalfd4) if(fd < 0){ return fd; } - sfd = kmalloc(sizeof(struct sigfd), IHK_MC_AP_NOWAIT); + sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT); if(!sfd) return -ENOMEM; sfd->fd = fd; - irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); - sfd->next = thread->sigcommon->sigfd; - thread->sigcommon->sigfd = sfd; + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + sfd->next = proc->mckfd; + proc->mckfd = sfd; } else{ - for(sfd = thread->sigcommon->sigfd; sfd; sfd = sfd->next) + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + for(sfd = proc->mckfd; sfd; sfd = sfd->next) if(sfd->fd == fd) break; if(!sfd){ - ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); return -EINVAL; } } - memcpy(&sfd->mask, &mask, sizeof mask); - ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); + memcpy(&sfd->data, &mask, sizeof mask); + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); + return sfd->fd; +} + +static long +perf_event_read(struct mckfd *sfd, ihk_mc_user_context_t *ctx) +{ + return 0; +} + +static int +perf_event_ioctl(struct mckfd *sfd, ihk_mc_user_context_t *ctx) +{ + return 0; +} + +static int +perf_event_close(struct mckfd *sfd, ihk_mc_user_context_t *ctx) +{ + return 0; +} + +SYSCALL_DECLARE(perf_event_open) +{ + struct syscall_request request IHK_DMA_ALIGN; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct mckfd *sfd; + int fd; + long irqstate; + + request.number = __NR_perf_event_open; + request.args[0] = 0; + fd = do_syscall(&request, ihk_mc_get_processor_id(), 0); + if(fd < 0){ + return fd; + } + sfd = kmalloc(sizeof(struct mckfd), IHK_MC_AP_NOWAIT); + if(!sfd) + return -ENOMEM; + sfd->fd = fd; + sfd->read_cb = perf_event_read; + sfd->ioctl_cb = 
perf_event_ioctl; + sfd->close_cb = perf_event_close; + irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + sfd->next = proc->mckfd; + proc->mckfd = sfd; + ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); return sfd->fd; } From a866192db7ce3d93ec84ff6fe8b2c802f6785dc2 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Wed, 10 Feb 2016 08:11:02 +0900 Subject: [PATCH 08/21] refactoring /proc --- executer/kernel/mcctrl/control.c | 38 +- executer/kernel/mcctrl/driver.c | 6 + executer/kernel/mcctrl/ikc.c | 19 +- executer/kernel/mcctrl/mcctrl.h | 14 + executer/kernel/mcctrl/procfs.c | 1027 ++++++++++++++++++------------ kernel/include/syscall.h | 3 + kernel/init.c | 1 - kernel/process.c | 5 +- kernel/procfs.c | 462 ++++---------- kernel/syscall.c | 2 - 10 files changed, 793 insertions(+), 784 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 434f16fb..f8362e66 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -72,9 +72,6 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long //static DECLARE_WAIT_QUEUE_HEAD(wq_prepare); //extern struct mcctrl_channel *channels; int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu); -extern int procfs_create_entry(void *os, int ref, int osnum, int pid, char *name, - int mode, void *opaque); -extern void procfs_delete_entry(void *os, int osnum, char *fname); static long mcexec_prepare_image(ihk_os_t os, struct program_load_desc * __user udesc) @@ -290,12 +287,15 @@ static void release_handler(ihk_os_t os, void *param) { struct handlerinfo *info = param; struct ikc_scd_packet isp; + int os_ind = ihk_host_os_get_index(os); memset(&isp, '\0', sizeof isp); isp.msg = SCD_MSG_CLEANUP_PROCESS; isp.pid = info->pid; mcctrl_ikc_send(os, 0, &isp); + if(os_ind >= 0) + delete_pid_entry(os_ind, info->pid); kfree(param); } @@ -861,7 +861,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) struct mckernel_exec_file 
*mcef_iter; int retval; int os_ind = ihk_host_os_get_index(os); - char *proc_name, *pathbuf, *fullpath; + char *pathbuf, *fullpath; if (os_ind < 0) { return EINVAL; @@ -872,12 +872,6 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) return ENOMEM; } - proc_name = kmalloc(PATH_MAX, GFP_TEMPORARY); - if (!proc_name) { - retval = ENOMEM; - goto out_error_free_path; - } - file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) { @@ -896,8 +890,6 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) goto out_put_file; } - snprintf(proc_name, 1024, "mcos%d/%d/exe", os_ind, current->tgid); - spin_lock_irq(&mckernel_exec_file_lock); /* Find previous file (if exists) and drop it */ list_for_each_entry(mcef_iter, &mckernel_exec_files, list) { @@ -906,9 +898,6 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) fput(mcef_iter->fp); list_del(&mcef_iter->list); kfree(mcef_iter); - /* Drop old /proc/self/exe */ - procfs_delete_entry(os, os_ind, proc_name); - dprintk("%d open_exec dropped previous executable \n", (int)current->tgid); break; } } @@ -920,15 +909,12 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename) list_add_tail(&mcef->list, &mckernel_exec_files); /* Create /proc/self/exe entry */ - if (procfs_create_entry(os, 0, os_ind, current->tgid, proc_name, - S_IFLNK, fullpath) != 0) { - printk("ERROR: could not create a procfs entry for %s.\n", proc_name); - } + add_pid_entry(os_ind, current->tgid); + proc_exe_link(os_ind, current->tgid, fullpath); spin_unlock(&mckernel_exec_file_lock); dprintk("%d open_exec and holding file: %s\n", (int)current->tgid, filename); - kfree(proc_name); kfree(pathbuf); return 0; @@ -937,9 +923,6 @@ out_put_file: fput(file); out_error_free: - kfree(proc_name); - -out_error_free_path: kfree(pathbuf); return -retval; } @@ -950,7 +933,6 @@ int mcexec_close_exec(ihk_os_t os) struct mckernel_exec_file *mcef = NULL; int found = 0; int os_ind = ihk_host_os_get_index(os); - char proc_name[1024]; if 
(os_ind < 0) { return EINVAL; @@ -969,14 +951,6 @@ int mcexec_close_exec(ihk_os_t os) } } - /* Remove /proc/self/exe and /proc/self directory - * TODO: instead of removing directory explicitly, detect in procfs_delete_entry() - * when a directory becomes empty and remove it automatically */ - snprintf(proc_name, 1024, "mcos%d/%d/exe", os_ind, current->tgid); - procfs_delete_entry(os, os_ind, proc_name); - snprintf(proc_name, 1024, "mcos%d/%d", os_ind, current->tgid); - procfs_delete_entry(os, os_ind, proc_name); - spin_unlock(&mckernel_exec_file_lock); return (found ? 0 : EINVAL); diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index 60a545bb..81e5f572 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -82,6 +82,12 @@ static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR]; static ihk_os_t os[OS_MAX_MINOR]; +ihk_os_t +osnum_to_os(int n) +{ + return os[n]; +} + static int __init mcctrl_init(void) { int i; diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index 9a7a2a4d..fe0e0721 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -41,9 +41,6 @@ void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg); -void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg); -void procfs_delete(void *__os, int osnum, unsigned long arg); -void procfs_answer(unsigned long arg, int err); void sig_done(unsigned long arg, int err); static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, @@ -69,14 +66,6 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg); break; - case SCD_MSG_PROCFS_CREATE: - procfs_create(__os, pisp->ref, pisp->osnum, pisp->pid, pisp->arg); - break; - - case 
SCD_MSG_PROCFS_DELETE: - procfs_delete(__os, pisp->osnum, pisp->arg); - break; - case SCD_MSG_PROCFS_ANSWER: procfs_answer(pisp->arg, pisp->err); break; @@ -98,6 +87,14 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pisp->sysfs_arg1, pisp->sysfs_arg2); break; + case SCD_MSG_PROCFS_TID_CREATE: + add_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); + break; + + case SCD_MSG_PROCFS_TID_DELETE: + delete_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); + break; + default: printk(KERN_ERR "mcctrl:syscall_packet_handler:" "unknown message (%d.%d.%d.%d.%d.%#lx)\n", diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index abd0d1bc..0354065e 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -81,6 +81,8 @@ #define SCD_MSG_SYSFS_RESP_SETUP 0x41 /* #define SCD_MSG_SYSFS_REQ_CLEANUP 0x42 */ /* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */ +#define SCD_MSG_PROCFS_TID_CREATE 0x44 +#define SCD_MSG_PROCFS_TID_DELETE 0x45 #define DMA_PIN_SHIFT 21 @@ -226,6 +228,8 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp); +ihk_os_t osnum_to_os(int n); + /* syscall.c */ int init_peer_channel_registry(struct mcctrl_usrdata *ud); void destroy_peer_channel_registry(struct mcctrl_usrdata *ud); @@ -244,6 +248,7 @@ struct procfs_read { int ret; /* read bytes (answer) */ int status; /* non-zero if done (answer) */ int newcpu; /* migrated new cpu (answer) */ + int readwrite; /* 0:read, 1:write */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ }; @@ -253,6 +258,15 @@ struct procfs_file { char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ }; +void procfs_answer(unsigned int arg, int err); +void add_tid_entry(int osnum, int pid, int tid); +void add_pid_entry(int osnum, int pid); +void delete_tid_entry(int osnum, int pid, int tid); +void delete_pid_entry(int osnum, int 
pid); +void proc_exe_link(int osnum, int pid, const char *path); +void procfs_init(int osnum); +void procfs_exit(int osnum); + /* sysfs_files.c */ void setup_sysfs_files(ihk_os_t os); diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index c316a86a..16714b98 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -28,6 +28,35 @@ #define dprintk(...) #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) +typedef uid_t kuid_t; +typedef gid_t kgid_t; +#endif + +struct procfs_entry { + char *name; + mode_t mode; + const struct file_operations *fops; +}; + +#define NOD(NAME, MODE, FOP) { \ + .name = (NAME), \ + .mode = MODE, \ + .fops = FOP, \ +} +#define PROC_DIR(NAME, MODE) \ + NOD(NAME, (S_IFDIR|(MODE)), NULL) +#define PROC_REG(NAME, MODE, fops) \ + NOD(NAME, (S_IFREG|(MODE)), fops) +#define PROC_TERM \ + NOD(NULL, 0, NULL) + +static const struct procfs_entry tid_entry_stuff[]; +static const struct procfs_entry pid_entry_stuff[]; +static const struct procfs_entry base_entry_stuff[]; +static const struct file_operations mckernel_forward_ro; +static const struct file_operations mckernel_forward; + static DECLARE_WAIT_QUEUE_HEAD(procfsq); static ssize_t mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); @@ -39,11 +68,10 @@ struct procfs_list_entry { struct list_head list; struct proc_dir_entry *entry; struct procfs_list_entry *parent; - ihk_os_t os; + struct list_head children; int osnum; - int pid; - int cpu; - char fname[PROCFS_NAME_MAX]; + char *data; + char name[0]; }; /* @@ -52,324 +80,25 @@ struct procfs_list_entry { * always nearer to the list top than its parent node * file. 
*/ - LIST_HEAD(procfs_file_list); static ihk_spinlock_t procfs_file_list_lock; -loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig) +static char * +getpath(struct procfs_list_entry *e, char *buf, int bufsize) { - switch (orig) { - case 0: - file->f_pos = offset; - break; - case 1: - file->f_pos += offset; - break; - default: - return -EINVAL; + char *w = buf + bufsize - 1; + + *w = '\0'; + for(;;){ + int l = strlen(e->name); + w -= l; + memcpy(w, e->name, l); + e = e->parent; + if(!e) + return w; + w--; + *w = '/'; } - return file->f_pos; -} - -static const struct file_operations mckernel_procfs_file_operations = { - .llseek = mckernel_procfs_lseek, - .read = mckernel_procfs_read, - .write = NULL, -}; - - -/** - * \brief Return specified procfs entry. - * - * \param p a name of the procfs file - * \param osnum os number - * \param mode if zero create a directory otherwise a file or link - * \param opaque additional context dependent information - * - * return value: NULL: Something wrong has occurred. - * otherwise: address of the proc_dir_entry structure of the procfs file - * - * p should not be NULL nor terminated by "/". - * - * We create a procfs entry if there is not already one. - * This process is recursive to the root of the procfs tree. - */ -/* - * XXX: Two or more entries which have same name can be created. - * - * get_procfs_list_entry() avoids creating an entry which has already been created. - * But, it allows creating an entry which is being created by another thread. - * - * This problem occurred when two requests which created files with a common - * ancestor directory which was not explicitly created were racing. 
- */ - -struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode, void *opaque, const struct cred *cred) -{ - char *r; - struct proc_dir_entry *pde = NULL; - struct procfs_list_entry *e, *ret = NULL, *parent = NULL; - char name[PROCFS_NAME_MAX]; - unsigned long irqflags; - - dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode); - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_for_each_entry(e, &procfs_file_list, list) { - if (e == NULL) { - kprintf("ERROR: The procfs_file_list has a null entry.\n"); - return NULL; - } - if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) { - /* We found the entry */ - ret = e; - break; - } - } - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); - if (ret != NULL) { - return ret; - } - r = strrchr(p, '/'); - if (r != NULL) { - /* We have non-null parent dir. */ - strncpy(name, p, r - p); - name[r - p] = '\0'; - parent = get_procfs_list_entry(name, osnum, 0, NULL, cred); - if (parent == NULL) { - /* We counld not get a parent procfs entry. Give up.*/ - return NULL; - } - } - ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL); - if (ret == NULL) { - kprintf("ERROR: not enough memory to create PROCFS entry.\n"); - return NULL; - } - /* Fill the fname field of the entry */ - strncpy(ret->fname, p, PROCFS_NAME_MAX); - - if (r != NULL) { - strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1); - } else { - strncpy(name, p, PROCFS_NAME_MAX); - } - if (mode == 0) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - pde = proc_mkdir(name, parent ? parent->entry : NULL); -#else - pde = proc_mkdir_data(name, 0555, parent ? 
parent->entry : NULL, ret); -#endif - } else if (mode & S_IFLNK) { - pde = proc_symlink(name, parent->entry, (char *)opaque); - } else { -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - pde = create_proc_entry(name, mode, parent->entry); - if (pde) - pde->proc_fops = &mckernel_procfs_file_operations; -#else - pde = proc_create_data(name, mode, parent->entry, - &mckernel_procfs_file_operations, ret); -#endif - if(pde && cred) - proc_set_user(pde, cred->uid, cred->gid); - } - if (pde == NULL) { - kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p); - kfree(ret); - return NULL; - } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - pde->data = ret; -#endif - ret->osnum = osnum; - ret->entry = pde; - ret->parent = parent; - - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_add(&(ret->list), &procfs_file_list); - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); - - dprintk("get_procfs_list_entry: %s done\n", p); - return ret; -} - -/** - * \brief Create procfs create (internal, can be called directly from host Linux). - * - * \param os (opaque) os variable - * \param ref cpuid of the requesting mckernel process - * \param osnum osnum of the requesting mckernel process - * \param pid pid of the requesting mckernel process - * \param name path of the file - * \param mode mode of the file (e.g., dir, link, regular, etc.) - * \param opaque context dependent additional argument - */ - -int procfs_create_entry(void *os, int ref, int osnum, int pid, char *name, - int mode, void *opaque, const struct cred *cred) -{ - struct procfs_list_entry *e; - - e = get_procfs_list_entry(name, osnum, mode, opaque, cred); - if (e == NULL) { - printk("ERROR: could not create a procfs entry for %s.\n", name); - return EINVAL; - } - - e->os = os; - e->cpu = ref; - e->pid = pid; - - return 0; -} - -/** - * \brief Create a procfs entry. 
- * - * \param __os (opeque) os variable - * \param ref cpuid of the requesting mckernel process - * \param osnum osnum of the requesting mckernel process - * \param pid pid of the requesting mckernel process - * \param arg sent argument - */ - -void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg) -{ - ihk_device_t dev = ihk_os_to_dev(__os); - unsigned long parg; - struct procfs_file *f; - int mode; - char name[PROCFS_NAME_MAX]; - struct task_struct *task = NULL; - const struct cred *tcred = NULL; - - if(pid > 0){ - task = pid_task(find_vpid(pid), PIDTYPE_PID); - if(task){ - tcred = __task_cred(task); - } - } - - dprintk("procfs_create: osnum: %d, cpu: %d, pid: %d\n", osnum, ref, pid); - - parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file)); - f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0); - - dprintk("name: %s mode: %o\n", f->fname, f->mode); - - strncpy(name, f->fname, PROCFS_NAME_MAX); - mode = f->mode; - - if (name[PROCFS_NAME_MAX - 1] != '\0') { - printk("ERROR: procfs_creat: file name not properly terminated.\n"); - goto quit; - } - - if (procfs_create_entry(__os, ref, osnum, pid, name, mode, NULL, tcred) != 0) { - printk("ERROR: could not create a procfs entry for %s.\n", name); - goto quit; - } - -quit: - f->status = 1; /* Now the peer can free the data. */ - ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file)); - ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file)); - dprintk("procfs_create: done\n"); -} - -/** - * \brief Delete a procfs entry and all of its subtree (internal). - * - * \param __os (opaque) os variable - * \param osnum os number - * - * NOTE: procfs_file_list_lock has to be held here. 
- */ - -void __procfs_delete_entry_recursively(struct procfs_list_entry *e) -{ - struct procfs_list_entry *le; - struct procfs_list_entry *parent = NULL; - char name[PROCFS_NAME_MAX]; - char *r; - - /* See if there are any children of this entry */ -retry: - list_for_each_entry(le, &procfs_file_list, list) { - if (le->parent != e) { - continue; - } - - __procfs_delete_entry_recursively(le); - /* List may have changed... */ - goto retry; - } - - /* No more children, remove entry */ - list_del(&e->list); -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - e->entry->read_proc = NULL; - e->entry->data = NULL; -#endif - parent = e->parent; - r = strrchr(e->fname, '/'); - if (r == NULL) { - strncpy(name, e->fname, PROCFS_NAME_MAX); - } else { - strncpy(name, r + 1, PROCFS_NAME_MAX); - } - dprintk("found and removed %s from the list.\n", name); - remove_proc_entry(name, parent->entry); - kfree(e); -} - -/** - * \brief Delete a procfs entry (internal). - * - * \param __os (opaque) os variable - * \param osnum os number - */ - -void procfs_delete_entry(void *os, int osnum, char *fname) -{ - struct procfs_list_entry *e; - unsigned long irqflags; - - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_for_each_entry(e, &procfs_file_list, list) { - if ((strncmp(e->fname, fname, PROCFS_NAME_MAX) == 0) && (e->osnum == osnum)) { - __procfs_delete_entry_recursively(e); - break; - } - } - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); -} - -/** - * \brief Delete a procfs entry (internal, can be called directly from host Linux). 
- * - * \param __os (opaque) os variable - * \param osnum os number - * \param arg sent argument - */ - -void procfs_delete(void *__os, int osnum, unsigned long arg) -{ - struct procfs_file *f; - ihk_device_t dev = ihk_os_to_dev(__os); - unsigned long parg; - - dprintk("procfs_delete: \n"); - parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file)); - f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0); - dprintk("fname: %s.\n", f->fname); - - procfs_delete_entry(__os, osnum, f->fname); - - f->status = 1; /* Now the peer can free the data. */ - ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file)); - ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file)); - dprintk("procfs_delete: done\n"); } /** @@ -378,13 +107,390 @@ void procfs_delete(void *__os, int osnum, unsigned long arg) * \param arg sent argument * \param err error info (redundant) */ - -void procfs_answer(unsigned int arg, int err) +void +procfs_answer(unsigned int arg, int err) { dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err); wake_up_interruptible(&procfsq); } +static struct procfs_list_entry * +find_procfs_entry(struct procfs_list_entry *parent, const char *name) +{ + struct list_head *list; + struct procfs_list_entry *e; + + if(parent == NULL) + list = &procfs_file_list; + else + list = &parent->children; + + list_for_each_entry(e, list, list) { + if(!strcmp(e->name, name)) + return e; + } + + return NULL; +} + +static void +delete_procfs_entries(struct procfs_list_entry *top) +{ + struct procfs_list_entry *e; + struct procfs_list_entry *n; + + list_del(&top->list); + + list_for_each_entry_safe(e, n, &top->children, list) { + delete_procfs_entries(e); + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + e->entry->read_proc = NULL; + e->entry->data = NULL; +#endif + remove_proc_entry(top->name, top->parent? 
top->parent->entry: NULL); + if(top->data) + kfree(top->data); + kfree(top); +} + +static struct procfs_list_entry * +add_procfs_entry(struct procfs_list_entry *parent, const char *name, int mode, + kuid_t uid, kgid_t gid, const void *opaque) +{ + struct procfs_list_entry *e = find_procfs_entry(parent, name); + struct proc_dir_entry *pde; + struct proc_dir_entry *parent_pde = NULL; + int f_mode = mode & 0777; + + if(e) + delete_procfs_entries(e); + + e = kmalloc(sizeof(struct procfs_list_entry) + strlen(name) + 1, + GFP_KERNEL); + if(!e){ + kprintf("ERROR: not enough memory to create PROCFS entry.\n"); + return NULL; + } + memset(e, '\0', sizeof(struct procfs_list_entry)); + INIT_LIST_HEAD(&e->children); + strcpy(e->name, name); + + if(parent) + parent_pde = parent->entry; + + if (mode & S_IFDIR) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde = proc_mkdir(name, parent_pde); +#else + pde = proc_mkdir_data(name, f_mode, parent_pde, e); +#endif + } + else if ((mode & S_IFLNK) == S_IFLNK) { + pde = proc_symlink(name, parent_pde, (char *)opaque); + } + else { + const struct file_operations *fop; + + if(opaque) + fop = (const struct file_operations *)opaque; + else if(mode & S_IWUSR) + fop = &mckernel_forward; + else + fop = &mckernel_forward_ro; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde = create_proc_entry(name, f_mode, parent_pde); + if(pde) + pde->proc_fops = fop; +#else + pde = proc_create_data(name, f_mode, parent_pde, fop, e); + if(pde) + proc_set_user(pde, uid, gid); +#endif + } + if(!pde){ + kprintf("ERROR: cannot create a PROCFS entry for %s.\n", name); + kfree(e); + return NULL; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + pde->uid = uid; + pde->gid = gid; + pde->data = e; +#endif + + if(parent) + e->osnum = parent->osnum; + e->entry = pde; + e->parent = parent; + list_add(&(e->list), parent? 
&(parent->children): &procfs_file_list); + + return e; +} + +static void +add_procfs_entries(struct procfs_list_entry *parent, + const struct procfs_entry *entries, kuid_t uid, kgid_t gid) +{ + const struct procfs_entry *p; + + for(p = entries; p->name; p++){ + add_procfs_entry(parent, p->name, p->mode, uid, gid, p->fops); + } +} + +static const struct cred * +get_pid_cred(int pid) +{ + struct task_struct *task = NULL; + + if(pid > 0){ + task = pid_task(find_vpid(pid), PIDTYPE_PID); + if(task){ + return __task_cred(task); + } + } + return NULL; +} + +static struct procfs_list_entry * +find_base_entry(int osnum) +{ + char name[12]; + + sprintf(name, "mcos%d", osnum); + return find_procfs_entry(NULL, name); +} + +static struct procfs_list_entry * +find_pid_entry(int osnum, int pid) +{ + struct procfs_list_entry *e; + char name[12]; + + if(!(e = find_base_entry(osnum))) + return NULL; + sprintf(name, "%d", pid); + return find_procfs_entry(e, name); +} + +static struct procfs_list_entry * +find_tid_entry(int osnum, int pid, int tid) +{ + struct procfs_list_entry *e; + char name[12]; + + if(!(e = find_pid_entry(osnum, pid))) + return NULL; + if(!(e = find_procfs_entry(e, "task"))) + return NULL; + sprintf(name, "%d", tid); + return find_procfs_entry(e, name); +} + +static struct procfs_list_entry * +get_base_entry(int osnum) +{ + struct procfs_list_entry *e; + char name[12]; + kuid_t uid = KUIDT_INIT(0); + kgid_t gid = KGIDT_INIT(0); + + sprintf(name, "mcos%d", osnum); + e = find_procfs_entry(NULL, name); + if(!e){ + e = add_procfs_entry(NULL, name, S_IFDIR | 0555, + uid, gid, NULL); + e->osnum = osnum; + } + return e; +} + +static struct procfs_list_entry * +get_pid_entry(int osnum, int pid) +{ + struct procfs_list_entry *parent; + struct procfs_list_entry *e; + char name[12]; + kuid_t uid = KUIDT_INIT(0); + kgid_t gid = KGIDT_INIT(0); + + sprintf(name, "mcos%d", osnum); + if(!(parent = find_procfs_entry(NULL, name))) + return NULL; + sprintf(name, "%d", pid); + e = 
find_procfs_entry(parent, name); + if(!e) + e = add_procfs_entry(parent, name, S_IFDIR | 0555, + uid, gid, NULL); + return e; +} + +static struct procfs_list_entry * +get_tid_entry(int osnum, int pid, int tid) +{ + struct procfs_list_entry *parent; + struct procfs_list_entry *e; + char name[12]; + kuid_t uid = KUIDT_INIT(0); + kgid_t gid = KGIDT_INIT(0); + + sprintf(name, "mcos%d", osnum); + if(!(parent = find_procfs_entry(NULL, name))) + return NULL; + sprintf(name, "%d", pid); + if(!(parent = find_procfs_entry(parent, name))) + return NULL; + if(!(parent = find_procfs_entry(parent, "task"))) + return NULL; + sprintf(name, "%d", tid); + e = find_procfs_entry(parent, name); + if(!e) + e = add_procfs_entry(parent, name, S_IFDIR | 0555, + uid, gid, NULL); + return e; +} + +static void +_add_tid_entry(int osnum, int pid, int tid, const struct cred *cred) +{ + struct procfs_list_entry *parent; + struct procfs_list_entry *exe; + + parent = get_tid_entry(osnum, pid, tid); + if(parent){ + add_procfs_entries(parent, tid_entry_stuff, + cred->uid, cred->gid); + exe = find_procfs_entry(parent->parent->parent, "exe"); + if(exe){ + add_procfs_entry(parent, "exe", S_IFLNK | 0777, + cred->uid, cred->gid, exe->data); + } + + } +} + +void +add_tid_entry(int osnum, int pid, int tid) +{ + unsigned long irqflag; + const struct cred *cred = get_pid_cred(pid); + + if(!cred) + return; + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + _add_tid_entry(osnum, pid, tid, cred); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + +void +add_pid_entry(int osnum, int pid) +{ + struct procfs_list_entry *parent; + unsigned long irqflag; + const struct cred *cred = get_pid_cred(pid); + + if(!cred) + return; + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + parent = get_pid_entry(osnum, pid); + add_procfs_entries(parent, pid_entry_stuff, cred->uid, cred->gid); + _add_tid_entry(osnum, pid, pid, cred); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + 
+void +delete_tid_entry(int osnum, int pid, int tid) +{ + unsigned long irqflag; + struct procfs_list_entry *e; + + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + e = find_tid_entry(osnum, pid, tid); + if(e) + delete_procfs_entries(e); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + +void +delete_pid_entry(int osnum, int pid) +{ + unsigned long irqflag; + struct procfs_list_entry *e; + + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + e = find_pid_entry(osnum, pid); + if(e) + delete_procfs_entries(e); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + +void +proc_exe_link(int osnum, int pid, const char *path) +{ + struct procfs_list_entry *parent; + unsigned long irqflag; + kuid_t uid = KUIDT_INIT(0); + kgid_t gid = KGIDT_INIT(0); + + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + parent = find_pid_entry(osnum, pid); + if(parent){ + struct procfs_list_entry *task; + struct procfs_list_entry *e; + + e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid, + path); + e->data = kmalloc(strlen(path) + 1, GFP_KERNEL); + strcpy(e->data, path); + task = find_procfs_entry(parent, "task"); + list_for_each_entry(parent, &task->children, list) { + add_procfs_entry(parent, "exe", S_IFLNK | 0777, + uid, gid, path); + } + } + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + +/** + * \brief Initialization for procfs + * + * \param osnum os number + */ +void +procfs_init(int osnum) +{ + struct procfs_list_entry *parent; + unsigned long irqflag; + kuid_t uid = KUIDT_INIT(0); + kgid_t gid = KGIDT_INIT(0); + + irqflag = ihk_ikc_spinlock_lock(&procfs_file_list_lock); + parent = get_base_entry(osnum); + add_procfs_entries(parent, base_entry_stuff, uid, gid); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + +/** + * \brief Finalization for procfs + * + * \param osnum os number + */ +void +procfs_exit(int osnum) +{ + unsigned long irqflag; + struct procfs_list_entry *e; + + irqflag = 
ihk_ikc_spinlock_lock(&procfs_file_list_lock); + e = find_base_entry(osnum); + if(e) + delete_procfs_entries(e); + ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflag); +} + /** * \brief The callback funciton for McKernel procfs * @@ -396,11 +502,11 @@ mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct inode * inode = file->f_path.dentry->d_inode; - char *kern_buffer; + char *kern_buffer = NULL; int order = 0; - volatile struct procfs_read *r; + volatile struct procfs_read *r = NULL; struct ikc_scd_packet isp; - int ret, retrycount = 0; + int ret; unsigned long pbuf; unsigned long count = nbytes; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) @@ -410,9 +516,12 @@ mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, struct procfs_list_entry *e = PDE_DATA(inode); #endif loff_t offset = *ppos; + char pathbuf[PROCFS_NAME_MAX]; + char *path; + path = getpath(e, pathbuf, 256); dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n", - e->fname, offset, count); + path, offset, count); if (count <= 0 || offset < 0) { return 0; @@ -437,23 +546,22 @@ mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); if (r == NULL) { - return -ENOMEM; + ret = -ENOMEM; + goto out; } -retry: - dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu); - r->pbuf = pbuf; r->eof = 0; r->ret = -EIO; /* default */ r->status = 0; r->offset = offset; r->count = count; - strncpy((char *)r->fname, e->fname, PROCFS_NAME_MAX); + r->readwrite = 0; + strncpy((char *)r->fname, path, PROCFS_NAME_MAX); isp.msg = SCD_MSG_PROCFS_REQUEST; - isp.ref = e->cpu; + isp.ref = 0; isp.arg = virt_to_phys(r); - ret = mcctrl_ikc_send(e->os, e->cpu, &isp); + ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp); if (ret < 0) { goto out; /* error */ @@ -471,18 +579,6 @@ retry: /* Wake up and check the result. */ dprintk("mckernel_procfs_read: woke up. 
ret: %d, eof: %d\n", r->ret, r->eof); - if ((r->ret == 0) && (r->eof != 1)) { - /* A miss-hit caused by migration has occurred. - * We simply retry the query with a new CPU. - */ - if (retrycount++ > 10) { - kprintf("ERROR: mckernel_procfs_read: excessive retry.\n"); - goto out; - } - e->cpu = r->newcpu; - dprintk("retry\n"); - goto retry; - } if (r->ret > 0) { if (copy_to_user(buf, kern_buffer, r->ret)) { @@ -496,75 +592,198 @@ retry: ret = r->ret; out: - free_pages((uintptr_t)kern_buffer, order); - kfree((void *)r); + if(kern_buffer) + free_pages((uintptr_t)kern_buffer, order); + if(r) + kfree((void *)r); return ret; } -/** - * \brief Initialization for procfs - * - * \param osnum os number - */ - -void procfs_init(int osnum) { -} - -/** - * \brief Finalization for procfs - * - * \param osnum os number - */ - -void procfs_exit(int osnum) { - char buf[20], *r; - int error; - mm_segment_t old_fs = get_fs(); - struct kstat stat; - struct procfs_list_entry *parent; - struct procfs_list_entry *e, *temp = NULL; - unsigned long irqflags; - - dprintk("remove remaining mckernel procfs files.\n"); - - irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock); - list_for_each_entry_safe(e, temp, &procfs_file_list, list) { - if (e->osnum == osnum) { - dprintk("found entry for %s.\n", e->fname); - list_del(&e->list); +static ssize_t +mckernel_procfs_write(struct file *file, const char __user *buf, size_t nbytes, + loff_t *ppos) +{ + struct inode * inode = file->f_path.dentry->d_inode; + char *kern_buffer = NULL; + int order = 0; + volatile struct procfs_read *r = NULL; + struct ikc_scd_packet isp; + int ret; + unsigned long pbuf; + unsigned long count = nbytes; #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - e->entry->read_proc = NULL; - e->entry->data = NULL; -#endif - parent = e->parent; - r = strrchr(e->fname, '/'); - if (r == NULL) { - r = e->fname; - } else { - r += 1; - } - if (parent) { - remove_proc_entry(r, parent->entry); - } - dprintk("free the entry\n"); - 
kfree(e); - } - dprintk("iterate it.\n"); + struct proc_dir_entry *dp = PDE(inode); + struct procfs_list_entry *e = dp->data; +#else + struct procfs_list_entry *e = PDE_DATA(inode); +#endif + loff_t offset = *ppos; + char pathbuf[PROCFS_NAME_MAX]; + char *path; + + path = getpath(e, pathbuf, 256); + dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n", + path, offset, count); + + if (count <= 0 || offset < 0) { + return 0; } - ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags); - - sprintf(buf, "/proc/mcos%d", osnum); - - set_fs(KERNEL_DS); - error = vfs_stat (buf, &stat); - set_fs(old_fs); - if (error != 0) { - return; + + while ((1 << order) < count) ++order; + if (order > 12) { + order -= 12; + } + else { + order = 1; } - printk("procfs_exit: We have to remove unexpectedly remaining %s.\n", buf); + /* NOTE: we need physically contigous memory to pass through IKC */ + kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order); + if (!kern_buffer) { + printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n"); + return -ENOMEM; + } + if (copy_from_user(kern_buffer, buf, nbytes)) { + ret = -EFAULT; + goto out; + } + + pbuf = virt_to_phys(kern_buffer); - /* remove remnant of previous mcos%d */ - remove_proc_entry(buf + 6, NULL); + r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); + if (r == NULL) { + ret = -ENOMEM; + goto out; + } + dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu); + + r->pbuf = pbuf; + r->eof = 0; + r->ret = -EIO; /* default */ + r->status = 0; + r->offset = offset; + r->count = count; + r->readwrite = 1; + strncpy((char *)r->fname, path, PROCFS_NAME_MAX); + isp.msg = SCD_MSG_PROCFS_REQUEST; + isp.ref = 0; + isp.arg = virt_to_phys(r); + + ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp); + + if (ret < 0) { + goto out; /* error */ + } + + /* Wait for a reply. 
*/ + ret = -EIO; /* default exit code */ + dprintk("now wait for a relpy\n"); + + /* Wait for the status field of the procfs_read structure set ready. */ + if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) { + kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n"); + goto out; + } + + /* Wake up and check the result. */ + dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof); + + if (r->ret > 0) { + *ppos += r->ret; + } + ret = r->ret; + +out: + if(kern_buffer) + free_pages((uintptr_t)kern_buffer, order); + if(r) + kfree((void *)r); + + return ret; } + +static loff_t +mckernel_procfs_lseek(struct file *file, loff_t offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + break; + case 1: + file->f_pos += offset; + break; + default: + return -EINVAL; + } + return file->f_pos; +} + +static const struct file_operations mckernel_forward_ro = { + .llseek = mckernel_procfs_lseek, + .read = mckernel_procfs_read, + .write = NULL, +}; + +static const struct file_operations mckernel_forward = { + .llseek = mckernel_procfs_lseek, + .read = mckernel_procfs_read, + .write = mckernel_procfs_write, +}; + +static const struct procfs_entry tid_entry_stuff[] = { + PROC_REG("auxv", S_IRUSR, NULL), + PROC_REG("clear_refs", S_IWUSR, NULL), + PROC_REG("cmdline", S_IRUGO, NULL), + PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), + PROC_REG("environ", S_IRUSR, NULL), +// PROC_LNK("exe", mckernel_readlink), + PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), + PROC_REG("maps", S_IRUGO, NULL), + PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), + PROC_REG("pagemap", S_IRUGO, NULL), + PROC_REG("smaps", S_IRUGO, NULL), + PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("statm", S_IRUGO, NULL), + PROC_REG("status", S_IRUGO, NULL), + PROC_REG("syscall", S_IRUGO, NULL), + PROC_REG("wchan", S_IRUGO, NULL), + PROC_TERM +}; + +static const struct procfs_entry pid_entry_stuff[] = { + PROC_REG("auxv", S_IRUSR, NULL), + PROC_REG("clear_refs", S_IWUSR, NULL), + 
PROC_REG("cmdline", S_IRUGO, NULL), + PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), + PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL), + PROC_REG("environ", S_IRUSR, NULL), +// PROC_LNK("exe", mckernel_readlink), + PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), + PROC_REG("maps", S_IRUGO, NULL), + PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), + PROC_REG("pagemap", S_IRUGO, NULL), + PROC_REG("smaps", S_IRUGO, NULL), + PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("statm", S_IRUGO, NULL), + PROC_REG("status", S_IRUGO, NULL), + PROC_REG("syscall", S_IRUGO, NULL), + PROC_DIR("task", S_IRUGO|S_IXUGO), + PROC_REG("wchan", S_IRUGO, NULL), + PROC_TERM +}; + +static const struct procfs_entry base_entry_stuff[] = { + PROC_REG("cmdline", S_IRUGO, NULL), + PROC_REG("cpuinfo", S_IRUGO, NULL), + PROC_REG("meminfo", S_IRUGO, NULL), + PROC_REG("pagetypeinfo",S_IRUGO, NULL), + PROC_REG("softirq", S_IRUGO, NULL), + PROC_REG("stat", S_IRUGO, NULL), + PROC_REG("uptime", S_IRUGO, NULL), + PROC_REG("version", S_IRUGO, NULL), + PROC_REG("vmallocinfo",S_IRUSR, NULL), + PROC_REG("vmstat", S_IRUGO, NULL), + PROC_REG("zoneinfo", S_IRUGO, NULL), + PROC_TERM +}; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 5c3822d2..e7c86079 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -66,6 +66,8 @@ #define SCD_MSG_SYSFS_RESP_SETUP 0x41 /* #define SCD_MSG_SYSFS_REQ_CLEANUP 0x42 */ /* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */ +#define SCD_MSG_PROCFS_TID_CREATE 0x44 +#define SCD_MSG_PROCFS_TID_DELETE 0x45 #define ARCH_SET_GS 0x1001 #define ARCH_SET_FS 0x1002 @@ -314,6 +316,7 @@ struct procfs_read { int ret; /* read bytes (answer) */ int status; /* non-zero if done (answer) */ int newcpu; /* migrated new cpu (answer) */ + int readwrite; /* 0:read, 1:write */ char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ }; diff --git a/kernel/init.c b/kernel/init.c index 1e651c0b..8e09d4fe 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -283,7 +283,6 @@ static void 
post_init(void) zero_tsc(); ap_start(); - create_os_procfs_files(); sysfs_init(); populate_sysfs(); } diff --git a/kernel/process.c b/kernel/process.c index 3b0e0a07..49a48f08 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -63,6 +63,8 @@ extern int num_processors; extern ihk_spinlock_t cpuid_head_lock; int ptrace_detach(int pid, int data); extern unsigned long do_kill(struct thread *, int pid, int tid, int sig, struct siginfo *info, int ptracecont); +extern void procfs_create_thread(struct thread *); +extern void procfs_delete_thread(struct thread *); struct list_head resource_set_list; mcs_rwlock_lock_t resource_set_lock; @@ -2166,6 +2168,7 @@ void release_thread(struct thread *thread) vm = thread->vm; + procfs_delete_thread(thread); destroy_thread(thread); release_process_vm(vm); @@ -2753,7 +2756,7 @@ void runq_add_thread(struct thread *thread, int cpu_id) __runq_add_thread(thread, cpu_id); ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - create_proc_procfs_files(thread->proc->pid, cpu_id); + procfs_create_thread(thread); /* Kick scheduler */ if (cpu_id != ihk_mc_get_processor_id()) diff --git a/kernel/procfs.c b/kernel/procfs.c index 4cc6b2da..525d0cf1 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -38,212 +38,37 @@ extern int sscanf(const char * buf, const char * fmt, ...); extern int osnum; -void create_proc_procfs_files(int pid, int cpuid); -void delete_proc_procfs_files(int pid); -void create_os_procfs_files(void); -void delete_os_procfs_files(void); - -static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid); -static void delete_proc_procfs_file(int pid, char *fname); -static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid); - int copy_from_user(void *dst, const void *src, size_t siz); int copy_to_user(void *dst, const void *src, size_t siz); -/** - * \brief Create all procfs files for process. 
- * - * \param pid pid of the process - * \param cpuid cpuid of the process - */ - -void create_proc_procfs_files(int pid, int cpuid) -{ - char fname[PROCFS_NAME_MAX]; - - dprintf("create procfs files:\n"); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid); - create_proc_procfs_file(pid, fname, 0400, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid); - create_proc_procfs_file(pid, fname, 0444, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid); - create_proc_procfs_file(pid, fname, 0400, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid); - create_proc_procfs_file(pid, fname, 0444, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid); - create_proc_procfs_file(pid, fname, 0444, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid); - create_proc_procfs_file(pid, fname, 0444, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid); - create_proc_procfs_file(pid, fname, 0400, cpuid); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/stat", osnum, pid, pid); - create_proc_procfs_file(pid, fname, 0444, cpuid); - - dprintf("create procfs files: done\n"); -} - -/** - * \brief Create a procfs file for process. - * - * \param pid pid of the process - * \param fname file name of the procfs file - * \param mode file mode - * \param cpuid cpuid of the process - */ - -static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid) -{ - dprintf("create procfs file: %s, mode: %o, cpuid: %d\n", fname, mode, cpuid); - operate_proc_procfs_file(pid, fname, SCD_MSG_PROCFS_CREATE, mode, cpuid); -} - -/** - * \brief Delete all procfs files for process. 
- * - * \param pid pid of the process - */ - -void delete_proc_procfs_files(int pid) -{ - char fname[PROCFS_NAME_MAX]; - - dprintf("delete procfs files for pid %d.\n", pid); - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/stat", osnum, pid, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d", osnum, pid, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid); - delete_proc_procfs_file(pid, fname); - - snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid); - delete_proc_procfs_file(pid, fname); - - /* NOTE: Directory is removed on the host when mcexec drops the executable */ - dprintf("delete procfs files for pid %d: done\n", pid); -} - -/** - * \brief Delete a procfs file for process. - * - * \param pid pid of the process - * \param fname file name of the procfs file - */ - -static void delete_proc_procfs_file(int pid, char *fname) -{ - dprintf("delete procfs file: %s\n", fname); - operate_proc_procfs_file(pid, fname, SCD_MSG_PROCFS_DELETE, 0, 0); - dprintf("delete procfs file: %s done\n", fname); -} - -/** - * \brief create a procfs file for this operating system - * \param fname relative path name from "host:/proc". 
- * \param mode permissions of the file to be created - * - * Though operate_proc_procfs_file() is intended to create a process - * specific file, it is reused to create a OS specific file by - * specifying -1 as the pid parameter. - */ -static void create_os_procfs_file(char *fname, int mode) -{ - const pid_t pid = -1; - const int msg = SCD_MSG_PROCFS_CREATE; - const int cpuid = ihk_mc_get_processor_id(); /* i.e. BSP */ - - operate_proc_procfs_file(pid, fname, msg, mode, cpuid); - return; -} - -/** - * \brief create all procfs files for this operating system - */ -void create_os_procfs_files(void) -{ - char *fname = NULL; - size_t n; - - fname = kmalloc(PROCFS_NAME_MAX, IHK_MC_AP_CRITICAL); - - n = snprintf(fname, PROCFS_NAME_MAX, "mcos%d/stat", osnum); - if (n >= PROCFS_NAME_MAX) panic("/proc/stat"); - create_os_procfs_file(fname, 0444); - - return; -} - -/** - * \brief Create/delete a procfs file for process. - * - * \param pid pid of the process - * \param fname file name of the procfs file - * \param msg message (create/delete) - * \param mode file mode - * \param cpuid cpuid of the process - */ - -static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid) +static void +procfs_thread_ctl(struct thread *thread, int msg) { struct ihk_ikc_channel_desc *syscall_channel; - struct ikc_scd_packet pckt; - struct procfs_file *f; - int ret; + struct ikc_scd_packet packet; syscall_channel = cpu_local_var(syscall_channel); + memset(&packet, '\0', sizeof packet); + packet.arg = thread->tid; + packet.msg = msg; + packet.osnum = osnum; + packet.ref = thread->cpu_id; + packet.pid = thread->proc->pid; + packet.err = 0; - f = kmalloc(sizeof(struct procfs_file), IHK_MC_AP_NOWAIT); - if (!f) { - kprintf("ERROR: not enough memory for dealing procfs file %s!", - fname); - return; - } - f->status = 0; - f->mode = mode; - strncpy(f->fname, fname, PROCFS_NAME_MAX); - pckt.arg = virt_to_phys(f); - pckt.msg = msg; - pckt.osnum = osnum; - pckt.ref = cpuid; - 
pckt.pid = pid; - pckt.err = 0; + ihk_ikc_send(syscall_channel, &packet, 0); +} - ret = ihk_ikc_send(syscall_channel, &pckt, 0); - if (ret < 0) { - kprintf("ERROR: sending IKC msg, ret: %d\n", ret); - } +void +procfs_create_thread(struct thread *thread) +{ + procfs_thread_ctl(thread, SCD_MSG_PROCFS_TID_CREATE); +} - while (f->status != 1) { - cpu_pause(); - } - kfree(f); +void +procfs_delete_thread(struct thread *thread) +{ + procfs_thread_ctl(thread, SCD_MSG_PROCFS_TID_DELETE); } /** @@ -251,12 +76,13 @@ static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, in * * \param rarg returned argument */ - -void process_procfs_request(unsigned long rarg) +void +process_procfs_request(unsigned long rarg) { unsigned long parg, pbuf; - struct thread *thread = cpu_local_var(current); - struct process *proc = thread->proc; + struct thread *thread = NULL; + struct process *proc = NULL; + struct process_vm *vm = NULL; struct procfs_read *r; struct ikc_scd_packet packet; int rosnum, ret, pid, tid, ans = -EIO, eof = 0; @@ -266,7 +92,8 @@ void process_procfs_request(unsigned long rarg) unsigned long offset; int count; int npages; - int is_current = 1; /* is 'proc' same as 'current'? */ + int readwrite = 0; + dprintf("process_procfs_request: invoked.\n"); @@ -296,6 +123,7 @@ void process_procfs_request(unsigned long rarg) goto bufunavail; } + readwrite = r->readwrite; count = r->count; offset = r->offset; dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count); @@ -334,32 +162,38 @@ void process_procfs_request(unsigned long rarg) */ ret = sscanf(p, "%d/", &pid); if (ret == 1) { - if (pid != cpu_local_var(current)->proc->pid) { - /* We are not located in the proper cpu for some reason. */ - - dprintf("mismatched pid. 
We are %d, but requested pid is %d.\n", - pid, cpu_local_var(current)->pid); - tid = pid; /* main thread */ - thread = find_thread(pid, tid, &lock); - if (!thread) { - dprintf("We cannot find the proper cpu for requested pid.\n"); - goto end; - } - else if (thread->cpu_id != ihk_mc_get_processor_id()) { - /* The target process has gone by migration. */ - r->newcpu = thread->cpu_id; - dprintf("expected cpu id is %d.\n", thread->cpu_id); - thread_unlock(thread, &lock); - ans = 0; - goto end; - } - else { - thread_unlock(thread, &lock); - /* 'proc' is not 'current' */ - is_current = 0; - } - proc = thread->proc; + proc = find_process(pid, &lock); + if(proc == NULL){ + kprintf("process_procfs_request: no such pid %d\n", pid); + goto end; } + p = strchr(p, '/') + 1; + ret = sscanf(p, "task/%d/", &tid); + if(ret == 1){ + struct mcs_rwlock_node tlock; + mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &tlock); + list_for_each_entry(thread, &proc->threads_list, + siblings_list) { + if(thread->tid == tid) + break; + } + if(thread == NULL){ + mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, + &tlock); + process_unlock(proc, &lock); + kprintf("process_procfs_request: no such tid %d-%d\n", pid, tid); + goto end; + } + hold_thread(thread); + mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); + p = strchr(p, '/') + 1; + p = strchr(p, '/') + 1; + } + hold_process(proc); + vm = proc->vm; + if(vm) + hold_process_vm(vm); + process_unlock(proc, &lock); } else if (!strcmp(p, "stat")) { /* "/proc/stat" */ extern int num_processors; /* kernel/ap.c */ @@ -390,10 +224,9 @@ void process_procfs_request(unsigned long rarg) goto end; } else { + kprintf("unsupported procfs entry: %s\n", p); goto end; } - dprintf("matched PID: %d.\n", pid); - p = strchr(p, '/') + 1; /* * mcos%d/PID/mem @@ -403,9 +236,8 @@ void process_procfs_request(unsigned long rarg) */ if (strcmp(p, "mem") == 0) { struct vm_range *range; - struct process_vm *vm = proc->vm; - if (!is_current) { + if (proc != 
cpu_local_var(current)->proc) { uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER; unsigned long offset = r->offset; unsigned long left = r->count; @@ -423,8 +255,8 @@ void process_procfs_request(unsigned long rarg) if(size > left) size = left; - ret = page_fault_process_vm(proc->vm, - (void *)offset, reason); + ret = page_fault_process_vm(vm, (void *)offset, + reason); if(ret){ if(ans == 0) ans = -EIO; @@ -479,7 +311,6 @@ void process_procfs_request(unsigned long rarg) */ if (strcmp(p, "maps") == 0) { struct vm_range *range; - struct process_vm *vm = proc->vm; int left = r->count - 1; /* extra 1 for terminating NULL */ int written = 0; char *_buf = buf; @@ -537,7 +368,6 @@ void process_procfs_request(unsigned long rarg) * mcos%d/PID/pagemap */ if (strcmp(p, "pagemap") == 0) { - struct process_vm *vm = proc->vm; uint64_t *_buf = (uint64_t *)buf; uint64_t start, end; @@ -673,112 +503,72 @@ void process_procfs_request(unsigned long rarg) * The offset is treated as the beginning of the virtual address area * of the process. The count is the length of the area. */ - tid = pid; - ret = sscanf(p, "task/%d/", &tid); - if (ret == 1) { - p = strchr(p, '/') + 1; - p = strchr(p, '/') + 1; - if (!strcmp(p, "mem")){ - struct vm_range *range; - struct process_vm *vm = proc->vm; + if (!strcmp(p, "stat")) { + char tmp[1024]; + int len; - if (!is_current) { - goto end; + /* + * pid (comm) state ppid + * pgrp session tty_nr tpgid + * flags minflt cminflt majflt + * cmajflt utime stime cutime + * cstime priority nice num_threads + * itrealvalue starttime vsize rss + * rsslim startcode endcode startstack + * kstkesp kstkeip signal blocked + * sigignore sigcatch wchan nswap + * cnswap exit_signal processor rt_priority + * policy delayacct_blkio_ticks guest_time cguest_time + */ + ans = sprintf(tmp, + "%d (%s) %c %d " // pid... + "%d %d %d %d " // pgrp... + "%u %lu %lu %lu " // flags... + "%lu %lu %lu %ld " // cmajflt... + "%ld %ld %ld %ld " // cstime... 
+ "%ld %llu %lu %ld " // itrealvalue... + "%lu %lu %lu %lu " // rsslim... + "%lu %lu %lu %lu " // kstkesp... + "%lu %lu %lu %lu " // sigignore... + "%lu %d %d %u " // cnswap... + "%u %llu %lu %ld\n", // policy... + 0, "exe", 'R', 0, // pid... + 0, 0, 0, 0, // pgrp... + 0, 0L, 0L, 0L, // flags... + 0L, 0L, 0L, 0L, // cmajflt... + 0L, 0L, 0L, 0L, // cstime... + 0L, 0LL, 0L, 0L, // itrealvalue... + 0L, 0L, 0L, 0L, // rsslim... + 0L, 0L, 0L, 0L, // kstkesp... + 0L, 0L, 0L, 0L, // sigignore... + 0L, 0, thread->cpu_id, 0, // cnswap... + 0, 0LL, 0L, 0L // policy... + ); + thread_unlock(thread, &lock); + dprintf("tmp=%s\n", tmp); + + len = strlen(tmp); + if (r->offset < len) { + if (r->offset + r->count < len) { + ans = r->count; + } else { + eof = 1; + ans = len; } - if (pid != tid) { - /* We are not multithreaded yet. */ - goto end; - } - list_for_each_entry(range, &vm->vm_range_list, list) { - dprintf("range: %lx - %lx\n", range->start, range->end); - if ((range->start <= r->offset) && - (r->offset < range->end)) { - unsigned int len = r->count; - if (range->end < r->offset + r->count) { - len = range->end - r->offset; - } - memcpy((void *)buf, (void *)range->start, len); - ans = len; - break; - } - } - goto end; + strncpy(buf, tmp + r->offset, ans); + } else if (r->offset == len) { + ans = 0; + eof = 1; } - - if (!strcmp(p, "stat")) { - char tmp[1024]; - int len; - - if ((thread = find_thread(pid, tid, &lock))){ - dprintf("thread found! pid=%d tid=%d\n", pid, tid); - /* - * pid (comm) state ppid - * pgrp session tty_nr tpgid - * flags minflt cminflt majflt - * cmajflt utime stime cutime - * cstime priority nice num_threads - * itrealvalue starttime vsize rss - * rsslim startcode endcode startstack - * kstkesp kstkeip signal blocked - * sigignore sigcatch wchan nswap - * cnswap exit_signal processor rt_priority - * policy delayacct_blkio_ticks guest_time cguest_time - */ - ans = sprintf(tmp, - "%d (%s) %c %d " // pid... - "%d %d %d %d " // pgrp... 
- "%u %lu %lu %lu " // flags... - "%lu %lu %lu %ld " // cmajflt... - "%ld %ld %ld %ld " // cstime... - "%ld %llu %lu %ld " // itrealvalue... - "%lu %lu %lu %lu " // rsslim... - "%lu %lu %lu %lu " // kstkesp... - "%lu %lu %lu %lu " // sigignore... - "%lu %d %d %u " // cnswap... - "%u %llu %lu %ld\n", // policy... - 0, "exe", 'R', 0, // pid... - 0, 0, 0, 0, // pgrp... - 0, 0L, 0L, 0L, // flags... - 0L, 0L, 0L, 0L, // cmajflt... - 0L, 0L, 0L, 0L, // cstime... - 0L, 0LL, 0L, 0L, // itrealvalue... - 0L, 0L, 0L, 0L, // rsslim... - 0L, 0L, 0L, 0L, // kstkesp... - 0L, 0L, 0L, 0L, // sigignore... - 0L, 0, thread->cpu_id, 0, // cnswap... - 0, 0LL, 0L, 0L // policy... - ); - thread_unlock(thread, &lock); - dprintf("tmp=%s\n", tmp); - - len = strlen(tmp); - if (r->offset < len) { - if (r->offset + r->count < len) { - ans = r->count; - } else { - eof = 1; - ans = len; - } - strncpy(buf, tmp + r->offset, ans); - } else if (r->offset == len) { - ans = 0; - eof = 1; - } - goto end; - } - else{ - dprintf("no thread found pid=%d tid=%d\n", pid, tid); - } - } - dprintf("could not find a matching entry for task/%d/%s.\n", tid, p); goto end; } - /* - * Processing for pattern "mcos%d/PID/xxx" files should be here. 
- */ - dprintf("could not find a matching entry for %s.\n", p); + if(thread) + kprintf("unsupported procfs entry: %d/task/%d/%s\n", pid, tid, p); + else + kprintf("unsupported procfs entry: %d/%s\n", pid, p); + end: ihk_mc_unmap_virtual(buf, npages, 0); dprintf("ret: %d, eof: %d\n", ans, eof); @@ -799,6 +589,12 @@ dataunavail: if (ret < 0) { kprintf("ERROR: sending IKC msg, ret: %d\n", ret); } + if(proc) + release_process(proc); + if(thread) + release_thread(thread); + if(vm) + release_process_vm(vm); return; } diff --git a/kernel/syscall.c b/kernel/syscall.c index 41e5639f..348595c9 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -632,8 +632,6 @@ terminate(int rc, int sig) list_add_tail(&mythread->siblings_list, &proc->threads_list); mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); - delete_proc_procfs_files(proc->pid); - vm = proc->vm; free_all_process_memory_range(vm); From 5d43c135dbee222aeebd9d233cd4595355d0ce7b Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Wed, 10 Feb 2016 17:10:54 +0900 Subject: [PATCH 09/21] procfs: (temporary fix) unsupported files are closed --- executer/kernel/mcctrl/procfs.c | 64 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index 16714b98..39f44f6f 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -732,58 +732,58 @@ static const struct file_operations mckernel_forward = { }; static const struct procfs_entry tid_entry_stuff[] = { - PROC_REG("auxv", S_IRUSR, NULL), - PROC_REG("clear_refs", S_IWUSR, NULL), - PROC_REG("cmdline", S_IRUGO, NULL), - PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), - PROC_REG("environ", S_IRUSR, NULL), +// PROC_REG("auxv", S_IRUSR, NULL), +// PROC_REG("clear_refs", S_IWUSR, NULL), +// PROC_REG("cmdline", S_IRUGO, NULL), +// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), +// PROC_REG("environ", S_IRUSR, NULL), // PROC_LNK("exe", mckernel_readlink), - 
PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), - PROC_REG("maps", S_IRUGO, NULL), +// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), +// PROC_REG("maps", S_IRUGO, NULL), PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), - PROC_REG("pagemap", S_IRUGO, NULL), - PROC_REG("smaps", S_IRUGO, NULL), +// PROC_REG("pagemap", S_IRUGO, NULL), +// PROC_REG("smaps", S_IRUGO, NULL), PROC_REG("stat", S_IRUGO, NULL), - PROC_REG("statm", S_IRUGO, NULL), - PROC_REG("status", S_IRUGO, NULL), - PROC_REG("syscall", S_IRUGO, NULL), - PROC_REG("wchan", S_IRUGO, NULL), +// PROC_REG("statm", S_IRUGO, NULL), +// PROC_REG("status", S_IRUGO, NULL), +// PROC_REG("syscall", S_IRUGO, NULL), +// PROC_REG("wchan", S_IRUGO, NULL), PROC_TERM }; static const struct procfs_entry pid_entry_stuff[] = { PROC_REG("auxv", S_IRUSR, NULL), - PROC_REG("clear_refs", S_IWUSR, NULL), +// PROC_REG("clear_refs", S_IWUSR, NULL), PROC_REG("cmdline", S_IRUGO, NULL), - PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), - PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL), - PROC_REG("environ", S_IRUSR, NULL), +// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL), +// PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL), +// PROC_REG("environ", S_IRUSR, NULL), // PROC_LNK("exe", mckernel_readlink), - PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), +// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL), PROC_REG("maps", S_IRUGO, NULL), PROC_REG("mem", S_IRUSR|S_IWUSR, NULL), PROC_REG("pagemap", S_IRUGO, NULL), PROC_REG("smaps", S_IRUGO, NULL), - PROC_REG("stat", S_IRUGO, NULL), - PROC_REG("statm", S_IRUGO, NULL), +// PROC_REG("stat", S_IRUGO, NULL), +// PROC_REG("statm", S_IRUGO, NULL), PROC_REG("status", S_IRUGO, NULL), - PROC_REG("syscall", S_IRUGO, NULL), +// PROC_REG("syscall", S_IRUGO, NULL), PROC_DIR("task", S_IRUGO|S_IXUGO), - PROC_REG("wchan", S_IRUGO, NULL), +// PROC_REG("wchan", S_IRUGO, NULL), PROC_TERM }; static const struct procfs_entry base_entry_stuff[] = { - PROC_REG("cmdline", S_IRUGO, NULL), - PROC_REG("cpuinfo", S_IRUGO, NULL), - PROC_REG("meminfo", 
S_IRUGO, NULL), - PROC_REG("pagetypeinfo",S_IRUGO, NULL), - PROC_REG("softirq", S_IRUGO, NULL), +// PROC_REG("cmdline", S_IRUGO, NULL), +// PROC_REG("cpuinfo", S_IRUGO, NULL), +// PROC_REG("meminfo", S_IRUGO, NULL), +// PROC_REG("pagetypeinfo",S_IRUGO, NULL), +// PROC_REG("softirq", S_IRUGO, NULL), PROC_REG("stat", S_IRUGO, NULL), - PROC_REG("uptime", S_IRUGO, NULL), - PROC_REG("version", S_IRUGO, NULL), - PROC_REG("vmallocinfo",S_IRUSR, NULL), - PROC_REG("vmstat", S_IRUGO, NULL), - PROC_REG("zoneinfo", S_IRUGO, NULL), +// PROC_REG("uptime", S_IRUGO, NULL), +// PROC_REG("version", S_IRUGO, NULL), +// PROC_REG("vmallocinfo",S_IRUSR, NULL), +// PROC_REG("vmstat", S_IRUGO, NULL), +// PROC_REG("zoneinfo", S_IRUGO, NULL), PROC_TERM }; From 2d3fda1d0b4f36fbab3d06198a45462b7db0d0bb Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Sun, 14 Feb 2016 22:36:58 +0900 Subject: [PATCH 10/21] flatten_strings: fix align (REQ-1) --- executer/kernel/mcctrl/control.c | 4 ++-- executer/user/mcexec.c | 12 +++++++----- kernel/host.c | 14 +++++++------- lib/string.c | 12 +++++++----- 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index f8362e66..fddc84e0 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -125,10 +125,10 @@ static long mcexec_prepare_image(ihk_os_t os, pdesc->args = (void*)virt_to_phys(args); printk("args: 0x%lX\n", (unsigned long)pdesc->args); - printk("argc: %d\n", *(int*)args); + printk("argc: %ld\n", *(long *)args); pdesc->envs = (void*)virt_to_phys(envs); printk("envs: 0x%lX\n", (unsigned long)pdesc->envs); - printk("envc: %d\n", *(int*)envs); + printk("envc: %ld\n", *(long *)envs); isp.msg = SCD_MSG_PREPARE_PROCESS; isp.ref = pdesc->cpu; diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 1992a449..2216dba9 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -776,7 +776,7 @@ int flatten_strings(int nr_strings, 
char *first, char **strings, char **flat) } /* Count full length */ - full_len = sizeof(int) + sizeof(char *); // Counter and terminating NULL + full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL if (first) { full_len += sizeof(char *) + strlen(first) + 1; } @@ -786,6 +786,8 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) full_len += sizeof(char *) + strlen(strings[string_i]) + 1; } + full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1); + _flat = (char *)malloc(full_len); if (!_flat) { return 0; @@ -794,14 +796,14 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) memset(_flat, 0, full_len); /* Number of strings */ - *((int*)_flat) = nr_strings + (first ? 1 : 0); + *((long *)_flat) = nr_strings + (first ? 1 : 0); // Actual offset - flat_offset = sizeof(int) + sizeof(char *) * (nr_strings + 1 + + flat_offset = sizeof(long) + sizeof(char *) * (nr_strings + 1 + (first ? 1 : 0)); if (first) { - *((char **)(_flat + sizeof(int))) = (void *)flat_offset; + *((char **)(_flat + sizeof(long))) = (void *)flat_offset; memcpy(_flat + flat_offset, first, strlen(first) + 1); flat_offset += strlen(first) + 1; } @@ -809,7 +811,7 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) for (string_i = 0; string_i < nr_strings; ++string_i) { /* Fabricate the string */ - *((char **)(_flat + sizeof(int) + (string_i + (first ? 1 : 0)) + *((char **)(_flat + sizeof(long) + (string_i + (first ? 
1 : 0)) * sizeof(char *))) = (void *)flat_offset; memcpy(_flat + flat_offset, strings[string_i], strlen(strings[string_i]) + 1); flat_offset += strlen(strings[string_i]) + 1; diff --git a/kernel/host.c b/kernel/host.c index 6376a6b9..78ed7f7e 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -242,7 +242,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, p->args_len = args_len; } - dkprintf("args copy, nr: %d\n", *((int*)args_envs_r)); + dkprintf("args copy, nr: %d\n", *((long *)args_envs_r)); memcpy_long(args_envs, args_envs_r, p->args_len + sizeof(long) - 1); @@ -277,7 +277,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, p->envs_len = envs_len; } - dkprintf("envs copy, nr: %d\n", *((int*)args_envs_r)); + dkprintf("envs copy, nr: %d\n", *((long *)args_envs_r)); memcpy_long(args_envs + p->args_len, args_envs_r, p->envs_len + sizeof(long) - 1); @@ -289,10 +289,10 @@ int prepare_process_ranges_args_envs(struct thread *thread, flush_tlb(); // Update variables - argc = *((int*)(args_envs)); + argc = *((long *)(args_envs)); dkprintf("argc: %d\n", argc); - argv = (char **)(args_envs + (sizeof(int))); + argv = (char **)(args_envs + (sizeof(long))); if(proc->saved_cmdline){ kfree(proc->saved_cmdline); proc->saved_cmdline_len = 0; @@ -309,17 +309,17 @@ int prepare_process_ranges_args_envs(struct thread *thread, *a = (char *)addr + (unsigned long)*a; // Process' address space! 
} - envc = *((int*)(args_envs + p->args_len)); + envc = *((long *)(args_envs + p->args_len)); dkprintf("envc: %d\n", envc); - env = (char **)(args_envs + p->args_len + sizeof(int)); + env = (char **)(args_envs + p->args_len + sizeof(long)); while (*env) { char **_env = env; //dkprintf("%s\n", args_envs + p->args_len + (unsigned long)*env); *env = (char *)addr + p->args_len + (unsigned long)*env; env = ++_env; } - env = (char **)(args_envs + p->args_len + sizeof(int)); + env = (char **)(args_envs + p->args_len + sizeof(long)); dkprintf("env OK\n"); diff --git a/lib/string.c b/lib/string.c index 010b9c53..2e9bfb2e 100644 --- a/lib/string.c +++ b/lib/string.c @@ -223,7 +223,7 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) } /* Count full length */ - full_len = sizeof(int) + sizeof(char *); // Counter and terminating NULL + full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL if (first) { full_len += sizeof(char *) + strlen(first) + 1; } @@ -233,6 +233,8 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) full_len += sizeof(char *) + strlen(strings[string_i]) + 1; } + full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1); + _flat = (char *)kmalloc(full_len, IHK_MC_AP_NOWAIT); if (!_flat) { return 0; @@ -241,14 +243,14 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) memset(_flat, 0, full_len); /* Number of strings */ - *((int*)_flat) = nr_strings + (first ? 1 : 0); + *((long *)_flat) = nr_strings + (first ? 1 : 0); // Actual offset - flat_offset = sizeof(int) + sizeof(char *) * (nr_strings + 1 + + flat_offset = sizeof(long) + sizeof(char *) * (nr_strings + 1 + (first ? 
1 : 0)); if (first) { - *((char **)(_flat + sizeof(int))) = (void *)flat_offset; + *((char **)(_flat + sizeof(long))) = (void *)flat_offset; memcpy(_flat + flat_offset, first, strlen(first) + 1); flat_offset += strlen(first) + 1; } @@ -256,7 +258,7 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat) for (string_i = 0; string_i < nr_strings; ++string_i) { /* Fabricate the string */ - *((char **)(_flat + sizeof(int) + (string_i + (first ? 1 : 0)) + *((char **)(_flat + sizeof(long) + (string_i + (first ? 1 : 0)) * sizeof(char *))) = (void *)flat_offset; memcpy(_flat + flat_offset, strings[string_i], strlen(strings[string_i]) + 1); flat_offset += strlen(strings[string_i]) + 1; From 6cda6792a9bf591006102b43f32dd37ed61c2f7f Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Sun, 14 Feb 2016 22:47:52 +0900 Subject: [PATCH 11/21] process_msg_init_acked: don't use PA --- kernel/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/host.c b/kernel/host.c index 78ed7f7e..0f7dca4d 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -449,7 +449,7 @@ static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_para static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long pphys) { - struct ikc_scd_init_param *param = (void *)pphys; + struct ikc_scd_init_param *param = phys_to_virt(pphys); struct syscall_params *lparam; enum ihk_mc_pt_attribute attr; From c328d26b8d1a88c0fc0b6c2302f7fbe90d03591d Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 15 Feb 2016 15:10:00 +0900 Subject: [PATCH 12/21] procfs(/proc//task//stat): fix memory corruption refs #722 --- kernel/procfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/procfs.c b/kernel/procfs.c index 525d0cf1..221323fb 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -545,7 +545,6 @@ process_procfs_request(unsigned long rarg) 0L, 0, thread->cpu_id, 0, // cnswap... 0, 0LL, 0L, 0L // policy... 
); - thread_unlock(thread, &lock); dprintf("tmp=%s\n", tmp); len = strlen(tmp); From da13bd408a6cd3bdf9ec561898460a7b2c2d2933 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Mon, 15 Feb 2016 18:20:58 +0900 Subject: [PATCH 13/21] mcexec: add to initialize some structures (REQ-56) refs #718 --- executer/user/mcexec.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 2216dba9..51a2b026 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -185,7 +185,8 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) desc = malloc(sizeof(struct program_load_desc) + sizeof(struct program_image_section) * nhdrs); - memset(desc, '\0', sizeof(struct program_load_desc)); + memset(desc, '\0', sizeof(struct program_load_desc) + + sizeof(struct program_image_section) * nhdrs); desc->shell_path[0] = '\0'; fseek(fp, hdr.e_phoff, SEEK_SET); j = 0; @@ -673,6 +674,7 @@ void transfer_image(int fd, struct program_load_desc *desc) desc->sections[i].offset, flen); while (s < e) { + memset(&pt, '\0', sizeof pt); pt.rphys = rpa; pt.userp = dma_buf; pt.size = PAGE_SIZE; @@ -894,6 +896,7 @@ sendsig(int sig, siginfo_t *siginfo, void *context) remote_tid = -1; } + memset(&sigdesc, '\0', sizeof sigdesc); sigdesc.cpu = cpu; sigdesc.pid = (int)pid; sigdesc.tid = remote_tid; @@ -920,6 +923,7 @@ act_signalfd4(struct syscall_wait_desc *w) switch(mode){ case 0: /* new signalfd */ sfd = malloc(sizeof(struct sigfd)); + memset(sfd, '\0', sizeof(struct sigfd)); tmp = w->sr.args[1]; flags = 0; if(tmp & SFD_NONBLOCK) @@ -1328,11 +1332,14 @@ int main(int argc, char **argv) __dprintf("mcoverlay enable\n"); char mcos_procdir[PATH_MAX]; char mcos_sysdir[PATH_MAX]; - struct sys_unshare_desc unshare_desc; - struct sys_mount_desc mount_desc; error = isunshare(); if (error == 0) { + struct sys_unshare_desc unshare_desc; + struct sys_mount_desc mount_desc; + + memset(&unshare_desc, '\0', sizeof 
unshare_desc); + memset(&mount_desc, '\0', sizeof mount_desc); unshare_desc.unshare_flags = CLONE_NEWNS; if (ioctl(fd, MCEXEC_UP_SYS_UNSHARE, (unsigned long)&unshare_desc) != 0) { @@ -1540,6 +1547,7 @@ void do_syscall_return(int fd, int cpu, { struct syscall_ret_desc desc; + memset(&desc, '\0', sizeof desc); desc.cpu = cpu; desc.ret = ret; desc.src = src; @@ -1556,6 +1564,7 @@ void do_syscall_load(int fd, int cpu, unsigned long dest, unsigned long src, { struct syscall_load_desc desc; + memset(&desc, '\0', sizeof desc); desc.cpu = cpu; desc.src = src; desc.dest = dest; @@ -1605,6 +1614,7 @@ static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n) struct strncpy_from_user_desc desc; int ret; + memset(&desc, '\0', sizeof desc); desc.dest = dest; desc.src = src; desc.n = n; @@ -1736,6 +1746,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock) char pathbuf[PATH_MAX]; char tmpbuf[PATH_MAX]; + memset(&w, '\0', sizeof w); w.cpu = cpu; w.pid = getpid(); @@ -2158,6 +2169,7 @@ return_execve1: fprintf(stderr, "execve(): error allocating desc\n"); goto return_execve2; } + memset(desc, '\0', w.sr.args[2]); /* Copy descriptor from co-kernel side */ trans.userp = (void*)desc; From 8c40f94aa82492371c798c2752c2b060132d8837 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Tue, 16 Feb 2016 13:21:29 +0900 Subject: [PATCH 14/21] /proc//mem: support read/write --- kernel/procfs.c | 151 ++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 82 deletions(-) diff --git a/kernel/procfs.c b/kernel/procfs.c index 221323fb..76107d0e 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -94,7 +94,6 @@ process_procfs_request(unsigned long rarg) int npages; int readwrite = 0; - dprintf("process_procfs_request: invoked.\n"); syscall_channel = get_cpu_local_var(0)->syscall_channel; @@ -162,33 +161,42 @@ process_procfs_request(unsigned long rarg) */ ret = sscanf(p, "%d/", &pid); if (ret == 1) { + struct mcs_rwlock_node tlock; + int tids; 
+ struct thread *thread1 = NULL; + proc = find_process(pid, &lock); if(proc == NULL){ kprintf("process_procfs_request: no such pid %d\n", pid); goto end; } p = strchr(p, '/') + 1; - ret = sscanf(p, "task/%d/", &tid); - if(ret == 1){ - struct mcs_rwlock_node tlock; - mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &tlock); - list_for_each_entry(thread, &proc->threads_list, - siblings_list) { - if(thread->tid == tid) - break; - } - if(thread == NULL){ - mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, - &tlock); - process_unlock(proc, &lock); - kprintf("process_procfs_request: no such tid %d-%d\n", pid, tid); - goto end; - } - hold_thread(thread); - mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); + if((tids = sscanf(p, "task/%d/", &tid)) == 1){ p = strchr(p, '/') + 1; p = strchr(p, '/') + 1; } + else + tid = pid; + + mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &tlock); + list_for_each_entry(thread, &proc->threads_list, siblings_list){ + if(thread->tid == tid) + break; + if(!thread1) + thread1 = thread; + } + if(thread == NULL){ + kprintf("process_procfs_request: no such tid %d-%d\n", pid, tid); + if(tids){ + process_unlock(proc, &lock); + mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); + goto end; + } + thread = thread1; + } + if(thread) + hold_thread(thread); + mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); hold_process(proc); vm = proc->vm; if(vm) @@ -235,73 +243,52 @@ process_procfs_request(unsigned long rarg) * of the process. The count is the length of the area. 
*/ if (strcmp(p, "mem") == 0) { - struct vm_range *range; + uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER; + unsigned long offset = r->offset; + unsigned long left = r->count; + int ret; + struct page_table *pt = vm->address_space->page_table; - if (proc != cpu_local_var(current)->proc) { - uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER; - unsigned long offset = r->offset; - unsigned long left = r->count; - int ret; + ans = 0; + if(left == 0) + goto end; - ans = 0; - if(left == 0) - goto end; - - while(left){ - unsigned long pa; - char *va; - int pos = offset & (PAGE_SIZE - 1); - int size = PAGE_SIZE - pos; - - if(size > left) - size = left; - ret = page_fault_process_vm(vm, (void *)offset, - reason); - if(ret){ - if(ans == 0) - ans = -EIO; - goto end; - } - ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, - (void *)offset, &pa); - if(ret){ - if(ans == 0) - ans = -EIO; - goto end; - } - va = phys_to_virt(pa); - memcpy(buf + ans, va, size); - offset += size; - left -= size; - ans += size; - } +#if 0 + if(!(proc->ptrace & PT_TRACED) || + !(proc->status & (PS_STOPPED | PS_TRACED))){ + ans = -EIO; + goto end; } - else{ - unsigned long offset = r->offset; - unsigned long left = r->count; - unsigned long pos; - unsigned long l; - ans = 0; - list_for_each_entry(range, &vm->vm_range_list, list) { - dprintf("range: %lx - %lx\n", range->start, range->end); - while (left && - (range->start <= offset) && - (offset < range->end)) { - pos = offset & (PAGE_SIZE - 1); - l = PAGE_SIZE - pos; - if(l > left) - l = left; - if(copy_from_user(buf, (void *)offset, l)){ - if(ans == 0) - ans = -EIO; - goto end; - } - buf += l; - ans += l; - offset += l; - left -= l; - } +#endif + + while(left){ + unsigned long pa; + char *va; + int pos = offset & (PAGE_SIZE - 1); + int size = PAGE_SIZE - pos; + + if(size > left) + size = left; + ret = page_fault_process_vm(vm, (void *)offset, reason); + if(ret){ + if(ans == 0) + ans = -EIO; + goto end; } + ret = 
ihk_mc_pt_virt_to_phys(pt, (void *)offset, &pa); + if(ret){ + if(ans == 0) + ans = -EIO; + goto end; + } + va = phys_to_virt(pa); + if(readwrite) + memcpy(va, buf + ans, size); + else + memcpy(buf + ans, va, size); + offset += size; + left -= size; + ans += size; } goto end; } From e7c37b800096af8ad292edc62c461bfea3876d3d Mon Sep 17 00:00:00 2001 From: Yoichi Umezawa Date: Tue, 16 Feb 2016 16:05:40 +0900 Subject: [PATCH 15/21] mcreboot-smp-x86.sh: fix Failed to mount /sys/devices/virtual/mcos/mcos0/sys --- arch/x86/tools/mcreboot-smp-x86.sh.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/tools/mcreboot-smp-x86.sh.in b/arch/x86/tools/mcreboot-smp-x86.sh.in index 4e1619cc..3c93c908 100644 --- a/arch/x86/tools/mcreboot-smp-x86.sh.in +++ b/arch/x86/tools/mcreboot-smp-x86.sh.in @@ -124,6 +124,10 @@ if [ "$enable_mcoverlay" != "" ]; then if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then echo "error: mount /tmp/mcos/mcos0_proc"; exit; fi mount --make-rprivate /proc + while [ ! -e /sys/devices/virtual/mcos/mcos0/sys ] + do + sleep 1 + done if [ ! -e /tmp/mcos/mcos0_sys ]; then mkdir -p /tmp/mcos/mcos0_sys; fi if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then mkdir -p /tmp/mcos/mcos0_sys_upper; fi if [ ! 
-e /tmp/mcos/mcos0_sys_work ]; then mkdir -p /tmp/mcos/mcos0_sys_work; fi From a997af71be912d68b2e1f38cc1f81b1a91b66b74 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Wed, 17 Feb 2016 12:48:12 +0900 Subject: [PATCH 16/21] support tkill refs #664 --- arch/x86/kernel/include/syscall_list.h | 1 + kernel/syscall.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 2f955348..6f429df3 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -107,6 +107,7 @@ SYSCALL_HANDLED(158, arch_prctl) SYSCALL_HANDLED(160, setrlimit) SYSCALL_HANDLED(164, settimeofday) SYSCALL_HANDLED(186, gettid) +SYSCALL_HANDLED(200, tkill) SYSCALL_DELEGATED(201, time) SYSCALL_HANDLED(202, futex) SYSCALL_HANDLED(203, sched_setaffinity) diff --git a/kernel/syscall.c b/kernel/syscall.c index 348595c9..41c27e6f 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -2075,6 +2075,24 @@ SYSCALL_DECLARE(tgkill) return do_kill(thread, tgid, tid, sig, &info, 0); } +SYSCALL_DECLARE(tkill) +{ + int tid = ihk_mc_syscall_arg0(ctx); + int sig = ihk_mc_syscall_arg1(ctx); + struct thread *thread = cpu_local_var(current); + struct siginfo info; + + memset(&info, '\0', sizeof info); + info.si_signo = sig; + info.si_code = SI_TKILL; + info._sifields._kill.si_pid = thread->proc->pid; + + if(tid <= 0) + return -EINVAL; + + return do_kill(thread, -1, tid, sig, &info, 0); +} + int * getcred(int *_buf) { From eba2be8a35c17fd8d2c811bfca9e297d1fd70a17 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Thu, 18 Feb 2016 13:14:18 +0900 Subject: [PATCH 17/21] support times --- arch/x86/kernel/cpu.c | 10 +++ arch/x86/kernel/include/syscall_list.h | 1 + arch/x86/kernel/interrupt.S | 4 +- arch/x86/kernel/syscall.c | 10 ++- kernel/include/process.h | 11 +++- kernel/include/syscall.h | 3 + kernel/include/time.h | 22 +++++++ kernel/mem.c | 3 + kernel/process.c | 7 ++ 
kernel/syscall.c | 91 ++++++++++++++++++++++++++ 10 files changed, 159 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index 57543763..d8528c97 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -82,6 +82,7 @@ void x86_init_perfctr(void); int gettime_local_support = 0; extern int kprintf(const char *format, ...); +extern int interrupt_from_user(void *); static struct idt_entry{ uint32_t desc[4]; @@ -786,6 +787,8 @@ void handle_interrupt(int vector, struct x86_user_context *regs) lapic_ack(); ++v->in_interrupt; + set_cputime(interrupt_from_user(regs)? 1: 2); + dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n", ihk_mc_get_processor_id(), vector, regs->gpr.rip); @@ -862,12 +865,14 @@ void handle_interrupt(int vector, struct x86_user_context *regs) check_signal(0, regs, 0); check_need_resched(); + set_cputime(0); --v->in_interrupt; } void gpe_handler(struct x86_user_context *regs) { + set_cputime(interrupt_from_user(regs)? 1: 2); kprintf("General protection fault (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); arch_show_interrupt_context(regs); @@ -877,6 +882,7 @@ void gpe_handler(struct x86_user_context *regs) set_signal(SIGSEGV, regs, NULL); check_signal(0, regs, 0); check_need_resched(); + set_cputime(0); // panic("GPF"); } @@ -886,6 +892,7 @@ void debug_handler(struct x86_user_context *regs) int si_code = 0; struct siginfo info; + set_cputime(interrupt_from_user(regs)? 1: 2); #ifdef DEBUG_PRINT_CPU kprintf("debug exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); @@ -905,12 +912,14 @@ void debug_handler(struct x86_user_context *regs) set_signal(SIGTRAP, regs, &info); check_signal(0, regs, 0); check_need_resched(); + set_cputime(0); } void int3_handler(struct x86_user_context *regs) { struct siginfo info; + set_cputime(interrupt_from_user(regs)? 
1: 2); #ifdef DEBUG_PRINT_CPU kprintf("int3 exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); @@ -922,6 +931,7 @@ void int3_handler(struct x86_user_context *regs) set_signal(SIGTRAP, regs, &info); check_signal(0, regs, 0); check_need_resched(); + set_cputime(0); } diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 6f429df3..daf632b7 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -69,6 +69,7 @@ SYSCALL_DELEGATED(79, getcwd) SYSCALL_DELEGATED(89, readlink) SYSCALL_HANDLED(96, gettimeofday) SYSCALL_HANDLED(97, getrlimit) +SYSCALL_HANDLED(100, times) SYSCALL_HANDLED(101, ptrace) SYSCALL_HANDLED(102, getuid) SYSCALL_HANDLED(104, getgid) diff --git a/arch/x86/kernel/interrupt.S b/arch/x86/kernel/interrupt.S index 760c2722..74f88fd7 100644 --- a/arch/x86/kernel/interrupt.S +++ b/arch/x86/kernel/interrupt.S @@ -209,7 +209,9 @@ enter_user_mode: callq release_runq_lock movq $0, %rdi movq %rsp, %rsi - call check_signal + call check_signal + movq $0, %rdi + call set_cputime POP_ALL_REGS addq $8, %rsp iretq diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index dea1ca0f..b091e595 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -756,6 +756,14 @@ hassigpending(struct thread *thread) return getsigpending(thread, 0); } +int +interrupt_from_user(void *regs0) +{ + struct x86_user_context *regs = regs0; + + return !(regs->gpr.rsp & 0x8000000000000000); +} + void check_signal(unsigned long rc, void *regs0, int num) { @@ -785,7 +793,7 @@ check_signal(unsigned long rc, void *regs0, int num) return; } - if(regs != NULL && (regs->gpr.rsp & 0x8000000000000000)) { + if(regs != NULL && !interrupt_from_user(regs)) { return; } diff --git a/kernel/include/process.h b/kernel/include/process.h index 4ad8b8d6..9eeee9b8 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -347,7 +347,7 @@ typedef void 
pgio_func_t(void *arg); * special "init" process */ struct process { struct list_head hash_list; - mcs_rwlock_lock_t update_lock; // lock for parent, status, ...? + mcs_rwlock_lock_t update_lock; // lock for parent, status, cpu time... // process vm struct process_vm *vm; @@ -431,6 +431,10 @@ struct process { ihk_spinlock_t mckfd_lock; struct mckfd *mckfd; + + // cpu time (summary) + struct timespec stime; + struct timespec utime; }; void hold_thread(struct thread *ftn); @@ -518,6 +522,11 @@ struct thread { unsigned long *ptrace_debugreg; /* debug registers for ptrace */ struct sig_pending *ptrace_recvsig; struct sig_pending *ptrace_sendsig; + + // cpu time + struct timespec stime; + struct timespec utime; + struct timespec btime; }; struct process_vm { diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index e7c86079..0b96fcb9 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -337,4 +337,7 @@ struct tod_data_s { }; extern struct tod_data_s tod_data; /* residing in arch-dependent file */ +void reset_cputime(); +void set_cputime(int mode); + #endif diff --git a/kernel/include/time.h b/kernel/include/time.h index 83e920f5..5df0d8fb 100644 --- a/kernel/include/time.h +++ b/kernel/include/time.h @@ -53,5 +53,27 @@ struct timezone int tz_dsttime; /* Nonzero if DST is ever in effect. 
*/ }; +static inline void +ts_add(struct timespec *ats, const struct timespec *bts) +{ + ats->tv_sec += bts->tv_sec; + ats->tv_nsec += bts->tv_nsec; + while(ats->tv_nsec >= 1000000000){ + ats->tv_sec++; + ats->tv_nsec -= 1000000000; + } +} + +static inline void +ts_sub(struct timespec *ats, const struct timespec *bts) +{ + ats->tv_sec -= bts->tv_sec; + ats->tv_nsec -= bts->tv_nsec; + while(ats->tv_nsec < 0){ + ats->tv_sec--; + ats->tv_nsec += 1000000000; + } +} + #endif // __TIME_H diff --git a/kernel/mem.c b/kernel/mem.c index 1fadb31f..a06140cf 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -53,6 +53,7 @@ static unsigned long pa_start, pa_end; static struct page *pa_pages; extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt); +extern int interrupt_from_user(void *); struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE]; @@ -369,6 +370,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) struct thread *thread = cpu_local_var(current); int error; + set_cputime(interrupt_from_user(regs)? 
1: 2); dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n", ihk_mc_get_processor_id(), fault_addr, reason, regs); @@ -427,6 +429,7 @@ out: ihk_mc_get_processor_id(), fault_addr, reason, regs, error); check_need_resched(); + set_cputime(0); return; } diff --git a/kernel/process.c b/kernel/process.c index 49a48f08..c609cbbb 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2161,11 +2161,17 @@ void destroy_thread(struct thread *thread) void release_thread(struct thread *thread) { struct process_vm *vm; + struct mcs_rwlock_node lock; if (!ihk_atomic_dec_and_test(&thread->refcount)) { return; } + mcs_rwlock_writer_lock_noirq(&thread->proc->update_lock, &lock); + ts_add(&thread->proc->stime, &thread->stime); + ts_add(&thread->proc->utime, &thread->utime); + mcs_rwlock_writer_unlock_noirq(&thread->proc->update_lock, &lock); + vm = thread->vm; procfs_delete_thread(thread); @@ -2547,6 +2553,7 @@ redo: if (prev != next) { switch_ctx = 1; v->current = next; + reset_cputime(); } if (switch_ctx) { diff --git a/kernel/syscall.c b/kernel/syscall.c index 41c27e6f..6985749c 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -113,6 +113,7 @@ extern unsigned long ihk_mc_get_ns_per_tsc(void); extern int ptrace_detach(int pid, int data); extern void debug_log(unsigned long); extern void free_all_process_memory_range(struct process_vm *vm); +extern struct cpu_local_var *clv; int prepare_process_ranges_args_envs(struct thread *thread, struct program_load_desc *pn, @@ -401,6 +402,10 @@ do_wait(int pid, int *status, int options, void *rusage) ret = wait_zombie(thread, child, status, options); mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); if(!(options & WNOWAIT)){ + mcs_rwlock_writer_lock_noirq(&proc->update_lock, &lock); + ts_add(&proc->stime, &child->stime); + ts_add(&proc->utime, &child->utime); + mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &lock); release_process(child); } goto out_found; @@ -2034,6 +2039,37 @@ SYSCALL_DECLARE(set_tid_address) return 
cpu_local_var(current)->proc->pid; } +static unsigned long +timespec_to_jiffy(const struct timespec *ats) +{ + return ats->tv_sec * 100 + ats->tv_nsec / 10000000; +} + +SYSCALL_DECLARE(times) +{ + struct tms { + unsigned long tms_utime; + unsigned long tms_stime; + unsigned long tms_cutime; + unsigned long tms_cstime; + }; + struct tms mytms; + struct tms *buf = (struct tms *)ihk_mc_syscall_arg0(ctx); + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct timespec ats = {0, 0}; + + mytms.tms_utime = timespec_to_jiffy(&thread->utime); + mytms.tms_stime = timespec_to_jiffy(&thread->stime); + mytms.tms_cutime = timespec_to_jiffy(&proc->utime); + mytms.tms_cstime = timespec_to_jiffy(&proc->stime); + if(copy_to_user(buf, &mytms, sizeof mytms)) + return -EFAULT; + if(gettime_local_support) + calculate_time_from_tsc(&ats); + return timespec_to_jiffy(&ats); +} + SYSCALL_DECLARE(kill) { int pid = ihk_mc_syscall_arg0(ctx); @@ -6604,13 +6640,67 @@ SYSCALL_DECLARE(pmc_reset) return ihk_mc_perfctr_reset(counter); } +void +reset_cputime() +{ + struct thread *thread; + + if(clv == NULL) + return; + + if(!(thread = cpu_local_var(current))) + return; + + thread->btime.tv_sec = 0; + thread->btime.tv_nsec = 0; +} + +void +set_cputime(int mode) +{ + struct thread *thread; + struct timespec ats; + + if(!gettime_local_support) + return; + + if(clv == NULL) + return; + + if(!(thread = cpu_local_var(current))) + return; + + calculate_time_from_tsc(&ats); + if(thread->btime.tv_sec != 0 && thread->btime.tv_nsec != 0){ + struct timespec dts; + + dts.tv_sec = ats.tv_sec; + dts.tv_nsec = ats.tv_nsec; + ts_sub(&dts, &thread->btime); + if(mode == 1) + ts_add(&thread->utime, &dts); + else + ts_add(&thread->stime, &dts); + } + if(mode == 2){ + thread->btime.tv_sec = 0; + thread->btime.tv_nsec = 0; + } + else{ + thread->btime.tv_sec = ats.tv_sec; + thread->btime.tv_nsec = ats.tv_nsec; + } +} + long syscall(int num, ihk_mc_user_context_t *ctx) { long l; + 
set_cputime(1); if(cpu_local_var(current)->proc->status == PS_EXITED && (num != __NR_exit && num != __NR_exit_group)){ check_signal(-EINVAL, NULL, 0); + set_cputime(0); return -EINVAL; } @@ -6669,5 +6759,6 @@ long syscall(int num, ihk_mc_user_context_t *ctx) ptrace_syscall_exit(cpu_local_var(current)); } + set_cputime(0); return l; } From 307b2b8da5e252bd9074565193f9343b0c24bb23 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Thu, 18 Feb 2016 17:43:13 +0900 Subject: [PATCH 18/21] clock_gettime: support clock_id CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID --- kernel/include/process.h | 6 +++ kernel/syscall.c | 80 +++++++++++++++++++++++++++++++++++----- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/kernel/include/process.h b/kernel/include/process.h index 9eeee9b8..a9fded27 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -435,6 +435,10 @@ struct process { // cpu time (summary) struct timespec stime; struct timespec utime; + + // cpu time (children) + struct timespec stime_children; + struct timespec utime_children; }; void hold_thread(struct thread *ftn); @@ -527,6 +531,8 @@ struct thread { struct timespec stime; struct timespec utime; struct timespec btime; + int times_update; + int in_kernel; }; struct process_vm { diff --git a/kernel/syscall.c b/kernel/syscall.c index 6985749c..09daebed 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -403,8 +403,10 @@ do_wait(int pid, int *status, int options, void *rusage) mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); if(!(options & WNOWAIT)){ mcs_rwlock_writer_lock_noirq(&proc->update_lock, &lock); - ts_add(&proc->stime, &child->stime); - ts_add(&proc->utime, &child->utime); + ts_add(&proc->stime_children, &child->stime); + ts_add(&proc->utime_children, &child->utime); + ts_add(&proc->stime_children, &child->stime_children); + ts_add(&proc->utime_children, &child->utime_children); mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &lock); 
release_process(child); } @@ -2057,16 +2059,28 @@ SYSCALL_DECLARE(times) struct tms *buf = (struct tms *)ihk_mc_syscall_arg0(ctx); struct thread *thread = cpu_local_var(current); struct process *proc = thread->proc; - struct timespec ats = {0, 0}; + struct timespec ats; mytms.tms_utime = timespec_to_jiffy(&thread->utime); mytms.tms_stime = timespec_to_jiffy(&thread->stime); - mytms.tms_cutime = timespec_to_jiffy(&proc->utime); - mytms.tms_cstime = timespec_to_jiffy(&proc->stime); + ats.tv_sec = proc->utime.tv_sec; + ats.tv_nsec = proc->utime.tv_nsec; + ts_add(&ats, &proc->utime_children); + mytms.tms_cutime = timespec_to_jiffy(&ats); + ats.tv_sec = proc->stime.tv_sec; + ats.tv_nsec = proc->stime.tv_nsec; + ts_add(&ats, &proc->stime_children); + mytms.tms_cstime = timespec_to_jiffy(&ats); if(copy_to_user(buf, &mytms, sizeof mytms)) return -EFAULT; - if(gettime_local_support) + if(gettime_local_support){ calculate_time_from_tsc(&ats); + } + else{ + ats.tv_sec = 0; + ats.tv_nsec = 0; + } + return timespec_to_jiffy(&ats); } @@ -5247,6 +5261,41 @@ SYSCALL_DECLARE(clock_gettime) dkprintf("clock_gettime(): %d\n", error); return error; } + else if(clock_id == CLOCK_PROCESS_CPUTIME_ID){ + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct thread *child; + struct mcs_rwlock_node lock; + + mcs_rwlock_reader_lock_noirq(&proc->children_lock, &lock); + list_for_each_entry(child, &proc->threads_list, siblings_list){ + if(child != thread && + child->status == PS_RUNNING && + !child->in_kernel){ + child->times_update = 0; + ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(child->cpu_id)->apic_id, 0xd1); + } + } + ats.tv_sec = proc->utime.tv_sec; + ats.tv_nsec = proc->utime.tv_nsec; + ts_add(&ats, &proc->stime); + list_for_each_entry(child, &proc->threads_list, siblings_list){ + while(!child->times_update) + cpu_pause(); + ts_add(&ats, &child->utime); + ts_add(&ats, &child->stime); + } + mcs_rwlock_reader_unlock_noirq(&proc->children_lock, 
&lock); + return copy_to_user(ts, &ats, sizeof ats); + } + else if(clock_id == CLOCK_THREAD_CPUTIME_ID){ + struct thread *thread = cpu_local_var(current); + + ats.tv_sec = thread->utime.tv_sec; + ats.tv_nsec = thread->utime.tv_nsec; + ts_add(&ats, &thread->stime); + return copy_to_user(ts, &ats, sizeof ats); + } /* Otherwise offload */ request.number = __NR_clock_gettime; @@ -6655,21 +6704,30 @@ reset_cputime() thread->btime.tv_nsec = 0; } +/** + * mode == 0: kernel -> user + * mode == 1: user -> kernel + * mode == 2: kernel -> kernel + */ void set_cputime(int mode) { struct thread *thread; struct timespec ats; - - if(!gettime_local_support) - return; + struct cpu_local_var *v; if(clv == NULL) return; - if(!(thread = cpu_local_var(current))) + v = get_this_cpu_local_var(); + if(!(thread = v->current)) return; + if(!gettime_local_support){ + thread->times_update = 1; + return; + } + calculate_time_from_tsc(&ats); if(thread->btime.tv_sec != 0 && thread->btime.tv_nsec != 0){ struct timespec dts; @@ -6690,6 +6748,8 @@ set_cputime(int mode) thread->btime.tv_sec = ats.tv_sec; thread->btime.tv_nsec = ats.tv_nsec; } + thread->times_update = 1; + thread->in_kernel = mode; } long syscall(int num, ihk_mc_user_context_t *ctx) From 2c50b716fd2a6bfadd5118b0ea07d5278a01aa2b Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Fri, 19 Feb 2016 15:25:05 +0900 Subject: [PATCH 19/21] support setitimer/getitimer --- arch/x86/kernel/include/syscall_list.h | 2 + kernel/include/process.h | 8 ++ kernel/include/time.h | 45 ++++++ kernel/process.c | 36 +++-- kernel/syscall.c | 181 ++++++++++++++++++++++++- 5 files changed, 256 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index daf632b7..33212922 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -51,6 +51,8 @@ SYSCALL_HANDLED(30, shmat) SYSCALL_HANDLED(31, shmctl) SYSCALL_HANDLED(34, pause) SYSCALL_HANDLED(35, 
nanosleep) +SYSCALL_HANDLED(36, getitimer) +SYSCALL_HANDLED(38, setitimer) SYSCALL_HANDLED(39, getpid) SYSCALL_HANDLED(56, clone) SYSCALL_DELEGATED(57, fork) diff --git a/kernel/include/process.h b/kernel/include/process.h index a9fded27..e7e88e4d 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -533,6 +533,13 @@ struct thread { struct timespec btime; int times_update; int in_kernel; + + // interval timers + int itimer_enabled; + struct itimerval itimer_virtual; + struct itimerval itimer_prof; + struct timespec itimer_virtual_value; + struct timespec itimer_prof_value; }; struct process_vm { @@ -636,5 +643,6 @@ void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock); void chain_process(struct process *); void chain_thread(struct thread *); void proc_init(); +void set_timer(); #endif diff --git a/kernel/include/time.h b/kernel/include/time.h index 5df0d8fb..b4c1bffd 100644 --- a/kernel/include/time.h +++ b/kernel/include/time.h @@ -53,6 +53,15 @@ struct timezone int tz_dsttime; /* Nonzero if DST is ever in effect. 
*/ }; +#define ITIMER_REAL 0 +#define ITIMER_VIRTUAL 1 +#define ITIMER_PROF 2 + +struct itimerval { + struct timeval it_interval; + struct timeval it_value; +}; + static inline void ts_add(struct timespec *ats, const struct timespec *bts) { @@ -75,5 +84,41 @@ ts_sub(struct timespec *ats, const struct timespec *bts) } } +static inline void +tv_add(struct timeval *ats, const struct timeval *bts) +{ + ats->tv_sec += bts->tv_sec; + ats->tv_usec += bts->tv_usec; + while(ats->tv_usec >= 1000000){ + ats->tv_sec++; + ats->tv_usec -= 1000000; + } +} + +static inline void +tv_sub(struct timeval *ats, const struct timeval *bts) +{ + ats->tv_sec -= bts->tv_sec; + ats->tv_usec -= bts->tv_usec; + while(ats->tv_usec < 0){ + ats->tv_sec--; + ats->tv_usec += 1000000; + } +} + +static inline void +tv_to_ts(struct timespec *ats, const struct timeval *bts) +{ + ats->tv_sec = bts->tv_sec; + ats->tv_nsec = bts->tv_usec * 1000; +} + +static inline void +ts_to_tv(struct timeval *ats, const struct timespec *bts) +{ + ats->tv_sec = bts->tv_sec; + ats->tv_usec = bts->tv_nsec / 1000; +} + #endif // __TIME_H diff --git a/kernel/process.c b/kernel/process.c index c609cbbb..32d74a86 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2478,6 +2478,26 @@ ack: ihk_mc_spinlock_unlock(&cur_v->migq_lock, irqstate); } +void +set_timer() +{ + struct cpu_local_var *v = get_this_cpu_local_var(); + + /* Toggle timesharing if CPU core is oversubscribed */ + if (v->runq_len > 1 || v->current->itimer_enabled) { + if (!cpu_local_var(timer_enabled)) { + lapic_timer_enable(10000000); + cpu_local_var(timer_enabled) = 1; + } + } + else { + if (cpu_local_var(timer_enabled)) { + lapic_timer_disable(); + cpu_local_var(timer_enabled) = 0; + } + } +} + void schedule(void) { struct cpu_local_var *v; @@ -2516,20 +2536,6 @@ redo: list_add_tail(&prev->sched_list, &(v->runq)); ++v->runq_len; } - - /* Toggle timesharing if CPU core is oversubscribed */ - if (v->runq_len > 1) { - if (!cpu_local_var(timer_enabled)) { - 
lapic_timer_enable(10000000); - cpu_local_var(timer_enabled) = 1; - } - } - else { - if (cpu_local_var(timer_enabled)) { - lapic_timer_disable(); - cpu_local_var(timer_enabled) = 0; - } - } } if (v->flags & CPU_FLAG_NEED_MIGRATE) { @@ -2556,6 +2562,8 @@ redo: reset_cputime(); } + set_timer(); + if (switch_ctx) { dkprintf("schedule: %d => %d \n", prev ? prev->tid : 0, next ? next->tid : 0); diff --git a/kernel/syscall.c b/kernel/syscall.c index 09daebed..a51cfeeb 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -5238,6 +5238,134 @@ static void calculate_time_from_tsc(struct timespec *ts) return; } +/* + * setitimer: ITIMER_REAL is forwarded through do_syscall(); ITIMER_VIRTUAL + * and ITIMER_PROF are stored in the calling thread. If old is non-NULL it + * receives the stored setting with the accumulated itimer_virtual_value or + * itimer_prof_value subtracted from it_value. Returns -EINVAL for an unknown + * timer and -EFAULT when a copy to or from user space fails. + */ +SYSCALL_DECLARE(setitimer) +{ + int which = (int)ihk_mc_syscall_arg0(ctx); + struct itimerval *new = (struct itimerval *)ihk_mc_syscall_arg1(ctx); + struct itimerval *old = (struct itimerval *)ihk_mc_syscall_arg2(ctx); + struct syscall_request request IHK_DMA_ALIGN; + struct thread *thread = cpu_local_var(current); + int timer_start = 1; + struct itimerval wkval; + struct timeval tv; + + if(which != ITIMER_REAL && + which != ITIMER_VIRTUAL && + which != ITIMER_PROF) + return -EINVAL; + + if(which == ITIMER_REAL){ + request.number = __NR_setitimer; + request.args[0] = ihk_mc_syscall_arg0(ctx); + request.args[1] = ihk_mc_syscall_arg1(ctx); + request.args[2] = ihk_mc_syscall_arg2(ctx); + + return do_syscall(&request, ihk_mc_get_processor_id(), 0); + } + else if(which == ITIMER_VIRTUAL){ + if(old){ + memcpy(&wkval, &thread->itimer_virtual, sizeof wkval); + if(wkval.it_value.tv_sec != 0 || + wkval.it_value.tv_usec != 0){ + ts_to_tv(&tv, &thread->itimer_virtual_value); + tv_sub(&wkval.it_value, &tv); + } + if(copy_to_user(old, &wkval, sizeof wkval)) + return -EFAULT; + } + if(!new){ + return 0; + } + if(copy_from_user(&thread->itimer_virtual, new, sizeof(struct itimerval))) + return -EFAULT; + thread->itimer_virtual_value.tv_sec = 0; + thread->itimer_virtual_value.tv_nsec = 0; + if(thread->itimer_virtual.it_value.tv_sec == 0 && + thread->itimer_virtual.it_value.tv_usec == 0) + timer_start
= 0; + } + else if(which == ITIMER_PROF){ + if(old){ + memcpy(&wkval, &thread->itimer_prof, sizeof wkval); + if(wkval.it_value.tv_sec != 0 || + wkval.it_value.tv_usec != 0){ + ts_to_tv(&tv, &thread->itimer_prof_value); + tv_sub(&wkval.it_value, &tv); + } + if(copy_to_user(old, &wkval, sizeof wkval)) + return -EFAULT; + } + if(!new){ + return 0; + } + if(copy_from_user(&thread->itimer_prof, new, sizeof(struct itimerval))) + return -EFAULT; + thread->itimer_prof_value.tv_sec = 0; + thread->itimer_prof_value.tv_nsec = 0; + if(thread->itimer_prof.it_value.tv_sec == 0 && + thread->itimer_prof.it_value.tv_usec == 0) + timer_start = 0; + } + thread->itimer_enabled = timer_start; + set_timer(); + return 0; +} + +SYSCALL_DECLARE(getitimer) +{ + int which = (int)ihk_mc_syscall_arg0(ctx); + struct itimerval *old = (struct itimerval *)ihk_mc_syscall_arg1(ctx); + struct syscall_request request IHK_DMA_ALIGN; + struct thread *thread = cpu_local_var(current); + struct itimerval wkval; + struct timeval tv; + + if(which != ITIMER_REAL && + which != ITIMER_VIRTUAL && + which != ITIMER_PROF) + return -EINVAL; + + if(which == ITIMER_REAL){ + request.number = __NR_getitimer; + request.args[0] = ihk_mc_syscall_arg0(ctx); + request.args[1] = ihk_mc_syscall_arg1(ctx); + + return do_syscall(&request, ihk_mc_get_processor_id(), 0); + } + else if(which == ITIMER_VIRTUAL){ + if(old){ + memcpy(&wkval, &thread->itimer_virtual, sizeof wkval); + if(wkval.it_value.tv_sec != 0 || + wkval.it_value.tv_usec != 0){ + ts_to_tv(&tv, &thread->itimer_virtual_value); + tv_sub(&wkval.it_value, &tv); + } + if(copy_to_user(old, &wkval, sizeof wkval)) + return -EFAULT; + } + } + else if(which == ITIMER_PROF){ + if(old){ + memcpy(&wkval, &thread->itimer_prof, sizeof wkval); + if(wkval.it_value.tv_sec != 0 || + wkval.it_value.tv_usec != 0){ + ts_to_tv(&tv, &thread->itimer_prof_value); + tv_sub(&wkval.it_value, &tv); + } + if(copy_to_user(old, &wkval, sizeof wkval)) + return -EFAULT; + } + } + return 0; +} +
SYSCALL_DECLARE(clock_gettime) { /* TODO: handle clock_id */ @@ -6735,11 +6854,17 @@ set_cputime(int mode) dts.tv_sec = ats.tv_sec; dts.tv_nsec = ats.tv_nsec; ts_sub(&dts, &thread->btime); - if(mode == 1) + if(mode == 1){ ts_add(&thread->utime, &dts); - else + ts_add(&thread->itimer_virtual_value, &dts); + ts_add(&thread->itimer_prof_value, &dts); + } + else{ ts_add(&thread->stime, &dts); + ts_add(&thread->itimer_prof_value, &dts); + } } + if(mode == 2){ thread->btime.tv_sec = 0; thread->btime.tv_nsec = 0; @@ -6750,6 +6875,58 @@ set_cputime(int mode) } thread->times_update = 1; thread->in_kernel = mode; + + if(thread->itimer_enabled){ + struct timeval tv; + int ev = 0; + + if(thread->itimer_virtual.it_value.tv_sec != 0 || + thread->itimer_virtual.it_value.tv_usec){ + ts_to_tv(&tv, &thread->itimer_virtual_value); + tv_sub(&tv, &thread->itimer_virtual.it_value); + if(tv.tv_sec > 0 || + (tv.tv_sec == 0 && + tv.tv_usec > 0)){ + thread->itimer_virtual_value.tv_sec = 0; + thread->itimer_virtual_value.tv_nsec = 0; + thread->itimer_virtual.it_value.tv_sec = + thread->itimer_virtual.it_interval.tv_sec; + thread->itimer_virtual.it_value.tv_usec = + thread->itimer_virtual.it_interval.tv_usec; + do_kill(thread, thread->proc->pid, thread->tid, + SIGVTALRM, NULL, 0); + ev = 1; + } + } + + if(thread->itimer_prof.it_value.tv_sec != 0 || + thread->itimer_prof.it_value.tv_usec){ + ts_to_tv(&tv, &thread->itimer_prof_value); + tv_sub(&tv, &thread->itimer_prof.it_value); + if(tv.tv_sec > 0 || + (tv.tv_sec == 0 && + tv.tv_usec > 0)){ + thread->itimer_prof_value.tv_sec = 0; + thread->itimer_prof_value.tv_nsec = 0; + thread->itimer_prof.it_value.tv_sec = + thread->itimer_prof.it_interval.tv_sec; + thread->itimer_prof.it_value.tv_usec = + thread->itimer_prof.it_interval.tv_usec; + do_kill(thread, thread->proc->pid, thread->tid, + SIGPROF, NULL, 0); + ev = 1; + } + } + if(ev){ + if(thread->itimer_virtual.it_value.tv_sec == 0 && + thread->itimer_virtual.it_value.tv_usec == 0 && + 
thread->itimer_prof.it_value.tv_sec == 0 && + thread->itimer_prof.it_value.tv_usec == 0){ + thread->itimer_enabled = 0; + set_timer(); + } + } + } } long syscall(int num, ihk_mc_user_context_t *ctx) From e2b28da32fd2778b1d569457898375ba93241144 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Sun, 21 Feb 2016 14:55:34 +0900 Subject: [PATCH 20/21] signal handler support gdb stepi command --- arch/x86/kernel/cpu.c | 2 ++ arch/x86/kernel/syscall.c | 23 +++++++++++++++++++++++ kernel/procfs.c | 3 +++ 3 files changed, 28 insertions(+) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index d8528c97..058b3e90 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -893,6 +893,7 @@ void debug_handler(struct x86_user_context *regs) struct siginfo info; set_cputime(interrupt_from_user(regs)? 1: 2); +kprintf("debug_handler rip=%lx\n", regs->gpr.rip); #ifdef DEBUG_PRINT_CPU kprintf("debug exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); @@ -920,6 +921,7 @@ void int3_handler(struct x86_user_context *regs) struct siginfo info; set_cputime(interrupt_from_user(regs)? 
1: 2); +kprintf("int3_handler rip=%lx\n", regs->gpr.rip); #ifdef DEBUG_PRINT_CPU kprintf("int3 exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index b091e595..957f52dd 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -32,6 +32,8 @@ int write_process_vm(struct process_vm *vm, void *dst, const void *src, size_t s long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); long syscall(int num, ihk_mc_user_context_t *ctx); extern void save_fp_regs(struct thread *proc); +void set_signal(int sig, void *regs0, siginfo_t *info); +void check_signal(unsigned long rc, void *regs0, int num); //#define DEBUG_PRINT_SC @@ -178,6 +180,17 @@ SYSCALL_DECLARE(rt_sigreturn) if(sigsp->restart){ return syscall(sigsp->num, (ihk_mc_user_context_t *)regs); } + if(regs->gpr.rflags & RFLAGS_TF){ + struct siginfo info; + + regs->gpr.rax = sigsp->sigrc; + memset(&info, '\0', sizeof info); + regs->gpr.rflags &= ~RFLAGS_TF; + info.si_code = TRAP_TRACE; + set_signal(SIGTRAP, regs, &info); + check_signal(0, regs, 0); + check_need_resched(); + } return sigsp->sigrc; } @@ -595,6 +608,16 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi thread->sigmask.__val[0] |= pending->sigmask.__val[0]; kfree(pending); ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); + if(regs->gpr.rflags & RFLAGS_TF){ + struct siginfo info; + + memset(&info, '\0', sizeof info); + regs->gpr.rflags &= ~RFLAGS_TF; + info.si_code = TRAP_TRACE; + set_signal(SIGTRAP, regs, &info); + check_signal(0, regs, 0); + check_need_resched(); + } } else { int coredumped = 0; diff --git a/kernel/procfs.c b/kernel/procfs.c index 76107d0e..894268b6 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -261,6 +261,9 @@ process_procfs_request(unsigned long rarg) } #endif + if(readwrite == 0) + reason = PF_POPULATE | PF_USER; + while(left){ unsigned long pa; char 
*va; From 27c3ed7e96c984349e8c645f0e8164581c988dc8 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Sun, 21 Feb 2016 15:17:42 +0900 Subject: [PATCH 21/21] remove debug print --- arch/x86/kernel/cpu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index 058b3e90..d8528c97 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -893,7 +893,6 @@ void debug_handler(struct x86_user_context *regs) struct siginfo info; set_cputime(interrupt_from_user(regs)? 1: 2); -kprintf("debug_handler rip=%lx\n", regs->gpr.rip); #ifdef DEBUG_PRINT_CPU kprintf("debug exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip); @@ -921,7 +920,6 @@ void int3_handler(struct x86_user_context *regs) struct siginfo info; set_cputime(interrupt_from_user(regs)? 1: 2); -kprintf("int3_handler rip=%lx\n", regs->gpr.rip); #ifdef DEBUG_PRINT_CPU kprintf("int3 exception (err: %lx, %lx:%lx)\n", regs->gpr.error, regs->gpr.cs, regs->gpr.rip);