diff --git a/configure b/configure
index 081428ff..5c365cdc 100755
--- a/configure
+++ b/configure
@@ -3117,6 +3117,31 @@ _ACEOF
 	fi
 
 
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
+$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
+	mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\  -f1`
+	if test -z $mcctrl_addr; then
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
+$as_echo "not found" >&6; }
+	else
+		mcctrl_result=$mcctrl_addr
+		mcctrl_addr="0x$mcctrl_addr"
+
+		if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
+			mcctrl_result="exported"
+			mcctrl_addr="0"
+		fi
+
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
+$as_echo "$mcctrl_result" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define MCCTRL_KSYM_sys_umount $mcctrl_addr
+_ACEOF
+
+	fi
+
+
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
 $as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
 	mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\  -f1`
diff --git a/configure.ac b/configure.ac
index 2773f7f1..c5319391 100644
--- a/configure.ac
+++ b/configure.ac
@@ -221,6 +221,7 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[
 ])
 
 MCCTRL_FIND_KSYM([sys_mount])
+MCCTRL_FIND_KSYM([sys_umount])
 MCCTRL_FIND_KSYM([sys_unshare])
 MCCTRL_FIND_KSYM([zap_page_range])
 MCCTRL_FIND_KSYM([vdso_image_64])
diff --git a/executer/config.h.in b/executer/config.h.in
index 02f2af41..e1a8c26f 100644
--- a/executer/config.h.in
+++ b/executer/config.h.in
@@ -51,6 +51,9 @@
 /* Define to address of kernel symbol sys_readlink, or 0 if exported */
 #undef MCCTRL_KSYM_sys_readlink
 
+/* Define to address of kernel symbol sys_umount, or 0 if exported */
+#undef MCCTRL_KSYM_sys_umount
+
 /* Define to address of kernel symbol sys_unshare, or 0 if exported */
 #undef MCCTRL_KSYM_sys_unshare
 
diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h
index 6c94ad86..34ad9a1a 100644
--- a/executer/include/uprotocol.h
+++ b/executer/include/uprotocol.h
@@ -49,7 +49,8 @@
 #define MCEXEC_UP_CLOSE_EXEC     0x30a02913
 
 #define MCEXEC_UP_SYS_MOUNT      0x30a02914
-#define MCEXEC_UP_SYS_UNSHARE    0x30a02915
+#define MCEXEC_UP_SYS_UMOUNT     0x30a02915
+#define MCEXEC_UP_SYS_UNSHARE    0x30a02916
 
 #define MCEXEC_UP_DEBUG_LOG      0x40000000
 
@@ -196,6 +197,10 @@ struct sys_mount_desc {
 	void *data;
 };
 
+struct sys_umount_desc {
+	char *dir_name;
+};
+
 struct sys_unshare_desc {
 	unsigned long unshare_flags;
 };
diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c
index 1bd2ffd8..3cac5da6 100644
--- a/executer/kernel/mcctrl/control.c
+++ b/executer/kernel/mcctrl/control.c
@@ -66,7 +66,18 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long
 	(int_star_fn_char_char_char_ulong_void_t)
 	MCCTRL_KSYM_sys_mount;
 #else // exported
-int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
+int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
+#endif
+#endif
+
+#ifdef MCCTRL_KSYM_sys_umount
+#if MCCTRL_KSYM_sys_umount
+typedef int (*int_fn_char_star_int_t)(char *, int);
+int (*mcctrl_sys_umount)(char *dir_name, int flags) =
+	(int_fn_char_star_int_t)
+	MCCTRL_KSYM_sys_umount;
+#else // exported
+int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
 #endif
 #endif
 
@@ -1154,7 +1165,7 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
 	cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
 	original = override_creds(promoted);
 
-#if MCCTRL_KSYM_sys_mount
+#ifdef MCCTRL_KSYM_sys_mount
 	ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
 		desc.flags, desc.data);
 #else
@@ -1167,6 +1178,47 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
 	return ret;
 }
 
+/*
+ * Handler for MCEXEC_UP_SYS_UMOUNT: copy a struct sys_umount_desc from
+ * user space and unmount desc.dir_name with MNT_FORCE while CAP_SYS_ADMIN
+ * is raised in the effective capabilities of the overriding credentials.
+ *
+ * Returns the value of mcctrl_sys_umount(), -EFAULT if the descriptor
+ * cannot be copied or MCCTRL_KSYM_sys_umount is not defined, or -ENOMEM
+ * if prepare_creds() fails.
+ */
+long mcexec_sys_umount(struct sys_umount_desc *__user arg)
+{
+	struct sys_umount_desc desc;
+	struct cred *promoted;
+	const struct cred *original;
+	int ret;
+
+	if (copy_from_user(&desc, arg, sizeof(desc))) {
+		return -EFAULT;
+	}
+
+	promoted = prepare_creds();
+	if (!promoted) {
+		return -ENOMEM;
+	}
+	cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
+	original = override_creds(promoted);
+
+#ifdef MCCTRL_KSYM_sys_umount
+	ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
+	kprintf("%s: mcctrl_sys_umount: %d\n", __FUNCTION__, ret);
+#else
+	ret = -EFAULT;
+	kprintf("%s: mcctrl_sys_umount not defined?\n", __FUNCTION__);
+#endif
+
+	revert_creds(original);
+	put_cred(promoted);
+
+	return ret;
+}
+
 long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
 {
 	struct sys_unshare_desc desc;
@@ -1254,6 +1306,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
 	case MCEXEC_UP_SYS_MOUNT:
 		return mcexec_sys_mount((struct sys_mount_desc *)arg);
 
+	case MCEXEC_UP_SYS_UMOUNT:
+		return mcexec_sys_umount((struct sys_umount_desc *)arg);
+
 	case MCEXEC_UP_SYS_UNSHARE:
 		return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
 
diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c
index 20a6fa39..0b2475fe 100644
--- a/executer/kernel/mcctrl/driver.c
+++ b/executer/kernel/mcctrl/driver.c
@@ -69,6 +69,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
 	{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
 	{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
 	{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
+	{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
 	{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
 	{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
 };
diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c
index 62a3f169..4cf01bc8 100644
--- a/executer/user/mcexec.c
+++ b/executer/user/mcexec.c
@@ -1148,75 +1148,49 @@ void init_worker_threads(int fd)
 
 #ifdef ENABLE_MCOVERLAYFS
 #define READ_BUFSIZE 1024
-static int isunshare(void)
+/*
+ * Look for a line starting with prefix in /proc/<pid>/mounts of the
+ * calling process.
+ *
+ * Returns 1 if a matching line exists, 0 if none does, or -1 if the
+ * mounts file cannot be opened.
+ */
+static int find_mount_prefix(char *prefix)
 {
-	int err = 0;
-	int ret;
-	int fd;
+	FILE *fp;
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t read;
 	char proc_path[PATH_MAX];
-	ssize_t len_read;
-	char buf_read[READ_BUFSIZE + 1];
-	char *buf_read_off;
-	char *buf_find;
-	char buf_cmp[READ_BUFSIZE + 1];
-	char *buf_cmp_off;
-	ssize_t len_copy;
+	int ret = 0;
 
 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());
-	fd = open(proc_path, O_RDONLY);
-	if (fd < 0) {
-		fprintf(stderr, "Error: Failed to open %s.\n", proc_path);
+
+	fp = fopen(proc_path, "r");
+	if (fp == NULL) {
 		return -1;
 	}
 
-	buf_cmp_off = buf_cmp;
-	while (1) {
-		len_read = read(fd, buf_read, READ_BUFSIZE);
-		if (len_read == -1) {
-			fprintf(stderr, "Error: Failed to read.\n");
-			err = -1;
-			break;
-		}
+	while ((read = getline(&line, &len, fp)) != -1) {
+		if (strlen(line) < strlen(prefix))
+			continue;
 
-		buf_read_off = buf_read;
-		while (1) {
-			if ((len_read - (buf_read_off - buf_read)) <= 0) {
-				break;
-			}
-			buf_find = memchr(buf_read_off, '\n',
-					len_read - (buf_read_off - buf_read));
-			if (buf_find) {
-				len_copy = buf_find - buf_read_off;
-			} else {
-				len_copy = len_read - (buf_read_off - buf_read);
-			}
-			memcpy(buf_cmp_off, buf_read_off, len_copy);
-			*(buf_cmp_off + len_copy) = '\0';
-
-			if (buf_find) {
-				buf_read_off = buf_read_off + len_copy + 1;
-				buf_cmp_off = buf_cmp;
-				ret = strncmp(buf_cmp, "mcoverlay /proc ", 16);
-				if (!ret) {
-					err = 1;
-					break;
-				}
-			} else {
-				buf_read_off = buf_read_off + len_copy;
-				buf_cmp_off = buf_cmp_off + len_copy;
-				break;
-			}
-		}
-
-		if (err == 1 || len_read == 0) {
+		if (!strncmp(line, prefix, strlen(prefix))) {
+			ret = 1;
 			break;
 		}
 	}
 
-	close(fd);
+	/* free(NULL) is a no-op, so no guard is needed. */
+	free(line);
+	fclose(fp);
 
-	__dprintf("err=%d\n", err);
-	return err;
+	return ret;
+}
+
+static int isunshare(void)
+{
+	return find_mount_prefix("mcoverlay /proc ");
 }
 #endif // ENABLE_MCOVERLAYFS
 
@@ -1415,6 +1389,7 @@ int main(int argc, char **argv)
 	if (error == 0) {
 		struct sys_unshare_desc unshare_desc;
 		struct sys_mount_desc mount_desc;
+		struct sys_umount_desc umount_desc;
 
 		memset(&unshare_desc, '\0', sizeof unshare_desc);
 		memset(&mount_desc, '\0', sizeof mount_desc);
@@ -1426,6 +1401,54 @@ int main(int argc, char **argv)
 			return 1;
 		}
 
+		/*
+		 * Umount cgroup filesystems that may expose invalid NUMA
+		 * information.  Each prefix ends with a space so that only the
+		 * exact mount point matches (e.g. "cpu" must not match "cpuset").
+		 */
+		if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu,cpuacct ") > 0) {
+			umount_desc.dir_name = "/sys/fs/cgroup/cpu,cpuacct";
+
+			if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
+						(unsigned long)&umount_desc) != 0) {
+				fprintf(stderr,
+						"WARNING: Failed to umount cgroup/cpu,cpuacct. (%s)\n",
+						strerror(errno));
+			}
+		}
+		else if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu ") > 0) {
+			umount_desc.dir_name = "/sys/fs/cgroup/cpu";
+
+			if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
+						(unsigned long)&umount_desc) != 0) {
+				fprintf(stderr,
+						"WARNING: Failed to umount cgroup/cpu. (%s)\n",
+						strerror(errno));
+			}
+		}
+
+		if (find_mount_prefix("cgroup /sys/fs/cgroup/cpuset ") > 0) {
+			umount_desc.dir_name = "/sys/fs/cgroup/cpuset";
+
+			if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
+						(unsigned long)&umount_desc) != 0) {
+				fprintf(stderr,
+						"WARNING: Failed to umount cgroup/cpuset. (%s)\n",
+						strerror(errno));
+			}
+		}
+
+		if (find_mount_prefix("cgroup /sys/fs/cgroup/memory ") > 0) {
+			umount_desc.dir_name = "/sys/fs/cgroup/memory/";
+
+			if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
+						(unsigned long)&umount_desc) != 0) {
+				fprintf(stderr,
+						"WARNING: Failed to umount cgroup/memory. (%s)\n",
+						strerror(errno));
+			}
+		}
+
 		sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
 		mount_desc.dev_name = mcos_procdir;
 		mount_desc.dir_name = "/proc";