mcexec/mcctrl: unmount cgroups (privately) which expose invalid NUMA info
This commit is contained in:
25
configure
vendored
25
configure
vendored
@ -3117,6 +3117,31 @@ _ACEOF
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
|
||||||
|
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
|
||||||
|
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
|
||||||
|
if test -z $mcctrl_addr; then
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
|
||||||
|
$as_echo "not found" >&6; }
|
||||||
|
else
|
||||||
|
mcctrl_result=$mcctrl_addr
|
||||||
|
mcctrl_addr="0x$mcctrl_addr"
|
||||||
|
|
||||||
|
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
|
||||||
|
mcctrl_result="exported"
|
||||||
|
mcctrl_addr="0"
|
||||||
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
|
||||||
|
$as_echo "$mcctrl_result" >&6; }
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
|
||||||
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
|
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
|
||||||
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
|
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
|
||||||
|
|||||||
@ -221,6 +221,7 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[
|
|||||||
])
|
])
|
||||||
|
|
||||||
MCCTRL_FIND_KSYM([sys_mount])
|
MCCTRL_FIND_KSYM([sys_mount])
|
||||||
|
MCCTRL_FIND_KSYM([sys_umount])
|
||||||
MCCTRL_FIND_KSYM([sys_unshare])
|
MCCTRL_FIND_KSYM([sys_unshare])
|
||||||
MCCTRL_FIND_KSYM([zap_page_range])
|
MCCTRL_FIND_KSYM([zap_page_range])
|
||||||
MCCTRL_FIND_KSYM([vdso_image_64])
|
MCCTRL_FIND_KSYM([vdso_image_64])
|
||||||
|
|||||||
@ -51,6 +51,9 @@
|
|||||||
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
||||||
#undef MCCTRL_KSYM_sys_readlink
|
#undef MCCTRL_KSYM_sys_readlink
|
||||||
|
|
||||||
|
/* Define to address of kernel symbol sys_umount, or 0 if exported */
|
||||||
|
#undef MCCTRL_KSYM_sys_umount
|
||||||
|
|
||||||
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
||||||
#undef MCCTRL_KSYM_sys_unshare
|
#undef MCCTRL_KSYM_sys_unshare
|
||||||
|
|
||||||
|
|||||||
@ -49,7 +49,8 @@
|
|||||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||||
|
|
||||||
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
||||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02915
|
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
|
||||||
|
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916
|
||||||
|
|
||||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||||
|
|
||||||
@ -196,6 +197,10 @@ struct sys_mount_desc {
|
|||||||
void *data;
|
void *data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Argument block passed by mcexec with the MCEXEC_UP_SYS_UMOUNT ioctl. */
struct sys_umount_desc {
	char *dir_name;	/* mount point to unmount; handed to mcctrl_sys_umount() */
};
|
||||||
|
|
||||||
struct sys_unshare_desc {
|
struct sys_unshare_desc {
|
||||||
unsigned long unshare_flags;
|
unsigned long unshare_flags;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -66,7 +66,18 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long
|
|||||||
(int_star_fn_char_char_char_ulong_void_t)
|
(int_star_fn_char_char_char_ulong_void_t)
|
||||||
MCCTRL_KSYM_sys_mount;
|
MCCTRL_KSYM_sys_mount;
|
||||||
#else // exported
|
#else // exported
|
||||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
|
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Function pointer through which mcctrl invokes the kernel's sys_umount.
 *
 * configure defines MCCTRL_KSYM_sys_umount to the symbol's address taken
 * from the kernel symbol table, or to 0 when the symbol is exported.  The
 * macro stays undefined when the symbol was not found, in which case no
 * pointer is defined here at all.
 */
#ifdef MCCTRL_KSYM_sys_umount
#if MCCTRL_KSYM_sys_umount
/* Symbol is not exported: call it through the address found by configure. */
typedef int (*int_fn_char_star_int_t)(char *, int);
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
	(int_fn_char_star_int_t)
	MCCTRL_KSYM_sys_umount;
#else // exported
/* Symbol is exported: reference it directly. */
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
#endif
#endif
|
||||||
|
|
||||||
@ -1154,7 +1165,7 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
|||||||
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
||||||
original = override_creds(promoted);
|
original = override_creds(promoted);
|
||||||
|
|
||||||
#if MCCTRL_KSYM_sys_mount
|
#ifdef MCCTRL_KSYM_sys_mount
|
||||||
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
|
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
|
||||||
desc.flags, desc.data);
|
desc.flags, desc.data);
|
||||||
#else
|
#else
|
||||||
@ -1167,6 +1178,38 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for MCEXEC_UP_SYS_UMOUNT.
 *
 * Copies a struct sys_umount_desc from user space and force-unmounts
 * (MNT_FORCE) the directory it names, with CAP_SYS_ADMIN raised in the
 * effective capability set for the duration of the call.
 *
 * Returns -EFAULT when the descriptor cannot be copied or when
 * MCCTRL_KSYM_sys_umount is not defined, -ENOMEM when new credentials
 * cannot be prepared, and otherwise whatever mcctrl_sys_umount() returns.
 *
 * NOTE(review): the parameter is declared as struct sys_mount_desc although
 * the data copied in is a struct sys_umount_desc; this works only because
 * the copy size comes from the local variable.  Confirm and align the type
 * together with the caller's cast.
 */
long mcexec_sys_umount(struct sys_mount_desc *__user arg)
{
	struct sys_umount_desc udesc;
	struct cred *elevated;
	const struct cred *saved;
	int rc;

	if (copy_from_user(&udesc, arg, sizeof(udesc)) != 0)
		return -EFAULT;

	elevated = prepare_creds();
	if (elevated == NULL)
		return -ENOMEM;

	/* Temporarily act with CAP_SYS_ADMIN so the umount is permitted. */
	cap_raise(elevated->cap_effective, CAP_SYS_ADMIN);
	saved = override_creds(elevated);

#ifndef MCCTRL_KSYM_sys_umount
	rc = -EFAULT;
	kprintf("%s: mcctrl_sys_umount not defined?\n", __FUNCTION__);
#else
	rc = mcctrl_sys_umount(udesc.dir_name, MNT_FORCE);
	kprintf("%s: mcctrl_sys_umount: %d\n", __FUNCTION__, rc);
#endif

	/* Restore the caller's original credentials and drop our reference. */
	revert_creds(saved);
	put_cred(elevated);

	return rc;
}
|
||||||
|
|
||||||
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
|
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
|
||||||
{
|
{
|
||||||
struct sys_unshare_desc desc;
|
struct sys_unshare_desc desc;
|
||||||
@ -1254,6 +1297,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
|||||||
case MCEXEC_UP_SYS_MOUNT:
|
case MCEXEC_UP_SYS_MOUNT:
|
||||||
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
||||||
|
|
||||||
|
case MCEXEC_UP_SYS_UMOUNT:
|
||||||
|
return mcexec_sys_umount((struct sys_mount_desc *)arg);
|
||||||
|
|
||||||
case MCEXEC_UP_SYS_UNSHARE:
|
case MCEXEC_UP_SYS_UNSHARE:
|
||||||
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
||||||
|
|
||||||
|
|||||||
@ -69,6 +69,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
|||||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
||||||
|
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1148,75 +1148,41 @@ void init_worker_threads(int fd)
|
|||||||
|
|
||||||
#ifdef ENABLE_MCOVERLAYFS
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
#define READ_BUFSIZE 1024
|
#define READ_BUFSIZE 1024
|
||||||
/*
 * Scan this process's mount table (/proc/<pid>/mounts) for an entry whose
 * line starts with `prefix`.
 *
 * Returns 1 when such an entry exists, 0 when none does, and -1 when the
 * mount table cannot be opened.
 */
static int find_mount_prefix(char *prefix)
{
	FILE *fp;
	char *line = NULL;
	size_t cap = 0;
	size_t prefix_len = strlen(prefix);
	char proc_path[PATH_MAX];
	int found = 0;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());

	fp = fopen(proc_path, "r");
	if (fp == NULL) {
		return -1;
	}

	while (getline(&line, &cap, fp) != -1) {
		/*
		 * strncmp() stops at the terminating NUL of a line shorter
		 * than the prefix, so no separate length check is needed.
		 */
		if (strncmp(line, prefix, prefix_len) == 0) {
			found = 1;
			break;
		}
	}

	free(line);
	/* Close the stream: this function is called several times per run. */
	fclose(fp);

	return found;
}

/*
 * Look for an "mcoverlay /proc " entry in the mount table.
 * Returns the result of find_mount_prefix() for that prefix.
 */
static int isunshare(void)
{
	return find_mount_prefix("mcoverlay /proc ");
}
|
||||||
#endif // ENABLE_MCOVERLAYFS
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
|
|
||||||
@ -1415,6 +1381,7 @@ int main(int argc, char **argv)
|
|||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
struct sys_unshare_desc unshare_desc;
|
struct sys_unshare_desc unshare_desc;
|
||||||
struct sys_mount_desc mount_desc;
|
struct sys_mount_desc mount_desc;
|
||||||
|
struct sys_umount_desc umount_desc;
|
||||||
|
|
||||||
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
||||||
memset(&mount_desc, '\0', sizeof mount_desc);
|
memset(&mount_desc, '\0', sizeof mount_desc);
|
||||||
@ -1426,6 +1393,53 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Umount cgroup filesystems that may expose invalid NUMA
|
||||||
|
* information
|
||||||
|
*/
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu,cpuacct")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpu,cpuacct";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpu,cpuacct. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpu";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpu. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpuset")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpuset";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpuset. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/memory")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/memory/";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/memory. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
||||||
mount_desc.dev_name = mcos_procdir;
|
mount_desc.dev_name = mcos_procdir;
|
||||||
mount_desc.dir_name = "/proc";
|
mount_desc.dir_name = "/proc";
|
||||||
|
|||||||
Reference in New Issue
Block a user