mcexec/mcctrl: unmount cgroups (privately) which expose invalid NUMA info
This commit is contained in:
25
configure
vendored
25
configure
vendored
@ -3117,6 +3117,31 @@ _ACEOF
|
||||
fi
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
|
||||
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
|
||||
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
|
||||
if test -z $mcctrl_addr; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
|
||||
$as_echo "not found" >&6; }
|
||||
else
|
||||
mcctrl_result=$mcctrl_addr
|
||||
mcctrl_addr="0x$mcctrl_addr"
|
||||
|
||||
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
|
||||
mcctrl_result="exported"
|
||||
mcctrl_addr="0"
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
|
||||
$as_echo "$mcctrl_result" >&6; }
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
|
||||
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
|
||||
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
|
||||
|
||||
@ -221,6 +221,7 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[
|
||||
])
|
||||
|
||||
MCCTRL_FIND_KSYM([sys_mount])
|
||||
MCCTRL_FIND_KSYM([sys_umount])
|
||||
MCCTRL_FIND_KSYM([sys_unshare])
|
||||
MCCTRL_FIND_KSYM([zap_page_range])
|
||||
MCCTRL_FIND_KSYM([vdso_image_64])
|
||||
|
||||
@ -51,6 +51,9 @@
|
||||
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_readlink
|
||||
|
||||
/* Define to address of kernel symbol sys_umount, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_umount
|
||||
|
||||
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_unshare
|
||||
|
||||
|
||||
@ -49,7 +49,8 @@
|
||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||
|
||||
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02915
|
||||
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
|
||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
@ -196,6 +197,10 @@ struct sys_mount_desc {
|
||||
void *data;
|
||||
};
|
||||
|
||||
struct sys_umount_desc {
|
||||
char *dir_name;
|
||||
};
|
||||
|
||||
struct sys_unshare_desc {
|
||||
unsigned long unshare_flags;
|
||||
};
|
||||
|
||||
@ -66,7 +66,18 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long
|
||||
(int_star_fn_char_char_char_ulong_void_t)
|
||||
MCCTRL_KSYM_sys_mount;
|
||||
#else // exported
|
||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
|
||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_sys_umount
|
||||
#if MCCTRL_KSYM_sys_umount
|
||||
typedef int (*int_fn_char_star_int_t)(char *, int);
|
||||
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
|
||||
(int_fn_char_star_int_t)
|
||||
MCCTRL_KSYM_sys_umount;
|
||||
#else // exported
|
||||
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -1154,7 +1165,7 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
||||
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
||||
original = override_creds(promoted);
|
||||
|
||||
#if MCCTRL_KSYM_sys_mount
|
||||
#ifdef MCCTRL_KSYM_sys_mount
|
||||
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
|
||||
desc.flags, desc.data);
|
||||
#else
|
||||
@ -1167,6 +1178,38 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
||||
return ret;
|
||||
}
|
||||
|
||||
long mcexec_sys_umount(struct sys_mount_desc *__user arg)
|
||||
{
|
||||
struct sys_umount_desc desc;
|
||||
struct cred *promoted;
|
||||
const struct cred *original;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&desc, arg, sizeof(desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
promoted = prepare_creds();
|
||||
if (!promoted) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
||||
original = override_creds(promoted);
|
||||
|
||||
#ifdef MCCTRL_KSYM_sys_umount
|
||||
ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
|
||||
kprintf("%s: mcctrl_sys_umount: %d\n", __FUNCTION__, ret);
|
||||
#else
|
||||
ret = -EFAULT;
|
||||
kprintf("%s: mcctrl_sys_umount not defined?\n", __FUNCTION__);
|
||||
#endif
|
||||
|
||||
revert_creds(original);
|
||||
put_cred(promoted);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
|
||||
{
|
||||
struct sys_unshare_desc desc;
|
||||
@ -1254,6 +1297,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
case MCEXEC_UP_SYS_MOUNT:
|
||||
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_SYS_UMOUNT:
|
||||
return mcexec_sys_umount((struct sys_mount_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_SYS_UNSHARE:
|
||||
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
||||
|
||||
|
||||
@ -69,6 +69,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
@ -1148,75 +1148,41 @@ void init_worker_threads(int fd)
|
||||
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
#define READ_BUFSIZE 1024
|
||||
static int isunshare(void)
|
||||
static int find_mount_prefix(char *prefix)
|
||||
{
|
||||
int err = 0;
|
||||
int ret;
|
||||
int fd;
|
||||
FILE *fp;
|
||||
char *line = NULL;
|
||||
size_t len = 0;
|
||||
ssize_t read;
|
||||
char proc_path[PATH_MAX];
|
||||
ssize_t len_read;
|
||||
char buf_read[READ_BUFSIZE + 1];
|
||||
char *buf_read_off;
|
||||
char *buf_find;
|
||||
char buf_cmp[READ_BUFSIZE + 1];
|
||||
char *buf_cmp_off;
|
||||
ssize_t len_copy;
|
||||
int ret = 0;
|
||||
|
||||
snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());
|
||||
fd = open(proc_path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Error: Failed to open %s.\n", proc_path);
|
||||
|
||||
fp = fopen(proc_path, "r");
|
||||
if (fp == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf_cmp_off = buf_cmp;
|
||||
while (1) {
|
||||
len_read = read(fd, buf_read, READ_BUFSIZE);
|
||||
if (len_read == -1) {
|
||||
fprintf(stderr, "Error: Failed to read.\n");
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
while ((read = getline(&line, &len, fp)) != -1) {
|
||||
if (strlen(line) < strlen(prefix))
|
||||
continue;
|
||||
|
||||
buf_read_off = buf_read;
|
||||
while (1) {
|
||||
if ((len_read - (buf_read_off - buf_read)) <= 0) {
|
||||
break;
|
||||
}
|
||||
buf_find = memchr(buf_read_off, '\n',
|
||||
len_read - (buf_read_off - buf_read));
|
||||
if (buf_find) {
|
||||
len_copy = buf_find - buf_read_off;
|
||||
} else {
|
||||
len_copy = len_read - (buf_read_off - buf_read);
|
||||
}
|
||||
memcpy(buf_cmp_off, buf_read_off, len_copy);
|
||||
*(buf_cmp_off + len_copy) = '\0';
|
||||
|
||||
if (buf_find) {
|
||||
buf_read_off = buf_read_off + len_copy + 1;
|
||||
buf_cmp_off = buf_cmp;
|
||||
ret = strncmp(buf_cmp, "mcoverlay /proc ", 16);
|
||||
if (!ret) {
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
buf_read_off = buf_read_off + len_copy;
|
||||
buf_cmp_off = buf_cmp_off + len_copy;
|
||||
if (!strncmp(line, prefix, strlen(prefix))) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 1 || len_read == 0) {
|
||||
break;
|
||||
}
|
||||
if (line)
|
||||
free(line);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
__dprintf("err=%d\n", err);
|
||||
return err;
|
||||
static int isunshare(void)
|
||||
{
|
||||
return find_mount_prefix("mcoverlay /proc ");
|
||||
}
|
||||
#endif // ENABLE_MCOVERLAYFS
|
||||
|
||||
@ -1415,6 +1381,7 @@ int main(int argc, char **argv)
|
||||
if (error == 0) {
|
||||
struct sys_unshare_desc unshare_desc;
|
||||
struct sys_mount_desc mount_desc;
|
||||
struct sys_umount_desc umount_desc;
|
||||
|
||||
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
||||
memset(&mount_desc, '\0', sizeof mount_desc);
|
||||
@ -1426,6 +1393,53 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Umount cgroup filesystems that may expose invalid NUMA
|
||||
* information
|
||||
*/
|
||||
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu,cpuacct")) {
|
||||
umount_desc.dir_name = "/sys/fs/cgroup/cpu,cpuacct";
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||
(unsigned long)&umount_desc) != 0) {
|
||||
fprintf(stderr,
|
||||
"WARNING: Failed to umount cgroup/cpu,cpuacct. (%s)\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
else if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu")) {
|
||||
umount_desc.dir_name = "/sys/fs/cgroup/cpu";
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||
(unsigned long)&umount_desc) != 0) {
|
||||
fprintf(stderr,
|
||||
"WARNING: Failed to umount cgroup/cpu. (%s)\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpuset")) {
|
||||
umount_desc.dir_name = "/sys/fs/cgroup/cpuset";
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||
(unsigned long)&umount_desc) != 0) {
|
||||
fprintf(stderr,
|
||||
"WARNING: Failed to umount cgroup/cpuset. (%s)\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
if (find_mount_prefix("cgroup /sys/fs/cgroup/memory")) {
|
||||
umount_desc.dir_name = "/sys/fs/cgroup/memory/";
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||
(unsigned long)&umount_desc) != 0) {
|
||||
fprintf(stderr,
|
||||
"WARNING: Failed to umount cgroup/memory. (%s)\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
||||
mount_desc.dev_name = mcos_procdir;
|
||||
mount_desc.dir_name = "/proc";
|
||||
|
||||
Reference in New Issue
Block a user