Compare commits
11 Commits
| SHA1 |
|---|
| e4b3a88fc6 |
| 69a5c53074 |
| 259583e936 |
| 0f826290d0 |
| e46f027894 |
| 3e093f6a40 |
| 00996b551f |
| 24d8697cef |
| be4f6741f9 |
| 7a2f67f5f0 |
| bba0425267 |
@@ -396,12 +396,15 @@ if [ "$enable_mcoverlay" == "yes" ]; then
fi

# TODO: How do we revert this in case of failure??
mount --make-rprivate /sys

rm -rf /tmp/mcos/mcos0_sys/setup_complete

# Hide NUMA related files which are outside the LWK partition
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/drivers/processor/$cpuid
else
for nodeid in `find /sys/devices/system/cpu/$cpuid/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid/$nodeid" ]; then
@@ -412,7 +415,8 @@ if [ "$enable_mcoverlay" == "yes" ]; then
done
for nodeid in `find /sys/devices/system/node/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid" ]; then
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/*
rm -rf /tmp/mcos/mcos0_sys/bus/node/devices/$nodeid
else
# Delete non-existent symlinks
for cpuid in `find /sys/devices/system/node/$nodeid/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
@@ -424,6 +428,7 @@ if [ "$enable_mcoverlay" == "yes" ]; then
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/memory*
fi
done
rm -f /tmp/mcos/mcos0_sys/devices/system/node/has_*
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
configure (vendored): 25 changes
@@ -3117,6 +3117,31 @@ _ACEOF
fi


{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
if test -z $mcctrl_addr; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
mcctrl_result=$mcctrl_addr
mcctrl_addr="0x$mcctrl_addr"

if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
mcctrl_result="exported"
mcctrl_addr="0"
fi

{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
$as_echo "$mcctrl_result" >&6; }

cat >>confdefs.h <<_ACEOF
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
_ACEOF

fi


{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
@@ -221,6 +221,7 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[
])

MCCTRL_FIND_KSYM([sys_mount])
MCCTRL_FIND_KSYM([sys_umount])
MCCTRL_FIND_KSYM([sys_unshare])
MCCTRL_FIND_KSYM([zap_page_range])
MCCTRL_FIND_KSYM([vdso_image_64])
@@ -51,6 +51,9 @@
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
#undef MCCTRL_KSYM_sys_readlink

/* Define to address of kernel symbol sys_umount, or 0 if exported */
#undef MCCTRL_KSYM_sys_umount

/* Define to address of kernel symbol sys_unshare, or 0 if exported */
#undef MCCTRL_KSYM_sys_unshare
@@ -41,6 +41,7 @@
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
#define MCEXEC_UP_GET_CRED 0x30a0290a
#define MCEXEC_UP_GET_CREDV 0x30a0290b
#define MCEXEC_UP_GET_NODES 0x30a0290c

#define MCEXEC_UP_PREPARE_DMA 0x30a02910
#define MCEXEC_UP_FREE_DMA 0x30a02911
@@ -49,7 +50,8 @@
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913

#define MCEXEC_UP_SYS_MOUNT 0x30a02914
#define MCEXEC_UP_SYS_UNSHARE 0x30a02915
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916

#define MCEXEC_UP_DEBUG_LOG 0x40000000
@@ -196,6 +198,10 @@ struct sys_mount_desc {
void *data;
};

struct sys_umount_desc {
char *dir_name;
};

struct sys_unshare_desc {
unsigned long unshare_flags;
};
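The new MCEXEC_UP_SYS_UMOUNT request and struct sys_umount_desc give mcexec a way to ask the mcctrl host module to force-unmount a path. A minimal usage sketch follows; the helper name and the assumption that fd is the already-open mcctrl device descriptor are illustrative, not part of the change itself.

```c
/* Hypothetical sketch: request an umount through the new ioctl.
 * Assumes the mcctrl request header from this change set is included
 * (for MCEXEC_UP_SYS_UMOUNT and struct sys_umount_desc) and that "fd"
 * is the mcctrl device descriptor mcexec already holds. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>

static int request_umount(int fd, const char *path)
{
	struct sys_umount_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.dir_name = (char *)path;

	/* the host side calls sys_umount(dir_name, MNT_FORCE) with raised caps */
	if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT, (unsigned long)&desc) != 0) {
		fprintf(stderr, "umount of %s failed: %s\n", path, strerror(errno));
		return -1;
	}
	return 0;
}
```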
@@ -66,7 +66,18 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long
(int_star_fn_char_char_char_ulong_void_t)
MCCTRL_KSYM_sys_mount;
#else // exported
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
#endif
#endif

#ifdef MCCTRL_KSYM_sys_umount
#if MCCTRL_KSYM_sys_umount
typedef int (*int_fn_char_star_int_t)(char *, int);
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
(int_fn_char_star_int_t)
MCCTRL_KSYM_sys_umount;
#else // exported
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
#endif
#endif
@@ -439,6 +450,16 @@ static long mcexec_get_cpu(ihk_os_t os)
return info->n_cpus;
}

static long mcexec_get_nodes(ihk_os_t os)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);

if (!usrdata || !usrdata->mem_info)
return -EINVAL;

return usrdata->mem_info->n_numa_nodes;
}

int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
struct mcctrl_per_proc_data *ppd)
{
@@ -1154,7 +1175,7 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
original = override_creds(promoted);

#if MCCTRL_KSYM_sys_mount
#ifdef MCCTRL_KSYM_sys_mount
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
desc.flags, desc.data);
#else
@@ -1167,6 +1188,36 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
return ret;
}

long mcexec_sys_umount(struct sys_mount_desc *__user arg)
{
struct sys_umount_desc desc;
struct cred *promoted;
const struct cred *original;
int ret;

if (copy_from_user(&desc, arg, sizeof(desc))) {
return -EFAULT;
}

promoted = prepare_creds();
if (!promoted) {
return -ENOMEM;
}
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
original = override_creds(promoted);

#ifdef MCCTRL_KSYM_sys_umount
ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
#else
ret = -EFAULT;
#endif

revert_creds(original);
put_cred(promoted);

return ret;
}

long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
{
struct sys_unshare_desc desc;
@@ -1225,6 +1276,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
case MCEXEC_UP_GET_CPU:
return mcexec_get_cpu(os);

case MCEXEC_UP_GET_NODES:
return mcexec_get_nodes(os);

case MCEXEC_UP_STRNCPY_FROM_USER:
return mcexec_strncpy_from_user(os,
(struct strncpy_from_user_desc *)arg);
@@ -1254,6 +1308,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
case MCEXEC_UP_SYS_MOUNT:
return mcexec_sys_mount((struct sys_mount_desc *)arg);

case MCEXEC_UP_SYS_UMOUNT:
return mcexec_sys_umount((struct sys_mount_desc *)arg);

case MCEXEC_UP_SYS_UNSHARE:
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
@@ -60,6 +60,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_NODES, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
@@ -69,6 +70,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
};
@@ -281,6 +281,7 @@ struct mcctrl_usrdata {
unsigned long cpu_online[CPU_LONGS];
struct ihk_cpu_info *cpu_info;
struct ihk_mem_info *mem_info;
nodemask_t numa_online;
struct list_head cpu_topology_list;
struct list_head node_topology_list;
};
@@ -92,27 +92,19 @@ void setup_local_snooping_samples(ihk_os_t os)

void setup_local_snooping_files(ihk_os_t os)
{
struct ihk_cpu_info *info;
struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os);
struct sysfsm_bitmap_param param;
static unsigned long cpu_offline = 0x0;
int i;
int error;

info = ihk_os_get_cpu_info(os);
if (!info) {
eprintk("mcctrl:ihk_os_get_cpu_info failed.\n");
return;
}

memset(udp->cpu_online, 0, sizeof(udp->cpu_online));
for (i = 0; i < info->n_cpus; i++) {
udp->cpu_online[i / BITS_PER_LONG] =
udp->cpu_online[i / BITS_PER_LONG] | (1 << (i % BITS_PER_LONG));
for (i = 0; i < udp->cpu_info->n_cpus; i++) {
set_bit(i, udp->cpu_online);
}

param.nbits = CPU_LONGS * BITS_PER_LONG;
param.ptr = udp->cpu_online;
param.ptr = &udp->cpu_online;
dprintk("mcctrl:setup_local_snooping_files: CPU_LONGS=%d, BITS_PER_LONG=%d\n",
CPU_LONGS, BITS_PER_LONG);
@@ -666,6 +658,7 @@ out:
static int setup_node_files(struct mcctrl_usrdata *udp)
{
int error;
int node;
struct node_topology *p;
struct sysfsm_bitmap_param param;

@@ -677,10 +670,21 @@ static int setup_node_files(struct mcctrl_usrdata *udp)
goto out;
}

memset(&udp->numa_online, 0, sizeof(udp->numa_online));
for (node = 0; node < udp->mem_info->n_numa_nodes; ++node) {
node_set(node, udp->numa_online);
}

param.nbits = MAX_NUMNODES;
param.ptr = &udp->numa_online;
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_pbl, &param, 0444,
"/sys/devices/system/node/online");
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_pbl, &param, 0444,
"/sys/devices/system/node/possible");

list_for_each_entry(p, &udp->node_topology_list, chain) {
struct sysfs_handle handle;
int cpu;
int node;
size_t offset = 0;
param.nbits = nr_cpumask_bits;
param.ptr = &p->cpumap;
@@ -697,8 +701,6 @@ static int setup_node_files(struct mcctrl_usrdata *udp)
mckernel_numa_2_linux_numa(udp, node)
));
}
offset += snprintf(&p->mckernel_numa_distance_s[offset],
NODE_DISTANCE_S_SIZE - offset, "%s", "\n");

sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_s,
p->mckernel_numa_distance_s, 0444,
@@ -41,6 +41,7 @@
#include <sys/mman.h>
#include <asm/unistd.h>
#include <sched.h>
#include <dirent.h>

#include <termios.h>
#include <sys/ioctl.h>
@@ -1148,75 +1149,41 @@ void init_worker_threads(int fd)

#ifdef ENABLE_MCOVERLAYFS
#define READ_BUFSIZE 1024
static int isunshare(void)
static int find_mount_prefix(char *prefix)
{
int err = 0;
int ret;
int fd;
FILE *fp;
char *line = NULL;
size_t len = 0;
ssize_t read;
char proc_path[PATH_MAX];
ssize_t len_read;
char buf_read[READ_BUFSIZE + 1];
char *buf_read_off;
char *buf_find;
char buf_cmp[READ_BUFSIZE + 1];
char *buf_cmp_off;
ssize_t len_copy;
int ret = 0;

snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());
fd = open(proc_path, O_RDONLY);
if (fd < 0) {
fprintf(stderr, "Error: Failed to open %s.\n", proc_path);

fp = fopen(proc_path, "r");
if (fp == NULL) {
return -1;
}

buf_cmp_off = buf_cmp;
while (1) {
len_read = read(fd, buf_read, READ_BUFSIZE);
if (len_read == -1) {
fprintf(stderr, "Error: Failed to read.\n");
err = -1;
break;
}
while ((read = getline(&line, &len, fp)) != -1) {
if (strlen(line) < strlen(prefix))
continue;

buf_read_off = buf_read;
while (1) {
if ((len_read - (buf_read_off - buf_read)) <= 0) {
break;
}
buf_find = memchr(buf_read_off, '\n',
len_read - (buf_read_off - buf_read));
if (buf_find) {
len_copy = buf_find - buf_read_off;
} else {
len_copy = len_read - (buf_read_off - buf_read);
}
memcpy(buf_cmp_off, buf_read_off, len_copy);
*(buf_cmp_off + len_copy) = '\0';

if (buf_find) {
buf_read_off = buf_read_off + len_copy + 1;
buf_cmp_off = buf_cmp;
ret = strncmp(buf_cmp, "mcoverlay /proc ", 16);
if (!ret) {
err = 1;
break;
}
} else {
buf_read_off = buf_read_off + len_copy;
buf_cmp_off = buf_cmp_off + len_copy;
if (!strncmp(line, prefix, strlen(prefix))) {
ret = 1;
break;
}
}

if (err == 1 || len_read == 0) {
break;
}
if (line)
free(line);

return ret;
}

close(fd);

__dprintf("err=%d\n", err);
return err;
static int isunshare(void)
{
return find_mount_prefix("mcoverlay /proc ");
}
#endif // ENABLE_MCOVERLAYFS
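The refactored isunshare() above is now a thin wrapper over find_mount_prefix(), which walks /proc/<pid>/mounts line by line with getline() and matches on a "<source> <mountpoint>" prefix. A self-contained sketch of the same scan is shown below; the helper name and the example prefixes are illustrative, not the project's code.

```c
/* Self-contained sketch of the line-prefix scan over /proc/self/mounts
 * that find_mount_prefix() performs in this change set. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int mount_prefix_present(const char *prefix)
{
	FILE *fp = fopen("/proc/self/mounts", "r");
	char *line = NULL;
	size_t len = 0;
	int found = 0;

	if (!fp)
		return -1;

	while (getline(&line, &len, fp) != -1) {
		if (!strncmp(line, prefix, strlen(prefix))) {
			found = 1;
			break;
		}
	}

	free(line);
	fclose(fp);
	return found;
}

int main(void)
{
	/* e.g. detect whether the cpuset cgroup controller is mounted */
	printf("cpuset mounted: %d\n",
	       mount_prefix_present("cgroup /sys/fs/cgroup/cpuset"));
	return 0;
}
```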
@@ -1415,6 +1382,7 @@ int main(int argc, char **argv)
if (error == 0) {
struct sys_unshare_desc unshare_desc;
struct sys_mount_desc mount_desc;
struct sys_umount_desc umount_desc;

memset(&unshare_desc, '\0', sizeof unshare_desc);
memset(&mount_desc, '\0', sizeof mount_desc);
@@ -1426,6 +1394,53 @@ int main(int argc, char **argv)
return 1;
}

/*
* Umount cgroup filesystems that may expose invalid NUMA
* information
*/
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu,cpuacct")) {
umount_desc.dir_name = "/sys/fs/cgroup/cpu,cpuacct";

if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
(unsigned long)&umount_desc) != 0) {
fprintf(stderr,
"WARNING: Failed to umount cgroup/cpu,cpuacct. (%s)\n",
strerror(errno));
}
}
else if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu")) {
umount_desc.dir_name = "/sys/fs/cgroup/cpu";

if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
(unsigned long)&umount_desc) != 0) {
fprintf(stderr,
"WARNING: Failed to umount cgroup/cpu. (%s)\n",
strerror(errno));
}
}

if (find_mount_prefix("cgroup /sys/fs/cgroup/cpuset")) {
umount_desc.dir_name = "/sys/fs/cgroup/cpuset";

if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
(unsigned long)&umount_desc) != 0) {
fprintf(stderr,
"WARNING: Failed to umount cgroup/cpuset. (%s)\n",
strerror(errno));
}
}

if (find_mount_prefix("cgroup /sys/fs/cgroup/memory")) {
umount_desc.dir_name = "/sys/fs/cgroup/memory/";

if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
(unsigned long)&umount_desc) != 0) {
fprintf(stderr,
"WARNING: Failed to umount cgroup/memory. (%s)\n",
strerror(errno));
}
}

sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
mount_desc.dev_name = mcos_procdir;
mount_desc.dir_name = "/proc";
@@ -1686,6 +1701,97 @@ do_generic_syscall(
ret = -errno;
}

/* Overlayfs /sys/X directory lseek() problem work around */
if (w->sr.number == __NR_lseek && ret == -EINVAL) {
char proc_path[512];
char path[512];
struct stat sb;

sprintf(proc_path, "/proc/self/fd/%d", (int)w->sr.args[0]);

/* Get filename */
if (readlink(proc_path, path, sizeof(path)) < 0) {
fprintf(stderr, "%s: error: readlink() failed for %s\n",
__FUNCTION__, proc_path);
goto out;
}

/* Not in /sys? */
if (strncmp(path, "/sys/", 5))
goto out;

/* Stat */
if (stat(path, &sb) < 0) {
fprintf(stderr, "%s: error stat() failed for %s\n",
__FUNCTION__, path);
goto out;
}

/* Not dir? */
if ((sb.st_mode & S_IFMT) != S_IFDIR)
goto out;

ret = 0;
}
/* Fake that nodeX in /sys/devices/system/node do not exist,
* where X >= number of LWK NUMA nodes */
else if (w->sr.number == __NR_getdents && ret > 0) {
struct linux_dirent {
long d_ino;
off_t d_off;
unsigned short d_reclen;
char d_name[];
};
struct linux_dirent *d;
char *buf = (char *)w->sr.args[1];
int bpos = 0;
int nodes, len;
char proc_path[PATH_MAX];
char path[PATH_MAX];

sprintf(proc_path, "/proc/self/fd/%d", (int)w->sr.args[0]);

/* Get filename */
if ((len = readlink(proc_path, path, sizeof(path))) < 0) {
fprintf(stderr, "%s: error: readlink() failed for %s\n",
__FUNCTION__, proc_path);
goto out;
}
path[len] = 0;

/* Not /sys/devices/system/node ? */
if (strcmp(path, "/sys/devices/system/node"))
goto out;

nodes = ioctl(fd, MCEXEC_UP_GET_NODES, 0);
if (nodes == -1) {
goto out;
}

d = (struct linux_dirent *) (buf + bpos);
for (bpos = 0; bpos < ret; ) {
int nodeid, tmp_reclen;
d = (struct linux_dirent *) (buf + bpos);

if (sscanf(d->d_name, "node%d", &nodeid) != 1) {
bpos += d->d_reclen;
continue;
}

if (nodeid >= nodes) {
tmp_reclen = d->d_reclen;
memmove(buf + bpos,
buf + bpos + tmp_reclen,
ret - bpos - tmp_reclen);
ret -= tmp_reclen;
continue;
}

bpos += d->d_reclen;
}
}

out:
__dprintf("do_generic_syscall(%ld):%ld (%#lx)\n", w->sr.number, ret, ret);
return ret;
}
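The getdents() filtering above depends on the new MCEXEC_UP_GET_NODES request to learn how many NUMA nodes the LWK partition has, so that nodeN entries with N at or above that count can be dropped from the directory listing. A minimal, hypothetical query sketch is shown below; the helper name and the assumption that fd is the mcctrl device descriptor are for illustration only.

```c
/* Hypothetical sketch: ask the host module for the LWK NUMA node count.
 * Assumes the mcctrl request header (MCEXEC_UP_GET_NODES) is included and
 * "fd" is the already-open mcctrl device descriptor used throughout mcexec. */
#include <stdio.h>
#include <sys/ioctl.h>

static int lwk_numa_nodes(int fd)
{
	/* the host side returns usrdata->mem_info->n_numa_nodes */
	int nodes = ioctl(fd, MCEXEC_UP_GET_NODES, 0);

	if (nodes < 0) {
		perror("MCEXEC_UP_GET_NODES");
		return -1;
	}
	return nodes;
}
```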
@@ -209,7 +209,7 @@ cpu_sysfs_setup(void)
/* setup table */
info = kmalloc(sizeof(*info) * num_processors, IHK_MC_AP_CRITICAL);
for (cpu = 0; cpu < num_processors; ++cpu) {
info[cpu].online = 10+cpu;
info[cpu].online = 1;
}
fake_cpu_infos = info;
@@ -369,6 +369,13 @@ struct vm_range {
int padding;
};

struct vm_range_numa_policy {
struct list_head list;
unsigned long start, end;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
int numa_mem_policy;
};

struct vm_regions {
unsigned long vm_start, vm_end;
unsigned long text_start, text_end;
@@ -660,6 +667,8 @@ struct process_vm {
long currss;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
int numa_mem_policy;
/* Protected by memory_range_lock */
struct list_head vm_range_numa_policy_list;
};

static inline int has_cap_ipc_lock(struct thread *th)

@@ -210,6 +210,7 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process

ihk_atomic_set(&vm->refcount, 1);
INIT_LIST_HEAD(&vm->vm_range_list);
INIT_LIST_HEAD(&vm->vm_range_numa_policy_list);
vm->address_space = asp;
vm->proc = owner;
vm->exiting = 0;
@@ -2483,6 +2484,7 @@ void sched_init(void)
ihk_mc_init_context(&idle_thread->ctx, NULL, idle);
ihk_mc_spinlock_init(&idle_thread->vm->memory_range_lock);
INIT_LIST_HEAD(&idle_thread->vm->vm_range_list);
INIT_LIST_HEAD(&idle_thread->vm->vm_range_numa_policy_list);
idle_thread->proc->pid = 0;
idle_thread->tid = ihk_mc_get_processor_id();
kernel/syscall.c: 407 changes
@@ -7081,7 +7081,332 @@ out:

SYSCALL_DECLARE(mbind)
{
return -ENOSYS;
unsigned long addr = ihk_mc_syscall_arg0(ctx);
unsigned long len = ihk_mc_syscall_arg1(ctx);
int mode = ihk_mc_syscall_arg2(ctx);
unsigned long *nodemask =
(unsigned long *)ihk_mc_syscall_arg3(ctx);
unsigned long maxnode = ihk_mc_syscall_arg4(ctx);
unsigned flags = ihk_mc_syscall_arg5(ctx);
struct process_vm *vm = cpu_local_var(current)->vm;
unsigned long nodemask_bits = 0;
int mode_flags = 0;
int error = 0;
int bit;
struct vm_range *range;
struct vm_range_numa_policy *range_policy, *range_policy_iter;
struct vm_range_numa_policy *range_policy_next = NULL;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);

/* Validate arguments */
if (addr & ~PAGE_MASK) {
return -EINVAL;
}

len = (len + PAGE_SIZE - 1) & PAGE_MASK;
if (addr + len < addr || addr == (addr + len)) {
return -EINVAL;
}

memset(numa_mask, 0, sizeof(numa_mask));

if (maxnode) {
nodemask_bits = ALIGN(maxnode, 8);
if (maxnode > (PAGE_SIZE << 3)) {
dkprintf("%s: ERROR: nodemask_bits bigger than PAGE_SIZE bits\n",
__FUNCTION__);
error = -EINVAL;
goto out;
}

if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
dkprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
__FUNCTION__);
nodemask_bits = PROCESS_NUMA_MASK_BITS;
}
}

if ((mode & MPOL_F_STATIC_NODES) && (mode & MPOL_F_RELATIVE_NODES)) {
dkprintf("%s: error: MPOL_F_STATIC_NODES & MPOL_F_RELATIVE_NODES\n",
__FUNCTION__);
error = -EINVAL;
goto out;
}

if ((flags & MPOL_MF_STRICT) && (flags & MPOL_MF_MOVE)) {
dkprintf("%s: error: MPOL_MF_STRICT & MPOL_MF_MOVE\n",
__FUNCTION__);
/*
* XXX: man page claims the correct error code is EIO,
* but LTP tests for EINVAL.
*/
error = -EINVAL;
goto out;
}

mode_flags = (mode & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES));
mode &= ~(MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES);

if (mode_flags & MPOL_F_RELATIVE_NODES) {
/* Not supported.. */
dkprintf("%s: error: MPOL_F_RELATIVE_NODES not supported\n",
__FUNCTION__);
error = -EINVAL;
goto out;
}
switch (mode) {
case MPOL_DEFAULT:
if (nodemask && nodemask_bits) {
error = copy_from_user(numa_mask, nodemask,
(nodemask_bits >> 3));
if (error) {
dkprintf("%s: error: copy_from_user numa_mask\n",
__FUNCTION__);
error = -EFAULT;
goto out;
}

if (!bitmap_empty(numa_mask, nodemask_bits)) {
dkprintf("%s: ERROR: nodemask not empty for MPOL_DEFAULT\n",
__FUNCTION__);
error = -EINVAL;
goto out;
}
}
break;

case MPOL_BIND:
case MPOL_INTERLEAVE:
case MPOL_PREFERRED:
/* Special case for MPOL_PREFERRED with empty nodemask */
if (mode == MPOL_PREFERRED && !nodemask) {
error = 0;
break;
}

if (flags & MPOL_MF_STRICT) {
error = -EIO;
goto out;
}

error = copy_from_user(numa_mask, nodemask,
(nodemask_bits >> 3));
if (error) {
error = -EFAULT;
goto out;
}

if (!nodemask || bitmap_empty(numa_mask, nodemask_bits)) {
dkprintf("%s: ERROR: nodemask not specified\n",
__FUNCTION__);
error = -EINVAL;
goto out;
}

/* Verify NUMA mask */
for_each_set_bit(bit, numa_mask, maxnode) {
if (bit >= ihk_mc_get_nr_numa_nodes()) {
dkprintf("%s: %d is bigger than # of NUMA nodes\n",
__FUNCTION__, bit);
error = -EINVAL;
goto out;
}
}

break;

default:
error = -EINVAL;
goto out;
}

/* Validate address range */
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);

range = lookup_process_memory_range(vm, addr, addr + len);
if (!range) {
dkprintf("%s: ERROR: range is invalid\n", __FUNCTION__);
error = -EFAULT;
goto unlock_out;
}

/* Do the actual policy setting */
switch (mode) {
/*
* Man page claims MPOL_DEFAULT should remove any range specific
* policies so that process wise policy will be used. LTP on the
* other hand seems to test if MPOL_DEFAULT is set as a range policy.
* MPOL_DEFAULT thus behaves the same as the rest of the policies
* for now.
*/
#if 0
case MPOL_DEFAULT:
/* Delete or adjust any overlapping range settings */
list_for_each_entry_safe(range_policy_iter, range_policy_next,
&vm->vm_range_numa_policy_list, list) {
int keep = 0;
unsigned long orig_end = range_policy_iter->end;

if (range_policy_iter->end < addr ||
range_policy_iter->start > addr + len) {
continue;
}

/* Do we need to keep the front? */
if (range_policy_iter->start < addr) {
range_policy_iter->end = addr;
keep = 1;
}

/* Do we need to keep the end? */
if (orig_end > addr + len) {
/* Are we keeping front already? */
if (keep) {
/* Add a new entry after */
range_policy = kmalloc(sizeof(*range_policy),
IHK_MC_AP_NOWAIT);
if (!range_policy) {
kprintf("%s: error allocating range_policy\n",
__FUNCTION__);
error = -ENOMEM;
goto unlock_out;
}

memcpy(range_policy, range_policy_iter,
sizeof(*range_policy));
range_policy->start = addr + len;
range_policy->end = orig_end;
list_add(&range_policy->list,
&range_policy_iter->list);
}
else {
range_policy_iter->start = addr + len;
keep = 1;
}
}

if (!keep) {
list_del(&range_policy_iter->list);
kfree(range_policy_iter);
}
}

break;
#endif
case MPOL_DEFAULT:
case MPOL_BIND:
case MPOL_INTERLEAVE:
case MPOL_PREFERRED:
/* Adjust any overlapping range settings and add new one */
range_policy_next = NULL;
list_for_each_entry(range_policy_iter,
&vm->vm_range_numa_policy_list, list) {
int adjusted = 0;
unsigned long orig_end = range_policy_iter->end;

if (range_policy_iter->end < addr)
continue;

/* Special case of entirely overlapping */
if (range_policy_iter->start == addr &&
range_policy_iter->end == addr + len) {
range_policy = range_policy_iter;
goto mbind_update_only;
}

/* Overlapping partially? */
if (range_policy_iter->start < addr) {
orig_end = range_policy_iter->end;
range_policy_iter->end = addr;
adjusted = 1;
}

/* Do we need to keep the end? */
if (orig_end > addr + len) {
if (adjusted) {
/* Add a new entry after */
range_policy = kmalloc(sizeof(*range_policy),
IHK_MC_AP_NOWAIT);
if (!range_policy) {
dkprintf("%s: error allocating range_policy\n",
__FUNCTION__);
error = -ENOMEM;
goto unlock_out;
}

memcpy(range_policy, range_policy_iter,
sizeof(*range_policy));
range_policy->start = addr + len;
range_policy->end = orig_end;
list_add(&range_policy->list,
&range_policy_iter->list);
range_policy_next = range_policy;
break;
}
else {
range_policy_iter->start = addr + len;
range_policy_next = range_policy_iter;
break;
}
}

/* Next one in ascending address order? */
if (range_policy_iter->start >= addr + len) {
range_policy_next = range_policy_iter;
break;
}
}

/* Add a new entry */
range_policy = kmalloc(sizeof(*range_policy),
IHK_MC_AP_NOWAIT);
if (!range_policy) {
dkprintf("%s: error allocating range_policy\n",
__FUNCTION__);
error = -ENOMEM;
goto unlock_out;
}

memset(range_policy, 0, sizeof(*range_policy));
range_policy->start = addr;
range_policy->end = addr + len;

if (range_policy_next) {
list_add_tail(&range_policy->list,
&range_policy_next->list);
}
else {
list_add_tail(&range_policy->list,
&vm->vm_range_numa_policy_list);
}
mbind_update_only:
if (mode == MPOL_DEFAULT) {
memset(range_policy->numa_mask, 0, sizeof(numa_mask));
for (bit = 0; bit < ihk_mc_get_nr_numa_nodes(); ++bit) {
set_bit(bit, range_policy->numa_mask);
}
}
else {
memcpy(range_policy->numa_mask, &numa_mask,
sizeof(numa_mask));
}
range_policy->numa_mem_policy = mode;

break;

default:
error = -EINVAL;
goto out;
}

error = 0;

unlock_out:
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
out:
return error;
} /* sys_mbind() */
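With sys_mbind() now maintaining per-range policies instead of returning -ENOSYS, the standard mbind(2) interface becomes usable on McKernel. A minimal userspace sketch of exercising it is shown below; the node number and mapping size are arbitrary illustration values, and the program targets the regular Linux API from libnuma's <numaif.h>, not anything specific to this change.

```c
/* Minimal sketch: bind an anonymous mapping to NUMA node 0 via mbind(2).
 * Build with -lnuma for <numaif.h>; values are illustrative only. */
#include <numaif.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4 * 1024 * 1024;
	unsigned long nodemask = 1UL << 0;   /* node 0 only */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* maxnode is the size of the mask in bits */
	if (mbind(p, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0)) {
		perror("mbind");
		return 1;
	}

	memset(p, 0, len);   /* touch pages so they are allocated on node 0 */
	return 0;
}
```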
SYSCALL_DECLARE(set_mempolicy)
@@ -7094,6 +7419,8 @@ SYSCALL_DECLARE(set_mempolicy)
struct process_vm *vm = cpu_local_var(current)->vm;
int error = 0;
int bit, valid_mask;
struct vm_range_numa_policy *range_policy_iter;
struct vm_range_numa_policy *range_policy_next = NULL;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);

memset(numa_mask, 0, sizeof(numa_mask));
@@ -7108,7 +7435,7 @@ SYSCALL_DECLARE(set_mempolicy)
}

if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
kprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
dkprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
__FUNCTION__);
nodemask_bits = PROCESS_NUMA_MASK_BITS;
}
@@ -7137,7 +7464,14 @@ SYSCALL_DECLARE(set_mempolicy)
set_bit(bit, vm->numa_mask);
}

/* TODO: delete all mbind() specified regions */
/* Delete all range settings */
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
list_for_each_entry_safe(range_policy_iter, range_policy_next,
&vm->vm_range_numa_policy_list, list) {
list_del(&range_policy_iter->list);
kfree(range_policy_iter);
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);

vm->numa_mem_policy = mode;
error = 0;
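set_mempolicy() now also drops every mbind()-created range policy, so the process-wide policy cleanly takes over again. For completeness, a hypothetical userspace counterpart using the standard set_mempolicy(2) API follows; the interleave mask is an illustrative value.

```c
/* Minimal sketch: set a process-wide interleave policy across nodes 0 and 1
 * with set_mempolicy(2) from <numaif.h>; build with -lnuma. */
#include <numaif.h>
#include <stdio.h>

int main(void)
{
	unsigned long nodemask = (1UL << 0) | (1UL << 1);

	if (set_mempolicy(MPOL_INTERLEAVE, &nodemask, sizeof(nodemask) * 8)) {
		perror("set_mempolicy");
		return 1;
	}
	/* subsequent anonymous allocations interleave across nodes 0 and 1 */
	return 0;
}
```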
@@ -7224,16 +7558,25 @@ SYSCALL_DECLARE(get_mempolicy)
unsigned long addr = ihk_mc_syscall_arg3(ctx);
unsigned long flags = ihk_mc_syscall_arg4(ctx);
struct process_vm *vm = cpu_local_var(current)->vm;
int error;
struct vm_range_numa_policy *range_policy = NULL;
int error = 0;
int policy;

if (((flags & MPOL_F_ADDR) && !addr) ||
(!(flags & MPOL_F_ADDR) && addr) ||
if ((!(flags & MPOL_F_ADDR) && addr) ||
(flags & ~(MPOL_F_ADDR | MPOL_F_NODE | MPOL_F_MEMS_ALLOWED)) ||
((flags & MPOL_F_NODE) && !(flags & MPOL_F_ADDR) &&
vm->numa_mem_policy == MPOL_INTERLEAVE)) {
return -EINVAL;
}

/*
* XXX: man page claims the correct error code is EINVAL,
* but LTP tests for EFAULT.
*/
if ((flags & MPOL_F_ADDR) && !addr) {
return -EFAULT;
}

if (maxnode) {
if (maxnode < ihk_mc_get_nr_numa_nodes()) {
return -EINVAL;
@@ -7247,18 +7590,62 @@ SYSCALL_DECLARE(get_mempolicy)
}
}

/* Special case of MPOL_F_MEMS_ALLOWED */
if (flags == MPOL_F_MEMS_ALLOWED) {
if (nodemask) {
error = copy_to_user(nodemask,
cpu_local_var(current)->vm->numa_mask,
(nodemask_bits >> 3));
if (error) {
error = -EFAULT;
}
}

goto out;
}

/* Address range specific? */
if (flags & MPOL_F_ADDR) {
struct vm_range_numa_policy *range_policy_iter;
struct vm_range *range;

ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
range = lookup_process_memory_range(vm, addr, addr + 1);
if (!range) {
dkprintf("%s: ERROR: range is invalid\n", __FUNCTION__);
error = -EFAULT;
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
goto out;
}

list_for_each_entry(range_policy_iter,
&vm->vm_range_numa_policy_list, list) {
if (range_policy_iter->start > addr ||
range_policy_iter->end <= addr) {
continue;
}

range_policy = range_policy_iter;
break;
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
}

/* Return policy */
policy = range_policy ? range_policy->numa_mem_policy :
vm->numa_mem_policy;

if (mode) {
error = copy_to_user(mode,
&cpu_local_var(current)->vm->numa_mem_policy,
sizeof(int));
error = copy_to_user(mode, &policy, sizeof(int));
if (error) {
error = -EFAULT;
goto out;
}
}

if (nodemask) {
if (nodemask && (policy != MPOL_DEFAULT)) {
error = copy_to_user(nodemask,
range_policy ? range_policy->numa_mask :
cpu_local_var(current)->vm->numa_mask,
(nodemask_bits >> 3));
if (error) {