Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e4b3a88fc6 | |||
| 69a5c53074 | |||
| 259583e936 | |||
| 0f826290d0 | |||
| e46f027894 | |||
| 3e093f6a40 | |||
| 00996b551f | |||
| 24d8697cef | |||
| be4f6741f9 | |||
| 7a2f67f5f0 | |||
| bba0425267 |
@ -396,12 +396,15 @@ if [ "$enable_mcoverlay" == "yes" ]; then
|
|||||||
fi
|
fi
|
||||||
# TODO: How do we revert this in case of failure?
|
# TODO: How do we revert this in case of failure?
|
||||||
mount --make-rprivate /sys
|
mount --make-rprivate /sys
|
||||||
|
|
||||||
rm -rf /tmp/mcos/mcos0_sys/setup_complete
|
rm -rf /tmp/mcos/mcos0_sys/setup_complete
|
||||||
|
|
||||||
# Hide NUMA related files which are outside the LWK partition
|
# Hide NUMA related files which are outside the LWK partition
|
||||||
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
|
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
|
||||||
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
|
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
|
||||||
|
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||||
|
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/drivers/processor/$cpuid
|
||||||
else
|
else
|
||||||
for nodeid in `find /sys/devices/system/cpu/$cpuid/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
for nodeid in `find /sys/devices/system/cpu/$cpuid/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
||||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid/$nodeid" ]; then
|
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid/$nodeid" ]; then
|
||||||
@ -412,7 +415,8 @@ if [ "$enable_mcoverlay" == "yes" ]; then
|
|||||||
done
|
done
|
||||||
for nodeid in `find /sys/devices/system/node/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
for nodeid in `find /sys/devices/system/node/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
||||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid" ]; then
|
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid" ]; then
|
||||||
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid
|
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/*
|
||||||
|
rm -rf /tmp/mcos/mcos0_sys/bus/node/devices/$nodeid
|
||||||
else
|
else
|
||||||
# Delete non-existent symlinks
|
# Delete non-existent symlinks
|
||||||
for cpuid in `find /sys/devices/system/node/$nodeid/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
for cpuid in `find /sys/devices/system/node/$nodeid/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||||
@ -424,6 +428,7 @@ if [ "$enable_mcoverlay" == "yes" ]; then
|
|||||||
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/memory*
|
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/memory*
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
rm -f /tmp/mcos/mcos0_sys/devices/system/node/has_*
|
||||||
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
|
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
|
||||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||||
|
|||||||
25
configure
vendored
25
configure
vendored
@ -3117,6 +3117,31 @@ _ACEOF
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_umount" >&5
|
||||||
|
$as_echo_n "checking System.map for symbol sys_umount... " >&6; }
|
||||||
|
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_umount\$" | cut -d\ -f1`
|
||||||
|
if test -z $mcctrl_addr; then
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
|
||||||
|
$as_echo "not found" >&6; }
|
||||||
|
else
|
||||||
|
mcctrl_result=$mcctrl_addr
|
||||||
|
mcctrl_addr="0x$mcctrl_addr"
|
||||||
|
|
||||||
|
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_sys_umount\$" >/dev/null`; then
|
||||||
|
mcctrl_result="exported"
|
||||||
|
mcctrl_addr="0"
|
||||||
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5
|
||||||
|
$as_echo "$mcctrl_result" >&6; }
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define MCCTRL_KSYM_sys_umount $mcctrl_addr
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol sys_unshare" >&5
|
||||||
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
|
$as_echo_n "checking System.map for symbol sys_unshare... " >&6; }
|
||||||
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
|
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " sys_unshare\$" | cut -d\ -f1`
|
||||||
|
|||||||
@ -221,6 +221,7 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[
|
|||||||
])
|
])
|
||||||
|
|
||||||
MCCTRL_FIND_KSYM([sys_mount])
|
MCCTRL_FIND_KSYM([sys_mount])
|
||||||
|
MCCTRL_FIND_KSYM([sys_umount])
|
||||||
MCCTRL_FIND_KSYM([sys_unshare])
|
MCCTRL_FIND_KSYM([sys_unshare])
|
||||||
MCCTRL_FIND_KSYM([zap_page_range])
|
MCCTRL_FIND_KSYM([zap_page_range])
|
||||||
MCCTRL_FIND_KSYM([vdso_image_64])
|
MCCTRL_FIND_KSYM([vdso_image_64])
|
||||||
|
|||||||
@ -51,6 +51,9 @@
|
|||||||
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
||||||
#undef MCCTRL_KSYM_sys_readlink
|
#undef MCCTRL_KSYM_sys_readlink
|
||||||
|
|
||||||
|
/* Define to address of kernel symbol sys_umount, or 0 if exported */
|
||||||
|
#undef MCCTRL_KSYM_sys_umount
|
||||||
|
|
||||||
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
||||||
#undef MCCTRL_KSYM_sys_unshare
|
#undef MCCTRL_KSYM_sys_unshare
|
||||||
|
|
||||||
|
|||||||
@ -41,6 +41,7 @@
|
|||||||
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
|
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
|
||||||
#define MCEXEC_UP_GET_CRED 0x30a0290a
|
#define MCEXEC_UP_GET_CRED 0x30a0290a
|
||||||
#define MCEXEC_UP_GET_CREDV 0x30a0290b
|
#define MCEXEC_UP_GET_CREDV 0x30a0290b
|
||||||
|
#define MCEXEC_UP_GET_NODES 0x30a0290c
|
||||||
|
|
||||||
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
|
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
|
||||||
#define MCEXEC_UP_FREE_DMA 0x30a02911
|
#define MCEXEC_UP_FREE_DMA 0x30a02911
|
||||||
@ -49,7 +50,8 @@
|
|||||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||||
|
|
||||||
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
||||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02915
|
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
|
||||||
|
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916
|
||||||
|
|
||||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||||
|
|
||||||
@ -196,6 +198,10 @@ struct sys_mount_desc {
|
|||||||
void *data;
|
void *data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Argument block passed with the MCEXEC_UP_SYS_UMOUNT request. */
struct sys_umount_desc {
|
||||||
|
/* Mount point to unmount; mcexec_sys_umount() hands it to mcctrl_sys_umount(). */
char *dir_name;
|
||||||
|
};
|
||||||
|
|
||||||
struct sys_unshare_desc {
|
struct sys_unshare_desc {
|
||||||
unsigned long unshare_flags;
|
unsigned long unshare_flags;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -66,7 +66,18 @@ int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long
|
|||||||
(int_star_fn_char_char_char_ulong_void_t)
|
(int_star_fn_char_char_char_ulong_void_t)
|
||||||
MCCTRL_KSYM_sys_mount;
|
MCCTRL_KSYM_sys_mount;
|
||||||
#else // exported
|
#else // exported
|
||||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
|
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MCCTRL_KSYM_sys_umount
|
||||||
|
#if MCCTRL_KSYM_sys_umount
|
||||||
|
typedef int (*int_fn_char_star_int_t)(char *, int);
|
||||||
|
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
|
||||||
|
(int_fn_char_star_int_t)
|
||||||
|
MCCTRL_KSYM_sys_umount;
|
||||||
|
#else // exported
|
||||||
|
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -439,6 +450,16 @@ static long mcexec_get_cpu(ihk_os_t os)
|
|||||||
return info->n_cpus;
|
return info->n_cpus;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for MCEXEC_UP_GET_NODES.
 *
 * Returns mem_info->n_numa_nodes from the user data attached to `os`,
 * or -EINVAL when either the user data or its mem_info pointer is NULL.
 */
static long mcexec_get_nodes(ihk_os_t os)
|
||||||
|
{
|
||||||
|
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||||
|
|
||||||
|
/* Both pointers are dereferenced below, so reject missing data first. */
if (!usrdata || !usrdata->mem_info)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return usrdata->mem_info->n_numa_nodes;
|
||||||
|
}
|
||||||
|
|
||||||
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
||||||
struct mcctrl_per_proc_data *ppd)
|
struct mcctrl_per_proc_data *ppd)
|
||||||
{
|
{
|
||||||
@ -1154,7 +1175,7 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
|||||||
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
||||||
original = override_creds(promoted);
|
original = override_creds(promoted);
|
||||||
|
|
||||||
#if MCCTRL_KSYM_sys_mount
|
#ifdef MCCTRL_KSYM_sys_mount
|
||||||
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
|
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
|
||||||
desc.flags, desc.data);
|
desc.flags, desc.data);
|
||||||
#else
|
#else
|
||||||
@ -1167,6 +1188,36 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Handler for MCEXEC_UP_SYS_UMOUNT.
 *
 * Copies a struct sys_umount_desc from user space and calls
 * mcctrl_sys_umount() on desc.dir_name with MNT_FORCE, while running under
 * a temporary credential that has CAP_SYS_ADMIN raised in its effective set.
 *
 * Returns -EFAULT if the descriptor cannot be copied or if
 * MCCTRL_KSYM_sys_umount is not defined, -ENOMEM if prepare_creds() fails,
 * and otherwise the value returned by mcctrl_sys_umount().
 *
 * NOTE(review): the parameter is declared as struct sys_mount_desc * but the
 * data is copied into a struct sys_umount_desc (only sizeof(desc) bytes are
 * read). This looks like it should be struct sys_umount_desc * -- confirm,
 * together with the matching cast at the MCEXEC_UP_SYS_UMOUNT call site.
 */
long mcexec_sys_umount(struct sys_mount_desc *__user arg)
|
||||||
|
{
|
||||||
|
struct sys_umount_desc desc;
|
||||||
|
struct cred *promoted;
|
||||||
|
const struct cred *original;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (copy_from_user(&desc, arg, sizeof(desc))) {
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build a modifiable copy of the current credentials and raise CAP_SYS_ADMIN in it. */
promoted = prepare_creds();
|
||||||
|
if (!promoted) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
|
||||||
|
original = override_creds(promoted);
|
||||||
|
|
||||||
|
#ifdef MCCTRL_KSYM_sys_umount
|
||||||
|
ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
|
||||||
|
#else
|
||||||
|
/* No sys_umount symbol was resolved at configure time: report failure. */
ret = -EFAULT;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Restore the caller's credentials and drop the temporary one on every path past override_creds(). */
revert_creds(original);
|
||||||
|
put_cred(promoted);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
|
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
|
||||||
{
|
{
|
||||||
struct sys_unshare_desc desc;
|
struct sys_unshare_desc desc;
|
||||||
@ -1225,6 +1276,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
|||||||
case MCEXEC_UP_GET_CPU:
|
case MCEXEC_UP_GET_CPU:
|
||||||
return mcexec_get_cpu(os);
|
return mcexec_get_cpu(os);
|
||||||
|
|
||||||
|
case MCEXEC_UP_GET_NODES:
|
||||||
|
return mcexec_get_nodes(os);
|
||||||
|
|
||||||
case MCEXEC_UP_STRNCPY_FROM_USER:
|
case MCEXEC_UP_STRNCPY_FROM_USER:
|
||||||
return mcexec_strncpy_from_user(os,
|
return mcexec_strncpy_from_user(os,
|
||||||
(struct strncpy_from_user_desc *)arg);
|
(struct strncpy_from_user_desc *)arg);
|
||||||
@ -1254,6 +1308,9 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
|||||||
case MCEXEC_UP_SYS_MOUNT:
|
case MCEXEC_UP_SYS_MOUNT:
|
||||||
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
||||||
|
|
||||||
|
case MCEXEC_UP_SYS_UMOUNT:
|
||||||
|
return mcexec_sys_umount((struct sys_mount_desc *)arg);
|
||||||
|
|
||||||
case MCEXEC_UP_SYS_UNSHARE:
|
case MCEXEC_UP_SYS_UNSHARE:
|
||||||
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
||||||
|
|
||||||
|
|||||||
@ -60,6 +60,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
|||||||
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
||||||
|
{ .request = MCEXEC_UP_GET_NODES, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
||||||
@ -69,6 +70,7 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
|||||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
||||||
|
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
||||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||||
};
|
};
|
||||||
|
|||||||
@ -281,6 +281,7 @@ struct mcctrl_usrdata {
|
|||||||
unsigned long cpu_online[CPU_LONGS];
|
unsigned long cpu_online[CPU_LONGS];
|
||||||
struct ihk_cpu_info *cpu_info;
|
struct ihk_cpu_info *cpu_info;
|
||||||
struct ihk_mem_info *mem_info;
|
struct ihk_mem_info *mem_info;
|
||||||
|
nodemask_t numa_online;
|
||||||
struct list_head cpu_topology_list;
|
struct list_head cpu_topology_list;
|
||||||
struct list_head node_topology_list;
|
struct list_head node_topology_list;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -92,27 +92,19 @@ void setup_local_snooping_samples(ihk_os_t os)
|
|||||||
|
|
||||||
void setup_local_snooping_files(ihk_os_t os)
|
void setup_local_snooping_files(ihk_os_t os)
|
||||||
{
|
{
|
||||||
struct ihk_cpu_info *info;
|
|
||||||
struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os);
|
struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os);
|
||||||
struct sysfsm_bitmap_param param;
|
struct sysfsm_bitmap_param param;
|
||||||
static unsigned long cpu_offline = 0x0;
|
static unsigned long cpu_offline = 0x0;
|
||||||
int i;
|
int i;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
info = ihk_os_get_cpu_info(os);
|
|
||||||
if (!info) {
|
|
||||||
eprintk("mcctrl:ihk_os_get_cpu_info failed.\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(udp->cpu_online, 0, sizeof(udp->cpu_online));
|
memset(udp->cpu_online, 0, sizeof(udp->cpu_online));
|
||||||
for (i = 0; i < info->n_cpus; i++) {
|
for (i = 0; i < udp->cpu_info->n_cpus; i++) {
|
||||||
udp->cpu_online[i / BITS_PER_LONG] =
|
set_bit(i, udp->cpu_online);
|
||||||
udp->cpu_online[i / BITS_PER_LONG] | (1 << (i % BITS_PER_LONG));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
param.nbits = CPU_LONGS * BITS_PER_LONG;
|
param.nbits = CPU_LONGS * BITS_PER_LONG;
|
||||||
param.ptr = udp->cpu_online;
|
param.ptr = &udp->cpu_online;
|
||||||
dprintk("mcctrl:setup_local_snooping_files: CPU_LONGS=%d, BITS_PER_LONG=%d\n",
|
dprintk("mcctrl:setup_local_snooping_files: CPU_LONGS=%d, BITS_PER_LONG=%d\n",
|
||||||
CPU_LONGS, BITS_PER_LONG);
|
CPU_LONGS, BITS_PER_LONG);
|
||||||
|
|
||||||
@ -666,6 +658,7 @@ out:
|
|||||||
static int setup_node_files(struct mcctrl_usrdata *udp)
|
static int setup_node_files(struct mcctrl_usrdata *udp)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
|
int node;
|
||||||
struct node_topology *p;
|
struct node_topology *p;
|
||||||
struct sysfsm_bitmap_param param;
|
struct sysfsm_bitmap_param param;
|
||||||
|
|
||||||
@ -677,10 +670,21 @@ static int setup_node_files(struct mcctrl_usrdata *udp)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(&udp->numa_online, 0, sizeof(udp->numa_online));
|
||||||
|
for (node = 0; node < udp->mem_info->n_numa_nodes; ++node) {
|
||||||
|
node_set(node, udp->numa_online);
|
||||||
|
}
|
||||||
|
|
||||||
|
param.nbits = MAX_NUMNODES;
|
||||||
|
param.ptr = &udp->numa_online;
|
||||||
|
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_pbl, ¶m, 0444,
|
||||||
|
"/sys/devices/system/node/online");
|
||||||
|
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_pbl, ¶m, 0444,
|
||||||
|
"/sys/devices/system/node/possible");
|
||||||
|
|
||||||
list_for_each_entry(p, &udp->node_topology_list, chain) {
|
list_for_each_entry(p, &udp->node_topology_list, chain) {
|
||||||
struct sysfs_handle handle;
|
struct sysfs_handle handle;
|
||||||
int cpu;
|
int cpu;
|
||||||
int node;
|
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
param.nbits = nr_cpumask_bits;
|
param.nbits = nr_cpumask_bits;
|
||||||
param.ptr = &p->cpumap;
|
param.ptr = &p->cpumap;
|
||||||
@ -697,8 +701,6 @@ static int setup_node_files(struct mcctrl_usrdata *udp)
|
|||||||
mckernel_numa_2_linux_numa(udp, node)
|
mckernel_numa_2_linux_numa(udp, node)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
offset += snprintf(&p->mckernel_numa_distance_s[offset],
|
|
||||||
NODE_DISTANCE_S_SIZE - offset, "%s", "\n");
|
|
||||||
|
|
||||||
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_s,
|
sysfsm_createf(udp->os, SYSFS_SNOOPING_OPS_s,
|
||||||
p->mckernel_numa_distance_s, 0444,
|
p->mckernel_numa_distance_s, 0444,
|
||||||
|
|||||||
@ -41,6 +41,7 @@
|
|||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
|
||||||
#include <termios.h>
|
#include <termios.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
@ -1148,75 +1149,41 @@ void init_worker_threads(int fd)
|
|||||||
|
|
||||||
#ifdef ENABLE_MCOVERLAYFS
|
#ifdef ENABLE_MCOVERLAYFS
|
||||||
#define READ_BUFSIZE 1024
|
#define READ_BUFSIZE 1024
|
||||||
/*
 * Scan /proc/<pid>/mounts of the calling process for a line that begins
 * with `prefix`.
 *
 * prefix: text to match at the start of a mounts line, e.g.
 *         "mcoverlay /proc " (device name, space, mount point, space).
 *
 * Returns 1 when a matching line exists, 0 when none does, and -1 when
 * the mounts file cannot be opened.  Note that -1 is non-zero, so callers
 * that only test for truth will treat an open failure as a match.
 */
static int find_mount_prefix(char *prefix)
{
	FILE *fp;
	char *line = NULL;
	size_t cap = 0;
	size_t prefix_len = strlen(prefix);
	char proc_path[PATH_MAX];
	int ret = 0;

	snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());

	fp = fopen(proc_path, "r");
	if (fp == NULL) {
		return -1;
	}

	/* getline() grows `line` as needed, so arbitrarily long entries are fine. */
	while (getline(&line, &cap, fp) != -1) {
		/* strncmp() stops at the line's NUL, so short lines simply fail to match. */
		if (strncmp(line, prefix, prefix_len) == 0) {
			ret = 1;
			break;
		}
	}

	free(line);
	/* Close the stream on every path; this function is called repeatedly. */
	fclose(fp);

	return ret;
}

/*
 * Report whether an "mcoverlay" filesystem is already mounted on /proc for
 * this process.  Return values are those of find_mount_prefix().
 */
static int isunshare(void)
{
	return find_mount_prefix("mcoverlay /proc ");
}
|
||||||
#endif // ENABLE_MCOVERLAYFS
|
#endif // ENABLE_MCOVERLAYFS
|
||||||
|
|
||||||
@ -1415,6 +1382,7 @@ int main(int argc, char **argv)
|
|||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
struct sys_unshare_desc unshare_desc;
|
struct sys_unshare_desc unshare_desc;
|
||||||
struct sys_mount_desc mount_desc;
|
struct sys_mount_desc mount_desc;
|
||||||
|
struct sys_umount_desc umount_desc;
|
||||||
|
|
||||||
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
||||||
memset(&mount_desc, '\0', sizeof mount_desc);
|
memset(&mount_desc, '\0', sizeof mount_desc);
|
||||||
@ -1426,6 +1394,53 @@ int main(int argc, char **argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Umount cgroup filesystems that may expose invalid NUMA
|
||||||
|
* information
|
||||||
|
*/
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu,cpuacct")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpu,cpuacct";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpu,cpuacct. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (find_mount_prefix("cgroup /sys/fs/cgroup/cpu")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpu";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpu. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/cpuset")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/cpuset";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/cpuset. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (find_mount_prefix("cgroup /sys/fs/cgroup/memory")) {
|
||||||
|
umount_desc.dir_name = "/sys/fs/cgroup/memory/";
|
||||||
|
|
||||||
|
if (ioctl(fd, MCEXEC_UP_SYS_UMOUNT,
|
||||||
|
(unsigned long)&umount_desc) != 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Failed to umount cgroup/memory. (%s)\n",
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
||||||
mount_desc.dev_name = mcos_procdir;
|
mount_desc.dev_name = mcos_procdir;
|
||||||
mount_desc.dir_name = "/proc";
|
mount_desc.dir_name = "/proc";
|
||||||
@ -1686,6 +1701,97 @@ do_generic_syscall(
|
|||||||
ret = -errno;
|
ret = -errno;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Overlayfs /sys/X directory lseek() problem work around */
|
||||||
|
if (w->sr.number == __NR_lseek && ret == -EINVAL) {
|
||||||
|
char proc_path[512];
|
||||||
|
char path[512];
|
||||||
|
struct stat sb;
|
||||||
|
|
||||||
|
sprintf(proc_path, "/proc/self/fd/%d", (int)w->sr.args[0]);
|
||||||
|
|
||||||
|
/* Get filename */
|
||||||
|
if (readlink(proc_path, path, sizeof(path)) < 0) {
|
||||||
|
fprintf(stderr, "%s: error: readlink() failed for %s\n",
|
||||||
|
__FUNCTION__, proc_path);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not in /sys? */
|
||||||
|
if (strncmp(path, "/sys/", 5))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
/* Stat */
|
||||||
|
if (stat(path, &sb) < 0) {
|
||||||
|
fprintf(stderr, "%s: error stat() failed for %s\n",
|
||||||
|
__FUNCTION__, path);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not dir? */
|
||||||
|
if ((sb.st_mode & S_IFMT) != S_IFDIR)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
/* Fake that nodeX in /sys/devices/system/node do not exist,
|
||||||
|
* where X >= number of LWK NUMA nodes */
|
||||||
|
else if (w->sr.number == __NR_getdents && ret > 0) {
|
||||||
|
struct linux_dirent {
|
||||||
|
long d_ino;
|
||||||
|
off_t d_off;
|
||||||
|
unsigned short d_reclen;
|
||||||
|
char d_name[];
|
||||||
|
};
|
||||||
|
struct linux_dirent *d;
|
||||||
|
char *buf = (char *)w->sr.args[1];
|
||||||
|
int bpos = 0;
|
||||||
|
int nodes,len;
|
||||||
|
char proc_path[PATH_MAX];
|
||||||
|
char path[PATH_MAX];
|
||||||
|
|
||||||
|
sprintf(proc_path, "/proc/self/fd/%d", (int)w->sr.args[0]);
|
||||||
|
|
||||||
|
/* Get filename */
|
||||||
|
if ((len = readlink(proc_path, path, sizeof(path))) < 0) {
|
||||||
|
fprintf(stderr, "%s: error: readlink() failed for %s\n",
|
||||||
|
__FUNCTION__, proc_path);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
path[len] = 0;
|
||||||
|
|
||||||
|
/* Not /sys/devices/system/node ? */
|
||||||
|
if (strcmp(path, "/sys/devices/system/node"))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
nodes = ioctl(fd, MCEXEC_UP_GET_NODES, 0);
|
||||||
|
if (nodes == -1) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
d = (struct linux_dirent *) (buf + bpos);
|
||||||
|
for (bpos = 0; bpos < ret; ) {
|
||||||
|
int nodeid, tmp_reclen;
|
||||||
|
d = (struct linux_dirent *) (buf + bpos);
|
||||||
|
|
||||||
|
if (sscanf(d->d_name, "node%d", &nodeid) != 1) {
|
||||||
|
bpos += d->d_reclen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nodeid >= nodes) {
|
||||||
|
tmp_reclen = d->d_reclen;
|
||||||
|
memmove(buf + bpos,
|
||||||
|
buf + bpos + tmp_reclen,
|
||||||
|
ret - bpos - tmp_reclen);
|
||||||
|
ret -= tmp_reclen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
bpos += d->d_reclen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
__dprintf("do_generic_syscall(%ld):%ld (%#lx)\n", w->sr.number, ret, ret);
|
__dprintf("do_generic_syscall(%ld):%ld (%#lx)\n", w->sr.number, ret, ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -209,7 +209,7 @@ cpu_sysfs_setup(void)
|
|||||||
/* setup table */
|
/* setup table */
|
||||||
info = kmalloc(sizeof(*info) * num_processors, IHK_MC_AP_CRITICAL);
|
info = kmalloc(sizeof(*info) * num_processors, IHK_MC_AP_CRITICAL);
|
||||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||||
info[cpu].online = 10+cpu;
|
info[cpu].online = 1;
|
||||||
}
|
}
|
||||||
fake_cpu_infos = info;
|
fake_cpu_infos = info;
|
||||||
|
|
||||||
|
|||||||
@ -369,6 +369,13 @@ struct vm_range {
|
|||||||
int padding;
|
int padding;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * NUMA memory policy attached to an address range.
 * NOTE(review): presumably linked on process_vm.vm_range_numa_policy_list,
 * which is documented as protected by memory_range_lock -- confirm.
 */
struct vm_range_numa_policy {
|
||||||
|
/* List linkage. */
struct list_head list;
|
||||||
|
/* Bounds of the range; presumably virtual addresses, [start, end) -- TODO confirm. */
unsigned long start, end;
|
||||||
|
/* Node bitmap for this range, PROCESS_NUMA_MASK_BITS bits wide. */
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
||||||
|
/* Policy mode for this range; presumably an MPOL_* value -- TODO confirm. */
int numa_mem_policy;
|
||||||
|
};
|
||||||
|
|
||||||
struct vm_regions {
|
struct vm_regions {
|
||||||
unsigned long vm_start, vm_end;
|
unsigned long vm_start, vm_end;
|
||||||
unsigned long text_start, text_end;
|
unsigned long text_start, text_end;
|
||||||
@ -660,6 +667,8 @@ struct process_vm {
|
|||||||
long currss;
|
long currss;
|
||||||
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
||||||
int numa_mem_policy;
|
int numa_mem_policy;
|
||||||
|
/* Protected by memory_range_lock */
|
||||||
|
struct list_head vm_range_numa_policy_list;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline int has_cap_ipc_lock(struct thread *th)
|
static inline int has_cap_ipc_lock(struct thread *th)
|
||||||
|
|||||||
@ -210,6 +210,7 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
|
|||||||
|
|
||||||
ihk_atomic_set(&vm->refcount, 1);
|
ihk_atomic_set(&vm->refcount, 1);
|
||||||
INIT_LIST_HEAD(&vm->vm_range_list);
|
INIT_LIST_HEAD(&vm->vm_range_list);
|
||||||
|
INIT_LIST_HEAD(&vm->vm_range_numa_policy_list);
|
||||||
vm->address_space = asp;
|
vm->address_space = asp;
|
||||||
vm->proc = owner;
|
vm->proc = owner;
|
||||||
vm->exiting = 0;
|
vm->exiting = 0;
|
||||||
@ -2483,6 +2484,7 @@ void sched_init(void)
|
|||||||
ihk_mc_init_context(&idle_thread->ctx, NULL, idle);
|
ihk_mc_init_context(&idle_thread->ctx, NULL, idle);
|
||||||
ihk_mc_spinlock_init(&idle_thread->vm->memory_range_lock);
|
ihk_mc_spinlock_init(&idle_thread->vm->memory_range_lock);
|
||||||
INIT_LIST_HEAD(&idle_thread->vm->vm_range_list);
|
INIT_LIST_HEAD(&idle_thread->vm->vm_range_list);
|
||||||
|
INIT_LIST_HEAD(&idle_thread->vm->vm_range_numa_policy_list);
|
||||||
idle_thread->proc->pid = 0;
|
idle_thread->proc->pid = 0;
|
||||||
idle_thread->tid = ihk_mc_get_processor_id();
|
idle_thread->tid = ihk_mc_get_processor_id();
|
||||||
|
|
||||||
|
|||||||
407
kernel/syscall.c
407
kernel/syscall.c
@ -7081,7 +7081,332 @@ out:
|
|||||||
|
|
||||||
SYSCALL_DECLARE(mbind)
|
SYSCALL_DECLARE(mbind)
|
||||||
{
|
{
|
||||||
return -ENOSYS;
|
unsigned long addr = ihk_mc_syscall_arg0(ctx);
|
||||||
|
unsigned long len = ihk_mc_syscall_arg1(ctx);
|
||||||
|
int mode = ihk_mc_syscall_arg2(ctx);
|
||||||
|
unsigned long *nodemask =
|
||||||
|
(unsigned long *)ihk_mc_syscall_arg3(ctx);
|
||||||
|
unsigned long maxnode = ihk_mc_syscall_arg4(ctx);
|
||||||
|
unsigned flags = ihk_mc_syscall_arg5(ctx);
|
||||||
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
|
unsigned long nodemask_bits = 0;
|
||||||
|
int mode_flags = 0;
|
||||||
|
int error = 0;
|
||||||
|
int bit;
|
||||||
|
struct vm_range *range;
|
||||||
|
struct vm_range_numa_policy *range_policy, *range_policy_iter;
|
||||||
|
struct vm_range_numa_policy *range_policy_next = NULL;
|
||||||
|
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
||||||
|
|
||||||
|
/* Validate arguments */
|
||||||
|
if (addr & ~PAGE_MASK) {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = (len + PAGE_SIZE - 1) & PAGE_MASK;
|
||||||
|
if (addr + len < addr || addr == (addr + len)) {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(numa_mask, 0, sizeof(numa_mask));
|
||||||
|
|
||||||
|
if (maxnode) {
|
||||||
|
nodemask_bits = ALIGN(maxnode, 8);
|
||||||
|
if (maxnode > (PAGE_SIZE << 3)) {
|
||||||
|
dkprintf("%s: ERROR: nodemask_bits bigger than PAGE_SIZE bits\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
|
||||||
|
dkprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
nodemask_bits = PROCESS_NUMA_MASK_BITS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((mode & MPOL_F_STATIC_NODES) && (mode & MPOL_F_RELATIVE_NODES)) {
|
||||||
|
dkprintf("%s: error: MPOL_F_STATIC_NODES & MPOL_F_RELATIVE_NODES\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((flags & MPOL_MF_STRICT) && (flags & MPOL_MF_MOVE)) {
|
||||||
|
dkprintf("%s: error: MPOL_MF_STRICT & MPOL_MF_MOVE\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
/*
|
||||||
|
* XXX: man page claims the correct error code is EIO,
|
||||||
|
* but LTP tests for EINVAL.
|
||||||
|
*/
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
mode_flags = (mode & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES));
|
||||||
|
mode &= ~(MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES);
|
||||||
|
|
||||||
|
if (mode_flags & MPOL_F_RELATIVE_NODES) {
|
||||||
|
/* Not supported.. */
|
||||||
|
dkprintf("%s: error: MPOL_F_RELATIVE_NODES not supported\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case MPOL_DEFAULT:
|
||||||
|
if (nodemask && nodemask_bits) {
|
||||||
|
error = copy_from_user(numa_mask, nodemask,
|
||||||
|
(nodemask_bits >> 3));
|
||||||
|
if (error) {
|
||||||
|
dkprintf("%s: error: copy_from_user numa_mask\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bitmap_empty(numa_mask, nodemask_bits)) {
|
||||||
|
dkprintf("%s: ERROR: nodemask not empty for MPOL_DEFAULT\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MPOL_BIND:
|
||||||
|
case MPOL_INTERLEAVE:
|
||||||
|
case MPOL_PREFERRED:
|
||||||
|
/* Special case for MPOL_PREFERRED with empty nodemask */
|
||||||
|
if (mode == MPOL_PREFERRED && !nodemask) {
|
||||||
|
error = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & MPOL_MF_STRICT) {
|
||||||
|
error = -EIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = copy_from_user(numa_mask, nodemask,
|
||||||
|
(nodemask_bits >> 3));
|
||||||
|
if (error) {
|
||||||
|
error = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!nodemask || bitmap_empty(numa_mask, nodemask_bits)) {
|
||||||
|
dkprintf("%s: ERROR: nodemask not specified\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify NUMA mask */
|
||||||
|
for_each_set_bit(bit, numa_mask, maxnode) {
|
||||||
|
if (bit >= ihk_mc_get_nr_numa_nodes()) {
|
||||||
|
dkprintf("%s: %d is bigger than # of NUMA nodes\n",
|
||||||
|
__FUNCTION__, bit);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Validate address range */
|
||||||
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||||
|
|
||||||
|
range = lookup_process_memory_range(vm, addr, addr + len);
|
||||||
|
if (!range) {
|
||||||
|
dkprintf("%s: ERROR: range is invalid\n", __FUNCTION__);
|
||||||
|
error = -EFAULT;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do the actual policy setting */
|
||||||
|
switch (mode) {
|
||||||
|
/*
|
||||||
|
* Man page claims MPOL_DEFAULT should remove any range specific
|
||||||
|
* policies so that process wise policy will be used. LTP on the
|
||||||
|
* other hand seems to test if MPOL_DEFAULT is set as a range policy.
|
||||||
|
* MPOL_DEFAULT thus behaves the same as the rest of the policies
|
||||||
|
* for now.
|
||||||
|
*/
|
||||||
|
#if 0
|
||||||
|
case MPOL_DEFAULT:
|
||||||
|
/* Delete or adjust any overlapping range settings */
|
||||||
|
list_for_each_entry_safe(range_policy_iter, range_policy_next,
|
||||||
|
&vm->vm_range_numa_policy_list, list) {
|
||||||
|
int keep = 0;
|
||||||
|
unsigned long orig_end = range_policy_iter->end;
|
||||||
|
|
||||||
|
if (range_policy_iter->end < addr ||
|
||||||
|
range_policy_iter->start > addr + len) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do we need to keep the front? */
|
||||||
|
if (range_policy_iter->start < addr) {
|
||||||
|
range_policy_iter->end = addr;
|
||||||
|
keep = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do we need to keep the end? */
|
||||||
|
if (orig_end > addr + len) {
|
||||||
|
/* Are we keeping front already? */
|
||||||
|
if (keep) {
|
||||||
|
/* Add a new entry after */
|
||||||
|
range_policy = kmalloc(sizeof(*range_policy),
|
||||||
|
IHK_MC_AP_NOWAIT);
|
||||||
|
if (!range_policy) {
|
||||||
|
kprintf("%s: error allocating range_policy\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -ENOMEM;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(range_policy, range_policy_iter,
|
||||||
|
sizeof(*range_policy));
|
||||||
|
range_policy->start = addr + len;
|
||||||
|
range_policy->end = orig_end;
|
||||||
|
list_add(&range_policy->list,
|
||||||
|
&range_policy_iter->list);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
range_policy_iter->start = addr + len;
|
||||||
|
keep = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!keep) {
|
||||||
|
list_del(&range_policy_iter->list);
|
||||||
|
kfree(range_policy_iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case MPOL_DEFAULT:
|
||||||
|
case MPOL_BIND:
|
||||||
|
case MPOL_INTERLEAVE:
|
||||||
|
case MPOL_PREFERRED:
|
||||||
|
/* Adjust any overlapping range settings and add new one */
|
||||||
|
range_policy_next = NULL;
|
||||||
|
list_for_each_entry(range_policy_iter,
|
||||||
|
&vm->vm_range_numa_policy_list, list) {
|
||||||
|
int adjusted = 0;
|
||||||
|
unsigned long orig_end = range_policy_iter->end;
|
||||||
|
|
||||||
|
if (range_policy_iter->end < addr)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Special case of entirely overlapping */
|
||||||
|
if (range_policy_iter->start == addr &&
|
||||||
|
range_policy_iter->end == addr + len) {
|
||||||
|
range_policy = range_policy_iter;
|
||||||
|
goto mbind_update_only;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Overlapping partially? */
|
||||||
|
if (range_policy_iter->start < addr) {
|
||||||
|
orig_end = range_policy_iter->end;
|
||||||
|
range_policy_iter->end = addr;
|
||||||
|
adjusted = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do we need to keep the end? */
|
||||||
|
if (orig_end > addr + len) {
|
||||||
|
if (adjusted) {
|
||||||
|
/* Add a new entry after */
|
||||||
|
range_policy = kmalloc(sizeof(*range_policy),
|
||||||
|
IHK_MC_AP_NOWAIT);
|
||||||
|
if (!range_policy) {
|
||||||
|
dkprintf("%s: error allocating range_policy\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -ENOMEM;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(range_policy, range_policy_iter,
|
||||||
|
sizeof(*range_policy));
|
||||||
|
range_policy->start = addr + len;
|
||||||
|
range_policy->end = orig_end;
|
||||||
|
list_add(&range_policy->list,
|
||||||
|
&range_policy_iter->list);
|
||||||
|
range_policy_next = range_policy;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
range_policy_iter->start = addr + len;
|
||||||
|
range_policy_next = range_policy_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Next one in ascending address order? */
|
||||||
|
if (range_policy_iter->start >= addr + len) {
|
||||||
|
range_policy_next = range_policy_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add a new entry */
|
||||||
|
range_policy = kmalloc(sizeof(*range_policy),
|
||||||
|
IHK_MC_AP_NOWAIT);
|
||||||
|
if (!range_policy) {
|
||||||
|
dkprintf("%s: error allocating range_policy\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -ENOMEM;
|
||||||
|
goto unlock_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(range_policy, 0, sizeof(*range_policy));
|
||||||
|
range_policy->start = addr;
|
||||||
|
range_policy->end = addr + len;
|
||||||
|
|
||||||
|
if (range_policy_next) {
|
||||||
|
list_add_tail(&range_policy->list,
|
||||||
|
&range_policy_next->list);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
list_add_tail(&range_policy->list,
|
||||||
|
&vm->vm_range_numa_policy_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
mbind_update_only:
|
||||||
|
if (mode == MPOL_DEFAULT) {
|
||||||
|
memset(range_policy->numa_mask, 0, sizeof(numa_mask));
|
||||||
|
for (bit = 0; bit < ihk_mc_get_nr_numa_nodes(); ++bit) {
|
||||||
|
set_bit(bit, range_policy->numa_mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memcpy(range_policy->numa_mask, &numa_mask,
|
||||||
|
sizeof(numa_mask));
|
||||||
|
}
|
||||||
|
range_policy->numa_mem_policy = mode;
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = 0;
|
||||||
|
|
||||||
|
unlock_out:
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||||
|
out:
|
||||||
|
return error;
|
||||||
} /* sys_mbind() */
|
} /* sys_mbind() */
|
||||||
|
|
||||||
SYSCALL_DECLARE(set_mempolicy)
|
SYSCALL_DECLARE(set_mempolicy)
|
||||||
@ -7094,6 +7419,8 @@ SYSCALL_DECLARE(set_mempolicy)
|
|||||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
int bit, valid_mask;
|
int bit, valid_mask;
|
||||||
|
struct vm_range_numa_policy *range_policy_iter;
|
||||||
|
struct vm_range_numa_policy *range_policy_next = NULL;
|
||||||
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
||||||
|
|
||||||
memset(numa_mask, 0, sizeof(numa_mask));
|
memset(numa_mask, 0, sizeof(numa_mask));
|
||||||
@ -7108,7 +7435,7 @@ SYSCALL_DECLARE(set_mempolicy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
|
if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
|
||||||
kprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
|
dkprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
|
||||||
__FUNCTION__);
|
__FUNCTION__);
|
||||||
nodemask_bits = PROCESS_NUMA_MASK_BITS;
|
nodemask_bits = PROCESS_NUMA_MASK_BITS;
|
||||||
}
|
}
|
||||||
@ -7137,7 +7464,14 @@ SYSCALL_DECLARE(set_mempolicy)
|
|||||||
set_bit(bit, vm->numa_mask);
|
set_bit(bit, vm->numa_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: delete all mbind() specified regions */
|
/* Delete all range settings */
|
||||||
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||||
|
list_for_each_entry_safe(range_policy_iter, range_policy_next,
|
||||||
|
&vm->vm_range_numa_policy_list, list) {
|
||||||
|
list_del(&range_policy_iter->list);
|
||||||
|
kfree(range_policy_iter);
|
||||||
|
}
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||||
|
|
||||||
vm->numa_mem_policy = mode;
|
vm->numa_mem_policy = mode;
|
||||||
error = 0;
|
error = 0;
|
||||||
@ -7224,16 +7558,25 @@ SYSCALL_DECLARE(get_mempolicy)
|
|||||||
unsigned long addr = ihk_mc_syscall_arg3(ctx);
|
unsigned long addr = ihk_mc_syscall_arg3(ctx);
|
||||||
unsigned long flags = ihk_mc_syscall_arg4(ctx);
|
unsigned long flags = ihk_mc_syscall_arg4(ctx);
|
||||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
int error;
|
struct vm_range_numa_policy *range_policy = NULL;
|
||||||
|
int error = 0;
|
||||||
|
int policy;
|
||||||
|
|
||||||
if (((flags & MPOL_F_ADDR) && !addr) ||
|
if ((!(flags & MPOL_F_ADDR) && addr) ||
|
||||||
(!(flags & MPOL_F_ADDR) && addr) ||
|
|
||||||
(flags & ~(MPOL_F_ADDR | MPOL_F_NODE | MPOL_F_MEMS_ALLOWED)) ||
|
(flags & ~(MPOL_F_ADDR | MPOL_F_NODE | MPOL_F_MEMS_ALLOWED)) ||
|
||||||
((flags & MPOL_F_NODE) && !(flags & MPOL_F_ADDR) &&
|
((flags & MPOL_F_NODE) && !(flags & MPOL_F_ADDR) &&
|
||||||
vm->numa_mem_policy == MPOL_INTERLEAVE)) {
|
vm->numa_mem_policy == MPOL_INTERLEAVE)) {
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX: man page claims the correct error code is EINVAL,
|
||||||
|
* but LTP tests for EFAULT.
|
||||||
|
*/
|
||||||
|
if ((flags & MPOL_F_ADDR) && !addr) {
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
if (maxnode) {
|
if (maxnode) {
|
||||||
if (maxnode < ihk_mc_get_nr_numa_nodes()) {
|
if (maxnode < ihk_mc_get_nr_numa_nodes()) {
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -7247,18 +7590,62 @@ SYSCALL_DECLARE(get_mempolicy)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Special case of MPOL_F_MEMS_ALLOWED */
|
||||||
|
if (flags == MPOL_F_MEMS_ALLOWED) {
|
||||||
|
if (nodemask) {
|
||||||
|
error = copy_to_user(nodemask,
|
||||||
|
cpu_local_var(current)->vm->numa_mask,
|
||||||
|
(nodemask_bits >> 3));
|
||||||
|
if (error) {
|
||||||
|
error = -EFAULT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Address range specific? */
|
||||||
|
if (flags & MPOL_F_ADDR) {
|
||||||
|
struct vm_range_numa_policy *range_policy_iter;
|
||||||
|
struct vm_range *range;
|
||||||
|
|
||||||
|
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||||
|
range = lookup_process_memory_range(vm, addr, addr + 1);
|
||||||
|
if (!range) {
|
||||||
|
dkprintf("%s: ERROR: range is invalid\n", __FUNCTION__);
|
||||||
|
error = -EFAULT;
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry(range_policy_iter,
|
||||||
|
&vm->vm_range_numa_policy_list, list) {
|
||||||
|
if (range_policy_iter->start > addr ||
|
||||||
|
range_policy_iter->end <= addr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_policy = range_policy_iter;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return policy */
|
||||||
|
policy = range_policy ? range_policy->numa_mem_policy :
|
||||||
|
vm->numa_mem_policy;
|
||||||
|
|
||||||
if (mode) {
|
if (mode) {
|
||||||
error = copy_to_user(mode,
|
error = copy_to_user(mode, &policy, sizeof(int));
|
||||||
&cpu_local_var(current)->vm->numa_mem_policy,
|
|
||||||
sizeof(int));
|
|
||||||
if (error) {
|
if (error) {
|
||||||
error = -EFAULT;
|
error = -EFAULT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nodemask) {
|
if (nodemask && (policy != MPOL_DEFAULT)) {
|
||||||
error = copy_to_user(nodemask,
|
error = copy_to_user(nodemask,
|
||||||
|
range_policy ? range_policy->numa_mask :
|
||||||
cpu_local_var(current)->vm->numa_mask,
|
cpu_local_var(current)->vm->numa_mask,
|
||||||
(nodemask_bits >> 3));
|
(nodemask_bits >> 3));
|
||||||
if (error) {
|
if (error) {
|
||||||
|
|||||||
Reference in New Issue
Block a user