get/set_mempolicy(): support for querying and setting the process-level policy
This commit is contained in:
@ -168,6 +168,13 @@
|
|||||||
|
|
||||||
#define PROCESS_NUMA_MASK_BITS 64
|
#define PROCESS_NUMA_MASK_BITS 64
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
|
||||||
|
* passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
|
||||||
|
* The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Policies */
|
||||||
enum {
|
enum {
|
||||||
MPOL_DEFAULT,
|
MPOL_DEFAULT,
|
||||||
MPOL_PREFERRED,
|
MPOL_PREFERRED,
|
||||||
@ -177,6 +184,51 @@ enum {
|
|||||||
MPOL_MAX, /* always last member of enum */
|
MPOL_MAX, /* always last member of enum */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Steps of a mempolicy rebind operation.  MPOL_REBIND_NSTEP is the last
 * enumerator, so its value equals the number of steps listed before it.
 */
enum mpol_rebind_step {
|
||||||
|
MPOL_REBIND_ONCE, /* do the whole rebind at once (not in two steps) */
|
||||||
|
MPOL_REBIND_STEP1, /* first step: set all the newly allowed nodes */
|
||||||
|
MPOL_REBIND_STEP2, /* second step: clear all the disallowed nodes */
|
||||||
|
MPOL_REBIND_NSTEP,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Flags for set_mempolicy */
|
||||||
|
#define MPOL_F_STATIC_NODES (1 << 15)
|
||||||
|
#define MPOL_F_RELATIVE_NODES (1 << 14)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
|
||||||
|
* either set_mempolicy() or mbind().
|
||||||
|
*/
|
||||||
|
#define MPOL_MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
|
||||||
|
|
||||||
|
/* Flags for get_mempolicy */
|
||||||
|
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
|
||||||
|
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
|
||||||
|
#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
|
||||||
|
|
||||||
|
/* Flags for mbind */
|
||||||
|
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
|
||||||
|
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform
|
||||||
|
to policy */
|
||||||
|
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */
|
||||||
|
#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */
|
||||||
|
#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */
|
||||||
|
|
||||||
|
#define MPOL_MF_VALID (MPOL_MF_STRICT | \
|
||||||
|
MPOL_MF_MOVE | \
|
||||||
|
MPOL_MF_MOVE_ALL)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Internal flags that share the struct mempolicy flags word with
|
||||||
|
* "mode flags". These flags are allocated from bit 0 up, as they
|
||||||
|
* are never OR'ed into the mode in mempolicy API arguments.
|
||||||
|
*/
|
||||||
|
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
|
||||||
|
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
|
||||||
|
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
|
||||||
|
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
|
||||||
|
#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */
|
||||||
|
|
||||||
#include <waitq.h>
|
#include <waitq.h>
|
||||||
#include <futex.h>
|
#include <futex.h>
|
||||||
|
|
||||||
|
|||||||
157
kernel/syscall.c
157
kernel/syscall.c
@ -52,6 +52,8 @@
|
|||||||
#include <mc_perf_event.h>
|
#include <mc_perf_event.h>
|
||||||
#include <march.h>
|
#include <march.h>
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
|
#include <bitops.h>
|
||||||
|
#include <bitmap.h>
|
||||||
|
|
||||||
/* Headers taken from kitten LWK */
|
/* Headers taken from kitten LWK */
|
||||||
#include <lwk/stddef.h>
|
#include <lwk/stddef.h>
|
||||||
@ -7084,7 +7086,132 @@ SYSCALL_DECLARE(mbind)
|
|||||||
|
|
||||||
SYSCALL_DECLARE(set_mempolicy)
|
SYSCALL_DECLARE(set_mempolicy)
|
||||||
{
|
{
|
||||||
return -ENOSYS;
|
int mode = ihk_mc_syscall_arg0(ctx);
|
||||||
|
unsigned long *nodemask =
|
||||||
|
(unsigned long *)ihk_mc_syscall_arg1(ctx);
|
||||||
|
unsigned long maxnode = ihk_mc_syscall_arg2(ctx);
|
||||||
|
unsigned long nodemask_bits = 0;
|
||||||
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
|
int error = 0;
|
||||||
|
int bit, valid_mask;
|
||||||
|
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
|
||||||
|
|
||||||
|
memset(numa_mask, 0, sizeof(numa_mask));
|
||||||
|
|
||||||
|
if (maxnode) {
|
||||||
|
nodemask_bits = ALIGN(maxnode, 8);
|
||||||
|
if (maxnode > (PAGE_SIZE << 3)) {
|
||||||
|
dkprintf("%s: ERROR: nodemask_bits bigger than PAGE_SIZE bits\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
|
||||||
|
kprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
nodemask_bits = PROCESS_NUMA_MASK_BITS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case MPOL_DEFAULT:
|
||||||
|
if (nodemask && nodemask_bits) {
|
||||||
|
error = copy_from_user(numa_mask, nodemask,
|
||||||
|
(nodemask_bits >> 3));
|
||||||
|
if (error) {
|
||||||
|
error = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bitmap_empty(numa_mask, nodemask_bits)) {
|
||||||
|
dkprintf("%s: ERROR: nodemask not empty for MPOL_DEFAULT\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(vm->numa_mask, 0, sizeof(numa_mask));
|
||||||
|
for (bit = 0; bit < ihk_mc_get_nr_numa_nodes(); ++bit) {
|
||||||
|
set_bit(bit, vm->numa_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: delete all mbind() specified regions */
|
||||||
|
|
||||||
|
vm->numa_mem_policy = mode;
|
||||||
|
error = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MPOL_BIND:
|
||||||
|
case MPOL_INTERLEAVE:
|
||||||
|
case MPOL_PREFERRED:
|
||||||
|
/* Special case for MPOL_PREFERRED with empty nodemask */
|
||||||
|
if (mode == MPOL_PREFERRED && !nodemask) {
|
||||||
|
memset(vm->numa_mask, 0, sizeof(numa_mask));
|
||||||
|
for (bit = 0; bit < ihk_mc_get_nr_numa_nodes(); ++bit) {
|
||||||
|
set_bit(bit, vm->numa_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
vm->numa_mem_policy = mode;
|
||||||
|
error = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!nodemask) {
|
||||||
|
dkprintf("%s: ERROR: nodemask not specified\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = copy_from_user(numa_mask, nodemask,
|
||||||
|
(nodemask_bits >> 3));
|
||||||
|
if (error) {
|
||||||
|
error = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify NUMA mask */
|
||||||
|
valid_mask = 0;
|
||||||
|
for_each_set_bit(bit, numa_mask, maxnode) {
|
||||||
|
if (bit >= ihk_mc_get_nr_numa_nodes()) {
|
||||||
|
dkprintf("%s: %d is bigger than # of NUMA nodes\n",
|
||||||
|
__FUNCTION__, bit);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Is there at least one node which is allowed
|
||||||
|
* in current mask? */
|
||||||
|
if (test_bit(bit, vm->numa_mask)) {
|
||||||
|
valid_mask = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!valid_mask) {
|
||||||
|
dkprintf("%s: ERROR: invalid nodemask\n", __FUNCTION__);
|
||||||
|
error = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update current mask by clearing non-requested nodes */
|
||||||
|
for_each_set_bit(bit, vm->numa_mask, maxnode) {
|
||||||
|
if (!test_bit(bit, numa_mask)) {
|
||||||
|
clear_bit(bit, vm->numa_mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vm->numa_mem_policy = mode;
|
||||||
|
error = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
error = -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
return error;
|
||||||
} /* sys_set_mempolicy() */
|
} /* sys_set_mempolicy() */
|
||||||
|
|
||||||
SYSCALL_DECLARE(get_mempolicy)
|
SYSCALL_DECLARE(get_mempolicy)
|
||||||
@ -7092,21 +7219,40 @@ SYSCALL_DECLARE(get_mempolicy)
|
|||||||
int *mode = (int *)ihk_mc_syscall_arg0(ctx);
|
int *mode = (int *)ihk_mc_syscall_arg0(ctx);
|
||||||
unsigned long *nodemask =
|
unsigned long *nodemask =
|
||||||
(unsigned long *)ihk_mc_syscall_arg1(ctx);
|
(unsigned long *)ihk_mc_syscall_arg1(ctx);
|
||||||
|
unsigned long nodemask_bits = 0;
|
||||||
unsigned long maxnode = ihk_mc_syscall_arg2(ctx);
|
unsigned long maxnode = ihk_mc_syscall_arg2(ctx);
|
||||||
unsigned long addr = ihk_mc_syscall_arg3(ctx);
|
unsigned long addr = ihk_mc_syscall_arg3(ctx);
|
||||||
unsigned long flags = ihk_mc_syscall_arg4(ctx);
|
unsigned long flags = ihk_mc_syscall_arg4(ctx);
|
||||||
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
if (flags || addr) {
|
if (((flags & MPOL_F_ADDR) && !addr) ||
|
||||||
|
(!(flags & MPOL_F_ADDR) && addr) ||
|
||||||
|
(flags & ~(MPOL_F_ADDR | MPOL_F_NODE | MPOL_F_MEMS_ALLOWED)) ||
|
||||||
|
((flags & MPOL_F_NODE) && !(flags & MPOL_F_ADDR) &&
|
||||||
|
vm->numa_mem_policy == MPOL_INTERLEAVE)) {
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (maxnode) {
|
||||||
|
if (maxnode < ihk_mc_get_nr_numa_nodes()) {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
nodemask_bits = ALIGN(maxnode, 8);
|
||||||
|
if (nodemask_bits > PROCESS_NUMA_MASK_BITS) {
|
||||||
|
dkprintf("%s: WARNING: process NUMA mask bits is insufficient\n",
|
||||||
|
__FUNCTION__);
|
||||||
|
nodemask_bits = PROCESS_NUMA_MASK_BITS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (mode) {
|
if (mode) {
|
||||||
error = copy_to_user(mode,
|
error = copy_to_user(mode,
|
||||||
&cpu_local_var(current)->vm->numa_mem_policy,
|
&cpu_local_var(current)->vm->numa_mem_policy,
|
||||||
sizeof(int));
|
sizeof(int));
|
||||||
if (error) {
|
if (error) {
|
||||||
error = -EINVAL;
|
error = -EFAULT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -7114,10 +7260,9 @@ SYSCALL_DECLARE(get_mempolicy)
|
|||||||
if (nodemask) {
|
if (nodemask) {
|
||||||
error = copy_to_user(nodemask,
|
error = copy_to_user(nodemask,
|
||||||
cpu_local_var(current)->vm->numa_mask,
|
cpu_local_var(current)->vm->numa_mask,
|
||||||
maxnode < (PROCESS_NUMA_MASK_BITS >> 3) ?
|
(nodemask_bits >> 3));
|
||||||
maxnode : (PROCESS_NUMA_MASK_BITS >> 3));
|
|
||||||
if (error) {
|
if (error) {
|
||||||
error = -EINVAL;
|
error = -EFAULT;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user