Compare commits

...

25 Commits

Author SHA1 Message Date
583319125a prerelease: 0.94: fix __mcctrl_os_read_write_cpu_register
Change-Id: Ibcfbe7796347cc9c2148cdea2519fe6c7ca9e97e
2021-02-18 15:23:01 +09:00
9f39d1cd88 move_pages: Fix and support some specs for LTP.
1. When nodes array is NULL, move_pages doesn't move any pages,
 instead will return the node where each page
 currently resides by status array.
2. Check whether all specified nodes are online.

Change-Id: Ie3534997833d797e2a9f595d1107b07d46e1c6cf
Refs: #1523
2021-02-18 06:16:17 +00:00
a0d446b27f smp: make smp_call_func() arch independent
Change-Id: Ib60604ceb3274b173bd7f96cf57c8c35c1889e44
2021-02-18 06:16:17 +00:00
f3c875b8e6 mbind: Use range_policy's numamask as priority on MPOL_BIND
Change-Id: Iaaa7998945c6e2b42d91d34a2f7b05db1f4d696d
2021-02-18 06:16:17 +00:00
9f1e6d707c get_mempolicy: Support (MPOL_F_NODE | MPOL_F_ADDR) specified
If flags specifies both MPOL_F_NODE and MPOL_F_ADDR,
get_mempolicy() will return the node ID of the node on
which the address addr is allocated into the location pointed to by mode.

Change-Id: Id485e3f4838e3679d877a95e53b21e3421cac88a
2021-02-18 06:16:17 +00:00
aef50d710c mempolicy: Support MPOL_INTERLEAVE
Change-Id: I6357892d792b2de8ea859a0a6799250f05066713
Refs: #959
2021-02-18 06:16:17 +00:00
7f0594d784 TO RESET: mbind: do nothing
Fixes: 00007daf ("mbind: do nothing (workaround for Fugaku)")

Change-Id: Id41940bebd2cbcc3e8637eadd4847984627b1c72
2021-02-18 06:16:17 +00:00
866f5c51a0 docs: add limitation of system calls that call copy_to_user()
Change-Id: If449c73f8d5949ab5526ea598b0f713ed4431157
Refs: #1514
2021-02-18 13:04:53 +09:00
48b1d548f2 __mcctrl_os_read_write_cpu_register: fix timeout
Change-Id: Id5a7d316d793bd535f24fd353b214aa12af1dab4
2021-02-15 08:56:04 +00:00
822b64b03c docs: add limitation related to Fujitsu TCS xos_hwb
Change-Id: I83a1ecd7a0b6d3bcde2b902cd526dfd4feb9e23a
2021-02-15 16:03:52 +09:00
aca83bcd3d Tofu: fault stack area if VM range doesn't exist in STAG registration
Change-Id: I407a8954ccaf22019b3082fd6eee68e772d1cb26
2021-02-15 14:46:58 +09:00
c7145c4b38 xpmem: fault stack area of remote process if VM range doesn't yet exist
Change-Id: I2bbb745cc9b79ab4f9ea81b242f35f1b88ad531e
2021-02-15 14:46:58 +09:00
a82d161be8 prerelease: 0.93: investigate smp_ihk_os_panic_notifier
Change-Id: I997b41f80038603261de2e8232b6b8ca200cd8cd
2021-02-09 21:39:49 -05:00
7152269a59 spec: create one rpm including .ko and binaries
Don't use kernel_module_package, so that a separate
kmod-mckernel-*.rpm containing the .ko files is not created.

Change-Id: I25b7ff662476bfc735d319b57cdf2da82f2c6aa7
2021-02-09 20:55:38 -05:00
31c08bcb7d spec, docs: update cmake options
Change-Id: Ib8277413a413b5ce956a48f7e3d9922311937ea8
2021-02-09 20:55:38 -05:00
dffb0918a2 docs: add capstone installation options
Change-Id: I96aa9a6405c17f8d9653f3d3894f0e71a57ab460
2021-02-09 06:10:32 +00:00
23cd14af7d __mcctrl_os_read_write_cpu_register: timeout in 1 sec for when McKernel can't respond
Change-Id: Ia2d5f64e107697dda1f3bae499eb3afb8a7aedba
2021-02-09 06:09:11 +00:00
a5cf2019bc cmake: fix detection of Fugaku native compilation
Change-Id: I4210e9b57223c3869464caea10c2d414e9484e14
2021-02-09 06:06:13 +00:00
11b9fe0377 page_fault_handler: fix missing increment of in_page_fault on SEGV
This integrates some of the changes of the following commit:
1cf0bd5a ("TO RESET: add debug instruments, map Linux areas for tofu")

Change-Id: Iffd8432d5a7b35f20bd45829a125583a0363dbf0
2021-02-09 00:56:15 -05:00
4905c8e638 mcexec: propagate error in __NR_gettid handler
Change-Id: I0e0f06199970fe839065567dcd5418d017b6ec00
2021-02-03 18:53:33 -05:00
3d71c6a8eb mcexec_transfer_image(): map exact size of remote memory (instead of forcing PAGE_SIZE)
Change-Id: Ic66770af6cdb15b7a2e18a08cbcd1736e5558bdf
2021-02-03 18:53:33 -05:00
1cea75dd51 mcexec: fix strncat missing NULL and pclose of uninitialized
Change-Id: I9ce4004580845a983949caa5668b2f950880cd24
2021-02-02 01:51:57 +00:00
661ba0ce4a docs: add editing spec file when building rpm
Change-Id: Ic8dc9d8c6aef6d2180844891d743a09f4a3bdd9d
2021-01-29 01:23:35 +00:00
7e82adc761 prerelease: 0.92: fix uninitialized usrdata->cpu_topology_list
Change-Id: Ia12970bda1225898823a67c2d0461144fc62ebb9
2021-01-29 09:50:53 +09:00
1f9fbe82db mcctrl: fix access to uninitialized usrdata->cpu_topology_list
Change-Id: I25a9182b9b470bb069f4f755a67fb50b88817cd2
2021-01-29 09:34:24 +09:00
35 changed files with 1728 additions and 285 deletions

View File

@ -10,7 +10,7 @@ project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.91")
set(MCKERNEL_RELEASE "0.94")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@ -65,7 +65,7 @@ if(ENABLE_TOFU)
endif()
# when compiling on a compute-node
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2"
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2 | grep -o CN"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)

View File

@ -1972,15 +1972,15 @@ int arch_cpu_read_write_register(
return ret;
}
int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg)
{
/* TODO: skeleton for smp_call_func */
return -1;
}
/*
 * Flush the instruction cache: issues the AArch64 "ic ialluis"
 * instruction (invalidate all instruction caches, Inner Shareable)
 * and then a dsb(ish) barrier so the invalidation has completed
 * before the function returns.
 */
void arch_flush_icache_all(void)
{
asm("ic ialluis");
dsb(ish);
}
/*
 * Return the interrupt number used on this architecture to deliver
 * SMP function-call requests (LOCAL_SMP_FUNC_CALL_VECTOR).
 */
int ihk_mc_get_smp_handler_irq(void)
{
return LOCAL_SMP_FUNC_CALL_VECTOR;
}
/*** end of file ***/

View File

@ -17,6 +17,7 @@
#define INTRID_STACK_TRACE 5
#define INTRID_MULTI_INTR 6
#define INTRID_MULTI_NMI 7
#define LOCAL_SMP_FUNC_CALL_VECTOR 1 /* same as IKC */
/* use PPI interrupt number */
#define INTRID_PERF_OVF 23

View File

@ -16,6 +16,7 @@
#include <uio.h>
#include <syscall.h>
#include <rusage_private.h>
#include <memory.h>
#include <ihk/debug.h>
void terminate_mcexec(int, int);
@ -2250,8 +2251,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
@ -2269,8 +2272,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 1:
@ -2292,8 +2297,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * count);
break;
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 2:
@ -2322,8 +2329,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * (count / 2));
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 3:
@ -2349,13 +2358,15 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
}
/* NUMA verification in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
if (mpsr->user_nodes) {
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
}
}
}
@ -2387,7 +2398,7 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
/* PTE valid? */
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
mpsr->status[i] = -ENOENT;
mpsr->status[i] = -EFAULT;
mpsr->ptep[i] = NULL;
continue;
}
@ -2451,6 +2462,26 @@ pte_out:
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/*
* When nodes array is NULL, move_pages doesn't move any pages,
* instead will return the node where each page
* currently resides by status array.
*/
if (!mpsr->user_nodes) {
/* get nid in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->status[i] < 0) {
continue;
}
mpsr->status[i] = phys_to_nid(
pte_get_phys(mpsr->ptep[i]));
}
mpsr->phase_ret = 0;
goto out; // return node information
}
/* Processing of move pages */
if (cpu_index == 0) {
/* Allocate new pages on target NUMA nodes */
for (i = 0; i < count; i++) {
@ -2463,8 +2494,11 @@ pte_out:
/* TODO: store pgalign info in an array as well? */
if (mpsr->nr_pages[i] > 1) {
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
pgalign = PTL2_SHIFT - PTL1_SHIFT;
int nr_pages;
for (pgalign = 0, nr_pages = mpsr->nr_pages[i];
nr_pages != 1; pgalign++, nr_pages >>= 1) {
}
}
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],

View File

@ -80,7 +80,11 @@ static void (*lapic_icr_write)(unsigned int h, unsigned int l);
static void (*lapic_wait_icr_idle)(void);
void (*x86_issue_ipi)(unsigned int apicid, unsigned int low);
int running_on_kvm(void);
static void smp_func_call_handler(void);
void smp_func_call_handler(void);
/*
 * Return the interrupt vector used on this architecture to deliver
 * SMP function-call requests (LOCAL_SMP_FUNC_CALL_VECTOR).
 */
int ihk_mc_get_smp_handler_irq(void)
{
return LOCAL_SMP_FUNC_CALL_VECTOR;
}
void init_processors_local(int max_id);
void assign_processor_id(void);
@ -2170,144 +2174,6 @@ int arch_cpu_read_write_register(
return 0;
}
/*
* Generic remote CPU function invocation facility.
*/
static void smp_func_call_handler(void)
{
int irq_flags;
struct smp_func_call_request *req;
int reqs_left;
reiterate:
req = NULL;
reqs_left = 0;
irq_flags = ihk_mc_spinlock_lock(
&cpu_local_var(smp_func_req_lock));
/* Take requests one-by-one */
if (!list_empty(&cpu_local_var(smp_func_req_list))) {
req = list_first_entry(&cpu_local_var(smp_func_req_list),
struct smp_func_call_request, list);
list_del(&req->list);
reqs_left = !list_empty(&cpu_local_var(smp_func_req_list));
}
ihk_mc_spinlock_unlock(&cpu_local_var(smp_func_req_lock),
irq_flags);
if (req) {
req->ret = req->sfcd->func(req->cpu_index,
req->sfcd->nr_cpus, req->sfcd->arg);
ihk_atomic_dec(&req->sfcd->cpus_left);
}
if (reqs_left)
goto reiterate;
}
int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg)
{
int cpu, nr_cpus = 0;
int cpu_index = 0;
int this_cpu_index = 0;
struct smp_func_call_data sfcd;
struct smp_func_call_request *reqs;
int ret = 0;
int call_on_this_cpu = 0;
cpu_set_t cpu_set;
/* Sanity checks */
if (!__cpu_set || !__func) {
return -EINVAL;
}
/* Make sure it won't change in between */
cpu_set = *__cpu_set;
for_each_set_bit(cpu, (unsigned long *)&cpu_set,
sizeof(cpu_set) * BITS_PER_BYTE) {
if (cpu == ihk_mc_get_processor_id()) {
call_on_this_cpu = 1;
}
++nr_cpus;
}
if (!nr_cpus) {
return -EINVAL;
}
reqs = kmalloc(sizeof(*reqs) * nr_cpus, IHK_MC_AP_NOWAIT);
if (!reqs) {
ret = -ENOMEM;
goto free_out;
}
sfcd.nr_cpus = nr_cpus;
sfcd.func = __func;
sfcd.arg = __arg;
ihk_atomic_set(&sfcd.cpus_left,
call_on_this_cpu ? nr_cpus - 1 : nr_cpus);
/* Add requests and send IPIs */
cpu_index = 0;
for_each_set_bit(cpu, (unsigned long *)&cpu_set,
sizeof(cpu_set) * BITS_PER_BYTE) {
unsigned long irq_flags;
reqs[cpu_index].cpu_index = cpu_index;
reqs[cpu_index].ret = 0;
if (cpu == ihk_mc_get_processor_id()) {
this_cpu_index = cpu_index;
++cpu_index;
continue;
}
reqs[cpu_index].sfcd = &sfcd;
irq_flags =
ihk_mc_spinlock_lock(&get_cpu_local_var(cpu)->smp_func_req_lock);
list_add_tail(&reqs[cpu_index].list,
&get_cpu_local_var(cpu)->smp_func_req_list);
ihk_mc_spinlock_unlock(&get_cpu_local_var(cpu)->smp_func_req_lock,
irq_flags);
ihk_mc_interrupt_cpu(cpu, LOCAL_SMP_FUNC_CALL_VECTOR);
++cpu_index;
}
/* Is this CPU involved? */
if (call_on_this_cpu) {
reqs[this_cpu_index].ret =
__func(this_cpu_index, nr_cpus, __arg);
}
/* Wait for the rest of the CPUs */
while (ihk_atomic_read(&sfcd.cpus_left) > 0) {
cpu_pause();
}
/* Check return values, if error, report the first non-zero */
for (cpu_index = 0; cpu_index < nr_cpus; ++cpu_index) {
if (reqs[cpu_index].ret != 0) {
ret = reqs[cpu_index].ret;
goto free_out;
}
}
ret = 0;
free_out:
kfree(reqs);
return ret;
}
extern int nmi_mode;
extern long freeze_thaw(void *nmi_ctx);

View File

@ -32,6 +32,7 @@
#include <limits.h>
#include <syscall.h>
#include <rusage_private.h>
#include <memory.h>
#include <ihk/debug.h>
void terminate_mcexec(int, int);
@ -2302,8 +2303,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
@ -2321,8 +2324,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 1:
@ -2344,8 +2349,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * count);
break;
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 2:
@ -2374,8 +2381,10 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
sizeof(void *) * (count / 2));
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
if (mpsr->user_nodes) {
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
}
mpsr->nodes_ready = 1;
break;
case 3:
@ -2401,13 +2410,15 @@ int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
}
/* NUMA verification in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
if (mpsr->user_nodes) {
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
}
}
}
@ -2503,6 +2514,26 @@ pte_out:
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/*
* When nodes array is NULL, move_pages doesn't move any pages,
* instead will return the node where each page
* currently resides by status array.
*/
if (!mpsr->user_nodes) {
/* get nid in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->status[i] < 0) {
continue;
}
mpsr->status[i] = phys_to_nid(
pte_get_phys(mpsr->ptep[i]));
}
mpsr->phase_ret = 0;
goto out; // return node information
}
/* Processing of move pages */
if (cpu_index == 0) {
/* Allocate new pages on target NUMA nodes */
for (i = 0; i < count; i++) {

View File

@ -129,11 +129,29 @@ Create the tarball and the spec file:
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
(optional) Edit the following line in ``scripts/mckernel.spec`` to change
cmake options. For example:
::
%cmake -DCMAKE_BUILD_TYPE=Release \
-DUNAME_R=%{kernel_version} \
-DKERNEL_DIR=%{kernel_dir} \
%{?cmake_libdir:-DCMAKE_INSTALL_LIBDIR=%{cmake_libdir}} \
%{?build_target:-DBUILD_TARGET=%{build_target}} \
%{?toolchain_file:-DCMAKE_TOOLCHAIN_FILE=%{toolchain_file}} \
-DENABLE_TOFU=ON -DENABLE_FUGAKU_HACKS=ON \
-DENABLE_KRM_WORKAROUND=OFF -DWITH_KRM=ON \
-DENABLE_FUGAKU_DEBUG=OFF \
.
Create the rpm package:
When not cross-compiling:
"""""""""""""""""""""""""
Then build the rpm:
::
rpmbuild -ba scripts/mckernel.spec

View File

@ -202,3 +202,21 @@ Limitations
28. munlockall() is not supported and returns zero.
29. scheduling behavior is not Linux compatible. For example, sometimes one of the two processes on the same CPU continues to run after yielding.
30. (Fujitsu TCS-only) A job following the one in which __mcctrl_os_read_write_cpu_register() returns ``-ETIME`` fails because xos_hwb related CPU state isn't finalized. You can tell if the function returned ``-ETIME`` by checking if the following line appeared in the Linux kernel message:
::
__mcctrl_os_read_write_cpu_register: ERROR sending IKC msg: -62
You can re-initialize xos_hwb related CPU state by the following command:
::
sudo systemctl restart xos_hwb
31. System calls can write the mcexec VMAs with PROT_WRITE flag not
set. This is because we never turn off PROT_WRITE of the mcexec
VMAs to circumvent the issue "set_host_vma(): do NOT read protect
Linux VMA".

View File

@ -4,24 +4,41 @@ Advanced: Enable Utility Thread offloading Interface (UTI)
UTI enables a runtime such as MPI runtime to spawn utility threads such
as MPI asynchronous progress threads to Linux cores.
Install capstone
~~~~~~~~~~~~~~~~~~~~
Install ``capstone`` and ``capstone-devel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When compute nodes don't have access to repositories
""""""""""""""""""""""""""""""""""""""""""""""""""""
When compute nodes have access to the EPEL repository
"""""""""""""""""""""""""""""""""""""""""""""""""""""
Install EPEL capstone-devel:
Install EPEL ``capstone`` and ``capstone-devel``:
::
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo yum install capstone-devel
sudo yum install capstone capstone-devel
When compute nodes don't have access to repositories
""""""""""""""""""""""""""""""""""""""""""""""""""""
When compute nodes don't have access to EPEL repository
"""""""""""""""""""""""""""""""""""""""""""""""""""""""
Ask the system administrator to install ``capstone-devel``. Note that it is in the EPEL repository.
A. Ask the system administrator to install ``capstone`` and ``capstone-devel``. Note that they are in the EPEL repository.
B. Download the rpms on a machine on which you have administrator privileges:
::
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo yum install yum-utils
yumdownloader capstone capstone-devel
and then install them into your home directory on the login node:
::
cd $HOME/$(uname -p)
rpm2cpio capstone-4.0.1-9.el8.aarch64.rpm | cpio -idv
rpm2cpio capstone-devel-4.0.1-9.el8.aarch64.rpm | cpio -idv
sed -i 's#/usr/#'"$HOME"'/'"$(uname -p)"'/usr/#' $HOME/$(uname -p)/usr/lib64/pkgconfig/capstone.pc
Install syscall_intercept
@ -31,7 +48,24 @@ Install syscall_intercept
git clone https://github.com/RIKEN-SysSoft/syscall_intercept.git
mkdir build && cd build
cmake <syscall_intercept>/arch/aarch64 -DCMAKE_INSTALL_PREFIX=<syscall-intercept-install> -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
When ``capstone`` and ``capstone-devel`` are installed into the system directory:
::
cmake ../syscall_intercept/arch/aarch64 -DCMAKE_INSTALL_PREFIX=${HOME}/$(uname -p)/usr -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
When ``capstone`` and ``capstone-devel`` are installed into your home directory:
::
CMAKE_PREFIX_PATH=${HOME}/$(uname -p)/usr cmake ../syscall_intercept/arch/aarch64 -DCMAKE_INSTALL_PREFIX=${HOME}/$(uname -p)/usr -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
Install:
::
make && make install && make test
Install UTI for McKernel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -48,16 +82,17 @@ Install:
Install McKernel
~~~~~~~~~~~~~~~~~~~~
Add ``-DENABLE_UTI=ON`` option to ``cmake``:
``cmake`` with the additional options:
::
CMAKE_PREFIX_PATH=<syscall-intercept-install> cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
CMAKE_PREFIX_PATH=${HOME}/$(uname -p)/usr cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
make -j install
Run programs
~~~~~~~~~~~~~~~~
Add ``--enable-uti`` option to ``mcexec``:
``mcexec`` with ``--enable-uti`` option:
::

View File

@ -270,16 +270,17 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
return -EFAULT;
}
#ifdef CONFIG_MIC
if (pt.size > PAGE_SIZE) {
printk("mcexec_transfer_image(): ERROR: size exceeds PAGE_SIZE\n");
return -EFAULT;
}
phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, PAGE_SIZE);
#ifdef CONFIG_MIC
rpm = ioremap_wc(phys, PAGE_SIZE);
#else
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, pt.size);
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, pt.size, NULL, 0);
#endif
if (!rpm) {
@ -304,10 +305,11 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
#ifdef CONFIG_MIC
iounmap(rpm);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
#else
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, PAGE_SIZE);
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, pt.size);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, pt.size);
#endif
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
return ret;
@ -3644,7 +3646,8 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
isp.op = op;
isp.pdesc = virt_to_phys(ldesc);
ret = mcctrl_ikc_send_wait(os, cpu, &isp, 0, NULL, &do_free, 1, ldesc);
/* 1 sec timeout for the case where McKernel can't respond */
ret = mcctrl_ikc_send_wait(os, cpu, &isp, -1000, NULL, &do_free, 1, ldesc);
if (ret != 0) {
printk("%s: ERROR sending IKC msg: %d\n", __FUNCTION__, ret);
goto out;

View File

@ -536,9 +536,6 @@ int prepare_ikc_channels(ihk_os_t os)
usrdata->os = os;
ihk_host_os_set_usrdata(os, usrdata);
ihk_ikc_listen_port(os, &lp_ikc2linux);
ihk_ikc_listen_port(os, &lp_ikc2mckernel);
init_waitqueue_head(&usrdata->wq_procfs);
mutex_init(&usrdata->reserve_lock);
mutex_init(&usrdata->part_exec_lock);
@ -555,6 +552,16 @@ int prepare_ikc_channels(ihk_os_t os)
INIT_LIST_HEAD(&usrdata->wakeup_descs_list);
spin_lock_init(&usrdata->wakeup_descs_lock);
/* ihk_ikc_listen_port should be performed after
* usrdata->cpu_topology_list is initialized because the
* function enables syscall_packet_handler which accesses
* the list (the call path is sysfsm_packet_handler -->
* sysfsm_work_main --> sysfsm_setup --> setup_sysfs_files
* --> setup_cpus_sysfs_files).
*/
ihk_ikc_listen_port(os, &lp_ikc2linux);
ihk_ikc_listen_port(os, &lp_ikc2mckernel);
return 0;
error:

View File

@ -1957,14 +1957,14 @@ opendev()
fprintf(stderr, "%s: warning: LD_PRELOAD line is too long\n", __FUNCTION__); \
return; \
} \
strncat(envbuf, elembuf, remainder); \
strncat(envbuf, elembuf, remainder - 1); \
remainder = PATH_MAX - (strlen(envbuf) + 1); \
nelem++; \
} while (0)
static ssize_t find_libdir(char *libdir, size_t len)
{
FILE *filep;
FILE *filep = NULL;
ssize_t rc;
size_t linelen = 0;
char *line = NULL;
@ -2020,7 +2020,9 @@ static ssize_t find_libdir(char *libdir, size_t len)
}
out:
pclose(filep);
if (filep) {
pclose(filep);
}
free(line);
return rc;
}
@ -4121,6 +4123,7 @@ int main_loop(struct thread_data_s *my_thread)
#endif
case __NR_gettid:{
int rc = 0;
/*
* Number of TIDs and the remote physical address where TIDs are
* expected are passed in arg 4 and 5, respectively.
@ -4132,6 +4135,7 @@ int main_loop(struct thread_data_s *my_thread)
int *tids = malloc(sizeof(int) * w.sr.args[4]);
if (!tids) {
fprintf(stderr, "__NR_gettid(): error allocating TIDs\n");
rc = -ENOMEM;
goto gettid_out;
}
@ -4152,13 +4156,14 @@ int main_loop(struct thread_data_s *my_thread)
trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE;
if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) {
rc = -EFAULT;
fprintf(stderr, "__NR_gettid(): error transfering TIDs\n");
}
free(tids);
}
gettid_out:
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
do_syscall_return(fd, cpu, rc, 0, 0, 0, 0);
break;
}

2
ihk

Submodule ihk updated: efacf4fceb...823ede5e9a

View File

@ -267,3 +267,154 @@ cpu_sysfs_setup(void)
return;
} /* cpu_sysfs_setup() */
/*
* Generic remote CPU function invocation facility.
*/
/*
 * Drain this CPU's smp_func_req_list.
 *
 * Each pass detaches at most one request under smp_func_req_lock,
 * runs its callback with the lock released, stores the callback's
 * return value in the request and decrements the shared cpus_left
 * counter.  The pass is repeated for as long as the list was seen
 * non-empty after the detach.
 */
void smp_func_call_handler(void)
{
	struct smp_func_call_request *pending;
	unsigned long flags;
	int more;

	do {
		pending = NULL;
		more = 0;

		/* Detach a single request while holding the per-CPU lock */
		flags = ihk_mc_spinlock_lock(
				&cpu_local_var(smp_func_req_lock));
		if (!list_empty(&cpu_local_var(smp_func_req_list))) {
			pending = list_first_entry(
					&cpu_local_var(smp_func_req_list),
					struct smp_func_call_request, list);
			list_del(&pending->list);
			more = !list_empty(&cpu_local_var(smp_func_req_list));
		}
		ihk_mc_spinlock_unlock(&cpu_local_var(smp_func_req_lock),
				flags);

		/* Run the callback outside the lock, then report completion */
		if (pending) {
			pending->ret = pending->sfcd->func(pending->cpu_index,
					pending->sfcd->nr_cpus,
					pending->sfcd->arg);
			ihk_atomic_dec(&pending->sfcd->cpus_left);
		}
	} while (more);
}
/*
 * smp_call_func - run a function on a set of CPUs and wait for completion.
 *
 * @__cpu_set: CPUs to run on; a private copy is taken on entry.
 * @__func:    callback, invoked as __func(index, nr_cpus, __arg), where
 *             index is the position of the CPU among the participating
 *             CPUs and nr_cpus is the number of participating CPUs.
 * @__arg:     opaque argument handed to every invocation.
 *
 * A request is queued on each participating remote CPU and that CPU is
 * interrupted with ihk_mc_get_smp_handler_irq().  If the calling CPU is
 * among the participants, the callback is invoked directly here.  The
 * function then spins until every remote CPU has decremented
 * sfcd.cpus_left; this is what keeps the stack-allocated sfcd and the
 * reqs array valid while remote CPUs use them.
 *
 * Returns 0 when every invocation returned 0, -EINVAL when an argument
 * is NULL or the set is empty, -ENOMEM when the request array cannot be
 * allocated, otherwise the first non-zero callback return value in
 * index order.
 */
int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg)
{
	int cpu, nr_cpus = 0;
	int cpu_index = 0;
	int this_cpu_index = 0;
	struct smp_func_call_data sfcd;
	struct smp_func_call_request *reqs;
	int ret = 0;
	int call_on_this_cpu = 0;
	cpu_set_t cpu_set;
	/*
	 * NOTE(review): at most this many CPUs of the set participate;
	 * any further set bits are silently ignored.  Looks like a
	 * temporary limit -- confirm whether it is intended.
	 */
	int max_nr_cpus = 4;

	/* Sanity checks */
	if (!__cpu_set || !__func) {
		return -EINVAL;
	}

	/* Make sure it won't change in between */
	cpu_set = *__cpu_set;

	/* Count the participants (capped at max_nr_cpus) */
	for_each_set_bit(cpu, (unsigned long *)&cpu_set,
			sizeof(cpu_set) * BITS_PER_BYTE) {
		if (cpu == ihk_mc_get_processor_id()) {
			call_on_this_cpu = 1;
		}
		++nr_cpus;
		if (nr_cpus == max_nr_cpus)
			break;
	}

	if (!nr_cpus) {
		return -EINVAL;
	}

	reqs = kmalloc(sizeof(*reqs) * nr_cpus, IHK_MC_AP_NOWAIT);
	if (!reqs) {
		/* Nothing was allocated, so there is nothing to free */
		return -ENOMEM;
	}

	kprintf("%s: interrupting %d CPUs for SMP call..\n", __func__, nr_cpus);
	sfcd.nr_cpus = nr_cpus;
	sfcd.func = __func;
	sfcd.arg = __arg;
	/* The calling CPU runs the callback inline and is not waited for */
	ihk_atomic_set(&sfcd.cpus_left,
			call_on_this_cpu ? nr_cpus - 1 : nr_cpus);
	smp_wmb();

	/* Add requests and send IPIs */
	cpu_index = 0;
	for_each_set_bit(cpu, (unsigned long *)&cpu_set,
			sizeof(cpu_set) * BITS_PER_BYTE) {
		unsigned long irq_flags;

		/*
		 * Stop once every counted participant has been handled.
		 * The check sits at the top of the loop so that the
		 * `continue` taken for the calling CPU cannot bypass it;
		 * otherwise reqs[] would be written past its end and more
		 * requests queued than cpus_left accounts for.
		 */
		if (cpu_index >= nr_cpus)
			break;

		reqs[cpu_index].cpu_index = cpu_index;
		reqs[cpu_index].ret = 0;

		if (cpu == ihk_mc_get_processor_id()) {
			/* Handled inline below, no request is queued */
			this_cpu_index = cpu_index;
			++cpu_index;
			continue;
		}

		reqs[cpu_index].sfcd = &sfcd;

		irq_flags =
			ihk_mc_spinlock_lock(&get_cpu_local_var(cpu)->smp_func_req_lock);
		list_add_tail(&reqs[cpu_index].list,
				&get_cpu_local_var(cpu)->smp_func_req_list);
		ihk_mc_spinlock_unlock(&get_cpu_local_var(cpu)->smp_func_req_lock,
				irq_flags);

		dkprintf("%s: interrupting IRQ: %d -> CPU: %d\n", __func__,
				ihk_mc_get_smp_handler_irq(), cpu);
		ihk_mc_interrupt_cpu(cpu, ihk_mc_get_smp_handler_irq());

		++cpu_index;
	}

	/* Is this CPU involved? */
	if (call_on_this_cpu) {
		reqs[this_cpu_index].ret =
			__func(this_cpu_index, nr_cpus, __arg);
	}

	dkprintf("%s: waiting for remote CPUs..\n", __func__);
	/* Wait for the rest of the CPUs */
	while (smp_load_acquire(&sfcd.cpus_left.counter) > 0) {
		cpu_pause();
	}

	/* Check return values, if error, report the first non-zero */
	for (cpu_index = 0; cpu_index < nr_cpus; ++cpu_index) {
		if (reqs[cpu_index].ret != 0) {
			ret = reqs[cpu_index].ret;
			goto free_out;
		}
	}

	kprintf("%s: all CPUs finished SMP call successfully\n", __func__);
	ret = 0;

free_out:
	kfree(reqs);
	return ret;
}

View File

@ -106,9 +106,7 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
#ifdef ENABLE_FUGAKU_HACKS
int in_page_fault;
#endif
int no_preempt;
int timer_enabled;
unsigned long nr_ctx_switches;

View File

@ -406,6 +406,7 @@ struct vm_range_numa_policy {
unsigned long start, end;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
int numa_mem_policy;
int il_prev;
};
struct vm_regions {
@ -797,6 +798,7 @@ struct process_vm {
long currss;
DECLARE_BITMAP(numa_mask, PROCESS_NUMA_MASK_BITS);
int numa_mem_policy;
int il_prev;
/* Protected by memory_range_lock */
struct rb_root vm_range_numa_policy_tree;
struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];

View File

@ -523,6 +523,18 @@ static void reserve_pages(struct ihk_page_allocator_desc *pa_allocator,
ihk_pagealloc_reserve(pa_allocator, start, end);
}
/*
 * Pick the node that follows @off in @numa_mask for interleaving.
 *
 * Searches the mask for the first set bit above @off; when there is
 * none within PROCESS_NUMA_MASK_BITS the search wraps around to the
 * first set bit of the mask.
 */
static int interleave_nodes(int off, unsigned long *numa_mask)
{
	int candidate;

	candidate = find_next_bit(numa_mask, PROCESS_NUMA_MASK_BITS, off + 1);
	if (candidate < PROCESS_NUMA_MASK_BITS) {
		return candidate;
	}

	/* Nothing set above @off: wrap around */
	candidate = find_first_bit(numa_mask, PROCESS_NUMA_MASK_BITS);
	return candidate;
}
extern int cpu_local_var_initialized;
static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
ihk_mc_ap_flag flag, int pref_node, int is_user, uintptr_t virt_addr)
@ -538,7 +550,9 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
int numa_mem_policy = -1;
struct process_vm *vm;
struct vm_range *range = NULL;
int chk_shm = 0;
int chk_shm = 0, il_start, looping;
int *il_prev = NULL;
unsigned long *numa_mask = NULL;
if(npages <= 0)
return NULL;
@ -549,31 +563,39 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
!cpu_local_var(current)->vm)
goto distance_based;
/* No explicitly requested NUMA or user policy? */
if ((pref_node == -1) && (!(flag & IHK_MC_AP_USER) ||
cpu_local_var(current)->vm->numa_mem_policy == MPOL_DEFAULT)) {
vm = cpu_local_var(current)->vm;
node = ihk_mc_get_numa_id();
if (virt_addr != -1) {
vm = cpu_local_var(current)->vm;
range_policy_iter = vm_range_policy_search(vm, virt_addr);
if (range_policy_iter) {
range = lookup_process_memory_range(vm, (uintptr_t)virt_addr, ((uintptr_t)virt_addr) + 1);
if (range) {
if( (range->memobj) && (range->memobj->flags == MF_SHM)) {
chk_shm = 1;
}
}
/* Get mempolicy user requested */
if (virt_addr != -1) {
range_policy_iter = vm_range_policy_search(vm, virt_addr);
if (range_policy_iter) {
range = lookup_process_memory_range(vm,
(uintptr_t)virt_addr,
((uintptr_t)virt_addr) + 1);
if ((range && (range->memobj->flags == MF_SHM))) {
chk_shm = 1;
}
/* Use range policy */
numa_mem_policy = range_policy_iter->numa_mem_policy;
numa_mask = range_policy_iter->numa_mask;
il_prev = &range_policy_iter->il_prev;
} else {
/* Use process policy */
numa_mem_policy = vm->numa_mem_policy;
numa_mask = vm->numa_mask;
il_prev = &vm->il_prev;
}
if ((!((range_policy_iter) && (range_policy_iter->numa_mem_policy != MPOL_DEFAULT))) && (chk_shm == 0))
goto distance_based;
}
node = ihk_mc_get_numa_id();
if (!memory_nodes[node].nodes_by_distance)
goto order_based;
/* No explicitly requested NUMA or user policy? */
if ((pref_node == -1) && !(flag & IHK_MC_AP_USER)) {
if ((numa_mem_policy == MPOL_DEFAULT) && (chk_shm == 0)) {
goto distance_based;
}
}
/* Explicit valid node? */
if (pref_node > -1 && pref_node < ihk_mc_get_nr_numa_nodes()) {
@ -615,27 +637,6 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
}
}
if ((virt_addr != -1) && (chk_shm == 0)) {
vm = cpu_local_var(current)->vm;
if (!(range_policy_iter)) {
range_policy_iter = vm_range_policy_search(vm, virt_addr);
}
if (range_policy_iter) {
range = lookup_process_memory_range(vm, (uintptr_t)virt_addr, ((uintptr_t)virt_addr) + 1);
if ((range && (range->memobj->flags == MF_SHM))) {
chk_shm = 1;
} else {
numa_mem_policy = range_policy_iter->numa_mem_policy;
}
}
}
if (numa_mem_policy == -1)
numa_mem_policy = cpu_local_var(current)->vm->numa_mem_policy;
switch (numa_mem_policy) {
case MPOL_BIND:
case MPOL_PREFERRED:
@ -644,9 +645,8 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
* only the ones requested in user policy */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
/* Not part of user requested policy? */
if (!test_bit(memory_nodes[node].nodes_by_distance[i].id,
cpu_local_var(current)->proc->vm->numa_mask)) {
numa_mask)) {
continue;
}
@ -687,7 +687,55 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
break;
case MPOL_INTERLEAVE:
	/*
	 * Interleaved allocation: ask interleave_nodes() for the node
	 * that follows *il_prev within numa_mask and try to allocate
	 * there.  *il_prev is updated on every attempt.
	 */
	il_start = *il_prev;
	looping = 0;

retry_interleave:
	/* Find next node */
	numa_id = interleave_nodes(*il_prev, numa_mask);
	*il_prev = numa_id;
	if (il_start == *il_prev && looping) {
		/*
		 * We are back at the node we started from after at
		 * least one attempt: all interleave nodes are full.
		 */
		pa = 0;
		break;
	}
	looping = 1;

#ifdef IHK_RBTREE_ALLOCATOR
	{
		if (rusage_check_oom(numa_id, npages, is_user)
				== -ENOMEM) {
			/* This node would go OOM; try the next one */
			goto retry_interleave;
		} else {
			pa = ihk_numa_alloc_pages(
					&memory_nodes[numa_id],
					npages, p2align);
		}
#else
	list_for_each_entry(pa_allocator,
			&memory_nodes[numa_id].allocators,
			list) {
		if (rusage_check_oom(numa_id, npages, is_user)
				== -ENOMEM) {
			/* This node would go OOM; try the next one */
			goto retry_interleave;
		} else {
			pa = ihk_pagealloc_alloc(pa_allocator,
					npages, p2align);
		}
#endif
		if (pa) {
			rusage_page_add(numa_id, npages,
					is_user);
			/* Report the node the pages came from (numa_id) */
			dkprintf("%s: policy: CPU @ node %d allocated "
					"%d pages from node %d\n",
					__func__,
					ihk_mc_get_numa_id(),
					npages, numa_id);
			/*
			 * Stop here: without this the list-allocator
			 * build would go on to the next allocator,
			 * allocate again and overwrite pa.
			 */
			break;
		}
	}
	break;
default:
@ -1395,7 +1443,6 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
__FUNCTION__, fault_addr, reason, regs);
preempt_disable();
#ifdef ENABLE_FUGAKU_HACKS
++cpu_local_var(in_page_fault);
if (cpu_local_var(in_page_fault) > 1) {
kprintf("%s: PF in PF??\n", __func__);
@ -1408,7 +1455,6 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
panic("PANIC");
}
}
#endif
cpu_enable_interrupt();
@ -1475,6 +1521,7 @@ out_linux:
__func__, thread ? thread->tid : -1, fault_addr,
reason, error);
unhandled_page_fault(thread, fault_addr, reason, regs);
--cpu_local_var(in_page_fault);
preempt_enable();
#ifdef ENABLE_FUGAKU_DEBUG
@ -1511,9 +1558,7 @@ out_linux:
out_ok:
#endif
error = 0;
#ifdef ENABLE_FUGAKU_HACKS
--cpu_local_var(in_page_fault);
#endif
preempt_enable();
out:
dkprintf("%s: addr: %p, reason: %lx, regs: %p -> error: %d\n",
@ -2885,3 +2930,44 @@ retry:
return ptep;
}
/*
 * Translate a physical address into the NUMA node ID of the memory
 * chunk that contains it.
 *
 * Every chunk reported by ihk_mc_get_memory_chunk() is checked in index
 * order; the first chunk whose [start, end) interval contains p wins.
 *
 * Returns the chunk's NUMA ID, or -1 when no chunk contains p.
 */
int phys_to_nid(unsigned long p)
{
	int idx;
	int chunk_nid;
	unsigned long chunk_start, chunk_end;

	for (idx = 0; idx < ihk_mc_get_nr_memory_chunks(); idx++) {
		ihk_mc_get_memory_chunk(idx, &chunk_start, &chunk_end,
					&chunk_nid);
		if (p < chunk_start || p >= chunk_end) {
			continue;
		}
		return chunk_nid;
	}

	return -1;
}
/*
 * Find the NUMA node of the physical page mapped at addr in vm.
 *
 * The address is first passed to page_fault_process_vm() with
 * PF_POPULATE | PF_USER, then its PTE is looked up in the page table of
 * vm's address space.
 *
 * Returns:
 *   - the result of phys_to_nid() for the mapped physical address,
 *   - the non-zero value returned by page_fault_process_vm() if the
 *     fault could not be processed,
 *   - -ENOENT if no present PTE exists for addr afterwards.
 */
int lookup_node(struct process_vm *vm, void *addr)
{
	const int fault_reason = PF_POPULATE | PF_USER;
	pte_t *pte;
	int rc;

	rc = page_fault_process_vm(vm, (void *)addr, fault_reason);
	if (rc) {
		return rc;
	}

	pte = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
				   (void *)addr, 0, NULL, NULL, NULL);
	if (!(pte && pte_is_present(pte))) {
		return -ENOENT;
	}

	return phys_to_nid(pte_get_phys(pte));
}

View File

@ -9676,7 +9676,9 @@ SYSCALL_DECLARE(mbind)
return -EINVAL;
}
#ifdef ENABLE_FUGAKU_HACKS
return 0;
#endif
memset(numa_mask, 0, sizeof(numa_mask));
@ -9921,6 +9923,10 @@ mbind_update_only:
sizeof(numa_mask));
}
range_policy->numa_mem_policy = mode;
if (mode == MPOL_INTERLEAVE) {
range_policy->il_prev =
PROCESS_NUMA_MASK_BITS - 1;
}
break;
@ -10082,6 +10088,9 @@ SYSCALL_DECLARE(set_mempolicy)
}
vm->numa_mem_policy = mode;
if (mode == MPOL_INTERLEAVE) {
vm->il_prev = PROCESS_NUMA_MASK_BITS - 1;
}
error = 0;
break;
@ -10144,6 +10153,20 @@ SYSCALL_DECLARE(get_mempolicy)
}
}
/* Both MPOL_F_NODE and MPOL_F_ADDR are specified */
if (flags & MPOL_F_NODE && flags & MPOL_F_ADDR) {
	/* Return the ID of the node on which addr is allocated via mode */
	int nid;

	nid = lookup_node(vm, (void *)addr);
	if (nid < 0) {
		/*
		 * lookup_node() failed: report the error to the caller
		 * instead of storing a negative errno as a node ID.
		 */
		error = nid;
		goto out;
	}

	error = copy_to_user(mode, &nid, sizeof(int)) ? -EFAULT : 0;
	goto out;
}
/* Special case of MPOL_F_MEMS_ALLOWED */
if (flags == MPOL_F_MEMS_ALLOWED) {
if (nodemask) {
@ -10227,7 +10250,7 @@ SYSCALL_DECLARE(move_pages)
struct move_pages_smp_req mpsr;
struct process_vm *vm = cpu_local_var(current)->vm;
int ret = 0;
int i, ret = 0;
unsigned long t_s, t_e;
@ -10237,18 +10260,20 @@ SYSCALL_DECLARE(move_pages)
if (pid) {
kprintf("%s: ERROR: only self (pid == 0)"
" is supported\n", __FUNCTION__);
return -EINVAL;
ret = -EINVAL;
goto out;
}
switch (flags) {
case MPOL_MF_MOVE_ALL:
/* Check flags */
if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) {
ret = -EINVAL;
goto out;
}
if (flags & MPOL_MF_MOVE_ALL) {
kprintf("%s: ERROR: MPOL_MF_MOVE_ALL"
" not supported\n", __func__);
return -EINVAL;
case MPOL_MF_MOVE:
break;
default:
return -EINVAL;
ret = -EINVAL;
goto out;
}
/* Allocate kernel arrays */
@ -10296,7 +10321,7 @@ t_e = rdtsc(); kprintf("%s: init malloc: %lu \n", __FUNCTION__, t_e - t_s); t_s
goto dealloc_out;
}
if (verify_process_vm(cpu_local_var(current)->vm,
if (user_nodes && verify_process_vm(cpu_local_var(current)->vm,
user_nodes, sizeof(int) * count)) {
ret = -EFAULT;
goto dealloc_out;
@ -10307,6 +10332,18 @@ t_e = rdtsc(); kprintf("%s: init malloc: %lu \n", __FUNCTION__, t_e - t_s); t_s
ret = -EFAULT;
goto dealloc_out;
}
/* Check node ID */
if (user_nodes) {
copy_from_user(nodes, user_nodes, sizeof(int) * count);
for (i = 0; i < count; i++) {
if (nodes[i] < 0 || nodes[i] >= ihk_mc_get_nr_numa_nodes()) {
ret = -ENODEV;
goto dealloc_out;
}
}
}
t_e = rdtsc(); kprintf("%s: init verify: %lu \n", __FUNCTION__, t_e - t_s); t_s = t_e;
#if 0
@ -10399,6 +10436,7 @@ dealloc_out:
kfree(ptep);
kfree(dst_phys);
out:
return ret;
}

View File

@ -1236,6 +1236,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
readonly = (req.flags & 1) != 0;
retry:
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
/* Assume smallest page size at first */
@ -1271,6 +1272,20 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
}
if (!range) {
if (vm->region.stack_start <= start &&
vm->region.stack_end > end) {
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if (page_fault_process_vm(vm, (void *)start,
PF_POPULATE | PF_WRITE | PF_USER) < 0) {
ret = -EINVAL;
goto out;
}
goto retry;
}
ret = -EINVAL;
goto unlock_out;
}
@ -1358,6 +1373,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
unlock_out:
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
out:
if(ret == 0){
if(copy_to_user((void *)arg, &req, sizeof(req)) != 0){
kprintf("%s: ret: %d\n", __func__, -EFAULT);

View File

@ -2056,6 +2056,7 @@ static int xpmem_pin_page(
XPMEM_DEBUG("call: tgid=%d, vaddr=0x%lx", tg->tgid, vaddr);
retry:
ihk_rwspinlock_read_lock_noirq(&src_vm->memory_range_lock);
range = lookup_process_memory_range(src_vm, vaddr, vaddr + 1);
@ -2063,6 +2064,20 @@ static int xpmem_pin_page(
ihk_rwspinlock_read_unlock_noirq(&src_vm->memory_range_lock);
if (!range || range->start > vaddr) {
/*
* Grow the stack if address falls into stack region
* so that we can lookup range successfully.
*/
if (src_vm->region.stack_start <= vaddr &&
src_vm->region.stack_end > vaddr) {
if (page_fault_process_vm(src_vm, (void *)vaddr,
PF_POPULATE | PF_WRITE | PF_USER) < 0) {
return -ENOENT;
}
goto retry;
}
return -ENOENT;
}

View File

@ -173,4 +173,7 @@ struct cpu_mapping;
int arch_get_cpu_mapping(struct cpu_mapping **buf, int *nelemsp);
int ihk_mc_ikc_arch_issue_host_ipi(int cpu, int vector);
void smp_func_call_handler(void);
int ihk_mc_get_smp_handler_irq(void);
#endif

View File

@ -20,6 +20,8 @@ struct process_vm;
unsigned long virt_to_phys(void *v);
void *phys_to_virt(unsigned long p);
int phys_to_nid(unsigned long p);
int lookup_node(struct process_vm *vm, void *addr);
int copy_from_user(void *dst, const void *src, size_t siz);
int strlen_user(const char *s);
int strcpy_from_user(char *dst, const char *src);

View File

@ -25,9 +25,17 @@ Source0: mckernel-%{version}.tar.gz
Requires: systemd-libs numactl-libs libdwarf
# kernel_module_package macro does not handle cross build...
# don't use kernel_module_package so that one rpm including .ko and binaries are created
%if "%{?_host_cpu}" == "x86_64" && "%{?_target_cpu}" == "aarch64"
%define cross_compile 1
%else
BuildRequires: systemd-devel numactl-devel binutils-devel kernel-devel libdwarf-devel
# Friendly reminder of the fact that kernel-rpm-macros is no longer included in kernel-devel
%if 0%{?rhel} >= 8
BuildRequires: redhat-rpm-config kernel-rpm-macros elfutils-libelf-devel
%endif
%endif
%if 0%{?rhel} >= 8
Requires: kernel >= %{krequires}
%else
@ -35,17 +43,6 @@ Requires: kernel = %{krequires}
%endif
Requires(post): /usr/sbin/depmod
Requires(postun): /usr/sbin/depmod
%else
BuildRequires: systemd-devel numactl-devel binutils-devel kernel-devel libdwarf-devel
# Friendly reminder of the fact that kernel-rpm-macros is no longer included in kernel-devel
%if 0%{?rhel} >= 8
BuildRequires: redhat-rpm-config kernel-rpm-macros elfutils-libelf-devel kmod
%endif
%if %{defined kernel_module_package_buildreqs}
BuildRequires: %kernel_module_package_buildreqs
%kernel_module_package %{?kmod_flavors}
%endif
%endif
%description
Interface for Heterogeneous Kernels and McKernel.
@ -78,6 +75,9 @@ This package contains headers and libraries required for build apps using IHK/Mc
%{?cmake_libdir:-DCMAKE_INSTALL_LIBDIR=%{cmake_libdir}} \
%{?build_target:-DBUILD_TARGET=%{build_target}} \
%{?toolchain_file:-DCMAKE_TOOLCHAIN_FILE=%{toolchain_file}} \
-DENABLE_TOFU=ON -DENABLE_FUGAKU_HACKS=ON \
-DENABLE_KRM_WORKAROUND=OFF -DWITH_KRM=ON \
-DENABLE_FUGAKU_DEBUG=OFF \
.
%make_build
@ -113,7 +113,6 @@ This package contains headers and libraries required for build apps using IHK/Mc
%{_mandir}/man1/ihkosctl.1.gz
%{_mandir}/man1/mcexec.1.gz
%if 0%{?cross_compile}
/lib/modules/%{kernel_version}/extra/mckernel/ihk.ko
/lib/modules/%{kernel_version}/extra/mckernel/mcctrl.ko
%ifarch x86_64
@ -122,7 +121,6 @@ This package contains headers and libraries required for build apps using IHK/Mc
%ifarch aarch64
/lib/modules/%{kernel_version}/extra/mckernel/ihk-smp-arm64.ko
%endif
%endif
%files devel
%{_includedir}/ihklib.h
@ -135,8 +133,7 @@ This package contains headers and libraries required for build apps using IHK/Mc
%{_includedir}/ihk/ihk_host_driver.h
/lib/modules/%{kernel_version}/extra/mckernel/ihk/linux/core/Module.symvers
%if 0%{?cross_compile}
# scripts from /usr/lib/rpm/redhat/kmodtool (kernel_module_package) as well
# taken from /usr/lib/rpm/redhat/kmodtool (kernel_module_package)
%post
if [ -e "/boot/System.map-%{kernel_version}" ]; then
/usr/sbin/depmod -aeF "/boot/System.map-%{kernel_version}" "%{kernel_version}" > /dev/null || :
@ -162,7 +159,6 @@ if [ -x "/sbin/weak-modules" ]; then
printf '%s\n' "${modules[@]}" \
| /sbin/weak-modules --remove-modules
fi
%endif
%changelog
* Tue Feb 12 2019 Dominique Martinet <dominique.martinet@cea.fr> - 1.6.0-0

30
test/issues/1523/C1523.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/bash
# Regression test for issue #1523: runs the move_pages LTP cases under
# McKernel and prints one PASSED/FAILED line per case.
#
# bash is requested explicitly: the previous first line ("#/bin/sh") was
# only a comment, so the interpreter depended on how the script was
# launched.
# NOTE(review): mcreboot, MCEXEC, LTPBIN and the initial BOOTPARAM are
# presumably provided by common.sh -- confirm.

USELTP=1
USEOSTEST=0
MCREBOOT=0

. ../../common.sh

# Boot McKernel ourselves so that anon_on_demand can be added.
BOOTPARAM="${BOOTPARAM} -e anon_on_demand"
mcreboot

issue="1523"
tid=1

for tp in move_pages01 move_pages02 move_pages04 move_pages06 move_pages09 move_pages10
do
	tname=$(printf "C${issue}T%02d" "${tid}")
	echo "*** ${tname} start *******************************"

	# Keep a copy of the LTP output so the verdict lines can be counted.
	sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee "$tp.txt"
	ok=$(grep -c PASS "$tp.txt")
	ng=$(grep -c FAIL "$tp.txt")

	if [ "$ng" -eq 0 ]; then
		echo "*** ${tname} PASSED ($ok)"
	else
		echo "*** ${tname} FAILED (ok=$ok ng=$ng)"
	fi

	tid=$((tid + 1))
	echo ""
done

11
test/issues/1523/Makefile Normal file
View File

@ -0,0 +1,11 @@
# Build/run wrapper for the issue #1523 test.
# TARGET is empty: this test builds no program of its own and only runs
# C1523.sh.
CFLAGS=-g
LDFLAGS=
TARGET=

# None of these targets produce a file of the same name.
.PHONY: all test clean

all: $(TARGET)

test: all
	./C1523.sh

clean:
	rm -f $(TARGET) *.o *.txt

21
test/issues/1523/README Normal file
View File

@ -0,0 +1,21 @@
【Issue#1523 動作確認】
□ テスト内容
1. 以下のLTPがPASSすることを確認する
- move_pages01
- move_pages02
- move_pages04
- move_pages06
- move_pages09
- move_pages10
□ 実行手順
$ make test
McKernelのインストール先や、OSTEST, LTPの配置場所は、
$HOME/.mck_test_config を参照している
.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを
$HOMEにコピーし、適宜編集する
□ 実行結果
x86_64_result.log aarch64_result.log 参照。
すべての項目をPASSしていることを確認。

View File

@ -0,0 +1,25 @@
mcstop+release.sh ... done
mcreboot.sh -c 37-43,49-55 -m 2G@2,2G@3 -r 37-43:36+49-55:48 -O -e anon_on_demand ... done
*** C1523T01 start *******************************
move_pages01 1 TPASS : pages are present in expected nodes
*** C1523T01 PASSED (1)
*** C1523T02 start *******************************
move_pages02 1 TPASS : pages are present in expected nodes
*** C1523T02 PASSED (1)
*** C1523T03 start *******************************
move_pages04 1 TPASS : status[1] has expected value
*** C1523T03 PASSED (1)
*** C1523T04 start *******************************
move_pages06 1 TPASS : move_pages failed with ENODEV as expected
*** C1523T04 PASSED (1)
*** C1523T05 start *******************************
move_pages09 1 TPASS : move_pages succeeded
*** C1523T05 PASSED (1)
*** C1523T06 start *******************************
move_pages10 1 TPASS : move_pages failed with EINVAL as expected
*** C1523T06 PASSED (1)

View File

@ -0,0 +1,25 @@
mcstop+release.sh ... done
mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 -O -e anon_on_demand ... done
*** C1523T01 start *******************************
move_pages01 1 TPASS : pages are present in expected nodes
*** C1523T01 PASSED (1)
*** C1523T02 start *******************************
move_pages02 1 TPASS : pages are present in expected nodes
*** C1523T02 PASSED (1)
*** C1523T03 start *******************************
move_pages04 1 TPASS : status[1] has expected value
*** C1523T03 PASSED (1)
*** C1523T04 start *******************************
move_pages06 1 TPASS : move_pages failed with ENODEV as expected
*** C1523T04 PASSED (1)
*** C1523T05 start *******************************
move_pages09 1 TPASS : move_pages succeeded
*** C1523T05 PASSED (1)
*** C1523T06 start *******************************
move_pages10 1 TPASS : move_pages failed with EINVAL as expected
*** C1523T06 PASSED (1)

125
test/issues/959/C959.sh Executable file
View File

@ -0,0 +1,125 @@
#!/bin/bash
# Regression test for issue #959 (MPOL_INTERLEAVE).
#
#   T01-T04: check_mempol_il with 10G on both nodes
#   T05-T06: check_mempol_il with only 2G on node 1; additionally
#            requires a "TEST_959" line in the McKernel kmsg
#   then   : LTP cases from LTP_LIST and OSTEST mbind cases from
#            OSTEST_MBIND_LIST
#
# bash is requested explicitly: the previous first line ("#/bin/sh") was
# only a comment, and PIPESTATUS below is a bash feature.
# NOTE(review): MCEXEC, IHKOSCTL, TESTMCK, LTPBIN, mcstop and mcreboot are
# presumably provided by common.sh -- confirm.

USELTP=1
USEOSTEST=1

LTP_LIST="mbind01 get_mempolicy01"
OSTEST_MBIND_LIST="1 3 5 9 12 14 15 16 20 24 26 28 30"

BOOTPARAM="-c 1-7 -m 10G@0,10G@1 -O -e anon_on_demand"
. ../../common.sh

issue="959"
tid=1

# Print the start banner for the current test ID and set ${tname}.
start_test() {
	tname=$(printf "C${issue}T%02d" "${tid}")
	echo "*** ${tname} start *******************************"
}

# Advance to the next test ID.
end_test() {
	tid=$((tid + 1))
	echo ""
}

# run_il_test NEED_KMSG ARGS...
#   Runs ./check_mempol_il ARGS... under mcexec.
#   NEED_KMSG=1: the test additionally passes only if the kernel log
#   contains at least one "TEST_959" line produced by this run; the log
#   is cleared first so lines from an earlier test cannot satisfy it.
run_il_test() {
	need_kmsg=$1
	shift

	start_test

	if [ "${need_kmsg}" -eq 1 ]; then
		${IHKOSCTL} 0 clear_kmsg
	fi

	sudo ${MCEXEC} ./check_mempol_il "$@"
	ret=$?

	if [ "${need_kmsg}" -eq 1 ]; then
		dbg_prints=$(${IHKOSCTL} 0 kmsg | grep -c "TEST_959")
		if [ "${dbg_prints}" -eq 0 ]; then
			ret=1
		fi
	fi

	if [ "${ret}" -eq 0 ]; then
		echo "*** ${tname} PASSED ******************************"
	else
		echo "*** ${tname} FAILED ******************************"
	fi

	end_test
}

# check_mempol_il arguments: mode pgshift pgnum mask exp_0 exp_1
run_il_test 0 1 30 6 3 3 3
run_il_test 0 2 30 6 3 3 3
run_il_test 0 1 30 6 2 0 6
run_il_test 0 2 30 6 2 0 6

# Reboot with only 2G on node 1.
BOOTPARAM="-c 1-7 -m 10G@0,2G@1 -O -e anon_on_demand"
mcstop
mcreboot

run_il_test 1 1 30 6 2 4 2
run_il_test 1 2 30 6 2 4 2

for tp in ${LTP_LIST}
do
	start_test

	sudo $MCEXEC $LTPBIN/$tp 2>&1 | tee "$tp.txt"
	ok=$(grep -c PASS "$tp.txt")
	ng=$(grep -c FAIL "$tp.txt")

	if [ "$ng" -eq 0 ]; then
		echo "*** ${tname} PASSED ($ok)"
	else
		echo "*** ${tname} FAILED (ok=$ok ng=$ng)"
	fi

	end_test
done

for tno in ${OSTEST_MBIND_LIST}
do
	start_test

	${MCEXEC} ${TESTMCK} -s mbind -n ${tno} -- -n 2 2>&1 | tee "test_mck-mbind${tno}.txt"
	# $? would be tee's status; take the test program's own status.
	ret=${PIPESTATUS[0]}

	if [ "${ret}" -eq 0 ]; then
		echo "*** ${tname} PASSED"
	else
		echo "*** ${tname} FAILED"
	fi

	end_test
done

14
test/issues/959/Makefile Normal file
View File

@ -0,0 +1,14 @@
# Build/run wrapper for the issue #959 test program check_mempol_il.
# NOTE(review): MCK_DIR is presumably defined by the included
# .mck_test_config.mk -- confirm.
include $(HOME)/.mck_test_config.mk

CFLAGS=-g -O0 -Wall -I$(MCK_DIR)/include
LDFLAGS=-L$(MCK_DIR)/lib64 -Wl,-rpath=$(MCK_DIR)/lib64
# Libraries go in LDLIBS so the implicit link rule places them after the
# source file on the command line; in LDFLAGS they come first and can be
# discarded by linkers that default to --as-needed.
LDLIBS=-lihk -lnuma
TARGET=check_mempol_il

# None of these targets produce a file of the same name.
.PHONY: all test clean

all: $(TARGET)

test: all
	./C959.sh

clean:
	rm -f $(TARGET) *.o *.txt

87
test/issues/959/README Normal file
View File

@ -0,0 +1,87 @@
【Issue#959 動作確認】
□ テスト内容
本テストは2つのNUMAノード(node0, node1)を使用してMPOL_INTERLEAVEの動作を確認するテストである。
2つ以上のNUMAノードを持つ環境で実行すること。
1. INTERLEAVEするノードセットに十分なメモリ容量がある場合の動作確認
C959T01: set_mempolicyによるmempolicy設定時の動作 (2ノード)
node0, node1 からそれぞれ10GBのメモリをMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、node0, node1 でのINTERLEAVEに設定する
(2) 6GBのメモリを確保し、書き込みを行う
(3) McKernelの2つのNUMAノードから均等にメモリが使用されていることを確認する
C959T02: mbindによるmempolicy設定時の動作 (2ノード)
node0, node1 からそれぞれ10GBのメモリをMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、node0 でのBINDに設定する
(2) 6GBのメモリを確保する
(3) mbind() で(2)で確保した領域のmempolicyを、node0, node1 でのINTERLEAVEに設定する
(4) McKernelの2つのNUMAノードから均等にメモリが使用されていることを確認する
C959T03: set_mempolicyによるmempolicy設定時の動作 (1ノード)
node0, node1 からそれぞれ10GBのメモリをMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、 node1 でのINTERLEAVEに設定する
(2) 6GBのメモリを確保し、書き込みを行う
(3) McKernelのnode1から 6GBが使用されていることを確認する
C959T04: mbindによるmempolicy設定時の動作 (1ノード)
node0, node1 からそれぞれ10GBのメモリをMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、node0 でのBINDに設定する
(2) 6GBのメモリを確保する
(3) mbind() で(2)で確保した領域のmempolicyを、node1 でのINTERLEAVEに設定する
(4) McKernelのnode1から 6GBが使用されていることを確認する
2. INTERLEAVEするノードセットにメモリ容量が不足している場合の動作確認
C959T05: set_mempolicyによるmempolicy設定時の動作
node0 に10GB、 node1 に2GBのメモリをそれぞれMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、node1 でのINTERLEAVEに設定する
(2) 6GBのメモリを確保し、書き込みを行う
(3) McKernelのnode0から4GB, node1から2GBがそれぞれ使用されていることを確認する
C959T06: mbindによるmempolicy設定時の動作
node0 に10GB、 node1 に2GBのメモリをそれぞれMcKernelに割り当てた状態で
下記の処理を確認する
(1) set_mempolicy() でプロセスのmempolicyを、node0 でのBINDに設定する
(2) 6GBのメモリを確保する
(3) mbind() で(2)で確保した領域のmempolicyを、node1 でのINTERLEAVEに設定する
(4) McKernelのnode0から4GB, node1から2GBがそれぞれ使用されていることを確認する
3. 以下のLTPを用いて既存のmbind機能に影響がないことを確認する
- mbind01
- get_mempolicy01
4. 以下のOSTESTを用いて既存のmbind機能に影響がないことを確認する
- ostest-mbind.000
- ostest-mbind.001
- ostest-mbind.002
- ostest-mbind.003
- ostest-mbind.004
- ostest-mbind.005
- ostest-mbind.006
- ostest-mbind.007
- ostest-mbind.008
- ostest-mbind.009
- ostest-mbind.010
- ostest-mbind.011
- ostest-mbind.012
□ 実行手順
・下記の手順でテストを実行する
$ cd <mckernel>
$ patch -p0 < test/issues/959/test_print.patch
(build mckernel)
$ cd test/issues/959
$ make test
McKernelのインストール先や、OSTEST, LTPの配置場所は、
$HOME/.mck_test_config を参照している
.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを
$HOMEにコピーし、適宜編集する
□ 実行結果
x86_64result.log, aarch64_result.log 参照。
すべての項目をPASSしていることを確認。

View File

@ -0,0 +1,314 @@
mcstop+release.sh ... done
mcreboot.sh -c 1-7 -m 10G@0,10G@1 -O -e anon_on_demand ... done
*** C959T01 start *******************************
INTERLEAVE BIT_MASK: 0x3
set_mempolicy: INTERLEAVE mask 0x3
** Difference of numa_stat **
[OK] NUMA[0] 0xc0000000
[OK] NUMA[1] 0xc0000000
*** C959T01 PASSED ******************************
*** C959T02 start *******************************
INTERLEAVE BIT_MASK: 0x3
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x3
** Difference of numa_stat **
[OK] NUMA[0] 0xc0000000
[OK] NUMA[1] 0xc0000000
*** C959T02 PASSED ******************************
*** C959T03 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x0
[OK] NUMA[1] 0x180000000
*** C959T03 PASSED ******************************
*** C959T04 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x0
[OK] NUMA[1] 0x180000000
*** C959T04 PASSED ******************************
mcstop+release.sh ... done
mcreboot.sh -c 1-7 -m 10G@0,2G@1 -O -e anon_on_demand ... done
*** C959T05 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x100000000
[OK] NUMA[1] 0x80000000
*** C959T05 PASSED ******************************
*** C959T06 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x100000000
[OK] NUMA[1] 0x80000000
*** C959T06 PASSED ******************************
*** C959T07 start *******************************
tst_test.c:1096: INFO: Timeout per run is 0h 05m 00s
mbind01.c:181: INFO: case MPOL_DEFAULT
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_DEFAULT (target exists)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_BIND (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_BIND
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_INTERLEAVE (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_INTERLEAVE
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case UNKNOWN_POLICY
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_DEFAULT (invalid flags)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED (invalid nodemask)
mbind01.c:230: PASS: Test passed
Summary:
passed 11
failed 0
skipped 0
warnings 0
*** C959T07 PASSED (11)
*** C959T08 start *******************************
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=-1 errno=14 (Bad address)
RESULT: return value(ret)=-1 errno=14 (Bad address)
EXPECT: return value(ret)=-1 errno=22 (Invalid argument)
RESULT: return value(ret)=-1 errno=22 (Invalid argument)
get_mempolicy01 0 TINFO : (case00) START
get_mempolicy01 1 TPASS : (case00) END
get_mempolicy01 0 TINFO : (case01) START
get_mempolicy01 2 TPASS : (case01) END
get_mempolicy01 0 TINFO : (case02) START
get_mempolicy01 3 TPASS : (case02) END
get_mempolicy01 0 TINFO : (case03) START
get_mempolicy01 4 TPASS : (case03) END
get_mempolicy01 0 TINFO : (case04) START
get_mempolicy01 5 TPASS : (case04) END
get_mempolicy01 0 TINFO : (case05) START
get_mempolicy01 6 TPASS : (case05) END
get_mempolicy01 0 TINFO : (case06) START
get_mempolicy01 7 TPASS : (case06) END
get_mempolicy01 0 TINFO : (case07) START
get_mempolicy01 8 TPASS : (case07) END
get_mempolicy01 0 TINFO : (case08) START
get_mempolicy01 9 TPASS : (case08) END
get_mempolicy01 0 TINFO : (case09) START
get_mempolicy01 10 TPASS : (case09) END
get_mempolicy01 0 TINFO : (case10) START
get_mempolicy01 11 TPASS : (case10) END
get_mempolicy01 0 TINFO : (case11) START
get_mempolicy01 12 TPASS : (case11) END
*** C959T08 PASSED (12)
*** C959T09 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 1
ARGS: -n 2
RESULT: ok
*** C959T09 PASSED
*** C959T10 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 3
ARGS: -n 2
RESULT: ok
*** C959T10 PASSED
*** C959T11 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 5
ARGS: -n 2
RESULT: ok
*** C959T11 PASSED
*** C959T12 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 9
ARGS: -n 2
RESULT: ok
*** C959T12 PASSED
*** C959T13 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 12
ARGS: -n 2
RESULT: ok
*** C959T13 PASSED
*** C959T14 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 14
ARGS: -n 2
RESULT: ok
*** C959T14 PASSED
*** C959T15 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 15
ARGS: -n 2
region 0
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 1
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 2
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 3
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 4
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T15 PASSED
*** C959T16 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 16
ARGS: -n 2
region 0
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 1
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 2
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 3
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 4
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T16 PASSED
*** C959T17 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 20
ARGS: -n 2
region 0
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 1
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 2
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 3
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 4
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T17 PASSED
*** C959T18 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 24
ARGS: -n 2
RESULT: ok
*** C959T18 PASSED
*** C959T19 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 26
ARGS: -n 2
nodemask = 0
RESULT: ok
*** C959T19 PASSED
*** C959T20 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 28
ARGS: -n 2
RESULT: ok
*** C959T20 PASSED
*** C959T21 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 30
ARGS: -n 2
RESULT: ok
*** C959T21 PASSED

View File

@ -0,0 +1,139 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <numaif.h>
#include <sys/mman.h>
#include <errno.h>
#include <ihklib.h>
#include <ihk/ihk_rusage.h>
#define NUMA_NUM 2
long long numa_stat_bfr[NUMA_NUM], numa_stat_aft[NUMA_NUM];
long long exp_diff[NUMA_NUM];
int
get_current_numa_stat(long long *stat, int numa_cnt)
{
int i, ret = 0;
struct ihk_os_rusage mck_rusage;
memset(&mck_rusage, 0, sizeof(mck_rusage));
ret = ihk_os_getrusage(0, &mck_rusage, sizeof(mck_rusage));
if (ret) {
perror("ihk_os_getrusage()");
goto out;
}
for (i = 0; i < numa_cnt; i++) {
if (mck_rusage.memory_numa_stat[i] != 0) {
stat[i] = mck_rusage.memory_numa_stat[i];
}
}
out:
return ret;
}
/*
 * check_mempol_il MODE PGSHIFT PGNUM MASK EXP_0 EXP_1
 *
 *   MODE     1: set MPOL_INTERLEAVE with set_mempolicy() before mmap()
 *            2: set MPOL_BIND (node mask 0x1) with set_mempolicy(),
 *               mmap(), then set MPOL_INTERLEAVE on the mapping with
 *               mbind()
 *   PGSHIFT  unit size is 1 << PGSHIFT bytes
 *   PGNUM    number of units to map and touch
 *   MASK     node bit mask used for MPOL_INTERLEAVE
 *   EXP_0/1  expected growth of NUMA node 0 / 1 usage, in units
 *
 * Takes a NUMA usage snapshot before and after touching the mapping and
 * prints, per node, whether the difference equals the expectation.
 *
 * Returns 0 when every node matches, -1 otherwise or on any error.
 */
int
main(int argc, char **argv)
{
	void *p = MAP_FAILED;
	unsigned long mask, bind_mask = 1;
	unsigned long ps, len = 0;
	int i, mode, pgshift, pgnum, exp_0, exp_1, ret = 0;

	if (argc < 7) {
		printf("error: too few arguments\n");
		ret = -1;
		goto out;
	}

	mode = atoi(argv[1]); /* 1: set_mempolicy, 2: mbind */
	pgshift = atoi(argv[2]);
	pgnum = atoi(argv[3]);
	mask = atoi(argv[4]);
	exp_0 = atoi(argv[5]);
	exp_1 = atoi(argv[6]);

	/* Shifting by a negative or too large count is undefined */
	if (pgshift < 0 || pgshift >= (int)(sizeof(ps) * 8)) {
		printf("error: invalid page shift\n");
		ret = -1;
		goto out;
	}

	ps = 1UL << pgshift;
	len = ps * pgnum;
	exp_diff[0] = exp_0 * ps;
	exp_diff[1] = exp_1 * ps;

	if (mode != 1 && mode != 2) {
		printf("error: invalid mode\n");
		ret = -1;
		goto out;
	}

	printf("INTERLEAVE BIT_MASK: 0x%lx\n", mask);

	/* Without a valid "before" snapshot the comparison is meaningless */
	if (get_current_numa_stat(numa_stat_bfr, NUMA_NUM)) {
		ret = -1;
		goto out;
	}

	switch (mode) {
	case 1: /* set_mempolicy */
		printf("set_mempolicy: INTERLEAVE mask 0x%lx\n", mask);
		if (set_mempolicy(MPOL_INTERLEAVE, &mask, NUMA_NUM)) {
			perror("set_mempolicy");
			ret = -1;
			goto out;
		}
		p = mmap(NULL, len, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			ret = -1;
			goto out;
		}
		break;

	case 2: /* mbind */
		printf("set_mempolicy: BIND mask 0x%lx\n", bind_mask);
		if (set_mempolicy(MPOL_BIND, &bind_mask, NUMA_NUM)) {
			perror("set_mempolicy");
			ret = -1;
			goto out;
		}
		p = mmap(NULL, len, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			ret = -1;
			goto out;
		}
		printf("mbind : INTERLEAVE mask 0x%lx\n", mask);
		if (mbind(p, len, MPOL_INTERLEAVE, &mask,
			  NUMA_NUM, 0) == -1) {
			perror("mbind");
			ret = -1;
			goto out;
		}
		break;

	default:
		printf("error: invalid mode\n");
		ret = -1;
		goto out;
	}

	/* Write to the whole mapping */
	memset(p, '0', len);

	if (get_current_numa_stat(numa_stat_aft, NUMA_NUM)) {
		ret = -1;
		goto out;
	}

	printf("** Difference of numa_stat **\n");
	for (i = 0; i < NUMA_NUM; i++) {
		long long diff = numa_stat_aft[i] - numa_stat_bfr[i];

		if (diff == exp_diff[i]) {
			printf("[OK] ");
		} else {
			printf("[NG] ");
			ret = -1;
		}
		printf(" NUMA[%d] 0x%llx\n", i, diff);
	}

out:
	/* Release the mapping on every path that created one */
	if (p != MAP_FAILED) {
		munmap(p, len);
	}
	return ret;
}

View File

@ -0,0 +1,14 @@
diff --git kernel/mem.c kernel/mem.c
index e464eb2..7086c6a 100644
--- kernel/mem.c
+++ kernel/mem.c
@@ -749,6 +749,9 @@ retry_interleave:
#endif
dkprintf("%s: couldn't fulfill user policy for %d pages\n",
__FUNCTION__, npages);
+ if (numa_mem_policy == MPOL_INTERLEAVE) {
+ kprintf("TEST_959: reach HERE\n");
+ }
}
distance_based:

View File

@ -0,0 +1,313 @@
mcstop+release.sh ... done
mcreboot.sh -c 1-7 -m 10G@0,10G@1 -O -e anon_on_demand ... done
*** C959T01 start *******************************
INTERLEAVE BIT_MASK: 0x3
set_mempolicy: INTERLEAVE mask 0x3
** Difference of numa_stat **
[OK] NUMA[0] 0xc0000000
[OK] NUMA[1] 0xc0000000
*** C959T01 PASSED ******************************
*** C959T02 start *******************************
INTERLEAVE BIT_MASK: 0x3
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x3
** Difference of numa_stat **
[OK] NUMA[0] 0xc0000000
[OK] NUMA[1] 0xc0000000
*** C959T02 PASSED ******************************
*** C959T03 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x0
[OK] NUMA[1] 0x180000000
*** C959T03 PASSED ******************************
*** C959T04 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x0
[OK] NUMA[1] 0x180000000
*** C959T04 PASSED ******************************
mcstop+release.sh ... done
mcreboot.sh -c 1-7 -m 10G@0,2G@1 -O -e anon_on_demand ... done
*** C959T05 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x100000000
[OK] NUMA[1] 0x80000000
*** C959T05 PASSED ******************************
*** C959T06 start *******************************
INTERLEAVE BIT_MASK: 0x2
set_mempolicy: BIND mask 0x1
mbind : INTERLEAVE mask 0x2
** Difference of numa_stat **
[OK] NUMA[0] 0x100000000
[OK] NUMA[1] 0x80000000
*** C959T06 PASSED ******************************
*** C959T07 start *******************************
tst_test.c:1096: INFO: Timeout per run is 0h 05m 00s
mbind01.c:181: INFO: case MPOL_DEFAULT
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_DEFAULT (target exists)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_BIND (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_BIND
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_INTERLEAVE (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_INTERLEAVE
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED (no target)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case UNKNOWN_POLICY
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_DEFAULT (invalid flags)
mbind01.c:230: PASS: Test passed
mbind01.c:181: INFO: case MPOL_PREFERRED (invalid nodemask)
mbind01.c:230: PASS: Test passed
Summary:
passed 11
failed 0
skipped 0
warnings 0
*** C959T07 PASSED (11)
*** C959T08 start *******************************
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=0 errno=0 (Success)
RESULT: return value(ret)=0 errno=0 (Success)
EXPECT: return value(ret)=-1 errno=14 (Bad address)
RESULT: return value(ret)=-1 errno=14 (Bad address)
EXPECT: return value(ret)=-1 errno=22 (Invalid argument)
RESULT: return value(ret)=-1 errno=22 (Invalid argument)
get_mempolicy01 0 TINFO : (case00) START
get_mempolicy01 1 TPASS : (case00) END
get_mempolicy01 0 TINFO : (case01) START
get_mempolicy01 2 TPASS : (case01) END
get_mempolicy01 0 TINFO : (case02) START
get_mempolicy01 3 TPASS : (case02) END
get_mempolicy01 0 TINFO : (case03) START
get_mempolicy01 4 TPASS : (case03) END
get_mempolicy01 0 TINFO : (case04) START
get_mempolicy01 5 TPASS : (case04) END
get_mempolicy01 0 TINFO : (case05) START
get_mempolicy01 6 TPASS : (case05) END
get_mempolicy01 0 TINFO : (case06) START
get_mempolicy01 7 TPASS : (case06) END
get_mempolicy01 0 TINFO : (case07) START
get_mempolicy01 8 TPASS : (case07) END
get_mempolicy01 0 TINFO : (case08) START
get_mempolicy01 9 TPASS : (case08) END
get_mempolicy01 0 TINFO : (case09) START
get_mempolicy01 10 TPASS : (case09) END
get_mempolicy01 0 TINFO : (case10) START
get_mempolicy01 11 TPASS : (case10) END
get_mempolicy01 0 TINFO : (case11) START
get_mempolicy01 12 TPASS : (case11) END
*** C959T08 PASSED (12)
*** C959T09 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 1
ARGS: -n 2
RESULT: ok
*** C959T09 PASSED
*** C959T10 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 3
ARGS: -n 2
RESULT: ok
*** C959T10 PASSED
*** C959T11 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 5
ARGS: -n 2
RESULT: ok
*** C959T11 PASSED
*** C959T12 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 9
ARGS: -n 2
RESULT: ok
*** C959T12 PASSED
*** C959T13 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 12
ARGS: -n 2
RESULT: ok
*** C959T13 PASSED
*** C959T14 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 14
ARGS: -n 2
RESULT: ok
*** C959T14 PASSED
*** C959T15 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 15
ARGS: -n 2
region 0
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 1
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 2
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 3
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 4
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T15 PASSED
*** C959T16 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 16
ARGS: -n 2
region 0
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 1
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 2
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 3
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 4
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T16 PASSED
*** C959T17 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 20
ARGS: -n 2
region 0
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 1
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 2
get : mode = 3, node_mask = 3
m_expect : mode = 3, node_mask = 3
region 3
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 4
get : mode = 2, node_mask = 1
m_expect : mode = 2, node_mask = 1
region 5
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
region 6
get : mode = 0, node_mask = 0
m_expect : mode = 0, node_mask = 0
RESULT: ok
*** C959T17 PASSED
*** C959T18 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 24
ARGS: -n 2
RESULT: ok
*** C959T18 PASSED
*** C959T19 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 26
ARGS: -n 2
nodemask = 0
RESULT: ok
*** C959T19 PASSED
*** C959T20 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 28
ARGS: -n 2
RESULT: ok
*** C959T20 PASSED
*** C959T21 start *******************************
TEST_SUITE: mbind
TEST_NUMBER: 30
ARGS: -n 2
RESULT: ok
*** C959T21 PASSED