mcexec: memory policy control by environmental variable

Refs: #1470
Change-Id: I3d556cae90d31d81572b1c4e5c680e826577d428
This commit is contained in:
Masamichi Takagi
2020-12-15 14:17:32 +09:00
parent ed670c03af
commit 33dd2e60b1
21 changed files with 729 additions and 6 deletions

View File

@ -0,0 +1,123 @@
# cmake_minimum_required() must be the first command so that policy defaults
# are in effect for everything below, and project() follows it directly so
# that the variables it initializes (e.g. the default install prefix) exist
# before they are read.
cmake_minimum_required(VERSION 3.0)
project(issue1470 C)

# Escape the values of preprocessor definitions automatically.
cmake_policy(SET CMP0005 NEW)

# Read the CPU identification fields of the build host from /proc/cpuinfo.
execute_process(COMMAND bash -c "gawk '/CPU implementer/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_IMPLEMENTER OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU architecture/ { print \$3; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU variant/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_VARIANT OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU part/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_PART OUTPUT_STRIP_TRAILING_WHITESPACE)

# This combination of fields is reported as A64FX; the C sources can test for
# it through the __a64fx__ macro.
if(CPU_IMPLEMENTER STREQUAL "0x46" AND CPU_ARCH STREQUAL "8" AND
   CPU_VARIANT STREQUAL "0x1" AND CPU_PART STREQUAL "0x001")
  message("A64FX detected")
  set(CPU_MODEL "a64fx")
  add_definitions(-D__a64fx__)
endif()

# find first NUMA available to user (0 or 4 now)
# zoneinfo.awk extracts the free page count per zone and zoneinfo_filter.awk
# prints the ids of the nodes that pass its size threshold; the first id wins.
execute_process(COMMAND bash -c "awk -v keyword=nr_free_pages -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo.awk /proc/zoneinfo | awk -v page_size=$(getconf PAGE_SIZE) -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo_filter.awk | head -n1" OUTPUT_VARIABLE FIRST_USER_NUMA OUTPUT_STRIP_TRAILING_WHITESPACE)
message("FIRST_USER_NUMA: ${FIRST_USER_NUMA}")
add_definitions(-DFIRST_USER_NUMA=${FIRST_USER_NUMA})
if (FIRST_USER_NUMA STREQUAL "4")
  # NOTE(review): this rewrites a host cgroup setting at configure time and
  # needs sudo; confirm that this side effect is wanted on every configure.
  execute_process(COMMAND sudo bash -c "echo 0-7 > /sys/fs/cgroup/cpuset/system.slice/cpuset.mems")
endif()

# Options: -DWITH_MCK=<McKernel install directory>
add_definitions(-DWITH_MCK=${WITH_MCK})
# Options: -DWITH_MCK_SRC=<McKernel source directory>
add_definitions(-DWITH_MCK_SRC=${WITH_MCK_SRC})

# for autotest
# The script install directory defaults to <install prefix>/scripts unless it
# is given on the command line.
if(NOT DEFINED CMAKE_INSTALL_PREFIX_SCRIPTS)
  set(CMAKE_INSTALL_PREFIX_SCRIPTS ${CMAKE_INSTALL_PREFIX}/scripts)
endif()
# CPPFLAGS
# Kernel build tree to inspect; both entries are cache variables and can be
# overridden with -D on the command line.
set(UNAME_R ${CMAKE_SYSTEM_VERSION} CACHE STRING "Kernel version to build against")
set(KERNEL_DIR "/lib/modules/${UNAME_R}/build" CACHE STRING "kernel build directory")

# Look up CONFIG_ARM64_64K_PAGES in the kernel configuration file.
execute_process(
  COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
  OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES
  OUTPUT_STRIP_TRAILING_WHITESPACE)

# 4 KiB pages everywhere except aarch64 kernels configured for 64 KiB pages.
set(PAGE_SIZE "4096")
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND CONFIG_ARM64_64K_PAGES STREQUAL "y")
  set(PAGE_SIZE "65536")
endif()
message("PAGE_SIZE: ${PAGE_SIZE}")

# Build target name and kernel module postfix for the host architecture.
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  add_definitions(-DBUILD_TARGET=smp-x86 -DKMOD_POSTFIX=smp_x86)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
  add_definitions(-DBUILD_TARGET=smp-arm64 -DKMOD_POSTFIX=smp_arm64)
endif()

# Make the install prefix visible to the C sources.
add_definitions(-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX})
# CFLAGS
# Warning flags are kept in a cache entry; EXTRA_WARNINGS is appended to the
# default list when the entry is first created.
set(CFLAGS_WARNING
  -Wall
  -Wextra
  -Wno-unused-parameter
  -Wno-sign-compare
  -Wno-unused-function
  ${EXTRA_WARNINGS}
  CACHE STRING "Warning flags")
add_compile_options(-O2 -g ${CFLAGS_WARNING})

# -L, this must be done before adding dependants
link_directories("${WITH_MCK}/lib64")

# -Wl,--rpath=, this must be done before adding dependants
set(CMAKE_INSTALL_RPATH "${WITH_MCK}/lib64")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# test driver scripts
# Each src/<name>.sh.in template is expanded (@VAR@ references only) into
# ihklib-<name> in this directory's build tree and installed as an executable
# script. The generated file and the installed file are both addressed through
# CMAKE_CURRENT_BINARY_DIR, which is where configure_file() places a relative
# output path, so the two stay in sync even when this directory is not the
# top-level project.
foreach(target IN ITEMS
    OMPI_MCA_plm_ple_memory_allocation_policy01
    OMPI_MCA_plm_ple_memory_allocation_policy02
    OMPI_MCA_plm_ple_memory_allocation_policy03
    OMPI_MCA_plm_ple_memory_allocation_policy04
    OMPI_MCA_plm_ple_memory_allocation_policy05
    OMPI_MCA_plm_ple_memory_allocation_policy06
    OMPI_MCA_plm_ple_memory_allocation_policy07
  )
  # String replacement
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/src/${target}.sh.in"
    "${CMAKE_CURRENT_BINARY_DIR}/ihklib-${target}"
    @ONLY)
  # Install scripts
  install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/ihklib-${target}"
    DESTINATION ${CMAKE_INSTALL_PREFIX_SCRIPTS})
endforeach()
# programs running on McKernel
# Every listed program is built from src/<name>.c, sees the project's include/
# directory, links against libnuma and is installed into bin.
foreach(prog IN ITEMS check_mempolicy)
  add_executable(${prog} src/${prog}.c)
  target_include_directories(${prog} PRIVATE "${PROJECT_SOURCE_DIR}/include")
  target_link_libraries(${prog} PRIVATE numa)
  install(TARGETS ${prog} DESTINATION bin)
endforeach()

32
test/issues/1470/README Normal file
View File

@ -0,0 +1,32 @@
============
What to test
============
Check if the mode and nodemask obtained by get_mempolicy() are set to the expected values, with different reserved cpus, cpumask and policy requests.
The following settings are used.
Memory reserved: "1G@4,1G@5,1G@6,1G@7"
CPUs reserved: "12-59", "24-59"
FLIB_AFFINITY_ON_PROCESS:
"12-23", "24-35", "36-47", "48-59",
"12-35", "24-47", "36-59",
"12-47", "24-59",
"12-59"
OMPI_MCA_plm_ple_memory_allocation_policy:
{interleave,bind,prefer}_{local,nonlocal},
{interleave,bind}_all,
localalloc
============
How to build
============
cd <mckernel>/test/issues
mkdir build
cd build
cmake ../1470/ -DCMAKE_INSTALL_PREFIX=<mckernel>/test/issues/install -DWITH_MCK=<mckernel-install> -DWITH_MCK_SRC=<mckernel>
make install
===========
How to test
===========
for i in {1..7}; do <mckernel>/test/issues/install/scripts/ihklib-OMPI_MCA_plm_ple_memory_allocation_policy0$i; done
Check that no "[ NG ]" line is printed.

View File

@ -0,0 +1,33 @@
#ifndef __OKNG_H_INCLUDED__
#define __OKNG_H_INCLUDED__

#include <stdio.h>

/*
 * Test-reporting helpers.
 *
 * The macros that can abort a test (_OKNG with a non-zero "jump", OKNG,
 * INTERR and ARRAY_SIZE_CHECK) expect the calling function to provide an
 * assignable variable named "ret" and a label named "out": on failure they
 * set ret to 1 and jump to out.
 */

/*
 * _OKNG - report the result of a check.
 * verb: when non-zero, print an "[ OK ] " line if the check passes
 * jump: when non-zero, set ret = 1 and goto out if the check fails
 * cond: the check itself; non-zero means success
 * fmt, args: printf-style message. fmt must be a string literal because it
 *            is concatenated with the result prefix.
 */
#define _OKNG(verb, jump, cond, fmt, args...) do {	\
	if (cond) {					\
		if (verb)				\
			printf("[ OK ] " fmt, ##args);	\
	} else {					\
		printf("[ NG ] " fmt, ##args);		\
		if (jump) {				\
			ret = 1;			\
			goto out;			\
		}					\
	}						\
} while (0)

/* OKNG(cond, fmt, ...) - verbose check that aborts the test on failure. */
#define OKNG(args...) _OKNG(1, 1, ##args)

/* Informational messages; fmt must be a string literal. */
#define INFO(fmt, args...) printf("[ INFO ] " fmt, ##args)
#define START(fmt, args...) printf("[ START] " fmt, ##args)

/*
 * INTERR - report an internal (test infrastructure) error and abort.
 * When cond is non-zero, prints "[INTERR] <file>:<line> <message>", sets
 * ret = 1 and jumps to out. The message is formatted with snprintf() so
 * that an over-long message is truncated instead of overflowing the buffer.
 */
#define INTERR(cond, fmt, args...) do {					\
	if (cond) {							\
		char msg[4096];						\
		snprintf(msg, sizeof(msg), fmt, ##args);		\
		printf("[INTERR] %s:%d %s", __FILE__, __LINE__, msg);	\
		ret = 1;						\
		goto out;						\
	}								\
} while (0)

/*
 * ARRAY_SIZE_CHECK - raise INTERR unless array has exactly size elements.
 * The arguments are parenthesized so that expressions can be passed, and
 * size is converted to int to match the %d conversion.
 */
#define ARRAY_SIZE_CHECK(array, size)					\
	INTERR(sizeof(array) / sizeof((array)[0]) != (size),		\
	       "size of array \"%s\" isn't %d\n", #array, (int)(size))

#endif

View File

@ -0,0 +1,53 @@
#!/usr/bin/bash

# Test driver for the *_local memory allocation policies with CPUs 12-59
# reserved for McKernel. For every policy / CPU list combination it runs
# check_mempolicy under mcexec and exits with the failing status at the
# first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
# The path is quoted so that a directory name containing whitespace works.
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected mode (check_mempolicy -m) for each policy request.
declare -A mode
mode[interleave_local]="3"
mode[bind_local]="2"
mode[prefer_local]="1"

# Expected nodemask (check_mempolicy -n), keyed by the CPU list that is
# passed in FLIB_AFFINITY_ON_PROCESS.
declare -A nodemask
nodemask[$(seq -s, 12 23)]="1"
nodemask[$(seq -s, 24 35)]="2"
nodemask[$(seq -s, 36 47)]="4"
nodemask[$(seq -s, 48 59)]="8"
nodemask[$(seq -s, 12 35)]="3"
nodemask[$(seq -s, 24 47)]="6"
nodemask[$(seq -s, 36 59)]="12"
nodemask[$(seq -s, 12 47)]="7"
nodemask[$(seq -s, 24 59)]="14"
nodemask[$(seq -s, 12 59)]="15"

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

# The "policy not specified" check runs once, on the very first iteration.
# An explicitly initialized flag is used so that a value inherited from the
# environment or a sourced file cannot suppress it.
default_checked=0

for policy in interleave_local bind_local prefer_local; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
		$(seq -s, 12 47) $(seq -s, 24 59) \
		$(seq -s, 12 59); do

		# check if policy is not set when not specified
		if (( default_checked == 0 )); then
			default_checked=1
			FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m 0 || exit $?
		fi

		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,48 @@
#!/usr/bin/bash

# Test driver for the *_nonlocal memory allocation policies with CPUs 12-59
# reserved for McKernel. Each policy is checked against every CPU range
# below; the script exits with the failing status at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if test -f ${AUTOTEST_HOME}/bin/config.sh; then
	source ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

# Mode that check_mempolicy must observe for each requested policy.
declare -A expected_mode=(
	[interleave_nonlocal]="3"
	[bind_nonlocal]="2"
	[prefer_nonlocal]="1"
)

# One test case per entry: "<first cpu> <last cpu> <expected nodemask>".
cases=(
	"12 23 14"
	"24 35 13"
	"36 47 11"
	"48 59 7"
	"12 35 12"
	"24 47 9"
	"36 59 3"
	"12 47 8"
	"24 59 1"
	"12 59 0"
)

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
	for entry in "${cases[@]}"; do
		read -r first last mask <<< "$entry"
		# Comma-separated CPU list first..last
		cpuset=$(seq -s, "$first" "$last")
		FLIB_NUM_PROCESS_ON_NODE=1 \
		FLIB_AFFINITY_ON_PROCESS=$cpuset \
		OMPI_MCA_plm_ple_memory_allocation_policy=$policy \
			@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy \
			-m ${expected_mode[$policy]} -n $mask || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,34 @@
#!/usr/bin/bash

# Test driver for the *_all memory allocation policies with CPUs 12-59
# reserved for McKernel. The expected nodemask is 15 for every CPU range;
# the script exits with the failing status at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if test -f ${AUTOTEST_HOME}/bin/config.sh; then
	source ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

# Mode that check_mempolicy must observe for each requested policy.
declare -A expected_mode=(
	[interleave_all]="3"
	[bind_all]="2"
)

# CPU ranges to test, as "<first cpu> <last cpu>".
ranges=(
	"12 23" "24 35" "36 47" "48 59"
	"12 35" "24 47" "36 59"
	"12 47" "24 59"
	"12 59"
)

# run_check POLICY CPUSET: run check_mempolicy under mcexec with the given
# policy request and CPU list; returns the exit status of mcexec.
run_check() {
	FLIB_NUM_PROCESS_ON_NODE=1 \
	FLIB_AFFINITY_ON_PROCESS=$2 \
	OMPI_MCA_plm_ple_memory_allocation_policy=$1 \
		@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy \
		-m ${expected_mode[$1]} -n 15
}

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_all bind_all; do
	for range in "${ranges[@]}"; do
		# $range is left unquoted on purpose: it splits into first and last
		run_check "$policy" "$(seq -s, $range)" || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,30 @@
#!/usr/bin/bash

# Test driver for the localalloc memory allocation policy with CPUs 12-59
# reserved for McKernel. check_mempolicy is expected to observe mode 0 for
# every CPU range; the script exits with the failing status at the first
# failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if test -f ${AUTOTEST_HOME}/bin/config.sh; then
	source ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

# Only one policy is exercised by this script.
policy=localalloc

# CPU ranges to test, as "<first cpu> <last cpu>".
for range in \
	"12 23" "24 35" "36 47" "48 59" \
	"12 35" "24 47" "36 59" \
	"12 47" "24 59" \
	"12 59"; do
	# $range is left unquoted on purpose: it splits into first and last
	cpuset=$(seq -s, $range)
	FLIB_NUM_PROCESS_ON_NODE=1 \
	FLIB_AFFINITY_ON_PROCESS=$cpuset \
	OMPI_MCA_plm_ple_memory_allocation_policy=$policy \
		@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m 0 || exit $?
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,44 @@
#!/usr/bin/bash

# Test driver for the *_local memory allocation policies with only the last
# 36 CPUs (24-59) reserved for McKernel. For every policy / CPU list
# combination it runs check_mempolicy under mcexec and exits with the failing
# status at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
# The path is quoted so that a directory name containing whitespace works.
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected mode (check_mempolicy -m) for each policy request.
declare -A mode
mode[interleave_local]="3"
mode[bind_local]="2"
mode[prefer_local]="1"

# Expected nodemask (check_mempolicy -n), keyed by the CPU list that is
# passed in FLIB_AFFINITY_ON_PROCESS.
declare -A nodemask
nodemask[$(seq -s, 12 23)]="2"
nodemask[$(seq -s, 24 35)]="4"
nodemask[$(seq -s, 36 47)]="8"
nodemask[$(seq -s, 12 35)]="6"
nodemask[$(seq -s, 24 47)]="12"
nodemask[$(seq -s, 12 47)]="14"

# Release whatever a previous (possibly failed) run left behind before
# booting with a different CPU reservation, as the 12-59 drivers do.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_local bind_local prefer_local; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,44 @@
#!/usr/bin/bash

# Test driver for the *_nonlocal memory allocation policies with only the
# last 36 CPUs (24-59) reserved for McKernel. For every policy / CPU list
# combination it runs check_mempolicy under mcexec and exits with the failing
# status at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
# The path is quoted so that a directory name containing whitespace works.
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected mode (check_mempolicy -m) for each policy request.
declare -A mode
mode[interleave_nonlocal]="3"
mode[bind_nonlocal]="2"
mode[prefer_nonlocal]="1"

# Expected nodemask (check_mempolicy -n), keyed by the CPU list that is
# passed in FLIB_AFFINITY_ON_PROCESS.
declare -A nodemask
nodemask[$(seq -s, 12 23)]="13"
nodemask[$(seq -s, 24 35)]="11"
nodemask[$(seq -s, 36 47)]="7"
nodemask[$(seq -s, 12 35)]="9"
nodemask[$(seq -s, 24 47)]="3"
nodemask[$(seq -s, 12 47)]="1"

# Release whatever a previous (possibly failed) run left behind before
# booting with a different CPU reservation, as the 12-59 drivers do.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,34 @@
#!/usr/bin/bash

# Test driver for the *_all memory allocation policies with only the last
# 36 CPUs (24-59) reserved for McKernel. The expected nodemask is 15 for
# every CPU list; the script exits with the failing status at the first
# failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
# The path is quoted so that a directory name containing whitespace works.
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected mode (check_mempolicy -m) for each policy request.
declare -A mode
mode[interleave_all]="3"
mode[bind_all]="2"

# Release whatever a previous (possibly failed) run left behind before
# booting with a different CPU reservation, as the 12-59 drivers do.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_all bind_all; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n 15 || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh
exit 0

View File

@ -0,0 +1,54 @@
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <numaif.h>
#include <okng.h>
#define PLD_PROCESS_NUMA_MASK_BITS 256
/*
 * check_mempolicy -m <mode> [-n <nodemask>]
 *
 * Calls get_mempolicy() for the calling process and compares the result
 * with the expected values given on the command line:
 *   -m <mode>      expected mode, as a non-negative decimal number (required)
 *   -n <nodemask>  expected first word of the node mask, in decimal; it is
 *                  only compared when the expected mode is not 0
 *
 * Returns 0 when all checks pass and 1 on a failed check or an internal
 * error (bad option, get_mempolicy() failure).
 */
int main(int argc, char **argv)
{
	long ret;
	int mode;
	unsigned long nodemask[PLD_PROCESS_NUMA_MASK_BITS /
			       (sizeof(unsigned long) * 8)] = { 0 };
	int mode_expected = -1;
	unsigned long nodemask_expected[PLD_PROCESS_NUMA_MASK_BITS /
					(sizeof(unsigned long) * 8)] = { 0 };
	int opt;
	long val;
	char *end;

	while ((opt = getopt(argc, argv, "m:n:")) != -1) {
		switch (opt) {
		case 'm':
			/* reject empty, non-numeric, negative and out-of-range input */
			errno = 0;
			val = strtol(optarg, &end, 10);
			INTERR(errno || end == optarg || *end != '\0' ||
			       val < 0 || val != (int)val,
			       "invalid mode: %s\n", optarg);
			mode_expected = (int)val;
			break;
		case 'n':
			/*
			 * Parse as unsigned long so that masks which do not fit
			 * in an int are not truncated.
			 */
			errno = 0;
			nodemask_expected[0] = strtoul(optarg, &end, 10);
			INTERR(errno || end == optarg || *end != '\0',
			       "invalid nodemask: %s\n", optarg);
			break;
		default: /* '?' */
			INTERR(1, "unknown option %c\n", optopt);
		}
	}

	INTERR(mode_expected == -1, "specify -m <mode>\n");

	ret = get_mempolicy(&mode, nodemask, PLD_PROCESS_NUMA_MASK_BITS,
			    NULL, 0);
	/* errno is included because it carries the failure reason */
	INTERR(ret, "get_mempolicy failed with %ld (errno: %d)\n", ret, errno);

	OKNG(mode == mode_expected, "mode: actual (%d), expected (%d)\n",
	     mode, mode_expected);

	/* nodemask is "don't care" when mode is MPOL_DEFAULT */
	if (mode_expected != 0) {
		OKNG(nodemask[0] == nodemask_expected[0],
		     "nodemask: actual (%lu), expected (%lu)\n",
		     nodemask[0],
		     nodemask_expected[0]);
	}

	ret = 0;
out:
	return ret;
}

View File

@ -0,0 +1,9 @@
# Report one statistic for every "Normal" or "DMA32" zone found in
# /proc/zoneinfo-style input.
#
# Variables (set with -v):
#   keyword  regular expression selecting the statistic line to report
#
# Output: one line "id: <node id>, nr_free_pages: <value>" for the first
# matching statistic line that follows each selected zone header.

# id holds the node id of the zone being scanned, or -1 outside such a zone.
BEGIN { id = -1; }

# Zone header: the node id is the second field without its trailing comma.
# "[ \t]*" is used instead of the gawk-only "\s*" so that any awk can run
# this script; "+ 0" makes the id a number.
/Node .*, zone[ \t]*(Normal|DMA32)/ { id = substr($2, 1, length($2) - 1) + 0; }

{
	if ($0 ~ keyword && id != -1) {
		printf("id: %d, nr_free_pages: %ld\n", id, $2);
		# report at most one line per zone header
		id = -1;
	}
}

View File

@ -0,0 +1,13 @@
# Print the ids of the nodes whose accumulated size exceeds 2 GiB.
#
# Input: lines in which the second field is "<node id>," and the fourth field
#        is a page count; counts of lines with the same id are added up.
# Variables (set with -v):
#   page_size  size of one page in bytes
# Output: one node id per line, in ascending order.

BEGIN { max_id = -1; }

{
	# "+ 0" converts the substr() result to a number; as a string it would
	# make the loop bound below a string comparison, which ends the loop
	# early for ids of 10 and above.
	id = substr($2, 1, length($2) - 1) + 0;
	sizes[id] += $4;
	# remember the largest id so the result does not depend on input order
	if (id > max_id) {
		max_id = id;
	}
}

END {
	for (i = 0; i <= max_id; i++) {
		if (sizes[i] * page_size > 2 * 1024 * 1024 * 1024) {
			print i;
		}
	}
}