diff --git a/executer/include/defs.h b/executer/include/defs.h
index edccdc70..a5a5d37a 100644
--- a/executer/include/defs.h
+++ b/executer/include/defs.h
@@ -4671,7 +4671,6 @@ void cmd_ipcs(void); /* ipcs.c */
 /*
  * main.c
  */
-void main_loop(void);
 void exec_command(void);
 struct command_table_entry *get_command_table_entry(char *);
 void program_usage(int);
diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h
index 4360c0f7..2c1fdd4a 100644
--- a/executer/include/uprotocol.h
+++ b/executer/include/uprotocol.h
@@ -113,6 +113,18 @@ typedef unsigned long __cpu_set_unit;
 #define MPOL_NO_BSS 0x04
 #define MPOL_SHM_PREMAP 0x08
 
+/* bit width of mpol_nodemask; should be the same as process.h */
+#define PLD_PROCESS_NUMA_MASK_BITS 256
+
+enum {
+	PLD_MPOL_DEFAULT,
+	PLD_MPOL_PREFERRED,
+	PLD_MPOL_BIND,
+	PLD_MPOL_INTERLEAVE,
+	PLD_MPOL_LOCAL,
+	PLD_MPOL_MAX, /* always last member of enum; also "not specified" */
+};
+
 #define PLD_MAGIC 0xcafecafe44332211UL
 
 struct program_load_desc {
@@ -147,6 +159,10 @@ struct program_load_desc {
 	unsigned long heap_extension;
 	long stack_premap;
 	unsigned long mpol_bind_mask;
+	int mpol_mode;
+	unsigned long mpol_nodemask[PLD_PROCESS_NUMA_MASK_BITS /
+				    (sizeof(unsigned long) * 8)];
+
 	int thp_disable;
 	int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
 	int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c
index 2f9f5aab..ec19146f 100644
--- a/executer/user/mcexec.c
+++ b/executer/user/mcexec.c
@@ -84,7 +84,11 @@
 #include "../include/pmi.h"
 #include "../include/qlmpi.h"
 #include 
+#include "../include/defs.h"
 #include "../../lib/include/list.h"
+#include "../../lib/include/bitops-set_bit.h"
+#include "../../lib/include/bitops-clear_bit.h"
+#include "../../lib/include/bitops-test_bit.h"
 
 //#define DEBUG
 #define ADD_ENVS_OPTION
@@ -1056,6 +1060,64 @@ static inline cpu_set_t *numa_node_set(int n)
 	return (cpu_set_t *)(numa_nodes + n * cpu_set_size);
 }
 
+static inline void _numa_local(__cpu_set_unit *localset,
+			       unsigned long *nodemask, int nonlocal)
+{
+	int i;
+
+	memset(nodemask, 0, PLD_PROCESS_NUMA_MASK_BITS / 8);
+
+	for (i = 0; i < nnodes && i < PLD_PROCESS_NUMA_MASK_BITS; i++) {
+		cpu_set_t *nodeset = numa_node_set(i);
+		int j;
+
+		if (nonlocal) {
+			set_bit(i, nodemask);
+		}
+
+		for (j = 0; j < ncpu; j++) {
+			if (test_bit(j, localset)) {
+				__dprintf("%d belongs to local set\n", j);
+			}
+
+			if (CPU_ISSET_S(j, cpu_set_size, nodeset)) {
+				__dprintf("%d belongs to node %d\n", j, i);
+			}
+
+			if (test_bit(j, localset) &&
+			    CPU_ISSET_S(j, cpu_set_size, nodeset)) {
+				if (nonlocal) {
+					clear_bit(i, nodemask);
+				} else {
+					set_bit(i, nodemask);
+				}
+			}
+		}
+	}
+}
+
+static inline void numa_local(__cpu_set_unit *localset, unsigned long *nodemask)
+{
+	_numa_local(localset, nodemask, 0);
+}
+
+static inline void numa_nonlocal(__cpu_set_unit *localset,
+				 unsigned long *nodemask)
+{
+	_numa_local(localset, nodemask, 1);
+}
+
+static inline void numa_all(unsigned long *nodemask)
+{
+	int i;
+
+	memset(nodemask, 0, PLD_PROCESS_NUMA_MASK_BITS / 8);
+
+	for (i = 0; i < nnodes && i < PLD_PROCESS_NUMA_MASK_BITS; i++) {
+		set_bit(i, nodemask);
+	}
+}
+
 pid_t master_tid;
 
 pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
@@ -2685,6 +2747,7 @@ int main(int argc, char **argv)
 	desc->heap_extension = heap_extension;
 
 	desc->mpol_bind_mask = 0;
+	desc->mpol_mode = PLD_MPOL_MAX; /* not specified */
 	if (mpol_bind_nodes) {
 		struct bitmask *bind_mask;
 		bind_mask = numa_parse_nodestring_all(mpol_bind_nodes);
@@ -2698,6 +2761,54 @@ int main(int argc, char **argv)
 			}
 		}
 	}
+	/* Fujitsu TCS specific: mempolicy */
+	else if (getenv("OMPI_MCA_plm_ple_memory_allocation_policy")) {
+		char *mpol =
+			getenv("OMPI_MCA_plm_ple_memory_allocation_policy");
+
+		__dprintf("OMPI_MCA_plm_ple_memory_allocation_policy: %s\n",
+			  mpol);
+
+		if (!strncmp(mpol, "localalloc", 10)) {
+			/* MPOL_DEFAULT has the same effect as MPOL_LOCAL */
+			desc->mpol_mode = PLD_MPOL_DEFAULT;
+		}
+		else if (!strncmp(mpol, "interleave_local", 16)) {
+			desc->mpol_mode = PLD_MPOL_INTERLEAVE;
+			numa_local(desc->cpu_set, desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "interleave_nonlocal", 19)) {
+			desc->mpol_mode = PLD_MPOL_INTERLEAVE;
+			numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "interleave_all", 14)) {
+			desc->mpol_mode = PLD_MPOL_INTERLEAVE;
+			numa_all(desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "bind_local", 10)) {
+			desc->mpol_mode = PLD_MPOL_BIND;
+			numa_local(desc->cpu_set, desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "bind_nonlocal", 13)) {
+			desc->mpol_mode = PLD_MPOL_BIND;
+			numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "bind_all", 8)) {
+			desc->mpol_mode = PLD_MPOL_BIND;
+			numa_all(desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "prefer_local", 12)) {
+			desc->mpol_mode = PLD_MPOL_PREFERRED;
+			numa_local(desc->cpu_set, desc->mpol_nodemask);
+		}
+		else if (!strncmp(mpol, "prefer_nonlocal", 15)) {
+			desc->mpol_mode = PLD_MPOL_PREFERRED;
+			numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
+		}
+
+		__dprintf("mpol_mode: %d, mpol_nodemask: %lu\n",
+			  desc->mpol_mode, desc->mpol_nodemask[0]);
+	}
 
 	desc->uti_thread_rank = uti_thread_rank;
 	desc->uti_use_last_cpu = uti_use_last_cpu;
diff --git a/kernel/host.c b/kernel/host.c
index 6718b8be..64a05efb 100644
--- a/kernel/host.c
+++ b/kernel/host.c
@@ -542,6 +542,26 @@ static int process_msg_prepare_process(unsigned long rphys)
 		}
 		vm->numa_mem_policy = MPOL_BIND;
 	}
+	else if (pn->mpol_mode >= 0 && pn->mpol_mode < PLD_MPOL_MAX) {
+		int bit;
+
+		vm->numa_mem_policy = pn->mpol_mode;
+
+		memset(&vm->numa_mask, 0, sizeof(vm->numa_mask));
+
+		for_each_set_bit(bit, pn->mpol_nodemask,
+				 PLD_PROCESS_NUMA_MASK_BITS) {
+			if (bit >= ihk_mc_get_nr_numa_nodes()) {
+				kprintf("%s: error: NUMA id %d is larger than mask size!\n",
+					__func__, bit);
+				return -EINVAL;
+			}
+			set_bit(bit, &vm->numa_mask[0]);
+		}
+
+		dkprintf("%s: numa_mem_policy: %d, numa_mask: %ld\n",
+			 __func__, vm->numa_mem_policy, vm->numa_mask[0]);
+	}
 
 	proc->uti_thread_rank = pn->uti_thread_rank;
 	proc->uti_use_last_cpu = pn->uti_use_last_cpu;
diff --git a/kernel/include/process.h b/kernel/include/process.h
index 2c80509b..269bffca 100644
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@@ -559,6 +559,8 @@ struct process {
 	size_t mpol_threshold;
 	unsigned long heap_extension;
 	unsigned long mpol_bind_mask;
+	int mpol_mode;
+
 	int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
 	int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
 	int clone_count;
diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h
index 2505a64e..de1aad48 100644
--- a/kernel/include/syscall.h
+++ b/kernel/include/syscall.h
@@ -183,6 +183,18 @@ typedef unsigned long __cpu_set_unit;
 #define MPOL_NO_BSS 0x04
 #define MPOL_SHM_PREMAP 0x08
 
+/* bit width of mpol_nodemask; should be the same as process.h */
+#define PLD_PROCESS_NUMA_MASK_BITS 256
+
+enum {
+	PLD_MPOL_DEFAULT,
+	PLD_MPOL_PREFERRED,
+	PLD_MPOL_BIND,
+	PLD_MPOL_INTERLEAVE,
+	PLD_MPOL_LOCAL,
+	PLD_MPOL_MAX, /* always last member of enum; also "not specified" */
+};
+
 #define PLD_MAGIC 0xcafecafe44332211UL
 
 struct program_load_desc {
@@ -217,6 +229,10 @@ struct program_load_desc {
 	unsigned long heap_extension;
 	long stack_premap;
 	unsigned long mpol_bind_mask;
+	int mpol_mode;
+	unsigned long mpol_nodemask[PLD_PROCESS_NUMA_MASK_BITS /
+				    (sizeof(unsigned long) * 8)];
+
 	int thp_disable;
 	int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
 	int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
diff --git a/lib/include/bitops-test_bit.h b/lib/include/bitops-test_bit.h
new file mode 100644
index 00000000..28edce09
--- /dev/null
+++ b/lib/include/bitops-test_bit.h
@@ -0,0 +1,11 @@
+#ifndef INCLUDE_BITOPS_TEST_BIT_H
+#define INCLUDE_BITOPS_TEST_BIT_H
+
+static inline int test_bit(int nr, const void *addr)
+{
+	const uint32_t *p = (const uint32_t *)addr;
+
+	return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
+}
+
+#endif
diff --git a/lib/include/bitops.h b/lib/include/bitops.h
index 957b1702..e847e3a4 100644
--- a/lib/include/bitops.h
+++ b/lib/include/bitops.h
@@ -27,11 +27,7 @@ unsigned long find_first_bit(const unsigned long *addr,
 unsigned long find_first_zero_bit(const unsigned long *addr,
 				  unsigned long size);
 
-static inline int test_bit(int nr, const void *addr)
-{
-	const uint32_t *p = (const uint32_t *)addr;
-	return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
-}
+#include <bitops-test_bit.h>
 
 extern unsigned int __sw_hweight32(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/test/issues/1470/CMakeLists.txt b/test/issues/1470/CMakeLists.txt
new file mode 100644
index 00000000..e0e41aeb
--- /dev/null
+++ b/test/issues/1470/CMakeLists.txt
@@ -0,0 +1,123 @@
+execute_process(COMMAND bash -c "gawk '/CPU implementer/ { print \$4; exit; }' /proc/cpuinfo"
+  OUTPUT_VARIABLE CPU_IMPLEMENTER OUTPUT_STRIP_TRAILING_WHITESPACE)
+execute_process(COMMAND bash -c "gawk '/CPU architecture/ { print \$3; exit; }' /proc/cpuinfo"
+  OUTPUT_VARIABLE CPU_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE)
+execute_process(COMMAND bash -c "gawk '/CPU variant/ { print \$4; exit; }' /proc/cpuinfo"
+  OUTPUT_VARIABLE CPU_VARIANT OUTPUT_STRIP_TRAILING_WHITESPACE)
+execute_process(COMMAND bash -c "gawk '/CPU part/ { print \$4; exit; }' /proc/cpuinfo"
+  OUTPUT_VARIABLE CPU_PART OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+if(CPU_IMPLEMENTER STREQUAL "0x46" AND CPU_ARCH STREQUAL "8" AND
+    CPU_VARIANT STREQUAL "0x1" AND CPU_PART STREQUAL "0x001")
+  message("A64FX detected")
+  set(CPU_MODEL "a64fx")
+  add_definitions(-D__a64fx__)
+endif()
+
+# find first NUMA available to user (0 or 4 now)
+execute_process(COMMAND bash -c "awk -v keyword=nr_free_pages -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo.awk /proc/zoneinfo | awk -v page_size=$(getconf PAGE_SIZE) -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo_filter.awk | head -n1" OUTPUT_VARIABLE FIRST_USER_NUMA OUTPUT_STRIP_TRAILING_WHITESPACE)
+message("FIRST_USER_NUMA: ${FIRST_USER_NUMA}")
+add_definitions(-DFIRST_USER_NUMA=${FIRST_USER_NUMA})
+
+if (FIRST_USER_NUMA STREQUAL "4")
+  execute_process(COMMAND sudo bash -c "echo 0-7 > /sys/fs/cgroup/cpuset/system.slice/cpuset.mems")
+endif()
+
+cmake_policy(SET CMP0005 NEW)
+
+# Options: -DWITH_MCK=<McKernel install directory>
+add_definitions(-DWITH_MCK=${WITH_MCK})
+
+# Options: -DWITH_MCK_SRC=<McKernel source directory>
+add_definitions(-DWITH_MCK_SRC=${WITH_MCK_SRC})
+
+# for autotest
+if(NOT DEFINED CMAKE_INSTALL_PREFIX_SCRIPTS)
+  set(CMAKE_INSTALL_PREFIX_SCRIPTS ${CMAKE_INSTALL_PREFIX}/scripts)
+endif()
+
+cmake_minimum_required(VERSION 3.0)
+
+project(issue1470 C)
+
+# CPPFLAGS
+
+set(UNAME_R ${CMAKE_SYSTEM_VERSION} CACHE STRING "Kernel version to build against")
+set(KERNEL_DIR "/lib/modules/${UNAME_R}/build" CACHE STRING "kernel build directory")
+execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
+  OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+  if(CONFIG_ARM64_64K_PAGES STREQUAL "y")
+    set(PAGE_SIZE "65536")
+  else()
+    set(PAGE_SIZE "4096")
+  endif()
+else()
+  set(PAGE_SIZE "4096")
+endif()
+message("PAGE_SIZE: ${PAGE_SIZE}")
+
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+  add_definitions(-DBUILD_TARGET=smp-x86)
+  add_definitions(-DKMOD_POSTFIX=smp_x86)
+elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+  add_definitions(-DBUILD_TARGET=smp-arm64)
+  add_definitions(-DKMOD_POSTFIX=smp_arm64)
+endif()
+
+add_definitions(-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX})
+
+# CFLAGS
+set(CFLAGS_WARNING "-Wall" "-Wextra" "-Wno-unused-parameter" "-Wno-sign-compare" "-Wno-unused-function" ${EXTRA_WARNINGS} CACHE STRING "Warning flags")
+add_compile_options(
+  -O2
+  -g
+  ${CFLAGS_WARNING}
+  )
+
+
+# -L, this must be done before adding dependants
+link_directories("${WITH_MCK}/lib64")
+
+# -Wl,--rpath=, this must be done before adding dependants
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+set(CMAKE_INSTALL_RPATH "${WITH_MCK}/lib64")
+
+# test driver scripts
+foreach(target IN ITEMS
+    OMPI_MCA_plm_ple_memory_allocation_policy01
+    OMPI_MCA_plm_ple_memory_allocation_policy02
+    OMPI_MCA_plm_ple_memory_allocation_policy03
+    OMPI_MCA_plm_ple_memory_allocation_policy04
+    OMPI_MCA_plm_ple_memory_allocation_policy05
+    OMPI_MCA_plm_ple_memory_allocation_policy06
+    OMPI_MCA_plm_ple_memory_allocation_policy07
+    )
+
+  # String replacement (relative output lands in CMAKE_CURRENT_BINARY_DIR)
+  configure_file(src/${target}.sh.in ihklib-${target} @ONLY)
+
+  # Install scripts
+  install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/ihklib-${target} DESTINATION ${CMAKE_INSTALL_PREFIX_SCRIPTS})
+endforeach()
+
+# programs running on McKernel
+foreach(target IN ITEMS
+    check_mempolicy
+    )
+
+  # Add C target
+  add_executable(${target} src/${target}.c)
+
+  # -I
+  target_include_directories(${target}
+    PRIVATE "${PROJECT_SOURCE_DIR}/include"
+    )
+
+  # -l
+  target_link_libraries(${target} PRIVATE numa)
+
+  # Install
+  install(TARGETS ${target} DESTINATION bin)
+
+endforeach()
diff --git a/test/issues/1470/README b/test/issues/1470/README
new file mode 100644
index 00000000..f2fee8a7
--- /dev/null
+++ b/test/issues/1470/README
@@ -0,0 +1,32 @@
+============
+What to test
+============
+Check that the mode and nodemask returned by get_mempolicy() match the expected values for different reserved CPUs, CPU masks and policy requests.
+The following settings are used.
+
+	Memory reserved: "1G@4,1G@5,1G@6,1G@7"
+
+	CPUs reserved: "12-59", "24-59"
+	FLIB_AFFINITY_ON_PROCESS:
+		"12-23", "24-35", "36-47", "48-59",
+		"12-35", "24-47", "36-59",
+		"12-47", "24-59",
+		"12-59"
+	OMPI_MCA_plm_ple_memory_allocation_policy:
+		{interleave,bind,prefer}_{local,nonlocal},
+		{interleave,bind}_all,
+		localalloc
+
+============
+How to build
+============
+cd <mckernel>/test/issues
+mkdir build
+cd build
+cmake ../1470/ -DCMAKE_INSTALL_PREFIX=<mckernel>/test/issues/install -DWITH_MCK=<McKernel install directory> -DWITH_MCK_SRC=<McKernel source directory>
+
+===========
+How to test
+===========
+for i in {1..7}; do <mckernel>/test/issues/install/scripts/ihklib-OMPI_MCA_plm_ple_memory_allocation_policy0$i; done
+Check that no "[ NG ]" is shown.
diff --git a/test/issues/1470/include/okng.h b/test/issues/1470/include/okng.h
new file mode 100644
index 00000000..c3cfbebc
--- /dev/null
+++ b/test/issues/1470/include/okng.h
@@ -0,0 +1,33 @@
+#ifndef __OKNG_H_INCLUDED__
+#define __OKNG_H_INCLUDED__
+
+#include <stdio.h>
+
+#define _OKNG(verb, jump, cond, fmt, args...) do { \
+	if (cond) { \
+		if (verb) \
+			printf("[ OK ] " fmt, ##args); \
+	} else { \
+		printf("[ NG ] " fmt, ##args); \
+		if (jump) { \
+			ret = 1; \
+			goto out; \
+		} \
+	} \
+} while (0)
+
+#define OKNG(args...) _OKNG(1, 1, ##args)
+#define INFO(fmt, args...) printf("[ INFO ] " fmt, ##args)
+#define START(fmt, args...) printf("[ START] " fmt, ##args)
+#define INTERR(cond, fmt, args...) do { \
+	if (cond) { \
+		char msg[4096]; \
+		snprintf(msg, sizeof(msg), fmt, ##args); \
+		printf("[INTERR] %s:%d %s", __FILE__, __LINE__, msg); \
+		ret = 1; \
+		goto out; \
+	} \
+} while (0)
+#define ARRAY_SIZE_CHECK(array, size) INTERR(sizeof(array)/sizeof(array[0]) != size, "size of array \"%s\" isn't %d\n", #array, size)
+
+#endif
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy01.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy01.sh.in
new file mode 100644
index 00000000..899819dd
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy01.sh.in
@@ -0,0 +1,53 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_local]="3"
+mode[bind_local]="2"
+mode[prefer_local]="1"
+
+declare -A nodemask
+
+nodemask[$(seq -s, 12 23)]="1"
+nodemask[$(seq -s, 24 35)]="2"
+nodemask[$(seq -s, 36 47)]="4"
+nodemask[$(seq -s, 48 59)]="8"
+nodemask[$(seq -s, 12 35)]="3"
+nodemask[$(seq -s, 24 47)]="6"
+nodemask[$(seq -s, 36 59)]="12"
+nodemask[$(seq -s, 12 47)]="7"
+nodemask[$(seq -s, 24 59)]="14"
+nodemask[$(seq -s, 12 59)]="15"
+
+@WITH_MCK@/sbin/mcstop+release.sh
+@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_local bind_local prefer_local; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
+		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
+		$(seq -s, 12 47) $(seq -s, 24 59) \
+		$(seq -s, 12 59); do
+
+		# check if policy is not set when not specified
+		if (( i++ == 0 )); then
+			FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m 0 || exit $?
+		fi
+
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy02.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy02.sh.in
new file mode 100644
index 00000000..0285044c
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy02.sh.in
@@ -0,0 +1,48 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_nonlocal]="3"
+mode[bind_nonlocal]="2"
+mode[prefer_nonlocal]="1"
+
+declare -A nodemask
+
+nodemask[$(seq -s, 12 23)]="14"
+nodemask[$(seq -s, 24 35)]="13"
+nodemask[$(seq -s, 36 47)]="11"
+nodemask[$(seq -s, 48 59)]="7"
+nodemask[$(seq -s, 12 35)]="12"
+nodemask[$(seq -s, 24 47)]="9"
+nodemask[$(seq -s, 36 59)]="3"
+nodemask[$(seq -s, 12 47)]="8"
+nodemask[$(seq -s, 24 59)]="1"
+nodemask[$(seq -s, 12 59)]="0"
+
+@WITH_MCK@/sbin/mcstop+release.sh
+@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
+		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
+		$(seq -s, 12 47) $(seq -s, 24 59) \
+		$(seq -s, 12 59); do
+
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy03.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy03.sh.in
new file mode 100644
index 00000000..e48dd5d8
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy03.sh.in
@@ -0,0 +1,34 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_all]="3"
+mode[bind_all]="2"
+
+@WITH_MCK@/sbin/mcstop+release.sh
+@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_all bind_all; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
+		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
+		$(seq -s, 12 47) $(seq -s, 24 59) \
+		$(seq -s, 12 59); do
+
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n 15 || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy04.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy04.sh.in
new file mode 100644
index 00000000..999f7cc5
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy04.sh.in
@@ -0,0 +1,30 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+@WITH_MCK@/sbin/mcstop+release.sh
+@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in localalloc; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
+		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
+		$(seq -s, 12 47) $(seq -s, 24 59) \
+		$(seq -s, 12 59); do
+
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m 0 || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy05.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy05.sh.in
new file mode 100644
index 00000000..da940333
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy05.sh.in
@@ -0,0 +1,44 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_local]="3"
+mode[bind_local]="2"
+mode[prefer_local]="1"
+
+declare -A nodemask
+
+nodemask[$(seq -s, 12 23)]="2"
+nodemask[$(seq -s, 24 35)]="4"
+nodemask[$(seq -s, 36 47)]="8"
+nodemask[$(seq -s, 12 35)]="6"
+nodemask[$(seq -s, 24 47)]="12"
+nodemask[$(seq -s, 12 47)]="14"
+
+# reserve the last 36 cpus
+@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_local bind_local prefer_local; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
+		$(seq -s, 12 35) $(seq -s, 24 47) \
+		$(seq -s, 12 47); do
+
+		# check nodemask when last 36 cpus are reserved
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy06.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy06.sh.in
new file mode 100644
index 00000000..aa55b906
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy06.sh.in
@@ -0,0 +1,44 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_nonlocal]="3"
+mode[bind_nonlocal]="2"
+mode[prefer_nonlocal]="1"
+
+declare -A nodemask
+
+nodemask[$(seq -s, 12 23)]="13"
+nodemask[$(seq -s, 24 35)]="11"
+nodemask[$(seq -s, 36 47)]="7"
+nodemask[$(seq -s, 12 35)]="9"
+nodemask[$(seq -s, 24 47)]="3"
+nodemask[$(seq -s, 12 47)]="1"
+
+# reserve the last 36 cpus
+@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
+		$(seq -s, 12 35) $(seq -s, 24 47) \
+		$(seq -s, 12 47); do
+
+		# check nodemask when last 36 cpus are reserved
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy07.sh.in b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy07.sh.in
new file mode 100644
index 00000000..d9637d98
--- /dev/null
+++ b/test/issues/1470/src/OMPI_MCA_plm_ple_memory_allocation_policy07.sh.in
@@ -0,0 +1,34 @@
+#!/usr/bin/bash
+
+# define WORKDIR
+SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
+SCRIPT_NAME="${SCRIPT_PATH##*/}"
+TEST_NAME="${SCRIPT_NAME%.sh}"
+AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
+if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
+	. ${AUTOTEST_HOME}/bin/config.sh
+else
+	WORKDIR=$(pwd)
+fi
+
+declare -A mode
+mode[interleave_all]="3"
+mode[bind_all]="2"
+
+# reserve the last 36 cpus
+@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7
+
+for policy in interleave_all bind_all; do
+	for cpuset in \
+		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
+		$(seq -s, 12 35) $(seq -s, 24 47) \
+		$(seq -s, 12 47); do
+
+		# check nodemask when last 36 cpus are reserved
+		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n 15 || exit $?
+	done
+done
+
+@WITH_MCK@/sbin/mcstop+release.sh
+
+exit 0
diff --git a/test/issues/1470/src/check_mempolicy.c b/test/issues/1470/src/check_mempolicy.c
new file mode 100644
index 00000000..77b22434
--- /dev/null
+++ b/test/issues/1470/src/check_mempolicy.c
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <numaif.h>
+#include <okng.h>
+
+#define PLD_PROCESS_NUMA_MASK_BITS 256
+
+int main(int argc, char **argv)
+{
+	long ret;
+	int mode;
+	unsigned long nodemask[PLD_PROCESS_NUMA_MASK_BITS /
+			       (sizeof(unsigned long) * 8)] = { 0 };
+	int mode_expected = -1;
+	unsigned long nodemask_expected[PLD_PROCESS_NUMA_MASK_BITS /
+					(sizeof(unsigned long) * 8)] = { 0 };
+	int opt;
+
+	while ((opt = getopt(argc, argv, "m:n:")) != -1) {
+		switch (opt) {
+		case 'm':
+			mode_expected = atoi(optarg);
+			break;
+		case 'n':
+			nodemask_expected[0] = strtoul(optarg, NULL, 10);
+			break;
+		default: /* '?' */
+			INTERR(1, "unknown option %c\n", optopt);
+		}
+	}
+
+	INTERR(mode_expected == -1, "specify -m <mode>\n");
+
+	ret = get_mempolicy(&mode, nodemask, PLD_PROCESS_NUMA_MASK_BITS,
+			    NULL, 0);
+	INTERR(ret, "get_mempolicy failed with %ld\n", ret);
+
+	OKNG(mode == mode_expected, "mode: actual (%d), expected (%d)\n",
+	     mode, mode_expected);
+
+	/* nodemask is "don't care" when mode is MPOL_DEFAULT */
+	if (mode_expected != 0) {
+		OKNG(nodemask[0] == nodemask_expected[0],
+		     "nodemask: actual (%lu), expected (%lu)\n",
+		     nodemask[0],
+		     nodemask_expected[0]);
+	}
+
+	ret = 0;
+ out:
+	return ret;
+}
diff --git a/test/issues/1470/src/zoneinfo.awk b/test/issues/1470/src/zoneinfo.awk
new file mode 100644
index 00000000..ef3438e8
--- /dev/null
+++ b/test/issues/1470/src/zoneinfo.awk
@@ -0,0 +1,9 @@
+BEGIN { id = -1; }
+/Node .*, zone\s*(Normal|DMA32)/ { id = substr($2, 1, length($2) - 1); }
+{
+	if ($0 ~ keyword && id != -1) {
+		printf("id: %d, nr_free_pages: %ld\n", id, $2);
+		id = -1;
+	}
+}
+
diff --git a/test/issues/1470/src/zoneinfo_filter.awk b/test/issues/1470/src/zoneinfo_filter.awk
new file mode 100644
index 00000000..b21349d5
--- /dev/null
+++ b/test/issues/1470/src/zoneinfo_filter.awk
@@ -0,0 +1,13 @@
+{
+	id = substr($2, 1, length($2) - 1);
+	size = $4;
+	sizes[id] += size;
+}
+
+END {
+	for (i = 0; i <= id; i++) {
+		if (sizes[i] * page_size > 2 * 1024 * 1024 * 1024) {
+			print i;
+		}
+	}
+}