Compare commits

...

30 Commits

Author SHA1 Message Date
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn was added by the following Linux kernel commit:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
This prevents the change "TO RESET: send SIGSTOP instead of SIGV in PF" from
breaking tests that expect SIGSEGV.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
2fa1c053d7 spec: prerelease 0.3 for testing ihk_reserve_mem and memory policy
Change-Id: I4fbcfa1f93522fd01af42d1ef13d0be075086773
2020-12-24 15:11:01 +09:00
530110e3a9 Tofu: fix ENABLE_TOFU switching
Change-Id: Ib33323d4b59ea8fb4f5f40dff7ea25a36773d5e2
2020-12-24 15:00:14 +09:00
f6ed44aeec spec: prerelease 0.2 for testing ihk_reserve_mem and memory policy
Change-Id: I9ff171c5d65b5f465ce7a2767be1a710de0a0400
2020-12-24 11:23:17 +09:00
33dd2e60b1 mcexec: memory policy control by environmental variable
Refs: #1470
Change-Id: I3d556cae90d31d81572b1c4e5c680e826577d428
2020-12-24 11:18:01 +09:00
57 changed files with 1853 additions and 39 deletions

View File

@ -10,7 +10,7 @@ project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.1")
set(MCKERNEL_RELEASE "0.4")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@ -26,10 +26,8 @@ endif()
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
endif()
include(GNUInstallDirs)
@ -52,6 +50,40 @@ if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
# Seed the default of ENABLE_TOFU from the build host: the presence of the
# /proc/tofu directory is used as the indicator that Tofu is available.
# The check uses CMake's built-in filesystem test instead of spawning
# bash/ls/wc, so it does not depend on those tools being installed.
# PROC_TOFU is still set to "1"/"0" for anything that reads it later.
# option() does not overwrite a value that is already in the cache, so an
# explicit -DENABLE_TOFU=ON/OFF on the command line still takes precedence.
if(IS_DIRECTORY "/proc/tofu")
  set(PROC_TOFU "1")
  option(ENABLE_TOFU "Built-in tofu driver support" ON)
else()
  set(PROC_TOFU "0")
  option(ENABLE_TOFU "Built-in tofu driver support" OFF)
endif()

# Export the switch to C code as a preprocessor definition, both for targets
# built by CMake in this directory tree and through KBUILD_C_FLAGS.
if(ENABLE_TOFU)
  add_definitions(-DENABLE_TOFU)
  set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
# Look this host up in the FEFS node list and keep the second CSV field of
# the matching line.  A value of "CN" turns the Fugaku-specific switches on
# by default; any other result (including a missing file) leaves them off.
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2"
  OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)

set(_fugaku_default OFF)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
  set(_fugaku_default ON)
endif()

# Both switches share the same host-derived default; a value already present
# in the cache (e.g. from -D on the command line) is left untouched.
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ${_fugaku_default})
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" ${_fugaku_default})

# Each enabled switch becomes -DENABLE_<name> for CMake-built sources and is
# appended to KBUILD_C_FLAGS as well.
foreach(_fugaku_feature FUGAKU_HACKS FUGAKU_DEBUG)
  if(ENABLE_${_fugaku_feature})
    add_definitions(-DENABLE_${_fugaku_feature})
    set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_${_fugaku_feature}")
  endif()
endforeach()
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
@ -255,6 +287,8 @@ message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")

View File

@ -730,6 +730,49 @@ static void show_context_stack(struct pt_regs *regs)
}
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a call-stack trace by walking the chain of frame records that
 * starts at @sp.
 *
 * @thread:         thread whose TID is printed in the header line
 * @pc:             program counter printed for the innermost frame
 * @sp:             address of the first frame record; every record is read
 *                  as two words: { saved frame pointer, return address }
 * @kprintf_locked: non-zero when the caller already holds the kprintf lock;
 *                  the lock is then neither taken nor released here
 *
 * The top of the stack is taken to be @sp rounded up to KERNEL_STACK_SIZE.
 * The walk ends as soon as a record would not fit below that limit, the
 * saved frame pointer does not move towards the stack top, or the return
 * address falls outside [_head, _end].
 */
void __show_context_stack(struct thread *thread,
		unsigned long pc, uintptr_t sp, int kprintf_locked)
{
	extern char _head[], _end[];
	const uintptr_t stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
	unsigned long irqflags = 0;

	if (!kprintf_locked)
		irqflags = kprintf_lock();

	__kprintf("TID: %d, call stack (most recent first):\n",
		  thread->tid);
	__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);

	for (;;) {
		uintptr_t next_fp, ret_addr;

		/*
		 * Both words of the frame record must lie below stack_top.
		 * Without this check a record located at (or straddling)
		 * the stack top would be read from beyond the stack.
		 */
		if (stack_top < sp ||
		    stack_top - sp < 2 * sizeof(uintptr_t)) {
			break;
		}

		next_fp = *(uintptr_t *)sp;
		ret_addr = *(uintptr_t *)(sp + 8);

		/* The chain must advance strictly towards the stack top */
		if (next_fp <= sp) {
			break;
		}

		if (next_fp > stack_top) {
			break;
		}

		/* Only follow return addresses inside the kernel image */
		if (ret_addr < (unsigned long)_head ||
		    ret_addr > (unsigned long)_end) {
			break;
		}

		/*
		 * AArch64 instructions are 4 bytes wide, so the return
		 * address minus 4 is the address of the calling instruction.
		 */
		__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n",
			  ret_addr - 4, sp, next_fp);
		sp = next_fp;
	}

	if (!kprintf_locked)
		kprintf_unlock(irqflags);
}
#endif
void handle_IPI(unsigned int vector, struct pt_regs *regs)
{
struct ihk_mc_interrupt_handler *h;
@ -791,6 +834,19 @@ void cpu_safe_halt(void)
cpu_enable_interrupt();
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
/*
 * Halt variant used on the panic path: interrupts are enabled before the
 * CPU is put into idle, so this CPU can still take interrupts while halted.
 */
void cpu_halt_panic(void)
{
/* Declared locally; the definition is not visible in this file section. */
extern void __cpu_do_idle(void);
cpu_enable_interrupt();
__cpu_do_idle();
}
#endif
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
@ -856,6 +912,21 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Tell whether interrupts are currently masked through the GIC priority
 * mask register.
 *
 * Reads ICC_PMR_EL1 and returns non-zero if and only if the value equals
 * ICC_PMR_EL1_MASKED.
 */
int cpu_interrupt_disabled(void)
{
	unsigned long pmr;

	asm volatile(
		"mrs_s %0, " __stringify(ICC_PMR_EL1)
		: "=&r" (pmr)
		:
		: "memory");

	return pmr == (unsigned long)ICC_PMR_EL1_MASKED;
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
@ -1377,6 +1448,14 @@ void arch_print_stack(void)
{
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the program counter stored in a saved register context.
 *
 * @reg: pointer to a struct pt_regs, passed as an opaque pointer
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	return ((const struct pt_regs *)reg)->pc;
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;

View File

@ -223,8 +223,12 @@ static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
// XXX: Handle kernel space page faults for Tofu driver
//if (addr < USER_END)
#else
if (addr < USER_END)
#endif
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);

View File

@ -9,6 +9,9 @@
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -31,6 +34,10 @@ typedef struct {
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -98,6 +105,18 @@ static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
: "memory");
success = !tmp;
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
if (!success) {
preempt_enable();
}
@ -182,6 +201,14 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
#ifdef DEBUG_SPINLOCK

View File

@ -94,7 +94,11 @@ extern char _end[];
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
#ifdef ENABLE_TOFU
# define MAP_VMAP_START UL(0xffff7bdfffff0000)
#else
# define MAP_VMAP_START UL(0xffff780000000000)
#endif
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)

View File

@ -7,6 +7,9 @@
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
@ -313,14 +316,27 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once panicked, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
@ -335,7 +351,12 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}

View File

@ -217,11 +217,13 @@ static inline int ptl4_index(unsigned long addr)
int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK;
return idx;
}
#ifdef ENABLE_TOFU
/*
 * Extract the level-3 table index of @addr using the Linux-side index
 * mask (PTL3_INDEX_MASK_LINUX).
 */
static inline int ptl3_index_linux(unsigned long addr)
{
	return (int)((addr >> PTL3_SHIFT) & PTL3_INDEX_MASK_LINUX);
}
#endif
static inline int ptl3_index(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK;
@ -281,6 +283,7 @@ static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long
return ptep;
}
#ifdef ENABLE_TOFU
static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
{
pte_t* ptep = NULL;
@ -311,6 +314,7 @@ static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
}
return ptep;
}
#endif
static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr)
{
@ -991,10 +995,12 @@ static void init_normal_area(struct page_table *pt)
tt = get_translation_table(pt);
#ifdef ENABLE_TOFU
setup(tt,
arm64_st_phys_base,
arm64_st_phys_base + (1UL << 40));
return;
#endif
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) {
unsigned long map_start, map_end;
@ -1323,6 +1329,7 @@ out:
return ret;
}
#ifdef ENABLE_TOFU
int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,
unsigned long *phys,
@ -1373,7 +1380,7 @@ out:
if(size) *size = lsize;
return 0;
}
#endif
int ihk_mc_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,

View File

@ -1071,6 +1071,9 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart,
if (k->sa.sa_flags & SA_RESTORER){
regs->regs[30] = (unsigned long)k->sa.sa_restorer;
#ifdef ENABLE_FUGAKU_HACKS
kprintf("%s: SA_RESTORER: 0x%lx\n", __func__, regs->regs[30]);
#endif
} else {
regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp);
}
@ -1723,6 +1726,7 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
#ifndef ENABLE_FUGAKU_HACKS
if (flags & MAP_HUGETLB) {
int hugeshift = flags & (0x3F << MAP_HUGE_SHIFT);
@ -1763,6 +1767,11 @@ SYSCALL_DECLARE(mmap)
goto out;
}
}
#else
if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB);
}
#endif
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;

View File

@ -174,13 +174,14 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
arch_show_interrupt_context(regs);
#if 0
#ifdef ENABLE_TOFU
info.si_signo = SIGSTOP;
info.si_errno = 0;
#else
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
#endif
info.si_signo = SIGSTOP;
info.si_errno = 0;
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);

View File

@ -868,6 +868,49 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a call-stack trace by walking the chain of frame records that
 * starts at @sp.
 *
 * @thread:         thread whose TID is printed in the header line
 * @pc:             program counter printed for the innermost frame
 * @sp:             address of the first frame record; every record is read
 *                  as two words: { saved frame pointer, return address }
 * @kprintf_locked: non-zero when the caller already holds the kprintf lock;
 *                  the lock is then neither taken nor released here
 *
 * The top of the stack is taken to be @sp rounded up to KERNEL_STACK_SIZE.
 * The walk ends as soon as a record would not fit below that limit, the
 * saved frame pointer does not move towards the stack top, or the return
 * address falls outside [_head, _end].
 */
void __show_context_stack(struct thread *thread,
		unsigned long pc, uintptr_t sp, int kprintf_locked)
{
	extern char _head[], _end[];
	const uintptr_t stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
	unsigned long irqflags = 0;

	if (!kprintf_locked)
		irqflags = kprintf_lock();

	__kprintf("TID: %d, call stack (most recent first):\n",
		  thread->tid);
	__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);

	for (;;) {
		uintptr_t next_fp, ret_addr;

		/*
		 * Both words of the frame record must lie below stack_top.
		 * Without this check a record located at (or straddling)
		 * the stack top would be read from beyond the stack.
		 */
		if (stack_top < sp ||
		    stack_top - sp < 2 * sizeof(uintptr_t)) {
			break;
		}

		next_fp = *(uintptr_t *)sp;
		ret_addr = *(uintptr_t *)(sp + 8);

		/* The chain must advance strictly towards the stack top */
		if (next_fp <= sp) {
			break;
		}

		if (next_fp > stack_top) {
			break;
		}

		/* Only follow return addresses inside the kernel image */
		if (ret_addr < (unsigned long)_head ||
		    ret_addr > (unsigned long)_end) {
			break;
		}

		/*
		 * NOTE(review): the "- 4" adjustment matches a fixed 4-byte
		 * instruction width; on x86 it does not necessarily yield
		 * the address of the call instruction -- confirm intent.
		 */
		__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n",
			  ret_addr - 4, sp, next_fp);
		sp = next_fp;
	}

	if (!kprintf_locked)
		kprintf_unlock(irqflags);
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
@ -1137,6 +1180,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
/*
 * Halt variant used on the panic path; on this architecture it simply
 * forwards to cpu_halt().
 * NOTE(review): the annotation preceding this function states that
 * interrupts are enabled on return, but nothing here enables them
 * explicitly -- confirm against cpu_halt().
 */
void cpu_halt_panic(void)
{
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@ -1521,6 +1575,16 @@ void arch_print_stack(void)
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the instruction pointer stored in a saved user context.
 *
 * @reg: pointer to a struct x86_user_context, passed as an opaque pointer;
 *       the value is taken from its general-purpose register block (gpr.rip)
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	const struct x86_user_context *context = reg;

	return context->gpr.rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;

View File

@ -451,4 +451,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
/*
 * ALIGN_UP rounds x up, ALIGN_DOWN rounds x down, to a multiple of align.
 * Both rely on masking with ~(align - 1), so align must be a power of two.
 * The arguments are expanded more than once; avoid side effects in them.
 */
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif

View File

@ -53,5 +53,9 @@ struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@ -21,7 +21,9 @@
#include <registers.h>
#include <string.h>
#ifndef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#endif
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */

View File

@ -16,9 +16,6 @@
/* whether perf is enabled */
#cmakedefine ENABLE_PERF 1
/* whether built-in tofu driver is enabled */
#cmakedefine ENABLE_TOFU 1
/* whether qlmpi is enabled */
#cmakedefine ENABLE_QLMPI 1

View File

@ -4671,7 +4671,7 @@ void cmd_ipcs(void); /* ipcs.c */
/*
* main.c
*/
void main_loop(void);
//void main_loop(void);
void exec_command(void);
struct command_table_entry *get_command_table_entry(char *);
void program_usage(int);

View File

@ -113,6 +113,18 @@ typedef unsigned long __cpu_set_unit;
#define MPOL_NO_BSS 0x04
#define MPOL_SHM_PREMAP 0x08
/* should be the same as process.h */
#define PLD_PROCESS_NUMA_MASK_BITS 256
/*
 * Memory-policy modes for program_load_desc.mpol_mode.  The enumerators
 * take consecutive values starting at 0; PLD_MPOL_MAX doubles as the
 * "not specified" marker.
 * NOTE(review): code that fills mpol_mode may use MPOL_* constants from
 * another header instead of these names; the numeric values presumably
 * match -- confirm.
 */
enum {
PLD_MPOL_DEFAULT,
PLD_MPOL_PREFERRED,
PLD_MPOL_BIND,
PLD_MPOL_INTERLEAVE,
PLD_MPOL_LOCAL,
PLD_MPOL_MAX, /* always last member of enum */
};
#define PLD_MAGIC 0xcafecafe44332211UL
struct program_load_desc {
@ -147,12 +159,18 @@ struct program_load_desc {
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int mpol_mode;
unsigned long mpol_nodemask[PLD_PROCESS_NUMA_MASK_BITS /
(sizeof(unsigned long) * 8)];
int thp_disable;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
@ -199,7 +217,9 @@ struct syscall_response {
unsigned long req_thread_status;
long ret;
unsigned long fault_address;
#ifdef ENABLE_TOFU
void *pde_data;
#endif
};
struct syscall_ret_desc {

View File

@ -28,6 +28,7 @@ void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef ENABLE_TOFU
/* Tofu CQ and barrier gate release functions */
struct file_operations *mcctrl_tof_utofu_procfs_ops_cq;
int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
@ -35,6 +36,7 @@ int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
struct file_operations *mcctrl_tof_utofu_procfs_ops_bch;
int (*mcctrl_tof_utofu_release_bch)(struct inode *inode,
struct file *filp);
#endif
int arch_symbols_init(void)
{
@ -52,6 +54,7 @@ int arch_symbols_init(void)
return -EFAULT;
#endif
#ifdef ENABLE_TOFU
mcctrl_tof_utofu_procfs_ops_cq =
(void *)kallsyms_lookup_name("tof_utofu_procfs_ops_cq");
if (WARN_ON(!mcctrl_tof_utofu_procfs_ops_cq))
@ -71,6 +74,7 @@ int arch_symbols_init(void)
(void *)kallsyms_lookup_name("tof_utofu_release_bch");
if (WARN_ON(!mcctrl_tof_utofu_release_bch))
return -EFAULT;
#endif
return 0;
}
@ -360,6 +364,15 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
// page table to translation_table.
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE);
#ifdef ENABLE_FUGAKU_HACKS
if (!phys) {
pr_err("%s(): ERROR: VA: 0x%lx, rpt is NULL for PID %d\n",
__func__, rva, task_tgid_vnr(current));
error = -EFAULT;
goto out;
}
#endif
tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
rpa = (unsigned long)tbl->tt_pa;
@ -448,6 +461,7 @@ out:
}
#ifdef ENABLE_TOFU
/*
* Tofu CQ and BCH release handlers
*/
@ -549,3 +563,4 @@ int __mcctrl_tof_utofu_release_bch(struct inode *inode, struct file *filp)
return __mcctrl_tof_utofu_release_handler(inode, filp,
mcctrl_tof_utofu_release_bch);
}
#endif

View File

@ -3582,7 +3582,11 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
*ret_cpu = ch->send.queue->read_cpu;
ret = 0;
#ifndef ENABLE_FUGAKU_HACKS
pr_info("%s: OS: %lx, CPU: %d\n",
#else
dprintk("%s: OS: %lx, CPU: %d\n",
#endif
__func__, (unsigned long)os, *ret_cpu);
out_put_ppd:
@ -3646,7 +3650,11 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
/* Notify caller (for future async implementation) */
atomic_set(&desc->sync, 1);
#ifndef ENABLE_FUGAKU_HACKS
dprintk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#else
printk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#endif
__FUNCTION__,
(op == MCCTRL_OS_CPU_READ_REGISTER ? "READ" : "WRITE"), cpu,
desc->addr_ext, desc->val);

View File

@ -50,7 +50,9 @@ extern void procfs_exit(int);
extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
#ifdef ENABLE_TOFU
extern void mcctrl_file_to_pidfd_hash_init(void);
#endif
extern int mcctrl_os_read_cpu_register(ihk_os_t os, int cpu,
struct ihk_os_cpu_register *desc);
@ -233,7 +235,6 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
@ -325,7 +326,9 @@ static int __init mcctrl_init(void)
}
binfmt_mcexec_init();
#ifdef ENABLE_TOFU
mcctrl_file_to_pidfd_hash_init();
#endif
if ((ret = symbols_init()))
goto error;

View File

@ -560,6 +560,7 @@ struct uti_futex_resp {
wait_queue_head_t wq;
};
#ifdef ENABLE_TOFU
/*
* Hash table to keep track of files and related processes
* and file descriptors.
@ -585,3 +586,4 @@ struct mcctrl_file_to_pidfd *mcctrl_file_to_pidfd_hash_lookup(
int mcctrl_file_to_pidfd_hash_remove(struct file *filp,
ihk_os_t os, struct task_struct *group_leader, int fd);
#endif
#endif

View File

@ -692,14 +692,20 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma->vm_start, vma->vm_end, pgsize, pix);
}
}
else
else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
error = vmf_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
if (error == VM_FAULT_NOPAGE) {
dprintk("%s: vmf_insert_pfn returned %d\n",
__func__, error);
error = 0;
}
#else
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
#endif
}
if (error) {
pr_err("%s: vm_insert_pfn returned %d\n",
__func__, error);
@ -1843,6 +1849,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
return ret;
}
#ifdef ENABLE_TOFU
struct list_head mcctrl_file_to_pidfd_hash[MCCTRL_FILE_2_PIDFD_HASH_SIZE];
spinlock_t mcctrl_file_to_pidfd_hash_lock;
@ -1971,7 +1978,7 @@ unlock_out:
spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
return ret;
}
#endif
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid)
@ -2458,6 +2465,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
dprintk("%s: system call: %lx\n", __FUNCTION__, sc->args[0]);
switch (sc->number) {
#ifdef ENABLE_TOFU
case __NR_close: {
struct fd f;
int fd;
@ -2478,6 +2486,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
break;
}
#endif
case __NR_mmap:
ret = pager_call(os, sc);
break;

View File

@ -1179,7 +1179,7 @@ static int start_gdb(void) {
sprintf(buf, "target remote :%d", ntohs(sin.sin_port));
execlp("gdb", "eclair", "-q", "-ex", "set prompt (eclair) ",
"-ex", buf, opt.kernel_path, NULL);
"-ex", buf, opt.kernel_path, "-ex", "set pagination off", NULL);
perror("execlp");
return 3;
}

View File

@ -68,13 +68,13 @@
#include <sys/user.h>
#endif /* !__aarch64__ */
#include <sys/prctl.h>
#include "../../config.h"
#include "../include/uprotocol.h"
#include <ihk/ihk_host_user.h>
#include "../include/uti.h"
#include <getopt.h>
#include "archdep.h"
#include "arch_args.h"
#include "../../config.h"
#include <numa.h>
#include <numaif.h>
#include <spawn.h>
@ -84,7 +84,11 @@
#include "../include/pmi.h"
#include "../include/qlmpi.h"
#include <sys/xattr.h>
#include "../include/defs.h"
#include "../../lib/include/list.h"
#include "../../lib/include/bitops-set_bit.h"
#include "../../lib/include/bitops-clear_bit.h"
#include "../../lib/include/bitops-test_bit.h"
//#define DEBUG
#define ADD_ENVS_OPTION
@ -200,7 +204,9 @@ static char *mpol_bind_nodes = NULL;
static int uti_thread_rank = 0;
static int uti_use_last_cpu = 0;
static int enable_uti = 0;
#ifdef ENABLE_TOFU
static int enable_tofu = 0;
#endif
/* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0;
@ -1056,6 +1062,64 @@ static inline cpu_set_t *numa_node_set(int n)
return (cpu_set_t *)(numa_nodes + n * cpu_set_size);
}
/*
 * Build a NUMA node mask relative to a set of CPUs.
 *
 * @localset: bitmap of CPUs that count as "local"
 * @nodemask: output bitmap; the first PLD_PROCESS_NUMA_MASK_BITS bits are
 *            cleared before the result is written
 * @nonlocal: 0 selects every node that contains at least one local CPU,
 *            non-zero selects every node that contains none
 */
static inline void _numa_local(__cpu_set_unit *localset,
		unsigned long *nodemask, int nonlocal)
{
	int node;

	memset(nodemask, 0, PLD_PROCESS_NUMA_MASK_BITS / 8);

	for (node = 0; node < nnodes; node++) {
		cpu_set_t *node_cpus = numa_node_set(node);
		int cpu;

		/* Non-local mode: assume the node qualifies until disproved */
		if (nonlocal) {
			set_bit(node, nodemask);
		}

		for (cpu = 0; cpu < ncpu; cpu++) {
			int is_local = test_bit(cpu, localset);
			int on_node = CPU_ISSET_S(cpu, cpu_set_size, node_cpus);

			if (is_local) {
				__dprintf("%d belongs to local set\n", cpu);
			}

			if (on_node) {
				__dprintf("%d belongs to node %d\n", cpu, node);
			}

			if (!is_local || !on_node) {
				continue;
			}

			/* A local CPU lives on this node */
			if (nonlocal) {
				clear_bit(node, nodemask);
			} else {
				set_bit(node, nodemask);
			}
		}
	}
}
/* Fill nodemask with the nodes that contain at least one CPU of localset. */
static inline void numa_local(__cpu_set_unit *localset, unsigned long *nodemask)
{
_numa_local(localset, nodemask, 0);
}
/* Fill nodemask with the nodes that contain no CPU of localset. */
static inline void numa_nonlocal(__cpu_set_unit *localset,
unsigned long *nodemask)
{
_numa_local(localset, nodemask, 1);
}
static inline void numa_all(unsigned long *nodemask)
{
int i;
memset(nodemask, 0, PLD_PROCESS_NUMA_MASK_BITS / 8);
for (i = 0; i < nnodes; i++) {
set_bit(i, nodemask);
}
}
pid_t master_tid;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
@ -1725,12 +1789,14 @@ static struct option mcexec_options[] = {
.flag = &enable_uti,
.val = 1,
},
#ifdef ENABLE_TOFU
{
.name = "enable-tofu",
.has_arg = no_argument,
.flag = &enable_tofu,
.val = 1,
},
#endif
{
.name = "debug-mcexec",
.has_arg = no_argument,
@ -2685,6 +2751,7 @@ int main(int argc, char **argv)
desc->heap_extension = heap_extension;
desc->mpol_bind_mask = 0;
desc->mpol_mode = PLD_MPOL_MAX; /* not specified */
if (mpol_bind_nodes) {
struct bitmask *bind_mask;
bind_mask = numa_parse_nodestring_all(mpol_bind_nodes);
@ -2698,6 +2765,54 @@ int main(int argc, char **argv)
}
}
}
/* Fujitsu TCS specific: mempolicy */
else if (getenv("OMPI_MCA_plm_ple_memory_allocation_policy")) {
char *mpol =
getenv("OMPI_MCA_plm_ple_memory_allocation_policy");
__dprintf("OMPI_MCA_plm_ple_memory_allocation_policy: %s\n",
mpol);
if (!strncmp(mpol, "localalloc", 10)) {
/* MPOL_DEFAULT has the same effect as MPOL_LOCAL */
desc->mpol_mode = MPOL_DEFAULT;
}
else if (!strncmp(mpol, "interleave_local", 16)) {
desc->mpol_mode = MPOL_INTERLEAVE;
numa_local(desc->cpu_set, desc->mpol_nodemask);
}
else if (!strncmp(mpol, "interleave_nonlocal", 19)) {
desc->mpol_mode = MPOL_INTERLEAVE;
numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
}
else if (!strncmp(mpol, "interleave_all", 14)) {
desc->mpol_mode = MPOL_INTERLEAVE;
numa_all(desc->mpol_nodemask);
}
else if (!strncmp(mpol, "bind_local", 10)) {
desc->mpol_mode = MPOL_BIND;
numa_local(desc->cpu_set, desc->mpol_nodemask);
}
else if (!strncmp(mpol, "bind_nonlocal", 13)) {
desc->mpol_mode = MPOL_BIND;
numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
}
else if (!strncmp(mpol, "bind_all", 8)) {
desc->mpol_mode = MPOL_BIND;
numa_all(desc->mpol_nodemask);
}
else if (!strncmp(mpol, "prefer_local", 12)) {
desc->mpol_mode = MPOL_PREFERRED;
numa_local(desc->cpu_set, desc->mpol_nodemask);
}
else if (!strncmp(mpol, "prefer_nonlocal", 15)) {
desc->mpol_mode = MPOL_PREFERRED;
numa_nonlocal(desc->cpu_set, desc->mpol_nodemask);
}
__dprintf("mpol_mode: %d, mpol_nodemask: %ld\n",
desc->mpol_mode, desc->mpol_nodemask[0]);
}
desc->uti_thread_rank = uti_thread_rank;
desc->uti_use_last_cpu = uti_use_last_cpu;
@ -2705,7 +2820,9 @@ int main(int argc, char **argv)
desc->straight_map = straight_map;
desc->straight_map_threshold = straight_map_threshold;
#ifdef ENABLE_TOFU
desc->enable_tofu = enable_tofu;
#endif
/* user_start and user_end are set by this call */
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {

2
ihk

Submodule ihk updated: 3253a51e5a...675ab08a3c

View File

@ -58,16 +58,43 @@ struct cpu_local_var *get_cpu_local_var(int id)
return clv + id;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked);
#endif
/*
 * Drop one level of preemption disabling on the current CPU.
 *
 * Does nothing until the per-CPU variables have been initialized.  When
 * built with ENABLE_FUGAKU_HACKS, a counter that goes negative (more
 * enables than disables) is reported together with a stack trace taken
 * from this function's frame, and the kernel panics.
 */
void preempt_enable(void)
{
	if (!cpu_local_var_initialized)
		return;

	--cpu_local_var(no_preempt);

#ifdef ENABLE_FUGAKU_HACKS
	if (cpu_local_var(no_preempt) < 0) {
		__kprintf("%s: %d\n", __func__, cpu_local_var(no_preempt));
		__kprintf("TID: %d, call stack from builtin frame (most recent first):\n",
			  cpu_local_var(current)->tid);
		/* Last argument 1: the trace helper skips the kprintf lock */
		__show_context_stack(cpu_local_var(current),
				     (uintptr_t)&preempt_enable,
				     (unsigned long)__builtin_frame_address(0),
				     1);
		panic("panic: negative preemption??");
	}
#endif
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
if (cpu_local_var_initialized) {
++cpu_local_var(no_preempt);
}
}
int add_backlog(int (*func)(void *arg), void *arg)
@ -120,3 +147,10 @@ void do_backlog(void)
}
}
}
#ifdef ENABLE_FUGAKU_HACKS
/* Return the address of the run-queue lock in this CPU's local variables. */
ihk_spinlock_t *get_this_cpu_runq_lock(void)
{
return &get_this_cpu_local_var()->runq_lock;
}
#endif

View File

@ -542,6 +542,26 @@ static int process_msg_prepare_process(unsigned long rphys)
}
vm->numa_mem_policy = MPOL_BIND;
}
else if (pn->mpol_mode != MPOL_MAX) {
int bit;
vm->numa_mem_policy = pn->mpol_mode;
memset(&vm->numa_mask, 0, sizeof(vm->numa_mask));
for_each_set_bit(bit, pn->mpol_nodemask,
PLD_PROCESS_NUMA_MASK_BITS) {
if (bit >= ihk_mc_get_nr_numa_nodes()) {
kprintf("%s: error: NUMA id %d is larger than mask size!\n",
__func__, bit);
return -EINVAL;
}
set_bit(bit, &vm->numa_mask[0]);
}
dkprintf("%s: numa_mem_policy: %d, numa_mask: %ld\n",
__func__, vm->numa_mem_policy, vm->numa_mask[0]);
}
proc->uti_thread_rank = pn->uti_thread_rank;
proc->uti_use_last_cpu = pn->uti_use_last_cpu;
@ -768,7 +788,11 @@ out_remote_pf:
syscall_channel_send(resp_channel, &pckt);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#else
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#endif
ret = 0;
break;

View File

@ -85,7 +85,11 @@ static int hugefileobj_get_page(struct memobj *memobj, off_t off,
}
memset(obj->pages[pgind], 0, obj->pgsize);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#else
kprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#endif
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
@ -274,13 +278,51 @@ int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
obj->nr_pages = nr_pages;
obj->pages = pages;
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#else
kprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#endif
" for %d pages, pagesize: %lu\n",
__func__,
obj,
virt_addr,
nr_pages,
obj->pgsize);
#ifdef ENABLE_FUGAKU_HACKS
if (!hugetlbfs_on_demand) {
int pgind;
int npages;
#ifndef ENABLE_FUGAKU_HACKS
for (pgind = 0; pgind < obj->nr_pages; ++pgind) {
#else
/* Map in only the last 8 pages */
for (pgind = ((obj->nr_pages > 8) ? (obj->nr_pages - 8) : 0);
pgind < obj->nr_pages; ++pgind) {
#endif
if (obj->pages[pgind]) {
continue;
}
npages = obj->pgsize >> PAGE_SHIFT;
obj->pages[pgind] = ihk_mc_alloc_aligned_pages_user(npages,
obj->pgshift - PTL1_SHIFT,
IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, 0);
if (!obj->pages[pgind]) {
kprintf("%s: error: could not allocate page for off: %lu"
", page size: %lu\n", __func__, off, obj->pgsize);
continue;
}
memset(obj->pages[pgind], 0, obj->pgsize);
dkprintf("%s: obj: 0x%lx, pre-allocated page for off: %lu"
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
}
#endif
}
obj->memobj.size = len;

View File

@ -106,6 +106,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
#ifdef ENABLE_FUGAKU_HACKS
int in_page_fault;
#endif
int no_preempt;
int timer_enabled;
unsigned long nr_ctx_switches;

View File

@ -69,4 +69,7 @@ static inline int page_is_multi_mapped(struct page *page)
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#ifdef ENABLE_FUGAKU_HACKS
extern int hugetlbfs_on_demand;
#endif
#endif

View File

@ -395,6 +395,9 @@ struct vm_range {
off_t objoff;
int pgshift; /* page size. 0 means THP */
int padding;
#ifdef ENABLE_TOFU
struct list_head tofu_stag_list;
#endif
void *private_data;
};
@ -559,13 +562,17 @@ struct process {
size_t mpol_threshold;
unsigned long heap_extension;
unsigned long mpol_bind_mask;
int mpol_mode;
int uti_thread_rank; /* Spawn on Linux CPU when clone_count reaches this */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int clone_count;
int thp_disable;
int straight_map;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
size_t straight_map_threshold;
// perf_event
@ -590,9 +597,11 @@ struct process {
int coredump_barrier_count, coredump_barrier_count2;
mcs_rwlock_lock_t coredump_lock; // lock for coredump
#ifdef ENABLE_TOFU
#define MAX_FD_PDE 1024
void *fd_pde_data[MAX_FD_PDE];
char *fd_path[MAX_FD_PDE];
#endif
};
/*
@ -751,11 +760,16 @@ struct thread {
struct waitq coredump_wq;
int coredump_status;
#ifdef ENABLE_TOFU
/* Path of file being opened */
char *fd_path_in_open;
#endif
};
#define VM_RANGE_CACHE_SIZE 4
#ifdef ENABLE_TOFU
#define TOFU_STAG_HASH_SIZE 4
#endif
struct process_vm {
struct address_space *address_space;
@ -788,6 +802,12 @@ struct process_vm {
struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];
int range_cache_ind;
struct swapinfo *swapinfo;
#ifdef ENABLE_TOFU
/* Tofu STAG hash */
ihk_spinlock_t tofu_stag_lock;
struct list_head tofu_stag_hash[TOFU_STAG_HASH_SIZE];
#endif
};
static inline int has_cap_ipc_lock(struct thread *th)

View File

@ -183,6 +183,18 @@ typedef unsigned long __cpu_set_unit;
#define MPOL_NO_BSS 0x04
#define MPOL_SHM_PREMAP 0x08
/* should be the same as process.h */
/*
 * Number of bits in the NUMA node mask; sizes the mpol_nodemask array of
 * struct program_load_desc.
 */
#define PLD_PROCESS_NUMA_MASK_BITS 256
/*
 * Memory policy modes.
 * NOTE(review): presumably the values carried in the mpol_mode field and
 * mirroring the MPOL_* constants -- confirm against process.h.
 */
enum {
PLD_MPOL_DEFAULT,
PLD_MPOL_PREFERRED,
PLD_MPOL_BIND,
PLD_MPOL_INTERLEAVE,
PLD_MPOL_LOCAL,
PLD_MPOL_MAX, /* always last member of enum */
};
#define PLD_MAGIC 0xcafecafe44332211UL
struct program_load_desc {
@ -217,12 +229,18 @@ struct program_load_desc {
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int mpol_mode;
unsigned long mpol_nodemask[PLD_PROCESS_NUMA_MASK_BITS /
(sizeof(unsigned long) * 8)];
int thp_disable;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@ -63,6 +63,9 @@ extern int interrupt_from_user(void *);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
#ifdef ENABLE_FUGAKU_HACKS
int hugetlbfs_on_demand;
#endif
int sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
static struct ihk_mc_pa_ops *pa_ops;
@ -744,7 +747,11 @@ distance_based:
}
else {
if (i == 0)
#ifndef ENABLE_FUGAKU_HACKS
kprintf("%s: distance: CPU @ node %d failed to allocate "
#else
dkprintf("%s: distance: CPU @ node %d failed to allocate "
#endif
"%d pages from node %d\n",
__FUNCTION__,
ihk_mc_get_numa_id(),
@ -951,6 +958,9 @@ static void query_free_mem_interrupt_handler(void *priv)
}
kprintf("McKernel free pages in total: %d\n", pages);
#ifdef ENABLE_FUGAKU_HACKS
panic("PANIC");
#endif
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
@ -1286,6 +1296,9 @@ void tlb_flush_handler(int vector)
}
#endif // PROFILE_ENABLE
}
#ifdef ENABLE_FUGAKU_HACKS
extern unsigned long arch_get_instruction_address(const void *reg);
#endif
static void unhandled_page_fault(struct thread *thread, void *fault_addr,
uint64_t reason, void *regs)
@ -1317,6 +1330,22 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
__kprintf("address is out of range!\n");
}
#ifdef ENABLE_FUGAKU_HACKS
{
unsigned long pc = arch_get_instruction_address(regs);
range = lookup_process_memory_range(vm, pc, pc + 1);
if (range) {
__kprintf("PC: 0x%lx (%lx in %s)\n",
pc,
(range->memobj && range->memobj->flags & MF_REG_FILE) ?
pc - range->start + range->objoff :
pc - range->start,
(range->memobj && range->memobj->path) ?
range->memobj->path : "(unknown)");
}
}
#endif
kprintf_unlock(irqflags);
/* TODO */
@ -1324,7 +1353,13 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
#ifndef ENABLE_FUGAKU_HACKS
panic("panic: kernel mode PF");
#else
kprintf("panic: kernel mode PF");
for (;;) cpu_pause();
//panic("panic: kernel mode PF");
#endif
}
//dkprintf("now dump a core file\n");
@ -1360,6 +1395,20 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
__FUNCTION__, fault_addr, reason, regs);
preempt_disable();
#ifdef ENABLE_FUGAKU_HACKS
++cpu_local_var(in_page_fault);
if (cpu_local_var(in_page_fault) > 1) {
kprintf("%s: PF in PF??\n", __func__);
cpu_disable_interrupt();
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
panic("panic: kernel mode PF in PF");
}
while (1) {
panic("PANIC");
}
}
#endif
cpu_enable_interrupt();
@ -1427,6 +1476,13 @@ out_linux:
reason, error);
unhandled_page_fault(thread, fault_addr, reason, regs);
preempt_enable();
#ifdef ENABLE_FUGAKU_DEBUG
kprintf("%s: sending SIGSTOP to TID: %d\n", __func__, thread->tid);
do_kill(thread, thread->proc->pid, thread->tid, SIGSTOP, NULL, 0);
goto out;
#endif
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
@ -1455,6 +1511,9 @@ out_linux:
out_ok:
#endif
error = 0;
#ifdef ENABLE_FUGAKU_HACKS
--cpu_local_var(in_page_fault);
#endif
preempt_enable();
out:
dkprintf("%s: addr: %p, reason: %lx, regs: %p -> error: %d\n",
@ -2041,6 +2100,13 @@ void mem_init(void)
anon_on_demand = 1;
}
#ifdef ENABLE_FUGAKU_HACKS
if (find_command_line("hugetlbfs_on_demand")) {
kprintf("Demand paging on hugetlbfs mappings enabled.\n");
hugetlbfs_on_demand = 1;
}
#endif
/* Init distance vectors */
numa_distances_init();
}

View File

@ -36,6 +36,9 @@
#include <rusage_private.h>
#include <ihk/monitor.h>
#include <ihk/debug.h>
#ifdef ENABLE_TOFU
#include <tofu/tofu_stag_range.h>
#endif
//#define DEBUG_PRINT_PROCESS
@ -269,6 +272,12 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
}
vm->range_cache_ind = 0;
#ifdef ENABLE_TOFU
ihk_mc_spinlock_init(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&vm->tofu_stag_hash[i]);
}
#endif
return 0;
}
@ -955,6 +964,11 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
newrange->pgshift = range->pgshift;
newrange->private_data = range->private_data;
#ifdef ENABLE_TOFU
/* TODO: figure out which entries to put on which list! */
INIT_LIST_HEAD(&newrange->tofu_stag_list);
#endif
if (range->memobj) {
memobj_ref(range->memobj);
newrange->memobj = range->memobj;
@ -1023,6 +1037,28 @@ int join_process_memory_range(struct process_vm *vm,
if (vm->range_cache[i] == merging)
vm->range_cache[i] = surviving;
}
#ifdef ENABLE_TOFU
/* Move Tofu stag range entries */
if (vm->proc->enable_tofu) {
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&merging->tofu_stag_list, list) {
list_del(&tsr->list);
list_add_tail(&tsr->list, &surviving->tofu_stag_list);
dkprintf("%s: stag: %d @ %p:%lu moved in VM range merge\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
#endif
kfree(merging);
error = 0;
@ -1137,6 +1173,24 @@ static int free_process_memory_range(struct process_vm *vm,
}
straight_out:
#ifdef ENABLE_TOFU
if (vm->proc->enable_tofu) {
int entries;
extern int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range);
entries = tofu_stag_range_remove_overlapping(vm, range);
if (entries > 0) {
kprintf("%s: removed %d Tofu stag entries for range 0x%lx:%lu\n",
__func__,
entries,
range->start,
range->end - range->start);
}
}
#endif
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
if (vm->range_cache[i] == range)
@ -1428,6 +1482,9 @@ int add_process_memory_range(struct process_vm *vm,
range->pgshift = pgshift;
range->private_data = NULL;
range->straight_start = 0;
#ifdef ENABLE_TOFU
INIT_LIST_HEAD(&range->tofu_stag_list);
#endif
rc = 0;
if (phys == NOPHYS) {
@ -2521,6 +2578,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
__FUNCTION__, size, minsz,
ap_flag ? "(IHK_MC_AP_USER)" : "");
#ifdef ENABLE_FUGAKU_HACKS
/*
* XXX: Fugaku: Fujitsu's runtime remaps the stack
* using hugetlbfs so don't bother allocating too much here..
*/
minsz = 8*1024*1024;
#endif
stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
USER_STACK_PAGE_P2ALIGN,
IHK_MC_AP_NOWAIT | ap_flag,

View File

@ -204,6 +204,14 @@ long do_syscall(struct syscall_request *req, int cpu)
++thread->in_syscall_offload;
}
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (req->number == __NR_write && req->args[0] == 1) {
return req->args[2];
}
#endif
#endif
/* The current thread is the requester */
req->rtid = cpu_local_var(current)->tid;
@ -220,7 +228,9 @@ long do_syscall(struct syscall_request *req, int cpu)
req->ttid = 0;
}
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
#ifdef ENABLE_TOFU
res.pde_data = NULL;
#endif
send_syscall(req, cpu, &res);
if (req->rtid == -1) {
@ -381,6 +391,7 @@ long do_syscall(struct syscall_request *req, int cpu)
rc = res.ret;
#ifdef ENABLE_TOFU
if ((req->number == __NR_ioctl && rc == 0) ||
(req->number == __NR_openat && rc > 0)) {
int fd = req->number == __NR_ioctl ? req->args[0] : rc;
@ -407,6 +418,7 @@ long do_syscall(struct syscall_request *req, int cpu)
res.pde_data);
}
}
#endif
if(req->number != __NR_exit_group){
--thread->in_syscall_offload;
@ -1378,6 +1390,15 @@ void terminate(int rc, int sig)
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
vm = proc->vm;
#ifdef ENABLE_TOFU
if (proc->enable_tofu) {
extern void tof_utofu_finalize();
tof_utofu_finalize();
}
#endif
free_all_process_memory_range(vm);
if (proc->saved_cmdline) {
@ -1528,7 +1549,9 @@ int process_cleanup_before_terminate(int pid)
{
struct process *proc;
struct mcs_rwlock_node_irqsave lock;
#ifdef ENABLE_TOFU
int fd;
#endif
proc = find_process(pid, &lock);
if (!proc) {
@ -1536,10 +1559,12 @@ int process_cleanup_before_terminate(int pid)
return 0;
}
#ifdef ENABLE_TOFU
/* Clean up PDE file descriptors */
for (fd = 2; fd < MAX_FD_PDE; ++fd) {
__process_cleanup_fd(proc, fd);
}
#endif
process_unlock(proc, &lock);
return 0;
@ -1922,6 +1947,10 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
__FUNCTION__, proc->straight_va, range->pgshift);
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
#ifdef ENABLE_FUGAKU_HACKS
if (1) { // Un-safe mapping of covering physical range
#endif
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
proc->vm,
(void *)range->start,
@ -1948,6 +1977,90 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
proc->straight_pa,
psize,
proc->straight_map_threshold);
#ifdef ENABLE_FUGAKU_HACKS
}
else { // Safe mapping of only LWK memory ranges
size_t max_pgsize = 0;
size_t min_pgsize = 0xFFFFFFFFFFFFFFFF;
/*
* Iterate LWK phsyical memory chunks and map them to their
* corresponding offset in the straight range using the largest
* suitable pages.
*/
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end, pa;
void *va, *va_end;
size_t pgsize;
int pg2align;
ihk_mc_get_memory_chunk(i, &start, &end, NULL);
va = proc->straight_va + (start - straight_pa_start);
va_end = va + (end - start);
pa = start;
while (va < va_end) {
pgsize = (va_end - va) + 1;
retry:
error = arch_get_smaller_page_size(NULL, pgsize,
&pgsize, &pg2align);
if (error) {
ekprintf("%s: arch_get_smaller_page_size() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
/* Are virtual or physical not page aligned for this size? */
if (((unsigned long)va & (pgsize - 1)) ||
(pa & (pgsize - 1))) {
goto retry;
}
error = ihk_mc_pt_set_range(
proc->vm->address_space->page_table,
proc->vm,
va,
va + pgsize,
pa,
ptattr,
pg2align + PAGE_SHIFT,
range,
0);
if (error) {
kprintf("%s: ihk_mc_pt_set_range() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
if (pgsize > max_pgsize)
max_pgsize = pgsize;
if (pgsize < min_pgsize)
min_pgsize = pgsize;
va += pgsize;
pa += pgsize;
}
}
region->map_end = (unsigned long)proc->straight_va +
proc->straight_len;
proc->straight_pa = straight_pa_start;
kprintf("%s: straight mapping: 0x%lx:%lu @ "
"min_pgsize: %lu, max_pgsize: %lu\n",
__FUNCTION__,
proc->straight_va,
proc->straight_len,
min_pgsize,
max_pgsize);
}
#endif
}
straight_out:
@ -2276,8 +2389,15 @@ straight_out:
range->straight_start =
(unsigned long)proc->straight_va +
(straight_phys - proc->straight_pa);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx\n",
__FUNCTION__, addr, len, range->straight_start);
__FUNCTION__, addr, len, range->straight_start);
#else
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx"
" (phys: 0x%lx)\n",
__FUNCTION__, addr, len, range->straight_start,
straight_phys);
#endif
if (!zero_at_free) {
memset((void *)phys_to_virt(straight_phys), 0, len);
}
@ -2377,11 +2497,20 @@ out:
if (memobj) {
memobj_unref(memobj);
}
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#else
if (cpu_local_var(current)->profile) {
kprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#endif
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
__FUNCTION__,
addr, len, addr0, len0, prot, flags,
fd, off0, error, addr);
#ifdef ENABLE_FUGAKU_HACKS
}
#endif
return !error ?
(range->straight_start ? range->straight_start : addr) :
@ -2418,6 +2547,11 @@ SYSCALL_DECLARE(munmap)
out:
dkprintf("[%d]sys_munmap(%lx,%lx): %d\n",
ihk_mc_get_processor_id(), addr, len0, error);
#ifdef ENABLE_FUGAKU_HACKS
if (error) {
kprintf("%s: error: %d\n", __func__, error);
}
#endif
return error;
}
@ -3978,7 +4112,9 @@ SYSCALL_DECLARE(open)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("open(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@ -3987,15 +4123,21 @@ SYSCALL_DECLARE(open)
rc = syscall_generic_forwarding(__NR_open, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}
@ -4023,7 +4165,9 @@ SYSCALL_DECLARE(openat)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("openat(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@ -4032,15 +4176,21 @@ SYSCALL_DECLARE(openat)
rc = syscall_generic_forwarding(__NR_openat, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}

View File

@ -24,6 +24,7 @@
struct kmalloc_cache_header tofu_scatterlist_cache[8];
struct kmalloc_cache_header tofu_mbpt_cache[8];
struct ihk_mc_page_cache_header tofu_mbpt_sg_pages_cache[8];
struct kmalloc_cache_header tofu_stag_range_cache[8];
typedef ihk_spinlock_t spinlock_t;
@ -43,6 +44,124 @@ typedef void (*tof_core_signal_handler)(int, int, uint64_t, uint64_t);
#include <tofu/tofu_generated-tof_utofu_bg.h>
#include <tofu/tofu_generated-tof_utofu_mbpt.h>
#include <tofu/tofu_stag_range.h>
/*
 * Tofu STAG range tracking.
 *
 * Every tracked stag is linked twice: on the list of the VM range it was
 * registered against (range->tofu_stag_list) and on a per-process hash
 * bucket selected by the stag value (vm->tofu_stag_hash). Both links are
 * modified under vm->tofu_stag_lock.
 */

/*
 * Record that @stag of @ucq covers the address span @start..@end inside
 * @range.
 *
 * The visible caller invokes this with vm->memory_range_lock held for
 * reading, so @range stays valid for the duration of the call.
 *
 * Returns 0 on success, -EINVAL for a negative @stag (it cannot select a
 * valid hash bucket), or -ENOMEM when the tracking object cannot be
 * allocated.
 */
int tof_utofu_stag_range_insert(struct process_vm *vm,
		struct vm_range *range,
		uintptr_t start, uintptr_t end,
		struct tof_utofu_cq *ucq, int stag)
{
	struct tofu_stag_range *tsr;

	/*
	 * In C the result of % takes the sign of the dividend, so a negative
	 * stag would index in front of vm->tofu_stag_hash[].
	 */
	if (stag < 0) {
		kprintf("%s: error: invalid stag %d\n", __func__, stag);
		return -EINVAL;
	}

	tsr = kmalloc_cache_alloc(&tofu_stag_range_cache[ihk_mc_get_numa_id()],
			sizeof(*tsr));
	if (!tsr) {
		kprintf("%s: error: allocating tofu_stag_range\n", __func__);
		return -ENOMEM;
	}

	tsr->start = start;
	tsr->end = end;
	tsr->ucq = ucq;
	tsr->stag = stag;

	/*
	 * Log while the entry is still private: once it is linked and the lock
	 * is dropped, a concurrent removal could free it.
	 */
	dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
			__func__,
			tsr->stag,
			tsr->ucq->tni,
			tsr->ucq->cqid,
			tsr->start,
			(unsigned long)(tsr->end - tsr->start));

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	list_add_tail(&tsr->list, &range->tofu_stag_list);
	list_add_tail(&tsr->hash, &vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE]);
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	return 0;
}
/*
 * Find the tracking object for @stag in @vm's stag hash.
 *
 * Returns the matching entry, or NULL when there is none. A negative @stag
 * cannot select a valid bucket and also yields NULL.
 *
 * NOTE(review): vm->tofu_stag_lock is released before returning, so the
 * caller has to make sure the entry is not removed concurrently while it
 * uses the returned pointer.
 */
struct tofu_stag_range *tofu_stag_range_lookup_by_stag(struct process_vm *vm,
		int stag)
{
	struct tofu_stag_range *tsr;
	struct tofu_stag_range *match = NULL;

	/*
	 * Callers may pass user-supplied stag values; in C a negative dividend
	 * makes % negative, which would read in front of vm->tofu_stag_hash[].
	 */
	if (stag < 0) {
		return NULL;
	}

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	list_for_each_entry(tsr,
			&vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE], hash) {
		if (tsr->stag == stag) {
			match = tsr;
			break;
		}
	}
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	return match;
}
/*
 * Unlink @tsr from both lists it is on (the per-range list and the per-process
 * hash bucket) and return it to its kmalloc cache.
 *
 * XXX: vm->tofu_stag_lock must be held
 */
void __tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
/* Log first: tsr is released at the end of this function. */
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
list_del(&tsr->list);
list_del(&tsr->hash);
/* Allocated with kmalloc_cache_alloc(), hence the cache variant of free. */
//kfree(tsr);
kmalloc_cache_free(tsr);
}
/*
 * Locked wrapper around __tofu_stag_range_remove(): takes vm->tofu_stag_lock,
 * unlinks and frees @tsr, then releases the lock.
 * @tsr must not be used after this call.
 */
void tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
__tofu_stag_range_remove(vm, tsr);
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
/* Forward declaration: defined later in this file, needed by the function below. */
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag);
/*
 * Free every stag tracked on @range's tofu_stag_list and drop its tracking
 * object. Runs with vm->tofu_stag_lock held for the whole walk.
 *
 * Returns the number of entries that were removed.
 */
int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range)
{
struct tofu_stag_range *tsr, *next;
int entries = 0;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
/* _safe iteration: the current entry is unlinked and freed inside the loop. */
list_for_each_entry_safe(tsr, next,
&range->tofu_stag_list, list) {
dkprintf("%s: stag: %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
/*
 * NOTE(review): other call sites in this file take mru_lock with the
 * _irqsave variants; confirm the plain variant is intended here.
 * The return value of tof_utofu_free_stag() is not checked.
 */
linux_spin_lock(&tsr->ucq->trans.mru_lock);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock(&tsr->ucq->trans.mru_lock);
__tofu_stag_range_remove(vm, tsr);
++entries;
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
return entries;
}
#define TOF_UTOFU_VERSION TOF_UAPI_VERSION
#define TOF_UTOFU_NUM_STAG_NTYPES 3
#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13)
@ -146,6 +265,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
else {
*_pgszbits = PTL2_SHIFT;
*_pgszbits = PTL1_CONT_SHIFT;
}
return 0;
}
@ -171,7 +291,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
}
#if 0
#if 1
/* Tofu only support 64kB and 2MB pages */
if (min_shift > PTL1_CONT_SHIFT)
min_shift = PTL1_CONT_SHIFT;
@ -647,7 +767,6 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
//struct page *page;
struct process *proc = cpu_local_var(current)->proc;
uintptr_t iova = 0, va;
int ret;
unsigned long phys = 0;
/* Special case for straight mapping */
@ -697,6 +816,8 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
}
for(va = start; va < end; va += pgsz, ix++){
size_t psize;
pte_t *ptep;
if (tof_utofu_mbpt_is_enabled(mbpt, ix)) {
/* this page is already mapped to mbpt */
@ -715,15 +836,18 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
// return -ENOMEM;
//}
ret = ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
(void *)va, &phys);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)va, 0, NULL, &psize, NULL);
if (ret) {
raw_rc_output(ret);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, va);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
(va & (psize - 1));
//iova = tof_smmu_get_ipa_cq(ucq->tni, ucq->cqid,
// pfn_to_kaddr(page_to_pfn(page)), pgsz);
//if (iova == 0) {
@ -1012,6 +1136,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
size_t pgsz;
int ret = -ENOTSUPP;
unsigned long irqflags;
struct vm_range *range = NULL;
ucq = container_of(dev, struct tof_utofu_cq, common);
if(!ucq->common.enabled){
@ -1033,7 +1158,46 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
}
readonly = (req.flags & 1) != 0;
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
/* Assume smallest page size at first */
start = round_down((uintptr_t)req.va, PAGE_SIZE);
end = round_up((uintptr_t)req.va + req.len, PAGE_SIZE);
/* Find range, straight mapping special lookup */
if (vm->proc->straight_va &&
start >= (unsigned long)vm->proc->straight_va &&
end <= ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len) &&
!(start == (unsigned long)vm->proc->straight_va &&
end == ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len))) {
struct vm_range *range_iter;
range_iter = lookup_process_memory_range(vm, 0, -1);
while (range_iter) {
if (range_iter->straight_start &&
start >= range_iter->straight_start &&
start < (range_iter->straight_start +
(range_iter->end - range_iter->start))) {
range = range_iter;
break;
}
range_iter = next_process_memory_range(vm, range_iter);
}
}
else {
range = lookup_process_memory_range(vm, start, end);
}
if (!range) {
ret = -EINVAL;
goto unlock_out;
}
pgszbits = PAGE_SHIFT;
if (req.flags & TOF_UTOFU_ALLOC_STAG_LPG) {
ret = tof_utofu_get_pagesize_locked((uintptr_t)req.va,
@ -1109,6 +1273,12 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
//up(&ucq->ucq_sem);
ihk_mc_spinlock_unlock_noirq(&tofu_tni_cq_lock[ucq->tni][ucq->cqid]);
if (ret == 0) {
tof_utofu_stag_range_insert(vm, range, start, end, ucq, req.stag);
}
unlock_out:
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if(ret == 0){
@ -1332,6 +1502,21 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
ret = tof_utofu_free_stag(ucq, stags[i]);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
{
struct tofu_stag_range *tsr;
tsr = tofu_stag_range_lookup_by_stag(
cpu_local_var(current)->vm, stags[i]);
if (tsr) {
tofu_stag_range_remove(cpu_local_var(current)->vm, tsr);
}
else {
kprintf("%s: no stag range object for %d??\n", __func__, stags[i]);
}
}
if(ret == 0){
stags[i] = -1;
}
@ -1374,9 +1559,11 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
void tof_utofu_release_cq(void *pde_data)
{
struct tof_utofu_cq *ucq;
int stag;
//int stag;
struct tof_utofu_device *dev;
unsigned long irqflags;
struct process_vm *vm = cpu_local_var(current)->vm;
int do_free = 1;
dev = (struct tof_utofu_device *)pde_data;
ucq = container_of(dev, struct tof_utofu_cq, common);
@ -1384,13 +1571,43 @@ void tof_utofu_release_cq(void *pde_data)
if (!ucq->common.enabled) {
kprintf("%s: UCQ TNI %d, CQ %d is disabled\n",
__func__, ucq->tni, ucq->cqid);
return;
do_free = 0;
}
#if 0
for (stag = 0; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag++) {
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(ucq, stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
#endif
{
int i;
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
if (tsr->ucq != ucq)
continue;
if (do_free) {
dkprintf("%s: removing stag %d for TNI %d CQ %d\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
}
else {
kprintf("%s: WARNING: could not free stag %d for TNI %d CQ %d (UCQ is disabled)\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
}
__tofu_stag_range_remove(vm, tsr);
}
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
dkprintf("%s: UCQ (pde: %p) TNI %d, CQ %d\n",
@ -1829,14 +2046,22 @@ static int tof_utofu_ioctl_enable_bch(struct tof_utofu_device *dev, unsigned lon
}
if (!phys) {
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)req.addr, &phys);
size_t psize;
pte_t *ptep;
if (ret) {
raw_rc_output(ret);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)req.addr, 0, NULL, &psize, NULL);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, req.addr);
raw_rc_output(-ENOMEM);
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
((uint64_t)req.addr & (psize - 1));
}
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
@ -2068,6 +2293,7 @@ void tof_utofu_init_globals(void)
memset(tofu_scatterlist_cache, 0, sizeof(tofu_scatterlist_cache));
memset(tofu_mbpt_cache, 0, sizeof(tofu_mbpt_cache));
memset(tofu_mbpt_sg_pages_cache, 0, sizeof(tofu_mbpt_sg_pages_cache));
memset(tofu_stag_range_cache, 0, sizeof(tofu_stag_range_cache));
{
int tni, cq;
@ -2120,6 +2346,24 @@ void tof_utofu_finalize(void)
{
struct tofu_globals *tg = ihk_mc_get_tofu_globals();
/* Could be called from idle.. */
if (cpu_local_var(current)->proc->enable_tofu) {
int i;
struct process_vm *vm = cpu_local_var(current)->vm;
struct tofu_stag_range *tsr, *next;
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
dkprintf("%s: WARNING: stray stag %d for TNI %d CQ %d?\n",
__func__, tsr->stag, tsr->ucq->tni, tsr->ucq->cqid);
}
}
kprintf("%s: STAG processing done\n", __func__);
}
ihk_mc_clear_kernel_range((void *)tg->linux_vmalloc_start,
(void *)tg->linux_vmalloc_end);
}

View File

@ -22,12 +22,18 @@ void panic(const char *msg)
arch_print_stack();
#ifndef ENABLE_FUGAKU_HACKS
/* do not assume anything after this is executed */
arch_cpu_stop();
while (1) {
cpu_halt();
}
#else
while (1) {
cpu_halt_panic();
}
#endif
}
extern void arch_show_interrupt_context(const void*);

View File

@ -0,0 +1,11 @@
#ifndef INCLUDE_BITOPS_TEST_BIT_H
#define INCLUDE_BITOPS_TEST_BIT_H

/*
 * Test bit @nr of the bitmap at @addr, which is read as an array of 32-bit
 * words: word index is nr / 32, bit position inside the word is nr % 32.
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
static inline int test_bit(int nr, const void *addr)
{
	const uint32_t *words = (const uint32_t *)addr;
	const uint32_t word = words[nr >> 5];
	const unsigned long mask = 1UL << (nr & 31);

	return (mask & word) != 0;
}

#endif

View File

@ -27,11 +27,7 @@ unsigned long find_first_bit(const unsigned long *addr,
unsigned long find_first_zero_bit(const unsigned long *addr,
unsigned long size);
static inline int test_bit(int nr, const void *addr)
{
const uint32_t *p = (const uint32_t *)addr;
return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
}
#include <bitops-test_bit.h>
extern unsigned int __sw_hweight32(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);

View File

@ -23,7 +23,13 @@ extern int num_processors;
void cpu_enable_interrupt(void);
void cpu_disable_interrupt(void);
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void);
#endif
void cpu_halt(void);
#ifdef ENABLE_FUGAKU_HACKS
void cpu_halt_panic(void);
#endif
void cpu_safe_halt(void);
void cpu_restore_interrupt(unsigned long);
void cpu_pause(void);

View File

@ -227,9 +227,11 @@ int ihk_mc_get_memory_chunk(int id,
unsigned long *start,
unsigned long *end,
int *numa_id);
#ifdef ENABLE_TOFU
int ihk_mc_get_memory_chunk_dma_addr(int id,
int tni, int cqid,
uintptr_t *dma_addr);
#endif
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);

View File

@ -332,6 +332,23 @@ int deferred_zero_at_free = 1;
* of their corresponding memory (i.e., they are on the free memory chunk itself).
*/
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Add up the size field of every free_chunk linked into the red-black tree
 * @root, visiting the nodes from rb_first() via rb_next().
 *
 * Returns the total, which is 0 for an empty tree.
 */
size_t __count_free_bytes(struct rb_root *root)
{
	size_t total = 0;
	struct rb_node *cur = rb_first(root);

	while (cur) {
		total += container_of(cur, struct free_chunk, node)->size;
		cur = rb_next(cur);
	}

	return total;
}
#endif
/*
* Free pages.
* NOTE: locking must be managed by the caller.

View File

@ -0,0 +1,99 @@
#!/bin/bash
# IHK/McKernel user privilege reboot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2019 RIKEN
#
# Shuts down OS instance 0, assigns it the CPUs reported by
# "ihkconfig 0 query cpu" and all memory of indices 0-15 (presumably NUMA
# nodes), restores the IKC map, then loads and boots the McKernel image.
#
# Options:
#   -r <ikc_map>  IKC map to set after reassignment; when omitted it is
#                 queried from the running OS before shutdown
#   -c <cpus>     accepted, but the value is replaced by the ihkconfig query
#   -m <mem>      accepted, but not referenced by the rest of the script

prefix="@prefix@"
BINDIR="${prefix}/bin"
SBINDIR="${prefix}/sbin"
KERNDIR="@MCKERNELDIR@"

mem=""
cpus=""
ikc_map=""

while getopts c:m:r: OPT
do
	case ${OPT} in
	c) cpus=${OPTARG}
		;;
	m) mem=${OPTARG}
		;;
	r) ikc_map=${OPTARG}
		;;
	\?) exit 1
		;;
	esac
done

if [ -z "${ikc_map}" ]; then
	# Query IKC map (must happen before shutdown); a single invocation
	# provides both the exit status and the output
	if ! ikc_map=$("${SBINDIR}/ihkosctl" 0 get ikc_map); then
		echo "error: querying IKC map" >&2
		exit 1
	fi
fi

# Shutdown OS
if ! "${SBINDIR}/ihkosctl" 0 shutdown; then
	echo "error: shuting down OS" >&2
	exit 1
fi

sleep 2

# Query IHK-SMP resources and reassign
if ! cpus=$("${SBINDIR}/ihkconfig" 0 query cpu); then
	echo "error: querying cpus" >&2
	exit 1
fi

if [ -z "${cpus}" ]; then
	echo "error: querying CPUs" >&2
	exit 1
fi

# Assign CPUs
if ! "${SBINDIR}/ihkosctl" 0 assign cpu "${cpus}"; then
	echo "error: assign CPUs" >&2
	exit 1
fi

# Assign memory
for i in $(seq 0 15); do
	if ! "${SBINDIR}/ihkosctl" 0 assign mem "all@${i}"; then
		echo "error: assign memory" >&2
		exit 1
	fi
done

if [ -n "${ikc_map}" ]; then
	# Set IKC map
	if ! "${SBINDIR}/ihkosctl" 0 set ikc_map "${ikc_map}"; then
		echo "error: setting IKC map" >&2
		exit 1
	fi
fi

# Load kernel image
if ! "${SBINDIR}/ihkosctl" 0 load "${KERNDIR}/mckernel.img"; then
	echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
	exit 1
fi

# Set kernel arguments
if ! "${SBINDIR}/ihkosctl" 0 kargs "hidos dump_level=24"; then
	echo "error: setting kernel arguments" >&2
	exit 1
fi

# Boot OS instance
if ! "${SBINDIR}/ihkosctl" 0 boot; then
	echo "error: booting" >&2
	exit 1
fi

View File

@ -65,8 +65,9 @@ umask_old=`umask`
idle_halt=""
allow_oversubscribe=""
time_sharing="time_sharing"
force_reserve="no"
while getopts stk:c:m:o:f:r:q:i:d:e:hOT: OPT
while getopts stk:c:m:o:f:r:q:i:d:e:hROT: OPT
do
case ${OPT} in
f) facility=${OPTARG}
@ -97,6 +98,8 @@ do
;;
O) allow_oversubscribe="allow_oversubscribe"
;;
R) force_reserve="yes"
;;
T)
case ${OPTARG} in
1) time_sharing="time_sharing"
@ -343,6 +346,17 @@ if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
fi
fi
if [ ${force_reserve} == "yes" ]; then
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
echo "error: reserving memory" >&2
error_exit "ihk_smp_loaded"
fi
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
echo "error: reserving CPUs" >&2;
error_exit "mem_reserved"
fi
fi
# Load mcctrl if not loaded
if ! grep mcctrl /proc/modules &>/dev/null; then
if ! taskset -c 0 ${SUDO} insmod ${KMODDIR}/mcctrl.ko 2>/dev/null; then

View File

@ -18,9 +18,10 @@ KERNDIR="@KERNDIR@"
mem=""
cpus=""
kill_in_use=""
dont_unload="no"
RMMOD_PATH=/sbin/rmmod
while getopts r:k OPT
while getopts r:kR OPT
do
case ${OPT} in
r)
@ -29,6 +30,9 @@ do
k)
kill_in_use=1
;;
R)
dont_unload="yes"
;;
\?) exit 1
;;
esac
@ -115,6 +119,10 @@ if ! sudo ${SBINDIR}/ihkconfig 0 release mem "all" > /dev/null; then
exit 1
fi
if [ "${dont_unload}" == "yes" ]; then
exit 0
fi
# Remove delegator if loaded
if grep mcctrl /proc/modules &>/dev/null; then
if ! sudo ${RMMOD_PATH} mcctrl 2>/dev/null; then

View File

@ -0,0 +1,123 @@
# cmake_minimum_required() has to be the first command so that policy
# defaults are in effect for everything below, and project() has to run
# before toolchain/system-dependent variables (e.g. the default value of
# CMAKE_INSTALL_PREFIX used further down) are relied upon.
cmake_minimum_required(VERSION 3.0)
project(issue1470 C)

# Identify the CPU from the first matching lines of /proc/cpuinfo
execute_process(COMMAND bash -c "gawk '/CPU implementer/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_IMPLEMENTER OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU architecture/ { print \$3; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU variant/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_VARIANT OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "gawk '/CPU part/ { print \$4; exit; }' /proc/cpuinfo"
  OUTPUT_VARIABLE CPU_PART OUTPUT_STRIP_TRAILING_WHITESPACE)

# This implementer/architecture/variant/part combination is reported as A64FX
if(CPU_IMPLEMENTER STREQUAL "0x46" AND CPU_ARCH STREQUAL "8" AND
   CPU_VARIANT STREQUAL "0x1" AND CPU_PART STREQUAL "0x001")
  message("A64FX detected")
  set(CPU_MODEL "a64fx")
  add_definitions(-D__a64fx__)
endif()

# find first NUMA available to user (0 or 4 now)
execute_process(COMMAND bash -c "awk -v keyword=nr_free_pages -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo.awk /proc/zoneinfo | awk -v page_size=$(getconf PAGE_SIZE) -f ${CMAKE_CURRENT_SOURCE_DIR}/src/zoneinfo_filter.awk | head -n1" OUTPUT_VARIABLE FIRST_USER_NUMA OUTPUT_STRIP_TRAILING_WHITESPACE)
message("FIRST_USER_NUMA: ${FIRST_USER_NUMA}")
add_definitions(-DFIRST_USER_NUMA=${FIRST_USER_NUMA})

# NOTE(review): this modifies host cgroup settings via sudo at configure
# time; consider moving it out of the build system into the test scripts.
if (FIRST_USER_NUMA STREQUAL "4")
  execute_process(COMMAND sudo bash -c "echo 0-7 > /sys/fs/cgroup/cpuset/system.slice/cpuset.mems")
endif()

# Already implied by cmake_minimum_required(VERSION 3.0); kept to make the
# dependency on add_definitions() value escaping explicit.
cmake_policy(SET CMP0005 NEW)

# Options: -DWITH_MCK=<McKernel install directory>
add_definitions(-DWITH_MCK=${WITH_MCK})

# Options: -DWITH_MCK_SRC=<McKernel source directory>
add_definitions(-DWITH_MCK_SRC=${WITH_MCK_SRC})

# for autotest
if(NOT DEFINED CMAKE_INSTALL_PREFIX_SCRIPTS)
  set(CMAKE_INSTALL_PREFIX_SCRIPTS ${CMAKE_INSTALL_PREFIX}/scripts)
endif()
# CPPFLAGS

# Kernel build tree whose .config is consulted for the page-size option
set(UNAME_R ${CMAKE_SYSTEM_VERSION} CACHE STRING "Kernel version to build against")
set(KERNEL_DIR "/lib/modules/${UNAME_R}/build" CACHE STRING "kernel build directory")

# Value of CONFIG_ARM64_64K_PAGES in the kernel configuration (empty if absent)
execute_process(
  COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
  OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES
  OUTPUT_STRIP_TRAILING_WHITESPACE)

# 64 KiB only for aarch64 kernels built with 64K pages, 4 KiB in every other case
set(PAGE_SIZE "4096")
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND CONFIG_ARM64_64K_PAGES STREQUAL "y")
  set(PAGE_SIZE "65536")
endif()
message("PAGE_SIZE: ${PAGE_SIZE}")

# Architecture-specific build target and kernel-module postfix
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  add_definitions(-DBUILD_TARGET=smp-x86 -DKMOD_POSTFIX=smp_x86)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
  add_definitions(-DBUILD_TARGET=smp-arm64 -DKMOD_POSTFIX=smp_arm64)
endif()

add_definitions(-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX})
# CFLAGS
# Warning set; EXTRA_WARNINGS lets the user append more on the command line
set(CFLAGS_WARNING
  "-Wall"
  "-Wextra"
  "-Wno-unused-parameter"
  "-Wno-sign-compare"
  "-Wno-unused-function"
  ${EXTRA_WARNINGS}
  CACHE STRING "Warning flags")

add_compile_options(-O2 -g ${CFLAGS_WARNING})

# -L, this must be done before adding dependants
link_directories("${WITH_MCK}/lib64")

# -Wl,--rpath=, this must be done before adding dependants
set(CMAKE_INSTALL_RPATH "${WITH_MCK}/lib64")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# test driver scripts
foreach(target IN ITEMS
    OMPI_MCA_plm_ple_memory_allocation_policy01
    OMPI_MCA_plm_ple_memory_allocation_policy02
    OMPI_MCA_plm_ple_memory_allocation_policy03
    OMPI_MCA_plm_ple_memory_allocation_policy04
    OMPI_MCA_plm_ple_memory_allocation_policy05
    OMPI_MCA_plm_ple_memory_allocation_policy06
    OMPI_MCA_plm_ple_memory_allocation_policy07
    )

  # String replacement (@VAR@ references only).  Input and output paths are
  # spelled out so that the install() below reads exactly the file that is
  # generated here, also when this directory is not the top-level build.
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/src/${target}.sh.in"
    "${CMAKE_CURRENT_BINARY_DIR}/ihklib-${target}"
    @ONLY)

  # Install scripts
  install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/ihklib-${target}"
    DESTINATION "${CMAKE_INSTALL_PREFIX_SCRIPTS}")
endforeach()
# programs running on McKernel
set(mckernel_programs
  check_mempolicy
  )

foreach(target IN LISTS mckernel_programs)
  # One C executable per entry, built from src/<name>.c
  add_executable(${target} src/${target}.c)

  # -I
  target_include_directories(${target} PRIVATE "${PROJECT_SOURCE_DIR}/include")

  # -l
  target_link_libraries(${target} PRIVATE numa)

  # Install
  install(TARGETS ${target} DESTINATION bin)
endforeach()

32
test/issues/1470/README Normal file
View File

@ -0,0 +1,32 @@
============
What to test
============
Check that the mode and nodemask returned by get_mempolicy() match the expected values for different combinations of reserved CPUs, CPU affinity masks, and memory allocation policy requests.
The following settings are used.
Memory reserved: "1G@4,1G@5,1G@6,1G@7"
CPUs reserved: "12-59", "24-59"
FLIB_AFFINITY_ON_PROCESS:
"12-23", "24-35", "36-47", "48-59",
"12-35", "24-47", "36-59",
"12-47", "24-59",
"12-59"
OMPI_MCA_plm_ple_memory_allocation_policy:
{interleave,bind,prefer}_{local,nonlocal},
{interleave,bind}_all,
localalloc
============
How to build
============
cd <mckernel>/test/issues
mkdir build
cd build
cmake ../1470/ -DCMAKE_INSTALL_PREFIX=<mckernel>/test/issues/install -DWITH_MCK=<mckernel-install> -DWITH_MCK_SRC=<mckernel>
===========
How to test
===========
for i in {1..7}; do <mckernel>/test/issues/install/scripts/ihklib-OMPI_MCA_plm_ple_memory_allocation_policy0$i; done
Check that no "[ NG ]" line is shown in the output.

View File

@ -0,0 +1,33 @@
#ifndef __OKNG_H_INCLUDED__
#define __OKNG_H_INCLUDED__

#include <stdio.h>

/*
 * Test-result reporting helpers.
 *
 * The failing paths of _OKNG (when jump is non-zero), OKNG, INTERR and
 * ARRAY_SIZE_CHECK assign 1 to a variable named "ret" and "goto out", so
 * the calling function has to declare "ret" and provide an "out:" label.
 */

/*
 * _OKNG - report the outcome of a check
 * @verb: when non-zero, print an "[ OK ] " line if @cond holds
 * @jump: when non-zero, set ret = 1 and goto out if @cond does not hold
 * @cond: condition under test
 * @fmt:  printf format (must be a string literal), followed by its arguments
 */
#define _OKNG(verb, jump, cond, fmt, args...) do {	\
	if (cond) {					\
		if (verb)				\
			printf("[ OK ] " fmt, ##args);	\
	} else {					\
		printf("[ NG ] " fmt, ##args);		\
		if (jump) {				\
			ret = 1;			\
			goto out;			\
		}					\
	}						\
} while (0)

/* Verbose check that bails out to "out" on failure */
#define OKNG(args...) _OKNG(1, 1, ##args)

#define INFO(fmt, args...) printf("[ INFO ] " fmt, ##args)
#define START(fmt, args...) printf("[ START] " fmt, ##args)

/*
 * INTERR - report an internal (test harness) error
 *
 * When @cond holds, print "[INTERR] <file>:<line> <message>", set ret = 1
 * and goto out.  The message is formatted with snprintf() so that an
 * over-long message is truncated instead of overflowing the buffer.
 */
#define INTERR(cond, fmt, args...) do {					\
	if (cond) {							\
		char msg[4096];						\
		snprintf(msg, sizeof(msg), fmt, ##args);		\
		printf("[INTERR] %s:%d %s", __FILE__, __LINE__, msg);	\
		ret = 1;						\
		goto out;						\
	}								\
} while (0)

/* Internal error unless @array has exactly @size elements */
#define ARRAY_SIZE_CHECK(array, size) INTERR(sizeof(array)/sizeof((array)[0]) != (size), "size of array \"%s\" isn't %d\n", #array, (int)(size))

#endif

View File

@ -0,0 +1,53 @@
#!/usr/bin/bash

# Test driver for the *_local values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 12-59".  For every tested FLIB_AFFINITY_ON_PROCESS CPU list
# check_mempolicy is run under mcexec and told which mode and nodemask to
# expect.  The first failing check ends the script with that exit status.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected policy mode per request (passed to check_mempolicy -m)
declare -A mode
mode[interleave_local]="3"
mode[bind_local]="2"
mode[prefer_local]="1"

# Expected nodemask per CPU list (passed to check_mempolicy -n)
declare -A nodemask
nodemask[$(seq -s, 12 23)]="1"
nodemask[$(seq -s, 24 35)]="2"
nodemask[$(seq -s, 36 47)]="4"
nodemask[$(seq -s, 48 59)]="8"
nodemask[$(seq -s, 12 35)]="3"
nodemask[$(seq -s, 24 47)]="6"
nodemask[$(seq -s, 36 59)]="12"
nodemask[$(seq -s, 12 47)]="7"
nodemask[$(seq -s, 24 59)]="14"
nodemask[$(seq -s, 12 59)]="15"

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

# Initialised explicitly so that a value inherited from the environment or
# left behind by config.sh cannot suppress the "policy not specified" check.
iteration=0

for policy in interleave_local bind_local prefer_local; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) $(seq -s, 48 59) \
		$(seq -s, 12 35) $(seq -s, 24 47) $(seq -s, 36 59) \
		$(seq -s, 12 47) $(seq -s, 24 59) \
		$(seq -s, 12 59); do

		# check if policy is not set when not specified
		# (done once, on the very first iteration)
		if (( iteration++ == 0 )); then
			FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m 0 || exit $?
		fi

		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,48 @@
#!/usr/bin/bash

# Test driver for the *_nonlocal values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 12-59".  Runs check_mempolicy under mcexec for each
# policy / CPU list pair and stops at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
	. ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

# Print the comma-separated CPU list $1,$1+1,...,$2
cpu_list()
{
	seq -s, "$1" "$2"
}

# Expected policy mode per request (check_mempolicy -m)
declare -A mode
mode[interleave_nonlocal]="3"
mode[bind_nonlocal]="2"
mode[prefer_nonlocal]="1"

# Expected nodemask per CPU list (check_mempolicy -n)
declare -A nodemask
nodemask[$(cpu_list 12 23)]="14"
nodemask[$(cpu_list 24 35)]="13"
nodemask[$(cpu_list 36 47)]="11"
nodemask[$(cpu_list 48 59)]="7"
nodemask[$(cpu_list 12 35)]="12"
nodemask[$(cpu_list 24 47)]="9"
nodemask[$(cpu_list 36 59)]="3"
nodemask[$(cpu_list 12 47)]="8"
nodemask[$(cpu_list 24 59)]="1"
nodemask[$(cpu_list 12 59)]="0"

# CPU lists to test, in execution order
cpusets=(
	$(cpu_list 12 23) $(cpu_list 24 35) $(cpu_list 36 47) $(cpu_list 48 59)
	$(cpu_list 12 35) $(cpu_list 24 47) $(cpu_list 36 59)
	$(cpu_list 12 47) $(cpu_list 24 59)
	$(cpu_list 12 59)
)

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
	for cpuset in "${cpusets[@]}"; do
		FLIB_NUM_PROCESS_ON_NODE=1 \
		FLIB_AFFINITY_ON_PROCESS=$cpuset \
		OMPI_MCA_plm_ple_memory_allocation_policy=$policy \
		@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy \
			-m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,34 @@
#!/usr/bin/bash

# Test driver for the *_all values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 12-59".  The expected nodemask is 15 for every CPU list.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
	. ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

# Print the comma-separated CPU list $1,$1+1,...,$2
cpu_list()
{
	seq -s, "$1" "$2"
}

# Expected policy mode per request (check_mempolicy -m)
declare -A mode
mode[interleave_all]="3"
mode[bind_all]="2"

# CPU lists to test, in execution order
cpusets=(
	$(cpu_list 12 23) $(cpu_list 24 35) $(cpu_list 36 47) $(cpu_list 48 59)
	$(cpu_list 12 35) $(cpu_list 24 47) $(cpu_list 36 59)
	$(cpu_list 12 47) $(cpu_list 24 59)
	$(cpu_list 12 59)
)

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_all bind_all; do
	for cpuset in "${cpusets[@]}"; do
		FLIB_NUM_PROCESS_ON_NODE=1 \
		FLIB_AFFINITY_ON_PROCESS=$cpuset \
		OMPI_MCA_plm_ple_memory_allocation_policy=$policy \
		@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy \
			-m ${mode[$policy]} -n 15 || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,30 @@
#!/usr/bin/bash

# Test driver for OMPI_MCA_plm_ple_memory_allocation_policy=localalloc with
# McKernel booted via "mcreboot.sh -c 12-59".  check_mempolicy is told to
# expect mode 0 for every CPU list; no nodemask is passed.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f ${AUTOTEST_HOME}/bin/config.sh ]; then
	. ${AUTOTEST_HOME}/bin/config.sh
else
	WORKDIR=$(pwd)
fi

# Print the comma-separated CPU list $1,$1+1,...,$2
cpu_list()
{
	seq -s, "$1" "$2"
}

# CPU lists to test, in execution order
cpusets=(
	$(cpu_list 12 23) $(cpu_list 24 35) $(cpu_list 36 47) $(cpu_list 48 59)
	$(cpu_list 12 35) $(cpu_list 24 47) $(cpu_list 36 59)
	$(cpu_list 12 47) $(cpu_list 24 59)
	$(cpu_list 12 59)
)

@WITH_MCK@/sbin/mcstop+release.sh
@WITH_MCK@/sbin/mcreboot.sh -c 12-59 -m 1G@4,1G@5,1G@6,1G@7

# Only one policy is exercised by this script
policy=localalloc
for cpuset in "${cpusets[@]}"; do
	FLIB_NUM_PROCESS_ON_NODE=1 \
	FLIB_AFFINITY_ON_PROCESS=$cpuset \
	OMPI_MCA_plm_ple_memory_allocation_policy=$policy \
	@WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy \
		-m 0 || exit $?
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,44 @@
#!/usr/bin/bash

# Test driver for the *_local values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 24-59" (the last 36 CPUs).  Runs check_mempolicy under
# mcexec for each policy / CPU list pair and stops at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected policy mode per request (check_mempolicy -m)
declare -A mode
mode[interleave_local]="3"
mode[bind_local]="2"
mode[prefer_local]="1"

# Expected nodemask per CPU list (check_mempolicy -n)
declare -A nodemask
nodemask[$(seq -s, 12 23)]="2"
nodemask[$(seq -s, 24 35)]="4"
nodemask[$(seq -s, 36 47)]="8"
nodemask[$(seq -s, 12 35)]="6"
nodemask[$(seq -s, 24 47)]="12"
nodemask[$(seq -s, 12 47)]="14"

# Stop and release any instance left over from an earlier, aborted run
# before booting with a different CPU set.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_local bind_local prefer_local; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,44 @@
#!/usr/bin/bash

# Test driver for the *_nonlocal values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 24-59" (the last 36 CPUs).  Runs check_mempolicy under
# mcexec for each policy / CPU list pair and stops at the first failure.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected policy mode per request (check_mempolicy -m)
declare -A mode
mode[interleave_nonlocal]="3"
mode[bind_nonlocal]="2"
mode[prefer_nonlocal]="1"

# Expected nodemask per CPU list (check_mempolicy -n)
declare -A nodemask
nodemask[$(seq -s, 12 23)]="13"
nodemask[$(seq -s, 24 35)]="11"
nodemask[$(seq -s, 36 47)]="7"
nodemask[$(seq -s, 12 35)]="9"
nodemask[$(seq -s, 24 47)]="3"
nodemask[$(seq -s, 12 47)]="1"

# Stop and release any instance left over from an earlier, aborted run
# before booting with a different CPU set.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_nonlocal bind_nonlocal prefer_nonlocal; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n ${nodemask[$cpuset]} || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,34 @@
#!/usr/bin/bash

# Test driver for the *_all values of
# OMPI_MCA_plm_ple_memory_allocation_policy with McKernel booted via
# "mcreboot.sh -c 24-59" (the last 36 CPUs).  The expected nodemask is 15
# for every CPU list.

# define WORKDIR
SCRIPT_PATH=$(readlink -m "${BASH_SOURCE[0]}")
SCRIPT_NAME="${SCRIPT_PATH##*/}"
TEST_NAME="${SCRIPT_NAME%.sh}"
AUTOTEST_HOME="${SCRIPT_PATH%/*/*/*}"
if [ -f "${AUTOTEST_HOME}/bin/config.sh" ]; then
	. "${AUTOTEST_HOME}/bin/config.sh"
else
	WORKDIR=$(pwd)
fi

# Expected policy mode per request (check_mempolicy -m)
declare -A mode
mode[interleave_all]="3"
mode[bind_all]="2"

# Stop and release any instance left over from an earlier, aborted run
# before booting with a different CPU set.
@WITH_MCK@/sbin/mcstop+release.sh

# reserve the last 36 cpus
@WITH_MCK@/sbin/mcreboot.sh -c 24-59 -m 1G@4,1G@5,1G@6,1G@7

for policy in interleave_all bind_all; do
	for cpuset in \
		$(seq -s, 12 23) $(seq -s, 24 35) $(seq -s, 36 47) \
		$(seq -s, 12 35) $(seq -s, 24 47) \
		$(seq -s, 12 47); do

		# check nodemask when last 36 cpus are reserved
		FLIB_NUM_PROCESS_ON_NODE=1 FLIB_AFFINITY_ON_PROCESS=$cpuset OMPI_MCA_plm_ple_memory_allocation_policy=$policy @WITH_MCK@/bin/mcexec @CMAKE_INSTALL_PREFIX@/bin/check_mempolicy -m ${mode[$policy]} -n 15 || exit $?
	done
done

@WITH_MCK@/sbin/mcstop+release.sh

exit 0

View File

@ -0,0 +1,54 @@
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <numaif.h>
#include <okng.h>
#define PLD_PROCESS_NUMA_MASK_BITS 256
/*
 * Compare the memory policy reported by get_mempolicy() for the calling
 * process with the values expected by the caller.
 *
 * Options:
 *   -m <mode>      expected policy mode, decimal (required)
 *   -n <nodemask>  expected value of the first word of the node mask,
 *                  decimal; only compared when <mode> is not 0
 *
 * Returns 0 when all checks pass, 1 on a mismatch or an internal error.
 */
int main(int argc, char **argv)
{
	long ret;
	int mode;
	unsigned long nodemask[PLD_PROCESS_NUMA_MASK_BITS /
			       (sizeof(unsigned long) * 8)] = { 0 };
	int mode_expected = -1;
	unsigned long nodemask_expected[PLD_PROCESS_NUMA_MASK_BITS /
					(sizeof(unsigned long) * 8)] = { 0 };
	int opt;

	while ((opt = getopt(argc, argv, "m:n:")) != -1) {
		switch (opt) {
		case 'm':
			mode_expected = (int)strtol(optarg, NULL, 10);
			break;
		case 'n':
			/*
			 * strtoul() keeps the full width of the mask word;
			 * atoi() is undefined for values beyond INT_MAX.
			 */
			nodemask_expected[0] = strtoul(optarg, NULL, 10);
			break;
		default: /* '?' */
			INTERR(1, "unknown option %c\n", optopt);
		}
	}

	/* -1 is the "not given" marker set at declaration */
	INTERR(mode_expected == -1, "specify -m <mode>\n");

	ret = get_mempolicy(&mode, nodemask, PLD_PROCESS_NUMA_MASK_BITS,
			    NULL, 0);
	/* The return value only signals failure; errno carries the reason */
	INTERR(ret, "get_mempolicy failed with %ld (errno: %d)\n",
	       ret, errno);

	OKNG(mode == mode_expected, "mode: actual (%d), expected (%d)\n",
	     mode, mode_expected);

	/* nodemask is "don't care" when mode is MPOL_DEFAULT */
	if (mode_expected != 0) {
		/* %lu: both values are unsigned long */
		OKNG(nodemask[0] == nodemask_expected[0],
		     "nodemask: actual (%lu), expected (%lu)\n",
		     nodemask[0],
		     nodemask_expected[0]);
	}

	ret = 0;
out:
	return ret;
}

View File

@ -0,0 +1,9 @@
# Reads zoneinfo-formatted text and, for each "Normal" or "DMA32" zone,
# prints "id: <node>, nr_free_pages: <count>" using the first subsequent
# line that matches the regular expression given as -v keyword=...
#
# id is -1 while no zone of interest is pending.
BEGIN { id = -1; }

# Zone header: remember the node id, i.e. field 2 without its trailing comma.
# [ \t]* is used instead of the gawk-only "\s*" so that the script also
# works when "awk" is not GNU awk.
/Node .*, zone[ \t]*(Normal|DMA32)/ { id = substr($2, 1, length($2) - 1); }

{
	if ($0 ~ keyword && id != -1) {
		printf("id: %d, nr_free_pages: %ld\n", id, $2);
		# Report only one line per zone header
		id = -1;
	}
}

View File

@ -0,0 +1,13 @@
# Reads lines of the form "id: <node>, nr_free_pages: <count>", sums the
# counts per node and prints, in ascending order, the ids of all nodes whose
# total (count * page_size, page_size given with -v) exceeds 2 GiB.
{
	# Field 2 is "<id>,": drop the comma.  "+ 0" turns the substr() result
	# into a number; otherwise the loop condition below would be evaluated
	# as a string comparison and stop early for ids of 10 and above.
	id = substr($2, 1, length($2) - 1) + 0;
	sizes[id] += $4;

	# Track the largest id seen instead of relying on the last input line
	if (NR == 1 || id > max_id) {
		max_id = id;
	}
}

END {
	for (i = 0; i <= max_id; i++) {
		if (sizes[i] * page_size > 2 * 1024 * 1024 * 1024) {
			print i;
		}
	}
}