Compare commits

..

34 Commits

Author SHA1 Message Date
6667321dc1 spec: prerelease 0.6 for testing capped best-effort memory reservation
Change-Id: Iaa91b311ee6879e84ce862aeabb4bd1fcd95d35f
2021-01-07 11:14:22 +09:00
f849745b60 spec: prerelease 0.5 for testing capped best-effort memory reservation
Change-Id: I139d6e24fbadb7313116029005e115053f31a899
2021-01-07 10:56:27 +09:00
78bc06d998 cmake: set default value of ENABLE_FUGAKU_DEBUG to OFF
Change-Id: I70703410922aa1d1440d61ead6e225d92cf60003
2021-01-07 10:42:36 +09:00
d726bd3d11 profile: fix definition of PROFILE_ENABLE and __NR_profile
Change-Id: I3f9f5870f8380d3668e1ccb06fd0f6d3307e3fa4
2021-01-06 01:03:17 +00:00
df37d6867f docs: add scheduling limitations
Change-Id: Ida4a16efa4d47f448da7417a3b4bdb5fb5304fcd
2021-01-06 09:58:38 +09:00
a4b5410d0c docs: add mlockall/munlockall limitations
Change-Id: I01d1c4eb6955baee89f6827748ac8ce4082884da
2021-01-04 12:57:32 +09:00
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn is added with the following commit.
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
To prevent "TO RESET: send SIGSTOP instead of SIGV in PF" from making
some tests expecting SIGSEGV fail.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
2fa1c053d7 spec: prerelease 0.3 for testing ihk_reserve_mem and memory policy
Change-Id: I4fbcfa1f93522fd01af42d1ef13d0be075086773
2020-12-24 15:11:01 +09:00
530110e3a9 Tofu: fix ENABLE_TOFU switching
Change-Id: Ib33323d4b59ea8fb4f5f40dff7ea25a36773d5e2
2020-12-24 15:00:14 +09:00
45 changed files with 1144 additions and 40 deletions

View File

@ -10,7 +10,7 @@ project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.2")
set(MCKERNEL_RELEASE "0.6")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@ -26,10 +26,8 @@ endif()
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
endif()
include(GNUInstallDirs)
@ -52,6 +50,48 @@ if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
execute_process(COMMAND bash -c "ls -ld /proc/tofu/ 2>/dev/null | wc -l"
OUTPUT_VARIABLE PROC_TOFU OUTPUT_STRIP_TRAILING_WHITESPACE)
if(PROC_TOFU STREQUAL "1")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
else()
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
endif()
if(ENABLE_TOFU)
add_definitions(-DENABLE_TOFU)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
# when compiling on a compute-node
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)
else()
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" OFF)
endif()
if(ENABLE_FUGAKU_HACKS)
add_definitions(-DENABLE_FUGAKU_HACKS)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_HACKS")
endif()
# SIGSTOP instead of SIGSEGV, additional IHK Linux kmsg
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" OFF)
if(ENABLE_FUGAKU_DEBUG)
add_definitions(-DENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
option(PROFILE_ENABLE "System call profile" ON)
if(PROFILE_ENABLE)
add_definitions(-DPROFILE_ENABLE)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPROFILE_ENABLE")
endif()
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
@ -255,6 +295,9 @@ message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("PROFILE_ENABLE: ${PROFILE_ENABLE}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")

View File

@ -730,6 +730,49 @@ static void show_context_stack(struct pt_regs *regs)
}
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
extern char _head[], _end[];
uintptr_t *fp, *lr;
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
if ((*fp <= sp)) {
break;
}
if ((*fp > stack_top)) {
break;
}
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void handle_IPI(unsigned int vector, struct pt_regs *regs)
{
struct ihk_mc_interrupt_handler *h;
@ -791,6 +834,19 @@ void cpu_safe_halt(void)
cpu_enable_interrupt();
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
extern void __cpu_do_idle(void);
cpu_enable_interrupt();
__cpu_do_idle();
}
#endif
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
@ -856,6 +912,21 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void)
{
unsigned long flags;
unsigned long masked = ICC_PMR_EL1_MASKED;
asm volatile(
"mrs_s %0, " __stringify(ICC_PMR_EL1)
: "=&r" (flags)
:
: "memory");
return (flags == masked);
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
@ -1377,6 +1448,14 @@ void arch_print_stack(void)
{
}
#ifdef ENABLE_FUGAKU_HACKS
unsigned long arch_get_instruction_address(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;
return regs->pc;
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;

View File

@ -223,8 +223,12 @@ static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
// XXX: Handle kernel space page faults for Tofu driver
//if (addr < USER_END)
#else
if (addr < USER_END)
#endif
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);

View File

@ -9,6 +9,9 @@
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -31,6 +34,10 @@ typedef struct {
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -98,6 +105,18 @@ static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
: "memory");
success = !tmp;
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
if (!success) {
preempt_enable();
}
@ -182,6 +201,14 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
#ifdef DEBUG_SPINLOCK

View File

@ -94,7 +94,11 @@ extern char _end[];
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
#ifdef ENABLE_TOFU
# define MAP_VMAP_START UL(0xffff7bdfffff0000)
#else
# define MAP_VMAP_START UL(0xffff780000000000)
#endif
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)

View File

@ -124,7 +124,7 @@ SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)

View File

@ -7,6 +7,9 @@
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
@ -313,14 +316,27 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once paniced, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
@ -335,7 +351,12 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}

View File

@ -217,11 +217,13 @@ static inline int ptl4_index(unsigned long addr)
int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK;
return idx;
}
#ifdef ENABLE_TOFU
static inline int ptl3_index_linux(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK_LINUX;
return idx;
}
#endif
static inline int ptl3_index(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK;
@ -281,6 +283,7 @@ static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long
return ptep;
}
#ifdef ENABLE_TOFU
static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
{
pte_t* ptep = NULL;
@ -311,6 +314,7 @@ static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
}
return ptep;
}
#endif
static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr)
{
@ -991,10 +995,12 @@ static void init_normal_area(struct page_table *pt)
tt = get_translation_table(pt);
#ifdef ENABLE_TOFU
setup(tt,
arm64_st_phys_base,
arm64_st_phys_base + (1UL << 40));
return;
#endif
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) {
unsigned long map_start, map_end;
@ -1323,6 +1329,7 @@ out:
return ret;
}
#ifdef ENABLE_TOFU
int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,
unsigned long *phys,
@ -1373,7 +1380,7 @@ out:
if(size) *size = lsize;
return 0;
}
#endif
int ihk_mc_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,

View File

@ -1071,6 +1071,9 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart,
if (k->sa.sa_flags & SA_RESTORER){
regs->regs[30] = (unsigned long)k->sa.sa_restorer;
#ifdef ENABLE_FUGAKU_HACKS
kprintf("%s: SA_RESTORER: 0x%lx\n", __func__, regs->regs[30]);
#endif
} else {
regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp);
}
@ -1723,6 +1726,7 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
#ifndef ENABLE_FUGAKU_HACKS
if (flags & MAP_HUGETLB) {
int hugeshift = flags & (0x3F << MAP_HUGE_SHIFT);
@ -1763,6 +1767,11 @@ SYSCALL_DECLARE(mmap)
goto out;
}
}
#else
if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB);
}
#endif
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;

View File

@ -174,13 +174,14 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
arch_show_interrupt_context(regs);
#if 0
#ifdef ENABLE_TOFU
info.si_signo = SIGSTOP;
info.si_errno = 0;
#else
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
#endif
info.si_signo = SIGSTOP;
info.si_errno = 0;
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);

View File

@ -868,6 +868,49 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
extern char _head[], _end[];
uintptr_t *fp, *lr;
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
if ((*fp <= sp)) {
break;
}
if ((*fp > stack_top)) {
break;
}
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
@ -1137,6 +1180,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@ -1521,6 +1575,16 @@ void arch_print_stack(void)
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
unsigned long arch_get_instruction_address(const void *reg)
{
const struct x86_user_context *uctx = reg;
const struct x86_basic_regs *regs = &uctx->gpr;
return regs->rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;

View File

@ -451,4 +451,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif

View File

@ -53,5 +53,9 @@ struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@ -168,7 +168,7 @@ SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(322, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)

View File

@ -21,7 +21,9 @@
#include <registers.h>
#include <string.h>
#ifndef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#endif
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */

View File

@ -16,9 +16,6 @@
/* whether perf is enabled */
#cmakedefine ENABLE_PERF 1
/* whether built-in tofu driver is enabled */
#cmakedefine ENABLE_TOFU 1
/* whether qlmpi is enabled */
#cmakedefine ENABLE_QLMPI 1

View File

@ -183,3 +183,9 @@ Limitations
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.
27. mlockall() is not supported and returns -EPERM.
28. munlockall() is not supported and returns zero.
29. scheduling behavior is not Linux compatible. For example, sometimes one of the two processes on the same CPU continues to run after yielding.

View File

@ -168,7 +168,9 @@ struct program_load_desc {
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
@ -215,7 +217,9 @@ struct syscall_response {
unsigned long req_thread_status;
long ret;
unsigned long fault_address;
#ifdef ENABLE_TOFU
void *pde_data;
#endif
};
struct syscall_ret_desc {

View File

@ -28,6 +28,7 @@ void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef ENABLE_TOFU
/* Tofu CQ and barrier gate release functions */
struct file_operations *mcctrl_tof_utofu_procfs_ops_cq;
int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
@ -35,6 +36,7 @@ int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
struct file_operations *mcctrl_tof_utofu_procfs_ops_bch;
int (*mcctrl_tof_utofu_release_bch)(struct inode *inode,
struct file *filp);
#endif
int arch_symbols_init(void)
{
@ -52,6 +54,7 @@ int arch_symbols_init(void)
return -EFAULT;
#endif
#ifdef ENABLE_TOFU
mcctrl_tof_utofu_procfs_ops_cq =
(void *)kallsyms_lookup_name("tof_utofu_procfs_ops_cq");
if (WARN_ON(!mcctrl_tof_utofu_procfs_ops_cq))
@ -71,6 +74,7 @@ int arch_symbols_init(void)
(void *)kallsyms_lookup_name("tof_utofu_release_bch");
if (WARN_ON(!mcctrl_tof_utofu_release_bch))
return -EFAULT;
#endif
return 0;
}
@ -360,6 +364,15 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
// page table to translation_table.
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE);
#ifdef ENABLE_FUGAKU_HACKS
if (!phys) {
pr_err("%s(): ERROR: VA: 0x%lx, rpt is NULL for PID %d\n",
__func__, rva, task_tgid_vnr(current));
error = -EFAULT;
goto out;
}
#endif
tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
rpa = (unsigned long)tbl->tt_pa;
@ -448,6 +461,7 @@ out:
}
#ifdef ENABLE_TOFU
/*
* Tofu CQ and BCH release handlers
*/
@ -549,3 +563,4 @@ int __mcctrl_tof_utofu_release_bch(struct inode *inode, struct file *filp)
return __mcctrl_tof_utofu_release_handler(inode, filp,
mcctrl_tof_utofu_release_bch);
}
#endif

View File

@ -3582,7 +3582,11 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
*ret_cpu = ch->send.queue->read_cpu;
ret = 0;
#ifndef ENABLE_FUGAKU_HACKS
pr_info("%s: OS: %lx, CPU: %d\n",
#else
dprintk("%s: OS: %lx, CPU: %d\n",
#endif
__func__, (unsigned long)os, *ret_cpu);
out_put_ppd:
@ -3646,7 +3650,11 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
/* Notify caller (for future async implementation) */
atomic_set(&desc->sync, 1);
#ifndef ENABLE_FUGAKU_HACKS
dprintk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#else
printk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#endif
__FUNCTION__,
(op == MCCTRL_OS_CPU_READ_REGISTER ? "READ" : "WRITE"), cpu,
desc->addr_ext, desc->val);

View File

@ -50,7 +50,9 @@ extern void procfs_exit(int);
extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
#ifdef ENABLE_TOFU
extern void mcctrl_file_to_pidfd_hash_init(void);
#endif
extern int mcctrl_os_read_cpu_register(ihk_os_t os, int cpu,
struct ihk_os_cpu_register *desc);
@ -233,7 +235,6 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
@ -325,7 +326,9 @@ static int __init mcctrl_init(void)
}
binfmt_mcexec_init();
#ifdef ENABLE_TOFU
mcctrl_file_to_pidfd_hash_init();
#endif
if ((ret = symbols_init()))
goto error;

View File

@ -560,6 +560,7 @@ struct uti_futex_resp {
wait_queue_head_t wq;
};
#ifdef ENABLE_TOFU
/*
* Hash table to keep track of files and related processes
* and file descriptors.
@ -585,3 +586,4 @@ struct mcctrl_file_to_pidfd *mcctrl_file_to_pidfd_hash_lookup(
int mcctrl_file_to_pidfd_hash_remove(struct file *filp,
ihk_os_t os, struct task_struct *group_leader, int fd);
#endif
#endif

View File

@ -692,14 +692,20 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma->vm_start, vma->vm_end, pgsize, pix);
}
}
else
else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
error = vmf_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
if (error == VM_FAULT_NOPAGE) {
dprintk("%s: vmf_insert_pfn returned %d\n",
__func__, error);
error = 0;
}
#else
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
#endif
}
if (error) {
pr_err("%s: vm_insert_pfn returned %d\n",
__func__, error);
@ -1843,6 +1849,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
return ret;
}
#ifdef ENABLE_TOFU
struct list_head mcctrl_file_to_pidfd_hash[MCCTRL_FILE_2_PIDFD_HASH_SIZE];
spinlock_t mcctrl_file_to_pidfd_hash_lock;
@ -1971,7 +1978,7 @@ unlock_out:
spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
return ret;
}
#endif
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid)
@ -2458,6 +2465,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
dprintk("%s: system call: %lx\n", __FUNCTION__, sc->args[0]);
switch (sc->number) {
#ifdef ENABLE_TOFU
case __NR_close: {
struct fd f;
int fd;
@ -2478,6 +2486,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
break;
}
#endif
case __NR_mmap:
ret = pager_call(os, sc);
break;

View File

@ -1179,7 +1179,7 @@ static int start_gdb(void) {
sprintf(buf, "target remote :%d", ntohs(sin.sin_port));
execlp("gdb", "eclair", "-q", "-ex", "set prompt (eclair) ",
"-ex", buf, opt.kernel_path, NULL);
"-ex", buf, opt.kernel_path, "-ex", "set pagination off", NULL);
perror("execlp");
return 3;
}

View File

@ -68,13 +68,13 @@
#include <sys/user.h>
#endif /* !__aarch64__ */
#include <sys/prctl.h>
#include "../../config.h"
#include "../include/uprotocol.h"
#include <ihk/ihk_host_user.h>
#include "../include/uti.h"
#include <getopt.h>
#include "archdep.h"
#include "arch_args.h"
#include "../../config.h"
#include <numa.h>
#include <numaif.h>
#include <spawn.h>
@ -204,7 +204,9 @@ static char *mpol_bind_nodes = NULL;
static int uti_thread_rank = 0;
static int uti_use_last_cpu = 0;
static int enable_uti = 0;
#ifdef ENABLE_TOFU
static int enable_tofu = 0;
#endif
/* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0;
@ -1787,12 +1789,14 @@ static struct option mcexec_options[] = {
.flag = &enable_uti,
.val = 1,
},
#ifdef ENABLE_TOFU
{
.name = "enable-tofu",
.has_arg = no_argument,
.flag = &enable_tofu,
.val = 1,
},
#endif
{
.name = "debug-mcexec",
.has_arg = no_argument,
@ -2816,7 +2820,9 @@ int main(int argc, char **argv)
desc->straight_map = straight_map;
desc->straight_map_threshold = straight_map_threshold;
#ifdef ENABLE_TOFU
desc->enable_tofu = enable_tofu;
#endif
/* user_start and user_end are set by this call */
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {

2
ihk

Submodule ihk updated: bf40051828...6e051eed9c

View File

@ -58,16 +58,43 @@ struct cpu_local_var *get_cpu_local_var(int id)
return clv + id;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked);
#endif
void preempt_enable(void)
{
#ifndef ENABLE_FUGAKU_HACKS
if (cpu_local_var_initialized)
--cpu_local_var(no_preempt);
#else
if (cpu_local_var_initialized) {
--cpu_local_var(no_preempt);
if (cpu_local_var(no_preempt) < 0) {
//cpu_disable_interrupt();
__kprintf("%s: %d\n", __func__, cpu_local_var(no_preempt));
__kprintf("TID: %d, call stack from builtin frame (most recent first):\n",
cpu_local_var(current)->tid);
__show_context_stack(cpu_local_var(current), (uintptr_t)&preempt_enable,
(unsigned long)__builtin_frame_address(0), 1);
//arch_cpu_stop();
//cpu_halt();
#ifdef ENABLE_FUGAKU_HACKS
panic("panic: negative preemption??");
#endif
}
}
#endif
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
if (cpu_local_var_initialized) {
++cpu_local_var(no_preempt);
}
}
int add_backlog(int (*func)(void *arg), void *arg)
@ -120,3 +147,10 @@ void do_backlog(void)
}
}
}
#ifdef ENABLE_FUGAKU_HACKS
ihk_spinlock_t *get_this_cpu_runq_lock(void)
{
return &get_this_cpu_local_var()->runq_lock;
}
#endif

View File

@ -788,7 +788,11 @@ out_remote_pf:
syscall_channel_send(resp_channel, &pckt);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#else
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#endif
ret = 0;
break;

View File

@ -85,7 +85,11 @@ static int hugefileobj_get_page(struct memobj *memobj, off_t off,
}
memset(obj->pages[pgind], 0, obj->pgsize);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#else
kprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#endif
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
@ -274,13 +278,51 @@ int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
obj->nr_pages = nr_pages;
obj->pages = pages;
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#else
kprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#endif
" for %d pages, pagesize: %lu\n",
__func__,
obj,
virt_addr,
nr_pages,
obj->pgsize);
#ifdef ENABLE_FUGAKU_HACKS
if (!hugetlbfs_on_demand) {
int pgind;
int npages;
#ifndef ENABLE_FUGAKU_HACKS
for (pgind = 0; pgind < obj->nr_pages; ++pgind) {
#else
/* Map in only the last 8 pages */
for (pgind = ((obj->nr_pages > 8) ? (obj->nr_pages - 8) : 0);
pgind < obj->nr_pages; ++pgind) {
#endif
if (obj->pages[pgind]) {
continue;
}
npages = obj->pgsize >> PAGE_SHIFT;
obj->pages[pgind] = ihk_mc_alloc_aligned_pages_user(npages,
obj->pgshift - PTL1_SHIFT,
IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, 0);
if (!obj->pages[pgind]) {
kprintf("%s: error: could not allocate page for off: %lu"
", page size: %lu\n", __func__, off, obj->pgsize);
continue;
}
memset(obj->pages[pgind], 0, obj->pgsize);
dkprintf("%s: obj: 0x%lx, pre-allocated page for off: %lu"
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
}
#endif
}
obj->memobj.size = len;

View File

@ -106,6 +106,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
#ifdef ENABLE_FUGAKU_HACKS
int in_page_fault;
#endif
int no_preempt;
int timer_enabled;
unsigned long nr_ctx_switches;

View File

@ -69,4 +69,7 @@ static inline int page_is_multi_mapped(struct page *page)
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#ifdef ENABLE_FUGAKU_HACKS
extern int hugetlbfs_on_demand;
#endif
#endif

View File

@ -395,6 +395,9 @@ struct vm_range {
off_t objoff;
int pgshift; /* page size. 0 means THP */
int padding;
#ifdef ENABLE_TOFU
struct list_head tofu_stag_list;
#endif
void *private_data;
};
@ -567,7 +570,9 @@ struct process {
int thp_disable;
int straight_map;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
size_t straight_map_threshold;
// perf_event
@ -592,9 +597,11 @@ struct process {
int coredump_barrier_count, coredump_barrier_count2;
mcs_rwlock_lock_t coredump_lock; // lock for coredump
#ifdef ENABLE_TOFU
#define MAX_FD_PDE 1024
void *fd_pde_data[MAX_FD_PDE];
char *fd_path[MAX_FD_PDE];
#endif
};
/*
@ -753,11 +760,16 @@ struct thread {
struct waitq coredump_wq;
int coredump_status;
#ifdef ENABLE_TOFU
/* Path of file being opened */
char *fd_path_in_open;
#endif
};
#define VM_RANGE_CACHE_SIZE 4
#ifdef ENABLE_TOFU
#define TOFU_STAG_HASH_SIZE 4
#endif
struct process_vm {
struct address_space *address_space;
@ -790,6 +802,12 @@ struct process_vm {
struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];
int range_cache_ind;
struct swapinfo *swapinfo;
#ifdef ENABLE_TOFU
/* Tofu STAG hash */
ihk_spinlock_t tofu_stag_lock;
struct list_head tofu_stag_hash[TOFU_STAG_HASH_SIZE];
#endif
};
static inline int has_cap_ipc_lock(struct thread *th)

View File

@ -1,9 +1,6 @@
#ifndef __PROCESS_PROFILE_H_
#define __PROCESS_PROFILE_H_
/* Uncomment this to enable profiling */
#define PROFILE_ENABLE
#ifdef PROFILE_ENABLE
#define PROFILE_SYSCALL_MAX 2000
#define PROFILE_OFFLOAD_MAX (PROFILE_SYSCALL_MAX << 1)
@ -58,8 +55,6 @@ enum profile_event_type {
PROFILE_EVENT_MAX /* Should be the last event type */
};
#define __NR_profile PROFILE_EVENT_MAX
#ifdef __KERNEL__
struct thread;
struct process;
@ -79,6 +74,8 @@ void profile_dealloc_proc_events(struct process *proc);
#include <unistd.h>
#include <sys/syscall.h>
#define __NR_profile PROFILE_EVENT_MAX
/* Per-thread */
static inline void mckernel_profile_thread_on(void)
{

View File

@ -20,6 +20,7 @@
#include <ihk/ikc.h>
#include <rlimit.h>
#include <time.h>
#include <profile.h>
#define NUM_SYSCALLS 255
@ -238,7 +239,9 @@ struct program_load_desc {
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@ -63,6 +63,9 @@ extern int interrupt_from_user(void *);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
#ifdef ENABLE_FUGAKU_HACKS
int hugetlbfs_on_demand;
#endif
int sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
static struct ihk_mc_pa_ops *pa_ops;
@ -744,7 +747,11 @@ distance_based:
}
else {
if (i == 0)
#ifndef ENABLE_FUGAKU_HACKS
kprintf("%s: distance: CPU @ node %d failed to allocate "
#else
dkprintf("%s: distance: CPU @ node %d failed to allocate "
#endif
"%d pages from node %d\n",
__FUNCTION__,
ihk_mc_get_numa_id(),
@ -951,6 +958,9 @@ static void query_free_mem_interrupt_handler(void *priv)
}
kprintf("McKernel free pages in total: %d\n", pages);
#ifdef ENABLE_FUGAKU_HACKS
panic("PANIC");
#endif
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
@ -1286,6 +1296,9 @@ void tlb_flush_handler(int vector)
}
#endif // PROFILE_ENABLE
}
#ifdef ENABLE_FUGAKU_HACKS
extern unsigned long arch_get_instruction_address(const void *reg);
#endif
static void unhandled_page_fault(struct thread *thread, void *fault_addr,
uint64_t reason, void *regs)
@ -1317,6 +1330,22 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
__kprintf("address is out of range!\n");
}
#ifdef ENABLE_FUGAKU_HACKS
{
unsigned long pc = arch_get_instruction_address(regs);
range = lookup_process_memory_range(vm, pc, pc + 1);
if (range) {
__kprintf("PC: 0x%lx (%lx in %s)\n",
pc,
(range->memobj && range->memobj->flags & MF_REG_FILE) ?
pc - range->start + range->objoff :
pc - range->start,
(range->memobj && range->memobj->path) ?
range->memobj->path : "(unknown)");
}
}
#endif
kprintf_unlock(irqflags);
/* TODO */
@ -1324,7 +1353,13 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
#ifndef ENABLE_FUGAKU_HACKS
panic("panic: kernel mode PF");
#else
kprintf("panic: kernel mode PF");
for (;;) cpu_pause();
//panic("panic: kernel mode PF");
#endif
}
//dkprintf("now dump a core file\n");
@ -1360,6 +1395,20 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
__FUNCTION__, fault_addr, reason, regs);
preempt_disable();
#ifdef ENABLE_FUGAKU_HACKS
++cpu_local_var(in_page_fault);
if (cpu_local_var(in_page_fault) > 1) {
kprintf("%s: PF in PF??\n", __func__);
cpu_disable_interrupt();
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
panic("panic: kernel mode PF in PF");
}
while (1) {
panic("PANIC");
}
}
#endif
cpu_enable_interrupt();
@ -1427,6 +1476,13 @@ out_linux:
reason, error);
unhandled_page_fault(thread, fault_addr, reason, regs);
preempt_enable();
#ifdef ENABLE_FUGAKU_DEBUG
kprintf("%s: sending SIGSTOP to TID: %d\n", __func__, thread->tid);
do_kill(thread, thread->proc->pid, thread->tid, SIGSTOP, NULL, 0);
goto out;
#endif
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
@ -1455,6 +1511,9 @@ out_linux:
out_ok:
#endif
error = 0;
#ifdef ENABLE_FUGAKU_HACKS
--cpu_local_var(in_page_fault);
#endif
preempt_enable();
out:
dkprintf("%s: addr: %p, reason: %lx, regs: %p -> error: %d\n",
@ -2041,6 +2100,13 @@ void mem_init(void)
anon_on_demand = 1;
}
#ifdef ENABLE_FUGAKU_HACKS
if (find_command_line("hugetlbfs_on_demand")) {
kprintf("Demand paging on hugetlbfs mappings enabled.\n");
hugetlbfs_on_demand = 1;
}
#endif
/* Init distance vectors */
numa_distances_init();
}

View File

@ -36,6 +36,9 @@
#include <rusage_private.h>
#include <ihk/monitor.h>
#include <ihk/debug.h>
#ifdef ENABLE_TOFU
#include <tofu/tofu_stag_range.h>
#endif
//#define DEBUG_PRINT_PROCESS
@ -269,6 +272,12 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
}
vm->range_cache_ind = 0;
#ifdef ENABLE_TOFU
ihk_mc_spinlock_init(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&vm->tofu_stag_hash[i]);
}
#endif
return 0;
}
@ -955,6 +964,11 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
newrange->pgshift = range->pgshift;
newrange->private_data = range->private_data;
#ifdef ENABLE_TOFU
/* TODO: figure out which entries to put on which list! */
INIT_LIST_HEAD(&newrange->tofu_stag_list);
#endif
if (range->memobj) {
memobj_ref(range->memobj);
newrange->memobj = range->memobj;
@ -1023,6 +1037,28 @@ int join_process_memory_range(struct process_vm *vm,
if (vm->range_cache[i] == merging)
vm->range_cache[i] = surviving;
}
#ifdef ENABLE_TOFU
/* Move Tofu stag range entries */
if (vm->proc->enable_tofu) {
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&merging->tofu_stag_list, list) {
list_del(&tsr->list);
list_add_tail(&tsr->list, &surviving->tofu_stag_list);
dkprintf("%s: stag: %d @ %p:%lu moved in VM range merge\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
#endif
kfree(merging);
error = 0;
@ -1137,6 +1173,24 @@ static int free_process_memory_range(struct process_vm *vm,
}
straight_out:
#ifdef ENABLE_TOFU
if (vm->proc->enable_tofu) {
int entries;
extern int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range);
entries = tofu_stag_range_remove_overlapping(vm, range);
if (entries > 0) {
kprintf("%s: removed %d Tofu stag entries for range 0x%lx:%lu\n",
__func__,
entries,
range->start,
range->end - range->start);
}
}
#endif
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
if (vm->range_cache[i] == range)
@ -1428,6 +1482,9 @@ int add_process_memory_range(struct process_vm *vm,
range->pgshift = pgshift;
range->private_data = NULL;
range->straight_start = 0;
#ifdef ENABLE_TOFU
INIT_LIST_HEAD(&range->tofu_stag_list);
#endif
rc = 0;
if (phys == NOPHYS) {
@ -2521,6 +2578,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
__FUNCTION__, size, minsz,
ap_flag ? "(IHK_MC_AP_USER)" : "");
#ifdef ENABLE_FUGAKU_HACKS
/*
* XXX: Fugaku: Fujitsu's runtime remaps the stack
* using hugetlbfs so don't bother allocating too much here..
*/
minsz = 8*1024*1024;
#endif
stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
USER_STACK_PAGE_P2ALIGN,
IHK_MC_AP_NOWAIT | ap_flag,

View File

@ -204,6 +204,14 @@ long do_syscall(struct syscall_request *req, int cpu)
++thread->in_syscall_offload;
}
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (req->number == __NR_write && req->args[0] == 1) {
return req->args[2];
}
#endif
#endif
/* The current thread is the requester */
req->rtid = cpu_local_var(current)->tid;
@ -220,7 +228,9 @@ long do_syscall(struct syscall_request *req, int cpu)
req->ttid = 0;
}
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
#ifdef ENABLE_TOFU
res.pde_data = NULL;
#endif
send_syscall(req, cpu, &res);
if (req->rtid == -1) {
@ -381,6 +391,7 @@ long do_syscall(struct syscall_request *req, int cpu)
rc = res.ret;
#ifdef ENABLE_TOFU
if ((req->number == __NR_ioctl && rc == 0) ||
(req->number == __NR_openat && rc > 0)) {
int fd = req->number == __NR_ioctl ? req->args[0] : rc;
@ -407,6 +418,7 @@ long do_syscall(struct syscall_request *req, int cpu)
res.pde_data);
}
}
#endif
if(req->number != __NR_exit_group){
--thread->in_syscall_offload;
@ -1378,6 +1390,15 @@ void terminate(int rc, int sig)
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
vm = proc->vm;
#ifdef ENABLE_TOFU
if (proc->enable_tofu) {
extern void tof_utofu_finalize();
tof_utofu_finalize();
}
#endif
free_all_process_memory_range(vm);
if (proc->saved_cmdline) {
@ -1528,7 +1549,9 @@ int process_cleanup_before_terminate(int pid)
{
struct process *proc;
struct mcs_rwlock_node_irqsave lock;
#ifdef ENABLE_TOFU
int fd;
#endif
proc = find_process(pid, &lock);
if (!proc) {
@ -1536,10 +1559,12 @@ int process_cleanup_before_terminate(int pid)
return 0;
}
#ifdef ENABLE_TOFU
/* Clean up PDE file descriptors */
for (fd = 2; fd < MAX_FD_PDE; ++fd) {
__process_cleanup_fd(proc, fd);
}
#endif
process_unlock(proc, &lock);
return 0;
@ -1922,6 +1947,10 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
__FUNCTION__, proc->straight_va, range->pgshift);
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
#ifdef ENABLE_FUGAKU_HACKS
if (1) { // Un-safe mapping of covering physical range
#endif
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
proc->vm,
(void *)range->start,
@ -1948,6 +1977,90 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
proc->straight_pa,
psize,
proc->straight_map_threshold);
#ifdef ENABLE_FUGAKU_HACKS
}
else { // Safe mapping of only LWK memory ranges
size_t max_pgsize = 0;
size_t min_pgsize = 0xFFFFFFFFFFFFFFFF;
/*
* Iterate LWK phsyical memory chunks and map them to their
* corresponding offset in the straight range using the largest
* suitable pages.
*/
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end, pa;
void *va, *va_end;
size_t pgsize;
int pg2align;
ihk_mc_get_memory_chunk(i, &start, &end, NULL);
va = proc->straight_va + (start - straight_pa_start);
va_end = va + (end - start);
pa = start;
while (va < va_end) {
pgsize = (va_end - va) + 1;
retry:
error = arch_get_smaller_page_size(NULL, pgsize,
&pgsize, &pg2align);
if (error) {
ekprintf("%s: arch_get_smaller_page_size() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
/* Are virtual or physical not page aligned for this size? */
if (((unsigned long)va & (pgsize - 1)) ||
(pa & (pgsize - 1))) {
goto retry;
}
error = ihk_mc_pt_set_range(
proc->vm->address_space->page_table,
proc->vm,
va,
va + pgsize,
pa,
ptattr,
pg2align + PAGE_SHIFT,
range,
0);
if (error) {
kprintf("%s: ihk_mc_pt_set_range() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
if (pgsize > max_pgsize)
max_pgsize = pgsize;
if (pgsize < min_pgsize)
min_pgsize = pgsize;
va += pgsize;
pa += pgsize;
}
}
region->map_end = (unsigned long)proc->straight_va +
proc->straight_len;
proc->straight_pa = straight_pa_start;
kprintf("%s: straight mapping: 0x%lx:%lu @ "
"min_pgsize: %lu, max_pgsize: %lu\n",
__FUNCTION__,
proc->straight_va,
proc->straight_len,
min_pgsize,
max_pgsize);
}
#endif
}
straight_out:
@ -2276,8 +2389,15 @@ straight_out:
range->straight_start =
(unsigned long)proc->straight_va +
(straight_phys - proc->straight_pa);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx\n",
__FUNCTION__, addr, len, range->straight_start);
__FUNCTION__, addr, len, range->straight_start);
#else
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx"
" (phys: 0x%lx)\n",
__FUNCTION__, addr, len, range->straight_start,
straight_phys);
#endif
if (!zero_at_free) {
memset((void *)phys_to_virt(straight_phys), 0, len);
}
@ -2377,11 +2497,20 @@ out:
if (memobj) {
memobj_unref(memobj);
}
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#else
if (cpu_local_var(current)->profile) {
kprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#endif
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
__FUNCTION__,
addr, len, addr0, len0, prot, flags,
fd, off0, error, addr);
#ifdef ENABLE_FUGAKU_HACKS
}
#endif
return !error ?
(range->straight_start ? range->straight_start : addr) :
@ -2418,6 +2547,11 @@ SYSCALL_DECLARE(munmap)
out:
dkprintf("[%d]sys_munmap(%lx,%lx): %d\n",
ihk_mc_get_processor_id(), addr, len0, error);
#ifdef ENABLE_FUGAKU_HACKS
if (error) {
kprintf("%s: error: %d\n", __func__, error);
}
#endif
return error;
}
@ -3978,7 +4112,9 @@ SYSCALL_DECLARE(open)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("open(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@ -3987,15 +4123,21 @@ SYSCALL_DECLARE(open)
rc = syscall_generic_forwarding(__NR_open, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}
@ -4023,7 +4165,9 @@ SYSCALL_DECLARE(openat)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("openat(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@ -4032,15 +4176,21 @@ SYSCALL_DECLARE(openat)
rc = syscall_generic_forwarding(__NR_openat, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}

View File

@ -24,6 +24,7 @@
struct kmalloc_cache_header tofu_scatterlist_cache[8];
struct kmalloc_cache_header tofu_mbpt_cache[8];
struct ihk_mc_page_cache_header tofu_mbpt_sg_pages_cache[8];
struct kmalloc_cache_header tofu_stag_range_cache[8];
typedef ihk_spinlock_t spinlock_t;
@ -43,6 +44,124 @@ typedef void (*tof_core_signal_handler)(int, int, uint64_t, uint64_t);
#include <tofu/tofu_generated-tof_utofu_bg.h>
#include <tofu/tofu_generated-tof_utofu_mbpt.h>
#include <tofu/tofu_stag_range.h>
/*
* Tofu STAG regions list keeps track of stags in a given VM range..
* Per-process tree is protected by process' vm_range_lock.
*/
/*
 * Register a Tofu STAG that covers [start, end) for the given CQ.
 *
 * A tracking entry is allocated from the per-NUMA-node cache and linked to
 * two lists under vm->tofu_stag_lock:
 *  - range->tofu_stag_list: all stags attached to this VM range
 *  - vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE]: per-process lookup hash
 *
 * Returns 0 on success, -ENOMEM if the cache allocation fails.
 * NOTE(review): per the comment above this function, the per-process tree is
 * protected by the process' vm_range_lock -- presumably callers hold it here;
 * confirm.
 */
int tof_utofu_stag_range_insert(struct process_vm *vm,
struct vm_range *range,
uintptr_t start, uintptr_t end,
struct tof_utofu_cq *ucq, int stag)
{
/* Cache allocation replaces the plain kmalloc kept in the comment */
struct tofu_stag_range *tsr; // = kmalloc(sizeof(*tsr), IHK_MC_AP_NOWAIT);
tsr = kmalloc_cache_alloc(&tofu_stag_range_cache[ihk_mc_get_numa_id()],
sizeof(*tsr));
if (!tsr) {
kprintf("%s: error: allocating tofu_stag_range\n", __func__);
return -ENOMEM;
}
/* Fill in the descriptor before publishing it on any list */
tsr->start = start;
tsr->end = end;
tsr->ucq = ucq;
tsr->stag = stag;
/* Publish on both the per-range list and the per-process hash atomically
 * with respect to other stag-list walkers */
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_add_tail(&tsr->list, &range->tofu_stag_list);
list_add_tail(&tsr->hash, &vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE]);
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
return 0;
}
/*
 * Look up a registered STAG tracking entry by its stag number.
 * Walks the corresponding hash bucket under vm->tofu_stag_lock.
 * Returns the matching entry, or NULL if the stag is not registered.
 */
struct tofu_stag_range *tofu_stag_range_lookup_by_stag(struct process_vm *vm,
	int stag)
{
	struct tofu_stag_range *found = NULL;
	struct tofu_stag_range *entry;

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	list_for_each_entry(entry,
			&vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE], hash) {
		if (entry->stag != stag)
			continue;
		found = entry;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	return found;
}
/*
 * Unlink and free a STAG tracking entry.
 * Removes @tsr from both the per-range list and the per-process hash and
 * returns it to the kmalloc cache.
 * XXX: caller must hold vm->tofu_stag_lock (lock-free variant; see
 * tofu_stag_range_remove() for the locking wrapper).
 */
void __tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
/* Drop from the per-range list and the stag hash */
list_del(&tsr->list);
list_del(&tsr->hash);
/* Entry came from tofu_stag_range_cache, not plain kmalloc */
//kfree(tsr);
kmalloc_cache_free(tsr);
}
/*
 * Locking wrapper around __tofu_stag_range_remove(): unlink and free @tsr
 * while holding vm->tofu_stag_lock.
 */
void tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
__tofu_stag_range_remove(vm, tsr);
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag);
/*
 * Free and unregister every STAG attached to @range.
 *
 * For each tracking entry on range->tofu_stag_list the driver-side stag is
 * released via tof_utofu_free_stag() under the owning CQ's mru_lock, then
 * the entry itself is unlinked and freed. The whole walk runs under
 * vm->tofu_stag_lock, hence the _safe list iteration.
 *
 * Returns the number of entries removed.
 *
 * NOTE(review): tof_utofu_free_stag() is invoked while vm->tofu_stag_lock
 * is held here, whereas the free-stags ioctl path takes mru_lock and the
 * stag lock in separate, non-nested sections -- confirm this nesting is
 * intended and cannot deadlock.
 */
int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range)
{
struct tofu_stag_range *tsr, *next;
int entries = 0;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&range->tofu_stag_list, list) {
dkprintf("%s: stag: %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
/* Release the stag on the CQ before dropping our bookkeeping */
linux_spin_lock(&tsr->ucq->trans.mru_lock);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock(&tsr->ucq->trans.mru_lock);
__tofu_stag_range_remove(vm, tsr);
++entries;
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
return entries;
}
#define TOF_UTOFU_VERSION TOF_UAPI_VERSION
#define TOF_UTOFU_NUM_STAG_NTYPES 3
#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13)
@ -146,6 +265,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
else {
*_pgszbits = PTL2_SHIFT;
*_pgszbits = PTL1_CONT_SHIFT;
}
return 0;
}
@ -171,7 +291,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
}
#if 0
#if 1
/* Tofu only support 64kB and 2MB pages */
if (min_shift > PTL1_CONT_SHIFT)
min_shift = PTL1_CONT_SHIFT;
@ -647,7 +767,6 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
//struct page *page;
struct process *proc = cpu_local_var(current)->proc;
uintptr_t iova = 0, va;
int ret;
unsigned long phys = 0;
/* Special case for straight mapping */
@ -697,6 +816,8 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
}
for(va = start; va < end; va += pgsz, ix++){
size_t psize;
pte_t *ptep;
if (tof_utofu_mbpt_is_enabled(mbpt, ix)) {
/* this page is already mapped to mbpt */
@ -715,15 +836,18 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
// return -ENOMEM;
//}
ret = ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
(void *)va, &phys);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)va, 0, NULL, &psize, NULL);
if (ret) {
raw_rc_output(ret);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, va);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
(va & (psize - 1));
//iova = tof_smmu_get_ipa_cq(ucq->tni, ucq->cqid,
// pfn_to_kaddr(page_to_pfn(page)), pgsz);
//if (iova == 0) {
@ -1012,6 +1136,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
size_t pgsz;
int ret = -ENOTSUPP;
unsigned long irqflags;
struct vm_range *range = NULL;
ucq = container_of(dev, struct tof_utofu_cq, common);
if(!ucq->common.enabled){
@ -1033,7 +1158,46 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
}
readonly = (req.flags & 1) != 0;
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
/* Assume smallest page size at first */
start = round_down((uintptr_t)req.va, PAGE_SIZE);
end = round_up((uintptr_t)req.va + req.len, PAGE_SIZE);
/* Find range, straight mapping special lookup */
if (vm->proc->straight_va &&
start >= (unsigned long)vm->proc->straight_va &&
end <= ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len) &&
!(start == (unsigned long)vm->proc->straight_va &&
end == ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len))) {
struct vm_range *range_iter;
range_iter = lookup_process_memory_range(vm, 0, -1);
while (range_iter) {
if (range_iter->straight_start &&
start >= range_iter->straight_start &&
start < (range_iter->straight_start +
(range_iter->end - range_iter->start))) {
range = range_iter;
break;
}
range_iter = next_process_memory_range(vm, range_iter);
}
}
else {
range = lookup_process_memory_range(vm, start, end);
}
if (!range) {
ret = -EINVAL;
goto unlock_out;
}
pgszbits = PAGE_SHIFT;
if (req.flags & TOF_UTOFU_ALLOC_STAG_LPG) {
ret = tof_utofu_get_pagesize_locked((uintptr_t)req.va,
@ -1109,6 +1273,12 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
//up(&ucq->ucq_sem);
ihk_mc_spinlock_unlock_noirq(&tofu_tni_cq_lock[ucq->tni][ucq->cqid]);
if (ret == 0) {
tof_utofu_stag_range_insert(vm, range, start, end, ucq, req.stag);
}
unlock_out:
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if(ret == 0){
@ -1332,6 +1502,21 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
ret = tof_utofu_free_stag(ucq, stags[i]);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
{
struct tofu_stag_range *tsr;
tsr = tofu_stag_range_lookup_by_stag(
cpu_local_var(current)->vm, stags[i]);
if (tsr) {
tofu_stag_range_remove(cpu_local_var(current)->vm, tsr);
}
else {
kprintf("%s: no stag range object for %d??\n", __func__, stags[i]);
}
}
if(ret == 0){
stags[i] = -1;
}
@ -1374,9 +1559,11 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
void tof_utofu_release_cq(void *pde_data)
{
struct tof_utofu_cq *ucq;
int stag;
//int stag;
struct tof_utofu_device *dev;
unsigned long irqflags;
struct process_vm *vm = cpu_local_var(current)->vm;
int do_free = 1;
dev = (struct tof_utofu_device *)pde_data;
ucq = container_of(dev, struct tof_utofu_cq, common);
@ -1384,13 +1571,43 @@ void tof_utofu_release_cq(void *pde_data)
if (!ucq->common.enabled) {
kprintf("%s: UCQ TNI %d, CQ %d is disabled\n",
__func__, ucq->tni, ucq->cqid);
return;
do_free = 0;
}
#if 0
for (stag = 0; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag++) {
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(ucq, stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
#endif
{
int i;
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
if (tsr->ucq != ucq)
continue;
if (do_free) {
dkprintf("%s: removing stag %d for TNI %d CQ %d\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
}
else {
kprintf("%s: WARNING: could not free stag %d for TNI %d CQ %d (UCQ is disabled)\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
}
__tofu_stag_range_remove(vm, tsr);
}
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
dkprintf("%s: UCQ (pde: %p) TNI %d, CQ %d\n",
@ -1829,14 +2046,22 @@ static int tof_utofu_ioctl_enable_bch(struct tof_utofu_device *dev, unsigned lon
}
if (!phys) {
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)req.addr, &phys);
size_t psize;
pte_t *ptep;
if (ret) {
raw_rc_output(ret);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)req.addr, 0, NULL, &psize, NULL);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, req.addr);
raw_rc_output(-ENOMEM);
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
((uint64_t)req.addr & (psize - 1));
}
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
@ -2068,6 +2293,7 @@ void tof_utofu_init_globals(void)
memset(tofu_scatterlist_cache, 0, sizeof(tofu_scatterlist_cache));
memset(tofu_mbpt_cache, 0, sizeof(tofu_mbpt_cache));
memset(tofu_mbpt_sg_pages_cache, 0, sizeof(tofu_mbpt_sg_pages_cache));
memset(tofu_stag_range_cache, 0, sizeof(tofu_stag_range_cache));
{
int tni, cq;
@ -2120,6 +2346,24 @@ void tof_utofu_finalize(void)
{
struct tofu_globals *tg = ihk_mc_get_tofu_globals();
/* Could be called from idle.. */
if (cpu_local_var(current)->proc->enable_tofu) {
int i;
struct process_vm *vm = cpu_local_var(current)->vm;
struct tofu_stag_range *tsr, *next;
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
dkprintf("%s: WARNING: stray stag %d for TNI %d CQ %d?\n",
__func__, tsr->stag, tsr->ucq->tni, tsr->ucq->cqid);
}
}
kprintf("%s: STAG processing done\n", __func__);
}
ihk_mc_clear_kernel_range((void *)tg->linux_vmalloc_start,
(void *)tg->linux_vmalloc_end);
}

View File

@ -22,12 +22,18 @@ void panic(const char *msg)
arch_print_stack();
#ifndef ENABLE_FUGAKU_HACKS
/* do not assume anything after this is executed */
arch_cpu_stop();
while (1) {
cpu_halt();
}
#else
while (1) {
cpu_halt_panic();
}
#endif
}
extern void arch_show_interrupt_context(const void*);

View File

@ -23,7 +23,13 @@ extern int num_processors;
void cpu_enable_interrupt(void);
void cpu_disable_interrupt(void);
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void);
#endif
void cpu_halt(void);
#ifdef ENABLE_FUGAKU_HACKS
void cpu_halt_panic(void);
#endif
void cpu_safe_halt(void);
void cpu_restore_interrupt(unsigned long);
void cpu_pause(void);

View File

@ -227,9 +227,11 @@ int ihk_mc_get_memory_chunk(int id,
unsigned long *start,
unsigned long *end,
int *numa_id);
#ifdef ENABLE_TOFU
int ihk_mc_get_memory_chunk_dma_addr(int id,
int tni, int cqid,
uintptr_t *dma_addr);
#endif
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);

View File

@ -332,6 +332,23 @@ int deferred_zero_at_free = 1;
* of their corresponding memory (i.e., they are on the free memory chunk itself).
*/
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Sum the sizes of all free chunks in the red-black tree rooted at @root.
 * NOTE: locking must be managed by the caller (chunk headers live on the
 * free memory itself; see the comment above).
 */
size_t __count_free_bytes(struct rb_root *root)
{
	size_t total = 0;
	struct rb_node *it = rb_first(root);

	while (it) {
		struct free_chunk *fc =
			container_of(it, struct free_chunk, node);

		total += fc->size;
		it = rb_next(it);
	}

	return total;
}
#endif
/*
* Free pages.
* NOTE: locking must be managed by the caller.

View File

@ -0,0 +1,99 @@
#!/bin/bash
# IHK/McKernel user privilege reboot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2019 RIKEN
#
# Shuts down the running McKernel OS instance, re-assigns CPU and memory
# resources, restores the IKC map and boots a fresh instance.
#
# Options:
#   -c CPUS     CPU list to assign (default: CPUs currently reserved by IHK)
#   -m MEM      memory specification to assign (default: all@0 .. all@15)
#   -r IKC_MAP  IKC mapping to set (default: map queried from the running OS)
prefix="@prefix@"
BINDIR="${prefix}/bin"
SBINDIR="${prefix}/sbin"
KERNDIR="@MCKERNELDIR@"
mem=""
cpus=""
ikc_map=""
while getopts c:m:r: OPT
do
	case ${OPT} in
	c) cpus=${OPTARG}
		;;
	m) mem=${OPTARG}
		;;
	r) ikc_map=${OPTARG}
		;;
	\?) exit 1
		;;
	esac
done
if [ "${ikc_map}" == "" ]; then
	# Query IKC map from the running instance before shutting it down
	if ! ${SBINDIR}/ihkosctl 0 get ikc_map > /dev/null; then
		echo "error: querying IKC map" >&2
		exit 1
	fi
	ikc_map=`${SBINDIR}/ihkosctl 0 get ikc_map`
fi
# Shutdown OS
if ! ${SBINDIR}/ihkosctl 0 shutdown; then
	echo "error: shutting down OS" >&2
	exit 1
fi
sleep 2
# Query IHK-SMP reserved CPUs unless given on the command line (-c).
# (Previously the -c argument was unconditionally overwritten here.)
if [ "${cpus}" == "" ]; then
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
		echo "error: querying cpus" >&2
		exit 1
	fi
	cpus=`${SBINDIR}/ihkconfig 0 query cpu`
	if [ "${cpus}" == "" ]; then
		echo "error: querying CPUs" >&2
		exit 1
	fi
fi
# Assign CPUs
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then
	echo "error: assign CPUs" >&2
	exit 1
fi
# Assign memory: user-specified (-m), otherwise everything from NUMA 0-15.
# (Previously the -m argument was parsed but never used.)
if [ "${mem}" != "" ]; then
	if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then
		echo "error: assign memory" >&2
		exit 1
	fi
else
	for i in `seq 0 15`; do
		if ! ${SBINDIR}/ihkosctl 0 assign mem all@${i}; then
			echo "error: assign memory" >&2
			exit 1
		fi
	done
fi
if [ "${ikc_map}" != "" ]; then
	# Set IKC map
	if ! ${SBINDIR}/ihkosctl 0 set ikc_map ${ikc_map}; then
		echo "error: setting IKC map" >&2
		exit 1
	fi
fi
# Load kernel image
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
	echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
	exit 1
fi
# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos dump_level=24"; then
	echo "error: setting kernel arguments" >&2
	exit 1
fi
# Boot OS instance
if ! ${SBINDIR}/ihkosctl 0 boot; then
	echo "error: booting" >&2
	exit 1
fi

View File

@ -65,8 +65,9 @@ umask_old=`umask`
idle_halt=""
allow_oversubscribe=""
time_sharing="time_sharing"
force_reserve="no"
while getopts stk:c:m:o:f:r:q:i:d:e:hOT: OPT
while getopts stk:c:m:o:f:r:q:i:d:e:hROT: OPT
do
case ${OPT} in
f) facility=${OPTARG}
@ -97,6 +98,8 @@ do
;;
O) allow_oversubscribe="allow_oversubscribe"
;;
R) force_reserve="yes"
;;
T)
case ${OPTARG} in
1) time_sharing="time_sharing"
@ -343,6 +346,17 @@ if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
fi
fi
if [ ${force_reserve} == "yes" ]; then
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
echo "error: reserving memory" >&2
error_exit "ihk_smp_loaded"
fi
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
echo "error: reserving CPUs" >&2;
error_exit "mem_reserved"
fi
fi
# Load mcctrl if not loaded
if ! grep mcctrl /proc/modules &>/dev/null; then
if ! taskset -c 0 ${SUDO} insmod ${KMODDIR}/mcctrl.ko 2>/dev/null; then

View File

@ -18,9 +18,10 @@ KERNDIR="@KERNDIR@"
mem=""
cpus=""
kill_in_use=""
dont_unload="no"
RMMOD_PATH=/sbin/rmmod
while getopts r:k OPT
while getopts r:kR OPT
do
case ${OPT} in
r)
@ -29,6 +30,9 @@ do
k)
kill_in_use=1
;;
R)
dont_unload="yes"
;;
\?) exit 1
;;
esac
@ -115,6 +119,10 @@ if ! sudo ${SBINDIR}/ihkconfig 0 release mem "all" > /dev/null; then
exit 1
fi
if [ "${dont_unload}" == "yes" ]; then
exit 0
fi
# Remove delegator if loaded
if grep mcctrl /proc/modules &>/dev/null; then
if ! sudo ${RMMOD_PATH} mcctrl 2>/dev/null; then