Compare commits

..

31 Commits

Author SHA1 Message Date
f849745b60 spec: prerelease 0.5 for testing capped best-effort memory reservation
Change-Id: I139d6e24fbadb7313116029005e115053f31a899
2021-01-07 10:56:27 +09:00
78bc06d998 cmake: set default value of ENABLE_FUGAKU_DEBUG to OFF
Change-Id: I70703410922aa1d1440d61ead6e225d92cf60003
2021-01-07 10:42:36 +09:00
d726bd3d11 profile: fix definition of PROFILE_ENABLE and __NR_profile
Change-Id: I3f9f5870f8380d3668e1ccb06fd0f6d3307e3fa4
2021-01-06 01:03:17 +00:00
df37d6867f docs: add scheduling limitations
Change-Id: Ida4a16efa4d47f448da7417a3b4bdb5fb5304fcd
2021-01-06 09:58:38 +09:00
a4b5410d0c docs: add mlockall/munlockall limitations
Change-Id: I01d1c4eb6955baee89f6827748ac8ce4082884da
2021-01-04 12:57:32 +09:00
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn is added with the following commit.
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
To prevent "TO RESET: send SIGSTOP instead of SIGV in PF" from making
some tests expecting SIGSEGV fail.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
36 changed files with 1049 additions and 29 deletions

View File

@ -10,7 +10,7 @@ project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.3")
set(MCKERNEL_RELEASE "0.5")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@ -64,6 +64,34 @@ if(ENABLE_TOFU)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
# when compiling on a compute-node
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)
else()
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" OFF)
endif()
if(ENABLE_FUGAKU_HACKS)
add_definitions(-DENABLE_FUGAKU_HACKS)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_HACKS")
endif()
# SIGSTOP instead of SIGSEGV, additional IHK Linux kmsg
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" OFF)
if(ENABLE_FUGAKU_DEBUG)
add_definitions(-DENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
option(PROFILE_ENABLE "System call profile" ON)
if(PROFILE_ENABLE)
add_definitions(-DPROFILE_ENABLE)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPROFILE_ENABLE")
endif()
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
@ -267,6 +295,9 @@ message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("PROFILE_ENABLE: ${PROFILE_ENABLE}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")

View File

@ -730,6 +730,49 @@ static void show_context_stack(struct pt_regs *regs)
}
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a kernel call stack for 'thread' by walking the AArch64 frame
 * records starting at 'sp'.
 *
 * pc: instruction address reported for the innermost frame.
 * sp: stack pointer assumed to point at a frame record
 *     ([sp] = saved FP, [sp+8] = saved LR) -- TODO confirm for all callers.
 * kprintf_locked: nonzero when the caller already holds the kprintf lock,
 *     in which case no locking is done here.
 */
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
/* Kernel stacks are KERNEL_STACK_SIZE aligned: round up to find the top */
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
/* Kernel image boundaries from the linker script */
extern char _head[], _end[];
uintptr_t *fp, *lr;
/* Frame record: saved frame pointer at [sp], saved link register at [sp+8] */
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
/* Stop when the saved FP does not point further up this stack... */
if ((*fp <= sp)) {
break;
}
/* ...or points past the top of the kernel stack */
if ((*fp > stack_top)) {
break;
}
/* Stop when the return address lies outside the kernel image */
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
/* *lr - 4 backs up one fixed-width AArch64 instruction to the call site */
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void handle_IPI(unsigned int vector, struct pt_regs *regs)
{
struct ihk_mc_interrupt_handler *h;
@ -791,6 +834,19 @@ void cpu_safe_halt(void)
cpu_enable_interrupt();
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Halt this CPU after a panic: interrupts are re-enabled before entering
 * the low-power idle loop -- presumably so stop/NMI IPIs can still be
 * delivered to a paniced core (see handle_interrupt_gicv3); confirm.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
extern void __cpu_do_idle(void);
cpu_enable_interrupt();
__cpu_do_idle();
}
#endif
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
@ -856,6 +912,21 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return nonzero when IRQs are masked on this CPU.
 *
 * This sits in the CONFIG_HAS_NMI build, where interrupt masking is done
 * via the GIC priority mask: read ICC_PMR_EL1 and compare it against the
 * ICC_PMR_EL1_MASKED priority value.
 */
int cpu_interrupt_disabled(void)
{
unsigned long flags;
unsigned long masked = ICC_PMR_EL1_MASKED;
/* mrs_s: read the (sysreg-encoded) ICC_PMR_EL1 into flags */
asm volatile(
"mrs_s %0, " __stringify(ICC_PMR_EL1)
: "=&r" (flags)
:
: "memory");
return (flags == masked);
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
@ -1377,6 +1448,14 @@ void arch_print_stack(void)
{
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the interrupted/faulting instruction address from a saved
 * register frame. 'reg' must point at a struct pt_regs.
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	/* Keep const-qualification instead of casting it away,
	 * matching the x86 implementation of this helper. */
	const struct pt_regs *regs = (const struct pt_regs *)reg;

	return regs->pc;
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;

View File

@ -9,6 +9,9 @@
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -31,6 +34,10 @@ typedef struct {
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -98,6 +105,18 @@ static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
: "memory");
success = !tmp;
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
if (!success) {
preempt_enable();
}
@ -182,6 +201,14 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
#ifdef DEBUG_SPINLOCK

View File

@ -124,7 +124,7 @@ SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)

View File

@ -7,6 +7,9 @@
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
@ -313,14 +316,27 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once panicked, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
@ -335,7 +351,12 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}

View File

@ -1071,6 +1071,9 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart,
if (k->sa.sa_flags & SA_RESTORER){
regs->regs[30] = (unsigned long)k->sa.sa_restorer;
#ifdef ENABLE_FUGAKU_HACKS
kprintf("%s: SA_RESTORER: 0x%lx\n", __func__, regs->regs[30]);
#endif
} else {
regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp);
}
@ -1723,6 +1726,7 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
#ifndef ENABLE_FUGAKU_HACKS
if (flags & MAP_HUGETLB) {
int hugeshift = flags & (0x3F << MAP_HUGE_SHIFT);
@ -1763,6 +1767,11 @@ SYSCALL_DECLARE(mmap)
goto out;
}
}
#else
if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB);
}
#endif
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;

View File

@ -868,6 +868,49 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a kernel call stack for 'thread' by walking saved frame
 * pointer / return address pairs starting at 'sp'.
 *
 * pc: instruction address reported for the innermost frame.
 * sp: assumed to point at a frame record ([sp] = saved FP,
 *     [sp+8] = saved return address) -- TODO confirm for all callers.
 * kprintf_locked: nonzero when the caller already holds the kprintf lock,
 *     in which case no locking is done here.
 */
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
/* Kernel stacks are KERNEL_STACK_SIZE aligned: round up to find the top */
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
/* Kernel image boundaries from the linker script */
extern char _head[], _end[];
uintptr_t *fp, *lr;
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
/* Stop when the saved FP does not point further up this stack... */
if ((*fp <= sp)) {
break;
}
/* ...or points past the top of the kernel stack */
if ((*fp > stack_top)) {
break;
}
/* Stop when the return address lies outside the kernel image */
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
/* NOTE(review): "*lr - 4" was copied from the arm64 variant; x86 call
 * instructions are variable width, so this only approximates the call
 * site -- confirm this is acceptable for debug output. */
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
@ -1137,6 +1180,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Halt this CPU after a panic (x86 variant): simply executes hlt via
 * cpu_halt(). NOTE(review): unlike the arm64 counterpart, interrupts are
 * not re-enabled here, so the ACSL "ensures \interrupt_disabled == 0"
 * clause (copied from the arm64 version) may not hold -- confirm.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@ -1521,6 +1575,16 @@ void arch_print_stack(void)
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the interrupted/faulting instruction address (saved RIP) from a
 * register frame. 'reg' must point at a struct x86_user_context.
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	const struct x86_user_context *uctx;

	uctx = reg;
	return uctx->gpr.rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;

View File

@ -451,4 +451,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif

View File

@ -53,5 +53,9 @@ struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@ -168,7 +168,7 @@ SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(322, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)

View File

@ -21,7 +21,9 @@
#include <registers.h>
#include <string.h>
#ifndef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#endif
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */

View File

@ -183,3 +183,9 @@ Limitations
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.
27. mlockall() is not supported and returns -EPERM.
28. munlockall() is not supported and returns zero.
29. scheduling behavior is not Linux compatible. For example, sometimes one of the two processes on the same CPU continues to run after yielding.

View File

@ -364,6 +364,15 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
// page table to translation_table.
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE);
#ifdef ENABLE_FUGAKU_HACKS
if (!phys) {
pr_err("%s(): ERROR: VA: 0x%lx, rpt is NULL for PID %d\n",
__func__, rva, task_tgid_vnr(current));
error = -EFAULT;
goto out;
}
#endif
tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
rpa = (unsigned long)tbl->tt_pa;

View File

@ -3582,7 +3582,11 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
*ret_cpu = ch->send.queue->read_cpu;
ret = 0;
#ifndef ENABLE_FUGAKU_HACKS
pr_info("%s: OS: %lx, CPU: %d\n",
#else
dprintk("%s: OS: %lx, CPU: %d\n",
#endif
__func__, (unsigned long)os, *ret_cpu);
out_put_ppd:
@ -3646,7 +3650,11 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
/* Notify caller (for future async implementation) */
atomic_set(&desc->sync, 1);
#ifndef ENABLE_FUGAKU_HACKS
dprintk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#else
printk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#endif
__FUNCTION__,
(op == MCCTRL_OS_CPU_READ_REGISTER ? "READ" : "WRITE"), cpu,
desc->addr_ext, desc->val);

View File

@ -235,7 +235,6 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)

View File

@ -692,14 +692,20 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma->vm_start, vma->vm_end, pgsize, pix);
}
}
else
else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
error = vmf_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
if (error == VM_FAULT_NOPAGE) {
dprintk("%s: vmf_insert_pfn returned %d\n",
__func__, error);
error = 0;
}
#else
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
#endif
}
if (error) {
pr_err("%s: vm_insert_pfn returned %d\n",
__func__, error);

View File

@ -1179,7 +1179,7 @@ static int start_gdb(void) {
sprintf(buf, "target remote :%d", ntohs(sin.sin_port));
execlp("gdb", "eclair", "-q", "-ex", "set prompt (eclair) ",
"-ex", buf, opt.kernel_path, NULL);
"-ex", buf, opt.kernel_path, "-ex", "set pagination off", NULL);
perror("execlp");
return 3;
}

2
ihk

Submodule ihk updated: 30e8b79b7c...4bc641409b

View File

@ -58,16 +58,43 @@ struct cpu_local_var *get_cpu_local_var(int id)
return clv + id;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked);
#endif
/*
 * Decrement this CPU's preemption-disable counter.
 * With ENABLE_FUGAKU_HACKS, additionally detect an unbalanced
 * enable/disable pair (counter going negative), dump the current call
 * stack, and panic.
 */
void preempt_enable(void)
{
#ifndef ENABLE_FUGAKU_HACKS
if (cpu_local_var_initialized)
--cpu_local_var(no_preempt);
#else
if (cpu_local_var_initialized) {
--cpu_local_var(no_preempt);
/* A negative count means preempt_enable() without a matching disable */
if (cpu_local_var(no_preempt) < 0) {
//cpu_disable_interrupt();
__kprintf("%s: %d\n", __func__, cpu_local_var(no_preempt));
__kprintf("TID: %d, call stack from builtin frame (most recent first):\n",
cpu_local_var(current)->tid);
/* Walk from our own frame; kprintf_locked=1 skips taking the kprintf
 * lock inside __show_context_stack -- NOTE(review): no lock appears to
 * be held here, confirm this is intentional (e.g. to avoid deadlock). */
__show_context_stack(cpu_local_var(current), (uintptr_t)&preempt_enable,
(unsigned long)__builtin_frame_address(0), 1);
//arch_cpu_stop();
//cpu_halt();
/* NOTE(review): this nested #ifdef is always true inside the #else
 * branch of the #ifndef ENABLE_FUGAKU_HACKS above; it is redundant. */
#ifdef ENABLE_FUGAKU_HACKS
panic("panic: negative preemption??");
#endif
}
}
#endif
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
if (cpu_local_var_initialized) {
++cpu_local_var(no_preempt);
}
}
int add_backlog(int (*func)(void *arg), void *arg)
@ -120,3 +147,10 @@ void do_backlog(void)
}
}
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return a pointer to this CPU's run-queue lock. Exported so the
 * spinlock debug checks (see the ihklib spinlock header) can recognize
 * when the lock being taken is the local runq lock.
 */
ihk_spinlock_t *get_this_cpu_runq_lock(void)
{
return &get_this_cpu_local_var()->runq_lock;
}
#endif

View File

@ -788,7 +788,11 @@ out_remote_pf:
syscall_channel_send(resp_channel, &pckt);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#else
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#endif
ret = 0;
break;

View File

@ -85,7 +85,11 @@ static int hugefileobj_get_page(struct memobj *memobj, off_t off,
}
memset(obj->pages[pgind], 0, obj->pgsize);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#else
kprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#endif
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
@ -274,13 +278,51 @@ int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
obj->nr_pages = nr_pages;
obj->pages = pages;
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#else
kprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#endif
" for %d pages, pagesize: %lu\n",
__func__,
obj,
virt_addr,
nr_pages,
obj->pgsize);
#ifdef ENABLE_FUGAKU_HACKS
if (!hugetlbfs_on_demand) {
int pgind;
int npages;
#ifndef ENABLE_FUGAKU_HACKS
for (pgind = 0; pgind < obj->nr_pages; ++pgind) {
#else
/* Map in only the last 8 pages */
for (pgind = ((obj->nr_pages > 8) ? (obj->nr_pages - 8) : 0);
pgind < obj->nr_pages; ++pgind) {
#endif
if (obj->pages[pgind]) {
continue;
}
npages = obj->pgsize >> PAGE_SHIFT;
obj->pages[pgind] = ihk_mc_alloc_aligned_pages_user(npages,
obj->pgshift - PTL1_SHIFT,
IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, 0);
if (!obj->pages[pgind]) {
kprintf("%s: error: could not allocate page for off: %lu"
", page size: %lu\n", __func__, off, obj->pgsize);
continue;
}
memset(obj->pages[pgind], 0, obj->pgsize);
dkprintf("%s: obj: 0x%lx, pre-allocated page for off: %lu"
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
}
#endif
}
obj->memobj.size = len;

View File

@ -106,6 +106,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
#ifdef ENABLE_FUGAKU_HACKS
int in_page_fault;
#endif
int no_preempt;
int timer_enabled;
unsigned long nr_ctx_switches;

View File

@ -69,4 +69,7 @@ static inline int page_is_multi_mapped(struct page *page)
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#ifdef ENABLE_FUGAKU_HACKS
extern int hugetlbfs_on_demand;
#endif
#endif

View File

@ -395,6 +395,9 @@ struct vm_range {
off_t objoff;
int pgshift; /* page size. 0 means THP */
int padding;
#ifdef ENABLE_TOFU
struct list_head tofu_stag_list;
#endif
void *private_data;
};
@ -764,6 +767,9 @@ struct thread {
};
#define VM_RANGE_CACHE_SIZE 4
#ifdef ENABLE_TOFU
#define TOFU_STAG_HASH_SIZE 4
#endif
struct process_vm {
struct address_space *address_space;
@ -796,6 +802,12 @@ struct process_vm {
struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];
int range_cache_ind;
struct swapinfo *swapinfo;
#ifdef ENABLE_TOFU
/* Tofu STAG hash */
ihk_spinlock_t tofu_stag_lock;
struct list_head tofu_stag_hash[TOFU_STAG_HASH_SIZE];
#endif
};
static inline int has_cap_ipc_lock(struct thread *th)

View File

@ -1,9 +1,6 @@
#ifndef __PROCESS_PROFILE_H_
#define __PROCESS_PROFILE_H_
/* Uncomment this to enable profiling */
#define PROFILE_ENABLE
#ifdef PROFILE_ENABLE
#define PROFILE_SYSCALL_MAX 2000
#define PROFILE_OFFLOAD_MAX (PROFILE_SYSCALL_MAX << 1)
@ -58,8 +55,6 @@ enum profile_event_type {
PROFILE_EVENT_MAX /* Should be the last event type */
};
#define __NR_profile PROFILE_EVENT_MAX
#ifdef __KERNEL__
struct thread;
struct process;
@ -79,6 +74,8 @@ void profile_dealloc_proc_events(struct process *proc);
#include <unistd.h>
#include <sys/syscall.h>
#define __NR_profile PROFILE_EVENT_MAX
/* Per-thread */
static inline void mckernel_profile_thread_on(void)
{

View File

@ -20,6 +20,7 @@
#include <ihk/ikc.h>
#include <rlimit.h>
#include <time.h>
#include <profile.h>
#define NUM_SYSCALLS 255

View File

@ -63,6 +63,9 @@ extern int interrupt_from_user(void *);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
#ifdef ENABLE_FUGAKU_HACKS
int hugetlbfs_on_demand;
#endif
int sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
static struct ihk_mc_pa_ops *pa_ops;
@ -744,7 +747,11 @@ distance_based:
}
else {
if (i == 0)
#ifndef ENABLE_FUGAKU_HACKS
kprintf("%s: distance: CPU @ node %d failed to allocate "
#else
dkprintf("%s: distance: CPU @ node %d failed to allocate "
#endif
"%d pages from node %d\n",
__FUNCTION__,
ihk_mc_get_numa_id(),
@ -951,6 +958,9 @@ static void query_free_mem_interrupt_handler(void *priv)
}
kprintf("McKernel free pages in total: %d\n", pages);
#ifdef ENABLE_FUGAKU_HACKS
panic("PANIC");
#endif
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
@ -1286,6 +1296,9 @@ void tlb_flush_handler(int vector)
}
#endif // PROFILE_ENABLE
}
#ifdef ENABLE_FUGAKU_HACKS
extern unsigned long arch_get_instruction_address(const void *reg);
#endif
static void unhandled_page_fault(struct thread *thread, void *fault_addr,
uint64_t reason, void *regs)
@ -1317,6 +1330,22 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
__kprintf("address is out of range!\n");
}
#ifdef ENABLE_FUGAKU_HACKS
{
unsigned long pc = arch_get_instruction_address(regs);
range = lookup_process_memory_range(vm, pc, pc + 1);
if (range) {
__kprintf("PC: 0x%lx (%lx in %s)\n",
pc,
(range->memobj && range->memobj->flags & MF_REG_FILE) ?
pc - range->start + range->objoff :
pc - range->start,
(range->memobj && range->memobj->path) ?
range->memobj->path : "(unknown)");
}
}
#endif
kprintf_unlock(irqflags);
/* TODO */
@ -1324,7 +1353,13 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
#ifndef ENABLE_FUGAKU_HACKS
panic("panic: kernel mode PF");
#else
kprintf("panic: kernel mode PF");
for (;;) cpu_pause();
//panic("panic: kernel mode PF");
#endif
}
//dkprintf("now dump a core file\n");
@ -1360,6 +1395,20 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
__FUNCTION__, fault_addr, reason, regs);
preempt_disable();
#ifdef ENABLE_FUGAKU_HACKS
++cpu_local_var(in_page_fault);
if (cpu_local_var(in_page_fault) > 1) {
kprintf("%s: PF in PF??\n", __func__);
cpu_disable_interrupt();
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
panic("panic: kernel mode PF in PF");
}
while (1) {
panic("PANIC");
}
}
#endif
cpu_enable_interrupt();
@ -1427,6 +1476,13 @@ out_linux:
reason, error);
unhandled_page_fault(thread, fault_addr, reason, regs);
preempt_enable();
#ifdef ENABLE_FUGAKU_DEBUG
kprintf("%s: sending SIGSTOP to TID: %d\n", __func__, thread->tid);
do_kill(thread, thread->proc->pid, thread->tid, SIGSTOP, NULL, 0);
goto out;
#endif
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
@ -1455,6 +1511,9 @@ out_linux:
out_ok:
#endif
error = 0;
#ifdef ENABLE_FUGAKU_HACKS
--cpu_local_var(in_page_fault);
#endif
preempt_enable();
out:
dkprintf("%s: addr: %p, reason: %lx, regs: %p -> error: %d\n",
@ -2041,6 +2100,13 @@ void mem_init(void)
anon_on_demand = 1;
}
#ifdef ENABLE_FUGAKU_HACKS
if (find_command_line("hugetlbfs_on_demand")) {
kprintf("Demand paging on hugetlbfs mappings enabled.\n");
hugetlbfs_on_demand = 1;
}
#endif
/* Init distance vectors */
numa_distances_init();
}

View File

@ -36,6 +36,9 @@
#include <rusage_private.h>
#include <ihk/monitor.h>
#include <ihk/debug.h>
#ifdef ENABLE_TOFU
#include <tofu/tofu_stag_range.h>
#endif
//#define DEBUG_PRINT_PROCESS
@ -269,6 +272,12 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
}
vm->range_cache_ind = 0;
#ifdef ENABLE_TOFU
ihk_mc_spinlock_init(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&vm->tofu_stag_hash[i]);
}
#endif
return 0;
}
@ -955,6 +964,11 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
newrange->pgshift = range->pgshift;
newrange->private_data = range->private_data;
#ifdef ENABLE_TOFU
/* TODO: figure out which entries to put on which list! */
INIT_LIST_HEAD(&newrange->tofu_stag_list);
#endif
if (range->memobj) {
memobj_ref(range->memobj);
newrange->memobj = range->memobj;
@ -1023,6 +1037,28 @@ int join_process_memory_range(struct process_vm *vm,
if (vm->range_cache[i] == merging)
vm->range_cache[i] = surviving;
}
#ifdef ENABLE_TOFU
/* Move Tofu stag range entries */
if (vm->proc->enable_tofu) {
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&merging->tofu_stag_list, list) {
list_del(&tsr->list);
list_add_tail(&tsr->list, &surviving->tofu_stag_list);
dkprintf("%s: stag: %d @ %p:%lu moved in VM range merge\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
#endif
kfree(merging);
error = 0;
@ -1137,6 +1173,24 @@ static int free_process_memory_range(struct process_vm *vm,
}
straight_out:
#ifdef ENABLE_TOFU
if (vm->proc->enable_tofu) {
int entries;
extern int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range);
entries = tofu_stag_range_remove_overlapping(vm, range);
if (entries > 0) {
kprintf("%s: removed %d Tofu stag entries for range 0x%lx:%lu\n",
__func__,
entries,
range->start,
range->end - range->start);
}
}
#endif
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
if (vm->range_cache[i] == range)
@ -1428,6 +1482,9 @@ int add_process_memory_range(struct process_vm *vm,
range->pgshift = pgshift;
range->private_data = NULL;
range->straight_start = 0;
#ifdef ENABLE_TOFU
INIT_LIST_HEAD(&range->tofu_stag_list);
#endif
rc = 0;
if (phys == NOPHYS) {
@ -2521,6 +2578,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
__FUNCTION__, size, minsz,
ap_flag ? "(IHK_MC_AP_USER)" : "");
#ifdef ENABLE_FUGAKU_HACKS
/*
* XXX: Fugaku: Fujitsu's runtime remaps the stack
* using hugetlbfs so don't bother allocating too much here..
*/
minsz = 8*1024*1024;
#endif
stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
USER_STACK_PAGE_P2ALIGN,
IHK_MC_AP_NOWAIT | ap_flag,

View File

@ -204,6 +204,14 @@ long do_syscall(struct syscall_request *req, int cpu)
++thread->in_syscall_offload;
}
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (req->number == __NR_write && req->args[0] == 1) {
return req->args[2];
}
#endif
#endif
/* The current thread is the requester */
req->rtid = cpu_local_var(current)->tid;
@ -1382,6 +1390,15 @@ void terminate(int rc, int sig)
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
vm = proc->vm;
#ifdef ENABLE_TOFU
if (proc->enable_tofu) {
extern void tof_utofu_finalize();
tof_utofu_finalize();
}
#endif
free_all_process_memory_range(vm);
if (proc->saved_cmdline) {
@ -1930,6 +1947,10 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
__FUNCTION__, proc->straight_va, range->pgshift);
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
#ifdef ENABLE_FUGAKU_HACKS
if (1) { // Un-safe mapping of covering physical range
#endif
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
proc->vm,
(void *)range->start,
@ -1956,6 +1977,90 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
proc->straight_pa,
psize,
proc->straight_map_threshold);
#ifdef ENABLE_FUGAKU_HACKS
}
else { // Safe mapping of only LWK memory ranges
size_t max_pgsize = 0;
size_t min_pgsize = 0xFFFFFFFFFFFFFFFF;
/*
* Iterate LWK physical memory chunks and map them to their
* corresponding offset in the straight range using the largest
* suitable pages.
*/
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end, pa;
void *va, *va_end;
size_t pgsize;
int pg2align;
ihk_mc_get_memory_chunk(i, &start, &end, NULL);
va = proc->straight_va + (start - straight_pa_start);
va_end = va + (end - start);
pa = start;
while (va < va_end) {
pgsize = (va_end - va) + 1;
retry:
error = arch_get_smaller_page_size(NULL, pgsize,
&pgsize, &pg2align);
if (error) {
ekprintf("%s: arch_get_smaller_page_size() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
/* Are virtual or physical not page aligned for this size? */
if (((unsigned long)va & (pgsize - 1)) ||
(pa & (pgsize - 1))) {
goto retry;
}
error = ihk_mc_pt_set_range(
proc->vm->address_space->page_table,
proc->vm,
va,
va + pgsize,
pa,
ptattr,
pg2align + PAGE_SHIFT,
range,
0);
if (error) {
kprintf("%s: ihk_mc_pt_set_range() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
if (pgsize > max_pgsize)
max_pgsize = pgsize;
if (pgsize < min_pgsize)
min_pgsize = pgsize;
va += pgsize;
pa += pgsize;
}
}
region->map_end = (unsigned long)proc->straight_va +
proc->straight_len;
proc->straight_pa = straight_pa_start;
kprintf("%s: straight mapping: 0x%lx:%lu @ "
"min_pgsize: %lu, max_pgsize: %lu\n",
__FUNCTION__,
proc->straight_va,
proc->straight_len,
min_pgsize,
max_pgsize);
}
#endif
}
straight_out:
@ -2284,8 +2389,15 @@ straight_out:
range->straight_start =
(unsigned long)proc->straight_va +
(straight_phys - proc->straight_pa);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx\n",
__FUNCTION__, addr, len, range->straight_start);
__FUNCTION__, addr, len, range->straight_start);
#else
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx"
" (phys: 0x%lx)\n",
__FUNCTION__, addr, len, range->straight_start,
straight_phys);
#endif
if (!zero_at_free) {
memset((void *)phys_to_virt(straight_phys), 0, len);
}
@ -2385,11 +2497,20 @@ out:
if (memobj) {
memobj_unref(memobj);
}
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#else
if (cpu_local_var(current)->profile) {
kprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#endif
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
__FUNCTION__,
addr, len, addr0, len0, prot, flags,
fd, off0, error, addr);
#ifdef ENABLE_FUGAKU_HACKS
}
#endif
return !error ?
(range->straight_start ? range->straight_start : addr) :
@ -2426,6 +2547,11 @@ SYSCALL_DECLARE(munmap)
out:
dkprintf("[%d]sys_munmap(%lx,%lx): %d\n",
ihk_mc_get_processor_id(), addr, len0, error);
#ifdef ENABLE_FUGAKU_HACKS
if (error) {
kprintf("%s: error: %d\n", __func__, error);
}
#endif
return error;
}

View File

@ -24,6 +24,7 @@
struct kmalloc_cache_header tofu_scatterlist_cache[8];
struct kmalloc_cache_header tofu_mbpt_cache[8];
struct ihk_mc_page_cache_header tofu_mbpt_sg_pages_cache[8];
struct kmalloc_cache_header tofu_stag_range_cache[8];
typedef ihk_spinlock_t spinlock_t;
@ -43,6 +44,124 @@ typedef void (*tof_core_signal_handler)(int, int, uint64_t, uint64_t);
#include <tofu/tofu_generated-tof_utofu_bg.h>
#include <tofu/tofu_generated-tof_utofu_mbpt.h>
#include <tofu/tofu_stag_range.h>
/*
* Tofu STAG regions list keeps track of stags in a given VM range.
* Per-process tree is protected by process' vm_range_lock.
*/
/*
 * Register a [start, end) virtual address span as backed by Tofu STAG
 * number 'stag' on CQ 'ucq'.
 *
 * The entry is linked both into the owning vm_range's list (so it can be
 * torn down when the range is unmapped) and into the per-VM stag hash
 * (so it can be looked up by stag number on free).  Both links are made
 * under vm->tofu_stag_lock.
 *
 * Returns 0 on success, -ENOMEM if the cache allocation fails.
 */
int tof_utofu_stag_range_insert(struct process_vm *vm,
		struct vm_range *range,
		uintptr_t start, uintptr_t end,
		struct tof_utofu_cq *ucq, int stag)
{
	struct tofu_stag_range *tsr;

	/* Allocated from a per-NUMA kmalloc cache (see tofu_stag_range_cache) */
	tsr = kmalloc_cache_alloc(&tofu_stag_range_cache[ihk_mc_get_numa_id()],
			sizeof(*tsr));
	if (!tsr) {
		kprintf("%s: error: allocating tofu_stag_range\n", __func__);
		return -ENOMEM;
	}

	tsr->start = start;
	tsr->end = end;
	tsr->ucq = ucq;
	tsr->stag = stag;

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	list_add_tail(&tsr->list, &range->tofu_stag_list);
	list_add_tail(&tsr->hash, &vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE]);
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
		__func__,
		tsr->stag,
		tsr->ucq->tni,
		tsr->ucq->cqid,
		tsr->start,
		(unsigned long)(tsr->end - tsr->start));

	return 0;
}
/*
 * Look up the tofu_stag_range registered under 'stag' in this VM's stag
 * hash, or return NULL when no such registration exists.  The hash bucket
 * is walked under vm->tofu_stag_lock.
 */
struct tofu_stag_range *tofu_stag_range_lookup_by_stag(struct process_vm *vm,
		int stag)
{
	struct tofu_stag_range *found = NULL;
	struct tofu_stag_range *it;

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	list_for_each_entry(it,
			&vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE], hash) {
		if (it->stag != stag)
			continue;

		found = it;
		break;
	}
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	return found;
}
/* XXX: vm->tofu_stag_lock must be held */
void __tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
list_del(&tsr->list);
list_del(&tsr->hash);
//kfree(tsr);
kmalloc_cache_free(tsr);
}
/*
 * Locked wrapper around __tofu_stag_range_remove(): takes
 * vm->tofu_stag_lock for the duration of the unlink + free.
 */
void tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	__tofu_stag_range_remove(vm, tsr);
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag);
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag);

/*
 * Tear down every stag registration attached to 'range' (used when the
 * backing VM range goes away).  For each entry the hardware stag is freed
 * via tof_utofu_free_stag() under the owning CQ's mru_lock, then the
 * bookkeeping entry is unlinked and returned to the cache.
 *
 * Returns the number of entries removed.
 *
 * NOTE(review): tsr->ucq->trans.mru_lock is acquired while
 * vm->tofu_stag_lock is held; any other path taking both locks must use
 * the same order to avoid deadlock -- tof_utofu_release_cq appears to
 * follow the same nesting.  Confirm no path nests them the other way.
 */
int tofu_stag_range_remove_overlapping(struct process_vm *vm,
		struct vm_range *range)
{
	struct tofu_stag_range *tsr, *next;
	int entries = 0;	/* count of registrations torn down */

	ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
	/* _safe variant: entries are deleted while iterating */
	list_for_each_entry_safe(tsr, next,
			&range->tofu_stag_list, list) {
		dkprintf("%s: stag: %d @ %p:%lu\n",
			__func__,
			tsr->stag,
			tsr->start,
			(unsigned long)(tsr->end - tsr->start));

		linux_spin_lock(&tsr->ucq->trans.mru_lock);
		tof_utofu_free_stag(tsr->ucq, tsr->stag);
		linux_spin_unlock(&tsr->ucq->trans.mru_lock);

		__tofu_stag_range_remove(vm, tsr);
		++entries;
	}
	ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);

	return entries;
}
#define TOF_UTOFU_VERSION TOF_UAPI_VERSION
#define TOF_UTOFU_NUM_STAG_NTYPES 3
#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13)
@ -146,6 +265,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
else {
*_pgszbits = PTL2_SHIFT;
*_pgszbits = PTL1_CONT_SHIFT;
}
return 0;
}
@ -171,7 +291,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
}
#if 0
#if 1
/* Tofu only support 64kB and 2MB pages */
if (min_shift > PTL1_CONT_SHIFT)
min_shift = PTL1_CONT_SHIFT;
@ -647,7 +767,6 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
//struct page *page;
struct process *proc = cpu_local_var(current)->proc;
uintptr_t iova = 0, va;
int ret;
unsigned long phys = 0;
/* Special case for straight mapping */
@ -697,6 +816,8 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
}
for(va = start; va < end; va += pgsz, ix++){
size_t psize;
pte_t *ptep;
if (tof_utofu_mbpt_is_enabled(mbpt, ix)) {
/* this page is already mapped to mbpt */
@ -715,15 +836,18 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
// return -ENOMEM;
//}
ret = ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
(void *)va, &phys);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)va, 0, NULL, &psize, NULL);
if (ret) {
raw_rc_output(ret);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, va);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
(va & (psize - 1));
//iova = tof_smmu_get_ipa_cq(ucq->tni, ucq->cqid,
// pfn_to_kaddr(page_to_pfn(page)), pgsz);
//if (iova == 0) {
@ -1012,6 +1136,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
size_t pgsz;
int ret = -ENOTSUPP;
unsigned long irqflags;
struct vm_range *range = NULL;
ucq = container_of(dev, struct tof_utofu_cq, common);
if(!ucq->common.enabled){
@ -1033,7 +1158,46 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
}
readonly = (req.flags & 1) != 0;
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
/* Assume smallest page size at first */
start = round_down((uintptr_t)req.va, PAGE_SIZE);
end = round_up((uintptr_t)req.va + req.len, PAGE_SIZE);
/* Find range, straight mapping special lookup */
if (vm->proc->straight_va &&
start >= (unsigned long)vm->proc->straight_va &&
end <= ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len) &&
!(start == (unsigned long)vm->proc->straight_va &&
end == ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len))) {
struct vm_range *range_iter;
range_iter = lookup_process_memory_range(vm, 0, -1);
while (range_iter) {
if (range_iter->straight_start &&
start >= range_iter->straight_start &&
start < (range_iter->straight_start +
(range_iter->end - range_iter->start))) {
range = range_iter;
break;
}
range_iter = next_process_memory_range(vm, range_iter);
}
}
else {
range = lookup_process_memory_range(vm, start, end);
}
if (!range) {
ret = -EINVAL;
goto unlock_out;
}
pgszbits = PAGE_SHIFT;
if (req.flags & TOF_UTOFU_ALLOC_STAG_LPG) {
ret = tof_utofu_get_pagesize_locked((uintptr_t)req.va,
@ -1109,6 +1273,12 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
//up(&ucq->ucq_sem);
ihk_mc_spinlock_unlock_noirq(&tofu_tni_cq_lock[ucq->tni][ucq->cqid]);
if (ret == 0) {
tof_utofu_stag_range_insert(vm, range, start, end, ucq, req.stag);
}
unlock_out:
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if(ret == 0){
@ -1332,6 +1502,21 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
ret = tof_utofu_free_stag(ucq, stags[i]);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
{
struct tofu_stag_range *tsr;
tsr = tofu_stag_range_lookup_by_stag(
cpu_local_var(current)->vm, stags[i]);
if (tsr) {
tofu_stag_range_remove(cpu_local_var(current)->vm, tsr);
}
else {
kprintf("%s: no stag range object for %d??\n", __func__, stags[i]);
}
}
if(ret == 0){
stags[i] = -1;
}
@ -1374,9 +1559,11 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
void tof_utofu_release_cq(void *pde_data)
{
struct tof_utofu_cq *ucq;
int stag;
//int stag;
struct tof_utofu_device *dev;
unsigned long irqflags;
struct process_vm *vm = cpu_local_var(current)->vm;
int do_free = 1;
dev = (struct tof_utofu_device *)pde_data;
ucq = container_of(dev, struct tof_utofu_cq, common);
@ -1384,13 +1571,43 @@ void tof_utofu_release_cq(void *pde_data)
if (!ucq->common.enabled) {
kprintf("%s: UCQ TNI %d, CQ %d is disabled\n",
__func__, ucq->tni, ucq->cqid);
return;
do_free = 0;
}
#if 0
for (stag = 0; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag++) {
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(ucq, stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
#endif
{
int i;
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
if (tsr->ucq != ucq)
continue;
if (do_free) {
dkprintf("%s: removing stag %d for TNI %d CQ %d\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
}
else {
kprintf("%s: WARNING: could not free stag %d for TNI %d CQ %d (UCQ is disabled)\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
}
__tofu_stag_range_remove(vm, tsr);
}
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
dkprintf("%s: UCQ (pde: %p) TNI %d, CQ %d\n",
@ -1829,14 +2046,22 @@ static int tof_utofu_ioctl_enable_bch(struct tof_utofu_device *dev, unsigned lon
}
if (!phys) {
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)req.addr, &phys);
size_t psize;
pte_t *ptep;
if (ret) {
raw_rc_output(ret);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)req.addr, 0, NULL, &psize, NULL);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, req.addr);
raw_rc_output(-ENOMEM);
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
((uint64_t)req.addr & (psize - 1));
}
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
@ -2068,6 +2293,7 @@ void tof_utofu_init_globals(void)
memset(tofu_scatterlist_cache, 0, sizeof(tofu_scatterlist_cache));
memset(tofu_mbpt_cache, 0, sizeof(tofu_mbpt_cache));
memset(tofu_mbpt_sg_pages_cache, 0, sizeof(tofu_mbpt_sg_pages_cache));
memset(tofu_stag_range_cache, 0, sizeof(tofu_stag_range_cache));
{
int tni, cq;
@ -2120,6 +2346,24 @@ void tof_utofu_finalize(void)
{
struct tofu_globals *tg = ihk_mc_get_tofu_globals();
/* Could be called from idle.. */
if (cpu_local_var(current)->proc->enable_tofu) {
int i;
struct process_vm *vm = cpu_local_var(current)->vm;
struct tofu_stag_range *tsr, *next;
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
dkprintf("%s: WARNING: stray stag %d for TNI %d CQ %d?\n",
__func__, tsr->stag, tsr->ucq->tni, tsr->ucq->cqid);
}
}
kprintf("%s: STAG processing done\n", __func__);
}
ihk_mc_clear_kernel_range((void *)tg->linux_vmalloc_start,
(void *)tg->linux_vmalloc_end);
}

View File

@ -22,12 +22,18 @@ void panic(const char *msg)
arch_print_stack();
#ifndef ENABLE_FUGAKU_HACKS
/* do not assume anything after this is executed */
arch_cpu_stop();
while (1) {
cpu_halt();
}
#else
while (1) {
cpu_halt_panic();
}
#endif
}
extern void arch_show_interrupt_context(const void*);

View File

@ -23,7 +23,13 @@ extern int num_processors;
void cpu_enable_interrupt(void);
void cpu_disable_interrupt(void);
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void);
#endif
void cpu_halt(void);
#ifdef ENABLE_FUGAKU_HACKS
void cpu_halt_panic(void);
#endif
void cpu_safe_halt(void);
void cpu_restore_interrupt(unsigned long);
void cpu_pause(void);

View File

@ -332,6 +332,23 @@ int deferred_zero_at_free = 1;
* of their corresponding memory (i.e., they are on the free memory chunk itself).
*/
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Walk the free-chunk red-black tree rooted at 'root' and return the
 * total number of free bytes it tracks (sum of every chunk's size).
 * Locking is the caller's responsibility.
 */
size_t __count_free_bytes(struct rb_root *root)
{
	size_t total = 0;
	struct rb_node *it = rb_first(root);

	while (it) {
		total += container_of(it, struct free_chunk, node)->size;
		it = rb_next(it);
	}

	return total;
}
#endif
/*
* Free pages.
* NOTE: locking must be managed by the caller.

View File

@ -0,0 +1,99 @@
#!/bin/bash
# IHK/McKernel user privilege reboot script.
# Shuts down McKernel OS instance 0, re-reserves/assigns its IHK-SMP
# resources and boots a fresh kernel image.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2019 RIKEN
#
prefix="@prefix@"
BINDIR="${prefix}/bin"
SBINDIR="${prefix}/sbin"
KERNDIR="@MCKERNELDIR@"

mem=""
cpus=""
ikc_map=""

# -c: CPU list, -m: memory spec, -r: IKC map
while getopts c:m:r: OPT
do
	case ${OPT} in
	c) cpus=${OPTARG}
		;;
	m) mem=${OPTARG}
		;;
	r) ikc_map=${OPTARG}
		;;
	\?) exit 1
		;;
	esac
done
# NOTE(review): -m is accepted but currently unused below; memory is
# always re-assigned as "all" on NUMA nodes 0-15 -- TODO confirm intent.

if [ "${ikc_map}" == "" ]; then
	# No -r given: query the current IKC map so it can be restored
	if ! ${SBINDIR}/ihkosctl 0 get ikc_map > /dev/null; then
		echo "error: querying IKC map" >&2
		exit 1
	fi

	ikc_map=`${SBINDIR}/ihkosctl 0 get ikc_map`
fi

# Shutdown OS
if ! ${SBINDIR}/ihkosctl 0 shutdown; then
	echo "error: shutting down OS" >&2
	exit 1
fi

sleep 2

if [ "${cpus}" == "" ]; then
	# No -c given: query IHK-SMP reserved CPUs and reassign those
	# (previously the query unconditionally overwrote any -c value)
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
		echo "error: querying cpus" >&2
		exit 1
	fi

	cpus=`${SBINDIR}/ihkconfig 0 query cpu`

	if [ "${cpus}" == "" ]; then
		echo "error: querying CPUs" >&2
		exit 1
	fi
fi

# Assign CPUs
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then
	echo "error: assign CPUs" >&2
	exit 1
fi

# Assign all available memory on each NUMA node (0-15)
for i in `seq 0 15`; do
	if ! ${SBINDIR}/ihkosctl 0 assign mem all@${i}; then
		echo "error: assign memory" >&2
		exit 1
	fi
done

if [ "${ikc_map}" != "" ]; then
	# Set IKC map
	if ! ${SBINDIR}/ihkosctl 0 set ikc_map ${ikc_map}; then
		echo "error: setting IKC map" >&2
		exit 1
	fi
fi

# Load kernel image
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
	echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
	exit 1
fi

# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos dump_level=24"; then
	echo "error: setting kernel arguments" >&2
	exit 1
fi

# Boot OS instance
if ! ${SBINDIR}/ihkosctl 0 boot; then
	echo "error: booting" >&2
	exit 1
fi

View File

@ -65,8 +65,9 @@ umask_old=`umask`
idle_halt=""
allow_oversubscribe=""
time_sharing="time_sharing"
force_reserve="no"
while getopts stk:c:m:o:f:r:q:i:d:e:hOT: OPT
while getopts stk:c:m:o:f:r:q:i:d:e:hROT: OPT
do
case ${OPT} in
f) facility=${OPTARG}
@ -97,6 +98,8 @@ do
;;
O) allow_oversubscribe="allow_oversubscribe"
;;
R) force_reserve="yes"
;;
T)
case ${OPTARG} in
1) time_sharing="time_sharing"
@ -343,6 +346,17 @@ if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
fi
fi
if [ ${force_reserve} == "yes" ]; then
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
echo "error: reserving memory" >&2
error_exit "ihk_smp_loaded"
fi
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
echo "error: reserving CPUs" >&2;
error_exit "mem_reserved"
fi
fi
# Load mcctrl if not loaded
if ! grep mcctrl /proc/modules &>/dev/null; then
if ! taskset -c 0 ${SUDO} insmod ${KMODDIR}/mcctrl.ko 2>/dev/null; then

View File

@ -18,9 +18,10 @@ KERNDIR="@KERNDIR@"
mem=""
cpus=""
kill_in_use=""
dont_unload="no"
RMMOD_PATH=/sbin/rmmod
while getopts r:k OPT
while getopts r:kR OPT
do
case ${OPT} in
r)
@ -29,6 +30,9 @@ do
k)
kill_in_use=1
;;
R)
dont_unload="yes"
;;
\?) exit 1
;;
esac
@ -115,6 +119,10 @@ if ! sudo ${SBINDIR}/ihkconfig 0 release mem "all" > /dev/null; then
exit 1
fi
if [ "${dont_unload}" == "yes" ]; then
exit 0
fi
# Remove delegator if loaded
if grep mcctrl /proc/modules &>/dev/null; then
if ! sudo ${RMMOD_PATH} mcctrl 2>/dev/null; then