Compare commits
61 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0ecf31d896 | |||
| 08a625cc0d | |||
| 12840601e1 | |||
| 2ae6883a8b | |||
| d5629606c5 | |||
| 285059e504 | |||
| 5b6d0a887c | |||
| 3573b8649e | |||
| d7523cdd84 | |||
| 5753db5846 | |||
| 2d7cb0af89 | |||
| 1cb9b435a9 | |||
| 43ecf06e83 | |||
| 51982de36b | |||
| 0a22320a3c | |||
| 8813e890c5 | |||
| e664ffba18 | |||
| 3bd0137c25 | |||
| 4f2b4aa402 | |||
| 682cd34b74 | |||
| 2bc4d06a48 | |||
| 4f2c1e07c1 | |||
| 77bb3038d3 | |||
| 931448a94d | |||
| c51bbbabc6 | |||
| 2ddc52e1a4 | |||
| 3c93958c48 | |||
| 9763c40f64 | |||
| 3bf77446cc | |||
| c3dfb1663d | |||
| 217dd9c1e5 | |||
| d4cd756a91 | |||
| b894619d1b | |||
| b962da700b | |||
| 196379854b | |||
| d213efac79 | |||
| 38910fe13d | |||
| 4d4279121b | |||
| 99da5b6484 | |||
| 6b60dee890 | |||
| dd08a3151e | |||
| e1442bf12b | |||
| 86f297ddc4 | |||
| 823b222af9 | |||
| 9c25eb8ef2 | |||
| 665eead78b | |||
| f8ef43c77d | |||
| 8f4afe410f | |||
| da9bb421cc | |||
| 1e89796d3e | |||
| a1a2900606 | |||
| 79b977ac06 | |||
| 37e3118df6 | |||
| be4d84c0c1 | |||
| c43c1b640a | |||
| e294db7e53 | |||
| df3f388e09 | |||
| a2fbe99b60 | |||
| 9c847c0a8f | |||
| 58c1fd4512 | |||
| dae9a5ff13 |
70
Makefile.in
70
Makefile.in
@ -4,13 +4,21 @@ INCDIR = @INCDIR@
|
||||
ETCDIR = @ETCDIR@
|
||||
MANDIR = @MANDIR@
|
||||
|
||||
all::
|
||||
@(cd executer/kernel/mcctrl; make modules)
|
||||
@(cd executer/kernel/mcoverlayfs; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
all: executer-mcctrl executer-mcoverlayfs executer-user mckernel
|
||||
|
||||
executer-mcctrl:
|
||||
+@(cd executer/kernel/mcctrl; $(MAKE) modules)
|
||||
|
||||
executer-mcoverlayfs:
|
||||
+@(cd executer/kernel/mcoverlayfs; $(MAKE) modules)
|
||||
|
||||
executer-user:
|
||||
+@(cd executer/user; $(MAKE))
|
||||
|
||||
mckernel:
|
||||
+@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
|
||||
(cd kernel; make) \
|
||||
(cd kernel; $(MAKE)) \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
@ -18,13 +26,13 @@ all::
|
||||
;; \
|
||||
esac
|
||||
|
||||
install::
|
||||
@(cd executer/kernel/mcctrl; make install)
|
||||
@(cd executer/kernel/mcoverlayfs; make install)
|
||||
@(cd executer/user; make install)
|
||||
install:
|
||||
@(cd executer/kernel/mcctrl; $(MAKE) install)
|
||||
@(cd executer/kernel/mcoverlayfs; $(MAKE) install)
|
||||
@(cd executer/user; $(MAKE) install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
|
||||
(cd kernel; make install) \
|
||||
(cd kernel; $(MAKE) install) \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
@ -32,34 +40,20 @@ install::
|
||||
;; \
|
||||
esac
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
builtin-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
smp-x86 | smp-arm64) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
install -m 600 arch/x86/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
|
||||
install -m 600 arch/x86/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
|
||||
install -m 755 arch/x86/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
|
||||
install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
|
||||
install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
|
||||
mkdir -p -m 755 $(ETCDIR); \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
install -m 644 arch/x86_64/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86_64/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
mkdir -p -m 755 $(INCDIR); \
|
||||
install -m 644 kernel/include/swapfmt.h $(INCDIR); \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
@ -67,13 +61,13 @@ install::
|
||||
;; \
|
||||
esac
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel/mcctrl; make clean)
|
||||
@(cd executer/kernel/mcoverlayfs; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
clean:
|
||||
@(cd executer/kernel/mcctrl; $(MAKE) clean)
|
||||
@(cd executer/kernel/mcoverlayfs; $(MAKE) clean)
|
||||
@(cd executer/user; $(MAKE) clean)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
|
||||
(cd kernel; make clean) \
|
||||
(cd kernel; $(MAKE) clean) \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
|
||||
@ -590,6 +590,8 @@ static void show_context_stack(struct pt_regs *regs)
|
||||
return;
|
||||
}
|
||||
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
|
||||
sp = (uintptr_t)regs + sizeof(*regs);
|
||||
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
|
||||
max_loop = (stack_top - sp) / min_stack_frame_size;
|
||||
@ -1205,6 +1207,10 @@ void ihk_mc_delay_us(int us)
|
||||
arch_delay(us);
|
||||
}
|
||||
|
||||
/*
 * arch_print_stack - architecture hook for printing the current stack.
 *
 * The body is intentionally empty in this file: calling it has no effect.
 * The parameter list is spelled (void) so the definition is a real
 * prototype rather than an old-style, unchecked declaration.
 */
void arch_print_stack(void)
{
	/* Nothing to do. */
}
|
||||
|
||||
void arch_show_interrupt_context(const void *reg)
|
||||
{
|
||||
const struct pt_regs *regs = (struct pt_regs *)reg;
|
||||
@ -1428,6 +1434,13 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
}
|
||||
|
||||
void copy_fp_regs(struct thread *from, struct thread *to)
|
||||
{
|
||||
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
|
||||
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
clear_fp_regs(struct thread *thread)
|
||||
{
|
||||
@ -1499,7 +1512,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = 0;
|
||||
|
||||
@ -1513,17 +1525,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
range = lookup_process_memory_range(vm, address, address+1);
|
||||
if (range) {
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
} else {
|
||||
__kprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
|
||||
@ -221,7 +221,7 @@ int gencore(struct thread *thread, void *regs,
|
||||
Elf64_Ehdr eh;
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct vm_range *range, *next;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
@ -235,7 +235,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
return -1;
|
||||
}
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
|
||||
range->start, range->end, range->flag, range->objoff);
|
||||
/* We omit reserved areas because they are only for
|
||||
@ -323,7 +326,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
|
||||
/* program header for each memory chunk */
|
||||
i = 1;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long flag = range->flag;
|
||||
unsigned long size = range->end - range->start;
|
||||
|
||||
@ -364,7 +370,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
|
||||
|
||||
i = 3; /* memory segments */
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long phys;
|
||||
|
||||
if (range->flag & VR_RESERVED)
|
||||
|
||||
@ -134,4 +134,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * get_futex_value_locked - read a 32-bit futex word.
 *
 * @dest: where the value read is stored
 * @from: address of the futex word
 *
 * The load is done through a volatile-qualified pointer so the compiler
 * performs an actual memory read. Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *word = (volatile uint32_t *)from;

	*dest = *word;
	return 0;
}
|
||||
|
||||
#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */
|
||||
|
||||
@ -215,7 +215,7 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
|
||||
#define PAGE_P2ALIGN 0
|
||||
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
|
||||
#define page_align(addr) __page_align(addr, PAGE_SIZE)
|
||||
#define page_align_up(addr) __page_align_up((addr, PAGE_SIZE)
|
||||
#define page_align_up(addr) __page_align_up(addr, PAGE_SIZE)
|
||||
|
||||
/*
|
||||
* large page
|
||||
@ -263,6 +263,8 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
|
||||
|
||||
#define PTE_FILEOFF PTE_SPECIAL
|
||||
|
||||
#define PT_ENTRIES (PAGE_SIZE >> 3)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <ihk/types.h>
|
||||
|
||||
@ -25,6 +25,8 @@
|
||||
#define smp_rmb() dmb(ishld)
|
||||
#define smp_wmb() dmb(ishst)
|
||||
|
||||
#define arch_barrier() smp_mb()
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
|
||||
34
arch/arm64/kernel/include/arch_rusage.h
Normal file
34
arch/arm64/kernel/include/arch_rusage.h
Normal file
@ -0,0 +1,34 @@
|
||||
#ifndef ARCH_RUSAGE_H_INCLUDED
|
||||
#define ARCH_RUSAGE_H_INCLUDED
|
||||
|
||||
#include <arch-memory.h>
|
||||
|
||||
//#define DEBUG_RUSAGE
|
||||
|
||||
extern struct rusage_global *rusage;
|
||||
|
||||
#define IHK_OS_PGSIZE_4KB 0
|
||||
#define IHK_OS_PGSIZE_16KB 1
|
||||
#define IHK_OS_PGSIZE_64KB 2
|
||||
|
||||
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
|
||||
{
|
||||
int ret = IHK_OS_PGSIZE_4KB;
|
||||
switch (pgsize) {
|
||||
case __PTL1_SIZE:
|
||||
ret = IHK_OS_PGSIZE_4KB;
|
||||
break;
|
||||
case __PTL2_SIZE:
|
||||
ret = IHK_OS_PGSIZE_16KB;
|
||||
break;
|
||||
case __PTL3_SIZE:
|
||||
ret = IHK_OS_PGSIZE_64KB;
|
||||
break;
|
||||
default:
|
||||
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */
|
||||
@ -6,6 +6,8 @@
|
||||
|
||||
#if defined(CONFIG_HAS_NMI)
|
||||
#include <arm-gic-v3.h>
|
||||
#else /* defined(CONFIG_HAS_NMI) */
|
||||
#include <sysreg.h>
|
||||
#endif /* defined(CONFIG_HAS_NMI) */
|
||||
|
||||
#if defined(CONFIG_HAS_NMI)
|
||||
|
||||
@ -35,6 +35,8 @@
|
||||
#define MIDR_IMPLEMENTOR(midr) \
|
||||
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
|
||||
|
||||
#define ARM_CPU_IMP_CAVIUM 0x43
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
static unsigned int read_cpuid_id(void)
|
||||
|
||||
@ -73,6 +73,7 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->regs[5]
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->regs[0]
|
||||
#define ihk_mc_syscall_number(uc) (uc)->regs[8]
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->pc
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->sp
|
||||
|
||||
@ -10,6 +10,8 @@
|
||||
|
||||
//#define DEBUG_GICV3
|
||||
|
||||
#define USE_CAVIUM_THUNDER_X
|
||||
|
||||
#ifdef DEBUG_GICV3
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
@ -18,6 +20,10 @@
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
static char is_cavium_thunderx = 0;
|
||||
#endif
|
||||
|
||||
void *dist_base;
|
||||
void *rdist_base[NR_CPUS];
|
||||
|
||||
@ -108,8 +114,8 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
|
||||
asm volatile("nop;nop;nop;nop;");
|
||||
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
|
||||
asm volatile("nop;nop;nop;nop;");
|
||||
mb();
|
||||
#endif /* CONFIG_HAS_NMI */
|
||||
mb();
|
||||
|
||||
return irqstat;
|
||||
}
|
||||
@ -118,7 +124,7 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
|
||||
static uint64_t gic_read_iar(void)
|
||||
{
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
if (static_key_false(&is_cavium_thunderx))
|
||||
if (is_cavium_thunderx)
|
||||
return gic_read_iar_cavium_thunderx();
|
||||
else
|
||||
#endif
|
||||
@ -266,6 +272,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
|
||||
{
|
||||
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
|
||||
|
||||
barrier();
|
||||
if(vector < 16){
|
||||
// send SGI
|
||||
arm64_raise_sgi_gicv3(cpuid, vector);
|
||||
@ -304,7 +311,9 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
|
||||
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
/* Cavium ThunderX erratum 23154 */
|
||||
gicv3_check_capabilities();
|
||||
if (MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_CAVIUM) {
|
||||
is_cavium_thunderx = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -399,6 +408,7 @@ void gic_enable_gicv3(void)
|
||||
/* Set specific IPI to NMI */
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP);
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
|
||||
|
||||
/* sync wait */
|
||||
gic_do_wait_for_rwp(rbase);
|
||||
|
||||
@ -20,10 +20,11 @@ size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
|
||||
void init_processors_local(int max_id)
|
||||
{
|
||||
int i = 0;
|
||||
const int sz = (max_id + 1) * KERNEL_STACK_SIZE;
|
||||
union arm64_cpu_local_variables *tmp;
|
||||
|
||||
/* allocate one more for alignment */
|
||||
locals = ihk_mc_alloc_pages((max_id + 1) * (KERNEL_STACK_SIZE / PAGE_SIZE), IHK_MC_AP_CRITICAL);
|
||||
locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL);
|
||||
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
|
||||
|
||||
/* clear struct process, struct process_vm, struct thread_info area */
|
||||
|
||||
@ -1760,6 +1760,12 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
|
||||
return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args);
|
||||
}
|
||||
|
||||
/*
 * visit_pte_range_safe - placeholder with the same parameter list as
 * visit_pte_range().
 *
 * This implementation does not walk any page table and never invokes
 * @funcp; every argument is ignored and 0 is returned unconditionally.
 * NOTE(review): presumably a stub until a real implementation exists for
 * this architecture -- confirm that callers tolerate the no-op.
 */
int visit_pte_range_safe(page_table_t pt, void *start0, void *end0, int pgshift,
		enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
{
	/* All parameters are deliberately unused. */
	(void)pt;
	(void)start0;
	(void)end0;
	(void)pgshift;
	(void)flags;
	(void)funcp;
	(void)arg;

	return 0;
}
|
||||
|
||||
struct clear_range_args {
|
||||
int free_physical;
|
||||
struct memobj *memobj;
|
||||
|
||||
@ -965,7 +965,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
|
||||
proc->status = PS_TRACED;
|
||||
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
|
||||
thread->status = PS_TRACED;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL;
|
||||
if (sig == SIGSTOP || sig == SIGTSTP ||
|
||||
sig == SIGTTIN || sig == SIGTTOU) {
|
||||
proc->signal_flags |= SIGNAL_STOP_STOPPED;
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
#include <lwk/compiler.h>
|
||||
#include <hwcap.h>
|
||||
#include <prctl.h>
|
||||
#include <limits.h>
|
||||
|
||||
extern void ptrace_report_signal(struct thread *thread, int sig);
|
||||
extern void clear_single_step(struct thread *thread);
|
||||
@ -1866,4 +1867,634 @@ save_uctx(void *uctx, struct pt_regs *regs)
|
||||
/* TODO: skeleton for UTI */
|
||||
}
|
||||
|
||||
int do_process_vm_read_writev(int pid,
|
||||
const struct iovec *local_iov,
|
||||
unsigned long liovcnt,
|
||||
const struct iovec *remote_iov,
|
||||
unsigned long riovcnt,
|
||||
unsigned long flags,
|
||||
int op)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
int li, ri;
|
||||
int pli, pri;
|
||||
off_t loff, roff;
|
||||
size_t llen = 0, rlen = 0;
|
||||
size_t copied = 0;
|
||||
size_t to_copy;
|
||||
struct thread *lthread = cpu_local_var(current);
|
||||
struct process *rproc;
|
||||
struct process *lproc = lthread->proc;
|
||||
struct process_vm *rvm = NULL;
|
||||
unsigned long rphys;
|
||||
unsigned long rpage_left;
|
||||
unsigned long psize;
|
||||
void *rva;
|
||||
struct vm_range *range;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
struct mcs_rwlock_node update_lock;
|
||||
|
||||
/* Sanity checks */
|
||||
if (flags) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Check if parameters are okay */
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov,
|
||||
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)remote_iov,
|
||||
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
arg_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (li = 0; li < liovcnt; ++li) {
|
||||
llen += local_iov[li].iov_len;
|
||||
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
li, local_iov[li].iov_base, local_iov[li].iov_len);
|
||||
}
|
||||
|
||||
for (ri = 0; ri < riovcnt; ++ri) {
|
||||
rlen += remote_iov[ri].iov_len;
|
||||
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
|
||||
}
|
||||
|
||||
if (llen != rlen) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Find remote process */
|
||||
rproc = find_process(pid, &lock);
|
||||
if (!rproc) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
|
||||
if(rproc->status == PS_EXITED ||
|
||||
rproc->status == PS_ZOMBIE){
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
rvm = rproc->vm;
|
||||
hold_process_vm(rvm);
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
|
||||
if (lproc->euid != 0 &&
|
||||
(lproc->ruid != rproc->ruid ||
|
||||
lproc->ruid != rproc->euid ||
|
||||
lproc->ruid != rproc->suid ||
|
||||
lproc->rgid != rproc->rgid ||
|
||||
lproc->rgid != rproc->egid ||
|
||||
lproc->rgid != rproc->sgid)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
|
||||
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
|
||||
liovcnt, riovcnt);
|
||||
|
||||
pli = pri = -1; /* Previous indeces in iovecs */
|
||||
li = ri = 0; /* Current indeces in iovecs */
|
||||
loff = roff = 0; /* Offsets in current iovec */
|
||||
|
||||
/* Now iterate and do the copy */
|
||||
while (copied < llen) {
|
||||
int faulted = 0;
|
||||
|
||||
/* New local vector? */
|
||||
if (pli != li) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + 1));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_WRITE : VR_PROT_READ))) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pli_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pli = li;
|
||||
}
|
||||
|
||||
/* New remote vector? */
|
||||
if (pri != ri) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + 1));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_READ : VR_PROT_WRITE))) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pri_out:
|
||||
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pri = ri;
|
||||
}
|
||||
|
||||
/* Figure out how much we can copy at most in this iteration */
|
||||
to_copy = (local_iov[li].iov_len - loff);
|
||||
if ((remote_iov[ri].iov_len - roff) < to_copy) {
|
||||
to_copy = remote_iov[ri].iov_len - roff;
|
||||
}
|
||||
|
||||
retry_lookup:
|
||||
/* TODO: remember page and do this only if necessary */
|
||||
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
|
||||
remote_iov[ri].iov_base + roff, &rphys, &psize);
|
||||
|
||||
if (ret) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
void *addr;
|
||||
|
||||
if (faulted) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Fault in pages */
|
||||
for (addr = (void *)
|
||||
(((unsigned long)remote_iov[ri].iov_base + roff)
|
||||
& PAGE_MASK);
|
||||
addr < (remote_iov[ri].iov_base + roff + to_copy);
|
||||
addr += PAGE_SIZE) {
|
||||
|
||||
ret = page_fault_process_vm(rvm, addr, reason);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
faulted = 1;
|
||||
goto retry_lookup;
|
||||
}
|
||||
|
||||
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
|
||||
psize) & ~(psize - 1)) -
|
||||
((unsigned long)remote_iov[ri].iov_base + roff));
|
||||
if (rpage_left < to_copy) {
|
||||
to_copy = rpage_left;
|
||||
}
|
||||
|
||||
rva = phys_to_virt(rphys);
|
||||
|
||||
fast_memcpy(
|
||||
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
|
||||
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
|
||||
to_copy);
|
||||
|
||||
copied += to_copy;
|
||||
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
|
||||
li, local_iov[li].iov_base + loff,
|
||||
(op == PROCESS_VM_READ) ? "<-" : "->",
|
||||
ri, remote_iov[ri].iov_base + roff, to_copy,
|
||||
psize, rpage_left);
|
||||
|
||||
loff += to_copy;
|
||||
roff += to_copy;
|
||||
|
||||
if (loff == local_iov[li].iov_len) {
|
||||
li++;
|
||||
loff = 0;
|
||||
}
|
||||
|
||||
if (roff == remote_iov[ri].iov_len) {
|
||||
ri++;
|
||||
roff = 0;
|
||||
}
|
||||
}
|
||||
|
||||
release_process_vm(rvm);
|
||||
|
||||
return copied;
|
||||
|
||||
out:
|
||||
if(rvm)
|
||||
release_process_vm(rvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
|
||||
{
|
||||
int i, i_s, i_e, phase = 1;
|
||||
struct move_pages_smp_req *mpsr =
|
||||
(struct move_pages_smp_req *)arg;
|
||||
struct process_vm *vm = mpsr->proc->vm;
|
||||
int count = mpsr->count;
|
||||
struct page_table *save_pt;
|
||||
extern struct page_table *get_init_page_table(void);
|
||||
|
||||
i_s = (count / nr_cpus) * cpu_index;
|
||||
i_e = i_s + (count / nr_cpus);
|
||||
if (cpu_index == (nr_cpus - 1)) {
|
||||
i_e = count;
|
||||
}
|
||||
|
||||
/* Load target process' PT so that we can access user-space */
|
||||
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
|
||||
get_init_page_table() :
|
||||
cpu_local_var(current)->vm->address_space->page_table;
|
||||
|
||||
if (save_pt != vm->address_space->page_table) {
|
||||
ihk_mc_load_page_table(vm->address_space->page_table);
|
||||
}
|
||||
else {
|
||||
save_pt = NULL;
|
||||
}
|
||||
|
||||
if (nr_cpus == 1) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus > 1 && nr_cpus < 4) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
case 1:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 4 && nr_cpus < 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 3:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->virt_addr + (count / 2),
|
||||
mpsr->user_virt_addr + (count / 2),
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 3:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 4:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
break;
|
||||
case 5:
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 6:
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 7:
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (!(volatile int)mpsr->nodes_ready) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
/* NUMA verification in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
if (mpsr->nodes[i] < 0 ||
|
||||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
|
||||
!test_bit(mpsr->nodes[i],
|
||||
mpsr->proc->vm->numa_mask)) {
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* PTE lookup in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
void *phys;
|
||||
size_t pgsize;
|
||||
int p2align;
|
||||
/*
|
||||
* XXX: No page structures for anonymous mappings.
|
||||
* Look up physical addresses by scanning page tables.
|
||||
*/
|
||||
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
|
||||
|
||||
/* PTE valid? */
|
||||
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
|
||||
mpsr->status[i] = -ENOENT;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* PTE is file? */
|
||||
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
|
||||
mpsr->status[i] = -EINVAL;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
|
||||
|
||||
/* Large page? */
|
||||
if (pgsize > PAGE_SIZE) {
|
||||
int nr_sub_pages = (pgsize / PAGE_SIZE);
|
||||
int j;
|
||||
|
||||
if (i + nr_sub_pages > count) {
|
||||
kprintf("%s: ERROR: page at index %d exceeds the region\n",
|
||||
__FUNCTION__, i);
|
||||
mpsr->status[i] = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Is it contiguous across nr_sub_pages and all
|
||||
* requested to be moved to the same target node? */
|
||||
for (j = 0; j < nr_sub_pages; ++j) {
|
||||
if (mpsr->virt_addr[i + j] !=
|
||||
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
|
||||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
|
||||
kprintf("%s: ERROR: virt address or node at index %d"
|
||||
" is inconsistent\n",
|
||||
__FUNCTION__, i + j);
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
goto pte_out;
|
||||
}
|
||||
}
|
||||
|
||||
mpsr->nr_pages[i] = nr_sub_pages;
|
||||
i += (nr_sub_pages - 1);
|
||||
}
|
||||
else {
|
||||
mpsr->nr_pages[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
pte_out:
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
if (cpu_index == 0) {
|
||||
/* Allocate new pages on target NUMA nodes */
|
||||
for (i = 0; i < count; i++) {
|
||||
int pgalign = 0;
|
||||
int j;
|
||||
void *dst;
|
||||
|
||||
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
|
||||
continue;
|
||||
|
||||
/* TODO: store pgalign info in an array as well? */
|
||||
if (mpsr->nr_pages[i] > 1) {
|
||||
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
|
||||
pgalign = PTL2_SHIFT - PTL1_SHIFT;
|
||||
}
|
||||
|
||||
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
|
||||
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
|
||||
|
||||
if (!dst) {
|
||||
mpsr->status[i] = -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
|
||||
mpsr->status[j] = mpsr->nodes[i];
|
||||
}
|
||||
|
||||
mpsr->dst_phys[i] = virt_to_phys(dst);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
|
||||
" allocated phys: 0x%lx\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* Copy, PTE update, memfree in parallel */
|
||||
for (i = i_s; i < i_e; ++i) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i] * PAGE_SIZE);
|
||||
|
||||
ihk_mc_free_pages(
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i]);
|
||||
|
||||
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->dst_phys[i]);
|
||||
}
|
||||
|
||||
/* XXX: do a separate SMP call with only CPUs running threads
|
||||
* of this process? */
|
||||
if (cpu_local_var(current)->proc == mpsr->proc) {
|
||||
/* Invalidate all TLBs */
|
||||
for (i = 0; i < mpsr->count; i++) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (save_pt) {
|
||||
ihk_mc_load_page_table(save_pt);
|
||||
}
|
||||
|
||||
return mpsr->phase_ret;
|
||||
}
|
||||
|
||||
/*** End of File ***/
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
|
||||
# IHK_OBJS added coredump.o
|
||||
ifeq ($(ARCH), arm64)
|
||||
IHK_OBJS += coredump.o
|
||||
endif
|
||||
2
arch/x86_64/kernel/Makefile.arch.in
Normal file
2
arch/x86_64/kernel/Makefile.arch.in
Normal file
@ -0,0 +1,2 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
@ -1081,7 +1081,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
@ -1095,17 +1094,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
range = lookup_process_memory_range(vm, address, address+1);
|
||||
if (range) {
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
} else {
|
||||
__kprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
@ -1473,29 +1467,91 @@ void ihk_mc_delay_us(int us)
|
||||
arch_delay(us);
|
||||
}
|
||||
|
||||
#define EXTENDED_ARCH_SHOW_CONTEXT
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
void arch_show_extended_context(void)
|
||||
{
|
||||
unsigned long cr0, cr4, msr, xcr0;
|
||||
unsigned long cr0, cr4, msr, xcr0 = 0;
|
||||
|
||||
/* Read and print CRs, MSR_EFER, XCR0 */
|
||||
asm volatile("movq %%cr0, %0" : "=r"(cr0));
|
||||
asm volatile("movq %%cr4, %0" : "=r"(cr4));
|
||||
msr = rdmsr(MSR_EFER);
|
||||
xcr0 = xgetbv(0);
|
||||
|
||||
if (xsave_available) {
|
||||
xcr0 = xgetbv(0);
|
||||
}
|
||||
__kprintf("\n CR0 CR4\n");
|
||||
__kprintf("%016lX %016lX\n", cr0, cr4);
|
||||
|
||||
__kprintf(" MSR_EFER\n");
|
||||
__kprintf("%016lX\n", msr);
|
||||
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
|
||||
if (xsave_available) {
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Frame record consumed by the stack printers in this file: a link to
 * the next record followed by the code address reported for this one.
 * NOTE(review): presumably mirrors the x86_64 frame-pointer layout
 * (saved %rbp, then return address) -- confirm against the build flags.
 */
struct stack {
	struct stack *rbp;	/* link followed to reach the next record */
	unsigned long eip;	/* address printed for this record */
};
|
||||
|
||||
/* KPRINTF_LOCAL_BUF_LEN is 1024, useless to go further */
|
||||
#define STACK_BUF_LEN (1024-sizeof("[ 0]: "))
|
||||
static void __print_stack(struct stack *rbp, unsigned long first) {
|
||||
char buf[STACK_BUF_LEN];
|
||||
size_t len;
|
||||
|
||||
/* Build string in buffer to output a single line */
|
||||
len = snprintf(buf, STACK_BUF_LEN,
|
||||
"addr2line -e smp-x86/kernel/mckernel.img -fpia");
|
||||
|
||||
if (first)
|
||||
len += snprintf(buf + len, STACK_BUF_LEN - len,
|
||||
" %#16lx", first);
|
||||
|
||||
while ((unsigned long)rbp > 0xffff880000000000 &&
|
||||
STACK_BUF_LEN - len > sizeof(" 0x0123456789abcdef")) {
|
||||
len += snprintf(buf + len, STACK_BUF_LEN - len,
|
||||
" %#16lx", rbp->eip);
|
||||
rbp = rbp->rbp;
|
||||
}
|
||||
__kprintf("%s\n", buf);
|
||||
}
|
||||
|
||||
void arch_print_pre_interrupt_stack(const struct x86_basic_regs *regs) {
|
||||
struct stack *rbp;
|
||||
|
||||
/* only for kernel stack */
|
||||
if (regs->error & PF_USER)
|
||||
return;
|
||||
|
||||
__kprintf("Pre-interrupt stack trace:\n");
|
||||
|
||||
/* interrupt stack heuristics:
|
||||
* - the first entry looks like it is always garbage, so skip.
|
||||
* (that is done by taking regs->rsp instead of ®s->rsp)
|
||||
* - that still looks sometimes wrong. For now, if it is not
|
||||
* within 64k of itself, look for the next entry that matches.
|
||||
*/
|
||||
|
||||
rbp = (struct stack*)regs->rsp;
|
||||
|
||||
while ((uintptr_t)rbp > (uintptr_t)rbp->rbp
|
||||
|| (uintptr_t)rbp + 0x10000 < (uintptr_t)rbp->rbp)
|
||||
rbp = (struct stack *)(((uintptr_t *)rbp) + 1);
|
||||
|
||||
__print_stack(rbp, regs->rip);
|
||||
}
|
||||
|
||||
/*
 * Print a stack trace for the current call chain, starting from the
 * value held in %rbp when this function runs.
 */
void arch_print_stack() {
	struct stack *frame;

	__kprintf("Approximative stack trace:\n");

	/* grab the current frame pointer register */
	__asm__("mov %%rbp, %0" : "=r"(frame));

	/* no leading address: the chain alone is printed */
	__print_stack(frame, 0);
}
|
||||
#endif
|
||||
|
||||
/*@
|
||||
@ requires \valid(reg);
|
||||
@ -1526,9 +1582,9 @@ void arch_show_interrupt_context(const void *reg)
|
||||
__kprintf("%16lx %16lx %16lx %16lx\n",
|
||||
regs->cs, regs->ss, regs->rflags, regs->error);
|
||||
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
arch_show_extended_context();
|
||||
#endif
|
||||
arch_show_extended_context();
|
||||
|
||||
arch_print_pre_interrupt_stack(regs);
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
}
|
||||
@ -1651,13 +1707,11 @@ release_fp_regs(struct thread *thread)
|
||||
thread->fp_regs = NULL;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@*/
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
static int
|
||||
check_and_allocate_fp_regs(struct thread *thread)
|
||||
{
|
||||
int pages;
|
||||
int pages;
|
||||
int result = 0;
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
@ -1666,12 +1720,26 @@ save_fp_regs(struct thread *thread)
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
kprintf("error: allocating fp_regs pages\n");
|
||||
return;
|
||||
result = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
|
||||
memset(thread->fp_regs, 0, pages * PAGE_SIZE);
|
||||
}
|
||||
out:
|
||||
return result;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@*/
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
{
|
||||
if (check_and_allocate_fp_regs(thread) != 0) {
|
||||
// alloc error
|
||||
return;
|
||||
}
|
||||
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
@ -1687,6 +1755,13 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
}
|
||||
|
||||
void copy_fp_regs(struct thread *from, struct thread *to)
|
||||
{
|
||||
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
|
||||
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_19
|
||||
void
|
||||
clear_fp_regs(struct thread *thread)
|
||||
@ -289,7 +289,7 @@ int gencore(struct thread *thread, void *regs,
|
||||
Elf64_Ehdr eh;
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct vm_range *range, *next;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
@ -303,13 +303,18 @@ int gencore(struct thread *thread, void *regs,
|
||||
return -1;
|
||||
}
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
|
||||
range->start, range->end, range->flag, range->objoff);
|
||||
/* We omit reserved areas because they are only for
|
||||
mckernel's internal use. */
|
||||
if (range->flag & VR_RESERVED)
|
||||
continue;
|
||||
if (range->flag & VR_DONTDUMP)
|
||||
continue;
|
||||
/* We need a chunk for each page for a demand paging area.
|
||||
This can be optimized for space complexity but we would
|
||||
lose simplicity instead. */
|
||||
@ -391,7 +396,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
|
||||
/* program header for each memory chunk */
|
||||
i = 1;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long flag = range->flag;
|
||||
unsigned long size = range->end - range->start;
|
||||
|
||||
@ -432,7 +440,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
|
||||
|
||||
i = 3; /* memory segments */
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long phys;
|
||||
|
||||
if (range->flag & VR_RESERVED)
|
||||
@ -130,4 +130,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
|
||||
}
|
||||
#endif /* !POSTK_DEBUG_ARCH_DEP_8 */
|
||||
|
||||
/*
 * Read the 32-bit futex word at `from` through a volatile access and
 * store it in *dest.
 *
 * Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *word = (volatile uint32_t *)from;
	uint32_t snapshot = *word;

	*dest = snapshot;
	return 0;
}
|
||||
|
||||
#endif
|
||||
@ -13,16 +13,16 @@
|
||||
#ifndef ARCH_CPU_H
|
||||
#define ARCH_CPU_H
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#define arch_barrier() asm volatile("" : : : "memory")
|
||||
|
||||
static inline void rmb(void)
|
||||
{
|
||||
barrier();
|
||||
arch_barrier();
|
||||
}
|
||||
|
||||
static inline void wmb(void)
|
||||
{
|
||||
barrier();
|
||||
arch_barrier();
|
||||
}
|
||||
|
||||
static unsigned long read_tsc(void)
|
||||
@ -49,6 +49,7 @@ typedef struct x86_user_context ihk_mc_user_context_t;
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
|
||||
#define ihk_mc_syscall_number(uc) (uc)->gpr.orig_rax
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
|
||||
@ -189,9 +189,30 @@ struct tss64 {
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_basic_regs {
|
||||
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
|
||||
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
|
||||
unsigned long rip, cs, rflags, rsp, ss;
|
||||
unsigned long r15;
|
||||
unsigned long r14;
|
||||
unsigned long r13;
|
||||
unsigned long r12;
|
||||
unsigned long rbp;
|
||||
unsigned long rbx;
|
||||
unsigned long r11;
|
||||
unsigned long r10;
|
||||
unsigned long r9;
|
||||
unsigned long r8;
|
||||
unsigned long rax;
|
||||
unsigned long rcx;
|
||||
unsigned long rdx;
|
||||
unsigned long rsi;
|
||||
unsigned long rdi;
|
||||
union {
|
||||
unsigned long orig_rax; /* syscall */
|
||||
unsigned long error; /* interrupts */
|
||||
};
|
||||
unsigned long rip;
|
||||
unsigned long cs;
|
||||
unsigned long rflags;
|
||||
unsigned long rsp;
|
||||
unsigned long ss;
|
||||
};
|
||||
|
||||
struct x86_sregs {
|
||||
@ -10,9 +10,12 @@
|
||||
#include <ihk/perfctr.h>
|
||||
#include <march.h>
|
||||
#include <errno.h>
|
||||
#include <cls.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/cpu.h>
|
||||
#include <registers.h>
|
||||
#include <mc_perf_event.h>
|
||||
#include <config.h>
|
||||
|
||||
extern unsigned int *x86_march_perfmap;
|
||||
extern int running_on_kvm(void);
|
||||
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
#ifndef ENABLE_PERF
|
||||
return;
|
||||
#endif //ENABLE_PERF
|
||||
|
||||
/* Do not do it on KVM */
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
@ -93,7 +100,7 @@ void x86_init_perfctr(void)
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
|
||||
}
|
||||
|
||||
|
||||
/* Enable PMC Control */
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= X86_IA32_PERF_COUNTERS_MASK;
|
||||
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
|
||||
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
|
||||
{
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
// allocate extra_reg
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_1;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_0;
|
||||
}
|
||||
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
// extra_regs are full
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_0;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_1;
|
||||
}
|
||||
|
||||
thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
|
||||
wrmsr(event->extra_reg.reg, event->extra_reg.config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MARCH_PERFCTR_START
|
||||
extern void x86_march_perfctr_start(unsigned long counter_mask);
|
||||
#endif
|
||||
@ -29,11 +29,11 @@
|
||||
#include <prctl.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <page.h>
|
||||
#include <limits.h>
|
||||
|
||||
void terminate(int, int);
|
||||
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
|
||||
long syscall(int num, ihk_mc_user_context_t *ctx);
|
||||
extern void save_fp_regs(struct thread *proc);
|
||||
void set_signal(int sig, void *regs0, siginfo_t *info);
|
||||
void check_signal(unsigned long rc, void *regs0, int num);
|
||||
extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
|
||||
@ -460,7 +460,6 @@ void set_single_step(struct thread *thread)
|
||||
|
||||
long ptrace_read_fpregs(struct thread *thread, void *fpregs)
|
||||
{
|
||||
save_fp_regs(thread);
|
||||
if (thread->fp_regs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -470,7 +469,6 @@ long ptrace_read_fpregs(struct thread *thread, void *fpregs)
|
||||
|
||||
long ptrace_write_fpregs(struct thread *thread, void *fpregs)
|
||||
{
|
||||
save_fp_regs(thread);
|
||||
if (thread->fp_regs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -540,7 +538,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
|
||||
/* Transition thread state */
|
||||
proc->status = PS_TRACED;
|
||||
thread->status = PS_TRACED;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL;
|
||||
if (sig == SIGSTOP || sig == SIGTSTP ||
|
||||
sig == SIGTTIN || sig == SIGTTOU) {
|
||||
proc->signal_flags |= SIGNAL_STOP_STOPPED;
|
||||
@ -927,6 +925,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
|
||||
break;
|
||||
case SIGCHLD:
|
||||
case SIGURG:
|
||||
case SIGWINCH:
|
||||
break;
|
||||
default:
|
||||
dkprintf("do_signal,default,terminate,sig=%d\n", sig);
|
||||
@ -1336,15 +1335,19 @@ set_signal(int sig, void *regs0, siginfo_t *info)
|
||||
struct x86_user_context *regs = regs0;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
if(thread == NULL || thread->proc->pid == 0)
|
||||
if (thread == NULL || thread->proc->pid == 0)
|
||||
return;
|
||||
|
||||
if((__sigmask(sig) & thread->sigmask.__val[0]) ||
|
||||
(regs->gpr.rsp & 0x8000000000000000)){
|
||||
if (!interrupt_from_user(regs)) {
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
panic("panic: kernel mode signal");
|
||||
}
|
||||
|
||||
if ((__sigmask(sig) & thread->sigmask.__val[0])) {
|
||||
coredump(thread, regs0);
|
||||
terminate(0, sig | 0x80);
|
||||
}
|
||||
do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0);
|
||||
do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0);
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(mmap)
|
||||
@ -1907,4 +1910,634 @@ save_uctx(void *uctx, struct x86_user_context *regs)
|
||||
ctx->fregsize = 0;
|
||||
}
|
||||
|
||||
int do_process_vm_read_writev(int pid,
|
||||
const struct iovec *local_iov,
|
||||
unsigned long liovcnt,
|
||||
const struct iovec *remote_iov,
|
||||
unsigned long riovcnt,
|
||||
unsigned long flags,
|
||||
int op)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
int li, ri;
|
||||
int pli, pri;
|
||||
off_t loff, roff;
|
||||
size_t llen = 0, rlen = 0;
|
||||
size_t copied = 0;
|
||||
size_t to_copy;
|
||||
struct thread *lthread = cpu_local_var(current);
|
||||
struct process *rproc;
|
||||
struct process *lproc = lthread->proc;
|
||||
struct process_vm *rvm = NULL;
|
||||
unsigned long rphys;
|
||||
unsigned long rpage_left;
|
||||
unsigned long psize;
|
||||
void *rva;
|
||||
struct vm_range *range;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
struct mcs_rwlock_node update_lock;
|
||||
|
||||
/* Sanity checks */
|
||||
if (flags) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Check if parameters are okay */
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov,
|
||||
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)remote_iov,
|
||||
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
arg_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (li = 0; li < liovcnt; ++li) {
|
||||
llen += local_iov[li].iov_len;
|
||||
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
li, local_iov[li].iov_base, local_iov[li].iov_len);
|
||||
}
|
||||
|
||||
for (ri = 0; ri < riovcnt; ++ri) {
|
||||
rlen += remote_iov[ri].iov_len;
|
||||
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
|
||||
}
|
||||
|
||||
if (llen != rlen) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Find remote process */
|
||||
rproc = find_process(pid, &lock);
|
||||
if (!rproc) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
|
||||
if(rproc->status == PS_EXITED ||
|
||||
rproc->status == PS_ZOMBIE){
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
rvm = rproc->vm;
|
||||
hold_process_vm(rvm);
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
|
||||
if (lproc->euid != 0 &&
|
||||
(lproc->ruid != rproc->ruid ||
|
||||
lproc->ruid != rproc->euid ||
|
||||
lproc->ruid != rproc->suid ||
|
||||
lproc->rgid != rproc->rgid ||
|
||||
lproc->rgid != rproc->egid ||
|
||||
lproc->rgid != rproc->sgid)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
|
||||
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
|
||||
liovcnt, riovcnt);
|
||||
|
||||
pli = pri = -1; /* Previous indeces in iovecs */
|
||||
li = ri = 0; /* Current indeces in iovecs */
|
||||
loff = roff = 0; /* Offsets in current iovec */
|
||||
|
||||
/* Now iterate and do the copy */
|
||||
while (copied < llen) {
|
||||
int faulted = 0;
|
||||
|
||||
/* New local vector? */
|
||||
if (pli != li) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + 1));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_WRITE : VR_PROT_READ))) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pli_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pli = li;
|
||||
}
|
||||
|
||||
/* New remote vector? */
|
||||
if (pri != ri) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + 1));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_READ : VR_PROT_WRITE))) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pri_out:
|
||||
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pri = ri;
|
||||
}
|
||||
|
||||
/* Figure out how much we can copy at most in this iteration */
|
||||
to_copy = (local_iov[li].iov_len - loff);
|
||||
if ((remote_iov[ri].iov_len - roff) < to_copy) {
|
||||
to_copy = remote_iov[ri].iov_len - roff;
|
||||
}
|
||||
|
||||
retry_lookup:
|
||||
/* TODO: remember page and do this only if necessary */
|
||||
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
|
||||
remote_iov[ri].iov_base + roff, &rphys, &psize);
|
||||
|
||||
if (ret) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
void *addr;
|
||||
|
||||
if (faulted) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Fault in pages */
|
||||
for (addr = (void *)
|
||||
(((unsigned long)remote_iov[ri].iov_base + roff)
|
||||
& PAGE_MASK);
|
||||
addr < (remote_iov[ri].iov_base + roff + to_copy);
|
||||
addr += PAGE_SIZE) {
|
||||
|
||||
ret = page_fault_process_vm(rvm, addr, reason);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
faulted = 1;
|
||||
goto retry_lookup;
|
||||
}
|
||||
|
||||
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
|
||||
psize) & ~(psize - 1)) -
|
||||
((unsigned long)remote_iov[ri].iov_base + roff));
|
||||
if (rpage_left < to_copy) {
|
||||
to_copy = rpage_left;
|
||||
}
|
||||
|
||||
rva = phys_to_virt(rphys);
|
||||
|
||||
fast_memcpy(
|
||||
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
|
||||
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
|
||||
to_copy);
|
||||
|
||||
copied += to_copy;
|
||||
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
|
||||
li, local_iov[li].iov_base + loff,
|
||||
(op == PROCESS_VM_READ) ? "<-" : "->",
|
||||
ri, remote_iov[ri].iov_base + roff, to_copy,
|
||||
psize, rpage_left);
|
||||
|
||||
loff += to_copy;
|
||||
roff += to_copy;
|
||||
|
||||
if (loff == local_iov[li].iov_len) {
|
||||
li++;
|
||||
loff = 0;
|
||||
}
|
||||
|
||||
if (roff == remote_iov[ri].iov_len) {
|
||||
ri++;
|
||||
roff = 0;
|
||||
}
|
||||
}
|
||||
|
||||
release_process_vm(rvm);
|
||||
|
||||
return copied;
|
||||
|
||||
out:
|
||||
if(rvm)
|
||||
release_process_vm(rvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
|
||||
{
|
||||
int i, i_s, i_e, phase = 1;
|
||||
struct move_pages_smp_req *mpsr =
|
||||
(struct move_pages_smp_req *)arg;
|
||||
struct process_vm *vm = mpsr->proc->vm;
|
||||
int count = mpsr->count;
|
||||
struct page_table *save_pt;
|
||||
extern struct page_table *get_init_page_table(void);
|
||||
|
||||
i_s = (count / nr_cpus) * cpu_index;
|
||||
i_e = i_s + (count / nr_cpus);
|
||||
if (cpu_index == (nr_cpus - 1)) {
|
||||
i_e = count;
|
||||
}
|
||||
|
||||
/* Load target process' PT so that we can access user-space */
|
||||
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
|
||||
get_init_page_table() :
|
||||
cpu_local_var(current)->vm->address_space->page_table;
|
||||
|
||||
if (save_pt != vm->address_space->page_table) {
|
||||
ihk_mc_load_page_table(vm->address_space->page_table);
|
||||
}
|
||||
else {
|
||||
save_pt = NULL;
|
||||
}
|
||||
|
||||
if (nr_cpus == 1) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus > 1 && nr_cpus < 4) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
case 1:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 4 && nr_cpus < 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 3:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->virt_addr + (count / 2),
|
||||
mpsr->user_virt_addr + (count / 2),
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 3:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 4:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
break;
|
||||
case 5:
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 6:
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 7:
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (!(volatile int)mpsr->nodes_ready) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
/* NUMA verification in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
if (mpsr->nodes[i] < 0 ||
|
||||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
|
||||
!test_bit(mpsr->nodes[i],
|
||||
mpsr->proc->vm->numa_mask)) {
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* PTE lookup in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
void *phys;
|
||||
size_t pgsize;
|
||||
int p2align;
|
||||
/*
|
||||
* XXX: No page structures for anonymous mappings.
|
||||
* Look up physical addresses by scanning page tables.
|
||||
*/
|
||||
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
|
||||
|
||||
/* PTE valid? */
|
||||
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
|
||||
mpsr->status[i] = -ENOENT;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* PTE is file? */
|
||||
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
|
||||
mpsr->status[i] = -EINVAL;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
|
||||
|
||||
/* Large page? */
|
||||
if (pgsize > PAGE_SIZE) {
|
||||
int nr_sub_pages = (pgsize / PAGE_SIZE);
|
||||
int j;
|
||||
|
||||
if (i + nr_sub_pages > count) {
|
||||
kprintf("%s: ERROR: page at index %d exceeds the region\n",
|
||||
__FUNCTION__, i);
|
||||
mpsr->status[i] = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Is it contiguous across nr_sub_pages and all
|
||||
* requested to be moved to the same target node? */
|
||||
for (j = 0; j < nr_sub_pages; ++j) {
|
||||
if (mpsr->virt_addr[i + j] !=
|
||||
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
|
||||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
|
||||
kprintf("%s: ERROR: virt address or node at index %d"
|
||||
" is inconsistent\n",
|
||||
__FUNCTION__, i + j);
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
goto pte_out;
|
||||
}
|
||||
}
|
||||
|
||||
mpsr->nr_pages[i] = nr_sub_pages;
|
||||
i += (nr_sub_pages - 1);
|
||||
}
|
||||
else {
|
||||
mpsr->nr_pages[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
pte_out:
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
if (cpu_index == 0) {
|
||||
/* Allocate new pages on target NUMA nodes */
|
||||
for (i = 0; i < count; i++) {
|
||||
int pgalign = 0;
|
||||
int j;
|
||||
void *dst;
|
||||
|
||||
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
|
||||
continue;
|
||||
|
||||
/* TODO: store pgalign info in an array as well? */
|
||||
if (mpsr->nr_pages[i] > 1) {
|
||||
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
|
||||
pgalign = PTL2_SHIFT - PTL1_SHIFT;
|
||||
}
|
||||
|
||||
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
|
||||
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
|
||||
|
||||
if (!dst) {
|
||||
mpsr->status[i] = -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
|
||||
mpsr->status[j] = mpsr->nodes[i];
|
||||
}
|
||||
|
||||
mpsr->dst_phys[i] = virt_to_phys(dst);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
|
||||
" allocated phys: 0x%lx\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* Copy, PTE update, memfree in parallel */
|
||||
for (i = i_s; i < i_e; ++i) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i] * PAGE_SIZE);
|
||||
|
||||
ihk_mc_free_pages(
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i]);
|
||||
|
||||
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->dst_phys[i]);
|
||||
}
|
||||
|
||||
/* XXX: do a separate SMP call with only CPUs running threads
|
||||
* of this process? */
|
||||
if (cpu_local_var(current)->proc == mpsr->proc) {
|
||||
/* Invalidate all TLBs */
|
||||
for (i = 0; i < mpsr->count; i++) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (save_pt) {
|
||||
ihk_mc_load_page_table(save_pt);
|
||||
}
|
||||
|
||||
return mpsr->phase_ret;
|
||||
}
|
||||
|
||||
/*** End of File ***/
|
||||
@ -102,7 +102,7 @@ int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
if (error) {
|
||||
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
*(volatile int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
}
|
||||
return error;
|
||||
} /* vsyscall_gettimeofday() */
|
||||
@ -35,6 +35,7 @@ error_exit() {
|
||||
;&
|
||||
tmp_mcos_created)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umask $umask_old
|
||||
rm -rf /tmp/mcos
|
||||
fi
|
||||
;&
|
||||
@ -45,9 +46,12 @@ error_exit() {
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
fi
|
||||
|
||||
# Change umask for /proc and /sys files
|
||||
umask_dec=$(( 8#${umask_old} & 8#0002 ))
|
||||
umask 0`printf "%o" ${umask_dec}`
|
||||
|
||||
if [ ! -e /tmp/mcos ]; then
|
||||
mkdir -p /tmp/mcos;
|
||||
fi
|
||||
@ -145,3 +149,7 @@ for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||
fi
|
||||
done
|
||||
|
||||
# Restore umask
|
||||
umask ${umask_old}
|
||||
|
||||
@ -43,8 +43,9 @@ fi
|
||||
|
||||
turbo=""
|
||||
ihk_irq=""
|
||||
umask_old=`umask`
|
||||
|
||||
while getopts :tk:c:m:o:f:r:q:i:d: OPT
|
||||
while getopts :tk:c:m:o:f:r:q:i:d:e: OPT
|
||||
do
|
||||
case ${OPT} in
|
||||
f) facility=${OPTARG}
|
||||
@ -63,6 +64,8 @@ do
|
||||
;;
|
||||
t) turbo="turbo"
|
||||
;;
|
||||
e) extra_kopts=${OPTARG}
|
||||
;;
|
||||
d) DUMP_LEVEL=${OPTARG}
|
||||
;;
|
||||
i) mon_interval=${OPTARG}
|
||||
@ -114,6 +117,7 @@ error_exit() {
|
||||
;&
|
||||
tmp_mcos_created)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umask $umask_old
|
||||
rm -rf /tmp/mcos
|
||||
fi
|
||||
;&
|
||||
@ -148,7 +152,7 @@ error_exit() {
|
||||
fi
|
||||
;&
|
||||
ihk_smp_loaded)
|
||||
rmmod ihk_smp_x86 2>/dev/null || echo "warning: failed to remove ihk_smp_x86" >&2
|
||||
rmmod ihk_smp_@ARCH@ 2>/dev/null || echo "warning: failed to remove ihk_smp_@ARCH@" >&2
|
||||
;&
|
||||
ihk_loaded)
|
||||
rmmod ihk 2>/dev/null || echo "warning: failed to remove ihk" >&2
|
||||
@ -200,7 +204,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
else
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
|
||||
@ -277,7 +281,7 @@ echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
sync
|
||||
|
||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
if [ "$ihk_irq" == "" ]; then
|
||||
for i in `seq 64 255`; do
|
||||
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
|
||||
@ -290,8 +294,8 @@ if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
fi
|
||||
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
|
||||
echo "error: loading ihk-smp-x86" >&2
|
||||
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-@ARCH@.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
|
||||
echo "error: loading ihk-smp-@ARCH@" >&2
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
|
||||
@ -339,7 +343,7 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
ind=`echo $i|cut -c10-`;
|
||||
# Retry when conflicting with ihkmond
|
||||
nretry=0
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
|
||||
sleep 0.25
|
||||
nretry=$[ $nretry + 1 ]
|
||||
done
|
||||
@ -383,7 +387,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
|
||||
fi
|
||||
|
||||
# Set kernel arguments
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL}"; then
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL} $extra_kopts"; then
|
||||
echo "error: setting kernel arguments" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
@ -421,3 +425,5 @@ if [ "${irqbalance_used}" == "yes" ]; then
|
||||
fi
|
||||
# echo cpus=$cpus ncpus=$ncpus banirq=$banirq
|
||||
fi
|
||||
|
||||
exit 0
|
||||
@ -19,7 +19,7 @@ cpus=""
|
||||
irqbalance_used=""
|
||||
|
||||
# No SMP module? Exit.
|
||||
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then exit 0; fi
|
||||
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then exit 0; fi
|
||||
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
irqbalance_used="yes"
|
||||
@ -37,7 +37,7 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
ind=`echo $i|cut -c10-`;
|
||||
# Retry when conflicting with ihkmond
|
||||
nretry=0
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
|
||||
sleep 0.25
|
||||
nretry=$[ $nretry + 1 ]
|
||||
done
|
||||
@ -87,9 +87,9 @@ fi
|
||||
. ${SBINDIR}/mcoverlay-destroy.sh
|
||||
|
||||
# Remove SMP module
|
||||
if grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
if ! rmmod ihk_smp_x86 2>/dev/null; then
|
||||
echo "error: removing ihk_smp_x86" >&2
|
||||
if grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
if ! rmmod ihk_smp_@ARCH@ 2>/dev/null; then
|
||||
echo "error: removing ihk_smp_@ARCH@" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
@ -9,12 +9,15 @@
|
||||
/* whether memdump feature is enabled */
|
||||
#undef ENABLE_MEMDUMP
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
/* whether qlmpi is enabled */
|
||||
#undef ENABLE_QLMPI
|
||||
|
||||
/* whether rusage is enabled */
|
||||
#undef ENABLE_RUSAGE
|
||||
|
||||
/* whether perf is enabled */
|
||||
#undef ENABLE_PERF
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
|
||||
56
configure
vendored
56
configure
vendored
@ -708,6 +708,7 @@ enable_dcfa
|
||||
enable_memdump
|
||||
enable_mcoverlayfs
|
||||
enable_rusage
|
||||
enable_perf
|
||||
enable_qlmpi
|
||||
with_uname_r
|
||||
'
|
||||
@ -1333,6 +1334,7 @@ Optional Features:
|
||||
--enable-memdump enable dumping memory and analyzing a dump
|
||||
--enable-mcoverlayfs enable mcoverlayfs implementation
|
||||
--enable-rusage enable rusage implementation
|
||||
--enable-perf enable perf_event implementation
|
||||
--enable-qlmpi enable qlmpi implementation
|
||||
|
||||
Optional Packages:
|
||||
@ -3568,6 +3570,14 @@ else
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-perf was given.
|
||||
if test "${enable_perf+set}" = set; then :
|
||||
enableval=$enable_perf; ENABLE_PERF=$enableval
|
||||
else
|
||||
ENABLE_PERF=yes
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-qlmpi was given.
|
||||
if test "${enable_qlmpi+set}" = set; then :
|
||||
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
|
||||
@ -4366,6 +4376,9 @@ case $WITH_TARGET in
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$INCLUDEDIR" = X; then
|
||||
INCLUDEDIR="$prefix/include"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
@ -4961,6 +4974,17 @@ else
|
||||
$as_echo "$as_me: rusage is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
|
||||
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
|
||||
$as_echo "$as_me: perf is enabled" >&6;}
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
|
||||
$as_echo "$as_me: perf is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@ -5021,9 +5045,14 @@ ac_config_headers="$ac_config_headers config.h"
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_37
|
||||
# AC_CONFIG_FILES arch dependfiles separate
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcoverlay-destroy-smp-x86.sh arch/x86/tools/mcoverlay-create-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in"
|
||||
|
||||
|
||||
if test "$TARGET" = "smp-x86"; then
|
||||
ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch"
|
||||
|
||||
fi
|
||||
|
||||
if test "$TARGET" = "smp-arm64"; then
|
||||
ac_config_files="$ac_config_files kernel/config/config.smp-arm64 arch/arm64/kernel/vdso/Makefile arch/arm64/kernel/Makefile.arch"
|
||||
|
||||
@ -5741,18 +5770,19 @@ do
|
||||
"kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;;
|
||||
"kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;;
|
||||
"kernel/include/swapfmt.h") CONFIG_FILES="$CONFIG_FILES kernel/include/swapfmt.h" ;;
|
||||
"arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;;
|
||||
"arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;;
|
||||
"arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;;
|
||||
"arch/x86/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-destroy-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-create-smp-x86.sh" ;;
|
||||
"arch/x86/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/eclair-dump-backtrace.exp" ;;
|
||||
"arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;;
|
||||
"arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;;
|
||||
"arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;;
|
||||
"arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;;
|
||||
"arch/x86_64/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-attached-mic.sh" ;;
|
||||
"arch/x86_64/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-attached-mic.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-builtin-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcstop+release-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-create-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/eclair-dump-backtrace.exp" ;;
|
||||
"arch/x86_64/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-builtin-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
|
||||
"arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;;
|
||||
"kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;;
|
||||
"arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;;
|
||||
"arch/arm64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/Makefile.arch" ;;
|
||||
|
||||
48
configure.ac
48
configure.ac
@ -134,6 +134,12 @@ AC_ARG_ENABLE([rusage],
|
||||
[ENABLE_RUSAGE=$enableval],
|
||||
[ENABLE_RUSAGE=yes])
|
||||
|
||||
AC_ARG_ENABLE([perf],
|
||||
AC_HELP_STRING([--enable-perf],
|
||||
[enable perf_event implementation]),
|
||||
[ENABLE_PERF=$enableval],
|
||||
[ENABLE_PERF=yes])
|
||||
|
||||
AC_ARG_ENABLE([qlmpi],
|
||||
AC_HELP_STRING([--enable-qlmpi],
|
||||
[enable qlmpi implementation]),
|
||||
@ -316,6 +322,9 @@ case $WITH_TARGET in
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$INCLUDEDIR" = X; then
|
||||
INCLUDEDIR="$prefix/include"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
@ -451,7 +460,7 @@ else
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_QLMPI" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled])
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
|
||||
AC_MSG_NOTICE([qlmpi is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([qlmpi is disabled])
|
||||
@ -475,6 +484,13 @@ else
|
||||
AC_MSG_NOTICE([rusage is disabled])
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
|
||||
AC_MSG_NOTICE([perf is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([perf is disabled])
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
|
||||
fi
|
||||
@ -535,20 +551,26 @@ AC_CONFIG_FILES([
|
||||
kernel/Makefile
|
||||
kernel/Makefile.build
|
||||
kernel/include/swapfmt.h
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
arch/x86/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot-smp-x86.sh
|
||||
arch/x86/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86/tools/mcoverlay-destroy-smp-x86.sh
|
||||
arch/x86/tools/mcoverlay-create-smp-x86.sh
|
||||
arch/x86/tools/eclair-dump-backtrace.exp
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
arch/x86/tools/irqbalance_mck.service
|
||||
arch/x86/tools/irqbalance_mck.in
|
||||
arch/x86_64/tools/mcreboot-attached-mic.sh
|
||||
arch/x86_64/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86_64/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86_64/tools/mcreboot-smp-x86.sh
|
||||
arch/x86_64/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh
|
||||
arch/x86_64/tools/mcoverlay-create-smp-x86.sh
|
||||
arch/x86_64/tools/eclair-dump-backtrace.exp
|
||||
arch/x86_64/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
|
||||
arch/x86_64/tools/irqbalance_mck.service
|
||||
arch/x86_64/tools/irqbalance_mck.in
|
||||
])
|
||||
|
||||
if test "$TARGET" = "smp-x86"; then
|
||||
AC_CONFIG_FILES([
|
||||
arch/x86_64/kernel/Makefile.arch
|
||||
])
|
||||
fi
|
||||
|
||||
if test "$TARGET" = "smp-arm64"; then
|
||||
AC_CONFIG_FILES([
|
||||
kernel/config/config.smp-arm64
|
||||
|
||||
@ -5,6 +5,10 @@
|
||||
#define IHK_MAX_NUM_NUMA_NODES 1024
|
||||
#define IHK_MAX_NUM_CPUS 1024
|
||||
|
||||
#define IHK_OS_PGSIZE_4KB 0
|
||||
#define IHK_OS_PGSIZE_2MB 1
|
||||
#define IHK_OS_PGSIZE_1GB 2
|
||||
|
||||
struct mckernel_rusage {
|
||||
unsigned long memory_stat_rss[IHK_MAX_NUM_PGSIZES];
|
||||
unsigned long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES];
|
||||
|
||||
@ -140,6 +140,8 @@ struct program_load_desc {
|
||||
unsigned long mpol_flags;
|
||||
unsigned long mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
long stack_premap;
|
||||
unsigned long mpol_bind_mask;
|
||||
int nr_processes;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
|
||||
|
||||
@ -11,10 +11,21 @@ obj-m += mcctrl.o
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_1, arch depend "-mcmodel"
|
||||
# POSTK_DEBUG_ARCH_DEP_83, arch depend translate_rva_to_rpa() move
|
||||
ifeq ($(ARCH), arm64)
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -I$(src)/arch/$(ARCH)/include -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@
|
||||
else
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../../kernel/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ -I@abs_builddir@/../../../
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include \
|
||||
-I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) \
|
||||
-I$(IHK_BASE)/ikc/include \
|
||||
-I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) \
|
||||
-I$(IHK_BASE)/include \
|
||||
-I$(IHK_BASE)/include/arch/$(ARCH) \
|
||||
-I$(src)/../../include \
|
||||
-I$(src)/arch/$(ARCH)/include \
|
||||
-I@abs_builddir@ \
|
||||
-I@abs_builddir@/../../../ \
|
||||
-I$(src)/../../../kernel/include \
|
||||
-DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
|
||||
|
||||
ifneq ($(ARCH), arm64)
|
||||
ccflags-y += -mno-red-zone -mcmodel=kernel
|
||||
endif
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
|
||||
@ -190,7 +190,11 @@ static long mcexec_prepare_image(ihk_os_t os,
|
||||
|
||||
pdesc->status = 0;
|
||||
mb();
|
||||
mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
ret = mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
if(ret < 0) {
|
||||
printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret);
|
||||
goto put_and_free_out;
|
||||
}
|
||||
|
||||
ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status);
|
||||
if (ret < 0) {
|
||||
|
||||
@ -1207,7 +1207,7 @@ sysfsm_unlink(struct sysfsm_data *sdp, const char *path0, int flags)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!flags & SYSFS_UNLINK_KEEP_ANCESTOR) {
|
||||
if (!(flags & SYSFS_UNLINK_KEEP_ANCESTOR)) {
|
||||
cleanup_ancestor(dirp);
|
||||
}
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 243680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 263680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE_TMP),rhel)
|
||||
@ -33,7 +33,7 @@ endif
|
||||
|
||||
modules:
|
||||
ifneq ($(BUILD_MODULE),none)
|
||||
@(cd $(BUILD_MODULE); make modules)
|
||||
+@(cd $(BUILD_MODULE); make modules)
|
||||
endif
|
||||
|
||||
clean:
|
||||
|
||||
@ -10,6 +10,7 @@ MANDIR=@MANDIR@
|
||||
MCKERNEL_INCDIR=@MCKERNEL_INCDIR@
|
||||
MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH=@ARCH@
|
||||
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH}
|
||||
LDFLAGS=@LDFLAGS@
|
||||
RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}')
|
||||
@ -17,7 +18,6 @@ VPATH=@abs_srcdir@
|
||||
TARGET=mcexec libsched_yield ldump2mcdump.so
|
||||
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
|
||||
LIBS=@LIBS@
|
||||
ARCH=@ARCH@
|
||||
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
|
||||
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread
|
||||
ENABLE_QLMPI=@ENABLE_QLMPI@
|
||||
@ -53,7 +53,7 @@ libsched_yield: libsched_yield.c
|
||||
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
|
||||
|
||||
libmcexec.a::
|
||||
(cd arch/${ARCH}; make)
|
||||
+(cd arch/${ARCH}; $(MAKE))
|
||||
|
||||
libqlmpi.so: qlmpilib.c
|
||||
$(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $<
|
||||
@ -77,13 +77,13 @@ ql_talker: ql_talker.o
|
||||
$(CC) $^ $(CFLAGS) -o $@
|
||||
|
||||
clean::
|
||||
(cd arch/${ARCH}; make clean)
|
||||
(cd arch/${ARCH}; $(MAKE) clean)
|
||||
$(RM) $(TARGET) *.o
|
||||
|
||||
.PHONY: all clean install
|
||||
|
||||
install::
|
||||
(cd arch/${ARCH}; make install)
|
||||
(cd arch/${ARCH}; $(MAKE) install)
|
||||
mkdir -p -m 755 $(BINDIR)
|
||||
install -m 755 mcexec $(BINDIR)
|
||||
mkdir -p -m 755 $(MCKERNEL_LIBDIR)
|
||||
|
||||
@ -43,14 +43,30 @@ threads equals to the number of CPU cores assigned to McKernel.
|
||||
.TP
|
||||
.B -h N, --extend-heap-by=N
|
||||
Specify the size of heap extension by the brk() system call.
|
||||
The default size is 4K Byte. You may specify the value in the
|
||||
following format:
|
||||
<digit>{k|K|m|M|g|G}
|
||||
e.g.:
|
||||
10k for 10Kibyte, 100M for 100Mibyte, 1G for 1Gibyte
|
||||
The default size is 4 KiB. \fIN\fR accepts the following format:
|
||||
.br
|
||||
.RS 10
|
||||
<digits>{k|K|m|M|g|G}
|
||||
.br
|
||||
e.g.: 10k means 10 KiB, 100M means 100 MiB, 1G means 1 GiB
|
||||
.RE
|
||||
.RS 7
|
||||
|
||||
In certain applications, such as miniFE, specifying "-h 12M" improves
|
||||
performance.
|
||||
.RE
|
||||
|
||||
.TP
|
||||
.B -s P,[M], --stack-premap=P,[M]
|
||||
Pre-map \fIP\fR bytes of stack when creating a process (2 MiB is used
|
||||
when not specified) and, optionally, set the maximum stack size to \fIM\fR
|
||||
bytes. Both values accept the following format:
|
||||
.br
|
||||
.RS 10
|
||||
<digits>{k|K|m|M|g|G}
|
||||
.br
|
||||
e.g.: 10k means 10 KiB, 100M means 100 MiB, 1G means 1 GiB
|
||||
.RE
|
||||
|
||||
.TP
|
||||
.B --profile
|
||||
|
||||
@ -214,6 +214,10 @@ static unsigned long mpol_threshold = 0;
|
||||
static unsigned long heap_extension = (4*1024);
|
||||
static int profile = 0;
|
||||
static int disable_sched_yield = 0;
|
||||
static long stack_premap = (2ULL << 20);
|
||||
static long stack_max = -1;
|
||||
static struct rlimit rlim_stack;
|
||||
static char *mpol_bind_nodes = NULL;
|
||||
|
||||
/* Partitioned execution (e.g., for MPI) */
|
||||
static int nr_processes = 0;
|
||||
@ -1211,7 +1215,7 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
|
||||
struct rlimit new_rlim;
|
||||
|
||||
/* save original value to environment variable */
|
||||
n = snprintf(newval, sizeof(newval), "%#lx,%#lx",
|
||||
n = snprintf(newval, sizeof(newval), "%ld,%ld",
|
||||
(unsigned long)orig_rlim->rlim_cur,
|
||||
(unsigned long)orig_rlim->rlim_max);
|
||||
if (n >= sizeof(newval)) {
|
||||
@ -1247,9 +1251,9 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
|
||||
void print_usage(char **argv)
|
||||
{
|
||||
#ifdef ADD_ENVS_OPTION
|
||||
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [<mcos-id>] (program) [args...]\n", argv[0]);
|
||||
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [<mcos-id>] (program) [args...]\n", argv[0]);
|
||||
#else /* ADD_ENVS_OPTION */
|
||||
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [<mcos-id>] (program) [args...]\n", argv[0]);
|
||||
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [<mcos-id>] (program) [args...]\n", argv[0]);
|
||||
#endif /* ADD_ENVS_OPTION */
|
||||
}
|
||||
|
||||
@ -1637,7 +1641,7 @@ static struct option mcexec_options[] = {
|
||||
.name = "mpol-threshold",
|
||||
.has_arg = required_argument,
|
||||
.flag = NULL,
|
||||
.val = 'm',
|
||||
.val = 'M',
|
||||
},
|
||||
{
|
||||
.name = "disable-sched-yield",
|
||||
@ -1651,13 +1655,16 @@ static struct option mcexec_options[] = {
|
||||
.flag = NULL,
|
||||
.val = 'h',
|
||||
},
|
||||
{
|
||||
.name = "stack-premap",
|
||||
.has_arg = required_argument,
|
||||
.flag = NULL,
|
||||
.val = 's',
|
||||
},
|
||||
/* end */
|
||||
{ NULL, 0, NULL, 0, },
|
||||
};
|
||||
|
||||
#define MCEXEC_DEF_CUR_STACK_SIZE (2 * 1024 * 1024) /* 2 MiB */
|
||||
#define MCEXEC_DEF_MAX_STACK_SIZE (64 * 1024 * 1024) /* 64 MiB */
|
||||
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
void bind_mount_recursive(const char *root, char *prefix)
|
||||
{
|
||||
@ -1762,7 +1769,7 @@ static void ld_preload_init()
|
||||
|
||||
if (disable_sched_yield) {
|
||||
sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR);
|
||||
__dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path);
|
||||
__dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf);
|
||||
if (setenv("LD_PRELOAD", envbuf, 1) < 0) {
|
||||
printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n",
|
||||
__FUNCTION__);
|
||||
@ -1796,7 +1803,6 @@ int main(int argc, char **argv)
|
||||
char *p;
|
||||
int i;
|
||||
int error;
|
||||
struct rlimit rlim_stack;
|
||||
unsigned long lcur;
|
||||
unsigned long lmax;
|
||||
int target_core = 0;
|
||||
@ -1846,10 +1852,15 @@ int main(int argc, char **argv)
|
||||
CHKANDJUMP(error == -1, 1, "unsetenv failed");
|
||||
}
|
||||
|
||||
rlim_stack.rlim_cur = MCEXEC_DEF_CUR_STACK_SIZE;
|
||||
rlim_stack.rlim_max = MCEXEC_DEF_MAX_STACK_SIZE;
|
||||
/* Inherit ulimit settings to McKernel process */
|
||||
if (getrlimit(RLIMIT_STACK, &rlim_stack)) {
|
||||
fprintf(stderr, "getrlimit failed\n");
|
||||
return 1;
|
||||
}
|
||||
__dprintf("rlim_stack=%ld,%ld\n", rlim_stack.rlim_cur, rlim_stack.rlim_max);
|
||||
|
||||
#define MCEXEC_MAX_STACK_SIZE (1024 * 1024 * 1024) /* 1 GiB */
|
||||
/* Shrink mcexec stack if it leaves too small room for McKernel process */
|
||||
#define MCEXEC_MAX_STACK_SIZE (16 * 1024 * 1024) /* 1 GiB */
|
||||
if (rlim_stack.rlim_cur > MCEXEC_MAX_STACK_SIZE) {
|
||||
/* need to call reduce_stack() before modifying the argv[] */
|
||||
(void)reduce_stack(&rlim_stack, argv); /* no return, unless failure */
|
||||
@ -1859,25 +1870,43 @@ int main(int argc, char **argv)
|
||||
|
||||
/* Parse options ("+" denotes stop at the first non-option) */
|
||||
#ifdef ADD_ENVS_OPTION
|
||||
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:", mcexec_options, NULL)) != -1) {
|
||||
while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:e:s:m:", mcexec_options, NULL)) != -1) {
|
||||
#else /* ADD_ENVS_OPTION */
|
||||
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:", mcexec_options, NULL)) != -1) {
|
||||
while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:s:m:", mcexec_options, NULL)) != -1) {
|
||||
#endif /* ADD_ENVS_OPTION */
|
||||
switch (opt) {
|
||||
char *tmp;
|
||||
|
||||
case 'c':
|
||||
target_core = atoi(optarg);
|
||||
target_core = strtol(optarg, &tmp, 0);
|
||||
if (*tmp != '\0') {
|
||||
fprintf(stderr, "error: -c: invalid target CPU\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'n':
|
||||
nr_processes = atoi(optarg);
|
||||
nr_processes = strtol(optarg, &tmp, 0);
|
||||
if (*tmp != '\0' || nr_processes <= 0) {
|
||||
fprintf(stderr, "error: -n: invalid number of processes\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
|
||||
case 't':
|
||||
nr_threads = atoi(optarg);
|
||||
nr_threads = strtol(optarg, &tmp, 0);
|
||||
if (*tmp != '\0' || nr_threads <= 0) {
|
||||
fprintf(stderr, "error: -t: invalid number of threads\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'M':
|
||||
mpol_threshold = atobytes(optarg);
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
mpol_threshold = atobytes(optarg);
|
||||
mpol_bind_nodes = optarg;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
@ -1889,6 +1918,23 @@ int main(int argc, char **argv)
|
||||
add_env_list(&extra_env, optarg);
|
||||
break;
|
||||
#endif /* ADD_ENVS_OPTION */
|
||||
|
||||
case 's': {
|
||||
char *token, *dup, *line;
|
||||
dup = strdup(optarg);
|
||||
line = dup;
|
||||
token = strsep(&line, ",");
|
||||
if (token != NULL && *token != 0) {
|
||||
stack_premap = atobytes(token);
|
||||
}
|
||||
token = strsep(&line, ",");
|
||||
if (token != NULL && *token != 0) {
|
||||
stack_max = atobytes(token);
|
||||
}
|
||||
free(dup);
|
||||
__dprintf("stack_premap=%ld,stack_max=%ld\n", stack_premap, stack_max);
|
||||
break; }
|
||||
|
||||
case 0: /* long opt */
|
||||
break;
|
||||
|
||||
@ -2097,6 +2143,7 @@ int main(int argc, char **argv)
|
||||
desc->cpu = target_core;
|
||||
desc->enable_vdso = enable_vdso;
|
||||
|
||||
/* Restore the stack size when mcexec stack was shrinked */
|
||||
p = getenv(rlimit_stack_envname);
|
||||
if (p) {
|
||||
char *saveptr;
|
||||
@ -2141,8 +2188,19 @@ int main(int argc, char **argv)
|
||||
rlim_stack.rlim_cur = lcur;
|
||||
}
|
||||
}
|
||||
|
||||
/* Overwrite the max with <max> of "--stack-premap <premap>,<max>" */
|
||||
if (stack_max != -1) {
|
||||
rlim_stack.rlim_cur = stack_max;
|
||||
if (rlim_stack.rlim_max != -1 && rlim_stack.rlim_max < rlim_stack.rlim_cur) {
|
||||
rlim_stack.rlim_max = rlim_stack.rlim_cur;
|
||||
}
|
||||
}
|
||||
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
|
||||
desc->stack_premap = stack_premap;
|
||||
__dprintf("desc->rlimit[MCK_RLIMIT_STACK]=%ld,%ld\n", desc->rlimit[MCK_RLIMIT_STACK].rlim_cur, desc->rlimit[MCK_RLIMIT_STACK].rlim_max);
|
||||
|
||||
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
|
||||
if(ncpu == -1){
|
||||
@ -2335,6 +2393,21 @@ int main(int argc, char **argv)
|
||||
desc->mpol_threshold = mpol_threshold;
|
||||
desc->heap_extension = heap_extension;
|
||||
|
||||
desc->mpol_bind_mask = 0;
|
||||
if (mpol_bind_nodes) {
|
||||
struct bitmask *bind_mask;
|
||||
bind_mask = numa_parse_nodestring_all(mpol_bind_nodes);
|
||||
|
||||
if (bind_mask) {
|
||||
int node;
|
||||
for (node = 0; node <= numa_max_possible_node(); ++node) {
|
||||
if (numa_bitmask_isbitset(bind_mask, node)) {
|
||||
desc->mpol_bind_mask |= (1UL << node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {
|
||||
perror("prepare");
|
||||
close(fd);
|
||||
@ -3618,6 +3691,10 @@ fork_err:
|
||||
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
|
||||
path, desc->num_sections);
|
||||
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
|
||||
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
|
||||
desc->stack_premap = stack_premap;
|
||||
|
||||
/* Copy descriptor to co-kernel side */
|
||||
trans.userp = (void*)desc;
|
||||
trans.rphys = w.sr.args[2];
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
# Makefile.build.in COPYRIGHT FUJITSU LIMITED 2015-2016
|
||||
ARCH = @ARCH@
|
||||
VPATH=@abs_srcdir@
|
||||
SRC=$(VPATH)
|
||||
IHKDIR=$(IHKBASE)/$(TARGETDIR)
|
||||
@ -8,13 +9,18 @@ OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
|
||||
OBJS += rbtree.o
|
||||
OBJS += pager.o
|
||||
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
# OBJS added gencore.o
|
||||
ifeq ($(ARCH), arm64)
|
||||
OBJS += gencore.o
|
||||
DEPSRCS += $(SRC)/../arch/arm64/kernel/gencore.c
|
||||
endif
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
|
||||
ifneq ($(ARCH), arm64)
|
||||
CFLAGS += -mcmodel=large -mno-red-zone
|
||||
endif
|
||||
LDFLAGS += -e arch_start
|
||||
IHKOBJ = ihk/ihk.o
|
||||
|
||||
@ -37,7 +43,7 @@ OBJCOPY ?= objcopy
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_26
|
||||
ifeq ($(ARCH), arm64)
|
||||
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) OBJCOPY=$(OBJCOPY) SRC=$(SRC)
|
||||
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) OBJCOPY=$(OBJCOPY) SRC=$(SRC) ARCH=$(ARCH)
|
||||
else
|
||||
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) SRC=$(SRC)
|
||||
endif
|
||||
@ -65,12 +71,15 @@ Makefile.dep:
|
||||
|
||||
$(IHKOBJ): FORCE
|
||||
@mkdir -p $(dir $(IHKOBJ))
|
||||
$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS) prepare
|
||||
$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS)
|
||||
+$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS) prepare
|
||||
+$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS)
|
||||
|
||||
%.o: $(SRC)/%.c
|
||||
$(cc_cmd)
|
||||
|
||||
gencore.o: ../arch/arm64/kernel/gencore.c
|
||||
$(cc_cmd)
|
||||
|
||||
FORCE:
|
||||
|
||||
-include Makefile.dep
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
BUILD_TARGET ?= @TARGET@
|
||||
KERNDIR=@KERNDIR@
|
||||
VPATH=@abs_srcdir@
|
||||
ARCH ?= @ARCH@
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
vdsodir=@abs_builddir@/../arch/$(ARCH)/kernel/vdso
|
||||
@ -26,16 +27,16 @@ all: $(O) $(KERNELS)
|
||||
$(O):
|
||||
mkdir -p $(O)
|
||||
|
||||
%/kernel.img: %/Makefile
|
||||
%/kernel.img: %/Makefile $(KERNELS)
|
||||
@echo 'Building for' $(dir $@)
|
||||
@make --no-print-directory -C $(dir $@) $(SUBCMD_OPTS)
|
||||
+@make --no-print-directory -C $(dir $@) $(SUBCMD_OPTS)
|
||||
|
||||
%/Makefile: Makefile.build FORCE
|
||||
@mkdir -p $(dir $@)
|
||||
@echo 'SRC = $(SRC)' > $@
|
||||
@echo 'IHKBASE = $(IHKBASE)' >> $@
|
||||
@echo 'TARGET = $(notdir $(patsubst %/,%,$(dir $@)))' >> $@
|
||||
@echo 'TARGETDIR = $$(shell echo $$(TARGET) | sed "s/-/\//")' >> $@
|
||||
@echo 'TARGETDIR = smp/$(ARCH)' >> $@
|
||||
@cat Makefile.build >> $@
|
||||
@rm -f $(dir $@)/Makefile.dep
|
||||
|
||||
|
||||
@ -70,8 +70,6 @@ static void ap_wait(void)
|
||||
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
|
||||
}
|
||||
|
||||
pc_ap_init();
|
||||
|
||||
/* one of them listens */
|
||||
mc_ikc_test_init();
|
||||
|
||||
|
||||
@ -170,6 +170,7 @@ int kprintf(const char *format, ...)
|
||||
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
|
||||
}
|
||||
|
||||
barrier();
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
@ -246,18 +246,6 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
|
||||
return curval;
|
||||
}
|
||||
|
||||
static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
|
||||
{
|
||||
/*
|
||||
* Officially we should call:
|
||||
* return getint_user((int *)dest, (int *)from);
|
||||
*
|
||||
* but McKernel on x86 can just access user-space.
|
||||
*/
|
||||
*dest = *(volatile uint32_t *)from;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The hash bucket lock must be held when this is called.
|
||||
* Afterwards, the futex_q must not be accessed.
|
||||
|
||||
@ -432,8 +432,8 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
}
|
||||
|
||||
n = p->num_sections;
|
||||
if (n > 16) {
|
||||
kprintf("%s: ERROR: more ELF sections than 16??\n",
|
||||
if (n > 16 || 0 >= n) {
|
||||
kprintf("%s: ERROR: ELF sections other than 1 to 16 ??\n",
|
||||
__FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -476,6 +476,27 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
proc->mpol_threshold = pn->mpol_threshold;
|
||||
proc->nr_processes = pn->nr_processes;
|
||||
proc->heap_extension = pn->heap_extension;
|
||||
|
||||
/* Update NUMA binding policy if requested */
|
||||
if (pn->mpol_bind_mask) {
|
||||
int bit;
|
||||
|
||||
memset(&vm->numa_mask, 0, sizeof(vm->numa_mask));
|
||||
|
||||
for_each_set_bit(bit, &pn->mpol_bind_mask,
|
||||
sizeof(pn->mpol_bind_mask) * BITS_PER_BYTE) {
|
||||
|
||||
if (bit >= ihk_mc_get_nr_numa_nodes()) {
|
||||
kprintf("%s: error: NUMA id %d is larger than mask size!\n",
|
||||
__FUNCTION__, bit);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
set_bit(bit, &vm->numa_mask[0]);
|
||||
}
|
||||
vm->numa_mem_policy = MPOL_BIND;
|
||||
}
|
||||
|
||||
#ifdef PROFILE_ENABLE
|
||||
proc->profile = pn->profile;
|
||||
thread->profile = pn->profile;
|
||||
@ -497,6 +518,11 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
}
|
||||
vm->region.map_end = vm->region.map_start;
|
||||
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
|
||||
dkprintf("%s: rlim_cur: %ld, rlim_max: %ld, stack_premap: %ld\n",
|
||||
__FUNCTION__,
|
||||
proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
|
||||
proc->rlimit[MCK_RLIMIT_STACK].rlim_max,
|
||||
pn->stack_premap);
|
||||
|
||||
if (prepare_process_ranges_args_envs(thread, pn, p, attr,
|
||||
NULL, 0, NULL, 0) != 0) {
|
||||
|
||||
@ -118,7 +118,7 @@ extern void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *t
|
||||
#endif /* __HEADER_ELFCORE_H */
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_18 */
|
||||
#ifdef __x86_64
|
||||
#include "../../arch/x86/kernel/include/elfcore.h"
|
||||
#include "../../arch/x86_64/kernel/include/elfcore.h"
|
||||
#elif __aarch64__
|
||||
#include "../../arch/arm64/kernel/include/elfcore.h"
|
||||
#endif
|
||||
|
||||
@ -19,12 +19,14 @@
|
||||
#include <ihk/mm.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <list.h>
|
||||
#include <rbtree.h>
|
||||
#include <signal.h>
|
||||
#include <memobj.h>
|
||||
#include <affinity.h>
|
||||
#include <syscall.h>
|
||||
#include <bitops.h>
|
||||
#include <profile.h>
|
||||
#include <config.h>
|
||||
|
||||
#define VR_NONE 0x0
|
||||
#define VR_STACK 0x1
|
||||
@ -52,6 +54,7 @@
|
||||
#define VR_MEMTYPE_UC 0x01000000 /* uncachable */
|
||||
#define VR_MEMTYPE_MASK 0x0f000000
|
||||
#define VR_PAGEOUT 0x10000000
|
||||
#define VR_DONTDUMP 0x20000000
|
||||
|
||||
#define PROT_TO_VR_FLAG(prot) (((unsigned long)(prot) << 16) & VR_PROT_MASK)
|
||||
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
|
||||
@ -77,9 +80,7 @@
|
||||
// struct process.ptrace
|
||||
#define PT_TRACED 0x80 /* The process is ptraced */
|
||||
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
|
||||
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
|
||||
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
|
||||
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
|
||||
#define PT_TRACE_SYSCALL 0x200 /* Trace syscall enter */
|
||||
|
||||
// ptrace(2) request
|
||||
#define PTRACE_TRACEME 0
|
||||
@ -378,7 +379,7 @@ struct user
|
||||
#define AUXV_LEN 18
|
||||
|
||||
struct vm_range {
|
||||
struct list_head list;
|
||||
struct rb_node vm_rb_node;
|
||||
unsigned long start, end;
|
||||
unsigned long flag;
|
||||
struct memobj *memobj;
|
||||
@ -553,6 +554,7 @@ struct process {
|
||||
unsigned long mpol_flags;
|
||||
size_t mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
unsigned long mpol_bind_mask;
|
||||
|
||||
// perf_event
|
||||
int perf_status;
|
||||
@ -690,13 +692,17 @@ struct thread {
|
||||
int mod_clone;
|
||||
struct uti_attr *mod_clone_arg;
|
||||
int parent_cpuid;
|
||||
|
||||
// for performance counter
|
||||
unsigned long pmc_alloc_map;
|
||||
unsigned long extra_reg_alloc_map;
|
||||
};
|
||||
|
||||
#define VM_RANGE_CACHE_SIZE 4
|
||||
|
||||
struct process_vm {
|
||||
struct address_space *address_space;
|
||||
struct list_head vm_range_list;
|
||||
struct rb_root vm_range_tree;
|
||||
struct vm_regions region;
|
||||
struct process *proc; /* process that reside on the same page */
|
||||
void *opt;
|
||||
|
||||
@ -197,6 +197,8 @@ struct program_load_desc {
|
||||
unsigned long mpol_flags;
|
||||
unsigned long mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
long stack_premap;
|
||||
unsigned long mpol_bind_mask;
|
||||
int nr_processes;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
|
||||
@ -567,4 +569,24 @@ typedef struct uti_attr {
|
||||
uint64_t flags; /* Representing location and behavior hints by bitmap */
|
||||
} uti_attr_t;
|
||||
|
||||
struct move_pages_smp_req {
|
||||
unsigned long count;
|
||||
const void **user_virt_addr;
|
||||
int *user_status;
|
||||
const int *user_nodes;
|
||||
void **virt_addr;
|
||||
int *status;
|
||||
pte_t **ptep;
|
||||
int *nodes;
|
||||
int nodes_ready;
|
||||
int *nr_pages;
|
||||
unsigned long *dst_phys;
|
||||
struct process *proc;
|
||||
ihk_atomic_t phase_done;
|
||||
int phase_ret;
|
||||
};
|
||||
|
||||
#define PROCESS_VM_READ 0
|
||||
#define PROCESS_VM_WRITE 1
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user