Compare commits

...

61 Commits
1.3.0 ... 1.4.0

Author SHA1 Message Date
0ecf31d896 modify:User space memory access(arm64) 2017-10-24 10:29:11 +09:00
08a625cc0d modify:User space memory access
perf_event_open,futex,process_vm_readv,process_vm_writev,move_pages
2017-10-23 20:27:56 +09:00
12840601e1 support PERF_TYPE_{HARDWARE|HW_CACHE} in perf_event_open
refs #829
2017-10-20 23:10:20 +09:00
2ae6883a8b mcreboot.sh, mcstop+release.sh: Fix retry loop of shutdown 2017-10-19 01:54:46 +09:00
d5629606c5 mcexec: -m: interpret as numactl -m (i.e., MPOL_BIND)
Conflicts:
	executer/include/uprotocol.h
	executer/user/mcexec.c
	kernel/include/syscall.h
2017-10-18 16:54:34 +09:00
285059e504 mcexec: use -M for --mpol-threshold
Conflicts:
	executer/user/mcexec.c
2017-10-18 16:44:49 +09:00
5b6d0a887c Add ARM64 arch_rusage header 2017-10-18 09:23:08 +09:00
3573b8649e Guard call to gencore and freecore
The gencore() and freecore() code in gencore.c is guarded by
POSTK_DEBUG_ARCH_DEP_18, so the call to these functions should
also be guarded, otherwise linking fails.
2017-10-18 09:20:52 +09:00
d7523cdd84 Remove assignment of ns_per_tsc in struct monitor
struct member seems to have been removed or moved to struct
global_rusage
2017-10-18 09:20:52 +09:00
5753db5846 Add ihk_mc_syscall_number() for ARM by reading x8 2017-10-18 09:20:52 +09:00
2d7cb0af89 Add copy_fp_regs to ARM (same as for x86_64) 2017-10-18 09:20:52 +09:00
1cb9b435a9 Fix (?) build system
- disable -mno-red-zone for ARM
- add missing INCLUDEDIR
- make gencore.c compile
2017-10-18 09:20:52 +09:00
43ecf06e83 arch: x86 -> x86_64 and build system changes 2017-10-18 09:20:52 +09:00
51982de36b Handle return value of mcctrl_ikc_send in mcexec_handle_prepare_image 2017-10-18 09:20:51 +09:00
0a22320a3c Don't allocate memory for 0-page-sized requests
Previously the allocator would return all availble memory for a
request of 0 pages. This is rather counter-intuitive and left no
memory for subsequent allocations.
2017-10-18 09:20:51 +09:00
8813e890c5 Fix the check routine for elf sections 2017-10-18 09:20:51 +09:00
e664ffba18 Show context registers at the interrupt by SGI 6 2017-10-18 09:20:51 +09:00
3bd0137c25 Fix some race condition on arm64
* move barrier() to architecture depended region
* add barrier() in issue_ipi, kprintf, map_virtual
* enable the workaround for cavium thunderx
2017-10-18 09:20:51 +09:00
4f2b4aa402 Round the allocation for cpu-local variables up PAGE_SIZE
Previously, this resulted in 0 pages being allocated.
2017-10-18 09:20:51 +09:00
682cd34b74 Make mcstop+release architecture independent 2017-10-18 09:20:51 +09:00
2bc4d06a48 Add empty definition of visit_pte_range_safe()
This is for linking only. visit_pte_range_safe() is required only
for memdump, as far as i can tell. Since memdump is disabled anyway
I think it's ok to leave this function empty for now.
2017-10-18 09:20:51 +09:00
4f2c1e07c1 Add ARCH variable to Makefiles
In some Makefiles the ARCH variable was not set, although it was used.
In executer/user/Makefile.in it was used before it was set.
2017-10-18 09:20:50 +09:00
77bb3038d3 Add PT_ENTRIES macro 2017-10-18 09:20:50 +09:00
931448a94d Fix typo in page_align_up 2017-10-18 09:20:50 +09:00
c51bbbabc6 Change x86 to @ARCH@ in mcreboot-smp-x86.sh.in
since it is used for smp-x86 and smp-arm64
2017-10-18 09:20:50 +09:00
2ddc52e1a4 setitimer(): Fix error handling of copy_from_user()
This fixes POSTK_TEMP_FIX_40 (POSTK_DEBUG_TEMP_FIX_40)
2017-10-13 04:59:50 +09:00
3c93958c48 extend_process_region(): fix align_shift (POSTK_DEBUG_TEMP_FIX_68) 2017-10-17 15:07:57 +09:00
9763c40f64 set_robust_list: returns 0
refs #977
2017-10-16 09:54:23 +09:00
3bf77446cc mcreboot-smp-x86.sh: add extra_kopts param
This lets one specify arbitrary kernel parameters, instead of manually
fiddling with the script.
Could ultimately replace params like -t (turbo) and -d (dump_level) that
do not have any side effect (logmode starts a userland daemon)
2017-10-13 10:02:11 +09:00
c3dfb1663d page_fault_handler: do not try to fault addresses < 4k
There is no good reason to map these low addresses (userspace could with
mmap fixed, but that is grounds for many exploits...);

the main advantage however is if we do a null deref or close to (0->foo)
within a pagefault we will get a panic stack instead of getting a hang
because we cannot get some locks.
2017-10-13 10:02:11 +09:00
217dd9c1e5 x86 set_signal: panic if interrupt came from kernel
This makes debugging errors e.g. FPE from kernel much easier,
we really shouldn't be taking a user level coredump blaming user
in that case anyway
2017-10-13 10:02:11 +09:00
d4cd756a91 x86/cpu.c: unhandled page fault: print pre-fault stack
Do basic manual unwinding and print raw stack addresses, with a
suggested invocation of addr2line to pretty-print the result.
2017-10-13 10:02:11 +09:00
b894619d1b Speed up parallel builds
- make should be $(MAKE)
 - add + in front of rules spawning long-lasted make process in a
subshell. (This would not be needed with $(MAKE) -C .. target, but our
makefiles do not handle that because they use $(PWD))
 - split the main 'all' rule as all 4 targets are independant
 - fix dependencies where appropriate for parallelism

Extra, not speed-related changes:
 - remove some double-colon for targets as they do not need it

This cuts build time from 5s to 1.5s on a laptop with -j4, and more
importantly from 85s to 35s on a KNL node.
As a bonus, the fixed dependencies removes the need to clean before
rebuilding all the time. Probably.
2017-10-13 10:02:11 +09:00
b962da700b do_signal: ignore SIGWINCH
McKernel would terminate() running program on terminal resizing
It actually looks like there is nothing for us to do when we
get that anyway (tested with `dialog`)
2017-10-13 10:02:11 +09:00
196379854b Fix a few more harmless compiler warnings:
- myfree in pager.c was called with an argument, so add one to the
dummy definition
- pgoff is offset_t (unsigned) and doesn't need to be compared to 0
- clang says '*(int *)0 = 0' will be optimized away instead of keeping
the segfault without a volatile hint (?! that is wrong!), but it causes
no harm to add anyway.
2017-10-13 10:02:11 +09:00
d213efac79 mcctrl/sysfs: add parenthesis around SYSFS_UNLINK_KEEP_ANCESTOR check
! has more priority than &, so !flags & SYSFS_UNLINK_KEEP_ANCESTOR is
not very likely. Change to !(flags & SYSFS_UNLINK_KEEP_ANCESTOR)
2017-10-13 10:02:11 +09:00
38910fe13d mc_perf_event.h: s/EVNET/EVENT/ in the guard (improper ifndef) 2017-10-13 10:02:11 +09:00
4d4279121b process/vm; replace vm_range list by a rbtree
This replaces the chained list used to keep track of all memory ranges
of a process by a standard rbtree (no need of interval tree here
because there is no overlap)

Accesses that were done directly through vm_range_list before were
replaced by lookup_process_memory_range, even full list scan (e.g.
coredump).
The full scans will thus be less efficient because calls to rb_next()
will not be inlined, but these are rarer calls that can probably afford
this compared to code simplicity.

The only reference to the actual backing structure left outside of
process.c is a call to rb_erase in xpmem_free_process_memory_range.

v2: fix lookup_process_memory_range with small start address

v3: make vm_range_insert error out properly

Panic does not lead to easy debug, all error paths
are handled to just return someting on error

v4: fix lookup_process_memory_range (again)

That optimistically going left was a more serious bug than just
last iteration, we could just pass by a match and continue down
the tree if the match was not a leaf.

v5: some users actually needed leftmost match, so restore behavior
without the breakage (hopefully)
2017-10-13 10:00:27 +09:00
99da5b6484 ptrace: unify flags PT_TRACE_SYSCALL_ENTER and PT_TRACE_SYSCALL_EXIT to PT_TRACE_SYSCALL
refs #961
2017-10-11 15:43:57 +09:00
6b60dee890 ihklib: Fix ihklib_rusage.h for x86 2017-10-04 05:06:17 +09:00
dd08a3151e mcreboot: Fix version check for mcoverlayfs 2017-10-04 00:37:01 +09:00
e1442bf12b mcexec: Fix usage 2017-10-03 15:34:00 +09:00
86f297ddc4 mcreboot: Fix change umask for /proc and /sys files 2017-10-03 15:21:44 +09:00
823b222af9 mcreboot: Change umask for /proc and /sys files 2017-10-03 06:03:44 +09:00
9c25eb8ef2 mcoverlayfs: Fix version check 2017-10-02 19:51:30 +09:00
665eead78b do_wait: delegate process status for ppid_parent if child process is teacee
refs #946
2017-09-29 14:59:34 +09:00
f8ef43c77d Merge branch 'development' of pccluster.org:mckernel into development 2017-09-29 14:59:10 +09:00
8f4afe410f Remove obsolete pc_init(), pc_ap_init(), pc_test() 2017-09-29 13:20:01 +09:00
da9bb421cc ptrace: call ptrace_syscall_exit before check_signal
refs #960
2017-09-29 10:03:44 +09:00
1e89796d3e Replace ihk_set_kmsg() with ihk_get_kmsg_buf() 2017-09-27 20:26:23 +09:00
a1a2900606 ptrace: Fix the timing of save_fp_regs, and Add copy fp_regs to child in clone_thread
refs #702
2017-09-27 17:02:30 +09:00
79b977ac06 Check xgetbv availability before use for machines without it (i.e. KVM) 2017-09-26 19:31:34 +09:00
37e3118df6 mcexec: Add --stack-premap=<premap_size>[,<max>] to man page 2017-09-26 18:45:52 +09:00
be4d84c0c1 mcexec: Add --stack-premap=<premap_size>[,<max>]
<premap_size> of stack is pre-mapped on creating a process.
And its max size of stack is set to <max>.
This replaces MCKERNEL_RLIMIT_STACK=<premap_size>,<max>.
2017-09-26 17:04:10 +09:00
c43c1b640a execve: call ptrace_syscall_exit if execve successed
refs #945
2017-09-26 14:31:07 +09:00
e294db7e53 syscall: set syscall_return before calling ptrace_syscall_exit
refs #944
2017-09-26 14:29:02 +09:00
df3f388e09 syscall: set -ENOSYS to syscall_return before calling ptrace_syscall_enter
refs #943
2017-09-26 14:25:49 +09:00
a2fbe99b60 madvise: support MADV_DONTDUMP/DODUMP
refs #661
2017-09-26 14:21:40 +09:00
9c847c0a8f Change permission of mcoverlay-create/destroy.sh from 600 to 755 2017-09-26 14:05:54 +09:00
58c1fd4512 Update test programs for qlmpi (do swap with using shared memory, ib_pingpong) 2017-09-25 16:56:52 +09:00
dae9a5ff13 mcexec: verify argument for -n/-t/-c 2017-09-25 16:43:47 +09:00
134 changed files with 3448 additions and 1224 deletions

View File

@ -4,13 +4,21 @@ INCDIR = @INCDIR@
ETCDIR = @ETCDIR@
MANDIR = @MANDIR@
all::
@(cd executer/kernel/mcctrl; make modules)
@(cd executer/kernel/mcoverlayfs; make modules)
@(cd executer/user; make)
@case "$(TARGET)" in \
all: executer-mcctrl executer-mcoverlayfs executer-user mckernel
executer-mcctrl:
+@(cd executer/kernel/mcctrl; $(MAKE) modules)
executer-mcoverlayfs:
+@(cd executer/kernel/mcoverlayfs; $(MAKE) modules)
executer-user:
+@(cd executer/user; $(MAKE))
mckernel:
+@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; make) \
(cd kernel; $(MAKE)) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
@ -18,13 +26,13 @@ all::
;; \
esac
install::
@(cd executer/kernel/mcctrl; make install)
@(cd executer/kernel/mcoverlayfs; make install)
@(cd executer/user; make install)
install:
@(cd executer/kernel/mcctrl; $(MAKE) install)
@(cd executer/kernel/mcoverlayfs; $(MAKE) install)
@(cd executer/user; $(MAKE) install)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; make install) \
(cd kernel; $(MAKE) install) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
@ -32,34 +40,20 @@ install::
;; \
esac
@case "$(TARGET)" in \
attached-mic) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
builtin-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
smp-x86 | smp-arm64) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
install -m 600 arch/x86/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
install -m 600 arch/x86/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
install -m 755 arch/x86/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
mkdir -p -m 755 $(ETCDIR); \
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
install -m 644 arch/x86_64/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
install -m 644 arch/x86_64/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
mkdir -p -m 755 $(INCDIR); \
install -m 644 kernel/include/swapfmt.h $(INCDIR); \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
@ -67,13 +61,13 @@ install::
;; \
esac
clean::
@(cd executer/kernel/mcctrl; make clean)
@(cd executer/kernel/mcoverlayfs; make clean)
@(cd executer/user; make clean)
clean:
@(cd executer/kernel/mcctrl; $(MAKE) clean)
@(cd executer/kernel/mcoverlayfs; $(MAKE) clean)
@(cd executer/user; $(MAKE) clean)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; make clean) \
(cd kernel; $(MAKE) clean) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \

View File

@ -590,6 +590,8 @@ static void show_context_stack(struct pt_regs *regs)
return;
}
ihk_mc_debug_show_interrupt_context(regs);
sp = (uintptr_t)regs + sizeof(*regs);
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
max_loop = (stack_top - sp) / min_stack_frame_size;
@ -1205,6 +1207,10 @@ void ihk_mc_delay_us(int us)
arch_delay(us);
}
void arch_print_stack()
{
}
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;
@ -1428,6 +1434,13 @@ save_fp_regs(struct thread *thread)
}
}
void copy_fp_regs(struct thread *from, struct thread *to)
{
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
}
}
void
clear_fp_regs(struct thread *thread)
{
@ -1499,7 +1512,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = thread->vm;
struct vm_range *range;
char found;
unsigned long irqflags;
unsigned long error = 0;
@ -1513,17 +1525,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
found = 0;
list_for_each_entry(range, &vm->vm_range_list, list) {
if (range->start <= address && range->end > address) {
found = 1;
range = lookup_process_memory_range(vm, address, address+1);
if (range) {
__kprintf("address is in range, flag: 0x%lx\n",
range->flag);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
break;
}
}
if (!found) {
} else {
__kprintf("address is out of range! \n");
}

View File

@ -221,7 +221,7 @@ int gencore(struct thread *thread, void *regs,
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
@ -235,7 +235,10 @@ int gencore(struct thread *thread, void *regs,
return -1;
}
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
@ -323,7 +326,10 @@ int gencore(struct thread *thread, void *regs,
/* program header for each memory chunk */
i = 1;
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
@ -364,7 +370,10 @@ int gencore(struct thread *thread, void *regs,
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
i = 3; /* memory segments */
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long phys;
if (range->flag & VR_RESERVED)

View File

@ -134,4 +134,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
return ret;
}
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
*dest = *(volatile uint32_t *)from;
return 0;
}
#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */

View File

@ -215,7 +215,7 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
#define PAGE_P2ALIGN 0
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
#define page_align(addr) __page_align(addr, PAGE_SIZE)
#define page_align_up(addr) __page_align_up((addr, PAGE_SIZE)
#define page_align_up(addr) __page_align_up(addr, PAGE_SIZE)
/*
* large page
@ -263,6 +263,8 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
#define PTE_FILEOFF PTE_SPECIAL
#define PT_ENTRIES (PAGE_SIZE >> 3)
#ifndef __ASSEMBLY__
#include <ihk/types.h>

View File

@ -25,6 +25,8 @@
#define smp_rmb() dmb(ishld)
#define smp_wmb() dmb(ishst)
#define arch_barrier() smp_mb()
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \

View File

@ -0,0 +1,34 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#include <arch-memory.h>
//#define DEBUG_RUSAGE
extern struct rusage_global *rusage;
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_16KB 1
#define IHK_OS_PGSIZE_64KB 2
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
switch (pgsize) {
case __PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case __PTL2_SIZE:
ret = IHK_OS_PGSIZE_16KB;
break;
case __PTL3_SIZE:
ret = IHK_OS_PGSIZE_64KB;
break;
default:
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
break;
}
return ret;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -6,6 +6,8 @@
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
#else /* defined(CONFIG_HAS_NMI) */
#include <sysreg.h>
#endif /* defined(CONFIG_HAS_NMI) */
#if defined(CONFIG_HAS_NMI)

View File

@ -35,6 +35,8 @@
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define ARM_CPU_IMP_CAVIUM 0x43
#ifndef __ASSEMBLY__
static unsigned int read_cpuid_id(void)

View File

@ -73,6 +73,7 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
#define ihk_mc_syscall_arg5(uc) (uc)->regs[5]
#define ihk_mc_syscall_ret(uc) (uc)->regs[0]
#define ihk_mc_syscall_number(uc) (uc)->regs[8]
#define ihk_mc_syscall_pc(uc) (uc)->pc
#define ihk_mc_syscall_sp(uc) (uc)->sp

View File

@ -10,6 +10,8 @@
//#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
@ -18,6 +20,10 @@
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#ifdef USE_CAVIUM_THUNDER_X
static char is_cavium_thunderx = 0;
#endif
void *dist_base;
void *rdist_base[NR_CPUS];
@ -108,8 +114,8 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
asm volatile("nop;nop;nop;nop;");
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
asm volatile("nop;nop;nop;nop;");
mb();
#endif /* CONFIG_HAS_NMI */
mb();
return irqstat;
}
@ -118,7 +124,7 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
static uint64_t gic_read_iar(void)
{
#ifdef USE_CAVIUM_THUNDER_X
if (static_key_false(&is_cavium_thunderx))
if (is_cavium_thunderx)
return gic_read_iar_cavium_thunderx();
else
#endif
@ -266,6 +272,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
barrier();
if(vector < 16){
// send SGI
arm64_raise_sgi_gicv3(cpuid, vector);
@ -304,7 +311,9 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
#ifdef USE_CAVIUM_THUNDER_X
/* Cavium ThunderX erratum 23154 */
gicv3_check_capabilities();
if (MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_CAVIUM) {
is_cavium_thunderx = 1;
}
#endif
}
@ -399,6 +408,7 @@ void gic_enable_gicv3(void)
/* Set specific IPI to NMI */
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
/* sync wait */
gic_do_wait_for_rwp(rbase);

View File

@ -20,10 +20,11 @@ size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
void init_processors_local(int max_id)
{
int i = 0;
const int sz = (max_id + 1) * KERNEL_STACK_SIZE;
union arm64_cpu_local_variables *tmp;
/* allocate one more for alignment */
locals = ihk_mc_alloc_pages((max_id + 1) * (KERNEL_STACK_SIZE / PAGE_SIZE), IHK_MC_AP_CRITICAL);
locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL);
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
/* clear struct process, struct process_vm, struct thread_info area */

View File

@ -1760,6 +1760,12 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args);
}
int visit_pte_range_safe(page_table_t pt, void *start0, void *end0, int pgshift,
enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
{
return 0;
}
struct clear_range_args {
int free_physical;
struct memobj *memobj;

View File

@ -965,7 +965,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;

View File

@ -12,6 +12,7 @@
#include <lwk/compiler.h>
#include <hwcap.h>
#include <prctl.h>
#include <limits.h>
extern void ptrace_report_signal(struct thread *thread, int sig);
extern void clear_single_step(struct thread *thread);
@ -1866,4 +1867,634 @@ save_uctx(void *uctx, struct pt_regs *regs)
/* TODO: skeleton for UTI */
}
int do_process_vm_read_writev(int pid,
const struct iovec *local_iov,
unsigned long liovcnt,
const struct iovec *remote_iov,
unsigned long riovcnt,
unsigned long flags,
int op)
{
int ret = -EINVAL;
int li, ri;
int pli, pri;
off_t loff, roff;
size_t llen = 0, rlen = 0;
size_t copied = 0;
size_t to_copy;
struct thread *lthread = cpu_local_var(current);
struct process *rproc;
struct process *lproc = lthread->proc;
struct process_vm *rvm = NULL;
unsigned long rphys;
unsigned long rpage_left;
unsigned long psize;
void *rva;
struct vm_range *range;
struct mcs_rwlock_node_irqsave lock;
struct mcs_rwlock_node update_lock;
/* Sanity checks */
if (flags) {
return -EINVAL;
}
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
return -EINVAL;
}
/* Check if parameters are okay */
ihk_mc_spinlock_lock_noirq(&lthread->vm->memory_range_lock);
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov,
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
if (!range) {
ret = -EFAULT;
goto arg_out;
}
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)remote_iov,
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
if (!range) {
ret = -EFAULT;
goto arg_out;
}
ret = 0;
arg_out:
ihk_mc_spinlock_unlock_noirq(&lthread->vm->memory_range_lock);
if (ret != 0) {
goto out;
}
for (li = 0; li < liovcnt; ++li) {
llen += local_iov[li].iov_len;
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
li, local_iov[li].iov_base, local_iov[li].iov_len);
}
for (ri = 0; ri < riovcnt; ++ri) {
rlen += remote_iov[ri].iov_len;
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
}
if (llen != rlen) {
return -EINVAL;
}
/* Find remote process */
rproc = find_process(pid, &lock);
if (!rproc) {
ret = -ESRCH;
goto out;
}
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
if(rproc->status == PS_EXITED ||
rproc->status == PS_ZOMBIE){
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
process_unlock(rproc, &lock);
ret = -ESRCH;
goto out;
}
rvm = rproc->vm;
hold_process_vm(rvm);
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
process_unlock(rproc, &lock);
if (lproc->euid != 0 &&
(lproc->ruid != rproc->ruid ||
lproc->ruid != rproc->euid ||
lproc->ruid != rproc->suid ||
lproc->rgid != rproc->rgid ||
lproc->rgid != rproc->egid ||
lproc->rgid != rproc->sgid)) {
ret = -EPERM;
goto out;
}
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
liovcnt, riovcnt);
pli = pri = -1; /* Previous indeces in iovecs */
li = ri = 0; /* Current indeces in iovecs */
loff = roff = 0; /* Offsets in current iovec */
/* Now iterate and do the copy */
while (copied < llen) {
int faulted = 0;
/* New local vector? */
if (pli != li) {
struct vm_range *range;
ihk_mc_spinlock_lock_noirq(&lthread->vm->memory_range_lock);
/* Is base valid? */
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov[li].iov_base,
(uintptr_t)(local_iov[li].iov_base + 1));
if (!range) {
ret = -EFAULT;
goto pli_out;
}
/* Is range valid? */
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov[li].iov_base,
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
if (range == NULL) {
ret = -EINVAL;
goto pli_out;
}
if (!(range->flag & ((op == PROCESS_VM_READ) ?
VR_PROT_WRITE : VR_PROT_READ))) {
ret = -EFAULT;
goto pli_out;
}
ret = 0;
pli_out:
ihk_mc_spinlock_unlock_noirq(&lthread->vm->memory_range_lock);
if (ret != 0) {
goto out;
}
pli = li;
}
/* New remote vector? */
if (pri != ri) {
struct vm_range *range;
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
/* Is base valid? */
range = lookup_process_memory_range(rvm,
(uintptr_t)remote_iov[li].iov_base,
(uintptr_t)(remote_iov[li].iov_base + 1));
if (range == NULL) {
ret = -EFAULT;
goto pri_out;
}
/* Is range valid? */
range = lookup_process_memory_range(rvm,
(uintptr_t)remote_iov[li].iov_base,
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
if (range == NULL) {
ret = -EINVAL;
goto pri_out;
}
if (!(range->flag & ((op == PROCESS_VM_READ) ?
VR_PROT_READ : VR_PROT_WRITE))) {
ret = -EFAULT;
goto pri_out;
}
ret = 0;
pri_out:
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
if (ret != 0) {
goto out;
}
pri = ri;
}
/* Figure out how much we can copy at most in this iteration */
to_copy = (local_iov[li].iov_len - loff);
if ((remote_iov[ri].iov_len - roff) < to_copy) {
to_copy = remote_iov[ri].iov_len - roff;
}
retry_lookup:
/* TODO: remember page and do this only if necessary */
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
remote_iov[ri].iov_base + roff, &rphys, &psize);
if (ret) {
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
void *addr;
if (faulted) {
ret = -EFAULT;
goto out;
}
/* Fault in pages */
for (addr = (void *)
(((unsigned long)remote_iov[ri].iov_base + roff)
& PAGE_MASK);
addr < (remote_iov[ri].iov_base + roff + to_copy);
addr += PAGE_SIZE) {
ret = page_fault_process_vm(rvm, addr, reason);
if (ret) {
ret = -EFAULT;
goto out;
}
}
faulted = 1;
goto retry_lookup;
}
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
psize) & ~(psize - 1)) -
((unsigned long)remote_iov[ri].iov_base + roff));
if (rpage_left < to_copy) {
to_copy = rpage_left;
}
rva = phys_to_virt(rphys);
fast_memcpy(
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
to_copy);
copied += to_copy;
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
li, local_iov[li].iov_base + loff,
(op == PROCESS_VM_READ) ? "<-" : "->",
ri, remote_iov[ri].iov_base + roff, to_copy,
psize, rpage_left);
loff += to_copy;
roff += to_copy;
if (loff == local_iov[li].iov_len) {
li++;
loff = 0;
}
if (roff == remote_iov[ri].iov_len) {
ri++;
roff = 0;
}
}
release_process_vm(rvm);
return copied;
out:
if(rvm)
release_process_vm(rvm);
return ret;
}
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
{
int i, i_s, i_e, phase = 1;
struct move_pages_smp_req *mpsr =
(struct move_pages_smp_req *)arg;
struct process_vm *vm = mpsr->proc->vm;
int count = mpsr->count;
struct page_table *save_pt;
extern struct page_table *get_init_page_table(void);
i_s = (count / nr_cpus) * cpu_index;
i_e = i_s + (count / nr_cpus);
if (cpu_index == (nr_cpus - 1)) {
i_e = count;
}
/* Load target process' PT so that we can access user-space */
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
get_init_page_table() :
cpu_local_var(current)->vm->address_space->page_table;
if (save_pt != vm->address_space->page_table) {
ihk_mc_load_page_table(vm->address_space->page_table);
}
else {
save_pt = NULL;
}
if (nr_cpus == 1) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break;
default:
break;
}
}
else if (nr_cpus > 1 && nr_cpus < 4) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break;
default:
break;
}
}
else if (nr_cpus >= 4 && nr_cpus < 8) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
break;
case 1:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 3:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
break;
default:
break;
}
}
else if (nr_cpus >= 8) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * (count / 2));
break;
case 1:
memcpy(mpsr->virt_addr + (count / 2),
mpsr->user_virt_addr + (count / 2),
sizeof(void *) * (count / 2));
break;
case 2:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 3:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 4:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
break;
case 5:
memset(mpsr->status, 0, sizeof(int) * count);
break;
case 6:
memset(mpsr->nr_pages, 0, sizeof(int) * count);
break;
case 7:
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
break;
default:
break;
}
}
while (!(volatile int)mpsr->nodes_ready) {
cpu_pause();
}
/* NUMA verification in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
}
}
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/* PTE lookup in parallel */
for (i = i_s; i < i_e; i++) {
void *phys;
size_t pgsize;
int p2align;
/*
* XXX: No page structures for anonymous mappings.
* Look up physical addresses by scanning page tables.
*/
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
/* PTE valid? */
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
mpsr->status[i] = -ENOENT;
mpsr->ptep[i] = NULL;
continue;
}
/* PTE is file? */
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
mpsr->status[i] = -EINVAL;
mpsr->ptep[i] = NULL;
continue;
}
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
/* Large page? */
if (pgsize > PAGE_SIZE) {
int nr_sub_pages = (pgsize / PAGE_SIZE);
int j;
if (i + nr_sub_pages > count) {
kprintf("%s: ERROR: page at index %d exceeds the region\n",
__FUNCTION__, i);
mpsr->status[i] = -EINVAL;
break;
}
/* Is it contiguous across nr_sub_pages and all
* requested to be moved to the same target node? */
for (j = 0; j < nr_sub_pages; ++j) {
if (mpsr->virt_addr[i + j] !=
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
kprintf("%s: ERROR: virt address or node at index %d"
" is inconsistent\n",
__FUNCTION__, i + j);
mpsr->phase_ret = -EINVAL;
goto pte_out;
}
}
mpsr->nr_pages[i] = nr_sub_pages;
i += (nr_sub_pages - 1);
}
else {
mpsr->nr_pages[i] = 1;
}
}
pte_out:
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
if (cpu_index == 0) {
/* Allocate new pages on target NUMA nodes */
for (i = 0; i < count; i++) {
int pgalign = 0;
int j;
void *dst;
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
continue;
/* TODO: store pgalign info in an array as well? */
if (mpsr->nr_pages[i] > 1) {
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
pgalign = PTL2_SHIFT - PTL1_SHIFT;
}
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
if (!dst) {
mpsr->status[i] = -ENOMEM;
continue;
}
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
mpsr->status[j] = mpsr->nodes[i];
}
mpsr->dst_phys[i] = virt_to_phys(dst);
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
" allocated phys: 0x%lx\n",
__FUNCTION__, mpsr->virt_addr[i],
mpsr->nr_pages[i] * PAGE_SIZE,
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
}
}
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/* Copy, PTE update, memfree in parallel */
for (i = i_s; i < i_e; ++i) {
if (!mpsr->dst_phys[i])
continue;
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
mpsr->nr_pages[i] * PAGE_SIZE);
ihk_mc_free_pages(
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
mpsr->nr_pages[i]);
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
__FUNCTION__, mpsr->virt_addr[i],
mpsr->nr_pages[i] * PAGE_SIZE,
mpsr->dst_phys[i]);
}
/* XXX: do a separate SMP call with only CPUs running threads
* of this process? */
if (cpu_local_var(current)->proc == mpsr->proc) {
/* Invalidate all TLBs */
for (i = 0; i < mpsr->count; i++) {
if (!mpsr->dst_phys[i])
continue;
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
}
}
out:
if (save_pt) {
ihk_mc_load_page_table(save_pt);
}
return mpsr->phase_ret;
}
/*** End of File ***/

View File

@ -1,7 +0,0 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
# IHK_OBJS added coredump.o
ifeq ($(ARCH), arm64)
IHK_OBJS += coredump.o
endif

View File

@ -0,0 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o

View File

@ -1081,7 +1081,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = thread->vm;
struct vm_range *range;
char found;
unsigned long irqflags;
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
@ -1095,17 +1094,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
found = 0;
list_for_each_entry(range, &vm->vm_range_list, list) {
if (range->start <= address && range->end > address) {
found = 1;
range = lookup_process_memory_range(vm, address, address+1);
if (range) {
__kprintf("address is in range, flag: 0x%lx\n",
range->flag);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
break;
}
}
if (!found) {
} else {
__kprintf("address is out of range! \n");
}
@ -1473,29 +1467,91 @@ void ihk_mc_delay_us(int us)
arch_delay(us);
}
#define EXTENDED_ARCH_SHOW_CONTEXT
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
void arch_show_extended_context(void)
{
unsigned long cr0, cr4, msr, xcr0;
unsigned long cr0, cr4, msr, xcr0 = 0;
/* Read and print CRs, MSR_EFER, XCR0 */
asm volatile("movq %%cr0, %0" : "=r"(cr0));
asm volatile("movq %%cr4, %0" : "=r"(cr4));
msr = rdmsr(MSR_EFER);
if (xsave_available) {
xcr0 = xgetbv(0);
}
__kprintf("\n CR0 CR4\n");
__kprintf("%016lX %016lX\n", cr0, cr4);
__kprintf(" MSR_EFER\n");
__kprintf("%016lX\n", msr);
if (xsave_available) {
__kprintf(" XCR0\n");
__kprintf("%016lX\n", xcr0);
}
#endif
}
struct stack {
struct stack *rbp;
unsigned long eip;
};
/* KPRINTF_LOCAL_BUF_LEN is 1024, useless to go further */
#define STACK_BUF_LEN (1024-sizeof("[ 0]: "))
static void __print_stack(struct stack *rbp, unsigned long first) {
char buf[STACK_BUF_LEN];
size_t len;
/* Build string in buffer to output a single line */
len = snprintf(buf, STACK_BUF_LEN,
"addr2line -e smp-x86/kernel/mckernel.img -fpia");
if (first)
len += snprintf(buf + len, STACK_BUF_LEN - len,
" %#16lx", first);
while ((unsigned long)rbp > 0xffff880000000000 &&
STACK_BUF_LEN - len > sizeof(" 0x0123456789abcdef")) {
len += snprintf(buf + len, STACK_BUF_LEN - len,
" %#16lx", rbp->eip);
rbp = rbp->rbp;
}
__kprintf("%s\n", buf);
}
void arch_print_pre_interrupt_stack(const struct x86_basic_regs *regs) {
struct stack *rbp;
/* only for kernel stack */
if (regs->error & PF_USER)
return;
__kprintf("Pre-interrupt stack trace:\n");
/* interrupt stack heuristics:
* - the first entry looks like it is always garbage, so skip.
* (that is done by taking regs->rsp instead of &regs->rsp)
* - that still looks sometimes wrong. For now, if it is not
* within 64k of itself, look for the next entry that matches.
*/
rbp = (struct stack*)regs->rsp;
while ((uintptr_t)rbp > (uintptr_t)rbp->rbp
|| (uintptr_t)rbp + 0x10000 < (uintptr_t)rbp->rbp)
rbp = (struct stack *)(((uintptr_t *)rbp) + 1);
__print_stack(rbp, regs->rip);
}
void arch_print_stack() {
struct stack *rbp;
__kprintf("Approximative stack trace:\n");
asm("mov %%rbp, %0" : "=r"(rbp) );
__print_stack(rbp, 0);
}
/*@
@ requires \valid(reg);
@ -1526,9 +1582,9 @@ void arch_show_interrupt_context(const void *reg)
__kprintf("%16lx %16lx %16lx %16lx\n",
regs->cs, regs->ss, regs->rflags, regs->error);
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
arch_show_extended_context();
#endif
arch_print_pre_interrupt_stack(regs);
kprintf_unlock(irqflags);
}
@ -1651,13 +1707,11 @@ release_fp_regs(struct thread *thread)
thread->fp_regs = NULL;
}
/*@
@ requires \valid(thread);
@*/
void
save_fp_regs(struct thread *thread)
static int
check_and_allocate_fp_regs(struct thread *thread)
{
int pages;
int result = 0;
if (!thread->fp_regs) {
pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
@ -1666,12 +1720,26 @@ save_fp_regs(struct thread *thread)
if (!thread->fp_regs) {
kprintf("error: allocating fp_regs pages\n");
return;
result = 1;
goto out;
}
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
memset(thread->fp_regs, 0, pages * PAGE_SIZE);
}
out:
return result;
}
/*@
@ requires \valid(thread);
@*/
void
save_fp_regs(struct thread *thread)
{
if (check_and_allocate_fp_regs(thread) != 0) {
// alloc error
return;
}
if (xsave_available) {
unsigned int low, high;
@ -1687,6 +1755,13 @@ save_fp_regs(struct thread *thread)
}
}
void copy_fp_regs(struct thread *from, struct thread *to)
{
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
}
}
#ifdef POSTK_DEBUG_TEMP_FIX_19
void
clear_fp_regs(struct thread *thread)

View File

@ -289,7 +289,7 @@ int gencore(struct thread *thread, void *regs,
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
@ -303,13 +303,18 @@ int gencore(struct thread *thread, void *regs,
return -1;
}
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DONTDUMP)
continue;
/* We need a chunk for each page for a demand paging area.
This can be optimized for spacial complexity but we would
lose simplicity instead. */
@ -391,7 +396,10 @@ int gencore(struct thread *thread, void *regs,
/* program header for each memory chunk */
i = 1;
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
@ -432,7 +440,10 @@ int gencore(struct thread *thread, void *regs,
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
i = 3; /* memory segments */
list_for_each_entry(range, &vm->vm_range_list, list) {
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long phys;
if (range->flag & VR_RESERVED)

View File

@ -130,4 +130,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
#endif /* !POSTK_DEBUG_ARCH_DEP_8 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
*dest = *(volatile uint32_t *)from;
return 0;
}
#endif

View File

@ -13,16 +13,16 @@
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#include <ihk/cpu.h>
#define arch_barrier() asm volatile("" : : : "memory")
static inline void rmb(void)
{
barrier();
arch_barrier();
}
static inline void wmb(void)
{
barrier();
arch_barrier();
}
static unsigned long read_tsc(void)

View File

@ -49,6 +49,7 @@ typedef struct x86_user_context ihk_mc_user_context_t;
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
#define ihk_mc_syscall_number(uc) (uc)->gpr.orig_rax
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp

View File

@ -189,9 +189,30 @@ struct tss64 {
} __attribute__((packed));
struct x86_basic_regs {
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
unsigned long rip, cs, rflags, rsp, ss;
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rax;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
union {
unsigned long orig_rax; /* syscall */
unsigned long error; /* interrupts */
};
unsigned long rip;
unsigned long cs;
unsigned long rflags;
unsigned long rsp;
unsigned long ss;
};
struct x86_sregs {

View File

@ -10,9 +10,12 @@
#include <ihk/perfctr.h>
#include <march.h>
#include <errno.h>
#include <cls.h>
#include <ihk/debug.h>
#include <ihk/cpu.h>
#include <registers.h>
#include <mc_perf_event.h>
#include <config.h>
extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void);
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
uint64_t ecx;
uint64_t edx;
#ifndef ENABLE_PERF
return;
#endif //ENABLE_PERF
/* Do not do it on KVM */
if (running_on_kvm()) return;
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
}
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
struct thread *thread = cpu_local_var(current);
// allocate extra_reg
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
event->extra_reg.idx = EXTRA_REG_RSP_1;
}
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
event->extra_reg.idx = EXTRA_REG_RSP_0;
}
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
// extra_regs are full
return -1;
}
}
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
event->hw_config &= ~0xffUL;
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
event->extra_reg.reg = MSR_OFFCORE_RSP_0;
}
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
event->hw_config &= ~0xffUL;
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
event->extra_reg.reg = MSR_OFFCORE_RSP_1;
}
thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
wrmsr(event->extra_reg.reg, event->extra_reg.config);
return 0;
}
#ifdef HAVE_MARCH_PERFCTR_START
extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif

View File

@ -29,11 +29,11 @@
#include <prctl.h>
#include <ihk/ikc.h>
#include <page.h>
#include <limits.h>
void terminate(int, int);
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
long syscall(int num, ihk_mc_user_context_t *ctx);
extern void save_fp_regs(struct thread *proc);
void set_signal(int sig, void *regs0, siginfo_t *info);
void check_signal(unsigned long rc, void *regs0, int num);
extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
@ -460,7 +460,6 @@ void set_single_step(struct thread *thread)
long ptrace_read_fpregs(struct thread *thread, void *fpregs)
{
save_fp_regs(thread);
if (thread->fp_regs == NULL) {
return -ENOMEM;
}
@ -470,7 +469,6 @@ long ptrace_read_fpregs(struct thread *thread, void *fpregs)
long ptrace_write_fpregs(struct thread *thread, void *fpregs)
{
save_fp_regs(thread);
if (thread->fp_regs == NULL) {
return -ENOMEM;
}
@ -540,7 +538,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
/* Transition thread state */
proc->status = PS_TRACED;
thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;
@ -927,6 +925,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
break;
case SIGCHLD:
case SIGURG:
case SIGWINCH:
break;
default:
dkprintf("do_signal,default,terminate,sig=%d\n", sig);
@ -1339,8 +1338,12 @@ set_signal(int sig, void *regs0, siginfo_t *info)
if (thread == NULL || thread->proc->pid == 0)
return;
if((__sigmask(sig) & thread->sigmask.__val[0]) ||
(regs->gpr.rsp & 0x8000000000000000)){
if (!interrupt_from_user(regs)) {
ihk_mc_debug_show_interrupt_context(regs);
panic("panic: kernel mode signal");
}
if ((__sigmask(sig) & thread->sigmask.__val[0])) {
coredump(thread, regs0);
terminate(0, sig | 0x80);
}
@ -1907,4 +1910,634 @@ save_uctx(void *uctx, struct x86_user_context *regs)
ctx->fregsize = 0;
}
int do_process_vm_read_writev(int pid,
const struct iovec *local_iov,
unsigned long liovcnt,
const struct iovec *remote_iov,
unsigned long riovcnt,
unsigned long flags,
int op)
{
int ret = -EINVAL;
int li, ri;
int pli, pri;
off_t loff, roff;
size_t llen = 0, rlen = 0;
size_t copied = 0;
size_t to_copy;
struct thread *lthread = cpu_local_var(current);
struct process *rproc;
struct process *lproc = lthread->proc;
struct process_vm *rvm = NULL;
unsigned long rphys;
unsigned long rpage_left;
unsigned long psize;
void *rva;
struct vm_range *range;
struct mcs_rwlock_node_irqsave lock;
struct mcs_rwlock_node update_lock;
/* Sanity checks */
if (flags) {
return -EINVAL;
}
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
return -EINVAL;
}
/* Check if parameters are okay */
ihk_mc_spinlock_lock_noirq(&lthread->vm->memory_range_lock);
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov,
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
if (!range) {
ret = -EFAULT;
goto arg_out;
}
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)remote_iov,
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
if (!range) {
ret = -EFAULT;
goto arg_out;
}
ret = 0;
arg_out:
ihk_mc_spinlock_unlock_noirq(&lthread->vm->memory_range_lock);
if (ret != 0) {
goto out;
}
for (li = 0; li < liovcnt; ++li) {
llen += local_iov[li].iov_len;
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
li, local_iov[li].iov_base, local_iov[li].iov_len);
}
for (ri = 0; ri < riovcnt; ++ri) {
rlen += remote_iov[ri].iov_len;
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
}
if (llen != rlen) {
return -EINVAL;
}
/* Find remote process */
rproc = find_process(pid, &lock);
if (!rproc) {
ret = -ESRCH;
goto out;
}
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
if(rproc->status == PS_EXITED ||
rproc->status == PS_ZOMBIE){
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
process_unlock(rproc, &lock);
ret = -ESRCH;
goto out;
}
rvm = rproc->vm;
hold_process_vm(rvm);
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
process_unlock(rproc, &lock);
if (lproc->euid != 0 &&
(lproc->ruid != rproc->ruid ||
lproc->ruid != rproc->euid ||
lproc->ruid != rproc->suid ||
lproc->rgid != rproc->rgid ||
lproc->rgid != rproc->egid ||
lproc->rgid != rproc->sgid)) {
ret = -EPERM;
goto out;
}
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
liovcnt, riovcnt);
pli = pri = -1; /* Previous indeces in iovecs */
li = ri = 0; /* Current indeces in iovecs */
loff = roff = 0; /* Offsets in current iovec */
/* Now iterate and do the copy */
while (copied < llen) {
int faulted = 0;
/* New local vector? */
if (pli != li) {
struct vm_range *range;
ihk_mc_spinlock_lock_noirq(&lthread->vm->memory_range_lock);
/* Is base valid? */
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov[li].iov_base,
(uintptr_t)(local_iov[li].iov_base + 1));
if (!range) {
ret = -EFAULT;
goto pli_out;
}
/* Is range valid? */
range = lookup_process_memory_range(lthread->vm,
(uintptr_t)local_iov[li].iov_base,
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
if (range == NULL) {
ret = -EINVAL;
goto pli_out;
}
if (!(range->flag & ((op == PROCESS_VM_READ) ?
VR_PROT_WRITE : VR_PROT_READ))) {
ret = -EFAULT;
goto pli_out;
}
ret = 0;
pli_out:
ihk_mc_spinlock_unlock_noirq(&lthread->vm->memory_range_lock);
if (ret != 0) {
goto out;
}
pli = li;
}
/* New remote vector? */
if (pri != ri) {
struct vm_range *range;
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
/* Is base valid? */
range = lookup_process_memory_range(rvm,
(uintptr_t)remote_iov[li].iov_base,
(uintptr_t)(remote_iov[li].iov_base + 1));
if (range == NULL) {
ret = -EFAULT;
goto pri_out;
}
/* Is range valid? */
range = lookup_process_memory_range(rvm,
(uintptr_t)remote_iov[li].iov_base,
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
if (range == NULL) {
ret = -EINVAL;
goto pri_out;
}
if (!(range->flag & ((op == PROCESS_VM_READ) ?
VR_PROT_READ : VR_PROT_WRITE))) {
ret = -EFAULT;
goto pri_out;
}
ret = 0;
pri_out:
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
if (ret != 0) {
goto out;
}
pri = ri;
}
/* Figure out how much we can copy at most in this iteration */
to_copy = (local_iov[li].iov_len - loff);
if ((remote_iov[ri].iov_len - roff) < to_copy) {
to_copy = remote_iov[ri].iov_len - roff;
}
retry_lookup:
/* TODO: remember page and do this only if necessary */
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
remote_iov[ri].iov_base + roff, &rphys, &psize);
if (ret) {
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
void *addr;
if (faulted) {
ret = -EFAULT;
goto out;
}
/* Fault in pages */
for (addr = (void *)
(((unsigned long)remote_iov[ri].iov_base + roff)
& PAGE_MASK);
addr < (remote_iov[ri].iov_base + roff + to_copy);
addr += PAGE_SIZE) {
ret = page_fault_process_vm(rvm, addr, reason);
if (ret) {
ret = -EFAULT;
goto out;
}
}
faulted = 1;
goto retry_lookup;
}
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
psize) & ~(psize - 1)) -
((unsigned long)remote_iov[ri].iov_base + roff));
if (rpage_left < to_copy) {
to_copy = rpage_left;
}
rva = phys_to_virt(rphys);
fast_memcpy(
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
to_copy);
copied += to_copy;
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
li, local_iov[li].iov_base + loff,
(op == PROCESS_VM_READ) ? "<-" : "->",
ri, remote_iov[ri].iov_base + roff, to_copy,
psize, rpage_left);
loff += to_copy;
roff += to_copy;
if (loff == local_iov[li].iov_len) {
li++;
loff = 0;
}
if (roff == remote_iov[ri].iov_len) {
ri++;
roff = 0;
}
}
release_process_vm(rvm);
return copied;
out:
if(rvm)
release_process_vm(rvm);
return ret;
}
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
{
int i, i_s, i_e, phase = 1;
struct move_pages_smp_req *mpsr =
(struct move_pages_smp_req *)arg;
struct process_vm *vm = mpsr->proc->vm;
int count = mpsr->count;
struct page_table *save_pt;
extern struct page_table *get_init_page_table(void);
i_s = (count / nr_cpus) * cpu_index;
i_e = i_s + (count / nr_cpus);
if (cpu_index == (nr_cpus - 1)) {
i_e = count;
}
/* Load target process' PT so that we can access user-space */
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
get_init_page_table() :
cpu_local_var(current)->vm->address_space->page_table;
if (save_pt != vm->address_space->page_table) {
ihk_mc_load_page_table(vm->address_space->page_table);
}
else {
save_pt = NULL;
}
if (nr_cpus == 1) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break;
default:
break;
}
}
else if (nr_cpus > 1 && nr_cpus < 4) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
case 1:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
mpsr->nodes_ready = 1;
break;
default:
break;
}
}
else if (nr_cpus >= 4 && nr_cpus < 8) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * count);
break;
case 1:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 2:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 3:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
memset(mpsr->status, 0, sizeof(int) * count);
memset(mpsr->nr_pages, 0, sizeof(int) * count);
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
break;
default:
break;
}
}
else if (nr_cpus >= 8) {
switch (cpu_index) {
case 0:
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
sizeof(void *) * (count / 2));
break;
case 1:
memcpy(mpsr->virt_addr + (count / 2),
mpsr->user_virt_addr + (count / 2),
sizeof(void *) * (count / 2));
break;
case 2:
memcpy(mpsr->status, mpsr->user_status,
sizeof(int) * count);
break;
case 3:
memcpy(mpsr->nodes, mpsr->user_nodes,
sizeof(int) * count);
mpsr->nodes_ready = 1;
break;
case 4:
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
break;
case 5:
memset(mpsr->status, 0, sizeof(int) * count);
break;
case 6:
memset(mpsr->nr_pages, 0, sizeof(int) * count);
break;
case 7:
memset(mpsr->dst_phys, 0,
sizeof(unsigned long) * count);
break;
default:
break;
}
}
while (!(volatile int)mpsr->nodes_ready) {
cpu_pause();
}
/* NUMA verification in parallel */
for (i = i_s; i < i_e; i++) {
if (mpsr->nodes[i] < 0 ||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
!test_bit(mpsr->nodes[i],
mpsr->proc->vm->numa_mask)) {
mpsr->phase_ret = -EINVAL;
break;
}
}
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/* PTE lookup in parallel */
for (i = i_s; i < i_e; i++) {
void *phys;
size_t pgsize;
int p2align;
/*
* XXX: No page structures for anonymous mappings.
* Look up physical addresses by scanning page tables.
*/
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
/* PTE valid? */
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
mpsr->status[i] = -ENOENT;
mpsr->ptep[i] = NULL;
continue;
}
/* PTE is file? */
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
mpsr->status[i] = -EINVAL;
mpsr->ptep[i] = NULL;
continue;
}
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
/* Large page? */
if (pgsize > PAGE_SIZE) {
int nr_sub_pages = (pgsize / PAGE_SIZE);
int j;
if (i + nr_sub_pages > count) {
kprintf("%s: ERROR: page at index %d exceeds the region\n",
__FUNCTION__, i);
mpsr->status[i] = -EINVAL;
break;
}
/* Is it contiguous across nr_sub_pages and all
* requested to be moved to the same target node? */
for (j = 0; j < nr_sub_pages; ++j) {
if (mpsr->virt_addr[i + j] !=
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
kprintf("%s: ERROR: virt address or node at index %d"
" is inconsistent\n",
__FUNCTION__, i + j);
mpsr->phase_ret = -EINVAL;
goto pte_out;
}
}
mpsr->nr_pages[i] = nr_sub_pages;
i += (nr_sub_pages - 1);
}
else {
mpsr->nr_pages[i] = 1;
}
}
pte_out:
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
if (cpu_index == 0) {
/* Allocate new pages on target NUMA nodes */
for (i = 0; i < count; i++) {
int pgalign = 0;
int j;
void *dst;
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
continue;
/* TODO: store pgalign info in an array as well? */
if (mpsr->nr_pages[i] > 1) {
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
pgalign = PTL2_SHIFT - PTL1_SHIFT;
}
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
if (!dst) {
mpsr->status[i] = -ENOMEM;
continue;
}
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
mpsr->status[j] = mpsr->nodes[i];
}
mpsr->dst_phys[i] = virt_to_phys(dst);
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
" allocated phys: 0x%lx\n",
__FUNCTION__, mpsr->virt_addr[i],
mpsr->nr_pages[i] * PAGE_SIZE,
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
}
}
/* Barrier */
ihk_atomic_inc(&mpsr->phase_done);
while (ihk_atomic_read(&mpsr->phase_done) <
(phase * nr_cpus)) {
cpu_pause();
}
if (mpsr->phase_ret != 0) {
goto out;
}
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
++phase;
/* Copy, PTE update, memfree in parallel */
for (i = i_s; i < i_e; ++i) {
if (!mpsr->dst_phys[i])
continue;
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
mpsr->nr_pages[i] * PAGE_SIZE);
ihk_mc_free_pages(
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
mpsr->nr_pages[i]);
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
__FUNCTION__, mpsr->virt_addr[i],
mpsr->nr_pages[i] * PAGE_SIZE,
mpsr->dst_phys[i]);
}
/* XXX: do a separate SMP call with only CPUs running threads
* of this process? */
if (cpu_local_var(current)->proc == mpsr->proc) {
/* Invalidate all TLBs */
for (i = 0; i < mpsr->count; i++) {
if (!mpsr->dst_phys[i])
continue;
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
}
}
out:
if (save_pt) {
ihk_mc_load_page_table(save_pt);
}
return mpsr->phase_ret;
}
/*** End of File ***/

View File

@ -102,7 +102,7 @@ int vsyscall_gettimeofday(struct timeval *tv, void *tz)
: "%rcx", "%r11", "memory");
if (error) {
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
*(volatile int *)0 = 0; /* i.e. raise(SIGSEGV) */
}
return error;
} /* vsyscall_gettimeofday() */

View File

@ -35,6 +35,7 @@ error_exit() {
;&
tmp_mcos_created)
if [ "$enable_mcoverlay" == "yes" ]; then
umask $umask_old
rm -rf /tmp/mcos
fi
;&
@ -45,9 +46,12 @@ error_exit() {
exit 1
}
fi
# Change umask for /proc and /sys files
umask_dec=$(( 8#${umask_old} & 8#0002 ))
umask 0`printf "%o" ${umask_dec}`
if [ ! -e /tmp/mcos ]; then
mkdir -p /tmp/mcos;
fi
@ -145,3 +149,7 @@ for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
fi
done
# Restore umask
umask ${umask_old}

View File

@ -43,8 +43,9 @@ fi
turbo=""
ihk_irq=""
umask_old=`umask`
while getopts :tk:c:m:o:f:r:q:i:d: OPT
while getopts :tk:c:m:o:f:r:q:i:d:e: OPT
do
case ${OPT} in
f) facility=${OPTARG}
@ -63,6 +64,8 @@ do
;;
t) turbo="turbo"
;;
e) extra_kopts=${OPTARG}
;;
d) DUMP_LEVEL=${OPTARG}
;;
i) mon_interval=${OPTARG}
@ -114,6 +117,7 @@ error_exit() {
;&
tmp_mcos_created)
if [ "$enable_mcoverlay" == "yes" ]; then
umask $umask_old
rm -rf /tmp/mcos
fi
;&
@ -148,7 +152,7 @@ error_exit() {
fi
;&
ihk_smp_loaded)
rmmod ihk_smp_x86 2>/dev/null || echo "warning: failed to remove ihk_smp_x86" >&2
rmmod ihk_smp_@ARCH@ 2>/dev/null || echo "warning: failed to remove ihk_smp_@ARCH@" >&2
;&
ihk_loaded)
rmmod ihk 2>/dev/null || echo "warning: failed to remove ihk" >&2
@ -200,7 +204,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
enable_mcoverlay="yes"
fi
else
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then
enable_mcoverlay="yes"
fi
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
@ -277,7 +281,7 @@ echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
sync
# Load IHK-SMP if not loaded and reserve CPUs and memory
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
if [ "$ihk_irq" == "" ]; then
for i in `seq 64 255`; do
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
@ -290,8 +294,8 @@ if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
error_exit "ihk_loaded"
fi
fi
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
echo "error: loading ihk-smp-x86" >&2
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-@ARCH@.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
echo "error: loading ihk-smp-@ARCH@" >&2
error_exit "ihk_loaded"
fi
@ -339,7 +343,7 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
ind=`echo $i|cut -c10-`;
# Retry when conflicting with ihkmond
nretry=0
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
@ -383,7 +387,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
fi
# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL}"; then
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL} $extra_kopts"; then
echo "error: setting kernel arguments" >&2
error_exit "os_created"
fi
@ -421,3 +425,5 @@ if [ "${irqbalance_used}" == "yes" ]; then
fi
# echo cpus=$cpus ncpus=$ncpus banirq=$banirq
fi
exit 0

View File

@ -19,7 +19,7 @@ cpus=""
irqbalance_used=""
# No SMP module? Exit.
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then exit 0; fi
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then exit 0; fi
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
irqbalance_used="yes"
@ -37,7 +37,7 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
ind=`echo $i|cut -c10-`;
# Retry when conflicting with ihkmond
nretry=0
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
@ -87,9 +87,9 @@ fi
. ${SBINDIR}/mcoverlay-destroy.sh
# Remove SMP module
if grep ihk_smp_x86 /proc/modules &>/dev/null; then
if ! rmmod ihk_smp_x86 2>/dev/null; then
echo "error: removing ihk_smp_x86" >&2
if grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
if ! rmmod ihk_smp_@ARCH@ 2>/dev/null; then
echo "error: removing ihk_smp_@ARCH@" >&2
exit 1
fi
fi

View File

@ -9,12 +9,15 @@
/* whether memdump feature is enabled */
#undef ENABLE_MEMDUMP
/* whether mcoverlayfs is enabled */
/* whether qlmpi is enabled */
#undef ENABLE_QLMPI
/* whether rusage is enabled */
#undef ENABLE_RUSAGE
/* whether perf is enabled */
#undef ENABLE_PERF
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H

56
configure vendored
View File

@ -708,6 +708,7 @@ enable_dcfa
enable_memdump
enable_mcoverlayfs
enable_rusage
enable_perf
enable_qlmpi
with_uname_r
'
@ -1333,6 +1334,7 @@ Optional Features:
--enable-memdump enable dumping memory and analyzing a dump
--enable-mcoverlayfs enable mcoverlayfs implementation
--enable-rusage enable rusage implementation
--enable-perf enable perf_event implementation
--enable-qlmpi enable qlmpi implementation
Optional Packages:
@ -3568,6 +3570,14 @@ else
fi
# Check whether --enable-perf was given.
if test "${enable_perf+set}" = set; then :
enableval=$enable_perf; ENABLE_PERF=$enableval
else
ENABLE_PERF=yes
fi
# Check whether --enable-qlmpi was given.
if test "${enable_qlmpi+set}" = set; then :
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
@ -4366,6 +4376,9 @@ case $WITH_TARGET in
if test "X$ETCDIR" = X; then
ETCDIR="$prefix/etc"
fi
if test "X$INCLUDEDIR" = X; then
INCLUDEDIR="$prefix/include"
fi
if test "X$KMODDIR" = X; then
KMODDIR="$prefix/kmod"
fi
@ -4961,6 +4974,17 @@ else
$as_echo "$as_me: rusage is disabled" >&6;}
fi
if test "x$ENABLE_PERF" = "xyes" ; then
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
$as_echo "$as_me: perf is enabled" >&6;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
$as_echo "$as_me: perf is disabled" >&6;}
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
cat >>confdefs.h <<_ACEOF
@ -5021,9 +5045,14 @@ ac_config_headers="$ac_config_headers config.h"
# POSTK_DEBUG_ARCH_DEP_37
# AC_CONFIG_FILES arch dependfiles separate
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcoverlay-destroy-smp-x86.sh arch/x86/tools/mcoverlay-create-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in"
if test "$TARGET" = "smp-x86"; then
ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch"
fi
if test "$TARGET" = "smp-arm64"; then
ac_config_files="$ac_config_files kernel/config/config.smp-arm64 arch/arm64/kernel/vdso/Makefile arch/arm64/kernel/Makefile.arch"
@ -5741,18 +5770,19 @@ do
"kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;;
"kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;;
"kernel/include/swapfmt.h") CONFIG_FILES="$CONFIG_FILES kernel/include/swapfmt.h" ;;
"arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;;
"arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;;
"arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;;
"arch/x86/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-smp-x86.sh" ;;
"arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;;
"arch/x86/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-destroy-smp-x86.sh" ;;
"arch/x86/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-create-smp-x86.sh" ;;
"arch/x86/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/eclair-dump-backtrace.exp" ;;
"arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;;
"arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;;
"arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;;
"arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;;
"arch/x86_64/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-attached-mic.sh" ;;
"arch/x86_64/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-attached-mic.sh" ;;
"arch/x86_64/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-builtin-x86.sh" ;;
"arch/x86_64/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-smp-x86.sh" ;;
"arch/x86_64/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcstop+release-smp-x86.sh" ;;
"arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh" ;;
"arch/x86_64/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-create-smp-x86.sh" ;;
"arch/x86_64/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/eclair-dump-backtrace.exp" ;;
"arch/x86_64/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-builtin-x86.sh" ;;
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
"arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;;
"kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;;
"arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;;
"arch/arm64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/Makefile.arch" ;;

View File

@ -134,6 +134,12 @@ AC_ARG_ENABLE([rusage],
[ENABLE_RUSAGE=$enableval],
[ENABLE_RUSAGE=yes])
AC_ARG_ENABLE([perf],
AC_HELP_STRING([--enable-perf],
[enable perf_event implementation]),
[ENABLE_PERF=$enableval],
[ENABLE_PERF=yes])
AC_ARG_ENABLE([qlmpi],
AC_HELP_STRING([--enable-qlmpi],
[enable qlmpi implementation]),
@ -316,6 +322,9 @@ case $WITH_TARGET in
if test "X$ETCDIR" = X; then
ETCDIR="$prefix/etc"
fi
if test "X$INCLUDEDIR" = X; then
INCLUDEDIR="$prefix/include"
fi
if test "X$KMODDIR" = X; then
KMODDIR="$prefix/kmod"
fi
@ -451,7 +460,7 @@ else
fi
if test "x$ENABLE_QLMPI" = "xyes" ; then
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled])
AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
AC_MSG_NOTICE([qlmpi is enabled])
else
AC_MSG_NOTICE([qlmpi is disabled])
@ -475,6 +484,13 @@ else
AC_MSG_NOTICE([rusage is disabled])
fi
if test "x$ENABLE_PERF" = "xyes" ; then
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
AC_MSG_NOTICE([perf is enabled])
else
AC_MSG_NOTICE([perf is disabled])
fi
if test "x$MCKERNEL_INCDIR" != "x" ; then
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
fi
@ -535,20 +551,26 @@ AC_CONFIG_FILES([
kernel/Makefile
kernel/Makefile.build
kernel/include/swapfmt.h
arch/x86/tools/mcreboot-attached-mic.sh
arch/x86/tools/mcshutdown-attached-mic.sh
arch/x86/tools/mcreboot-builtin-x86.sh
arch/x86/tools/mcreboot-smp-x86.sh
arch/x86/tools/mcstop+release-smp-x86.sh
arch/x86/tools/mcoverlay-destroy-smp-x86.sh
arch/x86/tools/mcoverlay-create-smp-x86.sh
arch/x86/tools/eclair-dump-backtrace.exp
arch/x86/tools/mcshutdown-builtin-x86.sh
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
arch/x86/tools/irqbalance_mck.service
arch/x86/tools/irqbalance_mck.in
arch/x86_64/tools/mcreboot-attached-mic.sh
arch/x86_64/tools/mcshutdown-attached-mic.sh
arch/x86_64/tools/mcreboot-builtin-x86.sh
arch/x86_64/tools/mcreboot-smp-x86.sh
arch/x86_64/tools/mcstop+release-smp-x86.sh
arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh
arch/x86_64/tools/mcoverlay-create-smp-x86.sh
arch/x86_64/tools/eclair-dump-backtrace.exp
arch/x86_64/tools/mcshutdown-builtin-x86.sh
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
arch/x86_64/tools/irqbalance_mck.service
arch/x86_64/tools/irqbalance_mck.in
])
if test "$TARGET" = "smp-x86"; then
AC_CONFIG_FILES([
arch/x86_64/kernel/Makefile.arch
])
fi
if test "$TARGET" = "smp-arm64"; then
AC_CONFIG_FILES([
kernel/config/config.smp-arm64

View File

@ -5,6 +5,10 @@
#define IHK_MAX_NUM_NUMA_NODES 1024
#define IHK_MAX_NUM_CPUS 1024
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
struct mckernel_rusage {
unsigned long memory_stat_rss[IHK_MAX_NUM_PGSIZES];
unsigned long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES];

View File

@ -140,6 +140,8 @@ struct program_load_desc {
unsigned long mpol_flags;
unsigned long mpol_threshold;
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN];
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@ -11,10 +11,21 @@ obj-m += mcctrl.o
# POSTK_DEBUG_ARCH_DEP_1, arch depend "-mcmodel"
# POSTK_DEBUG_ARCH_DEP_83, arch depend translate_rva_to_rpa() move
ifeq ($(ARCH), arm64)
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -I$(src)/arch/$(ARCH)/include -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@
else
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../../kernel/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ -I@abs_builddir@/../../../
ccflags-y := -I$(IHK_BASE)/linux/include \
-I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) \
-I$(IHK_BASE)/ikc/include \
-I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) \
-I$(IHK_BASE)/include \
-I$(IHK_BASE)/include/arch/$(ARCH) \
-I$(src)/../../include \
-I$(src)/arch/$(ARCH)/include \
-I@abs_builddir@ \
-I@abs_builddir@/../../../ \
-I$(src)/../../../kernel/include \
-DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
ifneq ($(ARCH), arm64)
ccflags-y += -mno-red-zone -mcmodel=kernel
endif
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o

View File

@ -190,7 +190,11 @@ static long mcexec_prepare_image(ihk_os_t os,
pdesc->status = 0;
mb();
mcctrl_ikc_send(os, pdesc->cpu, &isp);
ret = mcctrl_ikc_send(os, pdesc->cpu, &isp);
if(ret < 0) {
printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret);
goto put_and_free_out;
}
ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status);
if (ret < 0) {

View File

@ -1207,7 +1207,7 @@ sysfsm_unlink(struct sysfsm_data *sdp, const char *path0, int flags)
goto out;
}
if (!flags & SYSFS_UNLINK_KEEP_ANCESTOR) {
if (!(flags & SYSFS_UNLINK_KEEP_ANCESTOR)) {
cleanup_ancestor(dirp);
}

View File

@ -16,7 +16,7 @@ ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
endif
ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 243680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 263680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
endif
endif
ifeq ($(BUILD_MODULE_TMP),rhel)
@ -33,7 +33,7 @@ endif
modules:
ifneq ($(BUILD_MODULE),none)
@(cd $(BUILD_MODULE); make modules)
+@(cd $(BUILD_MODULE); make modules)
endif
clean:

View File

@ -10,6 +10,7 @@ MANDIR=@MANDIR@
MCKERNEL_INCDIR=@MCKERNEL_INCDIR@
MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@
KDIR ?= @KDIR@
ARCH=@ARCH@
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH}
LDFLAGS=@LDFLAGS@
RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}')
@ -17,7 +18,6 @@ VPATH=@abs_srcdir@
TARGET=mcexec libsched_yield ldump2mcdump.so
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
LIBS=@LIBS@
ARCH=@ARCH@
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread
ENABLE_QLMPI=@ENABLE_QLMPI@
@ -53,7 +53,7 @@ libsched_yield: libsched_yield.c
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
libmcexec.a::
(cd arch/${ARCH}; make)
+(cd arch/${ARCH}; $(MAKE))
libqlmpi.so: qlmpilib.c
$(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $<
@ -77,13 +77,13 @@ ql_talker: ql_talker.o
$(CC) $^ $(CFLAGS) -o $@
clean::
(cd arch/${ARCH}; make clean)
(cd arch/${ARCH}; $(MAKE) clean)
$(RM) $(TARGET) *.o
.PHONY: all clean install
install::
(cd arch/${ARCH}; make install)
(cd arch/${ARCH}; $(MAKE) install)
mkdir -p -m 755 $(BINDIR)
install -m 755 mcexec $(BINDIR)
mkdir -p -m 755 $(MCKERNEL_LIBDIR)

View File

@ -43,14 +43,30 @@ threads equals to the number of CPU cores assigned to McKernel.
.TP
.B -h N, --extend-heap-by=N
Specify the size of heap extension by the brk() system call.
The default size is 4K Byte. You may specify the value in the
following format:
<digit>{k|K|m|M|g|G}
e.g.:
10k for 10Kibyte, 100M for 100Mibyte, 1G for 1Gibyte
The default size is 4K Byte. \fIN\fR accepts the following format:
.br
.RS 10
<digits>{k|K|m|M|g|G}
.br
e.g.: 10k means 10Kibyte, 100M 100Mibyte, 1G 1Gibyte
.RE
.RS 7
In certain applications, such as miniFE, specifying "-h 12M" improves
performance.
.RE
.TP
.B -s P,[M], --stack-premap=P,[M]
Pre-map \fIP\fR bytes of stack when creating a process. 2Mibyte is
used when not specified. And set the max size of stack to \fIM\fR
bytes. Both of them accept the following format:
.br
.RS 10
<digits>{k|K|m|M|g|G}
.br
e.g.: 10k means 10Kibyte, 100M 100Mibyte, 1G 1Gibyte
.RE
.TP
.B --profile

View File

@ -214,6 +214,10 @@ static unsigned long mpol_threshold = 0;
static unsigned long heap_extension = (4*1024);
static int profile = 0;
static int disable_sched_yield = 0;
static long stack_premap = (2ULL << 20);
static long stack_max = -1;
static struct rlimit rlim_stack;
static char *mpol_bind_nodes = NULL;
/* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0;
@ -1211,7 +1215,7 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
struct rlimit new_rlim;
/* save original value to environment variable */
n = snprintf(newval, sizeof(newval), "%#lx,%#lx",
n = snprintf(newval, sizeof(newval), "%ld,%ld",
(unsigned long)orig_rlim->rlim_cur,
(unsigned long)orig_rlim->rlim_max);
if (n >= sizeof(newval)) {
@ -1247,9 +1251,9 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
void print_usage(char **argv)
{
#ifdef ADD_ENVS_OPTION
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [<mcos-id>] (program) [args...]\n", argv[0]);
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [<mcos-id>] (program) [args...]\n", argv[0]);
#else /* ADD_ENVS_OPTION */
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [<mcos-id>] (program) [args...]\n", argv[0]);
fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [-s (--stack-premap=)[premap_size][,max]] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [--mpol-shm-premap] [--disable-sched-yield] [<mcos-id>] (program) [args...]\n", argv[0]);
#endif /* ADD_ENVS_OPTION */
}
@ -1637,7 +1641,7 @@ static struct option mcexec_options[] = {
.name = "mpol-threshold",
.has_arg = required_argument,
.flag = NULL,
.val = 'm',
.val = 'M',
},
{
.name = "disable-sched-yield",
@ -1651,13 +1655,16 @@ static struct option mcexec_options[] = {
.flag = NULL,
.val = 'h',
},
{
.name = "stack-premap",
.has_arg = required_argument,
.flag = NULL,
.val = 's',
},
/* end */
{ NULL, 0, NULL, 0, },
};
#define MCEXEC_DEF_CUR_STACK_SIZE (2 * 1024 * 1024) /* 2 MiB */
#define MCEXEC_DEF_MAX_STACK_SIZE (64 * 1024 * 1024) /* 64 MiB */
#ifdef ENABLE_MCOVERLAYFS
void bind_mount_recursive(const char *root, char *prefix)
{
@ -1762,7 +1769,7 @@ static void ld_preload_init()
if (disable_sched_yield) {
sprintf(envbuf, "%s/libsched_yield.so.1.0.0", MCKERNEL_LIBDIR);
__dprintf("%s: %s\n", __FUNCTION__, sched_yield_lib_path);
__dprintf("%s: preload library: %s\n", __FUNCTION__, envbuf);
if (setenv("LD_PRELOAD", envbuf, 1) < 0) {
printf("%s: warning: failed to set LD_PRELOAD for sched_yield\n",
__FUNCTION__);
@ -1796,7 +1803,6 @@ int main(int argc, char **argv)
char *p;
int i;
int error;
struct rlimit rlim_stack;
unsigned long lcur;
unsigned long lmax;
int target_core = 0;
@ -1846,10 +1852,15 @@ int main(int argc, char **argv)
CHKANDJUMP(error == -1, 1, "unsetenv failed");
}
rlim_stack.rlim_cur = MCEXEC_DEF_CUR_STACK_SIZE;
rlim_stack.rlim_max = MCEXEC_DEF_MAX_STACK_SIZE;
/* Inherit ulimit settings to McKernel process */
if (getrlimit(RLIMIT_STACK, &rlim_stack)) {
fprintf(stderr, "getrlimit failed\n");
return 1;
}
__dprintf("rlim_stack=%ld,%ld\n", rlim_stack.rlim_cur, rlim_stack.rlim_max);
#define MCEXEC_MAX_STACK_SIZE (1024 * 1024 * 1024) /* 1 GiB */
/* Shrink mcexec stack if it leaves too small room for McKernel process */
#define MCEXEC_MAX_STACK_SIZE (16 * 1024 * 1024) /* 1 GiB */
if (rlim_stack.rlim_cur > MCEXEC_MAX_STACK_SIZE) {
/* need to call reduce_stack() before modifying the argv[] */
(void)reduce_stack(&rlim_stack, argv); /* no return, unless failure */
@ -1859,25 +1870,43 @@ int main(int argc, char **argv)
/* Parse options ("+" denotes stop at the first non-option) */
#ifdef ADD_ENVS_OPTION
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:", mcexec_options, NULL)) != -1) {
while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:e:s:m:", mcexec_options, NULL)) != -1) {
#else /* ADD_ENVS_OPTION */
while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:", mcexec_options, NULL)) != -1) {
while ((opt = getopt_long(argc, argv, "+c:n:t:M:h:s:m:", mcexec_options, NULL)) != -1) {
#endif /* ADD_ENVS_OPTION */
switch (opt) {
char *tmp;
case 'c':
target_core = atoi(optarg);
target_core = strtol(optarg, &tmp, 0);
if (*tmp != '\0') {
fprintf(stderr, "error: -c: invalid target CPU\n");
exit(EXIT_FAILURE);
}
break;
case 'n':
nr_processes = atoi(optarg);
nr_processes = strtol(optarg, &tmp, 0);
if (*tmp != '\0' || nr_processes <= 0) {
fprintf(stderr, "error: -n: invalid number of processes\n");
exit(EXIT_FAILURE);
}
break;
case 't':
nr_threads = atoi(optarg);
nr_threads = strtol(optarg, &tmp, 0);
if (*tmp != '\0' || nr_threads <= 0) {
fprintf(stderr, "error: -t: invalid number of threads\n");
exit(EXIT_FAILURE);
}
break;
case 'M':
mpol_threshold = atobytes(optarg);
break;
case 'm':
mpol_threshold = atobytes(optarg);
mpol_bind_nodes = optarg;
break;
case 'h':
@ -1889,6 +1918,23 @@ int main(int argc, char **argv)
add_env_list(&extra_env, optarg);
break;
#endif /* ADD_ENVS_OPTION */
case 's': {
char *token, *dup, *line;
dup = strdup(optarg);
line = dup;
token = strsep(&line, ",");
if (token != NULL && *token != 0) {
stack_premap = atobytes(token);
}
token = strsep(&line, ",");
if (token != NULL && *token != 0) {
stack_max = atobytes(token);
}
free(dup);
__dprintf("stack_premap=%ld,stack_max=%ld\n", stack_premap, stack_max);
break; }
case 0: /* long opt */
break;
@ -2097,6 +2143,7 @@ int main(int argc, char **argv)
desc->cpu = target_core;
desc->enable_vdso = enable_vdso;
/* Restore the stack size when mcexec stack was shrinked */
p = getenv(rlimit_stack_envname);
if (p) {
char *saveptr;
@ -2141,8 +2188,19 @@ int main(int argc, char **argv)
rlim_stack.rlim_cur = lcur;
}
}
/* Overwrite the max with <max> of "--stack-premap <premap>,<max>" */
if (stack_max != -1) {
rlim_stack.rlim_cur = stack_max;
if (rlim_stack.rlim_max != -1 && rlim_stack.rlim_max < rlim_stack.rlim_cur) {
rlim_stack.rlim_max = rlim_stack.rlim_cur;
}
}
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
desc->stack_premap = stack_premap;
__dprintf("desc->rlimit[MCK_RLIMIT_STACK]=%ld,%ld\n", desc->rlimit[MCK_RLIMIT_STACK].rlim_cur, desc->rlimit[MCK_RLIMIT_STACK].rlim_max);
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
if(ncpu == -1){
@ -2335,6 +2393,21 @@ int main(int argc, char **argv)
desc->mpol_threshold = mpol_threshold;
desc->heap_extension = heap_extension;
desc->mpol_bind_mask = 0;
if (mpol_bind_nodes) {
struct bitmask *bind_mask;
bind_mask = numa_parse_nodestring_all(mpol_bind_nodes);
if (bind_mask) {
int node;
for (node = 0; node <= numa_max_possible_node(); ++node) {
if (numa_bitmask_isbitset(bind_mask, node)) {
desc->mpol_bind_mask |= (1UL << node);
}
}
}
}
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {
perror("prepare");
close(fd);
@ -3618,6 +3691,10 @@ fork_err:
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
path, desc->num_sections);
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
desc->stack_premap = stack_premap;
/* Copy descriptor to co-kernel side */
trans.userp = (void*)desc;
trans.rphys = w.sr.args[2];

View File

@ -1,4 +1,5 @@
# Makefile.build.in COPYRIGHT FUJITSU LIMITED 2015-2016
ARCH = @ARCH@
VPATH=@abs_srcdir@
SRC=$(VPATH)
IHKDIR=$(IHKBASE)/$(TARGETDIR)
@ -8,13 +9,18 @@ OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
OBJS += rbtree.o
OBJS += pager.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
DEPSRCS=$(wildcard $(SRC)/*.c)
# OBJS added gencore.o
ifeq ($(ARCH), arm64)
OBJS += gencore.o
DEPSRCS += $(SRC)/../arch/arm64/kernel/gencore.c
endif
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
ifneq ($(ARCH), arm64)
CFLAGS += -mcmodel=large -mno-red-zone
endif
LDFLAGS += -e arch_start
IHKOBJ = ihk/ihk.o
@ -37,7 +43,7 @@ OBJCOPY ?= objcopy
# POSTK_DEBUG_ARCH_DEP_26
ifeq ($(ARCH), arm64)
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) OBJCOPY=$(OBJCOPY) SRC=$(SRC)
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) OBJCOPY=$(OBJCOPY) SRC=$(SRC) ARCH=$(ARCH)
else
SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) SRC=$(SRC)
endif
@ -65,12 +71,15 @@ Makefile.dep:
$(IHKOBJ): FORCE
@mkdir -p $(dir $(IHKOBJ))
$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS) prepare
$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS)
+$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS) prepare
+$(call echo_cmd,BUILD IHK,$(TARGET))$(submake) -C $(IHKBASE) $(SUBCMD_OPTS)
%.o: $(SRC)/%.c
$(cc_cmd)
gencore.o: ../arch/arm64/kernel/gencore.c
$(cc_cmd)
FORCE:
-include Makefile.dep

View File

@ -1,6 +1,7 @@
BUILD_TARGET ?= @TARGET@
KERNDIR=@KERNDIR@
VPATH=@abs_srcdir@
ARCH ?= @ARCH@
ifeq ($(ARCH), arm64)
vdsodir=@abs_builddir@/../arch/$(ARCH)/kernel/vdso
@ -26,16 +27,16 @@ all: $(O) $(KERNELS)
$(O):
mkdir -p $(O)
%/kernel.img: %/Makefile
%/kernel.img: %/Makefile $(KERNELS)
@echo 'Building for' $(dir $@)
@make --no-print-directory -C $(dir $@) $(SUBCMD_OPTS)
+@make --no-print-directory -C $(dir $@) $(SUBCMD_OPTS)
%/Makefile: Makefile.build FORCE
@mkdir -p $(dir $@)
@echo 'SRC = $(SRC)' > $@
@echo 'IHKBASE = $(IHKBASE)' >> $@
@echo 'TARGET = $(notdir $(patsubst %/,%,$(dir $@)))' >> $@
@echo 'TARGETDIR = $$(shell echo $$(TARGET) | sed "s/-/\//")' >> $@
@echo 'TARGETDIR = smp/$(ARCH)' >> $@
@cat Makefile.build >> $@
@rm -f $(dir $@)/Makefile.dep

View File

@ -70,8 +70,6 @@ static void ap_wait(void)
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
}
pc_ap_init();
/* one of them listens */
mc_ikc_test_init();

View File

@ -170,6 +170,7 @@ int kprintf(const char *format, ...)
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
barrier();
return len;
}

View File

@ -246,18 +246,6 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
return curval;
}
static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
/*
* Officially we should call:
* return getint_user((int *)dest, (int *)from);
*
* but McKernel on x86 can just access user-space.
*/
*dest = *(volatile uint32_t *)from;
return 0;
}
/*
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.

View File

@ -432,8 +432,8 @@ static int process_msg_prepare_process(unsigned long rphys)
}
n = p->num_sections;
if (n > 16) {
kprintf("%s: ERROR: more ELF sections than 16??\n",
if (n > 16 || 0 >= n) {
kprintf("%s: ERROR: ELF sections other than 1 to 16 ??\n",
__FUNCTION__);
return -ENOMEM;
}
@ -476,6 +476,27 @@ static int process_msg_prepare_process(unsigned long rphys)
proc->mpol_threshold = pn->mpol_threshold;
proc->nr_processes = pn->nr_processes;
proc->heap_extension = pn->heap_extension;
/* Update NUMA binding policy if requested */
if (pn->mpol_bind_mask) {
int bit;
memset(&vm->numa_mask, 0, sizeof(vm->numa_mask));
for_each_set_bit(bit, &pn->mpol_bind_mask,
sizeof(pn->mpol_bind_mask) * BITS_PER_BYTE) {
if (bit >= ihk_mc_get_nr_numa_nodes()) {
kprintf("%s: error: NUMA id %d is larger than mask size!\n",
__FUNCTION__, bit);
return -EINVAL;
}
set_bit(bit, &vm->numa_mask[0]);
}
vm->numa_mem_policy = MPOL_BIND;
}
#ifdef PROFILE_ENABLE
proc->profile = pn->profile;
thread->profile = pn->profile;
@ -497,6 +518,11 @@ static int process_msg_prepare_process(unsigned long rphys)
}
vm->region.map_end = vm->region.map_start;
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
dkprintf("%s: rlim_cur: %ld, rlim_max: %ld, stack_premap: %ld\n",
__FUNCTION__,
proc->rlimit[MCK_RLIMIT_STACK].rlim_cur,
proc->rlimit[MCK_RLIMIT_STACK].rlim_max,
pn->stack_premap);
if (prepare_process_ranges_args_envs(thread, pn, p, attr,
NULL, 0, NULL, 0) != 0) {

View File

@ -118,7 +118,7 @@ extern void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *t
#endif /* __HEADER_ELFCORE_H */
#else /* POSTK_DEBUG_ARCH_DEP_18 */
#ifdef __x86_64
#include "../../arch/x86/kernel/include/elfcore.h"
#include "../../arch/x86_64/kernel/include/elfcore.h"
#elif __aarch64__
#include "../../arch/arm64/kernel/include/elfcore.h"
#endif

View File

@ -19,12 +19,14 @@
#include <ihk/mm.h>
#include <ihk/atomic.h>
#include <list.h>
#include <rbtree.h>
#include <signal.h>
#include <memobj.h>
#include <affinity.h>
#include <syscall.h>
#include <bitops.h>
#include <profile.h>
#include <config.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
@ -52,6 +54,7 @@
#define VR_MEMTYPE_UC 0x01000000 /* uncachable */
#define VR_MEMTYPE_MASK 0x0f000000
#define VR_PAGEOUT 0x10000000
#define VR_DONTDUMP 0x20000000
#define PROT_TO_VR_FLAG(prot) (((unsigned long)(prot) << 16) & VR_PROT_MASK)
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
@ -77,9 +80,7 @@
// struct process.ptrace
#define PT_TRACED 0x80 /* The process is ptraced */
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
#define PT_TRACE_SYSCALL 0x200 /* Trace syscall enter */
// ptrace(2) request
#define PTRACE_TRACEME 0
@ -378,7 +379,7 @@ struct user
#define AUXV_LEN 18
struct vm_range {
struct list_head list;
struct rb_node vm_rb_node;
unsigned long start, end;
unsigned long flag;
struct memobj *memobj;
@ -553,6 +554,7 @@ struct process {
unsigned long mpol_flags;
size_t mpol_threshold;
unsigned long heap_extension;
unsigned long mpol_bind_mask;
// perf_event
int perf_status;
@ -690,13 +692,17 @@ struct thread {
int mod_clone;
struct uti_attr *mod_clone_arg;
int parent_cpuid;
// for performance counter
unsigned long pmc_alloc_map;
unsigned long extra_reg_alloc_map;
};
#define VM_RANGE_CACHE_SIZE 4
struct process_vm {
struct address_space *address_space;
struct list_head vm_range_list;
struct rb_root vm_range_tree;
struct vm_regions region;
struct process *proc; /* process that reside on the same page */
void *opt;

View File

@ -197,6 +197,8 @@ struct program_load_desc {
unsigned long mpol_flags;
unsigned long mpol_threshold;
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int nr_processes;
char shell_path[SHELL_PATH_MAX_LEN];
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
@ -567,4 +569,24 @@ typedef struct uti_attr {
uint64_t flags; /* Representing location and behavior hints by bitmap */
} uti_attr_t;
struct move_pages_smp_req {
unsigned long count;
const void **user_virt_addr;
int *user_status;
const int *user_nodes;
void **virt_addr;
int *status;
pte_t **ptep;
int *nodes;
int nodes_ready;
int *nr_pages;
unsigned long *dst_phys;
struct process *proc;
ihk_atomic_t phase_done;
int phase_ret;
};
#define PROCESS_VM_READ 0
#define PROCESS_VM_WRITE 1
#endif

Some files were not shown because too many files have changed in this diff Show More