Compare commits
356 Commits
1.3.0
...
1.5.1-knl+
| Author | SHA1 | Date | |
|---|---|---|---|
| 8f117cc0dc | |||
| 0b9a657a01 | |||
| c2d6651cd2 | |||
| d979444049 | |||
| faa357d5a6 | |||
| 653aba17a1 | |||
| 7736e25ca4 | |||
| 73d16a9d79 | |||
| 922bd7e6eb | |||
| 0d99072109 | |||
| 3ced3f6080 | |||
| d9ff940528 | |||
| cd63ec877d | |||
| 6c0bb9e576 | |||
| ca9894108b | |||
| 3f26e44f85 | |||
| bacfb0c2b9 | |||
| 09f63483cc | |||
| 2f0c2aae9e | |||
| f7b277a623 | |||
| a3aa96af19 | |||
| 91d732308d | |||
| 166c6105ef | |||
| 5a2f8388a6 | |||
| 8164b63fc2 | |||
| af22ce62d2 | |||
| 2eca75ead8 | |||
| 22992780cf | |||
| 3043591e9a | |||
| 7e7c0f9ed3 | |||
| 7193f165cc | |||
| c8c42576fd | |||
| 0412e1fcc6 | |||
| 238e346586 | |||
| 0e57c715ad | |||
| 3facd3dcca | |||
| ec5328de69 | |||
| 880dd6ddb2 | |||
| 898708b8b4 | |||
| b08331b21a | |||
| c196c996dd | |||
| 20e179f6dc | |||
| 32fbc015f5 | |||
| 558c250bb3 | |||
| 96ea2d3658 | |||
| 9c91298ccf | |||
| b08da83a51 | |||
| fcc8310454 | |||
| 96b8b30516 | |||
| 521e0dc707 | |||
| e2e773d883 | |||
| 04d22d90a3 | |||
| f6405081a6 | |||
| 5bea237581 | |||
| 33ad55e72b | |||
| 6848c2ecf7 | |||
| 79f9a2d31a | |||
| 2900ce20f7 | |||
| 002b78372d | |||
| 5fce5e4e3c | |||
| 7a1ad31183 | |||
| 54bdb3419d | |||
| 03fed4d1c8 | |||
| 6279f69f5c | |||
| 6959d5ead4 | |||
| a5aa68744f | |||
| 89c5aaa9e9 | |||
| 15422d886f | |||
| f139bef0cb | |||
| de82cf8779 | |||
| 662895c020 | |||
| d23939da8c | |||
| 67529f21ff | |||
| 5c11ff0950 | |||
| ce4eb0d409 | |||
| 04434320fc | |||
| 50fafa6d71 | |||
| f5ced648ef | |||
| 0f8f88ca46 | |||
| e99f19e812 | |||
| 9a36e5d213 | |||
| 4816f27639 | |||
| 9c0b8aa812 | |||
| 23f178d718 | |||
| 159c18b98b | |||
| 1847a3ac11 | |||
| 15b16ffbbb | |||
| e64d89cd48 | |||
| 7366da4390 | |||
| 2dc85ee417 | |||
| 73cc07f98e | |||
| 815e2244ca | |||
| 163af73554 | |||
| fd316f3ca3 | |||
| 122588bc4d | |||
| 70238982c2 | |||
| 5b5191ef64 | |||
| a65faeaed4 | |||
| 4dea1842e0 | |||
| 5353b11f90 | |||
| abdbf96254 | |||
| bd170e63ba | |||
| d35fa16417 | |||
| 6406a0df6b | |||
| 52e8f03b4b | |||
| b071a3f32c | |||
| 90258f00bd | |||
| 28eb649056 | |||
| 744ebacf65 | |||
| 62e438a0aa | |||
| 5ac582a678 | |||
| 51bc28acca | |||
| c43654d69b | |||
| c1d2db6a73 | |||
| aeef55d1b0 | |||
| 6e289e8d9f | |||
| 3b5363c533 | |||
| 60f6862db2 | |||
| 39deff4e10 | |||
| 7f03c18d4d | |||
| 640dba627f | |||
| ae368d97d4 | |||
| 99c216d91e | |||
| 3c357dc30a | |||
| 37866e61ab | |||
| 076e6b9b12 | |||
| fa6db686b4 | |||
| 74a636a612 | |||
| 1c4a6568e6 | |||
| 7d2e2f93b0 | |||
| 7005110697 | |||
| c4ca4ae3ab | |||
| b024a486b9 | |||
| fe4c461f2f | |||
| b60a980088 | |||
| ec66229063 | |||
| b875b5186f | |||
| 5cf884ef41 | |||
| 64e2639adc | |||
| 14b360e867 | |||
| 4a0e389953 | |||
| 34363c2b68 | |||
| 8a1d756cb1 | |||
| e36abe57e7 | |||
| b2c8cc50dc | |||
| b9b4a4fe36 | |||
| 4b652c9353 | |||
| 60ac94cbb9 | |||
| 42bbf5f2a4 | |||
| e29a40331d | |||
| 655de2cd82 | |||
| 205747594b | |||
| 21f9a1ea33 | |||
| aed099fbcb | |||
| 48515970a0 | |||
| b888f31b30 | |||
| 7982008b5b | |||
| f658173269 | |||
| ca7edf1df8 | |||
| 9a5f3ad4e6 | |||
| cfbab0ee82 | |||
| 86ae1380e4 | |||
| 9bb48186e6 | |||
| 139123dc12 | |||
| 6602cf442c | |||
| f148863586 | |||
| ec375da27a | |||
| c50e7c1029 | |||
| 5f4dbb2c71 | |||
| 328609269b | |||
| 056fdb2633 | |||
| 09d0a59e22 | |||
| 511555c8cb | |||
| 81699345cc | |||
| 130751ff66 | |||
| f3d18eb9de | |||
| 249bda4aef | |||
| aaa246f86f | |||
| c52f7a5b49 | |||
| 90a34f54c9 | |||
| bfb5080b71 | |||
| 641dfed37e | |||
| 4572e6be3f | |||
| 12e44050c9 | |||
| d5190990f5 | |||
| 82822b1f16 | |||
| 7f02889f76 | |||
| 9dc86869d8 | |||
| 02bb127007 | |||
| c26c4aba4f | |||
| e8d8ad60c2 | |||
| a7f645f7df | |||
| 73731d2a0d | |||
| 0f049c5ed7 | |||
| 8d5f95de04 | |||
| 88fca2c0df | |||
| 81d18e35dd | |||
| 309da8fc53 | |||
| 535e3f3af6 | |||
| 4c80dca479 | |||
| 7bef1f5117 | |||
| bb8c8355c2 | |||
| fab0641813 | |||
| ce3af4734a | |||
| e2dea4e9f8 | |||
| 0d9c1df75a | |||
| 6a979cf4b8 | |||
| c107d1fdf9 | |||
| bc89a51e00 | |||
| 9da9e755fa | |||
| fe42481d6f | |||
| b1ea6eb82a | |||
| 8c2e20c3aa | |||
| 65667709a8 | |||
| 51bc5fd61f | |||
| 3b277b2354 | |||
| 3e4c9bdd90 | |||
| 06b1b4f8ab | |||
| 7b4de6e6c2 | |||
| 1c266f4849 | |||
| b7a7281195 | |||
| b77732fb4f | |||
| a224bf648a | |||
| 642520f80c | |||
| 5cb75b00c7 | |||
| 7dd0d1137f | |||
| cb2fe29f06 | |||
| 3432f46d8b | |||
| afcf1a24aa | |||
| 140f813d77 | |||
| 7ad6f9595c | |||
| 1796c20b88 | |||
| 0da5b76916 | |||
| 4ac1efae6c | |||
| 523a066245 | |||
| 98df469d29 | |||
| f46287a711 | |||
| c260b5c6f3 | |||
| c9157f273f | |||
| 840acd6021 | |||
| c949a894c6 | |||
| 228f8f8533 | |||
| 8ee9eca74e | |||
| 748429fc92 | |||
| a9dfcd9a89 | |||
| 559fc9746c | |||
| 54169bc3ea | |||
| 142e923222 | |||
| 86efc86945 | |||
| ebaafa95d8 | |||
| b8ee144e67 | |||
| 722ae0e7d5 | |||
| f56e087208 | |||
| f55f01cc11 | |||
| 1fa398cfab | |||
| 8123cc413e | |||
| d4459cf9f3 | |||
| 4bb65494e9 | |||
| 2f2b3cdc6f | |||
| 1e9f9d9809 | |||
| 1b25379c02 | |||
| 38bbb4e390 | |||
| 0fa88f513f | |||
| cd54c5983a | |||
| 6084faeecd | |||
| d209c00a30 | |||
| 9a5d5feb9c | |||
| 0cda763f95 | |||
| cc7be46b7d | |||
| 589504dc33 | |||
| bf2f38051b | |||
| 2d2d0af6fb | |||
| 7f47dc78a1 | |||
| c3c9187ed5 | |||
| aebacb243e | |||
| 5a8d1f09e8 | |||
| 0e10b6d1ee | |||
| d649d6fc2d | |||
| bad487cc07 | |||
| 3b6056fb1a | |||
| 5cc738d6bd | |||
| c9fa445f54 | |||
| d273a2f58b | |||
| 4e7069d499 | |||
| 66f44e77af | |||
| 35f908b75c | |||
| 2f0089dfb9 | |||
| 2af6d5115a | |||
| ac25c5e1e7 | |||
| 90c0355d90 | |||
| 43230eb623 | |||
| f18dc8428d | |||
| ab53c8e0a4 | |||
| 6c33e236d7 | |||
| 85d36f1469 | |||
| 0ecf31d896 | |||
| 08a625cc0d | |||
| 12840601e1 | |||
| 2ae6883a8b | |||
| d5629606c5 | |||
| 285059e504 | |||
| 5b6d0a887c | |||
| 3573b8649e | |||
| d7523cdd84 | |||
| 5753db5846 | |||
| 2d7cb0af89 | |||
| 1cb9b435a9 | |||
| 43ecf06e83 | |||
| 51982de36b | |||
| 0a22320a3c | |||
| 8813e890c5 | |||
| e664ffba18 | |||
| 3bd0137c25 | |||
| 4f2b4aa402 | |||
| 682cd34b74 | |||
| 2bc4d06a48 | |||
| 4f2c1e07c1 | |||
| 77bb3038d3 | |||
| 931448a94d | |||
| c51bbbabc6 | |||
| 2ddc52e1a4 | |||
| 3c93958c48 | |||
| 9763c40f64 | |||
| 3bf77446cc | |||
| c3dfb1663d | |||
| 217dd9c1e5 | |||
| d4cd756a91 | |||
| b894619d1b | |||
| b962da700b | |||
| 196379854b | |||
| d213efac79 | |||
| 38910fe13d | |||
| 4d4279121b | |||
| 99da5b6484 | |||
| 6b60dee890 | |||
| dd08a3151e | |||
| e1442bf12b | |||
| 86f297ddc4 | |||
| 823b222af9 | |||
| 9c25eb8ef2 | |||
| 665eead78b | |||
| f8ef43c77d | |||
| 8f4afe410f | |||
| da9bb421cc | |||
| 1e89796d3e | |||
| a1a2900606 | |||
| 79b977ac06 | |||
| 37e3118df6 | |||
| be4d84c0c1 | |||
| c43c1b640a | |||
| e294db7e53 | |||
| df3f388e09 | |||
| a2fbe99b60 | |||
| 9c847c0a8f | |||
| 58c1fd4512 | |||
| dae9a5ff13 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -14,3 +14,4 @@ elfboot/elfboot_test
|
||||
linux/executer/mcexec
|
||||
linux/mod_test*
|
||||
linux/target
|
||||
kernel/script/dwarf-extract-struct
|
||||
|
||||
78
Makefile.in
78
Makefile.in
@ -1,16 +1,25 @@
|
||||
TARGET = @TARGET@
|
||||
SBINDIR = @SBINDIR@
|
||||
BINDIR = @BINDIR@
|
||||
INCDIR = @INCDIR@
|
||||
ETCDIR = @ETCDIR@
|
||||
MANDIR = @MANDIR@
|
||||
|
||||
all::
|
||||
@(cd executer/kernel/mcctrl; make modules)
|
||||
@(cd executer/kernel/mcoverlayfs; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
all: executer-mcctrl executer-mcoverlayfs executer-user mckernel mck-tools
|
||||
|
||||
executer-mcctrl:
|
||||
+@(cd executer/kernel/mcctrl; $(MAKE) modules)
|
||||
|
||||
executer-mcoverlayfs:
|
||||
+@(cd executer/kernel/mcoverlayfs; $(MAKE) modules)
|
||||
|
||||
executer-user:
|
||||
+@(cd executer/user; $(MAKE))
|
||||
|
||||
mckernel:
|
||||
+@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
|
||||
(cd kernel; make) \
|
||||
(cd kernel; $(MAKE)) \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
@ -18,13 +27,16 @@ all::
|
||||
;; \
|
||||
esac
|
||||
|
||||
install::
|
||||
@(cd executer/kernel/mcctrl; make install)
|
||||
@(cd executer/kernel/mcoverlayfs; make install)
|
||||
@(cd executer/user; make install)
|
||||
mck-tools:
|
||||
+@(cd tools/mcstat; $(MAKE))
|
||||
|
||||
install:
|
||||
@(cd executer/kernel/mcctrl; $(MAKE) install)
|
||||
@(cd executer/kernel/mcoverlayfs; $(MAKE) install)
|
||||
@(cd executer/user; $(MAKE) install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
|
||||
(cd kernel; make install) \
|
||||
(cd kernel; $(MAKE) install) \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
@ -32,51 +44,41 @@ install::
|
||||
;; \
|
||||
esac
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
builtin-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
smp-x86 | smp-arm64) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
install -m 600 arch/x86/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
|
||||
install -m 600 arch/x86/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
|
||||
install -m 755 arch/x86/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
|
||||
install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
install -m 755 arch/x86_64/tools/mpimcexec $(BINDIR)/mpimcexec; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
|
||||
install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
|
||||
mkdir -p -m 755 $(ETCDIR); \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
install -m 644 arch/x86_64/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86_64/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
mkdir -p -m 755 $(INCDIR); \
|
||||
install -m 644 kernel/include/swapfmt.h $(INCDIR); \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
install -m 644 arch/x86_64/tools/mpimcexec.1 $(MANDIR)/man1/mpimcexec.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
@(cd tools/mcstat/; $(MAKE) install)
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel/mcctrl; make clean)
|
||||
@(cd executer/kernel/mcoverlayfs; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
# clean: remove build products from every sub-component.
#
# Each sub-make is entered with `cd dir && $(MAKE) ...` so that a failed
# `cd` aborts the recipe instead of running `$(MAKE) clean` in the current
# directory.  $(MAKE) (rather than a literal `make`) keeps -j/-n and the
# jobserver working across the recursion.
.PHONY: clean
clean:
	@(cd executer/kernel/mcctrl && $(MAKE) clean)
	@(cd executer/kernel/mcoverlayfs && $(MAKE) clean)
	@(cd executer/user && $(MAKE) clean)
	@case "$(TARGET)" in \
	    attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
		(cd kernel && $(MAKE) clean) \
		;; \
	    *) \
		echo "unknown target $(TARGET)" >&2; \
		exit 1 \
		;; \
	esac
	@(cd tools/mcstat && $(MAKE) clean)
|
||||
|
||||
@ -590,6 +590,8 @@ static void show_context_stack(struct pt_regs *regs)
|
||||
return;
|
||||
}
|
||||
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
|
||||
sp = (uintptr_t)regs + sizeof(*regs);
|
||||
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
|
||||
max_loop = (stack_top - sp) / min_stack_frame_size;
|
||||
@ -1170,8 +1172,6 @@ void arch_clone_thread(struct thread *othread, unsigned long pc,
|
||||
asm("mrs %0, tpidr_el0" : "=r" (tls));
|
||||
othread->tlsblock_base = nthread->tlsblock_base = tls;
|
||||
|
||||
/* copy fp_regs values from parent. */
|
||||
save_fp_regs(othread);
|
||||
if ((othread->fp_regs != NULL) && (check_and_allocate_fp_regs(nthread) == 0)) {
|
||||
memcpy(nthread->fp_regs, othread->fp_regs, sizeof(fp_regs_struct));
|
||||
}
|
||||
@ -1205,6 +1205,10 @@ void ihk_mc_delay_us(int us)
|
||||
arch_delay(us);
|
||||
}
|
||||
|
||||
/*
 * arch_print_stack - stack-printing hook.
 *
 * This implementation is an empty stub: it takes no arguments, prints
 * nothing and returns nothing.
 */
void arch_print_stack(void)
{
}
|
||||
|
||||
void arch_show_interrupt_context(const void *reg)
|
||||
{
|
||||
const struct pt_regs *regs = (struct pt_regs *)reg;
|
||||
@ -1428,6 +1432,13 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
}
|
||||
|
||||
void copy_fp_regs(struct thread *from, struct thread *to)
|
||||
{
|
||||
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
|
||||
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
clear_fp_regs(struct thread *thread)
|
||||
{
|
||||
@ -1499,7 +1510,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = 0;
|
||||
|
||||
@ -1513,17 +1523,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
range = lookup_process_memory_range(vm, address, address+1);
|
||||
if (range) {
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
} else {
|
||||
__kprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
|
||||
@ -221,7 +221,7 @@ int gencore(struct thread *thread, void *regs,
|
||||
Elf64_Ehdr eh;
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct vm_range *range, *next;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
@ -235,7 +235,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
return -1;
|
||||
}
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
|
||||
range->start, range->end, range->flag, range->objoff);
|
||||
/* We omit reserved areas because they are only for
|
||||
@ -323,7 +326,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
|
||||
/* program header for each memory chunk */
|
||||
i = 1;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long flag = range->flag;
|
||||
unsigned long size = range->end - range->start;
|
||||
|
||||
@ -364,7 +370,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
|
||||
|
||||
i = 3; /* memory segments */
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long phys;
|
||||
|
||||
if (range->flag & VR_RESERVED)
|
||||
|
||||
@ -134,4 +134,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * get_futex_value_locked - fetch the current 32-bit futex word.
 *
 * @dest: where the value is stored
 * @from: address of the futex word
 *
 * The word is read through a volatile-qualified pointer so the load is
 * actually performed.  Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *word = (volatile uint32_t *)from;

	*dest = *word;
	return 0;
}
|
||||
|
||||
#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */
|
||||
|
||||
@ -215,7 +215,7 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
|
||||
#define PAGE_P2ALIGN 0
|
||||
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
|
||||
#define page_align(addr) __page_align(addr, PAGE_SIZE)
|
||||
#define page_align_up(addr) __page_align_up((addr, PAGE_SIZE)
|
||||
#define page_align_up(addr) __page_align_up(addr, PAGE_SIZE)
|
||||
|
||||
/*
|
||||
* large page
|
||||
@ -263,6 +263,8 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
|
||||
|
||||
#define PTE_FILEOFF PTE_SPECIAL
|
||||
|
||||
#define PT_ENTRIES (PAGE_SIZE >> 3)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <ihk/types.h>
|
||||
|
||||
@ -25,6 +25,8 @@
|
||||
#define smp_rmb() dmb(ishld)
|
||||
#define smp_wmb() dmb(ishst)
|
||||
|
||||
#define arch_barrier() smp_mb()
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
compiletime_assert_atomic_type(*p); \
|
||||
|
||||
34
arch/arm64/kernel/include/arch_rusage.h
Normal file
34
arch/arm64/kernel/include/arch_rusage.h
Normal file
@ -0,0 +1,34 @@
|
||||
#ifndef ARCH_RUSAGE_H_INCLUDED
|
||||
#define ARCH_RUSAGE_H_INCLUDED
|
||||
|
||||
#include <arch-memory.h>
|
||||
|
||||
//#define DEBUG_RUSAGE
|
||||
|
||||
extern struct rusage_global *rusage;
|
||||
|
||||
#define IHK_OS_PGSIZE_4KB 0
|
||||
#define IHK_OS_PGSIZE_16KB 1
|
||||
#define IHK_OS_PGSIZE_64KB 2
|
||||
|
||||
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
|
||||
{
|
||||
int ret = IHK_OS_PGSIZE_4KB;
|
||||
switch (pgsize) {
|
||||
case __PTL1_SIZE:
|
||||
ret = IHK_OS_PGSIZE_4KB;
|
||||
break;
|
||||
case __PTL2_SIZE:
|
||||
ret = IHK_OS_PGSIZE_16KB;
|
||||
break;
|
||||
case __PTL3_SIZE:
|
||||
ret = IHK_OS_PGSIZE_64KB;
|
||||
break;
|
||||
default:
|
||||
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */
|
||||
@ -6,6 +6,8 @@
|
||||
|
||||
#if defined(CONFIG_HAS_NMI)
|
||||
#include <arm-gic-v3.h>
|
||||
#else /* defined(CONFIG_HAS_NMI) */
|
||||
#include <sysreg.h>
|
||||
#endif /* defined(CONFIG_HAS_NMI) */
|
||||
|
||||
#if defined(CONFIG_HAS_NMI)
|
||||
|
||||
@ -35,6 +35,8 @@
|
||||
#define MIDR_IMPLEMENTOR(midr) \
|
||||
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
|
||||
|
||||
#define ARM_CPU_IMP_CAVIUM 0x43
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
static unsigned int read_cpuid_id(void)
|
||||
|
||||
@ -73,6 +73,7 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->regs[5]
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->regs[0]
|
||||
#define ihk_mc_syscall_number(uc) (uc)->regs[8]
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->pc
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->sp
|
||||
|
||||
@ -144,5 +144,3 @@ SYSCALL_HANDLED(1045, signalfd)
|
||||
SYSCALL_DELEGATED(1049, stat)
|
||||
SYSCALL_DELEGATED(1060, getpgrp)
|
||||
SYSCALL_DELEGATED(1062, time)
|
||||
SYSCALL_HANDLED(1071, vfork)
|
||||
SYSCALL_DELEGATED(1079, fork)
|
||||
|
||||
@ -10,6 +10,8 @@
|
||||
|
||||
//#define DEBUG_GICV3
|
||||
|
||||
#define USE_CAVIUM_THUNDER_X
|
||||
|
||||
#ifdef DEBUG_GICV3
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
@ -18,6 +20,10 @@
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
static char is_cavium_thunderx = 0;
|
||||
#endif
|
||||
|
||||
void *dist_base;
|
||||
void *rdist_base[NR_CPUS];
|
||||
|
||||
@ -108,8 +114,8 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
|
||||
asm volatile("nop;nop;nop;nop;");
|
||||
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
|
||||
asm volatile("nop;nop;nop;nop;");
|
||||
mb();
|
||||
#endif /* CONFIG_HAS_NMI */
|
||||
mb();
|
||||
|
||||
return irqstat;
|
||||
}
|
||||
@ -118,7 +124,7 @@ static uint64_t gic_read_iar_cavium_thunderx(void)
|
||||
static uint64_t gic_read_iar(void)
|
||||
{
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
if (static_key_false(&is_cavium_thunderx))
|
||||
if (is_cavium_thunderx)
|
||||
return gic_read_iar_cavium_thunderx();
|
||||
else
|
||||
#endif
|
||||
@ -266,6 +272,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
|
||||
{
|
||||
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
|
||||
|
||||
barrier();
|
||||
if(vector < 16){
|
||||
// send SGI
|
||||
arm64_raise_sgi_gicv3(cpuid, vector);
|
||||
@ -304,7 +311,9 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
|
||||
|
||||
#ifdef USE_CAVIUM_THUNDER_X
|
||||
/* Cavium ThunderX erratum 23154 */
|
||||
gicv3_check_capabilities();
|
||||
if (MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_CAVIUM) {
|
||||
is_cavium_thunderx = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -399,6 +408,7 @@ void gic_enable_gicv3(void)
|
||||
/* Set specific IPI to NMI */
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP);
|
||||
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
|
||||
|
||||
/* sync wait */
|
||||
gic_do_wait_for_rwp(rbase);
|
||||
|
||||
@ -20,10 +20,11 @@ size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
|
||||
void init_processors_local(int max_id)
|
||||
{
|
||||
int i = 0;
|
||||
const int sz = (max_id + 1) * KERNEL_STACK_SIZE;
|
||||
union arm64_cpu_local_variables *tmp;
|
||||
|
||||
/* allocate one more for alignment */
|
||||
locals = ihk_mc_alloc_pages((max_id + 1) * (KERNEL_STACK_SIZE / PAGE_SIZE), IHK_MC_AP_CRITICAL);
|
||||
locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL);
|
||||
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
|
||||
|
||||
/* clear struct process, struct process_vm, struct thread_info area */
|
||||
|
||||
@ -1760,6 +1760,12 @@ int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
|
||||
return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args);
|
||||
}
|
||||
|
||||
/*
 * visit_pte_range_safe - placeholder with the same parameter list as
 * visit_pte_range().
 *
 * No page-table walk is performed here: every argument is ignored and
 * the function unconditionally returns 0.
 */
int visit_pte_range_safe(page_table_t pt,
		void *start0,
		void *end0,
		int pgshift,
		enum visit_pte_flag flags,
		pte_visitor_t *funcp,
		void *arg)
{
	return 0;
}
|
||||
|
||||
struct clear_range_args {
|
||||
int free_physical;
|
||||
struct memobj *memobj;
|
||||
@ -1801,7 +1807,6 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), npages);
|
||||
dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base);
|
||||
}
|
||||
args->vm->currss -= PTL1_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -1881,7 +1886,6 @@ static int clear_range_middle(void *args0, pte_t *ptep, uint64_t base,
|
||||
ihk_mc_free_pages_user(phys_to_virt(phys), npages);
|
||||
dkprintf("%s(level=%d): freeing large page at 0x%lx\n", __FUNCTION__, level, base);
|
||||
}
|
||||
args->vm->currss -= tbl.pgsize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@ -965,7 +965,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
|
||||
proc->status = PS_TRACED;
|
||||
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
|
||||
thread->status = PS_TRACED;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL;
|
||||
if (sig == SIGSTOP || sig == SIGTSTP ||
|
||||
sig == SIGTTIN || sig == SIGTTOU) {
|
||||
proc->signal_flags |= SIGNAL_STOP_STOPPED;
|
||||
|
||||
@ -12,6 +12,8 @@
|
||||
#include <lwk/compiler.h>
|
||||
#include <hwcap.h>
|
||||
#include <prctl.h>
|
||||
#include <limits.h>
|
||||
#include <syscall.h>
|
||||
|
||||
extern void ptrace_report_signal(struct thread *thread, int sig);
|
||||
extern void clear_single_step(struct thread *thread);
|
||||
@ -1321,6 +1323,17 @@ interrupt_from_user(void *regs0)
|
||||
return((regs->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t);
|
||||
}
|
||||
|
||||
void save_syscall_return_value(int num, unsigned long rc)
|
||||
{
|
||||
/*
|
||||
* Save syscall return value.
|
||||
*/
|
||||
if (cpu_local_var(current) && cpu_local_var(current)->uctx &&
|
||||
num != __NR_rt_sigsuspend) {
|
||||
ihk_mc_syscall_arg0(cpu_local_var(current)->uctx) = rc;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
check_signal(unsigned long rc, void *regs0, int num)
|
||||
{
|
||||
@ -1345,16 +1358,6 @@ __check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
|
||||
return;
|
||||
thread = cpu_local_var(current);
|
||||
|
||||
/**
|
||||
* If check_signal is called from syscall(),
|
||||
* then save syscall return value.
|
||||
*/
|
||||
if((regs == NULL)&&(num != __NR_rt_sigsuspend)){ /* It's call from syscall! */
|
||||
// Get user context through current thread
|
||||
// and update syscall return.
|
||||
ihk_mc_syscall_arg0(thread->uctx) = rc;
|
||||
}
|
||||
|
||||
if(thread == NULL || thread->proc->pid == 0){
|
||||
struct thread *t;
|
||||
irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
|
||||
@ -1866,4 +1869,645 @@ save_uctx(void *uctx, struct pt_regs *regs)
|
||||
/* TODO: skeleton for UTI */
|
||||
}
|
||||
|
||||
int do_process_vm_read_writev(int pid,
|
||||
const struct iovec *local_iov,
|
||||
unsigned long liovcnt,
|
||||
const struct iovec *remote_iov,
|
||||
unsigned long riovcnt,
|
||||
unsigned long flags,
|
||||
int op)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
int li, ri;
|
||||
int pli, pri;
|
||||
off_t loff, roff;
|
||||
size_t llen = 0, rlen = 0;
|
||||
size_t copied = 0;
|
||||
size_t to_copy;
|
||||
struct thread *lthread = cpu_local_var(current);
|
||||
struct process *rproc;
|
||||
struct process *lproc = lthread->proc;
|
||||
struct process_vm *rvm = NULL;
|
||||
unsigned long rphys;
|
||||
unsigned long rpage_left;
|
||||
unsigned long psize;
|
||||
void *rva;
|
||||
struct vm_range *range;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
struct mcs_rwlock_node update_lock;
|
||||
|
||||
/* Sanity checks */
|
||||
if (flags) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Check if parameters are okay */
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov,
|
||||
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)remote_iov,
|
||||
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
arg_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (li = 0; li < liovcnt; ++li) {
|
||||
llen += local_iov[li].iov_len;
|
||||
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
li, local_iov[li].iov_base, local_iov[li].iov_len);
|
||||
}
|
||||
|
||||
for (ri = 0; ri < riovcnt; ++ri) {
|
||||
rlen += remote_iov[ri].iov_len;
|
||||
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
|
||||
}
|
||||
|
||||
if (llen != rlen) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Find remote process */
|
||||
rproc = find_process(pid, &lock);
|
||||
if (!rproc) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
|
||||
if(rproc->status == PS_EXITED ||
|
||||
rproc->status == PS_ZOMBIE){
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
rvm = rproc->vm;
|
||||
hold_process_vm(rvm);
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
|
||||
if (lproc->euid != 0 &&
|
||||
(lproc->ruid != rproc->ruid ||
|
||||
lproc->ruid != rproc->euid ||
|
||||
lproc->ruid != rproc->suid ||
|
||||
lproc->rgid != rproc->rgid ||
|
||||
lproc->rgid != rproc->egid ||
|
||||
lproc->rgid != rproc->sgid)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
|
||||
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
|
||||
liovcnt, riovcnt);
|
||||
|
||||
pli = pri = -1; /* Previous indeces in iovecs */
|
||||
li = ri = 0; /* Current indeces in iovecs */
|
||||
loff = roff = 0; /* Offsets in current iovec */
|
||||
|
||||
/* Now iterate and do the copy */
|
||||
while (copied < llen) {
|
||||
int faulted = 0;
|
||||
|
||||
/* New local vector? */
|
||||
if (pli != li) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + 1));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_WRITE : VR_PROT_READ))) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pli_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pli = li;
|
||||
}
|
||||
|
||||
/* New remote vector? */
|
||||
if (pri != ri) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + 1));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_READ : VR_PROT_WRITE))) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pri_out:
|
||||
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pri = ri;
|
||||
}
|
||||
|
||||
/* Figure out how much we can copy at most in this iteration */
|
||||
to_copy = (local_iov[li].iov_len - loff);
|
||||
if ((remote_iov[ri].iov_len - roff) < to_copy) {
|
||||
to_copy = remote_iov[ri].iov_len - roff;
|
||||
}
|
||||
|
||||
retry_lookup:
|
||||
/* TODO: remember page and do this only if necessary */
|
||||
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
|
||||
remote_iov[ri].iov_base + roff, &rphys, &psize);
|
||||
|
||||
if (ret) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
void *addr;
|
||||
|
||||
if (faulted) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Fault in pages */
|
||||
for (addr = (void *)
|
||||
(((unsigned long)remote_iov[ri].iov_base + roff)
|
||||
& PAGE_MASK);
|
||||
addr < (remote_iov[ri].iov_base + roff + to_copy);
|
||||
addr += PAGE_SIZE) {
|
||||
|
||||
ret = page_fault_process_vm(rvm, addr, reason);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
faulted = 1;
|
||||
goto retry_lookup;
|
||||
}
|
||||
|
||||
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
|
||||
psize) & ~(psize - 1)) -
|
||||
((unsigned long)remote_iov[ri].iov_base + roff));
|
||||
if (rpage_left < to_copy) {
|
||||
to_copy = rpage_left;
|
||||
}
|
||||
|
||||
rva = phys_to_virt(rphys);
|
||||
|
||||
fast_memcpy(
|
||||
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
|
||||
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
|
||||
to_copy);
|
||||
|
||||
copied += to_copy;
|
||||
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
|
||||
li, local_iov[li].iov_base + loff,
|
||||
(op == PROCESS_VM_READ) ? "<-" : "->",
|
||||
ri, remote_iov[ri].iov_base + roff, to_copy,
|
||||
psize, rpage_left);
|
||||
|
||||
loff += to_copy;
|
||||
roff += to_copy;
|
||||
|
||||
if (loff == local_iov[li].iov_len) {
|
||||
li++;
|
||||
loff = 0;
|
||||
}
|
||||
|
||||
if (roff == remote_iov[ri].iov_len) {
|
||||
ri++;
|
||||
roff = 0;
|
||||
}
|
||||
}
|
||||
|
||||
release_process_vm(rvm);
|
||||
|
||||
return copied;
|
||||
|
||||
out:
|
||||
if(rvm)
|
||||
release_process_vm(rvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
|
||||
{
|
||||
int i, i_s, i_e, phase = 1;
|
||||
struct move_pages_smp_req *mpsr =
|
||||
(struct move_pages_smp_req *)arg;
|
||||
struct process_vm *vm = mpsr->proc->vm;
|
||||
int count = mpsr->count;
|
||||
struct page_table *save_pt;
|
||||
extern struct page_table *get_init_page_table(void);
|
||||
|
||||
i_s = (count / nr_cpus) * cpu_index;
|
||||
i_e = i_s + (count / nr_cpus);
|
||||
if (cpu_index == (nr_cpus - 1)) {
|
||||
i_e = count;
|
||||
}
|
||||
|
||||
/* Load target process' PT so that we can access user-space */
|
||||
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
|
||||
get_init_page_table() :
|
||||
cpu_local_var(current)->vm->address_space->page_table;
|
||||
|
||||
if (save_pt != vm->address_space->page_table) {
|
||||
ihk_mc_load_page_table(vm->address_space->page_table);
|
||||
}
|
||||
else {
|
||||
save_pt = NULL;
|
||||
}
|
||||
|
||||
if (nr_cpus == 1) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus > 1 && nr_cpus < 4) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
case 1:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 4 && nr_cpus < 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 3:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->virt_addr + (count / 2),
|
||||
mpsr->user_virt_addr + (count / 2),
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 3:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 4:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
break;
|
||||
case 5:
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 6:
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 7:
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (!(volatile int)mpsr->nodes_ready) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
/* NUMA verification in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
if (mpsr->nodes[i] < 0 ||
|
||||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
|
||||
!test_bit(mpsr->nodes[i],
|
||||
mpsr->proc->vm->numa_mask)) {
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* PTE lookup in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
void *phys;
|
||||
size_t pgsize;
|
||||
int p2align;
|
||||
/*
|
||||
* XXX: No page structures for anonymous mappings.
|
||||
* Look up physical addresses by scanning page tables.
|
||||
*/
|
||||
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
|
||||
|
||||
/* PTE valid? */
|
||||
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
|
||||
mpsr->status[i] = -ENOENT;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* PTE is file? */
|
||||
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
|
||||
mpsr->status[i] = -EINVAL;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
|
||||
|
||||
/* Large page? */
|
||||
if (pgsize > PAGE_SIZE) {
|
||||
int nr_sub_pages = (pgsize / PAGE_SIZE);
|
||||
int j;
|
||||
|
||||
if (i + nr_sub_pages > count) {
|
||||
kprintf("%s: ERROR: page at index %d exceeds the region\n",
|
||||
__FUNCTION__, i);
|
||||
mpsr->status[i] = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Is it contiguous across nr_sub_pages and all
|
||||
* requested to be moved to the same target node? */
|
||||
for (j = 0; j < nr_sub_pages; ++j) {
|
||||
if (mpsr->virt_addr[i + j] !=
|
||||
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
|
||||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
|
||||
kprintf("%s: ERROR: virt address or node at index %d"
|
||||
" is inconsistent\n",
|
||||
__FUNCTION__, i + j);
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
goto pte_out;
|
||||
}
|
||||
}
|
||||
|
||||
mpsr->nr_pages[i] = nr_sub_pages;
|
||||
i += (nr_sub_pages - 1);
|
||||
}
|
||||
else {
|
||||
mpsr->nr_pages[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
pte_out:
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
if (cpu_index == 0) {
|
||||
/* Allocate new pages on target NUMA nodes */
|
||||
for (i = 0; i < count; i++) {
|
||||
int pgalign = 0;
|
||||
int j;
|
||||
void *dst;
|
||||
|
||||
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
|
||||
continue;
|
||||
|
||||
/* TODO: store pgalign info in an array as well? */
|
||||
if (mpsr->nr_pages[i] > 1) {
|
||||
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
|
||||
pgalign = PTL2_SHIFT - PTL1_SHIFT;
|
||||
}
|
||||
|
||||
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
|
||||
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
|
||||
|
||||
if (!dst) {
|
||||
mpsr->status[i] = -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
|
||||
mpsr->status[j] = mpsr->nodes[i];
|
||||
}
|
||||
|
||||
mpsr->dst_phys[i] = virt_to_phys(dst);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
|
||||
" allocated phys: 0x%lx\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* Copy, PTE update, memfree in parallel */
|
||||
for (i = i_s; i < i_e; ++i) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i] * PAGE_SIZE);
|
||||
|
||||
ihk_mc_free_pages(
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i]);
|
||||
|
||||
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->dst_phys[i]);
|
||||
}
|
||||
|
||||
/* XXX: do a separate SMP call with only CPUs running threads
|
||||
* of this process? */
|
||||
if (cpu_local_var(current)->proc == mpsr->proc) {
|
||||
/* Invalidate all TLBs */
|
||||
for (i = 0; i < mpsr->count; i++) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (save_pt) {
|
||||
ihk_mc_load_page_table(save_pt);
|
||||
}
|
||||
|
||||
return mpsr->phase_ret;
|
||||
}
|
||||
|
||||
time_t time(void) {
|
||||
struct timespec ats;
|
||||
|
||||
if (gettime_local_support) {
|
||||
calculate_time_from_tsc(&ats);
|
||||
return ats.tv_sec;
|
||||
}
|
||||
|
||||
return (time_t)0;
|
||||
}
|
||||
|
||||
/*** End of File ***/
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
|
||||
# IHK_OBJS added coredump.o
|
||||
ifeq ($(ARCH), arm64)
|
||||
IHK_OBJS += coredump.o
|
||||
endif
|
||||
2
arch/x86_64/kernel/Makefile.arch.in
Normal file
2
arch/x86_64/kernel/Makefile.arch.in
Normal file
@ -0,0 +1,2 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
@ -849,6 +849,7 @@ void setup_x86_ap(void (*next_func)(void))
|
||||
void arch_show_interrupt_context(const void *reg);
|
||||
void set_signal(int sig, void *regs, struct siginfo *info);
|
||||
void check_signal(unsigned long, void *, int);
|
||||
void check_sig_pending();
|
||||
extern void tlb_flush_handler(int vector);
|
||||
|
||||
void __show_stack(uintptr_t *sp) {
|
||||
@ -870,6 +871,19 @@ void show_context_stack(uintptr_t *rbp) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Common tail of the interrupt/exception handlers.
 *
 * When the interrupt did not come from user mode only check_sig_pending()
 * is called.  Otherwise interrupts are re-enabled first and then pending
 * signals, rescheduling and signal delivery are checked, in that order.
 */
void interrupt_exit(struct x86_user_context *regs)
{
	if (!interrupt_from_user(regs)) {
		check_sig_pending();
		return;
	}

	cpu_enable_interrupt();
	check_sig_pending();
	check_need_resched();
	check_signal(0, regs, 0);
}
|
||||
|
||||
void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
{
|
||||
struct ihk_mc_interrupt_handler *h;
|
||||
@ -992,12 +1006,8 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
}
|
||||
}
|
||||
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
interrupt_exit(regs);
|
||||
set_cputime(interrupt_from_user(regs)? 0: 1);
|
||||
|
||||
--v->in_interrupt;
|
||||
}
|
||||
@ -1012,13 +1022,9 @@ void gpe_handler(struct x86_user_context *regs)
|
||||
panic("gpe_handler");
|
||||
}
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
// panic("GPF");
|
||||
interrupt_exit(regs);
|
||||
set_cputime(interrupt_from_user(regs)? 0: 1);
|
||||
panic("GPF");
|
||||
}
|
||||
|
||||
void debug_handler(struct x86_user_context *regs)
|
||||
@ -1045,12 +1051,8 @@ void debug_handler(struct x86_user_context *regs)
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = si_code;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
interrupt_exit(regs);
|
||||
set_cputime(interrupt_from_user(regs)? 0: 1);
|
||||
}
|
||||
|
||||
void int3_handler(struct x86_user_context *regs)
|
||||
@ -1067,12 +1069,8 @@ void int3_handler(struct x86_user_context *regs)
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = TRAP_BRKPT;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
interrupt_exit(regs);
|
||||
set_cputime(interrupt_from_user(regs)? 0: 1);
|
||||
}
|
||||
|
||||
void
|
||||
@ -1081,7 +1079,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
@ -1095,17 +1092,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
range = lookup_process_memory_range(vm, address, address+1);
|
||||
if (range) {
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
} else {
|
||||
__kprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
@ -1233,6 +1225,13 @@ void cpu_pause(void)
|
||||
asm volatile("pause" ::: "memory");
|
||||
}
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/processor.h */
|
||||
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
|
||||
/*
 * Busy-wait hint: issues a single "rep; nop" instruction.  The "memory"
 * clobber tells the compiler that memory may have changed, so values
 * polled in the surrounding loop are re-read after each call.
 */
void cpu_relax(void)
|
||||
{
|
||||
asm volatile("rep; nop" ::: "memory");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
@ -1473,29 +1472,91 @@ void ihk_mc_delay_us(int us)
|
||||
arch_delay(us);
|
||||
}
|
||||
|
||||
#define EXTENDED_ARCH_SHOW_CONTEXT
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
void arch_show_extended_context(void)
|
||||
{
|
||||
unsigned long cr0, cr4, msr, xcr0;
|
||||
unsigned long cr0, cr4, msr, xcr0 = 0;
|
||||
|
||||
/* Read and print CRs, MSR_EFER, XCR0 */
|
||||
asm volatile("movq %%cr0, %0" : "=r"(cr0));
|
||||
asm volatile("movq %%cr4, %0" : "=r"(cr4));
|
||||
msr = rdmsr(MSR_EFER);
|
||||
xcr0 = xgetbv(0);
|
||||
|
||||
if (xsave_available) {
|
||||
xcr0 = xgetbv(0);
|
||||
}
|
||||
__kprintf("\n CR0 CR4\n");
|
||||
__kprintf("%016lX %016lX\n", cr0, cr4);
|
||||
|
||||
__kprintf(" MSR_EFER\n");
|
||||
__kprintf("%016lX\n", msr);
|
||||
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
|
||||
if (xsave_available) {
|
||||
__kprintf(" XCR0\n");
|
||||
__kprintf("%016lX\n", xcr0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * One record of the frame chain walked by the stack printers: a link to
 * the next record and the address value that gets printed for it.
 */
struct stack {
|
||||
/* next record in the chain */
struct stack *rbp;
|
||||
/* address printed for this record */
unsigned long eip;
|
||||
};
|
||||
|
||||
/* KPRINTF_LOCAL_BUF_LEN is 1024, useless to go further */
|
||||
#define STACK_BUF_LEN (1024-sizeof("[ 0]: "))
|
||||
static void __print_stack(struct stack *rbp, unsigned long first) {
|
||||
char buf[STACK_BUF_LEN];
|
||||
size_t len;
|
||||
|
||||
/* Build string in buffer to output a single line */
|
||||
len = snprintf(buf, STACK_BUF_LEN,
|
||||
"addr2line -e smp-x86/kernel/mckernel.img -fpia");
|
||||
|
||||
if (first)
|
||||
len += snprintf(buf + len, STACK_BUF_LEN - len,
|
||||
" %#16lx", first);
|
||||
|
||||
while ((unsigned long)rbp > 0xffff880000000000 &&
|
||||
STACK_BUF_LEN - len > sizeof(" 0x0123456789abcdef")) {
|
||||
len += snprintf(buf + len, STACK_BUF_LEN - len,
|
||||
" %#16lx", rbp->eip);
|
||||
rbp = rbp->rbp;
|
||||
}
|
||||
__kprintf("%s\n", buf);
|
||||
}
|
||||
|
||||
void arch_print_pre_interrupt_stack(const struct x86_basic_regs *regs) {
|
||||
struct stack *rbp;
|
||||
|
||||
/* only for kernel stack */
|
||||
if (regs->error & PF_USER)
|
||||
return;
|
||||
|
||||
__kprintf("Pre-interrupt stack trace:\n");
|
||||
|
||||
/* interrupt stack heuristics:
|
||||
* - the first entry looks like it is always garbage, so skip.
|
||||
* (that is done by taking regs->rsp instead of ®s->rsp)
|
||||
* - that still looks sometimes wrong. For now, if it is not
|
||||
* within 64k of itself, look for the next entry that matches.
|
||||
*/
|
||||
|
||||
rbp = (struct stack*)regs->rsp;
|
||||
|
||||
while ((uintptr_t)rbp > (uintptr_t)rbp->rbp
|
||||
|| (uintptr_t)rbp + 0x10000 < (uintptr_t)rbp->rbp)
|
||||
rbp = (struct stack *)(((uintptr_t *)rbp) + 1);
|
||||
|
||||
__print_stack(rbp, regs->rip);
|
||||
}
|
||||
|
||||
/*
 * Print a stack trace starting from the caller's current frame: reads the
 * %rbp register and hands it to __print_stack() with no extra first
 * address.
 */
void arch_print_stack() {
|
||||
struct stack *rbp;
|
||||
|
||||
__kprintf("Approximative stack trace:\n");
|
||||
|
||||
/* fetch the current frame pointer register */
asm("mov %%rbp, %0" : "=r"(rbp) );
|
||||
|
||||
__print_stack(rbp, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*@
|
||||
@ requires \valid(reg);
|
||||
@ -1526,9 +1587,11 @@ void arch_show_interrupt_context(const void *reg)
|
||||
__kprintf("%16lx %16lx %16lx %16lx\n",
|
||||
regs->cs, regs->ss, regs->rflags, regs->error);
|
||||
|
||||
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
|
||||
arch_show_extended_context();
|
||||
#endif
|
||||
kprintf_unlock(irqflags);
|
||||
return;
|
||||
arch_show_extended_context();
|
||||
|
||||
arch_print_pre_interrupt_stack(regs);
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
}
|
||||
@ -1651,13 +1714,11 @@ release_fp_regs(struct thread *thread)
|
||||
thread->fp_regs = NULL;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@*/
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
static int
|
||||
check_and_allocate_fp_regs(struct thread *thread)
|
||||
{
|
||||
int pages;
|
||||
int pages;
|
||||
int result = 0;
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
@ -1666,12 +1727,26 @@ save_fp_regs(struct thread *thread)
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
kprintf("error: allocating fp_regs pages\n");
|
||||
return;
|
||||
result = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
|
||||
memset(thread->fp_regs, 0, pages * PAGE_SIZE);
|
||||
}
|
||||
out:
|
||||
return result;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@*/
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
{
|
||||
if (check_and_allocate_fp_regs(thread) != 0) {
|
||||
// alloc error
|
||||
return;
|
||||
}
|
||||
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
@ -1687,6 +1762,13 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
}
|
||||
|
||||
void copy_fp_regs(struct thread *from, struct thread *to)
|
||||
{
|
||||
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
|
||||
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_19
|
||||
void
|
||||
clear_fp_regs(struct thread *thread)
|
||||
@ -289,7 +289,7 @@ int gencore(struct thread *thread, void *regs,
|
||||
Elf64_Ehdr eh;
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct vm_range *range, *next;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
@ -303,13 +303,18 @@ int gencore(struct thread *thread, void *regs,
|
||||
return -1;
|
||||
}
|
||||
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
|
||||
range->start, range->end, range->flag, range->objoff);
|
||||
/* We omit reserved areas because they are only for
|
||||
mckernel's internal use. */
|
||||
if (range->flag & VR_RESERVED)
|
||||
continue;
|
||||
if (range->flag & VR_DONTDUMP)
|
||||
continue;
|
||||
/* We need a chunk for each page for a demand paging area.
|
||||
This can be optimized for spatial complexity but we would
|
||||
lose simplicity instead. */
|
||||
@ -391,7 +396,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
|
||||
/* program header for each memory chunk */
|
||||
i = 1;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long flag = range->flag;
|
||||
unsigned long size = range->end - range->start;
|
||||
|
||||
@ -432,7 +440,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
|
||||
|
||||
i = 3; /* memory segments */
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
next = lookup_process_memory_range(vm, 0, -1);
|
||||
while ((range = next)) {
|
||||
next = next_process_memory_range(vm, range);
|
||||
|
||||
unsigned long phys;
|
||||
|
||||
if (range->flag & VR_RESERVED)
|
||||
@ -64,7 +64,6 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
|
||||
return oldval;
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
|
||||
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
|
||||
{
|
||||
int op = (encoded_op >> 28) & 7;
|
||||
@ -128,6 +127,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif /* !POSTK_DEBUG_ARCH_DEP_8 */
|
||||
|
||||
/*
 * Read the 32-bit futex word at @from with a volatile access and store it
 * in @dest.  Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *src = (volatile uint32_t *)from;

	*dest = *src;
	return 0;
}
|
||||
|
||||
#endif
|
||||
@ -14,7 +14,17 @@
|
||||
int __kprintf(const char *format, ...);
|
||||
#endif
|
||||
|
||||
typedef int ihk_spinlock_t;
|
||||
typedef unsigned short __ticket_t;
|
||||
typedef unsigned int __ticketpair_t;
|
||||
|
||||
typedef struct ihk_spinlock {
|
||||
union {
|
||||
__ticketpair_t head_tail;
|
||||
struct __raw_tickets {
|
||||
__ticket_t head, tail;
|
||||
} tickets;
|
||||
};
|
||||
} ihk_spinlock_t;
|
||||
|
||||
extern void preempt_enable(void);
|
||||
extern void preempt_disable(void);
|
||||
@ -23,9 +33,9 @@ extern void preempt_disable(void);
|
||||
|
||||
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
{
|
||||
*lock = 0;
|
||||
lock->head_tail = 0;
|
||||
}
|
||||
#define SPIN_LOCK_UNLOCKED 0
|
||||
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_lock_noirq(l) { \
|
||||
@ -39,40 +49,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
|
||||
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
int inc = 0x00010000;
|
||||
int tmp;
|
||||
|
||||
#if 0
|
||||
asm volatile("lock ; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
"1:\t"
|
||||
"cmpl %0, %2\n\t"
|
||||
"je 2f\n\t"
|
||||
"rep ; nop\n\t"
|
||||
"movzwl %1, %2\n\t"
|
||||
"jmp 1b\n"
|
||||
"2:"
|
||||
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
|
||||
#endif
|
||||
register struct __raw_tickets inc = { .tail = 0x0002 };
|
||||
|
||||
preempt_disable();
|
||||
|
||||
asm volatile("lock; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
"1:\t"
|
||||
"cmpl %0, %2\n\t"
|
||||
"je 2f\n\t"
|
||||
"rep ; nop\n\t"
|
||||
"movzwl %1, %2\n\t"
|
||||
/* don't need lfence here, because loads are in-order */
|
||||
"jmp 1b\n"
|
||||
"2:"
|
||||
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
|
||||
:
|
||||
: "memory", "cc");
|
||||
asm volatile ("lock xaddl %0, %1\n"
|
||||
: "+r" (inc), "+m" (*(lock)) : : "memory", "cc");
|
||||
|
||||
if (inc.head == inc.tail)
|
||||
goto out;
|
||||
|
||||
for (;;) {
|
||||
if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail)
|
||||
goto out;
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
out:
|
||||
barrier(); /* make sure nothing creeps before the lock is taken */
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
@ -106,8 +100,11 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
|
||||
#endif
|
||||
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
|
||||
|
||||
__ticket_t inc = 0x0002;
|
||||
|
||||
asm volatile ("lock addw %1, %0\n"
|
||||
: "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc");
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
@ -134,6 +131,8 @@ typedef struct mcs_lock_node {
|
||||
unsigned long irqsave;
|
||||
} __attribute__((aligned(64))) mcs_lock_node_t;
|
||||
|
||||
typedef mcs_lock_node_t mcs_lock_t;
|
||||
|
||||
static void mcs_lock_init(struct mcs_lock_node *node)
|
||||
{
|
||||
node->locked = 0;
|
||||
@ -40,18 +40,42 @@
|
||||
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
|
||||
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#define GB_PAGE_SHIFT 30
|
||||
#define GB_PAGE_SIZE (1UL << GB_PAGE_SHIFT)
|
||||
#define GB_PAGE_MASK (~((unsigned long)GB_PAGE_SIZE - 1))
|
||||
#define GB_PAGE_P2ALIGN (GB_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
|
||||
#define USER_END 0x0000800000000000UL
|
||||
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
|
||||
|
||||
/*
|
||||
* Canonical negative addresses (i.e., the smallest kernel virtual address)
|
||||
* on x86 64 bit mode (in its most restricted 48 bit format) starts from
|
||||
* 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000.
|
||||
* The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables)
|
||||
* is used for Xen hypervisor (see arch/x86/include/asm/page.h) and that is
|
||||
* what we utilize for McKernel.
|
||||
* This gives us the benefit of being able to use Linux kernel virtual
|
||||
* addresses identically as in Linux.
|
||||
*
|
||||
* NOTE: update these also in eclair.c when modified!
|
||||
*/
|
||||
#define MAP_ST_START 0xffff800000000000UL
|
||||
#define MAP_VMAP_START 0xfffff00000000000UL
|
||||
#define MAP_FIXED_START 0xffffffff70000000UL
|
||||
#define MAP_KERNEL_START 0xffffffff80000000UL
|
||||
#define MAP_VMAP_START 0xffff850000000000UL
|
||||
#define MAP_FIXED_START 0xffff860000000000UL
|
||||
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
|
||||
/*
|
||||
* MAP_KERNEL_START is 8MB below MODULES_END in Linux.
|
||||
* Placing the LWK image in the virtual address space at the end of
|
||||
* the Linux modules section enables us to map the LWK TEXT in Linux
|
||||
* as well, so that Linux can also call into LWK text.
|
||||
*/
|
||||
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
|
||||
#define STACK_TOP(region) ((region)->user_end)
|
||||
|
||||
#define MAP_VMAP_SIZE 0x0000000100000000UL
|
||||
|
||||
#define KERNEL_PHYS_OFFSET MAP_ST_START
|
||||
|
||||
#define PTL4_SHIFT 39
|
||||
#define PTL4_SIZE (1UL << PTL4_SHIFT)
|
||||
#define PTL3_SHIFT 30
|
||||
@ -13,16 +13,16 @@
|
||||
#ifndef ARCH_CPU_H
|
||||
#define ARCH_CPU_H
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#define arch_barrier() asm volatile("" : : : "memory")
|
||||
|
||||
static inline void rmb(void)
|
||||
{
|
||||
barrier();
|
||||
arch_barrier();
|
||||
}
|
||||
|
||||
static inline void wmb(void)
|
||||
{
|
||||
barrier();
|
||||
arch_barrier();
|
||||
}
|
||||
|
||||
static unsigned long read_tsc(void)
|
||||
@ -133,7 +133,7 @@ static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
|
||||
* Note 2: xchg has side effect, so that attribute volatile is necessary,
|
||||
* but generally the primitive is invalid, *ptr is output argument. --ANK
|
||||
*/
|
||||
#define __xg(x) ((volatile long *)(x))
|
||||
#define __xg(x) ((volatile typeof(x))(x))
|
||||
|
||||
#define xchg4(ptr, x) \
|
||||
({ \
|
||||
@ -49,6 +49,7 @@ typedef struct x86_user_context ihk_mc_user_context_t;
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
|
||||
#define ihk_mc_syscall_number(uc) (uc)->gpr.orig_rax
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
|
||||
@ -189,9 +189,30 @@ struct tss64 {
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_basic_regs {
|
||||
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
|
||||
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
|
||||
unsigned long rip, cs, rflags, rsp, ss;
|
||||
unsigned long r15;
|
||||
unsigned long r14;
|
||||
unsigned long r13;
|
||||
unsigned long r12;
|
||||
unsigned long rbp;
|
||||
unsigned long rbx;
|
||||
unsigned long r11;
|
||||
unsigned long r10;
|
||||
unsigned long r9;
|
||||
unsigned long r8;
|
||||
unsigned long rax;
|
||||
unsigned long rcx;
|
||||
unsigned long rdx;
|
||||
unsigned long rsi;
|
||||
unsigned long rdi;
|
||||
union {
|
||||
unsigned long orig_rax; /* syscall */
|
||||
unsigned long error; /* interrupts */
|
||||
};
|
||||
unsigned long rip;
|
||||
unsigned long cs;
|
||||
unsigned long rflags;
|
||||
unsigned long rsp;
|
||||
unsigned long ss;
|
||||
};
|
||||
|
||||
struct x86_sregs {
|
||||
@ -18,6 +18,11 @@
|
||||
#define _NSIG_BPW 64
|
||||
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
|
||||
|
||||
/*
 * Range check for a signal number.
 *
 * Returns 1 when `sig` does not exceed _NSIG and 0 otherwise.  Note that
 * zero passes the check; only the upper bound is tested.
 */
static inline int valid_signal(unsigned long sig)
{
	if (sig > _NSIG) {
		return 0;
	}

	return 1;
}
|
||||
|
||||
typedef unsigned long int __sigset_t;
|
||||
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))
|
||||
|
||||
@ -39,7 +39,7 @@ SYSCALL_HANDLED(15, rt_sigreturn)
|
||||
SYSCALL_HANDLED(16, ioctl)
|
||||
SYSCALL_DELEGATED(17, pread64)
|
||||
SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_HANDLED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_DELEGATED(23, select)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
@ -56,7 +56,7 @@ SYSCALL_HANDLED(36, getitimer)
|
||||
SYSCALL_HANDLED(38, setitimer)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
SYSCALL_HANDLED(56, clone)
|
||||
SYSCALL_DELEGATED(57, fork)
|
||||
SYSCALL_HANDLED(57, fork)
|
||||
SYSCALL_HANDLED(58, vfork)
|
||||
SYSCALL_HANDLED(59, execve)
|
||||
SYSCALL_HANDLED(60, exit)
|
||||
@ -145,6 +145,8 @@ nmi:
|
||||
movq %rsp,%gs:PANIC_REGS+0x08
|
||||
|
||||
movl nmi_mode(%rip),%eax
|
||||
cmp $3,%rax
|
||||
je 4f
|
||||
cmp $1,%rax
|
||||
je 1f
|
||||
cmp $2,%rax
|
||||
@ -199,9 +201,9 @@ nmi:
|
||||
movl %eax,%gs:PANIC_REGS+0xA0
|
||||
movq $1,%gs:PANICED
|
||||
call ihk_mc_query_mem_areas
|
||||
1:
|
||||
4:
|
||||
hlt
|
||||
jmp 1b
|
||||
jmp 4b
|
||||
|
||||
.globl x86_syscall
|
||||
x86_syscall:
|
||||
@ -107,9 +107,17 @@ void init_boot_processor_local(void)
|
||||
@ ensures \result == %gs;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
extern int num_processors;
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
int id;
|
||||
void *gs;
|
||||
|
||||
gs = (void *)rdmsr(MSR_GS_BASE);
|
||||
if (gs < (void *)locals ||
|
||||
gs > ((void *)locals + LOCALS_SPAN * num_processors)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
asm volatile("movl %%gs:0, %0" : "=r"(id));
|
||||
|
||||
@ -41,6 +41,8 @@ extern char _head[], _end[];
|
||||
|
||||
extern unsigned long x86_kernel_phys_base;
|
||||
|
||||
int safe_kernel_map = 0;
|
||||
|
||||
/* Arch specific early allocation routine */
|
||||
void *early_alloc_pages(int nr_pages)
|
||||
{
|
||||
@ -109,6 +111,7 @@ struct page_table {
|
||||
};
|
||||
|
||||
static struct page_table *init_pt;
|
||||
static int init_pt_loaded = 0;
|
||||
static ihk_spinlock_t init_pt_lock;
|
||||
|
||||
static int use_1gb_page = 0;
|
||||
@ -172,19 +175,23 @@ static void init_normal_area(struct page_table *pt)
|
||||
unsigned long map_start, map_end, phys, pt_phys;
|
||||
int ident_index, virt_index;
|
||||
|
||||
map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
|
||||
/*
|
||||
* This has to start from 0x00, see load_file() in IHK-SMP.
|
||||
* For security reasons, we could skip holes in the LWK
|
||||
* assigned physical memory, but Linux mappings already map
|
||||
* those anyway.
|
||||
*/
|
||||
map_start = 0;
|
||||
map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
|
||||
|
||||
kprintf("map_start = %lx, map_end = %lx\n", map_start, map_end);
|
||||
ident_index = map_start >> PTL4_SHIFT;
|
||||
virt_index = (MAP_ST_START >> PTL4_SHIFT) & (PT_ENTRIES - 1);
|
||||
|
||||
memset(pt, 0, sizeof(struct page_table));
|
||||
|
||||
for (phys = (map_start & ~(PTL4_SIZE - 1)); phys < map_end;
|
||||
phys += PTL4_SIZE) {
|
||||
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL), phys,
|
||||
map_start, map_end);
|
||||
for (phys = map_start; phys < map_end; phys += PTL4_SIZE) {
|
||||
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL),
|
||||
phys, map_start, map_end);
|
||||
|
||||
pt->entry[ident_index++] = pt_phys | PFL4_PDIR_ATTR;
|
||||
pt->entry[virt_index++] = pt_phys | PFL4_PDIR_ATTR;
|
||||
@ -493,7 +500,7 @@ uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
|
||||
|
||||
error = ihk_mc_pt_virt_to_phys(pt, (void *)virt, &phys);
|
||||
if (error) {
|
||||
return 0;
|
||||
return PM_PSHIFT(PAGE_SHIFT);
|
||||
}
|
||||
|
||||
pagemap = PM_PFRAME(phys >> PAGE_SHIFT);
|
||||
@ -724,6 +731,26 @@ static void destroy_page_table(int level, struct page_table *pt)
|
||||
return;
|
||||
}
|
||||
|
||||
void ihk_mc_pt_destroy_pgd_subtree(struct page_table *pt, void *virt)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
struct page_table *lower;
|
||||
|
||||
GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);
|
||||
|
||||
if (!(pt->entry[l4idx] & PF_PRESENT))
|
||||
return;
|
||||
|
||||
lower = (struct page_table *)
|
||||
phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK);
|
||||
destroy_page_table(3, lower);
|
||||
|
||||
pt->entry[l4idx] = 0;
|
||||
dkprintf("%s: virt: 0x%lx, l4idx: %d subtree destroyed\n",
|
||||
__FUNCTION__, virt, l4idx);
|
||||
}
|
||||
|
||||
void ihk_mc_pt_destroy(struct page_table *pt)
|
||||
{
|
||||
const int level = 4; /* PML4 */
|
||||
@ -1542,7 +1569,6 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL1_SIZE, PTL1_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL1_SIZE, PTL1_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL1_SIZE;
|
||||
} else {
|
||||
dkprintf("%s: !calling memory_stat_rss_sub(),virt=%lx,phys=%lx\n", __FUNCTION__, base, pte_get_phys(&old));
|
||||
}
|
||||
@ -1611,7 +1637,6 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL2_SIZE, PTL2_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL2_SIZE, PTL2_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL2_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1693,7 +1718,6 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
|
||||
dkprintf("%lx-,%s: calling memory_stat_rss_sub(),phys=%lx,size=%ld,pgsize=%ld\n", pte_get_phys(&old), __FUNCTION__, pte_get_phys(&old), PTL3_SIZE, PTL3_SIZE);
|
||||
rusage_memory_stat_sub(args->memobj, PTL3_SIZE, PTL3_SIZE);
|
||||
}
|
||||
args->vm->currss -= PTL3_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1963,6 +1987,28 @@ out:
|
||||
return ptep;
|
||||
}
|
||||
|
||||
/*
 * Look up the PTE for `virt` in the page table of `vm`; when the first
 * lookup yields no present entry, trigger one populate fault on the
 * address and look again.
 *
 * The remaining arguments are handed unchanged to ihk_mc_pt_lookup_pte().
 * The fault is attempted at most once and its return value is not
 * inspected: the caller gets whatever the second lookup produced, which
 * may still be NULL or a non-present entry.
 */
pte_t *ihk_mc_pt_lookup_fault_pte(struct process_vm *vm, void *virt,
		int pgshift, void **basep, size_t *sizep, int *p2alignp)
{
	pte_t *entry;

	entry = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
			virt, pgshift, basep, sizep, p2alignp);
	if (entry && pte_is_present(entry)) {
		/* Already mapped, no fault needed. */
		return entry;
	}

	/* Single fault-in attempt followed by a second lookup. */
	page_fault_process_vm(vm, virt, PF_POPULATE | PF_USER);

	entry = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
			virt, pgshift, basep, sizep, p2alignp);
	if (entry && pte_is_present(entry)) {
		kprintf("%s: successfully faulted 0x%lx\n", __FUNCTION__, virt);
	}

	return entry;
}
|
||||
|
||||
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift,
|
||||
void **basep, size_t *sizep, int *p2alignp)
|
||||
{
|
||||
@ -2262,7 +2308,7 @@ out:
|
||||
|
||||
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
|
||||
int pgshift, struct vm_range *range)
|
||||
int pgshift, struct vm_range *range)
|
||||
{
|
||||
int error;
|
||||
struct set_range_args args;
|
||||
@ -2606,6 +2652,61 @@ void init_low_area(struct page_table *pt)
|
||||
set_pt_large_page(pt, 0, 0, PTATTR_NO_EXECUTE|PTATTR_WRITABLE);
|
||||
}
|
||||
|
||||
static void init_linux_kernel_mapping(struct page_table *pt)
|
||||
{
|
||||
unsigned long map_start, map_end, phys;
|
||||
void *virt;
|
||||
int nr_memory_chunks, chunk_id, numa_id;
|
||||
|
||||
/* In case of safe_kernel_map option (safe_kernel_map == 1),
|
||||
processing to prevent destruction of the memory area on Linux side
|
||||
is executed */
|
||||
if (safe_kernel_map == 0) {
|
||||
kprintf("Straight-map entire physical memory\n");
|
||||
|
||||
/* Map 2 TB for now */
|
||||
map_start = 0;
|
||||
map_end = 0x20000000000;
|
||||
|
||||
virt = (void *)LINUX_PAGE_OFFSET;
|
||||
|
||||
kprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
|
||||
LINUX_PAGE_OFFSET, LINUX_PAGE_OFFSET + map_end, 0, map_end);
|
||||
|
||||
for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) {
|
||||
if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) {
|
||||
kprintf("%s: error setting mapping for 0x%lx\n", __FUNCTION__, virt);
|
||||
}
|
||||
virt += LARGE_PAGE_SIZE;
|
||||
}
|
||||
} else {
|
||||
kprintf("Straight-map physical memory areas allocated to McKernel\n");
|
||||
|
||||
nr_memory_chunks = ihk_mc_get_nr_memory_chunks();
|
||||
if (nr_memory_chunks == 0) {
|
||||
kprintf("%s: ERROR: No memory chunk available.\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
for (chunk_id = 0; chunk_id < nr_memory_chunks; chunk_id++) {
|
||||
if (ihk_mc_get_memory_chunk(chunk_id, &map_start, &map_end, &numa_id)) {
|
||||
kprintf("%s: ERROR: Memory chunk id (%d) out of range.\n", __FUNCTION__, chunk_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
|
||||
LINUX_PAGE_OFFSET + map_start, LINUX_PAGE_OFFSET + map_end, map_start, map_end);
|
||||
|
||||
virt = (void *)(LINUX_PAGE_OFFSET + map_start);
|
||||
for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE, virt += LARGE_PAGE_SIZE) {
|
||||
if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) {
|
||||
kprintf("%s: set_pt_large_page() failed for 0x%lx\n", __FUNCTION__, virt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_vsyscall_area(struct page_table *pt)
|
||||
{
|
||||
extern char vsyscall_page[];
|
||||
@ -2631,13 +2732,15 @@ void init_page_table(void)
|
||||
|
||||
/* Normal memory area */
|
||||
init_normal_area(init_pt);
|
||||
init_linux_kernel_mapping(init_pt);
|
||||
init_fixed_area(init_pt);
|
||||
init_low_area(init_pt);
|
||||
init_text_area(init_pt);
|
||||
init_vsyscall_area(init_pt);
|
||||
|
||||
load_page_table(init_pt);
|
||||
kprintf("Page table is now at %p\n", init_pt);
|
||||
init_pt_loaded = 1;
|
||||
kprintf("Page table is now at 0x%lx\n", init_pt);
|
||||
}
|
||||
|
||||
extern void __reserve_arch_pages(unsigned long, unsigned long,
|
||||
@ -2665,17 +2768,33 @@ void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
|
||||
unsigned long virt_to_phys(void *v)
|
||||
{
|
||||
unsigned long va = (unsigned long)v;
|
||||
|
||||
|
||||
if (va >= MAP_KERNEL_START) {
|
||||
dkprintf("%s: MAP_KERNEL_START <= 0x%lx <= LINUX_PAGE_OFFSET\n",
|
||||
__FUNCTION__, va);
|
||||
return va - MAP_KERNEL_START + x86_kernel_phys_base;
|
||||
} else {
|
||||
}
|
||||
else if (va >= LINUX_PAGE_OFFSET) {
|
||||
return va - LINUX_PAGE_OFFSET;
|
||||
}
|
||||
else if (va >= MAP_FIXED_START) {
|
||||
return va - MAP_FIXED_START;
|
||||
}
|
||||
else {
|
||||
dkprintf("%s: MAP_ST_START <= 0x%lx <= MAP_FIXED_START\n",
|
||||
__FUNCTION__, va);
|
||||
return va - MAP_ST_START;
|
||||
}
|
||||
}
|
||||
|
||||
void *phys_to_virt(unsigned long p)
|
||||
{
|
||||
return (void *)(p + MAP_ST_START);
|
||||
/* Before loading our own PT use straight mapping */
|
||||
if (!init_pt_loaded) {
|
||||
return (void *)(p + MAP_ST_START);
|
||||
}
|
||||
|
||||
return (void *)(p + LINUX_PAGE_OFFSET);
|
||||
}
|
||||
|
||||
int copy_from_user(void *dst, const void *src, size_t siz)
|
||||
@ -10,9 +10,12 @@
|
||||
#include <ihk/perfctr.h>
|
||||
#include <march.h>
|
||||
#include <errno.h>
|
||||
#include <cls.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/cpu.h>
|
||||
#include <registers.h>
|
||||
#include <mc_perf_event.h>
|
||||
#include <config.h>
|
||||
|
||||
extern unsigned int *x86_march_perfmap;
|
||||
extern int running_on_kvm(void);
|
||||
@ -57,6 +60,10 @@ void x86_init_perfctr(void)
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
#ifndef ENABLE_PERF
|
||||
return;
|
||||
#endif //ENABLE_PERF
|
||||
|
||||
/* Do not do it on KVM */
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
@ -93,7 +100,7 @@ void x86_init_perfctr(void)
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
|
||||
}
|
||||
|
||||
|
||||
/* Enable PMC Control */
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= X86_IA32_PERF_COUNTERS_MASK;
|
||||
@ -254,6 +261,41 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
|
||||
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
|
||||
{
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
// allocate extra_reg
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_1;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->extra_reg.idx = EXTRA_REG_RSP_0;
|
||||
}
|
||||
|
||||
if (thread->extra_reg_alloc_map & (1UL << event->extra_reg.idx)) {
|
||||
// extra_regs are full
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (event->extra_reg.idx == EXTRA_REG_RSP_0) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_0);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_0;
|
||||
}
|
||||
else if (event->extra_reg.idx == EXTRA_REG_RSP_1) {
|
||||
event->hw_config &= ~0xffUL;
|
||||
event->hw_config |= ihk_mc_get_extra_reg_event(EXTRA_REG_RSP_1);
|
||||
event->extra_reg.reg = MSR_OFFCORE_RSP_1;
|
||||
}
|
||||
|
||||
thread->extra_reg_alloc_map |= (1UL << event->extra_reg.idx);
|
||||
wrmsr(event->extra_reg.reg, event->extra_reg.config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MARCH_PERFCTR_START
|
||||
extern void x86_march_perfctr_start(unsigned long counter_mask);
|
||||
#endif
|
||||
@ -29,11 +29,12 @@
|
||||
#include <prctl.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <page.h>
|
||||
#include <limits.h>
|
||||
#include <syscall.h>
|
||||
|
||||
void terminate(int, int);
|
||||
void terminate_mcexec(int, int);
|
||||
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
|
||||
long syscall(int num, ihk_mc_user_context_t *ctx);
|
||||
extern void save_fp_regs(struct thread *proc);
|
||||
void set_signal(int sig, void *regs0, siginfo_t *info);
|
||||
void check_signal(unsigned long rc, void *regs0, int num);
|
||||
extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long,
|
||||
@ -142,8 +143,6 @@ SYSCALL_DECLARE(rt_sigaction)
|
||||
struct k_sigaction new_sa, old_sa;
|
||||
int rc;
|
||||
|
||||
if(sig == SIGKILL || sig == SIGSTOP || sig <= 0 || sig > 64)
|
||||
return -EINVAL;
|
||||
if (sigsetsize != sizeof(sigset_t))
|
||||
return -EINVAL;
|
||||
|
||||
@ -251,8 +250,8 @@ SYSCALL_DECLARE(rt_sigreturn)
|
||||
regs->gpr.rflags &= ~RFLAGS_TF;
|
||||
info.si_code = TRAP_TRACE;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
check_signal(0, regs, 0);
|
||||
}
|
||||
|
||||
if(ksigsp.fpregs && xsavesize){
|
||||
@ -279,6 +278,7 @@ SYSCALL_DECLARE(rt_sigreturn)
|
||||
extern struct cpu_local_var *clv;
|
||||
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
|
||||
extern void interrupt_syscall(struct thread *, int sig);
|
||||
extern void terminate(int, int);
|
||||
extern int num_processors;
|
||||
|
||||
#define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \
|
||||
@ -460,7 +460,6 @@ void set_single_step(struct thread *thread)
|
||||
|
||||
long ptrace_read_fpregs(struct thread *thread, void *fpregs)
|
||||
{
|
||||
save_fp_regs(thread);
|
||||
if (thread->fp_regs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -470,7 +469,6 @@ long ptrace_read_fpregs(struct thread *thread, void *fpregs)
|
||||
|
||||
long ptrace_write_fpregs(struct thread *thread, void *fpregs)
|
||||
{
|
||||
save_fp_regs(thread);
|
||||
if (thread->fp_regs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -540,7 +538,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
|
||||
/* Transition thread state */
|
||||
proc->status = PS_TRACED;
|
||||
thread->status = PS_TRACED;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL_MASK;
|
||||
proc->ptrace &= ~PT_TRACE_SYSCALL;
|
||||
if (sig == SIGSTOP || sig == SIGTSTP ||
|
||||
sig == SIGTTIN || sig == SIGTTOU) {
|
||||
proc->signal_flags |= SIGNAL_STOP_STOPPED;
|
||||
@ -809,6 +807,11 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
|
||||
regs->gpr.rip = (unsigned long)k->sa.sa_handler;
|
||||
regs->gpr.rsp = (unsigned long)usp;
|
||||
|
||||
// check signal handler is ONESHOT
|
||||
if (k->sa.sa_flags & SA_RESETHAND) {
|
||||
k->sa.sa_handler = SIG_DFL;
|
||||
}
|
||||
|
||||
if(!(k->sa.sa_flags & SA_NODEFER))
|
||||
thread->sigmask.__val[0] |= pending->sigmask.__val[0];
|
||||
kfree(pending);
|
||||
@ -820,8 +823,8 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
|
||||
regs->gpr.rflags &= ~RFLAGS_TF;
|
||||
info.si_code = TRAP_TRACE;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
check_signal(0, regs, 0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -927,6 +930,7 @@ do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pendi
|
||||
break;
|
||||
case SIGCHLD:
|
||||
case SIGURG:
|
||||
case SIGWINCH:
|
||||
break;
|
||||
default:
|
||||
dkprintf("do_signal,default,terminate,sig=%d\n", sig);
|
||||
@ -1007,6 +1011,12 @@ interrupt_from_user(void *regs0)
|
||||
return !(regs->gpr.rsp & 0x8000000000000000);
|
||||
}
|
||||
|
||||
/*
 * Hook for recording a system call's return value.  Nothing needs to be
 * done on x86, so both arguments are ignored.
 */
void save_syscall_return_value(int num, unsigned long rc)
{
	/* Empty on x86 */
	(void)num;
	(void)rc;
}
|
||||
|
||||
void
|
||||
check_signal(unsigned long rc, void *regs0, int num)
|
||||
{
|
||||
@ -1054,6 +1064,110 @@ out:
|
||||
return;
|
||||
}
|
||||
|
||||
static int
|
||||
check_sig_pending_thread(struct thread *thread)
|
||||
{
|
||||
int found = 0;
|
||||
struct list_head *head;
|
||||
mcs_rwlock_lock_t *lock;
|
||||
struct mcs_rwlock_node_irqsave mcs_rw_node;
|
||||
struct sig_pending *next;
|
||||
struct sig_pending *pending;
|
||||
__sigset_t w;
|
||||
__sigset_t x;
|
||||
int sig = 0;
|
||||
struct k_sigaction *k;
|
||||
struct cpu_local_var *v;
|
||||
|
||||
v = get_this_cpu_local_var();
|
||||
w = thread->sigmask.__val[0];
|
||||
|
||||
lock = &thread->sigcommon->lock;
|
||||
head = &thread->sigcommon->sigpending;
|
||||
for (;;) {
|
||||
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
|
||||
|
||||
list_for_each_entry_safe(pending, next, head, list){
|
||||
for (x = pending->sigmask.__val[0], sig = 0; x;
|
||||
sig++, x >>= 1);
|
||||
k = thread->sigcommon->action + sig - 1;
|
||||
if ((sig != SIGCHLD && sig != SIGURG) ||
|
||||
(k->sa.sa_handler != (void *)1 &&
|
||||
k->sa.sa_handler != NULL)) {
|
||||
if (!(pending->sigmask.__val[0] & w)) {
|
||||
if (pending->interrupted == 0) {
|
||||
pending->interrupted = 1;
|
||||
found = 1;
|
||||
if (sig != SIGCHLD &&
|
||||
sig != SIGURG &&
|
||||
!k->sa.sa_handler) {
|
||||
found = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
|
||||
|
||||
if (found == 2) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (lock == &thread->sigpendinglock) {
|
||||
break;
|
||||
}
|
||||
|
||||
lock = &thread->sigpendinglock;
|
||||
head = &thread->sigpending;
|
||||
}
|
||||
|
||||
if (found == 2) {
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
|
||||
terminate_mcexec(0, sig);
|
||||
return 1;
|
||||
}
|
||||
else if (found == 1) {
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
|
||||
interrupt_syscall(thread, 0);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
check_sig_pending()
|
||||
{
|
||||
struct thread *thread;
|
||||
struct cpu_local_var *v;
|
||||
|
||||
if (clv == NULL)
|
||||
return;
|
||||
|
||||
v = get_this_cpu_local_var();
|
||||
repeat:
|
||||
v->runq_irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
|
||||
list_for_each_entry(thread, &(v->runq), sched_list) {
|
||||
|
||||
if (thread == NULL || thread == &cpu_local_var(idle)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (thread->in_syscall_offload == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (thread->proc->exit_status & 0x0000000100000000L) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (check_sig_pending_thread(thread))
|
||||
goto repeat;
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
|
||||
}
|
||||
|
||||
unsigned long
|
||||
do_kill(struct thread *thread, int pid, int tid, int sig, siginfo_t *info,
|
||||
int ptracecont)
|
||||
@ -1215,15 +1329,19 @@ done:
|
||||
mcs_rwlock_reader_lock_noirq(&tproc->update_lock, &updatelock);
|
||||
savelock = &tthread->sigpendinglock;
|
||||
head = &tthread->sigpending;
|
||||
if(sig == SIGKILL ||
|
||||
(tproc->status != PS_EXITED &&
|
||||
tproc->status != PS_ZOMBIE &&
|
||||
tthread->status != PS_EXITED)){
|
||||
hold_thread(tthread);
|
||||
mcs_rwlock_reader_lock_noirq(&tproc->threads_lock, &lock);
|
||||
if (tthread->status != PS_EXITED &&
|
||||
(sig == SIGKILL ||
|
||||
(tproc->status != PS_EXITED && tproc->status != PS_ZOMBIE))) {
|
||||
if ((rc = hold_thread(tthread))) {
|
||||
kprintf("%s: ERROR hold_thread returned %d,tid=%d\n", __FUNCTION__, rc, tthread->tid);
|
||||
tthread = NULL;
|
||||
}
|
||||
}
|
||||
else{
|
||||
tthread = NULL;
|
||||
}
|
||||
mcs_rwlock_reader_unlock_noirq(&tproc->threads_lock, &lock);
|
||||
mcs_rwlock_reader_unlock_noirq(&tproc->update_lock, &updatelock);
|
||||
mcs_rwlock_reader_unlock_noirq(&thash->lock[hash], &lock);
|
||||
}
|
||||
@ -1250,7 +1368,9 @@ done:
|
||||
}
|
||||
|
||||
if (tthread->thread_offloaded) {
|
||||
interrupt_syscall(tthread, sig);
|
||||
if (!tthread->proc->nohost) {
|
||||
interrupt_syscall(tthread, sig);
|
||||
}
|
||||
release_thread(tthread);
|
||||
return 0;
|
||||
}
|
||||
@ -1285,6 +1405,7 @@ done:
|
||||
rc = -ENOMEM;
|
||||
}
|
||||
else{
|
||||
memset(pending, 0, sizeof(struct sig_pending));
|
||||
pending->sigmask.__val[0] = mask;
|
||||
memcpy(&pending->info, info, sizeof(siginfo_t));
|
||||
pending->ptracecont = ptracecont;
|
||||
@ -1308,9 +1429,6 @@ done:
|
||||
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(tthread->cpu_id)->apic_id, 0xd0);
|
||||
}
|
||||
|
||||
if(!tthread->proc->nohost)
|
||||
interrupt_syscall(tthread, 0);
|
||||
|
||||
if (status != PS_RUNNING) {
|
||||
if(sig == SIGKILL){
|
||||
/* Wake up the target only when stopped by ptrace-reporting */
|
||||
@ -1336,15 +1454,19 @@ set_signal(int sig, void *regs0, siginfo_t *info)
|
||||
struct x86_user_context *regs = regs0;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
|
||||
if(thread == NULL || thread->proc->pid == 0)
|
||||
if (thread == NULL || thread->proc->pid == 0)
|
||||
return;
|
||||
|
||||
if((__sigmask(sig) & thread->sigmask.__val[0]) ||
|
||||
(regs->gpr.rsp & 0x8000000000000000)){
|
||||
if (!interrupt_from_user(regs)) {
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
panic("panic: kernel mode signal");
|
||||
}
|
||||
|
||||
if ((__sigmask(sig) & thread->sigmask.__val[0])) {
|
||||
coredump(thread, regs0);
|
||||
terminate(0, sig | 0x80);
|
||||
}
|
||||
do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0);
|
||||
do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0);
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(mmap)
|
||||
@ -1483,6 +1605,16 @@ SYSCALL_DECLARE(clone)
|
||||
ihk_mc_syscall_sp(ctx));
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(fork)
|
||||
{
|
||||
return do_fork(SIGCHLD, 0, 0, 0, 0, ihk_mc_syscall_pc(ctx), ihk_mc_syscall_sp(ctx));
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(vfork)
|
||||
{
|
||||
return do_fork(CLONE_VFORK|SIGCHLD, 0, 0, 0, 0, ihk_mc_syscall_pc(ctx), ihk_mc_syscall_sp(ctx));
|
||||
}
|
||||
|
||||
SYSCALL_DECLARE(shmget)
|
||||
{
|
||||
const key_t key = ihk_mc_syscall_arg0(ctx);
|
||||
@ -1907,4 +2039,644 @@ save_uctx(void *uctx, struct x86_user_context *regs)
|
||||
ctx->fregsize = 0;
|
||||
}
|
||||
|
||||
int do_process_vm_read_writev(int pid,
|
||||
const struct iovec *local_iov,
|
||||
unsigned long liovcnt,
|
||||
const struct iovec *remote_iov,
|
||||
unsigned long riovcnt,
|
||||
unsigned long flags,
|
||||
int op)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
int li, ri;
|
||||
int pli, pri;
|
||||
off_t loff, roff;
|
||||
size_t llen = 0, rlen = 0;
|
||||
size_t copied = 0;
|
||||
size_t to_copy;
|
||||
struct thread *lthread = cpu_local_var(current);
|
||||
struct process *rproc;
|
||||
struct process *lproc = lthread->proc;
|
||||
struct process_vm *rvm = NULL;
|
||||
unsigned long rphys;
|
||||
unsigned long rpage_left;
|
||||
unsigned long psize;
|
||||
void *rva;
|
||||
struct vm_range *range;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
struct mcs_rwlock_node update_lock;
|
||||
|
||||
/* Sanity checks */
|
||||
if (flags) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (liovcnt > IOV_MAX || riovcnt > IOV_MAX) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Check if parameters are okay */
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov,
|
||||
(uintptr_t)(local_iov + liovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)remote_iov,
|
||||
(uintptr_t)(remote_iov + riovcnt * sizeof(struct iovec)));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto arg_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
arg_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (li = 0; li < liovcnt; ++li) {
|
||||
llen += local_iov[li].iov_len;
|
||||
dkprintf("local_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
li, local_iov[li].iov_base, local_iov[li].iov_len);
|
||||
}
|
||||
|
||||
for (ri = 0; ri < riovcnt; ++ri) {
|
||||
rlen += remote_iov[ri].iov_len;
|
||||
dkprintf("remote_iov[%d].iov_base: 0x%lx, len: %lu\n",
|
||||
ri, remote_iov[ri].iov_base, remote_iov[ri].iov_len);
|
||||
}
|
||||
|
||||
if (llen != rlen) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Find remote process */
|
||||
rproc = find_process(pid, &lock);
|
||||
if (!rproc) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mcs_rwlock_reader_lock_noirq(&rproc->update_lock, &update_lock);
|
||||
if(rproc->status == PS_EXITED ||
|
||||
rproc->status == PS_ZOMBIE){
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
rvm = rproc->vm;
|
||||
hold_process_vm(rvm);
|
||||
mcs_rwlock_reader_unlock_noirq(&rproc->update_lock, &update_lock);
|
||||
process_unlock(rproc, &lock);
|
||||
|
||||
if (lproc->euid != 0 &&
|
||||
(lproc->ruid != rproc->ruid ||
|
||||
lproc->ruid != rproc->euid ||
|
||||
lproc->ruid != rproc->suid ||
|
||||
lproc->rgid != rproc->rgid ||
|
||||
lproc->rgid != rproc->egid ||
|
||||
lproc->rgid != rproc->sgid)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("pid %d found, doing %s: liovcnt: %d, riovcnt: %d \n", pid,
|
||||
(op == PROCESS_VM_READ) ? "PROCESS_VM_READ" : "PROCESS_VM_WRITE",
|
||||
liovcnt, riovcnt);
|
||||
|
||||
pli = pri = -1; /* Previous indeces in iovecs */
|
||||
li = ri = 0; /* Current indeces in iovecs */
|
||||
loff = roff = 0; /* Offsets in current iovec */
|
||||
|
||||
/* Now iterate and do the copy */
|
||||
while (copied < llen) {
|
||||
int faulted = 0;
|
||||
|
||||
/* New local vector? */
|
||||
if (pli != li) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + 1));
|
||||
|
||||
if (!range) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(lthread->vm,
|
||||
(uintptr_t)local_iov[li].iov_base,
|
||||
(uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_WRITE : VR_PROT_READ))) {
|
||||
ret = -EFAULT;
|
||||
goto pli_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pli_out:
|
||||
ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pli = li;
|
||||
}
|
||||
|
||||
/* New remote vector? */
|
||||
if (pri != ri) {
|
||||
struct vm_range *range;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
/* Is base valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + 1));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
/* Is range valid? */
|
||||
range = lookup_process_memory_range(rvm,
|
||||
(uintptr_t)remote_iov[li].iov_base,
|
||||
(uintptr_t)(remote_iov[li].iov_base + remote_iov[li].iov_len));
|
||||
|
||||
if (range == NULL) {
|
||||
ret = -EINVAL;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
if (!(range->flag & ((op == PROCESS_VM_READ) ?
|
||||
VR_PROT_READ : VR_PROT_WRITE))) {
|
||||
ret = -EFAULT;
|
||||
goto pri_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
pri_out:
|
||||
ihk_mc_spinlock_unlock_noirq(&rvm->memory_range_lock);
|
||||
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
pri = ri;
|
||||
}
|
||||
|
||||
/* Figure out how much we can copy at most in this iteration */
|
||||
to_copy = (local_iov[li].iov_len - loff);
|
||||
if ((remote_iov[ri].iov_len - roff) < to_copy) {
|
||||
to_copy = remote_iov[ri].iov_len - roff;
|
||||
}
|
||||
|
||||
retry_lookup:
|
||||
/* TODO: remember page and do this only if necessary */
|
||||
ret = ihk_mc_pt_virt_to_phys_size(rvm->address_space->page_table,
|
||||
remote_iov[ri].iov_base + roff, &rphys, &psize);
|
||||
|
||||
if (ret) {
|
||||
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
|
||||
void *addr;
|
||||
|
||||
if (faulted) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Fault in pages */
|
||||
for (addr = (void *)
|
||||
(((unsigned long)remote_iov[ri].iov_base + roff)
|
||||
& PAGE_MASK);
|
||||
addr < (remote_iov[ri].iov_base + roff + to_copy);
|
||||
addr += PAGE_SIZE) {
|
||||
|
||||
ret = page_fault_process_vm(rvm, addr, reason);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
faulted = 1;
|
||||
goto retry_lookup;
|
||||
}
|
||||
|
||||
rpage_left = ((((unsigned long)remote_iov[ri].iov_base + roff +
|
||||
psize) & ~(psize - 1)) -
|
||||
((unsigned long)remote_iov[ri].iov_base + roff));
|
||||
if (rpage_left < to_copy) {
|
||||
to_copy = rpage_left;
|
||||
}
|
||||
|
||||
rva = phys_to_virt(rphys);
|
||||
|
||||
fast_memcpy(
|
||||
(op == PROCESS_VM_READ) ? local_iov[li].iov_base + loff : rva,
|
||||
(op == PROCESS_VM_READ) ? rva : local_iov[li].iov_base + loff,
|
||||
to_copy);
|
||||
|
||||
copied += to_copy;
|
||||
dkprintf("local_iov[%d]: 0x%lx %s remote_iov[%d]: 0x%lx, %lu copied, psize: %lu, rpage_left: %lu\n",
|
||||
li, local_iov[li].iov_base + loff,
|
||||
(op == PROCESS_VM_READ) ? "<-" : "->",
|
||||
ri, remote_iov[ri].iov_base + roff, to_copy,
|
||||
psize, rpage_left);
|
||||
|
||||
loff += to_copy;
|
||||
roff += to_copy;
|
||||
|
||||
if (loff == local_iov[li].iov_len) {
|
||||
li++;
|
||||
loff = 0;
|
||||
}
|
||||
|
||||
if (roff == remote_iov[ri].iov_len) {
|
||||
ri++;
|
||||
roff = 0;
|
||||
}
|
||||
}
|
||||
|
||||
release_process_vm(rvm);
|
||||
|
||||
return copied;
|
||||
|
||||
out:
|
||||
if(rvm)
|
||||
release_process_vm(rvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int move_pages_smp_handler(int cpu_index, int nr_cpus, void *arg)
|
||||
{
|
||||
int i, i_s, i_e, phase = 1;
|
||||
struct move_pages_smp_req *mpsr =
|
||||
(struct move_pages_smp_req *)arg;
|
||||
struct process_vm *vm = mpsr->proc->vm;
|
||||
int count = mpsr->count;
|
||||
struct page_table *save_pt;
|
||||
extern struct page_table *get_init_page_table(void);
|
||||
|
||||
i_s = (count / nr_cpus) * cpu_index;
|
||||
i_e = i_s + (count / nr_cpus);
|
||||
if (cpu_index == (nr_cpus - 1)) {
|
||||
i_e = count;
|
||||
}
|
||||
|
||||
/* Load target process' PT so that we can access user-space */
|
||||
save_pt = cpu_local_var(current) == &cpu_local_var(idle) ?
|
||||
get_init_page_table() :
|
||||
cpu_local_var(current)->vm->address_space->page_table;
|
||||
|
||||
if (save_pt != vm->address_space->page_table) {
|
||||
ihk_mc_load_page_table(vm->address_space->page_table);
|
||||
}
|
||||
else {
|
||||
save_pt = NULL;
|
||||
}
|
||||
|
||||
if (nr_cpus == 1) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus > 1 && nr_cpus < 4) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
case 1:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 4 && nr_cpus < 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * count);
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 3:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (nr_cpus >= 8) {
|
||||
switch (cpu_index) {
|
||||
case 0:
|
||||
memcpy(mpsr->virt_addr, mpsr->user_virt_addr,
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 1:
|
||||
memcpy(mpsr->virt_addr + (count / 2),
|
||||
mpsr->user_virt_addr + (count / 2),
|
||||
sizeof(void *) * (count / 2));
|
||||
break;
|
||||
case 2:
|
||||
memcpy(mpsr->status, mpsr->user_status,
|
||||
sizeof(int) * count);
|
||||
break;
|
||||
case 3:
|
||||
memcpy(mpsr->nodes, mpsr->user_nodes,
|
||||
sizeof(int) * count);
|
||||
mpsr->nodes_ready = 1;
|
||||
break;
|
||||
case 4:
|
||||
memset(mpsr->ptep, 0, sizeof(pte_t) * count);
|
||||
break;
|
||||
case 5:
|
||||
memset(mpsr->status, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 6:
|
||||
memset(mpsr->nr_pages, 0, sizeof(int) * count);
|
||||
break;
|
||||
case 7:
|
||||
memset(mpsr->dst_phys, 0,
|
||||
sizeof(unsigned long) * count);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (!(volatile int)mpsr->nodes_ready) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
/* NUMA verification in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
if (mpsr->nodes[i] < 0 ||
|
||||
mpsr->nodes[i] >= ihk_mc_get_nr_numa_nodes() ||
|
||||
!test_bit(mpsr->nodes[i],
|
||||
mpsr->proc->vm->numa_mask)) {
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* PTE lookup in parallel */
|
||||
for (i = i_s; i < i_e; i++) {
|
||||
void *phys;
|
||||
size_t pgsize;
|
||||
int p2align;
|
||||
/*
|
||||
* XXX: No page structures for anonymous mappings.
|
||||
* Look up physical addresses by scanning page tables.
|
||||
*/
|
||||
mpsr->ptep[i] = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
(void *)mpsr->virt_addr[i], 0, &phys, &pgsize, &p2align);
|
||||
|
||||
/* PTE valid? */
|
||||
if (!mpsr->ptep[i] || !pte_is_present(mpsr->ptep[i])) {
|
||||
mpsr->status[i] = -ENOENT;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* PTE is file? */
|
||||
if (pte_is_fileoff(mpsr->ptep[i], PAGE_SIZE)) {
|
||||
mpsr->status[i] = -EINVAL;
|
||||
mpsr->ptep[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu requested to be moved to node %d\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i], pgsize, mpsr->nodes[i]);
|
||||
|
||||
/* Large page? */
|
||||
if (pgsize > PAGE_SIZE) {
|
||||
int nr_sub_pages = (pgsize / PAGE_SIZE);
|
||||
int j;
|
||||
|
||||
if (i + nr_sub_pages > count) {
|
||||
kprintf("%s: ERROR: page at index %d exceeds the region\n",
|
||||
__FUNCTION__, i);
|
||||
mpsr->status[i] = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Is it contiguous across nr_sub_pages and all
|
||||
* requested to be moved to the same target node? */
|
||||
for (j = 0; j < nr_sub_pages; ++j) {
|
||||
if (mpsr->virt_addr[i + j] !=
|
||||
(mpsr->virt_addr[i] + (j * PAGE_SIZE)) ||
|
||||
mpsr->nodes[i] != mpsr->nodes[i + j]) {
|
||||
kprintf("%s: ERROR: virt address or node at index %d"
|
||||
" is inconsistent\n",
|
||||
__FUNCTION__, i + j);
|
||||
mpsr->phase_ret = -EINVAL;
|
||||
goto pte_out;
|
||||
}
|
||||
}
|
||||
|
||||
mpsr->nr_pages[i] = nr_sub_pages;
|
||||
i += (nr_sub_pages - 1);
|
||||
}
|
||||
else {
|
||||
mpsr->nr_pages[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
pte_out:
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
if (cpu_index == 0) {
|
||||
/* Allocate new pages on target NUMA nodes */
|
||||
for (i = 0; i < count; i++) {
|
||||
int pgalign = 0;
|
||||
int j;
|
||||
void *dst;
|
||||
|
||||
if (!mpsr->ptep[i] || mpsr->status[i] < 0 || !mpsr->nr_pages[i])
|
||||
continue;
|
||||
|
||||
/* TODO: store pgalign info in an array as well? */
|
||||
if (mpsr->nr_pages[i] > 1) {
|
||||
if (mpsr->nr_pages[i] * PAGE_SIZE == PTL2_SIZE)
|
||||
pgalign = PTL2_SHIFT - PTL1_SHIFT;
|
||||
}
|
||||
|
||||
dst = ihk_mc_alloc_aligned_pages_node(mpsr->nr_pages[i],
|
||||
pgalign, IHK_MC_AP_USER, mpsr->nodes[i]);
|
||||
|
||||
if (!dst) {
|
||||
mpsr->status[i] = -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = i; j < (i + mpsr->nr_pages[i]); ++j) {
|
||||
mpsr->status[j] = mpsr->nodes[i];
|
||||
}
|
||||
|
||||
mpsr->dst_phys[i] = virt_to_phys(dst);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu to node %d, pgalign: %d,"
|
||||
" allocated phys: 0x%lx\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->nodes[i], pgalign, mpsr->dst_phys[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Barrier */
|
||||
ihk_atomic_inc(&mpsr->phase_done);
|
||||
while (ihk_atomic_read(&mpsr->phase_done) <
|
||||
(phase * nr_cpus)) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
if (mpsr->phase_ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dkprintf("%s: phase %d done\n", __FUNCTION__, phase);
|
||||
++phase;
|
||||
|
||||
/* Copy, PTE update, memfree in parallel */
|
||||
for (i = i_s; i < i_e; ++i) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
fast_memcpy(phys_to_virt(mpsr->dst_phys[i]),
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i] * PAGE_SIZE);
|
||||
|
||||
ihk_mc_free_pages(
|
||||
phys_to_virt(pte_get_phys(mpsr->ptep[i])),
|
||||
mpsr->nr_pages[i]);
|
||||
|
||||
pte_update_phys(mpsr->ptep[i], mpsr->dst_phys[i]);
|
||||
|
||||
dkprintf("%s: virt 0x%lx:%lu copied and remapped to phys: 0x%lu\n",
|
||||
__FUNCTION__, mpsr->virt_addr[i],
|
||||
mpsr->nr_pages[i] * PAGE_SIZE,
|
||||
mpsr->dst_phys[i]);
|
||||
}
|
||||
|
||||
/* XXX: do a separate SMP call with only CPUs running threads
|
||||
* of this process? */
|
||||
if (cpu_local_var(current)->proc == mpsr->proc) {
|
||||
/* Invalidate all TLBs */
|
||||
for (i = 0; i < mpsr->count; i++) {
|
||||
if (!mpsr->dst_phys[i])
|
||||
continue;
|
||||
|
||||
flush_tlb_single((unsigned long)mpsr->virt_addr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (save_pt) {
|
||||
ihk_mc_load_page_table(save_pt);
|
||||
}
|
||||
|
||||
return mpsr->phase_ret;
|
||||
}
|
||||
|
||||
time_t time(void) {
|
||||
struct syscall_request sreq IHK_DMA_ALIGN;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
time_t ret;
|
||||
sreq.number = __NR_time;
|
||||
sreq.args[0] = (uintptr_t)NULL;
|
||||
ret = (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*** End of File ***/
|
||||
@ -102,7 +102,7 @@ int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
if (error) {
|
||||
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
*(volatile int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
}
|
||||
return error;
|
||||
} /* vsyscall_gettimeofday() */
|
||||
@ -45,7 +45,6 @@ error_exit() {
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
fi
|
||||
|
||||
if [ ! -e /tmp/mcos ]; then
|
||||
@ -19,6 +19,7 @@ ETCDIR=@ETCDIR@
|
||||
KMODDIR="${prefix}/kmod"
|
||||
KERNDIR="${prefix}/@TARGET@/kernel"
|
||||
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
|
||||
MCK_BUILDID=@BUILDID@
|
||||
|
||||
mem="512M@0"
|
||||
cpus=""
|
||||
@ -43,8 +44,12 @@ fi
|
||||
|
||||
turbo=""
|
||||
ihk_irq=""
|
||||
safe_kernel_map=""
|
||||
umask_old=`umask`
|
||||
idle_halt=""
|
||||
allow_oversubscribe=""
|
||||
|
||||
while getopts :tk:c:m:o:f:r:q:i:d: OPT
|
||||
while getopts :stk:c:m:o:f:r:q:i:d:e:hO OPT
|
||||
do
|
||||
case ${OPT} in
|
||||
f) facility=${OPTARG}
|
||||
@ -57,21 +62,32 @@ do
|
||||
;;
|
||||
m) mem=${OPTARG}
|
||||
;;
|
||||
s) safe_kernel_map="safe_kernel_map"
|
||||
;;
|
||||
r) ikc_map=${OPTARG}
|
||||
;;
|
||||
q) ihk_irq=${OPTARG}
|
||||
;;
|
||||
t) turbo="turbo"
|
||||
;;
|
||||
e) extra_kopts=${OPTARG}
|
||||
;;
|
||||
d) DUMP_LEVEL=${OPTARG}
|
||||
;;
|
||||
i) mon_interval=${OPTARG}
|
||||
;;
|
||||
h) idle_halt="idle_halt"
|
||||
;;
|
||||
O) allow_oversubscribe="allow_oversubscribe"
|
||||
;;
|
||||
*) echo "invalid option -${OPT}" >&2
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
redirect_kmsg=0
|
||||
turbo="turbo"
|
||||
|
||||
# Start ihkmond
|
||||
pid=`pidof ihkmond`
|
||||
if [ "${pid}" != "" ]; then
|
||||
@ -80,6 +96,16 @@ fi
|
||||
if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then
|
||||
${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
|
||||
fi
|
||||
|
||||
# Disable the irqbalance_mck systemd unit and delete its unit file.
# Does nothing (and returns 0) when the unit file is not installed.
disable_irqbalance_mck() {
	[ -f /etc/systemd/system/irqbalance_mck.service ] || return 0

	systemctl disable irqbalance_mck.service >/dev/null 2>&1

	# The invalid .service file is left behind by systemd,
	# so it has to be removed by hand
	rm -f /etc/systemd/system/irqbalance_mck.service
}
|
||||
|
||||
#
|
||||
# Revert any state that has been initialized before the error occurred.
|
||||
#
|
||||
@ -87,6 +113,16 @@ error_exit() {
|
||||
local status=$1
|
||||
|
||||
case $status in
|
||||
irqbalance_mck_started)
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
|
||||
echo "warning: failed to stop irqbalance_mck" >&2
|
||||
fi
|
||||
disable_irqbalance_mck
|
||||
fi
|
||||
fi
|
||||
;&
|
||||
mcos_sys_mounted)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umount /tmp/mcos/mcos0_sys
|
||||
@ -148,25 +184,25 @@ error_exit() {
|
||||
fi
|
||||
;&
|
||||
ihk_smp_loaded)
|
||||
rmmod ihk_smp_x86 2>/dev/null || echo "warning: failed to remove ihk_smp_x86" >&2
|
||||
rmmod ihk_smp_@ARCH@ 2>/dev/null || echo "warning: failed to remove ihk_smp_@ARCH@" >&2
|
||||
;&
|
||||
ihk_loaded)
|
||||
rmmod ihk 2>/dev/null || echo "warning: failed to remove ihk" >&2
|
||||
;&
|
||||
smp_affinity_modified)
|
||||
umask $umask_old
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
if ! perl -e '$tmpdir="/tmp/mcreboot"; @files = grep { -f } glob "$tmpdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($tmpdir)); if (0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
|
||||
fi
|
||||
if [ -e /tmp/mcreboot ]; then rm -rf /tmp/mcreboot; fi
|
||||
fi
|
||||
;&
|
||||
irqbalance_stopped)
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
|
||||
echo "warning: failed to stop irqbalance_mck" >&2
|
||||
fi
|
||||
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "warning: failed to disable irqbalance_mck" >&2
|
||||
fi
|
||||
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
|
||||
fi
|
||||
if ! systemctl start irqbalance.service; then
|
||||
echo "warning: failed to start irqbalance" >&2;
|
||||
fi
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
if ! systemctl start irqbalance.service; then
|
||||
echo "warning: failed to start irqbalance" >&2;
|
||||
fi
|
||||
fi
|
||||
;&
|
||||
initial)
|
||||
@ -200,7 +236,7 @@ if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
else
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
|
||||
@ -236,9 +272,9 @@ if [ "${irqbalance_used}" == "yes" ]; then
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }'; then
|
||||
if ! perl -e 'use File::Copy qw(copy); $tmpdir="/tmp/mcreboot"; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir = substr($rel, 0, length($rel) - length("/smp_affinity")); if (system("mkdir -p $tmpdir/$dir")) { exit 1; } if (0) { print "cp $file $tmpdir/$rel\n"; } if (!copy($file,"$tmpdir/$rel")) { exit 1; } }'; then
|
||||
echo "error: saving /proc/irq/*/smp_affinity" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
error_exit "irqbalance_stopped"
|
||||
fi;
|
||||
|
||||
# Prevent /proc/irq/*/smp_affinity from getting zero after offlining
|
||||
@ -252,32 +288,45 @@ if [ "${irqbalance_used}" == "yes" ]; then
|
||||
|
||||
if ! ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = int((($ENV{'ncpus'}%32)+3)/4); if($len != 0) { $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then
|
||||
echo "error: modifying /proc/irq/*/smp_affinity" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
error_exit "irqbalance_stopped"
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
# Set umask so that proc/sys files/directories created by
|
||||
# mcctrl.ko and mcreboot.sh have appropriate permission bits
|
||||
umask_dec=$(( 8#${umask_old} & 8#0002 ))
|
||||
umask 0`printf "%o" ${umask_dec}`
|
||||
|
||||
# Load IHK if not loaded
|
||||
if ! grep -E 'ihk\s' /proc/modules &>/dev/null; then
|
||||
if ! taskset -c 0 insmod ${KMODDIR}/ihk.ko 2>/dev/null; then
|
||||
echo "error: loading ihk" >&2
|
||||
error_exit "irqbalance_stopped"
|
||||
error_exit "smp_affinity_modified"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase swappiness so that we have better chance to allocate memory for IHK
|
||||
echo 100 > /proc/sys/vm/swappiness
|
||||
# Copy modules under /tmp to avoid loading from shared FS
|
||||
if mkdir -p /tmp/mcos-kmod; then
|
||||
cp ${KMODDIR}/* /tmp/mcos-kmod/
|
||||
KMODDIR="/tmp/mcos-kmod/"
|
||||
fi
|
||||
|
||||
# Drop Linux caches to free memory
|
||||
sync && echo 3 > /proc/sys/vm/drop_caches
|
||||
# Fujitsu drops caches for us in between jobs so don't do it on OFP
|
||||
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" == "" ]; then
|
||||
# Increase swappiness so that we have better chance to allocate memory for IHK
|
||||
echo 100 > /proc/sys/vm/swappiness
|
||||
|
||||
# Merge free memory areas into large, physically contiguous ones
|
||||
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
# Drop Linux caches to free memory
|
||||
sync && echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
sync
|
||||
# Merge free memory areas into large, physically contiguous ones
|
||||
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
|
||||
sync
|
||||
fi
|
||||
|
||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
if [ "$ihk_irq" == "" ]; then
|
||||
for i in `seq 64 255`; do
|
||||
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
|
||||
@ -290,30 +339,46 @@ if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
fi
|
||||
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
|
||||
echo "error: loading ihk-smp-x86" >&2
|
||||
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-@ARCH@.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
|
||||
echo "error: loading ihk-smp-@ARCH@" >&2
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
|
||||
# Offline-reonline RAM (special case for OFP SNC-4 mode)
|
||||
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-7" ]; then
|
||||
for i in 0 1 2 3; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 0 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 1 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
for i in 4 5 6 7; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 0 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 1 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
fi
|
||||
# # Offline-reonline RAM (special case for OFP SNC-4 flat mode)
|
||||
# if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-7" ]; then
|
||||
# for i in 0 1 2 3; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 4 5 6 7; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 4 5 6 7; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# fi
|
||||
#
|
||||
# # Offline-reonline RAM (special case for OFP Quadrant flat mode)
|
||||
# if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-1" ]; then
|
||||
# for i in 1; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 1; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
|
||||
echo "error: reserving memory" >&2
|
||||
@ -333,13 +398,20 @@ if ! grep mcctrl /proc/modules &>/dev/null; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check that different versions of binaries/scripts are not mixed
|
||||
IHK_BUILDID=`${SBINDIR}/ihkconfig 0 get buildid`
|
||||
if [ "${IHK_BUILDID}" != "${MCK_BUILDID}" ]; then
|
||||
echo "IHK build-id (${IHK_BUILDID}) didn't match McKernel build-id (${MCK_BUILDID})." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
# Retry when conflicting with ihkmond
|
||||
nretry=0
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
|
||||
sleep 0.25
|
||||
nretry=$[ $nretry + 1 ]
|
||||
done
|
||||
@ -383,7 +455,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
|
||||
fi
|
||||
|
||||
# Set kernel arguments
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL}"; then
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $safe_kernel_map $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
|
||||
echo "error: setting kernel arguments" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
@ -409,7 +481,9 @@ if [ "${irqbalance_used}" == "yes" ]; then
|
||||
banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'`
|
||||
|
||||
sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > /tmp/irqbalance_mck
|
||||
systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null
|
||||
|
||||
disable_irqbalance_mck
|
||||
|
||||
if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "error: linking irqbalance_mck" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
@ -421,3 +495,8 @@ if [ "${irqbalance_used}" == "yes" ]; then
|
||||
fi
|
||||
# echo cpus=$cpus ncpus=$ncpus banirq=$banirq
|
||||
fi
|
||||
|
||||
# Restore umask
|
||||
umask ${umask_old}
|
||||
|
||||
exit 0
|
||||
@ -18,17 +18,24 @@ mem=""
|
||||
cpus=""
|
||||
irqbalance_used=""
|
||||
|
||||
# Disable the irqbalance_mck systemd unit and delete its unit file.
# Does nothing (and returns 0) when the unit file is not installed.
disable_irqbalance_mck() {
	[ -f /etc/systemd/system/irqbalance_mck.service ] || return 0

	systemctl disable irqbalance_mck.service >/dev/null 2>&1

	# The invalid .service file is left behind by systemd,
	# so it has to be removed by hand
	rm -f /etc/systemd/system/irqbalance_mck.service
}
|
||||
|
||||
# No SMP module? Exit.
|
||||
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then exit 0; fi
|
||||
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then exit 0; fi
|
||||
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
irqbalance_used="yes"
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
|
||||
echo "warning: failed to stop irqbalance_mck" >&2
|
||||
fi
|
||||
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "warning: failed to disable irqbalance_mck" >&2
|
||||
fi
|
||||
disable_irqbalance_mck
|
||||
fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
@ -37,7 +44,7 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
ind=`echo $i|cut -c10-`;
|
||||
# Retry when conflicting with ihkmond
|
||||
nretry=0
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
|
||||
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
|
||||
sleep 0.25
|
||||
nretry=$[ $nretry + 1 ]
|
||||
done
|
||||
@ -48,6 +55,9 @@ if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
done
|
||||
fi
|
||||
|
||||
# Allow ihkmond to flush kmsg buffer
|
||||
sleep 2.0
|
||||
|
||||
# Query IHK-SMP resources and release them
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
|
||||
echo "error: querying cpus" >&2
|
||||
@ -62,17 +72,23 @@ if [ "${cpus}" != "" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then
|
||||
echo "error: querying memory" >&2
|
||||
exit 1
|
||||
fi
|
||||
#if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then
|
||||
# echo "error: querying memory" >&2
|
||||
# exit 1
|
||||
#fi
|
||||
#
|
||||
#mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
#if [ "${mem}" != "" ]; then
|
||||
# if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
|
||||
# echo "error: releasing memory" >&2
|
||||
# exit 1
|
||||
# fi
|
||||
#fi
|
||||
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
if [ "${mem}" != "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
|
||||
echo "error: releasing memory" >&2
|
||||
exit 1
|
||||
fi
|
||||
# Release all memory
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem "all" > /dev/null; then
|
||||
echo "error: releasing memory" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Remove delegator if loaded
|
||||
@ -87,9 +103,9 @@ fi
|
||||
. ${SBINDIR}/mcoverlay-destroy.sh
|
||||
|
||||
# Remove SMP module
|
||||
if grep ihk_smp_x86 /proc/modules &>/dev/null; then
|
||||
if ! rmmod ihk_smp_x86 2>/dev/null; then
|
||||
echo "error: removing ihk_smp_x86" >&2
|
||||
if grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
if ! rmmod ihk_smp_@ARCH@ 2>/dev/null; then
|
||||
echo "error: removing ihk_smp_@ARCH@" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
@ -110,9 +126,10 @@ fi
|
||||
|
||||
# Start irqbalance with the original settings
|
||||
if [ "${irqbalance_used}" != "" ]; then
|
||||
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
if ! perl -e '$tmpdir="/tmp/mcreboot"; @files = grep { -f } glob "$tmpdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($tmpdir)); if (0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
|
||||
fi
|
||||
if [ -e /tmp/mcreboot ]; then rm -rf /tmp/mcreboot; fi
|
||||
if ! systemctl start irqbalance.service; then
|
||||
echo "warning: failed to start irqbalance" >&2;
|
||||
fi
|
||||
60
arch/x86_64/tools/mpimcexec.1in
Normal file
60
arch/x86_64/tools/mpimcexec.1in
Normal file
@ -0,0 +1,60 @@
|
||||
.\" Man page for mpimcexec
|
||||
.\"
|
||||
|
||||
.TH MPIMCEXEC 1 "@MCKERNEL_RELEASE_DATE@" "Version @MCKERNEL_VERSION@" "MCKERNEL @MCKERNEL_VERSION@"
|
||||
.SH NAME
|
||||
mpimcexec \- run an MPI application on McKernel
|
||||
.\"
|
||||
|
||||
.\" ---------------------------- SYNOPSIS ----------------------------
|
||||
.SH SYNOPSIS
|
||||
.B mpimcexec \fR [\fIoptions\fR] \fI<command>\fR
|
||||
|
||||
.\" ---------------------------- DESCRIPTION ----------------------------
|
||||
.SH DESCRIPTION
|
||||
mpimcexec is a wrapper script for running MPI applications on McKernel.
|
||||
It internally calls mpiexec to spawn mcexec on compute nodes, which in
|
||||
turn runs \fI<command>\fR on McKernel. mpimcexec specifies a number of
|
||||
mcexec arguments that enable high performance execution.
|
||||
|
||||
.\" ---------------------------- OPTIONS ----------------------------
|
||||
.SH OPTIONS
|
||||
|
||||
.TP
|
||||
.B -ppn N, --ppn N, --ranks-per-node N
|
||||
Specify the number of MPI ranks per node. This argument is required.
|
||||
.TP
|
||||
.B -n N, --n N, --ranks N
|
||||
Specify the number of total MPI ranks.
|
||||
e.g.,
|
||||
$ mpimcexec -n 32 -ppn 4 ./a.out
|
||||
.br
|
||||
In the above example, 32 MPI processes are invoked
|
||||
on eight compute nodes each of which has four processes.
|
||||
.TP
|
||||
.B --nodes N
|
||||
Specify the number of compute nodes.
|
||||
By default, all nodes, specified by "PJM --mpi proc" option, are used.
|
||||
.TP
|
||||
.B --env, -env
|
||||
Pass an additional environment variable
|
||||
.TP
|
||||
.B -m N, --numa N
|
||||
Specify preferred NUMA node.
|
||||
.TP
|
||||
.B -h <file name>, --hostfile <file name>
|
||||
Specify a host file for MPI.
|
||||
.TP
|
||||
.B --help
|
||||
Show help message.
|
||||
|
||||
.PP
|
||||
.\" ---------------------------- SEE ALSO ----------------------------
|
||||
.SH SEE ALSO
|
||||
\fBmcexec\fR (1), \fBmpiexec\fR (1)
|
||||
|
||||
.\" ---------------------------- AUTHORS ----------------------------
|
||||
.SH AUTHORS
|
||||
Copyright (C) 2018 McKernel Development Team, RIKEN, Japan
|
||||
|
||||
|
||||
147
arch/x86_64/tools/mpimcexec.in
Executable file
147
arch/x86_64/tools/mpimcexec.in
Executable file
@ -0,0 +1,147 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# OFP McKernel MPI wrapper script
|
||||
# author: Balazs Gerofi <bgerofi@riken.jp>
|
||||
# Copyright (C) 2018 RIKEN R-CCS
|
||||
#
|
||||
|
||||
# Installation layout. @prefix@ is filled in when configure generates this
# script from its .in template.
prefix="@prefix@"
BINDIR="${prefix}/bin"

# This script refuses to run on bash older than 4.0.
if (( BASH_VERSINFO[0] < 4 )); then
	echo "You need at least bash-4.0 to run this script." >&2
	exit 1
fi

# Option state; every value starts out empty and is filled in by the
# option parser or by the environment-based defaults below.
RANKS=
NODES=
PPN=
MPI_ENV=
COMMAND=
NUMA=
HOSTFILE=

# Default ranks-per-node from the environment: PJM_PROC_BY_NODE wins when it
# is non-empty, otherwise MPI_LOCALNRANKS is used (PPN stays empty when
# neither is set).
PPN="${PJM_PROC_BY_NODE:-${MPI_LOCALNRANKS}}"
|
||||
|
||||
# help_exit: print the usage summary to stdout and terminate the script
# with exit status 1. Takes no arguments and never returns.
help_exit() {
	cat <<EOF

Spawn an McKernel MPI job on Oakforest-PACS.
usage: $(basename $0) -ppn ranks_per_node [--nodes nodes] [-n ranks] [--env additional_environment]... command

 -ppn | --ppn | --ranks-per-node Number of MPI ranks per node (required)
 -n | --n | --ranks Total number of MPI ranks in the job
 --nodes Number of nodes to be used
 --env | -env Pass an additional environment variable
 -m | --numa Preferred NUMA node(s)
 -h | --hostfile Host file for MPI
 --help Show help message
EOF
	exit 1
}
|
||||
|
||||
# Parse options
#
# Options are consumed from the front of the argument list. The first word
# that is not a recognized option ends option parsing; it and everything
# after it are stored (space-joined) in COMMAND. Every option that takes a
# value checks that the value is present and otherwise prints an error
# followed by the usage text, which exits with status 1.
while true; do
	case $1 in
	-ppn | --ppn | --ranks-per-node )
		if [ $# -lt 2 ]; then
			echo "error: needs an integer value for -ppn, --ppn, or --ranks-per-node option"
			help_exit
		fi
		PPN=$2
		shift 2
		;;
	-n | --n | --ranks )
		if [ $# -lt 2 ]; then
			echo "error: needs an integer value for -n, --n, or --ranks option"
			help_exit
		fi
		RANKS=$2
		shift 2
		;;
	-m | --numa )
		if [ $# -lt 2 ]; then
			echo "error: needs an integer value for -m or --numa option"
			help_exit
		fi
		# Stored together with its flag so it can be spliced into the
		# mcexec command line as-is.
		NUMA="-m $2"
		shift 2
		;;
	--nodes )
		if [ $# -lt 2 ]; then
			echo "error: needs an integer value for --nodes option"
			help_exit
		fi
		NODES=$2
		shift 2
		;;
	--env | -env )
		if [ $# -lt 2 ]; then
			echo "error: needs an environment variable name for -env or --env option"
			help_exit
		fi
		# Anything mentioning I_MPI_PIN is silently dropped
		# (NOTE(review): presumably because this script exports
		# I_MPI_PIN=off itself -- confirm). A pattern match is used
		# instead of `echo | grep` so values such as "-n" or ones
		# containing glob characters are tested literally.
		if [[ "$2" != *I_MPI_PIN* ]]; then
			# xargs trims and collapses whitespace in the
			# accumulated option string.
			MPI_ENV=$(echo "${MPI_ENV} -env $2" | xargs)
		fi
		shift 2
		;;
	-h | --hostfile )
		if [ $# -lt 2 ]; then
			echo "error: needs a file name for -h or --hostfile option"
			help_exit
		fi
		HOSTFILE="-hostfile $2"
		shift 2
		;;
	--help )
		help_exit
		;;
	* )
		# First non-option word: the rest of the line is the command.
		COMMAND=$@
		break
		;;
	esac
done
|
||||
|
||||
# Validate the parsed options and derive the values that were not given.
# All tests quote their operands so that an unexpected value containing
# whitespace yields the intended error instead of a `[` syntax failure.

# Ranks per node must be known, from an option or from the environment.
if [ -z "${PPN}" ]; then
	echo "error: please specify the number of ranks per node"
	help_exit
fi

# Unless explicitly specified, use Fujitsu inherited value
if [ -z "${NODES}" ]; then
	NODES=${PJM_VNODES}
fi

# At least one of total ranks / node count is needed to size the job.
if [ -z "${RANKS}" ] && [ -z "${NODES}" ]; then
	echo "error: please specify the total number of ranks or the number of nodes"
	help_exit
fi

if [ -z "${COMMAND}" ]; then
	echo "error: please specify command"
	help_exit
fi

# Calculate total job size if not specified
if [ -z "${RANKS}" ]; then
	RANKS=$(( ${PPN} * ${NODES} ))
fi

# Support direct SSH when not executed from Fujitsu job system
if [ -z "${PJM_VNODES}" ]; then
	HOSTFILE="-launcher-exec ssh ${HOSTFILE}"
fi
|
||||
|
||||
# Environment exported to mpirun and everything it spawns.
export I_MPI_PIN=off \
	PSM2_RCVTHREAD=0 \
	HFI_NO_CPUAFFINITY=1 \
	I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304 \
	PSM2_MQ_RNDV_HFI_WINDOW=4194304 \
	PSM2_MQ_EAGER_SDMA_SZ=65536 \
	PSM2_MQ_RNDV_HFI_THRESH=200000

# Launch mcexec under mpirun. HOSTFILE, MPI_ENV, NUMA and COMMAND are left
# unquoted on purpose: each holds several space-separated words that must be
# split into separate arguments.
mpirun ${HOSTFILE} -n ${RANKS} -ppn ${PPN} ${MPI_ENV} \
	${BINDIR}/mcexec -n ${PPN} ${NUMA} \
	--enable-hfi1 \
	--mpol-threshold=1M \
	--stack-premap=4M,4G \
	--extend-heap-by=8M \
	--disable-sched-yield \
	--mpol-shm-premap \
	${COMMAND}
|
||||
|
||||
@ -3,13 +3,19 @@
|
||||
/* Path of install directory for binary */
|
||||
#undef BINDIR
|
||||
|
||||
/* IHK build-id to confirm IHK and McKernel built at the same time are used */
|
||||
#undef BUILDID
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
#undef ENABLE_MCOVERLAYFS
|
||||
|
||||
/* whether memdump feature is enabled */
|
||||
#undef ENABLE_MEMDUMP
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
/* whether perf is enabled */
|
||||
#undef ENABLE_PERF
|
||||
|
||||
/* whether qlmpi is enabled */
|
||||
#undef ENABLE_QLMPI
|
||||
|
||||
/* whether rusage is enabled */
|
||||
|
||||
114
configure
vendored
114
configure
vendored
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for mckernel 0.9.0.
|
||||
# Generated by GNU Autoconf 2.69 for mckernel 1.5.1-knl+hfi.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='mckernel'
|
||||
PACKAGE_TARNAME='mckernel'
|
||||
PACKAGE_VERSION='0.9.0'
|
||||
PACKAGE_STRING='mckernel 0.9.0'
|
||||
PACKAGE_VERSION='1.5.1-knl+hfi'
|
||||
PACKAGE_STRING='mckernel 1.5.1-knl+hfi'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@ -645,6 +645,7 @@ TARGET
|
||||
UNAME_R
|
||||
KDIR
|
||||
ARCH
|
||||
BUILDID
|
||||
XCC
|
||||
FGREP
|
||||
EGREP
|
||||
@ -708,6 +709,7 @@ enable_dcfa
|
||||
enable_memdump
|
||||
enable_mcoverlayfs
|
||||
enable_rusage
|
||||
enable_perf
|
||||
enable_qlmpi
|
||||
with_uname_r
|
||||
'
|
||||
@ -1260,7 +1262,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures mckernel 0.9.0 to adapt to many kinds of systems.
|
||||
\`configure' configures mckernel 1.5.1-knl+hfi to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@ -1321,7 +1323,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of mckernel 0.9.0:";;
|
||||
short | recursive ) echo "Configuration of mckernel 1.5.1-knl+hfi:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@ -1333,6 +1335,7 @@ Optional Features:
|
||||
--enable-memdump enable dumping memory and analyzing a dump
|
||||
--enable-mcoverlayfs enable mcoverlayfs implementation
|
||||
--enable-rusage enable rusage implementation
|
||||
--enable-perf enable perf_event implementation
|
||||
--enable-qlmpi enable qlmpi implementation
|
||||
|
||||
Optional Packages:
|
||||
@ -1428,7 +1431,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
mckernel configure 0.9.0
|
||||
mckernel configure 1.5.1-knl+hfi
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@ -1726,7 +1729,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by mckernel $as_me 0.9.0, which was
|
||||
It was created by mckernel $as_me 1.5.1-knl+hfi, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@ -2079,12 +2082,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
|
||||
|
||||
|
||||
IHK_VERSION=0.9.0
|
||||
MCKERNEL_VERSION=0.9.0
|
||||
DCFA_VERSION=0.9.0
|
||||
IHK_RELEASE_DATE=2013-11-18
|
||||
MCKERNEL_RELEASE_DATE=2013-11-18
|
||||
DCFA_RELEASE_DATE=2013-11-18
|
||||
IHK_VERSION=1.5.1-knl+hfi
|
||||
MCKERNEL_VERSION=1.5.1-knl+hfi
|
||||
DCFA_VERSION=DCFA_VERSION_m4
|
||||
IHK_RELEASE_DATE=2019-05-14
|
||||
MCKERNEL_RELEASE_DATE=2019-05-14
|
||||
DCFA_RELEASE_DATE=DCFA_RELEASE_DATE_m4
|
||||
|
||||
|
||||
|
||||
@ -3568,6 +3571,14 @@ else
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-perf was given.
|
||||
if test "${enable_perf+set}" = set; then :
|
||||
enableval=$enable_perf; ENABLE_PERF=$enableval
|
||||
else
|
||||
ENABLE_PERF=yes
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-qlmpi was given.
|
||||
if test "${enable_qlmpi+set}" = set; then :
|
||||
enableval=$enable_qlmpi; ENABLE_QLMPI=$enableval
|
||||
@ -4275,7 +4286,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
builtin-mic)
|
||||
@ -4292,7 +4303,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/attached/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/attached/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
builtin-x86)
|
||||
@ -4309,7 +4320,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/attached/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
@ -4341,7 +4352,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-x86/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
smp-arm64)
|
||||
@ -4366,11 +4377,14 @@ case $WITH_TARGET in
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$INCLUDEDIR" = X; then
|
||||
INCLUDEDIR="$prefix/include"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-arm64/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
@ -4961,6 +4975,17 @@ else
|
||||
$as_echo "$as_me: rusage is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
|
||||
$as_echo "#define ENABLE_PERF 1" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is enabled" >&5
|
||||
$as_echo "$as_me: perf is enabled" >&6;}
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: perf is disabled" >&5
|
||||
$as_echo "$as_me: perf is disabled" >&6;}
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@ -4988,6 +5013,20 @@ cat >>confdefs.h <<_ACEOF
|
||||
_ACEOF
|
||||
|
||||
|
||||
ABS_SRCDIR=$( cd $( dirname $0 ); pwd )
|
||||
IHK_ABS_SRCDIR=${ABS_SRCDIR}/../ihk
|
||||
BUILDID=$( cd $IHK_ABS_SRCDIR; if [ ! -d .git ]; then echo $IHK_VERSION; else bash -c 'git rev-list -1 HEAD | cut -c1-8'; fi )
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: BUILDID=$BUILDID" >&5
|
||||
$as_echo "$as_me: BUILDID=$BUILDID" >&6;}
|
||||
if test "x$BUILDID" != "x" ; then
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define BUILDID "$BUILDID"
|
||||
_ACEOF
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -5021,9 +5060,14 @@ ac_config_headers="$ac_config_headers config.h"
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_37
|
||||
# AC_CONFIG_FILES arch dependfiles separate
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcoverlay-destroy-smp-x86.sh arch/x86/tools/mcoverlay-create-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in"
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/mpimcexec arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile"
|
||||
|
||||
|
||||
if test "$TARGET" = "smp-x86"; then
|
||||
ac_config_files="$ac_config_files arch/x86_64/kernel/Makefile.arch"
|
||||
|
||||
fi
|
||||
|
||||
if test "$TARGET" = "smp-arm64"; then
|
||||
ac_config_files="$ac_config_files kernel/config/config.smp-arm64 arch/arm64/kernel/vdso/Makefile arch/arm64/kernel/Makefile.arch"
|
||||
|
||||
@ -5541,7 +5585,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by mckernel $as_me 0.9.0, which was
|
||||
This file was extended by mckernel $as_me 1.5.1-knl+hfi, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@ -5603,7 +5647,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
mckernel config.status 0.9.0
|
||||
mckernel config.status 1.5.1-knl+hfi
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
@ -5741,18 +5785,22 @@ do
|
||||
"kernel/Makefile") CONFIG_FILES="$CONFIG_FILES kernel/Makefile" ;;
|
||||
"kernel/Makefile.build") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.build" ;;
|
||||
"kernel/include/swapfmt.h") CONFIG_FILES="$CONFIG_FILES kernel/include/swapfmt.h" ;;
|
||||
"arch/x86/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-attached-mic.sh" ;;
|
||||
"arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;;
|
||||
"arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;;
|
||||
"arch/x86/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-destroy-smp-x86.sh" ;;
|
||||
"arch/x86/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcoverlay-create-smp-x86.sh" ;;
|
||||
"arch/x86/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/eclair-dump-backtrace.exp" ;;
|
||||
"arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;;
|
||||
"arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;;
|
||||
"arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;;
|
||||
"arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;;
|
||||
"arch/x86_64/tools/mcreboot-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-attached-mic.sh" ;;
|
||||
"arch/x86_64/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-attached-mic.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-builtin-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcstop+release-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcoverlay-create-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcoverlay-create-smp-x86.sh" ;;
|
||||
"arch/x86_64/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/eclair-dump-backtrace.exp" ;;
|
||||
"arch/x86_64/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-builtin-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
|
||||
"arch/x86_64/tools/mpimcexec") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mpimcexec" ;;
|
||||
"arch/x86_64/tools/mpimcexec.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
|
||||
"tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;;
|
||||
"arch/x86_64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/x86_64/kernel/Makefile.arch" ;;
|
||||
"kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;;
|
||||
"arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;;
|
||||
"arch/arm64/kernel/Makefile.arch") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/Makefile.arch" ;;
|
||||
|
||||
80
configure.ac
80
configure.ac
@ -1,11 +1,9 @@
|
||||
# configure.ac COPYRIGHT FUJITSU LIMITED 2015-2016
|
||||
AC_PREREQ(2.63)
|
||||
m4_define([IHK_VERSION_m4],[0.9.0])dnl
|
||||
m4_define([MCKERNEL_VERSION_m4],[0.9.0])dnl
|
||||
m4_define([DCFA_VERSION_m4],[0.9.0])dnl
|
||||
m4_define([IHK_RELEASE_DATE_m4],[2013-11-18])dnl
|
||||
m4_define([MCKERNEL_RELEASE_DATE_m4],[2013-11-18])dnl
|
||||
m4_define([DCFA_RELEASE_DATE_m4],[2013-11-18])dnl
|
||||
m4_define([IHK_VERSION_m4],[1.5.1-knl+hfi])dnl
|
||||
m4_define([MCKERNEL_VERSION_m4],[1.5.1-knl+hfi])dnl
|
||||
m4_define([IHK_RELEASE_DATE_m4],[2019-05-14])dnl
|
||||
m4_define([MCKERNEL_RELEASE_DATE_m4],[2019-05-14])dnl
|
||||
|
||||
AC_INIT([mckernel], MCKERNEL_VERSION_m4)
|
||||
|
||||
@ -134,6 +132,12 @@ AC_ARG_ENABLE([rusage],
|
||||
[ENABLE_RUSAGE=$enableval],
|
||||
[ENABLE_RUSAGE=yes])
|
||||
|
||||
AC_ARG_ENABLE([perf],
|
||||
AC_HELP_STRING([--enable-perf],
|
||||
[enable perf_event implementation]),
|
||||
[ENABLE_PERF=$enableval],
|
||||
[ENABLE_PERF=yes])
|
||||
|
||||
AC_ARG_ENABLE([qlmpi],
|
||||
AC_HELP_STRING([--enable-qlmpi],
|
||||
[enable qlmpi implementation]),
|
||||
@ -225,7 +229,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
builtin-mic)
|
||||
@ -242,7 +246,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/attached/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/attached/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
builtin-x86)
|
||||
@ -259,7 +263,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/attached/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
@ -291,7 +295,7 @@ case $WITH_TARGET in
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-x86/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
smp-arm64)
|
||||
@ -316,11 +320,14 @@ case $WITH_TARGET in
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$INCLUDEDIR" = X; then
|
||||
INCLUDEDIR="$prefix/include"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-arm64/man"
|
||||
MANDIR="$prefix/share/man"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
@ -451,7 +458,7 @@ else
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_QLMPI" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether mcoverlayfs is enabled])
|
||||
AC_DEFINE([ENABLE_QLMPI],[1],[whether qlmpi is enabled])
|
||||
AC_MSG_NOTICE([qlmpi is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([qlmpi is disabled])
|
||||
@ -475,6 +482,13 @@ else
|
||||
AC_MSG_NOTICE([rusage is disabled])
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_PERF" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_PERF],[1],[whether perf is enabled])
|
||||
AC_MSG_NOTICE([perf is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([perf is disabled])
|
||||
fi
|
||||
|
||||
if test "x$MCKERNEL_INCDIR" != "x" ; then
|
||||
AC_DEFINE_UNQUOTED(MCKERNEL_INCDIR,"$MCKERNEL_INCDIR",[McKernel specific headers])
|
||||
fi
|
||||
@ -486,6 +500,15 @@ fi
|
||||
AC_DEFINE_UNQUOTED(BINDIR,"$BINDIR",[Path of install directory for binary])
|
||||
AC_DEFINE_UNQUOTED(SBINDIR,"$SBINDIR",[Path of install directory for system binary])
|
||||
|
||||
ABS_SRCDIR=$( cd $( dirname $0 ); pwd )
|
||||
IHK_ABS_SRCDIR=${ABS_SRCDIR}/../ihk
|
||||
BUILDID=$( cd $IHK_ABS_SRCDIR; if @<:@ ! -d .git @:>@; then echo $IHK_VERSION; else bash -c 'git rev-list -1 HEAD | cut -c1-8'; fi )
|
||||
AC_MSG_NOTICE([BUILDID=$BUILDID])
|
||||
if test "x$BUILDID" != "x" ; then
|
||||
AC_DEFINE_UNQUOTED(BUILDID,"$BUILDID",[IHK build-id to confirm IHK and McKernel built at the same time are used])
|
||||
fi
|
||||
AC_SUBST(BUILDID)
|
||||
|
||||
AC_SUBST(CC)
|
||||
AC_SUBST(XCC)
|
||||
AC_SUBST(ARCH)
|
||||
@ -535,20 +558,29 @@ AC_CONFIG_FILES([
|
||||
kernel/Makefile
|
||||
kernel/Makefile.build
|
||||
kernel/include/swapfmt.h
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
arch/x86/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot-smp-x86.sh
|
||||
arch/x86/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86/tools/mcoverlay-destroy-smp-x86.sh
|
||||
arch/x86/tools/mcoverlay-create-smp-x86.sh
|
||||
arch/x86/tools/eclair-dump-backtrace.exp
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
arch/x86/tools/irqbalance_mck.service
|
||||
arch/x86/tools/irqbalance_mck.in
|
||||
arch/x86_64/tools/mcreboot-attached-mic.sh
|
||||
arch/x86_64/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86_64/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86_64/tools/mcreboot-smp-x86.sh
|
||||
arch/x86_64/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh
|
||||
arch/x86_64/tools/mcoverlay-create-smp-x86.sh
|
||||
arch/x86_64/tools/eclair-dump-backtrace.exp
|
||||
arch/x86_64/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
|
||||
arch/x86_64/tools/mpimcexec
|
||||
arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in
|
||||
arch/x86_64/tools/irqbalance_mck.service
|
||||
arch/x86_64/tools/irqbalance_mck.in
|
||||
tools/mcstat/Makefile
|
||||
])
|
||||
|
||||
if test "$TARGET" = "smp-x86"; then
|
||||
AC_CONFIG_FILES([
|
||||
arch/x86_64/kernel/Makefile.arch
|
||||
])
|
||||
fi
|
||||
|
||||
if test "$TARGET" = "smp-arm64"; then
|
||||
AC_CONFIG_FILES([
|
||||
kernel/config/config.smp-arm64
|
||||
|
||||
@ -5,6 +5,10 @@
|
||||
#define IHK_MAX_NUM_NUMA_NODES 1024
|
||||
#define IHK_MAX_NUM_CPUS 1024
|
||||
|
||||
#define IHK_OS_PGSIZE_4KB 0
|
||||
#define IHK_OS_PGSIZE_2MB 1
|
||||
#define IHK_OS_PGSIZE_1GB 2
|
||||
|
||||
struct mckernel_rusage {
|
||||
unsigned long memory_stat_rss[IHK_MAX_NUM_PGSIZES];
|
||||
unsigned long memory_stat_mapped_file[IHK_MAX_NUM_PGSIZES];
|
||||
|
||||
@ -91,6 +91,7 @@ struct program_image_section {
|
||||
|
||||
struct get_cpu_set_arg {
|
||||
int nr_processes;
|
||||
int *process_rank;
|
||||
void *cpu_set;
|
||||
size_t cpu_set_size; // Size in bytes
|
||||
int *target_core;
|
||||
@ -109,6 +110,8 @@ typedef unsigned long __cpu_set_unit;
|
||||
#define MPOL_NO_BSS 0x04
|
||||
#define MPOL_SHM_PREMAP 0x08
|
||||
|
||||
#define MCEXEC_HFI1 0x01
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
int status;
|
||||
@ -137,10 +140,14 @@ struct program_load_desc {
|
||||
unsigned long envs_len;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
unsigned long mcexec_flags;
|
||||
unsigned long mpol_flags;
|
||||
unsigned long mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
long stack_premap;
|
||||
unsigned long mpol_bind_mask;
|
||||
int nr_processes;
|
||||
int process_rank;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
|
||||
int profile;
|
||||
@ -187,6 +194,7 @@ struct syscall_response {
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
struct syscall_ret_desc {
|
||||
|
||||
@ -9,13 +9,22 @@ IHK_BASE=$(src)/../../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_1, arch depend "-mcmodel"
|
||||
# POSTK_DEBUG_ARCH_DEP_83, arch depend translate_rva_to_rpa() move
|
||||
ifeq ($(ARCH), arm64)
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -I$(src)/arch/$(ARCH)/include -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@
|
||||
else
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../../kernel/include -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ -I@abs_builddir@/../../../
|
||||
endif
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include \
|
||||
-I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) \
|
||||
-I$(IHK_BASE)/ikc/include \
|
||||
-I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) \
|
||||
-I$(IHK_BASE)/include \
|
||||
-I$(IHK_BASE)/include/arch/$(ARCH) \
|
||||
-I$(src)/../../include \
|
||||
-I$(src)/arch/$(ARCH)/include \
|
||||
-I@abs_builddir@ \
|
||||
-I@abs_builddir@/../../../ \
|
||||
-I$(src)/../../../kernel/include \
|
||||
-DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
|
||||
|
||||
# depending arch
|
||||
include @abs_builddir@/arch/$(ARCH)/Makefile
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
mcctrl-y += sysfs.o sysfs_files.o arch/$(ARCH)/archdeps.o
|
||||
|
||||
@ -1 +1 @@
|
||||
# dummy file
|
||||
ccflags-y += -mno-red-zone -mcmodel=kernel
|
||||
|
||||
@ -327,6 +327,14 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
|
||||
pgsize = 1UL << offsh;
|
||||
rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1);
|
||||
rpa |= rva & (pgsize - 1);
|
||||
|
||||
/* For GB pages, just report regular 2MB page */
|
||||
if (offsh == 30) {
|
||||
pgsize = 1UL << 21;
|
||||
dprintk("%s: GB page translated 0x%lx -> 0x%lx, pgsize: %lu\n",
|
||||
__FUNCTION__, rva, rpa, pgsize);
|
||||
}
|
||||
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
|
||||
error = 0;
|
||||
|
||||
@ -190,7 +190,11 @@ static long mcexec_prepare_image(ihk_os_t os,
|
||||
|
||||
pdesc->status = 0;
|
||||
mb();
|
||||
mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
ret = mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
if(ret < 0) {
|
||||
printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret);
|
||||
goto put_and_free_out;
|
||||
}
|
||||
|
||||
ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status);
|
||||
if (ret < 0) {
|
||||
@ -363,7 +367,7 @@ static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
|
||||
}
|
||||
|
||||
int mcexec_close_exec(ihk_os_t os);
|
||||
int mcexec_destroy_per_process_data(ihk_os_t os);
|
||||
int mcexec_destroy_per_process_data(ihk_os_t os, int pid);
|
||||
|
||||
static void release_handler(ihk_os_t os, void *param)
|
||||
{
|
||||
@ -383,7 +387,7 @@ static void release_handler(ihk_os_t os, void *param)
|
||||
|
||||
mcexec_close_exec(os);
|
||||
|
||||
mcexec_destroy_per_process_data(os);
|
||||
mcexec_destroy_per_process_data(os, info->pid);
|
||||
|
||||
memset(&isp, '\0', sizeof isp);
|
||||
isp.msg = SCD_MSG_CLEANUP_PROCESS;
|
||||
@ -431,6 +435,7 @@ static long mcexec_start_image(ihk_os_t os,
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct mcos_handler_info *info;
|
||||
int ret = 0;
|
||||
|
||||
desc = kmalloc(sizeof(*desc), GFP_KERNEL);
|
||||
if (!desc) {
|
||||
@ -441,17 +446,18 @@ static long mcexec_start_image(ihk_os_t os,
|
||||
|
||||
if (copy_from_user(desc, udesc,
|
||||
sizeof(struct program_load_desc))) {
|
||||
kfree(desc);
|
||||
return -EFAULT;
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
info = new_mcos_handler_info(os, file);
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_64 /* host process is SIGKILLed fix. */
|
||||
if (info == NULL) {
|
||||
kfree(desc);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
#endif /* POSTK_DEBUG_TEMP_FIX_64 */
|
||||
|
||||
info->pid = desc->pid;
|
||||
info->cpu = desc->cpu;
|
||||
ihk_os_register_release_handler(file, release_handler, info);
|
||||
@ -467,10 +473,14 @@ static long mcexec_start_image(ihk_os_t os,
|
||||
isp.ref = desc->cpu;
|
||||
isp.arg = desc->rprocess;
|
||||
|
||||
mcctrl_ikc_send(os, desc->cpu, &isp);
|
||||
ret = mcctrl_ikc_send(os, desc->cpu, &isp);
|
||||
if (ret < 0) {
|
||||
printk("%s: error: sending IKC msg\n", __FUNCTION__);
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(desc);
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(signalq);
|
||||
@ -628,6 +638,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
|
||||
|
||||
pli->task = current;
|
||||
pli->ready = 0;
|
||||
pli->timeout = 0;
|
||||
init_waitqueue_head(&pli->pli_wq);
|
||||
|
||||
pli_next = NULL;
|
||||
@ -681,6 +692,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
|
||||
wake_up_interruptible(&pli_next->pli_wq);
|
||||
/* Reset process counter */
|
||||
pe->nr_processes_left = pe->nr_processes;
|
||||
pe->process_rank = 0;
|
||||
}
|
||||
|
||||
/* Wait for the rest if not the last or if the last but
|
||||
@ -689,11 +701,50 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
|
||||
dprintk("%s: pid: %d, waiting in list\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
mutex_unlock(&pe->lock);
|
||||
ret = wait_event_interruptible(pli->pli_wq, pli->ready);
|
||||
/* Timeout period: 10 secs + (#procs * 0.1sec) */
|
||||
ret = wait_event_interruptible_timeout(pli->pli_wq,
|
||||
pli->ready,
|
||||
msecs_to_jiffies(10000 + req.nr_processes * 100));
|
||||
mutex_lock(&pe->lock);
|
||||
if (ret != 0) {
|
||||
|
||||
/* First timeout task? Wake up everyone else,
|
||||
* but tell them we timed out */
|
||||
if (ret == 0) {
|
||||
printk("%s: error: pid: %d, timed out, waking everyone\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
while (!list_empty(&pe->pli_list)) {
|
||||
pli_next = list_first_entry(&pe->pli_list,
|
||||
struct process_list_item, list);
|
||||
list_del(&pli_next->list);
|
||||
pli_next->ready = 1;
|
||||
pli_next->timeout = 1;
|
||||
wake_up_interruptible(&pli_next->pli_wq);
|
||||
}
|
||||
|
||||
/* Reset process counter to start state */
|
||||
pe->nr_processes = -1;
|
||||
ret = -ETIMEDOUT;
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* Interrupted or woken up by someone else due to time out? */
|
||||
if (ret < 0 || pli->timeout) {
|
||||
if (ret > 0) {
|
||||
printk("%s: error: pid: %d, job startup timed out\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
ret = -ETIMEDOUT;
|
||||
}
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* Incorrect wakeup state? */
|
||||
if (!pli->ready) {
|
||||
printk("%s: error: pid: %d, not ready but woken?\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
ret = -EINVAL;
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
dprintk("%s: pid: %d, woken up\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
}
|
||||
@ -873,6 +924,15 @@ next_cpu:
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* Copy rank */
|
||||
if (copy_to_user(req.process_rank, &pe->process_rank,
|
||||
sizeof(int))) {
|
||||
printk("%s: error copying process rank to user\n",
|
||||
__FUNCTION__);
|
||||
ret = -EINVAL;
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* mcexec NUMA to bind to */
|
||||
mcexec_linux_numa = cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu));
|
||||
if (copy_to_user(req.mcexec_linux_numa, &mcexec_linux_numa,
|
||||
@ -920,6 +980,7 @@ next_cpu:
|
||||
}
|
||||
/* Otherwise wake up next process in list */
|
||||
else {
|
||||
++pe->process_rank;
|
||||
pli_next = list_first_entry(&pe->pli_list,
|
||||
struct process_list_item, list);
|
||||
list_del(&pli_next->list);
|
||||
@ -1012,7 +1073,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* NOTE: per-process data is refcounted.
|
||||
* For every get call the user should call put. */
|
||||
struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||
@ -1142,7 +1202,7 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
|
||||
return -1;
|
||||
}
|
||||
|
||||
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
|
||||
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %lu\n",
|
||||
__FUNCTION__,
|
||||
packet->req.rtid,
|
||||
packet->req.ttid,
|
||||
@ -1197,8 +1257,8 @@ retry_alloc:
|
||||
|
||||
wqhln->packet = packet;
|
||||
wqhln->req = 1;
|
||||
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
|
||||
wake_up(&wqhln->wq_syscall);
|
||||
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
|
||||
|
||||
mcctrl_put_per_proc_data(ppd);
|
||||
|
||||
@ -1312,7 +1372,7 @@ retry_alloc:
|
||||
}
|
||||
|
||||
packet->req.valid = 0; /* ack */
|
||||
dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||
dprintk("%s: system call: %lu, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||
"args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
|
||||
__FUNCTION__,
|
||||
packet->req.number,
|
||||
@ -1340,7 +1400,6 @@ retry_alloc:
|
||||
goto put_ppd_out;
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_46 /* user area direct access fix. */
|
||||
if (copy_to_user(&req->cpu, &packet->ref, sizeof(req->cpu))) {
|
||||
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
|
||||
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
|
||||
@ -1348,9 +1407,6 @@ retry_alloc:
|
||||
ret = -EINVAL;
|
||||
goto put_ppd_out;
|
||||
}
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_46 */
|
||||
req->cpu = packet->ref;
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_46 */
|
||||
|
||||
ret = 0;
|
||||
goto put_ppd_out;
|
||||
@ -1441,7 +1497,7 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, desc.size, NULL, 0);
|
||||
#endif
|
||||
|
||||
dprintk("mcexec_load_syscall: %s (desc.size: %d)\n", rpm, desc.size);
|
||||
dprintk("mcexec_load_syscall: %p (desc.size: %lu)\n", rpm, desc.size);
|
||||
|
||||
if (copy_to_user((void *__user)desc.dest, rpm, desc.size)) {
|
||||
return -EFAULT;
|
||||
@ -1676,12 +1732,12 @@ int mcexec_create_per_process_data(ihk_os_t os)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mcexec_destroy_per_process_data(ihk_os_t os)
|
||||
int mcexec_destroy_per_process_data(ihk_os_t os, int pid)
|
||||
{
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct mcctrl_per_proc_data *ppd = NULL;
|
||||
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, pid);
|
||||
|
||||
if (ppd) {
|
||||
/* One for the reference and one for deallocation.
|
||||
@ -2427,7 +2483,9 @@ mcexec_terminate_thread(ihk_os_t os, unsigned long *param, struct file *file)
|
||||
mcctrl_delete_per_thread_data(ppd, tsk);
|
||||
__return_syscall(usrdata->os, packet, param[2], tid);
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||
(usrdata->channels + packet->ref)->c);
|
||||
(usrdata->ikc2linux[smp_processor_id()] ?
|
||||
usrdata->ikc2linux[smp_processor_id()] :
|
||||
usrdata->ikc2linux[0]));
|
||||
err:
|
||||
if(ppd)
|
||||
mcctrl_put_per_proc_data(ppd);
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/delay.h>
|
||||
#include "mcctrl.h"
|
||||
#include <ihk/ihk_host_user.h>
|
||||
|
||||
@ -169,6 +170,14 @@ error_cleanup_channels:
|
||||
int mcctrl_os_shutdown_notifier(int os_index)
|
||||
{
|
||||
if (os[os_index]) {
|
||||
/* Wait for os running */
|
||||
if (ihk_os_wait_for_status(os[os_index], IHK_OS_STATUS_RUNNING, 0, 200) != 0) {
|
||||
printk("IHK: OS does not become RUNNING in shutdown. Force shutdown.\n");
|
||||
/* send nmi to force shutdown */
|
||||
ihk_os_send_nmi(os[os_index], 3);
|
||||
mdelay(200);
|
||||
}
|
||||
|
||||
sysfsm_cleanup(os[os_index]);
|
||||
free_topology_info(os[os_index]);
|
||||
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||
|
||||
@ -304,6 +304,7 @@ struct node_topology {
|
||||
|
||||
struct process_list_item {
|
||||
int ready;
|
||||
int timeout;
|
||||
struct task_struct *task;
|
||||
struct list_head list;
|
||||
wait_queue_head_t pli_wq;
|
||||
@ -313,6 +314,7 @@ struct mcctrl_part_exec {
|
||||
struct mutex lock;
|
||||
int nr_processes;
|
||||
int nr_processes_left;
|
||||
int process_rank;
|
||||
cpumask_t cpus_used;
|
||||
struct list_head pli_list;
|
||||
};
|
||||
|
||||
@ -1019,7 +1019,8 @@ static const struct procfs_entry tid_entry_stuff[] = {
|
||||
|
||||
static const struct procfs_entry pid_entry_stuff[] = {
|
||||
PROC_REG("auxv", S_IRUSR, NULL),
|
||||
PROC_REG("cgroup", S_IXUSR, NULL),
|
||||
/* Support the case where McKernel process retrieves its job-id under the Fujitsu TCS suite. */
|
||||
// PROC_REG("cgroup", S_IXUSR, NULL),
|
||||
// PROC_REG("clear_refs", S_IWUSR, NULL),
|
||||
PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
|
||||
|
||||
@ -222,6 +222,14 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
|
||||
pgsize = 1UL << offsh;
|
||||
rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1);
|
||||
rpa |= rva & (pgsize - 1);
|
||||
|
||||
/* For GB pages, just report regular 2MB page */
|
||||
if (offsh == 30) {
|
||||
pgsize = 1UL << 21;
|
||||
dprintk("%s: GB page translated 0x%lx -> 0x%lx, pgsize: %lu\n",
|
||||
__FUNCTION__, rva, rpa, pgsize);
|
||||
}
|
||||
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
|
||||
error = 0;
|
||||
@ -799,7 +807,7 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
|
||||
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
|
||||
if (!packet) {
|
||||
error = -ENOENT;
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
printk("%s: no packet registered for TID %d\n",
|
||||
__FUNCTION__, task_pid_vnr(current));
|
||||
goto put_and_out;
|
||||
@ -1174,6 +1182,7 @@ struct pager_create_result {
|
||||
int maxprot;
|
||||
uint32_t flags;
|
||||
size_t size;
|
||||
char path[PATH_MAX];
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -1192,6 +1201,33 @@ enum {
|
||||
MF_END
|
||||
};
|
||||
|
||||
static int pager_get_path(struct file *file, char *path) {
|
||||
int error = 0;
|
||||
char *pathbuf, *fullpath;
|
||||
|
||||
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
|
||||
if (!pathbuf) {
|
||||
printk("%s: ERROR: allocating path\n", __FUNCTION__);
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fullpath = d_path(&file->f_path, pathbuf, PATH_MAX);
|
||||
if (!IS_ERR(fullpath)) {
|
||||
memcpy(path, fullpath, strlen(fullpath));
|
||||
}
|
||||
else {
|
||||
path[0] = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
if (pathbuf) {
|
||||
kfree(pathbuf);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
{
|
||||
ihk_device_t dev = ihk_os_to_dev(os);
|
||||
@ -1286,7 +1322,10 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
|
||||
dprintk("%s: filename: %s, premap & zerofill\n",
|
||||
__FUNCTION__, fullpath);
|
||||
}
|
||||
else if (strstr(fullpath, "libmpi") != NULL) {
|
||||
else if (strstr(fullpath, "libmpi") ||
|
||||
strstr(fullpath, "libiomp") ||
|
||||
strstr(fullpath, "libpthread") ||
|
||||
strstr(fullpath, "libc.so")) {
|
||||
mf_flags = MF_PREFETCH;
|
||||
dprintk("%s: filename: %s, prefetch\n",
|
||||
__FUNCTION__, fullpath);
|
||||
@ -1325,6 +1364,7 @@ found:
|
||||
phys = ihk_device_map_memory(dev, result_pa, sizeof(*resp));
|
||||
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
|
||||
if (!resp) {
|
||||
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
|
||||
printk("%s: ERROR: invalid response structure address\n",
|
||||
__FUNCTION__);
|
||||
error = -EINVAL;
|
||||
@ -1335,10 +1375,18 @@ found:
|
||||
resp->maxprot = maxprot;
|
||||
resp->flags = mf_flags;
|
||||
resp->size = st.size;
|
||||
|
||||
error = pager_get_path(file, resp->path);
|
||||
if (error) {
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
|
||||
out_unmap:
|
||||
ihk_device_unmap_virtual(dev, resp, sizeof(*resp));
|
||||
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
|
||||
|
||||
error = 0;
|
||||
out:
|
||||
if (newpager) {
|
||||
kfree(newpager);
|
||||
@ -1570,6 +1618,7 @@ struct pager_map_result {
|
||||
uintptr_t handle;
|
||||
int maxprot;
|
||||
int8_t padding[4];
|
||||
char path[PATH_MAX];
|
||||
};
|
||||
|
||||
static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
|
||||
@ -1624,20 +1673,22 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
|
||||
maxprot |= PROT_EXEC;
|
||||
}
|
||||
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
prot_and_flags = MAP_SHARED |
|
||||
(prot_and_flags & (MAP_POPULATE | MAP_LOCKED));
|
||||
|
||||
#define ANY_WHERE 0
|
||||
if (prot_and_flags & MAP_LOCKED) prot_and_flags |= MAP_POPULATE;
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
|
||||
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot,
|
||||
MAP_SHARED | (prot_and_flags & (MAP_POPULATE | MAP_LOCKED)), pgoff);
|
||||
#endif
|
||||
prot_and_flags, pgoff);
|
||||
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED |
|
||||
(prot_and_flags & (MAP_POPULATE | MAP_LOCKED)), pgoff << PAGE_SHIFT);
|
||||
#else
|
||||
va = vm_mmap(file, ANY_WHERE, len, maxprot,
|
||||
prot_and_flags, pgoff << PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
if (IS_ERR_VALUE(va)) {
|
||||
@ -1657,6 +1708,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
|
||||
phys = ihk_device_map_memory(dev, result_rpa, sizeof(*resp));
|
||||
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
|
||||
if (!resp) {
|
||||
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
|
||||
printk("%s: ERROR: invalid response structure address\n",
|
||||
__FUNCTION__);
|
||||
error = -EINVAL;
|
||||
@ -1665,13 +1717,16 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
|
||||
|
||||
resp->handle = (uintptr_t)pager;
|
||||
resp->maxprot = maxprot;
|
||||
ihk_device_unmap_virtual(dev, resp, sizeof(*resp));
|
||||
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
|
||||
|
||||
error = pager_get_path(file, resp->path);
|
||||
if (error) {
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
error = down_interruptible(&ppd->devobj_pager_lock);
|
||||
if (error) {
|
||||
error = -EINTR;
|
||||
goto out;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
list_add_tail(&pager->list, &ppd->devobj_pager_list);
|
||||
@ -1680,6 +1735,10 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
|
||||
pager = 0;
|
||||
error = 0;
|
||||
|
||||
out_unmap:
|
||||
ihk_device_unmap_virtual(dev, resp, sizeof(*resp));
|
||||
ihk_device_unmap_memory(dev, phys, sizeof(*resp));
|
||||
|
||||
out:
|
||||
if (file) {
|
||||
fput(file);
|
||||
@ -1854,6 +1913,7 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
|
||||
kfree(pager);
|
||||
|
||||
out:
|
||||
mcctrl_put_per_proc_data(ppd);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -2005,6 +2065,17 @@ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
|
||||
/* Map response structure and notify offloading thread */
|
||||
res->ret = ret;
|
||||
res->stid = stid;
|
||||
res->private_data = 0;
|
||||
|
||||
/* Special case for open() to return private_data */
|
||||
if (packet->req.number == __NR_open && ret > 0) {
|
||||
struct fd f;
|
||||
f = fdget(ret);
|
||||
if (f.file) {
|
||||
res->private_data = f.file->private_data;
|
||||
fdput(f);
|
||||
}
|
||||
}
|
||||
|
||||
if (__notify_syscall_requester(os, packet, res) < 0) {
|
||||
printk("%s: WARNING: failed to notify PID %d\n",
|
||||
|
||||
@ -1207,7 +1207,7 @@ sysfsm_unlink(struct sysfsm_data *sdp, const char *path0, int flags)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!flags & SYSFS_UNLINK_KEEP_ANCESTOR) {
|
||||
if (!(flags & SYSFS_UNLINK_KEEP_ANCESTOR)) {
|
||||
cleanup_ancestor(dirp);
|
||||
}
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@ ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 243680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 263680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE_TMP),rhel)
|
||||
@ -33,7 +33,7 @@ endif
|
||||
|
||||
modules:
|
||||
ifneq ($(BUILD_MODULE),none)
|
||||
@(cd $(BUILD_MODULE); make modules)
|
||||
+@(cd $(BUILD_MODULE); make modules)
|
||||
endif
|
||||
|
||||
clean:
|
||||
|
||||
@ -10,16 +10,16 @@ MANDIR=@MANDIR@
|
||||
MCKERNEL_INCDIR=@MCKERNEL_INCDIR@
|
||||
MCKERNEL_LIBDIR=@MCKERNEL_LIBDIR@
|
||||
KDIR ?= @KDIR@
|
||||
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH}
|
||||
ARCH=@ARCH@
|
||||
CFLAGS=-Wall -O -I. -I$(VPATH)/arch/${ARCH} -I${IHKDIR} -I@abs_builddir@/../../../ihk/linux/include
|
||||
LDFLAGS=@LDFLAGS@
|
||||
RPATH=$(shell echo $(LDFLAGS)|awk '{for(i=1;i<=NF;i++){if($$i~/^-L/){w=$$i;sub(/^-L/,"-Wl,-rpath,",w);print w}}}')
|
||||
VPATH=@abs_srcdir@
|
||||
TARGET=mcexec libsched_yield ldump2mcdump.so
|
||||
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
|
||||
LIBS=@LIBS@
|
||||
ARCH=@ARCH@
|
||||
IHKDIR ?= $(VPATH)/../../../ihk/linux/include/
|
||||
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread
|
||||
MCEXEC_LIBS=-lmcexec -lrt -lnuma -pthread -L@abs_builddir@/../../../ihk/linux/user -lihk -Wl,-rpath,$(MCKERNEL_LIBDIR)
|
||||
ENABLE_QLMPI=@ENABLE_QLMPI@
|
||||
|
||||
ifeq ($(ENABLE_QLMPI),yes)
|
||||
@ -40,10 +40,10 @@ mcexec: mcexec.c libmcexec.a
|
||||
# POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate.
|
||||
ifeq ($(ARCH), arm64)
|
||||
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
|
||||
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include -I${IHKDIR} $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
else
|
||||
eclair: eclair.c
|
||||
$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS)
|
||||
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
|
||||
$(CC) -I.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
endif
|
||||
|
||||
ldump2mcdump.so: ldump2mcdump.c
|
||||
@ -53,7 +53,7 @@ libsched_yield: libsched_yield.c
|
||||
$(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl
|
||||
|
||||
libmcexec.a::
|
||||
(cd arch/${ARCH}; make)
|
||||
+(cd arch/${ARCH}; $(MAKE))
|
||||
|
||||
libqlmpi.so: qlmpilib.c
|
||||
$(MCC) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $<
|
||||
@ -77,18 +77,19 @@ ql_talker: ql_talker.o
|
||||
$(CC) $^ $(CFLAGS) -o $@
|
||||
|
||||
clean::
|
||||
(cd arch/${ARCH}; make clean)
|
||||
(cd arch/${ARCH}; $(MAKE) clean)
|
||||
$(RM) $(TARGET) *.o
|
||||
|
||||
.PHONY: all clean install
|
||||
|
||||
install::
|
||||
(cd arch/${ARCH}; make install)
|
||||
(cd arch/${ARCH}; $(MAKE) install)
|
||||
mkdir -p -m 755 $(BINDIR)
|
||||
install -m 755 mcexec $(BINDIR)
|
||||
mkdir -p -m 755 $(MCKERNEL_LIBDIR)
|
||||
install -m 755 ldump2mcdump.so $(MCKERNEL_LIBDIR)
|
||||
install -m 755 libsched_yield.so.1.0.0 $(MCKERNEL_LIBDIR)
|
||||
mkdir -p -m 755 $(MANDIR)/man1
|
||||
install -m 644 mcexec.1 $(MANDIR)/man1/mcexec.1
|
||||
ifeq ($(ENABLE_QLMPI),yes)
|
||||
install -m 644 ../include/qlmpilib.h $(MCKERNEL_INCDIR)
|
||||
|
||||
@ -9,12 +9,15 @@ LIBS=@LIBS@
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
../../libmcexec.a: archdep.o
|
||||
$(AR) cr ../../libmcexec.a archdep.o
|
||||
../../libmcexec.a: archdep.o arch_syscall.o
|
||||
$(AR) cr ../../libmcexec.a archdep.o arch_syscall.o
|
||||
|
||||
archdep.o: archdep.S
|
||||
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
|
||||
|
||||
arch_syscall.o: arch_syscall.c
|
||||
$(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $<
|
||||
|
||||
clean:
|
||||
$(RM) $(TARGET) *.o
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user