From 9989f41fd32b60dbfea47dea20a0c02593d757bc Mon Sep 17 00:00:00 2001
From: Takayuki Okamoto
Date: Tue, 5 Sep 2017 15:06:27 +0900
Subject: [PATCH] add arm64 support

- add arm64-dependent code with GICv3 and SVE support
- fix bugs reported in the architecture-separation reviews

---
 Makefile.in | 8 +-
 arch/arm64/kernel/Makefile.arch | 27 +
 arch/arm64/kernel/assert.c | 52 +
 arch/arm64/kernel/cache.S | 39 +
 arch/arm64/kernel/context.c | 191 +
 arch/arm64/kernel/copy_template.S | 194 +
 arch/arm64/kernel/coredump.c | 35 +
 arch/arm64/kernel/cpu.c | 1629 +++++++++
 arch/arm64/kernel/cpufeature.c | 1005 ++++++
 arch/arm64/kernel/cputable.c | 14 +
 arch/arm64/kernel/debug-monitors.c | 110 +
 arch/arm64/kernel/entry-fpsimd.S | 126 +
 arch/arm64/kernel/entry.S | 558 +++
 arch/arm64/kernel/fault.c | 289 ++
 arch/arm64/kernel/fpsimd.c | 325 ++
 arch/arm64/kernel/gencore.c | 463 +++
 arch/arm64/kernel/head.S | 805 +++++
 arch/arm64/kernel/hw_breakpoint.c | 409 +++
 arch/arm64/kernel/hyp-stub.S | 58 +
 arch/arm64/kernel/include/arch-bitops.h | 19 +
 arch/arm64/kernel/include/arch-futex.h | 137 +
 arch/arm64/kernel/include/arch-lock.h | 605 ++++
 arch/arm64/kernel/include/arch-memory.h | 487 +++
 arch/arm64/kernel/include/arch-perfctr.h | 72 +
 arch/arm64/kernel/include/arch-string.h | 13 +
 arch/arm64/kernel/include/arch-timer.h | 14 +
 arch/arm64/kernel/include/arch/auxvec.h | 7 +
 arch/arm64/kernel/include/arch/cpu.h | 103 +
 arch/arm64/kernel/include/arch/mm.h | 17 +
 arch/arm64/kernel/include/arch/mman.h | 37 +
 arch/arm64/kernel/include/arch/rusage.h | 60 +
 arch/arm64/kernel/include/arch/shm.h | 41 +
 arch/arm64/kernel/include/arm-gic-v2.h | 106 +
 arch/arm64/kernel/include/arm-gic-v3.h | 391 ++
 arch/arm64/kernel/include/asm-offsets.h | 27 +
 arch/arm64/kernel/include/assembler.h | 147 +
 arch/arm64/kernel/include/cache.h | 7 +
 arch/arm64/kernel/include/cas.h | 32 +
 arch/arm64/kernel/include/compiler.h | 32 +
 arch/arm64/kernel/include/const.h | 23 +
 arch/arm64/kernel/include/context.h | 8 +
 arch/arm64/kernel/include/cpufeature.h | 191 +
 arch/arm64/kernel/include/cpuinfo.h | 34 +
 arch/arm64/kernel/include/cpulocal.h | 13 +
 arch/arm64/kernel/include/cputable.h | 12 +
 arch/arm64/kernel/include/cputype.h | 47 +
 arch/arm64/kernel/include/debug-monitors.h | 35 +
 arch/arm64/kernel/include/elf.h | 28 +
 arch/arm64/kernel/include/elfcore.h | 92 +
 arch/arm64/kernel/include/elfcoregpl.h | 98 +
 arch/arm64/kernel/include/elfnote.h | 60 +
 arch/arm64/kernel/include/errno.h | 112 +
 arch/arm64/kernel/include/esr.h | 180 +
 arch/arm64/kernel/include/fpsimd.h | 99 +
 arch/arm64/kernel/include/fpsimdmacros.h | 151 +
 arch/arm64/kernel/include/hw_breakpoint.h | 92 +
 arch/arm64/kernel/include/hwcap.h | 28 +
 arch/arm64/kernel/include/ihk/atomic.h | 363 ++
 arch/arm64/kernel/include/ihk/context.h | 80 +
 arch/arm64/kernel/include/ihk/ikc.h | 14 +
 arch/arm64/kernel/include/ihk/types.h | 35 +
 arch/arm64/kernel/include/io.h | 99 +
 arch/arm64/kernel/include/irq.h | 70 +
 arch/arm64/kernel/include/irqflags.h | 31 +
 arch/arm64/kernel/include/linkage.h | 25 +
 arch/arm64/kernel/include/mmu_context.h | 22 +
 arch/arm64/kernel/include/pgtable-hwdef.h | 196 +
 arch/arm64/kernel/include/pgtable.h | 7 +
 arch/arm64/kernel/include/prctl.h | 17 +
 arch/arm64/kernel/include/psci.h | 68 +
 arch/arm64/kernel/include/ptrace.h | 198 ++
 arch/arm64/kernel/include/registers.h | 129 +
 arch/arm64/kernel/include/rlimit.h | 96 +
 arch/arm64/kernel/include/signal.h | 409 +++
 arch/arm64/kernel/include/smp.h | 23 +
 arch/arm64/kernel/include/stringify.h | 17 +
arch/arm64/kernel/include/syscall_list.h | 148 + arch/arm64/kernel/include/sysreg.h | 339 ++ arch/arm64/kernel/include/thread_info.h | 100 + arch/arm64/kernel/include/traps.h | 27 + arch/arm64/kernel/include/vdso.h | 30 + arch/arm64/kernel/include/virt.h | 8 + arch/arm64/kernel/irq-gic-v2.c | 158 + arch/arm64/kernel/irq-gic-v3.c | 406 +++ arch/arm64/kernel/local.c | 87 + arch/arm64/kernel/memcpy.S | 78 + arch/arm64/kernel/memory.c | 3167 +++++++++++++++++ arch/arm64/kernel/memset.S | 220 ++ arch/arm64/kernel/mikc.c | 44 + arch/arm64/kernel/perfctr.c | 156 + arch/arm64/kernel/perfctr_armv8pmu.c | 653 ++++ arch/arm64/kernel/postk_print_sysreg.c | 311 ++ arch/arm64/kernel/proc-macros.S | 13 + arch/arm64/kernel/proc.S | 148 + arch/arm64/kernel/psci.c | 155 + arch/arm64/kernel/ptrace.c | 1006 ++++++ arch/arm64/kernel/smp.c | 22 + arch/arm64/kernel/syscall.c | 1869 ++++++++++ arch/arm64/kernel/trampoline.S | 56 + arch/arm64/kernel/traps.c | 177 + arch/arm64/kernel/vdso.c | 205 ++ arch/arm64/kernel/vdso.so.S | 32 + arch/arm64/kernel/vdso/Makefile.in | 123 + arch/arm64/kernel/vdso/gen_vdso_offsets.sh | 17 + arch/arm64/kernel/vdso/gettimeofday.c | 205 ++ arch/arm64/kernel/vdso/note.S | 28 + arch/arm64/kernel/vdso/sigreturn.S | 39 + arch/arm64/kernel/vdso/syscall.h | 15 + arch/arm64/kernel/vdso/vdso.lds.S | 96 + arch/x86/kernel/Makefile.arch | 3 + arch/x86/kernel/coredump.c | 59 + arch/x86/kernel/cpu.c | 74 + arch/x86/kernel/gencore.c | 2 + arch/x86/kernel/include/arch-futex.h | 66 + arch/x86/kernel/include/arch-memory.h | 7 + arch/x86/kernel/include/elf.h | 59 + arch/x86/kernel/include/elfcore.h | 2 + arch/x86/kernel/include/elfcoregpl.h | 2 + arch/x86/kernel/include/ihk/types.h | 8 + arch/x86/kernel/include/syscall_list.h | 4 + arch/x86/kernel/memory.c | 115 + arch/x86/kernel/perfctr.c | 53 + arch/x86/kernel/syscall.c | 19 + config.h.in | 3 + configure | 195 +- configure.ac | 89 +- executer/kernel/mcctrl/Makefile.in | 9 +- executer/kernel/mcctrl/arch/arm64/Makefile.in | 2 + executer/kernel/mcctrl/arch/arm64/archdeps.c | 316 ++ .../mcctrl/arch/arm64/include/archdeps.h | 18 + executer/kernel/mcctrl/arch/x86_64/archdeps.c | 103 + .../mcctrl/arch/x86_64/include/archdeps.h | 18 + executer/kernel/mcctrl/binfmt_mcexec.c | 18 + executer/kernel/mcctrl/control.c | 103 + executer/kernel/mcctrl/driver.c | 4 + executer/kernel/mcctrl/mcctrl.h | 37 + executer/kernel/mcctrl/procfs.c | 215 ++ executer/kernel/mcctrl/syscall.c | 185 + executer/kernel/mcctrl/sysfs_files.c | 129 + executer/user/Makefile.in | 11 +- executer/user/arch/arm64/Makefile.in | 23 + executer/user/arch/arm64/arch-eclair.c | 51 + executer/user/arch/arm64/arch_args.h | 126 + executer/user/arch/arm64/archdep.S | 16 + .../user/arch/arm64/include/arch-eclair.h | 24 + executer/user/arch/x86_64/arch-eclair.c | 101 + executer/user/arch/x86_64/arch_args.h | 4 + .../user/arch/x86_64/include/arch-eclair.h | 24 + executer/user/eclair.c | 139 + executer/user/eclair.h | 22 + executer/user/mcexec.c | 552 +++ kernel/Makefile.build.in | 13 +- kernel/ap.c | 1 + kernel/config/config.smp-arm64.in | 39 + kernel/config/smp-arm64_type1.lds | 50 + kernel/config/smp-arm64_type2.lds | 50 + kernel/config/smp-arm64_type3.lds | 50 + kernel/config/smp-arm64_type4.lds | 50 + kernel/devobj.c | 18 + kernel/fileobj.c | 1 + kernel/gencore.c | 499 +++ kernel/host.c | 1 + kernel/include/auxvec.h | 1 + kernel/include/elfcore.h | 119 + kernel/include/elfcoregpl.h | 67 + kernel/include/futex.h | 4 + kernel/include/lwk/compiler.h | 12 + kernel/include/memobj.h | 4 + 
kernel/include/process.h | 9 + kernel/include/syscall.h | 15 + kernel/include/xpmem.h | 5 + kernel/init.c | 27 + kernel/listeners.c | 1 + kernel/mem.c | 113 + kernel/process.c | 140 + kernel/procfs.c | 54 + kernel/syscall.c | 470 ++- kernel/xpmem.c | 23 + lib/bitops.c | 2 +- lib/include/bitops-__ffs.h | 2 +- lib/include/bitops-clear_bit.h | 2 +- lib/include/bitops-ffz.h | 2 +- lib/include/bitops-fls.h | 2 +- lib/include/bitops-set_bit.h | 2 +- lib/include/bitops.h | 2 +- lib/include/ihk/cpu.h | 15 + lib/include/ihk/debug.h | 16 + lib/include/ihk/perfctr.h | 15 + lib/include/mc_perf_event.h | 4 + lib/include/memory.h | 10 + lib/include/types.h | 1 + lib/vsprintf.c | 23 + 192 files changed, 26941 insertions(+), 34 deletions(-) create mode 100644 arch/arm64/kernel/Makefile.arch create mode 100644 arch/arm64/kernel/assert.c create mode 100644 arch/arm64/kernel/cache.S create mode 100644 arch/arm64/kernel/context.c create mode 100644 arch/arm64/kernel/copy_template.S create mode 100644 arch/arm64/kernel/coredump.c create mode 100644 arch/arm64/kernel/cpu.c create mode 100644 arch/arm64/kernel/cpufeature.c create mode 100644 arch/arm64/kernel/cputable.c create mode 100644 arch/arm64/kernel/debug-monitors.c create mode 100644 arch/arm64/kernel/entry-fpsimd.S create mode 100644 arch/arm64/kernel/entry.S create mode 100644 arch/arm64/kernel/fault.c create mode 100644 arch/arm64/kernel/fpsimd.c create mode 100644 arch/arm64/kernel/gencore.c create mode 100644 arch/arm64/kernel/head.S create mode 100644 arch/arm64/kernel/hw_breakpoint.c create mode 100644 arch/arm64/kernel/hyp-stub.S create mode 100644 arch/arm64/kernel/include/arch-bitops.h create mode 100644 arch/arm64/kernel/include/arch-futex.h create mode 100644 arch/arm64/kernel/include/arch-lock.h create mode 100644 arch/arm64/kernel/include/arch-memory.h create mode 100644 arch/arm64/kernel/include/arch-perfctr.h create mode 100644 arch/arm64/kernel/include/arch-string.h create mode 100644 arch/arm64/kernel/include/arch-timer.h create mode 100644 arch/arm64/kernel/include/arch/auxvec.h create mode 100644 arch/arm64/kernel/include/arch/cpu.h create mode 100644 arch/arm64/kernel/include/arch/mm.h create mode 100644 arch/arm64/kernel/include/arch/mman.h create mode 100644 arch/arm64/kernel/include/arch/rusage.h create mode 100644 arch/arm64/kernel/include/arch/shm.h create mode 100644 arch/arm64/kernel/include/arm-gic-v2.h create mode 100644 arch/arm64/kernel/include/arm-gic-v3.h create mode 100644 arch/arm64/kernel/include/asm-offsets.h create mode 100644 arch/arm64/kernel/include/assembler.h create mode 100644 arch/arm64/kernel/include/cache.h create mode 100644 arch/arm64/kernel/include/cas.h create mode 100644 arch/arm64/kernel/include/compiler.h create mode 100644 arch/arm64/kernel/include/const.h create mode 100644 arch/arm64/kernel/include/context.h create mode 100644 arch/arm64/kernel/include/cpufeature.h create mode 100644 arch/arm64/kernel/include/cpuinfo.h create mode 100644 arch/arm64/kernel/include/cpulocal.h create mode 100644 arch/arm64/kernel/include/cputable.h create mode 100644 arch/arm64/kernel/include/cputype.h create mode 100644 arch/arm64/kernel/include/debug-monitors.h create mode 100644 arch/arm64/kernel/include/elf.h create mode 100644 arch/arm64/kernel/include/elfcore.h create mode 100644 arch/arm64/kernel/include/elfcoregpl.h create mode 100644 arch/arm64/kernel/include/elfnote.h create mode 100644 arch/arm64/kernel/include/errno.h create mode 100644 arch/arm64/kernel/include/esr.h create mode 100644 
arch/arm64/kernel/include/fpsimd.h create mode 100644 arch/arm64/kernel/include/fpsimdmacros.h create mode 100644 arch/arm64/kernel/include/hw_breakpoint.h create mode 100644 arch/arm64/kernel/include/hwcap.h create mode 100644 arch/arm64/kernel/include/ihk/atomic.h create mode 100644 arch/arm64/kernel/include/ihk/context.h create mode 100644 arch/arm64/kernel/include/ihk/ikc.h create mode 100644 arch/arm64/kernel/include/ihk/types.h create mode 100644 arch/arm64/kernel/include/io.h create mode 100644 arch/arm64/kernel/include/irq.h create mode 100644 arch/arm64/kernel/include/irqflags.h create mode 100644 arch/arm64/kernel/include/linkage.h create mode 100644 arch/arm64/kernel/include/mmu_context.h create mode 100644 arch/arm64/kernel/include/pgtable-hwdef.h create mode 100644 arch/arm64/kernel/include/pgtable.h create mode 100644 arch/arm64/kernel/include/prctl.h create mode 100644 arch/arm64/kernel/include/psci.h create mode 100644 arch/arm64/kernel/include/ptrace.h create mode 100644 arch/arm64/kernel/include/registers.h create mode 100644 arch/arm64/kernel/include/rlimit.h create mode 100644 arch/arm64/kernel/include/signal.h create mode 100644 arch/arm64/kernel/include/smp.h create mode 100644 arch/arm64/kernel/include/stringify.h create mode 100644 arch/arm64/kernel/include/syscall_list.h create mode 100644 arch/arm64/kernel/include/sysreg.h create mode 100644 arch/arm64/kernel/include/thread_info.h create mode 100644 arch/arm64/kernel/include/traps.h create mode 100644 arch/arm64/kernel/include/vdso.h create mode 100644 arch/arm64/kernel/include/virt.h create mode 100644 arch/arm64/kernel/irq-gic-v2.c create mode 100644 arch/arm64/kernel/irq-gic-v3.c create mode 100644 arch/arm64/kernel/local.c create mode 100644 arch/arm64/kernel/memcpy.S create mode 100644 arch/arm64/kernel/memory.c create mode 100644 arch/arm64/kernel/memset.S create mode 100644 arch/arm64/kernel/mikc.c create mode 100644 arch/arm64/kernel/perfctr.c create mode 100644 arch/arm64/kernel/perfctr_armv8pmu.c create mode 100644 arch/arm64/kernel/postk_print_sysreg.c create mode 100644 arch/arm64/kernel/proc-macros.S create mode 100644 arch/arm64/kernel/proc.S create mode 100644 arch/arm64/kernel/psci.c create mode 100644 arch/arm64/kernel/ptrace.c create mode 100644 arch/arm64/kernel/smp.c create mode 100644 arch/arm64/kernel/syscall.c create mode 100644 arch/arm64/kernel/trampoline.S create mode 100644 arch/arm64/kernel/traps.c create mode 100644 arch/arm64/kernel/vdso.c create mode 100644 arch/arm64/kernel/vdso.so.S create mode 100644 arch/arm64/kernel/vdso/Makefile.in create mode 100644 arch/arm64/kernel/vdso/gen_vdso_offsets.sh create mode 100644 arch/arm64/kernel/vdso/gettimeofday.c create mode 100644 arch/arm64/kernel/vdso/note.S create mode 100644 arch/arm64/kernel/vdso/sigreturn.S create mode 100644 arch/arm64/kernel/vdso/syscall.h create mode 100644 arch/arm64/kernel/vdso/vdso.lds.S create mode 100644 arch/x86/kernel/coredump.c create mode 100644 arch/x86/kernel/include/elf.h create mode 100644 executer/kernel/mcctrl/arch/arm64/Makefile.in create mode 100644 executer/kernel/mcctrl/arch/arm64/archdeps.c create mode 100644 executer/kernel/mcctrl/arch/arm64/include/archdeps.h create mode 100644 executer/kernel/mcctrl/arch/x86_64/include/archdeps.h create mode 100644 executer/user/arch/arm64/Makefile.in create mode 100644 executer/user/arch/arm64/arch-eclair.c create mode 100644 executer/user/arch/arm64/arch_args.h create mode 100644 executer/user/arch/arm64/archdep.S create mode 100644 
executer/user/arch/arm64/include/arch-eclair.h create mode 100644 executer/user/arch/x86_64/arch-eclair.c create mode 100644 executer/user/arch/x86_64/include/arch-eclair.h create mode 100644 executer/user/eclair.h create mode 100644 kernel/config/config.smp-arm64.in create mode 100644 kernel/config/smp-arm64_type1.lds create mode 100644 kernel/config/smp-arm64_type2.lds create mode 100644 kernel/config/smp-arm64_type3.lds create mode 100644 kernel/config/smp-arm64_type4.lds create mode 100644 kernel/gencore.c create mode 100644 kernel/include/elfcore.h create mode 100644 kernel/include/elfcoregpl.h diff --git a/Makefile.in b/Makefile.in index ac9bf784..fd5fcb0d 100755 --- a/Makefile.in +++ b/Makefile.in @@ -9,7 +9,7 @@ all:: @(cd executer/kernel/mcoverlayfs; make modules) @(cd executer/user; make) @case "$(TARGET)" in \ - attached-mic | builtin-x86 | builtin-mic | smp-x86) \ + attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \ (cd kernel; make) \ ;; \ *) \ @@ -23,7 +23,7 @@ install:: @(cd executer/kernel/mcoverlayfs; make install) @(cd executer/user; make install) @case "$(TARGET)" in \ - attached-mic | builtin-x86 | builtin-mic | smp-x86) \ + attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \ (cd kernel; make install) \ ;; \ *) \ @@ -46,7 +46,7 @@ install:: mkdir -p -m 755 $(MANDIR)/man1; \ install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \ ;; \ - smp-x86) \ + smp-x86 | smp-arm64) \ mkdir -p -m 755 $(SBINDIR); \ install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \ install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \ @@ -70,7 +70,7 @@ clean:: @(cd executer/kernel/mcoverlayfs; make clean) @(cd executer/user; make clean) @case "$(TARGET)" in \ - attached-mic | builtin-x86 | builtin-mic | smp-x86) \ + attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \ (cd kernel; make clean) \ ;; \ *) \ diff --git a/arch/arm64/kernel/Makefile.arch b/arch/arm64/kernel/Makefile.arch new file mode 100644 index 00000000..ac60a4a6 --- /dev/null +++ b/arch/arm64/kernel/Makefile.arch @@ -0,0 +1,27 @@ +# Makefile.arch COPYRIGHT FUJITSU LIMITED 2015-2017 +VDSO_SRCDIR = $(SRC)/../arch/$(IHKARCH)/kernel/vdso +VDSO_SO_O = $(O)/vdso.so.o + +IHK_OBJS += assert.o cache.o cpu.o cputable.o context.o entry.o entry-fpsimd.o +IHK_OBJS += fault.o head.o hyp-stub.o local.o perfctr.o perfctr_armv8pmu.o proc.o proc-macros.o +IHK_OBJS += psci.o smp.o trampoline.o traps.o fpsimd.o +IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o +IHK_OBJS += $(notdir $(VDSO_SO_O)) memory.o syscall.o vdso.o + +IHK_OBJS += irq-gic-v2.o irq-gic-v3.o +IHK_OBJS += memcpy.o memset.o +IHK_OBJS += cpufeature.o + +# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. 
+# IHK_OBJS added coredump.o +IHK_OBJS += coredump.o + +$(VDSO_SO_O): $(VDSO_SRCDIR)/vdso.so + +$(VDSO_SRCDIR)/vdso.so: FORCE + $(call echo_cmd,BUILD VDSO,$(TARGET)) + @mkdir -p $(O)/vdso + @TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_SRCDIR) $(SUBOPTS) prepare + @TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_SRCDIR) $(SUBOPTS) + +FORCE: diff --git a/arch/arm64/kernel/assert.c b/arch/arm64/kernel/assert.c new file mode 100644 index 00000000..81aa3e0e --- /dev/null +++ b/arch/arm64/kernel/assert.c @@ -0,0 +1,52 @@ +/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* assert for struct pt_regs member offset & size define */ +STATIC_ASSERT(offsetof(struct pt_regs, regs[0]) == S_X0); +STATIC_ASSERT(offsetof(struct pt_regs, regs[1]) == S_X1); +STATIC_ASSERT(offsetof(struct pt_regs, regs[2]) == S_X2); +STATIC_ASSERT(offsetof(struct pt_regs, regs[3]) == S_X3); +STATIC_ASSERT(offsetof(struct pt_regs, regs[4]) == S_X4); +STATIC_ASSERT(offsetof(struct pt_regs, regs[5]) == S_X5); +STATIC_ASSERT(offsetof(struct pt_regs, regs[6]) == S_X6); +STATIC_ASSERT(offsetof(struct pt_regs, regs[7]) == S_X7); +STATIC_ASSERT(offsetof(struct pt_regs, regs[30]) == S_LR); +STATIC_ASSERT(offsetof(struct pt_regs, sp) == S_SP); +STATIC_ASSERT(offsetof(struct pt_regs, pc) == S_PC); +STATIC_ASSERT(offsetof(struct pt_regs, pstate) == S_PSTATE); +STATIC_ASSERT(offsetof(struct pt_regs, orig_x0) == S_ORIG_X0); +STATIC_ASSERT(offsetof(struct pt_regs, syscallno) == S_SYSCALLNO); +STATIC_ASSERT(sizeof(struct pt_regs) == S_FRAME_SIZE); + +/* assert for struct cpu_info member offset & size define */ +STATIC_ASSERT(offsetof(struct cpu_info, cpu_setup) == CPU_INFO_SETUP); +STATIC_ASSERT(sizeof(struct cpu_info) == CPU_INFO_SZ); + +/* assert for struct thread_info member offset define */ +STATIC_ASSERT(offsetof(struct thread_info, flags) == TI_FLAGS); +STATIC_ASSERT(offsetof(struct thread_info, cpu_context) == TI_CPU_CONTEXT); + +/* assert for arch depend kernel stack size and common kernel stack pages */ +STATIC_ASSERT((KERNEL_STACK_SIZE * 2) < (KERNEL_STACK_NR_PAGES * PAGE_SIZE)); + +/* assert for struct secondary_data member offset define */ +STATIC_ASSERT(offsetof(struct secondary_data, stack) == SECONDARY_DATA_STACK); +STATIC_ASSERT(offsetof(struct secondary_data, next_pc) == SECONDARY_DATA_NEXT_PC); +STATIC_ASSERT(offsetof(struct secondary_data, arg) == SECONDARY_DATA_ARG); + +/* assert for sve defines */ +/* @ref.impl arch/arm64/kernel/signal.c::BUILD_BUG_ON in the init_user_layout */ +STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved) > ALIGN_UP(sizeof(struct _aarch64_ctx), 16)); +STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved) - + ALIGN_UP(sizeof(struct _aarch64_ctx), 16) > sizeof(struct extra_context)); +STATIC_ASSERT(SVE_PT_FPSIMD_OFFSET == sizeof(struct user_sve_header)); +STATIC_ASSERT(SVE_PT_SVE_OFFSET == sizeof(struct user_sve_header)); diff --git a/arch/arm64/kernel/cache.S b/arch/arm64/kernel/cache.S new file mode 100644 index 00000000..fae74fa5 --- /dev/null +++ b/arch/arm64/kernel/cache.S @@ -0,0 +1,39 @@ +/* cache.S COPYRIGHT FUJITSU LIMITED 2015 */ + +#include +#include "proc-macros.S" + +/* + * __inval_cache_range(start, end) + * - start - start address of region + * - end - end address of region + */ +ENTRY(__inval_cache_range) + /* FALLTHROUGH */ + +/* + * __dma_inv_range(start, end) + * - start - virtual start address of region + * - end - virtual end 
address of region
+ */
+__dma_inv_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	tst	x1, x3				// end cache line aligned?
+	bic	x1, x1, x3
+	b.eq	1f
+	dc	civac, x1			// clean & invalidate D / U line
+1:	tst	x0, x3				// start cache line aligned?
+	bic	x0, x0, x3
+	b.eq	2f
+	dc	civac, x0			// clean & invalidate D / U line
+	b	3f
+2:	dc	ivac, x0			// invalidate D / U line
+3:	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	2b
+	dsb	sy
+	ret
+ENDPROC(__inval_cache_range)
+ENDPROC(__dma_inv_range)
+
diff --git a/arch/arm64/kernel/context.c b/arch/arm64/kernel/context.c
new file mode 100644
index 00000000..5995964b
--- /dev/null
+++ b/arch/arm64/kernel/context.c
@@ -0,0 +1,191 @@
+/* context.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* @ref.impl arch/arm64/include/asm/mmu_context.h::MAX_ASID_BITS */
+#define MAX_ASID_BITS	16
+#define ASID_FIRST_VERSION	(1 << MAX_ASID_BITS)
+#define ASID_MASK	((1 << MAX_ASID_BITS) - 1)
+#define VERSION_MASK	(0xFFFF << MAX_ASID_BITS)
+
+/* @ref.impl arch/arm64/mm/context.c::asid_bits */
+#define asid_bits(reg) \
+	(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+
+#define MAX_CTX_NR	(1UL << MAX_ASID_BITS)
+DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR) = { 1 };	/* context number 0 is reserved. */
+
+/* cpu_asid lock */
+static ihk_spinlock_t cpu_asid_lock = SPIN_LOCK_UNLOCKED;
+
+/* last allocated ASID, initialized to 0x0001_0000 */
+static unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+
+/* @ref.impl arch/arm64/mm/context.c::set_mm_context */
+/* set the ASID in kernel_context_t.context */
+static void set_mm_context(struct page_table *pgtbl, unsigned int asid)
+{
+	unsigned int context = get_address_space_id(pgtbl);
+	if (likely((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+		set_address_space_id(pgtbl, asid);
+	}
+}
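
[The generation check used throughout this file compares the upper bits of a context ID against cpu_last_asid: a non-zero value of the XOR shifted right by MAX_ASID_BITS means the context belongs to an old ASID generation and needs a fresh ASID. A minimal standalone sketch of that check, separate from the patch (the constants mirror the definitions above; the sample values are made up):

    #include <stdio.h>
    #define MAX_ASID_BITS 16

    int main(void)
    {
        unsigned int cpu_last_asid = (1u << MAX_ASID_BITS) | 42; /* generation 1, ASID 42 */
        unsigned int ctx_same = (1u << MAX_ASID_BITS) | 7;       /* generation 1: still valid */
        unsigned int ctx_old  = 13;                              /* generation 0: stale */

        /* zero means the context already holds a current-generation ASID */
        printf("%u\n", (ctx_same ^ cpu_last_asid) >> MAX_ASID_BITS); /* prints 0 */
        printf("%u\n", (ctx_old  ^ cpu_last_asid) >> MAX_ASID_BITS); /* prints 1 */
        return 0;
    }
]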
+/* @ref.impl arch/arm64/mm/context.c::__new_context */
+/* ASID allocation for a new process */
+static inline void __new_context(struct page_table *pgtbl)
+{
+	unsigned int asid;
+	unsigned int bits = asid_bits();
+	unsigned long flags;
+	unsigned int context = get_address_space_id(pgtbl);
+	unsigned long index = 0;
+
+	flags = ihk_mc_spinlock_lock(&cpu_asid_lock);
+
+	/* does this context already hold a current-generation ASID? */
+	if (!unlikely((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+		/* yes: no new ASID needs to be assigned */
+		ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
+		return;
+	}
+
+	/* no: a new ASID must be assigned */
+	/* search from the previously assigned number */
+	index = (cpu_last_asid & ASID_MASK) + 1;
+	asid = find_next_zero_bit(mmu_context_bmap, MAX_CTX_NR, index);
+
+	/* upper limit exceeded */
+	if (asid >= (1 << bits)) {
+		/* wrap around and search again from 1 */
+		asid = find_next_zero_bit(mmu_context_bmap, index, 1);
+
+		/* no free ASID below the previous allocation point: give up */
+		if (unlikely(asid >= index)) {
+			ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
+			panic("__new_context(): PANIC: Context Number Depletion.\n");
+		}
+	}
+
+	/* mark the assigned context number in the bitmap */
+	mmu_context_bmap[asid >> 6] |= (1UL << (asid & 63));
+
+	/* record the last assigned context number */
+	cpu_last_asid = asid | (cpu_last_asid & VERSION_MASK);
+
+	set_mm_context(pgtbl, cpu_last_asid);
+	ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
+}
+
+void free_mmu_context(struct page_table *pgtbl)
+{
+	unsigned int context = get_address_space_id(pgtbl);
+	unsigned int nr = context & ASID_MASK;
+	unsigned long flags = ihk_mc_spinlock_lock(&cpu_asid_lock);
+
+	/* clear the context number in the bitmap */
+	mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
+	ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
+}
+
+/* set-ttbr0 assembler code, external */
+/* in arch/arm64/kernel/proc.S */
+extern void *cpu_do_switch_mm(translation_table_t* tt_pa, unsigned int asid);
+
+/* @ref.impl arch/arm64/include/asm/mmu_context.h::switch_new_context */
+/* ASID allocation for a new process */
+static inline void switch_new_context(struct page_table *pgtbl)
+{
+	unsigned long flags;
+	translation_table_t* tt_pa;
+	unsigned int context;
+
+	/* ASID allocation */
+	__new_context(pgtbl);
+	context = get_address_space_id(pgtbl);
+
+	/* disable interrupts, saving flags */
+	flags = cpu_disable_interrupt_save();
+
+	tt_pa = get_translation_table_as_paddr(pgtbl);
+	cpu_do_switch_mm(tt_pa, context & ASID_MASK);
+
+	/* restore interrupts */
+	cpu_restore_interrupt(flags);
+}
+
+/* @ref.impl arch/arm64/include/asm/mmu_context.h::check_and_switch_context */
+/* ASID allocation */
+void switch_mm(struct page_table *pgtbl)
+{
+	unsigned int context = get_address_space_id(pgtbl);
+
+	/* disable TTBR0 translation while switching */
+	cpu_set_reserved_ttbr0();
+
+	/* check whether this is a new or an existing process */
+	if (!((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+		translation_table_t* tt_pa;
+
+		/* existing process */
+		tt_pa = get_translation_table_as_paddr(pgtbl);
+		cpu_do_switch_mm(tt_pa, context & ASID_MASK);
+
+/* TODO: tif_switch_mm / after context switch */
+//	} else if (irqs_disabled()) {
+//		/*
+//		 * Defer the new ASID allocation until after the context
+//		 * switch critical region since __new_context() cannot be
+//		 * called with interrupts disabled.
+//		 */
+//		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
+	} else {
+		/* new process */
+		/* ASID allocation & set ttbr0 */
+		switch_new_context(pgtbl);
+	}
+}
+
+/* context switch assembler code, external */
+/* in arch/arm64/kernel/entry.S */
+extern void *cpu_switch_to(struct thread_info *prev, struct thread_info *next, void *prev_proc);
+
+/* context switch C function */
+/* TODO: fpreg etc..
save & restore */ +static inline void *switch_to(struct thread_info *prev, + struct thread_info *next, + void *prev_proc) +{ + void *last = NULL; + + next->cpu = ihk_mc_get_processor_id(); + last = cpu_switch_to(prev, next, prev_proc); + + return last; +} + +/* common unit I/F, for context switch */ +void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx, + ihk_mc_kernel_context_t *new_ctx, + void *prev) +{ + struct thread_info *prev_ti = NULL; + struct thread_info *next_ti = NULL; + + /* get next thread_info addr */ + next_ti = new_ctx->thread; + if (likely(old_ctx)) { + /* get prev thread_info addr */ + prev_ti = old_ctx->thread; + } + + /* switch next thread_info & process */ + return switch_to(prev_ti, next_ti, prev); +} diff --git a/arch/arm64/kernel/copy_template.S b/arch/arm64/kernel/copy_template.S new file mode 100644 index 00000000..617126ab --- /dev/null +++ b/arch/arm64/kernel/copy_template.S @@ -0,0 +1,194 @@ +/* copy_template.S COPYRIGHT FUJITSU LIMITED 2017 */ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. 
+	 */
+	ands	tmp1, count, #0x30
+	b.eq	.Ltiny15
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+1:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+2:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+.Ltiny15:
+	/*
+	 * Prefer to break one ldp/stp into several load/store operations to
+	 * access memory in increasing address order, rather than to
+	 * load/store 16 bytes from (src-16) to (dst-16) and move src back
+	 * to an aligned address, which is how the original cortex memcpy
+	 * works. If the original memcpy process were kept here, memmove
+	 * would need to satisfy the precondition that the src address is at
+	 * least 16 bytes bigger than the dst address; otherwise some source
+	 * data would be overwritten when memmove calls memcpy directly. To
+	 * make memmove simpler and to decouple memcpy from memmove, the
+	 * original process was dropped.
+	 */
+	tbz	count, #3, 1f
+	ldr1	tmp1, src, #8
+	str1	tmp1, dst, #8
+1:
+	tbz	count, #2, 2f
+	ldr1	tmp1w, src, #4
+	str1	tmp1w, dst, #4
+2:
+	tbz	count, #1, 3f
+	ldrh1	tmp1w, src, #2
+	strh1	tmp1w, dst, #2
+3:
+	tbz	count, #0, .Lexitfunc
+	ldrb1	tmp1w, src, #1
+	strb1	tmp1w, dst, #1
+
+	b	.Lexitfunc
+
+.Lcpy_over64:
+	subs	count, count, #128
+	b.ge	.Lcpy_body_large
+	/*
+	 * Less than 128 bytes to copy, so handle 64 bytes here and then
+	 * jump to the tail.
+	 */
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	b	.Lexitfunc
+
+	/*
+	 * Critical loop.  Start at a new cache line boundary.  Assuming
+	 * 64 bytes per line this ensures the entire loop is in one line.
+	 */
+	.p2align	L1_CACHE_SHIFT
+.Lcpy_body_large:
+	/* pre-load 64 bytes of data. */
+	ldp1	A_l, A_h, src, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	ldp1	D_l, D_h, src, #16
+1:
+	/*
+	 * interleave the load of the next 64-byte block with the store of
+	 * the previously loaded 64 bytes of data.
+	 */
+	stp1	A_l, A_h, dst, #16
+	ldp1	A_l, A_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	D_l, D_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	subs	count, count, #64
+	b.ge	1b
+	stp1	A_l, A_h, dst, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+.Lexitfunc:
diff --git a/arch/arm64/kernel/coredump.c b/arch/arm64/kernel/coredump.c
new file mode 100644
index 00000000..026adf48
--- /dev/null
+++ b/arch/arm64/kernel/coredump.c
@@ -0,0 +1,35 @@
+/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
+#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
+#include
+#include
+#include
+
+void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
+{
+	struct pt_regs *regs = regs0;
+	struct elf_prstatus64 tmp_prstatus;
+/*
+	We ignore the following entries for now.
+ + struct elf_siginfo pr_info; + short int pr_cursig; + a8_uint64_t pr_sigpend; + a8_uint64_t pr_sighold; + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus64_timeval pr_utime; + struct prstatus64_timeval pr_stime; + struct prstatus64_timeval pr_cutime; + struct prstatus64_timeval pr_cstime; + */ + /* copy x0-30, sp, pc, pstate */ + memcpy(&tmp_prstatus.pr_reg, ®s->user_regs, sizeof(tmp_prstatus.pr_reg)); + tmp_prstatus.pr_fpvalid = 0; /* We assume no fp */ + + /* copy unaligned prstatus addr */ + memcpy(prstatus, &tmp_prstatus, sizeof(*prstatus)); +} + +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/arm64/kernel/cpu.c b/arch/arm64/kernel/cpu.c new file mode 100644 index 00000000..3fe31507 --- /dev/null +++ b/arch/arm64/kernel/cpu.c @@ -0,0 +1,1629 @@ +/* cpu.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef POSTK_DEBUG_ARCH_DEP_65 +#include +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ + +//#define DEBUG_PRINT_CPU + +#include "postk_print_sysreg.c" + +#ifdef DEBUG_PRINT_CPU +#define dkprintf kprintf +#define ekprintf kprintf +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf kprintf +#endif + +#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ + __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) + +struct cpuinfo_arm64 cpuinfo_data[NR_CPUS]; /* index is logical cpuid */ +static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 }; + +static struct list_head handlers[1024]; +static void cpu_init_interrupt_handler(void); + +void init_processors_local(int max_id); +void assign_processor_id(void); +void arch_delay(int); +int gettime_local_support = 0; + +extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt); +extern int interrupt_from_user(void *); + +extern unsigned long ihk_param_gic_dist_base_pa; +extern unsigned long ihk_param_gic_dist_map_size; +extern unsigned long ihk_param_gic_cpu_base_pa; +extern unsigned long ihk_param_gic_cpu_map_size; +extern unsigned int ihk_param_gic_version; +extern int snprintf(char * buf, size_t size, const char *fmt, ...); + +/* Function pointers for GIC */ +void (*gic_dist_init)(unsigned long dist_base_pa, unsigned long size); +void (*gic_cpu_init)(unsigned long cpu_base_pa, unsigned long size); +void (*gic_enable)(void); +void (*arm64_issue_ipi)(unsigned int cpid, unsigned int vector); +void (*handle_arch_irq)(struct pt_regs *); + +static void gic_init(void) +{ + if(ihk_param_gic_version >= 3) { + /* Setup functions for GICv3 */ + gic_dist_init = gic_dist_init_gicv3; + gic_cpu_init = gic_cpu_init_gicv3; + gic_enable = gic_enable_gicv3; + arm64_issue_ipi = arm64_issue_ipi_gicv3; + handle_arch_irq = handle_interrupt_gicv3; + } else { + /* Setup functions for GICv2 */ + gic_dist_init = gic_dist_init_gicv2; + gic_cpu_init = gic_cpu_init_gicv2; + gic_enable = gic_enable_gicv2; + arm64_issue_ipi = arm64_issue_ipi_gicv2; + handle_arch_irq = handle_interrupt_gicv2; + } + + gic_dist_init(ihk_param_gic_dist_base_pa, ihk_param_gic_dist_map_size); + gic_cpu_init(ihk_param_gic_cpu_base_pa, ihk_param_gic_cpu_map_size); +} + +static void remote_tlb_flush_interrupt_handler(void *priv) +{ + /*Interim support*/ + flush_tlb(); +} + +static struct 
ihk_mc_interrupt_handler remote_tlb_flush_handler = { + .func = remote_tlb_flush_interrupt_handler, + .priv = NULL, +}; + +static void cpu_stop_interrupt_handler(void *priv) +{ + kprintf("CPU%d: shutdown.\n", ihk_mc_get_processor_id()); + psci_cpu_off(); +} + +static struct ihk_mc_interrupt_handler cpu_stop_handler = { + .func = cpu_stop_interrupt_handler, + .priv = NULL, +}; + +/* @ref.impl include/clocksource/arm_arch_timer.h */ +#define ARCH_TIMER_CTRL_ENABLE (1 << 0) +#define ARCH_TIMER_CTRL_IT_MASK (1 << 1) +#define ARCH_TIMER_CTRL_IT_STAT (1 << 2) + +static void physical_timer_handler(void *priv) +{ + unsigned int ctrl = 0; + int cpu = ihk_mc_get_processor_id(); + + dkprintf("CPU%d: catch physical timer\n", cpu); + + asm volatile("mrs %0, cntp_ctl_el0" : "=r" (ctrl)); + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + unsigned int zero = 0; + unsigned int val = ctrl; + unsigned int clocks = per_cpu_timer_val[cpu]; + unsigned long irqstate; + struct cpu_local_var *v = get_this_cpu_local_var(); + + /* set resched flag */ + irqstate = ihk_mc_spinlock_lock(&v->runq_lock); + v->flags |= CPU_FLAG_NEED_RESCHED; + ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + + /* gen control register value */ + val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); + val |= ARCH_TIMER_CTRL_ENABLE; + + /* set timer re-enable for periodic */ + asm volatile("msr cntp_ctl_el0, %0" : : "r" (zero)); + asm volatile("msr cntp_tval_el0, %0" : : "r" (clocks)); + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + } +} + +static struct ihk_mc_interrupt_handler phys_timer_handler = { + .func = physical_timer_handler, + .priv = NULL, +}; + +static void virtual_timer_handler(void *priv) +{ + unsigned int ctrl = 0; + int cpu = ihk_mc_get_processor_id(); + + dkprintf("CPU%d: catch virtual timer\n", cpu); + + asm volatile("mrs %0, cntv_ctl_el0" : "=r" (ctrl)); + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + unsigned int zero = 0; + unsigned int val = ctrl; + unsigned int clocks = per_cpu_timer_val[cpu]; + unsigned long irqstate; + struct cpu_local_var *v = get_this_cpu_local_var(); + + /* set resched flag */ + irqstate = ihk_mc_spinlock_lock(&v->runq_lock); + v->flags |= CPU_FLAG_NEED_RESCHED; + ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + + /* gen control register value */ + val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); + val |= ARCH_TIMER_CTRL_ENABLE; + + /* set timer re-enable for periodic */ + asm volatile("msr cntv_ctl_el0, %0" : : "r" (zero)); + asm volatile("msr cntv_tval_el0, %0" : : "r" (clocks)); + asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + } +} + +static struct ihk_mc_interrupt_handler virt_timer_handler = { + .func = virtual_timer_handler, + .priv = NULL, +}; + +static void memdump_interrupt_handler(void *priv) +{ + struct pt_regs *regs; + union arm64_cpu_local_variables *clv; + + regs = cpu_local_var(current)->uctx; + clv = get_arm64_this_cpu_local(); + + if (regs && interrupt_from_user(regs)) { + memcpy(clv->arm64_cpu_local_thread.panic_regs, regs->regs, sizeof(regs->regs)); + clv->arm64_cpu_local_thread.panic_regs[31] = regs->sp; + clv->arm64_cpu_local_thread.panic_regs[32] = regs->pc; + clv->arm64_cpu_local_thread.panic_regs[33] = regs->pstate; + } + else { + asm volatile ( + "stp x0, x1, [%3, #16 * 0]\n" + "stp x2, x3, [%3, #16 * 1]\n" + "stp x4, x5, [%3, #16 * 2]\n" + "stp x6, x7, [%3, #16 * 3]\n" + "stp x8, x9, [%3, #16 * 4]\n" + "stp x10, x11, [%3, #16 * 5]\n" + "stp x12, x13, [%3, #16 * 6]\n" + "stp x14, x15, [%3, #16 * 7]\n" + "stp x16, x17, [%3, #16 * 8]\n" + "stp x18, 
x19, [%3, #16 * 9]\n" + "stp x20, x21, [%3, #16 * 10]\n" + "stp x22, x23, [%3, #16 * 11]\n" + "stp x24, x25, [%3, #16 * 12]\n" + "stp x26, x27, [%3, #16 * 13]\n" + "stp x28, x29, [%3, #16 * 14]\n" + "str x30, [%3, #16 * 15]\n" + "mov %0, sp\n" + "adr %1, 1f\n" + "mrs %2, spsr_el1\n" + "1:" + : "=r" (clv->arm64_cpu_local_thread.panic_regs[31]), /* sp */ + "=r" (clv->arm64_cpu_local_thread.panic_regs[32]), /* pc */ + "=r" (clv->arm64_cpu_local_thread.panic_regs[33]) /* spsr_el1 */ + : "r" (&clv->arm64_cpu_local_thread.panic_regs) + : "memory" + ); + } + + clv->arm64_cpu_local_thread.paniced = 1; + + while(1) + { + cpu_halt(); + } +} + +static struct ihk_mc_interrupt_handler memdump_handler = { + .func = memdump_interrupt_handler, + .priv = NULL, +}; + +static void init_smp_processor(void) +{ + /* nothing */ +} + +/* @ref.impl arch/arm64/include/asm/cputype.h */ +static inline uint32_t read_cpuid_cachetype(void) +{ + return read_cpuid(CTR_EL0); +} + +/* @ref.impl arch/arm64/include/asm/arch_timer.h */ +static inline uint32_t arch_timer_get_cntfrq(void) +{ + return read_sysreg(cntfrq_el0); +} + +/* @ref.impl arch/arm64/kernel/cpuinfo.c::__cpuinfo_store_cpu */ +static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) +{ + info->hwid = ihk_mc_get_hardware_processor_id(); /* McKernel Original. */ + + info->reg_cntfrq = arch_timer_get_cntfrq(); + info->reg_ctr = read_cpuid_cachetype(); + info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_midr = read_cpuid_id(); + info->reg_revidr = read_cpuid(REVIDR_EL1); + + info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); + info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); + info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); + + /* Update the 32bit ID registers only if AArch32 is implemented */ +// if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { +// panic("AArch32 is not supported."); +// } + + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + uint64_t zcr; + + write_sysreg_s(ZCR_EL1_LEN_MASK, SYS_ZCR_EL1); + zcr = read_sysreg_s(SYS_ZCR_EL1); + zcr &= ~(uint64_t)ZCR_EL1_LEN_MASK; + zcr |= sve_get_vl() / 16 - 1; + + info->reg_zcr = zcr; + } +} + +/* @ref.impl arch/arm64/kernel/cpuinfo.c */ +static void cpuinfo_store_boot_cpu(void) +{ + struct cpuinfo_arm64 *info = &cpuinfo_data[0]; + __cpuinfo_store_cpu(info); + init_cpu_features(info); +} + +/* @ref.impl arch/arm64/kernel/cpuinfo.c */ +static void cpuinfo_store_cpu(void) +{ + int cpuid = ihk_mc_get_processor_id(); + struct cpuinfo_arm64 *boot_cpu_data = &cpuinfo_data[0]; + struct cpuinfo_arm64 *info = &cpuinfo_data[cpuid]; + __cpuinfo_store_cpu(info); + update_cpu_features(cpuid, info, boot_cpu_data); +} + +/* @ref.impl arch/arm64/kernel/setup.c::setup_processor */ +static void setup_processor(void) +{ + cpuinfo_store_boot_cpu(); + enable_mrs_emulation(); +} + +static char *trampoline_va, *first_page_va; + +unsigned long is_use_virt_timer(void) +{ + extern unsigned long ihk_param_use_virt_timer; + + switch (ihk_param_use_virt_timer) { + case 0: /* physical */ + case 1: /* virtual */ + break; + default: /* invalid */ + panic("PANIC: is_use_virt_timer(): timer select neither phys-timer nor virt-timer.\n"); + 
break;
+	}
+	return ihk_param_use_virt_timer;
+}
+
+/*@
+  @ assigns trampoline_va;
+  @ assigns first_page_va;
+  @*/
+void ihk_mc_init_ap(void)
+{
+	struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
+
+	trampoline_va = map_fixed_area(ap_trampoline, AP_TRAMPOLINE_SIZE, 0);
+	kprintf("Trampoline area: 0x%lx \n", ap_trampoline);
+	first_page_va = map_fixed_area(0, PAGE_SIZE, 0);
+
+	kprintf("# of cpus : %d\n", cpu_info->ncpus);
+	init_processors_local(cpu_info->ncpus);
+
+	kprintf("IKC IRQ vector: %d, IKC target CPU HW ID: %d\n",
+		ihk_ikc_irq, ihk_ikc_irq_apicid);
+
+	/* Do initialization for THIS cpu (BSP) */
+	assign_processor_id();
+
+	ihk_mc_register_interrupt_handler(INTRID_CPU_STOP, &cpu_stop_handler);
+	ihk_mc_register_interrupt_handler(INTRID_MEMDUMP, &memdump_handler);
+	ihk_mc_register_interrupt_handler(
+		ihk_mc_get_vector(IHK_TLB_FLUSH_IRQ_VECTOR_START), &remote_tlb_flush_handler);
+
+	if (is_use_virt_timer()) {
+		ihk_mc_register_interrupt_handler(get_virt_timer_intrid(), &virt_timer_handler);
+	} else {
+		ihk_mc_register_interrupt_handler(get_phys_timer_intrid(), &phys_timer_handler);
+	}
+	init_smp_processor();
+}
+
+extern void vdso_init(void);
+long (*__arm64_syscall_handler)(int, ihk_mc_user_context_t *);
+
+/* @ref.impl arch/arm64/include/asm/arch_timer.h::arch_timer_get_cntkctl */
+static inline unsigned int arch_timer_get_cntkctl(void)
+{
+	unsigned int cntkctl;
+	asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl));
+	return cntkctl;
+}
+
+/* @ref.impl arch/arm64/include/asm/arch_timer.h::arch_timer_set_cntkctl */
+static inline void arch_timer_set_cntkctl(unsigned int cntkctl)
+{
+	asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl));
+}
+
+#ifdef CONFIG_ARM_ARCH_TIMER_EVTSTREAM
+/* @ref.impl drivers/clocksource/arm_arch_timer.c::arch_timer_evtstrm_enable */
+static void arch_timer_evtstrm_enable(int divider)
+{
+	uint32_t cntkctl = arch_timer_get_cntkctl();
+
+	cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK;
+	/* Set the divider and enable the virtual event stream */
+	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
+			| ARCH_TIMER_VIRT_EVT_EN;
+	arch_timer_set_cntkctl(cntkctl);
+}
+
+/* @ref.impl include/clocksource/arm_arch_timer.h::ARCH_TIMER_EVT_STREAM_FREQ */
+#define ARCH_TIMER_EVT_STREAM_FREQ	10000	/* 100us */
+
+/* @ref.impl drivers/clocksource/arm_arch_timer.c::arch_timer_configure_evtstream */
+static void arch_timer_configure_evtstream(void)
+{
+	int evt_stream_div, pos;
+	extern unsigned long ihk_param_evtstrm_timer_rate;
+
+	/* Find the closest power of two to the divisor */
+	evt_stream_div = ihk_param_evtstrm_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ;
+	pos = fls(evt_stream_div);
+	if (pos > 1 && !(evt_stream_div & (1 << (pos - 2))))
+		pos--;
+	/* enable the event stream */
+	arch_timer_evtstrm_enable(pos > 15 ? 15 : pos);
+}
+#else /* CONFIG_ARM_ARCH_TIMER_EVTSTREAM */
+static inline void arch_timer_configure_evtstream(void) {}
+#endif /* CONFIG_ARM_ARCH_TIMER_EVTSTREAM */
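
[A worked example of the "closest power of two" rounding above, separate from the patch and assuming a hypothetical 100 MHz counter (ihk_param_evtstrm_timer_rate comes from the host and may differ):

    /* rate = 100000000 Hz, target = ARCH_TIMER_EVT_STREAM_FREQ = 10000 Hz (100us) */
    /* evt_stream_div = 100000000 / 10000 = 10000                                 */
    /* fls(10000) = 14, since 2^13 <= 10000 < 2^14                                */
    /* 10000 & (1 << 12) == 0, so round down: pos = 13                            */
    /* a divider of 13 selects roughly a 2^13-tick event period (~82us here),     */
    /* the power of two closest to the 100us target                               */
]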
+
+/* @ref.impl drivers/clocksource/arm_arch_timer.c::arch_counter_set_user_access */
+static void arch_counter_set_user_access(void)
+{
+	unsigned int cntkctl = arch_timer_get_cntkctl();
+
+	/* Disable user access to the timers and the physical counter */
+	/* Also disable the virtual event stream */
+	cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN
+			| ARCH_TIMER_USR_VT_ACCESS_EN
+			| ARCH_TIMER_VIRT_EVT_EN
+			| ARCH_TIMER_USR_PCT_ACCESS_EN);
+
+	/* Enable user access to the virtual counter */
+	cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+	arch_timer_set_cntkctl(cntkctl);
+}
+
+static void init_gettime_support(void)
+{
+	arch_counter_set_user_access();
+
+	gettime_local_support = 1;
+}
+
+void init_cpu(void)
+{
+	if (gic_enable)
+		gic_enable();
+	arm64_enable_pmu();
+}
+
+#ifdef CONFIG_ARM64_VHE
+/* @ref.impl arch/arm64/include/asm/virt.h */
+static inline int is_kernel_in_hyp_mode(void)
+{
+	unsigned long el;
+
+	asm("mrs %0, CurrentEL" : "=r" (el));
+	return el == CurrentEL_EL2;
+}
+
+/* @ref.impl arch/arm64/kernel/smp.c */
+/* Whether the boot CPU is running in HYP mode or not */
+static int boot_cpu_hyp_mode;
+
+static inline void save_boot_cpu_run_el(void)
+{
+	boot_cpu_hyp_mode = is_kernel_in_hyp_mode();
+}
+
+static inline int is_boot_cpu_in_hyp_mode(void)
+{
+	return boot_cpu_hyp_mode;
+}
+
+/*
+ * Verify that a secondary CPU is running the kernel at the same
+ * EL as that of the boot CPU.
+ */
+static void verify_cpu_run_el(void)
+{
+	int in_el2 = is_kernel_in_hyp_mode();
+	int boot_cpu_el2 = is_boot_cpu_in_hyp_mode();
+
+	if (in_el2 ^ boot_cpu_el2) {
+		kprintf("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n",
+			ihk_mc_get_processor_id(),
+			in_el2 ? 2 : 1,
+			boot_cpu_el2 ?
2 : 1); + panic("verify_cpu_run_el(): PANIC: mismatched Exception Level.\n"); + } +} +#else /* CONFIG_ARM64_VHE */ +static inline void save_boot_cpu_run_el(void) {} +static inline void verify_cpu_run_el(void) {} +#endif /* CONFIG_ARM64_VHE */ + +void setup_arm64(void) +{ + cpu_disable_interrupt(); + + cpu_init_interrupt_handler(); + + arm64_init_perfctr(); + + gic_init(); + + init_cpu(); + + init_gettime_support(); + + setup_processor(); + + save_boot_cpu_run_el(); + + arch_hw_breakpoint_init(); + + debug_monitors_init(); + + arch_timer_configure_evtstream(); + + if (psci_init()) { + panic("setup_arm64(): PANIC: HOST-Linux does not have a psci -> method property.\n"); + } + + kprintf("setup_arm64 done.\n"); +} + +static volatile int cpu_boot_status; + +void call_ap_func(void (*next_func)(void)) +{ + /* ap boot flag ON */ + cpu_boot_status = 1; + cpu_enable_interrupt(); + next_func(); +} + +void setup_arm64_ap(void (*next_func)(void)) +{ + /* set this core logical cpuid for struct thread_info */ + assign_processor_id(); + verify_cpu_run_el(); + arch_counter_set_user_access(); + cpuinfo_store_cpu(); + hw_breakpoint_reset(); + debug_monitors_init(); + arch_timer_configure_evtstream(); + init_cpu(); + call_ap_func(next_func); + + /* BUG */ + while(1); +} + +void arch_show_interrupt_context(const void *reg); +extern void tlb_flush_handler(int vector); + +static void show_context_stack(struct pt_regs *regs) +{ + const int min_stack_frame_size = 0x10; + uintptr_t sp; + uintptr_t stack_top; + int max_loop; + int i; + + if (interrupt_from_user(regs)) { + kprintf("It is a user stack region and it ends.\n"); + return; + } + + sp = (uintptr_t)regs + sizeof(*regs); + stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE); + max_loop = (stack_top - sp) / min_stack_frame_size; + + for (i = 0; i < max_loop; i++) { + uintptr_t *fp, *lr; + fp = (uintptr_t *)sp; + lr = (uintptr_t *)(sp + 8); + + if ((*fp <= sp) || (*fp > stack_top)) { + break; + } + + if ((*lr < MAP_KERNEL_START) || (*lr > MAP_KERNEL_START + MAP_KERNEL_SIZE)) { + break; + } + + kprintf("LR: %016lx, SP: %016lx, FP: %016lx\n", *lr, sp, *fp); + sp = *fp; + } +} + +void handle_IPI(unsigned int vector, struct pt_regs *regs) +{ + struct ihk_mc_interrupt_handler *h; + + dkprintf("CPU[%d] got interrupt, vector: %d\n", + ihk_mc_get_processor_id(), vector); + + if (vector > ((sizeof(handlers) / sizeof(handlers[0])) - 1)) { + panic("Maybe BUG."); + } + else if (vector == INTRID_STACK_TRACE) { + show_context_stack(regs); + } + else { + list_for_each_entry(h, &handlers[vector], list) { + if (h->func) { + h->func(h->priv); + } + } + } +} + +static void __arm64_wakeup(int hw_cpuid, unsigned long entry) +{ + if (cpu_psci_cpu_boot(hw_cpuid, entry)) { + panic("ap kickup cpu_psci_cpu_boot() failed.\n"); + } +} + +/** IHK Functions **/ + +/* send WFI(Wait For Interrupt) instruction */ +extern void cpu_do_idle(void); + +/* halt by WFI(Wait For Interrupt) */ +void cpu_halt(void) +{ + cpu_do_idle(); +} + +/*@ + @ assigns \nothing; + @ ensures \interrupt_disabled == 0; + @*/ +void cpu_safe_halt(void) +{ + cpu_do_idle(); + cpu_enable_interrupt(); +} + +#if defined(CONFIG_HAS_NMI) +#include + +/* enable interrupt (ICC_PMR_EL1 <= ICC_PMR_EL1_UNMASKED) */ +void cpu_enable_interrupt(void) +{ + unsigned long unmasked = ICC_PMR_EL1_UNMASKED; + asm volatile( + "msr_s " __stringify(ICC_PMR_EL1) ",%0" + : + : "r" (unmasked) + : "memory"); +} + +/* disable interrupt (ICC_PMR_EL1 <= ICC_PMR_EL1_MASKED) */ +void cpu_disable_interrupt(void) +{ + unsigned long masked = 
ICC_PMR_EL1_MASKED;
+	asm volatile(
+		"msr_s " __stringify(ICC_PMR_EL1) ",%0"
+		:
+		: "r" (masked)
+		: "memory");
+}
+
+/* restore interrupt (ICC_PMR_EL1 <= flags) */
+void cpu_restore_interrupt(unsigned long flags)
+{
+	asm volatile(
+		"msr_s " __stringify(ICC_PMR_EL1) ",%0"
+		:
+		: "r" (flags)
+		: "memory");
+}
+
+/* save ICC_PMR_EL1 & disable interrupt (ICC_PMR_EL1 <= ICC_PMR_EL1_MASKED) */
+unsigned long cpu_disable_interrupt_save(void)
+{
+	unsigned long flags;
+	unsigned long masked = ICC_PMR_EL1_MASKED;
+
+	asm volatile(
+		"mrs_s %0, " __stringify(ICC_PMR_EL1) "\n"
+		"msr_s " __stringify(ICC_PMR_EL1) ",%1"
+		: "=&r" (flags)
+		: "r" (masked)
+		: "memory");
+	return flags;
+}
+
+#else /* defined(CONFIG_HAS_NMI) */
+
+/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
+/* enable interrupt (PSTATE.DAIF I bit clear) */
+void cpu_enable_interrupt(void)
+{
+	asm volatile(
+		"msr daifclr, #2	// arch_local_irq_enable"
+		:
+		:
+		: "memory");
+}
+
+/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_disable */
+/* disable interrupt (PSTATE.DAIF I bit set) */
+void cpu_disable_interrupt(void)
+{
+	asm volatile(
+		"msr daifset, #2	// arch_local_irq_disable"
+		:
+		:
+		: "memory");
+}
+
+/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_local_irq_restore */
+/* restore interrupt (PSTATE.DAIF = flags restore) */
+void cpu_restore_interrupt(unsigned long flags)
+{
+	asm volatile(
+		"msr daif, %0	// arch_local_irq_restore"
+		:
+		: "r" (flags)
+		: "memory");
+}
+
+/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_save */
+/* save PSTATE.DAIF & disable interrupt (PSTATE.DAIF I bit set) */
+unsigned long cpu_disable_interrupt_save(void)
+{
+	unsigned long flags;
+	asm volatile(
+		"mrs %0, daif	// arch_local_irq_save\n"
+		"msr daifset, #2"
+		: "=r" (flags)
+		:
+		: "memory");
+	return flags;
+}
+#endif /* defined(CONFIG_HAS_NMI) */
+
+/* we do not have a "pause" instruction; use "yield" instead */
+void cpu_pause(void)
+{
+	asm volatile("yield" ::: "memory");
+}
+
+static void cpu_init_interrupt_handler(void)
+{
+	int i;
+	for (i = 0; i < (sizeof(handlers) / sizeof(handlers[0])); i++) {
+		INIT_LIST_HEAD(&handlers[i]);
+	}
+}
+
+/*@
+  @ behavior valid_vector:
+  @   assumes 0 <= vector <= 1023;
+  @   requires \valid(h);
+  @   assigns handlers[vector];
+  @   ensures \result == 0;
+  @ behavior invalid_vector:
+  @   assumes (vector < 0) || (vector > 1023);
+  @   assigns \nothing;
+  @   ensures \result == -EINVAL;
+  @*/
+int ihk_mc_register_interrupt_handler(int vector,
+                                      struct ihk_mc_interrupt_handler *h)
+{
+	if ((vector < 0) || (vector > ((sizeof(handlers) / sizeof(handlers[0])) - 1))) {
+		return -EINVAL;
+	}
+
+	list_add_tail(&h->list, &handlers[vector]);
+
+	return 0;
+}
+int ihk_mc_unregister_interrupt_handler(int vector,
+                                        struct ihk_mc_interrupt_handler *h)
+{
+	list_del(&h->list);
+
+	return 0;
+}
+
+extern unsigned long __page_fault_handler_address;
+
+/*@
+  @ requires \valid(h);
+  @ assigns __page_fault_handler_address;
+  @ ensures __page_fault_handler_address == h;
+  @*/
+void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
+{
+	__page_fault_handler_address = (unsigned long)h;
+}
+
+extern char trampoline_code_data[], trampoline_code_data_end[];
+unsigned long get_transit_page_table(void);
+
+int get_virt_cpuid(int hw_cpuid)
+{
+	int virt_cpuid = -1;
+	const struct ihk_mc_cpu_info *cpu_info;
+	int i;
+
+	cpu_info = ihk_mc_get_cpu_info();
+	for (i = 0; i < cpu_info->ncpus; i++) {
+		if (cpu_info->hw_ids[i] == hw_cpuid) {
+			virt_cpuid = i;
+			break;
+		}
+	}
+	return virt_cpuid;
+}
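
[Handler registration above is plain list insertion on the per-vector lists set up in cpu_init_interrupt_handler(), so several handlers can share one vector. A minimal usage sketch, separate from the patch (INTRID_EXAMPLE and example_handler are hypothetical names):

    static void example_handler(void *priv)
    {
        /* priv is the .priv pointer supplied at registration time */
        kprintf("CPU%d: example interrupt\n", ihk_mc_get_processor_id());
    }

    static struct ihk_mc_interrupt_handler example_entry = {
        .func = example_handler,
        .priv = NULL,
    };

    /* called once during boot, before the vector can fire: */
    /* ihk_mc_register_interrupt_handler(INTRID_EXAMPLE, &example_entry); */
]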
+
+/* reusable, but not reentrant */
+/*@
+  @ requires \valid_apicid(cpuid);	// valid hardware CPU ID or not
+  @ requires \valid(pc);
+  @ requires \valid(trampoline_va);
+  @ requires \valid(trampoline_code_data
+  @          +(0..(trampoline_code_data_end - trampoline_code_data)));
+  @ requires \valid_physical(ap_trampoline);	// valid physical address or not
+  @ assigns (char *)trampoline_va+(0..trampoline_code_data_end - trampoline_code_data);
+  @ assigns cpu_boot_status;
+  @ ensures cpu_boot_status != 0;
+  @*/
+void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
+{
+	int virt_cpuid = get_virt_cpuid(cpuid);
+	extern void arch_ap_start();
+	extern int num_processors;
+	int ncpus;
+
+	/* logical cpuid check */
+	if (virt_cpuid == -1) {
+		panic("failed to map hardware CPU ID to logical CPU ID\n");
+	}
+
+	/* set the AP stack address in secondary_data */
+	secondary_data.stack =
+		(void *)get_arm64_cpu_local_variable(virt_cpuid) + THREAD_START_SP - sizeof(ihk_mc_user_context_t);
+
+	/* set the next_pc address in secondary_data (setup_arm64_ap) */
+	secondary_data.next_pc = (uint64_t)setup_arm64_ap;
+
+	/* set the next_pc argument in secondary_data (ihk_mc_boot_cpu argument 2) */
+	secondary_data.arg = pc;
+
+	/* initialize the AP wait flag */
+	cpu_boot_status = 0;
+
+	/* kick the AP */
+	__arm64_wakeup(cpuid, virt_to_phys(arch_ap_start));
+
+	/* wait until the AP calls call_ap_func() */
+	while (!cpu_boot_status) {
+		cpu_pause();
+	}
+
+	ncpus = ihk_mc_get_cpu_info()->ncpus;
+	if (ncpus - 1 <= num_processors) {
+		setup_cpu_features();
+	}
+
+	init_sve_vl();
+}
+
+/* for ihk_mc_init_context() */
+extern void ret_from_fork(void);
+
+/*@
+  @ requires \valid(new_ctx);
+  @ requires (stack_pointer == NULL) || \valid((unsigned long *)stack_pointer-1);
+  @ requires \valid(next_function);
+  @*/
+/* initialize context */
+/* stack_pointer == NULL is an idle process context */
+/* stack_pointer != NULL is a user thread context */
+void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
+                         void *stack_pointer, void (*next_function)(void))
+{
+	unsigned long sp = 0;
+	ihk_mc_user_context_t *new_uctx = NULL;
+
+	if (unlikely(!stack_pointer)) {
+		/* for the idle process */
+		/* get the idle stack address */
+		sp = (unsigned long)get_arm64_this_cpu_kstack();
+
+		/* get the thread_info address */
+		new_ctx->thread = (struct thread_info *)((unsigned long)ALIGN_DOWN(sp, KERNEL_STACK_SIZE));
+
+		/* set the ret_from_fork address */
+		new_ctx->thread->cpu_context.pc = (unsigned long)ret_from_fork;
+
+		/* set the idle function address */
+		/* branched to in ret_from_fork */
+		new_ctx->thread->cpu_context.x19 = (unsigned long)next_function;
+
+		/* set the stack_pointer */
+		new_ctx->thread->cpu_context.sp = sp - sizeof(ihk_mc_user_context_t);
+
+		/* clear the pt_regs area */
+		new_uctx = (ihk_mc_user_context_t *)new_ctx->thread->cpu_context.sp;
+		memset(new_uctx, 0, sizeof(ihk_mc_user_context_t));
+
+		/* set pt_regs->pstate */
+		new_uctx->pstate = PSR_MODE_EL1h;
+	} else {
+		/* for a user thread's kernel stack */
+		/* save the logical cpuid (for execve) */
+		const int lcpuid = ihk_mc_get_processor_id();
+		const unsigned long syscallno = current_pt_regs()->syscallno;
+#ifdef CONFIG_ARM64_SVE
+		const uint16_t orig_sve_vl = current_thread_info()->sve_vl;
+		const uint16_t orig_sve_vl_onexec = current_thread_info()->sve_vl_onexec;
+		const uint16_t orig_sve_flags = current_thread_info()->sve_flags;
+#endif /* CONFIG_ARM64_SVE */
+
+		/* get the kernel stack address */
+		sp = (unsigned long)stack_pointer;
+
+		/* get the thread_info address */
+		new_ctx->thread = (struct thread_info *)((unsigned
long)ALIGN_DOWN(sp, KERNEL_STACK_SIZE)); + + /* clear thread_info */ + memset(new_ctx->thread, 0, sizeof(struct thread_info)); + + /* restore logical cpuid (for execve) */ + new_ctx->thread->cpu = lcpuid; + + /* set ret_from_fork address */ + new_ctx->thread->cpu_context.pc = (unsigned long)ret_from_fork; + + /* set stack_pointer */ + new_ctx->thread->cpu_context.sp = sp; + + /* clear pt_regs area */ + new_uctx = (ihk_mc_user_context_t *)new_ctx->thread->cpu_context.sp; + memset(new_uctx, 0, sizeof(ihk_mc_user_context_t)); + + /* initialize user context */ + /* copy from current_pt_regs */ + *new_uctx = *((ihk_mc_user_context_t *)current_pt_regs()); + new_uctx->regs[0] = 0; + new_uctx->pc = (unsigned long)next_function; + new_uctx->pstate = (new_uctx->pstate & ~PSR_MODE_MASK) | PSR_MODE_EL0t; + +#ifdef CONFIG_ARM64_SVE + /* SVE-VL inherit */ + if (likely(elf_hwcap & HWCAP_SVE)) { + new_ctx->thread->sve_vl_onexec = orig_sve_vl_onexec; + new_ctx->thread->sve_flags = orig_sve_flags; + + if (syscallno == __NR_execve) { + new_ctx->thread->sve_vl = orig_sve_vl_onexec ? + orig_sve_vl_onexec : sve_default_vl; + + BUG_ON(!sve_vl_valid(new_ctx->thread->sve_vl)); + + if (!(new_ctx->thread->sve_flags & THREAD_VL_INHERIT)) { + new_ctx->thread->sve_vl_onexec = 0; + } + } else { + new_ctx->thread->sve_vl = orig_sve_vl ? + orig_sve_vl : sve_default_vl; + } + } +#endif /* CONFIG_ARM64_SVE */ + } +} + +/* + * Release runq_lock before entering user space. + * This is needed because schedule() holds the runq lock throughout + * the context switch and when a new process is created it starts + * execution in enter_user_mode, which in turn calls this function. + */ +void release_runq_lock(void) +{ + ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), + cpu_local_var(runq_irqstate)); +} + +/*@ + @ requires \valid(ctx); + @ requires \valid(puctx); + @ requires \valid((ihk_mc_user_context_t *)stack_pointer-1); + @ requires \valid_user(new_pc); // valid user space address or not + @ requires \valid_user(user_sp-1); + @ assigns *((ihk_mc_user_context_t *)stack_pointer-1); + @ assigns ctx->rsp0; + @*/ +void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx, + ihk_mc_user_context_t **puctx, + void *stack_pointer, unsigned long new_pc, + unsigned long user_sp) +{ + char *sp = NULL; + + /* calc aligned kernel stack address */ + /* higher 16 byte area is padding area */ + sp = (char *)(ALIGN_DOWN((unsigned long)stack_pointer, KERNEL_STACK_SIZE) - 16); + + /* get pt_regs address */ + sp -= sizeof(ihk_mc_user_context_t); + + /* puctx return value set */ + *puctx = (ihk_mc_user_context_t *)sp; + + /* initialize kernel context */ + ihk_mc_init_context(ctx, sp, (void (*)(void))new_pc); +} + +/*@ + @ behavior rsp: + @ assumes reg == IHK_UCR_STACK_POINTER; + @ requires \valid(uctx); + @ assigns uctx->gpr.rsp; + @ ensures uctx->gpr.rsp == value; + @ behavior rip: + @ assumes reg == IHK_UCR_PROGRAM_COUNTER; + @ requires \valid(uctx); + @ assigns uctx->gpr.rip; + @ ensures uctx->gpr.rip == value; + @*/ +void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx, + enum ihk_mc_user_context_regtype reg, + unsigned long value) +{ + if (reg == IHK_UCR_STACK_POINTER) { + if (value & 15) { + panic("User Stack Pointer Unaligned !!\n"); + } + uctx->sp = value; + } else if (reg == IHK_UCR_PROGRAM_COUNTER) { + uctx->pc = value; + } +} + +/* @ref.impl arch/arm64/kernel/setup.c::hwcap_str */ +static const char *const hwcap_str[] = { + "fp", + "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + "atomics", + "fphp", + 
"asimdhp", + "cpuid", + "asimdrdm", + "sve", + NULL +}; + +#define CPUINFO_LEN_PER_CORE 0x100 +long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int *eofp) +{ + extern int num_processors; + int i = 0; + char *lbuf = NULL; + const size_t lbuf_size = CPUINFO_LEN_PER_CORE * num_processors; + size_t loff = 0; + long ret = 0; + + /* eof flag initialization */ + *eofp = 0; + + /* offset is over lbuf_size, return */ + if (read_off >= lbuf_size) { + *eofp = 1; + return 0; + } + + /* local buffer allocate */ + lbuf = kmalloc(lbuf_size, IHK_MC_AP_NOWAIT); + if (lbuf == NULL) { + ekprintf("%s: ERROR Local buffer allocation failed.\n"); + ret = -ENOMEM; + *eofp = 1; + goto err; + } + memset(lbuf, '\0', lbuf_size); + + /* cpuinfo strings generate and copy */ + for (i = 0; i < num_processors; i++) { + const struct cpuinfo_arm64 *cpuinfo = &cpuinfo_data[i]; + const unsigned int midr = cpuinfo->reg_midr; + int j = 0; + + /* generate strings */ + loff += snprintf(lbuf + loff, lbuf_size - loff, "processor\t: %d\n", cpuinfo->hwid); + loff += snprintf(lbuf + loff, lbuf_size - loff, "Features\t:"); + + for (j = 0; hwcap_str[j]; j++) { + if (elf_hwcap & (1 << j)) { + loff += snprintf(lbuf + loff, lbuf_size - loff, " %s", hwcap_str[j]); + } + } + loff += snprintf(lbuf + loff, lbuf_size - loff, "\n"); + loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU implementer\t: 0x%02x\n", MIDR_IMPLEMENTOR(midr)); + loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU architecture: 8\n"); + loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); + loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); + loff += snprintf(lbuf + loff, lbuf_size - loff, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); + + /* check buffer depletion */ + if ((i < num_processors - 1) && ((lbuf_size - loff) == 1)) { + ekprintf("%s: ERROR Local buffer size shortage.\n", __FUNCTION__); + ret = -ENOMEM; + *eofp = 1; + goto err_free; + } + } + + /* copy to host buffer */ + memcpy(buf, lbuf + read_off, buf_size); + if (read_off + buf_size >= loff) { + *eofp = 1; + ret = loff - read_off; + } else { + ret = buf_size; + } + +err_free: + kfree(lbuf); +err: + return ret; +} + +static int check_and_allocate_fp_regs(struct thread *thread); +void save_fp_regs(struct thread *thread); + +#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_thread() function */ +void arch_clone_thread(struct thread *othread, unsigned long pc, + unsigned long sp, struct thread *nthread) +{ + unsigned long tls = 0; + + /* get tpidr_el0 value, and set original-thread->tlsblock_base, new-thread->tlsblock_base */ + asm("mrs %0, tpidr_el0" : "=r" (tls)); + othread->tlsblock_base = nthread->tlsblock_base = tls; + + /* copy fp_regs values from parent. 
*/ + save_fp_regs(othread); + if ((othread->fp_regs != NULL) && (check_and_allocate_fp_regs(nthread) == 0)) { + memcpy(nthread->fp_regs, othread->fp_regs, sizeof(fp_regs_struct)); + } + + /* if SVE is enabled, take over the lower 128-bit registers */ + if (likely(elf_hwcap & HWCAP_SVE)) { + fp_regs_struct fp_regs; + + memset(&fp_regs, 0, sizeof(fp_regs_struct)); + fpsimd_save_state(&fp_regs); + thread_fpsimd_to_sve(nthread, &fp_regs); + } +} +#endif /* POSTK_DEBUG_ARCH_DEP_23 */ + +/*@ + @ requires \valid(handler); + @ assigns __arm64_syscall_handler; + @ ensures __arm64_syscall_handler == handler; + @*/ +void ihk_mc_set_syscall_handler(long (*handler)(int, ihk_mc_user_context_t *)) +{ + __arm64_syscall_handler = handler; +} + +/*@ + @ assigns \nothing; + @*/ +void ihk_mc_delay_us(int us) +{ + arch_delay(us); +} + +void arch_show_interrupt_context(const void *reg) +{ + const struct pt_regs *regs = (struct pt_regs *)reg; + kprintf("dump pt_regs:\n"); + kprintf(" x0 : %016lx x1 : %016lx x2 : %016lx x3 : %016lx\n", + regs->regs[0], regs->regs[1], regs->regs[2], regs->regs[3]); + kprintf(" x4 : %016lx x5 : %016lx x6 : %016lx x7 : %016lx\n", + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); + kprintf(" x8 : %016lx x9 : %016lx x10 : %016lx x11 : %016lx\n", + regs->regs[8], regs->regs[9], regs->regs[10], regs->regs[11]); + kprintf(" x12 : %016lx x13 : %016lx x14 : %016lx x15 : %016lx\n", + regs->regs[12], regs->regs[13], regs->regs[14], regs->regs[15]); + kprintf(" x16 : %016lx x17 : %016lx x18 : %016lx x19 : %016lx\n", + regs->regs[16], regs->regs[17], regs->regs[18], regs->regs[19]); + kprintf(" x20 : %016lx x21 : %016lx x22 : %016lx x23 : %016lx\n", + regs->regs[20], regs->regs[21], regs->regs[22], regs->regs[23]); + kprintf(" x24 : %016lx x25 : %016lx x26 : %016lx x27 : %016lx\n", + regs->regs[24], regs->regs[25], regs->regs[26], regs->regs[27]); + kprintf(" x28 : %016lx x29 : %016lx x30 : %016lx\n", + regs->regs[28], regs->regs[29], regs->regs[30]); + kprintf(" sp : %016lx\n", regs->sp); + kprintf(" pc : %016lx\n", regs->pc); + kprintf(" pstate : %016lx(N:%d Z:%d C:%d V:%d SS:%d IL:%d D:%d A:%d I:%d F:%d M[4]:%d M:%d)\n", + regs->pstate, + (regs->pstate >> 31 & 1), (regs->pstate >> 30 & 1), (regs->pstate >> 29 & 1), + (regs->pstate >> 28 & 1), (regs->pstate >> 21 & 1), (regs->pstate >> 20 & 1), + (regs->pstate >> 9 & 1), (regs->pstate >> 8 & 1), (regs->pstate >> 7 & 1), + (regs->pstate >> 6 & 1), (regs->pstate >> 4 & 1), (regs->pstate & 0xf)); + kprintf(" orig_x0 : %016lx\n", regs->orig_x0); + kprintf(" syscallno : %016lx\n", regs->syscallno); +} + +/*@ + @ behavior fs_base: + @ assumes type == IHK_ASR_X86_FS; + @ ensures \result == 0; + @ behavior invalid_type: + @ assumes type != IHK_ASR_X86_FS; + @ ensures \result == -EINVAL; + @*/ +int ihk_mc_arch_set_special_register(enum ihk_asr_type type, + unsigned long value) +{ +/* TODO(pka_idle) */ + return -1; +} + +/*@ + @ behavior fs_base: + @ assumes type == IHK_ASR_X86_FS; + @ requires \valid(value); + @ ensures \result == 0; + @ behavior invalid_type: + @ assumes type != IHK_ASR_X86_FS; + @ ensures \result == -EINVAL; + @*/ +int ihk_mc_arch_get_special_register(enum ihk_asr_type type, + unsigned long *value) +{ +/* TODO(pka_idle) */ + return -1; +} + +/*@ + @ requires \valid_apicid(cpu); // valid APIC ID or not + @ ensures \result == 0; + @*/ +int ihk_mc_interrupt_cpu(int cpu, int vector) +{ + dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu); + (*arm64_issue_ipi)(cpu, vector); + return 0; +} + +#ifdef
POSTK_DEBUG_ARCH_DEP_22 +/* + * @ref.impl linux-linaro/arch/arm64/kernel/process.c::tls_thread_switch() + */ +static void tls_thread_switch(struct thread *prev, struct thread *next) +{ + unsigned long tpidr, tpidrro; + + asm("mrs %0, tpidr_el0" : "=r" (tpidr)); + prev->tlsblock_base = tpidr; + + tpidr = next->tlsblock_base; + tpidrro = 0; + + asm( + " msr tpidr_el0, %0\n" + " msr tpidrro_el0, %1" + : : "r" (tpidr), "r" (tpidrro)); +} + +struct thread *arch_switch_context(struct thread *prev, struct thread *next) +{ + // TODO[PMU]: these declarations are placed here provisionally; by convention they belong in a common-code header, but wait and see how the interface settles. + extern void perf_start(struct mc_perf_event *event); + extern void perf_reset(struct mc_perf_event *event); + struct thread *last; +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + struct mcs_rwlock_node_irqsave lock; +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ + + /* Set up the new TLS. */ + dkprintf("[%d] arch_switch_context: tlsblock_base: 0x%lX\n", + ihk_mc_get_processor_id(), next->tlsblock_base); + + /* Performance monitoring inherit */ + if(next->proc->monitoring_event) { + if(next->proc->perf_status == PP_RESET) + perf_reset(next->proc->monitoring_event); + if(next->proc->perf_status != PP_COUNT) { + perf_reset(next->proc->monitoring_event); + perf_start(next->proc->monitoring_event); + } + } + if (likely(prev)) { + tls_thread_switch(prev, next); + +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock); + if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) { + switch (prev->proc->status) { + case PS_DELAY_STOPPED: + prev->proc->status = PS_STOPPED; + break; + case PS_DELAY_TRACED: + prev->proc->status = PS_TRACED; + break; + default: + break; + } + mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); + waitq_wakeup(&prev->proc->parent->waitpid_q); + } else { + mcs_rwlock_writer_unlock(&prev->proc->update_lock, &lock); + } +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ + + last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); + } + else { + last = ihk_mc_switch_context(NULL, &next->ctx, prev); + } + + return last; +} +#endif /* POSTK_DEBUG_ARCH_DEP_22 */ + +/*@ + @ requires \valid(thread); + @ ensures thread->fp_regs == NULL; + @*/ +void +release_fp_regs(struct thread *thread) +{ + if (!thread) { + return; + } + + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + int pages; + + if (thread->fp_regs) { + // calculate number of pages for fp regs area + pages = (sizeof(fp_regs_struct) + PAGE_SIZE -1) >> PAGE_SHIFT; + ihk_mc_free_pages(thread->fp_regs, pages); + thread->fp_regs = NULL; + } + +#ifdef CONFIG_ARM64_SVE + if (likely(elf_hwcap & HWCAP_SVE)) { + sve_free(thread); + } +#endif /* CONFIG_ARM64_SVE */ + } +} + +static int +check_and_allocate_fp_regs(struct thread *thread) +{ + int result = 0; + int pages; + + if (!thread->fp_regs) { + pages = (sizeof(fp_regs_struct) + PAGE_SIZE -1) >> PAGE_SHIFT; + thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT); + + if (!thread->fp_regs) { + kprintf("error: allocating fp_regs pages\n"); + result = 1; + panic("panic: error allocating fp_regs pages"); + goto out; + } + + memset(thread->fp_regs, 0, sizeof(fp_regs_struct)); + } + +#ifdef CONFIG_ARM64_SVE + if (likely(elf_hwcap & HWCAP_SVE)) { + sve_alloc(thread); + } +#endif /* CONFIG_ARM64_SVE */ +out: + return result; +} + +/*@ + @ requires \valid(thread); + @*/ +void +save_fp_regs(struct thread *thread) +{ + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + if
(check_and_allocate_fp_regs(thread) != 0) { + // alloc error. + return; + } + thread_fpsimd_save(thread); + } +} + +void +clear_fp_regs(struct thread *thread) +{ + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { +#ifdef CONFIG_ARM64_SVE + if (likely(elf_hwcap & HWCAP_SVE)) { + unsigned int fpscr[2] = { 0, 0 }; + unsigned int vl = current_thread_info()->sve_vl; + struct fpsimd_sve_state(sve_vq_from_vl(sve_max_vl)) clear_sve; + + if (vl == 0) { + vl = sve_default_vl; + } + memset(&clear_sve, 0, sizeof(clear_sve)); + sve_load_state(clear_sve.ffr, fpscr, sve_vq_from_vl(vl) - 1); + } else { + fp_regs_struct clear_fp; + memset(&clear_fp, 0, sizeof(fp_regs_struct)); + fpsimd_load_state(&clear_fp); + } +#else /* CONFIG_ARM64_SVE */ + fp_regs_struct clear_fp; + memset(&clear_fp, 0, sizeof(fp_regs_struct)); + fpsimd_load_state(&clear_fp); +#endif /* CONFIG_ARM64_SVE */ + } +} + +/*@ + @ requires \valid(thread); + @ assigns thread->fp_regs; + @*/ +void +restore_fp_regs(struct thread *thread) +{ + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + if (!thread->fp_regs) { + // only clear fpregs. + clear_fp_regs(thread); + return; + } + thread_fpsimd_load(thread); + } +} + +void +lapic_timer_enable(unsigned int clocks) +{ + unsigned int val = 0; + + /* gen control register value */ + asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); + val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK); + val |= ARCH_TIMER_CTRL_ENABLE; + + if (is_use_virt_timer()) { + asm volatile("msr cntv_tval_el0, %0" : : "r" (clocks)); + asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + } else { + asm volatile("msr cntp_tval_el0, %0" : : "r" (clocks)); + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + } + per_cpu_timer_val[ihk_mc_get_processor_id()] = clocks; +} + +void +unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) +{ + const uintptr_t address = (uintptr_t)fault_addr; + struct process_vm *vm = thread->vm; + struct vm_range *range; + char found; + unsigned long irqflags; + unsigned long error = 0; + + irqflags = kprintf_lock(); + __kprintf("Page fault for 0x%lx\n", address); + __kprintf("%s for %s access in %s mode (reserved bit %s set), " + "it %s an instruction fetch\n", + (error & PF_PROT ? "protection fault" : "no page found"), + (error & PF_WRITE ? "write" : "read"), + (error & PF_USER ? "user" : "kernel"), + (error & PF_RSVD ? "was" : "wasn't"), + (error & PF_INSTR ? "was" : "wasn't")); + + found = 0; + list_for_each_entry(range, &vm->vm_range_list, list) { + if (range->start <= address && range->end > address) { + found = 1; + __kprintf("address is in range, flag: 0x%lx\n", + range->flag); + ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); + break; + } + } + if (!found) { + __kprintf("address is out of range! 
\n"); + } + + kprintf_unlock(irqflags); + + /* TODO */ + ihk_mc_debug_show_interrupt_context(regs); + + if (!interrupt_from_user(regs)) { + panic("panic: kernel mode PF"); + } + + //dkprintf("now dump a core file\n"); + //coredump(proc, regs); + + #ifdef DEBUG_PRINT_MEM + { + uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs); + + kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n", + sp[0], sp[1], sp[2], sp[3]); + } + #endif + + return; +} + +void +lapic_timer_disable() +{ + unsigned int zero = 0; + unsigned int val = 0; + + /* gen control register value */ + asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); + val &= ~(ARCH_TIMER_CTRL_IT_STAT | ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE); + + if (is_use_virt_timer()) { + asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + asm volatile("msr cntv_tval_el0, %0" : : "r" (zero)); + } else { + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + asm volatile("msr cntp_tval_el0, %0" : : "r" (zero)); + } + per_cpu_timer_val[ihk_mc_get_processor_id()] = 0; +} + +void init_tick(void) +{ + dkprintf("init_tick():\n"); + return; +} + +void init_delay(void) +{ + dkprintf("init_delay():\n"); + return; +} + +void sync_tick(void) +{ + dkprintf("sync_tick():\n"); + return; +} + +void arch_start_pvclock(void) +{ + /* linux-linaro(aarch64)ではKVM向けpvclockの処理が未サポート */ + dkprintf("arch_start_pvclock(): not supported\n"); + return; +} + +void +mod_nmi_ctx(void *nmi_ctx, void (*func)()) +{ + /* TODO: skeleton for rusage */ +} + +int arch_cpu_read_write_register( + struct ihk_os_cpu_register *desc, + enum mcctrl_os_cpu_operation op) +{ + /* TODO: skeleton for patch:0676 */ + if (op == MCCTRL_OS_CPU_READ_REGISTER) { +// desc->val = rdmsr(desc->addr); + } + else if (op == MCCTRL_OS_CPU_WRITE_REGISTER) { +// wrmsr(desc->addr, desc->val); + } + else { + return -1; + } + + return 0; +} + +int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg) +{ + /* TODO: skeleton for smp_call_func */ + return -1; +} + +/*** end of file ***/ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c new file mode 100644 index 00000000..af25ad76 --- /dev/null +++ b/arch/arm64/kernel/cpufeature.c @@ -0,0 +1,1005 @@ +/* cpufeature.c COPYRIGHT FUJITSU LIMITED 2017 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef POSTK_DEBUG_ARCH_DEP_65 +unsigned long elf_hwcap; +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + { \ + .sign = SIGNED, \ + .visible = VISIBLE, \ + .strict = STRICT, \ + .type = TYPE, \ + .shift = SHIFT, \ + .width = WIDTH, \ + .safe_val = SAFE_VAL, \ + } + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* Define a feature with unsigned values */ +#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* Define a feature with a signed value */ +#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +#define ARM64_FTR_END \ + { \ + .width = 0, \ + } + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + 
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + /* Linux doesn't care about the EL3 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + /* Linux shouldn't care about secure memory */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + /* + * Differing PARange is fine as long as all peripherals and memory are mapped + * within the minimum PARange of all CPUs + */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 
ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/include/asm/cachetype.h */ +#define ICACHE_POLICY_RESERVED 0 +#define ICACHE_POLICY_AIVIVT 1 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_ctr[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + /* + * Linux can handle differing I-cache policies. Userspace JITs will + * make use of *minLine. + * If we have differing I-cache policies, report it as the weakest - AIVIVT. + */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { + .name = "SYS_CTR_EL0", + .ftr_bits = ftr_ctr +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_mmfr0[] = { + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + /* + * We can instantiate multiple PMU instances with different levels + * of support. 
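+ * The PMUVER field below is therefore non-strict: a mismatch silently falls back to the safe value instead of triggering a sanity warning.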
+ */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_mvfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_dczid[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_isar5[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_id_dfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_zcr[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, + ZCR_EL1_LEN_SHIFT, ZCR_EL1_LEN_SIZE, 0), /* LEN */ + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* + * Common ftr bits for a 32bit register with all hidden, strict + * attributes, with 4bit feature fields and a default safe value of + * 0. 
Covers the following 32bit registers: + * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + */ +static const struct arm64_ftr_bits ftr_generic_32bits[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* Table for a single 32bit feature value */ +static const struct arm64_ftr_bits ftr_single32[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 32, 0), + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_ftr_bits ftr_raz[] = { + ARM64_FTR_END, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +#define ARM64_FTR_REG(id, table) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ + .name = #id, \ + .ftr_bits = &((table)[0]), \ + }} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct __ftr_reg_entry { + uint32_t sys_id; + struct arm64_ftr_reg *reg; +} arm64_ftr_regs[] = { + + /* Op1 = 0, CRn = 0, CRm = 1 */ + ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), + ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), + ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), + + /* Op1 = 0, CRn = 0, CRm = 2 */ + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), + ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + + /* Op1 = 0, CRn = 0, CRm = 3 */ + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + + /* Op1 = 0, CRn = 0, CRm = 4 */ + ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), + + /* Op1 = 0, CRn = 0, CRm = 5 */ + ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), + ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), + + /* Op1 = 0, CRn = 0, CRm = 6 */ + ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + + /* Op1 = 0, CRn = 0, CRm = 7 */ + ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), + ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), + + /* Op1 = 0, CRn = 1, CRm = 2 */ + ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), + + /* Op1 = 3, CRn = 0, CRm = 0 */ + { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, + ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), + + /* Op1 = 3, CRn = 14, CRm = 0 */ + ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_single32), +}; + + +/* @ref.impl lib/bsearch.c */ +/* + * bsearch - binary search an array of elements + * @key: pointer to item being searched for + * @base: pointer to first element to search + 
* @num: number of elements + * @size: size of each element + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array. The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and bsearch(). + */ +void *bsearch(const void *key, const void *base, size_t num, size_t size, + int (*cmp)(const void *key, const void *elt)) +{ + size_t start = 0, end = num; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = cmp(key, base + mid * size); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return (void *)base + mid * size; + } + + return NULL; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int search_cmp_ftr_reg(const void *id, const void *regp) +{ + return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* + * get_arm64_ftr_reg - Lookup a feature register entry using its + * sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id, we use binary search to find a matching + * entry. + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure. It is up to the caller to decide + * the impact of a failure. + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(uint32_t sys_id) +{ + const struct __ftr_reg_entry *ret; + + ret = bsearch((const void *)(unsigned long)sys_id, + arm64_ftr_regs, + sizeof(arm64_ftr_regs)/sizeof(arm64_ftr_regs[0]), + sizeof(arm64_ftr_regs[0]), + search_cmp_ftr_reg); + if (ret) + return ret->reg; + return NULL; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static uint64_t arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, int64_t reg, + int64_t ftr_val) +{ + uint64_t mask = arm64_ftr_mask(ftrp); + + reg &= ~mask; + reg |= (ftr_val << ftrp->shift) & mask; + return reg; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int64_t arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, int64_t new, + int64_t cur) +{ + int64_t ret = 0; + + switch (ftrp->type) { + case FTR_EXACT: + ret = ftrp->safe_val; + break; + case FTR_LOWER_SAFE: + ret = new < cur ? new : cur; + break; + case FTR_HIGHER_SAFE: + ret = new > cur ?
new : cur; + break; + default: + kprintf("Unknown FTR type: %d\n", ftrp->type); + panic("Unknown FTR type"); + } + + return ret; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static void sort_ftr_regs(void) +{ + int i; + + /* Check that the array is sorted so that we can do the binary search */ + for (i = 1; i < sizeof(arm64_ftr_regs)/sizeof(arm64_ftr_regs[0]); i++) { + if (arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id) { + panic("FTR regs array is broken."); + } + } +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static void init_cpu_ftr_reg(uint32_t sys_reg, uint64_t new) +{ + uint64_t val = 0; + uint64_t strict_mask = ~0x0ULL; + uint64_t user_mask = 0; + uint64_t valid_mask = 0; + + const struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); + + if (!reg) { + kprintf("missing sys_reg: 0x%x\n", sys_reg); + panic("FTR register not found."); + } + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + uint64_t ftr_mask = arm64_ftr_mask(ftrp); + int64_t ftr_new = arm64_ftr_value(ftrp, new); + + val = arm64_ftr_set_value(ftrp, val, ftr_new); + + valid_mask |= ftr_mask; + if (!ftrp->strict) + strict_mask &= ~ftr_mask; + if (ftrp->visible) + user_mask |= ftr_mask; + else + reg->user_val = arm64_ftr_set_value(ftrp, + reg->user_val, + ftrp->safe_val); + } + + val &= valid_mask; + + reg->sys_val = val; + reg->strict_mask = strict_mask; + reg->user_mask = user_mask; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +void init_cpu_features(struct cpuinfo_arm64 *info) +{ + /* Before we start using the tables, make sure it is sorted */ + sort_ftr_regs(); + + init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); + init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); + init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); + init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); + init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); + init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); + init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); + init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); + + //if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + // panic("AArch32 is not supported."); + //} + + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); + } +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, uint64_t new) +{ + const struct arm64_ftr_bits *ftrp; + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + int64_t ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); + int64_t ftr_new = arm64_ftr_value(ftrp, new); + + if (ftr_cur == ftr_new) + continue; + /* Find a safe value */ + ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur); + reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new); + } + +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int check_update_ftr_reg(uint32_t sys_id, int cpu, uint64_t val, uint64_t boot) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + if (!regp) { + kprintf("missing sys_reg: 0x%x\n", sys_id); + panic("FTR register not found."); + } + + update_cpu_ftr_reg(regp, val); + if ((boot & regp->strict_mask) == (val & 
regp->strict_mask)) + return 0; + kprintf("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n", + regp->name, boot, cpu, val); + return 1; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* + * Update system wide CPU feature registers with the values from a + * non-boot CPU. Also performs SANITY checks to make sure that there + * aren't any insane variations from that of the boot CPU. + */ +void update_cpu_features(int cpu, + struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu, + info->reg_ctr, boot->reg_ctr); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, + info->reg_dczid, boot->reg_dczid); + + /* If different, timekeeping will be broken (especially with KVM) */ + taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, + info->reg_cntfrq, boot->reg_cntfrq); + + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu, + info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu, + info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1); + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu, + info->reg_id_aa64isar0, boot->reg_id_aa64isar0); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, + info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu, + info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, + info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, + info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, + info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, + info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); + + /* + * If we have AArch32, we care about 32-bit features for compat. + * If the system doesn't support AArch32, don't update them. + */ + //if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + // id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { + // panic("AArch32 is not supported."); + //} + + if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { + taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, + info->reg_zcr, boot->reg_zcr); + } + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. 
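+ * McKernel has no taint flag, so a detected variation is only logged below.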
+ */ + if (taint) { + kprintf("Unsupported CPU feature variation.\n"); + } +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +uint64_t read_system_reg(uint32_t id) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); + + /* We shouldn't get a request for an unsupported register */ + if (!regp) { + kprintf("missing sys_reg: 0x%x\n", id); + panic("FTR register not found."); + } + + return regp->sys_val; +} + +/* @ref.impl arch/arm64/include/asm/insn.h */ +enum aarch64_insn_imm_type { + AARCH64_INSN_IMM_ADR, + AARCH64_INSN_IMM_26, + AARCH64_INSN_IMM_19, + AARCH64_INSN_IMM_16, + AARCH64_INSN_IMM_14, + AARCH64_INSN_IMM_12, + AARCH64_INSN_IMM_9, + AARCH64_INSN_IMM_7, + AARCH64_INSN_IMM_6, + AARCH64_INSN_IMM_S, + AARCH64_INSN_IMM_R, + AARCH64_INSN_IMM_MAX +}; + +/* @ref.impl arch/arm64/include/asm/insn.h */ +enum aarch64_insn_register_type { + AARCH64_INSN_REGTYPE_RT, + AARCH64_INSN_REGTYPE_RN, + AARCH64_INSN_REGTYPE_RT2, + AARCH64_INSN_REGTYPE_RM, + AARCH64_INSN_REGTYPE_RD, + AARCH64_INSN_REGTYPE_RA, +}; + +/* @ref.impl arch/arm64/kernel/insn.c */ +static int aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, + uint32_t *maskp, int *shiftp) +{ + uint32_t mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + case AARCH64_INSN_IMM_7: + mask = BIT(7) - 1; + shift = 15; + break; + case AARCH64_INSN_IMM_6: + case AARCH64_INSN_IMM_S: + mask = BIT(6) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_R: + mask = BIT(6) - 1; + shift = 16; + break; + default: + return -EINVAL; + } + + *maskp = mask; + *shiftp = shift; + + return 0; +} + +/* @ref.impl arch/arm64/kernel/insn.c */ +#define ADR_IMM_HILOSPLIT 2 +#define ADR_IMM_SIZE (1UL << 21) //2MiB +#define ADR_IMM_LOMASK ((1 << ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_HIMASK ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1) +#define ADR_IMM_LOSHIFT 29 +#define ADR_IMM_HISHIFT 5 + +/* @ref.impl arch/arm64/kernel/insn.c */ +uint64_t aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, uint32_t insn) +{ + uint32_t immlo, immhi, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + shift = 0; + immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK; + immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK; + insn = (immhi << ADR_IMM_HILOSPLIT) | immlo; + mask = ADR_IMM_SIZE - 1; + break; + default: + if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { + kprintf("aarch64_insn_decode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + } + + return (insn >> shift) & mask; +} + +/* @ref.impl arch/arm64/kernel/insn.c */ +uint32_t aarch64_insn_decode_register(enum aarch64_insn_register_type type, + uint32_t insn) +{ + int shift; + + switch (type) { + case AARCH64_INSN_REGTYPE_RT: + case AARCH64_INSN_REGTYPE_RD: + shift = 0; + break; + case AARCH64_INSN_REGTYPE_RN: + shift = 5; + break; + case AARCH64_INSN_REGTYPE_RT2: + case AARCH64_INSN_REGTYPE_RA: + shift = 10; + break; + case AARCH64_INSN_REGTYPE_RM: + shift = 16; + break; + default: + kprintf("%s: unknown register type encoding %d\n", __func__, + type); + return 0; + } + + return (insn >> shift) & GENMASK(4, 0); +} + +/* @ref.impl 
arch/arm64/kernel/cpufeature.c */ +/* + * With CRm == 0, reg should be one of : + * MIDR_EL1, MPIDR_EL1 or REVIDR_EL1. + */ +static inline int emulate_id_reg(uint32_t id, uint64_t *valp) +{ + switch (id) { + case SYS_MIDR_EL1: + *valp = read_cpuid_id(); + break; + case SYS_MPIDR_EL1: + *valp = SYS_MPIDR_SAFE_VAL; + break; + case SYS_REVIDR_EL1: + /* IMPLEMENTATION DEFINED values are emulated with 0 */ + *valp = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +/* + * We emulate only the following system register space. + * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] + * See Table C5-6 System instruction encodings for System register accesses, + * ARMv8 ARM(ARM DDI 0487A.f) for more details. + */ +static inline int is_emulated(uint32_t id) +{ + return (sys_reg_Op0(id) == 0x3 && + sys_reg_CRn(id) == 0x0 && + sys_reg_Op1(id) == 0x0 && + (sys_reg_CRm(id) == 0 || + ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7)))); +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int emulate_sys_reg(uint32_t id, uint64_t *valp) +{ + struct arm64_ftr_reg *regp; + + if (!is_emulated(id)) + return -EINVAL; + + if (sys_reg_CRm(id) == 0) + return emulate_id_reg(id, valp); + + regp = get_arm64_ftr_reg(id); + if (regp) + *valp = arm64_ftr_reg_user_value(regp); + else + /* + * The untracked registers are either IMPLEMENTATION DEFINED + * (e.g, ID_AFR0_EL1) or reserved RAZ. + */ + *valp = 0; + return 0; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int emulate_mrs(struct pt_regs *regs, uint32_t insn) +{ + int rc; + uint32_t sys_reg, dst; + uint64_t val; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (uint32_t)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rc = emulate_sys_reg(sys_reg, &val); + if (!rc) { + dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + pt_regs_write_reg(regs, dst, val); + regs->pc += 4; + } + + return rc; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static struct undef_hook mrs_hook = { + .instr_mask = 0xfff00000, + .instr_val = 0xd5300000, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = PSR_MODE_EL0t, + .fn = emulate_mrs, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +int enable_mrs_emulation(void) +{ + register_undef_hook(&mrs_hook); + return 0; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int +feature_matches(uint64_t reg, const struct arm64_cpu_capabilities *entry) +{ + int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); + + return val >= entry->min_field_value; +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static int +has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) +{ + uint64_t val = 0; + + //WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) { + val = read_system_reg(entry->sys_reg); + } else { + panic("invalid argument. 
SCOPE_LOCAL_CPU is not implemented."); + //val = __raw_read_system_reg(entry->sys_reg); + } + return feature_matches(val, entry); +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ + { \ + .desc = #cap, \ + .def_scope = SCOPE_SYSTEM, \ + .matches = has_cpuid_feature, \ + .sys_reg = reg, \ + .field_pos = field, \ + .sign = s, \ + .min_field_value = min_value, \ + .hwcap_type = type, \ + .hwcap = cap, \ + } + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), +#ifdef CONFIG_ARM64_SVE + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SVE), +#endif + {}, +}; + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) +{ + switch (cap->hwcap_type) { + case CAP_HWCAP: + elf_hwcap |= cap->hwcap; + break; + default: + kprintf("invalid cpu capability type.\n"); + } +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) +{ + /* @ref.impl drivers/clocksource/arm_arch_timer.c::arch_timer_evtstrm_enable */ +#ifdef CONFIG_ARM_ARCH_TIMER_EVTSTREAM + elf_hwcap |= HWCAP_EVTSTRM; +#endif /* CONFIG_ARM_ARCH_TIMER_EVTSTREAM */ + + /* We support emulation of accesses to CPU ID feature registers */ + elf_hwcap |= HWCAP_CPUID; + for (; hwcaps->matches; hwcaps++) { + if (hwcaps->matches(hwcaps, hwcaps->def_scope)) { + cap_set_elf_hwcap(hwcaps); + } + } +} + +/* @ref.impl arch/arm64/kernel/cpufeature.c */ +void setup_cpu_features(void) +{ + setup_elf_hwcaps(arm64_elf_hwcaps); +} + +#ifdef POSTK_DEBUG_ARCH_DEP_65 +unsigned long arch_get_hwcap(void) +{ + return elf_hwcap; +} +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c new file mode 100644 index 00000000..1e1673e0 --- /dev/null +++ b/arch/arm64/kernel/cputable.c @@ -0,0 +1,14 @@ +/* cputable.c COPYRIGHT FUJITSU LIMITED 2015 */ + +#include + +extern unsigned long __cpu_setup(void); +struct cpu_info cpu_table[] = { + { + .cpu_id_val = 0x000f0000, + .cpu_id_mask = 0x000f0000, + .cpu_name = "AArch64 Processor", + .cpu_setup = __cpu_setup, + }, + { /* Empty */ }, +}; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c new file mode 100644 index 00000000..e630d2d8 --- /dev/null +++ 
b/arch/arm64/kernel/debug-monitors.c @@ -0,0 +1,110 @@ +/* debug-monitors.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::debug_monitors_arch */ +/* Determine debug architecture. */ +unsigned char debug_monitors_arch(void) +{ + return read_cpuid(ID_AA64DFR0_EL1) & 0xf; +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::mdscr_write */ +void mdscr_write(unsigned int mdscr) +{ + unsigned long flags = local_dbg_save(); + asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); + local_dbg_restore(flags); +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::mdscr_read */ +unsigned int mdscr_read(void) +{ + unsigned int mdscr; + asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); + return mdscr; +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::clear_os_lock */ +static void clear_os_lock(void) +{ + asm volatile("msr oslar_el1, %0" : : "r" (0)); +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::debug_monitors_init */ +void debug_monitors_init(void) +{ + clear_os_lock(); +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::set_regs_spsr_ss */ +void set_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + spsr |= DBG_SPSR_SS; + regs->pstate = spsr; +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::clear_regs_spsr_ss */ +void clear_regs_spsr_ss(struct pt_regs *regs) +{ + unsigned long spsr; + + spsr = regs->pstate; + spsr &= ~DBG_SPSR_SS; + regs->pstate = spsr; +} + +extern int interrupt_from_user(void *); +extern void clear_single_step(struct thread *thread); + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::single_step_handler */ +int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + siginfo_t info; + int ret = -EFAULT; + + if (interrupt_from_user(regs)) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info._sifields._sigfault.si_addr = (void *)regs->pc; + set_signal(SIGTRAP, regs, &info); + clear_single_step(cpu_local_var(current)); + + ret = 0; + } else { + kprintf("Unexpected kernel single-step exception at EL1\n"); + } + return ret; +} + +/* @ref.impl arch/arm64/kernel/debug-monitors.c::brk_handler */ +int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + siginfo_t info; + int ret = -EFAULT; + + if (interrupt_from_user(regs)) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info._sifields._sigfault.si_addr = (void *)regs->pc; + set_signal(SIGTRAP, regs, &info); + + ret = 0; + } else { + kprintf("Unexpected kernel BRK exception at EL1\n"); + } + return ret; +} diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S new file mode 100644 index 00000000..b4ad1fca --- /dev/null +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -0,0 +1,126 @@ +/* entry-fpsimd.S COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include + +/* + * @ref.impl linux-linaro/arch/arm64/include/asm/fpsimdmacros.h + */ +/* + * FP/SIMD state saving and restoring macros + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +.macro fpsimd_save state, tmpnr + stp q0, q1, [\state, #16 * 0] + stp q2, q3, [\state, #16 * 2] + stp q4, q5, [\state, #16 * 4] + stp q6, q7, [\state, #16 * 6] + stp q8, q9, [\state, #16 * 8] + stp q10, q11, [\state, #16 * 10] + stp q12, q13, [\state, #16 * 12] + stp q14, q15, [\state, #16 * 14] + stp q16, q17, [\state, #16 * 16] + stp q18, q19, [\state, #16 * 18] + stp q20, q21, [\state, #16 * 20] + stp q22, q23, [\state, #16 * 22] + stp q24, q25, [\state, #16 * 24] + stp q26, q27, [\state, #16 * 26] + stp q28, q29, [\state, #16 * 28] + stp q30, q31, [\state, #16 * 30]! + mrs x\tmpnr, fpsr + str w\tmpnr, [\state, #16 * 2] + mrs x\tmpnr, fpcr + str w\tmpnr, [\state, #16 * 2 + 4] +.endm + +.macro fpsimd_restore_fpcr state, tmp + /* + * Writes to fpcr may be self-synchronising, so avoid restoring + * the register if it hasn't changed. + */ + mrs \tmp, fpcr + cmp \tmp, \state + b.eq 9999f + msr fpcr, \state +9999: +.endm + +/* Clobbers \state */ +.macro fpsimd_restore state, tmpnr + ldp q0, q1, [\state, #16 * 0] + ldp q2, q3, [\state, #16 * 2] + ldp q4, q5, [\state, #16 * 4] + ldp q6, q7, [\state, #16 * 6] + ldp q8, q9, [\state, #16 * 8] + ldp q10, q11, [\state, #16 * 10] + ldp q12, q13, [\state, #16 * 12] + ldp q14, q15, [\state, #16 * 14] + ldp q16, q17, [\state, #16 * 16] + ldp q18, q19, [\state, #16 * 18] + ldp q20, q21, [\state, #16 * 20] + ldp q22, q23, [\state, #16 * 22] + ldp q24, q25, [\state, #16 * 24] + ldp q26, q27, [\state, #16 * 26] + ldp q28, q29, [\state, #16 * 28] + ldp q30, q31, [\state, #16 * 30]! + ldr w\tmpnr, [\state, #16 * 2] + msr fpsr, x\tmpnr + ldr w\tmpnr, [\state, #16 * 2 + 4] + fpsimd_restore_fpcr x\tmpnr, \state +.endm + +/* + * @ref.impl linux-linaro/arch/arm64/kernel/entry-fpsimd.S + */ +/* + * Save the FP registers. + * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_save_state) + fpsimd_save x0, 8 + ret +ENDPROC(fpsimd_save_state) + +/* + * Load the FP registers. 
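+ * Note: fpsimd_restore post-indexes the state pointer, so x0 is clobbered.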
+ * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_load_state) + fpsimd_restore x0, 8 + ret +ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_ARM64_SVE +ENTRY(sve_save_state) + sve_save 0, x1, 2 + ret +ENDPROC(sve_save_state) + +ENTRY(sve_load_state) + sve_load 0, x1, x2, 3 + ret +ENDPROC(sve_load_state) + +ENTRY(sve_get_vl) + _zrdvl 0, 1 + ret +ENDPROC(sve_get_vl) +#endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S new file mode 100644 index 00000000..ee48c3ba --- /dev/null +++ b/arch/arm64/kernel/entry.S @@ -0,0 +1,558 @@ +/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include +#include +#include + +/* + * Bad Abort numbers + *----------------- + */ +#define BAD_SYNC 0 +#define BAD_IRQ 1 +#define BAD_FIQ 2 +#define BAD_ERROR 3 + + .macro kernel_entry, el, regsize = 64 + sub sp, sp, #S_FRAME_SIZE + .if \regsize == 32 + mov w0, w0 // zero upper 32 bits of x0 + .endif + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x19, [sp, #16 * 9] + stp x20, x21, [sp, #16 * 10] + stp x22, x23, [sp, #16 * 11] + stp x24, x25, [sp, #16 * 12] + stp x26, x27, [sp, #16 * 13] + stp x28, x29, [sp, #16 * 14] + + .if \el == 0 + mrs x21, sp_el0 + get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug + disable_step_tsk x19, x20 // exceptions when scheduling. + .else + add x21, sp, #S_FRAME_SIZE + .endif + mrs x22, elr_el1 + mrs x23, spsr_el1 +#if defined(CONFIG_HAS_NMI) + mrs_s x20, ICC_PMR_EL1 // Get PMR + and x20, x20, #ICC_PMR_EL1_G_BIT // Extract mask bit + lsl x20, x20, #PSR_G_PMR_G_SHIFT // Shift to a PSTATE RES0 bit + eor x20, x20, #PSR_G_BIT // Invert bit + orr x23, x20, x23 // Store PMR within PSTATE + mov x20, #ICC_PMR_EL1_MASKED + msr_s ICC_PMR_EL1, x20 // Mask normal interrupts at PMR +#endif /* defined(CONFIG_HAS_NMI) */ + stp lr, x21, [sp, #S_LR] + stp x22, x23, [sp, #S_PC] + + /* + * Set syscallno to -1 by default (overridden later if real syscall). + */ + .if \el == 0 + mvn x21, xzr + str x21, [sp, #S_SYSCALLNO] + .endif + + /* + * Registers that may be useful after this macro is invoked: + * + * x21 - aborted SP + * x22 - aborted PC + * x23 - aborted PSTATE + */ + .endm + + .macro kernel_exit, el, need_enable_step = 0 + .if \el == 0 + mov x0, #0 + mov x1, sp + mov x2, #0 + bl check_signal // check whether the signal is delivered + bl check_need_resched // or reschedule is needed. + mov x0, #0 + mov x1, sp + mov x2, #0 + bl check_signal_irq_disabled // check whether the signal is delivered(for kernel_exit) + .endif + disable_irq x1 // disable interrupts + .if \need_enable_step == 1 + ldr x1, [tsk, #TI_FLAGS] + enable_step_tsk x1, x2 + .endif + disable_nmi + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR + .if \el == 0 +// ct_user_enter // McKernel, disable (debugcode?) 
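+	// For a return to EL0, the user stack pointer saved in pt_regs
+	// is reloaded into sp_el0 below, so that eret resumes user code
+	// on the stack it was interrupted on.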
+ ldr x23, [sp, #S_SP] // load return stack pointer + msr sp_el0, x23 + .endif +#if defined(CONFIG_HAS_NMI) + and x20, x22, #PSR_G_BIT // Get stolen PSTATE bit + and x22, x22, #~PSR_G_BIT // Clear stolen bit + lsr x20, x20, #PSR_G_PMR_G_SHIFT // Shift back to PMR mask + eor x20, x20, #ICC_PMR_EL1_UNMASKED // x20 gets 0xf0 or 0xb0 + msr_s ICC_PMR_EL1, x20 // Write to PMR +#endif /* defined(CONFIG_HAS_NMI) */ + msr elr_el1, x21 // set up the return data + msr spsr_el1, x22 + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x19, [sp, #16 * 9] + ldp x20, x21, [sp, #16 * 10] + ldp x22, x23, [sp, #16 * 11] + ldp x24, x25, [sp, #16 * 12] + ldp x26, x27, [sp, #16 * 13] + ldp x28, x29, [sp, #16 * 14] + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + eret // return to kernel + .endm + + .macro get_thread_info, rd + mov \rd, sp + and \rd, \rd, #~(KERNEL_STACK_SIZE - 1) // top of stack + .endm + +/* + * These are the registers used in the syscall handler, and allow us to + * have in theory up to 7 arguments to a function - x0 to x6. + * + * x7 is reserved for the system call number in 32-bit mode. + */ +sc_nr .req x25 // number of system calls +scno .req x26 // syscall number +stbl .req x27 // syscall table pointer +tsk .req x28 // current thread_info + +/* + * Interrupt handling. + */ + .macro irq_handler + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] + mov x0, sp + blr x1 + .endm + + .text + +/* + * Exception vectors. + */ + + .align 11 +ENTRY(vectors) + ventry el1_sync_invalid // Synchronous EL1t + ventry el1_irq_invalid // IRQ EL1t + ventry el1_fiq_invalid // FIQ EL1t + ventry el1_error_invalid // Error EL1t + + ventry el1_sync // Synchronous EL1h + ventry el1_irq // IRQ EL1h + ventry el1_fiq_invalid // FIQ EL1h + ventry el1_error_invalid // Error EL1h + + ventry el0_sync // Synchronous 64-bit EL0 + ventry el0_irq // IRQ 64-bit EL0 + ventry el0_fiq_invalid // FIQ 64-bit EL0 + ventry el0_error_invalid // Error 64-bit EL0 + + ventry el0_sync_invalid // Synchronous 32-bit EL0 + ventry el0_irq_invalid // IRQ 32-bit EL0 + ventry el0_fiq_invalid // FIQ 32-bit EL0 + ventry el0_error_invalid // Error 32-bit EL0 +END(vectors) + +/* + * Invalid mode handlers + */ + .macro inv_entry, el, reason, regsize = 64 + kernel_entry el, \regsize + mov x0, sp + mov x1, #\reason + mrs x2, esr_el1 + enable_nmi + .if \el == 0 + bl bad_mode + b ret_to_user + .else + b bad_mode + .endif + .endm + +el0_sync_invalid: + inv_entry 0, BAD_SYNC +ENDPROC(el0_sync_invalid) + +el0_irq_invalid: + inv_entry 0, BAD_IRQ +ENDPROC(el0_irq_invalid) + +el0_fiq_invalid: + inv_entry 0, BAD_FIQ +ENDPROC(el0_fiq_invalid) + +el0_error_invalid: + inv_entry 0, BAD_ERROR +ENDPROC(el0_error_invalid) + +el1_sync_invalid: + inv_entry 1, BAD_SYNC +ENDPROC(el1_sync_invalid) + +el1_irq_invalid: + inv_entry 1, BAD_IRQ +ENDPROC(el1_irq_invalid) + +el1_fiq_invalid: + inv_entry 1, BAD_FIQ +ENDPROC(el1_fiq_invalid) + +el1_error_invalid: + inv_entry 1, BAD_ERROR +ENDPROC(el1_error_invalid) + +/* + * EL1 mode handlers. 
+ */
+	.align	6
+el1_sync:
+	kernel_entry 1
+	mrs	x1, esr_el1			// read the syndrome register
+	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
+	cmp	x24, #ESR_ELx_EC_DABT_CUR	// data abort in EL1
+	b.eq	el1_da
+//	cmp	x24, #ESR_ELx_EC_IABT_CUR	// instruction abort in EL1
+//	b.eq	el1_ia
+	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
+	b.eq	el1_undef
+	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
+	b.eq	el1_sp_pc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el1_sp_pc
+	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL1
+	b.eq	el1_undef
+	// cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
+	// b.ge	el1_dbg
+	b	el1_inv
+el1_ia:
+	/*
+	 * Fall through to the Data abort case
+	 */
+el1_da:
+	/*
+	 * Data abort handling
+	 */
+	mrs	x0, far_el1
+	enable_nmi
+	enable_dbg
+#if defined(CONFIG_HAS_NMI)
+# define PSR_INTR_SHIFT	PSR_G_SHIFT	// PSR_G_BIT
+#else /* defined(CONFIG_HAS_NMI) */
+# define PSR_INTR_SHIFT	7		// PSR_I_BIT
+#endif /* defined(CONFIG_HAS_NMI) */
+	// re-enable interrupts if they were enabled in the aborted context
+	tbnz	x23, #PSR_INTR_SHIFT, 1f
+	enable_irq x2
+1:
+	mov	x2, sp				// struct pt_regs
+	bl	do_mem_abort
+
+	// disable interrupts before pulling preserved data off the stack
+	kernel_exit 1
+
+el1_sp_pc:
+	/*
+	 * Stack or PC alignment exception handling
+	 */
+	mrs	x0, far_el1
+	enable_nmi
+	enable_dbg
+	mov	x2, sp
+	b	do_sp_pc_abort
+el1_undef:
+	/*
+	 * Undefined instruction
+	 */
+	enable_nmi
+	enable_dbg
+	mov	x0, sp
+	b	do_undefinstr
+// el1_dbg:
+//	/*
+//	 * Debug exception handling
+//	 */
+//	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
+//	cinc	x24, x24, eq			// set bit '0'
+//	tbz	x24, #0, el1_inv		// EL1 only
+//	mrs	x0, far_el1
+//	mov	x2, sp				// struct pt_regs
+//	bl	do_debug_exception
+//	kernel_exit 1
+el1_inv:
+	// TODO: add support for undefined instructions in kernel mode
+	mov	x0, sp
+	mov	x1, #BAD_SYNC
+	mrs	x2, esr_el1
+	enable_nmi
+	enable_dbg
+	b	bad_mode
+ENDPROC(el1_sync)
+
+/*
+ * EL1 mode handlers.
+ */
+	.align	6
+el1_irq:
+	kernel_entry 1
+	enable_dbg
+
+	irq_handler
+
+	kernel_exit 1
+ENDPROC(el1_irq)
+
+/*
+ * EL0 mode handlers.
+ */
+	.align	6
+el0_sync:
+	kernel_entry 0
+	mrs	x25, esr_el1			// read the syndrome register
+	lsr	x24, x25, #ESR_ELx_EC_SHIFT	// exception class
+	cmp	x24, #ESR_ELx_EC_SVC64		// SVC in 64-bit state
+	b.eq	el0_svc
+	cmp	x24, #ESR_ELx_EC_DABT_LOW	// data abort in EL0
+	b.eq	el0_da
+	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
+	b.eq	el0_ia
+	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
+	b.eq	el0_fpsimd_acc
+#ifdef CONFIG_ARM64_SVE
+	cmp	x24, #ESR_ELx_EC_SVE		// SVE access
+	b.eq	el0_sve_acc
+#endif
+	cmp	x24, #ESR_ELx_EC_FP_EXC64	// FP/ASIMD exception
+	b.eq	el0_fpsimd_exc
+	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
+	b.eq	el0_sp_pc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el0_sp_pc
+	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
+	b.eq	el0_undef
+	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
+	b.ge	el0_dbg
+	b	el0_inv
+el0_svc:
+	uxtw	scno, w8			// syscall number in w8
+	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
+	enable_nmi
+	enable_dbg_and_irq x0
+	adrp	x16, __arm64_syscall_handler
+	ldr	x16, [x16, #:lo12:__arm64_syscall_handler]
+	mov	x0, scno
+	mov	x1, sp
+	blr	x16	// __arm64_syscall_handler(int syscall_num, ihk_mc_user_context_t *uctx);
+	/* Signal checks have already been completed by the time we get back here.
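+	 *
+	 * For illustration: a user-space "svc #0" with the syscall number
+	 * in w8 and arguments in x0..x5 arrives here; the number is
+	 * zero-extended into scno, stored in pt_regs together with the
+	 * original x0, and passed to __arm64_syscall_handler(), which is
+	 * expected to dispatch through the syscall table and leave the
+	 * return value in the saved x0 for kernel_exit to restore.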
*/ + b ret_fast_syscall +el0_da: + /* + * Data abort handling + */ + mrs x26, far_el1 + // enable interrupts before calling the main handler + enable_nmi + enable_dbg_and_irq x0 + // ct_user_exit + bic x0, x26, #(0xff << 56) + mov x1, x25 + mov x2, sp + bl do_mem_abort + b ret_to_user +el0_ia: + /* + * Instruction abort handling + */ + mrs x26, far_el1 + // enable interrupts before calling the main handler + enable_nmi + enable_dbg_and_irq x0 + // ct_user_exit + mov x0, x26 + mov x1, x25 + mov x2, sp + bl do_mem_abort + b ret_to_user +el0_fpsimd_acc: + /* + * Floating Point or Advanced SIMD access + */ + enable_nmi + enable_dbg + // ct_user_exit + mov x0, x25 + mov x1, sp + bl do_fpsimd_acc + b ret_to_user +#ifdef CONFIG_ARM64_SVE + /* + * Scalable Vector Extension access + */ +el0_sve_acc: + enable_nmi + enable_dbg + // ct_user_exit + mov x0, x25 + mov x1, sp + bl do_sve_acc + b ret_to_user +#endif +el0_fpsimd_exc: + /* + * Floating Point, Advanced SIMD or SVE exception + */ + enable_nmi + enable_dbg + // ct_user_exit + mov x0, x25 + mov x1, sp + bl do_fpsimd_exc + b ret_to_user +el0_sp_pc: + /* + * Stack or PC alignment exception handling + */ + mrs x26, far_el1 + // enable interrupts before calling the main handler + enable_nmi + enable_dbg_and_irq x0 + mov x0, x26 + mov x1, x25 + mov x2, sp + bl do_sp_pc_abort + b ret_to_user +el0_undef: + /* + * Undefined instruction + */ + // enable interrupts before calling the main handler + enable_nmi + enable_dbg_and_irq x0 + // ct_user_exit + mov x0, sp + bl do_undefinstr + b ret_to_user +el0_dbg: + /* + * Debug exception handling + */ + tbnz x24, #0, el0_inv // EL0 only + mrs x0, far_el1 + mov x1, x25 + mov x2, sp + enable_nmi + bl do_debug_exception + enable_dbg + // ct_user_exit + b ret_to_user +el0_inv: + enable_dbg + mov x0, sp + mov x1, #BAD_SYNC + mrs x2, esr_el1 + enable_nmi + bl bad_mode + b ret_to_user +ENDPROC(el0_sync) + .align 6 +el0_irq: + kernel_entry 0 + enable_dbg + irq_handler + b ret_to_user +ENDPROC(el0_irq) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + cmp x0, xzr // for idle process branch(skip save) + b.eq 1f + add x8, x0, #TI_CPU_CONTEXT + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] +1: add x8, x1, #TI_CPU_CONTEXT + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + mov x0, x2 // return void *prev + ret +ENDPROC(cpu_switch_to) + + +ret_fast_syscall: + kernel_exit 0, 1 +ENDPROC(ret_fast_syscall) + +/* + * "slow" syscall return path. + */ +ret_to_user: +no_work_pending: + kernel_exit 0, 1 +ENDPROC(ret_to_user) + +/* + * This is how we return from a fork. 
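+ *
+ * By the cpu_switch_to() convention above, a newly created thread's
+ * saved x19 holds a kernel-thread entry point (zero for a user
+ * thread) and x20 its argument: a kernel thread branches to x19 with
+ * x20 in x0, while a user thread skips the call; both then drop the
+ * runqueue lock and leave through ret_to_user.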
+ */ +ENTRY(ret_from_fork) +// bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + bl release_runq_lock + b ret_to_user +ENDPROC(ret_from_fork) + +/* TODO: skeleton for rusage */ +ENTRY(__freeze) +ENDPROC(__freeze) diff --git a/arch/arm64/kernel/fault.c b/arch/arm64/kernel/fault.c new file mode 100644 index 00000000..6fc7f157 --- /dev/null +++ b/arch/arm64/kernel/fault.c @@ -0,0 +1,289 @@ +/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long __page_fault_handler_address; +extern int interrupt_from_user(void *); + +void set_signal(int sig, void *regs, struct siginfo *info); +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs); +static int do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); +static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); +static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs); +static int do_alignment_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs); + +static struct fault_info { + int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); + int sig; + int code; + const char *name; +} fault_info[] = { + { do_bad, SIGBUS, 0, "ttbr address size fault" }, + { do_bad, SIGBUS, 0, "level 1 address size fault" }, + { do_bad, SIGBUS, 0, "level 2 address size fault" }, + { do_bad, SIGBUS, 0, "level 3 address size fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "unknown 8" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 12" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "unknown 34" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, 
"unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "TLB conflict abort" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "unknown 58" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "section domain fault" }, + { do_bad, SIGBUS, 0, "page domain fault" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +static const char *fault_name(unsigned int esr) +{ + const struct fault_info *inf = fault_info + (esr & 63); + return inf->name; +} + +/* + * Dispatch a data abort to the relevant handler. + */ +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + const struct fault_info *inf = fault_info + (esr & 63); + struct siginfo info; + + /* set_cputime called in inf->fn() */ + if (!inf->fn(addr, esr, regs)) + return; + + set_cputime(interrupt_from_user(regs)? 1: 2); + kprintf("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info._sifields._sigfault.si_addr = (void*)addr; + + arm64_notify_die("", regs, &info, esr); + set_cputime(0); +} + +/* + * Handle stack alignment exceptions. + */ +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct siginfo info; + + set_cputime(interrupt_from_user(regs)? 1: 2); + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRALN; + info._sifields._sigfault.si_addr = (void*)addr; + arm64_notify_die("", regs, &info, esr); + set_cputime(0); +} + +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct siginfo info; + set_cputime(interrupt_from_user(regs) ? 1: 2); + /* + * If we are in kernel mode at this point, we have no context to + * handle this fault with. + */ + if (interrupt_from_user(regs)) { + kprintf("unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", + fault_name(esr), SIGSEGV, addr, esr); + + current_thread_info()->fault_address = addr; + current_thread_info()->fault_code = esr; + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info._sifields._sigfault.si_addr = (void *)addr; + set_signal(SIGSEGV, regs, &info); + + } else { + kprintf("Unable to handle kernel %s at virtual address %08lx\n", + (addr < PAGE_SIZE) ? 
"NULL pointer dereference" : "paging request", addr); + panic("OOps."); + } + set_cputime(0); +} + +static int is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + +static int do_page_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + void (*page_fault_handler)(void *, uint64_t, void *); + uint64_t reason = 0; + int esr_ec_dfsc = (esr & 63); + + if (interrupt_from_user(regs)) { + reason |= PF_USER; + } + + if (is_el0_instruction_abort(esr)) { + reason |= PF_INSTR; + } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + reason |= PF_WRITE; + if (13 <= esr_ec_dfsc && esr_ec_dfsc <= 15 ) { + /* level [1-3] permission fault */ + reason |= PF_PROT; + } + } + + page_fault_handler = (void *)__page_fault_handler_address; + (*page_fault_handler)((void *)addr, reason, regs); + + return 0; +} + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain a valid + * entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are probably + * faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant entry, we + * copy the it to this task. If not, we send the process a signal, fixup the + * exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt + * or a critical region, and should only copy the information from the master + * page table, nothing more. + */ +static int do_translation_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + if (addr < USER_END) + return do_page_fault(addr, esr, regs); + + do_bad_area(addr, esr, regs); + return 0; +} + +static int do_alignment_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + do_bad_area(addr, esr, regs); + return 0; +} + +extern int breakpoint_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs); +extern int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs); +extern int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs); +extern int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs); +static struct fault_info debug_fault_info[] = { + { breakpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-breakpoint handler" }, + { single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler" }, + { watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" }, + { do_bad, SIGTRAP, 0, "aarch32 vector catch" }, + { brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler" }, + { do_bad, SIGBUS, 0, "unknown 7" }, +}; + +int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); + struct siginfo info; + int from_user = interrupt_from_user(regs); + int ret = -1; + + set_cputime(from_user ? 1: 2); + + if (!inf->fn(addr, esr, regs)) { + ret = 1; + goto out; + } + + kprintf("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info._sifields._sigfault.si_addr = (void *)addr; + + arm64_notify_die("", regs, &info, 0); + + ret = 0; +out: + set_cputime(0); + return ret; +} + +/* + * This abort handler always returns "fault". 
+ */ +static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + set_cputime(interrupt_from_user(regs) ? 1: 2); + set_cputime(0); + return 1; +} diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c new file mode 100644 index 00000000..a32e7d78 --- /dev/null +++ b/arch/arm64/kernel/fpsimd.c @@ -0,0 +1,325 @@ +/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG_PRINT_FPSIMD + +#ifdef DEBUG_PRINT_FPSIMD +#define dkprintf kprintf +#define ekprintf kprintf +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf kprintf +#endif + +#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ + __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) + +#ifdef CONFIG_ARM64_SVE + +/* Maximum supported vector length across all CPUs (initially poisoned) */ +int sve_max_vl = -1; +/* Default VL for tasks that don't set it explicitly: */ +int sve_default_vl = -1; + +size_t sve_state_size(struct thread const *thread) +{ + unsigned int vl = thread->ctx.thread->sve_vl; + + BUG_ON(!sve_vl_valid(vl)); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); +} + +void sve_free(struct thread *thread) +{ + if (thread->ctx.thread->sve_state) { + kfree(thread->ctx.thread->sve_state); + thread->ctx.thread->sve_state = NULL; + } +} + +void sve_alloc(struct thread *thread) +{ + if (thread->ctx.thread->sve_state) { + return; + } + + thread->ctx.thread->sve_state = + kmalloc(sve_state_size(thread), IHK_MC_AP_NOWAIT); + BUG_ON(!thread->ctx.thread->sve_state); + + memset(thread->ctx.thread->sve_state, 0, sve_state_size(thread)); +} + +static int get_nr_threads(struct process *proc) +{ + struct thread *child; + struct mcs_rwlock_node_irqsave lock; + int nr_threads = 0; + + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + list_for_each_entry(child, &proc->threads_list, siblings_list){ + nr_threads++; + } + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + return nr_threads; +} + +extern void save_fp_regs(struct thread *thread); +extern void clear_fp_regs(struct thread *thread); +extern void restore_fp_regs(struct thread *thread); +/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */ +int sve_set_vector_length(struct thread *thread, + unsigned long vl, unsigned long flags) +{ + struct thread_info *ti = thread->ctx.thread; + + BUG_ON(thread == cpu_local_var(current) && cpu_local_var(no_preempt) == 0); + + /* + * To avoid accidents, forbid setting for individual threads of a + * multithreaded process. 
User code that knows what it's doing can + * pass PR_SVE_SET_VL_THREAD to override this restriction: + */ + if (!(flags & PR_SVE_SET_VL_THREAD) && get_nr_threads(thread->proc) != 1) { + return -EINVAL; + } + flags &= ~(unsigned long)PR_SVE_SET_VL_THREAD; + + if (flags & ~(unsigned long)(PR_SVE_SET_VL_INHERIT | + PR_SVE_SET_VL_ONEXEC)) { + return -EINVAL; + } + + if (!sve_vl_valid(vl)) { + return -EINVAL; + } + + if (vl > sve_max_vl) { + BUG_ON(!sve_vl_valid(sve_max_vl)); + vl = sve_max_vl; + } + + if (flags & (PR_SVE_SET_VL_ONEXEC | + PR_SVE_SET_VL_INHERIT)) { + ti->sve_vl_onexec = vl; + } else { + /* Reset VL to system default on next exec: */ + ti->sve_vl_onexec = 0; + } + + /* Only actually set the VL if not deferred: */ + if (flags & PR_SVE_SET_VL_ONEXEC) { + goto out; + } + + if (vl != ti->sve_vl) { + if ((elf_hwcap & HWCAP_SVE)) { + fp_regs_struct fp_regs; + memset(&fp_regs, 0, sizeof(fp_regs)); + + /* for self at prctl syscall */ + if (thread == cpu_local_var(current)) { + save_fp_regs(thread); + clear_fp_regs(thread); + thread_sve_to_fpsimd(thread, &fp_regs); + sve_free(thread); + + ti->sve_vl = vl; + + sve_alloc(thread); + thread_fpsimd_to_sve(thread, &fp_regs); + restore_fp_regs(thread); + /* for target thread at ptrace */ + } else { + thread_sve_to_fpsimd(thread, &fp_regs); + sve_free(thread); + + ti->sve_vl = vl; + + sve_alloc(thread); + thread_fpsimd_to_sve(thread, &fp_regs); + } + } + } + ti->sve_vl = vl; + +out: + ti->sve_flags = flags & PR_SVE_SET_VL_INHERIT; + + return 0; +} + +/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_prctl_status */ +/* + * Encode the current vector length and flags for return. + * This is only required for prctl(): ptrace has separate fields + */ +static int sve_prctl_status(const struct thread_info *ti) +{ + int ret = ti->sve_vl; + + ret |= ti->sve_flags << 16; + + return ret; +} + +/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_task_vl */ +int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length, + const unsigned long flags) +{ + int ret; + + if (!(elf_hwcap & HWCAP_SVE)) { + return -EINVAL; + } + + BUG_ON(thread != cpu_local_var(current)); + + preempt_disable(); + ret = sve_set_vector_length(thread, vector_length, flags); + preempt_enable(); + + if (ret) { + return ret; + } + return sve_prctl_status(thread->ctx.thread); +} + +/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_get_ti_vl */ +int sve_get_thread_vl(const struct thread *thread) +{ + if (!(elf_hwcap & HWCAP_SVE)) { + return -EINVAL; + } + return sve_prctl_status(thread->ctx.thread); +} + +void do_sve_acc(unsigned int esr, struct pt_regs *regs) +{ + kprintf("PANIC: CPU: %d PID: %d ESR: %x Trapped SVE access.\n", + ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid, esr); + panic(""); +} + +void init_sve_vl(void) +{ + extern unsigned long ihk_param_default_vl; + uint64_t zcr; + + if (unlikely(!(elf_hwcap & HWCAP_SVE))) { + return; + } + + zcr = read_system_reg(SYS_ZCR_EL1); + BUG_ON(((zcr & ZCR_EL1_LEN_MASK) + 1) * 16 > sve_max_vl); + + sve_max_vl = ((zcr & ZCR_EL1_LEN_MASK) + 1) * 16; + sve_default_vl = ihk_param_default_vl; + + if (sve_default_vl == 0) { + kprintf("SVE: Getting default VL = 0 from HOST-Linux.\n"); + sve_default_vl = sve_max_vl > 64 ? 
+			64 : sve_max_vl;
+		kprintf("SVE: Using default vl(%d byte).\n", sve_default_vl);
+	}
+
+	kprintf("SVE: maximum available vector length %u bytes per vector\n",
+		sve_max_vl);
+	kprintf("SVE: default vector length %u bytes per vector\n",
+		sve_default_vl);
+}
+
+#else /* CONFIG_ARM64_SVE */
+
+void init_sve_vl(void)
+{
+	/* nothing to do. */
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
+/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_pffr */
+static void *__thread_pffr(struct thread *thread)
+{
+	unsigned int vl = thread->ctx.thread->sve_vl;
+
+	BUG_ON(!sve_vl_valid(vl));
+	return (char *)thread->ctx.thread->sve_state + 34 * vl;
+}
+
+/* The caller must check the HWCAP_FP and HWCAP_ASIMD state before calling this. */
+void thread_fpsimd_load(struct thread *thread)
+{
+	if (likely(elf_hwcap & HWCAP_SVE)) {
+		unsigned int vl = thread->ctx.thread->sve_vl;
+
+		BUG_ON(!sve_vl_valid(vl));
+		sve_load_state(__thread_pffr(thread), &thread->fp_regs->fpsr, sve_vq_from_vl(vl) - 1);
+		dkprintf("sve for TID %d restored\n", thread->tid);
+	} else {
+		// Load the current FPSIMD state from memory.
+		fpsimd_load_state(thread->fp_regs);
+		dkprintf("fp_regs for TID %d restored\n", thread->tid);
+	}
+}
+
+/* The caller must check the HWCAP_FP and HWCAP_ASIMD state before calling this. */
+void thread_fpsimd_save(struct thread *thread)
+{
+	if (likely(elf_hwcap & HWCAP_SVE)) {
+		sve_save_state(__thread_pffr(thread), &thread->fp_regs->fpsr);
+		dkprintf("sve for TID %d saved\n", thread->tid);
+	} else {
+		// Save the current FPSIMD state to memory.
+		fpsimd_save_state(thread->fp_regs);
+		dkprintf("fp_regs for TID %d saved\n", thread->tid);
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_fpsimd_to_sve */
+static void __thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs, unsigned int vq)
+{
+	struct fpsimd_sve_state(vq) *sst = thread->ctx.thread->sve_state;
+	unsigned int i;
+
+	for (i = 0; i < 32; i++) {
+		sst->zregs[i][0] = fp_regs->vregs[i];
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/fpsimd.c::task_fpsimd_to_sve */
+void thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs)
+{
+	unsigned int vl = thread->ctx.thread->sve_vl;
+
+	BUG_ON(!sve_vl_valid(vl));
+	__thread_fpsimd_to_sve(thread, fp_regs, sve_vq_from_vl(vl));
+}
+
+/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_sve_to_fpsimd */
+static void __thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs, unsigned int vq)
+{
+	struct fpsimd_sve_state(vq) *sst = thread->ctx.thread->sve_state;
+	unsigned int i;
+
+	for (i = 0; i < 32; i++) {
+		fp_regs->vregs[i] = sst->zregs[i][0];
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/fpsimd.c::task_sve_to_fpsimd */
+void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs)
+{
+	unsigned int vl = thread->ctx.thread->sve_vl;
+
+	BUG_ON(!sve_vl_valid(vl));
+	__thread_sve_to_fpsimd(thread, fp_regs, sve_vq_from_vl(vl));
+}
diff --git a/arch/arm64/kernel/gencore.c b/arch/arm64/kernel/gencore.c
new file mode 100644
index 00000000..e193fc4d
--- /dev/null
+++ b/arch/arm64/kernel/gencore.c
@@ -0,0 +1,463 @@
+/* gencore.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
+#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define align32(x) ((((x) + 3) / 4) * 4)
+#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
+
+//#define DEBUG_PRINT_GENCORE
+
+#ifdef DEBUG_PRINT_GENCORE
+#define dkprintf(...) kprintf(__VA_ARGS__)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#endif
+
+/*
+ * Generate a core file image, which consists of many chunks.
+ * Returns an allocated table, an entry of which is a pair of the address
+ * of a chunk and its length.
+ */
+
+/**
+ * \brief Fill the elf header.
+ *
+ * \param eh An Elf64_Ehdr structure.
+ * \param segs Number of segments of the core file.
+ */
+
+void fill_elf_header(Elf64_Ehdr *eh, int segs)
+{
+	eh->e_ident[EI_MAG0] = 0x7f;
+	eh->e_ident[EI_MAG1] = 'E';
+	eh->e_ident[EI_MAG2] = 'L';
+	eh->e_ident[EI_MAG3] = 'F';
+	eh->e_ident[EI_CLASS] = ELFCLASS64;
+	eh->e_ident[EI_DATA] = ELFDATA2LSB;
+	eh->e_ident[EI_VERSION] = El_VERSION;
+	eh->e_ident[EI_OSABI] = ELFOSABI_NONE;
+	eh->e_ident[EI_ABIVERSION] = El_ABIVERSION_NONE;
+
+	eh->e_type = ET_CORE;
+#ifdef CONFIG_MIC
+	eh->e_machine = EM_K10M;
+#else
+	eh->e_machine = EM_AARCH64;
+#endif
+	eh->e_version = EV_CURRENT;
+	eh->e_entry = 0;	/* Do we really need this? */
+	eh->e_phoff = 64;	/* fixed */
+	eh->e_shoff = 0;	/* no section header */
+	eh->e_flags = 0;
+	eh->e_ehsize = 64;	/* fixed */
+	eh->e_phentsize = 56;	/* fixed */
+	eh->e_phnum = segs;
+	eh->e_shentsize = 0;
+	eh->e_shnum = 0;
+	eh->e_shstrndx = 0;
+}
+
+/**
+ * \brief Return the size of the prstatus entry of the NOTE segment.
+ *
+ */
+
+int get_prstatus_size(void)
+{
+	return sizeof(struct note) + align32(sizeof("CORE"))
+		+ align32(sizeof(struct elf_prstatus64));
+}
+
+/**
+ * \brief Fill a prstatus structure.
+ *
+ * \param head A pointer to a note structure.
+ * \param thread A pointer to the current thread structure.
+ * \param regs0 A pointer to a x86_regs structure.
+ */
+
+void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
+{
+/* TODO(pka_idle) */
+}
+
+/**
+ * \brief Return the size of the prpsinfo entry of the NOTE segment.
+ *
+ */
+
+int get_prpsinfo_size(void)
+{
+	return sizeof(struct note) + align32(sizeof("CORE"))
+		+ align32(sizeof(struct elf_prpsinfo64));
+}
+
+/**
+ * \brief Fill a prpsinfo structure.
+ *
+ * \param head A pointer to a note structure.
+ * \param thread A pointer to the current thread structure.
+ * \param regs A pointer to a x86_regs structure.
+ */
+
+void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
+{
+	void *name;
+	struct elf_prpsinfo64 *prpsinfo;
+
+	head->namesz = sizeof("CORE");
+	head->descsz = sizeof(struct elf_prpsinfo64);
+	head->type = NT_PRPSINFO;
+	name = (void *)(head + 1);
+	memcpy(name, "CORE", sizeof("CORE"));
+	prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
+
+	prpsinfo->pr_state = thread->status;
+	prpsinfo->pr_pid = thread->proc->pid;
+
+/*
+	We leave most of the fields unfilled.
+
+	char pr_sname;
+	char pr_zomb;
+	char pr_nice;
+	a8_uint64_t pr_flag;
+	unsigned int pr_uid;
+	unsigned int pr_gid;
+	int pr_ppid, pr_pgrp, pr_sid;
+	char pr_fname[16];
+	char pr_psargs[ELF_PRARGSZ];
+*/
+}
+
+/**
+ * \brief Return the size of the AUXV entry of the NOTE segment.
+ *
+ */
+
+int get_auxv_size(void)
+{
+	return sizeof(struct note) + align32(sizeof("CORE"))
+		+ sizeof(unsigned long) * AUXV_LEN;
+}
+
+/**
+ * \brief Fill an AUXV structure.
+ *
+ * \param head A pointer to a note structure.
+ * \param thread A pointer to the current thread structure.
+ * \param regs A pointer to a x86_regs structure.
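+ *
+ * The note payload is the AUXV_LEN-word auxiliary vector saved at
+ * exec time (AT_PHDR, AT_ENTRY, AT_PAGESZ, ...), copied verbatim
+ * from proc->saved_auxv; debuggers read this NT_AUXV note to locate
+ * the program headers of the dumped process.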
+ */
+
+void fill_auxv(struct note *head, struct thread *thread, void *regs)
+{
+	void *name;
+	void *auxv;
+
+	head->namesz = sizeof("CORE");
+	head->descsz = sizeof(unsigned long) * AUXV_LEN;
+	head->type = NT_AUXV;
+	name = (void *)(head + 1);
+	memcpy(name, "CORE", sizeof("CORE"));
+	auxv = name + align32(sizeof("CORE"));
+	memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
+}
+
+/**
+ * \brief Return the size of the whole NOTE segment.
+ *
+ */
+
+int get_note_size(void)
+{
+	return get_prstatus_size() + get_prpsinfo_size()
+		+ get_auxv_size();
+}
+
+/**
+ * \brief Fill the NOTE segment.
+ *
+ * \param head A pointer to a note structure.
+ * \param thread A pointer to the current thread structure.
+ * \param regs A pointer to a x86_regs structure.
+ */
+
+void fill_note(void *note, struct thread *thread, void *regs)
+{
+	fill_prstatus(note, thread, regs);
+	note += get_prstatus_size();
+	fill_prpsinfo(note, thread, regs);
+	note += get_prpsinfo_size();
+	fill_auxv(note, thread, regs);
+}
+
+/**
+ * \brief Generate an image of the core file.
+ *
+ * \param thread A pointer to the current thread structure.
+ * \param regs A pointer to a x86_regs structure.
+ * \param coretable(out) An array of core chunks.
+ * \param chunks(out) Number of the entries of coretable.
+ *
+ * A core chunk is represented by a pair of a physical
+ * address of memory region and its size. If there is
+ * no corresponding physical address for a VM area
+ * (an unallocated demand-paging page, for example), the
+ * address should be zero.
+ */
+
+int gencore(struct thread *thread, void *regs,
+	    struct coretable **coretable, int *chunks)
+{
+	struct coretable *ct = NULL;
+	Elf64_Ehdr eh;
+	Elf64_Phdr *ph = NULL;
+	void *note = NULL;
+	struct vm_range *range;
+	struct process_vm *vm = thread->vm;
+	int segs = 1;	/* the first one is for NOTE */
+	int notesize, phsize, alignednotesize;
+	unsigned int offset = 0;
+	int i;
+
+	*chunks = 3;	/* Elf header, header table and NOTE segment */
+
+	if (vm == NULL) {
+		dkprintf("no vm found.\n");
+		return -1;
+	}
+
+	list_for_each_entry(range, &vm->vm_range_list, list) {
+		dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
+			 range->start, range->end, range->flag, range->objoff);
+		/* We omit reserved areas because they are only for
+		   mckernel's internal use. */
+		if (range->flag & VR_RESERVED)
+			continue;
+		/* We need a chunk for each page for a demand paging area.
+		   This can be optimized for spatial complexity but we would
+		   lose simplicity instead.
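+		   For example, a five-page range whose third page was
+		   never faulted in yields five chunks here: one per
+		   mapped page plus a single zero-address chunk for the
+		   hole, which the host-side writer can pad with zeros.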
+		*/
+		if (range->flag & VR_DEMAND_PAGING) {
+			unsigned long p, phys;
+			int prevzero = 0;
+			for (p = range->start; p < range->end; p += PAGE_SIZE) {
+				if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
+							   (void *)p, &phys) != 0) {
+					prevzero = 1;
+				} else {
+					if (prevzero == 1)
+						(*chunks)++;
+					(*chunks)++;
+					prevzero = 0;
+				}
+			}
+			if (prevzero == 1)
+				(*chunks)++;
+		} else {
+			(*chunks)++;
+		}
+		segs++;
+	}
+	dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
+
+	{
+		struct vm_regions region = thread->vm->region;
+
+		dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
+		dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
+		dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end);
+		dkprintf("map: %lx-%lx\n", region.map_start, region.map_end);
+		dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end);
+		dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end);
+	}
+
+	dkprintf("now generate a core file image\n");
+
+	offset += sizeof(eh);
+	fill_elf_header(&eh, segs);
+
+	/* program header table */
+	phsize = sizeof(Elf64_Phdr) * segs;
+	ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
+	if (ph == NULL) {
+		dkprintf("could not alloc a program header table.\n");
+		goto fail;
+	}
+	memset(ph, 0, phsize);
+
+	offset += phsize;
+
+	/* NOTE segment
+	 * To align the next segment page-sized, we prepare a padded
+	 * region for our NOTE segment.
+	 */
+	notesize = get_note_size();
+	alignednotesize = alignpage(notesize + offset) - offset;
+	note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
+	if (note == NULL) {
+		dkprintf("could not alloc NOTE for core.\n");
+		goto fail;
+	}
+	memset(note, 0, alignednotesize);
+	fill_note(note, thread, regs);
+
+	/* program header for NOTE segment is exceptional */
+	ph[0].p_type = PT_NOTE;
+	ph[0].p_flags = 0;
+	ph[0].p_offset = offset;
+	ph[0].p_vaddr = 0;
+	ph[0].p_paddr = 0;
+	ph[0].p_filesz = notesize;
+	ph[0].p_memsz = notesize;
+	ph[0].p_align = 0;
+
+	offset += alignednotesize;
+
+	/* program header for each memory chunk */
+	i = 1;
+	list_for_each_entry(range, &vm->vm_range_list, list) {
+		unsigned long flag = range->flag;
+		unsigned long size = range->end - range->start;
+
+		if (range->flag & VR_RESERVED)
+			continue;
+
+		ph[i].p_type = PT_LOAD;
+		ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
+			| ((flag & VR_PROT_WRITE) ? PF_W : 0)
+			| ((flag & VR_PROT_EXEC) ? PF_X : 0);
+		ph[i].p_offset = offset;
+		ph[i].p_vaddr = range->start;
+		ph[i].p_paddr = 0;
+		ph[i].p_filesz = size;
+		ph[i].p_memsz = size;
+		ph[i].p_align = PAGE_SIZE;
+		i++;
+		offset += size;
+	}
+
+	/* coretable to send to host */
+	ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
+	if (!ct) {
+		dkprintf("could not alloc a coretable.\n");
+		goto fail;
+	}
+
+	ct[0].addr = virt_to_phys(&eh);	/* ELF header */
+	ct[0].len = 64;
+	dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh);
+
+	ct[1].addr = virt_to_phys(ph);	/* program header table */
+	ct[1].len = phsize;
+	dkprintf("coretable[1]: %lx@%lx(%lx)\n", ct[1].len, ct[1].addr, ph);
+
+	ct[2].addr = virt_to_phys(note);	/* NOTE segment */
+	ct[2].len = alignednotesize;
+	dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
+
+	i = 3;	/* memory segments */
+	list_for_each_entry(range, &vm->vm_range_list, list) {
+		unsigned long phys;
+
+		if (range->flag & VR_RESERVED)
+			continue;
+		if (range->flag & VR_DEMAND_PAGING) {
+			/* Just an ad hoc kluge.
*/ + unsigned long p, start, phys; + int prevzero = 0; + unsigned long size = 0; + + for (start = p = range->start; + p < range->end; p += PAGE_SIZE) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, + (void *)p, &phys) != 0) { + if (prevzero == 0) { + /* We begin a new chunk */ + size = PAGE_SIZE; + start = p; + } else { + /* We extend the previous chunk */ + size += PAGE_SIZE; + } + prevzero = 1; + } else { + if (prevzero == 1) { + /* Flush out an empty chunk */ + ct[i].addr = 0; + ct[i].len = size; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, start); + i++; + + } + ct[i].addr = phys; + ct[i].len = PAGE_SIZE; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, p); + i++; + prevzero = 0; + } + } + if (prevzero == 1) { + /* An empty chunk */ + ct[i].addr = 0; + ct[i].len = size; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, start); + i++; + } + } else { + if ((thread->vm->region.user_start <= range->start) && + (range->end <= thread->vm->region.user_end)) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, + (void *)range->start, &phys) != 0) { + dkprintf("could not convert user virtual address %lx" + "to physical address", range->start); + goto fail; + } + } else { + phys = virt_to_phys((void *)range->start); + } + ct[i].addr = phys; + ct[i].len = range->end - range->start; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, range->start); + i++; + } + } + *coretable = ct; + + return 0; + + fail: + if (ct) + kfree(ct); + if (ph) + kfree(ph); + if (note) + kfree(note); + return -1; +} + +/** + * \brief Free all the allocated spaces for an image of the core file. + * + * \param coretable An array of core chunks. + */ + +void freecore(struct coretable **coretable) +{ + struct coretable *ct = *coretable; + kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */ + kfree(phys_to_virt(ct[1].addr)); /* ph */ + kfree(*coretable); +} +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S new file mode 100644 index 00000000..9a706887 --- /dev/null +++ b/arch/arm64/kernel/head.S @@ -0,0 +1,805 @@ +/* head.S COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KERNEL_RAM_VADDR MAP_KERNEL_START +#define EARLY_ALLOC_VADDR MAP_EARLY_ALLOC +#define BOOT_PARAM_VADDR MAP_BOOT_PARAM + +//#ifndef CONFIG_SMP +//# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF +//# define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF +//#else +# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED +# define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S +//#endif /*CONFIG_SMP*/ + +#ifdef CONFIG_ARM64_64K_PAGES +# define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS +#else +# define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS +#endif + + .macro pgtbl_init_core, name, dir, tbl, ents, virt_to_phys + ldr \tbl, =\name + ldr \ents, =\dir + add \tbl, \tbl, \virt_to_phys + str \ents, [\tbl] + add \tbl, \tbl, #8 + add \ents, \ents, \virt_to_phys + str \ents, [\tbl] + .endm + + .macro pgtbl_init, tbl, ents, virt_to_phys + pgtbl_init_core swapper_page_table, swapper_pg_dir, \tbl, \ents, \virt_to_phys + pgtbl_init_core idmap_page_table, idmap_pg_dir, \tbl, \ents, \virt_to_phys + .endm + + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys + .endm + +#ifdef 
CONFIG_ARM64_64K_PAGES +# define BLOCK_SHIFT PAGE_SHIFT +# define BLOCK_SIZE PAGE_SIZE +# define TABLE_SHIFT PMD_SHIFT +#else +# define BLOCK_SHIFT SECTION_SHIFT +# define BLOCK_SIZE SECTION_SIZE +# define TABLE_SHIFT PUD_SHIFT +#endif + +#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_END _end + +/* ihk param offset */ +#define TRAMPOLINE_DATA_RESERVED_SIZE 0x08 +#define TRAMPOLINE_DATA_PGTBL_SIZE 0x08 +#define TRAMPOLINE_DATA_LOAD_SIZE 0x08 +#define TRAMPOLINE_DATA_STACK_SIZE 0x08 +#define TRAMPOLINE_DATA_BOOT_PARAM_SIZE 0x08 +#define TRAMPOLINE_DATA_STARTUP_DATA_SIZE 0x08 +#define TRAMPOLINE_DATA_ST_PHYS_BASE_SIZE 0x08 +#define TRAMPOLINE_DATA_ST_PHYS_SIZE_SIZE 0x08 +#define TRAMPOLINE_DATA_GIC_DIST_PA_SIZE 0x08 +#define TRAMPOLINE_DATA_GIC_DIST_MAP_SIZE_SIZE 0x08 +#define TRAMPOLINE_DATA_GIC_CPU_PA_SIZE 0x08 +#define TRAMPOLINE_DATA_GIC_CPU_MAP_SIZE_SIZE 0x08 +#define TRAMPOLINE_DATA_GIC_PERCPU_OFF_SIZE 0x04 +#define TRAMPOLINE_DATA_GIC_VERSION_SIZE 0x04 +#define TRAMPOLINE_DATA_LPJ_SIZE 0x08 +#define TRAMPOLINE_DATA_HZ_SIZE 0x08 +#define TRAMPOLINE_DATA_PSCI_METHOD_SIZE 0x08 +#define TRAMPOLINE_DATA_USE_VIRT_TIMER_SIZE 0x08 +#define TRAMPOLINE_DATA_EVTSTRM_TIMER_RATE_SIZE 0x08 +#define TRAMPOLINE_DATA_DEFAULT_VL_SIZE 0x08 +#define TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE 0x08 +#define TRAMPOLINE_DATA_CPU_MAP_SIZE (NR_CPUS * 8) +#define TRAMPOLINE_DATA_DATA_RDISTS_PA_SIZE (NR_CPUS * 8) +#define TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE 0x04 +#define TRAMPOLINE_DATA_PMU_AFF_SIZE (CONFIG_SMP_MAX_CORES * 4) + +#define STARTUP_DATA_RESERVED 0x00 +#define STARTUP_DATA_BASE 0x08 +#define STARTUP_DATA_PGTBL 0x10 +#define STARTUP_DATA_STACK 0x18 +#define STARTUP_DATA_ARG2 0x20 +#define STARTUP_DATA_TRAMPILINE 0x28 +#define STARTUP_DATA_NEXT_PC 0x30 + +/* ihk param save area */ + .globl ihk_param_head + .globl ihk_param_gic_dist_base_pa, ihk_param_gic_cpu_base_pa + .globl ihk_param_gic_dist_map_size, ihk_param_gic_cpu_map_size + .globl ihk_param_gic_percpu_offset, ihk_param_gic_version + .globl ihk_param_lpj, ihk_param_hz, ihk_param_psci_method + .globl ihk_param_cpu_logical_map, ihk_param_gic_rdist_base_pa + .globl ihk_param_pmu_irq_affiniry, ihk_param_nr_pmu_irq_affiniry + .globl ihk_param_use_virt_timer, ihk_param_evtstrm_timer_rate + .globl ihk_param_default_vl +ihk_param_head: +ihk_param_param_addr: + .quad 0 +ihk_param_phys_addr: + .quad 0 +ihk_param_st_phys_base: + .quad 0 +ihk_param_st_phys_size: + .quad 0 +ihk_param_gic_dist_base_pa: + .quad 0 +ihk_param_gic_dist_map_size: + .quad 0 +ihk_param_gic_cpu_base_pa: + .quad 0 +ihk_param_gic_cpu_map_size: + .quad 0 +ihk_param_gic_percpu_offset: + .word 0 +ihk_param_gic_version: + .word 0 +ihk_param_lpj: + .quad 0 /* udelay loops value */ +ihk_param_hz: + .quad 0 /* host HZ value */ +ihk_param_psci_method: + .quad 0 /* hvc or smc ? */ +ihk_param_use_virt_timer: + .quad 0 /* virt timer or phys timer ? */ +ihk_param_evtstrm_timer_rate: + .quad 0 /* event stream timer rate */ +ihk_param_default_vl: + .quad 0 /* SVE default VL */ +ihk_param_cpu_logical_map: + .skip NR_CPUS * 8 /* array of the MPIDR and the core number */ +ihk_param_gic_rdist_base_pa: + .skip NR_CPUS * 8 /* per-cpu re-distributer PA */ +ihk_param_pmu_irq_affiniry: + .skip CONFIG_SMP_MAX_CORES * 4 /* array of the pmu affinity list */ +ihk_param_nr_pmu_irq_affiniry: + .word 0 /* number of pmu affinity list elements. 
*/ + +/* @ref.impl arch/arm64/include/asm/kvm_arm.h */ +#define HCR_E2H (UL(1) << 34) +#define HCR_RW_SHIFT 31 +#define HCR_RW (UL(1) << HCR_RW_SHIFT) +#define HCR_TGE (UL(1) << 27) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" + +ENTRY(arch_start) + /* store ihk param */ + /* x4 = ihk_smp_trampoline_data PA */ + add x0, x4, #TRAMPOLINE_DATA_RESERVED_SIZE + /* header_pgtbl */ + add x0, x0, #TRAMPOLINE_DATA_PGTBL_SIZE + /* header_load */ + add x0, x0, #TRAMPOLINE_DATA_LOAD_SIZE + /* stack_ptr */ + add x0, x0, #TRAMPOLINE_DATA_STACK_SIZE + /* notify_address */ + ldr x16, [x0], #TRAMPOLINE_DATA_BOOT_PARAM_SIZE + adr x15, ihk_param_param_addr + str x16, [x15] + /* startup_data */ + ldr x16, [x0], #TRAMPOLINE_DATA_STARTUP_DATA_SIZE + ldr x15, [x16, #STARTUP_DATA_ARG2] + adr x17, ihk_param_phys_addr + str x15, [x17] + /* st_phys_base */ + ldr x16, [x0], #TRAMPOLINE_DATA_ST_PHYS_BASE_SIZE + adr x15, ihk_param_st_phys_base + str x16, [x15] + /* st_phys_size */ + ldr x16, [x0], #TRAMPOLINE_DATA_ST_PHYS_SIZE_SIZE + adr x15, ihk_param_st_phys_size + str x16, [x15] + /* dist_base_pa */ + ldr x16, [x0], #TRAMPOLINE_DATA_GIC_DIST_PA_SIZE + adr x15, ihk_param_gic_dist_base_pa + str x16, [x15] + /* dist_map_size */ + ldr x16, [x0], #TRAMPOLINE_DATA_GIC_DIST_MAP_SIZE_SIZE + adr x15, ihk_param_gic_dist_map_size + str x16, [x15] + /* cpu_base_pa */ + ldr x16, [x0], #TRAMPOLINE_DATA_GIC_CPU_PA_SIZE + adr x15, ihk_param_gic_cpu_base_pa + str x16, [x15] + /* cpu_map_size */ + ldr x16, [x0], #TRAMPOLINE_DATA_GIC_CPU_MAP_SIZE_SIZE + adr x15, ihk_param_gic_cpu_map_size + str x16, [x15] + /* percpu_offset */ + ldr w16, [x0], #TRAMPOLINE_DATA_GIC_PERCPU_OFF_SIZE + adr x15, ihk_param_gic_percpu_offset + str w16, [x15] + /* gic_version */ + ldr w16, [x0], #TRAMPOLINE_DATA_GIC_VERSION_SIZE + adr x15, ihk_param_gic_version + str w16, [x15] + /* loops_per_jiffy */ + ldr x16, [x0], #TRAMPOLINE_DATA_LPJ_SIZE + adr x15, ihk_param_lpj + str x16, [x15] + /* hz */ + ldr x16, [x0], #TRAMPOLINE_DATA_HZ_SIZE + adr x15, ihk_param_hz + str x16, [x15] + /* psci_method */ + ldr x16, [x0], #TRAMPOLINE_DATA_PSCI_METHOD_SIZE + adr x15, ihk_param_psci_method + str x16, [x15] + /* use_virt_timer */ + ldr x16, [x0], #TRAMPOLINE_DATA_USE_VIRT_TIMER_SIZE + adr x15, ihk_param_use_virt_timer + str x16, [x15] + /* evtstrm_timer_rate */ + ldr x16, [x0], #TRAMPOLINE_DATA_EVTSTRM_TIMER_RATE_SIZE + adr x15, ihk_param_evtstrm_timer_rate + str x16, [x15] + /* SVE default VL */ + ldr x16, [x0], #TRAMPOLINE_DATA_DEFAULT_VL_SIZE + adr x15, ihk_param_default_vl + str x16, [x15] + /* cpu_logical_map_size */ + ldr x16, [x0], #TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE + mov x1, x16 + /* cpu_logical_map */ + adr x15, ihk_param_cpu_logical_map + mov x18, x0 +1: ldr x17, [x18], #8 + str x17, [x15], #8 + sub x16, x16, #1 + cmp x16, #0 + b.ne 1b + mov x16, #NR_CPUS /* calc next data */ + lsl x16, x16, 3 + add x0, x0, x16 + + /* reset cpu_logical_map_size */ + mov x16, x1 + /* gic_rdist_base_pa */ + adr x15, ihk_param_gic_rdist_base_pa + mov x18, x0 +1: ldr x17, [x18], #8 + str x17, [x15], #8 + sub x16, x16, #1 + cmp x16, #0 + b.ne 1b + mov x16, #NR_CPUS /* calc next data */ + lsl x16, x16, 3 + add x0, x0, x16 + /* nr_pmu_irq_affiniry */ + ldr w16, [x0], #TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE + adr x15, ihk_param_nr_pmu_irq_affiniry + str w16, [x15] + /* pmu_irq_affiniry */ + mov x18, x0 + adr x15, ihk_param_pmu_irq_affiniry + b 2f +1: ldr w17, [x18], 
#4 + str w17, [x15], #4 + sub w16, w16, #1 +2: cmp w16, #0 + b.ne 1b + + mov x16, #CONFIG_SMP_MAX_CORES /* calc next data */ + lsl x16, x16, 2 + add x0, x0, x16 + /* */ + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-KERNEL_START + bl __create_page_tables // x25=TTBR0, x26=TTBR1 + b secondary_entry_common +ENDPROC(arch_start) + +ENTRY(arch_ap_start) + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-KERNEL_START + b secondary_entry_common +ENDPROC(arch_ap_start) + +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl_init x25, x26, x28 + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. 
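+	 *
+	 * The idmap provides a VA == PA view of the kernel image; it is
+	 * needed because the PC still holds a physical address at the
+	 * moment the MMU is switched on, so the instruction stream must
+	 * stay fetchable until the jump to the final virtual address.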
+ */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + mov x0, x26 // swapper_pg_dir + ldr x5, =KERNEL_START + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the early_alloc_pages area, kernel_img next block + */ + ldr x3, =KERNEL_END + add x3, x3, x28 // __pa(KERNEL_END) + add x3, x3, #BLOCK_SIZE + sub x3, x3, #1 + bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc. + ldr x5, =EARLY_ALLOC_VADDR // get start VA + mov x6, #1 + lsl x6, x6, #(PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT) + add x6, x5, x6 // end VA calc + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the boot_param area + */ + adr x3, ihk_param_param_addr + ldr x3, [x3] // get boot_param PA + ldr x5, =BOOT_PARAM_VADDR // get boot_param VA + mov x6, #1 + lsl x6, x6, #MAP_BOOT_PARAM_SHIFT + add x6, x5, x6 // end VA calc + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ +/* FDT disable for McKernel */ +// mov x3, x21 // FDT phys address +// and x3, x3, #~((1 << 21) - 1) // 2MB aligned +// mov x6, #PAGE_OFFSET +// sub x5, x3, x24 // subtract PHYS_OFFSET +// tst x5, #~((1 << 29) - 1) // within 512MB? +// csel x21, xzr, x21, ne // zero the FDT pointer +// b.ne 1f +// add x5, x5, x6 // __va(FDT blob) +// add x6, x5, #1 << 21 // 2MB for the FDT blob +// sub x6, x6, #1 // inclusive range +// create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + +/* + * If we're fortunate enough to boot at EL2, ensure that the world is + * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. + */ +ENTRY(el2_setup) + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb + ret + +2: +#ifdef CONFIG_ARM64_VHE + /* + * Check for VHE being present. For the rest of the EL2 setup, + * x2 being non-zero indicates that we do have VHE, and that the + * kernel is intended to run at EL2. + */ + mrs x2, id_aa64mmfr1_el1 + ubfx x2, x2, #8, #4 +#else /* CONFIG_ARM64_VHE */ + mov x2, xzr +#endif /* CONFIG_ARM64_VHE */ + + /* Hyp configuration. */ + mov x0, #HCR_RW // 64-bit EL1 + cbz x2, set_hcr + orr x0, x0, #HCR_TGE // Enable Host Extensions + orr x0, x0, #HCR_E2H +set_hcr: + msr hcr_el2, x0 + isb + + /* Generic timers. 
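+	 * Setting the low two bits of cnthctl_el2 (EL1PCTEN/EL1PCEN)
+	 * lets EL1 access the physical counter and timer without
+	 * trapping to EL2, and zeroing cntvoff_el2 keeps virtual and
+	 * physical time in step.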
*/ + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 + msr cntvoff_el2, xzr // Clear virtual offset + +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + + /* Populate ID registers. */ + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 + + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ + cbnz x2, 1f + + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + + /* SVE register access */ + mrs x1, id_aa64pfr0_el1 + ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 + cbz x1, 4f + + bic x0, x0, #CPTR_EL2_TZ // Disable SVE traps to EL2 + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + + mov x1, #ZCR_EL1_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL1, x1 // length for EL1. + b 1f + +4: msr cptr_el2, x0 // Disable copro. traps to EL2 +1: +#ifdef CONFIG_COMPAT + msr hstr_el2, xzr // Disable CP15 traps to EL2 +#endif + + /* Stage-2 translation */ + msr vttbr_el2, xzr + + cbz x2, install_el2_stub + + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + isb + ret + +install_el2_stub: + /* Hypervisor stub */ + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors + msr vbar_el2, x0 + + /* spsr */ + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, x0 + msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + eret +ENDPROC(el2_setup) + +/* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line + ret +ENDPROC(set_cpu_boot_mode_flag) + +#if defined(CONFIG_HAS_NMI) +/* + * void maybe_switch_to_sysreg_gic_cpuif(void) + * + * Enable interrupt controller system register access if this feature + * has been detected by the alternatives system. + * + * Before we jump into generic code we must enable interrupt controller system + * register access because this is required by the irqflags macros. We must + * also mask interrupts at the PMR and unmask them within the PSR. That leaves + * us set up and ready for the kernel to make its first call to + * arch_local_irq_enable(). 
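+ *
+ * In this scheme PSTATE.I stays clear and masking is done by writing a
+ * priority threshold to ICC_PMR_EL1; e.g. ICC_PMR_EL1_MASKED below holds
+ * off normal IRQs at the GIC while still letting NMI-priority interrupts
+ * through.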
+ * + */ +ENTRY(maybe_switch_to_sysreg_gic_cpuif) + mrs_s x0, ICC_SRE_EL1 + orr x0, x0, #1 + msr_s ICC_SRE_EL1, x0 // Set ICC_SRE_EL1.SRE==1 + isb // Make sure SRE is now set + mov x0, ICC_PMR_EL1_MASKED + msr_s ICC_PMR_EL1, x0 // Prepare for unmask of I bit + msr daifclr, #2 // Clear the I bit + ret +ENDPROC(maybe_switch_to_sysreg_gic_cpuif) +#else +ENTRY(maybe_switch_to_sysreg_gic_cpuif) + ret +ENDPROC(maybe_switch_to_sysreg_gic_cpuif) +#endif /* defined(CONFIG_HAS_NMI) */ + +/* + * We need to find out the CPU boot mode long after boot, so we need to + * store it in a writable variable. + * + * This is not in .bss, because we set it sufficiently early that the boot-time + * zeroing of .bss would clobber it. + */ + .pushsection .data..cacheline_aligned +ENTRY(__boot_cpu_mode) + .align L1_CACHE_SHIFT + .long BOOT_CPU_MODE_EL2 + .long 0 + .popsection + +ENTRY(secondary_entry_common) + bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag + b secondary_startup +ENDPROC(secondary_entry_common) + +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + */ + mrs x22, midr_el1 // x22=cpuid + mov x0, x22 + bl lookup_processor_type + mov x23, x0 // x23=current cpu_table + cbz x23, __error_p // invalid processor (x23=0)? + + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 + ldr x12, [x23, #CPU_INFO_SETUP] + add x12, x12, x28 // __virt_to_phys + blr x12 // initialise processor + + ldr x21, =secondary_data + ldr x27, =__secondary_switched // address to jump to after enabling the MMU + b __enable_mmu +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr x0, [x21, #SECONDARY_DATA_STACK] // get secondary_data.stack + mov sp, x0 + + /* + * Conditionally switch to GIC PMR for interrupt masking (this + * will be a nop if we are using normal interrupt masking) + */ + bl maybe_switch_to_sysreg_gic_cpuif + mov x29, #0 + + adr x1, secondary_data + ldr x0, [x1, #SECONDARY_DATA_ARG] // get secondary_data.arg + ldr x27, [x1, #SECONDARY_DATA_NEXT_PC] // get secondary_data.next_pc + br x27 // secondary_data.next_pc(secondary_data.arg); +ENDPROC(__secondary_switched) + +/* + * Setup common bits before finally enabling the MMU. Essentially this is just + * loading the page table pointer and vector base registers. + * + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on + * the MMU. + */ +__enable_mmu: + ldr x5, =vectors + msr vbar_el1, x5 + msr ttbr0_el1, x25 // load TTBR0 + msr ttbr1_el1, x26 // load TTBR1 + isb + b __turn_mmu_on +ENDPROC(__enable_mmu) + +/* + * Enable the MMU. This completely changes the structure of the visible memory + * space. You will not be able to trace execution through this. + * + * x0 = system control register + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. + */ + .align 4 +__turn_mmu_on: + msr sctlr_el1, x0 + isb + br x27 +ENDPROC(__turn_mmu_on) + +/* + * Calculate the start of physical memory. + */ +__calc_phys_offset: + adr x0, 1f + ldp x1, x2, [x0] + sub x28, x0, x1 // x28 = PHYS_OFFSET - KERNEL_START + add x24, x2, x28 // x24 = PHYS_OFFSET + ret +ENDPROC(__calc_phys_offset) + + .align 3 +1: .quad . + .quad KERNEL_START + +/* + * Exception handling. 
Something went wrong and we can't proceed. We ought to + * tell the user, but since we don't have any guarantee that we're even + * running on the right architecture, we do virtually nothing. + */ +__error_p: +ENDPROC(__error_p) + +__error: +1: nop + b 1b +ENDPROC(__error) + +/* + * This function gets the processor ID in w0 and searches the cpu_table[] for + * a match. It returns a pointer to the struct cpu_info it found. The + * cpu_table[] must end with an empty (all zeros) structure. + * + * This routine can be called via C code and it needs to work with the MMU + * both disabled and enabled (the offset is calculated automatically). + */ +ENTRY(lookup_processor_type) + adr x1, __lookup_processor_type_data + ldp x2, x3, [x1] + sub x1, x1, x2 // get offset between VA and PA + add x3, x3, x1 // convert VA to PA +1: + ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask + cbz w5, 2f // end of list? + and w6, w6, w0 + cmp w5, w6 + b.eq 3f + add x3, x3, #CPU_INFO_SZ + b 1b +2: + mov x3, #0 // unknown processor +3: + mov x0, x3 + ret +ENDPROC(lookup_processor_type) + + .align 3 + .type __lookup_processor_type_data, %object +__lookup_processor_type_data: + .quad . + .quad cpu_table + .size __lookup_processor_type_data, . - __lookup_processor_type_data + diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c new file mode 100644 index 00000000..28cfe6b0 --- /dev/null +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -0,0 +1,409 @@ +/* hw_breakpoint.c COPYRIGHT FUJITSU LIMITED 2016 */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::core_num_[brps|wrps] */ +/* Number of BRP/WRP registers on this CPU. */ +int core_num_brps; +int core_num_wrps; + +/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::get_num_brps */ +/* Determine number of BRP registers available. */ +int get_num_brps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; +} + +/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::get_num_wrps */ +/* Determine number of WRP registers available. */ +int get_num_wrps(void) +{ + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; +} + +/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::hw_breakpoint_slots */ +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. 
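+	 * E.g. an ID_AA64DFR0_EL1.BRPs field (bits [15:12]) of 0x5 means
+	 * 5 + 1 = 6 breakpoint register pairs; the WRPs field at bits
+	 * [23:20] uses the same "count minus one" encoding.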
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		kprintf("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::READ_WB_REG_CASE */
+#define READ_WB_REG_CASE(OFF, N, REG, VAL)	\
+	case (OFF + N):				\
+		AARCH64_DBG_READ(N, REG, VAL);	\
+		break
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::WRITE_WB_REG_CASE */
+#define WRITE_WB_REG_CASE(OFF, N, REG, VAL)	\
+	case (OFF + N):				\
+		AARCH64_DBG_WRITE(N, REG, VAL);	\
+		break
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::GEN_READ_WB_REG_CASES */
+#define GEN_READ_WB_REG_CASES(OFF, REG, VAL)	\
+	READ_WB_REG_CASE(OFF, 0, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 1, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 2, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 3, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 4, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 5, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 6, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 7, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 8, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 9, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 10, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 11, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 12, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 13, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 14, REG, VAL);	\
+	READ_WB_REG_CASE(OFF, 15, REG, VAL)
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::GEN_WRITE_WB_REG_CASES */
+#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL)	\
+	WRITE_WB_REG_CASE(OFF, 0, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 1, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 2, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 3, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 4, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 5, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 6, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 7, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 8, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 9, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 10, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 11, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 12, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 13, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
+	WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::read_wb_reg */
+unsigned long read_wb_reg(int reg, int n)
+{
+	unsigned long val = 0;
+
+	switch (reg + n) {
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+	default:
+		kprintf("attempt to read from unknown breakpoint register %d\n", n);
+	}
+
+	return val;
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::write_wb_reg */
+void write_wb_reg(int reg, int n, unsigned long val)
+{
+	switch (reg + n) {
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
+	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
+	default:
+		kprintf("attempt to write to unknown breakpoint register %d\n", n);
+	}
+	isb();
+}
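+
+/*
+ * Example: write_wb_reg(AARCH64_DBG_REG_BCR, 2, 0UL) matches the
+ * "AARCH64_DBG_REG_BCR + 2" case above and (via AARCH64_DBG_WRITE)
+ * performs an msr to dbgbcr2_el1; the trailing isb() makes the debug
+ * register update architecturally visible.
+ */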
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::hw_breakpoint_reset */
+void hw_breakpoint_reset(void)
+{
+	int i = 0;
+
+	/* clear DBGBVR_EL1 and DBGBCR_EL1 (n=0-(core_num_brps-1)) */
+	for (i = 0; i < core_num_brps; i++) {
+		write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL);
+		write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL);
+	}
+
+	/* clear DBGWVR_EL1 and DBGWCR_EL1 (n=0-(core_num_wrps-1)) */
+	for (i = 0; i < core_num_wrps; i++) {
+		write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
+		write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL);
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_hw_breakpoint_init */
+void arch_hw_breakpoint_init(void)
+{
+	struct user_hwdebug_state hws;
+	int max_hws_dbg_regs = sizeof(hws.dbg_regs) / sizeof(hws.dbg_regs[0]);
+
+	core_num_brps = get_num_brps();
+	core_num_wrps = get_num_wrps();
+
+	if (max_hws_dbg_regs < core_num_brps) {
+		kprintf("dbg_regs array is smaller than the number of available BRP registers; clamping core_num_brps\n");
+		core_num_brps = max_hws_dbg_regs;
+	}
+
+	if (max_hws_dbg_regs < core_num_wrps) {
+		kprintf("dbg_regs array is smaller than the number of available WRP registers; clamping core_num_wrps\n");
+		core_num_wrps = max_hws_dbg_regs;
+	}
+	hw_breakpoint_reset();
+}
+
+struct arch_hw_breakpoint_ctrl {
+	unsigned int __reserved	: 19,
+	len			: 8,
+	type			: 2,
+	privilege		: 2,
+	enabled			: 1;
+};
+
+static inline unsigned int encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
+{
+	return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+		ctrl.enabled;
+}
+
+static inline void decode_ctrl_reg(unsigned int reg, struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	ctrl->enabled = reg & 0x1;
+	reg >>= 1;
+	ctrl->privilege = reg & 0x3;
+	reg >>= 2;
+	ctrl->type = reg & 0x3;
+	reg >>= 2;
+	ctrl->len = reg & 0xff;
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_bp_generic_fields */
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+			   int *gen_len, int *gen_type)
+{
+	/* Type */
+	switch (ctrl.type) {
+	case ARM_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case ARM_BREAKPOINT_LOAD:
+		*gen_type = HW_BREAKPOINT_R;
+		break;
+	case ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (ctrl.len) {
+	case ARM_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_check_bp_in_kernelspace */
+/*
+ * Check whether bp virtual address is in kernel space.
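+ * E.g. with VA_BITS=39, USER_END is 0x0000002000000000 (see arch-memory.h),
+ * so any address at or above that boundary is treated as kernel space.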
+ */
+int arch_check_bp_in_kernelspace(unsigned long addr, unsigned int len)
+{
+	return (addr >= USER_END) && ((addr + len - 1) >= USER_END);
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_validate_hwbkpt_settings */
+int arch_validate_hwbkpt_settings(long note_type, struct user_hwdebug_state *hws, size_t len)
+{
+	int i;
+	unsigned long alignment_mask;
+	size_t cpysize, cpynum;
+
+	switch (note_type) {
+	case NT_ARM_HW_BREAK:	/* breakpoint */
+		alignment_mask = 0x3;
+		break;
+	case NT_ARM_HW_WATCH:	/* watchpoint */
+		alignment_mask = 0x7;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	cpysize = len - offsetof(struct user_hwdebug_state, dbg_regs[0]);
+	cpynum = cpysize / sizeof(hws->dbg_regs[0]);
+
+	for (i = 0; i < cpynum; i++) {
+		unsigned long addr = hws->dbg_regs[i].addr;
+		unsigned int uctrl = hws->dbg_regs[i].ctrl;
+		struct arch_hw_breakpoint_ctrl ctrl;
+		int err, len, type;
+
+		/* skip empty dbg_regs entries */
+		if (addr == 0 && uctrl == 0) {
+			continue;
+		}
+
+		/* check address alignment */
+		if (addr & alignment_mask) {
+			return -EINVAL;
+		}
+
+		/* decode control bits */
+		decode_ctrl_reg(uctrl, &ctrl);
+
+		/* not enabled; nothing to validate */
+		if (!ctrl.enabled) {
+			continue;
+		}
+
+		err = arch_bp_generic_fields(ctrl, &len, &type);
+		if (err) {
+			return err;
+		}
+
+		/* check the type */
+		switch (note_type) {
+		case NT_ARM_HW_BREAK:	/* breakpoint */
+			if ((type & HW_BREAKPOINT_X) != type) {
+				return -EINVAL;
+			}
+			break;
+		case NT_ARM_HW_WATCH:	/* watchpoint */
+			if ((type & HW_BREAKPOINT_RW) != type) {
+				return -EINVAL;
+			}
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		/* determine the privilege level */
+		if (arch_check_bp_in_kernelspace(addr, len)) {
+			/* kernel-space breakpoints are not supported. */
+			return -EINVAL;
+		} else {
+			ctrl.privilege = AARCH64_BREAKPOINT_EL0;
+		}
+
+		/* ctrl checks passed; write back the encoded value. */
+		hws->dbg_regs[i].ctrl = encode_ctrl_reg(ctrl);
+	}
+	return 0;
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::breakpoint_handler */
+/*
+ * Debug exception handlers.
+ */
+int breakpoint_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs)
+{
+	int i = 0;
+	unsigned long val;
+	unsigned int ctrl_reg;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	siginfo_t info;
+
+	for (i = 0; i < core_num_brps; i++) {
+
+		/* Check if the breakpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_BVR, i);
+		if (val != (regs->pc & ~0x3)) {
+			continue;
+		}
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (regs->pc & 0x3)) & ctrl.len)) {
+			continue;
+		}
+
+		/* send SIGTRAP */
+		info.si_signo = SIGTRAP;
+		info.si_errno = 0;
+		info.si_code = TRAP_HWBKPT;
+		info._sifields._sigfault.si_addr = (void *)regs->pc;
+		set_signal(SIGTRAP, regs, &info);
+	}
+	return 0;
+}
+
+/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::watchpoint_handler */
+int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+	int i = 0;
+	int access;
+	unsigned long val;
+	unsigned int ctrl_reg;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	siginfo_t info;
+
+	for (i = 0; i < core_num_wrps; i++) {
+		/* Check if the watchpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+		if (val != (addr & ~0x7)) {
+			continue;
+		}
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (addr & 0x7)) & ctrl.len)) {
+			continue;
+		}
+
+		/*
+		 * Check that the access type matches.
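+		 * AARCH64_ESR_ACCESS_MASK isolates the WnR (write-not-read)
+		 * bit of the ESR: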
+ * 0 => load, otherwise => store + */ + access = (esr & AARCH64_ESR_ACCESS_MASK) ? ARM_BREAKPOINT_STORE : + ARM_BREAKPOINT_LOAD; + if (!(access & ctrl.type)) { + continue; + } + + /* send SIGTRAP */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info._sifields._sigfault.si_addr = (void *)addr; + set_signal(SIGTRAP, regs, &info); + } + return 0; +} diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S new file mode 100644 index 00000000..efd67d06 --- /dev/null +++ b/arch/arm64/kernel/hyp-stub.S @@ -0,0 +1,58 @@ +/* hyp-stub.S COPYRIGHT FUJITSU LIMITED 2015 */ +#include +#include + + .text + .align 11 + +ENTRY(__hyp_stub_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__hyp_stub_vectors) + + .align 11 + +el1_sync: + mrs x1, esr_el2 + lsr x1, x1, #26 + cmp x1, #0x16 + b.ne 2f // Not an HVC trap + cbz x0, 1f + msr vbar_el2, x0 // Set vbar_el2 + b 2f +1: mrs x0, vbar_el2 // Return vbar_el2 +2: eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + diff --git a/arch/arm64/kernel/include/arch-bitops.h b/arch/arm64/kernel/include/arch-bitops.h new file mode 100644 index 00000000..560be3db --- /dev/null +++ b/arch/arm64/kernel/include/arch-bitops.h @@ -0,0 +1,19 @@ +/* arch-bitops.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_COMMON_BITOPS_H +#define __HEADER_ARM64_COMMON_BITOPS_H + +#ifndef INCLUDE_BITOPS_H +# error only can be included directly +#endif + +#ifndef __ASSEMBLY__ + +#include "bitops-fls.h" +#include "bitops-__ffs.h" +#include "bitops-ffz.h" +#include "bitops-set_bit.h" +#include "bitops-clear_bit.h" + +#endif /*__ASSEMBLY__*/ +#endif /* !__HEADER_ARM64_COMMON_BITOPS_H */ + diff --git a/arch/arm64/kernel/include/arch-futex.h b/arch/arm64/kernel/include/arch-futex.h new file mode 100644 index 00000000..29fb564a --- /dev/null +++ b/arch/arm64/kernel/include/arch-futex.h @@ -0,0 +1,137 @@ +/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_ARCH_FUTEX_H +#define __HEADER_ARM64_COMMON_ARCH_FUTEX_H + + +/* + * @ref.impl + * linux-linaro/arch/arm64/include/asm/futex.h:__futex_atomic_op + */ +#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ + asm volatile( \ +"1: ldxr %w1, %2\n" \ + insn "\n" \ +"2: stlxr %w3, %w0, %2\n" \ +" cbnz %w3, 1b\n" \ +" dmb ish\n" \ +"3:\n" \ +" .pushsection .fixup,\"ax\"\n" \ +" .align 2\n" \ +"4: mov %w0, %w5\n" \ +" b 3b\n" \ +" .popsection\n" \ +" .pushsection __ex_table,\"a\"\n" \ +" .align 3\n" \ +" .quad 1b, 4b, 2b, 4b\n" \ +" .popsection\n" \ + : "=&r" (ret), "=&r" (oldval), "+Q" 
(*uaddr), "=&r" (tmp) \ + : "r" (oparg), "Ir" (-EFAULT) \ + : "memory") + +/* + * @ref.impl + * linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_op_inuser + */ +static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tmp; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + +#ifdef __UACCESS__ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; +#endif + + // pagefault_disable(); /* implies preempt_disable() */ + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("mov %w0, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("add %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("orr %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("and %w0, %w1, %w4", + ret, oldval, uaddr, tmp, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("eor %w0, %w1, %w4", + ret, oldval, uaddr, tmp, oparg); + break; + default: + ret = -ENOSYS; + } + + // pagefault_enable(); /* subsumes preempt_enable() */ + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +/* + * @ref.impl + * linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_cmpxchg_inatomic + * mckernel/kernel/include/futex.h:futex_atomic_cmpxchg_inatomic (x86 depend) + */ +static inline int +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +{ + int ret = 0; + int val, tmp; + + if(uaddr == NULL) { + return -EFAULT; + } +#ifdef __UACCESS__ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) { + return -EFAULT; + } +#endif + + asm volatile("// futex_atomic_cmpxchg_inatomic\n" +"1: ldxr %w1, %2\n" +" sub %w3, %w1, %w4\n" +" cbnz %w3, 3f\n" +"2: stlxr %w3, %w5, %2\n" +" cbnz %w3, 1b\n" +" dmb ish\n" +"3:\n" +" .pushsection .fixup,\"ax\"\n" +"4: mov %w0, %w6\n" +" b 3b\n" +" .popsection\n" +" .pushsection __ex_table,\"a\"\n" +" .align 3\n" +" .quad 1b, 4b, 2b, 4b\n" +" .popsection\n" + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + : "memory"); + + return ret; +} + +#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */ diff --git a/arch/arm64/kernel/include/arch-lock.h b/arch/arm64/kernel/include/arch-lock.h new file mode 100644 index 00000000..9c924a9c --- /dev/null +++ b/arch/arm64/kernel/include/arch-lock.h @@ -0,0 +1,605 @@ +/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_ARCH_LOCK_H +#define __HEADER_ARM64_COMMON_ARCH_LOCK_H + +#define IHK_STATIC_SPINLOCK_FUNCS + +#include +#include + +//#define DEBUG_SPINLOCK +//#define DEBUG_MCS_RWLOCK + +#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK) +int __kprintf(const char *format, ...); +#endif + +/* @ref.impl arch/arm64/include/asm/spinlock_types.h::TICKET_SHIFT */ +#define TICKET_SHIFT 16 + +/* @ref.impl arch/arm64/include/asm/spinlock_types.h::arch_spinlock_t */ +typedef struct { +//#ifdef __AARCH64EB__ +// uint16_t next; +// 
uint16_t owner; +//#else /* __AARCH64EB__ */ + uint16_t owner; + uint16_t next; +//#endif /* __AARCH64EB__ */ +} ihk_spinlock_t; + +extern void preempt_enable(void); +extern void preempt_disable(void); + +/* @ref.impl arch/arm64/include/asm/spinlock_types.h::__ARCH_SPIN_LOCK_UNLOCKED */ +#define SPIN_LOCK_UNLOCKED { 0, 0 } + +/* initialized spinlock struct */ +static void ihk_mc_spinlock_init(ihk_spinlock_t *lock) +{ + *lock = (ihk_spinlock_t)SPIN_LOCK_UNLOCKED; +} + +/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */ +/* spinlock lock */ +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_lock_noirq(l) { \ +__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_lock_noirq(l); \ +__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq +#endif + +static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) +{ + unsigned int tmp; + ihk_spinlock_t lockval, newval; + + preempt_disable(); + + asm volatile( + /* Atomically increment the next ticket. */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, %w5\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n" + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) + : "memory"); +} + +/* spinlock lock & interrupt disable & PSTATE.DAIF save */ +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\ +__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +rc = __ihk_mc_spinlock_lock(l);\ +__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\ +}) +#else +#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock +#endif +static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock) +{ + unsigned long flags; + + flags = cpu_disable_interrupt_save(); + + __ihk_mc_spinlock_lock_noirq(lock); + + return flags; +} + +/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */ +/* spinlock unlock */ +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_unlock_noirq(l) { \ +__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_unlock_noirq(l); \ +__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq +#endif +static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) +{ + asm volatile( +" stlrh %w1, %0\n" + : "=Q" (lock->owner) + : "r" (lock->owner + 1) + : "memory"); + + preempt_enable(); +} + +/* spinlock unlock & restore PSTATE.DAIF */ +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_unlock(l, f) { \ +__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_unlock((l), (f)); \ +__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock +#endif +static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long 
flags) +{ + __ihk_mc_spinlock_unlock_noirq(lock); + + cpu_restore_interrupt(flags); +} + +/* An implementation of the Mellor-Crummey Scott (MCS) lock */ +typedef struct mcs_lock_node { + unsigned long locked; + struct mcs_lock_node *next; + unsigned long irqsave; +} __attribute__((aligned(64))) mcs_lock_node_t; + +static void mcs_lock_init(struct mcs_lock_node *node) +{ + node->locked = 0; + node->next = NULL; +} + +static void __mcs_lock_lock(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + struct mcs_lock_node *pred; + + node->next = NULL; + node->locked = 0; + pred = xchg8(&(lock->next), node); + + if (pred) { + node->locked = 1; + pred->next = node; + while (node->locked != 0) { + cpu_pause(); + } + } +} + +static void __mcs_lock_unlock(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + if (node->next == NULL) { + struct mcs_lock_node *old = atomic_cmpxchg8(&(lock->next), node, 0); + + if (old == node) { + return; + } + + while (node->next == NULL) { + cpu_pause(); + } + } + + node->next->locked = 0; +} + +static void mcs_lock_lock_noirq(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + preempt_disable(); + __mcs_lock_lock(lock, node); +} + +static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + __mcs_lock_unlock(lock, node); + preempt_enable(); +} + +static void mcs_lock_lock(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + node->irqsave = cpu_disable_interrupt_save(); + mcs_lock_lock_noirq(lock, node); +} + +static void mcs_lock_unlock(struct mcs_lock_node *lock, + struct mcs_lock_node *node) +{ + mcs_lock_unlock_noirq(lock, node); + cpu_restore_interrupt(node->irqsave); +} + + +#define SPINLOCK_IN_MCS_RWLOCK + +// reader/writer lock +typedef struct mcs_rwlock_node { + ihk_atomic_t count; // num of readers (use only common reader) + char type; // lock type +#define MCS_RWLOCK_TYPE_COMMON_READER 0 +#define MCS_RWLOCK_TYPE_READER 1 +#define MCS_RWLOCK_TYPE_WRITER 2 + char locked; // lock +#define MCS_RWLOCK_LOCKED 1 +#define MCS_RWLOCK_UNLOCKED 0 + char dmy1; // unused + char dmy2; // unused + struct mcs_rwlock_node *next; +} __attribute__((aligned(64))) mcs_rwlock_node_t; + +typedef struct mcs_rwlock_node_irqsave { +#ifndef SPINLOCK_IN_MCS_RWLOCK + struct mcs_rwlock_node node; +#endif + unsigned long irqsave; +} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t; + +typedef struct mcs_rwlock_lock { +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_spinlock_t slock; +#else + struct mcs_rwlock_node reader; /* common reader lock */ + struct mcs_rwlock_node *node; /* base */ +#endif +} __attribute__((aligned(64))) mcs_rwlock_lock_t; + +static void +mcs_rwlock_init(struct mcs_rwlock_lock *lock) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_init(&lock->slock); +#else + ihk_atomic_set(&lock->reader.count, 0); + lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER; + lock->node = NULL; +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_lock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_lock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq +#endif +static void +__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_lock_noirq(&lock->slock); +#else + struct 
mcs_rwlock_node *pred; + + preempt_disable(); + + node->type = MCS_RWLOCK_TYPE_WRITER; + node->next = NULL; + + pred = xchg8(&(lock->node), node); + + if (pred) { + node->locked = MCS_RWLOCK_LOCKED; + pred->next = node; + while (node->locked != MCS_RWLOCK_UNLOCKED) { + cpu_pause(); + } + } +#endif +} + +#ifndef SPINLOCK_IN_MCS_RWLOCK +static void +mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock) +{ + struct mcs_rwlock_node *p; + struct mcs_rwlock_node *f = NULL; + struct mcs_rwlock_node *n; + int breakf = 0; + + ihk_atomic_inc(&lock->reader.count); // protect to unlock reader + for(p = &lock->reader; p->next; p = n){ + n = p->next; + if(p->next->type == MCS_RWLOCK_TYPE_READER){ + p->next = n->next; + if(lock->node == n){ + struct mcs_rwlock_node *old; + + old = atomic_cmpxchg8(&(lock->node), n, p); + + if(old != n){ // couldn't change + while (n->next == NULL) { + cpu_pause(); + } + p->next = n->next; + } + else{ + breakf = 1; + } + } + else if(p->next == NULL){ + while (n->next == NULL) { + cpu_pause(); + } + p->next = n->next; + } + if(f){ + ihk_atomic_inc(&lock->reader.count); + n->locked = MCS_RWLOCK_UNLOCKED; + } + else + f = n; + n = p; + if(breakf) + break; + } + if(n->next == NULL && lock->node != n){ + while (n->next == NULL && lock->node != n) { + cpu_pause(); + } + } + } + + f->locked = MCS_RWLOCK_UNLOCKED; +} +#endif + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_unlock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_unlock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq +#endif +static void +__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_unlock_noirq(&lock->slock); +#else + if (node->next == NULL) { + struct mcs_rwlock_node *old = atomic_cmpxchg8(&(lock->node), node, 0); + + if (old == node) { + goto out; + } + + while (node->next == NULL) { + cpu_pause(); + } + } + + if(node->next->type == MCS_RWLOCK_TYPE_READER){ + lock->reader.next = node->next; + mcs_rwlock_unlock_readers(lock); + } + else{ + node->next->locked = MCS_RWLOCK_UNLOCKED; + } + +out: + preempt_enable(); +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_lock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_lock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq +#endif + +static inline unsigned int +atomic_inc_ifnot0(ihk_atomic_t *v) +{ + unsigned int *p = (unsigned int *)(&(v)->counter); + unsigned int old; + unsigned int new; + unsigned int val; + + do{ + if(!(old = *p)) + break; + new = old + 1; + val = atomic_cmpxchg4(p, old, new); + }while(val != old); + return old; +} + +static void +__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_lock_noirq(&lock->slock); +#else + struct mcs_rwlock_node *pred; + + preempt_disable(); + + node->type = MCS_RWLOCK_TYPE_READER; + node->next = NULL; + node->dmy1 = ihk_mc_get_processor_id(); + + pred = xchg8(&(lock->node), node); + + if (pred) { + if(pred == &lock->reader){ + 
if(atomic_inc_ifnot0(&pred->count)){ + struct mcs_rwlock_node *old; + + old = atomic_cmpxchg8(&(lock->node), node, pred); + + if (old == node) { + goto out; + } + + while (node->next == NULL) { + cpu_pause(); + } + + node->locked = MCS_RWLOCK_LOCKED; + lock->reader.next = node; + mcs_rwlock_unlock_readers(lock); + ihk_atomic_dec(&pred->count); + goto out; + } + } + node->locked = MCS_RWLOCK_LOCKED; + pred->next = node; + while (node->locked != MCS_RWLOCK_UNLOCKED) { + cpu_pause(); + } + } + else { + lock->reader.next = node; + mcs_rwlock_unlock_readers(lock); + } +out: + return; +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_unlock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_unlock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq +#endif +static void +__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_unlock_noirq(&lock->slock); +#else + if(ihk_atomic_dec_return(&lock->reader.count)) + goto out; + + if (lock->reader.next == NULL) { + struct mcs_rwlock_node *old; + + old = atomic_cmpxchg8(&(lock->node), &(lock->reader), 0); + + if (old == &lock->reader) { + goto out; + } + + while (lock->reader.next == NULL) { + cpu_pause(); + } + } + + if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){ + mcs_rwlock_unlock_readers(lock); + } + else{ + lock->reader.next->locked = MCS_RWLOCK_UNLOCKED; + } + +out: + preempt_enable(); +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_lock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_lock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock +#endif +static void +__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + node->irqsave = ihk_mc_spinlock_lock(&lock->slock); +#else + node->irqsave = cpu_disable_interrupt_save(); + __mcs_rwlock_writer_lock_noirq(lock, &node->node); +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_unlock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_unlock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock +#endif +static void +__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_unlock(&lock->slock, node->irqsave); +#else + __mcs_rwlock_writer_unlock_noirq(lock, &node->node); + cpu_restore_interrupt(node->irqsave); +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_lock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_lock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock +#endif +static void +__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct 
mcs_rwlock_node_irqsave *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + node->irqsave = ihk_mc_spinlock_lock(&lock->slock); +#else + node->irqsave = cpu_disable_interrupt_save(); + __mcs_rwlock_reader_lock_noirq(lock, &node->node); +#endif +} + +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_unlock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_unlock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock +#endif +static void +__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) +{ +#ifdef SPINLOCK_IN_MCS_RWLOCK + ihk_mc_spinlock_unlock(&lock->slock, node->irqsave); +#else + __mcs_rwlock_reader_unlock_noirq(lock, &node->node); + cpu_restore_interrupt(node->irqsave); +#endif +} + +#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */ diff --git a/arch/arm64/kernel/include/arch-memory.h b/arch/arm64/kernel/include/arch-memory.h new file mode 100644 index 00000000..38cb7d8b --- /dev/null +++ b/arch/arm64/kernel/include/arch-memory.h @@ -0,0 +1,487 @@ +/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H +#define __HEADER_ARM64_COMMON_ARCH_MEMORY_H + +#include + +#define _SZ4KB (1UL<<12) +#define _SZ16KB (1UL<<14) +#define _SZ64KB (1UL<<16) + +#ifdef CONFIG_ARM64_64K_PAGES +# define GRANULE_SIZE _SZ64KB +#else +# define GRANULE_SIZE _SZ4KB +#endif +#define VA_BITS CONFIG_ARM64_VA_BITS + +/* + * Address define + */ +#define MAP_KERNEL_SHIFT 21 +#define MAP_KERNEL_SIZE (UL(1) << MAP_KERNEL_SHIFT) +#define MAP_EARLY_ALLOC_SHIFT 9 +#define MAP_EARLY_ALLOC_SIZE (UL(1) << (PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT)) +#define MAP_BOOT_PARAM_SHIFT 21 +#define MAP_BOOT_PARAM_SIZE (UL(1) << MAP_BOOT_PARAM_SHIFT) + +#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB) +# +# define TASK_UNMAPPED_BASE UL(0x0000000800000000) +# define USER_END UL(0x0000002000000000) +# define MAP_VMAP_START UL(0xffffffbdc0000000) +# define MAP_VMAP_SIZE UL(0x0000000100000000) +# define MAP_FIXED_START UL(0xffffffbffbdfd000) +# define MAP_ST_START UL(0xffffffc000000000) +# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000 +# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_003f_ff80_0000 +# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000 +# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) +# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000 +# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000 +# +#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB) +# +# define TASK_UNMAPPED_BASE UL(0x0000004000000000) +# define USER_END UL(0x0000010000000000) +# define MAP_VMAP_START UL(0xfffffdfee0000000) +# define MAP_VMAP_SIZE UL(0x0000000100000000) +# define MAP_FIXED_START UL(0xfffffdfffbdd0000) +# define MAP_ST_START UL(0xfffffe0000000000) +# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000 +# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_01ff_e000_0000 +# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000 +# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) +# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000 +# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000 +# 
+#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB) +# +# define TASK_UNMAPPED_BASE UL(0x0000100000000000) +# define USER_END UL(0x0000400000000000) +# define MAP_VMAP_START UL(0xffff7bffc0000000) +# define MAP_VMAP_SIZE UL(0x0000000100000000) +# define MAP_FIXED_START UL(0xffff7ffffbdfd000) +# define MAP_ST_START UL(0xffff800000000000) +# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000 +# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_ff80_0000 +# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000 +# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) +# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000 +# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000 +# +# +#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB) +# +# define TASK_UNMAPPED_BASE UL(0x0000100000000000) +# define USER_END UL(0x0000400000000000) +# define MAP_VMAP_START UL(0xffff780000000000) +# define MAP_VMAP_SIZE UL(0x0000000100000000) +# define MAP_FIXED_START UL(0xffff7ffffbdd0000) +# define MAP_ST_START UL(0xffff800000000000) +# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000 +# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_e000_0000 +# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000 +# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE) +# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000 +# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000 +# +#else +# error address space is not defined. +#endif + +#define STACK_TOP(region) ((region)->user_end) + +/* + * pagetable define + */ +#if GRANULE_SIZE == _SZ4KB +# define __PTL4_SHIFT 39 +# define __PTL3_SHIFT 30 +# define __PTL2_SHIFT 21 +# define __PTL1_SHIFT 12 +# define PTL4_INDEX_MASK ((UL(1) << 9) - 1) +# define PTL3_INDEX_MASK PTL4_INDEX_MASK +# define PTL2_INDEX_MASK PTL3_INDEX_MASK +# define PTL1_INDEX_MASK PTL2_INDEX_MASK +# define FIRST_LEVEL_BLOCK_SUPPORT 1 +#elif GRANULE_SIZE == _SZ16KB +# define __PTL4_SHIFT 47 +# define __PTL3_SHIFT 36 +# define __PTL2_SHIFT 25 +# define __PTL1_SHIFT 14 +# define PTL4_INDEX_MASK ((UL(1) << 1) - 1) +# define PTL3_INDEX_MASK ((UL(1) << 11) - 1) +# define PTL2_INDEX_MASK PTL3_INDEX_MASK +# define PTL1_INDEX_MASK PTL2_INDEX_MASK +# define FIRST_LEVEL_BLOCK_SUPPORT 0 +#elif GRANULE_SIZE == _SZ64KB +# define __PTL4_SHIFT 0 +# define __PTL3_SHIFT 42 +# define __PTL2_SHIFT 29 +# define __PTL1_SHIFT 16 +# define PTL4_INDEX_MASK 0 +# define PTL3_INDEX_MASK ((UL(1) << 6) - 1) +# define PTL2_INDEX_MASK ((UL(1) << 13) - 1) +# define PTL1_INDEX_MASK PTL2_INDEX_MASK +# define FIRST_LEVEL_BLOCK_SUPPORT 0 +#else +# error granule size error. 
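+/*
+ * Each level resolves PAGE_SHIFT - 3 bits of VA (8-byte entries per table
+ * page): 9 bits per level with a 4KB granule (shifts 12/21/30/39), 11 bits
+ * with 16KB (14/25/36/47) and 13 bits with 64KB (16/29/42).
+ */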
+#endif
+
+# define __PTL4_SIZE (UL(1) << __PTL4_SHIFT)
+# define __PTL3_SIZE (UL(1) << __PTL3_SHIFT)
+# define __PTL2_SIZE (UL(1) << __PTL2_SHIFT)
+# define __PTL1_SIZE (UL(1) << __PTL1_SHIFT)
+# define __PTL4_MASK (~(__PTL4_SIZE - 1))
+# define __PTL3_MASK (~(__PTL3_SIZE - 1))
+# define __PTL2_MASK (~(__PTL2_SIZE - 1))
+# define __PTL1_MASK (~(__PTL1_SIZE - 1))
+
+/* calculate entries */
+#if (CONFIG_ARM64_PGTABLE_LEVELS > 3) && (VA_BITS > __PTL4_SHIFT)
+# define __PTL4_ENTRIES (UL(1) << (VA_BITS - __PTL4_SHIFT))
+# define __PTL3_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+# define __PTL2_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+#elif (CONFIG_ARM64_PGTABLE_LEVELS > 2) && (VA_BITS > __PTL3_SHIFT)
+# define __PTL4_ENTRIES 1
+# define __PTL3_ENTRIES (UL(1) << (VA_BITS - __PTL3_SHIFT))
+# define __PTL2_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+#elif (CONFIG_ARM64_PGTABLE_LEVELS > 1) && (VA_BITS > __PTL2_SHIFT)
+# define __PTL4_ENTRIES 1
+# define __PTL3_ENTRIES 1
+# define __PTL2_ENTRIES (UL(1) << (VA_BITS - __PTL2_SHIFT))
+# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
+#elif VA_BITS > __PTL1_SHIFT
+# define __PTL4_ENTRIES 1
+# define __PTL3_ENTRIES 1
+# define __PTL2_ENTRIES 1
+# define __PTL1_ENTRIES (UL(1) << (VA_BITS - __PTL1_SHIFT))
+#else
+# define __PTL4_ENTRIES 1
+# define __PTL3_ENTRIES 1
+# define __PTL2_ENTRIES 1
+# define __PTL1_ENTRIES 1
+#endif
+
+#ifndef __ASSEMBLY__
+static const unsigned int PTL4_SHIFT = __PTL4_SHIFT;
+static const unsigned int PTL3_SHIFT = __PTL3_SHIFT;
+static const unsigned int PTL2_SHIFT = __PTL2_SHIFT;
+static const unsigned int PTL1_SHIFT = __PTL1_SHIFT;
+static const unsigned long PTL4_SIZE = __PTL4_SIZE;
+static const unsigned long PTL3_SIZE = __PTL3_SIZE;
+static const unsigned long PTL2_SIZE = __PTL2_SIZE;
+static const unsigned long PTL1_SIZE = __PTL1_SIZE;
+static const unsigned long PTL4_MASK = __PTL4_MASK;
+static const unsigned long PTL3_MASK = __PTL3_MASK;
+static const unsigned long PTL2_MASK = __PTL2_MASK;
+static const unsigned long PTL1_MASK = __PTL1_MASK;
+static const unsigned int PTL4_ENTRIES = __PTL4_ENTRIES;
+static const unsigned int PTL3_ENTRIES = __PTL3_ENTRIES;
+static const unsigned int PTL2_ENTRIES = __PTL2_ENTRIES;
+static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
+#else
+# define PTL4_SHIFT __PTL4_SHIFT
+# define PTL3_SHIFT __PTL3_SHIFT
+# define PTL2_SHIFT __PTL2_SHIFT
+# define PTL1_SHIFT __PTL1_SHIFT
+# define PTL4_SIZE __PTL4_SIZE
+# define PTL3_SIZE __PTL3_SIZE
+# define PTL2_SIZE __PTL2_SIZE
+# define PTL1_SIZE __PTL1_SIZE
+# define PTL4_MASK __PTL4_MASK
+# define PTL3_MASK __PTL3_MASK
+# define PTL2_MASK __PTL2_MASK
+# define PTL1_MASK __PTL1_MASK
+# define PTL4_ENTRIES __PTL4_ENTRIES
+# define PTL3_ENTRIES __PTL3_ENTRIES
+# define PTL2_ENTRIES __PTL2_ENTRIES
+# define PTL1_ENTRIES __PTL1_ENTRIES
+#endif/*__ASSEMBLY__*/
+
+#define __page_offset(addr, size) ((unsigned long)(addr) & ((size) - 1))
+#define __page_align(addr, size) ((unsigned long)(addr) & ~((size) - 1))
+#define __page_align_up(addr, size) __page_align((unsigned long)(addr) + (size) - 1, size)
+
+/*
+ * normal page
+ */
+#define PAGE_SHIFT __PTL1_SHIFT
+#define PAGE_SIZE (UL(1) << __PTL1_SHIFT)
+#define PAGE_MASK (~(PTL1_SIZE - 1))
+#define PAGE_P2ALIGN 0
+#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
+#define page_align(addr) __page_align(addr, PAGE_SIZE)
+#define page_align_up(addr) __page_align_up(addr, PAGE_SIZE)
+
+/*
+ * large page
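+ * (one level-2 block: 2MB with a 4KB granule, 32MB with 16KB, 512MB with 64KB)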
+ */
+#define LARGE_PAGE_SHIFT __PTL2_SHIFT
+#define LARGE_PAGE_SIZE (UL(1) << __PTL2_SHIFT)
+#define LARGE_PAGE_MASK (~(PTL2_SIZE - 1))
+#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
+#define large_page_offset(addr) __page_offset(addr, LARGE_PAGE_SIZE)
+#define large_page_align(addr) __page_align(addr, LARGE_PAGE_SIZE)
+#define large_page_align_up(addr) __page_align_up(addr, LARGE_PAGE_SIZE)
+
+/*
+ * TTBR ASID field
+ */
+#define TTBR_ASID_SHIFT 48
+#define TTBR_ASID_MASK (0xFFFFUL << TTBR_ASID_SHIFT)
+#define TTBR_BADDR_MASK (~TTBR_ASID_MASK)
+
+#include "pgtable-hwdef.h"
+
+#define KERNEL_PHYS_OFFSET
+
+#define PT_PHYSMASK PHYS_MASK
+/* We allow user programs to access all the memory (D_Block, D_Page) */
+#define PFL_KERN_BLK_ATTR PROT_SECT_NORMAL_EXEC
+#define PFL_KERN_PAGE_ATTR PAGE_KERNEL_EXEC
+/* for the page table entry that points another page table (D_Table) */
+#define PFL_PDIR_TBL_ATTR PMD_TYPE_TABLE
+
+#ifdef CONFIG_ARM64_64K_PAGES
+# define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS)
+#else
+# define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1)
+#endif
+#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
+
+/* [Page level Write Through] page cache policy: 0 = write-back, 1 = write-through */
+#define PFL1_PWT 0 //< DEBUG_ARCH_DEP: wrap devobj.c's direct references in a function (is_pte_pwd)
+/* [Page level Cache Disable] page caching: 0 = enabled, 1 = disabled */
+#define PFL1_PCD 0 //< DEBUG_ARCH_DEP: wrap devobj.c's direct references in a function (is_pte_pcd)
+
+#define PTE_NULL (0)
+
+#define PTE_FILEOFF PTE_SPECIAL
+
+#ifndef __ASSEMBLY__
+
+#include
+
+typedef unsigned long pte_t;
+
+/*
+ * pagemap kernel ABI bits
+ */
+#define PM_ENTRY_BYTES sizeof(uint64_t)
+#define PM_STATUS_BITS 3
+#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS 6
+#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT PM_STATUS(4LL)
+#define PM_SWAP PM_STATUS(2LL)
+
+
+/* For easy conversion, these match the architecture-defined values */
+enum ihk_mc_pt_attribute {
+	/* page is resident in physical memory */
+	PTATTR_ACTIVE = PTE_VALID,
+	/* read/write flag */
+	PTATTR_WRITABLE = PTE_RDONLY, // note: meaning is inverted relative to the common definition
+	/* user/privileged flag */
+	PTATTR_USER = PTE_USER | PTE_NG,
+	/* page has been modified */
+	PTATTR_DIRTY = PTE_DIRTY,
+	/* large page */
+	PTATTR_LARGEPAGE = PMD_TABLE_BIT, // note: meaning is inverted relative to the common definition
+	/* remap_file_page flag */
+	PTATTR_FILEOFF = PTE_FILEOFF,
+	/* no-execute flag */
+	PTATTR_NO_EXECUTE = PTE_UXN,
+	/* uncached */
+	PTATTR_UNCACHABLE = PTE_ATTRINDX(1),
+	/* user-space mapping */
+	PTATTR_FOR_USER = UL(1) << (PHYS_MASK_SHIFT - 1),
+	/* WriteCombine */
+	PTATTR_WRITE_COMBINED = PTE_ATTRINDX(2),
+};
+extern enum ihk_mc_pt_attribute attr_mask;
+
+static inline int pfn_is_write_combined(uintptr_t pfn)
+{
+	return ((pfn & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC));
+}
+
+// bits whose meaning is inverted relative to the common definitions
+#define attr_flip_bits (PTATTR_WRITABLE | PTATTR_LARGEPAGE)
+
+static inline int pte_is_type_page(const pte_t *ptep, size_t pgsize)
+{
+	int ret = 0; // default: D_TABLE
+	if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
+	    (PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
+	    (PTL2_SIZE == pgsize)) {
+		// check D_BLOCK
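+		// descriptor bits[1:0] are 0b01 for a block (D_Block) and
+		// 0b11 for a table/page descriptor, hence the PMD_TYPE_MASK test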
+ ret = ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT); + } + else if (PTL1_SIZE == pgsize) { + // check D_PAGE + ret = ((*ptep & PTE_TYPE_MASK) == PTE_TYPE_PAGE); + } + return ret; +} + +static inline int pte_is_null(pte_t *ptep) +{ + return (*ptep == PTE_NULL); +} + +static inline int pte_is_present(pte_t *ptep) +{ + return !!(*ptep & PMD_SECT_VALID); +} + +static inline int pte_is_writable(pte_t *ptep) +{ + extern int kprintf(const char *format, ...); + kprintf("ERROR: %s is not implemented. \n", __func__); + return 0; +} + +static inline int pte_is_dirty(pte_t *ptep, size_t pgsize) +{ + int ret = 0; + int do_check = pte_is_type_page(ptep, pgsize); + if (do_check) { + ret = !!(*ptep & PTE_DIRTY); + } + return ret; +} + +static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize) +{ + int ret = 0; + int do_check = pte_is_type_page(ptep, pgsize); + if (do_check) { + ret = !!(*ptep & PTE_FILEOFF); + } + + return ret; +} + +static inline void pte_update_phys(pte_t *ptep, unsigned long phys) +{ + *ptep = (*ptep & ~PT_PHYSMASK) | (phys & PT_PHYSMASK); +} + +static inline uintptr_t pte_get_phys(pte_t *ptep) +{ + return (uintptr_t)(*ptep & PT_PHYSMASK); +} + +static inline off_t pte_get_off(pte_t *ptep, size_t pgsize) +{ + return (off_t)(*ptep & PHYS_MASK); +} + +static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize) +{ + enum ihk_mc_pt_attribute attr; + + attr = *ptep & attr_mask; + attr ^= attr_flip_bits; + if ((*ptep & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_DEVICE_nGnRE)) { + attr |= PTATTR_UNCACHABLE; + } else if ((*ptep & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC)) { + attr |= PTATTR_WRITE_COMBINED; + } + if (((pgsize == PTL2_SIZE) || (pgsize == PTL3_SIZE)) + && ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT)) { + attr |= PTATTR_LARGEPAGE; + } + + return attr; +} + +static inline void pte_make_null(pte_t *ptep, size_t pgsize) +{ + if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) || + (PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) || + (PTL2_SIZE == pgsize) || + (PTL1_SIZE == pgsize)) { + *ptep = PTE_NULL; + } +} + +static inline void pte_make_fileoff(off_t off, + enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep) +{ + if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) || + (PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) || + (PTL2_SIZE == pgsize) || + (PTL1_SIZE == pgsize)) { + *ptep = PTE_FILEOFF | off | PTE_TYPE_PAGE; + } +} + +#if 0 /* XXX: workaround. 
cannot use panic() here */
+static inline void pte_xchg(pte_t *ptep, pte_t *valp)
+{
+	*valp = xchg(ptep, *valp);
+}
+#else
+#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
+#endif
+
+static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
+{
+	int do_clear = pte_is_type_page(ptep, pgsize);
+	if (do_clear) {
+		*ptep = *ptep & ~PTE_DIRTY;
+	}
+}
+
+static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
+{
+	int do_set = pte_is_type_page(ptep, pgsize);
+	if (do_set) {
+		*ptep |= PTE_DIRTY;
+	}
+}
+
+struct page_table;
+void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
+pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
+
+struct page_table *get_init_page_table(void);
+void *early_alloc_pages(int nr_pages);
+void *get_last_early_heap(void);
+void flush_tlb(void);
+void flush_tlb_single(unsigned long addr);
+
+void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
+
+void set_address_space_id(struct page_table *pt, int asid);
+int get_address_space_id(const struct page_table *pt);
+
+typedef pte_t translation_table_t;
+void set_translation_table(struct page_table *pt, translation_table_t* tt);
+translation_table_t* get_translation_table(const struct page_table *pt);
+translation_table_t* get_translation_table_as_paddr(const struct page_table *pt);
+
+extern unsigned long ap_trampoline;
+//#define AP_TRAMPOLINE 0x10000
+#define AP_TRAMPOLINE_SIZE 0x2000
+
+/* Local is cacheable */
+#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !__HEADER_ARM64_COMMON_ARCH_MEMORY_H */
diff --git a/arch/arm64/kernel/include/arch-perfctr.h b/arch/arm64/kernel/include/arch-perfctr.h
new file mode 100644
index 00000000..fad87bcb
--- /dev/null
+++ b/arch/arm64/kernel/include/arch-perfctr.h
@@ -0,0 +1,72 @@
+/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
+#ifndef __ARCH_PERFCTR_H__
+#define __ARCH_PERFCTR_H__
+
+#include
+#include
+
+/* @ref.impl arch/arm64/include/asm/pmu.h */
+struct arm_pmu {
+	struct ihk_mc_interrupt_handler* handler;
+	uint32_t (*read_counter)(int);
+	void (*write_counter)(int, uint32_t);
+	void (*reset)(void*);
+	int (*enable_pmu)(void);
+	void (*disable_pmu)(void);
+	int (*enable_counter)(int);
+	int (*disable_counter)(int);
+	int (*enable_intens)(int);
+	int (*disable_intens)(int);
+	int (*set_event_filter)(unsigned long*, int);
+	void (*write_evtype)(int, uint32_t);
+	int (*get_event_idx)(int, unsigned long);
+	int (*map_event)(uint32_t, uint64_t);
+	int num_events;
+};
+
+static inline const struct arm_pmu* get_cpu_pmu(void)
+{
+	extern struct arm_pmu cpu_pmu;
+	return &cpu_pmu;
+}
+int arm64_init_perfctr(void);
+int arm64_enable_pmu(void);
+void arm64_disable_pmu(void);
+int armv8pmu_init(struct arm_pmu* cpu_pmu);
+
+/* TODO[PMU]: these could live in the common code; remove the definitions
+ * here once that direction is settled. */
+/*
+ * Generalized hardware cache events:
+ *
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D = 0,
+	PERF_COUNT_HW_CACHE_L1I = 1,
+	PERF_COUNT_HW_CACHE_LL = 2,
+	PERF_COUNT_HW_CACHE_DTLB = 3,
+	PERF_COUNT_HW_CACHE_ITLB = 4,
+	PERF_COUNT_HW_CACHE_BPU = 5,
+	PERF_COUNT_HW_CACHE_NODE = 6,
+
+	PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ = 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
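+
+/*
+ * Together with perf_hw_cache_op_result_id below, these form the generalized
+ * cache event encoding used by the Linux perf ABI:
+ *   config = (perf_hw_cache_id) | (op_id << 8) | (result_id << 16)
+ */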
perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +#endif diff --git a/arch/arm64/kernel/include/arch-string.h b/arch/arm64/kernel/include/arch-string.h new file mode 100644 index 00000000..b484bcc5 --- /dev/null +++ b/arch/arm64/kernel/include/arch-string.h @@ -0,0 +1,13 @@ +/* arch-string.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#ifndef __HEADER_ARM64_COMMON_ARCH_STRING_H +#define __HEADER_ARM64_COMMON_ARCH_STRING_H + +#define ARCH_FAST_MEMCPY + +extern void *__inline_memcpy(void *to, const void *from, size_t t); + +#define ARCH_FAST_MEMSET + +extern void *__inline_memset(void *s, unsigned long c, size_t count); + +#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */ diff --git a/arch/arm64/kernel/include/arch-timer.h b/arch/arm64/kernel/include/arch-timer.h new file mode 100644 index 00000000..8c3720cb --- /dev/null +++ b/arch/arm64/kernel/include/arch-timer.h @@ -0,0 +1,14 @@ +/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_COMMON_ARCH_TIMER_H +#define __HEADER_ARM64_COMMON_ARCH_TIMER_H + +/* @ref.impl include/clocksource/arm_arch_timer.h */ +#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */ +#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */ +#define ARCH_TIMER_VIRT_EVT_EN (1 << 2) +#define ARCH_TIMER_EVT_TRIGGER_SHIFT (4) +#define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT) +#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ +#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ + +#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */ diff --git a/arch/arm64/kernel/include/arch/auxvec.h b/arch/arm64/kernel/include/arch/auxvec.h new file mode 100644 index 00000000..99076fce --- /dev/null +++ b/arch/arm64/kernel/include/arch/auxvec.h @@ -0,0 +1,7 @@ +/* auxvec.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_ARCH_AUXVEC_H +#define __HEADER_ARM64_ARCH_AUXVEC_H + +#define AT_SYSINFO_EHDR 33 + +#endif /* __HEADER_ARM64_ARCH_AUXVEC_H */ diff --git a/arch/arm64/kernel/include/arch/cpu.h b/arch/arm64/kernel/include/arch/cpu.h new file mode 100644 index 00000000..5f3139ec --- /dev/null +++ b/arch/arm64/kernel/include/arch/cpu.h @@ -0,0 +1,103 @@ +/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#ifndef __HEADER_ARM64_ARCH_CPU_H +#define __HEADER_ARM64_ARCH_CPU_H + +#ifndef __ASSEMBLY__ + +#define sev() asm volatile("sev" : : : "memory") +#define wfe() asm volatile("wfe" : : : "memory") +#define wfi() asm volatile("wfi" : : : "memory") + +#define isb() asm volatile("isb" : : : "memory") +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define dsb(opt) asm volatile("dsb " #opt : : : "memory") + +#define mb() dsb(sy) +#define rmb() dsb(ld) +#define wmb() dsb(st) + +#define dma_rmb() dmb(oshld) +#define dma_wmb() dmb(oshst) + +//#ifndef CONFIG_SMP +//#else +#define smp_mb() dmb(ish) +#define smp_rmb() dmb(ishld) +#define smp_wmb() dmb(ishst) + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) : "r" (v) : "memory"); \ + break; \ + } \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (___p1) : 
"Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + break; \ + } \ + ___p1; \ +}) +//#endif /*CONFIG_SMP*/ + +#define read_barrier_depends() do { } while(0) +#define smp_read_barrier_depends() do { } while(0) + +#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define nop() asm volatile("nop"); + +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() smp_mb() + +/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */ +#define read_tsc() \ +({ \ + unsigned long cval; \ + isb(); \ + asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); \ + cval; \ +}) + +void init_tod_data(void); + +#if defined(CONFIG_HAS_NMI) +static inline void cpu_enable_nmi(void) +{ + asm volatile("msr daifclr, #2": : : "memory"); +} + +static inline void cpu_disable_nmi(void) +{ + asm volatile("msr daifset, #2": : : "memory"); +} +#else/*defined(CONFIG_HAS_NMI)*/ +static inline void cpu_enable_nmi(void) +{ +} + +static inline void cpu_disable_nmi(void) +{ +} +#endif/*defined(CONFIG_HAS_NMI)*/ + +#endif /* __ASSEMBLY__ */ + +#endif /* !__HEADER_ARM64_ARCH_CPU_H */ diff --git a/arch/arm64/kernel/include/arch/mm.h b/arch/arm64/kernel/include/arch/mm.h new file mode 100644 index 00000000..cb31a2f1 --- /dev/null +++ b/arch/arm64/kernel/include/arch/mm.h @@ -0,0 +1,17 @@ +/* mm.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_ARCH_MM_H +#define __HEADER_ARM64_ARCH_MM_H + +struct process_vm; + +static inline void +flush_nfo_tlb() +{ +} + +static inline void +flush_nfo_tlb_mm(struct process_vm *vm) +{ +} + +#endif /* __HEADER_ARM64_ARCH_MM_H */ diff --git a/arch/arm64/kernel/include/arch/mman.h b/arch/arm64/kernel/include/arch/mman.h new file mode 100644 index 00000000..6b6c8e81 --- /dev/null +++ b/arch/arm64/kernel/include/arch/mman.h @@ -0,0 +1,37 @@ +/* mman.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +/* @ref.impl linux-linaro/include/uapi/asm-generic/mman.h */ + +#ifndef __HEADER_ARM64_ARCH_MMAN_H +#define __HEADER_ARM64_ARCH_MMAN_H + +#include + +/* + * mapping flags + */ +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ + +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ +#define MAP_HUGE_SHIFT 26 +#if FIRST_LEVEL_BLOCK_SUPPORT +# define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT) +#else +# define MAP_HUGE_FIRST_BLOCK -1 /* not supported */ +#endif +#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT) + +/* + * for mlockall() + */ +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#endif /* __HEADER_ARM64_ARCH_MMAN_H */ diff --git a/arch/arm64/kernel/include/arch/rusage.h b/arch/arm64/kernel/include/arch/rusage.h new file mode 100644 index 00000000..01063a67 --- /dev/null +++ b/arch/arm64/kernel/include/arch/rusage.h @@ -0,0 +1,60 @@ +#ifndef ARCH_RUSAGE_H_INCLUDED +#define ARCH_RUSAGE_H_INCLUDED + +#define DEBUG_RUSAGE + +#define IHK_OS_PGSIZE_4KB 0 +#define 
IHK_OS_PGSIZE_2MB 1 +#define IHK_OS_PGSIZE_1GB 2 + +extern struct ihk_os_monitor *monitor; + +extern int sprintf(char * buf, const char *fmt, ...); + +#define DEBUG_ARCH_RUSAGE +#ifdef DEBUG_ARCH_RUSAGE +#define dprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0); +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0); +#else +#define dprintf(...) do { } while (0) +#define eprintf(...) \ + do { \ + char msg[1024]; \ + sprintf(msg, __VA_ARGS__); \ + kprintf("%s,%s", __FUNCTION__, msg); \ + } while (0); +#endif + +static inline int rusage_pgsize_to_pgtype(size_t pgsize) +{ + int ret = IHK_OS_PGSIZE_4KB; +#if 0 /* postk-TODO */ + switch (pgsize) { + case PTL1_SIZE: + ret = IHK_OS_PGSIZE_4KB; + break; + case PTL2_SIZE: + ret = IHK_OS_PGSIZE_2MB; + break; + case PTL3_SIZE: + ret = IHK_OS_PGSIZE_1GB; + break; + default: + eprintf("unknown pgsize=%ld\n", pgsize); + break; + } +#endif + return ret; +} + +#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */ diff --git a/arch/arm64/kernel/include/arch/shm.h b/arch/arm64/kernel/include/arch/shm.h new file mode 100644 index 00000000..8facbff7 --- /dev/null +++ b/arch/arm64/kernel/include/arch/shm.h @@ -0,0 +1,41 @@ +/* shm.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_ARCH_SHM_H +#define __HEADER_ARM64_ARCH_SHM_H + +#include + +/* shmflg */ +#define SHM_HUGE_SHIFT 26 +#if FIRST_LEVEL_BLOCK_SUPPORT +# define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT) +#else +# define SHM_HUGE_FIRST_BLOCK -1 /* not supported */ +#endif +#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT) + +struct ipc_perm { + key_t key; + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + uint16_t mode; + uint8_t padding[2]; + uint16_t seq; + uint8_t padding2[22]; +}; + +struct shmid_ds { + struct ipc_perm shm_perm; + size_t shm_segsz; + time_t shm_atime; + time_t shm_dtime; + time_t shm_ctime; + pid_t shm_cpid; + pid_t shm_lpid; + uint64_t shm_nattch; + uint8_t padding[12]; + int init_pgshift; +}; + +#endif /* __HEADER_ARM64_ARCH_SHM_H */ diff --git a/arch/arm64/kernel/include/arm-gic-v2.h b/arch/arm64/kernel/include/arm-gic-v2.h new file mode 100644 index 00000000..0195af34 --- /dev/null +++ b/arch/arm64/kernel/include/arm-gic-v2.h @@ -0,0 +1,106 @@ +/* arm-gic-v2.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* + * include/linux/irqchip/arm-gic.h + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_H
+#define __LINUX_IRQCHIP_ARM_GIC_H
+
+/* check config */
+#if defined(CONFIG_HAS_NMI) && !defined(CONFIG_ARM_GIC_V3)
+# error GICv2 does not support NMI
+#endif
+
+/* @ref.impl include/linux/irqchip/arm-gic.h */
+
+#define GIC_CPU_CTRL 0x00
+#define GIC_CPU_PRIMASK 0x04
+#define GIC_CPU_BINPOINT 0x08
+#define GIC_CPU_INTACK 0x0c
+#define GIC_CPU_EOI 0x10
+#define GIC_CPU_RUNNINGPRI 0x14
+#define GIC_CPU_HIGHPRI 0x18
+#define GIC_CPU_ALIAS_BINPOINT 0x1c
+#define GIC_CPU_ACTIVEPRIO 0xd0
+#define GIC_CPU_IDENT 0xfc
+
+#define GICC_ENABLE 0x1
+#define GICC_INT_PRI_THRESHOLD 0xf0
+#define GICC_IAR_INT_ID_MASK 0x3ff
+#define GICC_INT_SPURIOUS 1023
+#define GICC_DIS_BYPASS_MASK 0x1e0
+
+#define GIC_DIST_CTRL 0x000
+#define GIC_DIST_CTR 0x004
+#define GIC_DIST_IGROUP 0x080
+#define GIC_DIST_ENABLE_SET 0x100
+#define GIC_DIST_ENABLE_CLEAR 0x180
+#define GIC_DIST_PENDING_SET 0x200
+#define GIC_DIST_PENDING_CLEAR 0x280
+#define GIC_DIST_ACTIVE_SET 0x300
+#define GIC_DIST_ACTIVE_CLEAR 0x380
+#define GIC_DIST_PRI 0x400
+#define GIC_DIST_TARGET 0x800
+#define GIC_DIST_CONFIG 0xc00
+#define GIC_DIST_SOFTINT 0xf00
+#define GIC_DIST_SGI_PENDING_CLEAR 0xf10
+#define GIC_DIST_SGI_PENDING_SET 0xf20
+
+#define GICD_ENABLE 0x1
+#define GICD_DISABLE 0x0
+#define GICD_INT_ACTLOW_LVLTRIG 0x0
+#define GICD_INT_EN_CLR_X32 0xffffffff
+#define GICD_INT_EN_SET_SGI 0x0000ffff
+#define GICD_INT_EN_CLR_PPI 0xffff0000
+
+#ifdef CONFIG_HAS_NMI
+#define GICD_INT_NMI_PRI 0x40
+#define GICD_INT_DEF_PRI 0xc0
+#else
+#define GICD_INT_DEF_PRI 0xa0
+#endif
+#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
+				(GICD_INT_DEF_PRI << 16) |\
+				(GICD_INT_DEF_PRI << 8) |\
+				GICD_INT_DEF_PRI)
+
+#define GICH_HCR 0x0
+#define GICH_VTR 0x4
+#define GICH_VMCR 0x8
+#define GICH_MISR 0x10
+#define GICH_EISR0 0x20
+#define GICH_EISR1 0x24
+#define GICH_ELRSR0 0x30
+#define GICH_ELRSR1 0x34
+#define GICH_APR 0xf0
+#define GICH_LR0 0x100
+
+#define GICH_HCR_EN (1 << 0)
+#define GICH_HCR_UIE (1 << 1)
+
+#define GICH_LR_VIRTUALID (0x3ff << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_STATE (3 << 28)
+#define GICH_LR_PENDING_BIT (1 << 28)
+#define GICH_LR_ACTIVE_BIT (1 << 29)
+#define GICH_LR_EOI (1 << 19)
+
+#define GICH_VMCR_CTRL_SHIFT 0
+#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT 27
+#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT 21
+#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
+#define GICH_MISR_EOI (1 << 0)
+#define GICH_MISR_U (1 << 1)
+
+#endif /* __LINUX_IRQCHIP_ARM_GIC_H */
diff --git a/arch/arm64/kernel/include/arm-gic-v3.h b/arch/arm64/kernel/include/arm-gic-v3.h
new file mode 100644
index 00000000..f4a2009c
--- /dev/null
+++ b/arch/arm64/kernel/include/arm-gic-v3.h
@@ -0,0 +1,391 @@
+/* arm-gic-v3.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H +#define __LINUX_IRQCHIP_ARM_GIC_V3_H + +/* @ref.impl include/linux/irqchip/arm-gic-v3.h */ +#include + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. + */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_SEIR 0x0068 +#define GICD_IGROUPR 0x0080 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IROUTER 0x6000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +/* + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). + */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_DS (1U << 6) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) +#define GICD_TYPER_LPIS (1U << 17) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_SEIR GICD_SEIR +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_MOVLPIR 0x0100 +#define GICR_MOVALLR 0x0110 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) + +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +#define GICR_PROPBASER_NonShareable (0U << 10) +#define GICR_PROPBASER_InnerShareable (1U << 10) +#define GICR_PROPBASER_OuterShareable (2U << 10) +#define GICR_PROPBASER_SHAREABILITY_MASK (3UL << 10) +#define GICR_PROPBASER_nCnB (0U << 7) +#define GICR_PROPBASER_nC (1U << 7) 
+#define GICR_PROPBASER_RaWt (2U << 7) +#define GICR_PROPBASER_RaWb (3U << 7) +#define GICR_PROPBASER_WaWt (4U << 7) +#define GICR_PROPBASER_WaWb (5U << 7) +#define GICR_PROPBASER_RaWaWt (6U << 7) +#define GICR_PROPBASER_RaWaWb (7U << 7) +#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) +#define GICR_PROPBASER_IDBITS_MASK (0x1f) + +#define GICR_PENDBASER_NonShareable (0U << 10) +#define GICR_PENDBASER_InnerShareable (1U << 10) +#define GICR_PENDBASER_OuterShareable (2U << 10) +#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10) +#define GICR_PENDBASER_nCnB (0U << 7) +#define GICR_PENDBASER_nC (1U << 7) +#define GICR_PENDBASER_RaWt (2U << 7) +#define GICR_PENDBASER_RaWb (3U << 7) +#define GICR_PENDBASER_WaWt (4U << 7) +#define GICR_PENDBASER_WaWb (5U << 7) +#define GICR_PENDBASER_RaWaWt (6U << 7) +#define GICR_PENDBASER_RaWaWb (7U << 7) +#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_IGROUPR0 GICD_IGROUPR +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_LAST (1U << 4) + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_PIDR2 GICR_PIDR2 + +#define GITS_TRANSLATER 0x10040 + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_PTA (1UL << 19) + +#define GITS_CBASER_VALID (1UL << 63) +#define GITS_CBASER_nCnB (0UL << 59) +#define GITS_CBASER_nC (1UL << 59) +#define GITS_CBASER_RaWt (2UL << 59) +#define GITS_CBASER_RaWb (3UL << 59) +#define GITS_CBASER_WaWt (4UL << 59) +#define GITS_CBASER_WaWb (5UL << 59) +#define GITS_CBASER_RaWaWt (6UL << 59) +#define GITS_CBASER_RaWaWb (7UL << 59) +#define GITS_CBASER_CACHEABILITY_MASK (7UL << 59) +#define GITS_CBASER_NonShareable (0UL << 10) +#define GITS_CBASER_InnerShareable (1UL << 10) +#define GITS_CBASER_OuterShareable (2UL << 10) +#define GITS_CBASER_SHAREABILITY_MASK (3UL << 10) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1UL << 63) +#define GITS_BASER_nCnB (0UL << 59) +#define GITS_BASER_nC (1UL << 59) +#define GITS_BASER_RaWt (2UL << 59) +#define GITS_BASER_RaWb (3UL << 59) +#define GITS_BASER_WaWt (4UL << 59) +#define GITS_BASER_WaWb (5UL << 59) +#define GITS_BASER_RaWaWt (6UL << 59) +#define GITS_BASER_RaWaWb (7UL << 59) +#define GITS_BASER_CACHEABILITY_MASK (7UL << 59) +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1) +#define GITS_BASER_NonShareable (0UL << 10) +#define GITS_BASER_InnerShareable (1UL << 10) +#define GITS_BASER_OuterShareable (2UL << 10) +#define GITS_BASER_SHAREABILITY_SHIFT 
(10) +#define GITS_BASER_SHAREABILITY_MASK (3UL << GITS_BASER_SHAREABILITY_SHIFT) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define GITS_BASER_PAGE_SIZE_4K (0UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_16K (1UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGES_MAX 256 + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_CPU 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPVI 0x0a +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) +#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* + * Hypervisor interface registers (SRE only) + */ +#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1) + +#define ICH_LR_EOI (1UL << 41) +#define ICH_LR_GROUP (1UL << 60) +#define ICH_LR_STATE (3UL << 62) +#define ICH_LR_PENDING_BIT (1UL << 62) +#define ICH_LR_ACTIVE_BIT (1UL << 63) + +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) + +#define ICH_VMCR_CTLR_SHIFT 0 +#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) + +#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) + +#ifdef CONFIG_HAS_NMI +/* PMR values used to mask/unmask interrupts */ +#define ICC_PMR_EL1_G_SHIFT 6 +#define ICC_PMR_EL1_G_BIT (1 << ICC_PMR_EL1_G_SHIFT) +#define ICC_PMR_EL1_UNMASKED 0xf0 +#define ICC_PMR_EL1_MASKED (ICC_PMR_EL1_UNMASKED ^ ICC_PMR_EL1_G_BIT) + +/* + * This is the GIC interrupt mask bit. 
It is not actually part of the + * PSR and so does not appear in the user API, we are simply using some + * reserved bits in the PSR to store some state from the interrupt + * controller. The context save/restore functions will extract the + * ICC_PMR_EL1_G_BIT and save it as the PSR_G_BIT. + */ +#define PSR_G_BIT 0x00400000 +#define PSR_G_SHIFT 22 +#define PSR_G_PMR_G_SHIFT (PSR_G_SHIFT - ICC_PMR_EL1_G_SHIFT) +#define PSR_I_PMR_G_SHIFT (7 - ICC_PMR_EL1_G_SHIFT) +#endif /* CONFIG_HAS_NMI */ + +/* + * System register definitions + */ +#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) + +#define ICH_LR0_EL2 __LR0_EL2(0) +#define ICH_LR1_EL2 __LR0_EL2(1) +#define ICH_LR2_EL2 __LR0_EL2(2) +#define ICH_LR3_EL2 __LR0_EL2(3) +#define ICH_LR4_EL2 __LR0_EL2(4) +#define ICH_LR5_EL2 __LR0_EL2(5) +#define ICH_LR6_EL2 __LR0_EL2(6) +#define ICH_LR7_EL2 __LR0_EL2(7) +#define ICH_LR8_EL2 __LR8_EL2(0) +#define ICH_LR9_EL2 __LR8_EL2(1) +#define ICH_LR10_EL2 __LR8_EL2(2) +#define ICH_LR11_EL2 __LR8_EL2(3) +#define ICH_LR12_EL2 __LR8_EL2(4) +#define ICH_LR13_EL2 __LR8_EL2(5) +#define ICH_LR14_EL2 __LR8_EL2(6) +#define ICH_LR15_EL2 __LR8_EL2(7) + +#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) +#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) +#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) +#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) + +#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) +#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) +#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) +#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) + + +/** + * @ref.impl host-kernel/include/linux/stringify.h + */ +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) + +#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_H */ diff --git a/arch/arm64/kernel/include/asm-offsets.h b/arch/arm64/kernel/include/asm-offsets.h new file mode 100644 index 00000000..b1f90ed8 --- /dev/null +++ b/arch/arm64/kernel/include/asm-offsets.h @@ -0,0 +1,27 @@ +/* asm-offsets.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_COMMON_ASM_OFFSETS_H +#define __HEADER_ARM64_COMMON_ASM_OFFSETS_H + +#define S_X0 0x00 /* offsetof(struct pt_regs, regs[0]) */ +#define S_X1 0x08 /* offsetof(struct pt_regs, regs[1]) */ +#define S_X2 0x10 /* offsetof(struct pt_regs, regs[2]) */ +#define S_X3 0x18 /* offsetof(struct pt_regs, regs[3]) */ +#define S_X4 0x20 /* offsetof(struct pt_regs, regs[4]) */ +#define S_X5 0x28 /* offsetof(struct pt_regs, regs[5]) */ +#define S_X6 0x30 /* offsetof(struct pt_regs, regs[6]) */ +#define S_X7 0x38 /* offsetof(struct pt_regs, regs[7]) */ +#define S_LR 0xf0 /* offsetof(struct pt_regs, regs[30]) */ +#define S_SP 0xf8 /* offsetof(struct pt_regs, sp) */ +#define S_PC 0x100 /* offsetof(struct pt_regs, pc) */ +#define S_PSTATE 0x108 /* offsetof(struct pt_regs, pstate) */ +#define S_ORIG_X0 0x110 /* offsetof(struct pt_regs, orig_x0) */ +#define S_SYSCALLNO 0x118 /* offsetof(struct pt_regs, syscallno) */ +#define S_FRAME_SIZE 0x120 /* sizeof(struct pt_regs) */ + +#define CPU_INFO_SETUP 0x10 /* offsetof(struct cpu_info, cpu_setup) */ +#define CPU_INFO_SZ 0x18 /* sizeof(struct cpu_info) */ + +#define TI_FLAGS 0x00 /* offsetof(struct thread_info, flags) */ +#define TI_CPU_CONTEXT 0x10 /* offsetof(struct thread_info, cpu_context) */ + +#endif /* !__HEADER_ARM64_COMMON_ASM_OFFSETS_H */ diff --git a/arch/arm64/kernel/include/assembler.h b/arch/arm64/kernel/include/assembler.h new file mode 100644 index 00000000..212c366d --- /dev/null +++ b/arch/arm64/kernel/include/assembler.h @@ -0,0 +1,147 @@ +/* assembler.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_ASSEMBLER_H +#define __HEADER_ARM64_COMMON_ASSEMBLER_H + +#include + +#if defined(CONFIG_HAS_NMI) +#include +#endif /* defined(CONFIG_HAS_NMI) */ + +#if defined(CONFIG_HAS_NMI) +/* + * Enable and disable pseudo NMI. + */ + .macro disable_nmi + msr daifset, #2 + .endm + + .macro enable_nmi + msr daifclr, #2 + .endm + +/* + * Enable and disable interrupts. + */ + .macro disable_irq, tmp + mov \tmp, #ICC_PMR_EL1_MASKED + msr_s ICC_PMR_EL1, \tmp + .endm + + .macro enable_irq, tmp + mov \tmp, #ICC_PMR_EL1_UNMASKED + msr_s ICC_PMR_EL1, \tmp + .endm + +#else /* defined(CONFIG_HAS_NMI) */ +/* + * Enable and disable pseudo NMI. + */ + .macro disable_nmi + .endm + + .macro enable_nmi + .endm + +/* + * Enable and disable interrupts. + */ + .macro disable_irq, tmp + msr daifset, #2 + .endm + + .macro enable_irq, tmp + msr daifclr, #2 + .endm +#endif /* defined(CONFIG_HAS_NMI) */ + +/* + * Enable and disable debug exceptions. + */ + .macro disable_dbg + msr daifset, #8 + .endm + + .macro enable_dbg + msr daifclr, #8 + .endm + + .macro disable_step_tsk, flgs, tmp + tbz \flgs, #TIF_SINGLESTEP, 9990f + mrs \tmp, mdscr_el1 + bic \tmp, \tmp, #1 + msr mdscr_el1, \tmp + isb // Synchronise with enable_dbg +9990: + .endm + + .macro enable_step_tsk, flgs, tmp + tbz \flgs, #TIF_SINGLESTEP, 9990f + disable_dbg + mrs \tmp, mdscr_el1 + orr \tmp, \tmp, #1 + msr mdscr_el1, \tmp + b 9991f +9990: + mrs \tmp, mdscr_el1 + bic \tmp, \tmp, #1 + msr mdscr_el1, \tmp + isb // Synchronise with enable_dbg +9991: + .endm + +/* + * Enable both debug exceptions and interrupts. 
This is likely to be + * faster than two daifclr operations, since writes to this register + * are self-synchronising. + */ +#if defined(CONFIG_HAS_NMI) + .macro enable_dbg_and_irq, tmp + enable_dbg + enable_irq \tmp + .endm +#else /* defined(CONFIG_HAS_NMI) */ + .macro enable_dbg_and_irq, tmp + msr daifclr, #(8 | 2) + .endm +#endif /* defined(CONFIG_HAS_NMI) */ + +/* + * Register aliases. + */ +lr .req x30 // link register + +/* + * Vector entry + */ + .macro ventry label + .align 7 + b \label + .endm + +/* + * Select code when configured for BE. + */ +//#ifdef CONFIG_CPU_BIG_ENDIAN +//#define CPU_BE(code...) code +//#else +#define CPU_BE(code...) +//#endif + +/* + * Select code when configured for LE. + */ +//#ifdef CONFIG_CPU_BIG_ENDIAN +//#define CPU_LE(code...) +//#else +#define CPU_LE(code...) code +//#endif + +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + +#endif /* !__HEADER_ARM64_COMMON_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/include/cache.h b/arch/arm64/kernel/include/cache.h new file mode 100644 index 00000000..582ba131 --- /dev/null +++ b/arch/arm64/kernel/include/cache.h @@ -0,0 +1,7 @@ +/* cache.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_CACHE_H +#define __HEADER_ARM64_COMMON_CACHE_H + +#define L1_CACHE_SHIFT 6 + +#endif /* !__HEADER_ARM64_COMMON_CACHE_H */ diff --git a/arch/arm64/kernel/include/cas.h b/arch/arm64/kernel/include/cas.h new file mode 100644 index 00000000..bdc2b151 --- /dev/null +++ b/arch/arm64/kernel/include/cas.h @@ -0,0 +1,32 @@ +/* cas.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_COMMON_CAS_H +#define __HEADER_ARM64_COMMON_CAS_H + +#include + +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg (size == 8 case) */ +/* 8 byte compare and swap, return 0:fail, 1:success */ +static inline int +compare_and_swap(void *addr, unsigned long olddata, unsigned long newdata) +{ + unsigned long oldval = 0, res = 0; + + smp_mb(); + do { + asm volatile("// __cmpxchg8\n" + " ldxr %1, %2\n" + " mov %w0, #0\n" + " cmp %1, %3\n" + " b.ne 1f\n" + " stxr %w0, %4, %2\n" + "1:\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned long *)addr) + : "Ir" (olddata), "r" (newdata) + : "cc"); + } while (res); + smp_mb(); + + return (oldval == olddata); +} + +#endif /* !__HEADER_ARM64_COMMON_CAS_H */ diff --git a/arch/arm64/kernel/include/compiler.h b/arch/arm64/kernel/include/compiler.h new file mode 100644 index 00000000..e0e0d35f --- /dev/null +++ b/arch/arm64/kernel/include/compiler.h @@ -0,0 +1,32 @@ +/* compiler.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __ASM_COMPILER_H +#define __ASM_COMPILER_H + +/* @ref.impl arch/arm64/include/asm/compiler.h::__asmeq(x,y) */ +/* + * This is used to ensure the compiler did actually allocate the register we + * asked it for some inline assembly sequences. Apparently we can't trust the + * compiler from one version to another so a bit of paranoia won't hurt. This + * string is meant to be concatenated with the inline asm string and will + * cause compilation to stop on mismatch. 
(for details, see gcc PR 15089) + */ +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" + +/* @ref.impl include/linux/compiler.h::__section(S) */ +/* Simple shorthand for a section definition */ +# define __section(S) __attribute__ ((__section__(#S))) + +/* @ref.impl include/linux/compiler.h::__aligned(x) */ +/* + * From the GCC manual: + * + * Many functions have no effects except the return value and their + * return value depends only on the parameters and/or global + * variables. Such a function can be subject to common subexpression + * elimination and loop optimization just as an arithmetic operator + * would be. + * [...] + */ +#define __aligned(x) __attribute__((aligned(x))) + +#endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/kernel/include/const.h b/arch/arm64/kernel/include/const.h new file mode 100644 index 00000000..311396b3 --- /dev/null +++ b/arch/arm64/kernel/include/const.h @@ -0,0 +1,23 @@ +/* const.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_CONST_H +#define __HEADER_ARM64_COMMON_CONST_H + +#ifndef __ASSEMBLY__ +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#else /* !__ASSEMBLY__ */ +#define _AC(X,Y) X +#define _AT(T,X) X +#endif /* !__ASSEMBLY__ */ + +#define _BITUL(x) (_AC(1,UL) << (x)) +#define _BITULL(x) (_AC(1,ULL) << (x)) + +/* + * Allow for constants defined here to be used from assembly code + * by prepending the UL suffix only with actual C code compilation. + */ +#define UL(x) _AC(x, UL) + +#endif /* !__HEADER_ARM64_COMMON_CONST_H */ diff --git a/arch/arm64/kernel/include/context.h b/arch/arm64/kernel/include/context.h new file mode 100644 index 00000000..475db6b8 --- /dev/null +++ b/arch/arm64/kernel/include/context.h @@ -0,0 +1,8 @@ +/* context.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_CONTEXT_H +#define __HEADER_ARM64_COMMON_CONTEXT_H + +void switch_mm(struct page_table *pgtbl); +void free_mmu_context(struct page_table *pgtbl); + +#endif /*__HEADER_ARM64_COMMON_CONTEXT_H*/ diff --git a/arch/arm64/kernel/include/cpufeature.h b/arch/arm64/kernel/include/cpufeature.h new file mode 100644 index 00000000..590c8dd1 --- /dev/null +++ b/arch/arm64/kernel/include/cpufeature.h @@ -0,0 +1,191 @@ +/* cpufeature.h COPYRIGHT FUJITSU LIMITED 2017 */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include +#include +#include + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +/* CPU feature register tracking */ +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE,/* Bigger value is safe */ +}; + +#define FTR_STRICT (1) /* SANITY check strict matching required */ +#define FTR_NONSTRICT (0) /* SANITY check ignored */ + +#define FTR_SIGNED (1) /* Value should be treated as signed */ +#define FTR_UNSIGNED (0) /* Value should be treated as unsigned */ + +#define FTR_VISIBLE (1) /* Feature visible to the user space */ +#define FTR_HIDDEN (0) /* Feature is hidden from the user */ + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +struct arm64_ftr_bits { + int sign; /* Value is signed ? */ + int visible; + int strict; /* CPU Sanity check: strict matching required ? */ + enum ftr_type type; + uint8_t shift; + uint8_t width; + int64_t safe_val; /* safe value for FTR_EXACT features */ +}; + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +/* + * @arm64_ftr_reg - Feature register + * @strict_mask Bits which should match across all CPUs for sanity. 
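+ * @user_mask	Bits of @sys_val that the MRS emulation exposes to user
+ *		space; the user-visible value is composed as
+ *		(user_val | (sys_val & user_mask)), see
+ *		arm64_ftr_reg_user_value() below
+ * @user_val	Safe value substituted for the bits outside @user_mask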
+ * @sys_val Safe value across the CPUs (system view) + */ +struct arm64_ftr_reg { + const char *name; + uint64_t strict_mask; + uint64_t user_mask; + uint64_t sys_val; + uint64_t user_val; + const struct arm64_ftr_bits *ftr_bits; +}; + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +/* scope of capability check */ +enum { + SCOPE_SYSTEM, + SCOPE_LOCAL_CPU, +}; + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +struct arm64_cpu_capabilities { + const char *desc; + uint16_t capability; + int def_scope;/* default scope */ + int (*matches)(const struct arm64_cpu_capabilities *caps, int scope); + int (*enable)(void *);/* Called on all active CPUs */ + union { + struct {/* To be used for erratum handling only */ + uint32_t midr_model; + uint32_t midr_range_min, midr_range_max; + }; + + struct {/* Feature register checking */ + uint32_t sys_reg; + uint8_t field_pos; + uint8_t min_field_value; + uint8_t hwcap_type; + int sign; + unsigned long hwcap; + }; + }; +}; + +/* @ref.impl include/linux/bitops.h */ +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline uint64_t arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) +{ + return (uint64_t)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int +cpuid_feature_extract_signed_field_width(uint64_t features, int field, int width) +{ + return (int64_t)(features << (64 - width - field)) >> (64 - width); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int +cpuid_feature_extract_signed_field(uint64_t features, int field) +{ + return cpuid_feature_extract_signed_field_width(features, field, 4); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline unsigned int +cpuid_feature_extract_unsigned_field_width(uint64_t features, int field, int width) +{ + return (uint64_t)(features << (64 - width - field)) >> (64 - width); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline unsigned int +cpuid_feature_extract_unsigned_field(uint64_t features, int field) +{ + return cpuid_feature_extract_unsigned_field_width(features, field, 4); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline uint64_t arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) +{ + return (reg->user_val | (reg->sys_val & reg->user_mask)); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int +cpuid_feature_extract_field_width(uint64_t features, int field, int width, int sign) +{ + return (sign) ? 
+ cpuid_feature_extract_signed_field_width(features, field, width) : + cpuid_feature_extract_unsigned_field_width(features, field, width); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int +cpuid_feature_extract_field(uint64_t features, int field, int sign) +{ + return cpuid_feature_extract_field_width(features, field, 4, sign); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int64_t arm64_ftr_value(const struct arm64_ftr_bits *ftrp, uint64_t val) +{ + return (int64_t)cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width, ftrp->sign); +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int id_aa64pfr0_32bit_el0(uint64_t pfr0) +{ + uint32_t val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); + + return val == ID_AA64PFR0_EL0_32BIT_64BIT; +} + +/* @ref.impl arch/arm64/include/asm/cpufeature.h */ +static inline int id_aa64pfr0_sve(uint64_t pfr0) +{ + uint32_t val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT); + + return val > 0; +} + +void setup_cpu_features(void); +void update_cpu_features(int cpu, + struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot); +uint64_t read_system_reg(uint32_t id); +void init_cpu_features(struct cpuinfo_arm64 *info); +int enable_mrs_emulation(void); + +/* @ref.impl arch/arm64/include/asm/hwcap.h */ +enum { + CAP_HWCAP = 1, +#ifdef CONFIG_COMPAT + CAP_COMPAT_HWCAP, + CAP_COMPAT_HWCAP2, +#endif +}; + +#endif /* __ASM_CPUFEATURE_H */ diff --git a/arch/arm64/kernel/include/cpuinfo.h b/arch/arm64/kernel/include/cpuinfo.h new file mode 100644 index 00000000..cac2ef9e --- /dev/null +++ b/arch/arm64/kernel/include/cpuinfo.h @@ -0,0 +1,34 @@ +/* cpuinfo.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#ifndef __HEADER_ARM64_COMMON_CPUINFO_H +#define __HEADER_ARM64_COMMON_CPUINFO_H + +#include + +/* @ref.impl arch/arm64/include/cpu.h */ +/* + * Records attributes of an individual CPU. + */ +struct cpuinfo_arm64 { + uint32_t reg_midr; + unsigned int hwid; /* McKernel Original. 
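Holds the hardware CPU identifier; this field is not present in the Linux original.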
*/ + + uint32_t reg_ctr; + uint32_t reg_cntfrq; + uint32_t reg_dczid; + uint32_t reg_revidr; + + uint64_t reg_id_aa64dfr0; + uint64_t reg_id_aa64dfr1; + uint64_t reg_id_aa64isar0; + uint64_t reg_id_aa64isar1; + uint64_t reg_id_aa64mmfr0; + uint64_t reg_id_aa64mmfr1; + uint64_t reg_id_aa64mmfr2; + uint64_t reg_id_aa64pfr0; + uint64_t reg_id_aa64pfr1; + uint64_t reg_id_aa64zfr0; + + uint64_t reg_zcr; +}; + +#endif /* !__HEADER_ARM64_COMMON_CPUINFO_H */ diff --git a/arch/arm64/kernel/include/cpulocal.h b/arch/arm64/kernel/include/cpulocal.h new file mode 100644 index 00000000..0bc72574 --- /dev/null +++ b/arch/arm64/kernel/include/cpulocal.h @@ -0,0 +1,13 @@ +/* cpulocal.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_CPULOCAL_H +#define __HEADER_ARM64_COMMON_CPULOCAL_H + +#include +#include +#include + +union arm64_cpu_local_variables *get_arm64_cpu_local_variable(int id); +union arm64_cpu_local_variables *get_arm64_this_cpu_local(void); +void *get_arm64_this_cpu_kstack(void); + +#endif /* !__HEADER_ARM64_COMMON_CPULOCAL_H */ diff --git a/arch/arm64/kernel/include/cputable.h b/arch/arm64/kernel/include/cputable.h new file mode 100644 index 00000000..e767aed4 --- /dev/null +++ b/arch/arm64/kernel/include/cputable.h @@ -0,0 +1,12 @@ +/* cputable.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_CPUTABLE_H +#define __HEADER_ARM64_COMMON_CPUTABLE_H + +struct cpu_info { + unsigned int cpu_id_val; + unsigned int cpu_id_mask; + const char *cpu_name; + unsigned long (*cpu_setup)(void); +}; + +#endif /* !__HEADER_ARM64_COMMON_CPUTABLE_H */ diff --git a/arch/arm64/kernel/include/cputype.h b/arch/arm64/kernel/include/cputype.h new file mode 100644 index 00000000..c578fc2b --- /dev/null +++ b/arch/arm64/kernel/include/cputype.h @@ -0,0 +1,47 @@ +/* cputype.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* @ref.impl arch/arm64/include/asm/cputype.h */ +#ifndef __HEADER_ARM64_COMMON_CPUTYPE_H +#define __HEADER_ARM64_COMMON_CPUTYPE_H + +#include + +#define MPIDR_LEVEL_BITS_SHIFT 3 +#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_LEVEL_SHIFT(level) \ + (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) + +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) + +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) + +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) + +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) + +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + +#ifndef __ASSEMBLY__ + +static unsigned int read_cpuid_id(void) +{ + return read_cpuid(MIDR_EL1); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* !__HEADER_ARM64_COMMON_CPUTYPE_H */ diff --git a/arch/arm64/kernel/include/debug-monitors.h b/arch/arm64/kernel/include/debug-monitors.h new file mode 100644 index 00000000..ff19f400 --- /dev/null +++ b/arch/arm64/kernel/include/debug-monitors.h @@ -0,0 +1,35 @@ +/* debug-monitors.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#ifndef __HEADER_ARM64_COMMON_DEBUG_MONITORS_H 
+#define __HEADER_ARM64_COMMON_DEBUG_MONITORS_H + +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS (1 << 0) +#define DBG_SPSR_SS (1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE (1 << 13) +#define DBG_MDSCR_MDE (1 << 15) +#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + +#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) + +/* AArch64 */ +#define DBG_ESR_EVT_HWBP 0x0 +#define DBG_ESR_EVT_HWSS 0x1 +#define DBG_ESR_EVT_HWWP 0x2 +#define DBG_ESR_EVT_BRK 0x6 + +#ifndef __ASSEMBLY__ + +unsigned char debug_monitors_arch(void); +void mdscr_write(unsigned int mdscr); +unsigned int mdscr_read(void); +void debug_monitors_init(void); + +struct pt_regs; +void set_regs_spsr_ss(struct pt_regs *regs); +void clear_regs_spsr_ss(struct pt_regs *regs); + +#endif /* !__ASSEMBLY__ */ + +#endif /* !__HEADER_ARM64_COMMON_DEBUG_MONITORS_H */ diff --git a/arch/arm64/kernel/include/elf.h b/arch/arm64/kernel/include/elf.h new file mode 100644 index 00000000..c52a96b3 --- /dev/null +++ b/arch/arm64/kernel/include/elf.h @@ -0,0 +1,28 @@ +/* elf.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_ELF_H +#define __HEADER_ARM64_COMMON_ELF_H + +#include + +/* ELF target machines defined */ +#define EM_AARCH64 183 + +/* ELF header defined */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_OSABI ELFOSABI_NONE +#define ELF_ABIVERSION El_ABIVERSION_NONE +#define ELF_ARCH EM_AARCH64 + +#define ELF_NGREG64 (sizeof (struct user_pt_regs) / sizeof(elf_greg64_t)) + +/* PTRACE_GETREGSET and PTRACE_SETREGSET requests. */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ + +typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64]; + +#endif /* __HEADER_ARM64_COMMON_ELF_H */ diff --git a/arch/arm64/kernel/include/elfcore.h b/arch/arm64/kernel/include/elfcore.h new file mode 100644 index 00000000..adeafb0c --- /dev/null +++ b/arch/arm64/kernel/include/elfcore.h @@ -0,0 +1,92 @@ +/* elfcore.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. 
*/
+#ifndef __HEADER_ARM64_COMMON_ELFCORE_H
+#define __HEADER_ARM64_COMMON_ELFCORE_H
+
+typedef uint16_t Elf64_Half;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Xword;
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+
+#define EI_NIDENT 16
+
+typedef struct {
+	unsigned char e_ident[EI_NIDENT];
+	Elf64_Half e_type;
+	Elf64_Half e_machine;
+	Elf64_Word e_version;
+	Elf64_Addr e_entry;
+	Elf64_Off e_phoff;
+	Elf64_Off e_shoff;
+	Elf64_Word e_flags;
+	Elf64_Half e_ehsize;
+	Elf64_Half e_phentsize;
+	Elf64_Half e_phnum;
+	Elf64_Half e_shentsize;
+	Elf64_Half e_shnum;
+	Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+#define EI_MAG0 0
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_ABIVERSION 8
+#define EI_PAD 9
+
+
+#define ELFMAG0 0x7f
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+
+#define ELFCLASS64 2 /* 64-bit object */
+#define ELFDATA2LSB 1 /* LSB */
+#define El_VERSION 1 /* defined to be the same as EV_CURRENT */
+#define ELFOSABI_NONE 0 /* unspecified */
+#define El_ABIVERSION_NONE 0 /* unspecified */
+#define ET_CORE 4 /* Core file */
+#define EM_X86_64 62 /* AMD x86-64 architecture */
+#define EM_K10M 181 /* Intel K10M */
+#define EV_CURRENT 1 /* Current version */
+
+typedef struct {
+	Elf64_Word p_type;
+	Elf64_Word p_flags;
+	Elf64_Off p_offset;
+	Elf64_Addr p_vaddr;
+	Elf64_Addr p_paddr;
+	Elf64_Xword p_filesz;
+	Elf64_Xword p_memsz;
+	Elf64_Xword p_align;
+} Elf64_Phdr;
+
+#define PT_LOAD 1
+#define PT_NOTE 4
+
+#define PF_X 1 /* executable bit */
+#define PF_W 2 /* writable bit */
+#define PF_R 4 /* readable bit */
+
+struct note {
+	Elf64_Word namesz;
+	Elf64_Word descsz;
+	Elf64_Word type;
+	/* name char[namesz] and desc[descsz] */
+};
+
+#define NT_PRSTATUS 1
+#define NT_PRFRPREG 2
+#define NT_PRPSINFO 3
+#define NT_AUXV 6
+#define NT_X86_STATE 0x202
+
+#include "elfcoregpl.h"
+
+#endif /* !__HEADER_ARM64_COMMON_ELFCORE_H */
+#endif /* !POSTK_DEBUG_ARCH_DEP_18 */
diff --git a/arch/arm64/kernel/include/elfcoregpl.h b/arch/arm64/kernel/include/elfcoregpl.h
new file mode 100644
index 00000000..cd8c00a6
--- /dev/null
+++ b/arch/arm64/kernel/include/elfcoregpl.h
@@ -0,0 +1,98 @@
+/* elfcoregpl.h COPYRIGHT FUJITSU LIMITED 2015 */
+#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
+#ifndef __HEADER_ARM64_COMMON_ELFCOREGPL_H
+#define __HEADER_ARM64_COMMON_ELFCOREGPL_H
+
+#define pid_t int
+
+/* From /usr/include/linux/elfcore.h of Linux */
+
+#define ELF_PRARGSZ (80)
+
+/* From /usr/include/linux/elfcore.h from Linux */
+
+struct elf_siginfo
+{
+	int si_signo;
+	int si_code;
+	int si_errno;
+};
+
+/* From bfd/hosts/x86-64linux.h of gdb.
*/ + +typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t; +typedef a8_uint64_t elf_greg64_t; + +struct user_regs64_struct +{ + a8_uint64_t r15; + a8_uint64_t r14; + a8_uint64_t r13; + a8_uint64_t r12; + a8_uint64_t rbp; + a8_uint64_t rbx; + a8_uint64_t r11; + a8_uint64_t r10; + a8_uint64_t r9; + a8_uint64_t r8; + a8_uint64_t rax; + a8_uint64_t rcx; + a8_uint64_t rdx; + a8_uint64_t rsi; + a8_uint64_t rdi; + a8_uint64_t orig_rax; + a8_uint64_t rip; + a8_uint64_t cs; + a8_uint64_t eflags; + a8_uint64_t rsp; + a8_uint64_t ss; + a8_uint64_t fs_base; + a8_uint64_t gs_base; + a8_uint64_t ds; + a8_uint64_t es; + a8_uint64_t fs; + a8_uint64_t gs; +}; + +#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t)) + +typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64]; + +struct prstatus64_timeval +{ + a8_uint64_t tv_sec; + a8_uint64_t tv_usec; +}; +struct elf_prstatus64 +{ + struct elf_siginfo pr_info; + short int pr_cursig; + a8_uint64_t pr_sigpend; + a8_uint64_t pr_sighold; + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus64_timeval pr_utime; + struct prstatus64_timeval pr_stime; + struct prstatus64_timeval pr_cutime; + struct prstatus64_timeval pr_cstime; + elf_gregset64_t pr_reg; + int pr_fpvalid; +}; +struct elf_prpsinfo64 +{ + char pr_state; + char pr_sname; + char pr_zomb; + char pr_nice; + a8_uint64_t pr_flag; + unsigned int pr_uid; + unsigned int pr_gid; + int pr_pid, pr_ppid, pr_pgrp, pr_sid; + char pr_fname[16]; + char pr_psargs[ELF_PRARGSZ]; +}; + +#endif /* !__HEADER_ARM64_COMMON_ELFCOREGPL_H */ +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/arm64/kernel/include/elfnote.h b/arch/arm64/kernel/include/elfnote.h new file mode 100644 index 00000000..fdd3a475 --- /dev/null +++ b/arch/arm64/kernel/include/elfnote.h @@ -0,0 +1,60 @@ +/* elfnote.h COPYRIGHT FUJITSU LIMITED 2016 */ +/* @ref.impl include/linux/elfnote.h */ +/* + * Helper macros to generate ELF Note structures, which are put into a + * PT_NOTE segment of the final vmlinux image. These are useful for + * including name-value pairs of metadata into the kernel binary (or + * modules?) for use by external programs. + * + * Each note has three parts: a name, a type and a desc. The name is + * intended to distinguish the note's originator, so it would be a + * company, project, subsystem, etc; it must be in a suitable form for + * use in a section name. The type is an integer which is used to tag + * the data, and is considered to be within the "name" namespace (so + * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The + * "desc" field is the actual data. There are no constraints on the + * desc field's contents, though typically they're fairly small. + * + * All notes from a given NAME are put into a section named + * .note.NAME. When the kernel image is finally linked, all the notes + * are packed into a single .notes section, which is mapped into the + * PT_NOTE segment. Because notes for a given name are grouped into + * the same section, they'll all be adjacent the output file. + * + * This file defines macros for both C and assembler use. Their + * syntax is slightly different, but they're semantically similar. + * + * See the ELF specification for more detail about ELF notes. 
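+ *
+ * For reference, each note record in the PT_NOTE segment is laid out as:
+ *
+ *	namesz	(4 bytes)
+ *	descsz	(4 bytes)
+ *	type	(4 bytes)
+ *	name	(namesz bytes, zero-padded to a 4-byte boundary)
+ *	desc	(descsz bytes, zero-padded to a 4-byte boundary)
+ *
+ * which is the shape the ELFNOTE_START/ELFNOTE_END macros below emit.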
+ */ +#ifndef __HEADER_ARM64_COMMON_ELFNOTE_H +#define __HEADER_ARM64_COMMON_ELFNOTE_H + +#ifdef __ASSEMBLER__ + +/* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and + * desc data with appropriate padding. The 'desctype' argument is the + * assembler pseudo op defining the type of the data e.g. .asciz while + * 'descdata' is the data itself e.g. "hello, world". + * + * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") + * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +#define ELFNOTE_START(name, type, flags) \ +.pushsection .note.name, flags,@note ; \ + .balign 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4484f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz #name ; \ +2:.balign 4 ; \ +3: + +#define ELFNOTE_END \ +4484:.balign 4 ; \ +.popsection ; + +#endif /* __ASSEMBLER__ */ + +#endif /* !__HEADER_ARM64_COMMON_ELFNOTE_H */ diff --git a/arch/arm64/kernel/include/errno.h b/arch/arm64/kernel/include/errno.h new file mode 100644 index 00000000..becea141 --- /dev/null +++ b/arch/arm64/kernel/include/errno.h @@ -0,0 +1,112 @@ +/* errno.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_COMMON_ERRNO_H +#define __HEADER_ARM64_COMMON_ERRNO_H + +#include + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* 
Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + +#define ERFKILL 132 /* Operation not possible due to RF-kill */ + +#endif diff --git a/arch/arm64/kernel/include/esr.h b/arch/arm64/kernel/include/esr.h new file mode 100644 index 00000000..a705a2ee --- /dev/null +++ b/arch/arm64/kernel/include/esr.h @@ -0,0 +1,180 @@ +/* esr.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +/* + * Copyright (C) 2013 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ESR_H
+#define __ASM_ESR_H
+
+#include
+
+#define ESR_ELx_EC_UNKNOWN (0x00)
+#define ESR_ELx_EC_WFx (0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32 (0x03)
+#define ESR_ELx_EC_CP15_64 (0x04)
+#define ESR_ELx_EC_CP14_MR (0x05)
+#define ESR_ELx_EC_CP14_LS (0x06)
+#define ESR_ELx_EC_FP_ASIMD (0x07)
+#define ESR_ELx_EC_CP10_ID (0x08)
+/* Unallocated EC: 0x09 - 0x0B */
+#define ESR_ELx_EC_CP14_64 (0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL (0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32 (0x11)
+#define ESR_ELx_EC_HVC32 (0x12)
+#define ESR_ELx_EC_SMC32 (0x13)
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64 (0x15)
+#define ESR_ELx_EC_HVC64 (0x16)
+#define ESR_ELx_EC_SMC64 (0x17)
+#define ESR_ELx_EC_SYS64 (0x18)
+#define ESR_ELx_EC_SVE (0x19)
+/* Unallocated EC: 0x1A - 0x1E */
+#define ESR_ELx_EC_IMP_DEF (0x1f)
+#define ESR_ELx_EC_IABT_LOW (0x20)
+#define ESR_ELx_EC_IABT_CUR (0x21)
+#define ESR_ELx_EC_PC_ALIGN (0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW (0x24)
+#define ESR_ELx_EC_DABT_CUR (0x25)
+#define ESR_ELx_EC_SP_ALIGN (0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32 (0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64 (0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR (0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW (0x30)
+#define ESR_ELx_EC_BREAKPT_CUR (0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
+#define ESR_ELx_EC_WATCHPT_LOW (0x34)
+#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32 (0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32 (0x3A)
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64 (0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX (0x3F)
+
+#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+#define ESR_ELx_IL (UL(1) << 25)
+#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+
+/* ISS field definitions shared by different classes */
+#define ESR_ELx_WNR (UL(1) << 6)
+
+/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_EA (UL(1) << 9)
+#define ESR_ELx_S1PTW (UL(1) << 7)
+
+/* Shared ISS fault status code (IFSC/DFSC) for Data/Instruction aborts */
+#define ESR_ELx_FSC (0x3F)
+#define ESR_ELx_FSC_TYPE (0x3C)
+#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_ACCESS (0x08)
+#define ESR_ELx_FSC_FAULT (0x04)
+#define ESR_ELx_FSC_PERM (0x0C)
+
+/* ISS field definitions for Data Aborts */
+#define ESR_ELx_ISV (UL(1) << 24)
+#define ESR_ELx_SAS_SHIFT (22)
+#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
+#define ESR_ELx_SSE (UL(1) << 21)
+#define ESR_ELx_SRT_SHIFT (16)
+#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
+#define ESR_ELx_SF (UL(1) << 15)
+#define ESR_ELx_AR (UL(1) << 14)
+#define ESR_ELx_CM (UL(1) << 8)
+
+/* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_CV (UL(1) << 24)
+#define ESR_ELx_COND_SHIFT (20)
+#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_WFE
(UL(1) << 0) +#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) + +/* ESR value templates for specific events */ + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_VAL_BRK64(imm) \ + ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ + ((imm) & 0xffff)) + +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#endif /* __ASM_ESR_H */ diff --git a/arch/arm64/kernel/include/fpsimd.h b/arch/arm64/kernel/include/fpsimd.h new file mode 100644 index 00000000..bf9c14d2 --- /dev/null +++ b/arch/arm64/kernel/include/fpsimd.h @@ -0,0 +1,99 @@ +/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#ifndef __HEADER_ARM64_COMMON_FPSIMD_H +#define __HEADER_ARM64_COMMON_FPSIMD_H + +#include + +#ifndef __ASSEMBLY__ + +/* + * FP/SIMD storage area has: + * - FPSR and FPCR + * - 32 128-bit data registers + * + * Note that user_fpsimd forms a prefix of this structure, which is + * relied upon in the ptrace FP/SIMD accessors. 
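+ *
+ * For example, code that has a struct fpsimd_state at hand can pass
+ * &state->user_fpsimd wherever a struct user_fpsimd_state is
+ * expected; both union views alias the same vregs/fpsr/fpcr storage.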
+ */ +/* @ref.impl arch/arm64/include/asm/fpsimd.h::struct fpsimd_state */ +struct fpsimd_state { + union { + struct user_fpsimd_state user_fpsimd; + struct { + __uint128_t vregs[32]; + unsigned int fpsr; + unsigned int fpcr; + /* + * For ptrace compatibility, pad to next 128-bit + * boundary here if extending this struct. + */ + }; + }; + /* the id of the last cpu to have restored this state */ + unsigned int cpu; +}; + +/* need for struct process */ +typedef struct fpsimd_state fp_regs_struct; + +extern void thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs); +extern void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs); + +#ifdef CONFIG_ARM64_SVE + +extern size_t sve_state_size(struct thread const *thread); +extern void sve_free(struct thread *thread); +extern void sve_alloc(struct thread *thread); +extern void sve_save_state(void *state, unsigned int *pfpsr); +extern void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1); +extern unsigned int sve_get_vl(void); +extern int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length, const unsigned long flags); +extern int sve_get_thread_vl(const struct thread *thread); +extern int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags); + +#define SVE_SET_VL(thread, vector_length, flags) sve_set_thread_vl(thread, vector_length, flags) +#define SVE_GET_VL(thread) sve_get_thread_vl(thread) + +#else /* CONFIG_ARM64_SVE */ + +#include +#include + +static void sve_save_state(void *state, unsigned int *pfpsr) +{ + panic("PANIC:sve_save_state() was called CONFIG_ARM64_SVE off.\n"); +} + +static void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1) +{ + panic("PANIC:sve_load_state() was called CONFIG_ARM64_SVE off.\n"); +} + +static unsigned int sve_get_vl(void) +{ + panic("PANIC:sve_get_vl() was called CONFIG_ARM64_SVE off.\n"); + return (unsigned int)-1; +} + +static int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags) +{ + return -EINVAL; +} + +/* for prctl syscall */ +#define SVE_SET_VL(a,b,c) (-EINVAL) +#define SVE_GET_VL(a) (-EINVAL) + +#endif /* CONFIG_ARM64_SVE */ + +extern void init_sve_vl(void); +extern void fpsimd_save_state(struct fpsimd_state *state); +extern void fpsimd_load_state(struct fpsimd_state *state); +extern void thread_fpsimd_save(struct thread *thread); +extern void thread_fpsimd_load(struct thread *thread); + +extern int sve_max_vl; +extern int sve_default_vl; + +#endif /* !__ASSEMBLY__ */ + +#endif /* !__HEADER_ARM64_COMMON_FPSIMD_H */ diff --git a/arch/arm64/kernel/include/fpsimdmacros.h b/arch/arm64/kernel/include/fpsimdmacros.h new file mode 100644 index 00000000..bb71107c --- /dev/null +++ b/arch/arm64/kernel/include/fpsimdmacros.h @@ -0,0 +1,151 @@ +/* fpsimdmacros.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ + +.macro _check_reg nr + .if (\nr) < 0 || (\nr) > 31 + .error "Bad register number \nr." + .endif +.endm + +.macro _check_zreg znr + .if (\znr) < 0 || (\znr) > 31 + .error "Bad Scalable Vector Extension vector register number \znr." + .endif +.endm + +.macro _check_preg pnr + .if (\pnr) < 0 || (\pnr) > 15 + .error "Bad Scalable Vector Extension predicate register number \pnr." 
+ .endif +.endm + +.macro _check_num n, min, max + .if (\n) < (\min) || (\n) > (\max) + .error "Number \n out of range [\min,\max]" + .endif +.endm + +.macro _zstrv znt, nspb, ioff=0 + _check_zreg \znt + _check_reg \nspb + _check_num (\ioff), -0x100, 0xff + .inst 0xe5804000 \ + | (\znt) \ + | ((\nspb) << 5) \ + | (((\ioff) & 7) << 10) \ + | (((\ioff) & 0x1f8) << 13) +.endm + +.macro _zldrv znt, nspb, ioff=0 + _check_zreg \znt + _check_reg \nspb + _check_num (\ioff), -0x100, 0xff + .inst 0x85804000 \ + | (\znt) \ + | ((\nspb) << 5) \ + | (((\ioff) & 7) << 10) \ + | (((\ioff) & 0x1f8) << 13) +.endm + +.macro _zstrp pnt, nspb, ioff=0 + _check_preg \pnt + _check_reg \nspb + _check_num (\ioff), -0x100, 0xff + .inst 0xe5800000 \ + | (\pnt) \ + | ((\nspb) << 5) \ + | (((\ioff) & 7) << 10) \ + | (((\ioff) & 0x1f8) << 13) +.endm + +.macro _zldrp pnt, nspb, ioff=0 + _check_preg \pnt + _check_reg \nspb + _check_num (\ioff), -0x100, 0xff + .inst 0x85800000 \ + | (\pnt) \ + | ((\nspb) << 5) \ + | (((\ioff) & 7) << 10) \ + | (((\ioff) & 0x1f8) << 13) +.endm + +.macro _zrdvl nspd, is1 + _check_reg \nspd + _check_num (\is1), -0x20, 0x1f + .inst 0x04bf5000 \ + | (\nspd) \ + | (((\is1) & 0x3f) << 5) +.endm + +.macro _zrdffr pnd + _check_preg \pnd + .inst 0x2519f000 \ + | (\pnd) +.endm + +.macro _zwrffr pnd + _check_preg \pnd + .inst 0x25289000 \ + | ((\pnd) << 5) +.endm + +.macro for from, to, insn + .if (\from) >= (\to) + \insn (\from) + .exitm + .endif + + for \from, ((\from) + (\to)) / 2, \insn + for ((\from) + (\to)) / 2 + 1, \to, \insn +.endm + +.macro sve_save nb, xpfpsr, ntmp + .macro savez n + _zstrv \n, \nb, (\n) - 34 + .endm + + .macro savep n + _zstrp \n, \nb, (\n) - 16 + .endm + + for 0, 31, savez + for 0, 15, savep + _zrdffr 0 + _zstrp 0, \nb + _zldrp 0, \nb, -16 + + mrs x\ntmp, fpsr + str w\ntmp, [\xpfpsr] + mrs x\ntmp, fpcr + str w\ntmp, [\xpfpsr, #4] + + .purgem savez + .purgem savep +.endm + +.macro sve_load nb, xpfpsr, xvqminus1 ntmp + mrs_s x\ntmp, SYS_ZCR_EL1 + bic x\ntmp, x\ntmp, ZCR_EL1_LEN_MASK + orr x\ntmp, x\ntmp, \xvqminus1 + msr_s SYS_ZCR_EL1, x\ntmp // self-synchronising + + .macro loadz n + _zldrv \n, \nb, (\n) - 34 + .endm + + .macro loadp n + _zldrp \n, \nb, (\n) - 16 + .endm + + for 0, 31, loadz + _zldrp 0, \nb + _zwrffr 0 + for 0, 15, loadp + + ldr w\ntmp, [\xpfpsr] + msr fpsr, x\ntmp + ldr w\ntmp, [\xpfpsr, #4] + msr fpcr, x\ntmp + + .purgem loadz + .purgem loadp +.endm diff --git a/arch/arm64/kernel/include/hw_breakpoint.h b/arch/arm64/kernel/include/hw_breakpoint.h new file mode 100644 index 00000000..2e8c8175 --- /dev/null +++ b/arch/arm64/kernel/include/hw_breakpoint.h @@ -0,0 +1,92 @@ +/* hw_breakpoint.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_COMMON_HW_BREAKPOINT_H +#define __HEADER_ARM64_COMMON_HW_BREAKPOINT_H + +#include + +int hw_breakpoint_slots(int type); +unsigned long read_wb_reg(int reg, int n); +void write_wb_reg(int reg, int n, unsigned long val); +void hw_breakpoint_reset(void); +void arch_hw_breakpoint_init(void); + +struct user_hwdebug_state; +int arch_validate_hwbkpt_settings(long note_type, struct user_hwdebug_state *hws, size_t len); + +extern int core_num_brps; +extern int core_num_wrps; + +/* @ref.impl include/uapi/linux/hw_breakpoint.h::HW_BREAKPOINT_LEN_n, HW_BREAKPOINT_xxx, bp_type_idx */ +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | 
HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, +}; + +enum bp_type_idx { + TYPE_INST = 0, + TYPE_DATA = 1, + TYPE_MAX +}; + +/* Breakpoint */ +#define ARM_BREAKPOINT_EXECUTE 0 + +/* Watchpoints */ +#define ARM_BREAKPOINT_LOAD 1 +#define ARM_BREAKPOINT_STORE 2 +#define AARCH64_ESR_ACCESS_MASK (1 << 6) + +/* Privilege Levels */ +#define AARCH64_BREAKPOINT_EL1 1 +#define AARCH64_BREAKPOINT_EL0 2 + +/* Lengths */ +#define ARM_BREAKPOINT_LEN_1 0x1 +#define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_8 0xff + +/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::ARM_MAX_[BRP|WRP] */ +/* + * Limits. + * Changing these will require modifications to the register accessors. + */ +#define ARM_MAX_BRP 16 +#define ARM_MAX_WRP 16 + +/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_REG_xxx */ +/* Virtual debug register bases. */ +#define AARCH64_DBG_REG_BVR 0 +#define AARCH64_DBG_REG_BCR (AARCH64_DBG_REG_BVR + ARM_MAX_BRP) +#define AARCH64_DBG_REG_WVR (AARCH64_DBG_REG_BCR + ARM_MAX_BRP) +#define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP) + +/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_REG_NAME_xxx */ +/* Debug register names. */ +#define AARCH64_DBG_REG_NAME_BVR "bvr" +#define AARCH64_DBG_REG_NAME_BCR "bcr" +#define AARCH64_DBG_REG_NAME_WVR "wvr" +#define AARCH64_DBG_REG_NAME_WCR "wcr" + +/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_[READ|WRITE] */ +/* Accessor macros for the debug registers. */ +#define AARCH64_DBG_READ(N, REG, VAL) do {\ + asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\ +} while (0) + +#define AARCH64_DBG_WRITE(N, REG, VAL) do {\ + asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\ +} while (0) + +#endif /* !__HEADER_ARM64_COMMON_HW_BREAKPOINT_H */ diff --git a/arch/arm64/kernel/include/hwcap.h b/arch/arm64/kernel/include/hwcap.h new file mode 100644 index 00000000..a8562a0f --- /dev/null +++ b/arch/arm64/kernel/include/hwcap.h @@ -0,0 +1,28 @@ +/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */ +#ifdef POSTK_DEBUG_ARCH_DEP_65 +#ifndef _UAPI__ASM_HWCAP_H +#define _UAPI__ASM_HWCAP_H + +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#define HWCAP_FP (1 << 0) +#define HWCAP_ASIMD (1 << 1) +#define HWCAP_EVTSTRM (1 << 2) +#define HWCAP_AES (1 << 3) +#define HWCAP_PMULL (1 << 4) +#define HWCAP_SHA1 (1 << 5) +#define HWCAP_SHA2 (1 << 6) +#define HWCAP_CRC32 (1 << 7) +#define HWCAP_ATOMICS (1 << 8) +#define HWCAP_FPHP (1 << 9) +#define HWCAP_ASIMDHP (1 << 10) +#define HWCAP_CPUID (1 << 11) +#define HWCAP_ASIMDRDM (1 << 12) +#define HWCAP_SVE (1 << 13) + +unsigned long arch_get_hwcap(void); +extern unsigned long elf_hwcap; + +#endif /* _UAPI__ASM_HWCAP_H */ +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ diff --git a/arch/arm64/kernel/include/ihk/atomic.h b/arch/arm64/kernel/include/ihk/atomic.h new file mode 100644 index 00000000..ed3a8032 --- /dev/null +++ b/arch/arm64/kernel/include/ihk/atomic.h @@ -0,0 +1,363 @@ +/* atomic.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_IHK_ATOMIC_H +#define __HEADER_ARM64_IHK_ATOMIC_H + +#include + +/*********************************************************************** + * ihk_atomic_t + */ + +typedef struct { + int counter; +} ihk_atomic_t; + +#define IHK_ATOMIC_INIT(i) { (i) } + +static inline int ihk_atomic_read(const ihk_atomic_t *v) +{ + return (*(volatile int *)&(v)->counter); +} + +static inline void ihk_atomic_set(ihk_atomic_t *v, int i) +{ + 
v->counter = i; +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_add (atomic_##op) */ +static inline void ihk_atomic_add(int i, ihk_atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_add\n" +"1: ldxr %w0, %2\n" +" add %w0, %w0, %w3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : "Ir" (i)); +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_sub (atomic_##op) */ +static inline void ihk_atomic_sub(int i, ihk_atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_sub\n" +"1: ldxr %w0, %2\n" +" sub %w0, %w0, %w3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : "Ir" (i)); +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc */ +#define ihk_atomic_inc(v) ihk_atomic_add(1, v) + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec */ +#define ihk_atomic_dec(v) ihk_atomic_sub(1, v) + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_add_return (atomic_##op##_return) */ +static inline int ihk_atomic_add_return(int i, ihk_atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_add_return\n" +"1: ldxr %w0, %2\n" +" add %w0, %w0, %w3\n" +" stlxr %w1, %w0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : "Ir" (i) + : "memory"); + + smp_mb(); + return result; +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_sub_return (atomic_##op##_return) */ +static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v) +{ + unsigned long tmp; + int result; + + asm volatile("// atomic_sub_return\n" +"1: ldxr %w0, %2\n" +" sub %w0, %w0, %w3\n" +" stlxr %w1, %w0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : "Ir" (i) + : "memory"); + + smp_mb(); + return result; +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc_and_test */ +#define ihk_atomic_inc_and_test(v) (ihk_atomic_add_return(1, v) == 0) + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec_and_test */ +#define ihk_atomic_dec_and_test(v) (ihk_atomic_sub_return(1, v) == 0) + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc_return */ +#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v)) + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec_return */ +#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v)) + +/*********************************************************************** + * ihk_atomic64_t + */ +typedef struct { + long counter64; +} ihk_atomic64_t; + +#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) } + +static inline long ihk_atomic64_read(const ihk_atomic64_t *v) +{ + return *(volatile long *)&(v)->counter64; +} + +static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i) +{ + v->counter64 = i; +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_add (atomic64_##op) */ +static inline void ihk_atomic64_add(long i, ihk_atomic64_t *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_add\n" +"1: ldxr %0, %2\n" +" add %0, %0, %3\n" +" stxr %w1, %0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter64) + : "Ir" (i)); +} + +/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_inc */ +#define ihk_atomic64_inc(v) ihk_atomic64_add(1LL, (v)) + +/*********************************************************************** + * others + */ +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__xchg */ +static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned 
long ret = 0, tmp; + + switch (size) { + case 1: + asm volatile("// __xchg1\n" + "1: ldxrb %w0, %2\n" + " stlxrb %w1, %w3, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned char *)ptr) + : "r" (x) + : "memory"); + break; + case 2: + asm volatile("// __xchg2\n" + "1: ldxrh %w0, %2\n" + " stlxrh %w1, %w3, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned short *)ptr) + : "r" (x) + : "memory"); + break; + case 4: + asm volatile("// __xchg4\n" + "1: ldxr %w0, %2\n" + " stlxr %w1, %w3, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned int *)ptr) + : "r" (x) + : "memory"); + break; + case 8: + asm volatile("// __xchg8\n" + "1: ldxr %0, %2\n" + " stlxr %w1, %3, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) + : "r" (x) + : "memory"); + break; +/* + default: + BUILD_BUG(); +*/ + } + + smp_mb(); + return ret; +} + +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::xchg */ +#define xchg(ptr,x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) + +#define xchg4(ptr, x) xchg(ptr,x) +#define xchg8(ptr, x) xchg(ptr,x) + +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg */ +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long oldval = 0, res; + + switch (size) { + case 1: + do { + asm volatile("// __cmpxchg1\n" + " ldxrb %w1, %2\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxrb %w0, %w4, %2\n" + "1:\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned char *)ptr) + : "Ir" (old), "r" (new) : "cc"); + } while (res); + break; + + case 2: + do { + asm volatile("// __cmpxchg2\n" + " ldxrh %w1, %2\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxrh %w0, %w4, %2\n" + "1:\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned short *)ptr) + : "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; + + case 4: + do { + asm volatile("// __cmpxchg4\n" + " ldxr %w1, %2\n" + " mov %w0, #0\n" + " cmp %w1, %w3\n" + " b.ne 1f\n" + " stxr %w0, %w4, %2\n" + "1:\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned int *)ptr) + : "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; + + case 8: + do { + asm volatile("// __cmpxchg8\n" + " ldxr %1, %2\n" + " mov %w0, #0\n" + " cmp %1, %3\n" + " b.ne 1f\n" + " stxr %w0, %4, %2\n" + "1:\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned long *)ptr) + : "Ir" (old), "r" (new) + : "cc"); + } while (res); + break; +/* + default: + BUILD_BUG(); +*/ + } + + return oldval; +} + +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg_mb */ +static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + smp_mb(); + ret = __cmpxchg(ptr, old, new, size); + smp_mb(); + + return ret; +} + +/* @ref.impl arch/arm64/include/asm/cmpxchg.h::cmpxchg */ +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) + +#define atomic_cmpxchg4(ptr, o, n) cmpxchg(ptr,o,n) +#define atomic_cmpxchg8(ptr, o, n) cmpxchg(ptr,o,n) + +static inline void ihk_atomic_add_long(long i, long *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_add\n" +"1: ldxr %0, %2\n" +" add %0, %0, %3\n" +" stxr %w1, %0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (*v) + 
: "Ir" (i)); +} + +static inline void ihk_atomic_add_ulong(long i, unsigned long *v) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_add\n" +"1: ldxr %0, %2\n" +" add %0, %0, %3\n" +" stxr %w1, %0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (*v) + : "Ir" (i)); +} + +static inline unsigned long ihk_atomic_add_long_return(long i, long *v) +{ + unsigned long result; + unsigned long tmp; + + asm volatile("// atomic64_add_return\n" +"1: ldxr %0, %2\n" +" add %0, %0, %3\n" +" stlxr %w1, %0, %2\n" +" cbnz %w1, 1b" + : "=&r" (result), "=&r" (tmp), "+Q" (*v) + : "Ir" (i) + : "memory"); + + smp_mb(); + return result; +} + +#endif /* !__HEADER_ARM64_COMMON_IHK_ATOMIC_H */ diff --git a/arch/arm64/kernel/include/ihk/context.h b/arch/arm64/kernel/include/ihk/context.h new file mode 100644 index 00000000..cf21cd44 --- /dev/null +++ b/arch/arm64/kernel/include/ihk/context.h @@ -0,0 +1,80 @@ +/* context.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_IHK_CONTEXT_H +#define __HEADER_ARM64_IHK_CONTEXT_H + +#include + +struct thread_info; +typedef struct { + struct thread_info *thread; +} ihk_mc_kernel_context_t; + +struct user_pt_regs { + unsigned long regs[31]; + unsigned long sp; + unsigned long pc; + unsigned long pstate; +}; + +struct pt_regs { + union { + struct user_pt_regs user_regs; + struct { + unsigned long regs[31]; + unsigned long sp; + unsigned long pc; + unsigned long pstate; + }; + }; + unsigned long orig_x0; + unsigned long syscallno; +}; + +typedef struct pt_regs ihk_mc_user_context_t; + +/* @ref.impl arch/arm64/include/asm/ptrace.h */ +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((uint64_t) (value))) + +/* @ref.impl arch/arm64/include/asm/ptrace.h */ +/* AArch32 CPSR bits */ +#define COMPAT_PSR_MODE_MASK 0x0000001f + +/* @ref.impl include/asm-generic/ptrace.h */ +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return GET_IP(regs); +} +/* @ref.impl include/asm-generic/ptrace.h */ +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + SET_IP(regs, val); +} + +/* @ref.impl arch/arm64/include/asm/ptrace.h */ +/* + * Write a register given an architectural register index r. + * This handles the common case where 31 means XZR, not SP. 
+ */ +static inline void pt_regs_write_reg(struct pt_regs *regs, int r, + unsigned long val) +{ + if (r != 31) + regs->regs[r] = val; +} + +/* temp */ +#define ihk_mc_syscall_arg0(uc) (uc)->regs[0] +#define ihk_mc_syscall_arg1(uc) (uc)->regs[1] +#define ihk_mc_syscall_arg2(uc) (uc)->regs[2] +#define ihk_mc_syscall_arg3(uc) (uc)->regs[3] +#define ihk_mc_syscall_arg4(uc) (uc)->regs[4] +#define ihk_mc_syscall_arg5(uc) (uc)->regs[5] + +#define ihk_mc_syscall_ret(uc) (uc)->regs[0] + +#define ihk_mc_syscall_pc(uc) (uc)->pc +#define ihk_mc_syscall_sp(uc) (uc)->sp + +#endif /* !__HEADER_ARM64_IHK_CONTEXT_H */ diff --git a/arch/arm64/kernel/include/ihk/ikc.h b/arch/arm64/kernel/include/ihk/ikc.h new file mode 100644 index 00000000..fb64ade9 --- /dev/null +++ b/arch/arm64/kernel/include/ihk/ikc.h @@ -0,0 +1,14 @@ +/* ikc.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_IHK_IKC_H +#define __HEADER_ARM64_IHK_IKC_H + +#include + +#define IKC_PORT_IKC2MCKERNEL 501 +#define IKC_PORT_IKC2LINUX 503 + +/* manycore side */ +int ihk_mc_ikc_init_first(struct ihk_ikc_channel_desc *, + ihk_ikc_ph_t handler); + +#endif /* !__HEADER_ARM64_IHK_IKC_H */ diff --git a/arch/arm64/kernel/include/ihk/types.h b/arch/arm64/kernel/include/ihk/types.h new file mode 100644 index 00000000..5bd93996 --- /dev/null +++ b/arch/arm64/kernel/include/ihk/types.h @@ -0,0 +1,35 @@ +/* types.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_IHK_TYPES_H +#define __HEADER_ARM64_IHK_TYPES_H + +#ifndef __ASSEMBLY__ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef signed long long int64_t; + +typedef int64_t ptrdiff_t; +typedef int64_t intptr_t; +typedef uint64_t uintptr_t; +typedef uint64_t size_t; +typedef int64_t ssize_t; +typedef int64_t off_t; + +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +typedef int32_t key_t; +typedef uint32_t uid_t; +typedef uint32_t gid_t; +typedef int64_t time_t; +typedef int32_t pid_t; +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ + +#endif /* __ASSEMBLY__ */ + +#define NULL ((void *)0) + +#endif /* !__HEADER_ARM64_IHK_TYPES_H */ diff --git a/arch/arm64/kernel/include/io.h b/arch/arm64/kernel/include/io.h new file mode 100644 index 00000000..3150f284 --- /dev/null +++ b/arch/arm64/kernel/include/io.h @@ -0,0 +1,99 @@ +/* io.h COPYRIGHT FUJITSU LIMITED 2015 */ +/* + * Based on arch/arm/include/asm/io.h + * + * Copyright (C) 1996-2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_IO_H +#define __ASM_IO_H + +#include + +#ifdef __KERNEL__ + +/* + * Generic IO read/write. These perform native-endian accesses. 
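+ *
+ * A sketch of intended use (dist_base and the GICD_* offsets below
+ * stand in for a mapped device base and register offsets, e.g. from
+ * the GIC headers added by this patch):
+ *
+ *	__raw_writel(0x1, dist_base + GICD_CTLR);
+ *	uint32_t typer = __raw_readl(dist_base + GICD_TYPER);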
+ */ +static inline void __raw_writeb(uint8_t val, volatile void *addr) +{ + asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writew(uint16_t val, volatile void *addr) +{ + asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writel(uint32_t val, volatile void *addr) +{ + asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline void __raw_writeq(uint64_t val, volatile void *addr) +{ + asm volatile("str %0, [%1]" : : "r" (val), "r" (addr)); +} + +static inline uint8_t __raw_readb(const volatile void *addr) +{ + uint8_t val; + asm volatile("ldarb %w0, [%1]" + : "=r" (val) : "r" (addr)); + return val; +} + +static inline uint16_t __raw_readw(const volatile void *addr) +{ + uint16_t val; + + asm volatile("ldarh %w0, [%1]" + : "=r" (val) : "r" (addr)); + return val; +} + +static inline uint32_t __raw_readl(const volatile void *addr) +{ + uint32_t val; + asm volatile("ldar %w0, [%1]" + : "=r" (val) : "r" (addr)); + return val; +} + +static inline uint64_t __raw_readq(const volatile void *addr) +{ + uint64_t val; + asm volatile("ldar %0, [%1]" + : "=r" (val) : "r" (addr)); + return val; +} + +/* + * Relaxed I/O memory access primitives. These follow the Device memory + * ordering rules but do not guarantee any ordering relative to Normal memory + * accesses. + */ +#define readb_relaxed(c) ({ uint8_t __v = (uint8_t)__raw_readb(c); __v; }) +#define readw_relaxed(c) ({ uint16_t __v = (uint16_t)__raw_readw(c); __v; }) +#define readl_relaxed(c) ({ uint32_t __v = (uint32_t)__raw_readl(c); __v; }) +#define readq_relaxed(c) ({ uint64_t __v = (uint64_t)__raw_readq(c); __v; }) + +#define writeb_relaxed(v,c) ((void)__raw_writeb((uint8_t)(v),(c))) +#define writew_relaxed(v,c) ((void)__raw_writew((uint16_t)(v),(c))) +#define writel_relaxed(v,c) ((void)__raw_writel((uint32_t)(v),(c))) +#define writeq_relaxed(v,c) ((void)__raw_writeq((uint64_t)(v),(c))) + +#endif /* __KERNEL__ */ +#endif /* __ASM_IO_H */ diff --git a/arch/arm64/kernel/include/irq.h b/arch/arm64/kernel/include/irq.h new file mode 100644 index 00000000..91fd6ffd --- /dev/null +++ b/arch/arm64/kernel/include/irq.h @@ -0,0 +1,70 @@ +/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#ifndef __HEADER_ARM64_IRQ_H +#define __HEADER_ARM64_IRQ_H + +#include +#include +#include +#include + +/* use SGI interrupt number */ +#define INTRID_CPU_NOTIFY 0 +#define INTRID_IKC 1 +#define INTRID_QUERY_FREE_MEM 2 +#define INTRID_CPU_STOP 3 +#define INTRID_TLB_FLUSH 4 +#define INTRID_STACK_TRACE 6 +#define INTRID_MEMDUMP 7 + +/* use PPI interrupt number */ +#define INTRID_HYP_PHYS_TIMER 26 /* cnthp */ +#define INTRID_VIRT_TIMER 27 /* cntv */ +#define INTRID_HYP_VIRT_TIMER 28 /* cnthv */ +#define INTRID_PHYS_TIMER 30 /* cntp */ + +/* timer intrid getter */ +static int get_virt_timer_intrid(void) +{ +#ifdef CONFIG_ARM64_VHE + unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1); + + if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) { + return INTRID_HYP_VIRT_TIMER; + } +#endif /* CONFIG_ARM64_VHE */ + return INTRID_VIRT_TIMER; +} + +static int get_phys_timer_intrid(void) +{ +#ifdef CONFIG_ARM64_VHE + unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1); + + if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) { + return INTRID_HYP_PHYS_TIMER; + } +#endif /* CONFIG_ARM64_VHE */ + return INTRID_PHYS_TIMER; +} + +/* use timer checker */ +extern unsigned long is_use_virt_timer(void); + +/* Functions for GICv2 */ +extern void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size); 
+extern void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size); +extern void gic_enable_gicv2(void); +extern void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector); +extern void handle_interrupt_gicv2(struct pt_regs *regs); + +/* Functions for GICv3 */ +extern void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size); +extern void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size); +extern void gic_enable_gicv3(void); +extern void arm64_issue_ipi_gicv3(unsigned int cpuid, unsigned int vector); +extern void handle_interrupt_gicv3(struct pt_regs *regs); + +void handle_IPI(unsigned int vector, struct pt_regs *regs); + +#endif /* __HEADER_ARM64_IRQ_H */ diff --git a/arch/arm64/kernel/include/irqflags.h b/arch/arm64/kernel/include/irqflags.h new file mode 100644 index 00000000..b482d4fe --- /dev/null +++ b/arch/arm64/kernel/include/irqflags.h @@ -0,0 +1,31 @@ +/* irqflags.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_IRQFLAGS_H +#define __HEADER_ARM64_COMMON_IRQFLAGS_H + +#include + +/* + * save and restore debug state + */ +static inline unsigned long local_dbg_save(void) +{ + unsigned long flags; + asm volatile( + "mrs %0, daif // local_dbg_save\n" + "msr daifset, #8" + : "=r" (flags) + : + : "memory"); + return flags; +} + +static inline void local_dbg_restore(unsigned long flags) +{ + asm volatile( + "msr daif, %0 // local_dbg_restore" + : + : "r" (flags) + : "memory"); +} + +#endif /* !__HEADER_ARM64_COMMON_IRQFLAGS_H */ diff --git a/arch/arm64/kernel/include/linkage.h b/arch/arm64/kernel/include/linkage.h new file mode 100644 index 00000000..f0858667 --- /dev/null +++ b/arch/arm64/kernel/include/linkage.h @@ -0,0 +1,25 @@ +/* linkage.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_COMMON_LINKAGE_H +#define __HEADER_ARM64_COMMON_LINKAGE_H + +#include +#include + +#define ASM_NL ; + +#define __ALIGN .align 4 +#define __ALIGN_STR ".align 4" + +#define ENTRY(name) \ + .globl name ASM_NL \ + __ALIGN ASM_NL \ + name: + +#define END(name) \ + .size name, .-name + +#define ENDPROC(name) \ + .type name, @function ASM_NL \ + END(name) + +#endif /* !__HEADER_ARM64_COMMON_LINKAGE_H */ diff --git a/arch/arm64/kernel/include/mmu_context.h b/arch/arm64/kernel/include/mmu_context.h new file mode 100644 index 00000000..5db58e8f --- /dev/null +++ b/arch/arm64/kernel/include/mmu_context.h @@ -0,0 +1,22 @@ +/* mmu_context.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_MMU_CONTEXT_H +#define __HEADER_ARM64_COMMON_MMU_CONTEXT_H + +#include +#include + +/* + * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. + */ +static inline void cpu_set_reserved_ttbr0(void) +{ + unsigned long ttbr = virt_to_phys(empty_zero_page); + + asm( + " msr ttbr0_el1, %0 // set TTBR0\n" + " isb" + : + : "r" (ttbr)); +} + +#endif /* !__HEADER_ARM64_COMMON_MMU_CONTEXT_H */ diff --git a/arch/arm64/kernel/include/pgtable-hwdef.h b/arch/arm64/kernel/include/pgtable-hwdef.h new file mode 100644 index 00000000..599ce6ef --- /dev/null +++ b/arch/arm64/kernel/include/pgtable-hwdef.h @@ -0,0 +1,196 @@ +/* pgtable-hwdef.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_PGTABLE_HWDEF_H +#define __HEADER_ARM64_COMMON_PGTABLE_HWDEF_H + +#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H +# error arch-memory.h +#endif + +#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) + +/* + * PMD_SHIFT determines the size a level 2 page table entry can map. 
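+ *
+ * For instance, with 4KB pages (PAGE_SHIFT = 12) this gives
+ * PMD_SHIFT = (12 - 3) * 2 + 3 = 21, so a level 2 block maps 2MB;
+ * with 64KB pages (PAGE_SHIFT = 16) it maps 512MB.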
+ */
+#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+# define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3)
+# define PMD_SIZE (1UL << PMD_SHIFT)
+# define PMD_MASK (~(PMD_SIZE-1))
+# define PTRS_PER_PMD PTRS_PER_PTE
+#endif
+
+/*
+ * PUD_SHIFT determines the size a level 1 page table entry can map.
+ */
+#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+# define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3)
+# define PUD_SIZE (1UL << PUD_SHIFT)
+# define PUD_MASK (~(PUD_SIZE-1))
+# define PTRS_PER_PUD PTRS_PER_PTE
+#endif
+
+/*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * (depending on the configuration, this level can be 0, 1 or 2).
+ */
+#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3)
+#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
+
+/*
+ * Section address mask and size definitions.
+ */
+#define SECTION_SHIFT PMD_SHIFT
+#define SECTION_SIZE (UL(1) << SECTION_SHIFT)
+#define SECTION_MASK (~(SECTION_SIZE-1))
+
+/*
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK (UL(3) << 0)
+#define PMD_TYPE_FAULT (UL(0) << 0)
+#define PMD_TYPE_TABLE (UL(3) << 0)
+#define PMD_TYPE_SECT (UL(1) << 0)
+#define PMD_TABLE_BIT (UL(1) << 1)
+
+/*
+ * Table (D_Block)
+ */
+#define PMD_TBL_PXNT (UL(1) << 59)
+#define PMD_TBL_UXNT (UL(1) << 60)
+#define PMD_TBL_APT_USER (UL(1) << 61) /* 0:Access at EL0 permitted, 1:Access at EL0 not permitted */
+#define PMD_TBL_APT_RDONLY (UL(2) << 61) /* 0:read write(EL0-3), 1:read only(EL0-3) */
+#define PMD_TBL_NST (UL(1) << 63) /* 0:secure, 1:non-secure */
+
+/*
+ * Section (D_Page)
+ */
+#define PMD_SECT_VALID (UL(1) << 0)
+#define PMD_SECT_PROT_NONE (UL(1) << 58)
+#define PMD_SECT_USER (UL(1) << 6) /* AP[1] */
+#define PMD_SECT_RDONLY (UL(1) << 7) /* AP[2] */
+#define PMD_SECT_S (UL(3) << 8)
+#define PMD_SECT_AF (UL(1) << 10)
+#define PMD_SECT_NG (UL(1) << 11)
+#define PMD_SECT_PXN (UL(1) << 53)
+#define PMD_SECT_UXN (UL(1) << 54)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PMD_ATTRINDX(t) (UL(t) << 2)
+#define PMD_ATTRINDX_MASK (UL(7) << 2)
+
+/*
+ * Level 3 descriptor (PTE).
+ */
+#define PTE_TYPE_MASK (UL(3) << 0)
+#define PTE_TYPE_FAULT (UL(0) << 0)
+#define PTE_TYPE_PAGE (UL(3) << 0)
+#define PTE_TABLE_BIT (UL(1) << 1)
+#define PTE_USER (UL(1) << 6) /* AP[1] */
+#define PTE_RDONLY (UL(1) << 7) /* AP[2] */
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF (UL(1) << 10) /* Access Flag */
+#define PTE_NG (UL(1) << 11) /* nG */
+#define PTE_PXN (UL(1) << 53) /* Privileged XN */
+#define PTE_UXN (UL(1) << 54) /* User XN */
+/* Software defined PTE bits definition. */
+#define PTE_VALID (UL(1) << 0)
+#define PTE_FILE (UL(1) << 2) /* only when !pte_present() */
+#define PTE_DIRTY (UL(1) << 55)
+#define PTE_SPECIAL (UL(1) << 56)
+#define PTE_WRITE (UL(1) << 57)
+#define PTE_PROT_NONE (UL(1) << 58) /* only when !PTE_VALID */
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) (UL(t) << 2)
+#define PTE_ATTRINDX_MASK (UL(7) << 2)
+
+/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK_SHIFT (48)
+#define PHYS_MASK (((UL(1) << PHYS_MASK_SHIFT) - 1) & PAGE_MASK)
+
+/*
+ * TCR flags.
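+ *
+ * TCR_TxSZ() below encodes the virtual address width into both the
+ * T0SZ and T1SZ fields, e.g. TCR_TxSZ(48) sets both to 64 - 48 = 16
+ * for 48-bit user and kernel virtual address spaces.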
+ */ +#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0)) +#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) +#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) +#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) +#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) +#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) +#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) +#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) +#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) +#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) +#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) +#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) +#define TCR_TG0_4K (UL(0) << 14) +#define TCR_TG0_64K (UL(1) << 14) +#define TCR_TG0_16K (UL(2) << 14) +#define TCR_TG1_16K (UL(1) << 30) +#define TCR_TG1_4K (UL(2) << 30) +#define TCR_TG1_64K (UL(3) << 30) +#define TCR_ASID16 (UL(1) << 36) +#define TCR_TBI0 (UL(1) << 37) + +/* + * Memory types available. + */ +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 + +/* + * page table entry attribute set. + */ +#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define PAGE_KERNEL (_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC (_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) + +#define PAGE_NONE (((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +#endif /* !__HEADER_ARM64_COMMON_PGTABLE_HWDEF_H */ diff --git a/arch/arm64/kernel/include/pgtable.h b/arch/arm64/kernel/include/pgtable.h new file mode 100644 index 00000000..2f02cef2 --- /dev/null +++ b/arch/arm64/kernel/include/pgtable.h @@ -0,0 +1,7 @@ +/* pgtable.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_PGTABLE_H +#define __HEADER_ARM64_COMMON_PGTABLE_H + +extern char 
empty_zero_page[];
+
+#endif /* !__HEADER_ARM64_COMMON_PGTABLE_H */
diff --git a/arch/arm64/kernel/include/prctl.h b/arch/arm64/kernel/include/prctl.h
new file mode 100644
index 00000000..d345e38e
--- /dev/null
+++ b/arch/arm64/kernel/include/prctl.h
@@ -0,0 +1,17 @@
+/* prctl.h COPYRIGHT FUJITSU LIMITED 2017 */
+#ifndef __HEADER_ARM64_COMMON_PRCTL_H
+#define __HEADER_ARM64_COMMON_PRCTL_H
+
+/* arm64 Scalable Vector Extension controls */
+#define PR_SVE_SET_VL 48 /* set task vector length */
+#define PR_SVE_SET_VL_THREAD (1 << 1) /* set just this thread */
+#define PR_SVE_SET_VL_INHERIT (1 << 2) /* inherit across exec */
+#define PR_SVE_SET_VL_ONEXEC (1 << 3) /* defer effect until exec */
+
+#define PR_SVE_GET_VL 49 /* get task vector length */
+/* Decode helpers for the return value from PR_SVE_GET_VL: */
+#define PR_SVE_GET_VL_LEN(ret) ((ret) & 0x3fff) /* vector length */
+#define PR_SVE_GET_VL_INHERIT (PR_SVE_SET_VL_INHERIT << 16)
+/* For convenience, PR_SVE_SET_VL returns the result in the same encoding */
+
+#endif /* !__HEADER_ARM64_COMMON_PRCTL_H */
diff --git a/arch/arm64/kernel/include/psci.h b/arch/arm64/kernel/include/psci.h
new file mode 100644
index 00000000..c6f0e130
--- /dev/null
+++ b/arch/arm64/kernel/include/psci.h
@@ -0,0 +1,68 @@
+/* psci.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
+/* @ref.impl include/uapi/linux/psci.h */
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel
+ */
+
+#ifndef __HEADER_ARM64_PSCI_H
+#define __HEADER_ARM64_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
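+ *
+ * As a worked example of the v0.2 encoding below,
+ * PSCI_0_2_FN64_CPU_ON expands to 0x84000000 + 0x40000000 + 3 =
+ * 0xc4000003, the function ID used to boot a secondary CPU.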
+ */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 + +int psci_init(void); +int psci_cpu_off(void); +int cpu_psci_cpu_boot(unsigned int cpu, unsigned long pc); + +#endif /* __HEADER_ARM64_PSCI_H */ diff --git a/arch/arm64/kernel/include/ptrace.h b/arch/arm64/kernel/include/ptrace.h new file mode 100644 index 00000000..ba033fd8 --- /dev/null +++ b/arch/arm64/kernel/include/ptrace.h @@ -0,0 +1,198 @@ +/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_PTRACE_H +#define __HEADER_ARM64_COMMON_PTRACE_H + +/* + * PSR bits + */ +#define PSR_MODE_EL0t 0x00000000 +#define PSR_MODE_EL1t 0x00000004 +#define PSR_MODE_EL1h 0x00000005 +#define PSR_MODE_EL2t 0x00000008 +#define PSR_MODE_EL2h 0x00000009 +#define PSR_MODE_EL3t 0x0000000c +#define PSR_MODE_EL3h 0x0000000d +#define PSR_MODE_MASK 0x0000000f + +/* AArch32 CPSR bits */ +#define PSR_MODE32_BIT 0x00000010 + +/* AArch64 SPSR bits */ +#define PSR_F_BIT 0x00000040 +#define PSR_I_BIT 0x00000080 +#define PSR_A_BIT 0x00000100 +#define PSR_D_BIT 0x00000200 +#define PSR_Q_BIT 0x08000000 +#define PSR_V_BIT 0x10000000 +#define PSR_C_BIT 0x20000000 +#define PSR_Z_BIT 0x40000000 +#define PSR_N_BIT 0x80000000 + +/* + * Groups of PSR bits + */ +#define PSR_f 0xff000000 /* Flags */ +#define PSR_s 0x00ff0000 /* Status */ +#define PSR_x 0x0000ff00 /* Extension */ +#define PSR_c 0x000000ff /* Control */ + +/* Current Exception Level values, as contained in CurrentEL */ +#define CurrentEL_EL1 (1 << 2) +#define CurrentEL_EL2 (2 << 2) + +/* thread->ptrace_debugreg lower-area and higher-area */ +#define HWS_BREAK 0 +#define HWS_WATCH 1 + +#ifndef __ASSEMBLY__ + +#include + +struct user_hwdebug_state { + uint32_t dbg_info; + uint32_t pad; + struct { + uint64_t addr; + uint32_t ctrl; + uint32_t pad; + } dbg_regs[16]; +}; + +struct user_fpsimd_state { + __uint128_t vregs[32]; + uint32_t fpsr; + uint32_t fpcr; + uint32_t __reserved[2]; +}; + +extern unsigned int ptrace_hbp_get_resource_info(unsigned int note_type); + +/* SVE/FP/SIMD state (NT_ARM_SVE) */ + +struct user_sve_header { + uint32_t size; /* total meaningful regset content in bytes */ + 
uint32_t max_size;	/* maximum possible size for this thread */
+	uint16_t vl;	/* current vector length */
+	uint16_t max_vl;	/* maximum possible vector length */
+	uint16_t flags;
+	uint16_t __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK (1 << 0)
+
+#define SVE_PT_REGS_FPSIMD 0
+#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
+
+#define SVE_PT_VL_THREAD PR_SVE_SET_VL_THREAD
+#define SVE_PT_VL_INHERIT PR_SVE_SET_VL_INHERIT
+#define SVE_PT_VL_ONEXEC PR_SVE_SET_VL_ONEXEC
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header. The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header: SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state. Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags) (sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ *	x	type		description
+ *	-	----		-----------
+ *	ZREGS	\
+ *	ZREG	|
+ *	PREGS	| refer to
+ *	PREG	|
+ *	FFR	/
+ *
+ *	FPSR	uint32_t	FPSR
+ *	FPCR	uint32_t	FPCR
+ *
+ * Additional data might be appended in the future.
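+ *
+ * For illustration only (reader-side sketch; sve_vq_from_vl() is
+ * assumed from the sigcontext definitions referred to above):
+ *
+ *	struct user_sve_header hdr;
+ *	... fetch hdr via PTRACE_GETREGSET with NT_ARM_SVE ...
+ *	size_t total = SVE_PT_SIZE(sve_vq_from_vl(hdr.vl), hdr.flags);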
+ */ + +#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq) +#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq) +#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq) +#define SVE_PT_SVE_FPSR_SIZE sizeof(uint32_t) +#define SVE_PT_SVE_FPCR_SIZE sizeof(uint32_t) + +#define __SVE_SIG_TO_PT(offset) \ + ((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET) + +#define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET + +#define SVE_PT_SVE_ZREGS_OFFSET \ + __SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET) +#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \ + __SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n)) +#define SVE_PT_SVE_ZREGS_SIZE(vq) \ + (SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET) + +#define SVE_PT_SVE_PREGS_OFFSET(vq) \ + __SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq)) +#define SVE_PT_SVE_PREG_OFFSET(vq, n) \ + __SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n)) +#define SVE_PT_SVE_PREGS_SIZE(vq) \ + (SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \ + SVE_PT_SVE_PREGS_OFFSET(vq)) + +#define SVE_PT_SVE_FFR_OFFSET(vq) \ + __SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq)) + +#define SVE_PT_SVE_FPSR_OFFSET(vq) \ + ((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + 15) / 16 * 16) +#define SVE_PT_SVE_FPCR_OFFSET(vq) \ + (SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE) + +/* + * Any future extension appended after FPCR must be aligned to the next + * 128-bit boundary. + */ + +#define SVE_PT_SVE_SIZE(vq, flags) \ + ((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE - \ + SVE_PT_SVE_OFFSET + 15) / 16 * 16) + +#define SVE_PT_SIZE(vq, flags) \ + (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \ + SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \ + : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* !__HEADER_ARM64_COMMON_PTRACE_H */ diff --git a/arch/arm64/kernel/include/registers.h b/arch/arm64/kernel/include/registers.h new file mode 100644 index 00000000..805768e2 --- /dev/null +++ b/arch/arm64/kernel/include/registers.h @@ -0,0 +1,129 @@ +/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifndef __HEADER_ARM64_COMMON_REGISTERS_H +#define __HEADER_ARM64_COMMON_REGISTERS_H + +#include +#include + +#define RFLAGS_CF (1 << 0) +#define RFLAGS_PF (1 << 2) +#define RFLAGS_AF (1 << 4) +#define RFLAGS_ZF (1 << 6) +#define RFLAGS_SF (1 << 7) +#define RFLAGS_TF (1 << 8) +#define RFLAGS_IF (1 << 9) +#define RFLAGS_DF (1 << 10) +#define RFLAGS_OF (1 << 11) +#define RFLAGS_IOPL (3 << 12) +#define RFLAGS_NT (1 << 14) +#define RFLAGS_RF (1 << 16) +#define RFLAGS_VM (1 << 17) +#define RFLAGS_AC (1 << 18) +#define RFLAGS_VIF (1 << 19) +#define RFLAGS_VIP (1 << 20) +#define RFLAGS_ID (1 << 21) + +#define DB6_B0 (1 << 0) +#define DB6_B1 (1 << 1) +#define DB6_B2 (1 << 2) +#define DB6_B3 (1 << 3) +#define DB6_BD (1 << 13) +#define DB6_BS (1 << 14) +#define DB6_BT (1 << 15) + +#define MSR_EFER 0xc0000080 +#define MSR_STAR 0xc0000081 +#define MSR_LSTAR 0xc0000082 +#define MSR_FMASK 0xc0000084 +#define MSR_FS_BASE 0xc0000100 +#define MSR_GS_BASE 0xc0000101 + +#define MSR_IA32_APIC_BASE 0x000000001b +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_IA32_MISC_ENABLE 0x000001a0 +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_IA32_CR_PAT 0x00000277 + + +#define CVAL(event, mask) \ + ((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff)) +#define CVAL2(event, mask, inv, count) \ + ((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff) | \ + ((inv & 1) << 23) | ((count & 0xff) << 24)) + +/* 
AMD */ +#define MSR_PERF_CTL_0 0xc0010000 +#define MSR_PERF_CTR_0 0xc0010004 + +static unsigned long xgetbv(unsigned int index) +{ + return 0; +} + +static void xsetbv(unsigned int index, unsigned long val) +{ +} + +static unsigned long rdpmc(unsigned int counter) +{ + return 0; +} + +static unsigned long rdmsr(unsigned int index) +{ + return 0; +} + +/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */ +static unsigned long rdtsc(void) +{ + unsigned long cval; + + isb(); + asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); + + return cval; +} + +static void set_perfctl(int counter, int event, int mask) +{ +} + +static void start_perfctr(int counter) +{ +} +static void stop_perfctr(int counter) +{ +} + +static void clear_perfctl(int counter) +{ +} + +static void set_perfctr(int counter, unsigned long value) +{ +} + +static unsigned long read_perfctr(int counter) +{ + return 0; +} + +#define ihk_mc_mb() do {} while(0); + +#define REGS_GET_STACK_POINTER(regs) (((struct pt_regs *)regs)->sp) + +enum arm64_pf_error_code { + PF_PROT = 1 << 0, + PF_WRITE = 1 << 1, + PF_USER = 1 << 2, + PF_RSVD = 1 << 3, + PF_INSTR = 1 << 4, + + PF_PATCH = 1 << 29, + PF_POPULATE = 1 << 30, +}; + +#endif /* !__HEADER_ARM64_COMMON_REGISTERS_H */ diff --git a/arch/arm64/kernel/include/rlimit.h b/arch/arm64/kernel/include/rlimit.h new file mode 100644 index 00000000..da2c50a3 --- /dev/null +++ b/arch/arm64/kernel/include/rlimit.h @@ -0,0 +1,96 @@ +/* rlimit.h COPYRIGHT FUJITSU LIMITED 2016 */ +/** + * \file rlimit.h + * License details are found in the file LICENSE. + * \brief + * Kinds of resource limit + * \author Taku Shimosawa \par + * Copyright (C) 2011 - 2012 Taku Shimosawa + */ +/* + * HISTORY + */ + +#ifndef __HEADER_ARM64_COMMON_RLIMIT_H +#define __HEADER_ARM64_COMMON_RLIMIT_H + +/* Kinds of resource limit. */ +enum __rlimit_resource +{ + /* Per-process CPU limit, in seconds. */ + RLIMIT_CPU = 0, +#define RLIMIT_CPU RLIMIT_CPU + + /* Largest file that can be created, in bytes. */ + RLIMIT_FSIZE = 1, +#define RLIMIT_FSIZE RLIMIT_FSIZE + + /* Maximum size of data segment, in bytes. */ + RLIMIT_DATA = 2, +#define RLIMIT_DATA RLIMIT_DATA + + /* Maximum size of stack segment, in bytes. */ + RLIMIT_STACK = 3, +#define RLIMIT_STACK RLIMIT_STACK + + /* Largest core file that can be created, in bytes. */ + RLIMIT_CORE = 4, +#define RLIMIT_CORE RLIMIT_CORE + + /* Largest resident set size, in bytes. + This affects swapping; processes that are exceeding their + resident set size will be more likely to have physical memory + taken from them. */ + __RLIMIT_RSS = 5, +#define RLIMIT_RSS __RLIMIT_RSS + + /* Number of open files. */ + RLIMIT_NOFILE = 7, + __RLIMIT_OFILE = RLIMIT_NOFILE, /* BSD name for same. */ +#define RLIMIT_NOFILE RLIMIT_NOFILE +#define RLIMIT_OFILE __RLIMIT_OFILE + + /* Address space limit. */ + RLIMIT_AS = 9, +#define RLIMIT_AS RLIMIT_AS + + /* Number of processes. */ + __RLIMIT_NPROC = 6, +#define RLIMIT_NPROC __RLIMIT_NPROC + + /* Locked-in-memory address space. */ + __RLIMIT_MEMLOCK = 8, +#define RLIMIT_MEMLOCK __RLIMIT_MEMLOCK + + /* Maximum number of file locks. */ + __RLIMIT_LOCKS = 10, +#define RLIMIT_LOCKS __RLIMIT_LOCKS + + /* Maximum number of pending signals. */ + __RLIMIT_SIGPENDING = 11, +#define RLIMIT_SIGPENDING __RLIMIT_SIGPENDING + + /* Maximum bytes in POSIX message queues. */ + __RLIMIT_MSGQUEUE = 12, +#define RLIMIT_MSGQUEUE __RLIMIT_MSGQUEUE + + /* Maximum nice priority allowed to raise to. + Nice levels 19 .. -20 correspond to 0 .. 
39 + values of this resource limit. */ + __RLIMIT_NICE = 13, +#define RLIMIT_NICE __RLIMIT_NICE + + /* Maximum realtime priority allowed for non-privileged + processes. */ + __RLIMIT_RTPRIO = 14, +#define RLIMIT_RTPRIO __RLIMIT_RTPRIO + + __RLIMIT_NLIMITS = 15, + __RLIM_NLIMITS = __RLIMIT_NLIMITS +#define RLIMIT_NLIMITS __RLIMIT_NLIMITS +#define RLIM_NLIMITS __RLIM_NLIMITS +}; + +#include + +#endif diff --git a/arch/arm64/kernel/include/signal.h b/arch/arm64/kernel/include/signal.h new file mode 100644 index 00000000..cd9bff0a --- /dev/null +++ b/arch/arm64/kernel/include/signal.h @@ -0,0 +1,409 @@ +/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_SIGNAL_H +#define __HEADER_ARM64_COMMON_SIGNAL_H + +#include +#include + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long int __sigset_t; +#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1)) + +typedef struct { + __sigset_t __val[_NSIG_WORDS]; +} sigset_t; + +#define SIG_BLOCK 0 +#define SIG_UNBLOCK 1 +#define SIG_SETMASK 2 + +struct sigaction { + void (*sa_handler)(int); + unsigned long sa_flags; + void (*sa_restorer)(int); + sigset_t sa_mask; +}; + +typedef void __sig_fn_t(int); +typedef __sig_fn_t *__sig_handler_t; +#define SIG_DFL (__sig_handler_t)0 +#define SIG_IGN (__sig_handler_t)1 +#define SIG_ERR (__sig_handler_t)-1 + +#define SA_NOCLDSTOP 0x00000001U +#define SA_NOCLDWAIT 0x00000002U +#define SA_NODEFER 0x40000000U +#define SA_ONSTACK 0x08000000U +#define SA_RESETHAND 0x80000000U +#define SA_RESTART 0x10000000U +#define SA_SIGINFO 0x00000004U + +/* Required for AArch32 compatibility. */ +#define SA_RESTORER 0x04000000U + +struct k_sigaction { + struct sigaction sa; +}; + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#define MINSIGSTKSZ 5120 +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +typedef union sigval { + int sival_int; + void *sival_ptr; +} sigval_t; + +#define __SI_MAX_SIZE 128 +#define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4) + +typedef struct siginfo { + int si_signo; /* Signal number. */ + int si_errno; /* If non-zero, an errno value associated with + this signal, as defined in . */ + int si_code; /* Signal code. 
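Because _NSIG equals _NSIG_BPW here, _NSIG_WORDS evaluates to 1 and a complete signal mask fits in the single word of sigset_t.__val. A minimal sketch of set-manipulation helpers built on the __sigmask() macro above; the mck_* names are illustrative and not part of this patch:

static inline void mck_sigaddset(sigset_t *set, int sig)
{
	/* signal numbers are 1-based: bit (sig - 1) of word 0 */
	set->__val[0] |= __sigmask(sig);
}

static inline int mck_sigismember(const sigset_t *set, int sig)
{
	return (set->__val[0] & __sigmask(sig)) != 0;
}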
*/ +#define SI_USER 0 /* sent by kill, sigsend, raise */ +#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ +#define SI_QUEUE -1 /* sent by sigqueue */ +#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ +#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change + */ +#define SI_ASYNCIO -4 /* sent by AIO completion */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ +#define SI_TKILL -6 /* sent by tkill system call */ +#define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ + +#define ILL_ILLOPC 1 /* illegal opcode */ +#define ILL_ILLOPN 2 /* illegal operand */ +#define ILL_ILLADR 3 /* illegal addressing mode */ +#define ILL_ILLTRP 4 /* illegal trap */ +#define ILL_PRVOPC 5 /* privileged opcode */ +#define ILL_PRVREG 6 /* privileged register */ +#define ILL_COPROC 7 /* coprocessor error */ +#define ILL_BADSTK 8 /* internal stack error */ + +#define FPE_INTDIV 1 /* integer divide by zero */ +#define FPE_INTOVF 2 /* integer overflow */ +#define FPE_FLTDIV 3 /* floating point divide by zero */ +#define FPE_FLTOVF 4 /* floating point overflow */ +#define FPE_FLTUND 5 /* floating point underflow */ +#define FPE_FLTRES 6 /* floating point inexact result */ +#define FPE_FLTINV 7 /* floating point invalid operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ + +#define SEGV_MAPERR 1 /* address not mapped to object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ + +#define BUS_ADRALN 1 /* invalid address alignment */ +#define BUS_ADRERR 2 /* non-existent physical address */ +#define BUS_OBJERR 3 /* object specific hardware error */ +/* hardware memory error consumed on a machine check: action required */ +#define BUS_MCEERR_AR 4 +/* hardware memory error detected in process but not consumed: action optional */ +#define BUS_MCEERR_AO 5 + +#define TRAP_BRKPT 1 /* process breakpoint */ +#define TRAP_TRACE 2 /* process trace trap */ +#define TRAP_BRANCH 3 /* process taken branch trap */ +#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */ + +#define CLD_EXITED 1 /* child has exited */ +#define CLD_KILLED 2 /* child was killed */ +#define CLD_DUMPED 3 /* child terminated abnormally */ +#define CLD_TRAPPED 4 /* traced child has trapped */ +#define CLD_STOPPED 5 /* child has stopped */ +#define CLD_CONTINUED 6 /* stopped child has continued */ + +#define POLL_IN 1 /* data input available */ +#define POLL_OUT 2 /* output buffers available */ +#define POLL_MSG 3 /* input message available */ +#define POLL_ERR 4 /* i/o error */ +#define POLL_PRI 5 /* high priority input available */ +#define POLL_HUP 6 /* device disconnected */ + +#define SIGEV_SIGNAL 0 /* notify via signal */ +#define SIGEV_NONE 1 /* other notification: meaningless */ +#define SIGEV_THREAD 2 /* deliver via thread creation */ +#define SIGEV_THREAD_ID 4 /* deliver to thread */ + + union { + int _pad[__SI_PAD_SIZE]; + + /* kill(). */ + struct { + int si_pid; /* Sending process ID. */ + int si_uid; /* Real user ID of sending process. */ + } _kill; + + /* POSIX.1b timers. */ + struct { + int si_tid; /* Timer ID. */ + int si_overrun; /* Overrun count. */ + sigval_t si_sigval; /* Signal value. */ + } _timer; + + /* POSIX.1b signals. */ + struct { + int si_pid; /* Sending process ID. */ + int si_uid; /* Real user ID of sending process. */ + sigval_t si_sigval; /* Signal value. */ + } _rt; + + /* SIGCHLD. */ + struct { + int si_pid; /* Which child. */ + int si_uid; /* Real user ID of sending process. 
*/ + int si_status; /* Exit value or signal. */ + long si_utime; + long si_stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ + struct { + void *si_addr; /* Faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL. */ + struct { + long int si_band; /* Band event for SIGPOLL. */ + int si_fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +struct signalfd_siginfo { + unsigned int ssi_signo; + int ssi_errno; + int ssi_code; + unsigned int ssi_pid; + unsigned int ssi_uid; + int ssi_fd; + unsigned int ssi_tid; + unsigned int ssi_band; + unsigned int ssi_overrun; + unsigned int ssi_trapno; + int ssi_status; + int ssi_int; + unsigned long ssi_ptr; + unsigned long ssi_utime; + unsigned long ssi_stime; + unsigned long ssi_addr; + unsigned short ssi_addr_lsb; + + char __pad[46]; +}; + + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 +#define SIGRTMIN 32 + +#ifndef SIGRTMAX +#define SIGRTMAX _NSIG +#endif + +#define PTRACE_EVENT_EXEC 4 + +/* + * @ref.impl linux-linaro/arch/arm64/include/uapi/asm/sigcontext.h + */ +struct sigcontext { + unsigned long fault_address; + /* AArch64 registers */ + unsigned long regs[31]; + unsigned long sp; + unsigned long pc; + unsigned long pstate; + /* 4K reserved for FP/SIMD state and future expansion */ + unsigned char __reserved[4096] /*__attribute__((__aligned__(16)))*/; +}; + +/* + * Header to be used at the beginning of structures extending the user + * context. Such structures must be placed after the rt_sigframe on the stack + * and be 16-byte aligned. The last structure must be a dummy one with the + * magic and size set to 0. + */ +struct _aarch64_ctx { + unsigned int magic; + unsigned int size; +}; + +#define FPSIMD_MAGIC 0x46508001 + +struct fpsimd_context { + struct _aarch64_ctx head; + unsigned int fpsr; + unsigned int fpcr; + __uint128_t vregs[32]; +}; + +/* ESR_EL1 context */ +#define ESR_MAGIC 0x45535201 + +struct esr_context { + struct _aarch64_ctx head; + unsigned long esr; +}; + +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + void *data; /* 16-byte aligned pointer to the extra space */ + uint32_t size; /* size in bytes of the extra space */ +}; + +#define SVE_MAGIC 0x53564501 + +#define fpsimd_sve_state(vq) { \ + __uint128_t zregs[32][vq]; \ + uint16_t pregs[16][vq]; \ + uint16_t ffr[vq]; \ +} + +struct sve_context { + struct _aarch64_ctx head; + uint16_t vl; + uint16_t __reserved[3]; +}; + +/* + * The SVE architecture leaves space for future expansion of the + * vector length beyond its initial architectural limit of 2048 bits + * (16 quadwords). 
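The _aarch64_ctx header gives every record stored in sigcontext.__reserved a (magic, size) framing, terminated by a record whose magic is 0. A hedged sketch of how a consumer could locate a particular record, for example the FPSIMD_MAGIC block; the helper name is hypothetical and the bounds checks are minimal:

static struct _aarch64_ctx *find_ctx_record(struct sigcontext *sc,
					    unsigned int magic)
{
	unsigned char *p = sc->__reserved;
	unsigned char *end = p + sizeof(sc->__reserved);

	while (p + sizeof(struct _aarch64_ctx) <= end) {
		struct _aarch64_ctx *head = (struct _aarch64_ctx *)p;

		if (head->magic == 0)
			break;		/* terminator record */
		if (head->magic == magic)
			return head;
		if (head->size < sizeof(*head) || head->size > end - p)
			break;		/* malformed record */
		p += head->size;	/* skip to the next record */
	}
	return NULL;
}

For instance, (struct fpsimd_context *)find_ctx_record(sc, FPSIMD_MAGIC) would yield the record carrying fpsr, fpcr and the 32 vregs.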
+ */ +#define SVE_VQ_MIN 1 +#define SVE_VQ_MAX 0x200 + +#define SVE_VL_MIN (SVE_VQ_MIN * 0x10) +#define SVE_VL_MAX (SVE_VQ_MAX * 0x10) + +#define SVE_NUM_ZREGS 32 +#define SVE_NUM_PREGS 16 + +#define sve_vl_valid(vl) \ + ((vl) % 0x10 == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX) +#define sve_vq_from_vl(vl) ((vl) / 0x10) + +/* + * The total size of meaningful data in the SVE context in bytes, + * including the header, is given by SVE_SIG_CONTEXT_SIZE(vq). + * + * Note: for all these macros, the "vq" argument denotes the SVE + * vector length in quadwords (i.e., units of 128 bits). + * + * The correct way to obtain vq is to use sve_vq_from_vl(vl). The + * result is valid if and only if sve_vl_valid(vl) is true. This is + * guaranteed for a struct sve_context written by the kernel. + * + * + * Additional macros describe the contents and layout of the payload. + * For each, SVE_SIG_x_OFFSET(args) is the start offset relative to + * the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the + * size in bytes: + * + * + * x type description + * - ---- ----------- + * REGS the entire SVE context + * + * ZREGS __uint128_t[SVE_NUM_ZREGS][vq] all Z-registers + * ZREG __uint128_t[vq] individual Z-register Zn + * + * PREGS uint16_t[SVE_NUM_PREGS][vq] all P-registers + * PREG uint16_t[vq] individual P-register Pn + * + * FFR uint16_t[vq] first-fault status register + * + * Additional data might be appended in the future. + */ + +#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * 16) +#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * 2) +#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq) + +#define SVE_SIG_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16) + +#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET +#define SVE_SIG_ZREG_OFFSET(vq, n) \ + (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n)) +#define SVE_SIG_ZREGS_SIZE(vq) \ + (SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET) + +#define SVE_SIG_PREGS_OFFSET(vq) \ + (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq)) +#define SVE_SIG_PREG_OFFSET(vq, n) \ + (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n)) +#define SVE_SIG_PREGS_SIZE(vq) \ + (SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq)) + +#define SVE_SIG_FFR_OFFSET(vq) \ + (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq)) + +#define SVE_SIG_REGS_SIZE(vq) \ + (SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET) + +#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) + +/* + * @ref.impl linux-linaro/arch/arm64/include/asm/ucontext.h + */ +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + sigset_t uc_sigmask; + /* glibc uses a 1024-bit sigset_t */ + unsigned char __unused[1024 / 8 - sizeof(sigset_t)]; + /* last for future expansion */ + struct sigcontext uc_mcontext; +}; + +void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err); +void set_signal(int sig, void *regs, struct siginfo *info); +void check_signal(unsigned long rc, void *regs, int num); +void check_signal_irq_disabled(unsigned long rc, void *regs, int num); + +#endif /* __HEADER_ARM64_COMMON_SIGNAL_H */ diff --git a/arch/arm64/kernel/include/smp.h b/arch/arm64/kernel/include/smp.h new file mode 100644 index 00000000..f8df53d6 --- /dev/null +++ b/arch/arm64/kernel/include/smp.h @@ -0,0 +1,23 @@ +/* smp.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_SMP_H +#define __HEADER_ARM64_COMMON_SMP_H + +#ifndef __ASSEMBLY__ +/* + * Initial data for bringing 
up a secondary CPU. + */ +struct secondary_data { + void *stack; + unsigned long next_pc; + unsigned long arg; +}; +extern struct secondary_data secondary_data; + +#endif /* __ASSEMBLY__ */ + +/* struct secondary_data offsets */ +#define SECONDARY_DATA_STACK 0x00 +#define SECONDARY_DATA_NEXT_PC 0x08 +#define SECONDARY_DATA_ARG 0x10 + +#endif /* !__HEADER_ARM64_COMMON_SMP_H */ diff --git a/arch/arm64/kernel/include/stringify.h b/arch/arm64/kernel/include/stringify.h new file mode 100644 index 00000000..aeb9b070 --- /dev/null +++ b/arch/arm64/kernel/include/stringify.h @@ -0,0 +1,17 @@ +/* stringify.h COPYRIGHT FUJITSU LIMITED 2017 */ + +/** + * @ref.impl host-kernel/include/linux/stringify.h + */ +#ifndef __LINUX_STRINGIFY_H +#define __LINUX_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...)#x +#define __stringify(x...)__stringify_1(x) + +#endif/* !__LINUX_STRINGIFY_H */ diff --git a/arch/arm64/kernel/include/syscall_list.h b/arch/arm64/kernel/include/syscall_list.h new file mode 100644 index 00000000..80a1222a --- /dev/null +++ b/arch/arm64/kernel/include/syscall_list.h @@ -0,0 +1,148 @@ +/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +SYSCALL_DELEGATED(4, io_getevents) +SYSCALL_DELEGATED(17, getcwd) +SYSCALL_DELEGATED(22, epoll_pwait) +SYSCALL_DELEGATED(25, fcntl) +SYSCALL_HANDLED(29, ioctl) +SYSCALL_DELEGATED(43, statfs) +SYSCALL_DELEGATED(44, fstatfs) +#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */ +SYSCALL_HANDLED(56, openat) +#else /* POSTK_DEBUG_ARCH_DEP_62 */ +SYSCALL_DELEGATED(56, openat) +#endif /* POSTK_DEBUG_ARCH_DEP_62 */ +SYSCALL_HANDLED(57, close) +SYSCALL_DELEGATED(61, getdents64) +SYSCALL_DELEGATED(62, lseek) +SYSCALL_HANDLED(63, read) +SYSCALL_DELEGATED(64, write) +SYSCALL_DELEGATED(66, writev) +SYSCALL_DELEGATED(67, pread64) +SYSCALL_DELEGATED(68, pwrite64) +SYSCALL_DELEGATED(72, pselect6) +SYSCALL_DELEGATED(73, ppoll) +SYSCALL_HANDLED(74, signalfd4) +SYSCALL_DELEGATED(78, readlinkat) +SYSCALL_DELEGATED(80, fstat) +SYSCALL_HANDLED(93, exit) +SYSCALL_HANDLED(94, exit_group) +SYSCALL_HANDLED(95, waitid) +SYSCALL_HANDLED(96, set_tid_address) +SYSCALL_HANDLED(98, futex) +SYSCALL_HANDLED(99, set_robust_list) +SYSCALL_HANDLED(101, nanosleep) +SYSCALL_HANDLED(102, getitimer) +SYSCALL_HANDLED(103, setitimer) +SYSCALL_HANDLED(113, clock_gettime) +SYSCALL_DELEGATED(114, clock_getres) +SYSCALL_DELEGATED(115, clock_nanosleep) +SYSCALL_HANDLED(117, ptrace) +SYSCALL_HANDLED(118, sched_setparam) +SYSCALL_HANDLED(119, sched_setscheduler) +SYSCALL_HANDLED(120, sched_getscheduler) +SYSCALL_HANDLED(121, sched_getparam) +SYSCALL_HANDLED(122, sched_setaffinity) +SYSCALL_HANDLED(123, sched_getaffinity) +SYSCALL_HANDLED(124, sched_yield) +SYSCALL_HANDLED(125, sched_get_priority_max) +SYSCALL_HANDLED(126, sched_get_priority_min) +SYSCALL_HANDLED(127, sched_rr_get_interval) +SYSCALL_HANDLED(129, kill) +SYSCALL_HANDLED(130, tkill) +SYSCALL_HANDLED(131, tgkill) +SYSCALL_HANDLED(132, sigaltstack) +SYSCALL_HANDLED(133, rt_sigsuspend) +SYSCALL_HANDLED(134, rt_sigaction) +SYSCALL_HANDLED(135, rt_sigprocmask) +SYSCALL_HANDLED(136, rt_sigpending) +SYSCALL_HANDLED(137, rt_sigtimedwait) +SYSCALL_HANDLED(138, rt_sigqueueinfo) +SYSCALL_HANDLED(139, rt_sigreturn) +SYSCALL_HANDLED(143, setregid) +SYSCALL_HANDLED(144, setgid) +SYSCALL_HANDLED(145, setreuid) +SYSCALL_HANDLED(146, setuid) 
+SYSCALL_HANDLED(147, setresuid) +SYSCALL_HANDLED(148, getresuid) +SYSCALL_HANDLED(149, setresgid) +SYSCALL_HANDLED(150, getresgid) +SYSCALL_HANDLED(151, setfsuid) +SYSCALL_HANDLED(152, setfsgid) +SYSCALL_HANDLED(153, times) +SYSCALL_HANDLED(154, setpgid) +SYSCALL_DELEGATED(160, uname) +SYSCALL_HANDLED(163, getrlimit) +SYSCALL_HANDLED(164, setrlimit) +SYSCALL_HANDLED(165, getrusage) +SYSCALL_HANDLED(167, prctl) +SYSCALL_HANDLED(168, getcpu) +SYSCALL_HANDLED(169, gettimeofday) +SYSCALL_HANDLED(170, settimeofday) +SYSCALL_HANDLED(172, getpid) +SYSCALL_HANDLED(173, getppid) +SYSCALL_HANDLED(174, getuid) +SYSCALL_HANDLED(175, geteuid) +SYSCALL_HANDLED(176, getgid) +SYSCALL_HANDLED(177, getegid) +SYSCALL_HANDLED(178, gettid) +SYSCALL_DELEGATED(188, msgrcv) +SYSCALL_DELEGATED(189, msgsnd) +SYSCALL_DELEGATED(192, semtimedop) +SYSCALL_DELEGATED(193, semop) +SYSCALL_HANDLED(194, shmget) +SYSCALL_HANDLED(195, shmctl) +SYSCALL_HANDLED(196, shmat) +SYSCALL_HANDLED(197, shmdt) +SYSCALL_HANDLED(214, brk) +SYSCALL_HANDLED(215, munmap) +SYSCALL_HANDLED(216, mremap) +SYSCALL_HANDLED(220, clone) +SYSCALL_HANDLED(221, execve) +SYSCALL_HANDLED(222, mmap) +SYSCALL_HANDLED(226, mprotect) +SYSCALL_HANDLED(227, msync) +SYSCALL_HANDLED(228, mlock) +SYSCALL_HANDLED(229, munlock) +SYSCALL_HANDLED(230, mlockall) +SYSCALL_HANDLED(231, munlockall) +SYSCALL_HANDLED(232, mincore) +SYSCALL_HANDLED(233, madvise) +SYSCALL_HANDLED(234, remap_file_pages) +SYSCALL_HANDLED(235, mbind) +SYSCALL_HANDLED(236, get_mempolicy) +SYSCALL_HANDLED(237, set_mempolicy) +SYSCALL_HANDLED(238, migrate_pages) +SYSCALL_HANDLED(239, move_pages) +SYSCALL_HANDLED(241, perf_event_open) +SYSCALL_HANDLED(260, wait4) +SYSCALL_HANDLED(270, process_vm_readv) +SYSCALL_HANDLED(271, process_vm_writev) +SYSCALL_HANDLED(601, pmc_init) +SYSCALL_HANDLED(602, pmc_start) +SYSCALL_HANDLED(603, pmc_stop) +SYSCALL_HANDLED(604, pmc_reset) +SYSCALL_HANDLED(700, get_cpu_id) +#ifdef PROFILE_ENABLE +SYSCALL_HANDLED(__NR_profile, profile) +#endif // PROFILE_ENABLE +SYSCALL_HANDLED(730, util_migrate_inter_kernel) +SYSCALL_HANDLED(731, util_indicate_clone) +SYSCALL_HANDLED(732, get_system) + +/* McKernel Specific */ +SYSCALL_HANDLED(801, swapout) +SYSCALL_HANDLED(802, linux_mlock) +SYSCALL_HANDLED(803, suspend_threads) +SYSCALL_HANDLED(804, resume_threads) +SYSCALL_HANDLED(811, linux_spawn) + +SYSCALL_DELEGATED(1024, open) +SYSCALL_DELEGATED(1026, unlink) +SYSCALL_DELEGATED(1035, readlink) +SYSCALL_HANDLED(1045, signalfd) +SYSCALL_DELEGATED(1049, stat) +SYSCALL_DELEGATED(1060, getpgrp) +SYSCALL_DELEGATED(1062, time) +SYSCALL_HANDLED(1071, vfork) +SYSCALL_DELEGATED(1079, fork) diff --git a/arch/arm64/kernel/include/sysreg.h b/arch/arm64/kernel/include/sysreg.h new file mode 100644 index 00000000..44f7c638 --- /dev/null +++ b/arch/arm64/kernel/include/sysreg.h @@ -0,0 +1,339 @@ +/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2017 */ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
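The SYSCALL_HANDLED()/SYSCALL_DELEGATED() list in syscall_list.h above is a pure X-macro table: it declares nothing by itself and is meant to be included repeatedly with the two macros defined to whatever the including file needs. A sketch of the usual consumption pattern, assuming in-kernel handlers follow a sys_<name> naming scheme (the table name and size here are illustrative, not taken from this patch):

/* build a dispatch table: handled calls get a local function,
 * delegated calls leave a NULL slot and fall through to the
 * host-forwarding path */
#define SYSCALL_HANDLED(number, name)	[number] = (void *)sys_##name,
#define SYSCALL_DELEGATED(number, name)	/* no local entry */
static void *syscall_table[2048] = {
#include "syscall_list.h"
};
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED

A second include with different macro definitions can then generate prototypes or tracing names from the same list, which is the point of the pattern.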
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#include +#include + +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ +#define Op0_shift 19 +#define Op0_mask 0x3 +#define Op1_shift 16 +#define Op1_mask 0x7 +#define CRn_shift 12 +#define CRn_mask 0xf +#define CRm_shift 8 +#define CRm_mask 0xf +#define Op2_shift 5 +#define Op2_mask 0x7 + +#define sys_reg(op0, op1, crn, crm, op2) \ + (((op0) << Op0_shift) | ((op1) << Op1_shift) | \ + ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ + ((op2) << Op2_shift)) + +#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) +#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) +#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) +#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask) +#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask) + +#ifdef __ASSEMBLY__ +#define __emit_inst(x).inst (x) +#else +#define __emit_inst(x)".inst " __stringify((x)) "\n\t" +#endif + +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) +#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) + +#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) +#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) + +/* +#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM | \ + (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM | \ + (!!x)<<8 | 0x1f) +*/ + +/* Common SCTLR_ELx flags. 
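The encoding helpers above are easy to self-check: each sys_reg_*() accessor must round-trip the corresponding field passed to sys_reg(). An illustrative compile-time check, assuming a C11 toolchain for _Static_assert, using SYS_ID_AA64PFR0_EL1 = sys_reg(3, 0, 0, 4, 0) defined above:

_Static_assert(sys_reg_Op0(SYS_ID_AA64PFR0_EL1) == 3, "Op0 round-trip");
_Static_assert(sys_reg_Op1(SYS_ID_AA64PFR0_EL1) == 0, "Op1 round-trip");
_Static_assert(sys_reg_CRn(SYS_ID_AA64PFR0_EL1) == 0, "CRn round-trip");
_Static_assert(sys_reg_CRm(SYS_ID_AA64PFR0_EL1) == 4, "CRm round-trip");
_Static_assert(sys_reg_Op2(SYS_ID_AA64PFR0_EL1) == 0, "Op2 round-trip");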
*/ +#define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_I (1 << 12) +#define SCTLR_ELx_SA (1 << 3) +#define SCTLR_ELx_C (1 << 2) +#define SCTLR_ELx_A (1 << 1) +#define SCTLR_ELx_M 1 + +#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_UCI (1 << 26) +#define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_UCT (1 << 15) +#define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_CP15BEN (1 << 5) + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_SVE_SHIFT 32 +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_SVE 0x1 +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +#define ID_AA64MMFR1_VMIDBITS_8 0 +#define ID_AA64MMFR1_VMIDBITS_16 2 + +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_LVA_SHIFT 16 +#define ID_AA64MMFR2_IESB_SHIFT 12 +#define ID_AA64MMFR2_LSM_SHIFT 8 +#define ID_AA64MMFR2_UAO_SHIFT 4 +#define ID_AA64MMFR2_CNP_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_PMSVER_SHIFT 32 +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define 
ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#endif + +#define ZCR_EL1_LEN_SHIFT 0 +#define ZCR_EL1_LEN_SIZE 9 +#define ZCR_EL1_LEN_MASK 0x1ff + +#define CPACR_EL1_ZEN_EL1EN (1 << 16) +#define CPACR_EL1_ZEN_EL0EN (1 << 17) +#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) + +/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ +#define SYS_MPIDR_SAFE_VAL (1UL << 31) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__reg_num_x\num, \num + .endr + .equ .L__reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + .endm + + .macro msr_s, sreg, rt + __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + .endm + +#else + +asm( +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" +" .equ .L__reg_num_x\\num, \\num\n" +" .endr\n" +" .equ .L__reg_num_xzr, 31\n" +"\n" +" .macro mrs_s, rt, sreg\n" + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) +" .endm\n" +"\n" +" .macro msr_s, sreg, rt\n" + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) +" .endm\n" +); + +#endif + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + uint64_t __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ + }) + +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ +#define write_sysreg(v, r) do { \ + uint64_t __val = (uint64_t)v; \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ +} while (0) + +/* + * For registers without architectural names, or simply unsupported by + * GAS. 
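Typical use of read_sysreg() above is to read an ID register by its architectural name and mask out one 4-bit feature field with the *_SHIFT constants defined earlier. A minimal sketch (the helper name is illustrative):

static inline int cpu_has_sve(void)
{
	uint64_t pfr0 = read_sysreg(id_aa64pfr0_el1);

	/* bits [35:32] report SVE support; 0x1 means implemented */
	return ((pfr0 >> ID_AA64PFR0_SVE_SHIFT) & 0xf) == ID_AA64PFR0_SVE;
}

Registers the assembler cannot name, such as SYS_ZCR_EL1, instead go through the read_sysreg_s()/write_sysreg_s() variants defined next, which emit the raw mrs_s/msr_s encodings.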
+ */ +#define read_sysreg_s(r) ({ \ + uint64_t __val; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + __val; \ + }) + +#define write_sysreg_s(v, r) do { \ + uint64_t __val = (uint64_t)v; \ + asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ +} while (0) + +/* @ref.impl arch/arm64/include/asm/kvm_arm.h */ +#define CPTR_EL2_TZ (1 << 8) + +#endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/include/thread_info.h b/arch/arm64/kernel/include/thread_info.h new file mode 100644 index 00000000..069cdf0c --- /dev/null +++ b/arch/arm64/kernel/include/thread_info.h @@ -0,0 +1,100 @@ +/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H +#define __HEADER_ARM64_COMMON_THREAD_INFO_H + +#define KERNEL_STACK_SIZE 32768 /* 8 pages */ +#define THREAD_START_SP (KERNEL_STACK_SIZE - 16) + +#ifndef __ASSEMBLY__ + +#define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align) +#define ALIGN_DOWN(x, align) ((x) & ~((align) - 1)) + +#include +#include + +struct cpu_context { + unsigned long x19; + unsigned long x20; + unsigned long x21; + unsigned long x22; + unsigned long x23; + unsigned long x24; + unsigned long x25; + unsigned long x26; + unsigned long x27; + unsigned long x28; + unsigned long fp; + unsigned long sp; + unsigned long pc; +}; + +struct thread_info { + unsigned long flags; /* low level flags */ +// mm_segment_t addr_limit; /* address limit */ +// struct task_struct *task; /* main task structure */ +// struct exec_domain *exec_domain; /* execution domain */ +// struct restart_block restart_block; +// int preempt_count; /* 0 => preemptable, <0 => bug */ + int cpu; /* cpu */ + struct cpu_context cpu_context; /* kernel_context */ + void *sve_state; /* SVE registers, if any */ + uint16_t sve_vl; /* SVE vector length */ + uint16_t sve_vl_onexec; /* SVE vl after next exec */ + uint16_t sve_flags; /* SVE related flags */ + unsigned long fault_address; /* fault info */ + unsigned long fault_code; /* ESR_EL1 value */ +}; + +/* Flags for sve_flags (intentionally defined to match the prctl flags) */ + +/* Inherit sve_vl and sve_flags across execve(): */ +#define THREAD_VL_INHERIT PR_SVE_SET_VL_INHERIT + +struct arm64_cpu_local_thread { + struct thread_info thread_info; + unsigned long paniced; /* 136 */ + uint64_t panic_regs[34]; /* 144 */ +}; + +union arm64_cpu_local_variables { + struct arm64_cpu_local_thread arm64_cpu_local_thread; + unsigned long stack[KERNEL_STACK_SIZE / sizeof(unsigned long)]; +}; +extern union arm64_cpu_local_variables init_thread_info; + +/* + * how to get the current stack pointer from C + */ +register unsigned long current_stack_pointer asm ("sp"); + +/* + * how to get the thread information struct from C + */ +static inline struct thread_info *current_thread_info(void) +{ + unsigned long ti = 0; + + ti = ALIGN_DOWN(current_stack_pointer, KERNEL_STACK_SIZE); + + return (struct thread_info *)ti; +} + +/* + * how to get the pt_regs struct from C + */ +static inline struct pt_regs *current_pt_regs(void) +{ + unsigned long regs = 0; + + regs = ALIGN_DOWN(current_stack_pointer, KERNEL_STACK_SIZE); + regs += THREAD_START_SP - sizeof(struct pt_regs); + + return (struct pt_regs *)regs; +} + +#endif /* !__ASSEMBLY__ */ + +#define TIF_SINGLESTEP 21 + +#endif /* !__HEADER_ARM64_COMMON_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/include/traps.h b/arch/arm64/kernel/include/traps.h new file mode 100644 index 00000000..563d788c --- /dev/null +++ b/arch/arm64/kernel/include/traps.h @@ -0,0 +1,27 @@ +/* 
traps.h COPYRIGHT FUJITSU LIMITED 2017 */ + +#ifndef __ASM_TRAP_H +#define __ASM_TRAP_H + +#include + +struct pt_regs; + +/* @ref.impl arch/arm64/include/asm/traps.h */ +struct undef_hook { + struct list_head node; + uint32_t instr_mask; + uint32_t instr_val; + uint64_t pstate_mask; + uint64_t pstate_val; + int (*fn)(struct pt_regs *regs, uint32_t instr); +}; + +/* @ref.impl arch/arm64/include/asm/traps.h */ +void register_undef_hook(struct undef_hook *hook); + +/* @ref.impl arch/arm64/include/asm/traps.h */ +void unregister_undef_hook(struct undef_hook *hook); + +#endif /* __ASM_TRAP_H */ + diff --git a/arch/arm64/kernel/include/vdso.h b/arch/arm64/kernel/include/vdso.h new file mode 100644 index 00000000..b37eb7f7 --- /dev/null +++ b/arch/arm64/kernel/include/vdso.h @@ -0,0 +1,30 @@ +/* vdso.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_COMMON_VDSO_H +#define __HEADER_ARM64_COMMON_VDSO_H + +#ifdef __KERNEL__ + +/* @ref.impl arch/arm64/include/asm/vdso.h::VDSO_LBASE */ +/* + * Default link address for the vDSO. + * Since we randomise the VDSO mapping, there's little point in trying + * to prelink this. + */ +#define VDSO_LBASE 0x0 + +#ifndef __ASSEMBLY__ + +#include + +/* @ref.impl arch/arm64/include/asm/vdso.h::VDSO_SYMBOL */ +#define VDSO_SYMBOL(base, name) vdso_symbol_##name((unsigned long)(base)) +void* vdso_symbol_sigtramp(unsigned long base); + +int add_vdso_pages(struct thread *thread); + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* !__HEADER_ARM64_COMMON_VDSO_H */ + diff --git a/arch/arm64/kernel/include/virt.h b/arch/arm64/kernel/include/virt.h new file mode 100644 index 00000000..1fec182d --- /dev/null +++ b/arch/arm64/kernel/include/virt.h @@ -0,0 +1,8 @@ +/* virt.h COPYRIGHT FUJITSU LIMITED 2015 */ +#ifndef __HEADER_ARM64_COMMON_VIRT_H +#define __HEADER_ARM64_COMMON_VIRT_H + +#define BOOT_CPU_MODE_EL1 (0xe11) +#define BOOT_CPU_MODE_EL2 (0xe12) + +#endif /* !__HEADER_ARM64_COMMON_VIRT_H */ diff --git a/arch/arm64/kernel/irq-gic-v2.c b/arch/arm64/kernel/irq-gic-v2.c new file mode 100644 index 00000000..d29b45ca --- /dev/null +++ b/arch/arm64/kernel/irq-gic-v2.c @@ -0,0 +1,158 @@ +/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ + +#include +#include +#include +#include +#include +#include +#include + +// #define DEBUG_GICV2 + +#ifdef DEBUG_GICV2 +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else +#define dkprintf(...) +#define ekprintf(...) kprintf(__VA_ARGS__) +#endif + +void *dist_base; +void *cpu_base; + +#define gic_hwid_to_affinity(hw_cpuid) (1UL << hw_cpuid) + +/** + * arm64_raise_sgi_gicv2 + * @ref.impl drivers/irqchip/irq-gic.c:gic_raise_softirq + * + * @note Interrupt control is performed by a caller at a higher + * level, so unlike gic_raise_softirq() this function does not + * need to disable/enable interrupts itself. + */ +static void arm64_raise_sgi_gicv2(unsigned int cpuid, unsigned int vector) +{ + /* Build interrupt destination of the target cpu */ + unsigned int hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid]; + uint8_t cpu_target_list = gic_hwid_to_affinity(hw_cpuid); + + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before they observe us issuing the IPI. + */ + dmb(ishst); + + /* write to GICD_SGIR */ + writel_relaxed( + cpu_target_list << 16 | vector, + (void *)(dist_base + GIC_DIST_SOFTINT) + ); +} + +/** + * arm64_raise_spi_gicv2 + * @ref.impl nothing (no Linux counterpart). 
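Worked example of the GICD_ISPENDR arithmetic that arm64_raise_spi_gicv2() performs below: for SPI vector m = 35, the register index is n = 35 / 32 = 1, so the write lands at byte offset 4 * n = 4 from GIC_DIST_PENDING_SET and sets bit 35 % 32 = 3. The same computation as a standalone helper (hypothetical, for illustration only):

static inline void gicd_pending_calc(unsigned int vector,
				     unsigned long *reg_off, unsigned int *bit)
{
	*reg_off = vector / 32 * 4;	/* byte offset of GICD_ISPENDRn */
	*bit = vector % 32;		/* set-pending bit within that register */
}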
+ */ +extern unsigned int ihk_ikc_irq_apicid; +static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector) +{ + uint64_t spi_reg_offset; + uint32_t spi_set_pending_bitpos; + + if (cpuid != ihk_ikc_irq_apicid) { + ekprintf("SPI (irq#%d) can only be sent to the host.\n", vector); + return; + } + + /** + * Calculate the register offset and bit position corresponding to the vector number. + * + * For interrupt vector m, + * - the corresponding GICD_ISPENDR number, n, is given by n = m / 32 + * - the offset of the required GICD_ISPENDR is (0x200 + (4*n)) + * - the bit number of the required Set-pending bit in this register is m % 32. + */ + spi_reg_offset = vector / 32 * 4; + spi_set_pending_bitpos = vector % 32; + + /* write to GICD_ISPENDR */ + writel_relaxed( + 1 << spi_set_pending_bitpos, + (void *)(dist_base + GIC_DIST_PENDING_SET + spi_reg_offset) + ); +} + +/** + * arm64_issue_ipi_gicv2 + * @param cpuid : hardware cpu id + * @param vector : interrupt vector number + */ +void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector) +{ + dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid); + + if(vector < 16){ + // send SGI + arm64_raise_sgi_gicv2(cpuid, vector); + } else if (32 <= vector && vector < 1020) { + // send SPI (allow only to host) + arm64_raise_spi_gicv2(cpuid, vector); + } else { + ekprintf("#%d is a bad irq number.", vector); + } +} + +/** + * handle_interrupt_gicv2 + * @ref.impl drivers/irqchip/irq-gic.c:gic_handle_irq + */ +extern int interrupt_from_user(void *); +void handle_interrupt_gicv2(struct pt_regs *regs) +{ + unsigned int irqstat, irqnr; + + set_cputime(interrupt_from_user(regs)? 1: 2); + do { + // get GICC_IAR.InterruptID + irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK); + irqnr = irqstat & GICC_IAR_INT_ID_MASK; + + if (irqnr < 32) { + writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + handle_IPI(irqnr, regs); + continue; + } else if (irqnr != 1023) { + panic("PANIC: handle_interrupt_gicv2(): caught an invalid interrupt."); + } + + /* + * If another interrupt is not pending, GICC_IAR.InterruptID + * returns 1023 (see GICv2 spec, Chap. 4.4.4). + */ + break; + } while (1); + set_cputime(0); } + +void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size) +{ + dist_base = map_fixed_area(dist_base_pa, size, 1 /*non cacheable*/); +} + +void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size) +{ + cpu_base = map_fixed_area(cpu_base_pa, size, 1 /*non cacheable*/); +} + +void gic_enable_gicv2(void) +{ + unsigned int enable_ppi_sgi = 0; + + if (is_use_virt_timer()) { + enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid(); + } else { + enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid(); + } + writel_relaxed(enable_ppi_sgi, dist_base + GIC_DIST_ENABLE_SET); +} diff --git a/arch/arm64/kernel/irq-gic-v3.c b/arch/arm64/kernel/irq-gic-v3.c new file mode 100644 index 00000000..12932bee --- /dev/null +++ b/arch/arm64/kernel/irq-gic-v3.c @@ -0,0 +1,406 @@ +/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ + +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG_GICV3 + +#ifdef DEBUG_GICV3 +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else +#define dkprintf(...) +#define ekprintf(...) 
kprintf(__VA_ARGS__) +#endif + +void *dist_base; +void *rdist_base[NR_CPUS]; + +extern uint64_t ihk_param_cpu_logical_map; +static uint64_t *__cpu_logical_map = &ihk_param_cpu_logical_map; + +extern uint64_t ihk_param_gic_rdist_base_pa[NR_CPUS]; + +#define cpu_logical_map(cpu) __cpu_logical_map[cpu] + +/* Our default, arbitrary priority value. Linux only uses one anyway. */ +#define DEFAULT_PMR_VALUE 0xf0 + +/** + * Low level accessors + * @ref.impl host-kernel/drivers/irqchip/irq-gic-v3.c + */ +static uint64_t gic_read_iar_common(void) +{ + uint64_t irqstat; + +#ifdef CONFIG_HAS_NMI + uint64_t daif; + uint64_t pmr; + uint64_t default_pmr_value = DEFAULT_PMR_VALUE; + + /* + * The PMR may be configured to mask interrupts when this code is + * called, thus in order to acknowledge interrupts we must set the + * PMR to its default value before reading from the IAR. + * + * To do this without taking an interrupt we also ensure the I bit + * is set whilst we are interfering with the value of the PMR. + */ + asm volatile( + "mrs %1, daif\n\t" /* save I bit */ + "msr daifset, #2\n\t" /* set I bit */ + "mrs_s %2, " __stringify(ICC_PMR_EL1) "\n\t" /* save PMR */ + "msr_s " __stringify(ICC_PMR_EL1) ",%3\n\t" /* set PMR */ + "mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" /* ack int */ + "msr_s " __stringify(ICC_PMR_EL1) ",%2\n\t" /* restore PMR */ + "isb\n\t" + "msr daif, %1" /* restore I */ + : "=r" (irqstat), "=&r" (daif), "=&r" (pmr) + : "r" (default_pmr_value)); +#else /* CONFIG_HAS_NMI */ + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); +#endif /* CONFIG_HAS_NMI */ + + return irqstat; +} + +#ifdef USE_CAVIUM_THUNDER_X +/* Cavium ThunderX erratum 23154 */ +static uint64_t gic_read_iar_cavium_thunderx(void) +{ + uint64_t irqstat; + +#ifdef CONFIG_HAS_NMI + uint64_t daif; + uint64_t pmr; + uint64_t default_pmr_value = DEFAULT_PMR_VALUE; + + /* + * The PMR may be configured to mask interrupts when this code is + * called, thus in order to acknowledge interrupts we must set the + * PMR to its default value before reading from the IAR. + * + * To do this without taking an interrupt we also ensure the I bit + * is set whilst we are interfering with the value of the PMR. 
+ */ + asm volatile( + "mrs %1, daif\n\t" /* save I bit */ + "msr daifset, #2\n\t" /* set I bit */ + "mrs_s %2, " __stringify(ICC_PMR_EL1) "\n\t" /* save PMR */ + "msr_s " __stringify(ICC_PMR_EL1) ",%3\n\t" /* set PMR */ + "nop;nop;nop;nop\n\t" + "nop;nop;nop;nop\n\t" + "mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" /* ack int */ + "nop;nop;nop;nop\n\t" + "msr_s " __stringify(ICC_PMR_EL1) ",%2\n\t" /* restore PMR */ + "isb\n\t" + "msr daif, %1" /* restore I */ + : "=r" (irqstat), "=&r" (daif), "=&r" (pmr) + : "r" (default_pmr_value)); +#else /* CONFIG_HAS_NMI */ + asm volatile("nop;nop;nop;nop;"); + asm volatile("nop;nop;nop;nop;"); + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + asm volatile("nop;nop;nop;nop;"); + mb(); +#endif /* CONFIG_HAS_NMI */ + + return irqstat; +} +#endif + +static uint64_t gic_read_iar(void) +{ +#ifdef USE_CAVIUM_THUNDER_X + if (static_key_false(&is_cavium_thunderx)) + return gic_read_iar_cavium_thunderx(); + else +#endif + return gic_read_iar_common(); + +} + +static void gic_write_pmr(uint64_t val) +{ + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); +} + +static void gic_write_ctlr(uint64_t val) +{ + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + isb(); +} + +static void gic_write_grpen1(uint64_t val) +{ + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + isb(); +} + +static inline void gic_write_eoir(uint64_t irq) +{ + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + isb(); +} + +static void gic_write_sgi1r(uint64_t val) +{ + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static inline uint32_t gic_read_sre(void) +{ + uint64_t val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + return val; +} + +static inline void gic_write_sre(uint32_t val) +{ + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((uint64_t)val)); + isb(); +} + +static uint32_t gic_enable_sre(void) +{ + uint32_t val; + + val = gic_read_sre(); + if (val & ICC_SRE_EL1_SRE) + return 1; /*ok*/ + + val |= ICC_SRE_EL1_SRE; + gic_write_sre(val); + val = gic_read_sre(); + + return !!(val & ICC_SRE_EL1_SRE); +} + +#ifdef CONFIG_HAS_NMI +static inline void gic_write_bpr1(uint32_t val) +{ + asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val)); +} +#endif + +static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector) +{ + uint64_t mpidr, cluster_id; + uint16_t tlist; + uint64_t val; + + /* Build interrupt destination of the target cpu */ + uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid]; + + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before issuing the IPI. + */ + smp_wmb(); + + mpidr = cpu_logical_map(hw_cpuid); + if((mpidr & 0xffUL) < 16) { + cluster_id = cpu_logical_map(hw_cpuid) & ~0xffUL; + tlist = (uint16_t)(1 << (mpidr & 0xf)); + +#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ + (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ + << ICC_SGI1R_AFFINITY_## level ##_SHIFT) + + val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | + MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | + vector << ICC_SGI1R_SGI_ID_SHIFT | + MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | + tlist << ICC_SGI1R_TARGET_LIST_SHIFT); + + dkprintf("CPU%d: ICC_SGI1R_EL1 %llx\n", ihk_mc_get_processor_id(), val); + gic_write_sgi1r(val); + + /* Force the above writes to ICC_SGI1R_EL1 to be executed */ + isb(); + } else { + /* + * If we ever get a cluster of more than 16 CPUs, just + * scream and skip that CPU. 
+ */ + ekprintf("GICv3 can't send SGI for TargetList=%lu\n", (mpidr & 0xffUL)); + } +} + +static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector) +{ + uint64_t spi_reg_offset; + uint32_t spi_set_pending_bitpos; + + /** + * Calculate the register offset and bit position corresponding to the vector number. + * + * For interrupt vector m, + * - the corresponding GICD_ISPENDR number, n, is given by n = m / 32 + * - the offset of the required GICD_ISPENDR is (0x200 + (4*n)) + * - the bit number of the required Set-pending bit in this register is m % 32. + */ + spi_reg_offset = vector / 32 * 4; + spi_set_pending_bitpos = vector % 32; + + /* write to GICD_ISPENDR */ + writel_relaxed( + 1 << spi_set_pending_bitpos, + (void *)(dist_base + GICD_ISPENDR + spi_reg_offset) + ); +} + +static void arm64_raise_lpi_gicv3(uint32_t cpuid, uint32_t vector) +{ + // @todo.impl +} + +void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector) +{ + dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid); + + if(vector < 16){ + // send SGI + arm64_raise_sgi_gicv3(cpuid, vector); + } else if (32 <= vector && vector < 1020) { + // send SPI (allow only to host) + arm64_raise_spi_gicv3(cpuid, vector); + } else if (8192 <= vector) { + // send LPI (allow only to host) + arm64_raise_lpi_gicv3(cpuid, vector); + } else { + ekprintf("#%d is a bad irq number.", vector); + } +} + +extern int interrupt_from_user(void *); +void handle_interrupt_gicv3(struct pt_regs *regs) +{ + uint64_t irqnr; + + irqnr = gic_read_iar(); + cpu_enable_nmi(); + set_cputime(interrupt_from_user(regs)? 1: 2); + while (irqnr != ICC_IAR1_EL1_SPURIOUS) { + if ((irqnr < 1020) || (irqnr >= 8192)) { + gic_write_eoir(irqnr); + handle_IPI(irqnr, regs); + } + irqnr = gic_read_iar(); + } + set_cputime(0); +} + +void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size) +{ + dist_base = map_fixed_area(dist_base_pa, size, 1 /*non cacheable*/); + +#ifdef USE_CAVIUM_THUNDER_X + /* Cavium ThunderX erratum 23154 */ + gicv3_check_capabilities(); +#endif +} + +void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size) +{ + int32_t cpuid, hw_cpuid; + struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info(); + + for(cpuid = 0; cpuid < cpu_info->ncpus; cpuid++) { + hw_cpuid = cpu_info->hw_ids[cpuid]; + if(ihk_param_gic_rdist_base_pa[hw_cpuid] != 0) { + rdist_base[hw_cpuid] = + map_fixed_area(ihk_param_gic_rdist_base_pa[hw_cpuid], size, 1 /*non cacheable*/); + } + } +} + +static void gic_do_wait_for_rwp(void *base) +{ + uint32_t count = 1000000; /* 1s! */ + + while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + count--; + if (!count) { + ekprintf("RWP timeout, gone fishing\n"); + return; + } + cpu_pause(); + }; +} + +void gic_enable_gicv3(void) +{ + void *rbase = rdist_base[ihk_mc_get_hardware_processor_id()]; + void *rd_sgi_base = rbase + 0x10000 /* SZ_64K */; + int i; + unsigned int enable_ppi_sgi = GICD_INT_EN_SET_SGI; + + if (is_use_virt_timer()) { + enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid(); + } else { + enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid(); + } + + /* + * Deal with the banked PPI and SGI interrupts - disable all + * PPI interrupts, ensure all SGI interrupts are enabled. 
+ */ + writel_relaxed(~enable_ppi_sgi, rd_sgi_base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(enable_ppi_sgi, rd_sgi_base + GIC_DIST_ENABLE_SET); + + /* + * Set priority on PPI and SGI interrupts + */ + for (i = 0; i < 32; i += 4) + writel_relaxed(GICD_INT_DEF_PRI_X4, + rd_sgi_base + GIC_DIST_PRI + i * 4 / 4); + + /* sync wait */ + gic_do_wait_for_rwp(rbase); + + /* + * Need to check that the SRE bit has actually been set. If + * not, it means that SRE is disabled at EL2. We're going to + * die painfully, and there is nothing we can do about it. + * + * Kindly inform the luser. + */ + if (!gic_enable_sre()) + panic("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); + +#ifndef CONFIG_HAS_NMI + /* Set priority mask register */ + gic_write_pmr(DEFAULT_PMR_VALUE); +#endif + + /* EOI deactivates interrupt too (mode 0) */ + gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); + + /* ... and let's hit the road... */ + gic_write_grpen1(1); + +#ifdef CONFIG_HAS_NMI + /* + * Some firmwares hand over to the kernel with the BPR changed from + * its reset value (and with a value large enough to prevent + * any pre-emptive interrupts from working at all). Writing a zero + * to BPR restores its reset value. + */ + gic_write_bpr1(0); + + /* Set specific IPI to NMI */ + writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP); + writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP); + + /* sync wait */ + gic_do_wait_for_rwp(rbase); +#endif /* CONFIG_HAS_NMI */ +} diff --git a/arch/arm64/kernel/local.c b/arch/arm64/kernel/local.c new file mode 100644 index 00000000..7aed6b2e --- /dev/null +++ b/arch/arm64/kernel/local.c @@ -0,0 +1,87 @@ +/* local.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#include +#include +#include +#include +#include +#include +#include + +#define LOCALS_SPAN (8 * PAGE_SIZE) + +/* BSP initialized stack area */ +union arm64_cpu_local_variables init_thread_info __attribute__((aligned(KERNEL_STACK_SIZE))); + +/* BSP/AP idle stack pointer head */ +static union arm64_cpu_local_variables *locals; +size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */ + +/* allocate & initialize BSP/AP idle stack */ +void init_processors_local(int max_id) +{ + int i = 0; + union arm64_cpu_local_variables *tmp; + + /* allocate one more for alignment */ + locals = ihk_mc_alloc_pages((max_id + 1) * (KERNEL_STACK_SIZE / PAGE_SIZE), IHK_MC_AP_CRITICAL); + locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE); + + /* clear struct process, struct process_vm, struct thread_info area */ + for (i = 0, tmp = locals; i < max_id; i++, tmp++) { + memset(tmp, 0, sizeof(struct thread_info)); + } + kprintf("locals = %p\n", locals); +} + +/* get id (logical processor id) local variable address */ +union arm64_cpu_local_variables *get_arm64_cpu_local_variable(int id) +{ + return locals + id; +} + +/* get id (logical processor id) kernel stack address */ +static void *get_arm64_cpu_local_kstack(int id) +{ + return (char *)get_arm64_cpu_local_variable(id) + THREAD_START_SP; +} + +/* get current cpu local variable address */ +union arm64_cpu_local_variables *get_arm64_this_cpu_local(void) +{ + int id = ihk_mc_get_processor_id(); + return get_arm64_cpu_local_variable(id); +} + +/* get current kernel stack address */ +void *get_arm64_this_cpu_kstack(void) +{ + int id = ihk_mc_get_processor_id(); + return get_arm64_cpu_local_kstack(id); +} + +/* assign logical processor id for current_thread_info.cpu */ +/* logical processor id 
BSP:0, AP0:1, AP1:2, ... APn:n-1 */ +static ihk_atomic_t last_processor_id = IHK_ATOMIC_INIT(-1); +void assign_processor_id(void) +{ + int id; + union arm64_cpu_local_variables *v; + + id = ihk_atomic_inc_return(&last_processor_id); + + v = get_arm64_cpu_local_variable(id); + v->arm64_cpu_local_thread.thread_info.cpu = id; +} + +/** IHK **/ +/* get current logical processor id */ +int ihk_mc_get_processor_id(void) +{ + return current_thread_info()->cpu; +} + +/* get current physical processor id (not equal AFFINITY !!) */ +int ihk_mc_get_hardware_processor_id(void) +{ + return ihk_mc_get_cpu_info()->hw_ids[ihk_mc_get_processor_id()]; +} diff --git a/arch/arm64/kernel/memcpy.S b/arch/arm64/kernel/memcpy.S new file mode 100644 index 00000000..046942db --- /dev/null +++ b/arch/arm64/kernel/memcpy.S @@ -0,0 +1,78 @@ +/* memcpy.S COPYRIGHT FUJITSU LIMITED 2017 */ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm + + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm + + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm + + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm + + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm + + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm + + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm + + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm + + .weak memcpy +ENTRY(____inline_memcpy) +ENTRY(__inline_memcpy) +#include "copy_template.S" + ret +ENDPIPROC(__inline_memcpy) +ENDPROC(____inline_memcpy) diff --git a/arch/arm64/kernel/memory.c b/arch/arm64/kernel/memory.c new file mode 100644 index 00000000..be356863 --- /dev/null +++ b/arch/arm64/kernel/memory.c @@ -0,0 +1,3167 @@ +/* memory.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) 
kprintf(__VA_ARGS__) + +#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) + +static char *last_page = (void*)MAP_EARLY_ALLOC; +extern char _head[], _end[]; + +char empty_zero_page[PAGE_SIZE] = { 0 }; + +extern unsigned long arm64_kernel_phys_base; +extern unsigned long arm64_st_phys_base; +extern unsigned long arm64_st_phys_size; + +/* Arch specific early allocation routine */ +void *early_alloc_pages(int nr_pages) +{ + void *p; + + if (last_page == (void *)-1) { + panic("Early allocator is already finalized. Do not use it.\n"); + } else if (MAP_EARLY_ALLOC_END <= (unsigned long)last_page) { + panic("Early allocator is out of memory.\n"); + } + p = last_page; + last_page += (nr_pages * PAGE_SIZE); + + return p; +} + +void early_alloc_invalidate(void) +{ + last_page = (void *)-1; +} + +void *ihk_mc_allocate(int size, int flag) +{ + if (!cpu_local_var(kmalloc_initialized)) { + kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__); + return NULL; + } + return kmalloc(size, IHK_MC_AP_NOWAIT); +} + +void ihk_mc_free(void *p) +{ + if (!cpu_local_var(kmalloc_initialized)) { + kprintf("%s: error, kmalloc not yet initialized\n", __FUNCTION__); + return; + } + kfree(p); +} + +void *get_last_early_heap(void) +{ + return last_page; +} + +/* + * flush_tlb(void) + * - Flush all EL1 & 0 stage 1 TLB entries if + * current VMID have the same entries. + * (If VHE=on, EL2 & 0 entries.) + * + * - All cores in the same Inner Shareable domain. + * + * flush_tlb_single(unsigned long addr) + * - Flush EL1 & 0 stage 1 TLB entries if given VA, current ASID and + * current VMID have the same entries. + * (If VHE=on, EL2 & 0 entries.) + * + * - All cores in the same Inner Shareable domain. + * + * arch_flush_tlb_single(int asid, unsigned long addr) + * - Flush EL1 & 0 stage 1 TLB entries if given VA, given ASID and + * current VMID have the same entries. + * (If VHE=on, EL2 & 0 entries.) + * + * - All cores in the same Inner Shareable domain. 
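A caller invalidating a whole virtual range has to choose between many single-entry operations and one full flush. A hedged sketch of that trade-off built on the primitives documented here (defined just below); the cutoff value and helper name are illustrative, and the usual PAGE_MASK round-down semantics are assumed:

static inline void flush_tlb_range_sketch(int asid, unsigned long start,
					  unsigned long end)
{
	unsigned long addr;

	if (end - start > 32 * PAGE_SIZE) {	/* arbitrary cutoff */
		flush_tlb();			/* full invalidate is cheaper */
		return;
	}
	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE)
		arch_flush_tlb_single(asid, addr);
}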
+void flush_tlb(void)
+{
+	dsb(ishst);
+	asm("tlbi vmalle1is");
+	dsb(ish);
+	isb();
+}
+
+static inline void arch_flush_tlb_single(const int asid, const unsigned long addr)
+{
+	unsigned long flush = 0;
+	flush = addr >> 12UL;
+	flush |= (unsigned long)asid << 48UL;
+
+	dsb(ishst);
+	asm("tlbi vae1is, %0" : : "r" (flush));
+	dsb(ish);
+}
+
+void flush_tlb_single(unsigned long addr)
+{
+	struct process_vm* vm = NULL;
+	struct page_table* pt = NULL;
+	int asid = 0;
+
+	vm = cpu_local_var(current)->vm;
+	if (vm) {
+		pt = vm->address_space->page_table;
+		if (pt) {
+			asid = get_address_space_id(pt);
+		}
+	}
+	arch_flush_tlb_single(asid, addr);
+}
+
+struct page_table {
+	translation_table_t* tt;
+	translation_table_t* tt_pa;
+	int asid;
+};
+
+extern struct page_table swapper_page_table;
+static struct page_table *init_pt = &swapper_page_table;
+static ihk_spinlock_t init_pt_lock;
+
+/* val */
+static inline pte_t ptl4_val(const pte_t* l4p)
+{
+	pte_t pte = 0;
+	if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
+		pte = *l4p;
+	} else {
+		/* return a dummy value */
+		void* phys = (void*)0;
+		pte = (pte_t)phys & PT_PHYSMASK;
+		pte = pte | PMD_SECT_VALID | PMD_TYPE_TABLE;
+	}
+	return pte;
+}
+static inline pte_t ptl3_val(const pte_t* l3p)
+{
+	pte_t pte = 0;
+	if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
+		pte = *l3p;
+	} else {
+		/* return a dummy value */
+		void* phys = (void*)0;
+		pte = (pte_t)phys & PT_PHYSMASK;
+		pte = pte | PMD_SECT_VALID | PMD_TYPE_TABLE;
+	}
+	return pte;
+}
+static inline pte_t ptl2_val(const pte_t* l2p)
+{
+	return *l2p;
+}
+static inline pte_t ptl1_val(const pte_t* l1p)
+{
+	return *l1p;
+}
+static inline pte_t ptl_val(const pte_t* p, int level)
+{
+	pte_t pte = PTE_NULL;
+	switch (level) {
+	case 4:
+		pte = ptl4_val(p);
+		break;
+	case 3:
+		pte = ptl3_val(p);
+		break;
+	case 2:
+		pte = ptl2_val(p);
+		break;
+	case 1:
+		pte = ptl1_val(p);
+		break;
+	default:
+		panic("ptl_val failed.\n");
+	}
+	return pte;
+}
+
+/* index */
+static inline int ptl4_index(unsigned long addr)
+{
+	int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK;
+	return idx;
+}
+static inline int ptl3_index(unsigned long addr)
+{
+	int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK;
+	return idx;
+}
+static inline int ptl2_index(unsigned long addr)
+{
+	int idx = (addr >> PTL2_SHIFT) & PTL2_INDEX_MASK;
+	return idx;
+}
+static inline int ptl1_index(unsigned long addr)
+{
+	int idx = (addr >> PTL1_SHIFT) & PTL1_INDEX_MASK;
+	return idx;
+}
+static inline int ptl_index(unsigned long addr, int level)
+{
+	int idx = 0;
+	switch (level) {
+	case 4:
+		idx = ptl4_index(addr);
+		break;
+	case 3:
+		idx = ptl3_index(addr);
+		break;
+	case 2:
+		idx = ptl2_index(addr);
+		break;
+	case 1:
+		idx = ptl1_index(addr);
+		break;
+	default:
+		panic("ptl_index failed.\n");
+	}
+	return idx;
+}
+
+/* offset */
+static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long addr)
+{
+	pte_t* ptep = NULL;
+	int idx = 0;
+
+	switch (CONFIG_ARM64_PGTABLE_LEVELS)
+	{
+	case 4:
+		idx = ptl4_index(addr);
+		ptep = (pte_t*)ptl4 + idx;
+		break;
+	case 3:
+	case 2:
+	case 1:
+		/* When there is no PTL4, hand over the page-table address instead of an entry. */
+		ptep = (pte_t*)ptl4;
+		break;
+	}
+	return ptep;
+}
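+
+/*
+ * Sketch of how the ptlN_offset() chain walks one virtual address down
+ * to its PTE (__set_pt_page() below follows exactly this pattern). With
+ * CONFIG_ARM64_PGTABLE_LEVELS < 4, ptl4_offset() just passes the table
+ * pointer through, so the same chain works for 2-, 3- and 4-level
+ * configurations:
+ *
+ *	pte_t *p4 = ptl4_offset(tt, v);
+ *	pte_t *p3 = ptl3_offset(p4, v);
+ *	pte_t *p2 = ptl2_offset(p3, v);
+ *	pte_t *p1 = ptl1_offset(p2, v);	// D_Page entry for v
+ */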
+static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr)
+{
+	pte_t* ptep = NULL;
+	pte_t pte = 0;
+	unsigned long phys = 0;
+	translation_table_t* ptl3 = NULL;
+	int idx = 0;
+
+	switch (CONFIG_ARM64_PGTABLE_LEVELS)
+	{
+	case 4:
+		pte = ptl4_val(l4p);
+		phys = pte & PT_PHYSMASK;
+		ptl3 = phys_to_virt(phys);
+		idx = ptl3_index(addr);
+		ptep = (pte_t*)ptl3 + idx;
+		break;
+	case 3:
+		ptl3 = (translation_table_t*)l4p;
+		idx = ptl3_index(addr);
+		ptep = (pte_t*)ptl3 + idx;
+		break;
+	case 2:
+	case 1:
+		/* When there is no PTL3, hand over the page-table address instead of an entry. */
+		ptep = (pte_t*)l4p;
+		break;
+	}
+	return ptep;
+}
+static inline pte_t* ptl2_offset(const pte_t* l3p, unsigned long addr)
+{
+	pte_t* ptep = NULL;
+	pte_t pte = 0;
+	unsigned long phys = 0;
+	translation_table_t* ptl2 = NULL;
+	int idx;
+
+	switch (CONFIG_ARM64_PGTABLE_LEVELS)
+	{
+	case 4:
+	case 3:
+		pte = ptl3_val(l3p);
+		phys = pte & PT_PHYSMASK;
+		ptl2 = phys_to_virt(phys);
+		break;
+	case 2:
+	case 1:
+		/* PTL2 always exists. */
+		ptl2 = (translation_table_t*)l3p;
+		break;
+	}
+	idx = ptl2_index(addr);
+	ptep = (pte_t*)ptl2 + idx;
+	return ptep;
+}
+static inline pte_t* ptl1_offset(const pte_t* l2p, unsigned long addr)
+{
+	pte_t pte = ptl2_val(l2p);
+	unsigned long phys = pte & PT_PHYSMASK;
+	translation_table_t* ptl1 = phys_to_virt(phys);
+	int idx = ptl1_index(addr);
+
+	return (pte_t*)ptl1 + idx;
+}
+static inline pte_t* ptl_offset(const void* p, unsigned long addr, int level)
+{
+	pte_t* ptep = NULL;
+	switch (level) {
+	case 4:
+		ptep = ptl4_offset((const translation_table_t*)p, addr);
+		break;
+	case 3:
+		ptep = ptl3_offset((const pte_t*)p, addr);
+		break;
+	case 2:
+		ptep = ptl2_offset((const pte_t*)p, addr);
+		break;
+	case 1:
+		ptep = ptl1_offset((const pte_t*)p, addr);
+		break;
+	default:
+		panic("ptl_offset failed.\n");
+	}
+	return ptep;
+}
+
+/* set */
+static inline void ptl4_set(pte_t* l4p, pte_t l4)
+{
+	if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
+		*l4p = l4;
+	}
+}
+static inline void ptl3_set(pte_t* l3p, pte_t l3)
+{
+	if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
+		*l3p = l3;
+	}
+}
+static inline void ptl2_set(pte_t* l2p, pte_t l2)
+{
+	*l2p = l2;
+}
+static inline void ptl1_set(pte_t* l1p, pte_t l1)
+{
+	*l1p = l1;
+}
+static inline void ptl_set(pte_t* p, pte_t v, int level)
+{
+	switch (level) {
+	case 4:
+		ptl4_set(p, v);
+		break;
+	case 3:
+		ptl3_set(p, v);
+		break;
+	case 2:
+		ptl2_set(p, v);
+		break;
+	case 1:
+		ptl1_set(p, v);
+		break;
+	default:
+		panic("ptl_set failed.\n");
+	}
+}
+/* clear */
+static inline void ptl4_clear(pte_t* l4p)
+{
+	ptl4_set(l4p, PTE_NULL);
+}
+static inline void ptl3_clear(pte_t* l3p)
+{
+	ptl3_set(l3p, PTE_NULL);
+}
+static inline void ptl2_clear(pte_t* l2p)
+{
+	ptl2_set(l2p, PTE_NULL);
+}
+static inline void ptl1_clear(pte_t* l1p)
+{
+	ptl1_set(l1p, PTE_NULL);
+}
+static inline void ptl_clear(pte_t* p, int level)
+{
+	switch (level) {
+	case 4:
+		ptl4_clear(p);
+		break;
+	case 3:
+		ptl3_clear(p);
+		break;
+	case 2:
+		ptl2_clear(p);
+		break;
+	case 1:
+		ptl1_clear(p);
+		break;
+	default:
+		panic("ptl_clear failed.\n");
+	}
+}
+/* null */
+static inline int ptl4_null(const pte_t* l4p)
+{
+	pte_t pte = ptl4_val(l4p);
+	return pte_is_null(&pte);
+}
+static inline int ptl3_null(const pte_t* l3p)
+{
+	pte_t pte = ptl3_val(l3p);
+	return pte_is_null(&pte);
+}
+static inline int ptl2_null(const pte_t* l2p)
+{
+	pte_t pte = ptl2_val(l2p);
+	return pte_is_null(&pte);
+}
+static inline int ptl1_null(const pte_t* l1p)
+{
+	pte_t pte = ptl1_val(l1p);
+	return pte_is_null(&pte);
+}
+static inline int ptl_null(const pte_t* p, int level)
+{
+	int ret = 0;
+	switch (level) {
+	case 4:
+		ret = ptl4_null(p);
+		break;
+	case 3:
+		ret = ptl3_null(p);
+		break;
+	case 2:
+		ret = ptl2_null(p);
+		break;
+	case 1:
+		ret = ptl1_null(p);
+		break;
+	default:
+		panic("ptl_null failed.\n");
+	}
+	return ret;
+}
+/* present */
+static inline int ptl4_present(const pte_t* l4p)
+{
+	pte_t pte =
ptl4_val(l4p); + return pte_is_present(&pte); +} +static inline int ptl3_present(const pte_t* l3p) +{ + pte_t pte = ptl3_val(l3p); + return pte_is_present(&pte); +} +static inline int ptl2_present(const pte_t* l2p) +{ + pte_t pte = ptl2_val(l2p); + return pte_is_present(&pte); +} +static inline int ptl1_present(const pte_t* l1p) +{ + pte_t pte = ptl1_val(l1p); + return pte_is_present(&pte); +} +static inline int ptl_present(const pte_t* p, int level) +{ + int ret = 0; + switch (level) { + case 4: + ret = ptl4_present(p); + break; + case 3: + ret = ptl3_present(p); + break; + case 2: + ret = ptl2_present(p); + break; + case 1: + ret = ptl1_present(p); + break; + default: + panic("ptl_present failed.\n"); + } + return ret; +} +/* type_block/type_page */ +static inline int ptl4_type_block(const pte_t* l4p) +{ + pte_t pte = ptl4_val(l4p); + int ret = pte_is_type_page(&pte, PTL4_SIZE); + return ret; +} +static inline int ptl3_type_block(const pte_t* l3p) +{ + pte_t pte = ptl3_val(l3p); + int ret = pte_is_type_page(&pte, PTL3_SIZE); + return ret; +} +static inline int ptl2_type_block(const pte_t* l2p) +{ + pte_t pte = ptl2_val(l2p); + int ret = pte_is_type_page(&pte, PTL2_SIZE); + return ret; +} +static inline int ptl1_type_page(const pte_t* l1p) +{ + pte_t pte = ptl1_val(l1p); + int ret = pte_is_type_page(&pte, PTL1_SIZE); + return ret; +} +static inline int ptl_type_page(const pte_t* p, int level) +{ + int ret = 0; + switch (level) { + case 4: + ret = ptl4_type_block(p); + break; + case 3: + ret = ptl3_type_block(p); + break; + case 2: + ret = ptl2_type_block(p); + break; + case 1: + ret = ptl1_type_page(p); + break; + default: + panic("ptl_page failed.\n"); + } + return ret; +} +/* type_table */ +static inline int ptl4_type_table(const pte_t* l4p) +{ + pte_t pte = ptl4_val(l4p); + return (pte & PMD_TYPE_MASK) == PMD_TYPE_TABLE; +} +static inline int ptl3_type_table(const pte_t* l3p) +{ + pte_t pte = ptl3_val(l3p); + return (pte & PMD_TYPE_MASK) == PMD_TYPE_TABLE; +} +static inline int ptl2_type_table(const pte_t* l2p) +{ + pte_t pte = ptl2_val(l2p); + return (pte & PMD_TYPE_MASK) == PMD_TYPE_TABLE; +} +static inline int ptl1_type_table(const pte_t* l1p) +{ + return 0; +} +static inline int ptl_type_table(const pte_t* p, int level) +{ + int ret = 0; + switch (level) { + case 4: + ret = ptl4_type_table(p); + break; + case 3: + ret = ptl3_type_table(p); + break; + case 2: + ret = ptl2_type_table(p); + break; + case 1: + ret = ptl1_type_table(p); + break; + default: + panic("ptl_table failed.\n"); + } + return ret; +} +/* phys */ +static inline unsigned long ptl4_phys(const pte_t* l4p) +{ + pte_t pte = ptl4_val(l4p); + return pte_get_phys(&pte); +} +static inline unsigned long ptl3_phys(const pte_t* l3p) +{ + pte_t pte = ptl3_val(l3p); + return pte_get_phys(&pte); +} +static inline unsigned long ptl2_phys(const pte_t* l2p) +{ + pte_t pte = ptl2_val(l2p); + return pte_get_phys(&pte); +} +static inline unsigned long ptl1_phys(const pte_t* l1p) +{ + pte_t pte = ptl1_val(l1p); + return pte_get_phys(&pte); +} +static inline unsigned long ptl_phys(const pte_t* p, int level) +{ + unsigned long ret = 0; + switch (level) { + case 4: + ret = ptl4_phys(p); + break; + case 3: + ret = ptl3_phys(p); + break; + case 2: + ret = ptl2_phys(p); + break; + case 1: + ret = ptl1_phys(p); + break; + default: + panic("ptl_phys failed.\n"); + } + return ret; +} +/* dirty */ +static inline int ptl4_dirty(const pte_t* l4p) +{ + pte_t pte = ptl4_val(l4p); + return pte_is_dirty(&pte, PTL4_SIZE); +} +static inline int 
ptl3_dirty(const pte_t* l3p)
+{
+	pte_t pte = ptl3_val(l3p);
+	return pte_is_dirty(&pte, PTL3_SIZE);
+}
+static inline int ptl2_dirty(const pte_t* l2p)
+{
+	pte_t pte = ptl2_val(l2p);
+	return pte_is_dirty(&pte, PTL2_SIZE);
+}
+static inline int ptl1_dirty(const pte_t* l1p)
+{
+	pte_t pte = ptl1_val(l1p);
+	return pte_is_dirty(&pte, PTL1_SIZE);
+}
+static inline int ptl_dirty(const pte_t* p, int level)
+{
+	int ret = 0;
+	switch (level) {
+	case 4:
+		ret = ptl4_dirty(p);
+		break;
+	case 3:
+		ret = ptl3_dirty(p);
+		break;
+	case 2:
+		ret = ptl2_dirty(p);
+		break;
+	case 1:
+		ret = ptl1_dirty(p);
+		break;
+	default:
+		panic("ptl_dirty failed.\n");
+	}
+	return ret;
+}
+/* fileoff */
+static inline int ptl4_fileoff(const pte_t* l4p)
+{
+	pte_t pte = ptl4_val(l4p);
+	return pte_is_fileoff(&pte, PTL4_SIZE);
+}
+static inline int ptl3_fileoff(const pte_t* l3p)
+{
+	pte_t pte = ptl3_val(l3p);
+	return pte_is_fileoff(&pte, PTL3_SIZE);
+}
+static inline int ptl2_fileoff(const pte_t* l2p)
+{
+	pte_t pte = ptl2_val(l2p);
+	return pte_is_fileoff(&pte, PTL2_SIZE);
+}
+static inline int ptl1_fileoff(const pte_t* l1p)
+{
+	pte_t pte = ptl1_val(l1p);
+	return pte_is_fileoff(&pte, PTL1_SIZE);
+}
+static inline int ptl_fileoff(const pte_t* p, int level)
+{
+	int ret = 0;
+	switch (level) {
+	case 4:
+		ret = ptl4_fileoff(p);
+		break;
+	case 3:
+		ret = ptl3_fileoff(p);
+		break;
+	case 2:
+		ret = ptl2_fileoff(p);
+		break;
+	case 1:
+		ret = ptl1_fileoff(p);
+		break;
+	default:
+		panic("ptl_fileoff failed.\n");
+	}
+	return ret;
+}
+
+typedef void (*setup_normal_area_t)(
+	translation_table_t *tt,
+	unsigned long base_start,
+	unsigned long base_end);
+
+static void setup_l2(translation_table_t *tt,
+		unsigned long base_start, unsigned long base_end)
+{
+	int i, sidx, eidx;
+	unsigned long start, end;
+	unsigned long virt_start, virt_end;
+
+	// compute the start index
+	virt_start = (unsigned long)phys_to_virt(base_start);
+	sidx = ptl2_index(virt_start);
+
+	// compute the highest address this table can map
+	end = __page_align(base_start, PTL2_SIZE * PTL2_ENTRIES);
+	end += PTL2_SIZE * PTL2_ENTRIES;
+
+	// determine the end index
+	if (end <= base_end) {
+		// map up to the last entry of this table
+		eidx = PTL2_ENTRIES - 1;
+	} else {
+		// base_end is within this table's range, so derive the index from it
+		virt_end = (unsigned long)phys_to_virt(base_end);
+		eidx = ptl2_index(virt_end);
+	}
+
+	// install the entries
+	start = __page_align(base_start, PTL2_SIZE);
+	for (i = sidx; i <= eidx; i++) {
+		pte_t* ptr;
+		pte_t val;
+
+		// get the address of the entry to fill
+		ptr = &tt[i];
+
+		val = (start & PHYS_MASK) | PFL_KERN_BLK_ATTR;
+
+		// install the entry
+		ptl2_set(ptr, val);
+
+		// advance to the next entry
+		start += PTL2_SIZE;
+	}
+}
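+
+/*
+ * Worked example for setup_l2(), assuming a 4KB granule (PTL2_SIZE = 2MiB;
+ * the chunk addresses are made up): mapping the physical range
+ * [0x82000000, 0x82600000) installs three D_Block entries, one per 2MiB:
+ *
+ *	val = (0x82000000 & PHYS_MASK) | PFL_KERN_BLK_ATTR;
+ *	ptl2_set(&tt[ptl2_index((unsigned long)phys_to_virt(0x82000000))], val);
+ *	// likewise for 0x82200000 and 0x82400000
+ */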
+
+static inline void setup_middle_level(translation_table_t *tt, unsigned long base_start, unsigned long base_end,
+		setup_normal_area_t setup, int shift, unsigned long pgsize, int entries, int level)
+{
+	int i, sidx, eidx;
+	unsigned long start, end;
+	unsigned long virt_start, virt_end;
+
+	// compute the start index
+	//start = __page_align(base_start, pgsize);
+	virt_start = (unsigned long)phys_to_virt(base_start);
+	sidx = ptl_index(virt_start, level);
+
+	// compute the highest address this table can map
+	end = __page_align(base_start, pgsize * entries);
+	end += pgsize * entries;
+
+	// determine the end index
+	if (end <= base_end) {
+		// map up to the last entry of this table
+		eidx = entries - 1;
+	} else {
+		// base_end is within this table's range, so derive the index from it
+		virt_end = (unsigned long)phys_to_virt(base_end);
+		eidx = ptl_index(virt_end, level);
+	}
+
+	// install the entries
+	start = base_start;
+	for (i = sidx; i <= eidx; i++) {
+		pte_t* ptr;
+		pte_t val;
+		unsigned long next;
+		translation_table_t* next_tt = NULL;
+
+		// get the address of the entry to fill
+		ptr = &tt[i];
+
+		// allocate and initialize a page table
+		if (ptl_null(ptr, level)) {
+			next_tt = ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL);
+			next = virt_to_phys(next_tt);
+			memset(next_tt, 0, PAGE_SIZE);
+		} else {
+			unsigned long arm64_kernel_phys_end;
+			unsigned long arm64_early_alloc_phys_end;
+#ifdef CONFIG_ARM64_64K_PAGES
+			arm64_kernel_phys_end = arm64_kernel_phys_base + (page_align_up(_end) - (unsigned long)_head);
+#else
+			arm64_kernel_phys_end = arm64_kernel_phys_base + (large_page_align_up(_end) - (unsigned long)_head);
+#endif
+			arm64_early_alloc_phys_end = arm64_kernel_phys_end + (MAP_EARLY_ALLOC_END - MAP_EARLY_ALLOC);
+
+			next = ptl_phys(ptr, level);
+			if (arm64_kernel_phys_base <= next && next < arm64_kernel_phys_end) {
+				// phys_to_virt of kernel image area.
+				struct page_table* pt = get_init_page_table();
+				unsigned long va = (unsigned long)pt->tt;
+				unsigned long pa = (unsigned long)pt->tt_pa;
+				unsigned long diff = va - pa;
+				next_tt = (void*)(next + diff);
+			} else if (arm64_kernel_phys_end <= next && next < arm64_early_alloc_phys_end) {
+				// phys_to_virt of early alloc area.
+				unsigned long early_alloc_phys_base = arm64_kernel_phys_end;
+				unsigned long offset = next - early_alloc_phys_base;
+				next_tt = (void*)(MAP_EARLY_ALLOC + offset);
+			} else {
+				kprintf("init normal area: level=%d, next_phys=%p\n", level, next);
+				panic("unexpected physical memory area.");
+			}
+		}
+		setup(next_tt, start, base_end);
+
+		val = (next & PHYS_MASK) | PFL_PDIR_TBL_ATTR;
+
+		// install the entry
+		ptl_set(ptr, val, level);
+
+		// page-align start
+		// (forces sidx to 0 for the second and subsequent tables at each level)
+		start = __page_align(start, pgsize);
+
+		// advance to the next entry
+		start += pgsize;
+	}
+}
+
+static void setup_l3(translation_table_t *tt,
+		unsigned long base_start, unsigned long base_end)
+{
+	setup_middle_level(tt, base_start, base_end,
+			setup_l2, PTL3_SHIFT, PTL3_SIZE, PTL3_ENTRIES, 3);
+}
+
+static void setup_l4(translation_table_t *tt,
+		unsigned long base_start, unsigned long base_end)
+{
+	setup_middle_level(tt, base_start, base_end,
+			setup_l3, PTL4_SHIFT, PTL4_SIZE, PTL4_ENTRIES, 4);
+}
+
+/**
+ * Map the straight map area.
+ * @param pt page-table address (VA in the kernel image area or the early_alloc area)
+ */
+static void init_normal_area(struct page_table *pt)
+{
+	setup_normal_area_t setup_func_table[] = {setup_l2, setup_l3, setup_l4};
+	setup_normal_area_t setup = setup_func_table[CONFIG_ARM64_PGTABLE_LEVELS - 2];
+	translation_table_t* tt;
+	int i;
+
+	tt = get_translation_table(pt);
+
+	for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) {
+		unsigned long map_start, map_end;
+		int numa_id;
+		ihk_mc_get_memory_chunk(i, &map_start, &map_end, &numa_id);
+		kprintf("[%d] map_start = %lx, map_end = %lx @ NUMA: %d\n",
+				i, map_start, map_end, numa_id);
+		setup(tt, map_start, map_end);
+	}
+}
+
+static translation_table_t* __alloc_new_tt(ihk_mc_ap_flag ap_flag)
+{
+	translation_table_t* newtt = ihk_mc_alloc_pages(1, ap_flag);
+
+	if (newtt == NULL)
+		return NULL;	/* virt_to_phys(NULL) would not be NULL */
+	memset(newtt, 0, PAGE_SIZE);
+
+	return (void*)virt_to_phys(newtt);
+}
+
+/*
+ * Conversion of attributes for D_Page and D_Block.
+ * D_Table attributes are fixed to PFL_PDIR_TBL_ATTR.
+ */
+enum ihk_mc_pt_attribute attr_mask =
+	0
+	| PTATTR_ACTIVE
+	| PTATTR_WRITABLE
+	| PTATTR_USER
+	| PTATTR_DIRTY
+	| PTATTR_FILEOFF
+	| PTATTR_LARGEPAGE
+	| PTATTR_NO_EXECUTE
+	;
+#define ATTR_MASK attr_mask
+
+static unsigned long attr_to_blockattr(enum ihk_mc_pt_attribute attr)
+{
+	unsigned long pte = (attr & ATTR_MASK);
+	// append D_Block attributes.
+ pte = (pte & ~PMD_TYPE_MASK) | PMD_TYPE_SECT; + if (attr & PTATTR_UNCACHABLE) { + pte |= PROT_SECT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE); + } else if (attr & PTATTR_WRITE_COMBINED) { + pte |= PROT_SECT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC); + } else { + pte |= PROT_SECT_DEFAULT | PTE_ATTRINDX(MT_NORMAL); + } + return pte; +} + +static unsigned long attr_to_pageattr(enum ihk_mc_pt_attribute attr) +{ + unsigned long pte = (attr & ATTR_MASK); + // append D_Page attribute. + pte = (pte & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE; + if (attr & PTATTR_UNCACHABLE) { + pte |= PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE); + } else if (attr & PTATTR_WRITE_COMBINED) { + pte |= PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC); + } else { + pte |= PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL); + } + return pte; +} + +static unsigned long attr_to_l4attr_not_flip(enum ihk_mc_pt_attribute attr){ return attr_to_blockattr(attr); } +static unsigned long attr_to_l3attr_not_flip(enum ihk_mc_pt_attribute attr){ return attr_to_blockattr(attr); } +static unsigned long attr_to_l2attr_not_flip(enum ihk_mc_pt_attribute attr){ return attr_to_blockattr(attr); } +static unsigned long attr_to_l1attr_not_flip(enum ihk_mc_pt_attribute attr){ return attr_to_pageattr(attr); } +static unsigned long attr_to_lattr_not_flip(enum ihk_mc_pt_attribute attr, int level) +{ + switch (level) + { + case 4: return attr_to_l4attr_not_flip(attr); + case 3: return attr_to_l3attr_not_flip(attr); + case 2: return attr_to_l2attr_not_flip(attr); + case 1: return attr_to_l1attr_not_flip(attr); + } + panic("invalid page table level.\n"); + return 0; +} + +static unsigned long attr_to_l4attr(enum ihk_mc_pt_attribute attr){ return attr_to_l4attr_not_flip(attr ^ attr_flip_bits); } +static unsigned long attr_to_l3attr(enum ihk_mc_pt_attribute attr){ return attr_to_l3attr_not_flip(attr ^ attr_flip_bits); } +static unsigned long attr_to_l2attr(enum ihk_mc_pt_attribute attr){ return attr_to_l2attr_not_flip(attr ^ attr_flip_bits); } +static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr){ return attr_to_l1attr_not_flip(attr ^ attr_flip_bits); } +static unsigned long attr_to_lattr(enum ihk_mc_pt_attribute attr, int level) +{ + return attr_to_lattr_not_flip(attr ^ attr_flip_bits, level); +} + +static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys, + enum ihk_mc_pt_attribute attr) +{ + unsigned long v = (unsigned long)virt; + translation_table_t* newtt; + ihk_mc_ap_flag ap_flag; + int in_kernel = (v >= USER_END); + unsigned long init_pt_lock_flags; + int ret = -ENOMEM; + pte_t* ptep; + pte_t pte; + translation_table_t* tt = NULL; + + init_pt_lock_flags = 0; /* for avoidance of warning */ + if (in_kernel) { + init_pt_lock_flags = ihk_mc_spinlock_lock(&init_pt_lock); + } + + ap_flag = (attr & PTATTR_FOR_USER) ? 
+ IHK_MC_AP_NOWAIT: IHK_MC_AP_CRITICAL; + + if (!pt) { + pt = get_init_page_table(); + } + tt = get_translation_table(pt); + + if (attr & PTATTR_LARGEPAGE) { + phys &= LARGE_PAGE_MASK; + } else { + phys &= PAGE_MASK; + } + + /* TODO: more detailed attribute check */ + ptep = ptl4_offset(tt, v); + if (!ptl4_present(ptep)) { + if((newtt = __alloc_new_tt(ap_flag)) == NULL) + goto out; + pte = (pte_t)newtt | PFL_PDIR_TBL_ATTR; + ptl4_set(ptep, pte); + } + + ptep = ptl3_offset(ptep, v); + if (!ptl3_present(ptep)) { + if((newtt = __alloc_new_tt(ap_flag)) == NULL) + goto out; + pte = (pte_t)newtt | PFL_PDIR_TBL_ATTR; + ptl3_set(ptep, pte); + } + + ptep = ptl2_offset(ptep, v); + if (attr & PTATTR_LARGEPAGE) { + // D_Block + if (ptl2_present(ptep)) { + unsigned long _phys = ptl2_val(ptep) & LARGE_PAGE_MASK; + if (_phys == phys && ptl2_type_block(ptep)) { + ret = 0; + } else { + ret = -EBUSY; + kprintf("EBUSY: page table for 0x%lX is already set\n", virt); + } + } else { + ptl2_set(ptep, phys | attr_to_l2attr(attr)); + ret = 0; + } + goto out; + } + // D_Table + if (!ptl2_present(ptep)) { + if((newtt = __alloc_new_tt(ap_flag)) == NULL) + goto out; + pte = (pte_t)newtt | PFL_PDIR_TBL_ATTR; + ptl2_set(ptep, pte); + } + + //D_Page + ptep = ptl1_offset(ptep, v); + if (ptl1_present(ptep)) { + unsigned long _phys = ptl1_val(ptep) & PAGE_MASK; + if (_phys == phys && ptl1_type_page(ptep)) { + ret = 0; + } else { + ret = -EBUSY; + kprintf("EBUSY: page table for 0x%lX is already set\n", virt); + } + } else { + ptl1_set(ptep, phys | attr_to_l1attr(attr)); + ret = 0; + } +out: + if (in_kernel) { + ihk_mc_spinlock_unlock(&init_pt_lock, init_pt_lock_flags); + } + return ret; +} + +static int __clear_pt_page(struct page_table *pt, void *virt, int largepage) +{ + unsigned long v = (unsigned long)virt; + pte_t *ptep; + translation_table_t *tt; + + if (!pt) { + pt = get_init_page_table(); + } + tt = get_translation_table(pt); + + if (largepage) { + v &= LARGE_PAGE_MASK; + } else { + v &= PAGE_MASK; + } + + ptep = ptl4_offset(tt, v); + if (!ptl4_present(ptep)) { + return -EINVAL; + } + + ptep = ptl3_offset(ptep, v); + if (!ptl3_present(ptep)) { + return -EINVAL; + } + + ptep = ptl2_offset(ptep, v); + if (largepage) { + // D_Block + if (!ptl2_present(ptep) || !ptl2_type_block(ptep)) { + return -EINVAL; + } + ptl2_clear(ptep); + return 0; + } + // D_Table + if (!ptl2_present(ptep) || !ptl2_type_table(ptep)) { + return -EINVAL; + } + // D_Page + ptep = ptl1_offset(ptep, v); + ptl1_clear(ptep); + return 0; +} + +uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt) +{ + uint64_t ret = 0; + unsigned long v = (unsigned long)virt; + pte_t* ptep; + translation_table_t* tt; + + unsigned long paddr; + unsigned long size; + unsigned long mask; + unsigned long shift; + + if (!pt) { + pt = get_init_page_table(); + } + tt = get_translation_table(pt); + + ptep = ptl4_offset(tt, v); + if (!ptl4_present(ptep)) { + return ret; + } + + ptep = ptl3_offset(ptep, v); + if (!ptl3_present(ptep)) { + return ret; + } + if (ptl3_type_block(ptep)) { + paddr = ptl3_phys(ptep); + size = PTL3_SIZE; + mask = PTL3_MASK; + shift = PTL3_SHIFT; + goto out; + } + + ptep = ptl2_offset(ptep, v); + if (!ptl2_present(ptep)) { + return ret; + } + if (ptl2_type_block(ptep)) { + paddr = ptl2_phys(ptep); + size = PTL2_SIZE; + mask = PTL2_MASK; + shift = PTL2_SHIFT; + goto out; + } + + ptep = ptl1_offset(ptep, v); + if (!ptl1_present(ptep)) { + return ret; + } + paddr = ptl1_phys(ptep); + size = PTL1_SIZE; + mask = PTL1_MASK; + shift = 
PTL1_SHIFT; +out: + ret = PM_PFRAME(((paddr & mask) + (v & (size - 1))) >> PAGE_SHIFT); + ret |= PM_PSHIFT(shift) | PM_PRESENT; + return ret; +} + + +int ihk_mc_pt_virt_to_phys_size(struct page_table *pt, + const void *virt, + unsigned long *phys, + unsigned long *size) +{ + unsigned long v = (unsigned long)virt; + pte_t* ptep; + translation_table_t* tt; + + unsigned long paddr; + unsigned long lsize; + + if (!pt) { + pt = get_init_page_table(); + } + tt = get_translation_table(pt); + + ptep = ptl4_offset(tt, v); + if (!ptl4_present(ptep)) { + return -EFAULT; + } + + ptep = ptl3_offset(ptep, v); + if (!ptl3_present(ptep)) { + return -EFAULT; + } + if (ptl3_type_block(ptep)) { + paddr = ptl3_phys(ptep); + lsize = PTL3_SIZE; + goto out; + } + + ptep = ptl2_offset(ptep, v); + if (!ptl2_present(ptep)) { + return -EFAULT; + } + if (ptl2_type_block(ptep)) { + paddr = ptl2_phys(ptep); + lsize = PTL2_SIZE; + goto out; + } + + ptep = ptl1_offset(ptep, v); + if (!ptl1_present(ptep)) { + return -EFAULT; + } + paddr = ptl1_phys(ptep); + lsize = PTL1_SIZE; +out: + *phys = paddr | (v & (lsize - 1)); + if(size) *size = lsize; + return 0; +} + +int ihk_mc_pt_virt_to_phys(struct page_table *pt, + const void *virt, unsigned long *phys) +{ + return ihk_mc_pt_virt_to_phys_size(pt, virt, phys, NULL); +} + + +int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) +{ + const unsigned long v = (unsigned long)virt; + const pte_t* ptep; + translation_table_t* tt; + + if (!pt) { + pt = get_init_page_table(); + } + tt = get_translation_table(pt); + + ptep = ptl4_offset(tt, v); + __kprintf("l4 table: 0x%lX l4idx: %d\n", virt_to_phys(tt), ptl4_index(v)); + if (!(ptl4_present(ptep))) { + __kprintf("0x%lX l4idx not present! \n", v); + return -EFAULT; + } + __kprintf("l4 entry: 0x%lX\n", ptl4_val(ptep)); + + ptep = ptl3_offset(ptep, v); + __kprintf("l3 table: 0x%lX l3idx: %d\n", ptl3_phys(ptep), ptl3_index(v)); + if (!(ptl3_present(ptep))) { + __kprintf("0x%lX l3idx not present! \n", v); + return -EFAULT; + } + __kprintf("l3 entry: 0x%lX\n", ptl3_val(ptep)); + if (ptl3_type_block(ptep)) { + __kprintf("l3 entry size: 0x%lx\n", PTL3_SIZE); + return 0; + } + + ptep = ptl2_offset(ptep, v); + __kprintf("l2 table: 0x%lX l2idx: %d\n", ptl2_phys(ptep), ptl2_index(v)); + if (!(ptl2_present(ptep))) { + __kprintf("0x%lX l2idx not present! \n", v); + return -EFAULT; + } + __kprintf("l2 entry: 0x%lX\n", ptl2_val(ptep)); + if (ptl2_type_block(ptep)) { + __kprintf("l2 entry size: 0x%lx\n", PTL2_SIZE); + return 0; + } + + ptep = ptl1_offset(ptep, v); + __kprintf("l1 table: 0x%lX l1idx: %d\n", ptl1_phys(ptep), ptl1_index(v)); + if (!(ptl1_present(ptep))) { + __kprintf("0x%lX l1idx not present! 
\n", v);
+		__kprintf("l1 entry: 0x%lX\n", ptl1_val(ptep));
+		return -EFAULT;
+	}
+
+	__kprintf("l1 entry: 0x%lX\n", ptl1_val(ptep));
+	return 0;
+}
+
+int set_pt_large_page(struct page_table *pt, void *virt, unsigned long phys,
+		enum ihk_mc_pt_attribute attr)
+{
+	return __set_pt_page(pt, virt, phys, attr | PTATTR_LARGEPAGE
+			| PTATTR_ACTIVE);
+}
+
+int ihk_mc_pt_set_large_page(page_table_t pt, void *virt,
+		unsigned long phys, enum ihk_mc_pt_attribute attr)
+{
+	return __set_pt_page(pt, virt, phys, attr | PTATTR_LARGEPAGE
+			| PTATTR_ACTIVE);
+}
+
+int ihk_mc_pt_set_page(page_table_t pt, void *virt,
+		unsigned long phys, enum ihk_mc_pt_attribute attr)
+{
+	return __set_pt_page(pt, virt, phys, attr | PTATTR_ACTIVE);
+}
+
+int ihk_mc_pt_prepare_map(page_table_t p, void *virt, unsigned long size,
+		enum ihk_mc_pt_prepare_flag flag)
+{
+	/*
+	 * This is used to pre-allocate the PGD for the vmap area
+	 * (virtual_allocator_init calls it with IHK_MC_PT_FIRST_LEVEL),
+	 * but the top-level page table does not necessarily become a
+	 * D_Table. Creating it as a D_Block is not possible either,
+	 * because the target is vmap and its physical address cannot be
+	 * determined. Keep this as an empty implementation until it is
+	 * used in some other way.
+	 */
+	return 0;
+}
+
+struct page_table *ihk_mc_pt_create(ihk_mc_ap_flag ap_flag)
+{
+	struct page_table *pt;
+	translation_table_t* tt;
+
+	// allocate page_table
+	pt = (struct page_table*)kmalloc(sizeof(*pt), ap_flag);
+	if (pt == NULL) {
+		return NULL;
+	}
+	// allocate translation_table
+	tt = ihk_mc_alloc_pages(1, ap_flag);	//call __alloc_new_tt()?
+	if (tt == NULL) {
+		kfree(pt);
+		return NULL;
+	}
+	// initialize
+	memset(pt, 0, sizeof(*pt));
+	memset(tt, 0, PAGE_SIZE);
+	set_translation_table(pt, tt);
+	set_address_space_id(pt, 0);
+	return pt;
+}
+
+static void destroy_page_table(int level, translation_table_t* tt)
+{
+	if ((level < 1) || (CONFIG_ARM64_PGTABLE_LEVELS < level)) {
+		panic("destroy_page_table: level is out of range");
+	}
+	if (tt == NULL) {
+		panic("destroy_page_table: tt is NULL");
+	}
+
+	if (level > 1) {
+		const int entries[] = {
+			PTL2_ENTRIES,
+			PTL3_ENTRIES,
+			PTL4_ENTRIES
+		};
+		const int ents = entries[level-2];
+		int ix;
+		pte_t pte;
+		translation_table_t *lower;
+
+		for (ix = 0; ix < ents; ++ix) {
+			pte = tt[ix];
+			if (!ptl_present(&pte, level)) {
+				/* entry is not valid */
+				continue;
+			}
+			if (!ptl_type_table(&pte, level)) {
+				/* not a page table */
+				continue;
+			}
+			lower = (translation_table_t*)ptl_phys(&pte, level);
+			lower = phys_to_virt((unsigned long)lower);
+			destroy_page_table(level-1, lower);
+		}
+	}
+
+	ihk_mc_free_pages(tt, 1);
+	return;
+}
+
+void ihk_mc_pt_destroy(struct page_table *pt)
+{
+	const int level = CONFIG_ARM64_PGTABLE_LEVELS;
+	translation_table_t* tt;
+
+	tt = get_translation_table(pt);
+	destroy_page_table(level, tt);
+	free_mmu_context(pt);
+	kfree(pt);
+	return;
+}
+
+int ihk_mc_pt_clear_page(page_table_t pt, void *virt)
+{
+	return __clear_pt_page(pt, virt, 0);
+}
+
+int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt)
+{
+	return __clear_pt_page(pt, virt, 1);
+}
+
+typedef int walk_pte_fn_t(void *args, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end);
+
+typedef int walk_pte_t(translation_table_t *tt, uint64_t base, uint64_t start,
+		uint64_t end, walk_pte_fn_t *funcp, void *args);
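+
+/*
+ * All walk_pte_lN() helpers below share one shape: clamp [start, end) to
+ * the entries covered by this table, call the callback once per entry,
+ * and fold -ENOENT ("nothing at this entry") results. A minimal callback
+ * matching walk_pte_fn_t, e.g. one that only counts non-null level-1
+ * entries, could look like this (sketch, not used by the code below):
+ *
+ *	static int count_present(void *args, pte_t *ptep, uint64_t base,
+ *			uint64_t start, uint64_t end)
+ *	{
+ *		if (ptl1_null(ptep))
+ *			return -ENOENT;
+ *		++*(int *)args;
+ *		return 0;
+ *	}
+ */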
+
+static int walk_pte_l1(translation_table_t *tt, uint64_t base, uint64_t start,
+		uint64_t end, walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL1_SHIFT);
+	eix = ((end == 0) || ((base + PTL2_SIZE) <= end))? PTL1_ENTRIES
+		: (((end - base) + (PTL1_SIZE - 1)) >> PTL1_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL1_SIZE;
+		error = (*funcp)(args, &tt[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int walk_pte_l2(translation_table_t *tt, uint64_t base, uint64_t start,
+		uint64_t end, walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL2_SHIFT);
+	eix = ((end == 0) || ((base + PTL3_SIZE) <= end))? PTL2_ENTRIES
+		: (((end - base) + (PTL2_SIZE - 1)) >> PTL2_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL2_SIZE;
+		error = (*funcp)(args, &tt[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int walk_pte_l3(translation_table_t *tt, uint64_t base, uint64_t start,
+		uint64_t end, walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL3_SHIFT);
+	eix = ((end == 0) || ((base + PTL4_SIZE) <= end))? PTL3_ENTRIES
+		: (((end - base) + (PTL3_SIZE - 1)) >> PTL3_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL3_SIZE;
+		error = (*funcp)(args, &tt[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int walk_pte_l4(translation_table_t *tt, uint64_t base, uint64_t start,
+		uint64_t end, walk_pte_fn_t *funcp, void *args)
+{
+	int six;
+	int eix;
+	int ret;
+	int i;
+	int error;
+	uint64_t off;
+
+	six = (start <= base)? 0: ((start - base) >> PTL4_SHIFT);
+	eix = (end == 0)? PTL4_ENTRIES
+		: (((end - base) + (PTL4_SIZE - 1)) >> PTL4_SHIFT);
+
+	ret = -ENOENT;
+	for (i = six; i < eix; ++i) {
+		off = i * PTL4_SIZE;
+		error = (*funcp)(args, &tt[i], base+off, start, end);
+		if (!error) {
+			ret = 0;
+		}
+		else if (error != -ENOENT) {
+			ret = error;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int split_large_page(pte_t *ptep, size_t pgsize)
+{
+	translation_table_t *tt, *tt_pa;
+	uintptr_t phys_base;
+	unsigned int i;
+	uintptr_t phys;
+	struct page *page;
+	pte_t pte;
+	pte_t d_table;
+	int table_level;
+	unsigned int entries;
+	unsigned long under_pgsize;
+
+	// determine the large page size
+	switch (pgsize)
+	{
+#if FIRST_LEVEL_BLOCK_SUPPORT
+	case __PTL3_SIZE:
+		table_level = 3;
+		entries = PTL3_ENTRIES;
+		under_pgsize = PTL2_SIZE;
+		break;
+#endif
+	case __PTL2_SIZE:
+		table_level = 2;
+		entries = PTL2_ENTRIES;
+		under_pgsize = PTL1_SIZE;
+		break;
+	default:
+		ekprintf("split_large_page:invalid pgsize %#lx\n", pgsize);
+		return -EINVAL;
+	}
+
+	// create a D_Table
+	tt_pa = __alloc_new_tt(IHK_MC_AP_NOWAIT);
+	if (tt_pa == NULL) {
+		ekprintf("split_large_page:__alloc_new_tt failed\n");
+		return -ENOMEM;
+	}
+	tt = phys_to_virt((unsigned long)tt_pa);
+
+	// change the descriptor type (a PTL3 block is split into PTL2 D_Blocks, so its attributes stay unchanged)
+	pte = ptl_val(ptep, table_level);
+	if (pgsize == PTL2_SIZE) {
+		// D_Block -> D_Page
+		pte = (pte & ~PMD_TYPE_MASK) | PTE_TYPE_PAGE;
+	}
+
+	if (pte_is_fileoff(ptep, pgsize)) {
+		// not-yet-allocated cases (e.g. during remap_file_pages) take this path
+		phys_base = NOPHYS;
+	}
+	else {
+		phys_base = ptl_phys(ptep, table_level);
+	}
+
+	for (i = 0; i < entries; ++i) {
+		if (phys_base != NOPHYS) {
+			phys = phys_base + (i * under_pgsize);
+			page = phys_to_page(phys);
+			if (page) {
+				page_map(page);
+			}
+		}
+		tt[i] = pte;
+		pte += under_pgsize;
+	}
+
+	d_table = (pte_t)((unsigned long)tt_pa & PT_PHYSMASK) | PFL_PDIR_TBL_ATTR;
+	ptl_set(ptep, d_table, table_level);
+
+#if 1
+	// revert git:4c8f583c0c0bb6f6fb2b103a006caee67e6668be
+	// always page_unmap.
+	pgsize = PTL1_SIZE;
+#endif
+
+	/* Do not do this check for large pages as they don't come from the zeroobj
+	 * and are not actually mapped.
+	 * TODO: clean up zeroobj as we don't really need it, anonymous mappings
+	 * should be allocated for real */
+	if (pgsize != PTL2_SIZE) {
+		if (phys_base != NOPHYS) {
+			page = phys_to_page(phys_base);
+			if (pgsize != PTL2_SIZE && page && page_unmap(page)) {
+				kprintf("split_large_page:page_unmap:%p\n", page);
+#ifndef POSTK_DEBUG_TEMP_FIX_15
+				panic("split_large_page:page_unmap\n");
+#endif /* POSTK_DEBUG_TEMP_FIX_15 */
+			}
+		}
+	}
+	return 0;
+}
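+
+/*
+ * Net effect of split_large_page() on a 2MiB D_Block (4KB granule,
+ * PTL2_ENTRIES = 512 in this sketch): one block entry becomes a D_Table
+ * pointing at 512 D_Page entries that reproduce the original output
+ * address range and attributes:
+ *
+ *	before:	PTL2[i] = phys | <block attrs>			(one 2MiB block)
+ *	after:	PTL2[i] = table_phys | PFL_PDIR_TBL_ATTR
+ *		PTL1[n] = (phys + n * PTL1_SIZE) | <page attrs>	(n = 0..511)
+ */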
+
+struct visit_pte_args {
+	page_table_t pt;
+	enum visit_pte_flag flags;
+	int pgshift;
+	pte_visitor_t *funcp;
+	void *arg;
+};
+
+static int visit_pte_range_middle(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end, int level);
+
+static int visit_pte_l1(void *arg0, pte_t *ptep, uintptr_t base,
+		uintptr_t start, uintptr_t end)
+{
+	struct visit_pte_args *args = arg0;
+
+	if (ptl1_null(ptep) && (args->flags & VPTEF_SKIP_NULL))
+		return 0;
+
+	return (*args->funcp)(args->arg, args->pt, ptep, (void *)base, PTL1_SHIFT);
+}
+
+static int visit_pte_l2(void *arg0, pte_t *ptep, uintptr_t base,
+		uintptr_t start, uintptr_t end)
+{
+	return visit_pte_range_middle(arg0, ptep, base, start, end, 2);
+}
+
+static int visit_pte_l3(void *arg0, pte_t *ptep, uintptr_t base,
+		uintptr_t start, uintptr_t end)
+{
+	return visit_pte_range_middle(arg0, ptep, base, start, end, 3);
+}
+
+static int visit_pte_l4(void *arg0, pte_t *ptep, uintptr_t base,
+		uintptr_t start, uintptr_t end)
+{
+	return visit_pte_range_middle(arg0, ptep, base, start, end, 4);
+}
+
+static int visit_pte_range_middle(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end, int level)
+{
+	const struct table {
+		walk_pte_t* walk;
+		walk_pte_fn_t* callback;
+		unsigned long pgsize;	/* current level page size */
+		unsigned long pgshift;	/* current level page shift */
+	} table[] = {
+		{ walk_pte_l1, visit_pte_l1, PTL2_SIZE, PTL2_SHIFT },	/*PTL2*/
+		{ walk_pte_l2, visit_pte_l2, PTL3_SIZE, PTL3_SHIFT },	/*PTL3*/
+		{ walk_pte_l3, visit_pte_l3, PTL4_SIZE, PTL4_SHIFT },	/*PTL4*/
+	};
+	const struct table tbl = table[level-2];
+
+	int error;
+	struct visit_pte_args *args = arg0;
+	translation_table_t* tt;
+
+	if (ptl_null(ptep, level) && (args->flags & VPTEF_SKIP_NULL))
+		return 0;
+
+	if ((ptl_null(ptep, level) || ptl_type_page(ptep, level))
+			&& (start <= base)
+			&& (((base + tbl.pgsize) <= end)
+				|| (end == 0))
+			&& (!args->pgshift || (args->pgshift == tbl.pgshift))) {
+		error = (*args->funcp)(args->arg, args->pt, ptep,
+				(void *)base, tbl.pgshift);
+		if (error != -E2BIG) {
+			return error;
+		}
+	}
+
+	if (ptl_type_page(ptep, level)) {
+		ekprintf("visit_pte_range_middle(level=%d):split large page\n", level);
+		return -ENOMEM;
+	}
+
+	if (ptl_null(ptep, level)) {
+		translation_table_t* tt_pa;
+		pte_t pte;
+		tt_pa = __alloc_new_tt(IHK_MC_AP_NOWAIT);
+		if (tt_pa == NULL)
+			return -ENOMEM;
+		pte = (pte_t)(((unsigned long)tt_pa & PT_PHYSMASK) | PFL_PDIR_TBL_ATTR);
+		ptl_set(ptep, pte, level);
+		tt = (translation_table_t*)phys_to_virt((unsigned long)tt_pa);
+	}
+	else {
+		tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level));
+	}
+
+	return tbl.walk(tt, base, start, end, tbl.callback, arg0);
+}
+
+int visit_pte_range(page_table_t pt, void *start0, void *end0, int pgshift,
+		enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg)
+{
+	const struct table {
+		walk_pte_t* walk;
+		walk_pte_fn_t* callback;
+	} tables[] = {
+		{ walk_pte_l2, visit_pte_l2 },	/*second*/
+		{ walk_pte_l3, visit_pte_l3 },	/*first*/
+		{ walk_pte_l4, visit_pte_l4 },	/*zero*/
+	};
+	const
struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2]; + + const uintptr_t start = (uintptr_t)start0; + const uintptr_t end = (uintptr_t)end0; + struct visit_pte_args args; + translation_table_t* tt; + + args.pt = pt; + args.flags = flags; + args.funcp = funcp; + args.arg = arg; + args.pgshift = pgshift; + + tt = get_translation_table(pt); + return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args); +} + +struct clear_range_args { + int free_physical; + struct memobj *memobj; + struct process_vm *vm; +}; + +static int clear_range_middle(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level); + +static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + struct clear_range_args *args = args0; + uint64_t phys = 0; + struct page *page; + pte_t old; + + if (ptl1_null(ptep)) { + return -ENOENT; + } + + old = xchg(ptep, PTE_NULL); + arch_flush_tlb_single(get_address_space_id(args->vm->address_space->page_table), base); + + page = NULL; + if (!ptl1_fileoff(&old)) { + phys = ptl1_phys(&old); + page = phys_to_page(phys); + } + + if (page && page_is_in_memobj(page) && ptl1_dirty(&old) && (args->memobj) && + !(args->memobj->flags & MF_ZEROFILL)) { + memobj_flush_page(args->memobj, phys, PTL1_SIZE); + } + + if (!ptl1_fileoff(&old) && args->free_physical) { + if (!page || (page && page_unmap(page))) { + int npages = PTL1_SIZE / PAGE_SIZE; + ihk_mc_free_pages_user(phys_to_virt(phys), npages); + dkprintf("%s: freeing regular page at 0x%lx\n", __FUNCTION__, base); + } + args->vm->currss -= PTL1_SIZE; + } + + return 0; +} + +static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_range_middle(args0, ptep, base, start, end, 2); +} + +static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_range_middle(args0, ptep, base, start, end, 3); +} + +static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_range_middle(args0, ptep, base, start, end, 4); +} + +static int clear_range_middle(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level) +{ + const struct table { + walk_pte_t* walk; + walk_pte_fn_t* callback; + unsigned long pgsize; + } table[] = { + {walk_pte_l1, clear_range_l1, PTL2_SIZE}, /*PTL2*/ + {walk_pte_l2, clear_range_l2, PTL3_SIZE}, /*PTL3*/ + {walk_pte_l3, clear_range_l3, PTL4_SIZE}, /*PTL4*/ + }; + const struct table tbl = table[level-2]; + + struct clear_range_args *args = args0; + uint64_t phys = 0; + translation_table_t *tt; + int error; + struct page *page; + pte_t old; + + if (ptl_null(ptep, level)) { + return -ENOENT; + } + + if (ptl_type_page(ptep, level) + && ((base < start) || (end < (base + tbl.pgsize)))) { + error = -EINVAL; + ekprintf("clear_range_middle(%p,%p,%lx,%lx,%lx,%d):" + "split page. 
%d\n",
+				args0, ptep, base, start, end, level, error);
+		return error;
+	}
+
+	if (ptl_type_page(ptep, level)) {
+		old = xchg(ptep, PTE_NULL);
+		arch_flush_tlb_single(get_address_space_id(args->vm->address_space->page_table), base);
+
+		page = NULL;
+		if (!ptl_fileoff(&old, level)) {
+			phys = ptl_phys(&old, level);
+			page = phys_to_page(phys);
+		}
+
+		if (page && page_is_in_memobj(page) && ptl_dirty(&old, level) &&
+				(args->memobj) &&
+				!(args->memobj->flags & MF_ZEROFILL)) {
+			memobj_flush_page(args->memobj, phys, tbl.pgsize);
+		}
+
+		if (!ptl_fileoff(&old, level) && args->free_physical) {
+			if (!page || (page && page_unmap(page))) {
+				int npages = tbl.pgsize / PAGE_SIZE;
+				ihk_mc_free_pages_user(phys_to_virt(phys), npages);
+				dkprintf("%s(level=%d): freeing large page at 0x%lx\n", __FUNCTION__, level, base);
+			}
+			args->vm->currss -= tbl.pgsize;
+		}
+
+		return 0;
+	}
+
+	tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level));
+	error = tbl.walk(tt, base, start, end, tbl.callback, args0);
+	if (error && (error != -ENOENT)) {
+		return error;
+	}
+
+	if ((start <= base) && ((base + tbl.pgsize) <= end)) {
+		ptl_clear(ptep, level);
+		arch_flush_tlb_single(get_address_space_id(args->vm->address_space->page_table), base);
+		ihk_mc_free_pages(tt, 1);
+	}
+
+	return 0;
+}
+
+static int clear_range(struct page_table *pt, struct process_vm *vm,
+		uintptr_t start, uintptr_t end, int free_physical,
+		struct memobj *memobj)
+{
+	const struct table {
+		walk_pte_t* walk;
+		walk_pte_fn_t* callback;
+	} tables[] = {
+		{walk_pte_l2, clear_range_l2},	/*second*/
+		{walk_pte_l3, clear_range_l3},	/*first*/
+		{walk_pte_l4, clear_range_l4},	/*zero*/
+	};
+	const struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2];
+
+	int error;
+	struct clear_range_args args;
+	translation_table_t* tt;
+
+	if ((start < vm->region.user_start)
+			|| (vm->region.user_end < end)
+			|| (end <= start)) {
+		ekprintf("clear_range(%p,%p,%p,%x):"
+				"invalid start and/or end.\n",
+				pt, start, end, free_physical);
+		return -EINVAL;
+	}
+
+	args.free_physical = free_physical;
+	if (memobj && (memobj->flags & MF_DEV_FILE)) {
+		args.free_physical = 0;
+	}
+	if (memobj && ((memobj->flags & MF_PREMAP))) {
+		args.free_physical = 0;
+	}
+	args.memobj = memobj;
+	args.vm = vm;
+
+	tt = get_translation_table(pt);
+	error = initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args);
+	return error;
+}
+
+int ihk_mc_pt_clear_range(page_table_t pt, struct process_vm *vm,
+		void *start, void *end)
+{
+#define KEEP_PHYSICAL 0
+	return clear_range(pt, vm, (uintptr_t)start, (uintptr_t)end,
+			KEEP_PHYSICAL, NULL);
+}
+
+int ihk_mc_pt_free_range(page_table_t pt, struct process_vm *vm,
+		void *start, void *end, struct memobj *memobj)
+{
+#define FREE_PHYSICAL 1
+	return clear_range(pt, vm, (uintptr_t)start, (uintptr_t)end,
+			FREE_PHYSICAL, memobj);
+}
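+
+/*
+ * The two wrappers above differ only in what happens to the backing
+ * memory; illustrative usage:
+ *
+ *	ihk_mc_pt_clear_range(pt, vm, start, end);		// unmap, KEEP_PHYSICAL
+ *	ihk_mc_pt_free_range(pt, vm, start, end, memobj);	// unmap, FREE_PHYSICAL
+ */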
+
+struct change_attr_args {
+	pte_t clrpte[4];
+	pte_t setpte[4];
+};
+static int change_attr_range_middle(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end, int level);
+
+static int change_attr_range_l1(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end)
+{
+	pte_t pte;
+	struct change_attr_args *args = arg0;
+
+	if (ptl1_null(ptep) || ptl1_fileoff(ptep)) {
+		return -ENOENT;
+	}
+	pte = ptl1_val(ptep);
+	pte = (pte & ~args->clrpte[0]) | args->setpte[0];
+	ptl1_set(ptep, pte);
+	return 0;
+}
+
+static int change_attr_range_l2(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end)
+{
+	return change_attr_range_middle(arg0, ptep, base, start, end, 2);
+}
+
+static int change_attr_range_l3(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end)
+{
+	return change_attr_range_middle(arg0, ptep, base, start, end, 3);
+}
+
+static int change_attr_range_l4(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end)
+{
+	return change_attr_range_middle(arg0, ptep, base, start, end, 4);
+}
+
+static int change_attr_range_middle(void *arg0, pte_t *ptep, uint64_t base,
+		uint64_t start, uint64_t end, int level)
+{
+	const struct table {
+		walk_pte_t* walk;
+		walk_pte_fn_t* callback;
+		unsigned long pgsize;
+	} table[] = {
+		{walk_pte_l1, change_attr_range_l1, PTL2_SIZE},	/*PTL2*/
+		{walk_pte_l2, change_attr_range_l2, PTL3_SIZE},	/*PTL3*/
+		{walk_pte_l3, change_attr_range_l3, PTL4_SIZE},	/*PTL4*/
+	};
+	const struct table tbl = table[level-2];
+	struct change_attr_args *args = arg0;
+	int error;
+	translation_table_t* tt;
+
+	if (ptl_null(ptep, level) || ptl_fileoff(ptep, level)) {
+		return -ENOENT;
+	}
+
+	if (ptl_type_page(ptep, level)
+			&& ((base < start) || (end < (base + tbl.pgsize)))) {
+		error = -EINVAL;
+		ekprintf("change_attr_range_middle(%p,%p,%lx,%lx,%lx,%d):"
+				"split failed. %d\n",
+				arg0, ptep, base, start, end, level, error);
+		return error;
+	}
+
+	if (ptl_type_page(ptep, level)) {
+		if (!ptl_fileoff(ptep, level)) {
+			pte_t pte = ptl_val(ptep, level);
+			pte = (pte & ~args->clrpte[level-1]) | args->setpte[level-1];
+			ptl_set(ptep, pte, level);
+		}
+		return 0;
+	}
+
+	tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level));
+	return tbl.walk(tt, base, start, end, tbl.callback, arg0);
+}
+
+int ihk_mc_pt_change_attr_range(page_table_t pt, void *start0, void *end0,
+		enum ihk_mc_pt_attribute clrattr,
+		enum ihk_mc_pt_attribute setattr)
+{
+	const struct table {
+		walk_pte_t* walk;
+		walk_pte_fn_t* callback;
+	} tables[] = {
+		{walk_pte_l2, change_attr_range_l2},	/*second*/
+		{walk_pte_l3, change_attr_range_l3},	/*first*/
+		{walk_pte_l4, change_attr_range_l4},	/*zero*/
+	};
+	const struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2];
+	enum ihk_mc_pt_attribute flip_clrattr;
+	enum ihk_mc_pt_attribute flip_setattr;
+
+	const intptr_t start = (intptr_t)start0;
+	const intptr_t end = (intptr_t)end0;
+	struct change_attr_args args;
+	translation_table_t* tt;
+
+	// swap the flip bits
+	flip_clrattr = (clrattr & ~attr_flip_bits) | (setattr & attr_flip_bits);
+	flip_setattr = (setattr & ~attr_flip_bits) | (clrattr & attr_flip_bits);
+
+	// conversion
+	switch (CONFIG_ARM64_PGTABLE_LEVELS)
+	{
+	case 4:	args.clrpte[3] = attr_to_l4attr_not_flip(flip_clrattr);	/*PTL4*/
+		args.setpte[3] = attr_to_l4attr_not_flip(flip_setattr);
+	case 3:	args.clrpte[2] = attr_to_l3attr_not_flip(flip_clrattr);	/*PTL3*/
+		args.setpte[2] = attr_to_l3attr_not_flip(flip_setattr);
+	case 2:	args.clrpte[1] = attr_to_l2attr_not_flip(flip_clrattr);	/*PTL2*/
+		args.setpte[1] = attr_to_l2attr_not_flip(flip_setattr);
+		args.clrpte[0] = attr_to_l1attr_not_flip(flip_clrattr);	/*PTL1*/
+		args.setpte[0] = attr_to_l1attr_not_flip(flip_setattr);
+	}
+	tt = get_translation_table(pt);
+	return initial_lookup.walk(tt, 0, start, end, initial_lookup.callback, &args);
+}
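+
+/*
+ * Example use of ihk_mc_pt_change_attr_range() (sketch): write-protect an
+ * already-mapped range by clearing PTATTR_WRITABLE and setting no new
+ * attribute bits:
+ *
+ *	ihk_mc_pt_change_attr_range(pt, start, end, PTATTR_WRITABLE, 0);
+ */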
+
+static pte_t *lookup_pte(translation_table_t* tt, uintptr_t virt, int pgshift,
+		uintptr_t *basep, size_t *sizep, int *p2alignp)
+{
+	pte_t *ptep;
+	uintptr_t base;
+	size_t size;
+	int p2align;
+
+	ptep = NULL;
+	if (!pgshift) {
+		pgshift = FIRST_LEVEL_BLOCK_SUPPORT ? PTL3_SHIFT : PTL2_SHIFT;
+	}
+
+	ptep = ptl4_offset(tt, virt);
+	if (ptl4_null(ptep)) {
+		if (pgshift > PTL3_SHIFT) {
+			pgshift = PTL3_SHIFT;
+		}
+		goto out;
+	}
+
+	ptep = ptl3_offset(ptep, virt);
+	if (ptl3_null(ptep) || ptl3_type_block(ptep)) {
+		if (pgshift >= PTL3_SHIFT) {
+			pgshift = PTL3_SHIFT;
+		} else {
+			ptep = NULL;
+		}
+		goto out;
+	}
+
+	ptep = ptl2_offset(ptep, virt);
+	if (ptl2_null(ptep) || ptl2_type_block(ptep)) {
+		if (pgshift >= PTL2_SHIFT) {
+			pgshift = PTL2_SHIFT;
+		} else {
+			ptep = NULL;
+		}
+		goto out;
+	}
+
+	ptep = ptl1_offset(ptep, virt);
+	pgshift = PTL1_SHIFT;
+
+out:
+	size = (size_t)1 << pgshift;
+	base = virt & ~(size - 1);
+	p2align = pgshift - PAGE_SHIFT;
+	if (basep) *basep = base;
+	if (sizep) *sizep = size;
+	if (p2alignp) *p2alignp = p2align;
+
+	return ptep;
+}
+
+pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift,
+		void **basep, size_t *sizep, int *p2alignp)
+{
+	pte_t *ptep;
+	uintptr_t base;
+	size_t size;
+	int p2align;
+	translation_table_t* tt;
+
+	dkprintf("ihk_mc_pt_lookup_pte(%p,%p,%d)\n", pt, virt, pgshift);
+	tt = get_translation_table(pt);
+	ptep = lookup_pte(tt, (uintptr_t)virt, pgshift, &base, &size, &p2align);
+	if (basep) *basep = (void *)base;
+	if (sizep) *sizep = size;
+	if (p2alignp) *p2alignp = p2align;
+	dkprintf("ihk_mc_pt_lookup_pte(%p,%p,%d): %p %lx %lx %d\n",
+			pt, virt, pgshift, ptep, base, size, p2align);
+	return ptep;
+}
+
+struct set_range_args {
+	page_table_t pt;
+	uintptr_t phys;
+	pte_t attr[4];
+	int pgshift;
+	uintptr_t diff;
+	struct process_vm *vm;
+	struct vm_range *range;	/* To find pages we don't need to call memory_stat_rss_add() */
+};
+
+int set_range_middle(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
+		uintptr_t end, int level);
+
+int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
+		uintptr_t end)
+{
+	struct set_range_args *args = args0;
+	int error;
+	uintptr_t phys;
+	pte_t pte;
+
+	dkprintf("set_range_l1(%lx,%lx,%lx)\n", base, start, end);
+
+	if (!ptl1_null(ptep)) {
+		error = -EBUSY;
+		ekprintf("set_range_l1(%lx,%lx,%lx):page exists.
%d %lx\n", + base, start, end, error, *ptep); + (void)clear_range(args->pt, args->vm, start, base, KEEP_PHYSICAL, NULL); + goto out; + } + + phys = args->phys + (base - start); + pte = phys | args->attr[0]; + ptl1_set(ptep, pte); + + error = 0; +out: + dkprintf("set_range_l1(%lx,%lx,%lx): %d %lx\n", + base, start, end, error, *ptep); + return error; +} + +int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, + uintptr_t end) +{ + return set_range_middle(args0, ptep, base, start, end, 2); +} + +int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, + uintptr_t end) +{ + return set_range_middle(args0, ptep, base, start, end, 3); +} + +int set_range_l4(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, + uintptr_t end) +{ + return set_range_middle(args0, ptep, base, start, end, 4); +} + +int set_range_middle(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start, + uintptr_t end, int level) +{ + const struct table { + walk_pte_t* walk; + walk_pte_fn_t* callback; + unsigned long pgsize; + unsigned long pgshift; + } table[] = { + {walk_pte_l1, set_range_l1, PTL2_SIZE, PTL2_SHIFT}, /*PTL2: second*/ + {walk_pte_l2, set_range_l2, PTL3_SIZE, PTL3_SHIFT}, /*PTL3: first*/ + {walk_pte_l3, set_range_l3, PTL4_SIZE, PTL4_SHIFT}, /*PTL4: zero*/ + }; + const struct table tbl = table[level-2]; + + struct set_range_args *args = args0; + int error; + translation_table_t* tt; + translation_table_t* tt_pa = NULL; + + dkprintf("set_range_middle(%lx,%lx,%lx,%d)\n", base, start, end, level); + +retry: + if (ptl_null(ptep, level)) { + pte_t pte; + uintptr_t phys; + if (level == 2 || (level == 3 && FIRST_LEVEL_BLOCK_SUPPORT)) { + if ((start <= base) && ((base + tbl.pgsize) <= end) + && ((args->diff & (tbl.pgsize - 1)) == 0) + && (!args->pgshift + || (args->pgshift == tbl.pgshift))) { + phys = args->phys + (base - start); + ptl_set(ptep, phys | args->attr[level-1], level); + error = 0; + dkprintf("set_range_middle(%lx,%lx,%lx,%d):" + "large page. %d %lx\n", + base, start, end, level, error, *ptep); + goto out; + } + } + + if (!tt_pa) { + tt_pa = __alloc_new_tt(IHK_MC_AP_NOWAIT); + if (tt_pa == NULL) { + error = -ENOMEM; + ekprintf("set_range_middle(%lx,%lx,%lx,%d):" + "__alloc_new_tt failed. %d %lx\n", + base, start, end, level, error, *ptep); + (void)clear_range(args->pt, args->vm, start, base, + KEEP_PHYSICAL, NULL); + goto out; + } + } + + pte = (pte_t)(((unsigned long)(tt_pa) & PT_PHYSMASK) | PFL_PDIR_TBL_ATTR); + pte = atomic_cmpxchg8(ptep, PTE_NULL, pte); + if (pte != PTE_NULL) { + /* failed to set entry */ + goto retry; + } + + tt = (translation_table_t*)phys_to_virt((unsigned long)tt_pa); + tt_pa = NULL; + } + else if (ptl_type_page(ptep, level)) { + error = -EBUSY; + ekprintf("set_range_middle(%lx,%lx,%lx,%d):" + "page exists. %d %lx\n", + base, start, end, level, error, *ptep); + (void)clear_range(args->pt, args->vm, start, base, KEEP_PHYSICAL, NULL); + goto out; + } + else { + tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level)); + } + + error = tbl.walk(tt, base, start, end, tbl.callback, args0); + if (error) { + ekprintf("set_range_middle(%lx,%lx,%lx,%d):" + "walk pte failed. 
%d %lx\n", + base, start, end, level, error, *ptep); + goto out; + } + + error = 0; +out: + if (tt_pa) { + ihk_mc_free_pages(tt_pa, 1); + } + dkprintf("set_range_middle(%lx,%lx,%lx,%d): %d %lx\n", + base, start, end, level, error, *ptep); + return error; +} + +int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start, + void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr, + int pgshift, struct vm_range *range) +{ + const struct table { + walk_pte_t* walk; + walk_pte_fn_t* callback; + } tables[] = { + {walk_pte_l2, set_range_l2}, /*second*/ + {walk_pte_l3, set_range_l3}, /*first*/ + {walk_pte_l4, set_range_l4}, /*zero*/ + }; + const struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2]; + int error; + struct set_range_args args; + translation_table_t* tt; + + dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%lx,%x,%d,%lx-%lx)\n", + pt, start, end, phys, attr, pgshift, range->start, range->end); + + args.pt = pt; + args.phys = phys; + args.diff = (uintptr_t)start ^ phys; + args.vm = vm; + args.pgshift = pgshift; + args.range = range; + + // conversion + switch (CONFIG_ARM64_PGTABLE_LEVELS) + { + case 4: args.attr[3] = attr_to_l4attr(attr); /*PTL4*/ + case 3: args.attr[2] = attr_to_l3attr(attr); /*PTL3*/ + case 2: args.attr[1] = attr_to_l2attr(attr); /*PTL2*/ + args.attr[0] = attr_to_l1attr(attr); /*PTL1*/ + } + + + tt = get_translation_table(pt); + error = initial_lookup.walk(tt, 0, (uintptr_t)start, (uintptr_t)end, + initial_lookup.callback, &args); + if (error) { + ekprintf("ihk_mc_pt_set_range(%p,%p,%p,%p,%lx,%x):" + "walk_pte failed. %d\n", + pt, vm, start, end, phys, attr, error); + goto out; + } + + error = 0; +out: + dkprintf("ihk_mc_pt_set_range(%p,%p,%p,%p,%lx,%x): %d\n", + pt, vm, start, end, phys, attr, error); + return error; +} + +int ihk_mc_pt_set_pte(page_table_t pt, pte_t *ptep, size_t pgsize, + uintptr_t phys, enum ihk_mc_pt_attribute attr) +{ + int error; + pte_t pte; + + dkprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x)\n", + pt, ptep, pgsize, phys, attr); + + if (pgsize == PTL1_SIZE) { + pte = phys | attr_to_l1attr(attr); + ptl1_set(ptep, pte); + } + else if (pgsize == PTL2_SIZE) { + if (phys & (PTL2_SIZE - 1)) { + kprintf("%s: error: phys needs to be PTL2_SIZE aligned\n", __FUNCTION__); + error = -1; + goto out; + } + pte = phys | attr_to_l2attr(attr | PTATTR_LARGEPAGE); + ptl2_set(ptep, pte); + } + else if (pgsize == PTL3_SIZE && FIRST_LEVEL_BLOCK_SUPPORT) { + if (phys & (PTL3_SIZE - 1)) { + kprintf("%s: error: phys needs to be PTL3_SIZE aligned\n", __FUNCTION__); + error = -1; + goto out; + } + pte = phys | attr_to_l3attr(attr | PTATTR_LARGEPAGE); + ptl3_set(ptep, pte); + } + else { + error = -EINVAL; + ekprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x):" + "page size. 
%d %lx\n", + pt, ptep, pgsize, phys, attr, error, *ptep); + panic("ihk_mc_pt_set_pte:page size"); + goto out; + } + + error = 0; +out: + dkprintf("ihk_mc_pt_set_pte(%p,%p,%lx,%lx,%x): %d %lx\n", + pt, ptep, pgsize, phys, attr, error, *ptep); + return error; +} + +int ihk_mc_pt_split(page_table_t pt, struct process_vm *vm, void *addr) +{ + int error; + pte_t *ptep; + void *pgaddr; + size_t pgsize; + intptr_t phys; + struct page *page; + + int level; + +retry: + ptep = ihk_mc_pt_lookup_pte(pt, addr, 0, &pgaddr, &pgsize, NULL); + switch (pgsize) { + case __PTL4_SIZE: + level = 4; + break; + case __PTL3_SIZE: + level = 3; + break; + case __PTL2_SIZE: + level = 2; + break; + case __PTL1_SIZE: + level = 1; + break; + default: + ekprintf("ihk_mc_pt_split:invalid pgsize %#lx\n", pgsize); + return -EINVAL; + } + + if (ptep && !ptl_null(ptep, level) && (pgaddr != addr)) { + page = NULL; + if (!ptl_fileoff(ptep, level)) { + phys = ptl_phys(ptep, level); + page = phys_to_page(phys); + } + if (page && (page_is_in_memobj(page) + || page_is_multi_mapped(page))) { + error = -EINVAL; + kprintf("ihk_mc_pt_split:NYI:page break down\n"); + goto out; + } + + error = split_large_page(ptep, pgsize); + if (error) { + kprintf("ihk_mc_pt_split:split_large_page failed. %d\n", error); + goto out; + } + arch_flush_tlb_single(get_address_space_id(vm->address_space->page_table), + (uint64_t)pgaddr); + goto retry; + } + + error = 0; +out: + return error; +} /* ihk_mc_pt_split() */ + +int arch_get_smaller_page_size(void *args, size_t cursize, size_t *newsizep, + int *p2alignp) +{ + size_t newsize; + int p2align; + int error; + + if (0) { + /* dummy */ + panic("not reached"); + } + else if ((cursize > PTL3_SIZE) && CONFIG_ARM64_PGTABLE_LEVELS > 2) { + newsize = PTL3_SIZE; + p2align = PTL3_SHIFT - PTL1_SHIFT; + } + else if (cursize > PTL2_SIZE) { + newsize = PTL2_SIZE; + p2align = PTL2_SHIFT - PTL1_SHIFT; + } + else if (cursize > PTL1_SIZE) { + newsize = PTL1_SIZE; + p2align = PTL1_SHIFT - PTL1_SHIFT; + } + else { + error = -ENOMEM; + newsize = 0; + p2align = -1; + goto out; + } + + error = 0; + if (newsizep) *newsizep = newsize; + if (p2alignp) *p2alignp = p2align; + +out: + dkprintf("arch_get_smaller_page_size(%p,%lx): %d %lx %d\n", + args, cursize, error, newsize, p2align); + return error; +} + +enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep) +{ + enum ihk_mc_pt_attribute attr; + + attr = common_vrflag_to_ptattr(flag, fault, ptep); + + if ((fault & PF_PROT) + || ((fault & PF_POPULATE) && (flag & VR_PRIVATE)) + || ((fault & PF_WRITE) && !(flag & VR_PRIVATE))) { + attr |= PTATTR_DIRTY; + } + return attr; +} + +struct move_args { + uintptr_t src; + uintptr_t dest; + struct process_vm *vm; + struct vm_range *range; +}; + +static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, + void *pgaddr, int pgshift) +{ + int error; + struct move_args *args = arg0; + const size_t pgsize = (size_t)1 << pgshift; + uintptr_t dest; + pte_t apte; + uintptr_t phys; + enum ihk_mc_pt_attribute attr; + + dkprintf("move_one_page(%p,%p,%p %#lx,%p,%d)\n", + arg0, pt, ptep, *ptep, pgaddr, pgshift); + if (pte_is_fileoff(ptep, pgsize)) { + error = -ENOTSUPP; + kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):fileoff. 
%d\n", + arg0, pt, ptep, *ptep, pgaddr, pgshift, error); + goto out; + } + + dest = args->dest + ((uintptr_t)pgaddr - args->src); + + apte = PTE_NULL; + pte_xchg(ptep, &apte); + + phys = apte & PT_PHYSMASK; + attr = apte & ~PT_PHYSMASK; + + error = ihk_mc_pt_set_range(pt, args->vm, (void *)dest, + (void *)(dest + pgsize), phys, attr, pgshift, args->range); + if (error) { + kprintf("move_one_page(%p,%p,%p %#lx,%p,%d):" + "set failed. %d\n", + arg0, pt, ptep, *ptep, pgaddr, pgshift, error); + goto out; + } + + error = 0; +out: + dkprintf("move_one_page(%p,%p,%p %#lx,%p,%d):%d\n", + arg0, pt, ptep, *ptep, pgaddr, pgshift, error); + return error; +} + +int move_pte_range(page_table_t pt, struct process_vm *vm, + void *src, void *dest, size_t size, struct vm_range *range) +{ + int error; + struct move_args args; + + dkprintf("move_pte_range(%p,%p,%p,%#lx)\n", pt, src, dest, size); + args.src = (uintptr_t)src; + args.dest = (uintptr_t)dest; + args.vm = vm; + args.range = range; + + error = visit_pte_range(pt, src, src+size, 0, VPTEF_SKIP_NULL, + &move_one_page, &args); + flush_tlb(); /* XXX: TLB flush */ + if (error) { + goto out; + } + + error = 0; +out: + dkprintf("move_pte_range(%p,%p,%p,%#lx):%d\n", + pt, src, dest, size, error); + return error; +} + +void load_page_table(struct page_table *pt) +{ + if (pt == NULL) { + // load page table for idle(EL1) process. + return; + } + // load page table for user(EL0) thread. + switch_mm(pt); + return; +} + +void ihk_mc_load_page_table(struct page_table *pt) +{ + load_page_table(pt); +} + +struct page_table *get_init_page_table(void) +{ + return init_pt; +} + +static unsigned long fixed_virt; +static void init_fixed_area(struct page_table *pt) +{ + fixed_virt = MAP_FIXED_START; + + return; +} + +void init_text_area(struct page_table *pt) +{ + /* head.Sで初期化済み */ + unsigned long __end; + int nlpages; + + __end = ((unsigned long)_end + LARGE_PAGE_SIZE * 2 - 1) + & LARGE_PAGE_MASK; + nlpages = (__end - MAP_KERNEL_START) >> LARGE_PAGE_SHIFT; + + kprintf("TEXT: # of large pages = %d\n", nlpages); + kprintf("TEXT: Base address = %lx\n", arm64_kernel_phys_base); +} + +void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable) +{ + struct page_table* pt; + unsigned long poffset, paligned; + int i, npages; + void *v = (void *)fixed_virt; + enum ihk_mc_pt_attribute attr; + + poffset = phys & (PAGE_SIZE - 1); + paligned = phys & PAGE_MASK; + npages = (poffset + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + attr = PTATTR_WRITABLE | PTATTR_ACTIVE; +#if 0 /* In the case of LAPIC MMIO, something will happen */ + attr |= PTATTR_NO_EXECUTE; +#endif + if (uncachable) { + attr |= PTATTR_UNCACHABLE; + } + + kprintf("map_fixed: phys: 0x%lx => 0x%lx (%d pages)\n", + paligned, v, npages); + + pt = get_init_page_table(); + for (i = 0; i < npages; i++) { + if(__set_pt_page(pt, (void *)fixed_virt, paligned, attr)){ + return NULL; + } + + fixed_virt += PAGE_SIZE; + paligned += PAGE_SIZE; + } + + flush_tlb(); + + return (char *)v + poffset; +} + +void init_low_area(struct page_table *pt) +{ + set_pt_large_page(pt, 0, 0, PTATTR_NO_EXECUTE|PTATTR_WRITABLE); +} + +void init_page_table(void) +{ + ihk_mc_spinlock_init(&init_pt_lock); + + /* Normal memory area */ + init_normal_area(init_pt); + init_fixed_area(init_pt); + init_low_area(init_pt); + init_text_area(init_pt); + + /* virt to phys */ + kprintf("Page table is now at %p\n", init_pt); +} + +extern void __reserve_arch_pages(unsigned long, unsigned long, + void (*)(struct ihk_page_allocator_desc *, + unsigned long, 
+
+void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
+		unsigned long start, unsigned long end,
+		void (*cb)(struct ihk_page_allocator_desc *,
+			unsigned long, unsigned long, int))
+{
+	kprintf("reserve arch pages (%#llx, %#llx, %p)\n", start, end, cb);
+	/* Reserve text + temporary heap */
+	cb(pa_allocator, virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
+	/* Reserve trampoline area to boot the second ap */
+//	cb(pa_allocator, ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0); //TODO: may need attention when booting the other cores
+	/* Reserve the null page */
+	cb(pa_allocator, 0, PAGE_SIZE, 0);
+	/*
+	 * Micro-arch specific
+	 * TODO: this does nothing in SMP mode, update it for KNC if necessary
+	 */
+	__reserve_arch_pages(start, end, cb);
+}
+
+unsigned long virt_to_phys(void *v)
+{
+	unsigned long va = (unsigned long)v;
+
+	if (MAP_KERNEL_START <= va) {
+		return va - MAP_KERNEL_START + arm64_kernel_phys_base;
+	}
+	return va - MAP_ST_START;
+}
+
+void *phys_to_virt(unsigned long p)
+{
+	return (void *)(p | MAP_ST_START);
+}
+
+int copy_from_user(void *dst, const void *src, size_t siz)
+{
+	struct process_vm *vm = cpu_local_var(current)->vm;
+	return read_process_vm(vm, dst, src, siz);
+}
+
+int strlen_user(const char *s)
+{
+	struct process_vm *vm = cpu_local_var(current)->vm;
+	unsigned long pgstart;
+	int maxlen;
+	int error = 0;
+	const uint64_t reason = PF_USER;	/* page not present */
+	const char *head = s;
+
+	maxlen = PAGE_SIZE - (((unsigned long)s) & (PAGE_SIZE - 1));
+	pgstart = ((unsigned long)s) & PAGE_MASK;
+
+	if (!pgstart || pgstart >= MAP_KERNEL_START) {
+		return -EFAULT;
+	}
+
+	for (;;) {
+		error = page_fault_process_vm(vm, (void *)pgstart, reason);
+		if (error) {
+			return error;
+		}
+
+		/* check maxlen first so we never dereference beyond the
+		 * page that has just been faulted in */
+		while (maxlen > 0 && *s) {
+			s++;
+			maxlen--;
+		}
+
+		if (maxlen > 0) {
+			/* NUL found within the faulted page */
+			break;
+		}
+		maxlen = PAGE_SIZE;
+		pgstart += PAGE_SIZE;
+	}
+	return s - head;
+}
+
+int strcpy_from_user(char *dst, const char *src)
+{
+	struct process_vm *vm = cpu_local_var(current)->vm;
+	unsigned long pgstart;
+	int maxlen;
+	int error = 0;
+	const uint64_t reason = PF_USER;	/* page not present */
+
+	maxlen = PAGE_SIZE - (((unsigned long)src) & (PAGE_SIZE - 1));
+	pgstart = ((unsigned long)src) & PAGE_MASK;
+
+	if (!pgstart || pgstart >= MAP_KERNEL_START) {
+		return -EFAULT;
+	}
+
+	for (;;) {
+		error = page_fault_process_vm(vm, (void *)pgstart, reason);
+		if (error) {
+			return error;
+		}
+
+		/* as in strlen_user(), check maxlen before dereferencing */
+		while (maxlen > 0 && *src) {
+			*(dst++) = *(src++);
+			maxlen--;
+		}
+
+		if (maxlen > 0) {
+			/* NUL found within the faulted page */
+			*dst = '\0';
+			break;
+		}
+		maxlen = PAGE_SIZE;
+		pgstart += PAGE_SIZE;
+	}
+	return error;
+}
+
+long getlong_user(long *dest, const long *p)
+{
+	int error;
+
+	error = copy_from_user(dest, p, sizeof(long));
+	if (error) {
+		return error;
+	}
+
+	return 0;
+}
+
+int getint_user(int *dest, const int *p)
+{
+	int error;
+
+	error = copy_from_user(dest, p, sizeof(int));
+	if (error) {
+		return error;
+	}
+
+	return 0;
+}
+
+int verify_process_vm(struct process_vm *vm,
+		const void *usrc, size_t size)
+{
+	const uintptr_t ustart = (uintptr_t)usrc;
+	const uintptr_t uend = ustart + size;
+	uint64_t reason;
+	uintptr_t addr;
+	int error = 0;
+
+	if ((ustart < vm->region.user_start)
+			|| (vm->region.user_end <= ustart)
+			|| ((vm->region.user_end - ustart) < size)) {
+		kprintf("%s: error: out of user range\n", __FUNCTION__);
+		return -EFAULT;
+	}
+
+	reason = PF_USER;	/* page not present */
+	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
+		if (!addr)
+			return -EINVAL;
+
+		error = page_fault_process_vm(vm, (void *)addr, reason);
+		if (error) {
+			kprintf("%s: error: PF for %p failed\n", __FUNCTION__, (void *)addr);
+			return error;
+		}
+	}
+
+	return error;
+}
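+/*
+ * Usage sketch (illustrative only, not built): a hypothetical syscall
+ * handler combining the user-access helpers above. The function, its
+ * arguments and the 256-byte buffer are assumptions for illustration;
+ * note that strcpy_from_user() copies until the NUL with no bound.
+ */
+#if 0
+static long example_syscall(const int *uflags, const char *uname)
+{
+	int flags;
+	char name[256];
+	int error;
+
+	error = getint_user(&flags, uflags);
+	if (error) {
+		return error;
+	}
+	error = strcpy_from_user(name, uname);
+	if (error) {
+		return error;
+	}
+	dkprintf("example_syscall: flags=%d name=%s\n", flags, name);
+	return 0;
+}
+#endif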
+
+int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
+{
+	const uintptr_t ustart = (uintptr_t)usrc;
+	const uintptr_t uend = ustart + siz;
+	uint64_t reason;
+	uintptr_t addr;
+	int error;
+	const void *from;
+	void *to;
+	size_t remain;
+	size_t cpsize;
+	unsigned long pa;
+	void *va;
+
+	if ((ustart < vm->region.user_start)
+			|| (vm->region.user_end <= ustart)
+			|| ((vm->region.user_end - ustart) < siz)) {
+		kprintf("%s: error: out of user range\n", __FUNCTION__);
+		return -EFAULT;
+	}
+
+	reason = PF_USER;	/* page not present */
+	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
+		error = page_fault_process_vm(vm, (void *)addr, reason);
+		if (error) {
+			kprintf("%s: error: PF for %p failed\n", __FUNCTION__, (void *)addr);
+			return error;
+		}
+	}
+
+	from = usrc;
+	to = kdst;
+	remain = siz;
+	while (remain > 0) {
+		cpsize = PAGE_SIZE - ((uintptr_t)from & (PAGE_SIZE - 1));
+		if (cpsize > remain) {
+			cpsize = remain;
+		}
+
+		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa);
+		if (error) {
+			kprintf("%s: error: resolving physical address of %p\n", __FUNCTION__, from);
+			return error;
+		}
+
+#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
+		if (!is_mckernel_memory(pa)) {
+#else
+		if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
+				pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+#endif /* POSTK_DEBUG_TEMP_FIX_52 */
+			dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p, "
+					"cpsize: %d\n", __FUNCTION__, to, pa, cpsize);
+			va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE);
+			memcpy(to, va, cpsize);
+			ihk_mc_unmap_virtual(va, 1, 1);
+		}
+		else {
+			va = phys_to_virt(pa);
+			memcpy(to, va, cpsize);
+		}
+
+		from += cpsize;
+		to += cpsize;
+		remain -= cpsize;
+	}
+
+	return 0;
+} /* read_process_vm() */
+
+int copy_to_user(void *dst, const void *src, size_t siz)
+{
+	struct process_vm *vm = cpu_local_var(current)->vm;
+	return write_process_vm(vm, dst, src, siz);
+}
+
+int setlong_user(long *dst, long data)
+{
+	return copy_to_user(dst, &data, sizeof(data));
+}
+
+int setint_user(int *dst, int data)
+{
+	return copy_to_user(dst, &data, sizeof(data));
+}
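+/*
+ * The copy loops in read_process_vm() above and write_process_vm() below
+ * clamp every chunk to the end of the current page before resolving its
+ * physical address. The same arithmetic as a standalone sketch
+ * (illustrative only, not built):
+ */
+#if 0
+static inline size_t chunk_to_page_end(const void *p, size_t remain)
+{
+	size_t cpsize = PAGE_SIZE - ((uintptr_t)p & (PAGE_SIZE - 1));
+
+	return (cpsize > remain) ? remain : cpsize;
+}
+#endif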
+
+int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
+{
+	const uintptr_t ustart = (uintptr_t)udst;
+	const uintptr_t uend = ustart + siz;
+	uint64_t reason;
+	uintptr_t addr;
+	int error;
+	const void *from;
+	void *to;
+	size_t remain;
+	size_t cpsize;
+	unsigned long pa;
+	void *va;
+
+	if ((ustart < vm->region.user_start)
+			|| (vm->region.user_end <= ustart)
+			|| ((vm->region.user_end - ustart) < siz)) {
+		return -EFAULT;
+	}
+
+	reason = PF_POPULATE | PF_WRITE | PF_USER;
+	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
+		error = page_fault_process_vm(vm, (void *)addr, reason);
+		if (error) {
+			return error;
+		}
+	}
+
+	from = ksrc;
+	to = udst;
+	remain = siz;
+	while (remain > 0) {
+		cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
+		if (cpsize > remain) {
+			cpsize = remain;
+		}
+
+		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
+		if (error) {
+			return error;
+		}
+
+#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
+		if (!is_mckernel_memory(pa)) {
+#else
+		if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
+				pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+#endif /* POSTK_DEBUG_TEMP_FIX_52 */
+			dkprintf("%s: pa is outside of LWK memory, from: %p, "
+					"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
+			va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
+			memcpy(va, from, cpsize);
+			ihk_mc_unmap_virtual(va, 1, 1);
+		}
+		else {
+			va = phys_to_virt(pa);
+			memcpy(va, from, cpsize);
+		}
+
+		from += cpsize;
+		to += cpsize;
+		remain -= cpsize;
+	}
+
+	return 0;
+} /* write_process_vm() */
+
+int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
+{
+	const uintptr_t ustart = (uintptr_t)udst;
+	const uintptr_t uend = ustart + siz;
+	uint64_t reason;
+	uintptr_t addr;
+	int error;
+	const void *from;
+	void *to;
+	size_t remain;
+	size_t cpsize;
+	unsigned long pa;
+	void *va;
+
+	dkprintf("patch_process_vm(%p,%p,%p,%lx)\n", vm, udst, ksrc, siz);
+	if ((ustart < vm->region.user_start)
+			|| (vm->region.user_end <= ustart)
+			|| ((vm->region.user_end - ustart) < siz)) {
+		ekprintf("patch_process_vm(%p,%p,%p,%lx):not in user\n", vm, udst, ksrc, siz);
+		return -EFAULT;
+	}
+
+	reason = PF_PATCH | PF_POPULATE | PF_WRITE | PF_USER;
+	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
+		error = page_fault_process_vm(vm, (void *)addr, reason);
+		if (error) {
+			ekprintf("patch_process_vm(%p,%p,%p,%lx):pf(%lx):%d\n", vm, udst, ksrc, siz, addr, error);
+			return error;
+		}
+	}
+
+	from = ksrc;
+	to = udst;
+	remain = siz;
+	while (remain > 0) {
+		cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
+		if (cpsize > remain) {
+			cpsize = remain;
+		}
+
+		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
+		if (error) {
+			ekprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error);
+			return error;
+		}
+
+#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
+		if (!is_mckernel_memory(pa)) {
+#else
+		if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
+				pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+#endif /* POSTK_DEBUG_TEMP_FIX_52 */
+			dkprintf("%s: pa is outside of LWK memory, from: %p, "
+					"pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize);
+			va = ihk_mc_map_virtual(pa, 1, PTATTR_WRITABLE|PTATTR_ACTIVE);
+			memcpy(va, from, cpsize);
+			ihk_mc_unmap_virtual(va, 1, 1);
+		}
+		else {
+			va = phys_to_virt(pa);
+			memcpy(va, from, cpsize);
+		}
+
+		from += cpsize;
+		to += cpsize;
+		remain -= cpsize;
+	}
+
+	dkprintf("patch_process_vm(%p,%p,%p,%lx):%d\n", vm, udst, ksrc, siz, 0);
+	return 0;
+} /* patch_process_vm() */
+
+void set_address_space_id(struct page_table *pt, int asid)
+{
+	pt->asid = asid;
+}
+
+int get_address_space_id(const struct page_table *pt)
+{
+	return pt->asid;
+}
+
+void set_translation_table(struct page_table *pt, translation_table_t* tt)
+{
+	translation_table_t* tt_pa = (void*)virt_to_phys(tt);
+	pt->tt = tt;
+	pt->tt_pa = tt_pa;
+}
+
+translation_table_t* get_translation_table(const struct page_table *pt)
+{
+	return pt->tt;
+}
+
+translation_table_t* get_translation_table_as_paddr(const struct page_table *pt)
+{
+	return pt->tt_pa;
+}
+
+#ifdef POSTK_DEBUG_ARCH_DEP_8
+void remote_flush_tlb_cpumask(struct process_vm *vm,
+		unsigned long addr, int cpu_id)
+{
+	unsigned long cpu;
+	cpu_set_t _cpu_set;
+	int flush_ind;
+
+	if (addr) {
+		flush_ind = (addr >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
+	}
+	/* Zero address denotes full TLB flush */
+	else {
+		/* pick a pseudo-random vector for a full flush */
+		flush_ind = (rdtsc()) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
+	}
+
+	/* Take a copy of the cpu set so that we don't hold the lock
+	 * all the way while interrupting other cores */
+	ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock);
+	memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t));
+	ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock);
+
+	/* Loop through CPUs in this address space and interrupt them for
+	 * TLB flush on the specified address */
+	for_each_set_bit(cpu, (const unsigned long*)&_cpu_set.__bits, CPU_SETSIZE) {
+		if (ihk_mc_get_processor_id() == cpu)
+			continue;
+
+		dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n",
+				flush_ind, addr, cpu);
+
+		ihk_mc_interrupt_cpu(cpu,
+				ihk_mc_get_vector(flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START));
+	}
+}
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
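A hedged sketch of the intended calling pattern for remote_flush_tlb_cpumask() above: update the PTE first, flush the local TLB entry for the address space, then IPI the other cores sharing it. The helper below is illustrative only, not part of the patch:

	static void example_flush_after_pte_update(struct process_vm *vm,
			unsigned long addr)
	{
		/* flush the local translation for this address space */
		arch_flush_tlb_single(get_address_space_id(vm->address_space->page_table),
				(uint64_t)addr);
		/* interrupt the remaining cores in this address space */
		remote_flush_tlb_cpumask(vm, addr, ihk_mc_get_processor_id());
	}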
diff --git a/arch/arm64/kernel/memset.S b/arch/arm64/kernel/memset.S
new file mode 100644
index 00000000..5785ab14
--- /dev/null
+++ b/arch/arm64/kernel/memset.S
@@ -0,0 +1,220 @@
+/* memset.S COPYRIGHT FUJITSU LIMITED 2017 */
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linkage.h>
+#include <assembler.h>
+#include <cache.h>
+
+/*
+ * Fill in the buffer with character c (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - buf
+ *	x1 - c
+ *	x2 - n
+ * Returns:
+ *	x0 - buf
+ */
+
+dstin		.req	x0
+val		.req	w1
+count		.req	x2
+tmp1		.req	x3
+tmp1w		.req	w3
+tmp2		.req	x4
+tmp2w		.req	w4
+zva_len_x	.req	x5
+zva_len		.req	w5
+zva_bits_x	.req	x6
+
+A_l		.req	x7
+A_lw		.req	w7
+dst		.req	x8
+tmp3w		.req	w9
+tmp3		.req	x9
+
+	.weak memset
+ENTRY(____inline_memset)
+ENTRY(__inline_memset)
+	mov	dst, dstin	/* Preserve return value. */
+	and	A_lw, val, #255
+	orr	A_lw, A_lw, A_lw, lsl #8
+	orr	A_lw, A_lw, A_lw, lsl #16
+	orr	A_l, A_l, A_l, lsl #32
+
+	cmp	count, #15
+	b.hi	.Lover16_proc
+	/* All stores may be unaligned. */
+	tbz	count, #3, 1f
+	str	A_l, [dst], #8
+1:
+	tbz	count, #2, 2f
+	str	A_lw, [dst], #4
+2:
+	tbz	count, #1, 3f
+	strh	A_lw, [dst], #2
+3:
+	tbz	count, #0, 4f
+	strb	A_lw, [dst]
+4:
+	ret
+
+.Lover16_proc:
+	/* Check whether the start address is 16-byte aligned. */
+	neg	tmp2, dst
+	ands	tmp2, tmp2, #15
+	b.eq	.Laligned
+/*
+ * The count is at least 16. Use stp to store the first 16 bytes, then
+ * advance dst to the next 16-byte boundary; this leaves the current
+ * destination address aligned.
+ */
+	stp	A_l, A_l, [dst]		/* unaligned store */
+	/* make dst 16-byte aligned */
+	sub	count, count, tmp2
+	add	dst, dst, tmp2
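+/*
+ * Illustration of the head-alignment step above, in C-like pseudocode
+ * (AArch64 permits unaligned stores to normal memory; sketch only):
+ *
+ *	head = -(uintptr_t)dst & 15;	-- bytes up to the 16-byte boundary
+ *	store16(dst, pattern);		-- unaligned 16-byte store
+ *	count -= head;
+ *	dst += head;			-- bytes below the boundary are
+ *					-- simply written twice
+ */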
+.Laligned:
+	cbz	A_l, .Lzero_mem
+
+.Ltail_maybe_long:
+	cmp	count, #64
+	b.ge	.Lnot_short
+.Ltail63:
+	ands	tmp1, count, #0x30
+	b.eq	3f
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	stp	A_l, A_l, [dst], #16
+1:
+	stp	A_l, A_l, [dst], #16
+2:
+	stp	A_l, A_l, [dst], #16
+/*
+ * The last store length is less than 16; use stp to write the final 16
+ * bytes. Some bytes may be written twice, and the access is unaligned.
+ */
+3:
+	ands	count, count, #15
+	cbz	count, 4f
+	add	dst, dst, count
+	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+4:
+	ret
+
+	/*
+	 * Critical loop. Start at a new cache line boundary. Assuming
+	 * 64 bytes per line, this ensures the entire loop is in one line.
+	 */
+	.p2align	L1_CACHE_SHIFT
+.Lnot_short:
+	sub	dst, dst, #16	/* Pre-bias. */
+	sub	count, count, #64
+1:
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	stp	A_l, A_l, [dst, #48]
+	stp	A_l, A_l, [dst, #64]!
+	subs	count, count, #64
+	b.ge	1b
+	tst	count, #0x3f
+	add	dst, dst, #16
+	b.ne	.Ltail63
+.Lexitfunc:
+	ret
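+/*
+ * Illustration: the zeroing path below keys off DCZID_EL0. Bit 4 (DZP)
+ * prohibits DC ZVA; bits [3:0] give the block size as log2(words), so
+ * the "mov #4 / and #15 / lsl" sequence computes, in C terms:
+ *
+ *	zva_len_bytes = 4 << (dczid_el0 & 0xf);	-- 4 bytes/word << log2(words)
+ */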
+	/*
+	 * For zeroing memory, check to see if we can use the ZVA feature to
+	 * zero entire 'cache' lines.
+	 */
+.Lzero_mem:
+	cmp	count, #63
+	b.le	.Ltail63
+	/*
+	 * For zeroing small amounts of memory, it's not worth setting up
+	 * the line-clear code.
+	 */
+	cmp	count, #128
+	b.lt	.Lnot_short	/* count is at least 128 bytes */
+
+	mrs	tmp1, dczid_el0
+	tbnz	tmp1, #4, .Lnot_short
+	mov	tmp3w, #4
+	and	zva_len, tmp1w, #15	/* Safety: other bits reserved. */
+	lsl	zva_len, tmp3w, zva_len
+
+	ands	tmp3w, zva_len, #63
+	/*
+	 * Ensure that zva_len is at least 64. Using ZVA is not worthwhile
+	 * if the block size is less than 64 bytes.
+	 */
+	b.ne	.Lnot_short
+.Lzero_by_line:
+	/*
+	 * Compute how far we need to go to become suitably aligned. We're
+	 * already at quad-word alignment.
+	 */
+	cmp	count, zva_len_x
+	b.lt	.Lnot_short		/* Not enough to reach alignment. */
+	sub	zva_bits_x, zva_len_x, #1
+	neg	tmp2, dst
+	ands	tmp2, tmp2, zva_bits_x
+	b.eq	2f			/* Already aligned. */
+	/* Not aligned, check that there's enough to copy after alignment. */
+	sub	tmp1, count, tmp2
+	/*
+	 * Guarantee that the remaining length to be zeroed with ZVA is at
+	 * least 64 bytes, so that the loop at 2f cannot run past the end
+	 * of the range.
+	 */
+	cmp	tmp1, #64
+	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
+	b.lt	.Lnot_short
+	/*
+	 * We know that there's at least 64 bytes to zero and that it's safe
+	 * to overrun by 64 bytes.
+	 */
+	mov	count, tmp1
+1:
+	stp	A_l, A_l, [dst]
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	subs	tmp2, tmp2, #64
+	stp	A_l, A_l, [dst, #48]
+	add	dst, dst, #64
+	b.ge	1b
+	/* We've overrun a bit, so adjust dst downwards. */
+	add	dst, dst, tmp2
+2:
+	sub	count, count, zva_len_x
+3:
+	dc	zva, dst
+	add	dst, dst, zva_len_x
+	subs	count, count, zva_len_x
+	b.ge	3b
+	ands	count, count, zva_bits_x
+	b.ne	.Ltail_maybe_long
+	ret
+ENDPIPROC(__inline_memset)
+ENDPROC(____inline_memset)
diff --git a/arch/arm64/kernel/mikc.c b/arch/arm64/kernel/mikc.c
new file mode 100644
index 00000000..2d61af01
--- /dev/null
+++ b/arch/arm64/kernel/mikc.c
@@ -0,0 +1,44 @@
+/* mikc.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
+#include
+#include
+#include
+#include
+#include
+
+extern int num_processors;
+extern void arch_set_mikc_queue(void *r, void *w);
+ihk_ikc_ph_t arch_master_channel_packet_handler;
+
+int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
+		ihk_ikc_ph_t packet_handler)
+{
+	struct ihk_ikc_queue_head *rq, *wq;
+	size_t mikc_queue_pages;
+
+	ihk_ikc_system_init(NULL);
+
+	memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
+
+	mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE)
+			+ (PAGE_SIZE - 1)) / PAGE_SIZE;
+
+	/* Allocate both queue ends on this (McKernel) side */
+	rq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
+	wq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
+
+	ihk_ikc_init_queue(rq, 0, 0,
+			mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
+	ihk_ikc_init_queue(wq, 0, 0,
+			mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
+
+	arch_master_channel_packet_handler = packet_handler;
+
+	ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
+			ihk_ikc_master_channel_packet_handler, channel);
+	ihk_ikc_enable_channel(channel);
+
+	/* Set boot parameter */
+	arch_set_mikc_queue(rq, wq);
+
+	return 0;
+}
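For scale, a worked instance of the queue sizing in ihk_mc_ikc_init_first_local() above, assuming a 4 KiB PAGE_SIZE and an illustrative MASTER_IKCQ_PKTSIZE of 128 bytes (the real packet size is defined elsewhere in the tree):

	2 * 64 CPUs * 128 bytes = 16384 bytes
	(16384 + 4095) / 4096   = 4 pages per queue

so each direction (rq and wq) would receive a 4-page allocation.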
diff --git a/arch/arm64/kernel/perfctr.c b/arch/arm64/kernel/perfctr.c
new file mode 100644
index 00000000..5904e416
--- /dev/null
+++ b/arch/arm64/kernel/perfctr.c
@@ -0,0 +1,156 @@
+/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * Set at runtime when we know what CPU type we are.
+ */
+struct arm_pmu cpu_pmu;
+extern int ihk_param_pmu_irq_affiniry[CONFIG_SMP_MAX_CORES];
+extern int ihk_param_nr_pmu_irq_affiniry;
+
+
+int arm64_init_perfctr(void)
+{
+	int ret;
+	int i;
+
+	memset(&cpu_pmu, 0, sizeof(cpu_pmu));
+	ret = armv8pmu_init(&cpu_pmu);
+	if (ret) {
+		return ret;
+	}
+	for (i = 0; i < ihk_param_nr_pmu_irq_affiniry; i++) {
+		ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affiniry[i], cpu_pmu.handler);
+	}
+	return ret;
+}
+
+int arm64_enable_pmu(void)
+{
+	int ret;
+	if (cpu_pmu.reset) {
+		cpu_pmu.reset(&cpu_pmu);
+	}
+	ret = cpu_pmu.enable_pmu();
+	return ret;
+}
+
+void arm64_disable_pmu(void)
+{
+	cpu_pmu.disable_pmu();
+}
+
+extern unsigned int *arm64_march_perfmap;
+
+static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, int mode)
+{
+	int ret;
+	unsigned long config_base = 0;
+	int mapping;
+
+	mapping = cpu_pmu.map_event(type, config);
+	if (mapping < 0) {
+		return mapping;
+	}
+
+	ret = cpu_pmu.disable_counter(counter);
+	if (ret < 0) {
+		return ret;
+	}
+
+	ret = cpu_pmu.enable_intens(counter);
+	if (ret < 0) {
+		return ret;
+	}
+
+	ret = cpu_pmu.set_event_filter(&config_base, mode);
+	if (ret < 0) {
+		return ret;
+	}
+	config_base |= (unsigned long)mapping;
+	cpu_pmu.write_evtype(counter, config_base);
+	return ret;
+}
+
+int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
+{
+	int ret;
+	ret = __ihk_mc_perfctr_init(counter, PERF_TYPE_RAW, config, mode);
+	return ret;
+}
+
+int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
+{
+	int ret;
+	ret = __ihk_mc_perfctr_init(counter, PERF_TYPE_RAW, config, mode);
+	return ret;
+}
+
+int ihk_mc_perfctr_start(int counter)
+{
+	int ret;
+	ret = cpu_pmu.enable_counter(counter);
+	return ret;
+}
+
+int ihk_mc_perfctr_stop(int counter)
+{
+	cpu_pmu.disable_counter(counter);
+
+	// The init functions run again before ihk_mc_perfctr_start is
+	// next called, so disable the overflow interrupt here.
+	cpu_pmu.disable_intens(counter);
+	return 0;
+}
+
+int ihk_mc_perfctr_reset(int counter)
+{
+	// TODO[PMU]: implement fully once the common code's handling of the
+	// sampling rate is settled, as with ihk_mc_perfctr_set.
+	cpu_pmu.write_counter(counter, 0);
+	return 0;
+}
+
+//int ihk_mc_perfctr_set(int counter, unsigned long val)
+int ihk_mc_perfctr_set(int counter, long val) /* 0416_patchtemp */
+{
+	// TODO[PMU]: the common code is expected to compute the sampling rate
+	// and pass the counter value to program in val. Implement fully once
+	// the sampling-rate handling is settled.
+	uint32_t v = val;
+	cpu_pmu.write_counter(counter, v);
+	return 0;
+}
+
+int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
+{
+	/* This function is not used yet. */
+	panic("not implemented.");
+	return 0;
+}
+
+unsigned long ihk_mc_perfctr_read(int counter)
+{
+	unsigned long count;
+	count = cpu_pmu.read_counter(counter);
+	return count;
+}
+
+//int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status)
+int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status) /* 0416_patchtemp */
+{
+	int ret;
+	ret = cpu_pmu.get_event_idx(cpu_pmu.num_events, pmc_status);
+	return ret;
+}
+
+/* 0416_patchtemp */
+/* ihk_mc_perfctr_fixed_init() stub added. */
+int ihk_mc_perfctr_fixed_init(int counter, int mode)
+{
+	return -1;
+}
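A hedged sketch of the call sequence these hooks are meant to support; the raw event number 0x11 (the ARMv8 cycle-counting event in the tables below) and the bare counter index are illustrative, not prescribed by the patch:

	/* sketch: program one counter for CPU cycles and read it back */
	static unsigned long example_count_cycles(int counter)
	{
		int mode = PERFCTR_USER_MODE | PERFCTR_KERNEL_MODE;

		ihk_mc_perfctr_init_raw(counter, 0x11 /* cycles */, mode);
		ihk_mc_perfctr_reset(counter);
		ihk_mc_perfctr_start(counter);
		/* ... workload under measurement ... */
		ihk_mc_perfctr_stop(counter);
		return ihk_mc_perfctr_read(counter);
	}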
diff --git a/arch/arm64/kernel/perfctr_armv8pmu.c b/arch/arm64/kernel/perfctr_armv8pmu.c
new file mode 100644
index 00000000..2aea7343
--- /dev/null
+++ b/arch/arm64/kernel/perfctr_armv8pmu.c
@@ -0,0 +1,653 @@
+/* perfctr_armv8pmu.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
+#include
+#include
+#include
+#include
+#include
+
+#define BIT(nr) (1UL << (nr))
+
+//#define DEBUG_PRINT_PMU
+#ifdef DEBUG_PRINT_PMU
+#define dkprintf(...) kprintf(__VA_ARGS__)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#endif
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * Perf Events' indices
+ */
+#define ARMV8_IDX_CYCLE_COUNTER	0
+#define ARMV8_IDX_COUNTER0	1
+#define ARMV8_IDX_COUNTER_LAST	(ARMV8_IDX_CYCLE_COUNTER + get_cpu_pmu()->num_events - 1)
+
+#define ARMV8_MAX_COUNTERS	32
+#define ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1)
+
+/*
+ * ARMv8 low level PMU access
+ */
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * Perf Event to low level counters mapping
+ */
+#define ARMV8_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */
+#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */
+#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */
+#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
+#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug */
+#define ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
+#define ARMV8_PMCR_N_MASK	0x1f
+#define ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * PMOVSR: counters overflow flag status reg
+ */
+#define ARMV8_OVSR_MASK		0xffffffff /* Mask for writable bits */
+#define ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV8_EVTYPE_MASK	0xc80003ff /* Mask for writable bits */
+#define ARMV8_EVTYPE_EVENT	0x3ff	   /* Mask for EVENT bits */
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * Event filters for PMUv3
+ */
+#define ARMV8_EXCLUDE_EL1	(1 << 31)
+#define ARMV8_EXCLUDE_EL0	(1 << 30)
+#define ARMV8_INCLUDE_EL2	(1 << 27)
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * ARMv8 PMUv3 Performance Events handling code.
+ * Common event types.
+ */
+enum armv8_pmuv3_perf_types {
+	/* Required events. */
+	ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR	= 0x00,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL	= 0x03,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS	= 0x04,
+	ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
+	ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES	= 0x11,
+	ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED	= 0x12,
+
+	/* At least one of the following is required. */
+	ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED	= 0x08,
+	ARMV8_PMUV3_PERFCTR_OP_SPEC		= 0x1B,
+
+	/* Common architectural events. */
+	ARMV8_PMUV3_PERFCTR_MEM_READ		= 0x06,
+	ARMV8_PMUV3_PERFCTR_MEM_WRITE		= 0x07,
+	ARMV8_PMUV3_PERFCTR_EXC_TAKEN		= 0x09,
+	ARMV8_PMUV3_PERFCTR_EXC_EXECUTED	= 0x0A,
+	ARMV8_PMUV3_PERFCTR_CID_WRITE		= 0x0B,
+	ARMV8_PMUV3_PERFCTR_PC_WRITE		= 0x0C,
+	ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH	= 0x0D,
+	ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN	= 0x0E,
+	ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F,
+	ARMV8_PMUV3_PERFCTR_TTBR_WRITE		= 0x1C,
+
+	/* Common microarchitectural events. */
+	ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL	= 0x01,
+	ARMV8_PMUV3_PERFCTR_ITLB_REFILL		= 0x02,
+	ARMV8_PMUV3_PERFCTR_DTLB_REFILL		= 0x05,
+	ARMV8_PMUV3_PERFCTR_MEM_ACCESS		= 0x13,
+	ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS	= 0x14,
+	ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB	= 0x15,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS	= 0x16,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL	= 0x17,
+	ARMV8_PMUV3_PERFCTR_L2_CACHE_WB		= 0x18,
+	ARMV8_PMUV3_PERFCTR_BUS_ACCESS		= 0x19,
+	ARMV8_PMUV3_PERFCTR_MEM_ERROR		= 0x1A,
+	ARMV8_PMUV3_PERFCTR_BUS_CYCLES		= 0x1D,
+};
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+#define HW_OP_UNSUPPORTED		0xFFFF
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+#define C(_x)	\
+	PERF_COUNT_HW_CACHE_##_x
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+#define CACHE_OP_UNSUPPORTED		0xFFFF
+
+/*
+ * @ref.impl arch/arm64/kernel/perf_event.c
+ * PMUv3 HW events mapping.
+ */
+static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_REF_CPU_CYCLES]		= HW_OP_UNSUPPORTED, /* TODO[PMU]: PERF_COUNT_HW_REF_CPU_CYCLES was not present on CentOS; needs checking. */
+};
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+						[PERF_COUNT_HW_CACHE_OP_MAX]
+						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + uint64_t config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int +armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], uint64_t config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int +armpmu_map_raw_event(uint32_t raw_event_mask, uint64_t config) +{ + return (int)(config & raw_event_mask); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int map_cpu_event(uint32_t type, uint64_t config, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + uint32_t raw_event_mask) +{ + switch (type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + return -ENOENT; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline int armv8pmu_counter_valid(int idx) +{ + return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline uint32_t armv8pmu_getreset_flags(void) +{ + uint32_t value; + + /* Read */ + asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); + + /* Write to clear flags */ + value &= ARMV8_OVSR_MASK; + asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); + + return value; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline int armv8pmu_has_overflowed(uint32_t pmovsr) +{ + return pmovsr & ARMV8_OVERFLOWED_MASK; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline int armv8pmu_counter_has_overflowed(uint32_t pmnc, int idx) +{ + int ret = 0; + uint32_t counter; + + if (!armv8pmu_counter_valid(idx)) { + ekprintf("CPU%u checking wrong counter %d overflow status\n", + ihk_mc_get_processor_id(), idx); + } else { + counter = ARMV8_IDX_TO_COUNTER(idx); + ret = pmnc & BIT(counter); + } + + return ret; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int armv8_pmuv3_map_event(uint32_t type, uint64_t config) +{ + return map_cpu_event(type, config, &armv8_pmuv3_perf_map, + &armv8_pmuv3_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline uint32_t armv8pmu_pmcr_read(void) +{ + uint32_t val; + asm volatile("mrs %0, pmcr_el0" : "=r" (val)); + return val; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline void armv8pmu_pmcr_write(uint32_t val) +{ + val &= ARMV8_PMCR_MASK; + isb(); + asm volatile("msr pmcr_el0, %0" :: "r" (val)); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline int armv8pmu_select_counter(int idx) +{ + uint32_t counter; + + if (!armv8pmu_counter_valid(idx)) { + ekprintf("CPU%u selecting wrong PMNC counter %d\n", + ihk_mc_get_processor_id(), idx); + return -EINVAL; + } + + counter = ARMV8_IDX_TO_COUNTER(idx); + asm volatile("msr pmselr_el0, %0" :: "r" (counter)); + isb(); + + return idx; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline uint32_t armv8pmu_read_counter(int idx) +{ + uint32_t value = 0; + + if (!armv8pmu_counter_valid(idx)) + ekprintf("CPU%u reading wrong counter %d\n", + ihk_mc_get_processor_id(), idx); + else if (idx == ARMV8_IDX_CYCLE_COUNTER) + asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); + else if (armv8pmu_select_counter(idx) == idx) + asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); + + return value; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static inline void armv8pmu_write_counter(int idx, uint32_t value) +{ + if (!armv8pmu_counter_valid(idx)) + ekprintf("CPU%u writing wrong counter %d\n", + 
ihk_mc_get_processor_id(), idx);
+	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
+		asm volatile("msr pmccntr_el0, %0" :: "r" (value));
+	else if (armv8pmu_select_counter(idx) == idx)
+		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static inline int armv8pmu_enable_intens(int idx)
+{
+	uint32_t counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		ekprintf("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+				ihk_mc_get_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static inline int armv8pmu_disable_intens(int idx)
+{
+	uint32_t counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		ekprintf("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+				ihk_mc_get_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
+	isb();
+	/* Clear the overflow flag in case an interrupt is pending. */
+	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
+	isb();
+	return idx;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static int armv8pmu_set_event_filter(unsigned long* config_base, int mode)
+{
+	if (!(mode & PERFCTR_USER_MODE)) {
+		*config_base |= ARMV8_EXCLUDE_EL0;
+	}
+
+	if (!(mode & PERFCTR_KERNEL_MODE)) {
+		*config_base |= ARMV8_EXCLUDE_EL1;
+	}
+
+	if (0) {
+		/* The common code ignores exclude_hv, so EL2 is always excluded. */
+		*config_base |= ARMV8_INCLUDE_EL2;
+	}
+	return 0;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static inline void armv8pmu_write_evtype(int idx, uint32_t val)
+{
+	if (armv8pmu_select_counter(idx) == idx) {
+		val &= ARMV8_EVTYPE_MASK;
+		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
+	}
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static inline int armv8pmu_enable_counter(int idx)
+{
+	uint32_t counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		ekprintf("CPU%u enabling wrong PMNC counter %d\n",
+				ihk_mc_get_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static inline int armv8pmu_disable_counter(int idx)
+{
+	uint32_t counter;
+
+	if (!armv8pmu_counter_valid(idx)) {
+		ekprintf("CPU%u disabling wrong PMNC counter %d\n",
+				ihk_mc_get_processor_id(), idx);
+		return -EINVAL;
+	}
+
+	counter = ARMV8_IDX_TO_COUNTER(idx);
+	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
+	return idx;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static int armv8pmu_start(void)
+{
+	/* Enable user-mode access to counters. */
+	asm volatile("msr pmuserenr_el0, %0" :: "r"(1));
+
+	/* Enable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
+	return 0;
+}
+
+/* @ref.impl arch/arm64/kernel/perf_event.c */
+static void armv8pmu_stop(void)
+{
+	/* Disable all counters */
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
+
+	/* Disable user-mode access to counters. */
+	asm volatile("msr pmuserenr_el0, %0" :: "r" (0));
+}
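+/*
+ * Illustration (not built): the filter bits above compose as follows for
+ * user-only counting; the event number is an arbitrary placeholder.
+ */
+#if 0
+static unsigned long example_user_only_evtype(unsigned int event)
+{
+	unsigned long config_base = 0;
+
+	/* PERFCTR_USER_MODE alone: EL1 excluded, EL0 counted */
+	armv8pmu_set_event_filter(&config_base, PERFCTR_USER_MODE);
+	return config_base | (event & ARMV8_EVTYPE_EVENT);
+}
+#endif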
*/ + asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static void armv8pmu_disable_event(int idx) +{ + /* + * Disable counter + */ + armv8pmu_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv8pmu_disable_intens(idx); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static void armv8pmu_reset(void* info) +{ + struct arm_pmu* cpu_pmu = (struct arm_pmu*)info; + uint32_t idx, nb_cnt = cpu_pmu->num_events; + + /* The counter and interrupt enable registers are unknown at reset. */ + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) + armv8pmu_disable_event(idx); + + /* Initialize & Reset PMNC: C and P bits. */ + armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); + + /* Disable access from userspace. */ + asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static int armv8pmu_get_event_idx(int num_events, unsigned long used_mask) +{ + int idx; + for (idx = ARMV8_IDX_COUNTER0; idx < num_events; ++idx) { + if (!(used_mask & (1UL << idx))) { + return idx; + } + } + /* The counters are all in use. */ + return -EAGAIN; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static uint32_t armv8pmu_read_num_pmnc_events(void) +{ + uint32_t nb_cnt; + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +/* @ref.impl arch/arm64/kernel/perf_event.c */ +static void armv8pmu_handle_irq(void *priv) +{ + uint32_t pmovsr; + + /* + * Get and reset the IRQ flags + */ + pmovsr = armv8pmu_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv8pmu_has_overflowed(pmovsr)) + return; + + /* + * TODO[PMU]: Handle the counter(s) overflow(s) + */ +} + +static struct ihk_mc_interrupt_handler armv8pmu_handler = { + .func = armv8pmu_handle_irq, + .priv = NULL, +}; + +int armv8pmu_init(struct arm_pmu* cpu_pmu) +{ + cpu_pmu->num_events = armv8pmu_read_num_pmnc_events(); + cpu_pmu->read_counter = armv8pmu_read_counter; + cpu_pmu->write_counter = armv8pmu_write_counter; + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->write_evtype = armv8pmu_write_evtype; + cpu_pmu->enable_intens = armv8pmu_enable_intens; + cpu_pmu->disable_intens = armv8pmu_disable_intens; + cpu_pmu->enable_counter = armv8pmu_enable_counter; + cpu_pmu->disable_counter = armv8pmu_disable_counter; + cpu_pmu->enable_pmu = armv8pmu_start; + cpu_pmu->disable_pmu = armv8pmu_stop; + cpu_pmu->get_event_idx = armv8pmu_get_event_idx; + cpu_pmu->map_event = armv8_pmuv3_map_event; + cpu_pmu->handler = &armv8pmu_handler; + return 0; +} diff --git a/arch/arm64/kernel/postk_print_sysreg.c b/arch/arm64/kernel/postk_print_sysreg.c new file mode 100644 index 00000000..ea03c21e --- /dev/null +++ b/arch/arm64/kernel/postk_print_sysreg.c @@ -0,0 +1,311 @@ +/* postk_print_sysreg.c COPYRIGHT FUJITSU LIMITED 2016 */ +/* + * usage: + * (gdb) call/x postk_debug_sysreg_ttbr1_el1() + * $1 = 0x4e64f000 + */ +#define postk_debug_sysreg(sysreg) __postk_debug_sysreg(sysreg, sysreg) + +#define __postk_debug_sysreg(fname, regname) \ + unsigned long postk_debug_sysreg_ ## fname (void) \ + { \ + unsigned long sysreg; \ + asm volatile( \ + "mrs %0, " # regname "\n" \ + : "=r" (sysreg) \ + : \ + : "memory"); \ + return sysreg; \ + } + +/* + * ARMR Architecture Reference Manual ARMv8, for ARMv8-A architecture profile Errata markup Beta + * - Table J-5 Alphabetical index 
of AArch64 Registers + */ +postk_debug_sysreg(actlr_el1) +postk_debug_sysreg(actlr_el2) +postk_debug_sysreg(actlr_el3) +postk_debug_sysreg(afsr0_el1) +postk_debug_sysreg(afsr0_el2) +postk_debug_sysreg(afsr0_el3) +postk_debug_sysreg(afsr1_el1) +postk_debug_sysreg(afsr1_el2) +postk_debug_sysreg(afsr1_el3) +postk_debug_sysreg(aidr_el1) +postk_debug_sysreg(amair_el1) +postk_debug_sysreg(amair_el2) +postk_debug_sysreg(amair_el3) +/*postk_debug_sysreg(at s12e0r)*/ +/*postk_debug_sysreg(at s12e0w)*/ +/*postk_debug_sysreg(at s12e1r)*/ +/*postk_debug_sysreg(at s12e1w)*/ +/*postk_debug_sysreg(at s1e0r)*/ +/*postk_debug_sysreg(at s1e0w)*/ +/*postk_debug_sysreg(at s1e1r)*/ +/*postk_debug_sysreg(at s1e1w)*/ +/*postk_debug_sysreg(at s1e2r)*/ +/*postk_debug_sysreg(at s1e2w)*/ +/*postk_debug_sysreg(at s1e3r)*/ +/*postk_debug_sysreg(at s1e3w)*/ +postk_debug_sysreg(ccsidr_el1) +postk_debug_sysreg(clidr_el1) +postk_debug_sysreg(cntfrq_el0) +postk_debug_sysreg(cnthctl_el2) +postk_debug_sysreg(cnthp_ctl_el2) +postk_debug_sysreg(cnthp_cval_el2) +postk_debug_sysreg(cnthp_tval_el2) +postk_debug_sysreg(cntkctl_el1) +postk_debug_sysreg(cntp_ctl_el0) +postk_debug_sysreg(cntp_cval_el0) +postk_debug_sysreg(cntp_tval_el0) +postk_debug_sysreg(cntpct_el0) +postk_debug_sysreg(cntps_ctl_el1) +postk_debug_sysreg(cntps_cval_el1) +postk_debug_sysreg(cntps_tval_el1) +postk_debug_sysreg(cntv_ctl_el0) +postk_debug_sysreg(cntv_cval_el0) +postk_debug_sysreg(cntv_tval_el0) +postk_debug_sysreg(cntvct_el0) +postk_debug_sysreg(cntvoff_el2) +postk_debug_sysreg(contextidr_el1) +postk_debug_sysreg(cpacr_el1) +postk_debug_sysreg(cptr_el2) +postk_debug_sysreg(cptr_el3) +postk_debug_sysreg(csselr_el1) +postk_debug_sysreg(ctr_el0) +postk_debug_sysreg(currentel) +postk_debug_sysreg(dacr32_el2) +postk_debug_sysreg(daif) +postk_debug_sysreg(dbgauthstatus_el1) +/*postk_debug_sysreg(dbgbcr_el1)*/ +/*postk_debug_sysreg(dbgbvr_el1)*/ +postk_debug_sysreg(dbgclaimclr_el1) +postk_debug_sysreg(dbgclaimset_el1) +postk_debug_sysreg(dbgdtr_el0) +postk_debug_sysreg(dbgdtrrx_el0) +postk_debug_sysreg(dbgdtrtx_el0) +postk_debug_sysreg(dbgprcr_el1) +postk_debug_sysreg(dbgvcr32_el2) +/*postk_debug_sysreg(dbgwcr_el1)*/ +/*postk_debug_sysreg(dbgwvr_el1)*/ +/*postk_debug_sysreg(dc cisw)*/ +/*postk_debug_sysreg(dc civac)*/ +/*postk_debug_sysreg(dc csw)*/ +/*postk_debug_sysreg(dc cvac)*/ +/*postk_debug_sysreg(dc cvau)*/ +/*postk_debug_sysreg(dc isw)*/ +/*postk_debug_sysreg(dc ivac)*/ +/*postk_debug_sysreg(dc zva)*/ +postk_debug_sysreg(dczid_el0) +postk_debug_sysreg(dlr_el0) +postk_debug_sysreg(dspsr_el0) +postk_debug_sysreg(elr_el1) +postk_debug_sysreg(elr_el2) +postk_debug_sysreg(elr_el3) +postk_debug_sysreg(esr_el1) +postk_debug_sysreg(esr_el2) +postk_debug_sysreg(esr_el3) +postk_debug_sysreg(far_el1) +postk_debug_sysreg(far_el2) +postk_debug_sysreg(far_el3) +postk_debug_sysreg(fpcr) +postk_debug_sysreg(fpexc32_el2) +postk_debug_sysreg(fpsr) +postk_debug_sysreg(hacr_el2) +postk_debug_sysreg(hcr_el2) +postk_debug_sysreg(hpfar_el2) +postk_debug_sysreg(hstr_el2) +/*postk_debug_sysreg(ic iallu)*/ +/*postk_debug_sysreg(ic ialluis)*/ +/*postk_debug_sysreg(ic ivau)*/ +/*postk_debug_sysreg(icc_ap0r0_el1)*/ +/*postk_debug_sysreg(icc_ap0r1_el1)*/ +/*postk_debug_sysreg(icc_ap0r2_el1)*/ +/*postk_debug_sysreg(icc_ap0r3_el1)*/ +/*postk_debug_sysreg(icc_ap1r0_el1)*/ +/*postk_debug_sysreg(icc_ap1r1_el1)*/ +/*postk_debug_sysreg(icc_ap1r2_el1)*/ +/*postk_debug_sysreg(icc_ap1r3_el1)*/ +/*postk_debug_sysreg(icc_asgi1r_el1)*/ +/*postk_debug_sysreg(icc_bpr0_el1)*/ 
+/*postk_debug_sysreg(icc_bpr1_el1)*/ +/*postk_debug_sysreg(icc_ctlr_el1)*/ +/*postk_debug_sysreg(icc_ctlr_el3)*/ +/*postk_debug_sysreg(icc_dir_el1)*/ +/*postk_debug_sysreg(icc_eoir0_el1)*/ +/*postk_debug_sysreg(icc_eoir1_el1)*/ +/*postk_debug_sysreg(icc_hppir0_el1)*/ +/*postk_debug_sysreg(icc_hppir1_el1)*/ +/*postk_debug_sysreg(icc_iar0_el1)*/ +/*postk_debug_sysreg(icc_iar1_el1)*/ +/*postk_debug_sysreg(icc_igrpen0_el1)*/ +/*postk_debug_sysreg(icc_igrpen1_el1)*/ +/*postk_debug_sysreg(icc_igrpen1_el3)*/ +/*postk_debug_sysreg(icc_pmr_el1)*/ +/*postk_debug_sysreg(icc_rpr_el1)*/ +/*postk_debug_sysreg(icc_seien_el1)*/ +/*postk_debug_sysreg(icc_sgi0r_el1)*/ +/*postk_debug_sysreg(icc_sgi1r_el1)*/ +/*postk_debug_sysreg(icc_sre_el1)*/ +/*postk_debug_sysreg(icc_sre_el2)*/ +/*postk_debug_sysreg(icc_sre_el3)*/ +/*postk_debug_sysreg(ich_ap0r0_el2)*/ +/*postk_debug_sysreg(ich_ap0r1_el2)*/ +/*postk_debug_sysreg(ich_ap0r2_el2)*/ +/*postk_debug_sysreg(ich_ap0r3_el2)*/ +/*postk_debug_sysreg(ich_ap1r0_el2)*/ +/*postk_debug_sysreg(ich_ap1r1_el2)*/ +/*postk_debug_sysreg(ich_ap1r2_el2)*/ +/*postk_debug_sysreg(ich_ap1r3_el2)*/ +/*postk_debug_sysreg(ich_eisr_el2)*/ +/*postk_debug_sysreg(ich_elsr_el2)*/ +/*postk_debug_sysreg(ich_hcr_el2)*/ +/*postk_debug_sysreg(ich_lr_el2)*/ +/*postk_debug_sysreg(ich_misr_el2)*/ +/*postk_debug_sysreg(ich_vmcr_el2)*/ +/*postk_debug_sysreg(ich_vseir_el2)*/ +/*postk_debug_sysreg(ich_vtr_el2)*/ +postk_debug_sysreg(id_aa64afr0_el1) +postk_debug_sysreg(id_aa64afr1_el1) +postk_debug_sysreg(id_aa64dfr0_el1) +postk_debug_sysreg(id_aa64dfr1_el1) +postk_debug_sysreg(id_aa64isar0_el1) +postk_debug_sysreg(id_aa64isar1_el1) +postk_debug_sysreg(id_aa64mmfr0_el1) +postk_debug_sysreg(id_aa64mmfr1_el1) +postk_debug_sysreg(id_aa64pfr0_el1) +postk_debug_sysreg(id_aa64pfr1_el1) +postk_debug_sysreg(id_afr0_el1) +postk_debug_sysreg(id_dfr0_el1) +postk_debug_sysreg(id_isar0_el1) +postk_debug_sysreg(id_isar1_el1) +postk_debug_sysreg(id_isar2_el1) +postk_debug_sysreg(id_isar3_el1) +postk_debug_sysreg(id_isar4_el1) +postk_debug_sysreg(id_isar5_el1) +postk_debug_sysreg(id_mmfr0_el1) +postk_debug_sysreg(id_mmfr1_el1) +postk_debug_sysreg(id_mmfr2_el1) +postk_debug_sysreg(id_mmfr3_el1) +postk_debug_sysreg(id_pfr0_el1) +postk_debug_sysreg(id_pfr1_el1) +postk_debug_sysreg(ifsr32_el2) +postk_debug_sysreg(isr_el1) +postk_debug_sysreg(mair_el1) +postk_debug_sysreg(mair_el2) +postk_debug_sysreg(mair_el3) +postk_debug_sysreg(mdccint_el1) +postk_debug_sysreg(mdccsr_el0) +postk_debug_sysreg(mdcr_el2) +postk_debug_sysreg(mdcr_el3) +postk_debug_sysreg(mdrar_el1) +postk_debug_sysreg(mdscr_el1) +postk_debug_sysreg(midr_el1) +postk_debug_sysreg(mpidr_el1) +postk_debug_sysreg(mvfr0_el1) +postk_debug_sysreg(mvfr1_el1) +postk_debug_sysreg(mvfr2_el1) +postk_debug_sysreg(nzcv) +postk_debug_sysreg(osdlr_el1) +postk_debug_sysreg(osdtrrx_el1) +postk_debug_sysreg(osdtrtx_el1) +postk_debug_sysreg(oseccr_el1) +postk_debug_sysreg(oslar_el1) +postk_debug_sysreg(oslsr_el1) +postk_debug_sysreg(par_el1) +postk_debug_sysreg(pmccfiltr_el0) +postk_debug_sysreg(pmccntr_el0) +postk_debug_sysreg(pmceid0_el0) +postk_debug_sysreg(pmceid1_el0) +postk_debug_sysreg(pmcntenclr_el0) +postk_debug_sysreg(pmcntenset_el0) +postk_debug_sysreg(pmcr_el0) +/*postk_debug_sysreg(pmevcntr_el0)*/ +/*postk_debug_sysreg(pmevtyper_el0)*/ +postk_debug_sysreg(pmintenclr_el1) +postk_debug_sysreg(pmintenset_el1) +postk_debug_sysreg(pmovsclr_el0) +postk_debug_sysreg(pmovsset_el0) +postk_debug_sysreg(pmselr_el0) +postk_debug_sysreg(pmswinc_el0) 
+postk_debug_sysreg(pmuserenr_el0) +postk_debug_sysreg(pmxevcntr_el0) +postk_debug_sysreg(pmxevtyper_el0) +postk_debug_sysreg(revidr_el1) +postk_debug_sysreg(rmr_el1) +postk_debug_sysreg(rmr_el2) +postk_debug_sysreg(rmr_el3) +postk_debug_sysreg(rvbar_el1) +postk_debug_sysreg(rvbar_el2) +postk_debug_sysreg(rvbar_el3) +/*postk_debug_sysreg(s3____)*/ +postk_debug_sysreg(scr_el3) +postk_debug_sysreg(sctlr_el1) +postk_debug_sysreg(sctlr_el2) +postk_debug_sysreg(sctlr_el3) +postk_debug_sysreg(sder32_el3) +postk_debug_sysreg(sp_el0) +postk_debug_sysreg(sp_el1) +postk_debug_sysreg(sp_el2) +/*postk_debug_sysreg(sp_el3)*/ +postk_debug_sysreg(spsel) +postk_debug_sysreg(spsr_abt) +postk_debug_sysreg(spsr_el1) +postk_debug_sysreg(spsr_el2) +postk_debug_sysreg(spsr_el3) +postk_debug_sysreg(spsr_fiq) +postk_debug_sysreg(spsr_irq) +postk_debug_sysreg(spsr_und) +postk_debug_sysreg(tcr_el1) +postk_debug_sysreg(tcr_el2) +postk_debug_sysreg(tcr_el3) +postk_debug_sysreg(teecr32_el1) +postk_debug_sysreg(teehbr32_el1) +/*postk_debug_sysreg(tlbi alle1)*/ +/*postk_debug_sysreg(tlbi alle1is)*/ +/*postk_debug_sysreg(tlbi alle2)*/ +/*postk_debug_sysreg(tlbi alle2is)*/ +/*postk_debug_sysreg(tlbi alle3)*/ +/*postk_debug_sysreg(tlbi alle3is)*/ +/*postk_debug_sysreg(tlbi aside1)*/ +/*postk_debug_sysreg(tlbi aside1is)*/ +/*postk_debug_sysreg(tlbi ipas2e1)*/ +/*postk_debug_sysreg(tlbi ipas2e1is)*/ +/*postk_debug_sysreg(tlbi ipas2le1)*/ +/*postk_debug_sysreg(tlbi ipas2le1is)*/ +/*postk_debug_sysreg(tlbi vaae1)*/ +/*postk_debug_sysreg(tlbi vaae1is)*/ +/*postk_debug_sysreg(tlbi vaale1)*/ +/*postk_debug_sysreg(tlbi vaale1is)*/ +/*postk_debug_sysreg(tlbi vae1)*/ +/*postk_debug_sysreg(tlbi vae1is)*/ +/*postk_debug_sysreg(tlbi vae2)*/ +/*postk_debug_sysreg(tlbi vae2is)*/ +/*postk_debug_sysreg(tlbi vae3)*/ +/*postk_debug_sysreg(tlbi vae3is)*/ +/*postk_debug_sysreg(tlbi vale1)*/ +/*postk_debug_sysreg(tlbi vale1is)*/ +/*postk_debug_sysreg(tlbi vale2)*/ +/*postk_debug_sysreg(tlbi vale2is)*/ +/*postk_debug_sysreg(tlbi vale3)*/ +/*postk_debug_sysreg(tlbi vale3is)*/ +/*postk_debug_sysreg(tlbi vmalle1)*/ +/*postk_debug_sysreg(tlbi vmalle1is)*/ +/*postk_debug_sysreg(tlbi vmalls12e1)*/ +/*postk_debug_sysreg(tlbi vmalls12e1is)*/ +postk_debug_sysreg(tpidr_el0) +postk_debug_sysreg(tpidr_el1) +postk_debug_sysreg(tpidr_el2) +postk_debug_sysreg(tpidr_el3) +postk_debug_sysreg(tpidrro_el0) +postk_debug_sysreg(ttbr0_el1) +postk_debug_sysreg(ttbr0_el2) +postk_debug_sysreg(ttbr0_el3) +postk_debug_sysreg(ttbr1_el1) +postk_debug_sysreg(vbar_el1) +postk_debug_sysreg(vbar_el2) +postk_debug_sysreg(vbar_el3) +postk_debug_sysreg(vmpidr_el2) +postk_debug_sysreg(vpidr_el2) +postk_debug_sysreg(vtcr_el2) +postk_debug_sysreg(vttbr_el2) diff --git a/arch/arm64/kernel/proc-macros.S b/arch/arm64/kernel/proc-macros.S new file mode 100644 index 00000000..a7e51427 --- /dev/null +++ b/arch/arm64/kernel/proc-macros.S @@ -0,0 +1,13 @@ +/* proc-macros.S COPYRIGHT FUJITSU LIMITED 2015 */ + +#include + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. 
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
diff --git a/arch/arm64/kernel/proc.S b/arch/arm64/kernel/proc.S
new file mode 100644
index 00000000..bdc382d8
--- /dev/null
+++ b/arch/arm64/kernel/proc.S
@@ -0,0 +1,148 @@
+/* proc.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
+
+#include
+#include
+#include
+#include
+#include "proc-macros.S"
+
+#ifdef CONFIG_ARM64_64K_PAGES
+# define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
+#else
+# define TCR_TG_FLAGS	TCR_TG0_4K | TCR_TG1_4K
+#endif
+
+//#ifdef CONFIG_SMP
+#define TCR_SMP_FLAGS	TCR_SHARED
+//#else
+//#define TCR_SMP_FLAGS	0
+//#endif
+
+/* PTWs cacheable, inner/outer WBWA */
+#define TCR_CACHE_FLAGS	TCR_IRGN_WBWA | TCR_ORGN_WBWA
+
+#define MAIR(attr, mt)	((attr) << ((mt) * 8))
+
+/*
+ * cpu_do_idle()
+ *
+ * Idle the processor (wait for interrupt).
+ */
+#if defined(CONFIG_HAS_NMI)
+#include
+ENTRY(cpu_do_idle)
+	mrs	x0, daif			// save I bit
+	msr	daifset, #2			// set I bit
+	mrs_s	x1, ICC_PMR_EL1			// save PMR
+	mov	x2, #ICC_PMR_EL1_UNMASKED
+	msr_s	ICC_PMR_EL1, x2			// unmask at PMR
+	dsb	sy				// WFI may enter a low-power mode
+	wfi
+	msr_s	ICC_PMR_EL1, x1			// restore PMR
+	msr	daif, x0			// restore I bit
+	ret
+ENDPROC(cpu_do_idle)
+#else /* defined(CONFIG_HAS_NMI) */
+ENTRY(cpu_do_idle)
+	dsb	sy				// WFI may enter a low-power mode
+	wfi
+	ret
+ENDPROC(cpu_do_idle)
+#endif /* defined(CONFIG_HAS_NMI) */
+
+/*
+ * cpu_do_switch_mm(pgd_phys, tsk)
+ *
+ * Set the translation table base pointer to be pgd_phys.
+ *
+ * - pgd_phys - physical address of new TTB
+ */
+ENTRY(cpu_do_switch_mm)
+	//mmid	w1, x1				// get mm->context.id
+	bfi	x0, x1, #48, #16		// set the ASID
+	msr	ttbr0_el1, x0			// set TTBR0
+	isb
+	ret
+ENDPROC(cpu_do_switch_mm)
+
+	.section ".text.init", #alloc, #execinstr
+
+/*
+ * __cpu_setup
+ *
+ * Initialise the processor for turning the MMU on. Return in x0 the
+ * value of the SCTLR_EL1 register.
+ */
+ENTRY(__cpu_setup)
+	tlbi	vmalle1				// Invalidate local TLB
+	dsb	nsh
+
+	mov	x0, #3 << 20
+
+	/* SVE */
+	mrs	x5, id_aa64pfr0_el1
+	ubfx	x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
+	cbz	x5, 1f
+
+	orr	x0, x0, #CPACR_EL1_ZEN		// SVE: disable trapping at EL1 and EL0
+1:	msr	cpacr_el1, x0			// Enable FP/ASIMD
+
+	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
+	msr	mdscr_el1, x0			// access to the DCC from EL0
+	isb					// Unmask debug exceptions now,
+	enable_dbg				// since this is per-cpu
+
+	/*
+	 * Memory region attributes for LPAE:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *			n	MAIR
+	 *   DEVICE_nGnRnE	000	00000000
+	 *   DEVICE_nGnRE	001	00000100
+	 *   DEVICE_GRE		010	00001100
+	 *   NORMAL_NC		011	01000100
+	 *   NORMAL		100	11111111
+	 */
+	ldr	x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
+		     MAIR(0x04, MT_DEVICE_nGnRE) | \
+		     MAIR(0x0c, MT_DEVICE_GRE) | \
+		     MAIR(0x44, MT_NORMAL_NC) | \
+		     MAIR(0xff, MT_NORMAL)
+	msr	mair_el1, x5
+	/*
+	 * Prepare SCTLR
+	 */
+	adr	x5, crval
+	ldp	w5, w6, [x5]
+	mrs	x0, sctlr_el1
+	bic	x0, x0, x5			// clear bits
+	orr	x0, x0, x6			// set bits
+	/*
+	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
+	 * both user and kernel.
+	 */
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
+		      TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1. 
+ */ + mrs x9, ID_AA64MMFR0_EL1 + bfi x10, x9, #32, #3 + msr tcr_el1, x10 + ret // return to head.S +ENDPROC(__cpu_setup) + + /* + * n n T + * U E WT T UD US IHBS + * CE0 XWHW CZ ME TEEA S + * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM + * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved + * .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings + */ + .type crval, #object +crval: + .word 0x000802e2 // clear + .word 0x0405d11d // set + diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c new file mode 100644 index 00000000..db092998 --- /dev/null +++ b/arch/arm64/kernel/psci.c @@ -0,0 +1,155 @@ +/* psci.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ +/* @ref.impl arch/arm64/kernel/psci.c */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon + */ + +#include +#include +#include +#include +#include +#include + +//#define DEBUG_PRINT_PSCI + +#ifdef DEBUG_PRINT_PSCI +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#endif + +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +extern uint64_t ihk_param_cpu_logical_map; +static uint64_t *__cpu_logical_map = &ihk_param_cpu_logical_map; +#define cpu_logical_map(cpu) __cpu_logical_map[cpu] + +struct psci_power_state { + uint16_t id; + uint8_t type; + uint8_t affinity_level; +}; + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + case PSCI_RET_INVALID_PARAMS: + return -EINVAL; + case PSCI_RET_DENIED: + return -EPERM; + }; + + return -EINVAL; +} + +static uint32_t psci_power_state_pack(struct psci_power_state state) +{ + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +static noinline int __invoke_psci_fn_hvc(uint64_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "hvc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static noinline int __invoke_psci_fn_smc(uint64_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + + +static int (*invoke_psci_fn)(uint64_t, uint64_t, uint64_t, uint64_t) = NULL; + +#define PSCI_METHOD_INVALID -1 +#define PSCI_METHOD_HVC 0 +#define PSCI_METHOD_SMC 1 +int psci_init(void) +{ + extern unsigned long ihk_param_psci_method; + int ret = 0; + + if (ihk_param_psci_method == PSCI_METHOD_SMC) { + dkprintf("psci_init(): set invoke_psci_fn = __invoke_psci_fn_smc\n"); + invoke_psci_fn = 
__invoke_psci_fn_smc; + } else if (ihk_param_psci_method == PSCI_METHOD_HVC) { + dkprintf("psci_init(): set invoke_psci_fn = __invoke_psci_fn_hvc\n"); + invoke_psci_fn = __invoke_psci_fn_hvc; + } else { + ekprintf("psci_init(): invalid ihk_param_psci_method value: %ld\n", ihk_param_psci_method); + ret = -1; + } + return ret; +} + +int psci_cpu_off(void) +{ + int err; + uint32_t fn, power_state; + + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + fn = PSCI_0_2_FN_CPU_OFF; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + uint32_t fn; + + fn = PSCI_0_2_FN64_CPU_ON; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +int cpu_psci_cpu_boot(unsigned int cpu, unsigned long pc) +{ + return psci_cpu_on(cpu_logical_map(cpu), pc); +} diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c new file mode 100644 index 00000000..03dc39f9 --- /dev/null +++ b/arch/arm64/kernel/ptrace.c @@ -0,0 +1,1006 @@ +/* ptrace.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//#define DEBUG_PRINT_SC + +#ifdef DEBUG_PRINT_SC +#define dkprintf kprintf +#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#endif + +#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) + +#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ + __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) + +extern void save_debugreg(unsigned long *debugreg); +extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); +extern int interrupt_from_user(void *); + +enum aarch64_regset { + REGSET_GPR, + REGSET_FPR, + REGSET_TLS, + REGSET_HW_BREAK, + REGSET_HW_WATCH, + REGSET_SYSTEM_CALL, +#ifdef CONFIG_ARM64_SVE + REGSET_SVE, +#endif /* CONFIG_ARM64_SVE */ +}; + +struct user_regset; +typedef long user_regset_get_fn(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); + +typedef long user_regset_set_fn(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +struct user_regset { + user_regset_get_fn *get; + user_regset_set_fn *set; + unsigned int n; + unsigned int size; + unsigned int core_note_type; +}; + +long ptrace_read_user(struct thread *thread, long addr, unsigned long *value) +{ + return -EIO; +} + +long ptrace_write_user(struct thread *thread, long addr, unsigned long value) +{ + return -EIO; +} + +long ptrace_read_fpregs(struct thread *thread, void *fpregs) +{ + return -EIO; +} + +long ptrace_write_fpregs(struct thread *thread, void *fpregs) +{ + return -EIO; +} + +/* @ref.impl arch/arm64/kernel/ptrace.c::ptrace_hbp_get_resource_info */ +unsigned int ptrace_hbp_get_resource_info(unsigned int note_type) +{ + unsigned char num; + unsigned int reg = 0; + + switch (note_type) { + case NT_ARM_HW_BREAK: + num = hw_breakpoint_slots(TYPE_INST); + break; + case NT_ARM_HW_WATCH: + num = hw_breakpoint_slots(TYPE_DATA); +
break; + default: + return -EINVAL; + } + + reg |= debug_monitors_arch(); + reg <<= 8; + reg |= num; + + return reg; +} + +/* @ref.impl include/linux/regset.h::user_regset_copyout */ +static inline long user_regset_copyout(unsigned int *pos, unsigned int *count, + void **kbuf, + void __user **ubuf, const void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) { + return 0; + } + + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = 0; + + if ((end_pos < 0) || (*count < (end_pos - *pos))) { + copy = *count; + } else { + copy = (end_pos - *pos); + } + + data += *pos - start_pos; + if (*kbuf) { + memcpy(*kbuf, data, copy); + *kbuf += copy; + } else if (copy_to_user(*ubuf, data, copy)) { + return -EFAULT; + } else { + *ubuf += copy; + } + *pos += copy; + *count -= copy; + } + return 0; +} + +/* @ref.impl include/linux/regset.h::user_regset_copyin */ +static inline long user_regset_copyin(unsigned int *pos, unsigned int *count, + const void **kbuf, + const void __user **ubuf, void *data, + const int start_pos, const int end_pos) +{ + if (*count == 0) { + return 0; + } + + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = 0; + + if ((end_pos < 0) || (*count < (end_pos - *pos))) { + copy = *count; + } else { + copy = (end_pos - *pos); + } + + data += *pos - start_pos; + if (*kbuf) { + memcpy(data, *kbuf, copy); + *kbuf += copy; + } else if (copy_from_user(data, *ubuf, copy)) { + return -EFAULT; + } else { + *ubuf += copy; + } + *pos += copy; + *count -= copy; + } + return 0; +} + +/* @ref.impl include/linux/regset.h::user_regset_copyout_zero */ +static inline long user_regset_copyout_zero(unsigned int *pos, + unsigned int *count, + void **kbuf, void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) { + return 0; + } + + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = 0; + char *tmp = NULL; + + if ((end_pos < 0) || (*count < (end_pos - *pos))) { + copy = *count; + } else { + copy = (end_pos - *pos); + } + + if (*kbuf) { + memset(*kbuf, 0, copy); + *kbuf += copy; + } else { + tmp = kmalloc(copy, IHK_MC_AP_NOWAIT); + if (tmp == NULL) { + return -ENOMEM; + } + memset(tmp, 0, copy); + + if (copy_to_user(*ubuf, tmp, copy)) { + kfree(tmp); + return -EFAULT; + } else { + *ubuf += copy; + } + kfree(tmp); + } + *pos += copy; + *count -= copy; + } + return 0; +} + +/* @ref.impl include/linux/regset.h::user_regset_copyin_ignore */ +static inline int user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) +{ + if (*count == 0) { + return 0; + } + + BUG_ON(*pos < start_pos); + if (end_pos < 0 || *pos < end_pos) { + unsigned int copy = 0; + + if ((end_pos < 0) || (*count < (end_pos - *pos))) { + copy = *count; + } else { + copy = (end_pos - *pos); + } + + if (*kbuf) { + *kbuf += copy; + } else { + *ubuf += copy; + } + *pos += copy; + *count -= copy; + } + return 0; +} + +/* @ref.impl include/linux/regset.h::copy_regset_to_user */ +static inline long copy_regset_to_user(struct thread *target, + const struct user_regset *regset, + unsigned int offset, unsigned int size, + void __user *data) +{ + if (!regset->get) { + return -EOPNOTSUPP; + } + return regset->get(target, regset, offset, size, NULL, data); +} + +/* @ref.impl include/linux/regset.h::copy_regset_from_user */ +static inline long copy_regset_from_user(struct thread *target, + const struct 
user_regset *regset, + unsigned int offset, unsigned int size, + const void __user *data) +{ + if (!regset->set) { + return -EOPNOTSUPP; + } + return regset->set(target, regset, offset, size, NULL, data); +} + +/* + * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * Userspace cannot use these until they have an architectural meaning. + * We also reserve IL for the kernel; SS is handled dynamically. + */ +#define SPSR_EL1_AARCH64_RES0_BITS 0xffffffff0fdffc20UL + +static int valid_native_regs(struct user_pt_regs *regs) +{ + regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS; + + if (interrupt_from_user(regs) && !(regs->pstate & PSR_MODE32_BIT) && + (regs->pstate & PSR_D_BIT) == 0 && + (regs->pstate & PSR_A_BIT) == 0 && + (regs->pstate & PSR_I_BIT) == 0 && + (regs->pstate & PSR_F_BIT) == 0) { + return 1; + } + + /* Force PSR to a valid 64-bit EL0t */ + regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT; + + return 0; +} + +static int valid_user_regs(struct user_pt_regs *regs, struct thread *thread) +{ + if (!(thread->ctx.thread->flags & (1 << TIF_SINGLESTEP))) { + regs->pstate &= ~DBG_SPSR_SS; + } + return valid_native_regs(regs); +} + +/* read NT_PRSTATUS */ +static long gpr_get(struct thread *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_pt_regs *uregs = &target->uctx->user_regs; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +/* write NT_PRSTATUS */ +static long gpr_set(struct thread *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + long ret; + struct user_pt_regs newregs = target->uctx->user_regs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); + if (ret) { + goto out; + } + + if (!valid_user_regs(&newregs, target)) { + ret = -EINVAL; + goto out; + } + target->uctx->user_regs = newregs; +out: + return ret; +} + +/* read NT_PRFPREG */ +static long fpr_get(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + long ret = -EINVAL; + + if (target->fp_regs == NULL) { + ret = -ENOMEM; + goto out; + } + + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + struct user_fpsimd_state *uregs; + + if (likely(elf_hwcap & HWCAP_SVE)) { + /* sync to sve --> fpsimd */ + thread_sve_to_fpsimd(target, target->fp_regs); + } + + uregs = &target->fp_regs->user_fpsimd; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); + } +out: + return ret; +} + +/* write NT_PRFPREG */ +static long __fpr_set(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned int start_pos) +{ + long ret = -EINVAL; + + if (target->fp_regs == NULL) { + ret = -ENOMEM; + goto out; + } + + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + struct user_fpsimd_state newstate; + + if (likely(elf_hwcap & HWCAP_SVE)) { + /* sync to sve --> fpsimd */ + thread_sve_to_fpsimd(target, target->fp_regs); + } + + newstate = target->fp_regs->user_fpsimd; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, + start_pos, start_pos + sizeof(newstate)); + if (ret) { + goto out; + } + + target->fp_regs->user_fpsimd = newstate; + + if (likely(elf_hwcap & HWCAP_SVE)) { + /* sync to fpsimd --> sve */ + thread_fpsimd_to_sve(target, target->fp_regs); + } + } +out: + return ret; +} + +/* write NT_PRFPREG */ +static
long fpr_set(struct thread *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); +} + +/* read NT_ARM_TLS */ +static long tls_get(struct thread *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long *tls = &target->tlsblock_base; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); +} + +/* write NT_ARM_TLS */ +static long tls_set(struct thread *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + long ret; + unsigned long tls = target->tlsblock_base; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + if (ret) { + goto out; + } + + target->tlsblock_base = tls; +out: + return ret; +} + +/* read NT_ARM_SYSTEM_CALL */ +static long system_call_get(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int syscallno = target->uctx->syscallno; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &syscallno, 0, -1); +} + +/* write NT_ARM_SYSTEM_CALL */ +static long system_call_set(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int syscallno = target->uctx->syscallno; + long ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); + if (ret) { + goto out; + } + + target->uctx->syscallno = syscallno; +out: + return ret; +} + +#define PTRACE_HBP_ADDR_SZ sizeof(uint64_t) +#define PTRACE_HBP_CTRL_SZ sizeof(uint32_t) +#define PTRACE_HBP_PAD_SZ sizeof(uint32_t) + +/* read NT_ARM_HW_BREAK or NT_ARM_HW_WATCH */ +static long hw_break_get(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + long ret = -EINVAL; + int idx = 0, offset, limit, bw; + uint32_t info, ctrl; + uint64_t addr; + struct user_hwdebug_state *hws = NULL; + + if (note_type != NT_ARM_HW_BREAK && + note_type != NT_ARM_HW_WATCH) { + goto out; + } + + if (target->ptrace_debugreg == NULL) { + ret = -ENOMEM; + goto out; + } + bw = (note_type == NT_ARM_HW_BREAK ? 
HWS_BREAK : HWS_WATCH); + hws = (struct user_hwdebug_state *)target->ptrace_debugreg + bw; + + /* Resource info */ + info = hws->dbg_info; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, + sizeof(info)); + if (ret) { + goto out; + } + + /* Pad */ + offset = offsetof(struct user_hwdebug_state, pad); + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) { + goto out; + } + + /* (address, ctrl) registers */ + offset = offsetof(struct user_hwdebug_state, dbg_regs); + limit = regset->n * regset->size; + while (count && offset < limit) { + addr = hws->dbg_regs[idx].addr; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) { + goto out; + } + offset += PTRACE_HBP_ADDR_SZ; + + ctrl = hws->dbg_regs[idx].ctrl; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) { + goto out; + } + offset += PTRACE_HBP_CTRL_SZ; + + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) { + goto out; + } + offset += PTRACE_HBP_PAD_SZ; + idx++; + } +out: + return ret; +} + +/* write NT_ARM_HW_BREAK or NT_ARM_HW_WATCH */ +static long hw_break_set(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int note_type = regset->core_note_type; + long ret = -EINVAL; + int idx = 0, offset, limit, bw; + uint32_t ctrl; + uint64_t addr; + struct user_hwdebug_state *hws = NULL; + + if (note_type != NT_ARM_HW_BREAK && + note_type != NT_ARM_HW_WATCH) { + goto out; + } + + if (target->ptrace_debugreg == NULL) { + ret = -ENOMEM; + goto out; + } + bw = (note_type == NT_ARM_HW_BREAK ? 
HWS_BREAK : HWS_WATCH); + hws = (struct user_hwdebug_state *)target->ptrace_debugreg + bw; + + /* Resource info and pad */ + offset = offsetof(struct user_hwdebug_state, dbg_regs); + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset); + if (ret) { + goto out; + } + + /* (address, ctrl) registers */ + limit = regset->n * regset->size; + while (count && offset < limit) { + if (count < PTRACE_HBP_ADDR_SZ) { + ret = -EINVAL; + goto out; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, + offset, offset + PTRACE_HBP_ADDR_SZ); + if (ret) { + goto out; + } + hws->dbg_regs[idx].addr = addr; + offset += PTRACE_HBP_ADDR_SZ; + + if (!count) { + break; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, + offset, offset + PTRACE_HBP_CTRL_SZ); + if (ret) { + goto out; + } + hws->dbg_regs[idx].ctrl = ctrl; + offset += PTRACE_HBP_CTRL_SZ; + + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + offset, + offset + PTRACE_HBP_PAD_SZ); + if (ret) { + goto out; + } + offset += PTRACE_HBP_PAD_SZ; + idx++; + } +out: + return ret; +} + +#ifdef CONFIG_ARM64_SVE + +/* read NT_ARM_SVE */ +static long sve_get(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + long ret = -EINVAL; + struct user_sve_header header; + unsigned int vq; + unsigned long start, end; + + if (target->fp_regs == NULL) { + ret = -ENOMEM; + goto out; + } + + /* Instead of system_supports_sve() */ + if (unlikely(!(elf_hwcap & HWCAP_SVE))) { + goto out; + } + + /* Header */ + memset(&header, 0, sizeof(header)); + + header.vl = target->ctx.thread->sve_vl; + + BUG_ON(!sve_vl_valid(header.vl)); + vq = sve_vq_from_vl(header.vl); + + BUG_ON(!sve_vl_valid(sve_max_vl)); + header.max_vl = sve_max_vl; + + /* McKernel processes always enable SVE. */ + header.flags = SVE_PT_REGS_SVE; + + header.size = SVE_PT_SIZE(vq, header.flags); + header.max_size = SVE_PT_SIZE(sve_vq_from_vl(header.max_vl), + SVE_PT_REGS_SVE); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) { + goto out; + } + + /* Registers: FPSIMD-only case */ + /* + * On McKernel there is nothing to do here, because McKernel + * processes always enable SVE.
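+ * (In the Linux reference implementation this is where an FPSIMD-only + * record would be copied out for a task not using SVE.)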
+ */ + + /* Otherwise: full SVE case */ + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + + BUG_ON(end < start); + BUG_ON(end - start > sve_state_size(target)); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + target->ctx.thread->sve_state, + start, end); + if (ret) { + goto out; + } + + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + + BUG_ON(end < start); + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + start, end); + if (ret) { + goto out; + } + + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + + BUG_ON((char *)(&target->fp_regs->fpcr + 1) < + (char *)&target->fp_regs->fpsr); + BUG_ON(end < start); + BUG_ON((char *)(&target->fp_regs->fpcr + 1) - + (char *)&target->fp_regs->fpsr != + end - start); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->fp_regs->fpsr, + start, end); + if (ret) { + goto out; + } + + start = end; + end = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16 * 16; + + BUG_ON(end < start); + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + start, end); +out: + return ret; +} + +/* write NT_ARM_SVE case */ +static long sve_set(struct thread *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + long ret = -EINVAL; + struct user_sve_header header; + unsigned int vq; + unsigned long start, end; + + if (target->fp_regs == NULL) { + ret = -ENOMEM; + goto out; + } + + /* Instead of system_supports_sve() */ + if (unlikely(!(elf_hwcap & HWCAP_SVE))) { + goto out; + } + + /* Header */ + if (count < sizeof(header)) { + goto out; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header, + 0, sizeof(header)); + if (ret) { + goto out; + } + + /* + * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by + * sve_set_vector_length(), which will also validate them for us: + */ + ret = sve_set_vector_length(target, header.vl, + header.flags & ~SVE_PT_REGS_MASK); + if (ret) { + goto out; + } + + /* Actual VL set may be less than the user asked for: */ + BUG_ON(!sve_vl_valid(target->ctx.thread->sve_vl)); + vq = sve_vq_from_vl(target->ctx.thread->sve_vl); + + /* Registers: FPSIMD-only case */ + if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) { + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, + SVE_PT_FPSIMD_OFFSET); + goto out; + } + + /* Otherwise: full SVE case */ + start = SVE_PT_SVE_OFFSET; + end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq); + + BUG_ON(end < start); + BUG_ON(end - start > sve_state_size(target)); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->ctx.thread->sve_state, + start, end); + if (ret) { + goto out; + } + + start = end; + end = SVE_PT_SVE_FPSR_OFFSET(vq); + + BUG_ON(end < start); + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + start, end); + if (ret) { + goto out; + } + + start = end; + end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; + + BUG_ON((char *)(&target->fp_regs->fpcr + 1) < + (char *)&target->fp_regs->fpsr); + BUG_ON(end < start); + BUG_ON((char *)(&target->fp_regs->fpcr + 1) - + (char *)&target->fp_regs->fpsr != + end - start); + + user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->fp_regs->fpsr, + start, end); +out: + return ret; +} + +#endif /* CONFIG_ARM64_SVE */ + +static const struct user_regset aarch64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(struct user_pt_regs) / sizeof(uint64_t), + .size = 
sizeof(uint64_t), + .get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpsimd_state) / sizeof(uint32_t), + /* + * We pretend we have 32-bit registers because the fpsr and + * fpcr are 32-bits wide. + */ + .size = sizeof(uint32_t), + .get = fpr_get, + .set = fpr_set + }, + [REGSET_TLS] = { + .core_note_type = NT_ARM_TLS, + .n = 1, + .size = sizeof(void *), + .get = tls_get, + .set = tls_set + }, + [REGSET_HW_BREAK] = { + .core_note_type = NT_ARM_HW_BREAK, + .n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t), + .size = sizeof(uint32_t), + .get = hw_break_get, + .set = hw_break_set + }, + [REGSET_HW_WATCH] = { + .core_note_type = NT_ARM_HW_WATCH, + .n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t), + .size = sizeof(uint32_t), + .get = hw_break_get, + .set = hw_break_set + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_ARM_SYSTEM_CALL, + .n = 1, + .size = sizeof(int), + .get = system_call_get, + .set = system_call_set + }, +#ifdef CONFIG_ARM64_SVE + [REGSET_SVE] = { /* Scalable Vector Extension */ + .core_note_type = NT_ARM_SVE, + .n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16, + .size = 16, + .get = sve_get, + .set = sve_set + }, +#endif /* CONFIG_ARM64_SVE */ +}; + +static const struct user_regset * +find_regset(const struct user_regset *regset, unsigned int type, int n) +{ + int i = 0; + + for (i = 0; i < n; i++) { + if (regset[i].core_note_type == type) { + return &regset[i]; + } + } + return NULL; +} + +static long ptrace_regset(struct thread *thread, int req, long type, struct iovec *iov) +{ + long rc = -EINVAL; + const struct user_regset *regset = find_regset(aarch64_regsets, type, + sizeof(aarch64_regsets) / sizeof(aarch64_regsets[0])); + + if (!regset) { + kprintf("%s: unsupported type 0x%x\n", __FUNCTION__, type); + goto out; + } + + if ((iov->iov_len % regset->size) != 0) { + goto out; + } + + if ((size_t)(regset->n * regset->size) < iov->iov_len) { + iov->iov_len = (size_t)(regset->n * regset->size); + } + + if (req == PTRACE_GETREGSET) { + rc = copy_regset_to_user(thread, regset, 0, + iov->iov_len, iov->iov_base); + } else { + rc = copy_regset_from_user(thread, regset, 0, + iov->iov_len, iov->iov_base); + } +out: + return rc; +} + +long ptrace_read_regset(struct thread *thread, long type, struct iovec *iov) +{ + return ptrace_regset(thread, PTRACE_GETREGSET, type, iov); +} + +long ptrace_write_regset(struct thread *thread, long type, struct iovec *iov) +{ + return ptrace_regset(thread, PTRACE_SETREGSET, type, iov); +} + +void ptrace_report_signal(struct thread *thread, int sig) +{ + struct mcs_rwlock_node_irqsave lock; + struct process *proc = thread->proc; + int parent_pid; + siginfo_t info; + struct thread_info tinfo; + + dkprintf("ptrace_report_signal, tid=%d, pid=%d\n", thread->tid, thread->proc->pid); + + /* save thread_info, if called by ptrace_report_exec() */ + if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) { + memcpy(&tinfo, thread->ctx.thread, sizeof(struct thread_info)); + } + + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + if(!(proc->ptrace & PT_TRACED)){ + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + return; + } + thread->exit_status = sig; + /* Transition thread state */ +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix.
*/ + proc->status = PS_DELAY_TRACED; +#else /* POSTK_DEBUG_TEMP_FIX_41 */ + proc->status = PS_TRACED; +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ + thread->status = PS_TRACED; + proc->ptrace &= ~PT_TRACE_SYSCALL_MASK; + if (sig == SIGSTOP || sig == SIGTSTP || + sig == SIGTTIN || sig == SIGTTOU) { + proc->signal_flags |= SIGNAL_STOP_STOPPED; + } else { + proc->signal_flags &= ~SIGNAL_STOP_STOPPED; + } + parent_pid = proc->parent->pid; + save_debugreg(thread->ptrace_debugreg); + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + + memset(&info, '\0', sizeof info); + info.si_signo = SIGCHLD; + info.si_code = CLD_TRAPPED; + info._sifields._sigchld.si_pid = thread->tid; + info._sifields._sigchld.si_status = thread->exit_status; + do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); +#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + /* Wake parent (if sleeping in wait4()) */ + waitq_wakeup(&proc->parent->waitpid_q); +#endif /* !POSTK_DEBUG_TEMP_FIX_41 */ + + dkprintf("ptrace_report_signal,sleeping\n"); + /* Sleep */ + schedule(); + dkprintf("ptrace_report_signal,wake up\n"); + + /* restore thread_info, if called by ptrace_report_exec() */ + if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) { + memcpy(thread->ctx.thread, &tinfo, sizeof(struct thread_info)); + } +} + +long +arch_ptrace(long request, int pid, long addr, long data) +{ + return -EIO; +} + diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c new file mode 100644 index 00000000..acdf5c26 --- /dev/null +++ b/arch/arm64/kernel/smp.c @@ -0,0 +1,22 @@ +/* smp.c COPYRIGHT FUJITSU LIMITED 2015 */ + +#include +#include + +/* + * as from 2.5, kernels no longer have an init_tasks structure + * so we need some other way of telling a new secondary core + * where to place its SVC stack + */ + +/* initialize value for BSP bootup */ +/* AP bootup value setup in ihk_mc_boot_cpu() */ +struct start_kernel_param; +extern void start_kernel(struct start_kernel_param *param); +extern struct start_kernel_param *ihk_param_head; + +struct secondary_data secondary_data = { + .stack = (char *)&init_thread_info + THREAD_START_SP, + .next_pc = (uint64_t)start_kernel, + .arg = (unsigned long)&ihk_param_head +}; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c new file mode 100644 index 00000000..0eaa3e5f --- /dev/null +++ b/arch/arm64/kernel/syscall.c @@ -0,0 +1,1869 @@ +/* syscall.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void ptrace_report_signal(struct thread *thread, int sig); +extern void clear_single_step(struct thread *thread); +void terminate(int, int); +extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); +long syscall(int num, ihk_mc_user_context_t *ctx); +extern unsigned long do_fork(int, unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +static void __check_signal(unsigned long rc, void *regs, int num, int irq_disabled); + +//#define DEBUG_PRINT_SC + +#ifdef DEBUG_PRINT_SC +#define dkprintf kprintf +#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) 
do { if (0) kprintf(__VA_ARGS__); } while (0) +#endif + +#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0) + +#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\ + __FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0) + +uintptr_t debug_constants[] = { + sizeof(struct cpu_local_var), + offsetof(struct cpu_local_var, current), + offsetof(struct cpu_local_var, runq), + offsetof(struct cpu_local_var, status), + offsetof(struct cpu_local_var, idle), + offsetof(struct thread, ctx) + offsetof(struct thread_info, cpu_context), + offsetof(struct thread, sched_list), + offsetof(struct thread, proc), + offsetof(struct thread, status), + offsetof(struct process, pid), + offsetof(struct thread, tid), + -1, +}; + +static ihk_spinlock_t cpuid_head_lock = SPIN_LOCK_UNLOCKED; +static int cpuid_head = 1; + +extern int num_processors; + +int obtain_clone_cpuid(cpu_set_t *cpu_set) { + int min_queue_len = -1; + int i, min_cpu = -1; + + /* cpu_head lock */ + ihk_mc_spinlock_lock_noirq(&cpuid_head_lock); + + /* Find the first allowed core with the shortest run queue */ + for (i = 0; i < num_processors; cpuid_head++, i++) { + struct cpu_local_var *v; + unsigned long irqstate; + + /* when cpuid_head reaches num_processors, wrap around to the BSP */ + if (cpuid_head >= num_processors) { + cpuid_head = 0; + } + + if (!CPU_ISSET(cpuid_head, cpu_set)) continue; + + v = get_cpu_local_var(cpuid_head); + irqstate = ihk_mc_spinlock_lock(&v->runq_lock); + if (min_queue_len == -1 || v->runq_len < min_queue_len) { + min_queue_len = v->runq_len; + min_cpu = cpuid_head; + } + ihk_mc_spinlock_unlock(&v->runq_lock, irqstate); + + if (min_queue_len == 0) { + cpuid_head++; + break; + } + } + + /* cpu_head unlock */ + ihk_mc_spinlock_unlock_noirq(&cpuid_head_lock); + + if (min_cpu != -1) { + if (get_cpu_local_var(min_cpu)->status != CPU_STATUS_RESERVED) + get_cpu_local_var(min_cpu)->status = CPU_STATUS_RESERVED; + } + return min_cpu; +} + +int +arch_clear_host_user_space() +{ + struct thread *th = cpu_local_var(current); + + /* XXX: might be unnecessary */ + clear_host_pte(th->vm->region.user_start, + (th->vm->region.user_end - th->vm->region.user_start)); + return 0; +} + +/* architecture-dependent syscall handlers */ +extern unsigned long do_fork(int clone_flags, unsigned long newsp, + unsigned long parent_tidptr, unsigned long child_tidptr, + unsigned long tlsblock_base, unsigned long curpc, + unsigned long cursp); + +SYSCALL_DECLARE(clone) +{ + if ((int)ihk_mc_syscall_arg0(ctx) & CLONE_VFORK) { + return do_fork(CLONE_VFORK|SIGCHLD, 0, 0, 0, 0, ihk_mc_syscall_pc(ctx), ihk_mc_syscall_sp(ctx)); + } else { + return do_fork((int)ihk_mc_syscall_arg0(ctx), /* clone_flags */ + ihk_mc_syscall_arg1(ctx), /* newsp */ + ihk_mc_syscall_arg2(ctx), /* parent_tidptr */ + ihk_mc_syscall_arg4(ctx), /* child_tidptr (swap arg3) */ + ihk_mc_syscall_arg3(ctx), /* tlsblock_base (swap arg4) */ + ihk_mc_syscall_pc(ctx), /* curpc */ + ihk_mc_syscall_sp(ctx)); /* cursp */ + } +} + +SYSCALL_DECLARE(rt_sigaction) +{ + int sig = ihk_mc_syscall_arg0(ctx); + const struct sigaction *act = (const struct sigaction *)ihk_mc_syscall_arg1(ctx); + struct sigaction *oact = (struct sigaction *)ihk_mc_syscall_arg2(ctx); + size_t sigsetsize = ihk_mc_syscall_arg3(ctx); + struct k_sigaction new_sa, old_sa; + int rc; + + if(sig == SIGKILL || sig == SIGSTOP || sig <= 0 || sig > SIGRTMAX) + return -EINVAL; + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if(act) +
if(copy_from_user(&new_sa.sa, act, sizeof new_sa.sa)){ + goto fault; + } + rc = do_sigaction(sig, act? &new_sa: NULL, oact? &old_sa: NULL); + if(rc == 0 && oact) + if(copy_to_user(oact, &old_sa.sa, sizeof old_sa.sa)){ + goto fault; + } + + return rc; +fault: + return -EFAULT; +} + +SYSCALL_DECLARE(prctl) +{ + int option = (int)ihk_mc_syscall_arg0(ctx); + long error; + + switch (option) { + case PR_SVE_SET_VL: + error = SVE_SET_VL(cpu_local_var(current), + ihk_mc_syscall_arg1(ctx), ihk_mc_syscall_arg2(ctx)); + break; + case PR_SVE_GET_VL: + error = SVE_GET_VL(cpu_local_var(current)); + break; + default: + error = syscall_generic_forwarding(__NR_prctl, ctx); + break; + } + return error; +} + +/* + * @ref.impl linux-linaro/src/linux-linaro/arch/arm64/kernel/signal.c::struct rt_sigframe + * @ref.impl mckernel/arch/x86/kernel/syscall.c::struct sigsp + */ +struct sigsp { + unsigned long sigrc; + int syscallno; + int restart; + siginfo_t info; + struct ucontext uc; + uint64_t fp; + uint64_t lr; +}; + +struct rt_sigframe_user_layout { + struct sigsp __user *usigframe; + struct sigsp *ksigframe; + + unsigned long size; /* size of allocated sigframe data */ + unsigned long limit; /* largest allowed size */ + + unsigned long fpsimd_offset; + unsigned long esr_offset; + unsigned long sve_offset; + unsigned long extra_offset; + unsigned long end_offset; +}; + +static void preserve_fpsimd_context(struct fpsimd_context *ctx) +{ + struct fpsimd_state fpsimd; + + /* dump the hardware registers to the fpsimd_state structure */ + fpsimd_save_state(&fpsimd); + + /* copy the FP and status/control registers */ + memcpy(ctx->vregs, fpsimd.vregs, sizeof(fpsimd.vregs)); + ctx->fpsr = fpsimd.fpsr; + ctx->fpcr = fpsimd.fpcr; + + /* copy the magic/size information */ + ctx->head.magic = FPSIMD_MAGIC; + ctx->head.size = sizeof(struct fpsimd_context); +} + +/* @ref.impl arch/arm64/kernel/signal.c::preserve_sve_context */ +static void preserve_sve_context(void *ctx) +{ + struct sve_context *sve_ctx = ctx; + unsigned int vl = current_thread_info()->sve_vl; + unsigned int vq; + unsigned int fpscr[2] = { 0, 0 }; + + BUG_ON(!sve_vl_valid(vl)); + vq = sve_vq_from_vl(vl); + + /* sve_context header set */ + sve_ctx->head.magic = SVE_MAGIC; + sve_ctx->head.size = ALIGN_UP(SVE_SIG_CONTEXT_SIZE(vq), 16); + + /* sve_context vl set */ + sve_ctx->vl = vl; + + /* sve_context reserved area 0 clear */ + memset(sve_ctx->__reserved, 0, sizeof(sve_ctx->__reserved)); + + /* sve register save */ + /* fpsr & fpcr discards, because already saved by preserve_fpsimd_context() */ + sve_save_state(ctx + SVE_SIG_FFR_OFFSET(vq), fpscr); +} + +static int restore_fpsimd_context(struct fpsimd_context *ctx) +{ + struct fpsimd_state fpsimd; + unsigned int magic, size; + + /* check the magic/size information */ + magic = ctx->head.magic; + size = ctx->head.size; + if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context)) + return -EINVAL; + + // copy the FP and status/control registers + memcpy(fpsimd.vregs, ctx->vregs, sizeof(fpsimd.vregs)); + fpsimd.fpsr = ctx->fpsr; + fpsimd.fpcr = ctx->fpcr; + + /* load the hardware registers from the fpsimd_state structure */ + fpsimd_load_state(&fpsimd); + + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::__restore_sve_fpsimd_context */ +static int __restore_sve_fpsimd_context(void *ctx, unsigned int vq, struct fpsimd_context *fpsimd) +{ + struct fpsimd_sve_state(vq) *sst = + ctx + SVE_SIG_ZREGS_OFFSET; + int i = 0; + + /* vq check */ + if (vq != sve_vq_from_vl(current_thread_info()->sve_vl)) { + 
return -EINVAL; + } + + /* copy from fpsimd_context vregs */ + for (i = 0; i < 32; i++) { + sst->zregs[i][0] = fpsimd->vregs[i]; + } + + /* restore sve register */ + sve_load_state(sst->ffr, &fpsimd->fpsr, vq - 1); + + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::restore_sve_fpsimd_context */ +static int restore_sve_fpsimd_context(void *ctx, struct fpsimd_context *fpsimd) +{ + struct sve_context const *sve_ctx = ctx; + uint16_t vl = sve_ctx->vl; + uint16_t vq; + + /* vl check */ + if (!sve_vl_valid(vl)) { + return -EINVAL; + } + + vq = sve_vq_from_vl(vl); + + return __restore_sve_fpsimd_context(ctx, vq, fpsimd); +} + +/* @ref.impl arch/arm64/kernel/signal.c::SIGFRAME_MAXSZ */ +/* Sanity limit on the maximum size of signal frame we'll try to generate. */ +/* This is NOT ABI. */ +#define SIGFRAME_MAXSZ _SZ64KB + +/* @ref.impl arch/arm64/kernel/signal.c::BUILD_BUG_ON in the __sigframe_alloc */ +STATIC_ASSERT(SIGFRAME_MAXSZ == ALIGN_DOWN(SIGFRAME_MAXSZ, 16)); +STATIC_ASSERT(SIGFRAME_MAXSZ > ALIGN_UP(sizeof(struct _aarch64_ctx), 16)); +STATIC_ASSERT(ALIGN_UP(sizeof(struct sigsp), 16) < SIGFRAME_MAXSZ - ALIGN_UP(sizeof(struct _aarch64_ctx), 16)); + +/* @ref.impl arch/arm64/kernel/signal.c::parse_user_sigframe */ +static int parse_user_sigframe(struct sigsp *sf) +{ + struct sigcontext *sc = &sf->uc.uc_mcontext; + struct _aarch64_ctx *head; + char *base = (char *)&sc->__reserved; + size_t offset = 0; + size_t limit = sizeof(sc->__reserved); + int have_extra_context = 0, err = -EINVAL; + void *kextra_data = NULL; + struct fpsimd_context *fpsimd_ctx = NULL; + struct sve_context *sve_ctx = NULL; + + if (ALIGN_UP((unsigned long)base, 16) != (unsigned long)base) + goto invalid; + + while (1) { + unsigned int magic, size; + + BUG_ON(limit < offset); + + if (limit - offset < sizeof(*head)) + goto invalid; + + if (ALIGN_DOWN(offset, 16) != offset) + goto invalid; + + BUG_ON(ALIGN_UP((unsigned long)base + offset, 16) != (unsigned long)base + offset); + + head = (struct _aarch64_ctx *)(base + offset); + magic = head->magic; + size = head->size; + + if (limit - offset < size) + goto invalid; + + switch (magic) { + case 0: + if (size) + goto invalid; + + goto done; + + case FPSIMD_MAGIC: + if (fpsimd_ctx) + goto invalid; + + if (size < sizeof(struct fpsimd_context)) + goto invalid; + + fpsimd_ctx = container_of(head, struct fpsimd_context, head); + break; + + case ESR_MAGIC: + /* ignore */ + break; + + case SVE_MAGIC: { + struct sve_context *sve_head = + container_of(head, struct sve_context, head); + + if (!(elf_hwcap & HWCAP_SVE)) + goto invalid; + + if (sve_ctx) + goto invalid; + + if (size < sizeof(*sve_ctx)) + goto invalid; + + sve_ctx = sve_head; + break; + } /* SVE_MAGIC */ + + case EXTRA_MAGIC: { + struct extra_context const *extra; + void __user *extra_data; + unsigned int extra_size; + + if (have_extra_context) + goto invalid; + + if (size < sizeof(*extra)) + goto invalid; + + extra = (struct extra_context const *)head; + extra_data = extra->data; + extra_size = extra->size; + + /* Prevent looping/repeated parsing of extra_context */ + have_extra_context = 1; + + kextra_data = kmalloc(extra_size + 15, IHK_MC_AP_NOWAIT); + if (kextra_data == NULL) { + goto invalid; + } + if (copy_from_user((char *)ALIGN_UP((unsigned long)kextra_data, 16), extra_data, extra_size)) { + goto invalid; + } + + /* + * Rely on the __user accessors to reject bogus + * pointers.
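+ * (In this port the extra data has already been copied into + * kextra_data by copy_from_user() above, so a bogus extra->data + * pointer faults there rather than here.)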
+ */ + base = (char *)ALIGN_UP((unsigned long)kextra_data, 16); + if (ALIGN_UP((unsigned long)base, 16) != (unsigned long)base) + goto invalid; + + /* Reject "unreasonably large" frames: */ + limit = extra_size; + if (limit > SIGFRAME_MAXSZ - sizeof(sc->__reserved)) + goto invalid; + + /* + * Ignore trailing terminator in __reserved[] + * and start parsing extra_data: + */ + offset = 0; + continue; + } /* EXTRA_MAGIC */ + + default: + goto invalid; + } + + if (size < sizeof(*head)) + goto invalid; + + if (limit - offset < size) + goto invalid; + + offset += size; + } + +done: + if (!fpsimd_ctx) + goto invalid; + + if (sve_ctx) { + err = restore_sve_fpsimd_context(sve_ctx, fpsimd_ctx); + } else { + err = restore_fpsimd_context(fpsimd_ctx); + } + +invalid: + if (kextra_data) { + kfree(kextra_data); + kextra_data = NULL; + } + return err; +} + +SYSCALL_DECLARE(rt_sigreturn) +{ + int i, err = 0; + struct thread *thread = cpu_local_var(current); + ihk_mc_user_context_t *regs = ctx; + struct sigsp ksigsp; + struct sigsp __user *usigsp; + siginfo_t info; + + /* + * Since we stacked the signal on a 128-bit boundary, then 'sp' should + * be word aligned here. + */ + if (regs->sp & 15) + goto bad_frame; + + usigsp = (struct sigsp __user *)regs->sp; + if (copy_from_user(&ksigsp, usigsp, sizeof(ksigsp))) { + goto bad_frame; + } + + for (i = 0; i < 31; i++) { + regs->regs[i] = ksigsp.uc.uc_mcontext.regs[i]; + } + regs->sp = ksigsp.uc.uc_mcontext.sp; + regs->pc = ksigsp.uc.uc_mcontext.pc; + regs->pstate = ksigsp.uc.uc_mcontext.pstate; + + // Avoid sys_rt_sigreturn() restarting. + regs->syscallno = ~0UL; + + err = parse_user_sigframe(&ksigsp); + if (err) + goto bad_frame; + + thread->sigmask.__val[0] = ksigsp.uc.uc_sigmask.__val[0]; + thread->sigstack.ss_flags = ksigsp.uc.uc_stack.ss_flags; + if(ksigsp.restart){ + return syscall(ksigsp.syscallno, regs); + } + + if (thread->ctx.thread->flags & (1 << TIF_SINGLESTEP)) { + memset(&info, 0, sizeof(info)); + info.si_code = TRAP_HWBKPT; + regs->regs[0] = ksigsp.sigrc; + clear_single_step(thread); + set_signal(SIGTRAP, regs, &info); + check_signal(0, regs, 0); + check_need_resched(); + } + return ksigsp.sigrc; + +bad_frame: + ekprintf("[pid:%d]: bad frame in %s: pc=%08llx sp=%08llx\n", + thread->proc->pid, __FUNCTION__, regs->pc, regs->sp); + memset(&info, 0, sizeof(info)); + info.si_signo = SIGSEGV; + info.si_code = SI_KERNEL; + set_signal(info.si_signo, regs, &info); + return 0; +} + +extern struct cpu_local_var *clv; +extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); +extern void interrupt_syscall(struct thread *, int sig); +extern int num_processors; + +long +alloc_debugreg(struct thread *thread) +{ + struct user_hwdebug_state *hws = NULL; + + /* LOWER: breakpoint register area. */ + /* HIGHER: watchpoint register area. 
*/ + hws = kmalloc(sizeof(struct user_hwdebug_state) * 2, IHK_MC_AP_NOWAIT); + if (hws == NULL) { + kprintf("alloc_debugreg: no memory.\n"); + return -ENOMEM; + } + memset(hws, 0, sizeof(struct user_hwdebug_state) * 2); + + /* initialize dbg_info */ + hws[HWS_BREAK].dbg_info = ptrace_hbp_get_resource_info(NT_ARM_HW_BREAK); + hws[HWS_WATCH].dbg_info = ptrace_hbp_get_resource_info(NT_ARM_HW_WATCH); + + thread->ptrace_debugreg = (unsigned long *)hws; + return 0; +} + +void +save_debugreg(unsigned long *debugreg) +{ + struct user_hwdebug_state *hws = (struct user_hwdebug_state *)debugreg; + int i = 0; + + /* save DBGBVR_EL1 and DBGBCR_EL1 (n=0-(core_num_brps-1)) */ + for (i = 0; i < core_num_brps; i++) { + hws[HWS_BREAK].dbg_regs[i].addr = read_wb_reg(AARCH64_DBG_REG_BVR, i); + hws[HWS_BREAK].dbg_regs[i].ctrl = read_wb_reg(AARCH64_DBG_REG_BCR, i); + } + + /* save DBGWVR_EL1 and DBGWCR_EL1 (n=0-(core_num_wrps-1)) */ + for (i = 0; i < core_num_wrps; i++) { + hws[HWS_WATCH].dbg_regs[i].addr = read_wb_reg(AARCH64_DBG_REG_WVR, i); + hws[HWS_WATCH].dbg_regs[i].ctrl = read_wb_reg(AARCH64_DBG_REG_WCR, i); + } +} + +void +restore_debugreg(unsigned long *debugreg) +{ + struct user_hwdebug_state *hws = (struct user_hwdebug_state *)debugreg; + unsigned int mdscr; + int i = 0; + + /* set MDSCR_EL1.MDE */ + mdscr = mdscr_read(); + mdscr |= DBG_MDSCR_MDE; + mdscr_write(mdscr); + + /* restore DBGBVR_EL1 and DBGBCR_EL1 (n=0-(core_num_brps-1)) */ + for (i = 0; i < core_num_brps; i++) { + write_wb_reg(AARCH64_DBG_REG_BVR, i, hws[HWS_BREAK].dbg_regs[i].addr); + write_wb_reg(AARCH64_DBG_REG_BCR, i, hws[HWS_BREAK].dbg_regs[i].ctrl); + } + + /* restore DBGWVR_EL1 and DBGWCR_EL1 (n=0-(core_num_wrps-1)) */ + for (i = 0; i < core_num_wrps; i++) { + write_wb_reg(AARCH64_DBG_REG_WVR, i, hws[HWS_WATCH].dbg_regs[i].addr); + write_wb_reg(AARCH64_DBG_REG_WCR, i, hws[HWS_WATCH].dbg_regs[i].ctrl); + } +} + +void +clear_debugreg(void) +{ + unsigned int mdscr; + + /* clear DBGBVR_EL1 and DBGBCR_EL1 (n=0-(core_num_brps-1)) */ + /* clear DBGWVR_EL1 and DBGWCR_EL1 (n=0-(core_num_wrps-1)) */ + hw_breakpoint_reset(); + + /* clear MDSCR_EL1.MDE */ + mdscr = mdscr_read(); + mdscr &= ~DBG_MDSCR_MDE; + mdscr_write(mdscr); +} + +void clear_single_step(struct thread *thread) +{ + clear_regs_spsr_ss(thread->uctx); + thread->ctx.thread->flags &= ~(1 << TIF_SINGLESTEP); +} + +void set_single_step(struct thread *thread) +{ + thread->ctx.thread->flags |= (1 << TIF_SINGLESTEP); + set_regs_spsr_ss(thread->uctx); +} + +extern void coredump(struct thread *thread, void *regs); + +static int +isrestart(int syscallno, unsigned long rc, int sig, int restart) +{ + if(syscallno == 0 || rc != -EINTR) + return 0; + + /* + * The following interfaces are never restarted after being interrupted + * by a signal handler, regardless of the use of SA_RESTART + * Interfaces used to wait for signals: + * pause(2), sigsuspend(2), sigtimedwait(2), and sigwaitinfo(2). + * File descriptor multiplexing interfaces: + * epoll_wait(2), epoll_pwait(2), poll(2), ppoll(2), select(2), and pselect(2). + * System V IPC interfaces: + * msgrcv(2), msgsnd(2), semop(2), and semtimedop(2). + * Sleep interfaces: + * clock_nanosleep(2), nanosleep(2), and usleep(3). + * io_getevents(2). + * + * Note: following functions will issue another systemcall. 
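+ * (hence the restart check in the switch below matches on the syscall + * that was actually issued):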
+ * pause(2) -> rt_sigsuspend + * epoll_wait(2) -> epoll_pwait + * poll(2) -> ppoll + * select(2) -> pselect6 + */ + switch(syscallno){ + case __NR_rt_sigsuspend: + case __NR_rt_sigtimedwait: + case __NR_epoll_pwait: + case __NR_ppoll: + case __NR_pselect6: + case __NR_msgrcv: + case __NR_msgsnd: + case __NR_semop: + case __NR_semtimedop: + case __NR_clock_nanosleep: + case __NR_nanosleep: + case __NR_io_getevents: + return 0; + } + + if(sig == SIGCHLD) + return 1; + if(restart) + return 1; + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::init_user_layout */ +static void init_user_layout(struct rt_sigframe_user_layout *user) +{ + const size_t __reserved_size = + sizeof(user->usigframe->uc.uc_mcontext.__reserved); + const size_t terminator_size = + ALIGN_UP(sizeof(struct _aarch64_ctx), 16); + + memset(user, 0, sizeof *user); + user->size = offsetof(struct sigsp, uc.uc_mcontext.__reserved); + user->limit = user->size + (__reserved_size - terminator_size - + sizeof(struct extra_context)); + /* Reserve space for extension and terminator ^ */ + + BUG_ON(user->limit <= user->size); +} + +/* @ref.impl arch/arm64/kernel/signal.c::sigframe_size */ +static size_t sigframe_size(struct rt_sigframe_user_layout const *user) +{ + size_t size; + + /* FIXME: take user->limit into account? */ + if (user->size > sizeof(struct sigsp)) { + size = user->size; + } else { + size = sizeof(struct sigsp); + } + return ALIGN_UP(size, 16); +} + +/* @ref.impl arch/arm64/kernel/signal.c::__sigframe_alloc */ +static int __sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size, unsigned char extend) +{ + unsigned long padded_size = ALIGN_UP(size, 16); + + /* Sanity-check invariants */ + BUG_ON(user->limit < user->size); + BUG_ON(user->size != ALIGN_DOWN(user->size, 16)); + BUG_ON(size < sizeof(struct _aarch64_ctx)); + + if (padded_size > user->limit - user->size && + !user->extra_offset && + extend) { + int ret; + + ret = __sigframe_alloc(user, &user->extra_offset, + sizeof(struct extra_context), 0); + if (ret) { + return ret; + } + + /* + * Further allocations must go after the fixed-size + * part of the signal frame: + */ + user->size = ALIGN_UP(sizeof(struct sigsp), 16); + + /* + * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for + * the terminator: + */ + user->limit = SIGFRAME_MAXSZ - + ALIGN_UP(sizeof(struct _aarch64_ctx), 16); + } + + /* Still not enough space? Bad luck! */ + if (padded_size > user->limit - user->size) { + return -ENOMEM; + } + + /* Anti-leakage check: don't double-allocate the same block: */ + BUG_ON(*offset); + + *offset = user->size; + user->size += padded_size; + + /* Check invariants again */ + BUG_ON(user->limit < user->size); + BUG_ON(user->size != ALIGN_DOWN(user->size, 16)); + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::sigframe_alloc */ +/* Allocate space for an optional record of bytes in the user + * signal frame. The offset from the signal frame base address to the + * allocated block is assigned to *offset. 
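+ * Returns 0 on success, or -ENOMEM from __sigframe_alloc() once the + * frame can no longer grow within SIGFRAME_MAXSZ.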
+ */ +static int sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size) +{ + return __sigframe_alloc(user, offset, size, 1); +} + +/* @ref.impl arch/arm64/kernel/signal.c::sigframe_alloc_end */ +/* Allocate the null terminator record and prevent further allocations */ +static int sigframe_alloc_end(struct rt_sigframe_user_layout *user) +{ + int ret; + const size_t __reserved_size = + sizeof(user->ksigframe->uc.uc_mcontext.__reserved); + const size_t __reserved_offset = + offsetof(struct sigsp, uc.uc_mcontext.__reserved); + const size_t terminator_size = + ALIGN_UP(sizeof(struct _aarch64_ctx), 16); + + if (user->extra_offset) { + BUG_ON(user->limit != SIGFRAME_MAXSZ - terminator_size); + } else { + BUG_ON(user->limit != __reserved_offset + + (__reserved_size - terminator_size - + sizeof(struct extra_context))); + } + + /* Un-reserve the space reserved for the terminator: */ + user->limit += terminator_size; + + ret = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + + if (ret) { + return ret; + } + + /* Prevent further allocation: */ + user->limit = user->size; + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::apply_user_offset */ +/* changed McKernel, void *p and return value is kernel area address, function name */ +static void *get_sigframe_context_kaddr( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char *base = (char *)user->ksigframe; + + BUG_ON(!base); + BUG_ON(!offset); + + /* + * TODO: sanity-check that the result is within appropriate bounds + * (should be ensured by the use of set_user_offset() to compute + * all offsets. + */ + return base + offset; +} + +/* @ref.impl arch/arm64/kernel/signal.c::apply_user_offset */ +/* changed McKernel, function name */ +static void __user *get_sigframe_context_uaddr( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char __user *base = (char __user *)user->usigframe; + + BUG_ON(!base); + BUG_ON(!offset); + + /* + * TODO: sanity-check that the result is within appropriate bounds + * (should be ensured by the use of set_user_offset() to compute + * all offsets. 
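+ * get_sigframe_context_kaddr()/_uaddr() resolve the same offset against + * the kernel staging buffer and the user stack respectively; the staged + * frame is pushed to user space by a single copy_to_user() at the end + * of setup_rt_frame().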
+ */ + return base + offset; +} + +/* @ref.impl arch/arm64/kernel/signal.c::setup_sigframe_layout */ +/* Determine the layout of optional records in the signal frame */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +{ + int err; + + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + + /* fault information, if valid */ + if (current_thread_info()->fault_code) { + err = sigframe_alloc(user, &user->esr_offset, + sizeof(struct esr_context)); + if (err) + return err; + } + + if (likely(elf_hwcap & (HWCAP_FP | HWCAP_ASIMD))) { + if (likely(elf_hwcap & HWCAP_SVE)) { + unsigned int vq = sve_vq_from_vl(current_thread_info()->sve_vl); + + err = sigframe_alloc(user, &user->sve_offset, + SVE_SIG_CONTEXT_SIZE(vq)); + if (err) + return err; + } + } + return sigframe_alloc_end(user); +} + +/* @ref.impl arch/arm64/kernel/signal.c::get_sigframe */ +static int get_sigframe(struct thread *thread, + struct rt_sigframe_user_layout *user, + struct pt_regs *regs, unsigned long sa_flags) +{ + unsigned long sp, sp_top, frame_size; + int err; + + init_user_layout(user); + + // get signal frame + if ((sa_flags & SA_ONSTACK) && + !(thread->sigstack.ss_flags & SS_DISABLE) && + !(thread->sigstack.ss_flags & SS_ONSTACK)) { + unsigned long lsp; + lsp = ((unsigned long)(((char *)thread->sigstack.ss_sp) + thread->sigstack.ss_size)) & ~15UL; + sp = sp_top = lsp; + thread->sigstack.ss_flags |= SS_ONSTACK; + } + else { + sp = sp_top = regs->sp; + } + sp = ALIGN_DOWN(sp, 16); + + /* calc sigframe layout */ + err = setup_sigframe_layout(user); + if (err) + return err; + + /* calc new user stack pointer */ + frame_size = sigframe_size(user); + sp -= frame_size; + BUG_ON(ALIGN_DOWN(sp, 16) != sp); + + /* set user sp address and kernel sigframe address */ + user->usigframe = (struct sigsp __user *)sp; + return 0; +} + +/* @ref.impl arch/arm64/kernel/signal.c::setup_rt_frame */ +static int setup_rt_frame(int usig, unsigned long rc, int to_restart, + int syscallno, struct k_sigaction *k, struct sig_pending *pending, + struct pt_regs *regs, struct thread *thread) +{ + struct rt_sigframe_user_layout user; + struct sigsp *kframe; + struct sigsp __user *uframe; + int i = 0, err = 0, kpages = 0; + struct _aarch64_ctx *end; + + /* get signal frame info */ + memset(&user, 0, sizeof(user)); + if (get_sigframe(thread, &user, regs, k->sa.sa_flags)) { + return 1; + } + + /* allocate kernel sigframe buffer */ + kpages = (sigframe_size(&user) + PAGE_SIZE - 1) >> PAGE_SHIFT; + user.ksigframe = ihk_mc_alloc_pages(kpages, IHK_MC_AP_NOWAIT); + if (user.ksigframe == NULL) { + return 1; + } + + /* set kernel sigframe lowest addr */ + kframe = user.ksigframe; + + /* set user sigframe lowest addr */ + uframe = user.usigframe; + + // initialize unused fields. + kframe->uc.uc_flags = 0; + kframe->uc.uc_link = NULL; + + // save alternate stack information. + kframe->uc.uc_stack.ss_sp = uframe; + kframe->uc.uc_stack.ss_flags = thread->sigstack.ss_flags; + kframe->uc.uc_stack.ss_size = thread->sigstack.ss_size; + + // save signal frame. + kframe->fp = regs->regs[29]; + kframe->lr = regs->regs[30]; + kframe->sigrc = rc; + + for (i = 0; i < 31; i++) { + kframe->uc.uc_mcontext.regs[i] = regs->regs[i]; + } + kframe->uc.uc_mcontext.sp = regs->sp; + kframe->uc.uc_mcontext.pc = regs->pc; + kframe->uc.uc_mcontext.pstate = regs->pstate; + + kframe->uc.uc_mcontext.fault_address = current_thread_info()->fault_address; + + kframe->uc.uc_sigmask = thread->sigmask; + + // save fp simd context.
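+ // (The FPSIMD record is always present; the esr/sve/extra records + // below are written only when setup_sigframe_layout() reserved an + // offset for them.)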
+ preserve_fpsimd_context(get_sigframe_context_kaddr(&user, user.fpsimd_offset)); + + if (user.esr_offset) { + // save esr context. + struct esr_context *esr_ctx = + get_sigframe_context_kaddr(&user, user.esr_offset); + + esr_ctx->head.magic = ESR_MAGIC; + esr_ctx->head.size = sizeof(*esr_ctx); + esr_ctx->esr = current_thread_info()->fault_code; + } + + if (user.sve_offset) { + // save sve context. + struct sve_context *sve_ctx = + get_sigframe_context_kaddr(&user, user.sve_offset); + preserve_sve_context(sve_ctx); + } + + if (user.extra_offset) { + // save extra context. + struct extra_context *extra = + get_sigframe_context_kaddr(&user, user.extra_offset); + struct _aarch64_ctx *end = + (struct _aarch64_ctx *)((char *)extra + + ALIGN_UP(sizeof(*extra), 16)); + void __user *extra_data = get_sigframe_context_uaddr(&user, + ALIGN_UP(sizeof(struct sigsp), 16)); + unsigned int extra_size = ALIGN_UP(user.size, 16) - + ALIGN_UP(sizeof(struct sigsp), 16); + + /* + * ^ FIXME: bounds sanity-checks: both of these should fit + * within __reserved! + */ + extra->head.magic = EXTRA_MAGIC; + extra->head.size = sizeof(*extra); + extra->data = extra_data; + extra->size = extra_size; + + /* Add the terminator */ + end->magic = 0; + end->size = 0; + } + + // set the "end" magic + end = get_sigframe_context_kaddr(&user, user.end_offset); + end->magic = 0; + end->size = 0; + + // save syscall information to restart. + kframe->syscallno = syscallno; + kframe->restart = to_restart; + + /* set sig handler context */ + // set restart context + regs->regs[0] = usig; + regs->sp = (unsigned long)uframe; + regs->regs[29] = (unsigned long)&uframe->fp; + regs->pc = (unsigned long)k->sa.sa_handler; + + if (k->sa.sa_flags & SA_RESTORER){ + regs->regs[30] = (unsigned long)k->sa.sa_restorer; + } else { + regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp); + } + + if(k->sa.sa_flags & SA_SIGINFO){ + kframe->info = pending->info; + regs->regs[1] = (unsigned long)&uframe->info; + regs->regs[2] = (unsigned long)&uframe->uc; + } + + /* copy to user sigframe */ + err = copy_to_user(user.usigframe, user.ksigframe, sigframe_size(&user)); + + /* free kernel sigframe buffer */ + ihk_mc_free_pages(user.ksigframe, kpages); + + return err; +} + +void +do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int syscallno) +{ + struct pt_regs *regs = regs0; + struct k_sigaction *k; + int sig; + __sigset_t w; + struct process *proc = thread->proc; + int orgsig; + int ptraceflag = 0; + struct mcs_rwlock_node_irqsave lock; + struct mcs_rwlock_node_irqsave mcs_rw_node; + + for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); + dkprintf("do_signal(): tid=%d, pid=%d, sig=%d\n", thread->tid, proc->pid, sig); + orgsig = sig; + + if((proc->ptrace & PT_TRACED) && + pending->ptracecont == 0 && + sig != SIGKILL) { + ptraceflag = 1; + sig = SIGSTOP; + } + + if(regs == NULL){ /* call from syscall */ + regs = thread->uctx; + } + else{ + rc = regs->regs[0]; + } + + mcs_rwlock_writer_lock(&thread->sigcommon->lock, &mcs_rw_node); + k = thread->sigcommon->action + sig - 1; + + if(k->sa.sa_handler == SIG_IGN){ + kfree(pending); + mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); + return; + } + else if(k->sa.sa_handler){ + /* save signal frame */ + int to_restart = 0; + + // does this syscall need to be restarted?
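+ // (isrestart() applies the restart rules documented above it; the + // SIGCHLD test below then forces a restart even for syscalls that + // isrestart() excludes.)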
+ to_restart = isrestart(syscallno, rc, sig, k->sa.sa_flags & SA_RESTART); + if(syscallno != 0 && rc == -EINTR && sig == SIGCHLD) { + to_restart = 1; + } + if(to_restart == 1) { + /* Prepare for system call restart. */ + regs->regs[0] = regs->orig_x0; + } + + if (setup_rt_frame(sig, rc, to_restart, syscallno, k, pending, regs, thread)) { + kfree(pending); + mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); + kprintf("do_signal,page_fault_thread_vm failed\n"); + terminate(0, sig); + return; + } + + // check signal handler is ONESHOT + if(k->sa.sa_flags & SA_RESETHAND) { + k->sa.sa_handler = SIG_DFL; + } + + if(!(k->sa.sa_flags & SA_NODEFER)) + thread->sigmask.__val[0] |= pending->sigmask.__val[0]; + kfree(pending); + mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); + + if (thread->ctx.thread->flags & (1 << TIF_SINGLESTEP)) { + siginfo_t info = { + .si_code = TRAP_HWBKPT, + }; + clear_single_step(thread); + set_signal(SIGTRAP, regs, &info); + check_signal(0, regs, 0); + check_need_resched(); + } + } + else { + int coredumped = 0; + siginfo_t info; + + if(ptraceflag){ + if(thread->ptrace_recvsig) + kfree(thread->ptrace_recvsig); + thread->ptrace_recvsig = pending; + if(thread->ptrace_sendsig) + kfree(thread->ptrace_sendsig); + thread->ptrace_sendsig = NULL; + } + else + kfree(pending); + mcs_rwlock_writer_unlock(&thread->sigcommon->lock, &mcs_rw_node); + switch (sig) { + case SIGSTOP: + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + if(ptraceflag){ + ptrace_report_signal(thread, orgsig); + } + else{ + memset(&info, '\0', sizeof info); + info.si_signo = SIGCHLD; + info.si_code = CLD_STOPPED; + info._sifields._sigchld.si_pid = thread->proc->pid; + info._sifields._sigchld.si_status = (sig << 8) | 0x7f; + do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); + dkprintf("do_signal,SIGSTOP,changing state\n"); + + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + proc->group_exit_status = SIGSTOP; + + /* Reap and set new signal_flags */ + proc->signal_flags = SIGNAL_STOP_STOPPED; + +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + proc->status = PS_DELAY_STOPPED; +#else /* POSTK_DEBUG_TEMP_FIX_41 */ + proc->status = PS_STOPPED; +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ + thread->status = PS_STOPPED; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + +#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + /* Wake up the parent who tried wait4 and sleeping */ + waitq_wakeup(&proc->parent->waitpid_q); +#endif /* !POSTK_DEBUG_TEMP_FIX_41 */ + + dkprintf("do_signal(): pid: %d, tid: %d SIGSTOP, sleeping\n", + proc->pid, thread->tid); + /* Sleep */ + schedule(); + dkprintf("SIGSTOP(): woken up\n"); + } + break; + case SIGTRAP: + dkprintf("do_signal,SIGTRAP\n"); + if(!(proc->ptrace & PT_TRACED)) { + goto core; + } + + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + thread->exit_status = SIGTRAP; +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ + proc->status = PS_DELAY_TRACED; +#else /* POSTK_DEBUG_TEMP_FIX_41 */ + proc->status = PS_TRACED; +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ + thread->status = PS_TRACED; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + +#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. 
 */
+	/* Wake up the parent who tried wait4 and sleeping */
+	waitq_wakeup(&thread->proc->parent->waitpid_q);
+#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
+
+	/* Sleep */
+	dkprintf("do_signal,SIGTRAP,sleeping\n");
+
+	schedule();
+	dkprintf("SIGTRAP(): woken up\n");
+	break;
+	case SIGCONT:
+		memset(&info, '\0', sizeof info);
+		info.si_signo = SIGCHLD;
+		info.si_code = CLD_CONTINUED;
+		info._sifields._sigchld.si_pid = proc->pid;
+		info._sifields._sigchld.si_status = 0x0000ffff;
+		do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0);
+		proc->signal_flags = SIGNAL_STOP_CONTINUED;
+		proc->status = PS_RUNNING;
+		dkprintf("do_signal,SIGCONT,do nothing\n");
+		break;
+	case SIGQUIT:
+	case SIGILL:
+	case SIGABRT:
+	case SIGFPE:
+	case SIGSEGV:
+	case SIGBUS:
+	case SIGSYS:
+	case SIGXCPU:
+	case SIGXFSZ:
+	core:
+		dkprintf("do_signal,default,core,sig=%d\n", sig);
+		coredump(thread, regs);
+		coredumped = 0x80;
+		terminate(0, sig | coredumped);
+		break;
+	case SIGCHLD:
+	case SIGURG:
+		break;
+	default:
+		dkprintf("do_signal,default,terminate,sig=%d\n", sig);
+		terminate(0, sig);
+		break;
+	}
+	}
+}
+
+static struct sig_pending *
+getsigpending(struct thread *thread, int delflag){
+	struct list_head *head;
+	mcs_rwlock_lock_t *lock;
+	struct mcs_rwlock_node_irqsave mcs_rw_node;
+	struct sig_pending *next;
+	struct sig_pending *pending;
+	__sigset_t w;
+
+	w = thread->sigmask.__val[0];
+
+	lock = &thread->sigcommon->lock;
+	head = &thread->sigcommon->sigpending;
+	for(;;) {
+		if (delflag)
+			mcs_rwlock_writer_lock(lock, &mcs_rw_node);
+		else
+			mcs_rwlock_reader_lock(lock, &mcs_rw_node);
+
+		list_for_each_entry_safe(pending, next, head, list){
+			if(!(pending->sigmask.__val[0] & w)){
+				if(delflag)
+					list_del(&pending->list);
+
+				if (delflag)
+					mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
+				else
+					mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
+				return pending;
+			}
+		}
+
+		if (delflag)
+			mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
+		else
+			mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
+
+		if(lock == &thread->sigpendinglock)
+			return NULL;
+
+		lock = &thread->sigpendinglock;
+		head = &thread->sigpending;
+	}
+
+	return NULL;
+}
+
+struct sig_pending *
+hassigpending(struct thread *thread)
+{
+	return getsigpending(thread, 0);
+}
+
+int
+interrupt_from_user(void *regs0)
+{
+	struct pt_regs *regs = regs0;
+
+	return((regs->pstate & PSR_MODE_MASK) == PSR_MODE_EL0t);
+}
+
+void
+check_signal(unsigned long rc, void *regs0, int num)
+{
+	__check_signal(rc, regs0, num, 0);
+}
+
+void
+check_signal_irq_disabled(unsigned long rc, void *regs0, int num)
+{
+	__check_signal(rc, regs0, num, 1);
+}
+
+static void
+__check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
+{
+	ihk_mc_user_context_t *regs = regs0;
+	struct thread *thread;
+	struct sig_pending *pending;
+	int irqstate;
+
+	if(clv == NULL)
+		return;
+	thread = cpu_local_var(current);
+
+	/**
+	 * If check_signal is called from syscall(),
+	 * then save the syscall return value.
+	 */
+	if((regs == NULL)&&(num != __NR_rt_sigsuspend)){ /* it's a call from a syscall */
+		// Get user context through current thread
+		// and update syscall return.
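+		// NOTE: __NR_rt_sigsuspend is excluded above so that the
+		// return value it has already arranged is not overwritten.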
+		ihk_mc_syscall_arg0(thread->uctx) = rc;
+	}
+
+	if(thread == NULL || thread->proc->pid == 0){
+		struct thread *t;
+		irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));
+		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
+			if(t->proc->pid <= 0)
+				continue;
+			if(t->status == PS_INTERRUPTIBLE &&
+			   hassigpending(t)){
+				t->status = PS_RUNNING;
+				break;
+			}
+		}
+		ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate);
+		goto out;
+	}
+
+	if(regs != NULL && !interrupt_from_user(regs)) {
+		goto out;
+	}
+
+	for(;;){
+		/* When called from check_signal_irq_disabled(), return with
+		 * interrupts left disabled, so that no signal can be lost
+		 * in the window before the caller re-enables them.
+		 */
+		if (irq_disabled == 1) {
+			irqstate = cpu_disable_interrupt_save();
+		}
+		pending = getsigpending(thread, 1);
+		if(!pending) {
+			dkprintf("check_signal,queue is empty\n");
+			goto out;
+		}
+		if (irq_disabled == 1) {
+			cpu_restore_interrupt(irqstate);
+		}
+		do_signal(rc, regs, thread, pending, num);
+	}
+
+out:
+	return;
+}
+
+unsigned long
+do_kill(struct thread * thread, int pid, int tid, int sig, siginfo_t *info, int ptracecont)
+{
+	dkprintf("do_kill,pid=%d,tid=%d,sig=%d\n", pid, tid, sig);
+	struct thread *t;
+	struct process *tproc;
+	struct process *proc = thread? thread->proc: NULL;
+	struct thread *tthread = NULL;
+	int i;
+	__sigset_t mask;
+	mcs_rwlock_lock_t *savelock = NULL;
+	struct mcs_rwlock_node mcs_rw_node;
+	struct list_head *head = NULL;
+	int rc;
+	unsigned long irqstate = 0;
+	struct k_sigaction *k;
+	int doint;
+	int found = 0;
+	siginfo_t info0;
+	struct resource_set *rset = cpu_local_var(resource_set);
+	int hash;
+	struct thread_hash *thash = rset->thread_hash;
+	struct process_hash *phash = rset->process_hash;
+	struct mcs_rwlock_node lock;
+	struct mcs_rwlock_node updatelock;
+
+	if(sig > SIGRTMAX || sig < 0)
+		return -EINVAL;
+
+	if(info == NULL){
+		memset(&info0, '\0', sizeof info0);
+		info = &info0;
+		info0.si_signo = sig;
+		info0.si_code = SI_KERNEL;
+	}
+
+	if(tid == -1 && pid <= 0){
+		struct process *p;
+		struct mcs_rwlock_node_irqsave slock;
+		int pgid = -pid;
+		int rc = -ESRCH;
+		int *pids;
+		int n = 0;
+		int sendme = 0;
+
+		if(pid == 0){
+			if(thread == NULL || thread->proc->pid <= 0)
+				return -ESRCH;
+			pgid = thread->proc->pgid;
+		}
+		pids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT);
+		if(!pids)
+			return -ENOMEM;
+		for(i = 0; i < HASH_SIZE; i++){
+			mcs_rwlock_reader_lock(&phash->lock[i], &slock);
+			list_for_each_entry(p, &phash->list[i], hash_list){
+				if(pgid != 1 && p->pgid != pgid)
+					continue;
+
+				if(thread && p->pid == thread->proc->pid){
+					sendme = 1;
+					continue;
+				}
+
+				pids[n] = p->pid;
+				n++;
+			}
+			mcs_rwlock_reader_unlock(&phash->lock[i], &slock);
+		}
+		for(i = 0; i < n; i++)
+			rc = do_kill(thread, pids[i], -1, sig, info, ptracecont);
+		if(sendme)
+			rc = do_kill(thread, thread->proc->pid, -1, sig, info, ptracecont);
+
+		kfree(pids);
+		return rc;
+	}
+
+	irqstate = cpu_disable_interrupt_save();
+	mask = __sigmask(sig);
+	if(tid == -1){
+		struct thread *tthread0 = NULL;
+		struct mcs_rwlock_node plock;
+		struct mcs_rwlock_node updatelock;
+
+		found = 0;
+		hash = process_hash(pid);
+		mcs_rwlock_reader_lock_noirq(&phash->lock[hash], &plock);
+		list_for_each_entry(tproc, &phash->list[hash], hash_list){
+			if(tproc->pid == pid){
+				found = 1;
+				break;
+			}
+		}
+		if(!found){
+			mcs_rwlock_reader_unlock_noirq(&phash->lock[hash], &plock);
+			cpu_restore_interrupt(irqstate);
+			return -ESRCH;
+		}
+
+		mcs_rwlock_reader_lock_noirq(&tproc->update_lock, &updatelock);
+
if(tproc->status == PS_EXITED || tproc->status == PS_ZOMBIE){ + goto done; + } + mcs_rwlock_reader_lock_noirq(&tproc->threads_lock, &lock); + list_for_each_entry(t, &tproc->threads_list, siblings_list){ + if(t->tid == pid || tthread == NULL){ + if(t->status == PS_EXITED){ + continue; + } + if(!(mask & t->sigmask.__val[0])){ + tthread = t; + found = 1; + } + else if(tthread == NULL && tthread0 == NULL){ + tthread0 = t; + found = 1; + } + } + } + if(tthread == NULL){ + tthread = tthread0; + } + if(tthread && tthread->status != PS_EXITED){ + savelock = &tthread->sigcommon->lock; + head = &tthread->sigcommon->sigpending; + hold_thread(tthread); + } + else + tthread = NULL; + mcs_rwlock_reader_unlock_noirq(&tproc->threads_lock, &lock); +done: + mcs_rwlock_reader_unlock_noirq(&tproc->update_lock, &updatelock); + mcs_rwlock_reader_unlock_noirq(&phash->lock[hash], &plock); + } + else{ + found = 0; + hash = thread_hash(tid); + mcs_rwlock_reader_lock_noirq(&thash->lock[hash], &lock); + list_for_each_entry(tthread, &thash->list[hash], hash_list){ + if(pid != -1 && tthread->proc->pid != pid){ + continue; + } + if (tthread->tid == tid && + tthread->status != PS_EXITED) { + found = 1; + break; + } + } + if(!found){ + mcs_rwlock_reader_unlock_noirq(&thash->lock[hash], &lock); + cpu_restore_interrupt(irqstate); + return -ESRCH; + } + + tproc = tthread->proc; + mcs_rwlock_reader_lock_noirq(&tproc->update_lock, &updatelock); + savelock = &tthread->sigpendinglock; + head = &tthread->sigpending; + if(sig == SIGKILL || + (tproc->status != PS_EXITED && + tproc->status != PS_ZOMBIE && + tthread->status != PS_EXITED)){ + hold_thread(tthread); + } + else{ + tthread = NULL; + } + mcs_rwlock_reader_unlock_noirq(&tproc->update_lock, &updatelock); + mcs_rwlock_reader_unlock_noirq(&thash->lock[hash], &lock); + } + + + if(sig != SIGCONT && + proc && + proc->euid != 0 && + proc->ruid != tproc->ruid && + proc->euid != tproc->ruid && + proc->ruid != tproc->suid && + proc->euid != tproc->suid){ + if(tthread) + release_thread(tthread); + cpu_restore_interrupt(irqstate); + return -EPERM; + } + + if(sig == 0 || tthread == NULL || tthread->status == PS_EXITED){ + if(tthread) + release_thread(tthread); + cpu_restore_interrupt(irqstate); + return 0; + } + + if (tthread->thread_offloaded) { + interrupt_syscall(tthread, sig); + release_thread(tthread); + return 0; + } + + doint = 0; + + mcs_rwlock_writer_lock_noirq(savelock, &mcs_rw_node); + + /* Put signal event even when handler is SIG_IGN or SIG_DFL + because target ptraced thread must call ptrace_report_signal + in check_signal */ + rc = 0; + k = tthread->sigcommon->action + sig - 1; + if((sig != SIGKILL && (tproc->ptrace & PT_TRACED)) || + (k->sa.sa_handler != SIG_IGN && + (k->sa.sa_handler != NULL || + (sig != SIGCHLD && sig != SIGURG)))){ + struct sig_pending *pending = NULL; + if (sig < SIGRTMIN) { // SIGRTMIN - SIGRTMAX + list_for_each_entry(pending, head, list){ + if(pending->sigmask.__val[0] == mask && + pending->ptracecont == ptracecont) + break; + } + if(&pending->list == head) + pending = NULL; + } + if(pending == NULL){ + doint = 1; + pending = kmalloc(sizeof(struct sig_pending), IHK_MC_AP_NOWAIT); + if(!pending){ + rc = -ENOMEM; + } + else{ + pending->sigmask.__val[0] = mask; + memcpy(&pending->info, info, sizeof(siginfo_t)); + pending->ptracecont = ptracecont; + if(sig == SIGKILL || sig == SIGSTOP) + list_add(&pending->list, head); + else + list_add_tail(&pending->list, head); + tthread->sigevent = 1; + } + } + } + mcs_rwlock_writer_unlock_noirq(savelock, 
&mcs_rw_node); + cpu_restore_interrupt(irqstate); + + if (doint && !(mask & tthread->sigmask.__val[0])) { + int status = tthread->status; + +#ifdef POSTK_DEBUG_TEMP_FIX_74 /* interrupt_syscall() timing change */ +#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */ + if(tthread->proc->status != PS_EXITED) + interrupt_syscall(tthread, 0); +#else /* POSTK_DEBUG_TEMP_FIX_48 */ + if(!tthread->proc->nohost) + interrupt_syscall(tthread, 0); +#endif /* POSTK_DEBUG_TEMP_FIX_48 */ +#endif /* POSTK_DEBUG_TEMP_FIX_74 */ + + if (thread != tthread) { + dkprintf("do_kill,ipi,pid=%d,cpu_id=%d\n", + tproc->pid, tthread->cpu_id); +#define IPI_CPU_NOTIFY 0 + ihk_mc_interrupt_cpu(tthread->cpu_id, INTRID_CPU_NOTIFY); + } + +#ifndef POSTK_DEBUG_TEMP_FIX_74 /* interrupt_syscall() timing change */ + if(!tthread->proc->nohost) + interrupt_syscall(tthread, 0); +#endif /* !POSTK_DEBUG_TEMP_FIX_74 */ + + if (status != PS_RUNNING) { + if(sig == SIGKILL){ + /* Wake up the target only when stopped by ptrace-reporting */ + sched_wakeup_thread(tthread, PS_TRACED | PS_STOPPED | PS_INTERRUPTIBLE); + } + else if(sig == SIGCONT || ptracecont == 1){ + /* Wake up the target only when stopped by SIGSTOP */ + sched_wakeup_thread(tthread, PS_STOPPED); + tthread->proc->status = PS_RUNNING; + } + else { + sched_wakeup_thread(tthread, PS_INTERRUPTIBLE); + } + } + } + release_thread(tthread); + return rc; +} + +void +set_signal(int sig, void *regs0, siginfo_t *info) +{ + ihk_mc_user_context_t *regs = regs0; + struct thread *thread = cpu_local_var(current); + + if(thread == NULL || thread->proc->pid == 0) + return; + + if((__sigmask(sig) & thread->sigmask.__val[0]) || + !interrupt_from_user(regs)){ + coredump(thread, regs0); + terminate(0, sig | 0x80); + } + + do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0); +} + +SYSCALL_DECLARE(mmap) +{ + const int supported_flags = 0 + | MAP_SHARED // 0x01 + | MAP_PRIVATE // 0x02 + | MAP_FIXED // 0x10 + | MAP_ANONYMOUS // 0x20 + | MAP_LOCKED // 0x2000 + | MAP_POPULATE // 0x8000 + | MAP_HUGETLB // 00040000 + | (0x3F << MAP_HUGE_SHIFT) // FC000000 + ; + const int ignored_flags = 0 + | MAP_DENYWRITE // 0x0800 + | MAP_NORESERVE // 0x4000 + | MAP_STACK // 0x20000 + ; + const int error_flags = 0 + | MAP_GROWSDOWN // 0x0100 + | MAP_EXECUTABLE // 0x1000 + | MAP_NONBLOCK // 0x10000 + ; + + const intptr_t addr0 = ihk_mc_syscall_arg0(ctx); + const size_t len0 = ihk_mc_syscall_arg1(ctx); + const int prot = ihk_mc_syscall_arg2(ctx); + const int flags0 = ihk_mc_syscall_arg3(ctx); + const int fd = ihk_mc_syscall_arg4(ctx); + const off_t off0 = ihk_mc_syscall_arg5(ctx); + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; + int error; + intptr_t addr = 0; + size_t len; + int flags = flags0; + size_t pgsize; + + dkprintf("sys_mmap(%lx,%lx,%x,%x,%d,%lx)\n", + addr0, len0, prot, flags0, fd, off0); + + /* check constants for flags */ + if (1) { + int dup_flags; + + dup_flags = (supported_flags & ignored_flags); + dup_flags |= (ignored_flags & error_flags); + dup_flags |= (error_flags & supported_flags); + + if (dup_flags) { + ekprintf("sys_mmap:duplicate flags: %lx\n", dup_flags); + ekprintf("s-flags: %08x\n", supported_flags); + ekprintf("i-flags: %08x\n", ignored_flags); + ekprintf("e-flags: %08x\n", error_flags); + panic("sys_mmap:duplicate flags\n"); + /* no return */ + } + } + + /* check arguments */ + pgsize = PAGE_SIZE; + if (flags & MAP_HUGETLB) { + switch (flags & (0x3F << MAP_HUGE_SHIFT)) { + case 0: + flags |= MAP_HUGE_SECOND_BLOCK; /* 
default hugepage size */ + break; + + case MAP_HUGE_SECOND_BLOCK: + case MAP_HUGE_FIRST_BLOCK: + break; + + default: + ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):" + "not supported page size.\n", + addr0, len0, prot, flags0, fd, off0); + error = -EINVAL; + goto out; + } + + pgsize = (size_t)1 << ((flags >> MAP_HUGE_SHIFT) & 0x3F); + } + +#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1)) + addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR; + len = (len0 + pgsize - 1) & ~(pgsize - 1); + if ((addr & (pgsize - 1)) + || (len == 0) + || !(flags & (MAP_SHARED | MAP_PRIVATE)) + || ((flags & MAP_SHARED) && (flags & MAP_PRIVATE)) + || (off0 & (pgsize - 1))) { + ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):EINVAL\n", + addr0, len0, prot, flags0, fd, off0); + error = -EINVAL; + goto out; + } + + if ((flags & MAP_FIXED) && ((addr < region->user_start) + || (region->user_end <= addr))) { + ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):ENOMEM\n", + addr0, len0, prot, flags0, fd, off0); + error = -ENOMEM; + goto out; + } + + /* check not supported requests */ + if ((flags & error_flags) + || (flags & ~(supported_flags | ignored_flags))) { + ekprintf("sys_mmap(%lx,%lx,%x,%x,%x,%lx):unknown flags %x\n", + addr0, len0, prot, flags0, fd, off0, + (flags & ~(supported_flags | ignored_flags))); + error = -EINVAL; + goto out; + } + + addr = do_mmap(addr, len, prot, flags, fd, off0); + + error = 0; +out: + dkprintf("sys_mmap(%lx,%lx,%x,%x,%d,%lx): %ld %lx\n", + addr0, len0, prot, flags0, fd, off0, error, addr); + return (!error)? addr: error; +} + +SYSCALL_DECLARE(shmget) +{ + const key_t key = ihk_mc_syscall_arg0(ctx); + const size_t size = ihk_mc_syscall_arg1(ctx); + const int shmflg0 = ihk_mc_syscall_arg2(ctx); + int shmid = -EINVAL; + int error; + int shmflg = shmflg0; + + dkprintf("shmget(%#lx,%#lx,%#x)\n", key, size, shmflg0); + + if (shmflg & SHM_HUGETLB) { + switch (shmflg & (0x3F << SHM_HUGE_SHIFT)) { + case 0: + shmflg |= SHM_HUGE_SECOND_BLOCK; /* default hugepage size */ + break; + + case SHM_HUGE_SECOND_BLOCK: + case SHM_HUGE_FIRST_BLOCK: + break; + + default: + error = -EINVAL; + goto out; + } + } + + shmid = do_shmget(key, size, shmflg); + + error = 0; +out: + dkprintf("shmget(%#lx,%#lx,%#x): %d %d\n", key, size, shmflg0, error, shmid); + return (error)?: shmid; +} /* sys_shmget() */ + +void +save_uctx(void *uctx, struct pt_regs *regs) +{ + /* TODO: skeleton for UTI */ +} + +/*** End of File ***/ diff --git a/arch/arm64/kernel/trampoline.S b/arch/arm64/kernel/trampoline.S new file mode 100644 index 00000000..c3b0004d --- /dev/null +++ b/arch/arm64/kernel/trampoline.S @@ -0,0 +1,56 @@ +/* trampoline.S COPYRIGHT FUJITSU LIMITED 2015 */ +.section .rodata, "a", @progbits + +.globl trampoline_code_data +base = . 
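+/*
+ * Boot parameter block: the quadwords below are presumably filled in by
+ * the loader side (page table, entry point, argument, initial stack,
+ * debug area, 32->64 bit transition table) before a CPU enters here;
+ * see the per-field comments.
+ */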
+trampoline_code_data:
+
+	.org 8
+header_pgtbl:
+	.quad 0	/* page table address */
+func_address:
+	.quad 0	/* load address */
+arg:
+	.quad 0	/* next address */
+stack_ptr:
+	.quad 0	/* initial stack */
+debug:
+	.quad 0	/* debug area */
+transit_pgtbl:
+	.quad 0	/* 32->64 bit table address */
+
+cpu_start_body:
+
+.balign 8
+protect_start:
+
+.balign 8
+start_64:
+
+boot_idtptr:
+	.short 0
+	.long 0
+
+boot_gdtptr:
+	.short boot_gdt32_end - boot_gdt32
+	.long boot_gdt32 - base
+	.align 4
+boot_gdt32:
+	.quad 0
+	.quad 0
+	.quad 0x00cf9b000000ffff
+	.quad 0x00cf93000000ffff
+	.quad 0x00af9b000000ffff
+	.quad 0x0000890000000067
+boot_gdt32_end:
+
+start_64_vec:
+	.long start_64 - base
+	.word 0
+
+stack:
+	.org 0x1000
+stack_end:
+.globl trampoline_code_data_end
+trampoline_code_data_end:
+
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
new file mode 100644
index 00000000..d60c7447
--- /dev/null
+++ b/arch/arm64/kernel/traps.c
@@ -0,0 +1,177 @@
+/* traps.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern void arch_show_interrupt_context(const void *reg);
+extern int interrupt_from_user(void *);
+
+void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err)
+{
+	if (interrupt_from_user(regs)) {
+		current_thread_info()->fault_address = 0;
+		current_thread_info()->fault_code = err;
+		set_signal(info->si_signo, regs, info);
+	} else {
+		kprintf("siginfo: signo(%d) code(%d)\n", info->si_signo, info->si_code);
+		panic(str);
+	}
+}
+
+/*
+ * Trapped FP/ASIMD access.
+ */
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+{
+	// /* TODO: implement lazy context saving/restoring */
+	set_cputime(1);
+	// WARN_ON(1);
+	kprintf("WARNING: CPU: %d PID: %d Trapped FP/ASIMD access.\n",
+		ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid);
+	set_cputime(0);
+}
+
+/*
+ * Raise a SIGFPE for the current process.
+ */ +#define FPEXC_IOF (1 << 0) +#define FPEXC_DZF (1 << 1) +#define FPEXC_OFF (1 << 2) +#define FPEXC_UFF (1 << 3) +#define FPEXC_IXF (1 << 4) +#define FPEXC_IDF (1 << 7) + +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +{ + siginfo_t info; + unsigned int si_code = 0; + set_cputime(1); + + if (esr & FPEXC_IOF) + si_code = FPE_FLTINV; + else if (esr & FPEXC_DZF) + si_code = FPE_FLTDIV; + else if (esr & FPEXC_OFF) + si_code = FPE_FLTOVF; + else if (esr & FPEXC_UFF) + si_code = FPE_FLTUND; + else if (esr & FPEXC_IXF) + si_code = FPE_FLTRES; + + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = si_code; + info._sifields._sigfault.si_addr = (void*)regs->pc; + + set_signal(SIGFPE, regs, &info); + set_cputime(0); +} + +/* @ref.impl arch/arm64/kernel/traps.c */ +static LIST_HEAD(undef_hook); + +/* @ref.impl arch/arm64/kernel/traps.c */ +static ihk_spinlock_t undef_lock = SPIN_LOCK_UNLOCKED; + +/* @ref.impl arch/arm64/kernel/traps.c */ +void register_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + flags = ihk_mc_spinlock_lock(&undef_lock); + list_add(&hook->node, &undef_hook); + ihk_mc_spinlock_unlock(&undef_lock, flags); +} + +/* @ref.impl arch/arm64/kernel/traps.c */ +void unregister_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + flags = ihk_mc_spinlock_lock(&undef_lock); + list_del(&hook->node); + ihk_mc_spinlock_unlock(&undef_lock, flags); +} + +/* @ref.impl arch/arm64/kernel/traps.c */ +static int call_undef_hook(struct pt_regs *regs) +{ + struct undef_hook *hook; + unsigned long flags; + uint32_t instr; + int (*fn)(struct pt_regs *regs, uint32_t instr) = NULL; + void *pc = (void*)instruction_pointer(regs); + + if (!interrupt_from_user(regs)) + return 1; + + /* 32-bit ARM instruction */ + if (copy_from_user(&instr, pc, sizeof(instr))) + goto exit; +#ifdef __AARCH64EB__ +# error It is necessary to byte swap here. (e.g. instr = le32_to_cpu(instr);) +#endif + + flags = ihk_mc_spinlock_lock(&undef_lock); + list_for_each_entry(hook, &undef_hook, node) + if ((instr & hook->instr_mask) == hook->instr_val && + (regs->pstate & hook->pstate_mask) == hook->pstate_val) + fn = hook->fn; + + ihk_mc_spinlock_unlock(&undef_lock, flags); +exit: + return fn ? fn(regs, instr) : 1; +} + +/* @ref.impl arch/arm64/kernel/traps.c */ +void do_undefinstr(struct pt_regs *regs) +{ + siginfo_t info; + + set_cputime(interrupt_from_user(regs)? 1: 2); + + if (call_undef_hook(regs) == 0) { + goto out; + } + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info._sifields._sigfault.si_addr = (void*)regs->pc; + + arm64_notify_die("Oops - undefined instruction", regs, &info, 0); +out: + set_cputime(0); +} + +/* + * bad_mode handles the impossible case in the exception vector. + */ +//asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +{ + siginfo_t info; + set_cputime(interrupt_from_user(regs)? 1: 2); + kprintf("entering bad_mode !! 
(regs:0x%p, reason:%d, esr:0x%x)\n", regs, reason, esr);
+
+	kprintf("ESR analysis:\n");
+	kprintf(" Exception Class : 0x%x\n",((esr >> 26) & 0x3f));
+	kprintf(" Instruction Length : %d (0:16-bit instruction, 1:32-bit instruction)\n",((esr >> 25) & 0x1));
+	kprintf(" Instruction Specific Syndrome : 0x%x\n",(esr & 0x1ffffff));
+
+	arch_show_interrupt_context(regs);
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code = ILL_ILLOPC;
+	info._sifields._sigfault.si_addr = (void*)regs->pc;
+
+	arm64_notify_die("Oops - bad mode", regs, &info, 0);
+	set_cputime(0);
+}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
new file mode 100644
index 00000000..92b12949
--- /dev/null
+++ b/arch/arm64/kernel/vdso.c
@@ -0,0 +1,205 @@
+/* vdso.c COPYRIGHT FUJITSU LIMITED 2016 */
+/* @ref.impl arch/arm64/kernel/vdso.c */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+//#define DEBUG_PRINT_VDSO
+
+#ifdef DEBUG_PRINT_VDSO
+#define dkprintf(...) kprintf(__VA_ARGS__)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#define ekprintf(...) kprintf(__VA_ARGS__)
+#endif
+
+#ifdef POSTK_DEBUG_ARCH_DEP_52
+#define VDSO_MAXPAGES 1
+struct vdso {
+	long busy;
+	int vdso_npages;
+	int padding;
+	long vdso_physlist[VDSO_MAXPAGES];
+	long vvar_phys;
+	long lbase;
+	long offset_sigtramp;
+};
+#endif /*POSTK_DEBUG_ARCH_DEP_52*/
+
+extern char vdso_start, vdso_end;
+static struct vdso vdso;
+
+struct tod_data_s tod_data
+	__attribute__ ((section (".vdso.data"))) = {
+		.do_local = 0,
+		.version = IHK_ATOMIC64_INIT(0),
+};
+
+void* vdso_symbol_sigtramp(unsigned long base)
+{
+	return (void *)(vdso.offset_sigtramp - vdso.lbase + base);
+}
+
+static int vdso_get_vdso_info(void)
+{
+	int error;
+	struct ikc_scd_packet packet;
+	struct ihk_ikc_channel_desc *ch = cpu_local_var(ikc2linux);
+
+	dkprintf("vdso_get_vdso_info()\n");
+	memset(&vdso, '\0', sizeof vdso);
+	vdso.busy = 1;
+	vdso.vdso_npages = 0;
+
+	packet.msg = SCD_MSG_GET_VDSO_INFO;
+	packet.arg = virt_to_phys(&vdso);
+
+	error = ihk_ikc_send(ch, &packet, 0);
+	if (error) {
+		ekprintf("vdso_get_vdso_info: ihk_ikc_send failed. 
%d\n", error); + goto out; + } + + while (vdso.busy) { + cpu_pause(); + } + error = 0; +out: + if (error) { + vdso.vdso_npages = 0; + } + dkprintf("vdso_get_vdso_info(): %d\n", error); + return error; +} + +int arch_setup_vdso(void) +{ + if (!vdso_get_vdso_info() && vdso.vdso_npages != 0) { + kprintf("Enable Host mapping vDSO.\n"); + return 0; + } + kprintf("Enable McK mapping vDSO.\n"); + + if (memcmp(&vdso_start, "\177ELF", 4)) { + panic("vDSO is not a valid ELF object!\n"); + } + + vdso.vdso_npages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + dkprintf("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso.vdso_npages + 1, vdso.vdso_npages, &vdso_start, 1L, &tod_data); + if (vdso.vdso_npages != 1) { + panic("vDSO is not a valid number of pages!\n"); + } + + vdso.vvar_phys = virt_to_phys((void *)&tod_data); + vdso.vdso_physlist[0] = virt_to_phys((void *)&vdso_start); + + vdso.lbase = VDSO_LBASE; + vdso.offset_sigtramp = vdso_offset_sigtramp; + + return 0; +} + +static int get_free_area(struct process_vm *vm, size_t len, intptr_t hint, + int pgshift, intptr_t *addrp) +{ + struct vm_regions *region = &vm->region; + intptr_t addr; + int error; + struct vm_range *range; + size_t pgsize = (size_t)1 << pgshift; + + dkprintf("get_free_area(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp); + + addr = hint; + for (;;) { + addr = (addr + pgsize - 1) & ~(pgsize - 1); + if ((region->user_end <= addr) + || ((region->user_end - len) < addr)) { + ekprintf("get_free_area(%lx,%lx,%p):" + "no space. %lx %lx\n", + len, hint, addrp, addr, + region->user_end); + error = -ENOMEM; + goto out; + } + + range = lookup_process_memory_range(vm, addr, addr+len); + if (range == NULL) { + break; + } + addr = range->end; + } + + error = 0; + *addrp = addr; + +out: + dkprintf("get_free_area(%lx,%lx,%d,%p): %d %lx\n", + len, hint, pgshift, addrp, error, addr); + return error; +} + +int arch_map_vdso(struct process_vm *vm) +{ + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + unsigned long start, end; + unsigned long flag; + int ret; + + vdso_text_len = vdso.vdso_npages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + + ret = get_free_area(vm, vdso_mapping_len, TASK_UNMAPPED_BASE, + PAGE_SHIFT, (intptr_t *)&vdso_base); + if (ret != 0) { + dkprintf("arch_map_vdso:get_free_area(%lx,%lx) failed. 
%d\n", + vdso_mapping_len, TASK_UNMAPPED_BASE, ret); + goto exit; + } + + start = vdso_base; + end = vdso_base + PAGE_SIZE; + flag = VR_REMOTE | VR_PROT_READ; + flag |= VRFLAG_PROT_TO_MAXPROT(flag); + ret = add_process_memory_range(vm, start, end, vdso.vvar_phys, flag, + NULL, 0, PAGE_SHIFT, NULL); + if (ret != 0){ + dkprintf("ERROR: adding memory range for tod_data\n"); + goto exit; + } + vm->vvar_addr = (void *)start; + + start = end; + end = start + vdso_text_len; + flag = VR_REMOTE | VR_PROT_READ | VR_PROT_EXEC; + flag |= VRFLAG_PROT_TO_MAXPROT(flag); + ret = add_process_memory_range(vm, start, end, vdso.vdso_physlist[0], flag, + NULL, 0, PAGE_SHIFT, NULL); + if (ret != 0) { + dkprintf("ERROR: adding memory range for vdso_text\n"); + + start = vdso_base; + end = vdso_base + PAGE_SIZE; + remove_process_memory_range(vm, start, end, NULL); + + goto exit; + } + vm->vdso_addr = (void *)start; + +exit: + return ret; +} diff --git a/arch/arm64/kernel/vdso.so.S b/arch/arm64/kernel/vdso.so.S new file mode 100644 index 00000000..6d29b98f --- /dev/null +++ b/arch/arm64/kernel/vdso.so.S @@ -0,0 +1,32 @@ +/* vdso.so.S COPYRIGHT FUJITSU LIMITED 2016 */ +/* @ref.impl arch/arm64/kernel/vdso/vdso.S */ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Will Deacon + */ + +#include + + .section ".vdso.txet", "aw" + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "../../mckernel/arch/arm64/kernel/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/arm64/kernel/vdso/Makefile.in b/arch/arm64/kernel/vdso/Makefile.in new file mode 100644 index 00000000..c49a4e83 --- /dev/null +++ b/arch/arm64/kernel/vdso/Makefile.in @@ -0,0 +1,123 @@ +# Makefile.in COPYRIGHT FUJITSU LIMITED 2016 +# @ref.impl arch/arm64/kernel/vdso/Makefile +# Building a vDSO image for AArch64. 
+ +HOST_DIR=@KDIR@ +HOST_CONFIG=$(HOST_DIR)/.config +HOST_KERNEL_CONFIG_ARM64_4K_PAGES=$(shell grep -E "^CONFIG_ARM64_4K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_4K_PAGES=||g') +HOST_KERNEL_CONFIG_ARM64_16K_PAGES=$(shell grep -E "^CONFIG_ARM64_16K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_16K_PAGES=||g') +HOST_KERNEL_CONFIG_ARM64_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_64K_PAGES=||g') + +VDSOBASE = $(CURDIR) +INCDIR = $(VDSOBASE)/../include +ECHO_SUFFIX = [VDSO] + +VDSO_OBJS := gettimeofday.o note.o sigreturn.o +DESTOBJS = $(addprefix $(VDSOBASE)/, $(VDSO_OBJS)) + +$(if $(VDSOBASE),,$(error IHK output directory is not specified)) +$(if $(TARGET),,$(error Target is not specified)) + +#CFLAGS := -nostdinc -mlittle-endian -Wall -mabi=lp64 -Wa,-gdwarf-2 +CFLAGS := -nostdinc -mlittle-endian -Wall -Wa,-gdwarf-2 +CFLAGS += -D__KERNEL__ -I$(SRC)/include +CFLAGS += -I$(SRC)/../lib/include -I$(INCDIR) -I$(IHKBASE)/smp/arm64/include +CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) + +LDFLAGS := -nostdinc -mlittle-endian -Wall -Wundef -Wstrict-prototypes +LDFLAGS += -Wno-trigraphs -fno-strict-aliasing -fno-common +LDFLAGS += -Werror-implicit-function-declaration -Wno-format-security +#LDFLAGS += -std=gnu89 -mgeneral-regs-only -mabi=lp64 -O2 +LDFLAGS += -std=gnu89 -mgeneral-regs-only -O2 +LDFLAGS += -Wframe-larger-than=2048 -fno-stack-protector +LDFLAGS += -fno-delete-null-pointer-checks -Wno-unused-but-set-variable +LDFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +LDFLAGS += -fno-var-tracking-assignments -g -Wdeclaration-after-statement +LDFLAGS += -Wno-pointer-sign -fno-strict-overflow -fconserve-stack +LDFLAGS += -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time +LDFLAGS += -shared -fno-common -fno-builtin -nostdlib +LDFLAGS += -Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv -Wl,-n -Wl,-T +LDFLAGS += --param=allow-store-data-races=0 -DCC_HAVE_ASM_GOTO +LDFLAGS += -D"KBUILD_STR(s)=\#s" -D"KBUILD_BASENAME=KBUILD_STR(vdso.so)" +LDFLAGS += -D"KBUILD_MODNAME=KBUILD_STR(vdso.so)" -D__KERNEL__ + +DEPSRCS = $(wildcard $(VDSOBASE)/*.c $(VDSOBASE)/*.S) + +CFLAGS_lds := -E -P -C -U$(ARCH) +CFLAGS_lds += -nostdinc +CFLAGS_lds += -mlittle-endian +CFLAGS_lds += -D__KERNEL__ +CFLAGS_lds += -D__ASSEMBLY__ +CFLAGS_lds += -DLINKER_SCRIPT +CFLAGS_lds += -DVDSO_LBASE=0 +ifeq ($(HOST_KERNEL_CONFIG_ARM64_4K_PAGES), y) +CFLAGS_lds += -DPAGE_SIZE=0x1000 +endif +ifeq ($(HOST_KERNEL_CONFIG_ARM64_16K_PAGES), y) +CFLAGS_lds += -DPAGE_SIZE=0x4000 +endif +ifeq ($(HOST_KERNEL_CONFIG_ARM64_64K_PAGES), y) +CFLAGS_lds += -DPAGE_SIZE=0x10000 +endif + +#load mckernel config (append CPPFLAGS) +include $(IHKBASE)/$(TARGETDIR)/Makefile.predefines + +default: all + +.PHONY: all clean depend prepare + +all: depend $(VDSOBASE)/vdso.so $(INCDIR)/vdso-offsets.h + +# Strip rule for the .so file +$(VDSOBASE)/vdso.so: OBJCOPYFLAGS := -S +$(VDSOBASE)/vdso.so: $(VDSOBASE)/vdso.so.dbg + $(objcopy_cmd) + +# Generate VDSO offsets using helper script +$(INCDIR)/vdso-offsets.h: $(VDSOBASE)/vdso.so.dbg + $(call echo_cmd,VDSOSYM,$<) + @nm $< | sh gen_vdso_offsets.sh | LC_ALL=C sort > $@ + +# Link rule for the .so file, .lds has to be first +$(VDSOBASE)/vdso.so.dbg: $(VDSOBASE)/vdso.lds $(DESTOBJS) + $(ld_cmd) + +$(VDSOBASE)/vdso.lds: vdso.lds.S + $(lds_cmd) + +clean: + $(rm_cmd) $(DESTOBJS) $(VDSOBASE)/Makefile.dep + +depend: 
$(VDSOBASE)/Makefile.dep
+
+$(VDSOBASE)/Makefile.dep:
+	$(call dep_cmd,$(DEPSRCS))
+
+prepare:
+	@$(RM) $(VDSOBASE)/Makefile.dep
+
+-include $(VDSOBASE)/Makefile.dep
+
+# Actual build commands
+ifeq ($(V),1)
+echo_cmd =
+submake = make
+else
+echo_cmd = @echo ' ($(TARGET))' $1 $(ECHO_SUFFIX) $2;
+submake = make --no-print-directory
+endif
+
+cc_cmd = $(call echo_cmd,CC,$<)$(CC) $(CFLAGS) -c -o $@ $<
+ld_cmd = $(call echo_cmd,LD,$@)$(CC) $(LDFLAGS) $^ -o $@
+dep_cmd = $(call echo_cmd,DEPEND,)$(CC) $(CFLAGS) -MM $1 > $@
+rm_cmd = $(call echo_cmd,CLEAN,)$(RM)
+objcopy_cmd = $(call echo_cmd,OBJCOPY,$<)$(OBJCOPY) $(OBJCOPYFLAGS) $< $@
+lds_cmd = $(call echo_cmd,LDS,$<)$(CC) $(CFLAGS_lds) -c -o $@ $<
+
+.c.o:
+	$(cc_cmd)
+.S.o:
+	$(cc_cmd) -D__ASSEMBLY__
+
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
new file mode 100644
index 00000000..a540fb33
--- /dev/null
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# gen_vdso_offsets.sh COPYRIGHT FUJITSU LIMITED 2016
+# @ref.impl arch/arm64/kernel/vdso/gen_vdso_offsets.sh
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon
diff --git a/arch/arm64/kernel/vdso/gettimeofday.c b/arch/arm64/kernel/vdso/gettimeofday.c
new file mode 100644
--- /dev/null
+++ b/arch/arm64/kernel/vdso/gettimeofday.c
@@ -0,0 +1,205 @@
+#include
+#include
+#include
+
+extern int __kernel_gettimeofday(struct timeval *tv, void *tz);
+
+static inline void cpu_pause_for_vsyscall(void)
+{
+	asm volatile ("yield" ::: "memory");
+	return;
+}
+
+static inline void calculate_time_from_tsc(struct timespec *ts,
+		struct tod_data_s *tod_data)
+{
+	long ver;
+	unsigned long current_tsc;
+	__time_t sec_delta;
+	long ns_delta;
+
+	for (;;) {
+		while ((ver = ihk_atomic64_read(&tod_data->version)) & 1) {
+			/* settimeofday() is in progress */
+			cpu_pause_for_vsyscall();
+		}
+		rmb();
+		*ts = tod_data->origin;
+		rmb();
+		if (ver == ihk_atomic64_read(&tod_data->version)) {
+			break;
+		}
+
+		/* settimeofday() has intervened */
+		cpu_pause_for_vsyscall();
+	}
+
+	current_tsc = rdtsc();
+	sec_delta = current_tsc / tod_data->clocks_per_sec;
+	ns_delta = NS_PER_SEC * (current_tsc % tod_data->clocks_per_sec)
+		/ tod_data->clocks_per_sec;
+	/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
+
+	ts->tv_sec += sec_delta;
+	ts->tv_nsec += ns_delta;
+	if (ts->tv_nsec >= NS_PER_SEC) {
+		ts->tv_nsec -= NS_PER_SEC;
+		++ts->tv_sec;
+	}
+
+	return;
+}
+
+static inline struct tod_data_s *get_tod_data_addr(void)
+{
+	unsigned long addr;
+
+	asm volatile("adr %0, _tod_data\n"
+		: "=r" (addr)
+		:
+		: "memory");
+
+	return (struct tod_data_s *)addr;
+}
+
+int __kernel_gettimeofday(struct timeval *tv, void *tz)
+{
+	long ret;
+	struct tod_data_s *tod_data;
+	struct timespec ats;
+
+	if(!tv && !tz) {
+		/* nothing to do */
+		return 0;
+	}
+
+	tod_data = get_tod_data_addr();
+
+	/* Do it locally if supported */
+	if (!tz && tod_data->do_local) {
+		calculate_time_from_tsc(&ats, tod_data);
+
+		tv->tv_sec = ats.tv_sec;
+		tv->tv_usec = ats.tv_nsec / 1000;
+
+		return 0;
+	}
+
+	/* Otherwise syscall */
+	asm volatile("mov w8, %w1\n"
+		"mov x0, %2\n"
+		"mov x1, %3\n"
+		"svc #0\n"
+		"mov %0, x0\n"
+		: "=r" (ret)
+		: "r" (__NR_gettimeofday), "r"(tv), "r"(tz)
+		: "memory");
+
+	if (ret) {
+		*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
+	}
+	return (int)ret;
+}
+
+
+/*
+ * The IDs of the various system clocks (for POSIX.1b interval timers):
+ * @ref.impl include/uapi/linux/time.h
+ */
+// #define CLOCK_REALTIME 0
+// #define CLOCK_MONOTONIC 1
+// #define CLOCK_PROCESS_CPUTIME_ID 2
+// #define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_SGI_CYCLE 10 /* Hardware specific */
+#define CLOCK_TAI 11
+
+#define HIGH_RES_NSEC 1 /* nsec. */
+#define CLOCK_REALTIME_RES HIGH_RES_NSEC
+
+#define CLOCK_COARSE_RES ((NS_PER_SEC+CONFIG_HZ/2)/CONFIG_HZ) /* 10,000,000 nsec*/
+
+typedef int clockid_t;
+
+int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
+{
+	long ret;
+	struct tod_data_s *tod_data;
+	struct timespec ats;
+
+	if (!tp) {
+		/* nothing to do */
+		return 0;
+	}
+
+	tod_data = get_tod_data_addr();
+
+	/* Do it locally if supported */
+	if (tod_data->do_local && clk_id == CLOCK_REALTIME) {
+		calculate_time_from_tsc(&ats, tod_data);
+
+		tp->tv_sec = ats.tv_sec;
+		tp->tv_nsec = ats.tv_nsec;
+
+		return 0;
+	}
+
+	/* Otherwise syscall */
+	asm volatile("mov w8, %w1\n"
+		"mov x0, %2\n"
+		"mov x1, %3\n"
+		"svc #0\n"
+		"mov %0, x0\n"
+		: "=r" (ret)
+		: "r" (__NR_clock_gettime), "r"(clk_id), "r"(tp)
+		: "memory");
+
+	return (int)ret;
+}
+
+int __kernel_clock_getres(clockid_t clk_id, struct timespec *res)
+{
+	long ret;
+
+	if (!res) {
+		/* nothing to do */
+		return 0;
+	}
+
+	switch (clk_id) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC:
+		res->tv_sec = 0;
+		res->tv_nsec = CLOCK_REALTIME_RES;
+		return 0;
+		break;
+	case CLOCK_REALTIME_COARSE:
+	case CLOCK_MONOTONIC_COARSE:
+		res->tv_sec = 0;
+		res->tv_nsec = CLOCK_COARSE_RES;
+		return 0;
+		break;
+	default:
+		break;
+	}
+
+	/* Otherwise syscall */
+	asm volatile("mov w8, %w1\n"
+		"mov x0, %2\n"
+		"mov x1, %3\n"
+		"svc #0\n"
+		"mov %0, x0\n"
+		: "=r" (ret)
+		: "r" (__NR_clock_getres), "r"(clk_id), "r"(res)
+		: "memory");
+
+	return (int)ret;
+}
diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
new file mode 100644
index 00000000..efec7ad9
--- /dev/null
+++ b/arch/arm64/kernel/vdso/note.S
@@ -0,0 +1,28 @@
+/* note.S COPYRIGHT FUJITSU LIMITED 2016 */
+/* @ref.impl arch/arm64/kernel/vdso/note.S */
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */ + +#include + +ELFNOTE_START(McKernel, 0, "a") + .long 0x10000 /* MCKERNEL_VERSION_CODE */ +ELFNOTE_END diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S new file mode 100644 index 00000000..d4d3ae29 --- /dev/null +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -0,0 +1,39 @@ +/* sigreturn.S COPYRIGHT FUJITSU LIMITED 2016 */ +/* @ref.impl arch/arm64/kernel/vdso/sigreturn.S */ +/* + * Sigreturn trampoline for returning from a signal when the SA_RESTORER + * flag is not set. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Will Deacon + */ + +#include +#include "syscall.h" + + .text + + nop +ENTRY(__kernel_rt_sigreturn) + .cfi_startproc + .cfi_signal_frame + .cfi_def_cfa x29, 0 + .cfi_offset x29, 0 * 8 + .cfi_offset x30, 1 * 8 + mov x8, #__NR_rt_sigreturn + svc #0 + .cfi_endproc +ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/syscall.h b/arch/arm64/kernel/vdso/syscall.h new file mode 100644 index 00000000..721b25a1 --- /dev/null +++ b/arch/arm64/kernel/vdso/syscall.h @@ -0,0 +1,15 @@ +/* syscall.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef __HEADER_ARM64_VDSO_SYSCALL_H +#define __HEADER_ARM64_VDSO_SYSCALL_H + +#define DECLARATOR(number,name) .equ __NR_##name, number +#define SYSCALL_HANDLED(number,name) DECLARATOR(number,name) +#define SYSCALL_DELEGATED(number,name) DECLARATOR(number,name) + +#include + +#undef DECLARATOR +#undef SYSCALL_HANDLED +#undef SYSCALL_DELEGATED + +#endif /* !__HEADER_ARM64_VDSO_SYSCALL_H */ diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S new file mode 100644 index 00000000..90fa559a --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -0,0 +1,96 @@ +/* vdso.lds.S COPYRIGHT FUJITSU LIMITED 2016 */ +/* @ref.impl arch/arm64/kernel/vdso/vdso.lds.S */ +/* + * GNU linker script for the VDSO library. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Will Deacon + * Heavily based on the vDSO linker scripts for other archs. + */ + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) + +SECTIONS +{ + PROVIDE(_tod_data = . - PAGE_SIZE); + . 
= VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + + .text : { *(.text*) } :text =0xd503201f + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6.39 { + global: + __kernel_rt_sigreturn; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp = __kernel_rt_sigreturn; diff --git a/arch/x86/kernel/Makefile.arch b/arch/x86/kernel/Makefile.arch index cb366fbb..254fb1ea 100644 --- a/arch/x86/kernel/Makefile.arch +++ b/arch/x86/kernel/Makefile.arch @@ -1,2 +1,5 @@ IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o IHK_OBJS += perfctr.o syscall.o vsyscall.o +# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. +# IHK_OBJS added coredump.o +IHK_OBJS += coredump.o diff --git a/arch/x86/kernel/coredump.c b/arch/x86/kernel/coredump.c new file mode 100644 index 00000000..22e53c72 --- /dev/null +++ b/arch/x86/kernel/coredump.c @@ -0,0 +1,59 @@ +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +#include +#include + +void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0) +{ + struct x86_user_context *uctx = regs0; + struct x86_basic_regs *regs = &uctx->gpr; + register unsigned long _r12 asm("r12"); + register unsigned long _r13 asm("r13"); + register unsigned long _r14 asm("r14"); + register unsigned long _r15 asm("r15"); + +/* + We ignore following entries for now. + + struct elf_siginfo pr_info; + short int pr_cursig; + a8_uint64_t pr_sigpend; + a8_uint64_t pr_sighold; + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus64_timeval pr_utime; + struct prstatus64_timeval pr_stime; + struct prstatus64_timeval pr_cutime; + struct prstatus64_timeval pr_cstime; + */ + + prstatus->pr_reg[0] = _r15; + prstatus->pr_reg[1] = _r14; + prstatus->pr_reg[2] = _r13; + prstatus->pr_reg[3] = _r12; + prstatus->pr_reg[4] = regs->rbp; + prstatus->pr_reg[5] = regs->rbx; + prstatus->pr_reg[6] = regs->r11; + prstatus->pr_reg[7] = regs->r10; + prstatus->pr_reg[8] = regs->r9; + prstatus->pr_reg[9] = regs->r8; + prstatus->pr_reg[10] = regs->rax; + prstatus->pr_reg[11] = regs->rcx; + prstatus->pr_reg[12] = regs->rdx; + prstatus->pr_reg[13] = regs->rsi; + prstatus->pr_reg[14] = regs->rdi; + prstatus->pr_reg[15] = regs->rax; /* ??? 
*/ + prstatus->pr_reg[16] = regs->rip; + prstatus->pr_reg[17] = regs->cs; + prstatus->pr_reg[18] = regs->rflags; + prstatus->pr_reg[19] = regs->rsp; + prstatus->pr_reg[20] = regs->ss; + prstatus->pr_reg[21] = rdmsr(MSR_FS_BASE); + prstatus->pr_reg[22] = rdmsr(MSR_GS_BASE); + /* There is no ds, es, fs and gs. */ + + prstatus->pr_fpvalid = 0; /* We assume no fp */ +} + +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index a4e05a7f..bf4ac4f5 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -43,6 +43,11 @@ #define LAPIC_ICR0 0x300 #define LAPIC_ICR2 0x310 #define LAPIC_ESR 0x280 +#ifdef POSTK_DEBUG_ARCH_DEP_75 /* x86 depend hide */ +#define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERF_VECTOR 0xf0 +#define LOCAL_SMP_FUNC_CALL_VECTOR 0xf1 +#endif /* POSTK_DEBUG_ARCH_DEP_75 */ #define APIC_INT_LEVELTRIG 0x08000 #define APIC_INT_ASSERT 0x04000 @@ -1424,6 +1429,22 @@ void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx, } } +#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */ +long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int *eofp) +{ + *eofp = 1; + return -ENOMEM; +} +#endif /* POSTK_DEBUG_ARCH_DEP_42 */ + +#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_thread() function */ +void arch_clone_thread(struct thread *othread, unsigned long pc, + unsigned long sp, struct thread *nthread) +{ + return; +} +#endif /* POSTK_DEBUG_ARCH_DEP_23 */ + void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx) { kprintf("CS:RIP = %04lx:%16lx\n", uctx->gpr.cs, uctx->gpr.rip); @@ -1565,6 +1586,51 @@ int ihk_mc_interrupt_cpu(int cpu, int vector) return 0; } +#ifdef POSTK_DEBUG_ARCH_DEP_22 +extern void perf_start(struct mc_perf_event *event); +extern void perf_reset(struct mc_perf_event *event); +struct thread *arch_switch_context(struct thread *prev, struct thread *next) +{ + struct thread *last; + + dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", + ihk_mc_get_processor_id(), next->tlsblock_base); + + /* Set up new TLS.. */ + ihk_mc_init_user_tlsbase(next->uctx, next->tlsblock_base); + + /* Performance monitoring inherit */ + if(next->proc->monitoring_event) { + if(next->proc->perf_status == PP_RESET) + perf_reset(next->proc->monitoring_event); + if(next->proc->perf_status != PP_COUNT) { + perf_reset(next->proc->monitoring_event); + perf_start(next->proc->monitoring_event); + } + } + +#ifdef PROFILE_ENABLE + if (prev->profile && prev->profile_start_ts != 0) { + prev->profile_elapsed_ts += + (rdtsc() - prev->profile_start_ts); + prev->profile_start_ts = 0; + } + + if (next->profile && next->profile_start_ts == 0) { + next->profile_start_ts = rdtsc(); + } +#endif + + if (prev) { + last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); + } + else { + last = ihk_mc_switch_context(NULL, &next->ctx, prev); + } + return last; +} +#endif + /*@ @ requires \valid(thread); @ ensures thread->fp_regs == NULL; @@ -1619,6 +1685,14 @@ save_fp_regs(struct thread *thread) } } +#ifdef POSTK_DEBUG_TEMP_FIX_19 +void +clear_fp_regs(struct thread *thread) +{ + return; +} +#endif /* POSTK_DEBUG_TEMP_FIX_19 */ + /*@ @ requires \valid(thread); @ assigns thread->fp_regs; diff --git a/arch/x86/kernel/gencore.c b/arch/x86/kernel/gencore.c index eec68983..2ddf8c68 100644 --- a/arch/x86/kernel/gencore.c +++ b/arch/x86/kernel/gencore.c @@ -1,3 +1,4 @@ +#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. 
*/ #include #include #include @@ -531,3 +532,4 @@ void freecore(struct coretable **coretable) kfree(phys_to_virt(ct[1].addr)); /* ph */ kfree(*coretable); } +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/x86/kernel/include/arch-futex.h b/arch/x86/kernel/include/arch-futex.h index decbc3d0..be1fd001 100644 --- a/arch/x86/kernel/include/arch-futex.h +++ b/arch/x86/kernel/include/arch-futex.h @@ -64,4 +64,70 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return oldval; } +#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */ +static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + +#ifdef __UACCESS__ + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; +#endif + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op1("lock; xaddl %0, %2", ret, oldval, + uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} +#endif /* !POSTK_DEBUG_ARCH_DEP_8 */ + #endif diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index d072251f..e38d5e28 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -159,6 +159,13 @@ enum ihk_mc_pt_attribute { enum ihk_mc_pt_attribute attr_mask; +#ifdef POSTK_DEBUG_ARCH_DEP_12 +static inline int pfn_is_write_combined(uintptr_t pfn) +{ + return ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)); +} +#endif /* #ifdef POSTK_DEBUG_ARCH_DEP_12 */ + static inline int pte_is_null(pte_t *ptep) { return (*ptep == PTE_NULL); diff --git a/arch/x86/kernel/include/elf.h b/arch/x86/kernel/include/elf.h new file mode 100644 index 00000000..874f453c --- /dev/null +++ b/arch/x86/kernel/include/elf.h @@ -0,0 +1,59 @@ +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. 
*/ +#ifndef __HEADER_X86_COMMON_ELF_H +#define __HEADER_X86_COMMON_ELF_H + +/* NOTE segment type defined */ +#define NT_X86_STATE 0x202 + +/* ELF target machines defined */ +#define EM_K10M 181 /* Intel K10M */ +#define EM_X86_64 62 /* AMD x86-64 architecture */ + +/* ELF header defined */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_OSABI ELFOSABI_NONE +#define ELF_ABIVERSION El_ABIVERSION_NONE +#ifdef CONFIG_MIC +#define ELF_ARCH EM_K10M +#else /* CONFIG_MIC */ +#define ELF_ARCH EM_X86_64 +#endif /* CONFIG_MIC */ + +struct user_regs64_struct +{ + a8_uint64_t r15; + a8_uint64_t r14; + a8_uint64_t r13; + a8_uint64_t r12; + a8_uint64_t rbp; + a8_uint64_t rbx; + a8_uint64_t r11; + a8_uint64_t r10; + a8_uint64_t r9; + a8_uint64_t r8; + a8_uint64_t rax; + a8_uint64_t rcx; + a8_uint64_t rdx; + a8_uint64_t rsi; + a8_uint64_t rdi; + a8_uint64_t orig_rax; + a8_uint64_t rip; + a8_uint64_t cs; + a8_uint64_t eflags; + a8_uint64_t rsp; + a8_uint64_t ss; + a8_uint64_t fs_base; + a8_uint64_t gs_base; + a8_uint64_t ds; + a8_uint64_t es; + a8_uint64_t fs; + a8_uint64_t gs; +}; + +#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t)) + +typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64]; + +#endif /* __HEADER_S64FX_COMMON_ELF_H */ +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/x86/kernel/include/elfcore.h b/arch/x86/kernel/include/elfcore.h index d8d91c3d..be37f725 100644 --- a/arch/x86/kernel/include/elfcore.h +++ b/arch/x86/kernel/include/elfcore.h @@ -1,3 +1,4 @@ +#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ /* * Structures and definitions for ELF core file. * Extracted from @@ -90,3 +91,4 @@ struct note { #include "elfcoregpl.h" +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/x86/kernel/include/elfcoregpl.h b/arch/x86/kernel/include/elfcoregpl.h index 09207355..bffd8fad 100644 --- a/arch/x86/kernel/include/elfcoregpl.h +++ b/arch/x86/kernel/include/elfcoregpl.h @@ -1,3 +1,4 @@ +#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ /* * Structures and defines from GPLed file. */ @@ -92,3 +93,4 @@ struct elf_prpsinfo64 char pr_fname[16]; char pr_psargs[ELF_PRARGSZ]; }; +#endif /* !POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/arch/x86/kernel/include/ihk/types.h b/arch/x86/kernel/include/ihk/types.h index 9f000a96..7aa18a0a 100644 --- a/arch/x86/kernel/include/ihk/types.h +++ b/arch/x86/kernel/include/ihk/types.h @@ -29,6 +29,14 @@ typedef uint64_t size_t; typedef int64_t ssize_t; typedef int64_t off_t; +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +typedef int32_t key_t; +typedef uint32_t uid_t; +typedef uint32_t gid_t; +typedef int64_t time_t; +typedef int32_t pid_t; +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ + #define NULL ((void *)0) #endif diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index c8fe6e0f..7c6edcbc 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -1,3 +1,4 @@ +/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2017 */ /** * \file syscall_list.h * License details are found in the file LICENSE. @@ -132,6 +133,9 @@ SYSCALL_HANDLED(238, set_mempolicy) SYSCALL_HANDLED(239, get_mempolicy) SYSCALL_HANDLED(247, waitid) SYSCALL_HANDLED(256, migrate_pages) +#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. 
*/ +SYSCALL_HANDLED(257, openat) +#endif /* POSTK_DEBUG_ARCH_DEP_62 */ SYSCALL_DELEGATED(270, pselect6) SYSCALL_DELEGATED(271, ppoll) SYSCALL_HANDLED(273, set_robust_list) diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 5d1e7568..34b50904 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -1103,6 +1103,109 @@ struct clear_range_args { int max_nr_addr; }; +#ifdef POSTK_DEBUG_ARCH_DEP_8 +void remote_flush_tlb_cpumask(struct process_vm *vm, + unsigned long addr, int cpu_id) +{ + unsigned long __addr = addr; + return remote_flush_tlb_array_cpumask(vm, &__addr, 1, cpu_id); +} + +void remote_flush_tlb_array_cpumask(struct process_vm *vm, + unsigned long *addr, + int nr_addr, + int cpu_id) +{ + unsigned long cpu; + int flush_ind; + struct tlb_flush_entry *flush_entry; + cpu_set_t _cpu_set; + + if (addr[0]) { + flush_ind = (addr[0] >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE; + } + /* Zero address denotes full TLB flush */ + else { + /* Random.. */ + flush_ind = (rdtsc()) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE; + } + + flush_entry = &tlb_flush_vector[flush_ind]; + + /* Take a copy of the cpu set so that we don't hold the lock + * all the way while interrupting other cores */ + ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock); + memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t)); + ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock); + + dkprintf("trying to acquire flush_entry->lock flush_ind: %d\n", flush_ind); + + ihk_mc_spinlock_lock_noirq(&flush_entry->lock); + + flush_entry->vm = vm; + flush_entry->addr = addr; + flush_entry->nr_addr = nr_addr; + ihk_atomic_set(&flush_entry->pending, 0); + + dkprintf("lock acquired, iterating cpu mask.. flush_ind: %d\n", flush_ind); + + /* Loop through CPUs in this address space and interrupt them for + * TLB flush on the specified address */ + for_each_set_bit(cpu, (const unsigned long*)&_cpu_set.__bits, CPU_SETSIZE) { + + if (ihk_mc_get_processor_id() == cpu) + continue; + + ihk_atomic_inc(&flush_entry->pending); + dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n", + flush_ind, addr, cpu); + +#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */ + /* TODO(pka_idke) Interim support */ + ihk_mc_interrupt_cpu(cpu, + ihk_mc_get_vector(flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START)); +#else /* POSTK_DEBUG_ARCH_DEP_8 */ + ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id, + flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START); +#endif /* POSTK_DEBUG_ARCH_DEP_8 */ + } + +#ifdef DEBUG_IC_TLB + { + unsigned long tsc; + tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */ +#endif + if (flush_entry->addr[0]) { + int i; + + for (i = 0; i < flush_entry->nr_addr; ++i) { + flush_tlb_single(flush_entry->addr[i] & PAGE_MASK); + } + } + /* Zero address denotes full TLB flush */ + else { + flush_tlb(); + } + + /* Wait for all cores */ + while (ihk_atomic_read(&flush_entry->pending) != 0) { + cpu_pause(); + +#ifdef DEBUG_IC_TLB + if (rdtsc() > tsc) { + kprintf("waited 10 secs for remote TLB!!
-> panic_all()\n"); + panic_all_cores("waited 10 secs for remote TLB!!\n"); + } +#endif + } +#ifdef DEBUG_IC_TLB + } +#endif + + ihk_mc_spinlock_unlock_noirq(&flush_entry->lock); +} +#endif /* POSTK_DEBUG_ARCH_DEP_8 */ + static void remote_flush_tlb_add_addr(struct clear_range_args *args, unsigned long addr) { @@ -2470,8 +2573,12 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t return error; } +#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ + if (!is_mckernel_memory(pa)) { +#else if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { +#endif /* POSTK_DEBUG_TEMP_FIX_52 */ dkprintf("%s: pa is outside of LWK memory, to: %p, pa: %p," "cpsize: %d\n", __FUNCTION__, to, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); @@ -2550,8 +2657,12 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } +#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ + if (!is_mckernel_memory(pa)) { +#else if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { +#endif /* POSTK_DEBUG_TEMP_FIX_52 */ dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); @@ -2617,8 +2728,12 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t return error; } +#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */ + if (!is_mckernel_memory(pa)) { +#else if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) || pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) { +#endif /* POSTK_DEBUG_TEMP_FIX_52 */ dkprintf("%s: pa is outside of LWK memory, from: %p," "pa: %p, cpsize: %d\n", __FUNCTION__, from, pa, cpsize); va = ihk_mc_map_virtual(pa, 1, PTATTR_ACTIVE); diff --git a/arch/x86/kernel/perfctr.c b/arch/x86/kernel/perfctr.c index 962a4ace..ff05656b 100644 --- a/arch/x86/kernel/perfctr.c +++ b/arch/x86/kernel/perfctr.c @@ -16,6 +16,9 @@ extern unsigned int *x86_march_perfmap; extern int running_on_kvm(void); +#ifdef POSTK_DEBUG_TEMP_FIX_31 +int ihk_mc_perfctr_fixed_init(int counter, int mode); +#endif/*POSTK_DEBUG_TEMP_FIX_31*/ //#define PERFCTR_DEBUG #ifdef PERFCTR_DEBUG @@ -192,16 +195,52 @@ static int set_fixed_counter(int counter, int mode) return 0; } +#ifdef POSTK_DEBUG_TEMP_FIX_29 +int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode) +#else int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode) +#endif /*POSTK_DEBUG_TEMP_FIX_29*/ { +#ifdef POSTK_DEBUG_TEMP_FIX_31 + // PAPI_REF_CYC counted by fixed counter + if (counter >= X86_IA32_BASE_FIXED_PERF_COUNTERS) { + return ihk_mc_perfctr_fixed_init(counter, mode); + } +#endif /*POSTK_DEBUG_TEMP_FIX_31*/ + if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { return -EINVAL; } +#ifdef POSTK_DEBUG_TEMP_FIX_29 + return set_perfctr_x86_direct(counter, mode, config); +#else return set_perfctr_x86_direct(counter, mode, code); +#endif /*POSTK_DEBUG_TEMP_FIX_29*/ } + +#ifdef POSTK_DEBUG_TEMP_FIX_29 +int ihk_mc_perfctr_init(int counter, uint64_t config, int mode) +#else int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode) +#endif /*POSTK_DEBUG_TEMP_FIX_29*/ { +#ifdef POSTK_DEBUG_TEMP_FIX_29 + enum ihk_perfctr_type type; + + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES : + type = APT_TYPE_CYCLE; + break; + 
case PERF_COUNT_HW_INSTRUCTIONS : + type = APT_TYPE_INSTRUCTIONS; + break; + default : + // Not supported config. + type = PERFCTR_MAX_TYPE; + } +#endif /*POSTK_DEBUG_TEMP_FIX_29*/ + if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) { return -EINVAL; } @@ -219,11 +258,18 @@ int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode) extern void x86_march_perfctr_start(unsigned long counter_mask); #endif +#ifdef POSTK_DEBUG_TEMP_FIX_30 +int ihk_mc_perfctr_start(int counter) +#else int ihk_mc_perfctr_start(unsigned long counter_mask) +#endif /*POSTK_DEBUG_TEMP_FIX_30*/ { int ret = 0; unsigned long value = 0; unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; +#ifdef POSTK_DEBUG_TEMP_FIX_30 + unsigned long counter_mask = 1UL << counter; +#endif /*POSTK_DEBUG_TEMP_FIX_30*/ PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); @@ -240,11 +286,18 @@ int ihk_mc_perfctr_start(unsigned long counter_mask) goto fn_exit; } +#ifdef POSTK_DEBUG_TEMP_FIX_30 +int ihk_mc_perfctr_stop(int counter) +#else int ihk_mc_perfctr_stop(unsigned long counter_mask) +#endif/*POSTK_DEBUG_TEMP_FIX_30*/ { int ret = 0; unsigned long value; unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK; +#ifdef POSTK_DEBUG_TEMP_FIX_30 + unsigned long counter_mask = 1UL << counter; +#endif/*POSTK_DEBUG_TEMP_FIX_30*/ PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL); diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index c68e14ca..4f054b3d 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -66,6 +66,25 @@ uintptr_t debug_constants[] = { -1, }; +#ifdef POSTK_DEBUG_ARCH_DEP_52 +#define VDSO_MAXPAGES 2 +struct vdso { + long busy; + int vdso_npages; + char vvar_is_global; + char hpet_is_global; + char pvti_is_global; + char padding; + long vdso_physlist[VDSO_MAXPAGES]; + void *vvar_virt; + long vvar_phys; + void *hpet_virt; + long hpet_phys; + void *pvti_virt; + long pvti_phys; +}; +#endif /*POSTK_DEBUG_ARCH_DEP_52*/ + static struct vdso vdso; static size_t container_size = 0; static ptrdiff_t vdso_offset; diff --git a/config.h.in b/config.h.in index 71e51236..d7df6e9d 100644 --- a/config.h.in +++ b/config.h.in @@ -81,6 +81,9 @@ /* Define to address of kernel symbol zap_page_range, or 0 if exported */ #undef MCCTRL_KSYM_zap_page_range +/* Define to address of kernel symbol vdso_spec, or 0 if exported. POSTK_DEBUG_ARCH_DEP_50 */ +#undef MCCTRL_KSYM_vdso_spec + /* McKernel specific headers */ #undef MCKERNEL_INCDIR diff --git a/configure b/configure index 01e9f15e..249705ec 100755 --- a/configure +++ b/configure @@ -11,6 +11,7 @@ ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## +# configure COPYRIGHT FUJITSU LIMITED 2015-2017 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh @@ -3613,8 +3614,21 @@ _ACEOF case $WITH_TARGET in - attached-mic|builtin-x86|smp-x86) - ARCH=`uname -m` +# POSTK_DEBUG_ARCH_DEP_30, Expansion of the branch. 
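Before the configure case rewrite continues below, it is worth pinning down the perfctr interface change above: with the POSTK_DEBUG_TEMP_FIX_29/30 hunks applied, ihk_mc_perfctr_init() takes a raw perf_event-style config word instead of an enum, and ihk_mc_perfctr_start()/stop() take a single counter index, rebuilding the old bit mask internally as 1UL << counter. A minimal caller sketch follows; the helper name and the mode argument are illustrative, not part of the patch, while the extern prototypes follow the hunks above. Moving the mask construction inside the API keeps the prototype architecture-neutral, which appears to be the point of the TEMP_FIX_30 markers.

```c
/*
 * Illustrative sketch only: a caller of the reworked perfctr API.
 * PERF_COUNT_HW_CPU_CYCLES (0) is one of the two config values the
 * patch maps internally (to APT_TYPE_CYCLE); the helper itself and
 * the mode value are hypothetical.
 */
#include <stdint.h>

extern int ihk_mc_perfctr_init(int counter, uint64_t config, int mode);
extern int ihk_mc_perfctr_start(int counter);
extern int ihk_mc_perfctr_stop(int counter);

#define PERF_COUNT_HW_CPU_CYCLES 0	/* Linux perf_event ABI value */

static int start_cycle_counter(int counter, int mode)
{
	int ret = ihk_mc_perfctr_init(counter, PERF_COUNT_HW_CPU_CYCLES, mode);

	if (ret)
		return ret;
	/* Old API took a mask (1UL << counter); the index is now enough. */
	return ihk_mc_perfctr_start(counter);
}
```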
+# attached-mic|builtin-x86|smp-x86) +# ARCH=`uname -m` +# AC_PROG_CC +# XCC=$CC +# ;; + attached-mic|builtin-x86|smp-*) + case $WITH_TARGET in + attached-mic|builtin-x86|smp-x86) + ARCH=`uname -m` + ;; + smp-arm64) + ARCH=arm64 + ;; + esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4197,6 +4211,48 @@ $as_echo "no" >&6; } fi + CC=$XCC + ;; + smp-arm64) + ARCH=arm64 + # Extract the first word of "${CROSS_COMPILE}gcc", so it can be a program name with args. +set dummy ${CROSS_COMPILE}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_XCC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$XCC"; then + ac_cv_prog_XCC="$XCC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_XCC="${CROSS_COMPILE}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_XCC" && ac_cv_prog_XCC="no" +fi +fi +XCC=$ac_cv_prog_XCC +if test -n "$XCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $XCC" >&5 +$as_echo "$XCC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + CC=$XCC ;; *) @@ -4249,6 +4305,9 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$ETCDIR" = X; then + ETCDIR="$prefix/etc" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -4275,9 +4334,6 @@ case $WITH_TARGET in if test "X$INCDIR" = X; then INCDIR="$prefix/include" fi - if test "X$ETCDIR" = X; then - ETCDIR="$prefix/etc" - fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -4285,6 +4341,32 @@ case $WITH_TARGET in MANDIR="$prefix/smp-x86/man" fi ;; + smp-arm64) + if test "X$KERNDIR" = X; then + KERNDIR="$prefix/smp-arm64/kernel" + fi + if test "X$BINDIR" = X; then + BINDIR="$prefix/bin" + fi + if test "X$SBINDIR" = X; then + SBINDIR="$prefix/sbin" + fi + if test "X$MCKERNEL_INCDIR" = X; then + MCKERNEL_INCDIR="$prefix/include" + fi + if test "X$MCKERNEL_LIBDIR" = X; then + MCKERNEL_LIBDIR="$prefix/lib" + fi + if test "X$INCDIR" = X; then + INCDIR="$prefix/include" + fi + if test "X$KMODDIR" = X; then + KMODDIR="$prefix/kmod" + fi + if test "X$MANDIR" = X; then + MANDIR="$prefix/smp-arm64/man" + fi + ;; *) as_fn_error $? "target $WITH_TARGET is unknwon" "$LINENO" 5 ;; @@ -4585,6 +4667,32 @@ _ACEOF fi +# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol. + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol vdso_spec" >&5 +$as_echo_n "checking System.map for symbol vdso_spec... 
" >&6; } + mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " vdso_spec\$" | cut -d\ -f1` + if test -z $mcctrl_addr; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +$as_echo "not found" >&6; } + else + mcctrl_result=$mcctrl_addr + mcctrl_addr="0x$mcctrl_addr" + + if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_vdso_spec\$" >/dev/null`; then + mcctrl_result="exported" + mcctrl_addr="0" + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $mcctrl_result" >&5 +$as_echo "$mcctrl_result" >&6; } + +cat >>confdefs.h <<_ACEOF +#define MCCTRL_KSYM_vdso_spec $mcctrl_addr +_ACEOF + + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking System.map for symbol hv_clock" >&5 $as_echo_n "checking System.map for symbol hv_clock... " >&6; } @@ -4665,7 +4773,9 @@ case $ENABLE_MEMDUMP in yes|no|auto) ;; default) - if test "x$WITH_TARGET" = "xsmp-x86" ; then +# POSTK_DEBUG_ARCH_DEP_30, Expansion of the branch. +# if test "x$WITH_TARGET" = "xsmp-x86" ; then + if test "x$WITH_TARGET" = "xsmp-*" ; then ENABLE_MEMDUMP=auto else ENABLE_MEMDUMP=no @@ -4678,6 +4788,54 @@ esac if test "x$ENABLE_MEMDUMP" != "xno" ; then enableval=yes +# POSTK_DEBUG_ARCH_DEP_32, AC_CHECK_LIB for libiberty + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hex_init in -liberty" >&5 +$as_echo_n "checking for hex_init in -liberty... " >&6; } +if ${ac_cv_lib_iberty_hex_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-liberty $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char hex_init (); +int +main () +{ +return hex_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_iberty_hex_init=yes +else + ac_cv_lib_iberty_hex_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_iberty_hex_init" >&5 +$as_echo "$ac_cv_lib_iberty_hex_init" >&6; } +if test "x$ac_cv_lib_iberty_hex_init" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBIBERTY 1 +_ACEOF + + LIBS="-liberty $LIBS" + +else + enableval=no +fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for bfd_init in -lbfd" >&5 $as_echo_n "checking for bfd_init in -lbfd... 
" >&6; } if ${ac_cv_lib_bfd_bfd_init+:} false; then : @@ -4854,7 +5012,24 @@ _ACEOF ac_config_headers="$ac_config_headers config.h" -ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/x86_64/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in" +# POSTK_DEBUG_ARCH_DEP_37 +#AC_CONFIG_FILES([ +# Makefile +# executer/user/Makefile +# executer/kernel/mcctrl/Makefile +# executer/kernel/mcctrl/arch/x86_64/Makefile +# executer/kernel/mcoverlayfs/Makefile +# kernel/Makefile +# kernel/Makefile.build +# arch/x86/tools/mcreboot-attached-mic.sh +# arch/x86/tools/mcshutdown-attached-mic.sh +# arch/x86/tools/mcreboot-builtin-x86.sh +# arch/x86/tools/mcreboot-smp-x86.sh +# arch/x86/tools/mcstop+release-smp-x86.sh +# arch/x86/tools/mcshutdown-builtin-x86.sh +# arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in +#]) +ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/arch/$ARCH/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/eclair-dump-backtrace.exp arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in kernel/config/config.smp-arm64 arch/arm64/kernel/vdso/Makefile" if test "x$enable_dcfa" = xyes; then : @@ -5555,9 +5730,9 @@ do "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "executer/user/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/Makefile" ;; - "executer/user/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/arch/x86_64/Makefile" ;; + "executer/user/arch/$ARCH/Makefile") CONFIG_FILES="$CONFIG_FILES executer/user/arch/$ARCH/Makefile" ;; "executer/kernel/mcctrl/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/Makefile" ;; - "executer/kernel/mcctrl/arch/x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/arch/x86_64/Makefile" ;; + "executer/kernel/mcctrl/arch/$ARCH/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcctrl/arch/$ARCH/Makefile" ;; "executer/kernel/mcoverlayfs/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/Makefile" ;; "executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile") CONFIG_FILES="$CONFIG_FILES 
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile" ;; "executer/kernel/mcoverlayfs/linux-4.0.9/Makefile") CONFIG_FILES="$CONFIG_FILES executer/kernel/mcoverlayfs/linux-4.0.9/Makefile" ;; @@ -5576,6 +5751,8 @@ do "arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;; "arch/x86/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.service" ;; "arch/x86/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/irqbalance_mck.in" ;; + "kernel/config/config.smp-arm64") CONFIG_FILES="$CONFIG_FILES kernel/config/config.smp-arm64" ;; + "arch/arm64/kernel/vdso/Makefile") CONFIG_FILES="$CONFIG_FILES arch/arm64/kernel/vdso/Makefile" ;; "kernel/Makefile.dcfa") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.dcfa" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; diff --git a/configure.ac b/configure.ac index eb3b9643..6ee43e85 100644 --- a/configure.ac +++ b/configure.ac @@ -1,3 +1,4 @@ +# configure.ac COPYRIGHT FUJITSU LIMITED 2015-2016 AC_PREREQ(2.63) m4_define([IHK_VERSION_m4],[0.9.0])dnl m4_define([MCKERNEL_VERSION_m4],[0.9.0])dnl @@ -169,8 +170,21 @@ test "x$prefix" = xNONE && prefix="$ac_default_prefix" AC_DEFINE_UNQUOTED(ROOTFSDIR,"$prefix/rootfs",[Path of bind-mount source directory]) case $WITH_TARGET in - attached-mic|builtin-x86|smp-x86) - ARCH=`uname -m` +# POSTK_DEBUG_ARCH_DEP_30, Expansion of the branch. +# attached-mic|builtin-x86|smp-x86) +# ARCH=`uname -m` +# AC_PROG_CC +# XCC=$CC +# ;; + attached-mic|builtin-x86|smp-*) + case $WITH_TARGET in + attached-mic|builtin-x86|smp-x86) + ARCH=`uname -m` + ;; + smp-arm64) + ARCH=arm64 + ;; + esac AC_PROG_CC XCC=$CC CFLAGS="$CFLAGS -ffreestanding -fno-tree-loop-distribute-patterns" @@ -183,6 +197,14 @@ case $WITH_TARGET in [no]) CC=$XCC ;; + smp-arm64) + ARCH=arm64 + AC_CHECK_PROG(XCC, + [${CROSS_COMPILE}gcc], + [${CROSS_COMPILE}gcc], + [no]) + CC=$XCC + ;; *) AC_MSG_ERROR([target $WITH_TARGET is unknwon]) ;; @@ -233,6 +255,9 @@ case $WITH_TARGET in if test "X$SBINDIR" = X; then SBINDIR="$prefix/sbin" fi + if test "X$ETCDIR" = X; then + ETCDIR="$prefix/etc" + fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -259,9 +284,6 @@ case $WITH_TARGET in if test "X$INCDIR" = X; then INCDIR="$prefix/include" fi - if test "X$ETCDIR" = X; then - ETCDIR="$prefix/etc" - fi if test "X$KMODDIR" = X; then KMODDIR="$prefix/kmod" fi @@ -269,6 +291,32 @@ case $WITH_TARGET in MANDIR="$prefix/smp-x86/man" fi ;; + smp-arm64) + if test "X$KERNDIR" = X; then + KERNDIR="$prefix/smp-arm64/kernel" + fi + if test "X$BINDIR" = X; then + BINDIR="$prefix/bin" + fi + if test "X$SBINDIR" = X; then + SBINDIR="$prefix/sbin" + fi + if test "X$MCKERNEL_INCDIR" = X; then + MCKERNEL_INCDIR="$prefix/include" + fi + if test "X$MCKERNEL_LIBDIR" = X; then + MCKERNEL_LIBDIR="$prefix/lib" + fi + if test "X$INCDIR" = X; then + INCDIR="$prefix/include" + fi + if test "X$KMODDIR" = X; then + KMODDIR="$prefix/kmod" + fi + if test "X$MANDIR" = X; then + MANDIR="$prefix/smp-arm64/man" + fi + ;; *) AC_MSG_ERROR([target $WITH_TARGET is unknwon]) ;; @@ -344,6 +392,8 @@ MCCTRL_FIND_KSYM([vdso_end]) MCCTRL_FIND_KSYM([vdso_pages]) MCCTRL_FIND_KSYM([__vvar_page]) MCCTRL_FIND_KSYM([hpet_address]) +# POSTK_DEBUG_ARCH_DEP_50, add:find kernel symbol. 
+MCCTRL_FIND_KSYM([vdso_spec]) MCCTRL_FIND_KSYM([hv_clock]) MCCTRL_FIND_KSYM([sys_readlink]) MCCTRL_FIND_KSYM([walk_page_range]) @@ -352,7 +402,9 @@ case $ENABLE_MEMDUMP in yes|no|auto) ;; default) - if test "x$WITH_TARGET" = "xsmp-x86" ; then +# POSTK_DEBUG_ARCH_DEP_30, Expansion of the branch. +# if test "x$WITH_TARGET" = "xsmp-x86" ; then + if test "x$WITH_TARGET" = "xsmp-*" ; then ENABLE_MEMDUMP=auto else ENABLE_MEMDUMP=no @@ -365,6 +417,8 @@ esac if test "x$ENABLE_MEMDUMP" != "xno" ; then enableval=yes +# POSTK_DEBUG_ARCH_DEP_32, AC_CHECK_LIB for libiberty + AC_CHECK_LIB([iberty],[hex_init],[],[enableval=no]) AC_CHECK_LIB([bfd],[bfd_init],[],[enableval=no]) AC_CHECK_HEADER([bfd.h],[],[enableval=no]) @@ -455,12 +509,29 @@ AC_SUBST(DCFA_RESEASE_DATE) AC_SUBST(uncomment_if_ENABLE_MEMDUMP) AC_CONFIG_HEADERS([config.h]) +# POSTK_DEBUG_ARCH_DEP_37 +#AC_CONFIG_FILES([ +# Makefile +# executer/user/Makefile +# executer/kernel/mcctrl/Makefile +# executer/kernel/mcctrl/arch/x86_64/Makefile +# executer/kernel/mcoverlayfs/Makefile +# kernel/Makefile +# kernel/Makefile.build +# arch/x86/tools/mcreboot-attached-mic.sh +# arch/x86/tools/mcshutdown-attached-mic.sh +# arch/x86/tools/mcreboot-builtin-x86.sh +# arch/x86/tools/mcreboot-smp-x86.sh +# arch/x86/tools/mcstop+release-smp-x86.sh +# arch/x86/tools/mcshutdown-builtin-x86.sh +# arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in +#]) AC_CONFIG_FILES([ Makefile executer/user/Makefile - executer/user/arch/x86_64/Makefile + executer/user/arch/$ARCH/Makefile executer/kernel/mcctrl/Makefile - executer/kernel/mcctrl/arch/x86_64/Makefile + executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile @@ -479,6 +550,8 @@ AC_CONFIG_FILES([ arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in arch/x86/tools/irqbalance_mck.service arch/x86/tools/irqbalance_mck.in + kernel/config/config.smp-arm64 + arch/arm64/kernel/vdso/Makefile ]) AS_IF([test "x$enable_dcfa" = xyes], [ diff --git a/executer/kernel/mcctrl/Makefile.in b/executer/kernel/mcctrl/Makefile.in index b5851465..f8727a14 100644 --- a/executer/kernel/mcctrl/Makefile.in +++ b/executer/kernel/mcctrl/Makefile.in @@ -1,3 +1,4 @@ +# Makefile.in COPYRIGHT FUJITSU LIMITED 2016 KDIR ?= @KDIR@ ARCH ?= @ARCH@ src = @abs_srcdir@ @@ -7,12 +8,18 @@ IHK_BASE=$(src)/../../../../ihk obj-m += mcctrl.o -ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ +# POSTK_DEBUG_ARCH_DEP_1, arch depend "-mcmodel" +# POSTK_DEBUG_ARCH_DEP_83, arch depend translate_rva_to_rpa() move +#ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ +ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -I$(src)/arch/$(ARCH)/include -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@ + mcctrl-y := driver.o control.o 
ikc.o syscall.o procfs.o binfmt_mcexec.o mcctrl-y += sysfs.o sysfs_files.o arch/$(ARCH)/archdeps.o KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../../ihk/linux/core/Module.symvers +EXTRA_CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +EXTRA_CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) .PHONY: clean install modules diff --git a/executer/kernel/mcctrl/arch/arm64/Makefile.in b/executer/kernel/mcctrl/arch/arm64/Makefile.in new file mode 100644 index 00000000..357aacdb --- /dev/null +++ b/executer/kernel/mcctrl/arch/arm64/Makefile.in @@ -0,0 +1,2 @@ +# Makefile.in COPYRIGHT FUJITSU LIMITED 2016 +# dummy file diff --git a/executer/kernel/mcctrl/arch/arm64/archdeps.c b/executer/kernel/mcctrl/arch/arm64/archdeps.c new file mode 100644 index 00000000..6c297e84 --- /dev/null +++ b/executer/kernel/mcctrl/arch/arm64/archdeps.c @@ -0,0 +1,316 @@ +/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ +#include +#include +#include +#include "../../../config.h" +#include "../../mcctrl.h" + +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +//#define SC_DEBUG + +#ifdef SC_DEBUG +#define dprintk(...) printk(__VA_ARGS__) +#else +#define dprintk(...) +#endif +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ + +#define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__) + +#ifdef MCCTRL_KSYM_vdso_start +# if MCCTRL_KSYM_vdso_start +void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; +# endif +#else +# error missing address of vdso_start. +#endif + +#ifdef MCCTRL_KSYM_vdso_end +# if MCCTRL_KSYM_vdso_end +void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; +# endif +#else +# error missing address of vdso_end. +#endif + +#ifdef MCCTRL_KSYM_vdso_spec +# if MCCTRL_KSYM_vdso_spec +static struct vm_special_mapping (*vdso_spec)[2] = (void*)MCCTRL_KSYM_vdso_spec; +# endif +#else +# error missing address of vdso_spec. 
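+/*
+ * Note on the MCCTRL_KSYM_* pattern (POSTK_DEBUG_ARCH_DEP_50): configure
+ * greps System.map for vdso_spec and emits its address into config.h, or
+ * 0 when a __ksymtab entry shows the symbol is exported. The nested
+ * #ifdef/#if above therefore distinguishes "probe never ran" (build
+ * error) from "exported, reference it directly" (value 0).
+ */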
+#endif + +#ifdef POSTK_DEBUG_ARCH_DEP_52 +#define VDSO_MAXPAGES 1 +struct vdso { + long busy; + int vdso_npages; + int padding; + long vdso_physlist[VDSO_MAXPAGES]; + long vvar_phys; + long lbase; + long offset_sigtramp; +}; +#endif /*POSTK_DEBUG_ARCH_DEP_52*/ + +unsigned long +reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end); + +int +reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp) +{ + struct vm_area_struct *vma; + unsigned long start = 0L; + unsigned long end; + + if (mutex_lock_killable(&usrdata->reserve_lock) < 0) { + return -1; + } + +#define DESIRED_USER_END TASK_UNMAPPED_BASE + end = DESIRED_USER_END; + down_write(&current->mm->mmap_sem); + vma = find_vma(current->mm, 0); + if (vma->vm_start < end) { + printk("mcctrl:user space overlap.\n"); + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) + up_write(&current->mm->mmap_sem); +#endif + start = reserve_user_space_common(usrdata, start, end); +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) + up_write(&current->mm->mmap_sem); +#endif + + mutex_unlock(&usrdata->reserve_lock); + + if (IS_ERR_VALUE(start)) { + return start; + } + *startp = start; + *endp = end; + return 0; +} + +void get_vdso_info(ihk_os_t os, long vdso_rpa) +{ + ihk_device_t dev = ihk_os_to_dev(os); + struct vm_special_mapping* vvar_map; + struct vm_special_mapping* vdso_map; + int nr_vdso_page; + long vdso_pa; + struct vdso *vdso; + + vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso)); + vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) + vvar_map = &(*vdso_spec)[0]; + vdso_map = &(*vdso_spec)[1]; + nr_vdso_page = ((vdso_end - vdso_start) + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* VDSO pages */ + //D("nr_vdso_page:%d\n", nr_vdso_page); + vdso->vdso_npages = 1; //vdso page is supposed to be one + if (vdso->vdso_npages != nr_vdso_page) { + vdso->vdso_npages = 0; + goto out; + } + //D("vdso->vdso_physlist[0]:0x#lx\n", vdso->vdso_physlist[0]); + vdso->vdso_physlist[0] = page_to_phys(*vdso_map->pages); + + /* VVAR page */ + //D("vdso->vvar_phys:0x#lx\n", vdso->vvar_phys); + vdso->vvar_phys = page_to_phys(*vvar_map->pages); + + /* offsets */ + vdso->lbase = VDSO_LBASE; + vdso->offset_sigtramp = vdso_offset_sigtramp; +#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0)*/ +out: + wmb(); + vdso->busy = 0; + + ihk_device_unmap_virtual(dev, vdso, sizeof(*vdso)); + ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso)); + return; +} /* get_vdso_info() */ + +void * +get_user_sp(void) +{ + /* TODO; skeleton for UTI */ + return NULL; +} + +void +set_user_sp(void *usp) +{ + /* TODO; skeleton for UTI */ +} + +/* TODO; skeleton for UTI */ +struct trans_uctx { + volatile int cond; + int fregsize; + + unsigned long rax; + unsigned long rbx; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long rbp; + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long rflags; + unsigned long rip; + unsigned long rsp; + unsigned long fs; +}; + +void +restore_fs(unsigned long fs) +{ + /* TODO; skeleton for UTI */ +} + +void +save_fs_ctx(void *ctx) +{ + /* TODO; skeleton for UTI */ +} + +unsigned long +get_fs_ctx(void *ctx) +{ + /* TODO; skeleton for UTI */ + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) +# define IHK_MC_PGTABLE_LEVELS CONFIG_ARM64_PGTABLE_LEVELS
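+/*
+ * Linux renamed the arm64-specific CONFIG_ARM64_PGTABLE_LEVELS to the
+ * generic CONFIG_PGTABLE_LEVELS around v4.2, hence this version switch
+ * picking the right Kconfig symbol for the page-table walk below.
+ */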
+#else +# define IHK_MC_PGTABLE_LEVELS CONFIG_PGTABLE_LEVELS +#endif + +typedef unsigned long translation_table_t; +struct page_table { + translation_table_t* tt; + translation_table_t* tt_pa; + int asid; +}; + +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, + unsigned long *rpap, unsigned long *pgsizep) +{ + unsigned long rpa; + int i; + int ix; + unsigned long phys; + unsigned long *pt; + int error; + unsigned long pgsize; + struct page_table* tbl; + + struct property { + int idx_bits; + int block; /*block support flag*/ + int pgshift; + } properties[3][4] = { + { /* 4KB */ + {.idx_bits = 47 - 39 + 1, .block = 0, .pgshift = 39}, /*zero*/ + {.idx_bits = 38 - 30 + 1, .block = 1, .pgshift = 30}, /*first*/ + {.idx_bits = 29 - 21 + 1, .block = 1, .pgshift = 21}, /*second*/ + {.idx_bits = 20 - 12 + 1, .block = 0, .pgshift = 12}, /*third*/ + }, + { /* 16KB */ + {.idx_bits = 47 - 47 + 1, .block = 0, .pgshift = 47}, /*zero*/ + {.idx_bits = 46 - 36 + 1, .block = 0, .pgshift = 36}, /*first*/ + {.idx_bits = 35 - 25 + 1, .block = 1, .pgshift = 25}, /*second*/ + {.idx_bits = 24 - 14 + 1, .block = 0, .pgshift = 14}, /*third*/ + }, + { /* 64KB */ + {0}, /*zero*/ + {.idx_bits = 47 - 42 + 1, .block = 0, .pgshift = 42}, /*first*/ + {.idx_bits = 41 - 29 + 1, .block = 1, .pgshift = 29}, /*second*/ + {.idx_bits = 28 - 16 + 1, .block = 0, .pgshift = 16}, /*third*/ + }, + }; + const struct property* prop = + (PAGE_SIZE == (1UL << 12)) ? &(properties[0][0]) : + (PAGE_SIZE == (1UL << 14)) ? &(properties[1][0]) : + (PAGE_SIZE == (1UL << 16)) ? &(properties[2][0]) : NULL; + + // page table to translation_table. + phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE); + tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0); + rpa = (unsigned long)tbl->tt_pa; + + /* i = 0:zero, 1:first, 2:second, 3:third */ + for (i = 4 - IHK_MC_PGTABLE_LEVELS; i < 4; ++i) { + ix = (rva >> prop[i].pgshift) & ((1 << prop[i].idx_bits) - 1); + phys = ihk_device_map_memory(ihk_os_to_dev(os), rpa, PAGE_SIZE); + pt = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0); + dprintk("rpa %#lx offsh %d ix %#x phys %#lx pt %p pt[ix] %#lx\n", + rpa, prop[i].pgshift, ix, phys, pt, pt[ix]); + +#define PG_DESC_VALID 0x1 + if (!(pt[ix] & PG_DESC_VALID)) { + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + error = -EFAULT; + dprintk("Remote PTE is not present for 0x%lx (rpt: %lx) ?\n", rva, rpt); + goto out; + } + +#define PG_DESC_TYEP_MASK 0x3 +#define PG_DESC_BLOCK 0x1 + if (prop[i].block && (pt[ix]&PG_DESC_TYEP_MASK) == PG_DESC_BLOCK) { + /* D_Block */ + pgsize = 1UL << prop[i].pgshift; + rpa = (pt[ix] & ((1UL << 47) - 1)) & ~(pgsize - 1); + rpa |= rva & (pgsize - 1); + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + error = 0; + goto found; + } + /* D_Table */ + rpa = (pt[ix] & ((1UL << 47) - 1)) & ~(PAGE_SIZE - 1); + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + } + /* D_Page */ + pgsize = PAGE_SIZE; + rpa |= rva & (pgsize - 1); + +found: + error = 0; + *rpap = rpa; + *pgsizep = pgsize; + +out: + dprintk("translate_rva_to_rpa: %d rva %#lx --> rpa %#lx (%lx)\n", + error, rva, rpa, pgsize); + return error; +} +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ + +#ifdef 
POSTK_DEBUG_ARCH_DEP_12 +#define PFN_WRITE_COMBINED PTE_ATTRINDX(MT_NORMAL_NC) +static inline bool pte_is_write_combined(pte_t pte) +{ + return ((pte_val(pte) & PTE_ATTRINDX_MASK) == PFN_WRITE_COMBINED); +} +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ + diff --git a/executer/kernel/mcctrl/arch/arm64/include/archdeps.h b/executer/kernel/mcctrl/arch/arm64/include/archdeps.h new file mode 100644 index 00000000..06da517e --- /dev/null +++ b/executer/kernel/mcctrl/arch/arm64/include/archdeps.h @@ -0,0 +1,18 @@ +/* archdeps.h COPYRIGHT FUJITSU LIMITED 2017 */ +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +#ifndef __HEADER_MCCTRL_ARM64_ARCHDEPS_H +#define __HEADER_MCCTRL_ARM64_ARCHDEPS_H + +extern int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, + unsigned long *rpap, unsigned long *pgsizep); + +#ifdef POSTK_DEBUG_ARCH_DEP_12 +#define PFN_WRITE_COMBINED PTE_ATTRINDX(MT_NORMAL_NC) + +static inline bool pte_is_write_combined(pte_t pte) +{ + return ((pte_val(pte) & PTE_ATTRINDX_MASK) == PFN_WRITE_COMBINED); +} +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ +#endif /* __HEADER_MCCTRL_ARM64_ARCHDEPS_H */ +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ diff --git a/executer/kernel/mcctrl/arch/x86_64/archdeps.c b/executer/kernel/mcctrl/arch/x86_64/archdeps.c index caec9c15..0f25d338 100644 --- a/executer/kernel/mcctrl/arch/x86_64/archdeps.c +++ b/executer/kernel/mcctrl/arch/x86_64/archdeps.c @@ -1,7 +1,18 @@ +/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */ #include #include "../../../config.h" #include "../../mcctrl.h" +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +//#define SC_DEBUG + +#ifdef SC_DEBUG +#define dprintk(...) printk(__VA_ARGS__) +#else +#define dprintk(...) +#endif +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ + #ifdef MCCTRL_KSYM_vdso_image_64 #if MCCTRL_KSYM_vdso_image_64 struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64; @@ -54,6 +65,25 @@ void **hv_clockp = NULL; #endif +#ifdef POSTK_DEBUG_ARCH_DEP_52 +#define VDSO_MAXPAGES 2 +struct vdso { + long busy; + int vdso_npages; + char vvar_is_global; + char hpet_is_global; + char pvti_is_global; + char padding; + long vdso_physlist[VDSO_MAXPAGES]; + void *vvar_virt; + long vvar_phys; + void *hpet_virt; + long hpet_phys; + void *pvti_virt; + long pvti_phys; +}; +#endif /*POSTK_DEBUG_ARCH_DEP_52*/ + unsigned long reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end); @@ -258,3 +288,76 @@ get_fs_ctx(void *ctx) return tctx->fs; } + +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, + unsigned long *rpap, unsigned long *pgsizep) +{ + unsigned long rpa; + int offsh; + int i; + int ix; + unsigned long phys; + unsigned long *pt; + int error; + unsigned long pgsize; + + rpa = rpt; + offsh = 39; + pgsize = 0; + /* i = 0: PML4, 1: PDPT, 2: PDT, 3: PT */ + for (i = 0; i < 4; ++i) { + ix = (rva >> offsh) & 0x1FF; + phys = ihk_device_map_memory(ihk_os_to_dev(os), rpa, PAGE_SIZE); + pt = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0); + dprintk("rpa %#lx offsh %d ix %#x phys %#lx pt %p pt[ix] %#lx\n", + rpa, offsh, ix, phys, pt, pt[ix]); + +#define PTE_P 0x001 + if (!(pt[ix] & PTE_P)) { + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + error = -EFAULT; + dprintk("Remote PTE is not present for 0x%lx (rpt: %lx) ?\n", rva, rpt); + goto out; + } 
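+	/*
+	 * Standard x86-64 4-level walk: each level consumes 9 bits of the
+	 * virtual address (shifts 39/30/21/12), indexing one of 512 entries.
+	 * For example, rva 0x7f0000201000 yields PML4/PDPT/PD/PT indices
+	 * 254/0/1/1. The PS bit checked next terminates the walk early at
+	 * a 1GiB or 2MiB large page.
+	 */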
+ +#define PTE_PS 0x080 + if (pt[ix] & PTE_PS) { + pgsize = 1UL << offsh; + rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1); + rpa |= rva & (pgsize - 1); + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + error = 0; + goto found; + } + + rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << 12) - 1); + offsh -= 9; + ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE); + ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE); + } + pgsize = 1UL << 12; + rpa |= rva & (pgsize - 1); + +found: + error = 0; + *rpap = rpa; + *pgsizep = pgsize; + +out: + dprintk("translate_rva_to_rpa: %d rva %#lx --> rpa %#lx (%lx)\n", + error, rva, rpa, pgsize); + return error; +} +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ + +#ifdef POSTK_DEBUG_ARCH_DEP_12 +#define PFN_WRITE_COMBINED _PAGE_PWT +static inline bool pte_is_write_combined(pte_t pte) +{ + return ((pte_flags(pte) & _PAGE_PWT) && !(pte_flags(pte) & _PAGE_PCD)); +} +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ + diff --git a/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h b/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h new file mode 100644 index 00000000..24645e6f --- /dev/null +++ b/executer/kernel/mcctrl/arch/x86_64/include/archdeps.h @@ -0,0 +1,18 @@ +/* archdeps.h COPYRIGHT FUJITSU LIMITED 2017 */ +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +#ifndef __HEADER_MCCTRL_X86_64_ARCHDEPS_H +#define __HEADER_MCCTRL_X86_64_ARCHDEPS_H + +extern int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, + unsigned long *rpap, unsigned long *pgsizep); + +#ifdef POSTK_DEBUG_ARCH_DEP_12 +#define PFN_WRITE_COMBINED _PAGE_PWT + +static inline bool pte_is_write_combined(pte_t pte) +{ + return ((pte_flags(pte) & _PAGE_PWT) && !(pte_flags(pte) & _PAGE_PCD)); +} +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ +#endif /* __HEADER_MCCTRL_X86_64_ARCHDEPS_H */ +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ diff --git a/executer/kernel/mcctrl/binfmt_mcexec.c b/executer/kernel/mcctrl/binfmt_mcexec.c index 0685e70b..6ad12857 100644 --- a/executer/kernel/mcctrl/binfmt_mcexec.c +++ b/executer/kernel/mcctrl/binfmt_mcexec.c @@ -122,6 +122,23 @@ static int load_elf(struct linux_binprm *bprm for(i = 0, st = 0; mode != 2;){ if(st == 0){ off = p & ~PAGE_MASK; +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + rc = get_user_pages_remote(current, bprm->mm, + bprm->p, 1, FOLL_FORCE, &page, NULL, NULL); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) + rc = get_user_pages_remote(current, bprm->mm, + bprm->p, 1, FOLL_FORCE, &page, NULL); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) + rc = get_user_pages_remote(current, bprm->mm, + bprm->p, 1, 0, 1, + &page, NULL); +#else + rc = get_user_pages(current, bprm->mm, + bprm->p, 1, 0, 1, + &page, NULL); +#endif +#else /* POSTK_DEBUG_ARCH_DEP_41 */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) rc = get_user_pages_remote(current, bprm->mm, bprm->p, 1, 0, 1, @@ -131,6 +148,7 @@ static int load_elf(struct linux_binprm *bprm bprm->p, 1, 0, 1, &page, NULL); #endif +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ if(rc <= 0) { kfree(pbuf); return -EFAULT; diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 341826c5..62e39ef5 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -1,3 +1,4 @@ +/* control.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ /** * \file executer/kernel/control.c * License details are found 
in the file LICENSE. @@ -337,6 +338,11 @@ struct mcos_handler_info *new_mcos_handler_info(ihk_os_t os, struct file *file) struct mcos_handler_info *info; info = kmalloc(sizeof(struct mcos_handler_info), GFP_KERNEL); +#ifdef POSTK_DEBUG_TEMP_FIX_64 /* host process is SIGKILLed fix. */ + if (info == NULL) { + return NULL; + } +#endif /* POSTK_DEBUG_TEMP_FIX_64 */ memset(info, '\0', sizeof(struct mcos_handler_info)); info->ud = ihk_host_os_get_usrdata(os); info->file = file; @@ -403,6 +409,11 @@ static long mcexec_newprocess(ihk_os_t os, return -EFAULT; } info = new_mcos_handler_info(os, file); +#ifdef POSTK_DEBUG_TEMP_FIX_64 /* host process is SIGKILLed fix. */ + if (info == NULL) { + return -ENOMEM; + } +#endif /* POSTK_DEBUG_TEMP_FIX_64 */ info->pid = desc.pid; ihk_os_register_release_handler(file, release_handler, info); ihk_os_set_mcos_private_data(file, info); @@ -433,6 +444,12 @@ static long mcexec_start_image(ihk_os_t os, } info = new_mcos_handler_info(os, file); +#ifdef POSTK_DEBUG_TEMP_FIX_64 /* host process is SIGKILLed fix. */ + if (info == NULL) { + kfree(desc); + return -ENOMEM; + } +#endif /* POSTK_DEBUG_TEMP_FIX_64 */ info->pid = desc->pid; info->cpu = desc->cpu; ihk_os_register_release_handler(file, release_handler, info); @@ -540,7 +557,11 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg) struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os); struct mcctrl_part_exec *pe; struct get_cpu_set_arg req; +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *cpu_top, *cpu_top_i; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *cpu_top, *cpu_top_i; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ struct cache_topology *cache_top; int cpu, cpus_assigned, cpus_to_assign, cpu_prev; int ret = 0; @@ -610,6 +631,13 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg) pli_next = NULL; /* Add ourself to the list in order of start time */ list_for_each_entry(pli_iter, &pe->pli_list, list) { +#ifdef POSTK_DEBUG_ARCH_DEP_74 /* Fix HOST-Linux version dependent code (task_struct.start_time) */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) + if (pli_iter->task->start_time > current->start_time) { + pli_next = pli_iter; + break; + } +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) */ if ((pli_iter->task->start_time.tv_sec > current->start_time.tv_sec) || ((pli_iter->task->start_time.tv_sec == @@ -619,6 +647,18 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg) pli_next = pli_iter; break; } +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_74 */ + if ((pli_iter->task->start_time.tv_sec > + current->start_time.tv_sec) || + ((pli_iter->task->start_time.tv_sec == + current->start_time.tv_sec) && + ((pli_iter->task->start_time.tv_nsec > + current->start_time.tv_nsec)))) { + pli_next = pli_iter; + break; + } +#endif /* POSTK_DEBUG_ARCH_DEP_74 */ } /* Add in front of next */ @@ -1297,7 +1337,18 @@ retry_alloc: ret = -EINVAL;; goto put_ppd_out; } + +#ifdef POSTK_DEBUG_ARCH_DEP_46 /* user area direct access fix. 
*/ + if (copy_to_user(&req->cpu, &packet->ref, sizeof(req->cpu))) { + if (mcctrl_delete_per_thread_data(ppd, current) < 0) { + kprintf("%s: error deleting per-thread data\n", __FUNCTION__); + } + ret = -EINVAL; + goto put_ppd_out; + } +#else /* POSTK_DEBUG_ARCH_DEP_46 */ req->cpu = packet->ref; +#endif /* POSTK_DEBUG_ARCH_DEP_46 */ ret = 0; goto put_ppd_out; @@ -1497,6 +1548,42 @@ mcexec_getcred(unsigned long phys) { int *virt = phys_to_virt(phys); +#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */ + int ret = -EINVAL; + + if (virt[0] == 0 || virt[0] == task_pid_vnr(current)) { + virt[0] = GUIDVAL(current_uid()); + virt[1] = GUIDVAL(current_euid()); + virt[2] = GUIDVAL(current_suid()); + virt[3] = GUIDVAL(current_fsuid()); + virt[4] = GUIDVAL(current_gid()); + virt[5] = GUIDVAL(current_egid()); + virt[6] = GUIDVAL(current_sgid()); + virt[7] = GUIDVAL(current_fsgid()); + + ret = 0; + } else { + const struct task_struct *task_p = + pid_task(find_get_pid(virt[0]), PIDTYPE_PID); + if (task_p) { + const struct cred *t_cred = __task_cred(task_p); + + rcu_read_lock(); + virt[0] = GUIDVAL(t_cred->uid); + virt[1] = GUIDVAL(t_cred->euid); + virt[2] = GUIDVAL(t_cred->suid); + virt[3] = GUIDVAL(t_cred->fsuid); + virt[4] = GUIDVAL(t_cred->gid); + virt[5] = GUIDVAL(t_cred->egid); + virt[6] = GUIDVAL(t_cred->sgid); + virt[7] = GUIDVAL(t_cred->fsgid); + rcu_read_unlock(); + + ret = 0; + } + } + return ret; +#else /* POSTK_DEBUG_TEMP_FIX_45 */ virt[0] = GUIDVAL(current_uid()); virt[1] = GUIDVAL(current_euid()); virt[2] = GUIDVAL(current_suid()); @@ -1506,6 +1593,7 @@ mcexec_getcred(unsigned long phys) virt[6] = GUIDVAL(current_sgid()); virt[7] = GUIDVAL(current_fsgid()); return 0; +#endif /* POSTK_DEBUG_TEMP_FIX_45 */ } int @@ -2416,8 +2504,13 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) cpumask_t *cpuset; struct mcctrl_usrdata *ud = ihk_host_os_get_usrdata(os); ihk_device_t dev = ihk_os_to_dev(os); +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *cpu_topo; + struct mcctrl_cpu_topology *target_cpu = NULL; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *cpu_topo; struct cpu_topology *target_cpu = NULL; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ struct node_topology *node_topo; struct ihk_cache_topology *lcache_topo; struct ihk_node_topology *lnode_topo; @@ -2486,8 +2579,18 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg) continue; if(IS_ERR(lnode_topo)) continue; +#ifdef POSTK_DEBUG_ARCH_DEP_54 /* cpu_isset() linux version depend fix. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) + if (cpumask_test_cpu(target_cpu->saved->cpu_number, + &lnode_topo->cpumap)) { +#else if (cpu_isset(target_cpu->saved->cpu_number, lnode_topo->cpumap)) { +#endif +#else /* POSTK_DEBUG_ARCH_DEP_54 */ + if (cpu_isset(target_cpu->saved->cpu_number, + lnode_topo->cpumap)) { +#endif /* POSTK_DEBUG_ARCH_DEP_54 */ if (kattr->attr.flags & UTI_FLAG_SAME_NUMA_DOMAIN) { cpumask_or(wkmask, wkmask, diff --git a/executer/kernel/mcctrl/driver.c b/executer/kernel/mcctrl/driver.c index 15b2c4c9..af411169 100644 --- a/executer/kernel/mcctrl/driver.c +++ b/executer/kernel/mcctrl/driver.c @@ -1,3 +1,4 @@ +/* driver.c COPYRIGHT FUJITSU LIMITED 2016 */ /** * \file executer/kernel/driver.c * License details are found in the file LICENSE. 
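Stepping back to the control.c hunk above: the POSTK_DEBUG_TEMP_FIX_45 path reads another task's credentials via pid_task(find_get_pid(...)) and __task_cred(). Below is a tightened sketch of that lookup, not part of the patch: it balances the reference taken by find_get_pid() with put_pid(), and widens the RCU read section so it covers both pid_task() and __task_cred(), which the in-patch version only enters after obtaining the task pointer. GUIDVAL() and the virt[] layout come from the patch; the helper name is invented.

```c
/* Sketch only: hardened variant of the TEMP_FIX_45 credential lookup. */
#include <linux/cred.h>
#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

static int fill_cred_for_pid(pid_t nr, int *virt)
{
	struct pid *pid = find_get_pid(nr);	/* takes a reference */
	struct task_struct *task;
	int ret = -EINVAL;

	if (!pid)
		return ret;

	rcu_read_lock();
	task = pid_task(pid, PIDTYPE_PID);
	if (task) {
		const struct cred *c = __task_cred(task);

		virt[0] = GUIDVAL(c->uid);
		virt[1] = GUIDVAL(c->euid);
		virt[2] = GUIDVAL(c->suid);
		virt[3] = GUIDVAL(c->fsuid);
		virt[4] = GUIDVAL(c->gid);
		virt[5] = GUIDVAL(c->egid);
		virt[6] = GUIDVAL(c->sgid);
		virt[7] = GUIDVAL(c->fsgid);
		ret = 0;
	}
	rcu_read_unlock();
	put_pid(pid);
	return ret;
}
```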
@@ -174,6 +175,9 @@ int mcctrl_os_shutdown_notifier(int os_index) destroy_ikc_channels(os[os_index]); procfs_exit(os_index); } +#ifdef POSTK_DEBUG_TEMP_FIX_35 /* in shutdown phase, rus_page_hash_put_pages() call added. */ + rus_page_hash_put_pages(); +#endif /* POSTK_DEBUG_TEMP_FIX_35 */ os[os_index] = NULL; diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index 5166f447..b8ab3144 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -1,3 +1,4 @@ +// mcctrl.h COPYRIGHT FUJITSU LIMITED 2016-2017 /** * \file mcctrl.h * License details are found in the file LICENSE. @@ -106,10 +107,17 @@ #define __NR_coredump 999 +#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ +struct coretable { + loff_t len; + unsigned long addr; +}; +#else /* POSTK_DEBUG_TEMP_FIX_61 */ struct coretable { int len; unsigned long addr; }; +#endif /* POSTK_DEBUG_TEMP_FIX_61 */ enum mcctrl_os_cpu_operation { MCCTRL_OS_CPU_READ_REGISTER, @@ -263,7 +271,11 @@ struct cache_topology { struct list_head chain; }; +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +struct mcctrl_cpu_topology { +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology { +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ //struct mcctrl_usrdata *udp; struct ihk_cpu_topology *saved; int mckernel_cpu_id; @@ -367,8 +379,31 @@ int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task, void *data); int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd, struct task_struct *task); +#ifdef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */ +static inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( + struct mcctrl_per_proc_data *ppd, struct task_struct *task) +{ + struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; + int hash = (((uint64_t)task >> 4) & MCCTRL_PER_THREAD_DATA_HASH_MASK); + unsigned long flags; + + /* Check if data for this thread exists and return it */ + read_lock_irqsave(&ppd->per_thread_data_hash_lock[hash], flags); + + list_for_each_entry(ptd_iter, &ppd->per_thread_data_hash[hash], hash) { + if (ptd_iter->task == task) { + ptd = ptd_iter; + break; + } + } + + read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); + return ptd ? ptd->data : NULL; +} +#else /* POSTK_DEBUG_ARCH_DEP_56 */ inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data( struct mcctrl_per_proc_data *ppd, struct task_struct *task); +#endif /* POSTK_DEBUG_ARCH_DEP_56 */ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, long ret, int stid); @@ -409,6 +444,7 @@ void reply_get_cpu_mapping(long req_pa); void free_topology_info(ihk_os_t os); /* archdep.c */ +#ifndef POSTK_DEBUG_ARCH_DEP_52 #define VDSO_MAXPAGES 2 struct vdso { long busy; @@ -425,6 +461,7 @@ struct vdso { void *pvti_virt; long pvti_phys; }; +#endif /*POSTK_DEBUG_ARCH_DEP_52*/ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp); diff --git a/executer/kernel/mcctrl/procfs.c b/executer/kernel/mcctrl/procfs.c index 1b9e0e66..c6d28251 100644 --- a/executer/kernel/mcctrl/procfs.c +++ b/executer/kernel/mcctrl/procfs.c @@ -9,6 +9,7 @@ /* * HISTORY: */ +/* procfs.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ #include #include @@ -508,6 +509,215 @@ procfs_exit(int osnum) * This function conforms to the 2) way of fs/proc/generic.c * from linux-2.6.39.4. 
*/ +#ifdef POSTK_DEBUG_TEMP_FIX_43 /* Fixed an issue that failed pread / pwrite of size larger than 4MB */ +static ssize_t __mckernel_procfs_read_write( + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos, int read_write) +{ + struct inode * inode = file->f_inode; + char *kern_buffer = NULL; + int order = 0; + volatile struct procfs_read *r = NULL; + struct ikc_scd_packet isp; + int ret, osnum, pid, retw; + unsigned long pbuf; + size_t count = nbytes; + size_t copy_size = 0; + size_t copied = 0; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) + struct proc_dir_entry *dp = PDE(inode); + struct procfs_list_entry *e = dp->data; +#else + struct procfs_list_entry *e = PDE_DATA(inode); +#endif + loff_t offset = *ppos; + char pathbuf[PROCFS_NAME_MAX]; + char *path, *p; + ihk_os_t os = NULL; + struct mcctrl_usrdata *udp = NULL; + struct mcctrl_per_proc_data *ppd = NULL; + + if (count <= 0 || offset < 0) { + return 0; + } + + path = getpath(e, pathbuf, PROCFS_NAME_MAX); + dprintk("%s: invoked for %s, offset: %lu, count: %lu\n", + __FUNCTION__, path, + (unsigned long)offset, count); + + /* Verify OS number */ + ret = sscanf(path, "mcos%d/", &osnum); + if (ret != 1) { + printk("%s: error: couldn't determine OS number\n", __FUNCTION__); + return -EINVAL; + } + + if (osnum != e->osnum) { + printk("%s: error: OS numbers don't match\n", __FUNCTION__); + return -EINVAL; + } + + /* Is this request for a specific process? */ + p = strchr(path, '/') + 1; + ret = sscanf(p, "%d/", &pid); + if (ret != 1) { + pid = -1; + } + + os = osnum_to_os(osnum); + if (!os) { + printk("%s: error: no IHK OS data found for OS %d\n", + __FUNCTION__, osnum); + return -EINVAL; + } + + udp = ihk_host_os_get_usrdata(os); + if (!udp) { + printk("%s: error: no MCCTRL data found for OS %d\n", + __FUNCTION__, osnum); + return -EINVAL; + } + + if (pid > 0) { + ppd = mcctrl_get_per_proc_data(udp, pid); + + if (unlikely(!ppd)) { + printk("%s: error: no per-process structure for PID %d", + __FUNCTION__, pid); + return -EINVAL; + } + } + + /* NOTE: we need physically contigous memory to pass through IKC */ + for (order = get_order(count); order >= 0; order--) { + kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order); + if (kern_buffer) { + break; + } + } + + if (!kern_buffer) { + printk("%s: ERROR: allocating kernel buffer\n", __FUNCTION__); + ret = -ENOMEM; + goto out; + } + copy_size = PAGE_SIZE * (1 << order); + + pbuf = virt_to_phys(kern_buffer); + + r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL); + if (r == NULL) { + ret = -ENOMEM; + goto out; + } + + while (count > 0) { + int this_len = min_t(ssize_t, count, copy_size); + + r->pbuf = pbuf; + r->eof = 0; + r->ret = -EIO; /* default */ + r->status = 0; + r->offset = offset; + r->count = this_len; + r->readwrite = read_write; + strncpy((char *)r->fname, path, PROCFS_NAME_MAX); + isp.msg = SCD_MSG_PROCFS_REQUEST; + isp.ref = 0; + isp.arg = virt_to_phys(r); + isp.pid = pid; + + ret = mcctrl_ikc_send(osnum_to_os(e->osnum), + (pid > 0) ? ppd->ikc_target_cpu : 0, &isp); + + if (ret < 0) { + goto out; /* error */ + } + + /* Wait for a reply. */ + ret = -EIO; /* default exit code */ + dprintk("%s: waiting for reply\n", __FUNCTION__); + +retry_wait: + /* Wait for the status field of the procfs_read structure, + * wait on per-process or OS specific data depending on + * who the request is for. 
+ */ + if (pid > 0) { + retw = wait_event_interruptible_timeout(ppd->wq_procfs, + r->status != 0, HZ); + } + else { + retw = wait_event_interruptible_timeout(udp->wq_procfs, + r->status != 0, HZ); + } + + /* Timeout? */ + if (retw == 0 && r->status == 0) { + printk("%s: error: timeout (1 sec)\n", __FUNCTION__); + goto out; + } + /* Interrupted? */ + else if (retw == -ERESTARTSYS) { + ret = -ERESTART; + goto out; + } + /* Were we woken up by a reply to another procfs request? */ + else if (r->status == 0) { + /* TODO: r->status is not set atomically, we could be woken + * up with status == 0 and it could change to 1 while in this + * code, we could potentially miss the wake_up()... + */ + printk("%s: stale wake-up, retrying\n", __FUNCTION__); + goto retry_wait; + } + + /* Wake up and check the result. */ + dprintk("%s: woke up. ret: %d, eof: %d\n", + __FUNCTION__, r->ret, r->eof); + + if (r->ret > 0) { + if (read_write == 0) { + if (copy_to_user(buf, kern_buffer, r->ret)) { + printk("%s: ERROR: copy_to_user failed.\n", __FUNCTION__); + ret = -EFAULT; + goto out; + } + } + + buf += r->ret; + offset += r->ret; + copied += r->ret; + count -= r->ret; + } + else { + if (!copied) { + /* Transmit error from McKernel */ + copied = r->ret; + } + break; + } + + if (r->eof != 0) { + break; + } + } + *ppos = offset; + ret = copied; + +out: + if (ppd) + mcctrl_put_per_proc_data(ppd); + if (kern_buffer) + free_pages((uintptr_t)kern_buffer, order); + if (r) + kfree((void *)r); + + return ret; +} +#else /* POSTK_DEBUG_TEMP_FIX_43 */ static ssize_t __mckernel_procfs_read_write( struct file *file, char __user *buf, size_t nbytes, @@ -693,6 +903,7 @@ out: return ret; } +#endif /* POSTK_DEBUG_TEMP_FIX_43 */ static ssize_t mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) @@ -832,7 +1043,11 @@ static const struct procfs_entry pid_entry_stuff[] = { static const struct procfs_entry base_entry_stuff[] = { // PROC_REG("cmdline", S_IRUGO, NULL), +#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */ + PROC_REG("cpuinfo", S_IRUGO, NULL), +#else /* POSTK_DEBUG_ARCH_DEP_42 */ // PROC_REG("cpuinfo", S_IRUGO, NULL), +#endif /* POSTK_DEBUG_ARCH_DEP_42 */ // PROC_REG("meminfo", S_IRUGO, NULL), // PROC_REG("pagetypeinfo",S_IRUGO, NULL), // PROC_REG("softirq", S_IRUGO, NULL), diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 7970822a..a3fe73d1 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -1,3 +1,4 @@ +/* syscall.c COPYRIGHT FUJITSU LIMITED 2016-2017 */ /** * \file executer/kernel/syscall.c * License details are found in the file LICENSE. @@ -48,6 +49,9 @@ #include "../../../config.h" #include "mcctrl.h" #include +#ifdef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_83 */ #define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6) @@ -157,6 +161,7 @@ out: return ret; } +#ifndef POSTK_DEBUG_ARCH_DEP_56 /* Strange how to use inline declaration fix. */ struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc_data *ppd, struct task_struct *task) { struct mcctrl_per_thread_data *ptd_iter, *ptd = NULL; @@ -176,7 +181,9 @@ struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc read_unlock_irqrestore(&ppd->per_thread_data_hash_lock[hash], flags); return ptd ? 
ptd->data : NULL; } +#endif /* !POSTK_DEBUG_ARCH_DEP_56 */ +#ifndef POSTK_DEBUG_ARCH_DEP_83 /* arch depend translate_rva_to_rpa() move */ #if 1 /* x86 depend, host OS side */ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva, unsigned long *rpap, unsigned long *pgsizep) @@ -240,6 +247,7 @@ out: return error; } #endif +#endif /* !POSTK_DEBUG_ARCH_DEP_83 */ static int __notify_syscall_requester(ihk_os_t os, struct ikc_scd_packet *packet, struct syscall_response *res) @@ -764,8 +772,18 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct ikc_scd_packet *packet; int ret = 0; +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %#lx page %p\n", + vmf->flags, vmf->pgoff, vmf->address, vmf->page); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + dprintk("mcctrl:page fault:flags %#x pgoff %#lx va %p page %p\n", + vmf->flags, vmf->pgoff, vmf->virtual_address, vmf->page); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ /* Look up per-process structure */ ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current)); @@ -788,16 +806,41 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } for (try = 1; ; ++try) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + error = translate_rva_to_rpa(usrdata->os, ppd->rpgtable, + vmf->address, &rpa, &pgsize); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ error = translate_rva_to_rpa(usrdata->os, ppd->rpgtable, (unsigned long)vmf->virtual_address, &rpa, &pgsize); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + error = translate_rva_to_rpa(usrdata->os, ppd->rpgtable, + (unsigned long)vmf->virtual_address, + &rpa, &pgsize); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ #define NTRIES 2 if (!error || (try >= NTRIES)) { if (error) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + printk("%s: error translating 0x%#lx " + "(req: TID: %u, syscall: %lu)\n", + __FUNCTION__, vmf->address, + packet->req.rtid, packet->req.number); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ printk("%s: error translating 0x%p " "(req: TID: %u, syscall: %lu)\n", __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + printk("%s: error translating 0x%p " + "(req: TID: %u, syscall: %lu)\n", + __FUNCTION__, vmf->virtual_address, + packet->req.rtid, packet->req.number); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } break; @@ -808,12 +851,34 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #define PF_WRITE 0x02 reason |= PF_WRITE; } +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + error = remote_page_fault(usrdata, (void *)vmf->address, reason); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ error = remote_page_fault(usrdata, vmf->virtual_address, reason); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + error = 
remote_page_fault(usrdata, vmf->virtual_address, reason); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ if (error) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + printk("%s: error forwarding PF for 0x%#lx " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->address, + packet->req.rtid, packet->req.number); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ printk("%s: error forwarding PF for 0x%p " "(req: TID: %d, syscall: %lu)\n", __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + printk("%s: error forwarding PF for 0x%p " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->virtual_address, + packet->req.rtid, packet->req.number); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ break; } } @@ -822,7 +887,15 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto put_and_out; } +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + rva = vmf->address & ~(pgsize - 1); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ rva = (unsigned long)vmf->virtual_address & ~(pgsize - 1); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + rva = (unsigned long)vmf->virtual_address & ~(pgsize - 1); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ rpa = rpa & ~(pgsize - 1); phys = ihk_device_map_memory(dev, rpa, pgsize); @@ -841,26 +914,66 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = pfn_to_page(pfn+pix); if ((error = rus_page_hash_insert(page)) < 0) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + printk("%s: error adding page to RUS hash for 0x%#lx " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->address, + packet->req.rtid, packet->req.number); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ printk("%s: error adding page to RUS hash for 0x%p " "(req: TID: %d, syscall: %lu)\n", __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + printk("%s: error adding page to RUS hash for 0x%p " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->virtual_address, + packet->req.rtid, packet->req.number); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page); if (error) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + printk("%s: error inserting mapping for 0x%#lx " + "(req: TID: %d, syscall: %lu) error: %d, " + "vm_start: 0x%lx, vm_end: 0x%lx\n", + __FUNCTION__, vmf->address, + packet->req.rtid, packet->req.number, error, + vma->vm_start, vma->vm_end); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ printk("%s: error inserting mapping for 0x%p " "(req: TID: %d, syscall: %lu) error: %d, " "vm_start: 0x%lx, vm_end: 0x%lx\n", __FUNCTION__, vmf->virtual_address, packet->req.rtid, packet->req.number, error, vma->vm_start, vma->vm_end); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + printk("%s: error inserting mapping for 0x%p " + "(req: TID: %d, syscall: %lu) error: %d, " + "vm_start: 0x%lx, vm_end: 0x%lx\n", + __FUNCTION__, vmf->virtual_address, + packet->req.rtid, 
packet->req.number, error, + vma->vm_start, vma->vm_end); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } } else error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE), pfn+pix); if (error) { +#ifdef POSTK_DEBUG_TEMP_FIX_11 /* rus_vm_fault() multi-thread fix */ + if (error == -EBUSY) { + error = 0; + } else { + break; + } +#else /* POSTK_DEBUG_TEMP_FIX_11 */ break; +#endif /* POSTK_DEBUG_TEMP_FIX_11 */ } } #else @@ -868,10 +981,24 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) #endif ihk_device_unmap_memory(dev, phys, pgsize); if (error) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* HOST-Linux version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) + printk("%s: remote PF failed for 0x%#lx, pgoff: %lu " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->address, vmf->pgoff, + packet->req.rtid, packet->req.number); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ printk("%s: remote PF failed for 0x%p, pgoff: %lu " "(req: TID: %d, syscall: %lu)\n", __FUNCTION__, vmf->virtual_address, vmf->pgoff, packet->req.rtid, packet->req.number); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0) */ +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + printk("%s: remote PF failed for 0x%p, pgoff: %lu " + "(req: TID: %d, syscall: %lu)\n", + __FUNCTION__, vmf->virtual_address, vmf->pgoff, + packet->req.rtid, packet->req.number); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ ret = VM_FAULT_SIGBUS; goto put_and_out; } @@ -1322,12 +1449,17 @@ static int pager_req_read(ihk_os_t os, uintptr_t handle, off_t off, size_t size, pos = off; ss = vfs_read(file, buf, size, &pos); if ((ss != size) && (ss > 0)) { +#ifdef POSTK_DEBUG_TEMP_FIX_12 /* clear_user() used by kernel area, fix */ + memset(buf + ss, 0, size - ss); + ss = size; +#else /* POSTK_DEBUG_TEMP_FIX_12 */ if (clear_user(buf+ss, size-ss) == 0) { ss = size; } else { ss = -EFAULT; } +#endif /* POSTK_DEBUG_TEMP_FIX_12 */ } set_fs(fs); if (ss < 0) { @@ -1604,10 +1736,16 @@ retry: pfn |= PFN_VALID | PFN_PRESENT; /* Check if mapping is write-combined */ +#ifdef POSTK_DEBUG_ARCH_DEP_12 + if (pte_is_write_combined(*pte)) { + pfn |= PFN_WRITE_COMBINED; + } +#else /* POSTK_DEBUG_ARCH_DEP_12 */ if ((pte_flags(*pte) & _PAGE_PWT) && !(pte_flags(*pte) & _PAGE_PCD)) { pfn |= _PAGE_PWT; } +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ } pte_unmap(pte); } @@ -1631,7 +1769,11 @@ retry: goto out_release; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) + fault = handle_mm_fault(vma, va, flags); +#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) */ fault = handle_mm_fault(current->mm, vma, va, flags); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) */ if (fault != 0) { printk("%s: error: faulting %lx at off: %lu\n", __FUNCTION__, va, off); @@ -1972,7 +2114,13 @@ static int clear_pte_range(uintptr_t start, uintptr_t len) static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { struct file *file; struct coretable *coretable; +#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ + int i, tablesize, error = 0; + loff_t size; + ssize_t ret; +#else /* POSTK_DEBUG_TEMP_FIX_61 */ int ret, i, tablesize, size, error = 0; +#endif /* POSTK_DEBUG_TEMP_FIX_61 */ mm_segment_t oldfs = get_fs(); unsigned long phys, tablephys, rphys; ihk_device_t dev = ihk_os_to_dev(os); @@ -1994,8 +2142,20 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { * dump routine of the Linux kernel in linux/fs/exec.c. * So we have a legitimate reason to do this. 
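+	 * Opening with O_TRUNC (POSTK_DEBUG_TEMP_FIX_59 below) presumably also ensures that a dump shorter than an existing core file does not leave stale trailing bytes behind.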
*/ +#ifdef POSTK_DEBUG_TEMP_FIX_59 /* corefile open flag add O_TRUNC */ + file = filp_open("core", O_CREAT | O_RDWR | O_LARGEFILE | O_TRUNC, 0600); +#else /* POSTK_DEBUG_TEMP_FIX_59 */ file = filp_open("core", O_CREAT | O_RDWR | O_LARGEFILE, 0600); +#endif /* POSTK_DEBUG_TEMP_FIX_59 */ +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* use writehandler version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) + if (IS_ERR(file) || !file->f_op) { +#else if (IS_ERR(file) || !file->f_op || !file->f_op->write) { +#endif +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + if (IS_ERR(file) || !file->f_op || !file->f_op->write) { +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ dprintk("cannot open core file\n"); error = PTR_ERR(file); goto fail; @@ -2014,9 +2174,22 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { phys = ihk_device_map_memory(dev, rphys, size); dprintk("physical %lx, ", phys); pt = ihk_device_map_virtual(dev, phys, size, NULL, 0); +#ifdef POSTK_DEBUG_TEMP_FIX_38 + if (pt == NULL) { + pt = phys_to_virt(phys); + } +#endif /*POSTK_DEBUG_TEMP_FIX_38*/ dprintk("virtual %p\n", pt); if (pt != NULL) { +#ifdef POSTK_DEBUG_ARCH_DEP_41 /* use writehandler version switch add */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0) + ret = __kernel_write(file, pt, size, &file->f_pos); +#else ret = file->f_op->write(file, pt, size, &file->f_pos); +#endif +#else /* POSTK_DEBUG_ARCH_DEP_41 */ + ret = file->f_op->write(file, pt, size, &file->f_pos); +#endif /* POSTK_DEBUG_ARCH_DEP_41 */ } else { dprintk("cannot map physical memory(%lx) to virtual memory.\n", phys); @@ -2027,7 +2200,11 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { ihk_device_unmap_virtual(dev, pt, size); ihk_device_unmap_memory(dev, phys, size); if (ret != size) { +#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ + dprintk("core file write failed(%ld).\n", ret); +#else /* POSTK_DEBUG_TEMP_FIX_61 */ dprintk("core file write failed(%d).\n", ret); +#endif /* POSTK_DEBUG_TEMP_FIX_61 */ error = PTR_ERR(file); break; } @@ -2040,7 +2217,11 @@ static int writecore(ihk_os_t os, unsigned long rcoretable, int chunks) { } ret = file->f_op->llseek(file, size, SEEK_CUR); if (ret < 0) { +#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ + dprintk("core file seek failed(%ld).\n", ret); +#else /* POSTK_DEBUG_TEMP_FIX_61 */ dprintk("core file seek failed(%d).\n", ret); +#endif /* POSTK_DEBUG_TEMP_FIX_61 */ error = PTR_ERR(file); break; } @@ -2110,7 +2291,11 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) case __NR_coredump: error = writecore(os, sc->args[1], sc->args[0]); +#ifdef POSTK_DEBUG_TEMP_FIX_62 /* Fix to notify McKernel that core file generation failed */ + ret = error; +#else /* POSTK_DEBUG_TEMP_FIX_62 */ ret = 0; +#endif /* POSTK_DEBUG_TEMP_FIX_62 */ break; case __NR_sched_setparam: { diff --git a/executer/kernel/mcctrl/sysfs_files.c b/executer/kernel/mcctrl/sysfs_files.c index aefd9651..27ffbb06 100644 --- a/executer/kernel/mcctrl/sysfs_files.c +++ b/executer/kernel/mcctrl/sysfs_files.c @@ -1,3 +1,4 @@ +// sysfs_files.c COPYRIGHT FUJITSU LIMITED 2016 /** * \file sysfs_files.c * License details are found in the file LICENSE. 
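For reference, the writecore() changes above reduce to one version switch around the in-kernel write primitive. A minimal sketch of the resulting helper, assuming only that __kernel_write() is exported on kernels >= 3.18 (where file->f_op->write may be NULL); core_chunk_write is an illustrative name, not part of the patch:

	#include <linux/fs.h>
	#include <linux/version.h>

	/* Write one mapped chunk of the core table at the current file
	 * position, regardless of kernel version. */
	static ssize_t core_chunk_write(struct file *file, const char *buf,
	                                size_t len)
	{
	#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
		/* f_op->write may be NULL on these kernels; __kernel_write()
		 * dispatches to whichever write method the file provides. */
		return __kernel_write(file, buf, len, &file->f_pos);
	#else
		return file->f_op->write(file, buf, len, &file->f_pos);
	#endif
	}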
@@ -151,8 +152,13 @@ static void free_node_topology(struct mcctrl_usrdata *udp) return; } /* free_node_topology() */ +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +static void free_cpu_topology_one(struct mcctrl_usrdata *udp, + struct mcctrl_cpu_topology *cpu) +#else /* POSTK_DEBUG_ARCH_DEP_40 */ static void free_cpu_topology_one(struct mcctrl_usrdata *udp, struct cpu_topology *cpu) +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ { struct cache_topology *cache; struct cache_topology *next; @@ -168,8 +174,13 @@ static void free_cpu_topology_one(struct mcctrl_usrdata *udp, static void free_cpu_topology(struct mcctrl_usrdata *udp) { +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *cpu; + struct mcctrl_cpu_topology *next; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *cpu; struct cpu_topology *next; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ list_for_each_entry_safe(cpu, next, &udp->cpu_topology_list, chain) { list_del(&cpu->chain); @@ -299,8 +310,13 @@ static int translate_cpumap(struct mcctrl_usrdata *udp, return error; } /* translate_cpumap() */ +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +static struct cache_topology *get_cache_topology(struct mcctrl_usrdata *udp, + struct mcctrl_cpu_topology *cpu_topo, struct ihk_cache_topology *saved) +#else /* POSTK_DEBUG_ARCH_DEP_40 */ static struct cache_topology *get_cache_topology(struct mcctrl_usrdata *udp, struct cpu_topology *cpu_topo, struct ihk_cache_topology *saved) +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ { int error; struct cache_topology *topo = NULL; @@ -334,12 +350,21 @@ out: return (error)? ERR_PTR(error): topo; } /* get_cache_topology() */ +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +static struct mcctrl_cpu_topology *get_one_cpu_topology(struct mcctrl_usrdata *udp, + int index) +#else /* POSTK_DEBUG_ARCH_DEP_40 */ static struct cpu_topology *get_one_cpu_topology(struct mcctrl_usrdata *udp, int index) +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ { int error; ihk_device_t dev = ihk_os_to_dev(udp->os); +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *topology = NULL; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *topology = NULL; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ struct cache_topology *cache; struct ihk_cache_topology *saved_cache; @@ -357,7 +382,11 @@ static struct cpu_topology *get_one_cpu_topology(struct mcctrl_usrdata *udp, topology->saved = ihk_device_get_cpu_topology(dev, mckernel_cpu_2_hw_id(udp, index)); +#ifdef POSTK_DEBUG_TEMP_FIX_21 /* IS_ERR() through return NULL */ + if (!topology->saved) { +#else /* POSTK_DEBUG_TEMP_FIX_21 */ if (IS_ERR(topology->saved)) { +#endif /* POSTK_DEBUG_TEMP_FIX_21 */ error = PTR_ERR(topology->saved); eprintk("mcctrl:get_one_cpu_topology:" "ihk_device_get_cpu_topology failed. 
%d\n", @@ -413,7 +442,11 @@ static int get_cpu_topology(struct mcctrl_usrdata *udp) { int error; int index; +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *topology; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *topology; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ dprintk("get_cpu_topology(%p)\n", udp); for (index = 0; index < udp->cpu_info->n_cpus; ++index) { @@ -435,8 +468,13 @@ out: return error; } /* get_cpu_topology() */ +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +static void setup_cpu_sysfs_cache_files(struct mcctrl_usrdata *udp, + struct mcctrl_cpu_topology *cpu, struct cache_topology *cache) +#else /* POSTK_DEBUG_ARCH_DEP_40 */ static void setup_cpu_sysfs_cache_files(struct mcctrl_usrdata *udp, struct cpu_topology *cpu, struct cache_topology *cache) +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ { char *prefix = "/sys/devices/system/cpu"; int cpu_number = cpu->mckernel_cpu_id; @@ -488,8 +526,13 @@ static void setup_cpu_sysfs_cache_files(struct mcctrl_usrdata *udp, return; } /* setup_cpu_sysfs_cache_files() */ +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ +static void setup_cpu_sysfs_files(struct mcctrl_usrdata *udp, + struct mcctrl_cpu_topology *cpu) +#else /* POSTK_DEBUG_ARCH_DEP_40 */ static void setup_cpu_sysfs_files(struct mcctrl_usrdata *udp, struct cpu_topology *cpu) +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ { char *prefix = "/sys/devices/system/cpu"; int cpu_number = cpu->mckernel_cpu_id; @@ -566,7 +609,11 @@ static void setup_cpus_sysfs_files_node_link(struct mcctrl_usrdata *udp) static void setup_cpus_sysfs_files(struct mcctrl_usrdata *udp) { int error; +#ifdef POSTK_DEBUG_ARCH_DEP_40 /* cpu_topology name change */ + struct mcctrl_cpu_topology *cpu; +#else /* POSTK_DEBUG_ARCH_DEP_40 */ struct cpu_topology *cpu; +#endif /* POSTK_DEBUG_ARCH_DEP_40 */ error = get_cpu_topology(udp); if (error) { @@ -904,21 +951,30 @@ out: return error; } /* read_link() */ +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ +static int setup_one_pci(struct mcctrl_usrdata *udp, const char *name) +{ +#else /* POSTK_DEBUG_TEMP_FIX_22 */ static int setup_one_pci(void *arg0, const char *name, int namlen, loff_t offset, u64 ino, unsigned d_type) { struct mcctrl_usrdata *udp = arg0; +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ int error; char *buf = NULL; long node; struct sysfsm_bitmap_param param; +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ + dprintk("setup_one_pci(%p,%s)\n", udp, name); +#else /* POSTK_DEBUG_TEMP_FIX_22 */ dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d)\n", arg0, name, namlen, (long)offset, (long)ino, d_type); if (namlen != 12) { error = 0; goto out; } +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ buf = (void *)__get_free_pages(GFP_KERNEL, 0); if (!buf) { @@ -970,12 +1026,65 @@ static int setup_one_pci(void *arg0, const char *name, int namlen, error = 0; out: free_pages((long)buf, 0); +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ + dprintk("setup_one_pci(%p,%s): %d\n", udp, name, error); +#else /* POSTK_DEBUG_TEMP_FIX_22 */ dprintk("setup_one_pci(%p,%s,%d,%#lx,%#lx,%d): %d\n", arg0, name, namlen, (long)offset, (long)ino, d_type, error); +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ return error; } /* setup_one_pci() */ +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ +LIST_HEAD(pci_file_name_list); +struct pci_file_name { + char *name; + struct list_head chain; +}; + +static int pci_file_name_gen(void *buf, const char *name, int namlen, + loff_t offset, u64 ino, unsigned d_type) 
+{ + struct pci_file_name *p; + int error = -1; + + dprintk("pci_file_name_gen(%p,%s,%d,%#lx,%#lx,%d)\n", + buf, name, namlen, (long)offset, (long)ino, d_type); + + /* check namlen; PCI device names look like "0000:00:00.0", 12 chars */ + /* anything else is not a device entry, so skip it */ + if (namlen != 12) { + error = 0; + goto out; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + error = -ENOMEM; + eprintk("mcctrl:pci_file_name_gen:kmalloc failed. %d\n", error); + goto out; + } + + p->name = kmalloc(namlen + 1, GFP_KERNEL); + if (!p->name) { + error = -ENOMEM; + eprintk("mcctrl:pci_file_name_gen:kmalloc failed. %d\n", error); + kfree(p); + goto out; + } + memset(p->name, '\0', namlen + 1); + memcpy(p->name, name, namlen); + list_add(&p->chain, &pci_file_name_list); + + error = 0; +out: + dprintk("pci_file_name_gen(%p,%s,%d,%#lx,%#lx,%d): %d\n", + buf, name, namlen, (long)offset, (long)ino, d_type, error); + return error; +} +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ + #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) typedef int (*mcctrl_filldir_t)(void *buf, const char *name, int namlen, loff_t offset, u64 ino, unsigned d_type); @@ -1019,6 +1128,11 @@ static int setup_pci_files(struct mcctrl_usrdata *udp) int error; int er; struct file *fp = NULL; +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ + int ret = 0; + struct pci_file_name *cur; + struct pci_file_name *next; +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ dprintk("setup_pci_files(%p)\n", udp); fp = filp_open("/sys/bus/pci/devices", O_DIRECTORY, 0); @@ -1028,13 +1142,28 @@ static int setup_pci_files(struct mcctrl_usrdata *udp) goto out; } +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ + error = mcctrl_vfs_readdir(fp, &pci_file_name_gen, udp); +#else /* POSTK_DEBUG_TEMP_FIX_22 */ error = mcctrl_vfs_readdir(fp, &setup_one_pci, udp); +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ if (error) { eprintk("mcctrl:setup_pci_files:" "mcctrl_vfs_readdir failed. %d\n", error); goto out; } +#ifdef POSTK_DEBUG_TEMP_FIX_22 /* iterate_dir() deadlock */ + list_for_each_entry_safe(cur, next, &pci_file_name_list, chain) { + if (!ret) { + ret = setup_one_pci(udp, cur->name); + } + list_del(&cur->chain); + kfree(cur->name); + kfree(cur); + } +#endif /* POSTK_DEBUG_TEMP_FIX_22 */ + error = 0; out: if (!IS_ERR_OR_NULL(fp)) { diff --git a/executer/user/Makefile.in b/executer/user/Makefile.in index 6e5959b6..e39c566b 100644 --- a/executer/user/Makefile.in +++ b/executer/user/Makefile.in @@ -1,3 +1,4 @@ +# Makefile.in COPYRIGHT FUJITSU LIMITED 2015-2016 CC=@CC@ MCC=mpicc BINDIR=@BINDIR@ @@ -25,13 +26,19 @@ ifeq ($(ENABLE_QLMPI),yes) TARGET+= libqlmpi.so ql_server ql_mpiexec_start ql_mpiexec_finalize ql_talker libqlfort.so endif +CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i))) +CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i))) + all: $(TARGET) mcexec: mcexec.c libmcexec.a $(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) $(LDFLAGS) -DLIBDIR=\"$(LIBDIR)\" -fPIE -pie -L. $(MCEXEC_LIBS) -o $@ $^ $(EXTRA_OBJS) $(RPATH) -eclair: eclair.c - $(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) +# POSTK_DEBUG_ARCH_DEP_34, eclair arch depend separate. +#eclair: eclair.c +# $(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS) +eclair: eclair.c arch/$(ARCH)/arch-eclair.c + $(CC) -I.. -I.
-I./arch/$(ARCH)/include -I${IHKDIR} $(CFLAGS) -o $@ $^ $(LIBS) libsched_yield: libsched_yield.c $(CC) -shared -fPIC -Wl,-soname,sched_yield.so.1 -o libsched_yield.so.1.0.0 $^ -lc -ldl diff --git a/executer/user/arch/arm64/Makefile.in b/executer/user/arch/arm64/Makefile.in new file mode 100644 index 00000000..29e52115 --- /dev/null +++ b/executer/user/arch/arm64/Makefile.in @@ -0,0 +1,23 @@ +CC=@CC@ +AR=ar +BINDIR=@BINDIR@ +KDIR ?= @KDIR@ +CFLAGS=-Wall -O -I. +VPATH=@abs_srcdir@ +TARGET=../../libmcexec.a +LIBS=@LIBS@ + +all: $(TARGET) + +../../libmcexec.a: archdep.o + $(AR) cr ../../libmcexec.a archdep.o + +archdep.o: archdep.S + $(CC) -c -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -pthread $< + +clean: + $(RM) $(TARGET) *.o + +.PHONY: all clean install + +install: diff --git a/executer/user/arch/arm64/arch-eclair.c b/executer/user/arch/arm64/arch-eclair.c new file mode 100644 index 00000000..a64f47ed --- /dev/null +++ b/executer/user/arch/arm64/arch-eclair.c @@ -0,0 +1,51 @@ +/* arch-eclair.c COPYRIGHT FUJITSU LIMITED 2016 */ +#include +#include +#include + +int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) +{ + int i, ret, total = 0; + const unsigned long *regs[] = {&kregs->x19, &kregs->x20, &kregs->x21, + &kregs->x22, &kregs->x23, &kregs->x24, + &kregs->x25, &kregs->x26, &kregs->x27, + &kregs->x28}; + + for (i = 0; i < 18; i++) { /* x0-x18 */ + ret = snprintf(rbp, rbp_size, "xxxxxxxxxxxxxxxx"); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) { /* x19-x28 */ + ret = print_bin(rbp, rbp_size, (void *)regs[i], sizeof(*regs[0])); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + for (i = 0; i < 2; i++) { /* x29-x30 */ + ret = snprintf(rbp, rbp_size, "xxxxxxxxxxxxxxxx"); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + ret = print_bin(rbp, rbp_size, (void *)&kregs->sp, sizeof(kregs->sp)); + if (ret < 0) { + return ret; + } + total += ret; + + return total; +} diff --git a/executer/user/arch/arm64/arch_args.h b/executer/user/arch/arm64/arch_args.h new file mode 100644 index 00000000..262dd010 --- /dev/null +++ b/executer/user/arch/arm64/arch_args.h @@ -0,0 +1,126 @@ +/* arch_args.h COPYRIGHT FUJITSU LIMITED 2017 */ +#ifndef ARCH_ARGS_H +#define ARCH_ARGS_H + +#include + +typedef struct user_pt_regs syscall_args; + +static inline int +get_syscall_args(int pid, syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return -1; +} + +static inline int +set_syscall_args(int pid, syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return -1; +} + +static inline unsigned long +get_syscall_number(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_return(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_arg1(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_arg2(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_arg3(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_arg4(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long +get_syscall_arg5(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline unsigned long
+get_syscall_arg6(syscall_args *args) +{ + /* TODO: skeleton for UTI */ + return 0; +} + +static inline void +set_syscall_number(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_return(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg1(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg2(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg3(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg4(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg5(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} + +static inline void +set_syscall_arg6(syscall_args *args, unsigned long value) +{ + /* TODO: skeleton for UTI */ +} +#endif /* !ARCH_ARGS_H */ diff --git a/executer/user/arch/arm64/archdep.S b/executer/user/arch/arm64/archdep.S new file mode 100644 index 00000000..adcf3a54 --- /dev/null +++ b/executer/user/arch/arm64/archdep.S @@ -0,0 +1,16 @@ +/* archdep.S COPYRIGHT FUJITSU LIMITED 2017 */ +/* TODO: skeleton for UTI */ +.global switch_ctx +switch_ctx: + ret + +/* TODO: skeleton for UTI */ +.global compare_and_swap +compare_and_swap: + ret + +/* TODO: skeleton for UTI */ +.global compare_and_swap_int +compare_and_swap_int: + ret + diff --git a/executer/user/arch/arm64/include/arch-eclair.h b/executer/user/arch/arm64/include/arch-eclair.h new file mode 100644 index 00000000..bf73ac31 --- /dev/null +++ b/executer/user/arch/arm64/include/arch-eclair.h @@ -0,0 +1,24 @@ +/* arch-eclair.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef HEADER_USER_ARM64_ECLAIR_H +#define HEADER_USER_ARM64_ECLAIR_H + +/* VA_BITS=48, 4K_PAGE address */ +#define MAP_KERNEL 0xffffffffff800000 +#define MAP_ST 0xffff800000000000 +#define MAP_KERNEL_TEXT "0xffffffffff800000" + +#define ARCH_CLV_SPAN "arm64_cpu_local_variables_span" + +#define ARCH "aarch64" + +#define ARCH_REGS 34 + +#define PANIC_REGS_OFFSET 144 + +struct arch_kregs { + unsigned long x19, x20, x21, x22, x23; + unsigned long x24, x25, x26, x27, x28; + unsigned long fp, sp, pc; +}; + +#endif /* HEADER_USER_ARM64_ECLAIR_H */ diff --git a/executer/user/arch/x86_64/arch-eclair.c b/executer/user/arch/x86_64/arch-eclair.c new file mode 100644 index 00000000..841a92a9 --- /dev/null +++ b/executer/user/arch/x86_64/arch-eclair.c @@ -0,0 +1,101 @@ +/* arch-eclair.c COPYRIGHT FUJITSU LIMITED 2016 */ +#include +#include +#include + +int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs) +{ + int i, ret, total = 0; + uintptr_t ihk_mc_switch_context = -1; + const uint64_t *regs_1[] = {&kregs->rsi, &kregs->rdi, &kregs->rbp, + &kregs->rsp}; + const uint64_t *regs_2[] = {&kregs->r12, &kregs->r13, &kregs->r14, + &kregs->r15}; + + ihk_mc_switch_context = lookup_symbol("ihk_mc_switch_context"); + if (0) printf("ihk_mc_switch_context: %lx\n", ihk_mc_switch_context); + + ret = snprintf(rbp, rbp_size, "xxxxxxxxxxxxxxxx"); /* rax */ + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + + ret = print_bin(rbp, rbp_size, (void *)&kregs->rbx, sizeof(uint64_t)); /* rbx */ + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + + for (i = 0; i < 2; i++) { /* rcx, rdx */ + ret = snprintf(rbp, rbp_size,
"xxxxxxxxxxxxxxxx"); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + for (i = 0; i < sizeof(regs_1)/sizeof(regs_1[0]); i++) { /* rsi, rdi, rbp, rsp */ + ret = print_bin(rbp, rbp_size, (void *)regs_1[i], sizeof(regs_1[0])); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + for (i = 0; i < 4; i++) { /* r8-r11 */ + ret = snprintf(rbp, rbp_size, "xxxxxxxxxxxxxxxx"); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + for (i = 0; i < sizeof(regs_2)/sizeof(regs_2[0]); i++) { /* r12-r15 */ + ret = print_bin(rbp, rbp_size, (void *)regs_2[i], sizeof(regs_2[0])); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + ret = print_bin(rbp, rbp_size, (void *)&ihk_mc_switch_context, sizeof(uint64_t)); /* rip */ + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + + ret = print_bin(rbp, rbp_size, (void *)&kregs->rflags, sizeof(uint32_t)); /* rflags */ + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + + for (i = 0; i < 6; i++) { /* cs, ss, ds, es, fs, gs */ + ret = snprintf(rbp, rbp_size, "xxxxxxxx"); + if (ret < 0) { + return ret; + } + rbp += ret; + total += ret; + rbp_size -= ret; + } + + return total; +} diff --git a/executer/user/arch/x86_64/arch_args.h b/executer/user/arch/x86_64/arch_args.h index 9cc99839..8c7fa740 100644 --- a/executer/user/arch/x86_64/arch_args.h +++ b/executer/user/arch/x86_64/arch_args.h @@ -1,6 +1,10 @@ #ifndef ARCH_ARGS_H #define ARCH_ARGS_H +#ifdef POSTK_DEBUG_ARCH_DEP_77 /* arch depend hide */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_77 */ + typedef struct user_regs_struct syscall_args; static inline int diff --git a/executer/user/arch/x86_64/include/arch-eclair.h b/executer/user/arch/x86_64/include/arch-eclair.h new file mode 100644 index 00000000..a76053f3 --- /dev/null +++ b/executer/user/arch/x86_64/include/arch-eclair.h @@ -0,0 +1,24 @@ +/* arch-eclair.h COPYRIGHT FUJITSU LIMITED 2016 */ +#ifndef HEADER_USER_X86_ECLAIR_H +#define HEADER_USER_X86_ECLAIR_H + +#define MAP_KERNEL 0xFFFFFFFF80000000 +#define MAP_ST 0xFFFF800000000000 + +#define ARCH_CLV_SPAN "x86_cpu_local_variables_span" + +#define ARCH "i386:x86-64" + +#define ARCH_REGS 21 + +#define PANIC_REGS_OFFSET 240 + +#define MAP_KERNEL_TEXT "0xffffffff80001000" + +struct arch_kregs { + uintptr_t rsp, rbp, rbx, rsi; + uintptr_t rdi, r12, r13, r14; + uintptr_t r15, rflags, rsp0; +}; + +#endif /* HEADER_USER_X86_ECLAIR_H */ diff --git a/executer/user/eclair.c b/executer/user/eclair.c index 33370897..22a70f2f 100644 --- a/executer/user/eclair.c +++ b/executer/user/eclair.c @@ -1,3 +1,4 @@ +/* eclair.c COPYRIGHT FUJITSU LIMITED 2016 */ /** * \file eclair.c * License details are found in the file LICENSE.
@@ -7,6 +8,9 @@ * Copyright (C) 2015 RIKEN AICS */ +#ifdef POSTK_DEBUG_ARCH_DEP_33 +#include "../config.h" +#endif /* POSTK_DEBUG_ARCH_DEP_33 */ #include #include #include @@ -18,6 +22,10 @@ #include #include #include +#ifdef POSTK_DEBUG_ARCH_DEP_34 +#include +#include +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ #define CPU_TID_BASE 1000000 @@ -51,7 +59,11 @@ struct thread_info { int idle; uintptr_t process; uintptr_t clv; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + uintptr_t arch_clv; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ uintptr_t x86_clv; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ }; /* struct thread_info */ static struct options opt; @@ -67,9 +79,15 @@ static uintptr_t kernel_base; static struct thread_info *tihead = NULL; static struct thread_info **titailp = &tihead; static struct thread_info *curr_thread = NULL; +#ifndef POSTK_DEBUG_ARCH_DEP_34 static uintptr_t ihk_mc_switch_context = -1; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ +#ifdef POSTK_DEBUG_ARCH_DEP_34 +uintptr_t lookup_symbol(char *name) { +#else /* POSTK_DEBUG_ARCH_DEP_34 */ static uintptr_t lookup_symbol(char *name) { +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ int i; for (i = 0; i < nsyms; ++i) { @@ -81,12 +99,17 @@ static uintptr_t lookup_symbol(char *name) { return NOSYMBOL; } /* lookup_symbol() */ + static uintptr_t virt_to_phys(uintptr_t va) { +#ifndef POSTK_DEBUG_ARCH_DEP_34 #define MAP_KERNEL 0xFFFFFFFF80000000 +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (va >= MAP_KERNEL) { return (va - MAP_KERNEL + kernel_base); } +#ifndef POSTK_DEBUG_ARCH_DEP_34 #define MAP_ST 0xFFFF800000000000 +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (va >= MAP_ST) { return (va - MAP_ST); } @@ -270,9 +293,17 @@ static int setup_threads(void) { return 1; } +#ifdef POSTK_DEBUG_ARCH_DEP_34 + error = read_symbol_64(ARCH_CLV_SPAN, &locals_span); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ error = read_symbol_64("x86_cpu_local_variables_span", &locals_span); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (error) { +#ifdef POSTK_DEBUG_ARCH_DEP_34 + locals_span = sysconf(_SC_PAGESIZE); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ locals_span = 4096; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ } if (0) printf("locals 0x%lx span 0x%lx\n", locals, locals_span); @@ -282,8 +313,10 @@ static int setup_threads(void) { return 1; } +#ifndef POSTK_DEBUG_ARCH_DEP_34 ihk_mc_switch_context = lookup_symbol("ihk_mc_switch_context"); if (0) printf("ihk_mc_switch_context: %lx\n", ihk_mc_switch_context); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ for (cpu = 0; cpu < num_processors; ++cpu) { uintptr_t v; @@ -354,7 +387,11 @@ static int setup_threads(void) { ti->process = thread; ti->idle = 0; ti->clv = v; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + ti->arch_clv = locals + locals_span*cpu; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ ti->x86_clv = locals + locals_span*cpu; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ *titailp = ti; titailp = &ti->next; @@ -430,7 +467,11 @@ static int setup_threads(void) { ti->process = thread; ti->idle = 1; ti->clv = v; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + ti->arch_clv = locals + locals_span*cpu; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ ti->x86_clv = locals + locals_span*cpu; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ *titailp = ti; titailp = &ti->next; @@ -484,7 +525,11 @@ static int setup_threads(void) { ti->process = current; ti->idle = 1; ti->clv = v; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + ti->arch_clv = locals + locals_span*cpu; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ ti->x86_clv = locals + locals_span*cpu; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ *titailp = ti; titailp = &ti->next; @@ -506,7 +551,12 @@ static int setup_symbols(char *fname) { 
ssize_t needs; bfd_boolean ok; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + symbfd = bfd_openr(fname, NULL); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ symbfd = bfd_openr(fname, "elf64-x86-64"); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ + if (!symbfd) { bfd_perror("bfd_openr"); return 1; @@ -547,7 +597,11 @@ static int setup_symbols(char *fname) { static int setup_dump(char *fname) { bfd_boolean ok; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + dumpbfd = bfd_fopen(opt.dump_path, NULL, "r", -1); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ dumpbfd = bfd_fopen(opt.dump_path, "elf64-x86-64", "r", -1); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (!dumpbfd) { bfd_perror("bfd_fopen"); return 1; @@ -589,20 +643,40 @@ static int setup_dump(char *fname) { return 0; } /* setup_dump() */ +#ifdef POSTK_DEBUG_ARCH_DEP_38 +static ssize_t print_hex(char *buf, size_t buf_size, char *str) { +#else /* POSTK_DEBUG_ARCH_DEP_38 */ static ssize_t print_hex(char *buf, char *str) { +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ + char *p; char *q; q = buf; for (p = str; *p != '\0'; ++p) { +#ifdef POSTK_DEBUG_ARCH_DEP_38 + int ret; + + ret = snprintf(q, buf_size, "%02x", *p); + if (ret < 0) { + return ret; + } + q += ret; + buf_size -= ret; +#else /* POSTK_DEBUG_ARCH_DEP_38 */ q += sprintf(q, "%02x", *p); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ } *q = '\0'; return (q - buf); } /* print_hex() */ +#if defined(POSTK_DEBUG_ARCH_DEP_34) && defined(POSTK_DEBUG_ARCH_DEP_38) +ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) { +#else /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/ static ssize_t print_bin(char *buf, void *data, size_t size) { +#endif /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/ uint8_t *p; char *q; int i; @@ -610,7 +684,18 @@ static ssize_t print_bin(char *buf, void *data, size_t size) { p = data; q = buf; for (i = 0; i < size; ++i) { +#ifdef POSTK_DEBUG_ARCH_DEP_38 + int ret; + + ret = snprintf(q, buf_size, "%02x", *p); + if (ret < 0) { + return ret; + } + q += ret; + buf_size -= ret; +#else /* POSTK_DEBUG_ARCH_DEP_38 */ q += sprintf(q, "%02x", *p); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ ++p; } *q = '\0'; @@ -618,8 +703,13 @@ static ssize_t print_bin(char *buf, void *data, size_t size) { return (q - buf); } /* print_bin() */ +#ifdef POSTK_DEBUG_ARCH_DEP_38 +static void command(const char *cmd, char *res, size_t res_size) { + const char *p; +#else /* POSTK_DEBUG_ARCH_DEP_38 */ static void command(char *cmd, char *res) { char *p; +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ char *rbp; p = cmd; @@ -668,13 +758,24 @@ static void command(char *cmd, char *res) { rbp += sprintf(rbp, "1"); } else if (!strncmp(p, "qXfer:features:read:target.xml:", 31)) { +#ifdef POSTK_DEBUG_ARCH_DEP_34 + char *str = + "" + ""ARCH"" + ""; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ char *str = "" "i386:x86-64" ""; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ rbp += sprintf(rbp, "l"); if (0) +#ifdef POSTK_DEBUG_ARCH_DEP_38 + rbp += print_hex(rbp, res_size, str); +#else /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += print_hex(rbp, str); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += sprintf(rbp, "%s", str); } else if (!strcmp(p, "D")) { @@ -683,14 +784,20 @@ static void command(char *cmd, char *res) { } else if (!strcmp(p, "g")) { if (curr_thread->cpu < 0) { +#ifndef POSTK_DEBUG_ARCH_DEP_34 struct x86_kregs { uintptr_t rsp, rbp, rbx, rsi; uintptr_t rdi, r12, r13, r14; uintptr_t r15, rflags, rsp0; }; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ int error; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + struct arch_kregs kregs; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ struct x86_kregs kregs; +#endif /* 
POSTK_DEBUG_ARCH_DEP_34 */ error = read_mem(curr_thread->process+K(CTX_OFFSET), &kregs, sizeof(kregs)); @@ -699,6 +806,9 @@ static void command(char *cmd, char *res) { break; } +#ifdef POSTK_DEBUG_ARCH_DEP_34 + print_kregs(rbp, res_size, &kregs); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rax */ rbp += print_bin(rbp, &kregs.rbx, sizeof(uint64_t)); rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rcx */ @@ -725,15 +835,25 @@ static void command(char *cmd, char *res) { rbp += sprintf(rbp, "xxxxxxxx"); /* es */ rbp += sprintf(rbp, "xxxxxxxx"); /* fs */ rbp += sprintf(rbp, "xxxxxxxx"); /* gs */ +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ } else { int error; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + uintptr_t regs[ARCH_REGS]; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ uintptr_t regs[21]; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ uint8_t *pu8; int i; +#ifdef POSTK_DEBUG_ARCH_DEP_34 + error = read_mem(curr_thread->arch_clv+PANIC_REGS_OFFSET, + ®s, sizeof(regs)); +#else /* POSTK_DEBUG_ARCH_DEP_34 */ error = read_mem(curr_thread->x86_clv+240, ®s, sizeof(regs)); +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ if (error) { perror("read_mem"); break; @@ -780,13 +900,24 @@ static void command(char *cmd, char *res) { rbp += sprintf(rbp, "T0;tnotrun:0"); } else if (!strncmp(p, "qXfer:memory-map:read::", 23)) { +#ifdef POSTK_DEBUG_ARCH_DEP_34 + char *str = + "" + "" + ""; +#else /* POSTK_DEBUG_ARCH_DEP_34 */ char *str = "" "" ""; +#endif /* POSTK_DEBUG_ARCH_DEP_34 */ rbp += sprintf(rbp, "l"); if (0) +#ifdef POSTK_DEBUG_ARCH_DEP_38 + rbp += print_hex(rbp, res_size, str); +#else /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += print_hex(rbp, str); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += sprintf(rbp, "%s", str); } else if (!strncmp(p, "T", 1)) { @@ -878,7 +1009,11 @@ static void command(char *cmd, char *res) { else { q += sprintf(q, "status=%#x", ti->status); } +#ifdef POSTK_DEBUG_ARCH_DEP_38 + rbp += print_hex(rbp, res_size, buf); +#else /* POSTK_DEBUG_ARCH_DEP_38 */ rbp += print_hex(rbp, buf); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ } } while (0); @@ -1107,7 +1242,11 @@ int main(int argc, char *argv[]) { } mode = 0; fputc('+', ofp); +#ifdef POSTK_DEBUG_ARCH_DEP_38 + command(lbuf, rbuf, sizeof(rbuf)); +#else /* POSTK_DEBUG_ARCH_DEP_38 */ command(lbuf, rbuf); +#endif /* POSTK_DEBUG_ARCH_DEP_38 */ sum = 0; for (p = rbuf; *p != '\0'; ++p) { sum += *p; diff --git a/executer/user/eclair.h b/executer/user/eclair.h new file mode 100644 index 00000000..a80c6c0f --- /dev/null +++ b/executer/user/eclair.h @@ -0,0 +1,22 @@ +/* eclair.h COPYRIGHT FUJITSU LIMITED 2016 */ + +#ifndef HEADER_USER_COMMON_ECLAIR_H +#define HEADER_USER_COMMON_ECLAIR_H + +#ifdef POSTK_DEBUG_ARCH_DEP_76 /* header path fix */ +#include "../config.h" +#else /* POSTK_DEBUG_ARCH_DEP_76 */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_76 */ +#include +#include +#include + +/* common */ +uintptr_t lookup_symbol(char *name); +ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size); + +/* arch depend */ +int print_kregs(char *rbp, size_t rbp_size, const struct arch_kregs *kregs); + +#endif /* HEADER_USER_COMMON_ECLAIR_H */ diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index fd945aff..31942848 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -1,3 +1,4 @@ +/* mcexec.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ /** * \file executer/user/mcexec.c * License details are found in the file LICENSE. 
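Two patterns in the surrounding hunks are worth spelling out. First, the print_kregs()/print_bin() code above answers gdb's 'g' packet, which expects each register as two hex digits per byte in target memory order, with 'x' digits for bytes the stub cannot supply. A standalone illustration of that encoding, assuming a little-endian target (encode_reg is a hypothetical name, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* Encode one 64-bit register for a 'g' reply: low byte first,
	 * two hex digits per byte, 16 characters in total. */
	static int encode_reg(char *out, size_t out_size, uint64_t reg)
	{
		const uint8_t *p = (const uint8_t *)&reg;
		size_t i;
		int n, total = 0;

		for (i = 0; i < sizeof(reg); i++) {
			n = snprintf(out + total, out_size - total, "%02x", p[i]);
			if (n < 0)
				return n;
			total += n;
		}
		return total;
	}

Second, the POSTK_DEBUG_ARCH_DEP_35 hunks below drop the compile-time PAGE_SIZE/PAGE_MASK constants in favor of values probed at startup, because an arm64 kernel may be configured with 4K, 16K or 64K pages. The probe reduces to the following sketch (the patch does it inline in main(); init_page_geometry is an illustrative name):

	#include <unistd.h>

	static unsigned long page_size, page_mask;

	/* sysconf() reports the kernel's page size; it is a power of
	 * two, so ~(page_size - 1) is a valid alignment mask. */
	static void init_page_geometry(void)
	{
		page_size = (unsigned long)sysconf(_SC_PAGESIZE);
		page_mask = ~(page_size - 1);
	}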
@@ -63,9 +64,17 @@ #include #include #include +#ifdef POSTK_DEBUG_ARCH_DEP_35 +#ifndef __aarch64__ #include +#endif /* !__aarch64__ */ +#else /* POSTK_DEBUG_ARCH_DEP_35 */ +#include +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ #include +#ifndef POSTK_DEBUG_ARCH_DEP_77 /* arch depend hide */ #include +#endif /* !POSTK_DEBUG_ARCH_DEP_77 */ #include "../include/uprotocol.h" #include #include "archdep.h" @@ -81,6 +90,7 @@ #include "../include/qlmpi.h" //#define DEBUG +#define ADD_ENVS_OPTION #ifndef DEBUG #define __dprint(msg, ...) @@ -223,6 +233,11 @@ struct fork_sync_container { struct fork_sync_container *fork_sync_top; pthread_mutex_t fork_sync_mutex = PTHREAD_MUTEX_INITIALIZER; +#ifdef POSTK_DEBUG_ARCH_DEP_35 +unsigned long page_size; +unsigned long page_mask; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ + pid_t gettid(void) { return syscall(SYS_gettid); @@ -573,6 +588,9 @@ retry: fprintf(stderr, "lookup_exec_path(): error allocating\n"); return ENOMEM; } +#ifdef POSTK_DEBUG_TEMP_FIX_6 /* dynamic allocate area initialize clear */ + memset(link_path, '\0', max_len); +#endif /* POSTK_DEBUG_TEMP_FIX_6 */ error = readlink(path, link_path, max_len); if (error == -1 || error == max_len) { @@ -743,9 +761,15 @@ int transfer_image(int fd, struct program_load_desc *desc) for (i = 0; i < desc->num_sections; i++) { fp = desc->sections[i].fp; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + s = (desc->sections[i].vaddr) & page_mask; + e = (desc->sections[i].vaddr + desc->sections[i].len + + page_size - 1) & page_mask; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ s = (desc->sections[i].vaddr) & PAGE_MASK; e = (desc->sections[i].vaddr + desc->sections[i].len + PAGE_SIZE - 1) & PAGE_MASK; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ rpa = desc->sections[i].remote_pa; if (fseek(fp, desc->sections[i].offset, SEEK_SET) != 0) { @@ -761,15 +785,29 @@ int transfer_image(int fd, struct program_load_desc *desc) memset(&pt, '\0', sizeof pt); pt.rphys = rpa; pt.userp = dma_buf; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + pt.size = page_size; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ pt.size = PAGE_SIZE; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ pt.direction = MCEXEC_UP_TRANSFER_TO_REMOTE; lr = 0; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + memset(dma_buf, 0, page_size); +#else /* POSTK_DEBUG_ARCH_DEP_35 */ memset(dma_buf, 0, PAGE_SIZE); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ if (s < desc->sections[i].vaddr) { +#ifdef POSTK_DEBUG_ARCH_DEP_35 + l = desc->sections[i].vaddr + & (page_size - 1); + lr = page_size - l; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ l = desc->sections[i].vaddr & (PAGE_SIZE - 1); lr = PAGE_SIZE - l; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ if (lr > flen) { lr = flen; } @@ -790,8 +828,13 @@ int transfer_image(int fd, struct program_load_desc *desc) flen -= lr; } else if (flen > 0) { +#ifdef POSTK_DEBUG_ARCH_DEP_35 + if (flen > page_size) { + lr = page_size; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ if (flen > PAGE_SIZE) { lr = PAGE_SIZE; +#endif /*POSTK_DEBUG_ARCH_DEP_35 */ } else { lr = flen; } @@ -811,8 +854,13 @@ int transfer_image(int fd, struct program_load_desc *desc) } flen -= lr; } +#ifdef POSTK_DEBUG_ARCH_DEP_35 + s += page_size; + rpa += page_size; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ s += PAGE_SIZE; rpa += PAGE_SIZE; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ /* No more left to upload.. */ if (lr == 0 && flen == 0) break; @@ -1198,7 +1246,11 @@ static int reduce_stack(struct rlimit *orig_rlim, char *argv[]) void print_usage(char **argv) { +#ifdef ADD_ENVS_OPTION + fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [<-e ENV_NAME=value>...] 
[--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [] (program) [args...]\n", argv[0]); +#else /* ADD_ENVS_OPTION */ fprintf(stderr, "usage: %s [-c target_core] [-n nr_partitions] [--mpol-threshold=N] [--enable-straight-map] [--extend-heap-by=N] [--mpol-no-heap] [--mpol-no-bss] [--mpol-no-stack] [] (program) [args...]\n", argv[0]); +#endif /* ADD_ENVS_OPTION */ } void init_sigaction(void) @@ -1375,6 +1427,130 @@ static int rlimits[] = { char dev[64]; +#ifdef ADD_ENVS_OPTION +struct env_list_entry { + char* str; + char* name; + char* value; + struct env_list_entry *next; +}; + +static int get_env_list_entry_count(struct env_list_entry *head) +{ + int list_count = 0; + struct env_list_entry *current = head; + + while (current) { + list_count++; + current = current->next; + } + return list_count; +} + +static struct env_list_entry *search_env_list(struct env_list_entry *head, char *name) +{ + struct env_list_entry *current = head; + + while (current) { + if (!(strcmp(name, current->name))) { + return current; + } + current = current->next; + } + return NULL; +} + +static void add_env_list(struct env_list_entry **head, char *add_string) +{ + struct env_list_entry *current = NULL; + char *value = NULL; + char *name = NULL; + struct env_list_entry *exist = NULL; + + name = (char *)malloc(strlen(add_string) + 1); + strcpy(name, add_string); + + /* does the string contain '=' ? */ + if (!(value = strchr(name, '='))) { + printf("\"%s\" is not an env value.\n", add_string); + free(name); + return; + } + *value = '\0'; + value++; + + /* name overlap search */ + if (*head) { + exist = search_env_list(*head, name); + if (exist) { + free(name); + return; + } + } + + /* ADD env_list */ + current = (struct env_list_entry *)malloc(sizeof(struct env_list_entry)); + current->str = add_string; + current->name = name; + current->value = value; + if (*head) { + current->next = *head; + } else { + current->next = NULL; + } + *head = current; + return; +} + +static void destroy_env_list(struct env_list_entry *head) +{ + struct env_list_entry *current = head; + struct env_list_entry *next = NULL; + + while (current) { + next = current->next; + free(current->name); + free(current); + current = next; + } +} + +static char **create_local_environ(struct env_list_entry *inc_list) +{ + int list_count = 0; + int i = 0; + struct env_list_entry *current = inc_list; + char **local_env = NULL; + + list_count = get_env_list_entry_count(inc_list); + local_env = (char **)malloc(sizeof(char *) * (list_count + 1)); + local_env[list_count] = NULL; + + while (current) { + local_env[i] = (char *)malloc(strlen(current->str) + 1); + strcpy(local_env[i], current->str); + current = current->next; + i++; + } + return local_env; +} + +static void destroy_local_environ(char **local_env) +{ + int i = 0; + + if (!local_env) { + return; + } + + for (i = 0; local_env[i]; i++) { + free(local_env[i]); + local_env[i] = NULL; + } + free(local_env); +} +#endif /* ADD_ENVS_OPTION */ + unsigned long atobytes(char *string) { unsigned long mult = 1; @@ -1405,6 +1581,8 @@ unsigned long atobytes(char *string) } static struct option mcexec_options[] = { +#ifdef POSTK_DEBUG_ARCH_DEP_53 +#ifndef __aarch64__ { .name = "disable-vdso", .has_arg = no_argument, @@ -1417,6 +1595,8 @@ static struct option mcexec_options[] = { .flag = &enable_vdso, .val = 1, }, +#endif /*__aarch64__*/ +#endif /*POSTK_DEBUG_ARCH_DEP_53*/ { .name = "profile", .has_arg = no_argument, @@ -1626,12 +1806,21 @@ int main(int argc, char **argv)
char shell_path[1024]; int num = 0; int persona; +#ifdef ADD_ENVS_OPTION + char **local_env = NULL; + struct env_list_entry *extra_env = NULL; +#endif /* ADD_ENVS_OPTION */ #ifdef USE_SYSCALL_MOD_CALL __glob_argc = argc; __glob_argv = argv; #endif +#ifdef POSTK_DEBUG_ARCH_DEP_35 + page_size = sysconf(_SC_PAGESIZE); + page_mask = ~(page_size - 1); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ + altroot = getenv("MCEXEC_ALT_ROOT"); if (!altroot) { altroot = "/usr/linux-k1om-4.7/linux-k1om"; @@ -1669,7 +1858,11 @@ int main(int argc, char **argv) } /* Parse options ("+" denotes stop at the first non-option) */ +#ifdef ADD_ENVS_OPTION + while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:e:", mcexec_options, NULL)) != -1) { +#else /* ADD_ENVS_OPTION */ while ((opt = getopt_long(argc, argv, "+c:n:t:m:h:", mcexec_options, NULL)) != -1) { +#endif /* ADD_ENVS_OPTION */ switch (opt) { case 'c': target_core = atoi(optarg); @@ -1691,6 +1884,11 @@ int main(int argc, char **argv) heap_extension = atobytes(optarg); break; +#ifdef ADD_ENVS_OPTION + case 'e': + add_env_list(&extra_env, optarg); + break; +#endif /* ADD_ENVS_OPTION */ case 0: /* long opt */ break; @@ -1723,8 +1921,11 @@ int main(int argc, char **argv) ld_preload_init(); +#ifdef ADD_ENVS_OPTION +#else /* ADD_ENVS_OPTION */ /* Collect environment variables */ envs_len = flatten_strings(-1, NULL, environ, &envs); +#endif /* ADD_ENVS_OPTION */ #ifdef ENABLE_MCOVERLAYFS __dprint("mcoverlay enable\n"); @@ -1870,6 +2071,19 @@ int main(int argc, char **argv) argv[optind] = path; } +#ifdef ADD_ENVS_OPTION + /* Collect environment variables */ + for (i = 0; environ[i]; i++) { + add_env_list(&extra_env, environ[i]); + } + local_env = create_local_environ(extra_env); + envs_len = flatten_strings(-1, NULL, local_env, &envs); + destroy_local_environ(local_env); + local_env = NULL; + destroy_env_list(extra_env); + extra_env = NULL; +#endif /* ADD_ENVS_OPTION */ + for(i = 0; i < sizeof(rlimits) / sizeof(int); i += 2) getrlimit(rlimits[i], &desc->rlimit[rlimits[i + 1]]); desc->envs_len = envs_len; @@ -2202,12 +2416,20 @@ do_generic_syscall( __dprintf("do_generic_syscall(%ld)\n", w->sr.number); +#ifdef POSTK_DEBUG_TEMP_FIX_75 /* syscall return value check add. 
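+	 * Rationale: the libc syscall() wrapper sets errno only when it returns -1, and errno may be clobbered even on success, so testing ret == -1 is the reliable error check.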
*/ + ret = syscall(w->sr.number, w->sr.args[0], w->sr.args[1], w->sr.args[2], + w->sr.args[3], w->sr.args[4], w->sr.args[5]); + if (ret == -1) { + ret = -errno; + } +#else /* POSTK_DEBUG_TEMP_FIX_75 */ errno = 0; ret = syscall(w->sr.number, w->sr.args[0], w->sr.args[1], w->sr.args[2], w->sr.args[3], w->sr.args[4], w->sr.args[5]); if (errno != 0) { ret = -errno; } +#endif /* POSTK_DEBUG_TEMP_FIX_75 */ /* Overlayfs /sys/X directory lseek() problem work around */ if (w->sr.number == __NR_lseek && ret == -EINVAL) { @@ -2244,7 +2466,16 @@ do_generic_syscall( } /* Fake that nodeX in /sys/devices/system/node do not exist, * where X >= number of LWK NUMA nodes */ +#ifdef POSTK_DEBUG_ARCH_DEP_55 +# ifdef __aarch64__ +# define __nr_getdents __NR_getdents64 +# else +# define __nr_getdents __NR_getdents +# endif + else if (w->sr.number == __nr_getdents && ret > 0) { +#else /*POSTK_DEBUG_ARCH_DEP_55*/ else if (w->sr.number == __NR_getdents && ret > 0) { +#endif /*POSTK_DEBUG_ARCH_DEP_55*/ struct linux_dirent { long d_ino; off_t d_off; @@ -2327,7 +2558,11 @@ samepage(void *a, void *b) unsigned long aa = (unsigned long)a; unsigned long bb = (unsigned long)b; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + return (aa & page_mask) == (bb & page_mask); +#else /* POSTK_DEBUG_ARCH_DEP_35 */ return (aa & PAGE_MASK) == (bb & PAGE_MASK); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ } #ifdef DEBUG_UTI @@ -2508,8 +2743,17 @@ create_tracer(void *wp, int mck_tid, unsigned long key) exited++; continue; case __NR_clone: +#ifdef POSTK_DEBUG_ARCH_DEP_78 /* arch dep syscallno hide */ +#ifdef __NR_fork + case __NR_fork: +#endif +#ifdef __NR_vfork + case __NR_vfork: +#endif +#else /* POSTK_DEBUG_ARCH_DEP_78 */ case __NR_fork: case __NR_vfork: +#endif /* POSTK_DEBUG_ARCH_DEP_78 */ case __NR_execve: set_syscall_number(&args, -1); set_syscall_args(tid, &args); @@ -2582,20 +2826,34 @@ util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr) void *param[6]; int rc = 0; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + wp = mmap(NULL, page_size * 3, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); +#else /* POSTK_DEBUG_ARCH_DEP_35 */ wp = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ if (wp == (void *)-1) { rc = -errno; goto out; } +#ifdef POSTK_DEBUG_ARCH_DEP_35 + lctx = (char *)wp + page_size; + rctx = (char *)lctx + page_size; +#else /* POSTK_DEBUG_ARCH_DEP_35 */ lctx = (char *)wp + PAGE_SIZE; rctx = (char *)lctx + PAGE_SIZE; +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ param[0] = (void *)uctx_pa; param[1] = rctx; param[2] = lctx; param[4] = wp; +#ifdef POSTK_DEBUG_ARCH_DEP_35 + param[5] = (void *)(page_size * 3); +#else /* POSTK_DEBUG_ARCH_DEP_35 */ param[5] = (void *)(PAGE_SIZE * 3); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ if ((rc = ioctl(fd, MCEXEC_UP_UTIL_THREAD1, param)) == -1) { fprintf(stderr, "util_thread1: %d errno=%d\n", rc, errno); rc = -errno; @@ -2625,7 +2883,11 @@ util_thread(unsigned long uctx_pa, int remote_tid, unsigned long pattr) out: if (wp) +#ifdef POSTK_DEBUG_ARCH_DEP_35 + munmap(wp, page_size * 3); +#else /* POSTK_DEBUG_ARCH_DEP_35 */ munmap(wp, PAGE_SIZE * 3); +#endif /* POSTK_DEBUG_ARCH_DEP_35 */ return rc; } @@ -2763,6 +3025,54 @@ chgpath(char *in, char *buf) return fn; } +#ifdef POSTK_DEBUG_ARCH_DEP_72 /* add __NR_newfstat */ +static int +syscall_pathname(int dirfd, char *pathname, size_t size) +{ + int ret = 0; + char *tempbuf = NULL; + size_t tempbuf_size; + + if (pathname[0] == '/') { + goto out; + } + + if (dirfd != AT_FDCWD) { + int 
len; + char dfdpath[64]; + snprintf(dfdpath, sizeof(dfdpath), "/proc/self/fd/%d", dirfd); + + tempbuf_size = size; + tempbuf = malloc(tempbuf_size); + if (tempbuf == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = readlink(dfdpath, tempbuf, tempbuf_size); + if (ret == -1) { + ret = -errno; + goto out; + } + + len = strlen(pathname); + if (tempbuf_size <= ret + 1 + len + 1) { + ret = -ENAMETOOLONG; + goto out; + } + tempbuf[ret] = '/'; + strncpy(&tempbuf[ret+1], pathname, len+1); + + strcpy(pathname, tempbuf); + } +out: + if (tempbuf) { + free(tempbuf); + } + return ret; +} +#endif /*POSTK_DEBUG_ARCH_DEP_72*/ + int main_loop(struct thread_data_s *my_thread) { struct syscall_wait_desc w; @@ -2795,6 +3105,55 @@ int main_loop(struct thread_data_s *my_thread) my_thread->remote_cpu = w.cpu; switch (w.sr.number) { +#ifdef POSTK_DEBUG_ARCH_DEP_13 /* arch depend hide */ +#ifdef __aarch64__ + case __NR_openat: + /* initialize buffer */ + memset(tmpbuf, '\0', sizeof(tmpbuf)); + memset(pathbuf, '\0', sizeof(pathbuf)); + + /* check argument 1 dirfd */ + if ((int)w.sr.args[0] != AT_FDCWD) { + /* dirfd != AT_FDCWD */ + __dprintf("openat(dirfd != AT_FDCWD)\n"); + snprintf(tmpbuf, sizeof(tmpbuf), "/proc/self/fd/%d", (int)w.sr.args[0]); + ret = readlink(tmpbuf, pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + do_syscall_return(fd, cpu, -errno, 0, 0, 0, 0); + break; + } + __dprintf(" %s -> %s\n", tmpbuf, pathbuf); + ret = do_strncpy_from_user(fd, tmpbuf, (void *)w.sr.args[1], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + strncat(pathbuf, "/", 1); + strncat(pathbuf, tmpbuf, strlen(tmpbuf) + 1); + } else { + /* dirfd == AT_FDCWD */ + __dprintf("openat(dirfd == AT_FDCWD)\n"); + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[1], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + } + __dprintf("openat: %s\n", pathbuf); + + fn = chgpath(pathbuf, tmpbuf); + + ret = open(fn, w.sr.args[2], w.sr.args[3]); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#else /* __aarch64__ */ case __NR_open: ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); if (ret >= PATH_MAX) { @@ -2812,6 +3171,26 @@ int main_loop(struct thread_data_s *my_thread) SET_ERR(ret); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; +#endif /* __aarch64__ */ +#else /* POSTK_DEBUG_ARCH_DEP_13 */ + case __NR_open: + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + __dprintf("open: %s\n", pathbuf); + + fn = chgpath(pathbuf, tmpbuf); + + ret = open(fn, w.sr.args[1], w.sr.args[2]); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#endif /* POSTK_DEBUG_ARCH_DEP_13 */ case __NR_futex: ret = clock_gettime(w.sr.args[1], &tv); @@ -2932,7 +3311,11 @@ gettid_out: break; } +#ifdef POSTK_DEBUG_ARCH_DEP_13 /* arch depend hide */ + case 1079: { +#else /* POSTK_DEBUG_ARCH_DEP_13 */ case __NR_fork: { +#endif /* POSTK_DEBUG_ARCH_DEP_13 */ struct fork_sync *fs; struct fork_sync_container *fsc; struct fork_sync_container *fp; @@ -3192,14 +3575,22 @@ fork_err: goto return_execve1; } +#ifdef POSTK_DEBUG_TEMP_FIX_9 /* shell-script run via execve arg[0] fix */ + if (strlen(shell) >= SHELL_PATH_MAX_LEN) { +#else /* POSTK_DEBUG_TEMP_FIX_9 */ if 
(strlen(shell_path) >= SHELL_PATH_MAX_LEN) { +#endif /* POSTK_DEBUG_TEMP_FIX_9 */ fprintf(stderr, "execve(): error: shell path too long: %s\n", shell_path); ret = ENAMETOOLONG; goto return_execve1; } /* Let the LWK know the shell interpreter */ +#ifdef POSTK_DEBUG_TEMP_FIX_9 /* shell-script run via execve arg[0] fix */ + strcpy(desc->shell_path, shell); +#else /* POSTK_DEBUG_TEMP_FIX_9 */ strcpy(desc->shell_path, shell_path); +#endif /* POSTK_DEBUG_TEMP_FIX_9 */ } desc->enable_vdso = enable_vdso; @@ -3328,6 +3719,9 @@ return_execve2: } else{ ret = setfsuid(w.sr.args[0]); +#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */ + ret |= (long)gettid() << 32; +#endif /* POSTK_DEBUG_TEMP_FIX_45 */ } do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; @@ -3376,6 +3770,9 @@ return_execve2: case __NR_setfsgid: ret = setfsgid(w.sr.args[0]); +#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */ + ret |= (long)gettid() << 32; +#endif /*POSTK_DEBUG_TEMP_FIX_45 */ do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; @@ -3386,7 +3783,57 @@ return_execve2: ret = do_generic_syscall(&w); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; +#ifdef POSTK_DEBUG_ARCH_DEP_36 +#ifdef __aarch64__ + case __NR_readlinkat: + /* initialize buffer */ + memset(tmpbuf, '\0', sizeof(tmpbuf)); + memset(pathbuf, '\0', sizeof(pathbuf)); + /* check argument 1 dirfd */ + if ((int)w.sr.args[0] != AT_FDCWD) { + /* dirfd != AT_FDCWD */ + __dprintf("readlinkat(dirfd != AT_FDCWD)\n"); + snprintf(tmpbuf, sizeof(tmpbuf), "/proc/self/fd/%d", (int)w.sr.args[0]); + ret = readlink(tmpbuf, pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + do_syscall_return(fd, cpu, -errno, 0, 0, 0, 0); + break; + } + __dprintf(" %s -> %s\n", tmpbuf, pathbuf); + ret = do_strncpy_from_user(fd, tmpbuf, (void *)w.sr.args[1], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + strncat(pathbuf, "/", 1); + strncat(pathbuf, tmpbuf, strlen(tmpbuf) + 1); + } else { + /* dirfd == AT_FDCWD */ + __dprintf("readlinkat(dirfd == AT_FDCWD)\n"); + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[1], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + } + __dprintf("readlinkat: %s\n", pathbuf); + + fn = chgpath(pathbuf, tmpbuf); + + ret = readlink(fn, (char *)w.sr.args[2], w.sr.args[3]); + __dprintf("readlinkat: dirfd=%d, path=%s, buf=%s, ret=%ld\n", + (int)w.sr.args[0], fn, (char *)w.sr.args[2], ret); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#else /* __aarch64__ */ case __NR_readlink: ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); if (ret >= PATH_MAX) { @@ -3405,7 +3852,92 @@ return_execve2: SET_ERR(ret); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; +#endif /* __aarch64__ */ +#else /* POSTK_DEBUG_ARCH_DEP_36 */ + case __NR_readlink: + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + fn = chgpath(pathbuf, tmpbuf); + + ret = readlink(fn, (char *)w.sr.args[1], w.sr.args[2]); + __dprintf("readlink: path=%s, buf=%s, ret=%ld\n", + fn, (char *)w.sr.args[1], ret); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#endif /* POSTK_DEBUG_ARCH_DEP_36 */ + +#ifdef POSTK_DEBUG_ARCH_DEP_72 /* add __NR_newfstat */ + case 
__NR_newfstatat: + /* initialize buffer */ + memset(tmpbuf, '\0', sizeof(tmpbuf)); + memset(pathbuf, '\0', sizeof(pathbuf)); + + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[1], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + + if (pathbuf[0] == '\0') { + // empty string + if ((int)w.sr.args[3] & AT_EMPTY_PATH) { + if ((int)w.sr.args[0] == AT_FDCWD) { + if (NULL == getcwd(pathbuf, PATH_MAX)) { + do_syscall_return(fd, cpu, -errno, 0, 0, 0, 0); + break; + } + } else { + char dfdpath[64]; + snprintf(dfdpath, sizeof(dfdpath), "/proc/self/fd/%d", (int)w.sr.args[0]); + ret = readlink(dfdpath, pathbuf, PATH_MAX); + if (ret == -1) { + do_syscall_return(fd, cpu, -errno, 0, 0, 0, 0); + break; + } + pathbuf[ret] = '\0'; + } + } + } else if (pathbuf[0] != '/') { + // relative path + ret = syscall_pathname((int)w.sr.args[0], pathbuf, PATH_MAX); + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + } + + fn = chgpath(pathbuf, tmpbuf); + if (fn[0] == '/') { + ret = fstatat((int)w.sr.args[0], + fn, + (struct stat*)w.sr.args[2], + (int)w.sr.args[3]); + __dprintf("fstatat: dirfd=%d, pathname=%s, buf=%p, flags=%x, ret=%ld\n", + (int)w.sr.args[0], fn, (void*)w.sr.args[2], (int)w.sr.args[3], ret); + } else { + ret = fstatat((int)w.sr.args[0], + (const char*)w.sr.args[1], + (struct stat*)w.sr.args[2], + (int)w.sr.args[3]); + __dprintf("fstatat: dirfd=%d, pathname=%s, buf=%p, flags=%x, ret=%ld\n", + (int)w.sr.args[0], (char*)w.sr.args[1], (void*)w.sr.args[2], (int)w.sr.args[3], ret); + } + + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#ifdef __NR_stat case __NR_stat: ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); if (ret >= PATH_MAX) { @@ -3423,6 +3955,26 @@ return_execve2: SET_ERR(ret); do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); break; +#endif /* __NR_stat */ +#else /* POSTK_DEBUG_ARCH_DEP_72 */ + case __NR_stat: + ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX); + if (ret >= PATH_MAX) { + ret = -ENAMETOOLONG; + } + if (ret < 0) { + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; + } + + fn = chgpath(pathbuf, tmpbuf); + + ret = stat(fn, (struct stat *)w.sr.args[1]); + __dprintf("stat: path=%s, ret=%ld\n", fn, ret); + SET_ERR(ret); + do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); + break; +#endif /* POSTK_DEBUG_ARCH_DEP_72 */ case __NR_sched_setaffinity: if (w.sr.args[0] == 0) { diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index f935e988..2a319005 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -1,3 +1,4 @@ +# Makefile.build.in COPYRIGHT FUJITSU LIMITED 2015-2016 VPATH=@abs_srcdir@ SRC=$(VPATH) IHKDIR=$(IHKBASE)/$(TARGETDIR) @@ -6,22 +7,30 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += rbtree.o OBJS += pager.o +# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. 
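+# gencore.c holds the arch-independent ELF core writer; the register
+# capture itself is delegated to arch_fill_prstatus() in each arch tree.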
+# OBJS added gencore.o +OBJS += gencore.o DEPSRCS=$(wildcard $(SRC)/*.c) CFLAGS += -I$(SRC)/include -I@abs_builddir@/../ -I@abs_builddir@/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions LDFLAGS += -e arch_start IHKOBJ = ihk/ihk.o +# POSTK_DEBUG_ARCH_DEP_24 +default: all + include $(SRC)/config/config.$(TARGET) include @abs_builddir@/../../ihk/cokernel/Makefile.common # CFLAGS += -I$(SRC)/../arch/$(IHKARCH)/kernel/include -I$(SRC)/../lib/include -SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) SRC=$(SRC) - OBJDUMP ?= objdump OBJCOPY ?= objcopy +# POSTK_DEBUG_ARCH_DEP_26 +#SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) SRC=$(SRC) +SUBCMD_OPTS = TARGET=$(TARGET) O=$(CURDIR)/ihk CC=$(CC) LD=$(LD) OBJCOPY=$(OBJCOPY) SRC=$(SRC) + ld_kern_cmd_base = $(LD) $(LDFLAGS) -o $@.elf $^ mkimage_cmd_base = [ -f $(SRC)/script/mkimage.$(TARGET) ] && CC=$(CC) LD=$(LD) LDFLAGS="$(LDFLAGS_MKIMAGE)" OBJDUMP=$(OBJDUMP) OBJCOPY=$(OBJCOPY) sh $(SRC)/script/mkimage.$(TARGET) '$@.elf' '$@' '$(SRC)' || cp $@.elf $@ diff --git a/kernel/ap.c b/kernel/ap.c index 3601e53d..02c48cee 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -1,3 +1,4 @@ +/* ap.c COPYRIGHT FUJITSU LIMITED 2015 */ /** * \file ap.c * Licence details are found in the file LICENSE. diff --git a/kernel/config/config.smp-arm64.in b/kernel/config/config.smp-arm64.in new file mode 100644 index 00000000..6ce004ec --- /dev/null +++ b/kernel/config/config.smp-arm64.in @@ -0,0 +1,39 @@ +CC = ${CROSS_COMPILE}gcc +LD = ${CROSS_COMPILE}ld +OBJDUMP = ${CROSS_COMPILE}objdump +OBJCOPY = ${CROSS_COMPILE}objcopy + +# ARM64_MEMORY_LAYOUT +# ----+-----------+----------------------- +# # | page size | virtual memory space +# ----+-----------+----------------------- +# 1 | 4KB | 39bit [linux-linaro-tracking, upstream kernel] +# 2 | 4KB | 48bit +# 3 | 64KB | 42bit [CentOS] +# 4 | 64KB | 48bit +# ----+-----------+----------------------- +HOST_DIR=@KDIR@ +HOST_CONFIG=$(HOST_DIR)/.config +HOST_KERNEL_CONFIG_ARM64_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_64K_PAGES=||g') +HOST_KERNEL_CONFIG_ARM64_VA_BITS=$(shell grep -E "^CONFIG_ARM64_VA_BITS=" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_VA_BITS=||g') + +ifeq ($(HOST_KERNEL_CONFIG_ARM64_64K_PAGES), y) + ifeq ($(HOST_KERNEL_CONFIG_ARM64_VA_BITS), 42) + $(info PAGE_SIZE:64KB VA_BITS:42 PGTABLE_LEVELS:2) + ARM64_MEMORY_LAYOUT=3 + else + $(info PAGE_SIZE:64KB VA_BITS:48, PGTABLE_LEVELS:3) + ARM64_MEMORY_LAYOUT=4 + endif +else + ifeq ($(HOST_KERNEL_CONFIG_ARM64_VA_BITS), 39) + $(info PAGE_SIZE:4KB VA_BITS:39 PGTABLE_LEVELS:3) + ARM64_MEMORY_LAYOUT=1 + else + $(info PAGE_SIZE:4KB VA_BITS:48 PGTABLE_LEVELS:4) + ARM64_MEMORY_LAYOUT=2 + endif +endif + +$(info linker script:smp-arm64_type$(ARM64_MEMORY_LAYOUT).lds) +LDFLAGS += -T $(SRC)/config/smp-arm64_type$(ARM64_MEMORY_LAYOUT).lds diff --git a/kernel/config/smp-arm64_type1.lds b/kernel/config/smp-arm64_type1.lds new file mode 100644 index 00000000..1a5969e6 --- /dev/null +++ b/kernel/config/smp-arm64_type1.lds @@ -0,0 +1,50 @@ +PHDRS +{ + text PT_LOAD FLAGS(5); + data PT_LOAD FLAGS(7); +} +SECTIONS +{ + . = 0xffffffffff800000; /* KERNEL_START */ + _head = .; + + .text : { + *(.text); + } : text + + . = ALIGN(0x1000); + .data : { + *(.data) + *(.data.*) + } :data + .rodata : { + *(.rodata .rodata.*) + } :data + + .vdso : ALIGN(0x1000) { + vdso_page = .; + + . = vdso_page + 0x0000; + *(.vdso.data) + + . = vdso_page + 0x1000; + *(.vdso.text) + + . 
= ALIGN(0x1000); + } : data = 0xf4 + + .bss : { + *(.bss .bss.*) + } + . = ALIGN(0x1000); + idmap_page_table = .; + . += 0x1000; /* PAGE_SIZE */ + swapper_page_table = .; + . += 0x1000; /* PAGE_SIZE */ + idmap_pg_dir = .; + . += 0x3000; /* IDMAP_DIR_SIZE */ + swapper_pg_dir = .; + . += 0x2000; /* SWAPPER_DIR_SIZE */ + + _end = .; +} diff --git a/kernel/config/smp-arm64_type2.lds b/kernel/config/smp-arm64_type2.lds new file mode 100644 index 00000000..32be18ba --- /dev/null +++ b/kernel/config/smp-arm64_type2.lds @@ -0,0 +1,50 @@ +PHDRS +{ + text PT_LOAD FLAGS(5); + data PT_LOAD FLAGS(7); +} +SECTIONS +{ + . = 0xffffffffff800000; /* KERNEL_START */ + _head = .; + + .text : { + *(.text); + } : text + + . = ALIGN(0x1000); + .data : { + *(.data) + *(.data.*) + } :data + .rodata : { + *(.rodata .rodata.*) + } :data + + .vdso : ALIGN(0x1000) { + vdso_page = .; + + . = vdso_page + 0x0000; + *(.vdso.data) + + . = vdso_page + 0x1000; + *(.vdso.text) + + . = ALIGN(0x1000); + } : data = 0xf4 + + .bss : { + *(.bss .bss.*) + } + . = ALIGN(0x1000); + idmap_page_table = .; + . += 0x1000; /* PAGE_SIZE */ + swapper_page_table = .; + . += 0x1000; /* PAGE_SIZE */ + idmap_pg_dir = .; + . += 0x3000; /* IDMAP_DIR_SIZE */ + swapper_pg_dir = .; + . += 0x3000; /* SWAPPER_DIR_SIZE */ + + _end = .; +} diff --git a/kernel/config/smp-arm64_type3.lds b/kernel/config/smp-arm64_type3.lds new file mode 100644 index 00000000..8a39b1ee --- /dev/null +++ b/kernel/config/smp-arm64_type3.lds @@ -0,0 +1,50 @@ +PHDRS +{ + text PT_LOAD FLAGS(5); + data PT_LOAD FLAGS(7); +} +SECTIONS +{ + . = 0xffffffffe0000000; /* KERNEL_START */ + _head = .; + + .text : { + *(.text); + } : text + + . = ALIGN(0x10000); + .data : { + *(.data) + *(.data.*) + } :data + .rodata : { + *(.rodata .rodata.*) + } :data + + .vdso : ALIGN(0x10000) { + vdso_page = .; + + . = vdso_page + 0x00000; + *(.vdso.data) + + . = vdso_page + 0x10000; + *(.vdso.text) + + . = ALIGN(0x10000); + } : data = 0xf4 + + .bss : { + *(.bss .bss.*) + } + . = ALIGN(0x10000); + idmap_page_table = .; + . += 0x10000; /* PAGE_SIZE */ + swapper_page_table = .; + . += 0x10000; /* PAGE_SIZE */ + idmap_pg_dir = .; + . += 0x30000; /* IDMAP_DIR_SIZE */ + swapper_pg_dir = .; + . += 0x20000; /* SWAPPER_DIR_SIZE */ + + _end = .; +} diff --git a/kernel/config/smp-arm64_type4.lds b/kernel/config/smp-arm64_type4.lds new file mode 100644 index 00000000..11843c44 --- /dev/null +++ b/kernel/config/smp-arm64_type4.lds @@ -0,0 +1,50 @@ +PHDRS +{ + text PT_LOAD FLAGS(5); + data PT_LOAD FLAGS(7); +} +SECTIONS +{ + . = 0xffffffffe0000000; /* KERNEL_START */ + _head = .; + + .text : { + *(.text); + } : text + + . = ALIGN(0x10000); + .data : { + *(.data) + *(.data.*) + } :data + .rodata : { + *(.rodata .rodata.*) + } :data + + .vdso : ALIGN(0x10000) { + vdso_page = .; + + . = vdso_page + 0x00000; + *(.vdso.data) + + . = vdso_page + 0x10000; + *(.vdso.text) + + . = ALIGN(0x10000); + } : data = 0xf4 + + .bss : { + *(.bss .bss.*) + } + . = ALIGN(0x10000); + idmap_page_table = .; + . += 0x10000; /* PAGE_SIZE */ + swapper_page_table = .; + . += 0x10000; /* PAGE_SIZE */ + idmap_pg_dir = .; + . += 0x30000; /* IDMAP_DIR_SIZE */ + swapper_pg_dir = .; + . += 0x30000; /* SWAPPER_DIR_SIZE */ + + _end = .; +} diff --git a/kernel/devobj.c b/kernel/devobj.c index d364b4ed..7b1de867 100644 --- a/kernel/devobj.c +++ b/kernel/devobj.c @@ -1,3 +1,4 @@ +/* devobj.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ /** * \file devobj.c * License details are found in the file LICENSE. 
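The four smp-arm64_type*.lds variants above differ only in their base address, alignment granule and the space reserved after .bss for the boot translation tables; config.smp-arm64.in picks one at build time from the host kernel's CONFIG_ARM64_64K_PAGES and CONFIG_ARM64_VA_BITS. A minimal sketch of how kernel code can reach the reserved region through the symbols the scripts export (the symbol names come from the scripts; the clearing helper itself is an illustrative assumption, not code from this patch):

extern char idmap_page_table[];   /* one page per layout type */
extern char swapper_page_table[]; /* one page */
extern char idmap_pg_dir[];       /* IDMAP_DIR_SIZE: three table pages */
extern char swapper_pg_dir[];     /* SWAPPER_DIR_SIZE: two or three pages */
extern char _end[];               /* end of the image (and of the reserved tables) */

/* Illustrative only: zero the reserved tables before the MMU is enabled;
 * in practice this kind of clearing is typically done in early assembly. */
static void clear_boot_tables(void)
{
	char *p;

	for (p = idmap_page_table; p < _end; p++)
		*p = 0;
}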
@@ -87,7 +88,12 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp int error; struct devobj *obj = NULL; const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE; +#ifdef POSTK_DEBUG_TEMP_FIX_36 + const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); + const size_t pfn_npages = (npages + uintptr_per_page - 1) / uintptr_per_page; +#else const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; +#endif /*POSTK_DEBUG_TEMP_FIX_36*/ dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off); @@ -168,8 +174,10 @@ static void devobj_release(struct memobj *memobj) struct devobj *obj = to_devobj(memobj); struct devobj *free_obj = NULL; uintptr_t handle; +#ifndef POSTK_DEBUG_TEMP_FIX_36 const size_t pfn_npages = (obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1; +#endif /*!POSTK_DEBUG_TEMP_FIX_36*/ dkprintf("devobj_release(%p %lx)\n", obj, obj->handle); @@ -201,7 +209,13 @@ static void devobj_release(struct memobj *memobj) if (obj->pfn_table) { // Don't call memory_stat_rss_sub() because devobj related pages don't reside in main memory +#ifdef POSTK_DEBUG_TEMP_FIX_36 + const size_t uintptr_per_page = (PAGE_SIZE / sizeof(uintptr_t)); + const size_t pfn_npages = (obj->npages + uintptr_per_page - 1) / uintptr_per_page; ihk_mc_free_pages(obj->pfn_table, pfn_npages); +#else + ihk_mc_free_pages(obj->pfn_table, pfn_npages); +#endif /*POSTK_DEBUG_TEMP_FIX_36*/ } kfree(free_obj); } @@ -258,7 +272,11 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt /* TODO: do an arch dependent PTE to mapping flag conversion * instead of this inline check, also, we rely on having the * same PAT config as Linux here.. */ +#ifdef POSTK_DEBUG_ARCH_DEP_12 + if (pfn_is_write_combined(pfn)) { +#else /* POSTK_DEBUG_ARCH_DEP_12 */ if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) { +#endif /* POSTK_DEBUG_ARCH_DEP_12 */ *flag |= VR_WRITE_COMBINED; } diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 0b1ae04e..50bb1dfc 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -1,3 +1,4 @@ +/* fileobj.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ /** * \file fileobj.c * License details are found in the file LICENSE. diff --git a/kernel/gencore.c b/kernel/gencore.c new file mode 100644 index 00000000..8a575ef3 --- /dev/null +++ b/kernel/gencore.c @@ -0,0 +1,499 @@ +/* gencore.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +#include +#include +#include +#include +#include +#include +#include + +#define align32(x) ((((x) + 3) / 4) * 4) +#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE)) + +//#define DEBUG_PRINT_GENCORE + +#ifdef DEBUG_PRINT_GENCORE +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) +#else +#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf(...) kprintf(__VA_ARGS__) +#endif + +/* + * Generate a core file image, which consists of many chunks. + * Returns an allocated table, an entry of which is a pair of the address + * of a chunk and its length. + */ + +/** + * \brief Fill the ELF header. + * + * \param eh An Elf64_Ehdr structure. + * \param segs Number of segments of the core file.
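+ * The "fixed" annotations below are the ELFCLASS64 sizes: e_ehsize and
+ * e_phoff are sizeof(Elf64_Ehdr) == 64, and e_phentsize is
+ * sizeof(Elf64_Phdr) == 56.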
+ */ + +void fill_elf_header(Elf64_Ehdr *eh, int segs) +{ + eh->e_ident[EI_MAG0] = 0x7f; + eh->e_ident[EI_MAG1] = 'E'; + eh->e_ident[EI_MAG2] = 'L'; + eh->e_ident[EI_MAG3] = 'F'; + eh->e_ident[EI_CLASS] = ELF_CLASS; + eh->e_ident[EI_DATA] = ELF_DATA; + eh->e_ident[EI_VERSION] = El_VERSION; + eh->e_ident[EI_OSABI] = ELF_OSABI; + eh->e_ident[EI_ABIVERSION] = ELF_ABIVERSION; + + eh->e_type = ET_CORE; + eh->e_machine = ELF_ARCH; + eh->e_version = EV_CURRENT; + eh->e_entry = 0; /* Do we really need this? */ + eh->e_phoff = 64; /* fixed */ + eh->e_shoff = 0; /* no section header */ + eh->e_flags = 0; + eh->e_ehsize = 64; /* fixed */ + eh->e_phentsize = 56; /* fixed */ + eh->e_phnum = segs; + eh->e_shentsize = 0; + eh->e_shnum = 0; + eh->e_shstrndx = 0; +} + +/** + * \brief Return the size of the prstatus entry of the NOTE segment. + * + */ + +int get_prstatus_size(void) +{ + return sizeof(struct note) + align32(sizeof("CORE")) + + align32(sizeof(struct elf_prstatus64)); +} + +/** + * \brief Return the size of the prpsinfo entry of the NOTE segment. + * + */ + +int get_prpsinfo_size(void) +{ + return sizeof(struct note) + align32(sizeof("CORE")) + + align32(sizeof(struct elf_prpsinfo64)); +} + +/** + * \brief Fill a prstatus structure. + * + * \param head A pointer to a note structure. + * \param thread A pointer to the current thread structure. + * \param regs0 A pointer to an ihk_mc_user_context_t structure. + */ +void fill_prstatus(struct note *head, struct thread *thread, void *regs0) +{ + void *name; + struct elf_prstatus64 *prstatus; + + head->namesz = sizeof("CORE"); + head->descsz = sizeof(struct elf_prstatus64); + head->type = NT_PRSTATUS; + name = (void *) (head + 1); + memcpy(name, "CORE", sizeof("CORE")); + prstatus = (struct elf_prstatus64 *)(name + align32(sizeof("CORE"))); + + arch_fill_prstatus(prstatus, thread, regs0); +} + +/** + * \brief Fill a prpsinfo structure. + * + * \param head A pointer to a note structure. + * \param thread A pointer to the current thread structure. + * \param regs A pointer to an ihk_mc_user_context_t structure. + */ + +void fill_prpsinfo(struct note *head, struct thread *thread, void *regs) +{ + void *name; + struct elf_prpsinfo64 *prpsinfo; + + head->namesz = sizeof("CORE"); + head->descsz = sizeof(struct elf_prpsinfo64); + head->type = NT_PRPSINFO; + name = (void *) (head + 1); + memcpy(name, "CORE", sizeof("CORE")); + prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE"))); + + prpsinfo->pr_state = thread->status; + prpsinfo->pr_pid = thread->proc->pid; + +/* + We leave most of the fields unfilled. + + char pr_sname; + char pr_zomb; + char pr_nice; + a8_uint64_t pr_flag; + unsigned int pr_uid; + unsigned int pr_gid; + int pr_ppid, pr_pgrp, pr_sid; + char pr_fname[16]; + char pr_psargs[ELF_PRARGSZ]; +*/ +} + +/** + * \brief Return the size of the AUXV entry of the NOTE segment. + * + */ + +int get_auxv_size(void) +{ + return sizeof(struct note) + align32(sizeof("CORE")) + + sizeof(unsigned long) * AUXV_LEN; +} + +/** + * \brief Fill an AUXV structure. + * + * \param head A pointer to a note structure. + * \param thread A pointer to the current thread structure. + * \param regs A pointer to an ihk_mc_user_context_t structure.
+ */ + +void fill_auxv(struct note *head, struct thread *thread, void *regs) +{ + void *name; + void *auxv; + + head->namesz = sizeof("CORE"); + head->descsz = sizeof(unsigned long) * AUXV_LEN; + head->type = NT_AUXV; + name = (void *) (head + 1); + memcpy(name, "CORE", sizeof("CORE")); + auxv = name + align32(sizeof("CORE")); + memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN); +} + +/** + * \brief Return the size of the whole NOTE segment. + * + */ + +int get_note_size(void) +{ + return get_prstatus_size() + get_prpsinfo_size() + + get_auxv_size(); +} + +/** + * \brief Fill the NOTE segment. + * + * \param note A pointer to the NOTE segment buffer. + * \param thread A pointer to the current thread structure. + * \param regs A pointer to an ihk_mc_user_context_t structure. + */ + +void fill_note(void *note, struct thread *thread, void *regs) +{ + fill_prstatus(note, thread, regs); + note += get_prstatus_size(); + fill_prpsinfo(note, thread, regs); + note += get_prpsinfo_size(); + fill_auxv(note, thread, regs); +} + +/** + * \brief Generate an image of the core file. + * + * \param thread A pointer to the current thread structure. + * \param regs A pointer to an ihk_mc_user_context_t structure. + * \param coretable(out) An array of core chunks. + * \param chunks(out) Number of the entries of coretable. + * + * A core chunk is represented by a pair of a physical + * address of memory region and its size. If there is + * no corresponding physical address for a VM area + * (an unallocated demand-paging page, e.g.), the address + * should be zero. + */ + +int gencore(struct thread *thread, void *regs, + struct coretable **coretable, int *chunks) +{ + struct coretable *ct = NULL; +#ifdef POSTK_DEBUG_TEMP_FIX_39 + Elf64_Ehdr *eh = NULL; +#else + Elf64_Ehdr eh; +#endif /*POSTK_DEBUG_TEMP_FIX_39*/ + Elf64_Phdr *ph = NULL; + void *note = NULL; + struct vm_range *range; + struct process_vm *vm = thread->vm; + int segs = 1; /* the first one is for NOTE */ + int notesize, phsize, alignednotesize; + unsigned int offset = 0; + int i; + + *chunks = 3; /* ELF header, program header table and NOTE segment */ + + if (vm == NULL) { + dkprintf("no vm found.\n"); + return -1; + } + + list_for_each_entry(range, &vm->vm_range_list, list) { + dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", + range->start, range->end, range->flag, range->objoff); + /* We omit reserved areas because they are only for + mckernel's internal use. */ + if (range->flag & VR_RESERVED) + continue; + /* We need a chunk for each page for a demand paging area. + This can be optimized for spatial complexity but we would + lose simplicity instead.
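+ * For example, a five-page demand-paged range with pages 1 and 3 absent
+ * yields five chunks: one per present page (0, 2 and 4) and one
+ * zero-address chunk per run of absent pages (1 and 3).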
*/ + if (range->flag & VR_DEMAND_PAGING) { + unsigned long p, phys; + int prevzero = 0; + for (p = range->start; p < range->end; p += PAGE_SIZE) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, + (void *)p, &phys) != 0) { + prevzero = 1; + } else { + if (prevzero == 1) + (*chunks)++; + (*chunks)++; + prevzero = 0; + } + } + if (prevzero == 1) + (*chunks)++; + } else { + (*chunks)++; + } + segs++; + } + dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks); + + { + struct vm_regions region = thread->vm->region; + + dkprintf("text: %lx-%lx\n", region.text_start, region.text_end); + dkprintf("data: %lx-%lx\n", region.data_start, region.data_end); + dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end); + dkprintf("map: %lx-%lx\n", region.map_start, region.map_end); + dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end); + dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end); + } + + dkprintf("now generate a core file image\n"); + +#ifdef POSTK_DEBUG_TEMP_FIX_39 + eh = kmalloc(sizeof(*eh), IHK_MC_AP_NOWAIT); + if (eh == NULL) { + dkprintf("could not alloc an ELF header.\n"); + goto fail; + } +#ifdef POSTK_DEBUG_TEMP_FIX_63 /* Add core table and elf header initialization */ + memset(eh, 0, sizeof(*eh)); +#endif /* POSTK_DEBUG_TEMP_FIX_63 */ + + offset += sizeof(*eh); + fill_elf_header(eh, segs); +#else + offset += sizeof(eh); + fill_elf_header(&eh, segs); +#endif /* POSTK_DEBUG_TEMP_FIX_39 */ + + /* program header table */ + phsize = sizeof(Elf64_Phdr) * segs; + ph = kmalloc(phsize, IHK_MC_AP_NOWAIT); + if (ph == NULL) { + dkprintf("could not alloc a program header table.\n"); + goto fail; + } + memset(ph, 0, phsize); + + offset += phsize; + + /* NOTE segment + * To keep the next segment page-aligned, we prepare a padded + * region for our NOTE segment. + */ + notesize = get_note_size(); + alignednotesize = alignpage(notesize + offset) - offset; + note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT); + if (note == NULL) { + dkprintf("could not alloc NOTE for core.\n"); + goto fail; + } + memset(note, 0, alignednotesize); + fill_note(note, thread, regs); + + /* program header for NOTE segment is exceptional */ + ph[0].p_type = PT_NOTE; + ph[0].p_flags = 0; + ph[0].p_offset = offset; + ph[0].p_vaddr = 0; + ph[0].p_paddr = 0; + ph[0].p_filesz = notesize; + ph[0].p_memsz = notesize; + ph[0].p_align = 0; + + offset += alignednotesize; + + /* program header for each memory chunk */ + i = 1; + list_for_each_entry(range, &vm->vm_range_list, list) { + unsigned long flag = range->flag; + unsigned long size = range->end - range->start; + + if (range->flag & VR_RESERVED) + continue; + + ph[i].p_type = PT_LOAD; + ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0) + | ((flag & VR_PROT_WRITE) ? PF_W : 0) + | ((flag & VR_PROT_EXEC) ?
PF_X : 0); + ph[i].p_offset = offset; + ph[i].p_vaddr = range->start; + ph[i].p_paddr = 0; + ph[i].p_filesz = size; + ph[i].p_memsz = size; + ph[i].p_align = PAGE_SIZE; + i++; + offset += size; + } + + /* coretable to send to host */ + ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT); + if (!ct) { + dkprintf("could not alloc a coretable.\n"); + goto fail; + } +#ifdef POSTK_DEBUG_TEMP_FIX_63 /* Add core table and elf header initialization */ + memset(ct, 0, sizeof(struct coretable) * (*chunks)); +#endif /* POSTK_DEBUG_TEMP_FIX_63 */ + +#ifdef POSTK_DEBUG_TEMP_FIX_39 + ct[0].addr = virt_to_phys(eh); /* ELF header */ + ct[0].len = 64; + dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, eh); +#else + ct[0].addr = virt_to_phys(&eh); /* ELF header */ + ct[0].len = 64; + dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh); +#endif /* POSTK_DEBUG_TEMP_FIX_39 */ + + ct[1].addr = virt_to_phys(ph); /* program header table */ + ct[1].len = phsize; + dkprintf("coretable[1]: %lx@%lx(%lx)\n", ct[1].len, ct[1].addr, ph); + + ct[2].addr = virt_to_phys(note); /* NOTE segment */ + ct[2].len = alignednotesize; + dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note); + + i = 3; /* memory segments */ + list_for_each_entry(range, &vm->vm_range_list, list) { + unsigned long phys; + + if (range->flag & VR_RESERVED) + continue; + if (range->flag & VR_DEMAND_PAGING) { + /* Just an ad hoc kluge. */ + unsigned long p, start, phys; + int prevzero = 0; + unsigned long size = 0; + + for (start = p = range->start; + p < range->end; p += PAGE_SIZE) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, + (void *)p, &phys) != 0) { + if (prevzero == 0) { + /* We begin a new chunk */ + size = PAGE_SIZE; + start = p; + } else { + /* We extend the previous chunk */ + size += PAGE_SIZE; + } + prevzero = 1; + } else { + if (prevzero == 1) { + /* Flush out an empty chunk */ + ct[i].addr = 0; + ct[i].len = size; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, start); + i++; + + } + ct[i].addr = phys; + ct[i].len = PAGE_SIZE; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, p); + i++; + prevzero = 0; + } + } + if (prevzero == 1) { + /* An empty chunk */ + ct[i].addr = 0; + ct[i].len = size; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, start); + i++; + } + } else { + if ((thread->vm->region.user_start <= range->start) && + (range->end <= thread->vm->region.user_end)) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, + (void *)range->start, &phys) != 0) { + dkprintf("could not convert user virtual address %lx " + "to physical address", range->start); + goto fail; + } + } else { + phys = virt_to_phys((void *)range->start); + } + ct[i].addr = phys; + ct[i].len = range->end - range->start; + dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i, + ct[i].len, ct[i].addr, range->start); + i++; + } + } + *coretable = ct; + + return 0; + + fail: + if (ct) + kfree(ct); + if (ph) + kfree(ph); + if (note) + kfree(note); + return -1; +} + +/** + * \brief Free all the allocated spaces for an image of the core file. + * + * \param coretable An array of core chunks.
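+ * Only the header entries own kmalloc()ed memory: the NOTE segment, the
+ * program header table and, when POSTK_DEBUG_TEMP_FIX_39 placed it on the
+ * heap, the ELF header. The memory-segment entries point into the process
+ * image and must not be freed here.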
+ */ + +void freecore(struct coretable **coretable) +{ + struct coretable *ct = *coretable; + kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */ + kfree(phys_to_virt(ct[1].addr)); /* ph */ +#ifdef POSTK_DEBUG_TEMP_FIX_39 + kfree(phys_to_virt(ct[0].addr)); /* eh */ +#endif /*POSTK_DEBUG_TEMP_FIX_39*/ + kfree(*coretable); +} + +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/kernel/host.c b/kernel/host.c index 29ef63df..9762f3d0 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -1,3 +1,4 @@ +/* host.c COPYRIGHT FUJITSU LIMITED 2015-2016 */ /** * \file host.c * License details are found in the file LICENSE. diff --git a/kernel/include/auxvec.h b/kernel/include/auxvec.h index ec44024a..b9ea4295 100644 --- a/kernel/include/auxvec.h +++ b/kernel/include/auxvec.h @@ -1,3 +1,4 @@ +/* auxvec.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ #ifndef _LINUX_AUXVEC_H #define _LINUX_AUXVEC_H diff --git a/kernel/include/elfcore.h b/kernel/include/elfcore.h new file mode 100644 index 00000000..e8de229f --- /dev/null +++ b/kernel/include/elfcore.h @@ -0,0 +1,119 @@ +/* elfcore.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +#ifndef __HEADER_ELFCORE_H +#define __HEADER_ELFCORE_H + +/* + * Structures and definitions for ELF core file. + * Extracted from + * System V Application Binary Interface - DRAFT - 10 June 2013, + * http://www.sco.com/developers/gabi/latest/contents.html + */ +#include + +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Xword; +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; + +#define EI_NIDENT 16 + +typedef struct { + unsigned char e_ident[EI_NIDENT]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* e_ident table defined. */ +/* offset */ +#define EI_MAG0 0 +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_ABIVERSION 8 +#define EI_PAD 9 + +/* EI_MAG */ +#define ELFMAG0 0x7f +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' + +/* EI_CLASS */ +#define ELFCLASS64 2 /* 64-bit object */ + +/* EI_DATA */ +#define ELFDATA2LSB 1 /* LSB */ +#define ELFDATA2MSB 2 /* MSB */ + +/* EI_VERSION */ +#define El_VERSION 1 /* defined to be the same as EV_CURRENT */ +#define EV_CURRENT 1 /* Current version */ + +/* EI_OSABI */ +#define ELFOSABI_NONE 0 /* unspecified */ + +/* EI_ABIVERSION */ +#define El_ABIVERSION_NONE 0 /* unspecified */ + +/* e_type defined */ +#define ET_CORE 4 /* Core file */ + +typedef struct { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; + Elf64_Addr p_vaddr; + Elf64_Addr p_paddr; + Elf64_Xword p_filesz; + Elf64_Xword p_memsz; + Elf64_Xword p_align; +} Elf64_Phdr; + +#define PT_LOAD 1 +#define PT_NOTE 4 + +#define PF_X 1 /* executable bit */ +#define PF_W 2 /* writable bit */ +#define PF_R 4 /* readable bit */ + +struct note { + Elf64_Word namesz; + Elf64_Word descsz; + Elf64_Word type; + /* name char[namesz] and desc[descsz] */ +}; + +#define NT_PRSTATUS 1 +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation.
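+ * This branch also corrects the note-type name: the ELF constant for the
+ * FP register set is NT_PRFPREG (2), which the pre-existing branch below
+ * misspells as NT_PRFRPREG.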
*/ +#define NT_PRFPREG 2 +#else /* POSTK_DEBUG_ARCH_DEP_18 */ +#define NT_PRFRPREG 2 +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ +#define NT_PRPSINFO 3 +#define NT_AUXV 6 + +#include "elfcoregpl.h" + +/* functions */ +struct thread; +extern void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0); + +#endif /* __HEADER_ELFCORE_H */ +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/kernel/include/elfcoregpl.h b/kernel/include/elfcoregpl.h new file mode 100644 index 00000000..2e59fb9e --- /dev/null +++ b/kernel/include/elfcoregpl.h @@ -0,0 +1,67 @@ +/* elfcoregpl.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +#ifndef __HEADER_ELFCOREGPL_H +#define __HEADER_ELFCOREGPL_H + +#include + +/* From /usr/include/linux/elfcore.h of Linux */ + +#define ELF_PRARGSZ (80) + +/* From /usr/include/linux/elfcore.h for Linux */ + +struct elf_siginfo +{ + int si_signo; + int si_code; + int si_errno; +}; + +/* From bfd/hosts/x86-64linux.h of gdb. */ + +typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t; +typedef a8_uint64_t elf_greg64_t; + +#include + +struct prstatus64_timeval +{ + a8_uint64_t tv_sec; + a8_uint64_t tv_usec; +}; + +struct elf_prstatus64 +{ + struct elf_siginfo pr_info; + short int pr_cursig; + a8_uint64_t pr_sigpend; + a8_uint64_t pr_sighold; + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus64_timeval pr_utime; + struct prstatus64_timeval pr_stime; + struct prstatus64_timeval pr_cutime; + struct prstatus64_timeval pr_cstime; + elf_gregset64_t pr_reg; + int pr_fpvalid; +}; + +struct elf_prpsinfo64 +{ + char pr_state; + char pr_sname; + char pr_zomb; + char pr_nice; + a8_uint64_t pr_flag; + unsigned int pr_uid; + unsigned int pr_gid; + int pr_pid, pr_ppid, pr_pgrp, pr_sid; + char pr_fname[16]; + char pr_psargs[ELF_PRARGSZ]; +}; + +#endif /* __HEADER_ELFCOREGPL_H */ +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ diff --git a/kernel/include/futex.h b/kernel/include/futex.h index a0239b79..00ab92f9 100644 --- a/kernel/include/futex.h +++ b/kernel/include/futex.h @@ -1,3 +1,4 @@ +/* futex.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ /** * \file futex.h * Licence details are found in the file LICENSE. @@ -116,6 +117,8 @@ #include #endif +#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */ +#else static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) { int op = (encoded_op >> 28) & 7; @@ -180,6 +183,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } +#endif /* arch depend hide */ #endif // __KERNEL__ #endif // _ASM_X86_FUTEX_H diff --git a/kernel/include/lwk/compiler.h b/kernel/include/lwk/compiler.h index 533a0bfa..89c08d76 100644 --- a/kernel/include/lwk/compiler.h +++ b/kernel/include/lwk/compiler.h @@ -38,6 +38,17 @@ extern void __chk_io_ptr(void __iomem *); #ifdef __KERNEL__ +#ifdef POSTK_DEBUG_ARCH_DEP_72 +#if __GNUC__ > 5 +#error no compiler-gcc.h file for this gcc version +#elif __GNUC__ == 5 +# include +#elif __GNUC__ == 4 +# include +#else +# error Sorry, your compiler is too old/not recognized. +#endif +#else /* POSTK_DEBUG_ARCH_DEP_72 */ #if __GNUC__ > 4 #error no compiler-gcc.h file for this gcc version #elif __GNUC__ == 4 @@ -45,6 +56,7 @@ extern void __chk_io_ptr(void __iomem *); #else # error Sorry, your compiler is too old/not recognized.
#endif +#endif /* POSTK_DEBUG_ARCH_DEP_72 */ /* * Generic compiler-dependent macros required for kernel diff --git a/kernel/include/memobj.h b/kernel/include/memobj.h index 496e3d4a..ad649db0 100644 --- a/kernel/include/memobj.h +++ b/kernel/include/memobj.h @@ -1,3 +1,4 @@ +/* memobj.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ /** * \file memobj.h * License details are found in the file LICENSE. @@ -19,6 +20,8 @@ #include #include +#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */ +#else /* POSTK_DEBUG_ARCH_DEP_18 */ /* begin types.h */ typedef int32_t key_t; typedef uint32_t uid_t; @@ -26,6 +29,7 @@ typedef uint32_t gid_t; typedef int64_t time_t; typedef int32_t pid_t; /* end types.h */ +#endif /* POSTK_DEBUG_ARCH_DEP_18 */ enum { /* for memobj.flags */ diff --git a/kernel/include/process.h b/kernel/include/process.h index 495e3a26..067e51b7 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -9,6 +9,7 @@ /* * HISTORY */ +/* process.h COPYRIGHT FUJITSU LIMITED 2015-2017 */ #ifndef HEADER_PROCESS_H #define HEADER_PROCESS_H @@ -66,6 +67,10 @@ #define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */ #define PS_STOPPING 0x80 #define PS_TRACING 0x100 +#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */ +#define PS_DELAY_STOPPED 0x200 +#define PS_DELAY_TRACED 0x400 +#endif /* POSTK_DEBUG_TEMP_FIX_41 */ #define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE) @@ -135,6 +140,10 @@ #define WEXITED 0x00000004 #define WCONTINUED 0x00000008 #define WNOWAIT 0x01000000 /* Don't reap, just poll status. */ + +#ifdef POSTK_DEBUG_ARCH_DEP_44 /* wait() add support __WALL */ +#define __WALL 0x40000000 /* Wait on all children, regardless of type */ +#endif /* POSTK_DEBUG_ARCH_DEP_44 */ #define __WCLONE 0x80000000 /* idtype */ diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 5fb6d9cf..491caecd 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -9,6 +9,7 @@ /* * HISTORY */ +/* syscall.h COPYRIGHT FUJITSU LIMITED 2015-2016 */ #ifndef __HEADER_SYSCALL_H #define __HEADER_SYSCALL_H @@ -346,13 +347,25 @@ enum { #undef SYSCALL_DELEGATED #define __NR_coredump 999 /* pseudo syscall for coredump */ +#ifdef POSTK_DEBUG_TEMP_FIX_61 /* Core table size and lseek return value to loff_t */ +struct coretable { /* table entry for a core chunk */ + off_t len; /* length of the chunk */ + unsigned long addr; /* physical addr of the chunk */ +}; +#else /* POSTK_DEBUG_TEMP_FIX_61 */ struct coretable { /* table entry for a core chunk */ int len; /* length of the chunk */ unsigned long addr; /* physical addr of the chunk */ }; +#endif /* POSTK_DEBUG_TEMP_FIX_61 */ +#ifdef POSTK_DEBUG_TEMP_FIX_1 +void create_proc_procfs_files(int pid, int tid, int cpuid); +void delete_proc_procfs_files(int pid, int tid); +#else /* POSTK_DEBUG_TEMP_FIX_1 */ void create_proc_procfs_files(int pid, int cpuid); void delete_proc_procfs_files(int pid); +#endif /* POSTK_DEBUG_TEMP_FIX_1 */ void create_os_procfs_files(void); void delete_os_procfs_files(void); @@ -452,6 +465,7 @@ int arch_setup_vdso(void); int arch_cpu_read_write_register(struct ihk_os_cpu_register *desc, enum mcctrl_os_cpu_operation op); +#ifndef POSTK_DEBUG_ARCH_DEP_52 #define VDSO_MAXPAGES 2 struct vdso { long busy; @@ -468,6 +482,7 @@ struct vdso { void *pvti_virt; long pvti_phys; }; +#endif /*POSTK_DEBUG_ARCH_DEP_52*/ struct cpu_mapping { int cpu_number; diff --git a/kernel/include/xpmem.h b/kernel/include/xpmem.h index e69d233a..b41fd063 100644 --- 
a/kernel/include/xpmem.h +++ b/kernel/include/xpmem.h @@ -1,3 +1,4 @@ +/* xpmem.h COPYRIGHT FUJITSU LIMITED 2017 */ /** * \file xpmem.h * License details are found in the file LICENSE. @@ -16,7 +17,11 @@ #define XPMEM_DEV_PATH "/dev/xpmem" +#if defined(POSTK_DEBUG_ARCH_DEP_46) || defined(POSTK_DEBUG_ARCH_DEP_62) +extern int xpmem_open(int, const char*, int, ihk_mc_user_context_t *ctx); +#else /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */ extern int xpmem_open(ihk_mc_user_context_t *ctx); +#endif /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */ extern int xpmem_remove_process_memory_range(struct process_vm *vm, struct vm_range *vmr); extern int xpmem_fault_process_memory_range(struct process_vm *vm, diff --git a/kernel/init.c b/kernel/init.c index 091b9122..f4f9aec9 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -244,10 +244,34 @@ static void time_init(void) return; } +#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ +void monitor_init(void) +#else /* POSTK_DEBUG_TEMP_FIX_73 */ static void monitor_init() +#endif /* POSTK_DEBUG_TEMP_FIX_73 */ { int z; unsigned long phys; +#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ + const struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info(); + + if (!cpu_info) { + panic("PANIC: in monitor_init() ihk_mc_cpu_info is NULL."); + return; + } + + z = sizeof(struct ihk_os_monitor) + + sizeof(struct ihk_os_cpu_monitor) * (cpu_info->ncpus - 1); + z = (z + PAGE_SIZE -1) >> PAGE_SHIFT; + monitor = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL); + memset(monitor, 0, z * PAGE_SIZE); + monitor->num_processors = (cpu_info->ncpus - 1); + monitor->ns_per_tsc = ihk_mc_get_ns_per_tsc(); + phys = virt_to_phys(monitor); + ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) + + sizeof(struct ihk_os_cpu_monitor) * (cpu_info->ncpus - 1)); + return; +#else /* POSTK_DEBUG_TEMP_FIX_73 */ z = sizeof(struct ihk_os_monitor) + sizeof(struct ihk_os_cpu_monitor) * num_processors; @@ -259,6 +283,7 @@ static void monitor_init() phys = virt_to_phys(monitor); ihk_set_monitor(phys, sizeof(struct ihk_os_monitor) + sizeof(struct ihk_os_cpu_monitor) * num_processors); +#endif /* POSTK_DEBUG_TEMP_FIX_73 */ } int nmi_mode; @@ -282,7 +307,9 @@ static void rest_init(void) //pc_test(); ap_init(); +#ifndef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ monitor_init(); +#endif /* !POSTK_DEBUG_TEMP_FIX_73 */ cpu_local_var_init(); nmi_init(); time_init(); diff --git a/kernel/listeners.c b/kernel/listeners.c index 50be56e8..7dad945a 100644 --- a/kernel/listeners.c +++ b/kernel/listeners.c @@ -1,3 +1,4 @@ +/* listeners.c COPYRIGHT FUJITSU LIMITED 2015 */ /** * \file listeners.c * License details are found in the file LICENSE. diff --git a/kernel/mem.c b/kernel/mem.c index 63d66b29..601991b8 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -1,3 +1,4 @@ +/* mem.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ /** * \file mem.c * License details are found in the file LICENSE. @@ -867,6 +868,12 @@ void coredump(struct thread *thread, void *regs) struct coretable *coretable; int chunks; +#ifdef POSTK_DEBUG_ARCH_DEP_67 /* use limit corefile size. (temporarily fix.) 
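+ * Honors only the RLIMIT_CORE == 0 case for now: a zero soft limit means
+ * core dumps are disabled, so bail out before building the image.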
*/ + if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) { + return; + } +#endif /* POSTK_DEBUG_ARCH_DEP_67 */ + ret = gencore(thread, regs, &coretable, &chunks); if (ret != 0) { dkprintf("could not generate a core file image\n"); @@ -885,6 +892,7 @@ void coredump(struct thread *thread, void *regs) freecore(&coretable); } +#ifndef POSTK_DEBUG_ARCH_DEP_8 void remote_flush_tlb_cpumask(struct process_vm *vm, unsigned long addr, int cpu_id) { @@ -941,8 +949,14 @@ void remote_flush_tlb_array_cpumask(struct process_vm *vm, dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n", flush_ind, addr, cpu); +#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */ + /* TODO(pka_idke) Interim support */ + ihk_mc_interrupt_cpu(cpu, + ihk_mc_get_vector(flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START)); +#else /* POSTK_DEBUG_ARCH_DEP_8 */ ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id, flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START); +#endif /* POSTK_DEBUG_ARCH_DEP_8 */ } #ifdef DEBUG_IC_TLB @@ -979,6 +993,7 @@ void remote_flush_tlb_array_cpumask(struct process_vm *vm, ihk_mc_spinlock_unlock_noirq(&flush_entry->lock); } +#endif /* POSTK_DEBUG_ARCH_DEP_8 */ void tlb_flush_handler(int vector) { @@ -1545,11 +1560,18 @@ void ihk_mc_unmap_virtual(void *va, int npages, int free_physical) for (i = 0; i < npages; i++) { ihk_mc_pt_clear_page(NULL, (char *)va + (i << PAGE_SHIFT)); } +#ifdef POSTK_DEBUG_TEMP_FIX_42 /* add unmap virtual tlb flush. */ + flush_tlb(); +#endif /* POSTK_DEBUG_TEMP_FIX_42 */ +#ifdef POSTK_DEBUG_TEMP_FIX_51 /* ihk_mc_unmap_virtual() free_physical disabled */ + ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages); +#else /* POSTK_DEBUG_TEMP_FIX_51 */ if (free_physical) { ihk_pagealloc_free(vmap_allocator, (unsigned long)va, npages); flush_tlb_single((unsigned long)va); } +#endif /* POSTK_DEBUG_TEMP_FIX_51 */ } #ifdef ATTACHED_MIC @@ -1604,8 +1626,14 @@ void ihk_mc_clean_micpa(void){ } #endif +#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ +extern void monitor_init(void); +#endif /* POSTK_DEBUG_TEMP_FIX_73 */ void mem_init(void) { +#ifdef POSTK_DEBUG_TEMP_FIX_73 /* NULL access for *monitor fix */ + monitor_init(); +#endif /* !POSTK_DEBUG_TEMP_FIX_73 */ /* Initialize NUMA information and memory allocator bitmaps */ numa_init(); @@ -1944,10 +1972,17 @@ static void ___kmalloc_insert_chunk(struct list_head *free_list, if (next_chunk) { list_add_tail(&chunk->list, &next_chunk->list); } +#ifdef POSTK_DEBUG_TEMP_FIX_46 /* kmalloc free_list consolidate bug fix. */ + /* Add tail */ + else { + list_add_tail(&chunk->list, free_list); + } +#else /* POSTK_DEBUG_TEMP_FIX_46 */ /* Add after the head */ else { list_add(&chunk->list, free_list); } +#endif /* POSTK_DEBUG_TEMP_FIX_46 */ return; } @@ -2128,3 +2163,81 @@ void ___kmalloc_print_free_list(struct list_head *list) kprintf_unlock(irqflags); } +#ifdef POSTK_DEBUG_ARCH_DEP_27 +int search_free_space(struct thread *thread, size_t len, intptr_t hint, + int pgshift, intptr_t *addrp) +{ + struct vm_regions *region = &thread->vm->region; + intptr_t addr; + int error; + struct vm_range *range; + size_t pgsize = (size_t)1 << pgshift; + + dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp); + + addr = hint; + for (;;) { + addr = (addr + pgsize - 1) & ~(pgsize - 1); + if ((region->user_end <= addr) + || ((region->user_end - len) < addr)) { + ekprintf("search_free_space(%lx,%lx,%p):" + "no space. 
%lx %lx\n", + len, hint, addrp, addr, + region->user_end); + error = -ENOMEM; + goto out; + } + + range = lookup_process_memory_range(thread->vm, addr, addr+len); + if (range == NULL) { + break; + } + addr = range->end; + } + + error = 0; + *addrp = addr; + +out: + dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n", + len, hint, pgshift, addrp, error, addr); + return error; +} +#endif /* POSTK_DEBUG_ARCH_DEP_27 */ + +#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */ +#ifdef IHK_RBTREE_ALLOCATOR +int is_mckernel_memory(unsigned long phys) +{ + int i; + + for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { + unsigned long start, end; + int numa_id; + + ihk_mc_get_memory_chunk(i, &start, &end, &numa_id); + if (start <= phys && phys < end) { + return 1; + } + } + return 0; +} +#else /* IHK_RBTREE_ALLOCATOR */ +int is_mckernel_memory(unsigned long phys) +{ + int i; + + for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { + struct ihk_page_allocator_desc *pa_allocator; + + list_for_each_entry(pa_allocator, + &memory_nodes[i].allocators, list) { + if (pa_allocator->start <= phys && phys < pa_allocator->end) { + return 1; + } + } + } + return 0; +} +#endif /* IHK_RBTREE_ALLOCATOR */ +#endif /* POSTK_DEBUG_TEMP_FIX_52 */ diff --git a/kernel/process.c b/kernel/process.c index f34a5100..517be6e9 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -1,3 +1,4 @@ +/* process.c COPYRIGHT FUJITSU LIMITED 2015-2017 */ /** * \file process.c * License details are found in the file LICENSE. @@ -29,6 +30,9 @@ #include #include #include +#ifdef POSTK_DEBUG_ARCH_DEP_65 +#include +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ #include #include #include @@ -45,6 +49,9 @@ #define ekprintf(...) kprintf(__VA_ARGS__) #endif +#ifdef POSTK_DEBUG_ARCH_DEP_22 +extern struct thread *arch_switch_context(struct thread *prev, struct thread *next); +#endif /* POSTK_DEBUG_ARCH_DEP_22 */ extern long alloc_debugreg(struct thread *proc); extern void save_debugreg(unsigned long *debugreg); extern void restore_debugreg(unsigned long *debugreg); @@ -66,8 +73,10 @@ int ptrace_detach(int pid, int data); extern unsigned long do_kill(struct thread *, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern void procfs_create_thread(struct thread *); extern void procfs_delete_thread(struct thread *); +#ifndef POSTK_DEBUG_ARCH_DEP_22 extern void perf_start(struct mc_perf_event *event); extern void perf_reset(struct mc_perf_event *event); +#endif /* !POSTK_DEBUG_ARCH_DEP_22 */ struct list_head resource_set_list; mcs_rwlock_lock_t resource_set_lock; @@ -95,8 +104,18 @@ init_process(struct process *proc, struct process *parent) proc->mpol_threshold = parent->mpol_threshold; memcpy(proc->rlimit, parent->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); +#ifdef POSTK_DEBUG_TEMP_FIX_69 /* Fix problem not to inherit parent cpu_set. */ + memcpy(&proc->cpu_set, &parent->cpu_set, + sizeof(proc->cpu_set)); +#endif /* POSTK_DEBUG_TEMP_FIX_69 */ } +#ifdef POSTK_DEBUG_ARCH_DEP_63 /* struct process member initialize add */ + INIT_LIST_HEAD(&proc->hash_list); + INIT_LIST_HEAD(&proc->siblings_list); + INIT_LIST_HEAD(&proc->ptraced_siblings_list); + mcs_rwlock_init(&proc->update_lock); +#endif /* POSTK_DEBUG_ARCH_DEP_63 */ INIT_LIST_HEAD(&proc->threads_list); INIT_LIST_HEAD(&proc->children_list); INIT_LIST_HEAD(&proc->ptraced_children_list); @@ -370,6 +389,9 @@ clone_thread(struct thread *org, unsigned long pc, unsigned long sp, /* NOTE: sp is the user mode stack! 
*/ ihk_mc_init_user_process(&thread->ctx, &thread->uctx, ((char *)thread) + KERNEL_STACK_NR_PAGES * PAGE_SIZE, pc, sp); +#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_process() function */ + arch_clone_thread(org, pc, sp, thread); +#endif /* POSTK_DEBUG_ARCH_DEP_23 */ memcpy(thread->uctx, org->uctx, sizeof(*org->uctx)); ihk_mc_modify_user_context(thread->uctx, IHK_UCR_STACK_POINTER, sp); @@ -1793,8 +1815,30 @@ retry: } dkprintf("%s: cow,copying virt:%lx<-%lx,phys:%lx<-%lx,pgsize=%lu\n", __FUNCTION__, virt, phys_to_virt(phys), virt_to_phys(virt), phys, pgsize); +#ifdef POSTK_DEBUG_TEMP_FIX_14 + if (page) { + // McKernel memory space + memcpy(virt, phys_to_virt(phys), pgsize); + } else { + // Host Kernel memory space + const enum ihk_mc_pt_attribute attr = 0; + const int remove_vmap_allocator_entry = 1; + void* vmap; + + vmap = ihk_mc_map_virtual(phys, npages, attr); + if (!vmap) { + error = -ENOMEM; + kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):cannot virtual mapping. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); + ihk_mc_free_pages(virt, npages); + goto out; + } + memcpy(virt, vmap, pgsize); + ihk_mc_unmap_virtual(vmap, npages, remove_vmap_allocator_entry); + } +#else /*POSTK_DEBUG_TEMP_FIX_14*/ memcpy(virt, phys_to_virt(phys), pgsize); +#endif /*POSTK_DEBUG_TEMP_FIX_14*/ /* Call rusage_memory_stat_add() because remote page fault may create a page not pointed-to by PTE */ if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { dkprintf("%lx+,%s: remote page fault + cow, calling memory_stat_rss_add(),pgsize=%ld\n", @@ -1810,6 +1854,16 @@ retry: page = phys_to_page(phys); } } +#ifdef POSTK_DEBUG_ARCH_DEP_21 + else if (!(range->flag & VR_PRIVATE)) { /*VR_SHARED*/ + if (!(attr & PTATTR_DIRTY)) { + if (!(range->flag & VR_STACK)) { + attr &= ~PTATTR_WRITABLE; + } + } + } +#endif /*POSTK_DEBUG_ARCH_DEP_21*/ + /*****/ if (ptep) { if(rusage_memory_stat_add(range, phys, pgsize, pgsize)) { @@ -2031,8 +2085,12 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, /* Create stack range */ end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK; +#ifdef POSTK_DEBUG_ARCH_DEP_80 /* user stack prepage size fix */ + minsz = LARGE_PAGE_SIZE; +#else /* POSTK_DEBUG_ARCH_DEP_80 */ minsz = (proc->rlimit[MCK_RLIMIT_STACK].rlim_cur + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; +#endif /* POSTK_DEBUG_ARCH_DEP_80 */ size = (proc->rlimit[MCK_RLIMIT_STACK].rlim_max + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; dkprintf("%s: rlim_max: %lu, rlim_cur: %lu\n", @@ -2099,6 +2157,12 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, p = (unsigned long *)(stack + minsz); s_ind = -1; +#ifdef POSTK_DEBUG_ARCH_DEP_15 /* userstack 16byte align */ + if(!((envc + argc) % 2)){ + p[s_ind--] = 0; + } +#endif /* POSTK_DEBUG_ARCH_DEP_15 */ + /* "random" 16 bytes on the very top */ p[s_ind--] = 0x010101011; p[s_ind--] = 0x010101011; @@ -2109,6 +2173,10 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, AUXV_LEN in include/process.h. 
*/ p[s_ind--] = 0; /* AT_NULL */ p[s_ind--] = 0; +#ifdef POSTK_DEBUG_ARCH_DEP_65 + p[s_ind--] = arch_get_hwcap(); /* AT_HWCAP */ + p[s_ind--] = AT_HWCAP; +#endif /* POSTK_DEBUG_ARCH_DEP_65 */ p[s_ind--] = pn->at_entry; /* AT_ENTRY */ p[s_ind--] = AT_ENTRY; p[s_ind--] = pn->at_phnum; /* AT_PHNUM */ @@ -2117,7 +2185,11 @@ p[s_ind--] = AT_PHENT; p[s_ind--] = pn->at_phdr; /* AT_PHDR */ p[s_ind--] = AT_PHDR; +#ifdef POSTK_DEBUG_ARCH_DEP_50 + p[s_ind--] = PAGE_SIZE; /* AT_PAGESZ */ +#else p[s_ind--] = 4096; /* AT_PAGESZ */ +#endif /* POSTK_DEBUG_ARCH_DEP_50 */ p[s_ind--] = AT_PAGESZ; p[s_ind--] = pn->at_clktck; /* AT_CLKTCK */ p[s_ind--] = AT_CLKTCK; @@ -2163,12 +2235,31 @@ unsigned long extend_process_region(struct process_vm *vm, void *p; int rc; +#ifdef POSTK_DEBUG_TEMP_FIX_68 /* fix heap_extension p2align/shift */ + size_t align_size; + unsigned long align_mask; + int align_p2align; + int align_shift; + + if (vm->proc->heap_extension > PAGE_SIZE) { + align_size = LARGE_PAGE_SIZE; + align_mask = LARGE_PAGE_MASK; + align_p2align = LARGE_PAGE_P2ALIGN; + align_shift = LARGE_PAGE_SHIFT; + } else { + align_size = PAGE_SIZE; + align_mask = PAGE_MASK; + align_p2align = PAGE_P2ALIGN; + align_shift = PAGE_SHIFT; + } +#else /* POSTK_DEBUG_TEMP_FIX_68 */ size_t align_size = vm->proc->heap_extension > PAGE_SIZE ? LARGE_PAGE_SIZE : PAGE_SIZE; unsigned long align_mask = vm->proc->heap_extension > PAGE_SIZE ? LARGE_PAGE_MASK : PAGE_MASK; unsigned long align_p2align = vm->proc->heap_extension > PAGE_SHIFT ? LARGE_PAGE_P2ALIGN : PAGE_P2ALIGN; +#endif /* POSTK_DEBUG_TEMP_FIX_68 */ new_end_allocated = (address + (PAGE_SIZE - 1)) & PAGE_MASK; if ((new_end_allocated - end_allocated) < vm->proc->heap_extension) { @@ -2190,12 +2281,21 @@ } } +#ifdef POSTK_DEBUG_TEMP_FIX_68 /* fix heap_extension p2align/shift */ + if ((rc = add_process_memory_range(vm, end_allocated, new_end_allocated, + (p == 0 ? 0 : virt_to_phys(p)), flag, NULL, 0, + align_shift, NULL)) != 0) { + ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); + return end_allocated; + } +#else /* POSTK_DEBUG_TEMP_FIX_68 */ if ((rc = add_process_memory_range(vm, end_allocated, new_end_allocated, (p == 0 ? 0 : virt_to_phys(p)), flag, NULL, 0, align_p2align, NULL)) != 0) { ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); return end_allocated; } +#endif /* POSTK_DEBUG_TEMP_FIX_68 */ // memory_stat_rss_add() is called in add_process_memory_range() dkprintf("%s: new_end_allocated: 0x%lx, align_size: %lu, align_mask: %lx\n", @@ -2857,6 +2957,9 @@ static void do_migrate(void) __FUNCTION__, req->thread->tid, old_cpu_id, cpu_id); v->flags |= CPU_FLAG_NEED_RESCHED; +#ifdef POSTK_DEBUG_TEMP_FIX_57 /* migration wakeup IPI target fix. */ + ihk_mc_interrupt_cpu(cpu_id, ihk_mc_get_vector(IHK_GV_IKC)); +#endif /* POSTK_DEBUG_TEMP_FIX_57 */ waitq_wakeup(&req->wq); double_rq_unlock(cur_v, v, irqstate); continue; @@ -2931,6 +3034,12 @@ void spin_sleep_or_schedule(void) } ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); +#ifdef POSTK_DEBUG_TEMP_FIX_56 /* in futex_wait() signal handling fix.
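+ * Re-check for pending signals after dropping spin_sleep_lock, so a
+ * signal that arrived while spinning is treated as a wakeup instead of
+ * being slept through.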
+	if (hassigpending(cpu_local_var(current))) {
+		woken = 1;
+	}
+#endif /* POSTK_DEBUG_TEMP_FIX_56 */
+
 	if (woken) {
 		return;
 	}
@@ -3027,9 +3136,15 @@ redo:
 	}
 
 	/* Take care of floating point registers except for idle process */
+#ifdef POSTK_DEBUG_ARCH_DEP_66 /* Fixed not to save fp_regs when the process ends */
+	if (prev && (prev != &cpu_local_var(idle) && prev->status != PS_EXITED)) {
+		save_fp_regs(prev);
+	}
+#else /* POSTK_DEBUG_ARCH_DEP_66 */
 	if (prev && prev != &cpu_local_var(idle)) {
 		save_fp_regs(prev);
 	}
+#endif /* POSTK_DEBUG_ARCH_DEP_66 */
 
 	if (next != &cpu_local_var(idle)) {
 		restore_fp_regs(next);
@@ -3039,6 +3154,9 @@ redo:
 	    next->vm->address_space->page_table)
 		ihk_mc_load_page_table(next->vm->address_space->page_table);
 
+#ifdef POSTK_DEBUG_ARCH_DEP_22
+	last = arch_switch_context(prev, next);
+#else
 	dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
 		 ihk_mc_get_processor_id(), next->tlsblock_base);
 
@@ -3073,6 +3191,7 @@ redo:
 	else {
 		last = ihk_mc_switch_context(NULL, &next->ctx, prev);
 	}
+#endif /* POSTK_DEBUG_ARCH_DEP_22 */
 
 	/*
 	 * We must hold the lock throughout the context switch, otherwise
@@ -3166,16 +3285,25 @@ int __sched_wakeup_thread(struct thread *thread,
 		status = -EINVAL;
 	}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_55 /* runq_locked flag apply for unlock */
 	if (!runq_locked) {
 		ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
 	}
+#else /* POSTK_DEBUG_TEMP_FIX_55 */
+	ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+#endif /* POSTK_DEBUG_TEMP_FIX_55 */
 
 	if (!status && (thread->cpu_id != ihk_mc_get_processor_id())) {
 		dkprintf("%s: issuing IPI, thread->cpu_id=%d\n",
 			 __FUNCTION__, thread->cpu_id);
+#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
+		ihk_mc_interrupt_cpu(thread->cpu_id,
+				ihk_mc_get_vector(IHK_GV_IKC));
+#else /* POSTK_DEBUG_ARCH_DEP_8 */
 		ihk_mc_interrupt_cpu(
 			get_x86_cpu_local_variable(thread->cpu_id)->apic_id, 0xd1);
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
 	}
 
 	return status;
@@ -3229,9 +3357,15 @@ void sched_request_migrate(int cpu_id, struct thread *thread)
 	v->status = CPU_STATUS_RUNNING;
 	ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
 
+#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
+	if (cpu_id != ihk_mc_get_processor_id())
+		ihk_mc_interrupt_cpu(/* Kick scheduler */
+			thread->cpu_id, ihk_mc_get_vector(IHK_GV_IKC));
+#else /* POSTK_DEBUG_ARCH_DEP_8 */
 	if (cpu_id != ihk_mc_get_processor_id())
 		ihk_mc_interrupt_cpu(/* Kick scheduler */
 			get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
 
 	dkprintf("%s: tid: %d -> cpu: %d\n",
 		 __FUNCTION__, thread->tid, cpu_id);
@@ -3268,9 +3402,15 @@ void runq_add_thread(struct thread *thread, int cpu_id)
 	rusage_num_threads_inc();
 
 	/* Kick scheduler */
+#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
+	if (cpu_id != ihk_mc_get_processor_id())
+		ihk_mc_interrupt_cpu(
+			thread->cpu_id, ihk_mc_get_vector(IHK_GV_IKC));
+#else /* POSTK_DEBUG_ARCH_DEP_8 */
 	if (cpu_id != ihk_mc_get_processor_id())
 		ihk_mc_interrupt_cpu(
 			get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1);
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
 }
 
 /* NOTE: shouldn't remove a running process! */
diff --git a/kernel/procfs.c b/kernel/procfs.c
index 7c5122c4..5fa1efa6 100644
--- a/kernel/procfs.c
+++ b/kernel/procfs.c
@@ -9,6 +9,7 @@
 /*
  * HISTORY:
  */
+/* procfs.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
 
 #include
 #include
@@ -231,6 +232,12 @@ void process_procfs_request(struct ikc_scd_packet *rpacket)
 		eof = 1;
 		goto end;
 	}
+#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */
+	else if (!strcmp(p, "cpuinfo")) { /* "/proc/cpuinfo" */
+		ans = ihk_mc_show_cpuinfo(buf, count, offset, &eof);
+		goto end;
+	}
+#endif /* POSTK_DEBUG_ARCH_DEP_42 */
 	else {
 		kprintf("unsupported procfs entry: %s\n", p);
 		goto end;
@@ -285,8 +292,12 @@ void process_procfs_request(struct ikc_scd_packet *rpacket)
 			goto end;
 		}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_52 /* NUMA support(memory area determination) */
+		if (!is_mckernel_memory(pa)) {
+#else
 		if (pa < ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0) ||
 		    pa >= ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0)) {
+#endif /* POSTK_DEBUG_TEMP_FIX_52 */
 			ans = -EIO;
 			goto end;
 		}
@@ -308,16 +319,34 @@ void process_procfs_request(struct ikc_scd_packet *rpacket)
 	 */
 	if (strcmp(p, "maps") == 0) {
 		struct vm_range *range;
+#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc/<PID>/maps 1024 byte over read fix. */
+		int left = PAGE_SIZE * 2;
+#else /* POSTK_DEBUG_TEMP_FIX_47 */
 		int left = r->count - 1; /* extra 1 for terminating NULL */
+#endif /* POSTK_DEBUG_TEMP_FIX_47 */
 		int written = 0;
 		char *_buf = buf;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc/<PID>/maps 1024 byte over read fix. */
+		int len = 0;
+		char *tmp = NULL;
+
+		_buf = tmp = kmalloc(left, IHK_MC_AP_CRITICAL);
+		if (!tmp) {
+			kprintf("%s: error allocating /proc/self/maps buffer\n",
+				__FUNCTION__);
+			ans = 0;
+			goto end;
+		}
+#endif /* POSTK_DEBUG_TEMP_FIX_47 */
+
+#ifndef POSTK_DEBUG_TEMP_FIX_47 /* /proc/<PID>/maps 1024 byte over read fix. */
 		/* Starting from the middle of a proc file is not supported for maps */
 		if (offset > 0) {
 			ans = 0;
 			eof = 1;
 			goto end;
 		}
+#endif /* POSTK_DEBUG_TEMP_FIX_47 */
 
 		ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
 
@@ -347,17 +376,42 @@ void process_procfs_request(struct ikc_scd_packet *rpacket)
 			_buf += written_now;
 			written += written_now;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc/<PID>/maps 1024 byte over read fix. */
 			if (left == 0) {
 				kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
 					__FUNCTION__);
 				break;
 			}
+#else /* POSTK_DEBUG_TEMP_FIX_47 */
+			if (left == 1) {
+				kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
+					__FUNCTION__);
+				break;
+			}
+#endif /* POSTK_DEBUG_TEMP_FIX_47 */
 		}
 
 		ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 
+#ifdef POSTK_DEBUG_TEMP_FIX_47 /* /proc/<PID>/maps 1024 byte over read fix. */
+		len = strlen(tmp);
+		if (r->offset < len) {
+			if (r->offset + r->count < len) {
+				ans = r->count;
+			} else {
+				eof = 1;
+				ans = len;
+			}
+			strncpy(buf, tmp + r->offset, ans);
+		} else if (r->offset == len) {
+			ans = 0;
+			eof = 1;
+		}
+		kfree(tmp);
+#else /* POSTK_DEBUG_TEMP_FIX_47 */
 		ans = written + 1;
 		eof = 1;
+#endif /* POSTK_DEBUG_TEMP_FIX_47 */
 		goto end;
 	}
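POSTK_DEBUG_TEMP_FIX_47 above replaces the single-shot read with a render-then-window scheme: the whole maps text is formatted into a kmalloc'd buffer and the caller's (offset, count) window is copied out of it, so reads past the first chunk no longer truncate. A hedged userspace sketch of the same pattern follows; window_read and the sample data are illustrative, not McKernel APIs, and the remaining-length arithmetic is spelled out the way a correct implementation would compute it:

#include <stdio.h>
#include <string.h>

/* Serve a read of `count` bytes at `offset` from the fully rendered
 * text `tmp`; set *eof once the window reaches the end of the text. */
static size_t window_read(const char *tmp, char *buf, size_t count,
			  size_t offset, int *eof)
{
	size_t len = strlen(tmp);
	size_t ans = 0;

	*eof = 0;
	if (offset < len) {
		ans = (offset + count < len) ? count : len - offset;
		if (offset + count >= len)
			*eof = 1;
		memcpy(buf, tmp + offset, ans);
	} else {
		*eof = 1;
	}
	return ans;
}

int main(void)
{
	const char *maps = "00400000-00401000 r-xp ...\n00600000-00601000 rw-p ...\n";
	char buf[16];
	size_t off = 0, n;
	int eof = 0;

	while (!eof) {
		n = window_read(maps, buf, sizeof(buf), off, &eof);
		fwrite(buf, 1, n, stdout);	/* consume one window */
		off += n;
	}
	return 0;
}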
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 873b8a07..6d23702c 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -1,3 +1,4 @@
+/* syscall.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
 /**
  * \file syscall.c
  * License details are found in the file LICENSE.
@@ -57,6 +58,9 @@
 #include
 #include
 #include
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+#include
+#endif /* POSTK_DEBUG_ARCH_DEP_27 */
 
 /* Headers taken from kitten LWK */
 #include
@@ -140,12 +144,14 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid, struct s
 	if(req->number == __NR_exit_group ||
 	   req->number == __NR_kill){ // interrupt syscall
+#ifndef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
 		if (req->number == __NR_kill) {
 			req->rtid = -1; // no response
 			pid = req->args[0];
 		}
 		if (req->number == __NR_gettid)
 			pid = req->args[1];
+#endif /* !POSTK_DEBUG_TEMP_FIX_26 */
 	}
 
 	res->status = 0;
@@ -159,7 +165,11 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid, struct s
 #ifdef SYSCALL_BY_IKC
 	packet.msg = SCD_MSG_SYSCALL_ONESIDE;
 	packet.ref = cpu;
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+	packet.pid = pid;
+#else /* POSTK_DEBUG_TEMP_FIX_26 */
 	packet.pid = pid ? pid : cpu_local_var(current)->proc->pid;
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
 	packet.resp_pa = virt_to_phys(res);
 	dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid);
 
@@ -189,6 +199,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 		t_s = rdtsc();
 	}
 #endif // PROFILE_ENABLE
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+	int target_pid = pid;
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
 
 	dkprintf("SC(%d)[%3d] sending syscall\n",
 		 ihk_mc_get_processor_id(),
@@ -199,11 +212,52 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 
 	barrier();
 
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+	switch (req->number) {
+	case __NR_kill:
+		req->rtid = -1; // no response
+		target_pid = req->args[0];
+		break;
+	case __NR_gettid:
+		target_pid = req->args[1];
+		break;
+	default:
+		break;
+	}
+	target_pid = target_pid ? target_pid : proc->pid;
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
+
 	if(req->number != __NR_exit_group){
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */
+		struct process *target_proc = NULL;
+		struct mcs_rwlock_node_irqsave lock;
+
+		if (target_pid != proc->pid) {
+			target_proc = find_process(target_pid, &lock);
+			if (!target_proc) {
+				return -EPIPE;
+			}
+			process_unlock(target_proc, &lock);
+		} else {
+			target_proc = proc;
+		}
+
+		if (target_proc->nohost) { // host is down
+			return -EPIPE;
+		}
+#else /* POSTK_DEBUG_TEMP_FIX_48 */
+		if (proc->nohost && // host is down
+		    target_pid == proc->pid) {
+			return -EPIPE;
+		}
+#endif /* POSTK_DEBUG_TEMP_FIX_48 */
+#else /* POSTK_DEBUG_TEMP_FIX_26 */
 		if(proc->nohost && // host is down
 		   pid == proc->pid) {
 			return -EPIPE;
 		}
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
 		++thread->in_syscall_offload;
 	}
 
@@ -212,7 +266,11 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 	req->rtid = cpu_local_var(current)->tid;
 	req->ttid = 0;
 	res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+	send_syscall(req, cpu, target_pid, &res);
+#else /* POSTK_DEBUG_TEMP_FIX_26 */
 	send_syscall(req, cpu, pid, &res);
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
 
 	if (req->rtid == -1) {
 		preempt_disable();
@@ -305,7 +363,11 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 			req2.ttid = res.stid;
 			res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
+#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
+			send_syscall(&req2, cpu, target_pid, &res);
+#else /* POSTK_DEBUG_TEMP_FIX_26 */
 			send_syscall(&req2, cpu, pid, &res);
+#endif /* POSTK_DEBUG_TEMP_FIX_26 */
 
 #ifdef PROFILE_ENABLE
 			profile_event_add(PROFILE_remote_page_fault, (rdtsc() - t_s));
@@ -396,7 +458,12 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
 
 	/* -ERESTARTSYS indicates that the proxy process is gone
 	 * and the application should be terminated */
+#ifdef POSTK_DEBUG_TEMP_FIX_70 /* interrupt_syscall returned -ERESTARTSYS fix */
+	if (rc == -ERESTARTSYS && req->number != __NR_exit_group
+	    && req->number != __NR_kill) {
+#else /* POSTK_DEBUG_TEMP_FIX_70 */
 	if (rc == -ERESTARTSYS && req->number != __NR_exit_group) {
+#endif /* POSTK_DEBUG_TEMP_FIX_70 */
 		kprintf("%s: proxy PID %d is dead, terminate()\n",
 			__FUNCTION__, thread->proc->pid);
 		thread->proc->nohost = 1;
@@ -556,15 +623,29 @@ do_wait(int pid, int *status, int options, void *rusage)
 	dkprintf("wait4(): current->proc->pid: %d, pid: %d\n",
 		 thread->proc->pid, pid);
 
 rescan:
+#ifdef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose information fix. */
+	waitq_init_entry(&waitpid_wqe, thread);
+	waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE);
+#endif /* POSTK_DEBUG_TEMP_FIX_65 */
 	pid = orgpid;
 	mcs_rwlock_writer_lock_noirq(&thread->proc->children_lock, &lock);
 
 	list_for_each_entry_safe(child, next, &proc->children_list, siblings_list) {
+#ifdef POSTK_DEBUG_ARCH_DEP_44 /* wait() add support __WALL */
+		/*
+		if (!(options & __WALL)) {
+			if (!(!!(options & __WCLONE) ^ (child->termsig == SIGCHLD))) {
+				continue;
+			}
+		}
+		*/
+#else /* POSTK_DEBUG_ARCH_DEP_44 */
 		/*
 		if (!(!!(options & __WCLONE) ^ (child->termsig == SIGCHLD))) {
 			continue;
 		}
 		*/
+#endif /* POSTK_DEBUG_ARCH_DEP_44 */
 
 		/* Find thread with pid == tid, this will be either the main thread
 		 * or the one we are looking for specifically when __WCLONE is passed */
@@ -681,8 +762,10 @@ do_wait(int pid, int *status, int options, void *rusage)
 
 	/* Sleep */
 	dkprintf("wait4,sleeping\n");
+#ifndef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose information fix. */
 	waitq_init_entry(&waitpid_wqe, thread);
 	waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE);
+#endif /* !POSTK_DEBUG_TEMP_FIX_65 */
 	mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock);
 
 	if(hassigpending(thread)){
@@ -698,6 +781,9 @@ do_wait(int pid, int *status, int options, void *rusage)
 	goto rescan;
 
 exit:
+#ifdef POSTK_DEBUG_TEMP_FIX_65 /* wait4() lose information fix. */
+	waitq_finish_wait(&thread->proc->waitpid_q, &waitpid_wqe);
+#endif /* POSTK_DEBUG_TEMP_FIX_65 */
 	return ret;
 
 out_found:
 	dkprintf("wait4,out_found\n");
@@ -718,7 +804,11 @@ SYSCALL_DECLARE(wait4)
 	int rc;
 	struct rusage usage;
 
+#ifdef POSTK_DEBUG_ARCH_DEP_44 /* wait() add support __WALL */
+	if(options & ~(WNOHANG | WUNTRACED | WCONTINUED | __WCLONE | __WALL)){
+#else /* POSTK_DEBUG_ARCH_DEP_44 */
 	if(options & ~(WNOHANG | WUNTRACED | WCONTINUED | __WCLONE)){
+#endif /* POSTK_DEBUG_ARCH_DEP_44 */
 		dkprintf("wait4: unexpected options(%x).\n", options);
 		return -EINVAL;
 	}
@@ -750,7 +840,12 @@ SYSCALL_DECLARE(waitid)
 		pid = -1;
 	else
 		return -EINVAL;
+#ifdef POSTK_DEBUG_ARCH_DEP_44 /* wait() add support __WALL */
+	if(options & ~(WEXITED | WSTOPPED | WCONTINUED | WNOHANG | WNOWAIT | __WCLONE | __WALL)){
+#else /* POSTK_DEBUG_ARCH_DEP_44 */
 	if(options & ~(WEXITED | WSTOPPED | WCONTINUED | WNOHANG | WNOWAIT | __WCLONE)){
+#endif /* POSTK_DEBUG_ARCH_DEP_44 */
 		dkprintf("waitid: unexpected options(%x).\n", options);
 		return -EINVAL;
 	}
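POSTK_DEBUG_ARCH_DEP_44 above admits __WALL in the wait4()/waitid() option masks. For reference, the semantics callers expect from Linux, shown as a small standalone C program (illustrative only, independent of McKernel):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0)
		_exit(42);

	/* __WALL asks to wait for all children regardless of the
	 * termination signal they were cloned with; without it, plain
	 * waitpid() only matches "non-clone" children. */
	int status;
	pid_t got = waitpid(pid, &status, __WALL);

	if (got == pid && WIFEXITED(status))
		printf("child exited with %d\n", WEXITSTATUS(status));
	return 0;
}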
@@ -983,8 +1078,13 @@ void terminate(int rc, int sig)
 	if (!proc->nohost) {
 		request.number = __NR_exit_group;
 		request.args[0] = exit_status;
+#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */
+		proc->nohost = 1;
+		do_syscall(&request, ihk_mc_get_processor_id(), proc->pid);
+#else /* POSTK_DEBUG_TEMP_FIX_48 */
 		do_syscall(&request, ihk_mc_get_processor_id(), proc->pid);
 		proc->nohost = 1;
+#endif /* POSTK_DEBUG_TEMP_FIX_48 */
 	}
 
 	// Send signal to parent
@@ -1147,6 +1247,8 @@ int do_munmap(void *addr, size_t len)
 	return error;
 }
 
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+#else
 static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *addrp)
 {
 	struct thread *thread = cpu_local_var(current);
@@ -1186,6 +1288,7 @@ out:
 			len, hint, pgshift, addrp, error, addr);
 	return error;
 }
+#endif
 
 intptr_t
 do_mmap(const intptr_t addr0, const size_t len0, const int prot,
@@ -1278,8 +1381,13 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
 	}
 	else {
 		/* choose mapping address */
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+		error = search_free_space(cpu_local_var(current), len,
+				region->map_end, PAGE_SHIFT + p2align, &addr);
+#else
 		error = search_free_space(len, region->map_end,
 				PAGE_SHIFT + p2align, &addr);
+#endif /* POSTK_DEBUG_ARCH_DEP_27 */
 		if (error) {
 			ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n",
 				 len, region->map_end, p2align, error);
@@ -1770,7 +1878,11 @@ SYSCALL_DECLARE(brk)
 	}
 
 	/* Try to extend memory region */
+#ifdef POSTK_DEBUG_ARCH_DEP_60 /* brk() use demand-paging */
+	vrflag = VR_PROT_READ | VR_PROT_WRITE | VR_DEMAND_PAGING;
+#else /* POSTK_DEBUG_ARCH_DEP_60 */
 	vrflag = VR_PROT_READ | VR_PROT_WRITE;
+#endif /* POSTK_DEBUG_ARCH_DEP_60 */
 	vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag);
 	old_brk_end_allocated = region->brk_end_allocated;
 	ihk_mc_spinlock_lock_noirq(&cpu_local_var(current)->vm->memory_range_lock);
@@ -1810,6 +1922,10 @@ static void settid(struct thread *thread, int nr_tids, int *tids)
 	int ret;
 	struct syscall_request request IHK_DMA_ALIGN;
 
+#ifdef POSTK_DEBUG_ARCH_DEP_58 /* settid() arguments area 0 clear add. */
+	memset(&request, 0, sizeof(request));
+#endif /* POSTK_DEBUG_ARCH_DEP_58 */
+
 	request.number = __NR_gettid;
 	/*
 	 * If nr_tids is non-zero, tids should point to an array of ints
@@ -1998,6 +2114,10 @@ static void munmap_all(void)
 	return;
 }	/* munmap_all() */
 
+#ifdef POSTK_DEBUG_TEMP_FIX_19
+extern void clear_fp_regs(struct thread *thread);
+#endif /* POSTK_DEBUG_TEMP_FIX_19 */
+
 SYSCALL_DECLARE(execve)
 {
 	int error;
@@ -2052,8 +2172,13 @@ SYSCALL_DECLARE(execve)
 	if (ret != 0) {
 		dkprintf("execve(): ERROR: host failed to load elf header, errno: %d\n",
 			 ret);
+#ifdef POSTK_DEBUG_TEMP_FIX_10 /* sys_execve() memleak fix. */
+		ret = -ret;
+		goto desc_free;
+#else /* POSTK_DEBUG_TEMP_FIX_10 */
 		ihk_mc_free_pages(desc, 4);
 		return -ret;
+#endif /* POSTK_DEBUG_TEMP_FIX_10 */
 	}
 
 	dkprintf("execve(): ELF desc received, num sections: %d\n",
@@ -2076,8 +2201,13 @@ SYSCALL_DECLARE(execve)
 		kprintf("ERROR: no argv for executable: %s?\n", kfilename ? kfilename : "");
 		if(kfilename)
 			kfree(kfilename);
+#ifdef POSTK_DEBUG_TEMP_FIX_10 /* sys_execve() memleak fix. */
+		ret = -EINVAL;
+		goto desc_free;
+#else /* POSTK_DEBUG_TEMP_FIX_10 */
 		ihk_mc_free_pages(desc, 4);
 		return -EINVAL;
+#endif /* POSTK_DEBUG_TEMP_FIX_10 */
 	}
 
 	envp_flat_len = flatten_strings_from_user(-1, NULL, envp, &envp_flat);
@@ -2091,7 +2221,12 @@ SYSCALL_DECLARE(execve)
 		kprintf("ERROR: no envp for executable: %s?\n", kfilename ? kfilename : "");
 		if(kfilename)
 			kfree(kfilename);
+#ifdef POSTK_DEBUG_TEMP_FIX_10 /* sys_execve() memleak fix. */
+		ret = -EINVAL;
+		goto argv_free;
+#else /* POSTK_DEBUG_TEMP_FIX_10 */
 		return -EINVAL;
+#endif /* POSTK_DEBUG_TEMP_FIX_10 */
 	}
 
 	/* Unmap all memory areas of the process, userspace will be gone */
@@ -2136,6 +2271,11 @@ SYSCALL_DECLARE(execve)
 		thread->sigcommon->action[i].sa.sa_handler = SIG_DFL;
 	}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_19
+	/* The floating-point environment is reset to the default. */
+	clear_fp_regs(thread);
+#endif /* POSTK_DEBUG_TEMP_FIX_19 */
+
 	error = ptrace_report_exec(cpu_local_var(current));
 	if(error) {
 		kprintf("execve(): ERROR: ptrace_report_exec()\n");
@@ -2145,6 +2285,30 @@ SYSCALL_DECLARE(execve)
 	dkprintf("execve(): switching to new process\n");
 	proc->execed = 1;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_10 /* sys_execve() memleak fix. */
+	ret = 0;
+	if (envp_flat) {
+		kfree(envp_flat);
+	}
+
+argv_free:
+	if (argv_flat) {
+		kfree(argv_flat);
+	}
+
+desc_free:
+	ihk_mc_free_pages(desc, 4);
+
+	if (!ret) {
+		/* Lock run queue because enter_user_mode expects to release it */
+		cpu_local_var(runq_irqstate) =
+			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+
+		ihk_mc_switch_context(NULL, &cpu_local_var(current)->ctx,
+			cpu_local_var(current));
+	}
+	return ret;
+#else /* POSTK_DEBUG_TEMP_FIX_10 */
 	ihk_mc_free_pages(desc, 4);
 	kfree(argv_flat);
 	kfree(envp_flat);
@@ -2158,6 +2322,7 @@ SYSCALL_DECLARE(execve)
 
 	/* Never reach here */
 	return 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_10 */
 }
 
 unsigned long do_fork(int clone_flags, unsigned long newsp,
@@ -2299,7 +2464,11 @@ retry_tid:
 		request1.args[0] = clone_flags;
 	}
 	newproc->pid = do_syscall(&request1, ihk_mc_get_processor_id(), 0);
+#ifdef POSTK_DEBUG_TEMP_FIX_12 /* __NR_fork retval check fix. */
+	if (newproc->pid < 0) {
+#else /* POSTK_DEBUG_TEMP_FIX_12 */
 	if (newproc->pid == -1) {
+#endif /* POSTK_DEBUG_TEMP_FIX_12 */
 		kprintf("ERROR: forking host process\n");
 
 		/* TODO: clean-up new */
@@ -2312,6 +2481,7 @@ retry_tid:
 	new->vm->address_space->pids[0] = new->proc->pid;
 	dkprintf("fork(): new pid: %d\n", new->proc->pid);
 
+#ifndef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */
 	/* clear user space PTEs and set new rpgtable so that consequent
 	 * page faults will look up the right mappings */
 	request1.number = __NR_munmap;
 	request1.args[0] = new->vm->region.user_start;
 	request1.args[1] = new->vm->region.user_end -
 		new->vm->region.user_start;
 	/* 3rd parameter denotes new rpgtable of host process */
 	request1.args[2] = virt_to_phys(new->vm->address_space->page_table);
 	request1.args[3] = newproc->pid;
 
 	dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n",
 		 request1.args[2]);
 
 	if (do_syscall(&request1, ihk_mc_get_processor_id(), new->proc->pid)) {
 		kprintf("ERROR: clearing PTEs in host process\n");
 	}
+#endif /* !POSTK_DEBUG_TEMP_FIX_48 */
 
 	if(oldproc->monitoring_event &&
 	   oldproc->monitoring_event->attr.inherit){
 		newproc->monitoring_event = oldproc->monitoring_event;
@@ -2415,6 +2586,24 @@ retry_tid:
 		chain_process(newproc);
 	}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_48 /* nohost flag missed fix */
+	/* clear user space PTEs and set new rpgtable so that consequent
+	 * page faults will look up the right mappings */
+	request1.number = __NR_munmap;
+	request1.args[0] = new->vm->region.user_start;
+	request1.args[1] = new->vm->region.user_end -
+		new->vm->region.user_start;
+	/* 3rd parameter denotes new rpgtable of host process */
+	request1.args[2] = virt_to_phys(new->vm->address_space->page_table);
+	request1.args[3] = newproc->pid;
+
+	dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n",
+		 request1.args[2]);
+
+	if (do_syscall(&request1, ihk_mc_get_processor_id(), new->proc->pid)) {
+		kprintf("ERROR: clearing PTEs in host process\n");
+	}
+#endif /* POSTK_DEBUG_TEMP_FIX_48 */
 
 	if (oldproc->ptrace) {
 		ptrace_event = ptrace_check_clone_event(old, clone_flags);
 		if (ptrace_event) {
@@ -2548,7 +2737,11 @@ SYSCALL_DECLARE(tkill)
 }
 
 int *
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+getcred(int *_buf, int tid)
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 getcred(int *_buf)
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 {
 	int *buf;
 	struct syscall_request request IHK_DMA_ALIGN;
@@ -2558,6 +2751,9 @@ getcred(int *_buf)
 		buf = _buf + 8;
 	else
 		buf = _buf;
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+	buf[0] = tid;
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	phys = virt_to_phys(buf);
 	request.number = __NR_setfsuid;
 	request.args[0] = phys;
@@ -2568,14 +2764,22 @@ getcred(int *_buf)
 }
 
 void
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+do_setresuid(int tid)
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 do_setresuid()
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 {
 	int _buf[16];
 	int *buf;
 	struct thread *thread = cpu_local_var(current);
 	struct process *proc = thread->proc;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+	buf = getcred(_buf, tid);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 	buf = getcred(_buf);
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 
 	proc->ruid = buf[0];
 	proc->euid = buf[1];
@@ -2584,14 +2788,22 @@ do_setresuid()
 }
 
 void
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+do_setresgid(int tid)
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 do_setresgid()
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 {
 	int _buf[16];
 	int *buf;
 	struct thread *thread = cpu_local_var(current);
 	struct process *proc = thread->proc;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+	buf = getcred(_buf, tid);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 	buf = getcred(_buf);
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 
 	proc->rgid = buf[4];
 	proc->egid = buf[5];
@@ -2605,7 +2817,11 @@ SYSCALL_DECLARE(setresuid)
 	rc = syscall_generic_forwarding(__NR_setresuid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresuid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresuid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2616,7 +2832,11 @@ SYSCALL_DECLARE(setreuid)
 	rc = syscall_generic_forwarding(__NR_setreuid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresuid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresuid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2627,7 +2847,11 @@ SYSCALL_DECLARE(setuid)
 	rc = syscall_generic_forwarding(__NR_setuid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresuid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresuid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2642,7 +2866,12 @@ SYSCALL_DECLARE(setfsuid)
 	request.args[0] = fsuid;
 	request.args[1] = 0;
 	newfsuid = do_syscall(&request, ihk_mc_get_processor_id(), 0);
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+	do_setresuid((int)(newfsuid >> 32));
+	newfsuid &= (1UL << 32) - 1;
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 	do_setresuid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	return newfsuid;
 }
 
@@ -2652,7 +2881,11 @@ SYSCALL_DECLARE(setresgid)
 	rc = syscall_generic_forwarding(__NR_setresgid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresgid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresgid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2663,7 +2896,11 @@ SYSCALL_DECLARE(setregid)
 	rc = syscall_generic_forwarding(__NR_setregid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresgid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresgid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2674,7 +2911,11 @@ SYSCALL_DECLARE(setgid)
 	rc = syscall_generic_forwarding(__NR_setgid, ctx);
 	if(rc == 0){
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+		do_setresgid(0);
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 		do_setresgid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	}
 	return rc;
 }
@@ -2688,7 +2929,12 @@ SYSCALL_DECLARE(setfsgid)
 	request.number = __NR_setfsgid;
 	request.args[0] = fsgid;
 	newfsgid = do_syscall(&request, ihk_mc_get_processor_id(), 0);
+#ifdef POSTK_DEBUG_TEMP_FIX_45 /* setfsgid()/setfsuid() mismatch fix. */
+	do_setresgid((int)(newfsgid >> 32));
+	newfsgid &= (1UL << 32) - 1;
+#else /* POSTK_DEBUG_TEMP_FIX_45 */
 	do_setresgid();
+#endif /* POSTK_DEBUG_TEMP_FIX_45 */
 	return newfsgid;
 }
 
@@ -2800,7 +3046,12 @@ SYSCALL_DECLARE(setpgid)
 
 SYSCALL_DECLARE(set_robust_list)
 {
+#ifdef POSTK_DEBUG_TEMP_FIX_2
+	// Palliative fix. wait for impl.
+	return 0;
+#else
 	return -ENOSYS;
+#endif
 }
 
 int
@@ -2896,6 +3147,15 @@ SYSCALL_DECLARE(open)
 	}
 	dkprintf("open(): pathname=%s\n", xpmem_wk);
 	rc = strcmp(xpmem_wk, XPMEM_DEV_PATH);
+#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
+	if (!rc) {
+		rc = xpmem_open(__NR_open, xpmem_wk, (int)ihk_mc_syscall_arg1(ctx), ctx);
+	}
+	else {
+		rc = syscall_generic_forwarding(__NR_open, ctx);
+	}
+	kfree(xpmem_wk);
+#else /* POSTK_DEBUG_ARCH_DEP_62 */
 	kfree(xpmem_wk);
 	if (!rc) {
 		rc = xpmem_open(ctx);
@@ -2903,10 +3163,44 @@ SYSCALL_DECLARE(open)
 	else {
 		rc = syscall_generic_forwarding(__NR_open, ctx);
 	}
+#endif /* POSTK_DEBUG_ARCH_DEP_62 */
 
 	return rc;
 }
 
+#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
+SYSCALL_DECLARE(openat)
+{
+	const char *pathname_user = (const char *)ihk_mc_syscall_arg1(ctx);
+	int flags = (int)ihk_mc_syscall_arg2(ctx);
+	char *pathname;
+	int len = strlen_user(pathname_user) + 1;
+	long rc;
+
+	pathname = kmalloc(len, IHK_MC_AP_NOWAIT);
+	if (!pathname) {
+		dkprintf("%s: error allocating pathname\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	if (copy_from_user(pathname, pathname_user, len)) {
+		dkprintf("%s: error: copy_from_user pathname\n", __FUNCTION__);
+		rc = -EFAULT;
+		goto out;
+	}
+
+	dkprintf("openat(): pathname=%s\n", pathname);
+	if (!strcmp(pathname, XPMEM_DEV_PATH)) {
+		rc = xpmem_open(__NR_openat, pathname, flags, ctx);
+	} else {
+		rc = syscall_generic_forwarding(__NR_openat, ctx);
	}
+
+out:
+	kfree(pathname);
+	return rc;
+}
+#endif /* POSTK_DEBUG_ARCH_DEP_62 */
+
 SYSCALL_DECLARE(close)
 {
 	int fd = ihk_mc_syscall_arg0(ctx);
@@ -3324,19 +3618,35 @@ perf_stop(struct mc_perf_event *event)
 	int counter_id;
 	struct mc_perf_event *leader = event->group_leader, *sub;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_30
 	counter_id = leader->counter_id;
 	if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) |
 	   (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) {
-		ihk_mc_perfctr_stop(1UL << counter_id);
+		ihk_mc_perfctr_stop(counter_id);
 	}
 
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		counter_id = sub->counter_id;
 		if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) |
+		   (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) {
+			ihk_mc_perfctr_stop(counter_id);
+		}
+	}
+#else
+	counter_id = leader->counter_id;
+	if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) |
+	   (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) {
+		ihk_mc_perfctr_stop(1UL << counter_id);
+	}
+
+	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+		counter_id = sub->counter_id;
+		if((1UL << counter_id & X86_IA32_PERF_COUNTERS_MASK) |
 		   (1UL << counter_id & X86_IA32_FIXED_PERF_COUNTERS_MASK)) {
 			ihk_mc_perfctr_stop(1UL << counter_id);
 		}
 	}
+#endif /*POSTK_DEBUG_TEMP_FIX_30*/
 }
 
 static int
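POSTK_DEBUG_TEMP_FIX_30 above switches ihk_mc_perfctr_start()/ihk_mc_perfctr_stop() from taking a counter bitmask to taking a plain counter index (the prototype change appears in lib/include/ihk/perfctr.h further down). A minimal sketch of the mask-versus-index confusion the fix removes; perfctr_stop() here is a stand-in, not the IHK implementation:

#include <assert.h>

/* New-style API: `counter` is an index, not a mask. */
static int perfctr_stop(int counter)
{
	assert(counter >= 0 && counter < 64);	/* an index, e.g. 0..63 */
	/* ... program the PMU register for this one counter ... */
	return 0;
}

int main(void)
{
	int counter_id = 5;

	/* The old call sites passed a mask into the index parameter:
	 * perfctr_stop(1UL << counter_id);  -- would stop counter 32! */
	perfctr_stop(counter_id);		/* correct: pass the index */
	return 0;
}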
@@ -3648,6 +3958,9 @@ SYSCALL_DECLARE(rt_sigtimedwait)
 		cpu_pause();
 	}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_33 /* sigevent missed fix */
+	thread->sigevent = 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_33 */
 	lock = &thread->sigcommon->lock;
 	head = &thread->sigcommon->sigpending;
@@ -3705,7 +4018,9 @@ SYSCALL_DECLARE(rt_sigtimedwait)
 			return -EINTR;
 		}
 		mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
+#ifndef POSTK_DEBUG_TEMP_FIX_33 /* sigevent missed fix */
 		thread->sigevent = 0;
+#endif /* !POSTK_DEBUG_TEMP_FIX_33 */
 	}
 
 	if(info){
@@ -3778,6 +4093,9 @@ do_sigsuspend(struct thread *thread, const sigset_t *set)
 			cpu_pause();
 		}
 	}
+#ifdef POSTK_DEBUG_TEMP_FIX_33 /* sigevent missed fix */
+	thread->sigevent = 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_33 */
 	lock = &thread->sigcommon->lock;
 	head = &thread->sigcommon->sigpending;
@@ -3800,7 +4118,9 @@ do_sigsuspend(struct thread *thread, const sigset_t *set)
 		}
 		if(&pending->list == head){
 			mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
+#ifndef POSTK_DEBUG_TEMP_FIX_33 /* sigevent missed fix */
 			thread->sigevent = 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_33 */
 			continue;
 		}
@@ -3986,12 +4306,21 @@ change_attr_process_memory_range(struct process_vm *vm,
 			}
 		}
+#ifdef POSTK_DEBUG_TEMP_FIX_37
+		if((error = change_proc(range, arg)) != 0){
+#else
 		if(!(error = change_proc(range, arg))){
+#endif /*POSTK_DEBUG_TEMP_FIX_37*/
 			break;
 		}
 		range = next_process_memory_range(vm, range);
 	}
+
+#ifdef POSTK_DEBUG_TEMP_FIX_37
+	if(error == 0){
+#else
 	if(error){
+#endif /*POSTK_DEBUG_TEMP_FIX_37*/
 		next = next_process_memory_range(vm, range);
 		if(!next)
 			next = range;
@@ -4224,12 +4553,27 @@ struct shminfo the_shminfo = {
 struct shm_info the_shm_info = { 0, };
 
 time_t time(void) {
+#ifndef POSTK_DEBUG_ARCH_DEP_13 /* arch depend tmp hide */
 	struct syscall_request sreq IHK_DMA_ALIGN;
 	struct thread *thread = cpu_local_var(current);
+#endif /* POSTK_DEBUG_ARCH_DEP_13 */
+
+#ifdef POSTK_DEBUG_ARCH_DEP_49 /* time() local calculate added. */
+	struct timespec ats;
+
+	if (gettime_local_support) {
+		calculate_time_from_tsc(&ats);
+		return ats.tv_sec;
+	}
+#endif /* POSTK_DEBUG_ARCH_DEP_49 */
+
+#ifdef POSTK_DEBUG_ARCH_DEP_13 /* arch depend tmp hide */
+	return (time_t)0;
+#else /* POSTK_DEBUG_ARCH_DEP_13 */
 	sreq.number = __NR_time;
 	sreq.args[0] = (uintptr_t)NULL;
 	return (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid);
+#endif /* POSTK_DEBUG_ARCH_DEP_13 */
 }
 
 static int make_shmid(struct shmobj *obj)
@@ -4502,7 +4846,12 @@ SYSCALL_DECLARE(shmat)
 		}
 	}
 	else {
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+		error = search_free_space(cpu_local_var(current), len,
+				region->map_end, obj->pgshift, &addr);
+#else
 		error = search_free_space(len, region->map_end, obj->pgshift, &addr);
+#endif /* POSTK_DEBUG_ARCH_DEP_27 */
 		if (error) {
 			ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
 			shmobj_list_unlock();
@@ -5078,6 +5427,14 @@ SYSCALL_DECLARE(setrlimit)
 	struct rlimit *rlm = (struct rlimit *)ihk_mc_syscall_arg1(ctx);
 	struct thread *thread = cpu_local_var(current);
 	int i;
+#ifdef POSTK_DEBUG_TEMP_FIX_3 /* If rlim_cur is greater than rlim_max, return -EINVAL (S64FX_19) */
+	struct rlimit new_rlim;
+
+	if (copy_from_user(&new_rlim, rlm, sizeof(*rlm)))
+		return -EFAULT;
+	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+		return -EINVAL;
+#endif /* POSTK_DEBUG_TEMP_FIX_3 */
 	int mcresource;
 
 	switch(resource){
@@ -5100,8 +5457,12 @@ SYSCALL_DECLARE(setrlimit)
 		return syscall_generic_forwarding(__NR_setrlimit, ctx);
 	}
 
+#ifdef POSTK_DEBUG_TEMP_FIX_3 /* If rlim_cur is greater than rlim_max, return -EINVAL (S64FX_19) */
+	memcpy(thread->proc->rlimit + mcresource, &new_rlim, sizeof(new_rlim));
+#else /* POSTK_DEBUG_TEMP_FIX_3 */
 	if(copy_from_user(thread->proc->rlimit + mcresource, rlm, sizeof(struct rlimit)))
 		return -EFAULT;
+#endif /* POSTK_DEBUG_TEMP_FIX_3 */
 
 	return 0;
 }
@@ -5160,7 +5521,11 @@ SYSCALL_DECLARE(getrusage)
 			   child->status == PS_RUNNING &&
 			   !child->in_kernel){
 				child->times_update = 0;
+#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
+				ihk_mc_interrupt_cpu(child->cpu_id, ihk_mc_get_vector(IHK_GV_IKC));
+#else /* POSTK_DEBUG_ARCH_DEP_8 */
 				ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(child->cpu_id)->apic_id, 0xd1);
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
 			}
 			else
 				child->times_update = 1;
@@ -5184,10 +5549,15 @@ SYSCALL_DECLARE(getrusage)
 		kusage.ru_maxrss = proc->maxrss / 1024;
 	}
 	else if(who == RUSAGE_CHILDREN){
+#ifdef POSTK_DEBUG_TEMP_FIX_72 /* fix RUSAGE_CHILDREN time */
+		ts_to_tv(&kusage.ru_utime, &proc->utime_children);
+		ts_to_tv(&kusage.ru_stime, &proc->stime_children);
+#else /* POSTK_DEBUG_TEMP_FIX_72 */
 		tsc_to_ts(thread->user_tsc, &ats);
 		ts_to_tv(&kusage.ru_utime, &ats);
 		tsc_to_ts(thread->system_tsc, &ats);
 		ts_to_tv(&kusage.ru_stime, &ats);
+#endif /* POSTK_DEBUG_TEMP_FIX_72 */
 		kusage.ru_maxrss = proc->maxrss_children / 1024;
 	}
@@ -5610,6 +5980,19 @@ static int ptrace_attach(int pid)
 	}
 	parent = child->parent;
 
+#ifdef POSTK_DEBUG_TEMP_FIX_53 /* attach for child-process fix. */
*/ + dkprintf("ptrace_attach() parent->pid=%d\n", parent->pid); + + mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); + list_del(&child->siblings_list); + list_add_tail(&child->ptraced_siblings_list, &parent->ptraced_children_list); + mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); + + mcs_rwlock_writer_lock_noirq(&proc->children_lock, &childlock); + list_add_tail(&child->siblings_list, &proc->children_list); + child->parent = proc; + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); +#else /* POSTK_DEBUG_TEMP_FIX_53 */ /* XXX: tmp */ if (parent != proc) { @@ -5625,6 +6008,7 @@ static int ptrace_attach(int pid) child->parent = proc; mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); } +#endif /* POSTK_DEBUG_TEMP_FIX_53 */ child->ptrace = PT_TRACED | PT_TRACE_EXEC; @@ -6293,11 +6677,22 @@ SYSCALL_DECLARE(sched_getaffinity) int ret; dkprintf("%s() len: %d, mask: %p\n", __FUNCTION__, len, u_cpu_set); +#ifdef POSTK_DEBUG_TEMP_FIX_5 /* sched_getaffinity arguments check add (S64FX_10) */ + if (len * 8 < num_processors) { + kprintf("%s:%d Too small buffer.\n", __FILE__, __LINE__); + return -EINVAL; + } + if (len & (sizeof(unsigned long)-1)) { + kprintf("%s:%d Size not align to unsigned long.\n", __FILE__, __LINE__); + return -EINVAL; + } +#else /* POSTK_DEBUG_TEMP_FIX_5 */ if (!len || u_cpu_set == (cpu_set_t *)-1) return -EINVAL; if ((len * BITS_PER_BYTE) < __CPU_SETSIZE) return -EINVAL; +#endif /* POSTK_DEBUG_TEMP_FIX_5 */ len = MIN2(len, sizeof(k_cpu_set)); @@ -6332,7 +6727,11 @@ SYSCALL_DECLARE(sched_getaffinity) } dkprintf("%s() len: %d, ret: %d\n", __FUNCTION__, len, ret); +#ifdef POSTK_DEBUG_TEMP_FIX_58 /* sched_getafifnity return value fix */ + return ret; +#else /* POSTK_DEBUG_TEMP_FIX_58 */ return len; +#endif /* POSTK_DEBUG_TEMP_FIX_58 */ } SYSCALL_DECLARE(get_cpu_id) @@ -6417,7 +6816,12 @@ SYSCALL_DECLARE(setitimer) if(!new){ return 0; } +#ifdef POSTK_DEBUG_TEMP_FIX_40 /* setitimer copy_from_user() error return fix. */ if(copy_from_user(&thread->itimer_virtual, new, sizeof(struct itimerval))) + return -EFAULT; +#else /* POSTK_DEBUG_TEMP_FIX_40 */ + if(copy_from_user(&thread->itimer_virtual, new, sizeof(struct itimerval))) +#endif /* POSTK_DEBUG_TEMP_FIX_40 */ thread->itimer_virtual_value.tv_sec = 0; thread->itimer_virtual_value.tv_nsec = 0; if(thread->itimer_virtual.it_value.tv_sec == 0 && @@ -6438,7 +6842,12 @@ SYSCALL_DECLARE(setitimer) if(!new){ return 0; } +#ifdef POSTK_DEBUG_TEMP_FIX_40 /* setitimer copy_from_user() error return fix. 
@@ -6417,7 +6816,12 @@ SYSCALL_DECLARE(setitimer)
 		if(!new){
 			return 0;
 		}
+#ifdef POSTK_DEBUG_TEMP_FIX_40 /* setitimer copy_from_user() error return fix. */
 		if(copy_from_user(&thread->itimer_virtual, new, sizeof(struct itimerval)))
+			return -EFAULT;
+#else /* POSTK_DEBUG_TEMP_FIX_40 */
+		if(copy_from_user(&thread->itimer_virtual, new, sizeof(struct itimerval)))
+#endif /* POSTK_DEBUG_TEMP_FIX_40 */
 		thread->itimer_virtual_value.tv_sec = 0;
 		thread->itimer_virtual_value.tv_nsec = 0;
 		if(thread->itimer_virtual.it_value.tv_sec == 0 &&
@@ -6438,7 +6842,12 @@ SYSCALL_DECLARE(setitimer)
 		if(!new){
 			return 0;
 		}
+#ifdef POSTK_DEBUG_TEMP_FIX_40 /* setitimer copy_from_user() error return fix. */
 		if(copy_from_user(&thread->itimer_prof, new, sizeof(struct itimerval)))
+			return -EFAULT;
+#else /* POSTK_DEBUG_TEMP_FIX_40 */
+		if(copy_from_user(&thread->itimer_prof, new, sizeof(struct itimerval)))
+#endif /* POSTK_DEBUG_TEMP_FIX_40 */
 		thread->itimer_prof_value.tv_sec = 0;
 		thread->itimer_prof_value.tv_nsec = 0;
 		if(thread->itimer_prof.it_value.tv_sec == 0 &&
@@ -6533,7 +6942,11 @@ SYSCALL_DECLARE(clock_gettime)
 			   child->status == PS_RUNNING &&
 			   !child->in_kernel){
 				child->times_update = 0;
+#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
+				ihk_mc_interrupt_cpu(child->cpu_id, ihk_mc_get_vector(IHK_GV_IKC));
+#else /* POSTK_DEBUG_ARCH_DEP_8 */
 				ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(child->cpu_id)->apic_id, 0xd1);
+#endif /* POSTK_DEBUG_ARCH_DEP_8 */
 			}
 		}
 		ats.tv_sec = proc->utime.tv_sec;
@@ -6688,6 +7101,9 @@ SYSCALL_DECLARE(nanosleep)
 			ret = -EINTR;
 			break;
 		}
+#ifdef POSTK_DEBUG_ARCH_DEP_43
+		cpu_pause();
+#endif
 	}
 
 	if ((ret == -EINTR) && rem) {
@@ -7292,8 +7708,14 @@ SYSCALL_DECLARE(mremap)
 			goto out;
 		}
 		need_relocate = 1;
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+		error = search_free_space(cpu_local_var(current), newsize,
+				vm->region.map_end,
+				range->pgshift, (intptr_t *)&newstart);
+#else
 		error = search_free_space(newsize, vm->region.map_end,
 				range->pgshift, (intptr_t *)&newstart);
+#endif /* POSTK_DEBUG_ARCH_DEP_27 */
 		if (error) {
 			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
 					"search failed. %d\n",
@@ -9086,17 +9508,33 @@ SYSCALL_DECLARE(pmc_init)
 	return ihk_mc_perfctr_init(counter, type, mode);
 }
 
+#ifdef POSTK_DEBUG_TEMP_FIX_30
+SYSCALL_DECLARE(pmc_start)
+{
+	unsigned long counter = ihk_mc_syscall_arg0(ctx);
+	return ihk_mc_perfctr_start((int)counter);
+}
+#else
 SYSCALL_DECLARE(pmc_start)
 {
 	unsigned long counter = ihk_mc_syscall_arg0(ctx);
 	return ihk_mc_perfctr_start(1 << counter);
 }
+#endif /*POSTK_DEBUG_TEMP_FIX_30*/
 
+#ifdef POSTK_DEBUG_TEMP_FIX_30
+SYSCALL_DECLARE(pmc_stop)
+{
+	unsigned long counter = ihk_mc_syscall_arg0(ctx);
+	return ihk_mc_perfctr_stop((int)counter);
+}
+#else
 SYSCALL_DECLARE(pmc_stop)
 {
 	unsigned long counter = ihk_mc_syscall_arg0(ctx);
 	return ihk_mc_perfctr_stop(1 << counter);
 }
+#endif /*POSTK_DEBUG_TEMP_FIX_30*/
 
 SYSCALL_DECLARE(pmc_reset)
 {
@@ -9450,7 +9888,13 @@ set_cputime(int mode)
 long syscall(int num, ihk_mc_user_context_t *ctx)
 {
 	long l;
+#if !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56)
+#ifdef PROFILE_ENABLE
 	struct thread *thread = cpu_local_var(current);
+#endif // PROFILE_ENABLE
+#else /* !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56) */
+	struct thread *thread = cpu_local_var(current);
+#endif /* !defined(POSTK_DEBUG_TEMP_FIX_60) && !defined(POSTK_DEBUG_TEMP_FIX_56) */
 
 #ifdef DISABLE_SCHED_YIELD
 	if (num != __NR_sched_yield)
@@ -9519,10 +9963,22 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
 		l = syscall_generic_forwarding(num, ctx);
 	}
 
+#if defined(POSTK_DEBUG_TEMP_FIX_60) && defined(POSTK_DEBUG_TEMP_FIX_56)
+	check_signal(l, NULL, num);
+#elif defined(POSTK_DEBUG_TEMP_FIX_60) /* sched_yield called check_signal fix. */
+	if (num != __NR_futex) {
+		check_signal(l, NULL, num);
+	}
+#elif defined(POSTK_DEBUG_TEMP_FIX_56) /* in futex_wait() signal handling fix. */
+	if (num != __NR_sched_yield) {
+		check_signal(l, NULL, num);
+	}
+#else /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */
 	if (!list_empty(&thread->sigpending) ||
 	    !list_empty(&thread->sigcommon->sigpending)) {
 		check_signal(l, NULL, num);
 	}
+#endif /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */
 
 #ifdef PROFILE_ENABLE
 	{
@@ -9545,10 +10001,22 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
 	}
 #endif // PROFILE_ENABLE
 
+#if defined(POSTK_DEBUG_TEMP_FIX_60) && defined(POSTK_DEBUG_TEMP_FIX_56)
+	check_need_resched();
+#elif defined(POSTK_DEBUG_TEMP_FIX_60) /* sched_yield called check_signal fix. */
+	if (num != __NR_futex) {
+		check_need_resched();
+	}
+#elif defined(POSTK_DEBUG_TEMP_FIX_56) /* in futex_wait() signal handling fix. */
+	if (num != __NR_sched_yield) {
+		check_need_resched();
+	}
+#else /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */
 	if (num != __NR_sched_yield && num != __NR_futex) {
 		check_need_resched();
 	}
+#endif /* POSTK_DEBUG_TEMP_FIX_60 && POSTK_DEBUG_TEMP_FIX_56 */
 
 	if (cpu_local_var(current)->proc->ptrace) {
 		ptrace_syscall_exit(cpu_local_var(current));
diff --git a/kernel/xpmem.c b/kernel/xpmem.c
index 9daaff1b..462a6274 100644
--- a/kernel/xpmem.c
+++ b/kernel/xpmem.c
@@ -1,3 +1,4 @@
+/* xpmem.c COPYRIGHT FUJITSU LIMITED 2017 */
 /**
  * \file xpmem.c
  * License details are found in the file LICENSE.
@@ -34,6 +35,19 @@
 
 struct xpmem_partition *xpmem_my_part = NULL;	/* pointer to this partition */
 
+#if defined(POSTK_DEBUG_ARCH_DEP_46) || defined(POSTK_DEBUG_ARCH_DEP_62)
+int xpmem_open(int num, const char *pathname,
+	int flags, ihk_mc_user_context_t *ctx)
+{
+	int ret;
+	struct thread *thread = cpu_local_var(current);
+	struct process *proc = thread->proc;
+	int fd;
+	struct mckfd *mckfd;
+	long irqstate;
+
+	XPMEM_DEBUG("call: syscall_num=%d, pathname=%s, flags=%d", num, pathname, flags);
+#else /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */
 int xpmem_open(
 	ihk_mc_user_context_t *ctx)
 {
@@ -48,6 +62,7 @@ int xpmem_open(
 	long irqstate;
 
 	XPMEM_DEBUG("call: pathname=%s, flags=%d", pathname, flags);
+#endif /* POSTK_DEBUG_ARCH_DEP_46 || POSTK_DEBUG_ARCH_DEP_62 */
 
 	if (!xpmem_my_part) {
 		ret = xpmem_init();
@@ -56,6 +71,13 @@ int xpmem_open(
 		}
 	}
 
+#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
+	fd = syscall_generic_forwarding(num, ctx);
+	if(fd < 0){
+		XPMEM_DEBUG("syscall_num=%d error: fd=%d", num, fd);
+		return fd;
+	}
+#else /* POSTK_DEBUG_ARCH_DEP_62 */
 	request.number = __NR_open;
 	request.args[0] = (unsigned long)pathname;
 	request.args[1] = flags;
@@ -64,6 +86,7 @@ int xpmem_open(
 		XPMEM_DEBUG("__NR_open error: fd=%d", fd);
 		return fd;
 	}
+#endif /* POSTK_DEBUG_ARCH_DEP_62 */
 
 	ret = __xpmem_open();
 	if (ret) {
diff --git a/lib/bitops.c b/lib/bitops.c
index 868a59a8..8369de1f 100644
--- a/lib/bitops.c
+++ b/lib/bitops.c
@@ -1,4 +1,4 @@
-/* bitops.c COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #include
 
 #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
diff --git a/lib/include/bitops-__ffs.h b/lib/include/bitops-__ffs.h
index 6cc4152f..a7d23e44 100644
--- a/lib/include/bitops-__ffs.h
+++ b/lib/include/bitops-__ffs.h
@@ -1,4 +1,4 @@
-/* bitops-__ffs.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops-__ffs.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS___FFS_H
 #define INCLUDE_BITOPS___FFS_H
diff --git a/lib/include/bitops-clear_bit.h b/lib/include/bitops-clear_bit.h
index 4db6869f..7687d6ce 100644
--- a/lib/include/bitops-clear_bit.h
+++ b/lib/include/bitops-clear_bit.h
@@ -1,4 +1,4 @@
-/* bitops-clear_bit.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops-clear_bit.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS_CLEAR_BIT_H
 #define INCLUDE_BITOPS_CLEAR_BIT_H
diff --git a/lib/include/bitops-ffz.h b/lib/include/bitops-ffz.h
index e07afefe..1f3587d5 100644
--- a/lib/include/bitops-ffz.h
+++ b/lib/include/bitops-ffz.h
@@ -1,4 +1,4 @@
-/* bitops-ffz.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops-ffz.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS_FFZ_H
 #define INCLUDE_BITOPS_FFZ_H
diff --git a/lib/include/bitops-fls.h b/lib/include/bitops-fls.h
index a0abe9ac..88a8c19f 100644
--- a/lib/include/bitops-fls.h
+++ b/lib/include/bitops-fls.h
@@ -1,4 +1,4 @@
-/* bitops-fls.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops-fls.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS_FLS_H
 #define INCLUDE_BITOPS_FLS_H
diff --git a/lib/include/bitops-set_bit.h b/lib/include/bitops-set_bit.h
index fb2b8e88..694bdd11 100644
--- a/lib/include/bitops-set_bit.h
+++ b/lib/include/bitops-set_bit.h
@@ -1,4 +1,4 @@
-/* bitops-set_bit.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops-set_bit.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS_SET_BIT_H
 #define INCLUDE_BITOPS_SET_BIT_H
diff --git a/lib/include/bitops.h b/lib/include/bitops.h
index e6d7d9f1..61a61cf2 100644
--- a/lib/include/bitops.h
+++ b/lib/include/bitops.h
@@ -1,4 +1,4 @@
-/* bitops.h COPYRIGHT FUJITSU LIMITED 2014 */
+/* bitops.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 #ifndef INCLUDE_BITOPS_H
 #define INCLUDE_BITOPS_H
diff --git a/lib/include/ihk/cpu.h b/lib/include/ihk/cpu.h
index d4850089..ce35e4ee 100644
--- a/lib/include/ihk/cpu.h
+++ b/lib/include/ihk/cpu.h
@@ -9,6 +9,7 @@
 /*
  * HISTORY
  */
+/* cpu.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 
 #ifndef IHK_CPU_H
 #define IHK_CPU_H
@@ -88,11 +89,21 @@ void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
 void ihk_mc_init_user_tlsbase(ihk_mc_user_context_t *ctx,
                               unsigned long tls_base_addr);
 
+#ifdef POSTK_DEBUG_ARCH_DEP_42 /* /proc/cpuinfo support added. */
+long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int *eofp);
+#endif /* POSTK_DEBUG_ARCH_DEP_42 */
+
 enum ihk_mc_user_context_regtype {
 	IHK_UCR_STACK_POINTER = 1,
 	IHK_UCR_PROGRAM_COUNTER = 2,
 };
 
+#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_process() function */
+struct thread;
+void arch_clone_thread(struct thread *othread, unsigned long pc,
+	unsigned long sp, struct thread *nthread);
+#endif /* POSTK_DEBUG_ARCH_DEP_23 */
+
 void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx,
                                 enum ihk_mc_user_context_regtype reg,
                                 unsigned long value);
@@ -104,15 +115,19 @@ enum ihk_asr_type {
 	IHK_ASR_X86_GS,
 };
 
+#ifndef POSTK_DEBUG_ARCH_DEP_75 /* x86 depend hide */
 /* Local IRQ vectors */
 #define LOCAL_TIMER_VECTOR   0xef
 #define LOCAL_PERF_VECTOR    0xf0
+#endif /* !POSTK_DEBUG_ARCH_DEP_75 */
 
 #define IHK_TLB_FLUSH_IRQ_VECTOR_START   68
 #define IHK_TLB_FLUSH_IRQ_VECTOR_SIZE    64
 #define IHK_TLB_FLUSH_IRQ_VECTOR_END     (IHK_TLB_FLUSH_IRQ_VECTOR_START + IHK_TLB_FLUSH_IRQ_VECTOR_SIZE)
 
+#ifndef POSTK_DEBUG_ARCH_DEP_75 /* x86 depend hide */
 #define LOCAL_SMP_FUNC_CALL_VECTOR 0xf1
+#endif /* !POSTK_DEBUG_ARCH_DEP_75 */
 
 int ihk_mc_arch_set_special_register(enum ihk_asr_type, unsigned long value);
 int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
diff --git a/lib/include/ihk/debug.h b/lib/include/ihk/debug.h
index 92ad0cbc..bf117b59 100644
--- a/lib/include/ihk/debug.h
+++ b/lib/include/ihk/debug.h
@@ -1,3 +1,4 @@
+/* debug.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 /**
  * \file debug.h
  * License details are found in the file LICENSE.
@@ -17,6 +18,21 @@
 #include
 #include
 
+#ifdef POSTK_DEBUG_ARCH_DEP_9 /* want to add a static assertion */
+
+/* Force a compilation error if condition is false */
+#define STATIC_ASSERT(cond) _STATIC_ASSERT(cond, __LINE__)
+#define _STATIC_ASSERT(cond, line) __STATIC_ASSERT(cond, line)
+#define __STATIC_ASSERT(cond, line) \
+	static void __static_assert_ ## line (void) { \
+		STATIC_ASSERT_LOCAL(cond); \
+	}
+
+/* Force a compilation error if condition is false */
+#define STATIC_ASSERT_LOCAL(cond) ((void)sizeof(struct { int:-!!!(cond); }))
+
+#endif /* POSTK_DEBUG_ARCH_DEP_9 */
+
 struct ihk_kmsg_buf {
 	int tail;
 	int len;
diff --git a/lib/include/ihk/perfctr.h b/lib/include/ihk/perfctr.h
index 4df7d011..f63618ec 100644
--- a/lib/include/ihk/perfctr.h
+++ b/lib/include/ihk/perfctr.h
@@ -1,3 +1,4 @@
+/* perfctr.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 /**
  * \file perfctr.h
  * License details are found in the file LICENSE.
@@ -13,6 +14,10 @@
 #ifndef HEADER_GENERIC_IHK_PERFCTR_H
 #define HEADER_GENERIC_IHK_PERFCTR_H
 
+#ifdef POSTK_DEBUG_TEMP_FIX_29
+#include
+#endif /*POSTK_DEBUG_TEMP_FIX_29*/
+
 #define PERFCTR_USER_MODE 0x01
 #define PERFCTR_KERNEL_MODE 0x02
 
@@ -48,10 +53,20 @@ enum ihk_perfctr_type {
 	PERFCTR_MAX_TYPE,
 };
 
+#ifdef POSTK_DEBUG_TEMP_FIX_29
+int ihk_mc_perfctr_init(int counter, uint64_t config, int mode);
+int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode);
+#else
 int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode);
 int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode);
+#endif/*POSTK_DEBUG_TEMP_FIX_29*/
+#ifdef POSTK_DEBUG_TEMP_FIX_30
+int ihk_mc_perfctr_start(int counter);
+int ihk_mc_perfctr_stop(int counter);
+#else
 int ihk_mc_perfctr_start(unsigned long counter_mask);
 int ihk_mc_perfctr_stop(unsigned long counter_mask);
+#endif/*POSTK_DEBUG_TEMP_FIX_30*/
 int ihk_mc_perfctr_fixed_init(int counter, int mode);
 int ihk_mc_perfctr_reset(int counter);
 int ihk_mc_perfctr_set(int counter, long value);
diff --git a/lib/include/mc_perf_event.h b/lib/include/mc_perf_event.h
index 8cdacde1..f22093c9 100644
--- a/lib/include/mc_perf_event.h
+++ b/lib/include/mc_perf_event.h
@@ -1,6 +1,10 @@
+/* mc_perf_event.h COPYRIGHT FUJITSU LIMITED 2016 */
 #ifndef MC_PERF_EVNET_H
 #define MC_PERF_EVENT_H
 
+#ifdef POSTK_DEBUG_TEMP_FIX_32
+#include
+#endif /*POSTK_DEBUG_TEMP_FIX_32*/
 #include
 
 struct perf_event_attr;
diff --git a/lib/include/memory.h b/lib/include/memory.h
index 7bb380bc..063103d9 100644
--- a/lib/include/memory.h
+++ b/lib/include/memory.h
@@ -9,6 +9,7 @@
 /*
  * HISTORY
  */
+/* memory.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 
 #ifndef __HEADER_GENERIC_MEMORY_H
 #define __HEADER_GENERIC_MEMORY_H
@@ -45,6 +46,15 @@
 int setlong_user(long *dst, long data);
 int setint_user(int *dst, int data);
 int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz);
 int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz);
+#ifdef POSTK_DEBUG_ARCH_DEP_27
+struct thread;
+int search_free_space(struct thread *thread, size_t len, intptr_t hint,
+	int pgshift, intptr_t *addrp);
+#endif /* POSTK_DEBUG_ARCH_DEP_27 */
+
+#ifdef POSTK_DEBUG_TEMP_FIX_52 /* supports NUMA for memory area determination */
+int is_mckernel_memory(unsigned long phys);
+#endif /* POSTK_DEBUG_TEMP_FIX_52 */
 
 #endif
diff --git a/lib/include/types.h b/lib/include/types.h
index 703a706c..a8cd3d87 100644
--- a/lib/include/types.h
+++ b/lib/include/types.h
@@ -9,6 +9,7 @@
 /*
  * HISTORY
  */
+/* types.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
 
 #ifndef TYPES_H
 #define TYPES_H
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7846226a..1544b861 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1,3 +1,4 @@
+/* vsprintf.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
 /*
  * linux/lib/vsprintf.c
  *
@@ -904,12 +905,19 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 	char *str, *end, c;
 	int read;
 	struct printf_spec spec = {0};
+#ifdef POSTK_DEBUG_TEMP_FIX_28 /* vsnprintf size excess return value fix. */
+	int ret = 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_28 */
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
 	if (unlikely((int) size < 0)) {
 		/* There can be only one.. */
+#ifdef POSTK_DEBUG_TEMP_FIX_28 /* vsnprintf size excess return value fix. */
+		return ret;
+#else /* POSTK_DEBUG_TEMP_FIX_28 */
 		return 0;
+#endif /* POSTK_DEBUG_TEMP_FIX_28 */
 	}
 
 	str = buf;
@@ -1057,14 +1065,29 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 	}
 
 	if (size > 0) {
+#ifdef POSTK_DEBUG_TEMP_FIX_28 /* vsnprintf size excess return value fix. */
+		if (str < end) {
+			*str = '\0';
+			ret = str - buf;
+		}
+		else {
+			end[-1] = '\0';
+			ret = end - buf - 1;
+		}
+#else /* POSTK_DEBUG_TEMP_FIX_28 */
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
+#endif /* POSTK_DEBUG_TEMP_FIX_28 */
 	}
 
 	/* the trailing null byte doesn't count towards the total */
+#ifdef POSTK_DEBUG_TEMP_FIX_28 /* vsnprintf size excess return value fix. */
+	return ret;
+#else /* POSTK_DEBUG_TEMP_FIX_28 */
 	return str-buf;
+#endif /* POSTK_DEBUG_TEMP_FIX_28 */
 }
 EXPORT_SYMBOL(vsnprintf);
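For comparison with POSTK_DEBUG_TEMP_FIX_28 above: C99 (v)snprintf returns the would-be length when output is truncated, whereas the patched kernel vsnprintf returns the number of characters actually stored, excluding the terminating NUL. A small userspace probe of the C99 convention (illustrative only):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[8];

	/* C99: returns 11 (strlen("hello world")) even though only
	 * 7 characters plus the NUL fit into buf. The patched kernel
	 * vsnprintf would return 7 here instead. */
	int n = snprintf(buf, sizeof(buf), "hello world");

	printf("snprintf returned %d, stored \"%s\" (%zu chars)\n",
	       n, buf, strlen(buf));
	return 0;
}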