Compare commits


157 Commits

SHA1 Message Date
faa357d5a6 Merge "configure.ac: Update version number to 1.5.0-knl+hfi" into development+unimap+hfi+OFP 2018-06-21 02:39:43 +00:00
653aba17a1 mcreboot: load kernel modules from under /tmp
Change-Id: I81a8c451b6dd556a00699a2c8d0c7df5a99e4ea2
2018-06-20 20:53:00 +09:00
7736e25ca4 mpimcexec: fix empty ${COMMAND} check
Change-Id: I9e37e952fb756a4aafb4b2e218844120fe59af7b
2018-06-20 20:50:33 +09:00
73d16a9d79 configure.ac: Update version number to 1.5.0-knl+hfi
Change-Id: I9d36bcfe4b64a772f6492e39a1466a2e73ddd682
2018-06-20 17:07:30 +09:00
922bd7e6eb mpimcexec: use PJM_PROC_BY_NODE if available
Change-Id: Id8991f78e4d3bdfbb20adf202b43762a0d915c47
2018-06-20 15:18:53 +09:00
0d99072109 mpimcexec: man page proof-reading
Change-Id: I58223dd86e17fa896fe3e258d2dc2e5b881a0072
2018-06-18 16:31:42 +09:00
3ced3f6080 mcexec: Options -m and -M are described in man page
Change-Id: Ie4a860c8753af654ee842b16aabb9620e68f71a1
2018-06-18 15:00:29 +09:00
d9ff940528 mpimcexec: Man page
Change-Id: I99ea2821500cc1cfadc912d93c88d308b92ed9cf
2018-06-18 14:59:40 +09:00
cd63ec877d mpimcexec: Error handling is added
Change-Id: Id4e94adad2afff324b154d0c8be270ecc7568bab
2018-06-18 14:59:18 +09:00
6c0bb9e576 HFI1: Range-check proc->fd_priv_table[]
sockioctl01.c in LTP calls ioctl(1025, ...) and causes a kernel page fault
without the range check (see the sketch below).

Change-Id: I4117783e20107f274c0857b09745f12a5cc5ce2f
2018-06-13 00:31:44 +09:00
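
As a standalone illustration of the fix (the table size and helper name are
made up here, not the actual McKernel code), the lookup simply rejects
out-of-range descriptors before indexing:
```
#include <stddef.h>

#define FD_PRIV_TABLE_SIZE 1024             /* illustrative size */

static void *fd_priv_table[FD_PRIV_TABLE_SIZE];

/* Return the per-fd private data, or NULL when fd is out of range.
 * Without the bounds check, ioctl(1025, ...) indexes past the table
 * and page-faults inside the kernel. */
static void *fd_priv_lookup(int fd)
{
    if (fd < 0 || fd >= FD_PRIV_TABLE_SIZE)
        return NULL;
    return fd_priv_table[fd];
}
```
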
ca9894108b OFP: mpimcexec: use MPI_LOCALNRANKS for ppn if available 2018-06-13 00:31:44 +09:00
3f26e44f85 mremap: Don't premap destination vm_range
mremap works in the following steps:
(1) Unmap the destination memory area
(2) Create a new vm_range with add_process_memory_range
(3) Move the PTEs of the source range to the destination range by using move_pte_range

The problem is that step (3) expects the destination to have no physical
pages mapped, but step (2) premaps the destination when the anonymous-map
premapping optimization is turned on (see the sketch below).

Change-Id: Ieeebd799b7169b9a6f6f658c204c31f49817030f
2018-06-13 00:31:44 +09:00
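
A self-contained toy of the invariant step (3) relies on: every destination
PTE must be empty before the move, which a premapped range violates. The
flat array standing in for a page table is purely illustrative:
```
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Toy PTE move: every destination slot must be empty (no premapped
 * page) before the source entries are moved over. */
static void move_pte_range(unsigned long *src, unsigned long *dst, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        assert(dst[i] == 0);    /* a premapped dst would trip this */
        dst[i] = src[i];
        src[i] = 0;
    }
}

int main(void)
{
    unsigned long src[2] = { 0x1000, 0x2000 }, dst[2] = { 0, 0 };

    move_pte_range(src, dst, 2);
    printf("dst[0]=%#lx dst[1]=%#lx\n", dst[0], dst[1]);
    return 0;
}
```
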
bacfb0c2b9 OFP: mpimcexec wrapper around mpirun for OFP users 2018-06-13 00:31:43 +09:00
09f63483cc OFP: temporary ANON mmap() rewrite 2018-06-13 00:31:43 +09:00
2f0c2aae9e OFP: avoid drop_caches in mcreboot 2018-06-13 00:31:43 +09:00
f7b277a623 HFI1: use ihk_mc_pt_lookup_fault_pte() in SDMA/exp receive 2018-06-13 00:31:43 +09:00
a3aa96af19 MM: introduction of ihk_mc_pt_lookup_fault_pte() 2018-06-13 00:31:43 +09:00
91d732308d HFI1: shorten lock held spin for SDMA status changes 2018-06-13 00:31:43 +09:00
166c6105ef queued_spin_lock: fix compatibility with Linux 2018-06-13 00:31:43 +09:00
5a2f8388a6 HFI1: handle Linux queued_spin_locks in the receive path as well 2018-06-13 00:31:42 +09:00
8164b63fc2 HFI1: port to IFS 10.7 rpv1 and support queued_spin_lock in Linux 3.10.0-693.11.6 2018-06-13 00:31:42 +09:00
af22ce62d2 HFI1: clean up and eliminate dead code in user SDMA 2018-06-13 00:31:42 +09:00
2eca75ead8 HFI1: clean up dead code in file ops 2018-06-13 00:31:42 +09:00
22992780cf HFI1: use kmalloc_cache_free() in clear_tid_node() for TID nodes 2018-06-13 00:31:42 +09:00
3043591e9a hfi1_user_exp_rcv_overlapping(): fix return value when overlapping 2018-06-13 00:31:42 +09:00
7e7c0f9ed3 init_process_vm(): remove vm_range_numa_policy_list (merge fix) 2018-06-13 00:31:42 +09:00
7193f165cc HFI1: fix page border iteration bug in hfi1_user_exp_rcv_setup() 2018-06-13 00:31:42 +09:00
c8c42576fd HFI1: increase lock timeout in sdma_send_txlist() 2018-06-13 00:31:42 +09:00
0412e1fcc6 HFI1: add generated user_sdma_request and user_sdma_txreq headers 2018-06-13 00:31:41 +09:00
238e346586 HFI1: use DWARF generated headers for user_sdma_request and user_sdma_txreq 2018-06-13 00:31:41 +09:00
0e57c715ad HFI1: look at DW_AT_upper_bound for resolving array size from DWARF info 2018-06-13 00:31:41 +09:00
3facd3dcca HFI1: release lock in sdma_send_txlist() when SDMA ring is full 2018-06-13 00:31:41 +09:00
ec5328de69 HFI1: refactor sdma_select_user_engine() 2018-06-13 00:31:41 +09:00
880dd6ddb2 page_fault_handler(): enable on-demand mapping of Linux ioremap area 2018-06-13 00:31:41 +09:00
898708b8b4 spinlock: rewrite spinlock to use Linux ticket head/tail format 2018-06-13 00:31:41 +09:00
b08331b21a ihk_hfi1_common.h: use IRQ restore unlock in spin_unlock 2018-06-13 00:31:41 +09:00
c196c996dd HFI: add dd to generated sdma_engine 2018-06-13 00:31:41 +09:00
20e179f6dc sdma_select_user_engine(): refactor selection code 2018-06-13 00:31:40 +09:00
32fbc015f5 HFI1: eliminate lots of dead code 2018-06-13 00:31:40 +09:00
558c250bb3 HFI1: generate headers for sdma_state and sdma_engine structures 2018-06-13 00:31:40 +09:00
96ea2d3658 dwarf-extract: support enumerations 2018-06-13 00:31:40 +09:00
9c91298ccf do_munmap(): hook to HFI1 deferred unmap 2018-06-13 00:31:40 +09:00
b08da83a51 hfi1_file_ioctl(): execute HFI1_IOCTL_TID_INVAL_READ locally 2018-06-13 00:31:40 +09:00
fcc8310454 HFI1: track receive TIDs in a tree 2018-06-13 00:31:40 +09:00
96b8b30516 MM: facility for deferred munmap()
Conflicts:
	kernel/process.c
2018-06-13 00:31:40 +09:00
521e0dc707 HFI1: add a bunch of fields to hfi1_devdata and hfi1_filedata for receive TID handling, do necessary mappings in hfi1_map_device_addresses() 2018-06-13 00:31:40 +09:00
e2e773d883 HFI: fix tidinfo and length calculation in program_rcvarray() 2018-06-13 00:31:39 +09:00
04d22d90a3 do_mmap(): debug message cosmetics 2018-06-13 00:31:39 +09:00
f6405081a6 page_fault_handler(): map Linux ioremap addresses on demand (disabled) 2018-06-13 00:31:39 +09:00
5bea237581 HFI1: make kmalloc caches per-CPU and pre-allocate at boot time 2018-06-13 00:31:39 +09:00
33ad55e72b kmalloc_cache_prealloc(): specify nr_elems as argument 2018-06-13 00:31:39 +09:00
6848c2ecf7 HFI1: move tid_rb_node to header 2018-06-13 00:31:39 +09:00
79f9a2d31a HFI1: don't print at open() time 2018-06-13 00:31:39 +09:00
2900ce20f7 HFI1: hfi1_unmap_device_addresses() at process terminate time 2018-06-13 00:31:39 +09:00
002b78372d open(): ignore /proc/sys/vm/overcommit_memory 2018-06-13 00:31:38 +09:00
5fce5e4e3c hfi1 generated headers: add missing filedata file 2018-06-13 00:31:38 +09:00
7a1ad31183 HFI: call hfi1_map_device_addresses() at initialization time
Conflicts:
	kernel/syscall.c
2018-06-13 00:31:38 +09:00
54bdb3419d hfi1 generated headers:
- split headers into one file per struct
- add filedata
- fix s/modprobe/modinfo/ for guessed .ko path
2018-06-13 00:31:38 +09:00
03fed4d1c8 automatically generate hfi structs from dwarf info 2018-06-13 00:31:38 +09:00
6279f69f5c compiler.h: take in recent linux updates for newer gcc support
Had to remove from original compiler-gcc:
 - things that deal with types, e.g. READ_ONCE macro and friends;
 - #define barrier(). This one would be better there at some point.

hfi1: remove ACCESS_ONCE from hfi1 header
2018-06-13 00:31:38 +09:00
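
For reference, the classic compiler barrier definition from Linux's
compiler-gcc.h that the note about #define barrier() refers to:
```
/* Empty asm with a memory clobber: forbids the compiler from caching
 * memory values across this point. */
#define barrier() __asm__ __volatile__("" ::: "memory")
```
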
6959d5ead4 HFI: port to SFI driver version 10.5.1.0.2 2018-06-13 00:31:38 +09:00
a5aa68744f hfi1: use kmalloc_cache for tid_rb_node allocations 2018-06-13 00:31:38 +09:00
89c5aaa9e9 hfi1_user_exp_rcv_setup(): rewrite main loop 2018-06-13 00:31:37 +09:00
15422d886f hfi1_file_ioctl(): use dkprintf() 2018-06-13 00:31:37 +09:00
f139bef0cb mmap(): remove force large page extension (meant to be RESET) 2018-06-13 00:31:37 +09:00
de82cf8779 hfi1/user_exp_rcv/setup: keep track of position within page
ihk_mc_pt_lookup_pte + pte_get_phys will get us the physical address
for the start of the page we're looking at.
Re-offset it by position within buffer.
2018-06-13 00:31:37 +09:00
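
The re-offset arithmetic as a standalone sketch; page_phys stands for the
result of ihk_mc_pt_lookup_pte() + pte_get_phys(), i.e. the physical base
of the page containing vaddr, and pgsize must be a power of two:
```
#include <stddef.h>
#include <stdint.h>

/* Physical address of an arbitrary vaddr: the lookup yields the
 * physical base of the containing page, so the position of vaddr
 * within that page must be added back. */
static uintptr_t phys_of(uintptr_t page_phys, uintptr_t vaddr, size_t pgsize)
{
    return page_phys + (vaddr & (pgsize - 1));
}
```
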
662895c020 hfi1/user_exp_rcv: explicitly call hfi1_map_device_addresses
There were cases where nobody else did this mapping for us.
2018-06-13 00:31:37 +09:00
d23939da8c process/vm: fix lookup_process_memory_range (again)
The optimistic descent to the left was a more serious bug than just a
last-iteration issue: we could pass by a match and continue down the tree
whenever the match was not a leaf.

Fix the actual algorithm issue (see the sketch below).

Conflicts:
	kernel/process.c
2018-06-13 00:31:37 +09:00
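
A sketch of the corrected search over an address-ordered binary tree (the
node layout is simplified from the real vm_range tree): remember an
overlapping non-leaf node instead of descending past it.
```
#include <stddef.h>

struct vm_range {
    unsigned long start, end;       /* [start, end) */
    struct vm_range *left, *right;  /* address-ordered BST */
};

/* Find the first range whose end lies beyond 'start'. When a node
 * overlaps, remember it and only descend left to look for an even
 * earlier overlap; never discard a non-leaf match. */
static struct vm_range *lookup_range(struct vm_range *node,
                                     unsigned long start)
{
    struct vm_range *match = NULL;

    while (node) {
        if (node->end <= start) {
            node = node->right;     /* entirely below the query */
        } else {
            match = node;           /* candidate; don't pass it by */
            if (node->start <= start)
                break;              /* contains 'start' exactly */
            node = node->left;      /* maybe an earlier overlap */
        }
    }
    return match;
}
```
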
67529f21ff hfi1: replace true/false defines by stddef include 2018-06-13 00:31:37 +09:00
5c11ff0950 process/vm: fix lookup_process_memory_range with small start address
Cherry-picked from 6370520e

Conflicts:
	kernel/process.c
2018-06-13 00:31:37 +09:00
ce4eb0d409 hfi1/user_exp_rcv/setup: add access_ok check 2018-06-13 00:31:36 +09:00
04434320fc hfi1/user_exp_rcv/setup: do not skip over pages
If the vaddr we consider is not at the start of a page, we could skip
over (smaller, not contiguous) areas.

For example, consider this segment of virtual memory:
[ 2MB | 4k | 4k | ... ]
Starting at a 1MB offset, we would get a pgsize of 2MB and would skip
straight over 1MB worth of 4k pages (see the sketch below).
2018-06-13 00:31:36 +09:00
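
The stride computation the fix implies, as a standalone helper; pgsize is
the size of the page containing vaddr and must be a power of two:
```
#include <stddef.h>
#include <stdint.h>

/* Advance only to the next boundary of the current page, never by a
 * full pgsize stride: starting 1MB into a 2MB page must step 1MB,
 * or the 4k pages that follow would be skipped. */
static size_t step_within_page(uintptr_t vaddr, size_t pgsize,
                               size_t remaining)
{
    size_t step = pgsize - (vaddr & (pgsize - 1));

    return step < remaining ? step : remaining;
}
```
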
50fafa6d71 hfi1/user_exp_rcv/setup: use cache_alloc for tidlist 2018-06-13 00:31:36 +09:00
f5ced648ef hfi1/user_exp_rcv: rework main loop
New loop now takes into account pages not physically contiguous.
Also some minor improvements, e.g. make the spin_lock used more locally,
reuse a group we had if we had one, etc.
2018-06-13 00:31:36 +09:00
0f8f88ca46 hfi1/user_exp_rcv/invalid: Remove function
user_exp_rcv_invalid is only used together with the mmu cache
(its purpose is the delayed freeing of tids that were invalidated in cache)

Since we do not use that cache, the function can go
2018-06-13 00:31:36 +09:00
e99f19e812 hfi1/user_exp_rcv/setup: set length in tidinfo
This was dropped early on by mistake/excessive haste; it's actually
pretty useful.
2018-06-13 00:31:36 +09:00
9a36e5d213 hfi1/user_exp_rcv/setup: increment phys appropriately
Old code was always registering the same section with a different size
instead of properly covering the requested map.
2018-06-13 00:31:36 +09:00
4816f27639 hfi1/user_exp_rcv/setup: split into multiple tids
Do not round up to the next power of two, but issue multiple requests
if necessary (e.g. 260k would be 256k + 4k in two registrations; see the
loop sketched below).
2018-06-13 00:31:36 +09:00
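
A minimal sketch of such a splitting loop: peel off the largest
power-of-two chunk each round, so 65 pages (260k in 4k pages) become
64 + 1 (256k + 4k). The printf stands in for the actual TID registration:
```
#include <stdio.h>

/* Split a page count into power-of-two chunks instead of rounding
 * the whole length up to the next power of two. */
static void split_registrations(unsigned int npages)
{
    while (npages) {
        unsigned int chunk = 1;

        while ((chunk << 1) <= npages)
            chunk <<= 1;            /* largest power of two <= npages */
        printf("register %u pages\n", chunk);
        npages -= chunk;
    }
}

int main(void)
{
    split_registrations(65);        /* 260k worth of 4k pages */
    return 0;
}
```
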
9c0b8aa812 mcctrl/control.c: fix debug print types 2018-06-13 00:31:36 +09:00
23f178d718 hfi1/user_exp_rcv/clear: implement TID_FREE ioctl 2018-06-13 00:31:36 +09:00
159c18b98b hfi1/ioctl: only forward ioctl if hfi1_file_ioctl didn't handle it
Conflicts:
	kernel/syscall.c
2018-06-13 00:31:35 +09:00
1847a3ac11 hfi1/user_exp_rcv/setup: cleanup locks/groups usage 2018-06-13 00:31:35 +09:00
15b16ffbbb hfi1/user_exp_rcv/setup: map is noop, skip it
In the original driver's dma.c hfi1_dma_map_single just passes
the physical address back, so directly use that.
2018-06-13 00:31:35 +09:00
e64d89cd48 hfi: bases for user_exp_rcv
This implements a skeleton setup function and calls it on ioctl

Many missing points:
 - missing pci mapping to make setup work
 - no clear (passed to linux, so will likely bug out)
 - missing locks/safe-guards

Conflicts:
	kernel/Makefile.build.in
2018-06-13 00:31:35 +09:00
7366da4390 Fix other warnings
Most were harmless, but the change from a volatile cast to ACCESS_ONCE
is probably useful.
Expanding the macro, we basically went from:
    m = (volatile struct sdma_vl_map *)dd->sdma_map;
to
    m = *(volatile struct sdma_vl_map **)&(dd->sdma_map);
i.e. the volatile access now happens one level up, on the load of the
pointer itself rather than through it.
2018-06-13 00:31:35 +09:00
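
For reference, the classic Linux definition of ACCESS_ONCE (it relies on
GCC's typeof extension), which yields exactly the expansion shown above:
```
/* Force a single, volatile-qualified access to x. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
```
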
2dc85ee417 user_sdma: fix use of uninitialized variable (vl)
This defines a single field in hfi1_pportdata, getting offset
from dwarf headers -- need to compute that at configure time
2018-06-13 00:31:35 +09:00
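
Purely illustrative of the configure-time-offset approach (the offset
value, macro name, and field type below are hypothetical): a single field
is read out of an otherwise opaque structure through a DWARF-derived
offset:
```
#include <stdint.h>

#define HFI1_PPORTDATA_VL_OFFSET 0x40   /* hypothetical, dwarf-derived */

/* Read the vl field out of an opaque hfi1_pportdata blob. */
static uint8_t pportdata_read_vl(const void *pportdata)
{
    return *(const uint8_t *)((const char *)pportdata +
                              HFI1_PPORTDATA_VL_OFFSET);
}
```
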
73cc07f98e ioctl() investigation - TO RESET 2018-06-13 00:31:35 +09:00
815e2244ca HFI1: minor change of declarations 2018-06-13 00:31:34 +09:00
163af73554 HFI1: properly iterate iovecs according to underlying page sizes 2018-06-13 00:31:34 +09:00
fd316f3ca3 HFI1: pass per-CPU txreq_cache to user_sdma_send_pkts() 2018-06-13 00:31:34 +09:00
122588bc4d mcexec: --enable-hfi1 to runtime enable/disable HFI1 driver
Conflicts:
	executer/user/mcexec.c
2018-06-13 00:31:34 +09:00
70238982c2 HFI1: use embedded kmalloc cache for req->tids (fixes AllReduce hang) 2018-06-13 00:31:34 +09:00
5b5191ef64 HFI1: move txreq kmalloc cache header into CPU local variable 2018-06-13 00:31:34 +09:00
a65faeaed4 kmalloc cache: embed cache pointer into kmalloc_header
Conflicts:
	kernel/mem.c
2018-06-13 00:31:34 +09:00
4dea1842e0 kmalloc cache: embed cache pointer into kmalloc_header
Conflicts:
	kernel/mem.c
2018-06-13 00:31:34 +09:00
5353b11f90 HFI1: disable kmalloc cache for req->tids (AllReduce fails otherwise) 2018-06-13 00:31:34 +09:00
abdbf96254 HFI1: use process rank for SDMA engine selection 2018-06-13 00:31:33 +09:00
bd170e63ba kmalloc cache refactor and pre-alloc in HFI1 open() 2018-06-13 00:31:33 +09:00
d35fa16417 HFI1: more detailed profiling (disabled by default) 2018-06-13 00:31:33 +09:00
6406a0df6b HFI1: compute SDMA pkt length taking large pages into account 2018-06-13 00:31:33 +09:00
52e8f03b4b HFI1: store base physical address in iovec if physically contiguous 2018-06-13 00:31:33 +09:00
b071a3f32c HFI1: use fast_memcpy() in header fillings
Conflicts:
	kernel/user_sdma.c
2018-06-13 00:31:33 +09:00
90258f00bd HFI1: use generic kmalloc cache for user_sdma_txreqs and req tids 2018-06-13 00:31:33 +09:00
28eb649056 Generic lock-free kmalloc cache implementation
Conflicts:
	kernel/mem.c
2018-06-13 00:31:33 +09:00
744ebacf65 HFI1: more pre-allocation in txreq cache 2018-06-13 00:31:33 +09:00
62e438a0aa HFI1: do device ioremap() mappings in per-process fashion 2018-06-13 00:31:32 +09:00
5ac582a678 user_sdma_send_pkts(): unlikely() around slow path condition 2018-06-13 00:31:32 +09:00
51bc28acca sdma_select_user_engine(): hash on CPU number 2018-06-13 00:31:32 +09:00
c43654d69b user_sdma_send_pkts(): handle page sizes correctly 2018-06-13 00:31:32 +09:00
c1d2db6a73 fixed sdma_vl_map, just in case it will be used in the future 2018-06-13 00:31:32 +09:00
aeef55d1b0 kmalloc(): try to get from remote_free list when regular is empty 2018-06-13 00:31:32 +09:00
6e289e8d9f HFI1: txreq cache and profiling 2018-06-13 00:31:32 +09:00
3b5363c533 HFI1: use original length calculation in sdma_send_pkts()
Conflicts:
	kernel/include/hfi1/sdma.h
2018-06-13 00:31:32 +09:00
60f6862db2 HFI1: use local write if private data is present; fix length alignment 2018-06-13 00:31:31 +09:00
39deff4e10 HFI1: working but a bit slow 2018-06-13 00:31:31 +09:00
7f03c18d4d Real run test version (update_tail, kregbase+offset crash) 2018-06-13 00:31:31 +09:00
640dba627f Added debugging output. Bugfixes in user_sdma_send_pkts() and sdma_send_txreq(). 2018-06-13 00:31:31 +09:00
ae368d97d4 Implemented a replacement for sdma_txadd_page()
Conflicts:
	kernel/user_sdma.c
2018-06-13 00:31:31 +09:00
99c216d91e HFI1: fix kregbase/piobase types to avoid warnings 2018-06-13 00:31:31 +09:00
3c357dc30a HFI1: fix completion mapping 2018-06-13 00:31:31 +09:00
37866e61ab HFI1: map completion queues 2018-06-13 00:31:31 +09:00
076e6b9b12 Enabled _sdma_txadd_daddr() 2018-06-13 00:31:30 +09:00
fa6db686b4 Corrected spin_lock_irqsave()/spin_unlock_irqrestore() definitions
Conflicts:
	kernel/include/hfi1/ihk_hfi1_common.h
2018-06-13 00:31:30 +09:00
74a636a612 Updated structs to use completion{} and wait_queue_head_t{} and added struct size checks in hfi1_aio_write() 2018-06-13 00:31:30 +09:00
1c4a6568e6 Updated sdma.h (fixed struct sdma_engine size) 2018-06-13 00:31:30 +09:00
7d2e2f93b0 HFI1: map piobase and rcvarray_wc 2018-06-13 00:31:30 +09:00
7005110697 Updated and confirmed struct iowait{} and struct hfi1_user_sdma_pkt_q {}
Conflicts:
	kernel/include/hfi1/ihk_hfi1_common.h
2018-06-13 00:31:30 +09:00
c4ca4ae3ab Updated struct hfi1_devdata and confirmed its size 2018-06-13 00:31:30 +09:00
b024a486b9 Updated hfi1_filedata {} and confirmed its size against the original on Linux
Conflicts:
	kernel/include/hfi1/hfi.h
2018-06-13 00:31:30 +09:00
fe4c461f2f Updated kcalloc/kmalloc calls and enabled sdma_select_user_engine dependencies
Conflicts:
	kernel/include/hfi1/ihk_hfi1_common.h
2018-06-13 00:31:29 +09:00
b60a980088 hfi1_user_sdma_process_request(): map HFI1 kregbase 2018-06-13 00:31:29 +09:00
ec66229063 HFI1: adjust sdma_select_user_engine()
Conflicts:
	kernel/user_sdma.c
2018-06-13 00:31:29 +09:00
b875b5186f spinlock: make increment compatible with XPPSL Linux (v3.10) 2018-06-13 00:31:29 +09:00
5cf884ef41 Updated TODO tags and struct hfi1_user_sdma_pkt_q 2018-06-13 00:31:29 +09:00
64e2639adc * The relevant files have been modified in order to compile with McKernel.
Conflicts:
	kernel/Makefile.build.in
2018-06-13 00:31:29 +09:00
14b360e867 * Added the original files of the driver as a basis for comparison
Conflicts:
	kernel/include/hfi1/sdma.h
	kernel/sdma.c
	kernel/user_sdma.c
2018-06-13 00:31:29 +09:00
4a0e389953 HFI1: comments to keep in mind
Conflicts:
	kernel/include/hfi1/sdma.h
	kernel/sdma.c
	kernel/user_sdma.c
2018-06-13 00:31:28 +09:00
34363c2b68 close(): clear fd_priv_table 2018-06-13 00:31:28 +09:00
8a1d756cb1 Added private_data structure in process
Conflicts:
	executer/user/mcexec.c
	kernel/include/process.h
	kernel/process.c
2018-06-13 00:31:28 +09:00
e36abe57e7 open(): check on private_data for /dev/hfi 2018-06-13 00:31:28 +09:00
b2c8cc50dc open(): record private_data
Conflicts:
	kernel/syscall.c
2018-06-13 00:31:28 +09:00
b9b4a4fe36 search_free_space(): manage region->map_end internally
Cherry-pick of 87f72548a232a1626f2ca103da7f1ce62d139359

Conflicts:
	kernel/syscall.c
2018-06-13 00:31:28 +09:00
4b652c9353 atobytes(): restore postfix before return 2018-06-13 00:31:28 +09:00
60ac94cbb9 process/vm/access_ok: fix edge checks.
Add a check for start/end being larger than the range we're checking.
Fix a corner case where the access check was done on the last vm range
and we would look beyond the last element (NULL deref; see the sketch
below).
2018-06-13 00:31:28 +09:00
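
A simplified sketch of the edge conditions being added; the real vm
ranges live in a tree, so the array walk here is illustrative only:
```
#include <stdbool.h>
#include <stddef.h>

struct range { unsigned long start, end; };     /* [start, end) */

/* Reject wrapped/oversized queries outright and never read past the
 * last tracked range. */
static bool access_ok_sketch(const struct range *r, size_t n,
                             unsigned long start, unsigned long end)
{
    size_t i;

    if (end < start)
        return false;
    for (i = 0; i < n; i++) {
        if (start >= r[i].start && end <= r[i].end)
            return true;
    }
    return false;
}
```
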
42bbf5f2a4 process/vm: implement access_ok() 2018-06-13 00:31:27 +09:00
e29a40331d partitioned execution: pass process rank to LWK
Cherry-pick of d2d134d5e6a4b16a34d55d31b14614a2a91ecf47

Conflicts:
	kernel/include/process.h
2018-06-13 00:31:27 +09:00
655de2cd82 ihk_mc_get_linux_kernel_pgt(): add declaration
Cherry-pick of caff967a442907dd75f8cd878b9f2ea7608c77b2
2018-06-13 00:31:27 +09:00
205747594b Exclude areas not assigned to McKernel from direct map of all phys. memory
It's enabled by adding -s to mcreboot.sh.

Cherry-pick of the following commit:

commit b5c13ce51a5a4926c2cf11c817cd0d369ac4402d
Author: Katsuya Horigome <katsuya.horigome.rj@ps.hitachi-solutions.com>
Date:   Mon Nov 20 09:40:41 2017 +0900

    Include measures to prevent memory destruction on Linux side (This is rebase commit for merging to development+hfi)
2018-06-13 00:31:27 +09:00
21f9a1ea33 eclair: fix MAP_KERNEL_START and apply Fujitsu's proposals
(1) Cherry-pick of 644afd8b45fc253ad7b90849e99aae354bac5b17
(2) Pass length to functions with arguments of variable length
    * POSTK_DEBUG_ARCH_DEP_38
(3) Separate architecture dependent functions/structures
    * POSTK_DEBUG_ARCH_DEP_34
(4) Fix include path
    * POSTK_DEBUG_ARCH_DEP_76
(5) Include config.h
    * POSTK_DEBUG_ARCH_DEP_33
2018-06-13 00:31:27 +09:00
aed099fbcb kmalloc_header: use signed integer for target CPU id
Cherry-pick of bdb2d4d8fa94f9c0268cdfdb21af1a2a5c2bcae5
2018-06-13 00:31:27 +09:00
48515970a0 ihk_mc_get_processor_id(): return -1 for non-McKernel CPUs
Cherry-pick of c45641e97add9fde467844d9272f2626cf4317de
2018-06-13 00:31:27 +09:00
b888f31b30 Map LWK TEXT to the end of Linux modules section (0xFFFFFFFFFE800000)
Cherry-pick of b9827b25883a9622058cb78006e705f09eaf9a84
2018-06-13 00:31:27 +09:00
7982008b5b virt_to_phys(): fix debug messages
Cherry-pick of 46eb3b73dac75b28ead62476f017ad0f29ec4b0a
2018-06-13 00:31:26 +09:00
f658173269 init_normal_area(): fix mapping start physical address
Cherry-pick of 2d3006818473af50c38a3d0e33595b4e74588004
2018-06-13 00:31:26 +09:00
ca7edf1df8 mem: make McKernel kernel heap virtual addresses Linux compatible
Cherry-pick of e5334c646d2dc6fb11d419918d8139a0de583fde
2018-06-13 00:31:26 +09:00
9a5f3ad4e6 mem: map Linux kernel virtual addresses properly
Cherry-pick of 5f37e846c3d70e5d5c0baea5b8eb8ceee3411c88
2018-06-13 00:31:26 +09:00
cfbab0ee82 move McKernel out of Linux kernel virtual
Cherry-pick of 88a8277f17da62d349b4340b66d37482344db649
2018-06-13 00:31:26 +09:00
2127 changed files with 48679 additions and 137328 deletions

.gitignore

@@ -1,4 +1,3 @@
*~
*.o
*.elf
*.bin
@@ -9,36 +8,10 @@
Module.symvers
*.order
.tmp_versions
old_timestamp
CMakeFiles
CMakeCache.txt
Makefile
!test/*/*/Makefile
!test/signalonfork+wait/Makefile
!test/perf_overflow/Makefile
!test/*/*/*.cmd
Kbuild
cmake_install.cmake
config.h
mcstop+release.sh
mcreboot.sh
mcreboot.1
mcoverlay-destroy.sh
mcoverlay-create.sh
kernel/mckernel.img
kernel/include/swapfmt.h
executer/user/vmcore2mckdump
executer/user/ql_talker
executer/user/mcexec.1
executer/user/mcexec
executer/user/libsched_yield.so.1.0.0
executer/user/libsched_yield.so
executer/user/libmcexec.a
executer/user/libldump2mcdump.so
executer/user/eclair
tools/mcstat/mcstat
/_CPack_Packages
/CPackSourceConfig.cmake
CPackConfig.cmake
/build
mckernel-*.tar.gz
elfboot/elfboot
elfboot/elfboot_test
linux/executer/mcexec
linux/mod_test*
linux/target
kernel/script/dwarf-extract-struct

.gitmodules

@@ -1,6 +0,0 @@
[submodule "ihk"]
path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git
[submodule "executer/user/lib/libdwarf/libdwarf"]
path = executer/user/lib/libdwarf/libdwarf
url = https://github.com/bgerofi/libdwarf.git

CMakeLists.txt

@@ -1,262 +0,0 @@
cmake_minimum_required(VERSION 2.6)
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type: Debug Release..." FORCE)
endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM)
project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.0")
# See "Fedora Packaging Guidlines -- Versioning"
set(MCKERNEL_RELEASE "0.7")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
if(DEFINED SYSCONF_INSTALL_DIR)
set(CMAKE_INSTALL_SYSCONFDIR "${SYSCONF_INSTALL_DIR}")
endif()
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
set(BUILD_TARGET "smp-x86" CACHE STRING "Build target: smp-x86 | smp-arm64")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(BUILD_TARGET "smp-arm64" CACHE STRING "Build target: smp-x86 | smp-arm64")
endif()
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
endif()
include(GNUInstallDirs)
include(CMakeParseArguments)
include(Kbuild)
include(CheckCCompilerFlag)
include(AutoconfHelper)
CHECK_C_COMPILER_FLAG(-Wno-implicit-fallthrough IMPLICIT_FALLTHROUGH)
if(IMPLICIT_FALLTHROUGH)
set(EXTRA_WARNINGS "-Wno-implicit-fallthrough")
endif(IMPLICIT_FALLTHROUGH)
# build options
set(CFLAGS_WARNING "-Wall" "-Wextra" "-Wno-unused-parameter" "-Wno-sign-compare" "-Wno-unused-function" ${EXTRA_WARNINGS} CACHE STRING "Warning flags")
add_compile_options(${CFLAGS_WARNING})
option(ENABLE_WERROR "Enable -Werror" OFF)
if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
add_definitions(-DIHK_IKC_USE_LINUX_WORK_IRQ)
endif()
if (BUILD_TARGET STREQUAL "smp-arm64")
foreach(i RANGE 1 120)
add_definitions(-DPOSTK_DEBUG_ARCH_DEP_${i} -DPOSTK_DEBUG_TEMP_FIX_${i})
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPOSTK_DEBUG_ARCH_DEP_${i} -DPOSTK_DEBUG_TEMP_FIX_${i}")
endforeach()
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_VA_BITS\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_VA_BITS OUTPUT_STRIP_TRAILING_WHITESPACE)
message("Host kernel CONFIG_ARM64_64K_PAGES=${CONFIG_ARM64_64K_PAGES}")
message("Host kernel CONFIG_ARM64_VA_BITS=${CONFIG_ARM64_VA_BITS}")
if(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 42)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=2 -DCONFIG_ARM64_VA_BITS=42 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type3.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=48 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type4.lds")
endif()
else(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 39)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=39)
set(LINKER_SCRIPT "smp-arm64_type1.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=4 -DCONFIG_ARM64_VA_BITS=48)
set(LINKER_SCRIPT "smp-arm64_type2.lds")
endif()
endif(CONFIG_ARM64_64K_PAGES STREQUAL "y")
endif()
set_property(CACHE BUILD_TARGET PROPERTY STRINGS smp-x86 smp-arm64)
# define MAP_KERNEL_START
set(tmpdir ${CMAKE_CURRENT_BINARY_DIR}/tmp.resolve_MODULES_END)
file(REMOVE_RECURSE ${tmpdir})
file(MAKE_DIRECTORY ${tmpdir})
file(WRITE ${tmpdir}/driver.c "#include <linux/module.h>\n")
file(APPEND ${tmpdir}/driver.c "unsigned long MAP_KERNEL_START = MODULES_END - (1UL << 23);\n")
file(WRITE ${tmpdir}/Makefile "obj-m := driver.o\n")
file(APPEND ${tmpdir}/Makefile "all:\n")
file(APPEND ${tmpdir}/Makefile "\tmake ${KBUILD_MAKE_FLAGS_STR} -C ${KERNEL_DIR} M=${tmpdir} modules\n")
execute_process(COMMAND make -C ${tmpdir})
execute_process(COMMAND bash -c "offset=`readelf -S ${tmpdir}/driver.ko | grep .data | sed 's/.* //g'`; echo $((0x$offset))"
OUTPUT_VARIABLE MAP_KERNEL_START_OFFSET OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "dd if=${tmpdir}/driver.ko bs=1 skip=${MAP_KERNEL_START_OFFSET} count=8 2>/dev/null | od -tx8 -Ax | head -1 | sed 's|.* |0x|g'"
OUTPUT_VARIABLE MAP_KERNEL_START OUTPUT_STRIP_TRAILING_WHITESPACE)
set(ENABLE_MEMDUMP ON)
option(ENABLE_PERF "Enable perf support" ON)
option(ENABLE_RUSAGE "Enable rusage support" ON)
option(ENABLE_QLMPI "Enable qlmpi programs" OFF)
option(ENABLE_UTI "Enable uti support" OFF)
option(ENABLE_UBSAN "Enable undefined behaviour sanitizer on mckernel side" OFF)
option(ENABLE_PER_CPU_ALLOC_CACHE "Enable per-CPU allocator cache (ThunderX2 workaround)" OFF)
find_package(PkgConfig REQUIRED)
set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON)
find_library(LIBRT rt)
if (NOT LIBRT)
message(FATAL_ERROR "error: couldn't find librt")
endif()
find_library(LIBNUMA numa)
if (NOT LIBNUMA)
message(FATAL_ERROR "error: couldn't find libnuma")
endif()
find_library(LIBBFD bfd)
if (NOT LIBBFD)
message(FATAL_ERROR "error: couldn't find libbfd")
endif()
find_library(LIBIBERTY iberty)
if (NOT LIBIBERTY)
message(FATAL_ERROR "error: couldn't find libiberty")
endif()
find_library(LIBDWARF dwarf)
if (NOT LIBDWARF)
if (CMAKE_CROSSCOMPILING)
message(FATAL_ERROR "Could not find libdwarf.so, install libdwarf-devel to ${CMAKE_FIND_ROOT_PATH}")
endif()
message("WARNING: libdwarf will be compiled locally")
enable_language(CXX)
else()
# Note that libdwarf-devel provides /usr/include/libdwarf/dwarf.h
# but elfutils-devel provides /usr/include/dwarf.h
# while mcinspect.c performs "#include <dwarf.h>"
find_path(DWARF_H dwarf.h PATH_SUFFIXES libdwarf)
endif()
if (ENABLE_QLMPI)
find_package(MPI REQUIRED)
endif()
if (ENABLE_UTI)
pkg_check_modules(LIBSYSCALL_INTERCEPT REQUIRED libsyscall_intercept)
link_directories(${LIBSYSCALL_INTERCEPT_LIBRARY_DIRS})
endif()
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-([0-9]+)(.*))?" "\\1;\\2;\\3;\\5;\\6" LINUX_VERSION ${UNAME_R})
list(GET LINUX_VERSION 0 LINUX_VERSION_MAJOR)
list(GET LINUX_VERSION 1 LINUX_VERSION_MINOR)
list(GET LINUX_VERSION 2 LINUX_VERSION_PATCH)
list(GET LINUX_VERSION 3 LINUX_VERSION_RELEASE)
math(EXPR LINUX_VERSION_CODE "${LINUX_VERSION_MAJOR} * 65536 + ${LINUX_VERSION_MINOR} * 256 + ${LINUX_VERSION_PATCH}")
# compat with various install paths
set(BINDIR ${CMAKE_INSTALL_FULL_BINDIR})
set(SBINDIR ${CMAKE_INSTALL_FULL_SBINDIR})
set(ETCDIR ${CMAKE_INSTALL_PREFIX}/etc)
set(ROOTFSDIR "/rootfs")
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
set(KMODDIR "/lib/modules/${UNAME_R}/extra/mckernel")
set(MCKERNELDIR "${CMAKE_INSTALL_FULL_DATADIR}/mckernel/${BUILD_TARGET}")
else()
set(KMODDIR "${CMAKE_INSTALL_PREFIX}/kmod")
set(MCKERNELDIR "${CMAKE_INSTALL_PREFIX}/${BUILD_TARGET}/kernel")
endif()
set(prefix ${CMAKE_INSTALL_PREFIX})
# set rpath for everyone
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
# ihk: ultimately should support external build, but add as subproject for now
if (EXISTS ${PROJECT_SOURCE_DIR}/ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
elseif (EXISTS ${PROJECT_SOURCE_DIR}/../ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "../ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
else()
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
endif()
if (EXISTS ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR})
elseif (EXISTS /${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR /${IHK_SOURCE_DIR})
else()
message(FATAL_ERROR "Could not find ihk dir, or it does not contain CMakeLists.txt, either clone ihk or run git submodule update --init")
endif()
add_subdirectory(${IHK_SOURCE_DIR} ihk)
configure_file(config.h.in config.h)
# actual build section - just subdirs
add_subdirectory(executer/kernel/mcctrl)
add_subdirectory(executer/user)
add_subdirectory(kernel)
add_subdirectory(tools/mcstat)
add_subdirectory(tools/crash)
configure_file(scripts/mcreboot-smp.sh.in mcreboot.sh @ONLY)
configure_file(scripts/mcstop+release-smp.sh.in mcstop+release.sh @ONLY)
configure_file(scripts/mcreboot.1in mcreboot.1 @ONLY)
configure_file(scripts/eclair-dump-backtrace.exp.in eclair-dump-backtrace.exp @ONLY)
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/mcreboot.sh"
"${CMAKE_CURRENT_BINARY_DIR}/mcstop+release.sh"
DESTINATION "${CMAKE_INSTALL_SBINDIR}")
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/eclair-dump-backtrace.exp"
DESTINATION "${CMAKE_INSTALL_BINDIR}")
install(FILES "scripts/irqbalance_mck.in"
DESTINATION "${CMAKE_INSTALL_SYSCONFDIR}")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mcreboot.1"
DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
configure_file(scripts/mckernel.spec.in scripts/mckernel.spec @ONLY)
set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${MCKERNEL_VERSION}")
set(CPACK_SOURCE_IGNORE_FILES "/.git/;/build;/CMakeCache.txt$;/CMakeFiles$;/Makefile$")
set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${IHK_FULL_SOURCE_DIR};/ihk;${CMAKE_BINARY_DIR}/scripts;/scripts")
set(CPACK_SOURCE_GENERATOR "TGZ")
include(CPack)
add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
# config report
message("-------------------------------")
message("Option summary")
message("-------------------------------")
message("Build type: ${CMAKE_BUILD_TYPE}")
message("Build target: ${BUILD_TARGET}")
message("IHK_SOURCE_DIR: ${IHK_SOURCE_DIR} (relative to mckernel source tree)")
message("UNAME_R: ${UNAME_R}")
message("KERNEL_DIR: ${KERNEL_DIR}")
message("SYSTEM_MAP: ${SYSTEM_MAP}")
message("VMLINUX: ${VMLINUX}")
message("KBUILD_C_FLAGS: ${KBUILD_C_FLAGS}")
message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")
message("ENABLE_WERROR: ${ENABLE_WERROR}")
message("ENABLE_UBSAN: ${ENABLE_UBSAN}")
message("ENABLE_LINUX_WORK_IRQ_FOR_IKC: ${ENABLE_LINUX_WORK_IRQ_FOR_IKC}")
message("ENABLE_PER_CPU_ALLOC_CACHE: ${ENABLE_PER_CPU_ALLOC_CACHE}")
message("-------------------------------")


@@ -1,70 +0,0 @@
Linux crash when offlining CPU (el7, hardware-specific)
=========================================================
On some hardware with an el7 kernel, Linux can crash due to a bug in the
IRQ handling when offlining CPUs (the reserve-CPU part of mcreboot).
Example stack trace:
```
[ 4147.052753] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
[ 4147.060677] IP: [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.068226] PGD 1057e44067 PUD 105f1e7067 PMD 0
[ 4147.072935] Oops: 0000 [#1] SMP
[ 4147.076230] Modules linked in: mcctrl(OE) ihk_smp_x86_64(OE) ihk(OE) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core
[ 4147.148619] dm_mirror dm_region_hash dm_log dm_mod sb_edac edac_core intel_powerclamp coretemp ext4 mbcache jbd2 intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ipmi_ssif glue_helper ablk_helper joydev iTCO_wdt iTCO_vendor_support cryptd ipmi_si ipmi_devintf ipmi_msghandler pcspkr wmi mei_me mei lpc_ich i2c_i801 sg ioatdma shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_en sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm isci igb drm mlx4_core libsas ahci libahci scsi_transport_sas libata crct10dif_pclmul ptp crct10dif_common pps_core crc32c_intel dca i2c_algo_bit i2c_core devlink [last unloaded: ihk]
[ 4147.215370] CPU: 6 PID: 38 Comm: migration/6 Tainted: G OE ------------ T 3.10.0-693.2.2.el7.x86_64 #1
[ 4147.225672] Hardware name: SGI.COM C1104G-RP5/X9DRG-HF, BIOS 3.0 10/25/2013
[ 4147.232747] task: ffff880174689fa0 ti: ffff8801746ac000 task.ti: ffff8801746ac000
[ 4147.240278] RIP: 0010:[<ffffffff8102ce26>] [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.250275] RSP: 0018:ffff8801746afd30 EFLAGS: 00010046
[ 4147.255608] RAX: 0000000000000000 RBX: 000000000000004e RCX: 0000000000000000
[ 4147.262770] RDX: 0000000000000020 RSI: 000000000000005f RDI: 0000000000000023
[ 4147.269936] RBP: ffff8801746afd58 R08: 0000000000000001 R09: ffff88017f800490
[ 4147.277103] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000006
[ 4147.284269] R13: 0000000000000000 R14: ffff88085ca82500 R15: 000000000000005f
[ 4147.291429] FS: 0000000000000000(0000) GS:ffff88085fb80000(0000) knlGS:0000000000000000
[ 4147.299556] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4147.305326] CR2: 0000000000000040 CR3: 0000001059704000 CR4: 00000000001407e0
[ 4147.312490] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 4147.319659] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 4147.326827] Stack:
[ 4147.328857] ffff8808f43078c8 ffff8808f4307850 0000000000000286 ffff8808f4307701
[ 4147.336384] 0000000000000000 ffff8801746afd70 ffffffff81052a82 0000000200000000
[ 4147.343915] ffff8801746afd88 ffffffff81693ca3 0000000000000003 ffff8801746afdc0
[ 4147.351447] Call Trace:
[ 4147.353921] [<ffffffff81052a82>] native_cpu_disable+0x12/0x40
[ 4147.359795] [<ffffffff81693ca3>] take_cpu_down+0x13/0x40
[ 4147.365236] [<ffffffff81116899>] multi_cpu_stop+0xd9/0x100
[ 4147.370850] [<ffffffff811167c0>] ? cpu_stop_should_run+0x50/0x50
[ 4147.376983] [<ffffffff81116ab7>] cpu_stopper_thread+0x97/0x150
[ 4147.382942] [<ffffffff816a8fad>] ? __schedule+0x39d/0x8b0
[ 4147.388461] [<ffffffff810b909f>] smpboot_thread_fn+0x12f/0x180
[ 4147.394406] [<ffffffff810b8f70>] ? lg_double_unlock+0x40/0x40
[ 4147.400276] [<ffffffff810b098f>] kthread+0xcf/0xe0
[ 4147.405182] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.411319] [<ffffffff816b4f58>] ret_from_fork+0x58/0x90
[ 4147.418893] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.426524] Code: 81 fb 00 01 00 00 0f 84 8a 00 00 00 89 d8 65 44 8b 3c 85 20 c6 00 00 45 85 ff 78 e1 44 89 ff e8 91 31 10 00 48 63 15 7e 10 af 00 <48> 8b 70 40 48 c7 c7 80 71 cf 81 49 89 c6 48 83 c2 3f 48 c1 fa
[ 4147.450352] RIP [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.460135] RSP <ffff8801746afd30>
[ 4147.465154] CR2: 0000000000000040
```
This bug has been fixed upstream, but Red Hat will not backport the fixes.
You can work around the problem with a kpatch backporting the following
three commits:
x86: irq: Get correct available vectors for cpu disable
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ac2a55395eddccd6e3e39532df9869d61e97b2ee
x86/irq: Check for valid irq descriptor in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d97eb8966c91f2c9d05f0a22eb89ed5b76d966d1
x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cbb24dc761d95fe39a7a122bb1b298e9604cae15
Alternatively, since the bug is related to the IRQ configuration, it might
be possible to mitigate the issue by setting the IRQ affinities early
on and making sure none of the CPUs that will be offlined have any IRQ
configured.

LICENSE

@@ -1,339 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

Makefile.in (new, executable)

@@ -0,0 +1,84 @@
TARGET = @TARGET@
SBINDIR = @SBINDIR@
BINDIR = @BINDIR@
INCDIR = @INCDIR@
ETCDIR = @ETCDIR@
MANDIR = @MANDIR@

all: executer-mcctrl executer-mcoverlayfs executer-user mckernel mck-tools

executer-mcctrl:
	+@(cd executer/kernel/mcctrl; $(MAKE) modules)

executer-mcoverlayfs:
	+@(cd executer/kernel/mcoverlayfs; $(MAKE) modules)

executer-user:
	+@(cd executer/user; $(MAKE))

mckernel:
	+@case "$(TARGET)" in \
	attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
		(cd kernel; $(MAKE)) \
		;; \
	*) \
		echo "unknown target $(TARGET)" >&2; \
		exit 1 \
		;; \
	esac

mck-tools:
	+@(cd tools/mcstat; $(MAKE))

install:
	@(cd executer/kernel/mcctrl; $(MAKE) install)
	@(cd executer/kernel/mcoverlayfs; $(MAKE) install)
	@(cd executer/user; $(MAKE) install)
	@case "$(TARGET)" in \
	attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
		(cd kernel; $(MAKE) install) \
		;; \
	*) \
		echo "unknown target $(TARGET)" >&2; \
		exit 1 \
		;; \
	esac
	@case "$(TARGET)" in \
	smp-x86 | smp-arm64) \
		mkdir -p -m 755 $(SBINDIR); \
		install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
		install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
		install -m 755 arch/x86_64/tools/mpimcexec $(BINDIR)/mpimcexec; \
		install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
		install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
		install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp; \
		mkdir -p -m 755 $(ETCDIR); \
		install -m 644 arch/x86_64/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
		install -m 644 arch/x86_64/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
		mkdir -p -m 755 $(INCDIR); \
		install -m 644 kernel/include/swapfmt.h $(INCDIR); \
		mkdir -p -m 755 $(MANDIR)/man1; \
		install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
		install -m 644 arch/x86_64/tools/mpimcexec.1 $(MANDIR)/man1/mpimcexec.1; \
		;; \
	*) \
		echo "unknown target $(TARGET)" >&2; \
		exit 1 \
		;; \
	esac
	@(cd tools/mcstat/; $(MAKE) install)

clean:
	@(cd executer/kernel/mcctrl; $(MAKE) clean)
	@(cd executer/kernel/mcoverlayfs; $(MAKE) clean)
	@(cd executer/user; $(MAKE) clean)
	@case "$(TARGET)" in \
	attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
		(cd kernel; $(MAKE) clean) \
		;; \
	*) \
		echo "unknown target $(TARGET)" >&2; \
		exit 1 \
		;; \
	esac
	@(cd tools/mcstat; $(MAKE) clean)

540
NEWS.md
View File

@ -1,540 +0,0 @@
=============================================
What's new in version 1.7.0rc4 (Apr 15, 2020)
=============================================
----------------------
McKernel major updates
----------------------
1. arm64: Contiguous PTE support
2. arm64: Scalable Vector Extension (SVE) support
3. arm64: PMU overflow interrupt support
4. xpmem: Support large page attachment
5. arm64 port: Direct access to McKernel memory from Linux
6. arm64 port: utility thread offloading, which spawns threads onto
Linux CPUs
7. eclair: support for live debug
8. Crash utility extension
9. Replace mcoverlayfs with a soft userspace overlay
10. Build system is switched to cmake
11. Core dump includes thread information
------------------------
McKernel major bug fixes
------------------------
1. shmobj: Fix rusage counting for large page
2. mcctrl control: task start_time changed to u64 nsec
3. mcctrl: add handling for one more level of page tables
4. Add kernel argument to turn on/off time sharing
5. flatten_string/process env: realign env and clear trailing bits
6. madvise: Add MADV_HUGEPAGE support
8. mcctrl: remove in-kernel calls to syscalls
9. arch_cpu_read_write_register: error return fix.
10. set_cputime(): interrupt enable/disable fix.
11. set_mempolicy(): Add mode check.
12. mbind(): Fix memory_range_lock deadlock.
13. ihk_ikc_recv: Record channel to packet for release
14. Add set_cputime() kernel to kernel case and mode enum.
15. execve: Call preempt_enable() before error-exit
16. memory/x86_64: fix linux safe_kernel_map
17. do_kill(): fix pids table when nr of threads is larger than num_processors
18. shmget: Use transparent huge pages when page size isn't specified
19. prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
20. monitor_init: fix undetected hang on highest numbered core
21. init_process_stack: change premapped stack size based on arch
22. x86 syscalls: add a bunch of XXat() delegated syscalls
23. do_pageout: fix direct kernel-user access
24. stack: add hwcap auxval
25. perf counters: add arch-specific perf counters
26. Added check of nohost to terminate_host().
27. kmalloc: Fix address order in free list
28. sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
29. monitor_init: Use ihk_mc_cpu_info()
30. Fix ThunderX2 write-combined PTE flag insanity
31. ARM: eliminate zero page mapping (i.e., init_low_area())
32. eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
33. page_table: Fix return value of lookup_pte when ptl4 is blank
34. sysfs: add missing symlinks for cpu/node
35. Make Linux handler run when mmap to procfs.
36. Separate mmap area from program loading (relocation) area
37. move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
38. arm: turn off cpu on panic
39. page fault handler: protect thread accesses
40. Register PPD and release_handler at the same time.
41. fix to missing exclusive processing between terminate() and
finalize_process().
42. perfctr_stop: add flags to no 'disable_intens'
43. fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
44. clear_range_l1, clear_range_middle: Fix handling contiguous PTE
45. do_mmap: don't pre-populate the whole file when asked for smaller segment
46. invalidate_one_page: Support shmobj and contiguous PTE
47. ubsan: fix undefined shifts
48. x86: disable zero mapping and add a boot pt for ap trampoline
49. rusage: Don't count PF_PATCH change
50. Fixed time processing.
51. copy_user_pte: vmap area not owned by McKernel
52. gencore: Zero-clear ELF header and memory range table
53. rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
54. gencore: Allocate ELF header to heap instead of stack
55. nanosleep: add cpu_pause() in spinwait loop
56. init_process: add missing initializations to proc struct
57. rus_vm_fault: always use a packet on the stack
58. process stack: use PAGE_SIZE in aux vector
59. copy_user_pte: base memobj copy on range & VR_PRIVATE
60. arm64: ptrace: Fix overwriting 1st argument with return value
61. page fault: use cow for private device mappings
62. reproducible builds: remove most install paths in c code
63. page fault: clear writable bit for non-dirtying access to shared ranges
64. mcreboot/mcstop+release: support for regular user execution
65. irqbalance_mck: replace extra service with service drop-in
66. do_mmap: give addr argument a chance even if not MAP_FIXED
67. x86: fix xchg() and cmpxchg() macros
68. IHK: support for using Linux work IRQ as IKC interrupt (optional)
69. MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
70. procfs: add number of threads to stat and status
71. memory_range_lock: Fix deadlock in procfs/sysfs handler
72. flush instruction cache at context switch time if necessary
73. arm64: Fix PMU related functions
74. page_fault_process_memory_range: Disable COW for VM region with zeroobj
75. extend_process_region: Fall back to demand paging when not contiguous
76. munmap: fix deadlock with remote pagefault on vm range lock
77. procfs: if memory_range_lock fails, process later
78. migrate-cpu: Prevent migration target from calling schedule() twice
79. sched_request_migrate(): fix race condition between migration req and IRQs
80. get_one_cpu_topology: Renumber core_id (physical core id)
81. bb7e140 procfs cpuinfo: use sequence number as processor
82. set_host_vma(): do NOT read protect Linux VMA
===========================================
What's new in V1.6.0 (Nov 11, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. McKernel and Linux share one unified kernel virtual address space.
That is, McKernel sections reside in Linux sections reserved for
modules. In this way, Linux can access the McKernel kernel memory
area.
2. hugetlbfs support
3. IHK is now included as a git submodule
4. Debug messages are turned on/off on a per-source-file basis at run time.
5. McKernel is prohibited from accessing physical memory ranges that
Linux didn't give to it.
6. UTI (capability to spawn a thread on Linux CPU) improvement:
* System calls issued from the thread are hooked by modifying the
binary in memory.
---------------------------
McKernel bug fixes (digest)
---------------------------
#<num> below corresponds to the redmine issue number
(https://postpeta.pccluster.org/redmine/).
1. #926: shmget: Hide object with IPC_RMID from shmget
2. #1028: init_process: Inherit parent cpu_set
3. #995: Fix shebang recorded in argv[0]
4. #1024: Fix VMAP virtual address leak
5. #1109: init_process_stack: Support "ulimit -s unlimited"
6. x86 mem init: do not map identity mapping
7. mcexec_wait_syscall: requeue potential request on interrupted wait
8. mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
9. pager_req_read: handle short read
10. kprintf: only call eventfd() if it is safe to interrupt
11. process_procfs_request: Add Pid to /proc/<PID>/status
12. terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
13. mcexec: Do not close fd returned to mckernel side
14. #976: execve: Clear sigaltstack and fp_regs
15. #1002: perf_event: Specify counter by bit_mask on start/stop
16. #1027: schedule: Don't reschedule immediately when wake up on migrate
17. mcctrl: lookup unexported symbols at runtime
18. __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
19. futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
20. #1167: ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
21. devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
22. mcctrl: remove rus page cache
23. #1021: procfs: Support multiple reads of e.g. /proc/*/maps
24. #1006: wait: Delay wake-up parent within switch context
25. #1164: mem: Check if phys-mem is within the range of McKernel memory
26. #1039: page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
27. partitioned execution: pass process rank to LWK
28. process/vm: implement access_ok()
29. spinlock: rewrite spinlock to use Linux ticket head/tail format
30. #986: Fix deadlock involving mmap_sem and memory_range_lock
31. Prevent one CPU from getting chosen by concurrent forks
32. #1009: check_signal: system call restart is done only once
33. #1176: syscall: the signal received during system call processing is not processed.
34. #1036 syscall_time: Handle by McKernel
35. #1165 do_syscall: Delegate system calls to the mcexec with the same pid
36. #1194 execve: Fix calling ptrace_report_signal after preemption is disabled
37. #1005 coredump: Exclude special areas
38. #1018 procfs: Fix pread/pwrite to procfs fail when specified size is bigger than 4MB
39. #1180 sched_setaffinity: Check migration after decrementing in_interrupt
40. #771, #1179, #1143 ptrace supports threads
41. #1189 procfs/do_fork: wait until procfs entries are registered
42. #1114 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
43. #1116 mcctrl procfs: check entry was returned before using it
44. #1167 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC timeouts
45. mcexec/execve: fix shebangs handling
46. procfs: handle 'comm' on mckernel side
47. ihk_os_setperfevent: Return number of registered events
48. mcexec: fix terminating zero after readlink()
===========================================
What's new in V1.5.1 (July 9, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. Watchdog timer to detect hang of McKernel
mcexec prints out the following line to its stderr when a hang of
McKernel is detected.
mcexec detected hang of McKernel
The watchdog timer is enabled by passing the -i <timeout_in_sec> option
to mcreboot.sh. <timeout_in_sec> specifies the interval for checking
whether McKernel is alive.
Example: mcreboot.sh -i 600 checks at a 10-minute interval
The detailed steps of the hang detection are as follows (a sketch of
the eventfd/epoll wait appears after this list).
(1) mcexec acquires an eventfd for notification from IHK and performs
epoll() on it.
(2) A daemon called ihkmond monitors the state of McKernel periodically
at the interval specified by the -i option. It judges that
McKernel is hanging and notifies mcexec via the eventfd if the
state hasn't changed since the last check.
2. Documentation
man page: The installation directory is changed to <install_dir>/share/man
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #1146: pager_req_map(): do not take mmap_sem if not needed
2. #1135: prepare_process_ranges_args_envs(): fix saving cmdline
3. #1144: fileobj/devobj: record path name
4. #1145: fileobj: use MCS locks for per-file page hash
5. #1076: mcctrl: refactor prepare_image into new generic ikc send&wait
6. #1072: execve: fix execve with oversubscribing
7. #1132: execve: use thread variable instead of cpu_local_var(current)
8. #1117: mprotect: do not set page table writable for cow pages
9. #1143: syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
10. #1064: rusage: Fix initialization of rusage->num_processors
11. #1133: pager_req_unmap: Put per-process data at exit
12. #731: do_fork: Propagate error code returned by mcexec
13. #1149: execve: Reinitialize vm_regions's map area on execve
14. #1065: procfs: Show file names in /proc/<PID>/maps
15. #1112: mremap: Fix type of size arguments (from ssize_t to size_t)
16. #1121: sched_getaffinity: Check arguments in the same order as in Linux
17. #1137: mmap, mremap: Check arguments in the same order as in Linux
18. #1122: fix return value of sched_getaffinity
19. #732: fix: /proc/<PID>/maps outputs an unnecessary NULL character
===================================
What's new in V1.5.0 (Apr 5, 2018)
===================================
--------------------------------------
McKernel new features and improvements
--------------------------------------
1. Aid for Linux version migration: Detect /proc, /sys format change
between two kernel versions
2. Swap out
* Only swap-out anonymous pages for now
3. Improve support of /proc/maps
4. mcstat: Linux tool to show resource usage
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #727: execve: Fix memory leak when receiving SIGKILL
2. #829: perf_event_open: Support PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
3. #906: mcexec: Check return code of fork()
4. #1038: mcexec: Timeout when incorrect value is given to -n option
5. #943 #945 #946 #960 #961: mcexec: Support strace
6. #1029: struct thread is not released with stress-test involving signal
and futex
7. #863 #870: Respond immediately to terminating signal when
offloading system call
8. #1119: translate_rva_to_rpa(): use 2MB blocks in 1GB pages on x86
11. #898: Shutdown OS only after no in-flight IKC exist
12. #882: release_handler: Destroy objects as the process which opened it
13. #882: mcexec: Make child process exit if the parent is killed during
fork()
14. #925: XPMEM: Don't destroy per-process object of the parent
15. #885: ptrace: Support the case where a process attaches its child
16. #1031: sigaction: Support SA_RESETHAND
17. #923: rus_vm_fault: Return error when a thread not performing
system call offloading causes remote page fault
18. #1032 #1033 #1034: getrusage: Fix ru_maxrss, RUSAGE_CHILDREN,
ru_stime related bugs
19. #1120: getrusage: Fix deadlock on thread->times_update
20. #1123: Fix deadlock related to wait_queue_head_list_node
21. #1124: Fix deadlock of calling terminate() from terminate()
22. #1125: Fix deadlock related to thread status
* Related functions are: hold_thread(), do_kill() and terminate()
23. #1126: uti: Fix uti thread on the McKernel side blocks others in do_syscall()
24. #1066: procfs: Show Linux /proc/self/cgroup
25. #1127: prepare_process_ranges_args_envs(): fix generating saved_cmdline to
avoid PF in strlen()
26. #1128: ihk_mc_map/unmap_virtual(): do proper TLB invalidation
27. #1043: terminate(): fix update_lock and threads_lock order to avoid deadlock
28. #1129: mcreboot.sh: Save /proc/irq/*/smp_affinity to /tmp/mcreboot
29. #1130: mcexec: drop READ_IMPLIES_EXEC from personality
--------------------
McKernel workarounds
--------------------
1. Forbid CPU oversubscription
* It can be turned on by mcreboot.sh -O option
===================================
What's new in V1.4.0 (Oct 30, 2017)
===================================
-----------------------------------------------------------
Feature: Abstracted event type support in perf_event_open()
-----------------------------------------------------------
PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types are supported.
----------------------------------
Clean-up: Direct user-space access
----------------------------------
Code using direct user-space access (e.g. passing a user-space
pointer to memcpy()) has been made more portable across processor
architectures. The modification follows these rules:
1. Move the code section as it is to the architecture-dependent
directory if it is part of the critical path.
2. Otherwise, rewrite the code section using the portable methods,
which include copy_from_user(), copy_to_user(),
pte_get_phys() and phys_to_virt(), as sketched after this list.
--------------------------------
Test: MPI and OpenMP micro-bench
--------------------------------
The performance figures of MPI and OpenMP primitives are compared with
those of Linux by using Intel MPI Benchmarks and EPCC OpenMP Micro
Benchmark.
===================================
What's new in V1.3.0 (Sep 30, 2017)
===================================
--------------------
Feature: Kernel dump
--------------------
1. A dump level of "only kernel memory" is added.
The following two levels are available now:
0: Dump all
24: Dump only kernel memory
The dump level can be set with the -d option of ihkosctl or with the
argument of ihk_os_makedumpfile(), as shown in the following examples:
Command: ihkosctl 0 dump -d 24
Function call: ihk_os_makedumpfile(0, NULL, 24, 0);
2. A dump file is created when Linux panics.
The dump level can be set with the dump_level kernel argument, as shown
in the following example:
ihkosctl 0 kargs "hidos dump_level=24"
The IHK dump function is registered to panic_notifier_list when creating
/dev/mcdX and called when Linux panics.
-----------------------------
Feature: Quick Process Launch
-----------------------------
MPI process launch time and some of the initialization time can be
reduced in applications consisting of multiple MPI programs that are
launched in turn by the job script.
The following two steps should be performed to use this feature:
1. Replace mpiexec with ql_mpiexec_start and add some lines for
ql_mpiexec_finalize in the job script
2. Modify the app so that it can repeat calculations and wait for the
instructions from ql_mpiexec_{start,finalize} at the end of the
loop
The first step is explained using an example. Assume the original job
script looks like this:
/* Execute ensemble simulation and then data assimilation, and repeat this
ten times */
for i in {1..10}; do
/* Each ensemble simulation execution uses 100 nodes; launch ten of them
in parallel */
for j in {1..10}; do
mpiexec -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
/* Wait until the ten ensemble simulation programs finish */
for j in {1..10}; do wait ${pids[$j]}; done
/* Launch one data assimilation program using 1000 nodes */
mpiexec -n 1000 -machinefile ./list2 p2.out a2
done
The job script should be modified like this:
for i in {1..10}; do
for j in {1..10}; do
/* Replace mpiexec with ql_mpiexec_start */
ql_mpiexec_start -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
for j in {1..10}; do wait ${pids[$j]}; done
ql_mpiexec_start -n 1000 -machinefile ./list2 p2.out a2
done
/* p1.out and p2.out don't exit but are waiting for the next calculation.
So tell them to exit */
for j in {1..10}; do
ql_mpiexec_finalize -machinefile ./list1_$j p1.out a1;
done
ql_mpiexec_finalize -machinefile ./list2 p2.out a2;
The second step is explained using pseudo-code.
MPI_Init();
Prepare data exchange with preceding / following MPI programs
loop:
foreach Fortran module
Initialize data using command-line arguments, parameter files,
environment variables
Input data from preceding MPI programs / Read snap-shot
Perform main calculation
Output data to following MPI programs / Write snap-shot
/* ql_client() waits for a command from ql_mpiexec_{start,finalize} */
if (ql_client() == QL_CONTINUE) { goto loop; }
MPI_Finalize();
qlmpilib.h should be included in the code and libql{mpi,fort}.so
should be linked to the executable file.
========================
Restrictions on McKernel
========================
1. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
correctly even if mmap() returns success. An access to such a
mapping receives the SIGSEGV signal.
2. clone() supports only the following flags. All the other flags
cause clone() to return error or are simply ignored.
* CLONE_CHILD_CLEARTID
* CLONE_CHILD_SETTID
* CLONE_PARENT_SETTID
* CLONE_SETTLS
* CLONE_SIGHAND
* CLONE_VM
3. PAPI has the following restriction.
* The number of counters a user can use at the same time is limited
to the number of physical counters in the processor.
4. msync writes back only the modified pages mapped by the calling process.
5. The following syscalls always return the ENOSYS error.
* migrate_pages()
* move_pages()
* set_robust_list()
6. The following syscalls always return the EOPNOTSUPP error.
* arch_prctl(ARCH_SET_GS)
* signalfd()
7. signalfd4() returns an fd, but signals are not delivered through
the fd.
8. setrlimit() sets the limit values, but they are not enforced.
9. Address randomization is not supported.
10. brk() extends the heap by more than requested when the -h
(--extend-heap-by=)<step> option of mcexec is used with a value
larger than 4 KiB. syscall_pwrite02 of LTP would fail for this
reason: the test expects that the end of the heap is set to the
same address as the argument of sbrk() and that a segmentation
violation occurs when it tries to access the memory area right
next to the boundary. However, the optimization sets the end to a
value larger than requested, so the expected segmentation
violation doesn't occur (see the sketch after this list).
11. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no
fixed correspondence between a McKernel thread which issues the
system call and a mcexec thread which handles the offload request.
12. mbind() can set the policy but it is not used when allocating
physical pages.
13. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
14. The MPOL_BIND policy for set_mempolicy()/mbind() works the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are
running out of pages but continues to search for pages in the
other nodes.
15. Kernel dump on Linux panic requires the CentOS 7.4 (or later)
Linux kernel. In addition, the crash_kexec_post_notifiers kernel
argument must be given to the Linux kernel.
16. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, they change that of the mcexec worker thread which takes
the system-call offload request.
17. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
18. Sticky bit on executable file has no effect.
19. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to a Linux bug, found in
Linux 3.10 and fixed in later versions. One way to circumvent
this is to always assign the same CPU set to McKernel.
20. madvise:
* MADV_HWPOISON and MADV_SOFT_OFFLINE always return -EPERM.
* MADV_MERGEABLE and MADV_UNMERGEABLE always return -EINVAL.
* MADV_HUGEPAGE and MADV_NOHUGEPAGE on a file map return -EINVAL
(they succeed on RHEL-8 for aarch64).
21. brk() and mmap() don't report out-of-memory through their return
values. Instead, the error is reported via a page fault.
22. Anonymous mmap pre-maps the requested number of pages when
contiguous pages are available; demand paging is used otherwise.
23. Mixing page sizes in an anonymous shared mapping is not allowed:
mmap creates a vm_range with one page size, and a munmap or mremap
that needs a reduced page size changes the sizes of all the pages
of the vm_range.
24. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
25. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.

285
README.md
View File

@ -1,285 +0,0 @@
![McKernel Logo](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel-logo.png)
-------------------------
IHK/McKernel is a light-weight multi-kernel operating system designed for high-end supercomputing. It runs Linux and McKernel, a light-weight kernel (LWK), side-by-side inside compute nodes and aims at the following:
- Provide scalable and consistent execution of large-scale parallel scientific applications, but at the same time maintain the ability to rapidly adapt to new hardware features and emerging programming models
- Provide efficient memory and device management so that resource contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide jitter free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading (slow-path) system calls to Linux
## Contents
- [Background](#background-and-motivation)
- [Architectural Overview](#architectural-overview)
- [Installation](#installation)
- [The Team](#the-team)
## Background and Motivation
With the growing complexity of high-end supercomputers, the current system software stack faces significant challenges as we move forward to exascale and beyond. The necessity to deal with extreme degree of parallelism, heterogeneous architectures, multiple levels of memory hierarchy, power constraints, etc., advocates operating systems that can rapidly adapt to new hardware requirements, and that can support novel programming paradigms and runtime systems. On the other hand, a new class of more dynamic and complex applications are also on the horizon, with an increasing demand for application constructs such as in-situ analysis, workflows, elaborate monitoring and performance tools. This complexity relies not only on the rich features of POSIX, but also on the Linux APIs (such as the */proc*, */sys* filesystems, etc.) in particular.
##### Two Traditional HPC OS Approaches
Traditionally, light-weight operating systems specialized for HPC followed two approaches to tackle scalable execution of large-scale applications. In the full weight kernel (FWK) approach, a full Linux environment is taken as the basis, and features that inhibit attaining HPC scalability are removed, i.e., making it light-weight. The pure light-weight kernel (LWK) approach, on the other hand, starts from scratch and effort is undertaken to add sufficient functionality so that it provides a familiar API, typically something close to that of a general purpose OS, while at the same time it retains the desired scalability and reliability attributes. Neither of these approaches yields a fully Linux compatible environment.
##### The Multi-kernel Approach
A hybrid approach recognized recently by the system software community is to run Linux simultaneously with a lightweight kernel on compute nodes, and multiple research projects are now pursuing this direction. The basic idea is that simulations run on an HPC-tailored lightweight kernel, ensuring the necessary isolation for noiseless execution of parallel applications, while Linux is leveraged so that the full POSIX API is supported. Additionally, the small code base of the LWK can facilitate rapid prototyping for new, exotic hardware features. Nevertheless, the questions of how node resources are shared between the two types of kernels, where device drivers execute, how exactly the two kernels interact with each other, and to what extent they are integrated remain subjects of ongoing debate.
## Architectural Overview
At the heart of the stack is a low-level software infrastructure called Interface for Heterogeneous Kernels (IHK). IHK is a general framework that provides capabilities for partitioning resources in a many-core environment (e.g., CPU cores and physical memory) and it enables management of lightweight kernels. IHK can allocate and release host resources dynamically, and no reboot of the host machine is required when altering the configuration. IHK also provides a low-level inter-kernel messaging infrastructure, called the Inter-Kernel Communication (IKC) layer. An architectural overview of the main system components is shown below.
![arch](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel.png)
McKernel is a lightweight kernel written from scratch. It is designed for HPC and is booted from IHK. McKernel retains a binary-compatible ABI with Linux; however, it implements only a small set of performance-sensitive system calls and the rest are offloaded to Linux. Specifically, McKernel has its own memory management, it supports processes and multi-threading with a simple round-robin cooperative (tick-less) scheduler, and it implements signaling. It also allows inter-process memory mappings and provides interfaces to hardware performance counters.
### Functionality
An overview of some of the principal functionalities of the IHK/McKernel stack is provided below.
#### System Call Offloading
System call forwarding in McKernel is implemented as follows. When an offloaded system call occurs, McKernel marshals the system call number along with its arguments and sends a message to Linux via a dedicated IKC channel. The corresponding proxy process running on Linux is by default waiting for system call requests through an ioctl() call into IHK's system call delegator kernel module. The delegator kernel module's IKC interrupt handler wakes up the proxy process, which returns to userspace and simply invokes the requested system call. Once it obtains the return value, it instructs the delegator module to send the result back to McKernel, which subsequently passes the value to user-space.
#### Unified Address Space
The unified address space model in IHK/McKernel ensures that offloaded system calls can seamlessly resolve arguments even in case of pointers. This mechanism is depicted below and is implemented as follows.
![unified_ap](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/img/unified_address_space_en.png)
First, the proxy process is compiled as a position independent binary, which enables us to map the code and data segments specific to the proxy process to an address range which is explicitly excluded from McKernel's user space. The grey box on the right side of the figure demonstrates the excluded region. Second, the entire valid virtual address range of McKernel's application user-space is covered by a special mapping in the proxy process, for which we use a pseudo file mapping in Linux. This mapping is indicated by the blue box on the left side of the figure.
## Installation
For a smooth experience, we recommend the following combination of OS distributions and platforms:
- CentOS 7.3+ running on Intel Xeon / Xeon Phi
##### 1. Change SELinux settings
Log in as root and disable SELinux:
~~~~
vim /etc/selinux/config
~~~~
In that file, set SELINUX=disabled
##### 2. Reboot the host machine
~~~~
sudo reboot
~~~~
##### 3. Prepare packages, kernel symbol table file
You will need the following packages installed:
~~~~
sudo yum install cmake kernel-devel binutils-devel systemd-devel numactl-devel gcc make nasm git
~~~~
Grant read permission to the System.map file of your kernel version:
~~~~
sudo chmod a+r /boot/System.map-`uname -r`
~~~~
##### 4. Obtain sources and compile the kernel
Clone the source code:
~~~~
mkdir -p ~/src/ihk+mckernel/
cd ~/src/ihk+mckernel/
git clone --recursive -b development https://github.com/RIKEN-SysSoft/mckernel.git
~~~~
(Optional) Check out a specific branch or version:
~~~~
cd mckernel
git checkout <pathspec>
git submodule update
~~~~
For example, if you want to try the development branch, use "development" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, use "1.7.0-0.2".
###### 4.1 Install with cmake
Configure and compile:
~~~~
mkdir -p build && cd build
cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel $HOME/src/ihk+mckernel/mckernel
make -j install
~~~~
The IHK kernel modules and McKernel kernel image should be installed under the **ihk+mckernel** folder in your home directory.
###### 4.2 Install with rpm
Configure, compile and build rpm:
~~~~
mkdir -p build && cd build
cmake $HOME/src/ihk+mckernel/mckernel
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
rpmbuild -ba scripts/mckernel.spec
sudo rpm -ivh <rpmbuild>/RPMS/<arch>/mckernel-<version>-<release>_<linux_kernel_ver>_<dist>.<arch>.rpm
~~~~
The IHK kernel modules and McKernel kernel image are installed under the system directory.
##### 5. Boot McKernel
A boot script called mcreboot.sh is provided under sbin in the install folder. To boot on logical CPU 1 with 512MB of memory, use the following invocation:
~~~~
export TOP=${HOME}/ihk+mckernel/
cd ${TOP}
sudo ./sbin/mcreboot.sh -c 1 -m 512m
~~~~
You should see something like this if you display McKernel's kernel message log:
~~~~
./sbin/ihkosctl 0 kmsg
IHK/McKernel started.
[ -1]: no_execute_available: 1
[ -1]: map_fixed: phys: 0xfee00000 => 0xffff860000009000 (1 pages)
[ -1]: setup_x86 done.
[ -1]: ns_per_tsc: 385
[ -1]: KCommand Line: hidos dump_level=24
[ -1]: Physical memory: 0x1ad3000 - 0x21000000, 525520896 bytes, 128301 pages available @ NUMA: 0
[ -1]: NUMA: 0, Linux NUMA: 0, type: 1, available bytes: 525520896, pages: 128301
[ -1]: NUMA 0 distances: 0 (10),
[ -1]: map_fixed: phys: 0x28000 => 0xffff86000000a000 (2 pages)
[ -1]: Trampoline area: 0x28000
[ -1]: map_fixed: phys: 0x0 => 0xffff86000000c000 (1 pages)
[ -1]: # of cpus : 1
[ -1]: locals = ffff880001af6000
[ 0]: BSP: 0 (HW ID: 1 @ NUMA 0)
[ 0]: BSP: booted 0 AP CPUs
[ 0]: Master channel init acked.
[ 0]: vdso is enabled
IHK/McKernel booted.
~~~~
##### 6. Run a simple program on McKernel
The mcexec command line tool (which is also the Linux proxy process) can be used for executing applications on McKernel:
~~~~
./bin/mcexec hostname
centos-vm
~~~~
##### 7. Shutdown McKernel
Finally, to shut down McKernel and release CPU/memory resources back to Linux, use the following command:
~~~~
sudo ./sbin/mcstop+release.sh
~~~~
##### 8. Advanced: Enable Utility Thread offloading Interface (UTI)
UTI enables a runtime such as an MPI runtime to spawn utility threads, such as MPI asynchronous progress threads, onto Linux cores.
1. Install capstone
Install EPEL capstone-devel:
~~~~
sudo yum install epel-release
sudo yum install capstone-devel
~~~~
2. Install syscall_intercept
~~~~
git clone https://github.com/RIKEN-SysSoft/syscall_intercept.git
cmake ../arch/aarch64 -DCMAKE_INSTALL_PREFIX=<syscall-intercept-install> -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
~~~~
3. Install UTI for McKernel
Install:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<mckernel-install> --with-rm=mckernel
make && make install
~~~~
4. Install McKernel
~~~~
CMAKE_PREFIX_PATH=<syscall-intercept-install> cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
~~~~
5. Run executable
~~~~
mcexec --enable-uti <command>
~~~~
6. Install UTI for Linux for performance comparison
Install by make:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
make && make install
~~~~
Install by rpm:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
rm -f ~/rpmbuild/SOURCES/<version>.tar.gz
rpmbuild -ba ./scripts/uti.spec
rpm -Uvh uti-<version>-<release>-<arch>.rpm
~~~~
## The Team
The McKernel project was started at The University of Tokyo and is currently developed mainly at RIKEN.
Some of our collaborators include:
- Hitachi
- Fujitsu
- CEA (France)
- NEC
## License
McKernel is GPL licensed, as found in the LICENSE file.
## Contact
Please give your feedback to us via one of the following mailing lists. Subscription via [www.pccluster.org](http://www.pccluster.org/mailman/listinfo/mckernel-users) is needed.
* English: mckernel-users@pccluster.org
* Japanese: mckernel-users-jp@pccluster.org

View File

@ -1,4 +1,4 @@
# Makefile.arch.in COPYRIGHT FUJITSU LIMITED 2015-2018
# Makefile.arch COPYRIGHT FUJITSU LIMITED 2015-2017
VDSO_SRCDIR = $(SRC)/../arch/$(IHKARCH)/kernel/vdso
VDSO_BUILDDIR = @abs_builddir@/vdso
VDSO_SO_O = $(O)/vdso.so.o
@ -6,22 +6,23 @@ VDSO_SO_O = $(O)/vdso.so.o
IHK_OBJS += assert.o cache.o cpu.o cputable.o context.o entry.o entry-fpsimd.o
IHK_OBJS += fault.o head.o hyp-stub.o local.o perfctr.o perfctr_armv8pmu.o proc.o proc-macros.o
IHK_OBJS += psci.o smp.o trampoline.o traps.o fpsimd.o
IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o timer.o
IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o
IHK_OBJS += $(notdir $(VDSO_SO_O)) memory.o syscall.o vdso.o
IHK_OBJS += irq-gic-v2.o irq-gic-v3.o
IHK_OBJS += memcpy.o memset.o
IHK_OBJS += cpufeature.o
IHK_OBJS += imp-sysreg.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
# IHK_OBJS added coredump.o
IHK_OBJS += coredump.o
$(VDSO_SO_O): $(VDSO_BUILDDIR)/vdso.so
$(VDSO_BUILDDIR)/vdso.so: FORCE
$(call echo_cmd,BUILD VDSO,$(TARGET))
mkdir -p $(O)/vdso
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS)
@mkdir -p $(O)/vdso
@TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare
@TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS)
FORCE:

View File

@ -1,4 +1,4 @@
/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <process.h>
#include <list.h>
@ -24,7 +24,6 @@ STATIC_ASSERT(offsetof(struct pt_regs, sp) == S_SP);
STATIC_ASSERT(offsetof(struct pt_regs, pc) == S_PC);
STATIC_ASSERT(offsetof(struct pt_regs, pstate) == S_PSTATE);
STATIC_ASSERT(offsetof(struct pt_regs, orig_x0) == S_ORIG_X0);
STATIC_ASSERT(offsetof(struct pt_regs, orig_pc) == S_ORIG_PC);
STATIC_ASSERT(offsetof(struct pt_regs, syscallno) == S_SYSCALLNO);
STATIC_ASSERT(sizeof(struct pt_regs) == S_FRAME_SIZE);
@ -51,6 +50,3 @@ STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved
ALIGN_UP(sizeof(struct _aarch64_ctx), 16) > sizeof(struct extra_context));
STATIC_ASSERT(SVE_PT_FPSIMD_OFFSET == sizeof(struct user_sve_header));
STATIC_ASSERT(SVE_PT_SVE_OFFSET == sizeof(struct user_sve_header));
/* assert for struct arm64_cpu_local_thread member offset define */
STATIC_ASSERT(offsetof(struct arm64_cpu_local_thread, panic_regs) == 168);

View File

@ -1,15 +1,10 @@
/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <process.h>
#include <elfcore.h>
#include <string.h>
#include <ptrace.h>
#include <cls.h>
#include <hwcap.h>
#define align32(x) ((((x) + 3) / 4) * 4)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct thread *thread, void *regs0, int sig)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
{
struct pt_regs *regs = regs0;
struct elf_prstatus64 tmp_prstatus;
@ -20,6 +15,8 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
@ -27,66 +24,12 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
/* copy x0-30, sp, pc, pstate */
memcpy(&tmp_prstatus.pr_reg, &regs->user_regs, sizeof(tmp_prstatus.pr_reg));
tmp_prstatus.pr_fpvalid = 0; /* We assume no fp */
/* copy unaligned prstatus addr */
memcpy(prstatus, &tmp_prstatus, sizeof(*prstatus));
prstatus->pr_pid = thread->tid;
if (thread->proc->parent) {
prstatus->pr_ppid = thread->proc->parent->pid;
}
prstatus->pr_info.si_signo = sig;
prstatus->pr_cursig = sig;
}
int arch_get_thread_core_info_size(void)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return 0;
}
return sizeof(struct note) + align32(sizeof("LINUX"))
+ regset_size(cpu_local_var(current), regset);
}
void arch_fill_thread_core_info(struct note *head,
struct thread *thread, void *regs)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
/* pre saved registers */
save_fp_regs(thread);
if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(thread, regset))) {
int ret;
size_t size = regset_size(thread, regset);
void *namep;
void *descp;
namep = (void *) (head + 1);
descp = namep + align32(sizeof("LINUX"));
ret = regset->get(thread, regset, 0, size, descp, NULL);
if (ret) {
return;
}
head->namesz = sizeof("LINUX");
head->descsz = size;
head->type = NT_ARM_SVE;
memcpy(namep, "LINUX", sizeof("LINUX"));
}
}
#endif /* POSTK_DEBUG_ARCH_DEP_18 */

File diff suppressed because it is too large

View File

@ -1,4 +1,4 @@
/* cpufeature.c COPYRIGHT FUJITSU LIMITED 2017-2018 */
/* cpufeature.c COPYRIGHT FUJITSU LIMITED 2017 */
#include <cpufeature.h>
#include <ihk/debug.h>
@ -10,7 +10,9 @@
#include <ptrace.h>
#include <hwcap.h>
#ifdef POSTK_DEBUG_ARCH_DEP_65
unsigned long elf_hwcap;
#endif /* POSTK_DEBUG_ARCH_DEP_65 */
/* @ref.impl arch/arm64/kernel/cpufeature.c */
#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
@ -52,19 +54,6 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_END,
};
/* @ref.impl linux4.16.0 arch/arm64/kernel/cpufeature.c */
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
/* @ref.impl arch/arm64/kernel/cpufeature.c */
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
@ -315,7 +304,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@ -970,7 +959,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SVE),
#endif
{ 0 },
{},
};
/* @ref.impl arch/arm64/kernel/cpufeature.c */
@ -1008,7 +997,9 @@ void setup_cpu_features(void)
setup_elf_hwcaps(arm64_elf_hwcaps);
}
#ifdef POSTK_DEBUG_ARCH_DEP_65
unsigned long arch_get_hwcap(void)
{
return elf_hwcap;
}
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -10,5 +10,5 @@ struct cpu_info cpu_table[] = {
.cpu_name = "AArch64 Processor",
.cpu_setup = __cpu_setup,
},
{ 0 },
{ /* Empty */ },
};

View File

@ -2,6 +2,7 @@
#include <cputype.h>
#include <irqflags.h>
#include <ihk/context.h>
#include <ihk/debug.h>
#include <signal.h>
#include <errno.h>
#include <debug-monitors.h>

View File

@ -1,11 +1,10 @@
/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <linkage.h>
#include <assembler.h>
#include <asm-offsets.h>
#include <esr.h>
#include <thread_info.h>
#include <asm-syscall.h>
/*
* Bad Abort numbers
@ -78,20 +77,16 @@
.macro kernel_exit, el, need_enable_step = 0
.if \el == 0
bl check_sig_pending
bl check_need_resched // or reschedule is needed.
mov x0, #0
mov x1, sp
mov x2, #0
bl check_signal // check whether the signal is delivered
bl check_need_resched // or reschedule is needed.
mov x0, #0
mov x1, sp
mov x2, #0
bl check_signal_irq_disabled // check whether the signal is delivered(for kernel_exit)
.endif
.if \el == 1
bl check_sig_pending
.endif
disable_irq x1 // disable interrupts
.if \need_enable_step == 1
ldr x1, [tsk, #TI_FLAGS]
@ -372,12 +367,7 @@ el0_sync:
b el0_inv
el0_svc:
uxtw scno, w8 // syscall number in w8
cmp scno, #__NR_rt_sigreturn
b.eq 1f
str x0, [sp, #S_ORIG_X0] // save the original x0
ldr x16, [sp, #S_PC]
str x16, [sp, #S_ORIG_PC] // save the original pc
1: str scno, [sp, #S_SYSCALLNO] // save syscall number
stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
enable_nmi
enable_dbg_and_irq x0
adrp x16, __arm64_syscall_handler
@ -560,7 +550,9 @@ ENTRY(ret_from_fork)
blr x19
1: get_thread_info tsk
bl release_runq_lock
bl utilthr_migrate
b ret_to_user
ENDPROC(ret_from_fork)
/* TODO: skeleton for rusage */
ENTRY(__freeze)
ENDPROC(__freeze)

View File

@ -1,4 +1,4 @@
/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <ihk/context.h>
#include <ihk/debug.h>
@ -13,6 +13,7 @@
unsigned long __page_fault_handler_address;
extern int interrupt_from_user(void *);
void set_signal(int sig, void *regs, struct siginfo *info);
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
@ -104,13 +105,12 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = fault_info + (esr & 63);
struct siginfo info;
const int from_user = interrupt_from_user(regs);
/* set_cputime called in inf->fn() */
if (!inf->fn(addr, esr, regs))
return;
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
kprintf("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
@ -118,7 +118,7 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
/*
@ -127,24 +127,21 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs) ? 1: 2);
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
@ -166,7 +163,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
(addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr);
panic("OOps.");
}
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
static int is_el0_instruction_abort(unsigned int esr)
@ -195,7 +192,6 @@ static int do_page_fault(unsigned long addr, unsigned int esr,
}
}
/* set_cputime() call in page_fault_handler() */
page_fault_handler = (void *)__page_fault_handler_address;
(*page_fault_handler)((void *)addr, reason, regs);
@ -256,10 +252,10 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
struct siginfo info;
const int from_user = interrupt_from_user(regs);
int from_user = interrupt_from_user(regs);
int ret = -1;
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(from_user ? 1: 2);
if (!inf->fn(addr, esr, regs)) {
ret = 1;
@ -278,7 +274,7 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg
ret = 0;
out:
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
return ret;
}
@ -287,9 +283,7 @@ out:
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(interrupt_from_user(regs) ? 1: 2);
set_cputime(0);
return 1;
}

View File

@ -1,4 +1,4 @@
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
#include <thread_info.h>
#include <fpsimd.h>
#include <cpuinfo.h>
@ -9,100 +9,27 @@
#include <prctl.h>
#include <cpufeature.h>
#include <kmalloc.h>
#include <ihk/debug.h>
#include <process.h>
#include <bitmap.h>
//#define DEBUG_PRINT_FPSIMD
#ifdef DEBUG_PRINT_FPSIMD
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
#ifdef CONFIG_ARM64_SVE
/* Set of available vector lengths, as vq_to_bit(vq): */
static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Maximum supported vector length across all CPUs (initially poisoned) */
int sve_max_vl = -1;
/* Default VL for tasks that don't set it explicitly: */
int sve_default_vl = -1;
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
* _maximum_ VQ not exceeding a certain value.
*/
static unsigned int vq_to_bit(unsigned int vq)
{
return SVE_VQ_MAX - vq;
}
static unsigned int bit_to_vq(unsigned int bit)
{
if (bit >= SVE_VQ_MAX) {
bit = SVE_VQ_MAX - 1;
}
return SVE_VQ_MAX - bit;
}
/*
* All vector length selection from userspace comes through here.
* We're on a slow path, so some sanity-checks are included.
* If things go wrong there's a bug somewhere, but try to fall back to a
* safe choice.
*/
static unsigned int find_supported_vector_length(unsigned int vl)
{
int bit;
int max_vl = sve_max_vl;
if (!sve_vl_valid(vl)) {
vl = SVE_VL_MIN;
}
if (!sve_vl_valid(max_vl)) {
max_vl = SVE_VL_MIN;
}
if (vl > max_vl) {
vl = max_vl;
}
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(bit_to_vq(bit));
}
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
unsigned long zcr;
bitmap_zero(map, SVE_VQ_MAX);
zcr = ZCR_EL1_LEN_MASK;
zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
/* self-syncing */
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
vl = sve_get_vl();
/* skip intervening lengths */
vq = sve_vq_from_vl(vl);
set_bit(vq_to_bit(vq), map);
}
}
void sve_init_vq_map(void)
{
sve_probe_vqs(sve_vq_map);
}
size_t sve_state_size(struct thread const *thread)
{
unsigned int vl = thread->ctx.thread->sve_vl;
@ -119,19 +46,17 @@ void sve_free(struct thread *thread)
}
}
int sve_alloc(struct thread *thread)
void sve_alloc(struct thread *thread)
{
if (thread->ctx.thread->sve_state) {
return 0;
return;
}
thread->ctx.thread->sve_state =
kmalloc(sve_state_size(thread), IHK_MC_AP_NOWAIT);
if (thread->ctx.thread->sve_state == NULL) {
return -ENOMEM;
}
BUG_ON(!thread->ctx.thread->sve_state);
memset(thread->ctx.thread->sve_state, 0, sve_state_size(thread));
return 0;
}
static int get_nr_threads(struct process *proc)
@ -148,13 +73,28 @@ static int get_nr_threads(struct process *proc)
return nr_threads;
}
extern void save_fp_regs(struct thread *thread);
extern void clear_fp_regs(struct thread *thread);
extern void restore_fp_regs(struct thread *thread);
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */
int sve_set_vector_length(struct thread *thread,
unsigned long vl, unsigned long flags)
{
struct thread_info *ti = thread->ctx.thread;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
BUG_ON(thread == cpu_local_var(current) && cpu_local_var(no_preempt) == 0);
/*
* To avoid accidents, forbid setting for individual threads of a
* multithreaded process. User code that knows what it's doing can
* pass PR_SVE_SET_VL_THREAD to override this restriction:
*/
if (!(flags & PR_SVE_SET_VL_THREAD) && get_nr_threads(thread->proc) != 1) {
return -EINVAL;
}
flags &= ~(unsigned long)PR_SVE_SET_VL_THREAD;
if (flags & ~(unsigned long)(PR_SVE_SET_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
return -EINVAL;
}
@ -163,19 +103,13 @@ int sve_set_vector_length(struct thread *thread,
return -EINVAL;
}
/*
* Clamp to the maximum vector length that VL-agnostic SVE code can
* work with. A flag may be assigned in the future to allow setting
* of larger vector lengths without confusing older software.
*/
if (vl > SVE_VL_ARCH_MAX) {
vl = SVE_VL_ARCH_MAX;
if (vl > sve_max_vl) {
BUG_ON(!sve_vl_valid(sve_max_vl));
vl = sve_max_vl;
}
vl = find_supported_vector_length(vl);
if (flags & (PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
if (flags & (PR_SVE_SET_VL_ONEXEC |
PR_SVE_SET_VL_INHERIT)) {
ti->sve_vl_onexec = vl;
} else {
/* Reset VL to system default on next exec: */
@ -187,42 +121,39 @@ int sve_set_vector_length(struct thread *thread,
goto out;
}
if (vl == ti->sve_vl) {
goto out;
}
if (vl != ti->sve_vl) {
if ((elf_hwcap & HWCAP_SVE)) {
fp_regs_struct fp_regs;
memset(&fp_regs, 0, sizeof(fp_regs));
if ((elf_hwcap & HWCAP_SVE)) {
fp_regs_struct fp_regs;
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs(thread);
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
memset(&fp_regs, 0, sizeof(fp_regs));
ti->sve_vl = vl;
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs();
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
restore_fp_regs(thread);
/* for target thread at ptrace */
} else {
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
restore_fp_regs(thread);
/* for target thread at ptrace */
} else {
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
}
}
}
ti->sve_vl = vl;
out:
ti->sve_flags = flags & PR_SVE_VL_INHERIT;
ti->sve_flags = flags & PR_SVE_SET_VL_INHERIT;
return 0;
}
@ -232,53 +163,44 @@ out:
* Encode the current vector length and flags for return.
* This is only required for prctl(): ptrace has separate fields
*/
static int sve_prctl_status(unsigned long flags)
static int sve_prctl_status(const struct thread_info *ti)
{
int ret;
struct thread_info *ti = cpu_local_var(current)->ctx.thread;
int ret = ti->sve_vl;
if (flags & PR_SVE_SET_VL_ONEXEC) {
ret = ti->sve_vl_onexec;
}
else {
ret = ti->sve_vl;
}
ret |= ti->sve_flags << 16;
if (ti->sve_flags & PR_SVE_VL_INHERIT) {
ret |= PR_SVE_VL_INHERIT;
}
return ret;
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_task_vl */
int sve_set_thread_vl(unsigned long arg)
int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length,
const unsigned long flags)
{
unsigned long vl, flags;
int ret;
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
if (!(elf_hwcap & HWCAP_SVE)) {
return -EINVAL;
}
ret = sve_set_vector_length(cpu_local_var(current), vl, flags);
BUG_ON(thread != cpu_local_var(current));
preempt_disable();
ret = sve_set_vector_length(thread, vector_length, flags);
preempt_enable();
if (ret) {
return ret;
}
return sve_prctl_status(flags);
return sve_prctl_status(thread->ctx.thread);
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_get_ti_vl */
int sve_get_thread_vl(void)
int sve_get_thread_vl(const struct thread *thread)
{
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
if (!(elf_hwcap & HWCAP_SVE)) {
return -EINVAL;
}
return sve_prctl_status(0);
return sve_prctl_status(thread->ctx.thread);
}
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
@ -288,48 +210,25 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
panic("");
}
void sve_setup(void)
void init_sve_vl(void)
{
extern unsigned long ihk_param_default_vl;
uint64_t zcr;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
/* init sve_vq_map bitmap */
sve_init_vq_map();
/*
* The SVE architecture mandates support for 128-bit vectors,
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
if (!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)) {
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
}
zcr = read_system_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_EL1_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (sve_max_vl != find_supported_vector_length(sve_max_vl)) {
sve_max_vl = find_supported_vector_length(sve_max_vl);
}
BUG_ON(((zcr & ZCR_EL1_LEN_MASK) + 1) * 16 > sve_max_vl);
sve_max_vl = ((zcr & ZCR_EL1_LEN_MASK) + 1) * 16;
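	/*
	 * Worked example (illustrative): ZCR_EL1.LEN holds (VQ - 1), where a
	 * vector quantum (VQ) is 128 bits (16 bytes), so a LEN field of 3
	 * yields (3 + 1) * 16 = 64 bytes, i.e. 512-bit vectors.
	 */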
sve_default_vl = ihk_param_default_vl;
if (ihk_param_default_vl !=
find_supported_vector_length(ihk_param_default_vl)) {
kprintf("SVE: Getting unsupported default VL = %d "
"from HOST-Linux.\n", sve_default_vl);
sve_default_vl = find_supported_vector_length(64);
kprintf("SVE: Using default vl(%d byte).\n",
sve_default_vl);
if (sve_default_vl == 0) {
kprintf("SVE: Getting default VL = 0 from HOST-Linux.\n");
sve_default_vl = sve_max_vl > 64 ? 64 : sve_max_vl;
kprintf("SVE: Using default vl(%d byte).\n", sve_default_vl);
}
kprintf("SVE: maximum available vector length %u bytes per vector\n",
@ -340,7 +239,7 @@ void sve_setup(void)
#else /* CONFIG_ARM64_SVE */
void sve_setup(void)
void init_sve_vl(void)
{
/* nothing to do. */
}

arch/arm64/kernel/gencore.c Normal file
View File

@ -0,0 +1,472 @@
/* gencore.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <ihk/debug.h>
#include <kmalloc.h>
#include <cls.h>
#include <list.h>
#include <process.h>
#include <string.h>
#include <elfcore.h>
#define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
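/*
 * Illustrative expansions of the helpers above: align32(5) == 8 and
 * align32(8) == 8; with 4KiB pages, alignpage(1) == 4096 and
 * alignpage(4096) == 4096.
 */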
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
/*
* Generate a core file image, which consists of many chunks.
* Returns an allocated table, an entry of which is a pair of the address
* of a chunk and its length.
*/
/**
* \brief Fill the elf header.
*
* \param eh An Elf64_Ehdr structure.
* \param segs Number of segments of the core file.
*/
void fill_elf_header(Elf64_Ehdr *eh, int segs)
{
eh->e_ident[EI_MAG0] = 0x7f;
eh->e_ident[EI_MAG1] = 'E';
eh->e_ident[EI_MAG2] = 'L';
eh->e_ident[EI_MAG3] = 'F';
eh->e_ident[EI_CLASS] = ELFCLASS64;
eh->e_ident[EI_DATA] = ELFDATA2LSB;
eh->e_ident[EI_VERSION] = El_VERSION;
eh->e_ident[EI_OSABI] = ELFOSABI_NONE;
eh->e_ident[EI_ABIVERSION] = El_ABIVERSION_NONE;
eh->e_type = ET_CORE;
#ifdef CONFIG_MIC
eh->e_machine = EM_K10M;
#else
eh->e_machine = EM_X86_64;
#endif
eh->e_version = EV_CURRENT;
eh->e_entry = 0; /* Do we really need this? */
eh->e_phoff = 64; /* fixed */
eh->e_shoff = 0; /* no section header */
eh->e_flags = 0;
eh->e_ehsize = 64; /* fixed */
eh->e_phentsize = 56; /* fixed */
eh->e_phnum = segs;
eh->e_shentsize = 0;
eh->e_shnum = 0;
eh->e_shstrndx = 0;
}
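/*
 * Aside (illustrative, not from the original source): the "fixed" values
 * above are the standard ELF64 sizes, sizeof(Elf64_Ehdr) == 64 and
 * sizeof(Elf64_Phdr) == 56, which is also why e_phoff is 64: the program
 * header table immediately follows the ELF header.
 */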
/**
* \brief Return the size of the prstatus entry of the NOTE segment.
*
*/
int get_prstatus_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ align32(sizeof(struct elf_prstatus64));
}
/**
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs0 A pointer to a x86_regs structure.
*/
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
/* TODO(pka_idle) */
}
/**
* \brief Return the size of the prpsinfo entry of the NOTE segment.
*
*/
int get_prpsinfo_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ align32(sizeof(struct elf_prpsinfo64));
}
/**
* \brief Fill a prpsinfo structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
head->namesz = sizeof("CORE");
head->descsz = sizeof(struct elf_prpsinfo64);
head->type = NT_PRPSINFO;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
/*
We leave most of the fields unfilled.
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
*/
}
/**
* \brief Return the size of the AUXV entry of the NOTE segment.
*
*/
int get_auxv_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ sizeof(unsigned long) * AUXV_LEN;
}
/**
* \brief Fill an AUXV structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
void *name;
void *auxv;
head->namesz = sizeof("CORE");
head->descsz = sizeof(unsigned long) * AUXV_LEN;
head->type = NT_AUXV;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
}
/**
* \brief Return the size of the whole NOTE segment.
*
*/
int get_note_size(void)
{
return get_prstatus_size() + get_prpsinfo_size()
+ get_auxv_size();
}
/**
* \brief Fill the NOTE segment.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_note(void *note, struct thread *thread, void *regs)
{
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
fill_auxv(note, thread, regs);
}
/**
* \brief Generate an image of the core file.
*
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of the entries of coretable.
*
* A core chunk is represented by a pair of a physical
* address of a memory region and its size. If there is
* no corresponding physical address for a VM area
* (an unallocated demand-paging page, e.g.), the address
* should be zero.
*/
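/*
 * Consumer sketch (illustrative; write_zeros() and write_phys() are
 * hypothetical host-side helpers, not part of this source). A chunk with
 * addr == 0 stands for pages that were never faulted in, so the consumer
 * emits zero-filled data of the same length:
 *
 *	for (i = 0; i < chunks; i++) {
 *		if (coretable[i].addr == 0)
 *			write_zeros(fd, coretable[i].len);
 *		else
 *			write_phys(fd, coretable[i].addr, coretable[i].len);
 *	}
 */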
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
struct coretable *ct = NULL;
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
int i;
*chunks = 3; /* ELF header, program header table, and NOTE segment */
if (vm == NULL) {
dkprintf("no vm found.\n");
return -1;
}
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
/* We need a chunk for each page of a demand-paging area.
This could be optimized for space complexity, but we would
lose simplicity instead. */
if (range->flag & VR_DEMAND_PAGING) {
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
if (prevzero == 1)
(*chunks)++;
(*chunks)++;
prevzero = 0;
}
}
if (prevzero == 1)
(*chunks)++;
} else {
(*chunks)++;
}
segs++;
}
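	/*
	 * Counting example (illustrative): a demand-paging range whose four
	 * pages are mapped, unmapped, unmapped, mapped contributes 3 chunks:
	 * one per mapped page plus one for the unmapped run between them.
	 */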
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end);
dkprintf("map: %lx-%lx\n", region.map_start, region.map_end);
dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end);
dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end);
}
dkprintf("now generate a core file image\n");
offset += sizeof(eh);
fill_elf_header(&eh, segs);
/* program header table */
phsize = sizeof(Elf64_Phdr) * segs;
ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
if (ph == NULL) {
dkprintf("could not alloc a program header table.\n");
goto fail;
}
memset(ph, 0, phsize);
offset += phsize;
/* NOTE segment
* To align the next segment to a page boundary, we prepare a
* padded region for our NOTE segment.
*/
notesize = get_note_size();
alignednotesize = alignpage(notesize + offset) - offset;
note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
if (note == NULL) {
dkprintf("could not alloc NOTE for core.\n");
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, thread, regs);
/* program header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
ph[0].p_flags = 0;
ph[0].p_offset = offset;
ph[0].p_vaddr = 0;
ph[0].p_paddr = 0;
ph[0].p_filesz = notesize;
ph[0].p_memsz = notesize;
ph[0].p_align = 0;
offset += alignednotesize;
/* program header for each memory chunk */
i = 1;
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
if (range->flag & VR_RESERVED)
continue;
ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
| ((flag & VR_PROT_WRITE) ? PF_W : 0)
| ((flag & VR_PROT_EXEC) ? PF_X : 0);
ph[i].p_offset = offset;
ph[i].p_vaddr = range->start;
ph[i].p_paddr = 0;
ph[i].p_filesz = size;
ph[i].p_memsz = size;
ph[i].p_align = PAGE_SIZE;
i++;
offset += size;
}
/* coretable to send to host */
ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
if (!ct) {
dkprintf("could not alloc a coretable.\n");
goto fail;
}
ct[0].addr = virt_to_phys(&eh); /* ELF header */
ct[0].len = 64;
dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh);
ct[1].addr = virt_to_phys(ph); /* program header table */
ct[1].len = phsize;
dkprintf("coretable[1]: %lx@%lx(%lx)\n", ct[1].len, ct[1].addr, ph);
ct[2].addr = virt_to_phys(note); /* NOTE segment */
ct[2].len = alignednotesize;
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
i = 3; /* memory segments */
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long phys;
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;
int prevzero = 0;
unsigned long size = 0;
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* We begin a new chunk */
size = PAGE_SIZE;
start = p;
} else {
/* We extend the previous chunk */
size += PAGE_SIZE;
}
prevzero = 1;
} else {
if (prevzero == 1) {
/* Flush out an empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
ct[i].addr = phys;
ct[i].len = PAGE_SIZE;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, p);
i++;
prevzero = 0;
}
}
if (prevzero == 1) {
/* An empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
} else {
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
"to physical address", range->start);
goto fail;
}
} else {
phys = virt_to_phys((void *)range->start);
}
ct[i].addr = phys;
ct[i].len = range->end - range->start;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, range->start);
i++;
}
}
*coretable = ct;
return 0;
fail:
if (ct)
kfree(ct);
if (ph)
kfree(ph);
if (note)
kfree(note);
return -1;
}
/**
* \brief Free all the allocated spaces for an image of the core file.
*
* \param coretable An array of core chunks.
*/
void freecore(struct coretable **coretable)
{
struct coretable *ct = *coretable;
kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */
kfree(phys_to_virt(ct[1].addr)); /* ph */
kfree(*coretable);
}
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,4 @@
/* head.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* head.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <linkage.h>
#include <ptrace.h>
@ -10,7 +10,9 @@
#include <smp.h>
#include <arm-gic-v3.h>
/* KERNEL_RAM_VADDR is defined by cmake */
#define KERNEL_RAM_VADDR MAP_KERNEL_START
#define EARLY_ALLOC_VADDR MAP_EARLY_ALLOC
#define BOOT_PARAM_VADDR MAP_BOOT_PARAM
//#ifndef CONFIG_SMP
//# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF
@ -48,6 +50,16 @@
add \ttb0, \ttb0, \virt_to_phys
.endm
#ifdef CONFIG_ARM64_64K_PAGES
# define BLOCK_SHIFT PAGE_SHIFT
# define BLOCK_SIZE PAGE_SIZE
# define TABLE_SHIFT PMD_SHIFT
#else
# define BLOCK_SHIFT SECTION_SHIFT
# define BLOCK_SIZE SECTION_SIZE
# define TABLE_SHIFT PUD_SHIFT
#endif
#define KERNEL_START KERNEL_RAM_VADDR
#define KERNEL_END _end
@ -75,7 +87,6 @@
#define TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_CPU_MAP_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_DATA_RDISTS_PA_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE 0x08
#define TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE 0x04
#define TRAMPOLINE_DATA_PMU_AFF_SIZE (CONFIG_SMP_MAX_CORES * 4)
@ -94,9 +105,9 @@
.globl ihk_param_gic_percpu_offset, ihk_param_gic_version
.globl ihk_param_lpj, ihk_param_hz, ihk_param_psci_method
.globl ihk_param_cpu_logical_map, ihk_param_gic_rdist_base_pa
.globl ihk_param_pmu_irq_affi, ihk_param_nr_pmu_irq_affi
.globl ihk_param_pmu_irq_affiniry, ihk_param_nr_pmu_irq_affiniry
.globl ihk_param_use_virt_timer, ihk_param_evtstrm_timer_rate
.globl ihk_param_retention_state_flag_pa, ihk_param_default_vl
.globl ihk_param_default_vl
ihk_param_head:
ihk_param_param_addr:
.quad 0
@ -134,11 +145,9 @@ ihk_param_cpu_logical_map:
.skip NR_CPUS * 8 /* array of the MPIDR and the core number */
ihk_param_gic_rdist_base_pa:
.skip NR_CPUS * 8 /* per-cpu re-distributer PA */
ihk_param_retention_state_flag_pa:
.quad 0
ihk_param_pmu_irq_affi:
ihk_param_pmu_irq_affiniry:
.skip CONFIG_SMP_MAX_CORES * 4 /* array of the pmu affinity list */
ihk_param_nr_pmu_irq_affi:
ihk_param_nr_pmu_irq_affiniry:
.word 0 /* number of pmu affinity list elements. */
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
@ -256,17 +265,13 @@ ENTRY(arch_start)
mov x16, #NR_CPUS /* calc next data */
lsl x16, x16, 3
add x0, x0, x16
/* retention_state_flag_pa */
ldr x16, [x0], #TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE
adr x15, ihk_param_retention_state_flag_pa
str x16, [x15]
/* nr_pmu_irq_affi */
/* nr_pmu_irq_affiniry */
ldr w16, [x0], #TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE
adr x15, ihk_param_nr_pmu_irq_affi
adr x15, ihk_param_nr_pmu_irq_affiniry
str w16, [x15]
/* pmu_irq_affi */
/* pmu_irq_affiniry */
mov x18, x0
adr x15, ihk_param_pmu_irq_affi
adr x15, ihk_param_pmu_irq_affiniry
b 2f
1: ldr w17, [x18], #4
str w17, [x15], #4
@ -405,17 +410,14 @@ __create_page_tables:
* Map the early_alloc_pages area, kernel_img next block
*/
ldr x3, =KERNEL_END
add x3, x3, x28 // __pa(KERNEL_END)
add x3, x3, x28 // __pa(KERNEL_END)
add x3, x3, #BLOCK_SIZE
sub x3, x3, #1
bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc.
ldr x5, =KERNEL_END // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc.
mov x6, #MAP_EARLY_ALLOC_SIZE
sub x3, x3, #1
bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc.
ldr x5, =EARLY_ALLOC_VADDR // get start VA
mov x6, #1
lsl x6, x6, #(PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT)
add x6, x5, x6 // end VA calc
mov x23, x6 // save end VA
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
@ -423,13 +425,11 @@ __create_page_tables:
* Map the boot_param area
*/
adr x3, ihk_param_param_addr
ldr x3, [x3] // get boot_param PA
mov x5, x23 // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc
mov x6, #MAP_BOOT_PARAM_SIZE
add x6, x5, x6 // end VA calc.
ldr x3, [x3] // get boot_param PA
ldr x5, =BOOT_PARAM_VADDR // get boot_param VA
mov x6, #1
lsl x6, x6, #MAP_BOOT_PARAM_SHIFT
add x6, x5, x6 // end VA calc
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6

View File

@ -7,7 +7,6 @@
#include <hw_breakpoint.h>
#include <arch-memory.h>
#include <signal.h>
#include <process.h>
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::core_num_[brps|wrps] */
/* Number of BRP/WRP registers on this CPU. */

View File

@ -1,131 +0,0 @@
/* imp-sysreg.c COPYRIGHT FUJITSU LIMITED 2018 */
#include <sysreg.h>
/* hpc */
ACCESS_REG_FUNC(fj_tag_address_ctrl_el1, IMP_FJ_TAG_ADDRESS_CTRL_EL1);
ACCESS_REG_FUNC(pf_ctrl_el1, IMP_PF_CTRL_EL1);
ACCESS_REG_FUNC(pf_stream_detect_ctrl_el0, IMP_PF_STREAM_DETECT_CTRL_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl0_el0, IMP_PF_INJECTION_CTRL0_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl1_el0, IMP_PF_INJECTION_CTRL1_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl2_el0, IMP_PF_INJECTION_CTRL2_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl3_el0, IMP_PF_INJECTION_CTRL3_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl4_el0, IMP_PF_INJECTION_CTRL4_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl5_el0, IMP_PF_INJECTION_CTRL5_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl6_el0, IMP_PF_INJECTION_CTRL6_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl7_el0, IMP_PF_INJECTION_CTRL7_EL0);
ACCESS_REG_FUNC(pf_injection_distance0_el0, IMP_PF_INJECTION_DISTANCE0_EL0);
ACCESS_REG_FUNC(pf_injection_distance1_el0, IMP_PF_INJECTION_DISTANCE1_EL0);
ACCESS_REG_FUNC(pf_injection_distance2_el0, IMP_PF_INJECTION_DISTANCE2_EL0);
ACCESS_REG_FUNC(pf_injection_distance3_el0, IMP_PF_INJECTION_DISTANCE3_EL0);
ACCESS_REG_FUNC(pf_injection_distance4_el0, IMP_PF_INJECTION_DISTANCE4_EL0);
ACCESS_REG_FUNC(pf_injection_distance5_el0, IMP_PF_INJECTION_DISTANCE5_EL0);
ACCESS_REG_FUNC(pf_injection_distance6_el0, IMP_PF_INJECTION_DISTANCE6_EL0);
ACCESS_REG_FUNC(pf_injection_distance7_el0, IMP_PF_INJECTION_DISTANCE7_EL0);
static void hpc_prefetch_regs_init(void)
{
uint64_t reg = 0;
/* PF_CTRL_EL1 */
reg = IMP_PF_CTRL_EL1_EL1AE_ENABLE | IMP_PF_CTRL_EL1_EL0AE_ENABLE;
xos_access_pf_ctrl_el1(WRITE_ACCESS, &reg);
/* PF_STREAM_DETECT_CTRL */
reg = 0;
xos_access_pf_stream_detect_ctrl_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_CTRL */
reg = 0;
xos_access_pf_injection_ctrl0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl7_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_DISTANCE */
reg = 0;
xos_access_pf_injection_distance0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance7_el0(WRITE_ACCESS, &reg);
}
static void hpc_tag_address_regs_init(void)
{
uint64_t reg = IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK;
/* FJ_TAG_ADDRESS_CTRL */
xos_access_fj_tag_address_ctrl_el1(WRITE_ACCESS, &reg);
}
void hpc_registers_init(void)
{
hpc_prefetch_regs_init();
hpc_tag_address_regs_init();
}
/* vhbm */
ACCESS_REG_FUNC(barrier_ctrl_el1, IMP_BARRIER_CTRL_EL1);
ACCESS_REG_FUNC(barrier_bst_bit_el1, IMP_BARRIER_BST_BIT_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb0_el1, IMP_BARRIER_INIT_SYNC_BB0_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb1_el1, IMP_BARRIER_INIT_SYNC_BB1_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb2_el1, IMP_BARRIER_INIT_SYNC_BB2_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb3_el1, IMP_BARRIER_INIT_SYNC_BB3_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb4_el1, IMP_BARRIER_INIT_SYNC_BB4_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb5_el1, IMP_BARRIER_INIT_SYNC_BB5_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w0_el1, IMP_BARRIER_ASSIGN_SYNC_W0_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w1_el1, IMP_BARRIER_ASSIGN_SYNC_W1_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w2_el1, IMP_BARRIER_ASSIGN_SYNC_W2_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w3_el1, IMP_BARRIER_ASSIGN_SYNC_W3_EL1);
void vhbm_barrier_registers_init(void)
{
uint64_t reg = 0;
reg = IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE |
IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE;
xos_access_barrier_ctrl_el1(WRITE_ACCESS, &reg);
reg = 0;
xos_access_barrier_init_sync_bb0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb3_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb4_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb5_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w3_el1(WRITE_ACCESS, &reg);
}
/* sccr */
ACCESS_REG_FUNC(sccr_ctrl_el1, IMP_SCCR_CTRL_EL1);
ACCESS_REG_FUNC(sccr_assign_el1, IMP_SCCR_ASSIGN_EL1);
ACCESS_REG_FUNC(sccr_set0_l2_el1, IMP_SCCR_SET0_L2_EL1);
ACCESS_REG_FUNC(sccr_l1_el0, IMP_SCCR_L1_EL0);
void scdrv_registers_init(void)
{
uint64_t reg = 0;
reg = IMP_SCCR_CTRL_EL1_EL1AE_MASK;
xos_access_sccr_ctrl_el1(WRITE_ACCESS, &reg);
reg = 0;
xos_access_sccr_assign_el1(WRITE_ACCESS, &reg);
xos_access_sccr_l1_el0(WRITE_ACCESS, &reg);
reg = (14UL << IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT);
xos_access_sccr_set0_l2_el1(WRITE_ACCESS, &reg);
}

View File

@ -1,4 +1,4 @@
/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_ARCH_FUTEX_H
#define __HEADER_ARM64_COMMON_ARCH_FUTEX_H
@ -32,13 +32,12 @@
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_op_inuser
*/
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
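	/*
	 * Sign-extension note (illustrative): with a 32-bit int, the pair
	 * (encoded_op << 8) >> 20 extracts bits [23:12] (oparg) with bit 23
	 * as the sign, and (encoded_op << 20) >> 20 extracts bits [11:0]
	 * (cmparg) with bit 11 as the sign, relying on arithmetic right
	 * shift of signed values.
	 */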
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))

View File

@ -1,4 +1,4 @@
/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_ARCH_LOCK_H
#define __HEADER_ARM64_COMMON_ARCH_LOCK_H
@ -6,9 +6,6 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -22,14 +19,14 @@ int __kprintf(const char *format, ...);
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::arch_spinlock_t */
typedef struct {
#ifdef __AARCH64EB__
uint16_t next;
uint16_t owner;
#else /* __AARCH64EB__ */
//#ifdef __AARCH64EB__
// uint16_t next;
// uint16_t owner;
//#else /* __AARCH64EB__ */
uint16_t owner;
uint16_t next;
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
//#endif /* __AARCH64EB__ */
} ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -37,100 +34,14 @@ extern void preempt_disable(void);
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::__ARCH_SPIN_LOCK_UNLOCKED */
#define SPIN_LOCK_UNLOCKED { 0, 0 }
/* @ref.impl arch/arm64/include/asm/barrier.h::__nops */
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
/* @ref.impl ./arch/arm64/include/asm/lse.h::ARM64_LSE_ATOMIC_INSN */
/* else defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
/* initialized spinlock struct */
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = (ihk_spinlock_t)SPIN_LOCK_UNLOCKED;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) ({ \
int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", \
ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", \
ihk_mc_get_processor_id()); \
rc; \
})
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_trylock */
/* spinlock trylock */
static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
ihk_spinlock_t lockval;
int success;
preempt_disable();
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldaxr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 2f\n"
" add %w0, %w0, %3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:",
/* LSE atomics */
" ldr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
" sub %w1, %w1, %3\n"
" eor %w1, %w1, %w0\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
: "memory");
success = !tmp;
if (!success) {
preempt_enable();
}
return success;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ \
unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", \
ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", \
ihk_mc_get_processor_id()); \
rc; \
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
/* spinlock trylock & interrupt disable & PSTATE.DAIF save */
static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock,
int *result)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
*result = __ihk_mc_spinlock_trylock_noirq(lock);
return flags;
}
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */
/* spinlock lock */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -141,8 +52,6 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */
/* spinlock lock */
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
@ -152,19 +61,11 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
asm volatile(
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n"
"1: ldaxr %w0, %3\n"
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
__nops(3)
)
" cbnz %w2, 1b\n"
/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
@ -184,6 +85,7 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "memory");
}
/* spinlock lock & interrupt disable & PSTATE.DAIF save */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -193,8 +95,6 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
/* spinlock lock & interrupt disable & PSTATE.DAIF save */
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
@ -206,6 +106,8 @@ static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
return flags;
}
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */
/* spinlock unlock */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -215,24 +117,12 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */
/* spinlock unlock */
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
unsigned long tmp;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" ldrh %w1, %0\n"
" add %w1, %w1, #1\n"
" stlrh %w1, %0",
/* LSE atomics */
" mov %w1, #1\n"
" staddlh %w1, %0\n"
__nops(1))
: "=Q" (lock->owner), "=&r" (tmp)
:
asm volatile(
" stlrh %w1, %0\n"
: "=Q" (lock->owner)
: "r" (lock->owner + 1)
: "memory");
preempt_enable();
@ -255,6 +145,84 @@ static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
cpu_restore_interrupt(flags);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
typedef struct mcs_lock_node {
unsigned long locked;
struct mcs_lock_node *next;
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
node->next = NULL;
}
static void __mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
pred = xchg8(&(lock->next), node);
if (pred) {
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
}
static void __mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = atomic_cmpxchg8(&(lock->next), node, 0);
if (old == node) {
return;
}
while (node->next == NULL) {
cpu_pause();
}
}
node->next->locked = 0;
}
static void mcs_lock_lock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
preempt_disable();
__mcs_lock_lock(lock, node);
}
static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
__mcs_lock_unlock(lock, node);
preempt_enable();
}
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
node->irqsave = cpu_disable_interrupt_save();
mcs_lock_lock_noirq(lock, node);
}
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
mcs_lock_unlock_noirq(lock, node);
cpu_restore_interrupt(node->irqsave);
}
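/*
 * Usage sketch (illustrative; example_lock is a hypothetical instance):
 * each contender brings its own queue node, typically on its stack, so
 * a waiter spins only on its private node->locked flag.
 *
 *	static struct mcs_lock_node example_lock;	// queue tail
 *	struct mcs_lock_node node;
 *
 *	mcs_lock_lock(&example_lock, &node);
 *	... critical section, interrupts disabled ...
 *	mcs_lock_unlock(&example_lock, &node);
 */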
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
@ -270,22 +238,14 @@ typedef struct mcs_rwlock_node {
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_t;
#else
} mcs_rwlock_node_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_node_t;
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_irqsave_t;
#else
} mcs_rwlock_node_irqsave_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -294,11 +254,7 @@ typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_lock_t;
#else
} mcs_rwlock_lock_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
@ -646,115 +602,4 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
static inline int irqflags_can_interrupt(unsigned long flags)
{
return (flags == ICC_PMR_EL1_UNMASKED);
}
#else /* CONFIG_HAS_NMI */
static inline int irqflags_can_interrupt(unsigned long flags)
{
return !(flags & 0x2);
}
#endif /* CONFIG_HAS_NMI */
struct ihk_rwlock {
unsigned int lock;
};
static inline void ihk_mc_rwlock_init(struct ihk_rwlock *rw)
{
rw->lock = 0;
}
static inline void ihk_mc_read_lock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1b\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 2b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
static inline int ihk_mc_read_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2 = 1;
asm volatile(
" ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1f\n"
" stxr %w1, %w0, %2\n"
"1:\n"
: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
return !tmp2;
}
static inline void ihk_mc_read_unlock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
"1: ldxr %w0, %2\n"
" sub %w0, %w0, #1\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
static inline void ihk_mc_write_lock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %1\n"
" cbnz %w0, 1b\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 2b\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
}
static inline int ihk_mc_write_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" ldaxr %w0, %1\n"
" cbnz %w0, 1f\n"
" stxr %w0, %w2, %1\n"
"1:\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
return !tmp;
}
static inline void ihk_mc_write_unlock(struct ihk_rwlock *rw)
{
asm volatile(
" stlr %w1, %0\n"
: "=Q" (rw->lock) : "r" (0) : "memory");
}
#define ihk_mc_read_can_lock(rw) ((rw)->lock < 0x80000000)
#define ihk_mc_write_can_lock(rw) ((rw)->lock == 0)
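/*
 * Usage sketch (illustrative; rw is a hypothetical instance): readers
 * share the lock while bit 31 is clear; a writer stores 0x80000000 and
 * excludes everyone else.
 *
 *	static struct ihk_rwlock rw;
 *
 *	ihk_mc_rwlock_init(&rw);
 *	ihk_mc_read_lock(&rw);
 *	... read-side critical section ...
 *	ihk_mc_read_unlock(&rw);
 */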
#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */

View File

@ -1,110 +1,96 @@
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#define __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#include <const.h>
#include <errno.h>
#ifndef __ASSEMBLY__
#include <list.h>
#include <page.h>
void panic(const char *);
#endif /*__ASSEMBLY__*/
#define _SZ4KB (1UL<<12)
#define _SZ16KB (1UL<<14)
#define _SZ64KB (1UL<<16)
#ifdef CONFIG_ARM64_64K_PAGES
# define GRANULE_SIZE _SZ64KB
# define BLOCK_SHIFT PAGE_SHIFT
# define BLOCK_SIZE PAGE_SIZE
# define TABLE_SHIFT PMD_SHIFT
# define GRANULE_SIZE _SZ64KB
#else
# define GRANULE_SIZE _SZ4KB
# define BLOCK_SHIFT SECTION_SHIFT
# define BLOCK_SIZE SECTION_SIZE
# define TABLE_SHIFT PUD_SHIFT
# define GRANULE_SIZE _SZ4KB
#endif
#define VA_BITS CONFIG_ARM64_VA_BITS
/*
* Address define
*/
/* early alloc area address */
/* START:_end, SIZE:512 pages */
#define MAP_EARLY_ALLOC_SHIFT 5
#define MAP_KERNEL_SHIFT 21
#define MAP_KERNEL_SIZE (UL(1) << MAP_KERNEL_SHIFT)
#define MAP_EARLY_ALLOC_SHIFT 9
#define MAP_EARLY_ALLOC_SIZE (UL(1) << (PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT))
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
extern char _end[];
# define MAP_EARLY_ALLOC (ALIGN_UP((unsigned long)_end, BLOCK_SIZE))
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
#endif /* !__ASSEMBLY__ */
/* bootparam area address */
/* START:early alloc area end, SIZE:2MiB */
#define MAP_BOOT_PARAM_SHIFT 21
#define MAP_BOOT_PARAM_SIZE (UL(1) << MAP_BOOT_PARAM_SHIFT)
#ifndef __ASSEMBLY__
# define MAP_BOOT_PARAM (ALIGN_UP(MAP_EARLY_ALLOC_END, BLOCK_SIZE))
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE)
#endif /* !__ASSEMBLY__ */
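/*
 * Size check (illustrative): with MAP_EARLY_ALLOC_SHIFT == 9 and a 4KiB
 * granule, MAP_EARLY_ALLOC_SIZE is 1 << (12 + 9) = 2MiB, i.e. the 512
 * pages noted above; MAP_BOOT_PARAM_SIZE is likewise 1 << 21 = 2MiB.
 */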
/*
* MAP_KERNEL_START is HOST MODULES_END - 8MiB.
* It's defined by cmake.
*/
#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=1 */
#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB)
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000000400000000)
# define TASK_UNMAPPED_BASE UL(0x0000000800000000)
# define USER_END UL(0x0000002000000000)
# define MAP_VMAP_START UL(0xffffffbdc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffffffbffbdfd000)
# define MAP_ST_START UL(0xffffffc000000000)
# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_003f_ff80_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000
#
#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=3 */
#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB)
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000002000000000)
# define TASK_UNMAPPED_BASE UL(0x0000004000000000)
# define USER_END UL(0x0000010000000000)
# define MAP_VMAP_START UL(0xfffffdfee0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xfffffdfffbdd0000)
# define MAP_ST_START UL(0xfffffe0000000000)
# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_01ff_e000_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=2 */
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB)
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
# define MAP_VMAP_START UL(0xffff7bffc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdfd000)
# define MAP_ST_START UL(0xffff800000000000)
# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_ff80_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=4 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB)
#
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
# define MAP_VMAP_START UL(0xffff780000000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)
# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_e000_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000
#
#else
# error address space is not defined.
#endif
#define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START)
#define STACK_TOP(region) ((region)->user_end)
#define STACK_TOP(region) ((region)->user_end)
/*
* pagetable define
@ -118,10 +104,7 @@ extern char _end[];
# define PTL3_INDEX_MASK PTL4_INDEX_MASK
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 4)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 4)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 4)
# define FIRST_LEVEL_BLOCK_SUPPORT 1
#elif GRANULE_SIZE == _SZ16KB
# define __PTL4_SHIFT 47
# define __PTL3_SHIFT 36
@ -131,12 +114,9 @@ extern char _end[];
# define PTL3_INDEX_MASK ((UL(1) << 11) - 1)
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 7)
# define FIRST_LEVEL_BLOCK_SUPPORT 0
#elif GRANULE_SIZE == _SZ64KB
# define __PTL4_SHIFT 55
# define __PTL4_SHIFT 0
# define __PTL3_SHIFT 42
# define __PTL2_SHIFT 29
# define __PTL1_SHIFT 16
@ -144,39 +124,19 @@ extern char _end[];
# define PTL3_INDEX_MASK ((UL(1) << 6) - 1)
# define PTL2_INDEX_MASK ((UL(1) << 13) - 1)
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 5)
# define FIRST_LEVEL_BLOCK_SUPPORT 0
#else
# error granule size error.
#endif
#ifndef __ASSEMBLY__
extern int first_level_block_support;
#endif /* __ASSEMBLY__ */
# define __PTL4_SIZE (UL(1) << __PTL4_SHIFT)
# define __PTL3_SIZE (UL(1) << __PTL3_SHIFT)
# define __PTL2_SIZE (UL(1) << __PTL2_SHIFT)
# define __PTL1_SIZE (UL(1) << __PTL1_SHIFT)
# define __PTL4_MASK (~(__PTL4_SIZE - 1))
# define __PTL3_MASK (~(__PTL3_SIZE - 1))
# define __PTL2_MASK (~(__PTL2_SIZE - 1))
# define __PTL1_MASK (~(__PTL1_SIZE - 1))
# define __PTL4_CONT_SIZE (UL(1) << __PTL4_CONT_SHIFT)
# define __PTL3_CONT_SIZE (UL(1) << __PTL3_CONT_SHIFT)
# define __PTL2_CONT_SIZE (UL(1) << __PTL2_CONT_SHIFT)
# define __PTL1_CONT_SIZE (UL(1) << __PTL1_CONT_SHIFT)
# define __PTL4_CONT_MASK (~(__PTL4_CONT_SIZE - 1))
# define __PTL3_CONT_MASK (~(__PTL3_CONT_SIZE - 1))
# define __PTL2_CONT_MASK (~(__PTL2_CONT_SIZE - 1))
# define __PTL1_CONT_MASK (~(__PTL1_CONT_SIZE - 1))
# define __PTL4_CONT_COUNT (UL(1) << (__PTL4_CONT_SHIFT - __PTL4_SHIFT))
# define __PTL3_CONT_COUNT (UL(1) << (__PTL3_CONT_SHIFT - __PTL3_SHIFT))
# define __PTL2_CONT_COUNT (UL(1) << (__PTL2_CONT_SHIFT - __PTL2_SHIFT))
# define __PTL1_CONT_COUNT (UL(1) << (__PTL1_CONT_SHIFT - __PTL1_SHIFT))
# define __PTL4_MASK (~__PTL4_SIZE - 1)
# define __PTL3_MASK (~__PTL3_SIZE - 1)
# define __PTL2_MASK (~__PTL2_SIZE - 1)
# define __PTL1_MASK (~__PTL1_SIZE - 1)
/* calculate entries */
#if (CONFIG_ARM64_PGTABLE_LEVELS > 3) && (VA_BITS > __PTL4_SHIFT)
@ -223,22 +183,6 @@ static const unsigned int PTL4_ENTRIES = __PTL4_ENTRIES;
static const unsigned int PTL3_ENTRIES = __PTL3_ENTRIES;
static const unsigned int PTL2_ENTRIES = __PTL2_ENTRIES;
static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
static const unsigned int PTL4_CONT_SHIFT = __PTL4_CONT_SHIFT;
static const unsigned int PTL3_CONT_SHIFT = __PTL3_CONT_SHIFT;
static const unsigned int PTL2_CONT_SHIFT = __PTL2_CONT_SHIFT;
static const unsigned int PTL1_CONT_SHIFT = __PTL1_CONT_SHIFT;
static const unsigned long PTL4_CONT_SIZE = __PTL4_CONT_SIZE;
static const unsigned long PTL3_CONT_SIZE = __PTL3_CONT_SIZE;
static const unsigned long PTL2_CONT_SIZE = __PTL2_CONT_SIZE;
static const unsigned long PTL1_CONT_SIZE = __PTL1_CONT_SIZE;
static const unsigned long PTL4_CONT_MASK = __PTL4_CONT_MASK;
static const unsigned long PTL3_CONT_MASK = __PTL3_CONT_MASK;
static const unsigned long PTL2_CONT_MASK = __PTL2_CONT_MASK;
static const unsigned long PTL1_CONT_MASK = __PTL1_CONT_MASK;
static const unsigned int PTL4_CONT_COUNT = __PTL4_CONT_COUNT;
static const unsigned int PTL3_CONT_COUNT = __PTL3_CONT_COUNT;
static const unsigned int PTL2_CONT_COUNT = __PTL2_CONT_COUNT;
static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
#else
# define PTL4_SHIFT __PTL4_SHIFT
# define PTL3_SHIFT __PTL3_SHIFT
@ -256,26 +200,8 @@ static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
# define PTL3_ENTRIES __PTL3_ENTRIES
# define PTL2_ENTRIES __PTL2_ENTRIES
# define PTL1_ENTRIES __PTL1_ENTRIES
# define PTL4_CONT_SHIFT __PTL4_CONT_SHIFT
# define PTL3_CONT_SHIFT __PTL3_CONT_SHIFT
# define PTL2_CONT_SHIFT __PTL2_CONT_SHIFT
# define PTL1_CONT_SHIFT __PTL1_CONT_SHIFT
# define PTL4_CONT_SIZE __PTL4_CONT_SIZE
# define PTL3_CONT_SIZE __PTL3_CONT_SIZE
# define PTL2_CONT_SIZE __PTL2_CONT_SIZE
# define PTL1_CONT_SIZE __PTL1_CONT_SIZE
# define PTL4_CONT_MASK __PTL4_CONT_MASK
# define PTL3_CONT_MASK __PTL3_CONT_MASK
# define PTL2_CONT_MASK __PTL2_CONT_MASK
# define PTL1_CONT_MASK __PTL1_CONT_MASK
# define PTL4_CONT_COUNT __PTL4_CONT_COUNT
# define PTL3_CONT_COUNT __PTL3_CONT_COUNT
# define PTL2_CONT_COUNT __PTL2_CONT_COUNT
# define PTL1_CONT_COUNT __PTL1_CONT_COUNT
#endif/*__ASSEMBLY__*/
#define __page_size(pgshift) (UL(1) << (pgshift))
#define __page_mask(pgsize) (~((pgsize) - 1))
#define __page_offset(addr, size) ((unsigned long)(addr) & ((size) - 1))
#define __page_align(addr, size) ((unsigned long)(addr) & ~((size) - 1))
#define __page_align_up(addr, size) __page_align((unsigned long)(addr) + (size) - 1, size)
@ -284,8 +210,8 @@ static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
* normal page
*/
#define PAGE_SHIFT __PTL1_SHIFT
#define PAGE_SIZE __page_size(PAGE_SHIFT)
#define PAGE_MASK __page_mask(PAGE_SIZE)
#define PAGE_SIZE (UL(1) << __PTL1_SHIFT)
#define PAGE_MASK (~(PTL1_SIZE - 1))
#define PAGE_P2ALIGN 0
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
#define page_align(addr) __page_align(addr, PAGE_SIZE)
@ -295,8 +221,8 @@ static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
* large page
*/
#define LARGE_PAGE_SHIFT __PTL2_SHIFT
#define LARGE_PAGE_SIZE __page_size(LARGE_PAGE_SHIFT)
#define LARGE_PAGE_MASK __page_mask(LARGE_PAGE_SIZE)
#define LARGE_PAGE_SIZE (UL(1) << __PTL2_SHIFT)
#define LARGE_PAGE_MASK (~(PTL2_SIZE - 1))
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define large_page_offset(addr) __page_offset(addr, LARGE_PAGE_SIZE)
#define large_page_align(addr) __page_align(addr, LARGE_PAGE_SIZE)
@ -337,18 +263,6 @@ static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
#define PTE_FILEOFF PTE_SPECIAL
#ifdef CONFIG_ARM64_64K_PAGES
# define USER_STACK_PREPAGE_SIZE PAGE_SIZE
# define USER_STACK_PAGE_MASK PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT PAGE_SHIFT
#else
# define USER_STACK_PREPAGE_SIZE LARGE_PAGE_SIZE
# define USER_STACK_PAGE_MASK LARGE_PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN LARGE_PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT LARGE_PAGE_SHIFT
#endif
#define PT_ENTRIES (PAGE_SIZE >> 3)
#ifndef __ASSEMBLY__
@ -398,8 +312,6 @@ enum ihk_mc_pt_attribute {
PTATTR_FOR_USER = UL(1) << (PHYS_MASK_SHIFT - 1),
/* WriteCombine */
PTATTR_WRITE_COMBINED = PTE_ATTRINDX(2),
/* converted flag */
ARCH_PTATTR_FLIPPED = PTE_PROT_NONE,
};
extern enum ihk_mc_pt_attribute attr_mask;
@ -411,23 +323,18 @@ static inline int pfn_is_write_combined(uintptr_t pfn)
// bit definitions whose meaning is flipped relative to the common (arch-independent) part
#define attr_flip_bits (PTATTR_WRITABLE | PTATTR_LARGEPAGE)
static inline int pgsize_to_tbllv(size_t pgsize);
static inline int pte_is_type_page(const pte_t *ptep, size_t pgsize)
{
int ret = 0; //default D_TABLE
int level = pgsize_to_tbllv(pgsize);
switch (level) {
case 4:
case 3:
case 2:
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize)) {
// check D_BLOCK
ret = ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT);
break;
case 1:
}
else if (PTL1_SIZE == pgsize) {
// check D_PAGE
ret = ((*ptep & PTE_TYPE_MASK) == PTE_TYPE_PAGE);
break;
}
return ret;
}
@ -506,18 +413,21 @@ static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize) ||
(PTL1_SIZE == pgsize)) {
*ptep = PTE_NULL;
}
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
if (((PTL4_SIZE == pgsize || PTL4_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
((PTL3_SIZE == pgsize || PTL3_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize || PTL2_CONT_SIZE == pgsize) ||
(PTL1_SIZE == pgsize || PTL1_CONT_SIZE == pgsize)) {
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize) ||
(PTL1_SIZE == pgsize)) {
*ptep = PTE_FILEOFF | off | PTE_TYPE_PAGE;
}
}
@ -547,294 +457,7 @@ static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
}
}
static inline int pte_is_contiguous(const pte_t *ptep)
{
return !!(*ptep & PTE_CONT);
}
static inline int pgsize_is_contiguous(size_t pgsize)
{
int ret = 0;
if ((pgsize == PTL4_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(pgsize == PTL3_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(pgsize == PTL2_CONT_SIZE) ||
(pgsize == PTL1_CONT_SIZE)) {
ret = 1;
}
return ret;
}
static inline int pgsize_to_tbllv(size_t pgsize)
{
int level = -EINVAL;
if ((pgsize == PTL4_CONT_SIZE || pgsize == PTL4_SIZE)
&& (CONFIG_ARM64_PGTABLE_LEVELS > 3)) {
level = 4;
} else if ((pgsize == PTL3_CONT_SIZE || pgsize == PTL3_SIZE)
&& (CONFIG_ARM64_PGTABLE_LEVELS > 2)) {
level = 3;
} else if (pgsize == PTL2_CONT_SIZE || pgsize == PTL2_SIZE) {
level = 2;
} else if (pgsize == PTL1_CONT_SIZE || pgsize == PTL1_SIZE) {
level = 1;
}
return level;
}
static inline int pgsize_to_pgshift(size_t pgsize)
{
/* We need to use if instead of switch because
* sometimes PTLX_CONT_SIZE == PTLX_SIZE
*/
if (pgsize == PTL4_CONT_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
return PTL4_CONT_SHIFT;
}
} else if (pgsize == PTL4_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
return PTL4_SHIFT;
}
} else if (pgsize == PTL3_CONT_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
return PTL3_CONT_SHIFT;
}
} else if (pgsize == PTL3_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
return PTL3_SHIFT;
}
} else if (pgsize == PTL2_CONT_SIZE) {
return PTL2_CONT_SHIFT;
} else if (pgsize == PTL2_SIZE) {
return PTL2_SHIFT;
} else if (pgsize == PTL1_CONT_SIZE) {
return PTL1_CONT_SHIFT;
} else if (pgsize == PTL1_SIZE) {
return PTL1_SHIFT;
}
return -EINVAL;
}
static inline size_t tbllv_to_pgsize(int level)
{
size_t pgsize = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
pgsize = PTL4_SIZE;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
pgsize = PTL3_SIZE;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
pgsize = PTL2_SIZE;
break;
case 1:
pgsize = PTL1_SIZE;
break;
default:
panic("page table level is invalid.");
}
return pgsize;
}
static inline size_t tbllv_to_contpgsize(int level)
{
size_t pgsize = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
pgsize = PTL4_CONT_SIZE;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
pgsize = PTL3_CONT_SIZE;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
pgsize = PTL2_CONT_SIZE;
break;
case 1:
pgsize = PTL1_CONT_SIZE;
break;
default:
panic("page table level is invalid.");
}
return pgsize;
}
static inline int tbllv_to_contpgshift(int level)
{
int ret = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
ret = PTL4_CONT_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
ret = PTL3_CONT_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
ret = PTL2_CONT_SHIFT;
break;
case 1:
ret = PTL1_CONT_SHIFT;
break;
default:
panic("page table level is invalid.");
}
return ret;
}
static inline pte_t *get_contiguous_head(pte_t *__ptep, size_t __pgsize)
{
unsigned long align;
int shift = 0;
switch (pgsize_to_tbllv(__pgsize)) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
shift = PTL4_CONT_SHIFT - PTL4_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
shift = PTL3_CONT_SHIFT - PTL3_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
shift = PTL2_CONT_SHIFT - PTL2_SHIFT;
break;
case 1:
shift = PTL1_CONT_SHIFT - PTL1_SHIFT;
break;
default:
panic("page table level is invalid.");
}
align = sizeof(*__ptep) << shift;
return (pte_t *)__page_align(__ptep, align);
}
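/*
 * Worked example (illustrative): with a 4KiB granule,
 * PTL1_CONT_SHIFT - PTL1_SHIFT == 4, i.e. 16 contiguous PTEs per group,
 * so align == sizeof(pte_t) << 4 == 128 and the head is the PTE slot
 * aligned down to a 128-byte boundary.
 */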
static inline pte_t *get_contiguous_tail(pte_t *__ptep, size_t __pgsize)
{
unsigned long align;
int shift = 0;
switch (pgsize_to_tbllv(__pgsize)) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
shift = PTL4_CONT_SHIFT - PTL4_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
shift = PTL3_CONT_SHIFT - PTL3_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
shift = PTL2_CONT_SHIFT - PTL2_SHIFT;
break;
case 1:
shift = PTL1_CONT_SHIFT - PTL1_SHIFT;
break;
default:
panic("page table level is invalid.");
}
align = sizeof(*__ptep) << shift;
return (pte_t *)__page_align_up(__ptep + 1, align) - 1;
}
static inline int split_contiguous_pages(pte_t *ptep, size_t pgsize)
{
int ret;
pte_t *head = get_contiguous_head(ptep, pgsize);
pte_t *tail = get_contiguous_tail(ptep, pgsize);
pte_t *ptr;
uintptr_t phys;
struct page *page;
phys = pte_get_phys(head);
page = phys_to_page(phys);
if (page && (page_is_in_memobj(page)
|| page_is_multi_mapped(page))) {
ret = -EINVAL;
goto out;
}
for (ptr = head; ptr <= tail; ptr++) {
*ptr &= ~PTE_CONT;
}
ret = 0;
out:
return ret;
}
static inline int page_is_contiguous_head(pte_t *ptep, size_t pgsize)
{
pte_t *ptr = get_contiguous_head(ptep, pgsize);
return (ptr == ptep);
}
static inline int page_is_contiguous_tail(pte_t *ptep, size_t pgsize)
{
pte_t *ptr = get_contiguous_tail(ptep, pgsize);
return (ptr == ptep);
}
/* Return true if PTE doesn't belong to a contiguous PTE group or PTE
* is the head of a contiguous PTE group
*/
static inline int pte_is_head(pte_t *ptep, pte_t *old, size_t cont_size)
{
if (!pte_is_contiguous(old))
return 1;
return page_is_contiguous_head(ptep, cont_size);
}
struct page_table;
void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
void **pgaddrp,
size_t *pgsizep);
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);

View File

@ -1,16 +1,9 @@
/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __ARCH_PERFCTR_H__
#define __ARCH_PERFCTR_H__
#include <ihk/types.h>
#include <ihk/cpu.h>
#include <bitops.h>
struct per_cpu_arm_pmu {
int num_events;
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
};
/* @ref.impl arch/arm64/include/asm/pmu.h */
struct arm_pmu {
@ -20,22 +13,15 @@ struct arm_pmu {
void (*reset)(void*);
int (*enable_pmu)(void);
void (*disable_pmu)(void);
int (*enable_counter)(unsigned long counter_mask);
int (*disable_counter)(unsigned long counter_mask);
int (*enable_intens)(unsigned long counter_mask);
int (*disable_intens)(unsigned long counter_mask);
int (*enable_counter)(int);
int (*disable_counter)(int);
int (*enable_intens)(int);
int (*disable_intens)(int);
int (*set_event_filter)(unsigned long*, int);
void (*write_evtype)(int, uint32_t);
int (*get_event_idx)(int num_events, unsigned long used_mask,
unsigned long config);
int (*get_event_idx)(int, unsigned long);
int (*map_event)(uint32_t, uint64_t);
int (*map_hw_event)(uint64_t config);
int (*map_cache_event)(uint64_t config);
int (*map_raw_event)(uint64_t config);
void (*enable_user_access_pmu_regs)(void);
void (*disable_user_access_pmu_regs)(void);
int (*counter_mask_valid)(unsigned long counter_mask);
struct per_cpu_arm_pmu *per_cpu;
int num_events;
};
static inline const struct arm_pmu* get_cpu_pmu(void)
@ -43,21 +29,44 @@ static inline const struct arm_pmu* get_cpu_pmu(void)
extern struct arm_pmu cpu_pmu;
return &cpu_pmu;
}
static inline const struct per_cpu_arm_pmu *get_per_cpu_pmu(void)
{
const struct arm_pmu *cpu_pmu = get_cpu_pmu();
return &cpu_pmu->per_cpu[ihk_mc_get_processor_id()];
}
int arm64_init_perfctr(void);
void arm64_init_per_cpu_perfctr(void);
int arm64_enable_pmu(void);
void arm64_disable_pmu(void);
int armv8pmu_init(struct arm_pmu* cpu_pmu);
void armv8pmu_per_cpu_init(struct per_cpu_arm_pmu *per_cpu);
void arm64_enable_user_access_pmu_regs(void);
void arm64_disable_user_access_pmu_regs(void);
/* TODO[PMU]: these could also be defined in the common (arch-independent) part; remove the definitions here depending on how things evolve */
/*
* Generalized hardware cache events:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
* { read, write, prefetch } x
* { accesses, misses }
*/
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0,
PERF_COUNT_HW_CACHE_L1I = 1,
PERF_COUNT_HW_CACHE_LL = 2,
PERF_COUNT_HW_CACHE_DTLB = 3,
PERF_COUNT_HW_CACHE_ITLB = 4,
PERF_COUNT_HW_CACHE_BPU = 5,
PERF_COUNT_HW_CACHE_NODE = 6,
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0,
PERF_COUNT_HW_CACHE_OP_WRITE = 1,
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
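/* Editorial note: in the Linux perf ABI these three enums pack into a single
 * config value as id | (op << 8) | (result << 16); the helper below is an
 * illustration, not part of this header.
 */
static inline uint64_t make_cache_config(int cache_id, int op_id, int result_id)
{
	return (uint64_t)cache_id | ((uint64_t)op_id << 8) |
	       ((uint64_t)result_id << 16);
}
/* e.g. L1D read misses: make_cache_config(PERF_COUNT_HW_CACHE_L1D,
 *	PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS)
 */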
#endif

View File

@ -1,9 +1,7 @@
/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_COMMON_ARCH_TIMER_H
#define __HEADER_ARM64_COMMON_ARCH_TIMER_H
#include <ihk/cpu.h>
/* @ref.impl include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */
#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */
@ -13,19 +11,4 @@
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
/* @ref.impl linux4.10.16 */
/* include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_CTRL_ENABLE (1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
ARCH_TIMER_REG_TVAL,
};
extern int get_timer_intrid(void);
extern void arch_timer_init(void);
extern struct ihk_mc_interrupt_handler *get_timer_handler(void);
#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */

View File

@ -1,4 +1,4 @@
/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __HEADER_ARM64_ARCH_CPU_H
#define __HEADER_ARM64_ARCH_CPU_H
@ -12,8 +12,6 @@
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#include <registers.h>
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
@ -71,10 +69,12 @@ do { \
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */
#define read_tsc() \
({ \
unsigned long cval; \
cval = rdtsc(); \
isb(); \
asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); \
cval; \
})
@ -102,6 +102,4 @@ static inline void cpu_disable_nmi(void)
#endif /* __ASSEMBLY__ */
void arch_flush_icache_all(void);
#endif /* !__HEADER_ARM64_ARCH_CPU_H */

View File

@ -21,11 +21,12 @@
/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << MAP_HUGE_SHIFT)
#if FIRST_LEVEL_BLOCK_SUPPORT
# define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT)
#else
# define MAP_HUGE_FIRST_BLOCK -1 /* not supported */
#endif
#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT)
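/* Editorial sketch (not from this patch): these flags put the page shift into
 * mmap() flag bits [26:31], mirroring MAP_HUGE_2MB and friends in Linux; e.g.
 * requesting a second-level block mapping (2 MiB with a 4 KB granule):
 *
 *	p = mmap(NULL, length, PROT_READ | PROT_WRITE,
 *		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
 *		 MAP_HUGE_SECOND_BLOCK, -1, 0);
 */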
/*
* for mlockall()

View File

@ -0,0 +1,60 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct ihk_os_monitor *monitor;
extern int sprintf(char * buf, const char *fmt, ...);
#define DEBUG_ARCH_RUSAGE
#ifdef DEBUG_ARCH_RUSAGE
#define dprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0)
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0)
#else
#define dprintf(...) do { } while (0)
#define eprintf(...) \
do { \
char msg[1024]; \
sprintf(msg, __VA_ARGS__); \
kprintf("%s,%s", __FUNCTION__, msg); \
} while (0)
#endif
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
#if 0 /* postk-TODO */
switch (pgsize) {
case PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case PTL2_SIZE:
ret = IHK_OS_PGSIZE_2MB;
break;
case PTL3_SIZE:
ret = IHK_OS_PGSIZE_1GB;
break;
default:
eprintf("unknown pgsize=%ld\n", pgsize);
break;
}
#endif
return ret;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -6,11 +6,12 @@
/* shmflg */
#define SHM_HUGE_SHIFT 26
#define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << SHM_HUGE_SHIFT)
#if FIRST_LEVEL_BLOCK_SUPPORT
# define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT)
#else
# define SHM_HUGE_FIRST_BLOCK -1 /* not supported */
#endif
#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT)
struct ipc_perm {
key_t key;

View File

@ -0,0 +1,34 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#include <arch-memory.h>
//#define DEBUG_RUSAGE
extern struct rusage_global *rusage;
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_16KB 1
#define IHK_OS_PGSIZE_64KB 2
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
switch (pgsize) {
case __PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case __PTL2_SIZE:
ret = IHK_OS_PGSIZE_16KB;
break;
case __PTL3_SIZE:
ret = IHK_OS_PGSIZE_64KB;
break;
default:
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
break;
}
return ret;
}
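/* Editorial usage sketch (the counter array named here is hypothetical, not a
 * real rusage_global field):
 *	rusage->rss_hypothetical[rusage_pgsize_to_pgtype(pgsize)] += pgsize;
 */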
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -60,9 +60,9 @@
#ifdef CONFIG_HAS_NMI
#define GICD_INT_NMI_PRI 0x40
#define GICD_INT_DEF_PRI 0xc0U
#define GICD_INT_DEF_PRI 0xc0
#else
#define GICD_INT_DEF_PRI 0xa0U
#define GICD_INT_DEF_PRI 0xa0
#endif
#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
(GICD_INT_DEF_PRI << 16) |\

View File

@ -19,7 +19,6 @@
#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
#define __LINUX_IRQCHIP_ARM_GIC_V3_H
#include <stringify.h>
/* @ref.impl include/linux/irqchip/arm-gic-v3.h */
#include <sysreg.h>
@ -382,4 +381,11 @@
#define ICH_AP1R2_EL2 __AP1Rx_EL2(2)
#define ICH_AP1R3_EL2 __AP1Rx_EL2(3)
/**
* @ref.impl host-kernel/include/linux/stringify.h
*/
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_H */

View File

@ -15,9 +15,8 @@
#define S_PC 0x100 /* offsetof(struct pt_regs, pc) */
#define S_PSTATE 0x108 /* offsetof(struct pt_regs, pstate) */
#define S_ORIG_X0 0x110 /* offsetof(struct pt_regs, orig_x0) */
#define S_ORIG_PC 0x118 /* offsetof(struct pt_regs, orig_pc) */
#define S_SYSCALLNO 0x120 /* offsetof(struct pt_regs, syscallno) */
#define S_FRAME_SIZE 0x130 /* sizeof(struct pt_regs); must be 16-byte aligned */
#define S_SYSCALLNO 0x118 /* offsetof(struct pt_regs, syscallno) */
#define S_FRAME_SIZE 0x120 /* sizeof(struct pt_regs) */
#define CPU_INFO_SETUP 0x10 /* offsetof(struct cpu_info, cpu_setup) */
#define CPU_INFO_SZ 0x18 /* sizeof(struct cpu_info) */

View File

@ -1,20 +0,0 @@
/* asm-syscall.h COPYRIGHT FUJITSU LIMITED 2018 */
#ifndef __HEADER_ARM64_ASM_SYSCALL_H
#define __HEADER_ARM64_ASM_SYSCALL_H
#ifdef __ASSEMBLY__
#define DECLARATOR(number, name) .equ __NR_##name, number
#define SYSCALL_HANDLED(number, name) DECLARATOR(number, name)
#define SYSCALL_DELEGATED(number, name) DECLARATOR(number, name)
#include <config.h>
#include <syscall_list.h>
#undef DECLARATOR
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
#endif /* __ASSEMBLY__ */
#endif /* !__HEADER_ARM64_ASM_SYSCALL_H */

View File

@ -67,12 +67,21 @@ struct arm64_cpu_capabilities {
int def_scope;/* default scope */
int (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
int (*enable)(void *);/* Called on all active CPUs */
uint32_t sys_reg;
uint8_t field_pos;
uint8_t min_field_value;
uint8_t hwcap_type;
int sign;
unsigned long hwcap;
union {
struct {/* To be used for erratum handling only */
uint32_t midr_model;
uint32_t midr_range_min, midr_range_max;
};
struct {/* Feature register checking */
uint32_t sys_reg;
uint8_t field_pos;
uint8_t min_field_value;
uint8_t hwcap_type;
int sign;
unsigned long hwcap;
};
};
};
/* @ref.impl include/linux/bitops.h */

View File

@ -25,78 +25,17 @@
#define MIDR_PARTNUM(midr) \
(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
#define MIDR_ARCHITECTURE_SHIFT 16
#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT)
#define MIDR_ARCHITECTURE(midr) \
(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
(((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
#define MIDR_CPU_VAR_REV(var, rev) \
(((var) << MIDR_VARIANT_SHIFT) | (rev))
#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \
({ \
u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \
u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \
\
_model == (model) && rv >= (rv_min) && rv <= (rv_max); \
})
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
#define ARM_CPU_PART_CORTEX_A57 0xD07
#define ARM_CPU_PART_CORTEX_A72 0xD08
#define ARM_CPU_PART_CORTEX_A53 0xD03
#define ARM_CPU_PART_CORTEX_A73 0xD09
#define ARM_CPU_PART_CORTEX_A75 0xD0A
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#define ARM_CPU_IMP_CAVIUM 0x43
#ifndef __ASSEMBLY__

View File

@ -0,0 +1,92 @@
/* elfcore.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_ARM64_COMMON_ELFCORE_H
#define __HEADER_ARM64_COMMON_ELFCORE_H
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Xword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
#define EI_NIDENT 16
typedef struct {
unsigned char e_ident[EI_NIDENT];
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry;
Elf64_Off e_phoff;
Elf64_Off e_shoff;
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr;
#define EI_MAG0 0
#define EI_MAG1 1
#define EI_MAG2 2
#define EI_MAG3 3
#define EI_CLASS 4
#define EI_DATA 5
#define EI_VERSION 6
#define EI_OSABI 7
#define EI_ABIVERSION 8
#define EI_PAD 9
#define ELFMAG0 0x7f
#define ELFMAG1 'E'
#define ELFMAG2 'L'
#define ELFMAG3 'F'
#define ELFCLASS64 2 /* 64-bit object */
#define ELFDATA2LSB 1 /* LSB */
#define El_VERSION 1 /* defined to be the same as EV_CURRENT */
#define ELFOSABI_NONE 0 /* unspecified */
#define El_ABIVERSION_NONE 0 /* unspecified */
#define ET_CORE 4 /* Core file */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_K10M 181 /* Intel K10M */
#define EV_CURRENT 1 /* Current version */
typedef struct {
Elf64_Word p_type;
Elf64_Word p_flags;
Elf64_Off p_offset;
Elf64_Addr p_vaddr;
Elf64_Addr p_paddr;
Elf64_Xword p_filesz;
Elf64_Xword p_memsz;
Elf64_Xword p_align;
} Elf64_Phdr;
#define PT_LOAD 1
#define PT_NOTE 4
#define PF_X 1 /* executable bit */
#define PF_W 2 /* writable bit */
#define PF_R 4 /* readable bit */
struct note {
Elf64_Word namesz;
Elf64_Word descsz;
Elf64_Word type;
/* name char[namesz] and desc[descsz] */
};
#define NT_PRSTATUS 1
#define NT_PRFRPREG 2
#define NT_PRPSINFO 3
#define NT_AUXV 6
#define NT_X86_STATE 0x202
#include "elfcoregpl.h"
#endif /* !__HEADER_ARM64_COMMON_ELFCORE_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,98 @@
/* elfcoregpl.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_ARM64_COMMON_ELFCOREGPL_H
#define __HEADER_ARM64_COMMON_ELFCOREGPL_H
#define pid_t int
/* From /usr/include/linux/elfcore.h of Linux */
#define ELF_PRARGSZ (80)
/* From /usr/include/linux/elfcore.h of Linux */
struct elf_siginfo
{
int si_signo;
int si_code;
int si_errno;
};
/* From bfd/hosts/x86-64linux.h of gdb. */
typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t;
typedef a8_uint64_t elf_greg64_t;
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
struct prstatus64_timeval
{
a8_uint64_t tv_sec;
a8_uint64_t tv_usec;
};
struct elf_prstatus64
{
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
elf_gregset64_t pr_reg;
int pr_fpvalid;
};
struct elf_prpsinfo64
{
char pr_state;
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
};
#endif /* !__HEADER_ARM64_COMMON_ELFCOREGPL_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,4 @@
/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2019 */
/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __HEADER_ARM64_COMMON_FPSIMD_H
#define __HEADER_ARM64_COMMON_FPSIMD_H
@ -42,19 +42,16 @@ extern void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs)
extern size_t sve_state_size(struct thread const *thread);
extern void sve_free(struct thread *thread);
extern int sve_alloc(struct thread *thread);
extern void sve_alloc(struct thread *thread);
extern void sve_save_state(void *state, unsigned int *pfpsr);
extern void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern int sve_set_thread_vl(unsigned long arg);
extern int sve_get_thread_vl(void);
extern int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length, const unsigned long flags);
extern int sve_get_thread_vl(const struct thread *thread);
extern int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags);
#define SVE_SET_VL(arg) sve_set_thread_vl(arg)
#define SVE_GET_VL() sve_get_thread_vl()
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
#define SVE_SET_VL(thread, vector_length, flags) sve_set_thread_vl(thread, vector_length, flags)
#define SVE_GET_VL(thread) sve_get_thread_vl(thread)
#else /* CONFIG_ARM64_SVE */
@ -83,12 +80,12 @@ static int sve_set_vector_length(struct thread *thread, unsigned long vl, unsign
}
/* for prctl syscall */
#define SVE_SET_VL(a) (-EINVAL)
#define SVE_GET_VL() (-EINVAL)
#define SVE_SET_VL(a,b,c) (-EINVAL)
#define SVE_GET_VL(a) (-EINVAL)
#endif /* CONFIG_ARM64_SVE */
extern void sve_setup(void);
extern void init_sve_vl(void);
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
extern void thread_fpsimd_save(struct thread *thread);

View File

@ -1,4 +1,5 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifdef POSTK_DEBUG_ARCH_DEP_65
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
@ -24,3 +25,4 @@ unsigned long arch_get_hwcap(void);
extern unsigned long elf_hwcap;
#endif /* _UAPI__ASM_HWCAP_H */
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -124,7 +124,7 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
{
v->counter64 = i;
}
@ -147,8 +147,6 @@ static inline void ihk_atomic64_add(long i, ihk_atomic64_t *v)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_inc */
#define ihk_atomic64_inc(v) ihk_atomic64_add(1LL, (v))
#define ihk_atomic64_cmpxchg(p, o, n) cmpxchg(&((p)->counter64), o, n)
/***********************************************************************
* others
*/

View File

@ -1,4 +1,4 @@
/* context.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* context.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_IHK_CONTEXT_H
#define __HEADER_ARM64_IHK_CONTEXT_H
@ -27,9 +27,7 @@ struct pt_regs {
};
};
unsigned long orig_x0;
unsigned long orig_pc;
unsigned long syscallno;
unsigned long __padding;
};
typedef struct pt_regs ihk_mc_user_context_t;
@ -67,17 +65,17 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
}
/* temp */
#define ihk_mc_syscall_arg0(uc) ((uc)->regs[0])
#define ihk_mc_syscall_arg1(uc) ((uc)->regs[1])
#define ihk_mc_syscall_arg2(uc) ((uc)->regs[2])
#define ihk_mc_syscall_arg3(uc) ((uc)->regs[3])
#define ihk_mc_syscall_arg4(uc) ((uc)->regs[4])
#define ihk_mc_syscall_arg5(uc) ((uc)->regs[5])
#define ihk_mc_syscall_arg0(uc) (uc)->regs[0]
#define ihk_mc_syscall_arg1(uc) (uc)->regs[1]
#define ihk_mc_syscall_arg2(uc) (uc)->regs[2]
#define ihk_mc_syscall_arg3(uc) (uc)->regs[3]
#define ihk_mc_syscall_arg4(uc) (uc)->regs[4]
#define ihk_mc_syscall_arg5(uc) (uc)->regs[5]
#define ihk_mc_syscall_ret(uc) ((uc)->regs[0])
#define ihk_mc_syscall_number(uc) ((uc)->regs[8])
#define ihk_mc_syscall_ret(uc) (uc)->regs[0]
#define ihk_mc_syscall_number(uc) (uc)->regs[8]
#define ihk_mc_syscall_pc(uc) ((uc)->pc)
#define ihk_mc_syscall_sp(uc) ((uc)->sp)
#define ihk_mc_syscall_pc(uc) (uc)->pc
#define ihk_mc_syscall_sp(uc) (uc)->sp
#endif /* !__HEADER_ARM64_IHK_CONTEXT_H */

View File

@ -20,11 +20,13 @@ typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
#endif /* __ASSEMBLY__ */

View File

@ -1,103 +0,0 @@
/* imp-sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __ASM_IMP_SYSREG_H
#define __ASM_IMP_SYSREG_H
#ifndef __ASSEMBLY__
/* register sys_reg list */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1 sys_reg(3, 0, 11, 2, 0)
#define IMP_SCCR_CTRL_EL1 sys_reg(3, 0, 11, 8, 0)
#define IMP_SCCR_ASSIGN_EL1 sys_reg(3, 0, 11, 8, 1)
#define IMP_SCCR_SET0_L2_EL1 sys_reg(3, 0, 15, 8, 2)
#define IMP_SCCR_SET1_L2_EL1 sys_reg(3, 0, 15, 8, 3)
#define IMP_SCCR_L1_EL0 sys_reg(3, 3, 11, 8, 2)
#define IMP_PF_CTRL_EL1 sys_reg(3, 0, 11, 4, 0)
#define IMP_PF_STREAM_DETECT_CTRL_EL0 sys_reg(3, 3, 11, 4, 0)
#define IMP_PF_INJECTION_CTRL0_EL0 sys_reg(3, 3, 11, 6, 0)
#define IMP_PF_INJECTION_CTRL1_EL0 sys_reg(3, 3, 11, 6, 1)
#define IMP_PF_INJECTION_CTRL2_EL0 sys_reg(3, 3, 11, 6, 2)
#define IMP_PF_INJECTION_CTRL3_EL0 sys_reg(3, 3, 11, 6, 3)
#define IMP_PF_INJECTION_CTRL4_EL0 sys_reg(3, 3, 11, 6, 4)
#define IMP_PF_INJECTION_CTRL5_EL0 sys_reg(3, 3, 11, 6, 5)
#define IMP_PF_INJECTION_CTRL6_EL0 sys_reg(3, 3, 11, 6, 6)
#define IMP_PF_INJECTION_CTRL7_EL0 sys_reg(3, 3, 11, 6, 7)
#define IMP_PF_INJECTION_DISTANCE0_EL0 sys_reg(3, 3, 11, 7, 0)
#define IMP_PF_INJECTION_DISTANCE1_EL0 sys_reg(3, 3, 11, 7, 1)
#define IMP_PF_INJECTION_DISTANCE2_EL0 sys_reg(3, 3, 11, 7, 2)
#define IMP_PF_INJECTION_DISTANCE3_EL0 sys_reg(3, 3, 11, 7, 3)
#define IMP_PF_INJECTION_DISTANCE4_EL0 sys_reg(3, 3, 11, 7, 4)
#define IMP_PF_INJECTION_DISTANCE5_EL0 sys_reg(3, 3, 11, 7, 5)
#define IMP_PF_INJECTION_DISTANCE6_EL0 sys_reg(3, 3, 11, 7, 6)
#define IMP_PF_INJECTION_DISTANCE7_EL0 sys_reg(3, 3, 11, 7, 7)
#define IMP_PF_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0)
#define IMP_BARRIER_CTRL_EL1 sys_reg(3, 0, 11, 12, 0)
#define IMP_BARRIER_BST_BIT_EL1 sys_reg(3, 0, 11, 12, 4)
#define IMP_BARRIER_INIT_SYNC_BB0_EL1 sys_reg(3, 0, 15, 13, 0)
#define IMP_BARRIER_INIT_SYNC_BB1_EL1 sys_reg(3, 0, 15, 13, 1)
#define IMP_BARRIER_INIT_SYNC_BB2_EL1 sys_reg(3, 0, 15, 13, 2)
#define IMP_BARRIER_INIT_SYNC_BB3_EL1 sys_reg(3, 0, 15, 13, 3)
#define IMP_BARRIER_INIT_SYNC_BB4_EL1 sys_reg(3, 0, 15, 13, 4)
#define IMP_BARRIER_INIT_SYNC_BB5_EL1 sys_reg(3, 0, 15, 13, 5)
#define IMP_BARRIER_ASSIGN_SYNC_W0_EL1 sys_reg(3, 0, 15, 15, 0)
#define IMP_BARRIER_ASSIGN_SYNC_W1_EL1 sys_reg(3, 0, 15, 15, 1)
#define IMP_BARRIER_ASSIGN_SYNC_W2_EL1 sys_reg(3, 0, 15, 15, 2)
#define IMP_BARRIER_ASSIGN_SYNC_W3_EL1 sys_reg(3, 0, 15, 15, 3)
#define IMP_SOC_STANDBY_CTRL_EL1 sys_reg(3, 0, 11, 0, 0)
#define IMP_FJ_CORE_UARCH_CTRL_EL2 sys_reg(3, 4, 11, 0, 4)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1 sys_reg(3, 0, 11, 0, 5)
/* macros */
#define PWR_REG_MASK(reg, field) (((UL(1) << ((reg##_##field##_MSB) - (reg##_##field##_LSB) + 1)) - 1) << (reg##_##field##_LSB))
/* IMP_FJ_TAG_ADDRESS_CTRL_EL1 */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT (0)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT (8)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT (9)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT)
/* IMP_SCCR_CTRL_EL1 */
#define IMP_SCCR_CTRL_EL1_EL1AE_SHIFT (63)
#define IMP_SCCR_CTRL_EL1_EL1AE_MASK (1UL << IMP_SCCR_CTRL_EL1_EL1AE_SHIFT)
/* IMP_SCCR_SET0_L2_EL1 */
#define IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT (0)
/* IMP_PF_CTRL_EL1 */
#define IMP_PF_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_PF_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_BARRIER_CTRL_EL1 */
#define IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_SOC_STANDBY_CTRL_EL1 */
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_MSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_LSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_MSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_LSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_MSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_LSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, ECO_MODE)
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, MODE_CHANGE)
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, RETENTION)
/* IMP_FJ_CORE_UARCH_RESTRECTION_EL1 */
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_MSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_LSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_MSB 9
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_LSB 8
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_MSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_LSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, FL_RESTRICT_TRANS)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, ISSUE_RESTRICTION)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, EX_RESTRICTION)
void scdrv_registers_init(void);
void hpc_registers_init(void);
void vhbm_barrier_registers_init(void);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_IMP_SYSREG_H */

View File

@ -1,4 +1,4 @@
/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_IRQ_H
#define __HEADER_ARM64_IRQ_H
@ -14,23 +14,48 @@
#define INTRID_QUERY_FREE_MEM 2
#define INTRID_CPU_STOP 3
#define INTRID_TLB_FLUSH 4
#define INTRID_STACK_TRACE 5
#define INTRID_MULTI_INTR 6
#define INTRID_MULTI_NMI 7
#define INTRID_STACK_TRACE 6
#define INTRID_MEMDUMP 7
/* use PPI interrupt number */
#define INTRID_PERF_OVF 23
#define INTRID_HYP_PHYS_TIMER 26 /* cnthp */
#define INTRID_VIRT_TIMER 27 /* cntv */
#define INTRID_HYP_VIRT_TIMER 28 /* cnthv */
#define INTRID_PHYS_TIMER 30 /* cntp */
/* timer intrid getter */
static int get_virt_timer_intrid(void)
{
#ifdef CONFIG_ARM64_VHE
unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1);
if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) {
return INTRID_HYP_VIRT_TIMER;
}
#endif /* CONFIG_ARM64_VHE */
return INTRID_VIRT_TIMER;
}
static int get_phys_timer_intrid(void)
{
#ifdef CONFIG_ARM64_VHE
unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1);
if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) {
return INTRID_HYP_PHYS_TIMER;
}
#endif /* CONFIG_ARM64_VHE */
return INTRID_PHYS_TIMER;
}
/* use timer checker */
extern unsigned long is_use_virt_timer(void);
/* Functions for GICv2 */
extern void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv2(void);
extern void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv2(struct pt_regs *regs);
/* Functions for GICv3 */
@ -38,7 +63,6 @@ extern void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv3(void);
extern void arm64_issue_ipi_gicv3(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv3(struct pt_regs *regs);
void handle_IPI(unsigned int vector, struct pt_regs *regs);

View File

@ -72,7 +72,6 @@
#define PMD_SECT_S (UL(3) << 8)
#define PMD_SECT_AF (UL(1) << 10)
#define PMD_SECT_NG (UL(1) << 11)
#define PMD_SECT_CONT (UL(1) << 52)
#define PMD_SECT_PXN (UL(1) << 53)
#define PMD_SECT_UXN (UL(1) << 54)
@ -94,7 +93,6 @@
#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (UL(1) << 10) /* Access Flag */
#define PTE_NG (UL(1) << 11) /* nG */
#define PTE_CONT (UL(1) << 52) /* Contiguous range */
#define PTE_PXN (UL(1) << 53) /* Privileged XN */
#define PTE_UXN (UL(1) << 54) /* User XN */
/* Software defined PTE bits definition.*/

View File

@ -1,17 +1,17 @@
/* prctl.h COPYRIGHT FUJITSU LIMITED 2017-2019 */
/* prctl.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifndef __HEADER_ARM64_COMMON_PRCTL_H
#define __HEADER_ARM64_COMMON_PRCTL_H
#define PR_SET_THP_DISABLE 41
#define PR_GET_THP_DISABLE 42
/* arm64 Scalable Vector Extension controls */
/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
#define PR_SVE_SET_VL 50 /* set task vector length */
# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
#define PR_SVE_GET_VL 51 /* get task vector length */
/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
#define PR_SVE_SET_VL 48 /* set task vector length */
#define PR_SVE_SET_VL_THREAD (1 << 1) /* set just this thread */
#define PR_SVE_SET_VL_INHERIT (1 << 2) /* inherit across exec */
#define PR_SVE_SET_VL_ONEXEC (1 << 3) /* defer effect until exec */
#define PR_SVE_GET_VL 49 /* get task vector length */
/* Decode helpers for the return value from PR_SVE_GET_VL: */
#define PR_SVE_GET_VL_LEN(ret) ((ret) & 0x3fff) /* vector length */
#define PR_SVE_GET_VL_INHERIT (PR_SVE_SET_VL_INHERIT << 16)
/* For convenience, PR_SVE_SET_VL returns the result in the same encoding */
#endif /* !__HEADER_ARM64_COMMON_PRCTL_H */

View File

@ -1,4 +1,4 @@
/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_PTRACE_H
#define __HEADER_ARM64_COMMON_PTRACE_H
@ -46,7 +46,6 @@
#ifndef __ASSEMBLY__
#include <lwk/compiler.h>
#include <ihk/types.h>
struct user_hwdebug_state {
@ -79,70 +78,6 @@ struct user_sve_header {
uint16_t __reserved;
};
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_TLS,
REGSET_HW_BREAK,
REGSET_HW_WATCH,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif /* CONFIG_ARM64_SVE */
};
struct thread;
struct user_regset;
typedef int user_regset_active_fn(struct thread *target,
const struct user_regset *regset);
typedef long user_regset_get_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf);
typedef long user_regset_set_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf);
typedef int user_regset_writeback_fn(struct thread *target,
const struct user_regset *regset,
int immediate);
typedef unsigned int user_regset_get_size_fn(struct thread *target,
const struct user_regset *regset);
struct user_regset {
user_regset_get_fn *get;
user_regset_set_fn *set;
user_regset_active_fn *active;
user_regset_writeback_fn *writeback;
user_regset_get_size_fn *get_size;
unsigned int n;
unsigned int size;
unsigned int align;
unsigned int bias;
unsigned int core_note_type;
};
struct user_regset_view {
const char *name;
const struct user_regset *regsets;
unsigned int n;
uint32_t e_flags;
uint16_t e_machine;
uint8_t ei_osabi;
};
extern const struct user_regset_view *current_user_regset_view(void);
extern const struct user_regset *find_regset(
const struct user_regset_view *view,
unsigned int type);
extern unsigned int regset_size(struct thread *target,
const struct user_regset *regset);
/* Definitions for user_sve_header.flags: */
#define SVE_PT_REGS_MASK (1 << 0)
@ -150,7 +85,7 @@ extern unsigned int regset_size(struct thread *target,
#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
#define SVE_PT_VL_THREAD PR_SVE_SET_VL_THREAD
#define SVE_PT_VL_INHERIT PR_SVE_VL_INHERIT
#define SVE_PT_VL_INHERIT PR_SVE_SET_VL_INHERIT
#define SVE_PT_VL_ONEXEC PR_SVE_SET_VL_ONEXEC
/*
@ -164,9 +99,7 @@ extern unsigned int regset_size(struct thread *target,
*/
/* Offset from the start of struct user_sve_header to the register data */
#define SVE_PT_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
/*
* The register data content and layout depends on the value of the
@ -241,10 +174,8 @@ extern unsigned int regset_size(struct thread *target,
#define SVE_PT_SVE_FFR_OFFSET(vq) \
__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
#define SVE_PT_SVE_FPSR_OFFSET(vq) \
((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \
(SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SVE_FPSR_OFFSET(vq) \
((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + 15) / 16 * 16)
#define SVE_PT_SVE_FPCR_OFFSET(vq) \
(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
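/* Editorial note: the "(x + (SVE_VQ_BYTES - 1)) / SVE_VQ_BYTES * SVE_VQ_BYTES"
 * idiom above rounds x up to the next 16-byte boundary, e.g.
 * ((20 + 15) / 16) * 16 == 32.
 */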
@ -253,10 +184,9 @@ extern unsigned int regset_size(struct thread *target,
* 128-bit boundary.
*/
#define SVE_PT_SVE_SIZE(vq, flags) \
((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \
- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SVE_SIZE(vq, flags) \
((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE - \
SVE_PT_SVE_OFFSET + 15) / 16 * 16)
#define SVE_PT_SIZE(vq, flags) \
(((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \

View File

@ -1,10 +1,9 @@
/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_COMMON_REGISTERS_H
#define __HEADER_ARM64_COMMON_REGISTERS_H
#include <types.h>
#include <arch/cpu.h>
#include <sysreg.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
@ -77,12 +76,15 @@ static unsigned long rdmsr(unsigned int index)
return 0;
}
/* @ref.impl linux4.10.16 */
/* arch/arm64/include/asm/arch_timer.h:arch_counter_get_cntvct() */
static inline unsigned long rdtsc(void)
/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */
static unsigned long rdtsc(void)
{
unsigned long cval;
isb();
return read_sysreg(cntvct_el0);
asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
return cval;
}
static void set_perfctl(int counter, int event, int mask)

View File

@ -85,11 +85,7 @@ enum __rlimit_resource
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
/* timeout for RT tasks in us */
__RLIMIT_RTTIME = 15,
#define RLIMIT_RTTIME __RLIMIT_RTTIME
__RLIMIT_NLIMITS = 16,
__RLIMIT_NLIMITS = 15,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS

View File

@ -1,4 +1,4 @@
/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_SIGNAL_H
#define __HEADER_ARM64_COMMON_SIGNAL_H
@ -9,11 +9,6 @@
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
static inline int valid_signal(unsigned long sig)
{
return sig <= _NSIG ? 1 : 0;
}
typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))
@ -298,7 +293,6 @@ struct extra_context {
struct _aarch64_ctx head;
void *data; /* 16-byte aligned pointer to the extra space */
uint32_t size; /* size in bytes of the extra space */
uint32_t __reserved[3];
};
#define SVE_MAGIC 0x53564501
@ -319,25 +313,19 @@ struct sve_context {
* The SVE architecture leaves space for future expansion of the
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
* See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_BYTES 16 /* number of bytes per quadword */
#define SVE_VQ_MIN 1
#define SVE_VQ_MAX 512
#define SVE_VQ_MAX 0x200
#define SVE_VL_MIN (SVE_VQ_MIN * SVE_VQ_BYTES)
#define SVE_VL_MAX (SVE_VQ_MAX * SVE_VQ_BYTES)
#define SVE_VL_MIN (SVE_VQ_MIN * 0x10)
#define SVE_VL_MAX (SVE_VQ_MAX * 0x10)
#define SVE_NUM_ZREGS 32
#define SVE_NUM_PREGS 16
#define sve_vl_valid(vl) \
((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
#define sve_vq_from_vl(vl) ((vl) / SVE_VQ_BYTES)
#define sve_vl_from_vq(vq) ((vq) * SVE_VQ_BYTES)
((vl) % 0x10 == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
#define sve_vq_from_vl(vl) ((vl) / 0x10)
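/* Editorial example: a 256-bit SVE vector has VL = 32 bytes, so
 * sve_vq_from_vl(32) == 2 and sve_vl_valid(32) holds.
 */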
/*
* The total size of meaningful data in the SVE context in bytes,
@ -372,13 +360,11 @@ struct sve_context {
* Additional data might be appended in the future.
*/
#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * SVE_VQ_BYTES)
#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * (SVE_VQ_BYTES / 8))
#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * 16)
#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * 2)
#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
#define SVE_SIG_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_SIG_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET
#define SVE_SIG_ZREG_OFFSET(vq, n) \
@ -416,6 +402,8 @@ struct ucontext {
};
void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs, int num);
void check_signal_irq_disabled(unsigned long rc, void *regs, int num);
#endif /* __HEADER_ARM64_COMMON_SIGNAL_H */

View File

@ -1,14 +1,17 @@
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
SYSCALL_DELEGATED(4, io_getevents)
SYSCALL_DELEGATED(17, getcwd)
SYSCALL_HANDLED(22, epoll_pwait)
SYSCALL_DELEGATED(22, epoll_pwait)
SYSCALL_DELEGATED(25, fcntl)
SYSCALL_HANDLED(29, ioctl)
SYSCALL_DELEGATED(35, unlinkat)
SYSCALL_DELEGATED(43, statfs)
SYSCALL_DELEGATED(44, fstatfs)
#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
SYSCALL_HANDLED(56, openat)
#else /* POSTK_DEBUG_ARCH_DEP_62 */
SYSCALL_DELEGATED(56, openat)
#endif /* POSTK_DEBUG_ARCH_DEP_62 */
SYSCALL_HANDLED(57, close)
SYSCALL_DELEGATED(61, getdents64)
SYSCALL_DELEGATED(62, lseek)
@ -17,8 +20,8 @@ SYSCALL_DELEGATED(64, write)
SYSCALL_DELEGATED(66, writev)
SYSCALL_DELEGATED(67, pread64)
SYSCALL_DELEGATED(68, pwrite64)
SYSCALL_HANDLED(72, pselect6)
SYSCALL_HANDLED(73, ppoll)
SYSCALL_DELEGATED(72, pselect6)
SYSCALL_DELEGATED(73, ppoll)
SYSCALL_HANDLED(74, signalfd4)
SYSCALL_DELEGATED(78, readlinkat)
SYSCALL_DELEGATED(80, fstat)
@ -83,7 +86,6 @@ SYSCALL_HANDLED(175, geteuid)
SYSCALL_HANDLED(176, getgid)
SYSCALL_HANDLED(177, getegid)
SYSCALL_HANDLED(178, gettid)
SYSCALL_HANDLED(179, sysinfo)
SYSCALL_DELEGATED(188, msgrcv)
SYSCALL_DELEGATED(189, msgsnd)
SYSCALL_DELEGATED(192, semtimedop)
@ -112,16 +114,14 @@ SYSCALL_HANDLED(236, get_mempolicy)
SYSCALL_HANDLED(237, set_mempolicy)
SYSCALL_HANDLED(238, migrate_pages)
SYSCALL_HANDLED(239, move_pages)
#ifdef ENABLE_PERF
SYSCALL_HANDLED(241, perf_event_open)
#else // ENABLE_PERF
SYSCALL_DELEGATED(241, perf_event_open)
#endif // ENABLE_PERF
SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(261, prlimit64)
SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
@ -129,7 +129,6 @@ SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
SYSCALL_HANDLED(733, util_register_desc)
/* McKernel Specific */
SYSCALL_HANDLED(801, swapout)
@ -139,14 +138,9 @@ SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
SYSCALL_DELEGATED(1024, open)
SYSCALL_DELEGATED(1026, unlink)
SYSCALL_DELEGATED(1035, readlink)
SYSCALL_HANDLED(1045, signalfd)
SYSCALL_DELEGATED(1049, stat)
SYSCALL_DELEGATED(1060, getpgrp)
SYSCALL_HANDLED(1062, time)
SYSCALL_DELEGATED(1069, epoll_wait)
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
* robust marker for the autotest patch.
*/
SYSCALL_DELEGATED(1062, time)

View File

@ -1,4 +1,4 @@
/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
/*
* Macros for accessing system registers with older binutils.
*
@ -23,7 +23,6 @@
#include <types.h>
#include <stringify.h>
#include <ihk/types.h>
/*
* ARMv8 ARM reserves the following encoding for system registers:
@ -57,6 +56,12 @@
#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask)
#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask)
#ifdef __ASSEMBLY__
#define __emit_inst(x)	.inst (x)
#else
#define __emit_inst(x)	".inst " __stringify((x)) "\n\t"
#endif
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@ -138,12 +143,6 @@
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64isar1 */
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_GIC_SHIFT 24
@ -179,14 +178,6 @@
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#define ID_AA64MMFR0_PARANGE_48 0x5
#define ID_AA64MMFR0_PARANGE_52 0x6
#ifdef CONFIG_ARM64_PA_BITS_52
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
#else
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
#endif
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
@ -273,46 +264,15 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (1UL << 31)
/* SYS_MIDR_EL1 */
//mask
#define SYS_MIDR_EL1_IMPLEMENTER_MASK (0xFFUL)
#define SYS_MIDR_EL1_PPNUM_MASK (0xFFFUL)
//shift
#define SYS_MIDR_EL1_IMPLEMENTER_SHIFT (24)
#define SYS_MIDR_EL1_PPNUM_SHIFT (0x4)
//val
#define SYS_MIDR_EL1_IMPLEMENTER_FJ (0x46)
#define SYS_MIDR_EL1_PPNUM_TCHIP (0x1)
#define READ_ACCESS (0)
#define WRITE_ACCESS (1)
#define ACCESS_REG_FUNC(name, reg) \
static void xos_access_##name(uint8_t flag, uint64_t *reg_value) \
{ \
if (flag == READ_ACCESS) { \
__asm__ __volatile__("mrs_s %0," __stringify(reg) "\n\t" \
:"=&r"(*reg_value)::); \
} \
else if (flag == WRITE_ACCESS) { \
__asm__ __volatile__("msr_s" __stringify(reg) ", %0\n\t" \
::"r"(*reg_value):); \
} else { \
; \
} \
}
#define XOS_FALSE (0)
#define XOS_TRUE (1)
#ifdef __ASSEMBLY__
#define __emit_inst(x)	.inst (x)
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__reg_num_x\num, \num
.endr
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
.endm
.macro msr_s, sreg, rt
@ -320,7 +280,7 @@
.endm
#else
#define __emit_inst(x)".inst " __stringify((x)) "\n\t"
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
" .equ .L__reg_num_x\\num, \\num\n"
@ -336,28 +296,6 @@ asm(
" .endm\n"
);
ACCESS_REG_FUNC(midr_el1, SYS_MIDR_EL1);
static int xos_is_tchip(void)
{
uint64_t reg = 0;
int ret = 0, impl = 0, part = 0;
xos_access_midr_el1(READ_ACCESS, &reg);
impl = (reg >> SYS_MIDR_EL1_IMPLEMENTER_SHIFT) &
SYS_MIDR_EL1_IMPLEMENTER_MASK;
part = (reg >> SYS_MIDR_EL1_PPNUM_SHIFT) & SYS_MIDR_EL1_PPNUM_MASK;
if ((impl == SYS_MIDR_EL1_IMPLEMENTER_FJ) &&
(part == SYS_MIDR_EL1_PPNUM_TCHIP)) {
ret = XOS_TRUE;
}
else {
ret = XOS_FALSE;
}
return ret;
}
#endif
/*
@ -398,6 +336,4 @@ static int xos_is_tchip(void)
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
#define CPTR_EL2_TZ (1 << 8)
#include "imp-sysreg.h"
#endif /* __ASM_SYSREG_H */

View File

@ -1,22 +1,15 @@
/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H
#define __HEADER_ARM64_COMMON_THREAD_INFO_H
#define MIN_KERNEL_STACK_SHIFT 15
#include <arch-memory.h>
#if (MIN_KERNEL_STACK_SHIFT < PAGE_SHIFT)
#define KERNEL_STACK_SHIFT PAGE_SHIFT
#else
#define KERNEL_STACK_SHIFT MIN_KERNEL_STACK_SHIFT
#endif
#define KERNEL_STACK_SIZE (UL(1) << KERNEL_STACK_SHIFT)
#define KERNEL_STACK_SIZE 32768 /* 8 pages */
#define THREAD_START_SP (KERNEL_STACK_SIZE - 16)
#ifndef __ASSEMBLY__
#define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
#define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#include <process.h>
#include <prctl.h>
@ -46,9 +39,9 @@ struct thread_info {
int cpu; /* cpu */
struct cpu_context cpu_context; /* kernel_context */
void *sve_state; /* SVE registers, if any */
unsigned int sve_vl; /* SVE vector length */
unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned long sve_flags; /* SVE related flags */
uint16_t sve_vl; /* SVE vector length */
uint16_t sve_vl_onexec; /* SVE vl after next exec */
uint16_t sve_flags; /* SVE related flags */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
};
@ -56,12 +49,12 @@ struct thread_info {
/* Flags for sve_flags (intentionally defined to match the prctl flags) */
/* Inherit sve_vl and sve_flags across execve(): */
#define THREAD_VL_INHERIT PR_SVE_VL_INHERIT
#define THREAD_VL_INHERIT PR_SVE_SET_VL_INHERIT
struct arm64_cpu_local_thread {
struct thread_info thread_info;
unsigned long paniced;
uint64_t panic_regs[34];
unsigned long paniced; /* 136 */
uint64_t panic_regs[34]; /* 144 */
};
union arm64_cpu_local_variables {

View File

@ -4,7 +4,6 @@
#define __ASM_TRAP_H
#include <types.h>
#include <arch-lock.h>
struct pt_regs;

View File

@ -1,22 +1,8 @@
/* virt.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* virt.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_VIRT_H
#define __HEADER_ARM64_COMMON_VIRT_H
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
#ifndef __ASSEMBLY__
#include <sysreg.h>
#include <ptrace.h>
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
static inline int is_kernel_in_hyp_mode(void)
{
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_VIRT_H */

View File

@ -1,21 +1,21 @@
/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#include <ihk/cpu.h>
#include <irq.h>
#include <arm-gic-v2.h>
#include <io.h>
#include <arch/cpu.h>
#include <memory.h>
#include <affinity.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <arch-timer.h>
#include <cls.h>
// #define DEBUG_GICV2
#ifdef DEBUG_GICV2
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
void *dist_base;
@ -31,9 +31,10 @@ void *cpu_base;
* function, it is not necessary to disable/enable interrupts
* in this function the way gic_raise_softirq() does.
*/
static void __arm64_raise_sgi_gicv2(unsigned int hw_cpuid, unsigned int vector)
static void arm64_raise_sgi_gicv2(unsigned int cpuid, unsigned int vector)
{
/* Build interrupt destination of the target cpu */
unsigned int hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
uint8_t cpu_target_list = gic_hwid_to_affinity(hw_cpuid);
/*
@ -49,32 +50,21 @@ static void __arm64_raise_sgi_gicv2(unsigned int hw_cpuid, unsigned int vector)
);
}
static void arm64_raise_sgi_gicv2(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target CPU */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
__arm64_raise_sgi_gicv2(hw_cpuid, vector);
}
static void arm64_raise_sgi_to_host_gicv2(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target Linux/host CPU */
uint32_t hw_cpuid = ihk_mc_get_apicid(cpuid);
__arm64_raise_sgi_gicv2(hw_cpuid, vector);
}
/**
* arm64_raise_spi_gicv2
* @ref.impl nothing.
*/
extern unsigned int ihk_ikc_irq_apicid;
static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector)
{
uint64_t spi_reg_offset;
uint32_t spi_set_pending_bitpos;
if (cpuid != ihk_ikc_irq_apicid) {
ekprintf("SPI(irq#%d) cannot send other than the host.\n", vector);
return;
}
/**
* Calculate the register offset and bit position corresponding to the interrupt number.
*
@ -93,11 +83,6 @@ static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector)
);
}
void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv2(cpuid, vector);
}
/**
* arm64_issue_ipi_gicv2
* @param cpuid : hardware cpu id
@ -126,9 +111,8 @@ extern int interrupt_from_user(void *);
void handle_interrupt_gicv2(struct pt_regs *regs)
{
unsigned int irqstat, irqnr;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
do {
// get GICC_IAR.InterruptID
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
@ -148,13 +132,7 @@ void handle_interrupt_gicv2(struct pt_regs *regs)
*/
break;
} while (1);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
/* for migration by IPI */
if (get_this_cpu_local_var()->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
set_cputime(0);
}
void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size)
@ -171,6 +149,10 @@ void gic_enable_gicv2(void)
{
unsigned int enable_ppi_sgi = 0;
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
if (is_use_virt_timer()) {
enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid();
} else {
enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid();
}
writel_relaxed(enable_ppi_sgi, dist_base + GIC_DIST_ENABLE_SET);
}
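The arithmetic elided from arm64_raise_spi_gicv2() above (the "register offset and bit position" step) follows the standard GICv2 distributor layout; a minimal sketch, assuming the conventional GIC_DIST_PENDING_SET (GICD_ISPENDRn) register block:

/* Editorial sketch: each 32-bit GICD_ISPENDRn register covers 32 interrupt IDs. */
spi_reg_offset = (vector / 32) * sizeof(uint32_t);
spi_set_pending_bitpos = vector % 32;
writel_relaxed(1U << spi_set_pending_bitpos,
	       dist_base + GIC_DIST_PENDING_SET + spi_reg_offset);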

View File

@ -1,4 +1,5 @@
/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <irq.h>
#include <arm-gic-v2.h>
#include <arm-gic-v3.h>
@ -6,17 +7,17 @@
#include <cputype.h>
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <arch-timer.h>
#include <cls.h>
//#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#ifdef USE_CAVIUM_THUNDER_X
@ -195,12 +196,15 @@ static inline void gic_write_bpr1(uint32_t val)
}
#endif
static void __arm64_raise_sgi_gicv3(uint32_t hw_cpuid, uint32_t vector)
static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
{
uint64_t mpidr, cluster_id;
uint16_t tlist;
uint64_t val;
/* Build interrupt destination of the target cpu */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
/*
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
@ -236,22 +240,6 @@ static void __arm64_raise_sgi_gicv3(uint32_t hw_cpuid, uint32_t vector)
}
}
static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target CPU */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
__arm64_raise_sgi_gicv3(hw_cpuid, vector);
}
static void arm64_raise_sgi_to_host_gicv3(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target Linux/host CPU */
uint32_t hw_cpuid = ihk_mc_get_apicid(cpuid);
__arm64_raise_sgi_gicv3(hw_cpuid, vector);
}
static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector)
{
uint64_t spi_reg_offset;
@ -278,12 +266,6 @@ static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector)
static void arm64_raise_lpi_gicv3(uint32_t cpuid, uint32_t vector)
{
// @todo.impl
ekprintf("%s called.\n", __func__);
}
void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv3(cpuid, vector);
}
void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
@ -301,7 +283,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
// send LPI (allow only to host)
arm64_raise_lpi_gicv3(cpuid, vector);
} else {
ekprintf("#%d is bad irq number.\n", vector);
ekprintf("#%d is bad irq number.", vector);
}
}
@ -309,14 +291,10 @@ extern int interrupt_from_user(void *);
void handle_interrupt_gicv3(struct pt_regs *regs)
{
uint64_t irqnr;
const int from_user = interrupt_from_user(regs);
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
@ -324,61 +302,11 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
irqnr = gic_read_iar();
}
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
//irqflags = ihk_mc_spinlock_lock(&v->runq_lock);
/* For migration by IPI or by timesharing */
if (v->flags &
(CPU_FLAG_NEED_MIGRATE | CPU_FLAG_NEED_RESCHED)) {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
do_check = 1;
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
if (do_check) {
check_signal(0, regs, 0);
schedule();
}
}
static uint64_t gic_mpidr_to_affinity(unsigned long mpidr)
{
uint64_t aff;
aff = ((uint64_t)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
MPIDR_AFFINITY_LEVEL(mpidr, 0));
return aff;
}
static void init_spi_routing(uint32_t irq, uint32_t linux_cpu)
{
uint64_t spi_route_reg_val, spi_route_reg_offset;
if (irq < 32 || 1020 <= irq) {
ekprintf("%s: irq is not spi number. (irq=%d)\n",
__func__, irq);
return;
}
/* write to GICD_IROUTER */
spi_route_reg_offset = irq * 8;
spi_route_reg_val = gic_mpidr_to_affinity(cpu_logical_map(linux_cpu));
writeq_relaxed(spi_route_reg_val,
(void *)(dist_base + GICD_IROUTER +
spi_route_reg_offset));
set_cputime(0);
}
void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
{
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
extern int spi_table[];
extern int nr_spi_table;
int i;
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
dist_base = map_fixed_area(dist_base_pa, size, 1 /* non-cacheable */);
#ifdef USE_CAVIUM_THUNDER_X
@ -387,16 +315,6 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
is_cavium_thunderx = 1;
}
#endif
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
/* initialize spi routing */
for (i = 0; i < nr_spi_table; i++) {
if (spi_table[i] == -1) {
continue;
}
init_spi_routing(spi_table[i], i);
}
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
}
void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size)
@ -433,23 +351,11 @@ void gic_enable_gicv3(void)
void *rd_sgi_base = rbase + 0x10000 /* SZ_64K */;
int i;
unsigned int enable_ppi_sgi = GICD_INT_EN_SET_SGI;
extern int ihk_param_nr_pmu_irq_affi;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
if ((0 <= ihk_param_pmu_irq_affi[i]) &&
(ihk_param_pmu_irq_affi[i] <
sizeof(enable_ppi_sgi) * BITS_PER_BYTE)) {
enable_ppi_sgi |= GICD_ENABLE <<
ihk_param_pmu_irq_affi[i];
}
}
}
else {
enable_ppi_sgi |= GICD_ENABLE << INTRID_PERF_OVF;
if (is_use_virt_timer()) {
enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid();
} else {
enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid();
}
/*
@ -462,10 +368,9 @@ void gic_enable_gicv3(void)
/*
* Set priority on PPI and SGI interrupts
*/
for (i = 0; i < 32; i += 4) {
for (i = 0; i < 32; i += 4)
writel_relaxed(GICD_INT_DEF_PRI_X4,
rd_sgi_base + GIC_DIST_PRI + i);
}
rd_sgi_base + GIC_DIST_PRI + i * 4 / 4);
/* sync wait */
gic_do_wait_for_rwp(rbase);
@ -501,12 +406,9 @@ void gic_enable_gicv3(void)
gic_write_bpr1(0);
/* Set specific IPI to NMI */
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_MULTI_NMI);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
/* sync wait */
gic_do_wait_for_rwp(rbase);

View File

@ -1,4 +1,4 @@
/* local.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* local.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#include <cpulocal.h>
#include <ihk/atomic.h>
#include <ihk/mm.h>
@ -7,31 +7,24 @@
#include <registers.h>
#include <string.h>
#define LOCALS_SPAN (8 * PAGE_SIZE)
/* BSP initialized stack area */
union arm64_cpu_local_variables init_thread_info __attribute__((aligned(KERNEL_STACK_SIZE)));
/* BSP/AP idle stack pointer head */
static union arm64_cpu_local_variables *locals;
size_t arm64_cpu_local_variables_span = KERNEL_STACK_SIZE; /* for debugger */
size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
/* allocate & initialize BSP/AP idle stack */
void init_processors_local(int max_id)
{
int i = 0;
const int sz = (max_id + 1) * KERNEL_STACK_SIZE;
union arm64_cpu_local_variables *tmp;
const int npages = ((max_id + 1) *
(ALIGN_UP(KERNEL_STACK_SIZE, PAGE_SIZE) >>
PAGE_SHIFT));
if (npages < 1) {
panic("idle kernel stack allocation failed.");
}
/* allocate one more for alignment */
locals = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL);
if (locals == NULL) {
panic("idle kernel stack allocation failed.");
}
locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL);
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
/* clear struct process, struct process_vm, struct thread_info area */

File diff suppressed because it is too large

View File

@ -19,7 +19,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((8 * num_processors * MASTER_IKCQ_PKTSIZE)
mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */

View File

@ -1,4 +1,4 @@
/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <arch-perfctr.h>
#include <ihk/perfctr.h>
#include <mc_perf_event.h>
@ -6,61 +6,32 @@
#include <ihk/debug.h>
#include <registers.h>
#include <string.h>
#include <ihk/mm.h>
#include <irq.h>
#include <process.h>
/*
* @ref.impl arch/arm64/kernel/perf_event.c
* Set at runtime when we know what CPU type we are.
*/
struct arm_pmu cpu_pmu;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
extern int ihk_param_nr_pmu_irq_affi;
extern int ihk_param_pmu_irq_affiniry[CONFIG_SMP_MAX_CORES];
extern int ihk_param_nr_pmu_irq_affiniry;
int arm64_init_perfctr(void)
{
int ret;
int i;
int pages;
const struct ihk_mc_cpu_info *cpu_info;
memset(&cpu_pmu, 0, sizeof(cpu_pmu));
ret = armv8pmu_init(&cpu_pmu);
if (ret) {
if (!ret) {
return ret;
}
cpu_info = ihk_mc_get_cpu_info();
pages = (sizeof(struct per_cpu_arm_pmu) * cpu_info->ncpus +
PAGE_SIZE - 1) >> PAGE_SHIFT;
cpu_pmu.per_cpu = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if (cpu_pmu.per_cpu == NULL) {
return -ENOMEM;
}
memset(cpu_pmu.per_cpu, 0, pages * PAGE_SIZE);
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affi[i],
cpu_pmu.handler);
if (ret) {
break;
}
}
}
else {
ret = ihk_mc_register_interrupt_handler(INTRID_PERF_OVF,
cpu_pmu.handler);
for (i = 0; i < ihk_param_nr_pmu_irq_affiniry; i++) {
ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affiniry[i], cpu_pmu.handler);
}
return ret;
}
void arm64_init_per_cpu_perfctr(void)
{
armv8pmu_per_cpu_init(&cpu_pmu.per_cpu[ihk_mc_get_processor_id()]);
}
int arm64_enable_pmu(void)
{
int ret;
@ -76,36 +47,34 @@ void arm64_disable_pmu(void)
cpu_pmu.disable_pmu();
}
void arm64_enable_user_access_pmu_regs(void)
{
cpu_pmu.enable_user_access_pmu_regs();
}
void arm64_disable_user_access_pmu_regs(void)
{
cpu_pmu.disable_user_access_pmu_regs();
}
extern unsigned int *arm64_march_perfmap;
static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, int mode)
{
int ret = -1;
int ret;
unsigned long config_base = 0;
int mapping;
ret = cpu_pmu.disable_counter(1UL << counter);
if (ret < 0) {
mapping = cpu_pmu.map_event(type, config);
if (mapping < 0) {
return mapping;
}
ret = cpu_pmu.disable_counter(counter);
if (!ret) {
return ret;
}
ret = cpu_pmu.enable_intens(1UL << counter);
if (ret < 0) {
ret = cpu_pmu.enable_intens(counter);
if (!ret) {
return ret;
}
ret = cpu_pmu.set_event_filter(&config_base, mode);
if (ret) {
if (!ret) {
return ret;
}
config_base |= config;
config_base |= (unsigned long)mapping;
cpu_pmu.write_evtype(counter, config_base);
return ret;
}
@ -117,24 +86,41 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
return ret;
}
int ihk_mc_perfctr_start(unsigned long counter_mask)
int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
{
return cpu_pmu.enable_counter(counter_mask);
int ret;
ret = __ihk_mc_perfctr_init(counter, PERF_TYPE_RAW, config, mode);
return ret;
}
int ihk_mc_perfctr_stop(unsigned long counter_mask, int flags)
int ihk_mc_perfctr_start(int counter)
{
return cpu_pmu.disable_counter(counter_mask);
int ret;
ret = cpu_pmu.enable_counter(counter);
return ret;
}
int ihk_mc_perfctr_stop(int counter)
{
cpu_pmu.disable_counter(counter);
// When ihk_mc_perfctr_start is called, the init-family functions are
// called as well, so disable the overflow interrupt here.
cpu_pmu.disable_intens(counter);
return 0;
}
int ihk_mc_perfctr_reset(int counter)
{
// TODO[PMU]: As with ihk_mc_perfctr_set, implement this properly once the common-code handling of the sampling rate is settled.
cpu_pmu.write_counter(counter, 0);
return 0;
}
int ihk_mc_perfctr_set(int counter, long val)
//int ihk_mc_perfctr_set(int counter, unsigned long val)
int ihk_mc_perfctr_set(int counter, long val) /* 0416_patchtemp */
{
// TODO[PMU]: The common code is expected to compute the sampling rate and pass the counter value to program in val. Implement properly once the sampling-rate handling is settled.
uint32_t v = val;
cpu_pmu.write_counter(counter, v);
return 0;
@ -147,15 +133,6 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
return 0;
}
int ihk_mc_perfctr_alloc(struct thread *thread, struct mc_perf_event *event)
{
const int counters = ihk_mc_perf_get_num_counters();
return cpu_pmu.get_event_idx(counters,
thread->pmc_alloc_map,
event->hw_config);
}
unsigned long ihk_mc_perfctr_read(int counter)
{
unsigned long count;
@ -163,135 +140,17 @@ unsigned long ihk_mc_perfctr_read(int counter)
return count;
}
unsigned long ihk_mc_perfctr_value(int counter, unsigned long correction)
{
unsigned long count = ihk_mc_perfctr_read(counter) + correction;
count &= ((1UL << 32) - 1);
return count;
}
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config,
unsigned long pmc_status)
//int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status)
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status) /* 0416_patchtemp */
{
int ret;
if (*type == PERF_TYPE_HARDWARE) {
switch (*config) {
case PERF_COUNT_HW_INSTRUCTIONS:
ret = cpu_pmu.map_event(*type, *config);
if (ret < 0) {
return -1;
}
*type = PERF_TYPE_RAW;
break;
default:
// Unexpected config
return -1;
}
}
else if (*type != PERF_TYPE_RAW) {
return -1;
}
ret = cpu_pmu.get_event_idx(get_per_cpu_pmu()->num_events, pmc_status,
*config);
ret = cpu_pmu.get_event_idx(cpu_pmu.num_events, pmc_status);
return ret;
}
int ihk_mc_perf_counter_mask_check(unsigned long counter_mask)
/* 0416_patchtemp */
/* ihk_mc_perfctr_fixed_init() stub added. */
int ihk_mc_perfctr_fixed_init(int counter, int mode)
{
return cpu_pmu.counter_mask_valid(counter_mask);
}
int ihk_mc_perf_get_num_counters(void)
{
const struct per_cpu_arm_pmu *per_cpu_arm_pmu = get_per_cpu_pmu();
return per_cpu_arm_pmu->num_events;
}
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
/* Nothing to do. */
return 0;
}
static inline uint64_t arm_pmu_event_max_period(struct mc_perf_event *event)
{
return 0xFFFFFFFF;
}
int hw_perf_event_init(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (!is_sampling_event(event)) {
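/*
 * Counting-only event: use half the counter width as the period so a
 * new counter value is unlikely to overtake the previous one even under
 * IRQ latency (same rationale as Linux's armpmu).
 */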
hwc->sample_period = arm_pmu_event_max_period(event) >> 1;
hwc->last_period = hwc->sample_period;
ihk_atomic64_set(&hwc->period_left, hwc->sample_period);
}
return 0;
}
int ihk_mc_event_set_period(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int64_t left = ihk_atomic64_read(&hwc->period_left);
int64_t period = hwc->sample_period;
uint64_t max_period;
int ret = 0;
max_period = arm_pmu_event_max_period(event);
if (unlikely(left <= -period)) {
left = period;
ihk_atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
ihk_atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
/*
* Limit the maximum period to prevent the counter value
* from overtaking the one we are about to program. In
* effect we are reducing max_period to account for
* interrupt latency (and we are being very conservative).
*/
if (left > (max_period >> 1))
left = (max_period >> 1);
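/*
 * Program the counter with -left so it overflows (and raises the PMU
 * interrupt) after exactly 'left' more events.
 */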
ihk_atomic64_set(&hwc->prev_count, (uint64_t)-left);
cpu_pmu.write_counter(event->counter_id,
(uint64_t)(-left) & max_period);
return ret;
}
uint64_t ihk_mc_event_update(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int64_t delta;
uint64_t prev_raw_count, new_raw_count;
uint64_t max_period = arm_pmu_event_max_period(event);
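/*
 * Retry if prev_count was updated concurrently (e.g. by the overflow
 * handler) between the read and the cmpxchg.
 */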
again:
prev_raw_count = ihk_atomic64_read(&hwc->prev_count);
new_raw_count = cpu_pmu.read_counter(event->counter_id);
if (ihk_atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count - prev_raw_count) & max_period;
ihk_atomic64_add(delta, &event->count);
ihk_atomic64_add(-delta, &hwc->period_left);
return new_raw_count;
return -1;
}

File diff suppressed because it is too large

View File

@ -30,7 +30,7 @@
*/
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
ENTRY(__cpu_do_idle)
ENTRY(cpu_do_idle)
mrs x0, daif // save I bit
msr daifset, #2 // set I bit
mrs_s x1, ICC_PMR_EL1 // save PMR
@ -41,13 +41,13 @@ ENTRY(__cpu_do_idle)
msr_s ICC_PMR_EL1, x1 // restore PMR
msr daif, x0 // restore I bit
ret
ENDPROC(__cpu_do_idle)
ENDPROC(cpu_do_idle)
#else /* defined(CONFIG_HAS_NMI) */
ENTRY(__cpu_do_idle)
ENTRY(cpu_do_idle)
dsb sy // WFI may enter a low-power mode
wfi
ret
ENDPROC(__cpu_do_idle)
ENDPROC(cpu_do_idle)
#endif /* defined(CONFIG_HAS_NMI) */
/*

View File

@ -1,4 +1,4 @@
/* psci.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* psci.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* @ref.impl arch/arm64/kernel/psci.c */
/*
* This program is free software; you can redistribute it and/or modify
@ -18,15 +18,18 @@
#include <psci.h>
#include <errno.h>
#include <ihk/types.h>
#include <ihk/debug.h>
#include <compiler.h>
#include <lwk/compiler.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_PSCI
#ifdef DEBUG_PRINT_PSCI
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1

View File

@ -1,4 +1,4 @@
/* ptrace.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
/* ptrace.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
#include <errno.h>
#include <debug-monitors.h>
#include <hw_breakpoint.h>
@ -11,21 +11,57 @@
#include <hwcap.h>
#include <string.h>
#include <thread_info.h>
#include <ptrace.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf kprintf
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
extern void save_debugreg(unsigned long *debugreg);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern int interrupt_from_user(void *);
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_TLS,
REGSET_HW_BREAK,
REGSET_HW_WATCH,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif /* CONFIG_ARM64_SVE */
};
struct user_regset;
typedef long user_regset_get_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf);
typedef long user_regset_set_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf);
struct user_regset {
user_regset_get_fn *get;
user_regset_set_fn *set;
unsigned int n;
unsigned int size;
unsigned int core_note_type;
};
long ptrace_read_user(struct thread *thread, long addr, unsigned long *value)
{
return -EIO;
@ -243,17 +279,6 @@ static inline long copy_regset_from_user(struct thread *target,
return regset->set(target, regset, offset, size, NULL, data);
}
unsigned int regset_size(struct thread *target,
const struct user_regset *regset)
{
if (!regset->get_size) {
return regset->n * regset->size;
}
else {
return regset->get_size(target, regset);
}
}
/*
* Bits which are always architecturally RES0 per ARM DDI 0487A.h
* Userspace cannot use these until they have an architectural meaning.
@ -605,48 +630,6 @@ out:
#ifdef CONFIG_ARM64_SVE
static void sve_init_header_from_thread(struct user_sve_header *header,
struct thread *target)
{
unsigned int vq;
memset(header, 0, sizeof(*header));
/* McKernel processes always enable SVE. */
header->flags = SVE_PT_REGS_SVE;
if (target->ctx.thread->sve_flags & SVE_PT_VL_INHERIT) {
header->flags |= SVE_PT_VL_INHERIT;
}
header->vl = target->ctx.thread->sve_vl;
vq = sve_vq_from_vl(header->vl);
header->max_vl = sve_max_vl;
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
}
static unsigned int sve_size_from_header(struct user_sve_header const *header)
{
return ALIGN(header->size, SVE_VQ_BYTES);
}
static unsigned int sve_get_size(struct thread *target,
const struct user_regset *regset)
{
struct user_sve_header header;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return 0;
}
sve_init_header_from_thread(&header, target);
return sve_size_from_header(&header);
}
/* read NT_ARM_SVE */
static long sve_get(struct thread *target,
const struct user_regset *regset,
@ -669,9 +652,23 @@ static long sve_get(struct thread *target,
}
/* Header */
sve_init_header_from_thread(&header, target);
memset(&header, 0, sizeof(header));
header.vl = target->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(header.vl));
vq = sve_vq_from_vl(header.vl);
BUG_ON(!sve_vl_valid(sve_max_vl));
header.max_vl = sve_max_vl;
/* McKernel processes always enable SVE. */
header.flags = SVE_PT_REGS_SVE;
header.size = SVE_PT_SIZE(vq, header.flags);
header.max_size = SVE_PT_SIZE(sve_vq_from_vl(header.max_vl),
SVE_PT_REGS_SVE);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
0, sizeof(header));
if (ret) {
@ -685,9 +682,11 @@ static long sve_get(struct thread *target,
*/
/* Otherwise: full SVE case */
start = SVE_PT_SVE_OFFSET;
end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
BUG_ON(end < start);
BUG_ON(end - start > sve_state_size(target));
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
target->ctx.thread->sve_state,
start, end);
@ -697,18 +696,24 @@ static long sve_get(struct thread *target,
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
BUG_ON(end < start);
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
start, end);
if (ret) {
goto out;
}
/*
* Copy fpsr, and fpcr which must follow contiguously in
* struct fpsimd_state:
*/
start = end;
end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
BUG_ON((char *)(&target->fp_regs->fpcr + 1) <
(char *)&target->fp_regs->fpsr);
BUG_ON(end < start);
BUG_ON((char *)(&target->fp_regs->fpcr + 1) -
(char *)&target->fp_regs->fpsr !=
end - start);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
@ -717,7 +722,9 @@ static long sve_get(struct thread *target,
}
start = end;
end = sve_size_from_header(&header);
end = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16 * 16;
BUG_ON(end < start);
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
start, end);
out:
@ -761,12 +768,13 @@ static long sve_set(struct thread *target,
* sve_set_vector_length(), which will also validate them for us:
*/
ret = sve_set_vector_length(target, header.vl,
((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
header.flags & ~SVE_PT_REGS_MASK);
if (ret) {
goto out;
}
/* Actual VL set may be less than the user asked for: */
BUG_ON(!sve_vl_valid(target->ctx.thread->sve_vl));
vq = sve_vq_from_vl(target->ctx.thread->sve_vl);
/* Registers: FPSIMD-only case */
@ -777,19 +785,11 @@ static long sve_set(struct thread *target,
}
/* Otherwise: full SVE case */
/*
* If setting a different VL from the requested VL and there is
* register data, the data layout will be wrong: don't even
* try to set the registers in this case.
*/
if (count && vq != sve_vq_from_vl(header.vl)) {
ret = -EIO;
goto out;
}
start = SVE_PT_SVE_OFFSET;
end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
BUG_ON(end < start);
BUG_ON(end - start > sve_state_size(target));
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
target->ctx.thread->sve_state,
start, end);
@ -799,21 +799,27 @@ static long sve_set(struct thread *target,
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
BUG_ON(end < start);
ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
start, end);
if (ret) {
goto out;
}
/*
* Copy fpsr, and fpcr which must follow contiguously in
* struct fpsimd_state:
*/
start = end;
end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
BUG_ON((char *)(&target->fp_regs->fpcr + 1) <
(char *)&target->fp_regs->fpsr);
BUG_ON(end < start);
BUG_ON((char *)(&target->fp_regs->fpcr + 1) -
(char *)&target->fp_regs->fpsr !=
end - start);
user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
out:
return ret;
}
@ -825,9 +831,8 @@ static const struct user_regset aarch64_regsets[] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_pt_regs) / sizeof(uint64_t),
.size = sizeof(uint64_t),
.align = sizeof(uint64_t),
.get = gpr_get,
.set = gpr_set,
.set = gpr_set
},
[REGSET_FPR] = {
.core_note_type = NT_PRFPREG,
@ -837,75 +842,56 @@ static const struct user_regset aarch64_regsets[] = {
* fpcr are 32-bits wide.
*/
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = fpr_get,
.set = fpr_set,
.set = fpr_set
},
[REGSET_TLS] = {
.core_note_type = NT_ARM_TLS,
.n = 1,
.size = sizeof(void *),
.align = sizeof(void *),
.get = tls_get,
.set = tls_set,
.set = tls_set
},
[REGSET_HW_BREAK] = {
.core_note_type = NT_ARM_HW_BREAK,
.n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t),
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = hw_break_get,
.set = hw_break_set,
.set = hw_break_set
},
[REGSET_HW_WATCH] = {
.core_note_type = NT_ARM_HW_WATCH,
.n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t),
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = hw_break_get,
.set = hw_break_set,
.set = hw_break_set
},
[REGSET_SYSTEM_CALL] = {
.core_note_type = NT_ARM_SYSTEM_CALL,
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
.get = system_call_get,
.set = system_call_set,
.set = system_call_set
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
.n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) +
(SVE_VQ_BYTES - 1)) / SVE_VQ_BYTES,
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
.n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16,
.size = 16,
.get = sve_get,
.set = sve_set,
.get_size = sve_get_size,
.set = sve_set
},
#endif /* CONFIG_ARM64_SVE */
};
static const struct user_regset_view user_aarch64_view = {
.name = "aarch64", .e_machine = EM_AARCH64,
.regsets = aarch64_regsets,
.n = sizeof(aarch64_regsets) / sizeof(aarch64_regsets[0])
};
const struct user_regset_view *current_user_regset_view(void)
{
return &user_aarch64_view;
}
const struct user_regset *find_regset(const struct user_regset_view *view,
unsigned int type)
static const struct user_regset *
find_regset(const struct user_regset *regset, unsigned int type, int n)
{
int i = 0;
for (i = 0; i < view->n; i++) {
if (view->regsets[i].core_note_type == type) {
return &view->regsets[i];
for (i = 0; i < n; i++) {
if (regset[i].core_note_type == type) {
return &regset[i];
}
}
return NULL;
@ -914,8 +900,8 @@ const struct user_regset *find_regset(const struct user_regset_view *view,
static long ptrace_regset(struct thread *thread, int req, long type, struct iovec *iov)
{
long rc = -EINVAL;
const struct user_regset *regset =
find_regset(&user_aarch64_view, type);
const struct user_regset *regset = find_regset(aarch64_regsets, type,
sizeof(aarch64_regsets) / sizeof(aarch64_regsets[0]));
if (!regset) {
kprintf("%s: not supported type 0x%x\n", __FUNCTION__, type);
@ -964,43 +950,30 @@ void ptrace_report_signal(struct thread *thread, int sig)
/* save thread_info, if called by ptrace_report_exec() */
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) {
memcpy(&tinfo, thread->ctx.thread, sizeof(struct thread_info));
thread->uctx->user_regs.regs[0] = 0;
}
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
if (!(thread->ptrace & PT_TRACED)) {
if(!(proc->ptrace & PT_TRACED)){
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
return;
}
/* Transition thread state */
thread->exit_status = sig;
/* Transition thread state */
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->status = PS_TRACED;
thread->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8))) &&
thread->ptrace & PTRACE_O_TRACEEXEC) {
/* PTRACE_O_TRACEEXEC: since Linux 3.0, the former
* thread ID can be retrieved with PTRACE_GETEVENTMSG.
* Report no change. */
thread->ptrace_eventmsg = thread->tid;
}
save_debugreg(thread->ptrace_debugreg);
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
thread->signal_flags |= SIGNAL_STOP_STOPPED;
}
else {
thread->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
if (thread == proc->main_thread) {
proc->status = PS_DELAY_TRACED;
parent_pid = proc->parent->pid;
}
else {
parent_pid = thread->report_proc->pid;
waitq_wakeup(&thread->report_proc->waitpid_q);
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;
} else {
proc->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
parent_pid = proc->parent->pid;
save_debugreg(thread->ptrace_debugreg);
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
memset(&info, '\0', sizeof info);
@ -1009,6 +982,10 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */
@ -1019,7 +996,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) {
memcpy(thread->ctx.thread, &tinfo, sizeof(struct thread_info));
}
arch_flush_icache_all();
}
long

File diff suppressed because it is too large

View File

@ -1,203 +0,0 @@
/* timer.c COPYRIGHT FUJITSU LIMITED 2018 */
#include <ihk/types.h>
#include <ihk/cpu.h>
#include <ihk/lock.h>
#include <sysreg.h>
#include <kmalloc.h>
#include <cls.h>
#include <cputype.h>
#include <irq.h>
#include <arch-timer.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_TIMER
#ifdef DEBUG_PRINT_TIMER
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 };
static int timer_intrid = INTRID_VIRT_TIMER;
static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val);
static void (*arch_timer_reg_write)(enum arch_timer_reg, uint32_t) =
arch_timer_virt_reg_write;
static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg);
static uint32_t (*arch_timer_reg_read)(enum arch_timer_reg) =
arch_timer_virt_reg_read;
static void arch_timer_phys_reg_write(enum arch_timer_reg reg, uint32_t val)
{
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntp_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
write_sysreg(val, cntp_tval_el0);
break;
}
isb();
}
static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val)
{
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntv_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
write_sysreg(val, cntv_tval_el0);
break;
}
isb();
}
static uint32_t arch_timer_phys_reg_read(enum arch_timer_reg reg)
{
uint32_t val = 0;
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = read_sysreg(cntp_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
val = read_sysreg(cntp_tval_el0);
break;
}
return val;
}
static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg)
{
uint32_t val = 0;
switch (reg) {
case ARCH_TIMER_REG_CTRL:
val = read_sysreg(cntv_ctl_el0);
break;
case ARCH_TIMER_REG_TVAL:
val = read_sysreg(cntv_tval_el0);
break;
}
return val;
}
static void timer_handler(void *priv)
{
unsigned long ctrl;
const int cpu = ihk_mc_get_processor_id();
dkprintf("CPU%d: catch %s timer\n", cpu,
((timer_intrid == INTRID_PHYS_TIMER) ||
(timer_intrid == INTRID_HYP_PHYS_TIMER)) ?
"physical" : "virtual");
ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL);
if (ctrl & ARCH_TIMER_CTRL_IT_STAT) {
const unsigned int clocks = per_cpu_timer_val[cpu];
struct cpu_local_var *v = get_this_cpu_local_var();
unsigned long irqstate;
/* set resched flag */
irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
/* gen control register value */
ctrl &= ~ARCH_TIMER_CTRL_IT_STAT;
/* set timer re-enable for periodic */
arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks);
arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl);
do_backlog();
}
}
static unsigned long is_use_virt_timer(void)
{
extern unsigned long ihk_param_use_virt_timer;
switch (ihk_param_use_virt_timer) {
case 0: /* physical */
case 1: /* virtual */
break;
default: /* invalid */
panic("PANIC: is_use_virt_timer(): timer select neither phys-timer nor virt-timer.\n");
break;
}
return ihk_param_use_virt_timer;
}
static struct ihk_mc_interrupt_handler timer_interrupt_handler = {
.func = timer_handler,
.priv = NULL,
};
/* other source use functions */
struct ihk_mc_interrupt_handler *get_timer_handler(void)
{
return &timer_interrupt_handler;
}
void
lapic_timer_enable(unsigned int clocks)
{
unsigned long ctrl = 0;
/* gen control register value */
ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_IT_STAT);
arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks);
arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl);
per_cpu_timer_val[ihk_mc_get_processor_id()] = clocks;
}
void
lapic_timer_disable()
{
unsigned long ctrl = 0;
ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL);
ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl);
per_cpu_timer_val[ihk_mc_get_processor_id()] = 0;
}
int get_timer_intrid(void)
{
return timer_intrid;
}
void arch_timer_init(void)
{
const unsigned long is_virt = is_use_virt_timer();
#ifdef CONFIG_ARM64_VHE
const unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1);
#endif /* CONFIG_ARM64_VHE */
if (is_virt) {
timer_intrid = INTRID_VIRT_TIMER;
arch_timer_reg_write = arch_timer_virt_reg_write;
arch_timer_reg_read = arch_timer_virt_reg_read;
} else {
timer_intrid = INTRID_PHYS_TIMER;
arch_timer_reg_write = arch_timer_phys_reg_write;
arch_timer_reg_read = arch_timer_phys_reg_read;
}
#ifdef CONFIG_ARM64_VHE
if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) {
if (is_virt) {
timer_intrid = INTRID_HYP_VIRT_TIMER;
} else {
timer_intrid = INTRID_HYP_PHYS_TIMER;
}
}
#endif /* CONFIG_ARM64_VHE */
}

View File

@ -1,4 +1,4 @@
/* traps.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* traps.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <ihk/context.h>
#include <ihk/debug.h>
#include <traps.h>
@ -29,14 +29,12 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *inf
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
const int from_user = interrupt_from_user(regs);
// /* TODO: implement lazy context saving/restoring */
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(1);
// WARN_ON(1);
kprintf("WARNING: CPU: %d PID: %d Trapped FP/ASIMD access.\n",
ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
/*
@ -53,9 +51,7 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
siginfo_t info;
unsigned int si_code = 0;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(1);
if (esr & FPEXC_IOF)
si_code = FPE_FLTINV;
@ -74,7 +70,7 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
info._sifields._sigfault.si_addr = (void*)regs->pc;
set_signal(SIGFPE, regs, &info);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
/* @ref.impl arch/arm64/kernel/traps.c */
@ -137,9 +133,8 @@ exit:
void do_undefinstr(struct pt_regs *regs)
{
siginfo_t info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
if (call_undef_hook(regs) == 0) {
goto out;
@ -152,7 +147,7 @@ void do_undefinstr(struct pt_regs *regs)
arm64_notify_die("Oops - undefined instruction", regs, &info, 0);
out:
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}
/*
@ -162,9 +157,7 @@ out:
void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
siginfo_t info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
kprintf("entering bad_mode !! (regs:0x%p, reason:%d, esr:0x%x)\n", regs, reason, esr);
kprintf("esr Analyse:\n");
@ -180,5 +173,5 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(0);
}

View File

@ -1,4 +1,4 @@
/* vdso.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* vdso.c COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso.c */
#include <arch-memory.h>
@ -11,17 +11,21 @@
#include <process.h>
#include <string.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <ikc/queue.h>
#include <vdso.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_VDSO
#ifdef DEBUG_PRINT_VDSO
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 1
struct vdso {
long busy;
@ -32,6 +36,7 @@ struct vdso {
long lbase;
long offset_sigtramp;
};
#endif /*POSTK_DEBUG_ARCH_DEP_52*/
extern char vdso_start, vdso_end;
static struct vdso vdso;
@ -85,9 +90,26 @@ int arch_setup_vdso(void)
kprintf("Enable Host mapping vDSO.\n");
return 0;
}
kprintf("Enable McK mapping vDSO.\n");
panic("Only support host mapping vDSO");
return -1;
if (memcmp(&vdso_start, "\177ELF", 4)) {
panic("vDSO is not a valid ELF object!\n");
}
vdso.vdso_npages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
dkprintf("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
vdso.vdso_npages + 1, vdso.vdso_npages, &vdso_start, 1L, &tod_data);
if (vdso.vdso_npages != 1) {
panic("vDSO is not a valid number of pages!\n");
}
vdso.vvar_phys = virt_to_phys((void *)&tod_data);
vdso.vdso_physlist[0] = virt_to_phys((void *)&vdso_start);
vdso.lbase = VDSO_LBASE;
vdso.offset_sigtramp = vdso_offset_sigtramp;
return 0;
}
static int get_free_area(struct process_vm *vm, size_t len, intptr_t hint,
@ -136,7 +158,6 @@ int arch_map_vdso(struct process_vm *vm)
unsigned long start, end;
unsigned long flag;
int ret;
struct vm_range *range;
vdso_text_len = vdso.vdso_npages << PAGE_SHIFT;
/* Be sure to map the data page */
@ -155,7 +176,7 @@ int arch_map_vdso(struct process_vm *vm)
flag = VR_REMOTE | VR_PROT_READ;
flag |= VRFLAG_PROT_TO_MAXPROT(flag);
ret = add_process_memory_range(vm, start, end, vdso.vvar_phys, flag,
NULL, 0, PAGE_SHIFT, &range);
NULL, 0, PAGE_SHIFT, NULL);
if (ret != 0){
dkprintf("ERROR: adding memory range for tod_data\n");
goto exit;
@ -167,7 +188,7 @@ int arch_map_vdso(struct process_vm *vm)
flag = VR_REMOTE | VR_PROT_READ | VR_PROT_EXEC;
flag |= VRFLAG_PROT_TO_MAXPROT(flag);
ret = add_process_memory_range(vm, start, end, vdso.vdso_physlist[0], flag,
NULL, 0, PAGE_SHIFT, &range);
NULL, 0, PAGE_SHIFT, NULL);
if (ret != 0) {
dkprintf("ERROR: adding memory range for vdso_text\n");

View File

@ -0,0 +1,33 @@
/* vdso.so.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/vdso.S */
/*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <arch-memory.h>
#include <vdso-so-path.h>
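/*
 * Embed the pre-built vdso.so page-aligned in the kernel image;
 * vdso_start/vdso_end bracket it for arch_setup_vdso().
 */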
.section ".vdso.txet", "aw"
.globl vdso_start, vdso_end
.balign PAGE_SIZE
vdso_start:
.incbin VDSO_SO_PATH
.balign PAGE_SIZE
vdso_end:
.previous

View File

@ -0,0 +1,131 @@
# Makefile.in COPYRIGHT FUJITSU LIMITED 2016
# @ref.impl arch/arm64/kernel/vdso/Makefile
# Building a vDSO image for AArch64.
HOST_DIR=@KDIR@
HOST_CONFIG=$(HOST_DIR)/.config
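# Read the host kernel's page-size selection out of its .config so the
# vDSO is linked with a PAGE_SIZE that matches the host (see CFLAGS_lds).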
HOST_KERNEL_CONFIG_ARM64_4K_PAGES=$(shell grep -E "^CONFIG_ARM64_4K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_4K_PAGES=||g')
HOST_KERNEL_CONFIG_ARM64_16K_PAGES=$(shell grep -E "^CONFIG_ARM64_16K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_16K_PAGES=||g')
HOST_KERNEL_CONFIG_ARM64_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_64K_PAGES=||g')
VDSOSRC = @abs_srcdir@
VDSOBUILD = @abs_builddir@
INCDIR = $(VDSOSRC)/../include
ECHO_SUFFIX = [VDSO]
VDSOOBJS := gettimeofday.o
DESTOBJS = $(addprefix $(VDSOBUILD)/, $(VDSOOBJS))
VDSOASMOBJS := note.o sigreturn.o
DESTASMOBJS = $(addprefix $(VDSOBUILD)/, $(VDSOASMOBJS))
$(if $(VDSOSRC),,$(error IHK output directory is not specified))
$(if $(TARGET),,$(error Target is not specified))
#CFLAGS := -nostdinc -mlittle-endian -Wall -mabi=lp64 -Wa,-gdwarf-2
CFLAGS := -nostdinc -mlittle-endian -Wall -Wa,-gdwarf-2
CFLAGS += -D__KERNEL__ -I$(SRC)/include
CFLAGS += -I$(SRC)/../lib/include -I$(INCDIR) -I$(IHKBASE)/smp/arm64/include
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i)))
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i)))
LDFLAGS := -nostdinc -mlittle-endian -Wall -Wundef -Wstrict-prototypes
LDFLAGS += -Wno-trigraphs -fno-strict-aliasing -fno-common
LDFLAGS += -Werror-implicit-function-declaration -Wno-format-security
#LDFLAGS += -std=gnu89 -mgeneral-regs-only -mabi=lp64 -O2
LDFLAGS += -std=gnu89 -mgeneral-regs-only -O2
LDFLAGS += -Wframe-larger-than=2048 -fno-stack-protector
LDFLAGS += -fno-delete-null-pointer-checks -Wno-unused-but-set-variable
LDFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
LDFLAGS += -fno-var-tracking-assignments -g -Wdeclaration-after-statement
LDFLAGS += -Wno-pointer-sign -fno-strict-overflow -fconserve-stack
LDFLAGS += -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time
LDFLAGS += -shared -fno-common -fno-builtin -nostdlib
LDFLAGS += -Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv -Wl,-n -Wl,-T
LDFLAGS += --param=allow-store-data-races=0 -DCC_HAVE_ASM_GOTO
LDFLAGS += -D"KBUILD_STR(s)=\#s" -D"KBUILD_BASENAME=KBUILD_STR(vdso.so)"
LDFLAGS += -D"KBUILD_MODNAME=KBUILD_STR(vdso.so)" -D__KERNEL__
DEPSRCS = $(wildcard $(VDSOSRC)/*.c $(VDSOSRC)/*.S)
CFLAGS_lds := -E -P -C -U$(ARCH)
CFLAGS_lds += -nostdinc
CFLAGS_lds += -mlittle-endian
CFLAGS_lds += -D__KERNEL__
CFLAGS_lds += -D__ASSEMBLY__
CFLAGS_lds += -DLINKER_SCRIPT
CFLAGS_lds += -DVDSO_LBASE=0
ifeq ($(HOST_KERNEL_CONFIG_ARM64_4K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x1000
endif
ifeq ($(HOST_KERNEL_CONFIG_ARM64_16K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x4000
endif
ifeq ($(HOST_KERNEL_CONFIG_ARM64_64K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x10000
endif
#load mckernel config (append CPPFLAGS)
include @abs_top_builddir@/../ihk/cokernel/$(TARGETDIR)/Makefile.predefines
default: all
.PHONY: all clean depend prepare
all: depend $(VDSOBUILD)/vdso.so $(VDSOBUILD)/../include/vdso-offsets.h $(VDSOBUILD)/../include/vdso-so-path.h
# Strip rule for the .so file
$(VDSOBUILD)/vdso.so: OBJCOPYFLAGS := -S
$(VDSOBUILD)/vdso.so: $(VDSOBUILD)/vdso.so.dbg
$(objcopy_cmd)
# Generate VDSO offsets using helper script
$(VDSOBUILD)/../include/vdso-offsets.h: $(VDSOBUILD)/vdso.so.dbg
$(call echo_cmd,VDSOSYM,$<)
@mkdir -p $(VDSOBUILD)/../include
@nm $< | sh $(VDSOSRC)/gen_vdso_offsets.sh | LC_ALL=C sort > $@
$(VDSOBUILD)/../include/vdso-so-path.h:
@echo "#define VDSO_SO_PATH \"@abs_builddir@/vdso.so\"" > $@
# Link rule for the .so file, .lds has to be first
$(VDSOBUILD)/vdso.so.dbg: $(VDSOBUILD)/vdso.lds $(DESTOBJS) $(DESTASMOBJS)
$(ld_cmd)
$(VDSOBUILD)/vdso.lds: $(VDSOSRC)/vdso.lds.S
$(lds_cmd)
clean:
$(rm_cmd) $(DESTOBJS) $(DESTASMOBJS) $(VDSOBUILD)/Makefile.dep $(VDSOBUILD)/vdso.* -r $(VDSOBUILD)/../include
depend: $(VDSOBUILD)/Makefile.dep
$(VDSOBUILD)/Makefile.dep:
$(call dep_cmd,$(DEPSRCS))
prepare:
@$(RM) $(VDSOBUILD)/Makefile.dep
-include $(VDSOBUILD)/Makefile.dep
# Actual build commands
ifeq ($(V),1)
echo_cmd =
submake = make
else
echo_cmd = @echo ' ($(TARGET))' $1 $(ECHO_SUFFIX) $2;
submake = make --no-print-directory
endif
cc_cmd = $(call echo_cmd,CC,$<)$(CC) $(CFLAGS) -c -o $@
ld_cmd = $(call echo_cmd,LD,$@)$(CC) $(LDFLAGS) $^ -o $@
dep_cmd = $(call echo_cmd,DEPEND,)$(CC) $(CFLAGS) -MM $1 > $@
rm_cmd = $(call echo_cmd,CLEAN,)$(RM)
objcopy_cmd = $(call echo_cmd,OBJCOPY,$<)$(OBJCOPY) $(OBJCOPYFLAGS) $< $@
lds_cmd = $(call echo_cmd,LDS,$<)$(CC) $(CFLAGS_lds) -c -o $@ $<
$(DESTOBJS):
$(cc_cmd) $(addprefix $(VDSOSRC)/, $(notdir $(@:.o=.c)))
$(DESTASMOBJS):
$(cc_cmd) $(addprefix $(VDSOSRC)/, $(notdir $(@:.o=.S))) -D__ASSEMBLY__

View File

@ -0,0 +1,17 @@
#!/bin/sh
# gen_vdso_offsets.sh COPYRIGHT FUJITSU LIMITED 2016
# @ref.impl arch/arm64/kernel/vdso/gen_vdso_offsets.sh
#
# Match symbols in the DSO that look like VDSO_*; produce a header file
# of constant offsets into the shared object.
#
# Doing this inside the Makefile will break the $(filter-out) function,
# causing Kbuild to rebuild the vdso-offsets header file every time.
#
# Author: Will Deacon <will.deacon@arm.com>
#
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
's/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'

View File

@ -0,0 +1,205 @@
/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <time.h>
#include <syscall.h>
#include <registers.h>
#include <ihk/atomic.h>
extern int __kernel_gettimeofday(struct timeval *tv, void *tz);
static inline void cpu_pause_for_vsyscall(void)
{
asm volatile ("yield" ::: "memory");
return;
}
static inline void calculate_time_from_tsc(struct timespec *ts,
struct tod_data_s *tod_data)
{
long ver;
unsigned long current_tsc;
__time_t sec_delta;
long ns_delta;
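/*
 * Seqlock-style read: the version counter is odd while settimeofday()
 * is updating tod_data, and a changed value means we must re-read.
 */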
for (;;) {
while ((ver = ihk_atomic64_read(&tod_data->version)) & 1) {
/* settimeofday() is in progress */
cpu_pause_for_vsyscall();
}
rmb();
*ts = tod_data->origin;
rmb();
if (ver == ihk_atomic64_read(&tod_data->version)) {
break;
}
/* settimeofday() has intervened */
cpu_pause_for_vsyscall();
}
current_tsc = rdtsc();
sec_delta = current_tsc / tod_data->clocks_per_sec;
ns_delta = NS_PER_SEC * (current_tsc % tod_data->clocks_per_sec)
/ tod_data->clocks_per_sec;
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
ts->tv_sec += sec_delta;
ts->tv_nsec += ns_delta;
if (ts->tv_nsec >= NS_PER_SEC) {
ts->tv_nsec -= NS_PER_SEC;
++ts->tv_sec;
}
return;
}
static inline struct tod_data_s *get_tod_data_addr(void)
{
unsigned long addr;
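/*
 * 'adr' yields a PC-relative address, so this resolves correctly at
 * whatever base the vDSO is mapped; _tod_data is placed one page below
 * the vDSO text by the linker script.
 */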
asm volatile("adr %0, _tod_data\n"
: "=r" (addr)
:
: "memory");
return (struct tod_data_s *)addr;
}
int __kernel_gettimeofday(struct timeval *tv, void *tz)
{
long ret;
struct tod_data_s *tod_data;
struct timespec ats;
if(!tv && !tz) {
/* nothing to do */
return 0;
}
tod_data = get_tod_data_addr();
/* Do it locally if supported (timezone queries always use the syscall) */
if (!tz && tod_data->do_local) {
calculate_time_from_tsc(&ats, tod_data);
tv->tv_sec = ats.tv_sec;
tv->tv_usec = ats.tv_nsec / 1000;
return 0;
}
/* Otherwise, fall back to the syscall */
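/* AArch64 syscall convention: number in w8, arguments in x0/x1, trap via svc */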
asm volatile("mov w8, %w1\n"
"mov x0, %2\n"
"mov x1, %3\n"
"svc #0\n"
"mov %0, x0\n"
: "=r" (ret)
: "r" (__NR_gettimeofday), "r"(tv), "r"(tz)
: "memory");
if (ret) {
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
}
return (int)ret;
}
/*
* The IDs of the various system clocks (for POSIX.1b interval timers):
* @ref.impl include/uapi/linux/time.h
*/
// #define CLOCK_REALTIME 0
// #define CLOCK_MONOTONIC 1
// #define CLOCK_PROCESS_CPUTIME_ID 2
// #define CLOCK_THREAD_CPUTIME_ID 3
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
#define CLOCK_BOOTTIME 7
#define CLOCK_REALTIME_ALARM 8
#define CLOCK_BOOTTIME_ALARM 9
#define CLOCK_SGI_CYCLE 10 /* Hardware specific */
#define CLOCK_TAI 11
#define HIGH_RES_NSEC 1 /* nsec. */
#define CLOCK_REALTIME_RES HIGH_RES_NSEC
#define CLOCK_COARSE_RES ((NS_PER_SEC+CONFIG_HZ/2)/CONFIG_HZ) /* 10,000,000 nsec at CONFIG_HZ=100 */
typedef int clockid_t;
int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
{
long ret;
struct tod_data_s *tod_data;
struct timespec ats;
if (!tp) {
/* nothing to do */
return 0;
}
tod_data = get_tod_data_addr();
/* Do it locally if supported (only CLOCK_REALTIME is handled here) */
if (tod_data->do_local && clk_id == CLOCK_REALTIME) {
calculate_time_from_tsc(&ats, tod_data);
tp->tv_sec = ats.tv_sec;
tp->tv_nsec = ats.tv_nsec;
return 0;
}
/* Otherwise, fall back to the syscall */
asm volatile("mov w8, %w1\n"
"mov x0, %2\n"
"mov x1, %3\n"
"svc #0\n"
"mov %0, x0\n"
: "=r" (ret)
: "r" (__NR_clock_gettime), "r"(clk_id), "r"(tp)
: "memory");
return (int)ret;
}
int __kernel_clock_getres(clockid_t clk_id, struct timespec *res)
{
long ret;
if (!res) {
/* nothing to do */
return 0;
}
switch (clk_id) {
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
res->tv_sec = 0;
res->tv_nsec = CLOCK_REALTIME_RES;
return 0;
break;
case CLOCK_REALTIME_COARSE:
case CLOCK_MONOTONIC_COARSE:
res->tv_sec = 0;
res->tv_nsec = CLOCK_COARSE_RES;
return 0;
break;
default:
break;
}
/* Otherwise, fall back to the syscall */
asm volatile("mov w8, %w1\n"
"mov x0, %2\n"
"mov x1, %3\n"
"svc #0\n"
"mov %0, x0\n"
: "=r" (ret)
: "r" (__NR_clock_getres), "r"(clk_id), "r"(res)
: "memory");
return (int)ret;
}

View File

@ -0,0 +1,28 @@
/* note.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/note.S */
/*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
*/
#include <elfnote.h>
ELFNOTE_START(McKernel, 0, "a")
.long 0x10000 /* MCKERNEL_VERSION_CODE */
ELFNOTE_END

View File

@ -0,0 +1,39 @@
/* sigreturn.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/sigreturn.S */
/*
* Sigreturn trampoline for returning from a signal when the SA_RESTORER
* flag is not set.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linkage.h>
#include "syscall.h"
.text
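/*
 * The stray nop below mirrors the upstream arm64 vDSO. Presumably it
 * keeps (return address - 4) inside valid unwind info when a debugger
 * unwinds out of a signal handler -- an assumption, as upstream leaves
 * it uncommented.
 */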
nop
ENTRY(__kernel_rt_sigreturn)
.cfi_startproc
.cfi_signal_frame
.cfi_def_cfa x29, 0
.cfi_offset x29, 0 * 8
.cfi_offset x30, 1 * 8
mov x8, #__NR_rt_sigreturn
svc #0
.cfi_endproc
ENDPROC(__kernel_rt_sigreturn)

View File

@ -0,0 +1,15 @@
/* syscall.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_VDSO_SYSCALL_H
#define __HEADER_ARM64_VDSO_SYSCALL_H
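/*
 * Including syscall_list.h with these definitions turns every syscall
 * entry into an assembler ".equ __NR_<name>, <number>" constant for use
 * from the vDSO assembly sources.
 */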
#define DECLARATOR(number,name) .equ __NR_##name, number
#define SYSCALL_HANDLED(number,name) DECLARATOR(number,name)
#define SYSCALL_DELEGATED(number,name) DECLARATOR(number,name)
#include <syscall_list.h>
#undef DECLARATOR
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
#endif /* !__HEADER_ARM64_VDSO_SYSCALL_H */

View File

@ -0,0 +1,96 @@
/* vdso.lds.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/vdso.lds.S */
/*
* GNU linker script for the VDSO library.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
* Heavily based on the vDSO linker scripts for other archs.
*/
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
SECTIONS
{
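/*
 * The time-of-day data page is mapped by the kernel immediately below
 * the vDSO text, so _tod_data sits one page before the load base.
 */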
PROVIDE(_tod_data = . - PAGE_SIZE);
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.note : { *(.note.*) } :text :note
. = ALIGN(16);
.text : { *(.text*) } :text =0xd503201f
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.dynamic : { *(.dynamic) } :text :dynamic
.rodata : { *(.rodata*) } :text
_end = .;
PROVIDE(end = .);
/DISCARD/ : {
*(.note.GNU-stack)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
}
}
/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
}
/*
* This controls what symbols we export from the DSO.
*/
VERSION
{
LINUX_2.6.39 {
global:
__kernel_rt_sigreturn;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
local: *;
};
}
/*
* Make the sigreturn code visible to the kernel.
*/
VDSO_sigtramp = __kernel_rt_sigreturn;

View File

@ -9,29 +9,29 @@ PHDRS
SECTIONS
{
. = SIZEOF_HEADERS;
. = ALIGN(4096);
. = ALIGN(4096);
.text : {
*(.text)
*(.text)
} :text
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
. = ALIGN(8);
.bss : {
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@ -18,7 +18,7 @@ extern char data_start[], data_end[];
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define MAP_ST_START 0xffff800000000000UL
/* MAP_KERNEL_START is defined by cmake */
#define MAP_KERNEL_START 0xffffffff80000000UL
#define PTL4_SHIFT 39
#define PTL3_SHIFT 30

View File

@ -1,2 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o coredump.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o

View File

@ -1,9 +1,8 @@
/* coredump.c COPYRIGHT FUJITSU LIMITED 2018-2019 */
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <process.h>
#include <elfcore.h>
void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct thread *thread, void *regs0, int sig)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
{
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
@ -19,6 +18,8 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
@ -27,14 +28,6 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct prstatus64_timeval pr_cstime;
*/
prstatus->pr_pid = thread->tid;
if (thread->proc->parent) {
prstatus->pr_ppid = thread->proc->parent->pid;
}
prstatus->pr_info.si_signo = sig;
prstatus->pr_cursig = sig;
prstatus->pr_reg[0] = _r15;
prstatus->pr_reg[1] = _r14;
prstatus->pr_reg[2] = _r13;
@ -63,12 +56,4 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
prstatus->pr_fpvalid = 0; /* We assume no fp */
}
void arch_fill_thread_core_info(struct note *head,
struct thread *thread, void *regs)
{
}
int arch_get_thread_core_info_size(void)
{
return 0;
}
#endif /* POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,3 @@
/* cpu.c COPYRIGHT FUJITSU LIMITED 2018-2019 */
/**
* \file cpu.c
* License details are found in the file LICENSE.
@ -16,6 +15,7 @@
*/
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <ihk/mm.h>
#include <types.h>
#include <errno.h>
@ -31,7 +31,6 @@
#include <prctl.h>
#include <page.h>
#include <kmalloc.h>
#include <ihk/debug.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@ -44,9 +43,11 @@
#define LAPIC_ICR0 0x300
#define LAPIC_ICR2 0x310
#define LAPIC_ESR 0x280
#ifdef POSTK_DEBUG_ARCH_DEP_75 /* x86 depend hide */
#define LOCAL_TIMER_VECTOR 0xef
#define LOCAL_PERF_VECTOR 0xf0
#define LOCAL_SMP_FUNC_CALL_VECTOR 0xf1
#endif /* POSTK_DEBUG_ARCH_DEP_75 */
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
@ -68,8 +69,11 @@
//#define DEBUG_PRINT_CPU
#ifdef DEBUG_PRINT_CPU
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif
static void *lapic_vp;
@ -89,10 +93,9 @@ void x86_set_warm_reset(unsigned long ip, char *first_page_va);
void x86_init_perfctr(void);
int gettime_local_support = 0;
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
extern int kprintf(const char *format, ...);
extern int interrupt_from_user(void *);
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
static struct idt_entry{
uint32_t desc[4];
@ -145,7 +148,7 @@ void reload_idt(void)
}
static struct list_head handlers[256 - 32];
extern char nmi_handler[];
extern char nmi[];
extern char page_fault[], general_protection_exception[];
extern char debug_exception[], int3_exception[];
@ -172,7 +175,7 @@ static void init_idt(void)
set_idt_entry(i, generic_common_handlers[i]);
}
set_idt_entry(2, (uintptr_t)nmi_handler);
set_idt_entry(2, (uintptr_t)nmi);
set_idt_entry(13, (unsigned long)general_protection_exception);
set_idt_entry(14, (unsigned long)page_fault);
@ -821,14 +824,11 @@ void call_ap_func(void (*next_func)(void))
next_func();
}
struct page_table *get_init_page_table(void);
void setup_x86_ap(void (*next_func)(void))
{
unsigned long rsp;
cpu_disable_interrupt();
ihk_mc_load_page_table(get_init_page_table());
assign_processor_id();
init_smp_processor();
@ -847,6 +847,9 @@ void setup_x86_ap(void (*next_func)(void))
}
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
void check_sig_pending();
extern void tlb_flush_handler(int vector);
void __show_stack(uintptr_t *sp) {
@ -874,7 +877,7 @@ void interrupt_exit(struct x86_user_context *regs)
cpu_enable_interrupt();
check_sig_pending();
check_need_resched();
check_signal(0, regs, -1);
check_signal(0, regs, 0);
}
else {
check_sig_pending();
@ -889,8 +892,7 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
lapic_ack();
++v->in_interrupt;
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
@ -952,8 +954,6 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
do_backlog();
}
else if (vector == LOCAL_PERF_VECTOR) {
struct siginfo info;
@ -1007,22 +1007,14 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
}
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(interrupt_from_user(regs)? 0: 1);
--v->in_interrupt;
/* for migration by IPI */
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
}
void gpe_handler(struct x86_user_context *regs)
{
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
@ -1031,8 +1023,7 @@ void gpe_handler(struct x86_user_context *regs)
}
set_signal(SIGSEGV, regs, NULL);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(interrupt_from_user(regs)? 0: 1);
panic("GPF");
}
@ -1042,8 +1033,7 @@ void debug_handler(struct x86_user_context *regs)
int si_code = 0;
struct siginfo info;
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
#ifdef DEBUG_PRINT_CPU
kprintf("debug exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
@ -1062,16 +1052,14 @@ void debug_handler(struct x86_user_context *regs)
info.si_code = si_code;
set_signal(SIGTRAP, regs, &info);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(interrupt_from_user(regs)? 0: 1);
}
void int3_handler(struct x86_user_context *regs)
{
struct siginfo info;
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(interrupt_from_user(regs)? 1: 2);
#ifdef DEBUG_PRINT_CPU
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
@ -1082,8 +1070,59 @@ void int3_handler(struct x86_user_context *regs)
info.si_code = TRAP_BRKPT;
set_signal(SIGTRAP, regs, &info);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
set_cputime(interrupt_from_user(regs)? 0: 1);
}
void
unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
{
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = thread->vm;
struct vm_range *range;
unsigned long irqflags;
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
irqflags = kprintf_lock();
__kprintf("Page fault for 0x%lx\n", address);
__kprintf("%s for %s access in %s mode (reserved bit %s set), "
"it %s an instruction fetch\n",
(error & PF_PROT ? "protection fault" : "no page found"),
(error & PF_WRITE ? "write" : "read"),
(error & PF_USER ? "user" : "kernel"),
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
range = lookup_process_memory_range(vm, address, address+1);
if (range) {
__kprintf("address is in range, flag: 0x%lx\n",
range->flag);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
} else {
__kprintf("address is out of range! \n");
}
kprintf_unlock(irqflags);
/* TODO */
ihk_mc_debug_show_interrupt_context(regs);
if (!(error & PF_USER)) {
panic("panic: kernel mode PF");
}
//dkprintf("now dump a core file\n");
//coredump(proc, regs);
#ifdef DEBUG_PRINT_MEM
{
uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs);
kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
sp[0], sp[1], sp[2], sp[3]);
}
#endif
return;
}
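The fault report above decodes the x86 page-fault error code bit by bit. A self-contained sketch of the same decoding, assuming the standard x86 bit assignments for PF_PROT through PF_INSTR (bits 0-4, as architecturally defined):

	#include <stdio.h>

	/* Standard x86 page-fault error-code bits (assumed values). */
	#define PF_PROT  0x01  /* 0: not-present page, 1: protection violation */
	#define PF_WRITE 0x02  /* 0: read access,      1: write access         */
	#define PF_USER  0x04  /* 0: kernel mode,      1: user mode            */
	#define PF_RSVD  0x08  /* reserved bit set in a paging entry           */
	#define PF_INSTR 0x10  /* fault on an instruction fetch                */

	static void decode_pf_error(unsigned long error)
	{
		printf("%s for %s access in %s mode%s%s\n",
		       (error & PF_PROT) ? "protection fault" : "no page found",
		       (error & PF_WRITE) ? "write" : "read",
		       (error & PF_USER) ? "user" : "kernel",
		       (error & PF_RSVD) ? ", reserved bit set" : "",
		       (error & PF_INSTR) ? ", instruction fetch" : "");
	}

	int main(void)
	{
		decode_pf_error(0x07);	/* user-mode write to a protected page */
		return 0;
	}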
static void outb(uint8_t v, uint16_t port)
@ -1186,6 +1225,13 @@ void cpu_pause(void)
asm volatile("pause" ::: "memory");
}
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/processor.h */
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
void cpu_relax(void)
{
asm volatile("rep; nop" ::: "memory");
}
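cpu_relax() (REP;NOP, i.e. PAUSE) hints to the CPU that the caller is spinning, which saves power and avoids memory-order mis-speculation penalties on SMT siblings. A typical busy-wait shape as a sketch; the ready flag is hypothetical:

	/* Hypothetical busy-wait on a flag set by another CPU. */
	volatile int ready;

	static void wait_for_ready(void)
	{
		while (!ready)
			cpu_relax();	/* PAUSE inside the spin loop */
	}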
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled > 0;
@ -1205,15 +1251,6 @@ unsigned long cpu_disable_interrupt_save(void)
return flags;
}
unsigned long cpu_enable_interrupt_save(void)
{
unsigned long flags;
asm volatile("pushf; pop %0; sti" : "=r"(flags) : : "memory", "cc");
return flags;
}
/*@
@ behavior valid_vector:
@ assumes 32 <= vector <= 255;
@ -1257,7 +1294,7 @@ void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
}
extern char trampoline_code_data[], trampoline_code_data_end[];
struct page_table *get_boot_page_table(void);
struct page_table *get_init_page_table(void);
unsigned long get_transit_page_table(void);
/* reusable, but not reentrant */
@ -1281,10 +1318,9 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
memcpy(p, trampoline_code_data,
trampoline_code_data_end - trampoline_code_data);
p[1] = (unsigned long)virt_to_phys(get_boot_page_table());
p[1] = (unsigned long)virt_to_phys(get_init_page_table());
p[2] = (unsigned long)setup_x86_ap;
p[3] = pc;
p[4] = (unsigned long)get_x86_cpu_local_kstack(cpuid);
p[6] = (unsigned long)get_transit_page_table();
if (!p[6]) {
p[6] = p[1];
@ -1402,11 +1438,13 @@ long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int
}
#endif /* POSTK_DEBUG_ARCH_DEP_42 */
#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_thread() function */
void arch_clone_thread(struct thread *othread, unsigned long pc,
unsigned long sp, struct thread *nthread)
{
return;
}
#endif /* POSTK_DEBUG_ARCH_DEP_23 */
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
{
@ -1510,8 +1548,7 @@ void arch_print_pre_interrupt_stack(const struct x86_basic_regs *regs) {
__print_stack(rbp, regs->rip);
}
void arch_print_stack(void)
{
void arch_print_stack() {
struct stack *rbp;
__kprintf("Approximative stack trace:\n");
@ -1559,13 +1596,6 @@ return;
kprintf_unlock(irqflags);
}
void arch_cpu_stop(void)
{
while (1) {
cpu_halt();
}
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ -1610,25 +1640,23 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
}
/*@
@ requires \valid_cpuid(cpu); // valid CPU logical ID
@ requires \valid_apicid(cpu); // valid APIC ID or not
@ ensures \result == 0
@*/
int ihk_mc_interrupt_cpu(int cpu, int vector)
{
if (cpu < 0 || cpu >= num_processors) {
kprintf("%s: invalid CPU id: %d\n", __func__, cpu);
return -1;
}
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
x86_issue_ipi(get_x86_cpu_local_variable(cpu)->apic_id, vector);
x86_issue_ipi(cpu, vector);
return 0;
}
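Note the interface change visible in this hunk: the function now takes a logical CPU id, range-checks it against num_processors, and resolves the APIC id internally, where the other variant handed the argument straight to x86_issue_ipi(). A hedged caller sketch; target_cpu is hypothetical:

	/* Hypothetical caller: pass a logical CPU id, not an APIC id. */
	int target_cpu = 3;

	if (ihk_mc_interrupt_cpu(target_cpu, LOCAL_SMP_FUNC_CALL_VECTOR) < 0)
		kprintf("invalid target CPU %d\n", target_cpu);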
#ifdef POSTK_DEBUG_ARCH_DEP_22
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
struct thread *arch_switch_context(struct thread *prev, struct thread *next)
{
struct thread *last;
struct mcs_rwlock_node_irqsave lock;
dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base);
@ -1636,7 +1664,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
/* Set up new TLS.. */
ihk_mc_init_user_tlsbase(next->uctx, next->tlsblock_base);
#ifdef ENABLE_PERF
/* Performance monitoring inherit */
if(next->proc->monitoring_event) {
if(next->proc->perf_status == PP_RESET)
@ -1646,10 +1673,9 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
perf_start(next->proc->monitoring_event);
}
}
#endif
#ifdef PROFILE_ENABLE
if (prev && prev->profile && prev->profile_start_ts != 0) {
if (prev->profile && prev->profile_start_ts != 0) {
prev->profile_elapsed_ts +=
(rdtsc() - prev->profile_start_ts);
prev->profile_start_ts = 0;
@ -1661,28 +1687,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
#endif
if (prev) {
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) {
case PS_DELAY_STOPPED:
prev->proc->status = PS_STOPPED;
break;
case PS_DELAY_TRACED:
prev->proc->status = PS_TRACED;
break;
default:
break;
}
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q);
} else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
}
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
}
else {
@ -1690,6 +1694,7 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
}
return last;
}
#endif
/*@
@ requires \valid(thread);
@ -1722,7 +1727,7 @@ check_and_allocate_fp_regs(struct thread *thread)
if (!thread->fp_regs) {
kprintf("error: allocating fp_regs pages\n");
result = -ENOMEM;
result = 1;
goto out;
}
@ -1735,14 +1740,12 @@ out:
/*@
@ requires \valid(thread);
@*/
int
void
save_fp_regs(struct thread *thread)
{
int ret = 0;
ret = check_and_allocate_fp_regs(thread);
if (ret) {
goto out;
if (check_and_allocate_fp_regs(thread) != 0) {
// alloc error
return;
}
if (xsave_available) {
@ -1757,25 +1760,23 @@ save_fp_regs(struct thread *thread)
dkprintf("fp_regs for TID %d saved\n", thread->tid);
}
out:
return ret;
}
int copy_fp_regs(struct thread *from, struct thread *to)
void copy_fp_regs(struct thread *from, struct thread *to)
{
int ret = 0;
if (from->fp_regs != NULL) {
ret = check_and_allocate_fp_regs(to);
if (!ret) {
memcpy(to->fp_regs,
from->fp_regs,
sizeof(fp_regs_struct));
}
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
}
return ret;
}
#ifdef POSTK_DEBUG_TEMP_FIX_19
void
clear_fp_regs(struct thread *thread)
{
return;
}
#endif /* POSTK_DEBUG_TEMP_FIX_19 */
/*@
@ requires \valid(thread);
@ assigns thread->fp_regs;
@ -1783,11 +1784,8 @@ int copy_fp_regs(struct thread *from, struct thread *to)
void
restore_fp_regs(struct thread *thread)
{
if (!thread->fp_regs) {
// only clear fpregs.
clear_fp_regs();
if (!thread->fp_regs)
return;
}
if (xsave_available) {
unsigned int low, high;
@ -1806,13 +1804,6 @@ restore_fp_regs(struct thread *thread)
//release_fp_regs(thread);
}
void clear_fp_regs(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
restore_fp_regs(&v->idle);
}
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{
ihk_mc_user_context_t *uctx = thread->uctx;
@ -1846,10 +1837,6 @@ ihk_mc_init_user_tlsbase(ihk_mc_user_context_t *ctx,
do_arch_prctl(ARCH_SET_FS, tls_base_addr);
}
void arch_flush_icache_all(void)
{
return;
}
/*@
@ assigns \nothing;
@ -2003,92 +1990,6 @@ mod_nmi_ctx(void *nmi_ctx, void (*func)())
l[i++] = 0x28; // KERNEL DS
}
void arch_save_panic_regs(void *irq_regs)
{
struct thread *current = cpu_local_var(current);
struct x86_user_context *regs =
(struct x86_user_context *)irq_regs;
struct x86_cpu_local_variables *x86v =
get_x86_cpu_local_variable(ihk_mc_get_processor_id());
struct segment_regs {
uint32_t rflags;
uint32_t cs;
uint32_t ss;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
} *sregs;
/* Kernel space? */
if (regs->gpr.rip > USER_END) {
x86v->panic_regs[0] = regs->gpr.rax;
x86v->panic_regs[1] = regs->gpr.rbx;
x86v->panic_regs[2] = regs->gpr.rcx;
x86v->panic_regs[3] = regs->gpr.rdx;
x86v->panic_regs[4] = regs->gpr.rsi;
x86v->panic_regs[5] = regs->gpr.rdi;
x86v->panic_regs[6] = regs->gpr.rbp;
x86v->panic_regs[7] = regs->gpr.rsp;
x86v->panic_regs[8] = regs->gpr.r8;
x86v->panic_regs[9] = regs->gpr.r9;
x86v->panic_regs[10] = regs->gpr.r10;
x86v->panic_regs[11] = regs->gpr.r11;
x86v->panic_regs[12] = regs->gpr.r12;
x86v->panic_regs[13] = regs->gpr.r13;
x86v->panic_regs[14] = regs->gpr.r14;
x86v->panic_regs[15] = regs->gpr.r15;
x86v->panic_regs[16] = regs->gpr.rip;
sregs = (struct segment_regs *)&x86v->panic_regs[17];
sregs->rflags = regs->gpr.rflags;
sregs->cs = regs->gpr.cs;
sregs->ss = regs->gpr.ss;
sregs->ds = regs->sr.ds;
sregs->es = regs->sr.es;
sregs->fs = regs->sr.fs;
sregs->gs = regs->sr.gs;
}
/* User-space, show kernel context */
else {
kprintf("%s: in user-space: %p\n", __func__, regs->gpr.rip);
x86v->panic_regs[0] = 0;
x86v->panic_regs[1] = current->ctx.rbx;
x86v->panic_regs[2] = 0;
x86v->panic_regs[3] = 0;
x86v->panic_regs[4] = current->ctx.rsi;
x86v->panic_regs[5] = current->ctx.rdi;
x86v->panic_regs[6] = current->ctx.rbp;
x86v->panic_regs[7] = current->ctx.rsp;
x86v->panic_regs[8] = 0;
x86v->panic_regs[9] = 0;
x86v->panic_regs[10] = 0;
x86v->panic_regs[11] = 0;
x86v->panic_regs[12] = regs->gpr.r12;
x86v->panic_regs[13] = regs->gpr.r13;
x86v->panic_regs[14] = regs->gpr.r14;
x86v->panic_regs[15] = regs->gpr.r15;
x86v->panic_regs[16] = (unsigned long)enter_user_mode;
sregs = (struct segment_regs *)&x86v->panic_regs[17];
sregs->rflags = regs->gpr.rflags;
sregs->cs = regs->gpr.cs;
sregs->ss = regs->gpr.ss;
sregs->ds = regs->sr.ds;
sregs->es = regs->sr.es;
sregs->fs = regs->sr.fs;
sregs->gs = regs->sr.gs;
}
x86v->paniced = 1;
}
void arch_clear_panic(void)
{
struct x86_cpu_local_variables *x86v =
get_x86_cpu_local_variable(ihk_mc_get_processor_id());
x86v->paniced = 0;
}
int arch_cpu_read_write_register(
struct ihk_os_cpu_register *desc,
enum mcctrl_os_cpu_operation op)
@ -2212,7 +2113,9 @@ int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg)
ihk_mc_spinlock_unlock(&get_cpu_local_var(cpu)->smp_func_req_lock,
irq_flags);
ihk_mc_interrupt_cpu(cpu, LOCAL_SMP_FUNC_CALL_VECTOR);
ihk_mc_interrupt_cpu(
get_x86_cpu_local_variable(cpu)->apic_id,
LOCAL_SMP_FUNC_CALL_VECTOR);
++cpu_index;
}
@ -2244,48 +2147,4 @@ free_out:
return ret;
}
extern int nmi_mode;
extern long freeze_thaw(void *nmi_ctx);
void multi_nm_interrupt_handler(void *irq_regs)
{
dkprintf("%s: ...\n", __func__);
switch (nmi_mode) {
case 1:
case 2:
/* mode == 1 or 2, for FREEZER NMI */
dkprintf("%s: freeze mode NMI catch. (nmi_mode=%d)\n",
__func__, nmi_mode);
freeze_thaw(NULL);
break;
case 0:
/* mode == 0, for MEMDUMP NMI */
arch_save_panic_regs(irq_regs);
ihk_mc_query_mem_areas();
/* The memdump NMI halts McKernel, so a break is unnecessary. */
/* fall through */
case 3:
/* mode == 3, for SHUTDOWN-WAIT NMI */
kprintf("%s: STOP\n", __func__);
while (nmi_mode != 4)
cpu_halt();
break;
case 4:
/* mode == 4, continue NMI */
arch_clear_panic();
if (!ihk_mc_get_processor_id()) {
ihk_mc_clear_dump_page_completion();
}
kprintf("%s: RESUME, nmi_mode: %d\n", __func__, nmi_mode);
break;
default:
ekprintf("%s: Unknown nmi-mode(%d) detected.\n",
__func__, nmi_mode);
break;
}
}
/*** end of file ***/

View File

@ -1,4 +1,4 @@
/* gencore.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <ihk/debug.h>
#include <kmalloc.h>
#include <cls.h>
@ -13,17 +13,15 @@
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
/* Exclude reserved ranges (McKernel's internal use), device files,
* and holes created by mprotect
*/
#define GENCORE_RANGE_IS_INACCESSIBLE(range) \
((range->flag & (VR_RESERVED | VR_MEMTYPE_UC | VR_DONTDUMP)))
/* Generate a core file image, which consists of many chunks.
/*
* Generate a core file image, which consists of many chunks.
* Returns an allocated table, an entry of which is a pair of the address
* of a chunk and its length.
*/
@ -41,14 +39,18 @@ void fill_elf_header(Elf64_Ehdr *eh, int segs)
eh->e_ident[EI_MAG1] = 'E';
eh->e_ident[EI_MAG2] = 'L';
eh->e_ident[EI_MAG3] = 'F';
eh->e_ident[EI_CLASS] = ELF_CLASS;
eh->e_ident[EI_DATA] = ELF_DATA;
eh->e_ident[EI_CLASS] = ELFCLASS64;
eh->e_ident[EI_DATA] = ELFDATA2LSB;
eh->e_ident[EI_VERSION] = El_VERSION;
eh->e_ident[EI_OSABI] = ELF_OSABI;
eh->e_ident[EI_ABIVERSION] = ELF_ABIVERSION;
eh->e_ident[EI_OSABI] = ELFOSABI_NONE;
eh->e_ident[EI_ABIVERSION] = El_ABIVERSION_NONE;
eh->e_type = ET_CORE;
eh->e_machine = ELF_ARCH;
#ifdef CONFIG_MIC
eh->e_machine = EM_K10M;
#else
eh->e_machine = EM_X86_64;
#endif
eh->e_version = EV_CURRENT;
eh->e_entry = 0; /* Do we really need this? */
eh->e_phoff = 64; /* fixed */
@ -73,6 +75,77 @@ int get_prstatus_size(void)
+ align32(sizeof(struct elf_prstatus64));
}
/**
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs0 A pointer to a x86_regs structure.
*/
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
void *name;
struct elf_prstatus64 *prstatus;
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
register unsigned long _r12 asm("r12");
register unsigned long _r13 asm("r13");
register unsigned long _r14 asm("r14");
register unsigned long _r15 asm("r15");
head->namesz = sizeof("CORE");
head->descsz = sizeof(struct elf_prstatus64);
head->type = NT_PRSTATUS;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
prstatus = (struct elf_prstatus64 *)(name + align32(sizeof("CORE")));
/*
We ignore the following entries for now.
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
prstatus->pr_reg[0] = _r15;
prstatus->pr_reg[1] = _r14;
prstatus->pr_reg[2] = _r13;
prstatus->pr_reg[3] = _r12;
prstatus->pr_reg[4] = regs->rbp;
prstatus->pr_reg[5] = regs->rbx;
prstatus->pr_reg[6] = regs->r11;
prstatus->pr_reg[7] = regs->r10;
prstatus->pr_reg[8] = regs->r9;
prstatus->pr_reg[9] = regs->r8;
prstatus->pr_reg[10] = regs->rax;
prstatus->pr_reg[11] = regs->rcx;
prstatus->pr_reg[12] = regs->rdx;
prstatus->pr_reg[13] = regs->rsi;
prstatus->pr_reg[14] = regs->rdi;
prstatus->pr_reg[15] = regs->rax; /* ??? */
prstatus->pr_reg[16] = regs->rip;
prstatus->pr_reg[17] = regs->cs;
prstatus->pr_reg[18] = regs->rflags;
prstatus->pr_reg[19] = regs->rsp;
prstatus->pr_reg[20] = regs->ss;
prstatus->pr_reg[21] = rdmsr(MSR_FS_BASE);
prstatus->pr_reg[22] = rdmsr(MSR_GS_BASE);
/* There is no ds, es, fs and gs. */
prstatus->pr_fpvalid = 0; /* We assume no fp */
}
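The pr_reg[] indices above follow the register order of struct user_regs64_struct (shown later in this diff, in elfcoregpl.h): r15 is slot 0, rip is slot 16, and slot 15 is orig_rax, which is why rax is stored there with a "???" comment. A small self-contained cross-check of that ordering, as a sketch:

	/* Register order assumed by pr_reg[] above, mirroring the
	 * user_regs64_struct layout shown later in this diff. */
	enum pr_reg_index {
		PR_R15, PR_R14, PR_R13, PR_R12, PR_RBP, PR_RBX,
		PR_R11, PR_R10, PR_R9, PR_R8, PR_RAX, PR_RCX,
		PR_RDX, PR_RSI, PR_RDI, PR_ORIG_RAX, PR_RIP,
		PR_CS, PR_EFLAGS, PR_RSP, PR_SS, PR_FS_BASE, PR_GS_BASE,
		PR_DS, PR_ES, PR_FS, PR_GS,
	};

	_Static_assert(PR_RIP == 16, "pr_reg[16] holds rip");
	_Static_assert(PR_FS_BASE == 21, "pr_reg[21] holds MSR_FS_BASE");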
/**
* \brief Return the size of the prpsinfo entry of the NOTE segment.
*
@ -84,37 +157,15 @@ int get_prpsinfo_size(void)
+ align32(sizeof(struct elf_prpsinfo64));
}
/**
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param regs0 A pointer to a ihk_mc_user_context_t structure.
*/
void fill_prstatus(struct note *head, struct thread *thread, int sig)
{
void *name;
struct elf_prstatus64 *prstatus;
head->namesz = sizeof("CORE");
head->descsz = sizeof(struct elf_prstatus64);
head->type = NT_PRSTATUS;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
prstatus = (struct elf_prstatus64 *)(name + align32(sizeof("CORE")));
arch_fill_prstatus(prstatus, thread, thread->coredump_regs, sig);
}
/**
* \brief Fill a prpsinfo structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param regs A pointer to a ihk_mc_user_context_t structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_prpsinfo(struct note *head, struct process *proc, char *cmdline)
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
@ -126,22 +177,22 @@ void fill_prpsinfo(struct note *head, struct process *proc, char *cmdline)
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = proc->status;
prpsinfo->pr_pid = proc->pid;
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
memcpy(prpsinfo->pr_fname, cmdline, 16);
/*
We leave most of the fields unfilled.
/* TODO: Fill the following fields:
* char pr_sname;
* char pr_zomb;
* char pr_nice;
* a8_uint64_t pr_flag;
* unsigned int pr_uid;
* unsigned int pr_gid;
* int pr_ppid, pr_pgrp, pr_sid;
* char pr_fname[16];
* char pr_psargs[ELF_PRARGSZ];
*/
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
*/
}
/**
@ -159,11 +210,11 @@ int get_auxv_size(void)
* \brief Fill an AUXV structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param regs A pointer to a ihk_mc_user_context_t structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_auxv(struct note *head, struct process *proc)
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
void *name;
void *auxv;
@ -174,8 +225,7 @@ void fill_auxv(struct note *head, struct process *proc)
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, proc->saved_auxv,
sizeof(unsigned long) * AUXV_LEN);
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
}
/**
@ -183,84 +233,34 @@ void fill_auxv(struct note *head, struct process *proc)
*
*/
int get_note_size(struct process *proc)
int get_note_size(void)
{
int note = 0;
struct thread *thread_iter;
struct mcs_rwlock_node lock;
mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list, siblings_list) {
note += get_prstatus_size();
note += arch_get_thread_core_info_size();
if (thread_iter->tid == proc->pid) {
note += get_prpsinfo_size();
note += get_auxv_size();
}
}
mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
return note;
return get_prstatus_size() + get_prpsinfo_size()
+ get_auxv_size();
}
/**
* \brief Fill the NOTE segment.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param regs A pointer to a ihk_mc_user_context_t structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_note(void *note, struct process *proc, char *cmdline, int sig)
void fill_note(void *note, struct thread *thread, void *regs)
{
struct thread *thread_iter;
struct mcs_rwlock_node lock;
mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &lock);
list_for_each_entry(thread_iter, &proc->threads_list, siblings_list) {
fill_prstatus(note, thread_iter, sig);
note += get_prstatus_size();
arch_fill_thread_core_info(note, thread_iter,
thread_iter->coredump_regs);
note += arch_get_thread_core_info_size();
if (thread_iter->tid == proc->pid) {
fill_prpsinfo(note, proc, cmdline);
note += get_prpsinfo_size();
#if 0
fill_siginfo(note, proc);
note += get_siginfo_size();
#endif
fill_auxv(note, proc);
note += get_auxv_size();
#if 0
fill_file(note, proc);
note += get_file_size();
#endif
}
#if 0
fill_fpregset(note, thread);
note += get_fpregset_size();
fill_x86_xstate(note, thread);
note += get_x86_xstate_size();
#endif
}
mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &lock);
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
fill_auxv(note, thread, regs);
}
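Each NOTE entry written here is a struct note header followed by the 4-byte-aligned name ("CORE") and the 4-byte-aligned descriptor, which is exactly what get_prstatus_size() and friends compute. A sketch of the per-entry arithmetic, assuming align32() rounds up to a multiple of 4 (e.g. ((x) + 3) & ~3):

	/* Sketch: size of one NOTE entry, given its descriptor size. */
	static int note_entry_size(int descsz)
	{
		return sizeof(struct note)
			+ align32(sizeof("CORE"))	/* name + padding */
			+ align32(descsz);		/* desc + padding */
	}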
/**
* \brief Generate an image of the core file.
*
* \param proc A pointer to the current process structure.
* \param regs A pointer to a ihk_mc_user_context_t structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of entries in coretable.
*
@ -271,16 +271,26 @@ void fill_note(void *note, struct process *proc, char *cmdline, int sig)
* should be zero.
*/
int gencore(struct process *proc, struct coretable **coretable, int *chunks,
char *cmdline, int sig)
/*@
@ requires \valid(thread);
@ requires \valid(regs);
@ requires \valid(coretable);
@ requires \valid(chunks);
@ behavior success:
@ ensures \result == 0;
@ assigns coretable;
@ behavior failure:
@ ensures \result == -1;
@*/
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
int error = 0;
struct coretable *ct = NULL;
Elf64_Ehdr *eh = NULL;
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range, *next;
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
@ -289,9 +299,8 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
*chunks = 3; /* ELF header, program header table and NOTE segment */
if (vm == NULL) {
kprintf("%s: ERROR: vm not found\n", __func__);
error = -EINVAL;
goto fail;
dkprintf("no vm found.\n");
return -1;
}
next = lookup_process_memory_range(vm, 0, -1);
@ -300,22 +309,21 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DONTDUMP)
continue;
}
/* We need a chunk for each page for a demand paging area.
* This can be optimized for space complexity but we would
* lose simplicity instead.
*/
This can be optimized for space complexity but we would
lose simplicity instead. */
if (range->flag & VR_DEMAND_PAGING) {
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
if (prevzero == 1)
@ -334,39 +342,26 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = vm->region;
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start,
region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start,
region.data_end);
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end);
dkprintf("map: %lx-%lx\n", region.map_start, region.map_end);
dkprintf("stack: %lx-%lx\n", region.stack_start,
region.stack_end);
dkprintf("user: %lx-%lx\n\n", region.user_start,
region.user_end);
dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end);
dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end);
}
dkprintf("now generate a core file image\n");
eh = kmalloc(sizeof(*eh), IHK_MC_AP_NOWAIT);
if (eh == NULL) {
dkprintf("could not alloc a elf header table.\n");
error = -ENOMEM;
goto fail;
}
memset(eh, 0, sizeof(*eh));
offset += sizeof(*eh);
fill_elf_header(eh, segs);
offset += sizeof(eh);
fill_elf_header(&eh, segs);
/* program header table */
phsize = sizeof(Elf64_Phdr) * segs;
ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
if (ph == NULL) {
kprintf("%s: ERROR: allocating program header\n", __func__);
error = -ENOMEM;
dkprintf("could not alloc a program header table.\n");
goto fail;
}
memset(ph, 0, phsize);
@ -377,16 +372,15 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
* To align the next segment to a page boundary, we prepare a padded
* region for our NOTE segment.
*/
notesize = get_note_size(proc);
notesize = get_note_size();
alignednotesize = alignpage(notesize + offset) - offset;
note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
if (note == NULL) {
kprintf("%s: ERROR: allocating NOTE\n", __func__);
error = -ENOMEM;
dkprintf("could not alloc NOTE for core.\n");
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, proc, cmdline, sig);
fill_note(note, thread, regs);
/* program header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
@ -409,9 +403,8 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
if (range->flag & VR_RESERVED)
continue;
}
ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
@ -430,15 +423,13 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
/* coretable to send to host */
ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
if (!ct) {
kprintf("%s: ERROR: allocating coretable\n", __func__);
error = -ENOMEM;
dkprintf("could not alloc a coretable.\n");
goto fail;
}
memset(ct, 0, sizeof(struct coretable) * (*chunks));
ct[0].addr = virt_to_phys(eh); /* ELF header */
ct[0].addr = virt_to_phys(&eh); /* ELF header */
ct[0].len = 64;
dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, eh);
dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh);
ct[1].addr = virt_to_phys(ph); /* program header table */
ct[1].len = phsize;
@ -455,12 +446,9 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
unsigned long phys;
if (GENCORE_RANGE_IS_INACCESSIBLE(range)) {
if (range->flag & VR_RESERVED)
continue;
}
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;
int prevzero = 0;
@ -468,15 +456,14 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* Start a new chunk */
/* We begin a new chunk */
size = PAGE_SIZE;
start = p;
} else {
/* Extend the previous chunk */
/* We extend the previous chunk */
size += PAGE_SIZE;
}
prevzero = 1;
@ -485,16 +472,15 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
/* Flush out an empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n",
i, ct[i].len,
ct[i].addr, start);
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
ct[i].addr = phys;
ct[i].len = PAGE_SIZE;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n",
i, ct[i].len, ct[i].addr, p);
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, p);
i++;
prevzero = 0;
}
@ -503,26 +489,18 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
/* An empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n",
i, ct[i].len, ct[i].addr, start);
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
} else {
if ((vm->region.user_start <= range->start) &&
(range->end <= vm->region.user_end)) {
error = ihk_mc_pt_virt_to_phys(
vm->address_space->page_table,
(void *)range->start, &phys);
if (error) {
if (error != -EFAULT) {
kprintf("%s: error: ihk_mc_pt_virt_to_phys for %lx failed (%d)\n",
__func__, range->start,
error);
goto fail;
}
/* VR_PROT_NONE range */
phys = 0;
error = 0;
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
"to physical address", range->start);
goto fail;
}
} else {
phys = virt_to_phys((void *)range->start);
@ -536,14 +514,16 @@ int gencore(struct process *proc, struct coretable **coretable, int *chunks,
}
*coretable = ct;
return error;
return 0;
fail:
kfree(eh);
kfree(ct);
kfree(ph);
kfree(note);
return error;
fail:
if (ct)
kfree(ct);
if (ph)
kfree(ph);
if (note)
kfree(note);
return -1;
}
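The resulting coretable is an array of (physical address, length) chunks: ct[0] is the ELF header, ct[1] the program header table, ct[2] the NOTE segment, and the rest the memory segments, with addr == 0 denoting a zero-filled hole (e.g. a not-yet-faulted demand-paging page). A hypothetical host-side consumer sketch; write_zeros() and write_phys() are assumed helpers, not part of this diff:

	/* Hypothetical consumer of the coretable produced by gencore(). */
	static void write_core_image(struct coretable *ct, int chunks)
	{
		int i;

		for (i = 0; i < chunks; i++) {
			if (ct[i].addr == 0)
				write_zeros(ct[i].len);		    /* hole */
			else
				write_phys(ct[i].addr, ct[i].len);  /* data */
		}
	}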
/**
@ -552,12 +532,15 @@ fail:
* \param coretable An array of core chunks.
*/
/*@
@ requires \valid(coretable);
@ assigns \nothing;
@*/
void freecore(struct coretable **coretable)
{
struct coretable *ct = *coretable;
kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */
kfree(phys_to_virt(ct[1].addr)); /* ph */
kfree(phys_to_virt(ct[0].addr)); /* eh */
kfree(*coretable);
}
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -64,13 +64,12 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return oldval;
}
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
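The two extraction styles shown above differ in sign handling: (encoded_op << 8) >> 20 treats the 12-bit oparg field as signed, while the masked form zero-extends it. A standalone illustration (field layout per the code above: op in bits 28-31, cmp in 24-27, oparg in 12-23, cmparg in 0-11; like the kernel's version, it relies on arithmetic right shift of signed int):

	#include <stdio.h>

	int main(void)
	{
		/* op=1, cmp=0, oparg=0xfff, cmparg=1 -- illustrative value. */
		int encoded_op = (1 << 28) | (0xfff << 12) | 0x001;

		int oparg_masked = (encoded_op & 0x00fff000) >> 12;  /* 4095 */
		int oparg_shifted = (encoded_op << 8) >> 20;	     /* -1   */

		printf("masked=%d shifted=%d\n", oparg_masked, oparg_shifted);
		return 0;
	}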

View File

@ -6,8 +6,6 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include <lwk/compiler.h>
#include "config.h"
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -33,68 +31,12 @@ extern void preempt_disable(void);
#define IHK_STATIC_SPINLOCK_FUNCS
static inline void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
lock->head_tail = 0;
}
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) { int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", ihk_mc_get_processor_id()); rc; \
}
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
static inline int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
ihk_spinlock_t cur = { .head_tail = lock->head_tail };
ihk_spinlock_t next = { .tickets = {
.head = cur.tickets.head,
.tail = cur.tickets.tail + 2
} };
int success;
if (cur.tickets.head != cur.tickets.tail) {
return 0;
}
preempt_disable();
/* Use the same increment amount as other functions! */
success = __sync_bool_compare_and_swap((__ticketpair_t*)lock, cur.head_tail, next.head_tail);
if (!success) {
preempt_enable();
}
return success;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
static inline unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock,
int *result)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
*result = __ihk_mc_spinlock_trylock_noirq(lock);
return flags;
}
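Note the contract of the irq-saving trylock: it always disables interrupts and returns the saved flags, whether or not the lock was taken, so the failure path must restore them itself. A hypothetical usage sketch:

	/* Hypothetical non-blocking acquisition. */
	static ihk_spinlock_t lk = SPIN_LOCK_UNLOCKED;

	void try_fast_path(void)
	{
		int taken;
		unsigned long flags = ihk_mc_spinlock_trylock(&lk, &taken);

		if (taken) {
			/* ... critical section ... */
			ihk_mc_spinlock_unlock(&lk, flags);
		} else {
			cpu_restore_interrupt(flags);	/* lock not held */
		}
	}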
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -105,7 +47,7 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
static inline void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
register struct __raw_tickets inc = { .tail = 0x0002 };
@ -136,7 +78,7 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
static inline unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
@ -156,7 +98,7 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
static inline void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
__ticket_t inc = 0x0002;
@ -175,14 +117,96 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
static inline void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock,
unsigned long flags)
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
__ihk_mc_spinlock_unlock_noirq(lock);
cpu_restore_interrupt(flags);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
typedef struct mcs_lock_node {
unsigned long locked;
struct mcs_lock_node *next;
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t;
typedef mcs_lock_node_t mcs_lock_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
node->next = NULL;
}
static void __mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
(unsigned long)node);
if (pred) {
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
}
static void __mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = (struct mcs_lock_node *)
atomic_cmpxchg8((unsigned long *)&lock->next,
(unsigned long)node, (unsigned long)0);
if (old == node) {
return;
}
while (node->next == NULL) {
cpu_pause();
}
}
node->next->locked = 0;
}
static void mcs_lock_lock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
preempt_disable();
__mcs_lock_lock(lock, node);
}
static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
__mcs_lock_unlock(lock, node);
preempt_enable();
}
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
node->irqsave = cpu_disable_interrupt_save();
mcs_lock_lock_noirq(lock, node);
}
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
mcs_lock_unlock_noirq(lock, node);
cpu_restore_interrupt(node->irqsave);
}
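The MCS lock queues waiters on per-acquirer nodes so each CPU spins only on its own cache line; the node must stay valid for the whole critical section, which makes stack allocation natural. A hypothetical usage sketch:

	/* Hypothetical usage: the node lives on the caller's stack. */
	static mcs_lock_t list_lock;	/* set up once with mcs_lock_init() */

	void update_list(void)
	{
		struct mcs_lock_node node;

		mcs_lock_lock(&list_lock, &node);
		/* ... critical section: each waiter spins on its own node ... */
		mcs_lock_unlock(&list_lock, &node);
	}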
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
@ -198,22 +222,14 @@ typedef struct mcs_rwlock_node {
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_t;
#else
} mcs_rwlock_node_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_node_t;
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_irqsave_t;
#else
} mcs_rwlock_node_irqsave_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -222,13 +238,9 @@ typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_lock_t;
#else
} mcs_rwlock_lock_t;
#endif
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
static inline void
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -249,7 +261,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id());
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
static inline void
static void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -276,7 +288,7 @@ __mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_n
}
#ifndef SPINLOCK_IN_MCS_RWLOCK
static inline void
static void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
@ -343,7 +355,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
static inline void
static void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -403,7 +415,7 @@ atomic_inc_ifnot0(ihk_atomic_t *v)
return old;
}
static inline void
static void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -469,7 +481,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
static inline void
static void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -516,7 +528,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
static inline void
static void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -536,7 +548,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
static inline void
static void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -556,7 +568,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
static inline void
static void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -576,7 +588,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
static inline void
static void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -587,95 +599,4 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
static inline int irqflags_can_interrupt(unsigned long flags)
{
return !!(flags & 0x200);
}
struct ihk_rwlock {
union {
long lock;
struct {
unsigned int read;
int write;
};
} lock;
};
static inline void ihk_mc_rwlock_init(struct ihk_rwlock *rw)
{
rw->lock.read = 0;
rw->lock.write = 1;
}
static inline void ihk_mc_read_lock(struct ihk_rwlock *rw)
{
asm volatile("1:\t"
"lock; decq %0\n\t"
"jns 3f\n\t"
"lock incq %0\n\t"
"2:\t"
"pause\n\t"
"cmpq $0x1, %0\n\t"
"jns 1b\n\t"
"jmp 2b\n\t"
"3:"
: "+m" (rw->lock.lock) : : "memory");
}
static inline void ihk_mc_write_lock(struct ihk_rwlock *rw)
{
asm volatile("1:\t"
"lock; decl %0\n\t"
"je 3f\n\t"
"lock; incl %0\n\t"
"2:\t"
"pause\n\t"
"cmpl $0x1,%0\n\t"
"je 1b\n\t"
"jmp 2b\n\t"
"3:"
: "+m" (rw->lock.write) : "i" (((1L) << 32)) : "memory");
}
static inline int ihk_mc_read_trylock(struct ihk_rwlock *rw)
{
ihk_atomic64_t *count = (ihk_atomic64_t *)rw;
if (ihk_atomic64_sub_return(1, count) >= 0)
return 1;
ihk_atomic64_inc(count);
return 0;
}
static inline int ihk_mc_write_trylock(struct ihk_rwlock *rw)
{
ihk_atomic_t *count = (ihk_atomic_t *)&rw->lock.write;
if (ihk_atomic_dec_and_test(count))
return 1;
ihk_atomic_inc(count);
return 0;
}
static inline void ihk_mc_read_unlock(struct ihk_rwlock *rw)
{
asm volatile("lock; incq %0" : "+m" (rw->lock.lock) : : "memory");
}
static inline void ihk_mc_write_unlock(struct ihk_rwlock *rw)
{
asm volatile("lock; incl %0"
: "+m" (rw->lock.write) : "i" (((1L) << 32)) : "memory");
}
static inline int ihk_mc_write_can_lock(struct ihk_rwlock *rw)
{
return rw->lock.write == 1;
}
static inline int ihk_mc_read_can_lock(struct ihk_rwlock *rw)
{
return rw->lock.lock > 0;
}
#endif

View File

@ -1,4 +1,3 @@
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2018 */
/**
* \file arch-memory.h
* License details are found in the file LICENSE.
@ -17,7 +16,6 @@
#define __HEADER_X86_COMMON_ARCH_MEMORY_H
#include <ihk/types.h>
#include <errno.h>
#define KERNEL_CS_ENTRY 4
#define KERNEL_DS_ENTRY 5
@ -42,9 +40,14 @@
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define USER_END 0x0000800000000000UL
#define LD_TASK_UNMAPPED_BASE 0x0000155555500000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
#define GB_PAGE_SHIFT 30
#define GB_PAGE_SIZE (1UL << GB_PAGE_SHIFT)
#define GB_PAGE_MASK (~((unsigned long)GB_PAGE_SIZE - 1))
#define GB_PAGE_P2ALIGN (GB_PAGE_SHIFT - PAGE_SHIFT)
#define USER_END 0x0000800000000000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
/*
* Canonical negative addresses (i.e., the smallest kernel virtual address)
@ -67,8 +70,8 @@
* Placing the LWK image in the virtual address space at the end of
* the Linux modules section enables us to map the LWK TEXT in Linux
* as well, so that Linux can also call into LWK text.
* It's defined by cmake.
*/
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
#define STACK_TOP(region) ((region)->user_end)
#define MAP_VMAP_SIZE 0x0000000100000000UL
@ -163,10 +166,6 @@ typedef unsigned long pte_t;
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
#define USER_STACK_PREPAGE_SIZE LARGE_PAGE_SIZE
#define USER_STACK_PAGE_MASK LARGE_PAGE_MASK
#define USER_STACK_PAGE_P2ALIGN LARGE_PAGE_P2ALIGN
#define USER_STACK_PAGE_SHIFT LARGE_PAGE_SHIFT
/* For easy conversion, these are best kept identical to the architecture's values */
enum ihk_mc_pt_attribute {
@ -184,10 +183,12 @@ enum ihk_mc_pt_attribute {
enum ihk_mc_pt_attribute attr_mask;
#ifdef POSTK_DEBUG_ARCH_DEP_12
static inline int pfn_is_write_combined(uintptr_t pfn)
{
return ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD));
}
#endif /* #ifdef POSTK_DEBUG_ARCH_DEP_12 */
static inline int pte_is_null(pte_t *ptep)
{
@ -337,104 +338,7 @@ static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
return;
}
static inline int pte_is_contiguous(pte_t *ptep)
{
return 0;
}
static inline int pgsize_is_contiguous(size_t pgsize)
{
return 0;
}
static inline int pgsize_to_tbllv(size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return 1;
case PTL2_SIZE: return 2;
case PTL3_SIZE: return 3;
case PTL4_SIZE: return 4;
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pgsize_to_tbllv");
#else
return 0;
#endif
}
return 0;
}
static inline int pgsize_to_pgshift(size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return PTL1_SHIFT;
case PTL2_SIZE: return PTL2_SHIFT;
case PTL3_SIZE: return PTL3_SHIFT;
case PTL4_SIZE: return PTL4_SHIFT;
default: return -EINVAL;
}
}
static inline size_t tbllv_to_pgsize(int level)
{
switch (level) {
case 1: return PTL1_SIZE;
case 2: return PTL2_SIZE;
case 3: return PTL3_SIZE;
case 4: return PTL4_SIZE;
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("tbllv_to_pgsize");
#else
return 0;
#endif
}
return 0;
}
static inline size_t tbllv_to_contpgsize(int level)
{
return 0;
}
static inline int tbllv_to_contpgshift(int level)
{
return 0;
}
static inline pte_t *get_contiguous_head(pte_t *__ptep, size_t __pgsize)
{
return __ptep;
}
static inline pte_t *get_contiguous_tail(pte_t *__ptep, size_t __pgsize)
{
return __ptep;
}
static inline int split_contiguous_pages(pte_t *ptep, size_t pgsize)
{
return 0;
}
static inline int page_is_contiguous_head(pte_t *ptep, size_t pgsize)
{
return 0;
}
static inline int page_is_contiguous_tail(pte_t *ptep, size_t pgsize)
{
return 0;
}
struct page_table;
static inline void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
void **pgaddrp,
size_t *pgsizep)
{
}
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);

View File

@ -13,17 +13,19 @@
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() barrier()
#define arch_barrier() asm volatile("" : : : "memory")
static inline unsigned long read_tsc(void)
static inline void rmb(void)
{
arch_barrier();
}
static inline void wmb(void)
{
arch_barrier();
}
static unsigned long read_tsc(void)
{
unsigned int low, high;
@ -32,21 +34,4 @@ static inline unsigned long read_tsc(void)
return (low | ((unsigned long)high << 32));
}
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
#define smp_store_release(p, v) \
({ \
compiletime_assert_atomic_type(*p); \
barrier(); \
WRITE_ONCE(*p, v); \
})
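smp_load_acquire()/smp_store_release() order a flag against its payload: the writer publishes the data before the flag, and a reader that observes the flag is guaranteed to observe the data. A minimal sketch of the pairing; data and flag are hypothetical:

	/* Hypothetical producer/consumer pairing. */
	static int data;
	static int flag;

	void producer(void)
	{
		data = 42;			/* payload first */
		smp_store_release(&flag, 1);	/* then publish  */
	}

	int consumer(void)
	{
		if (smp_load_acquire(&flag))	/* observe flag...       */
			return data;		/* ...then data is valid */
		return -1;
	}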
void arch_flush_icache_all(void);
#endif /* ARCH_CPU_H */

View File

@ -0,0 +1,32 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct rusage_global *rusage;
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
switch (pgsize) {
case PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case PTL2_SIZE:
ret = IHK_OS_PGSIZE_2MB;
break;
case PTL3_SIZE:
ret = IHK_OS_PGSIZE_1GB;
break;
default:
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
break;
}
return ret;
}
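The mapping simply collapses the x86 page-table levels onto the rusage page-size classes. Illustrative values, assuming the usual x86_64 sizes for PTL1/PTL2/PTL3:

	/* rusage_pgsize_to_pgtype(PTL1_SIZE) -> IHK_OS_PGSIZE_4KB  (4 KiB)
	 * rusage_pgsize_to_pgtype(PTL2_SIZE) -> IHK_OS_PGSIZE_2MB  (2 MiB)
	 * rusage_pgsize_to_pgtype(PTL3_SIZE) -> IHK_OS_PGSIZE_1GB  (1 GiB)
	 */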
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -50,7 +50,6 @@ struct x86_cpu_local_variables {
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);

View File

@ -1,4 +1,4 @@
/* elf.h COPYRIGHT FUJITSU LIMITED 2018 */
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_X86_COMMON_ELF_H
#define __HEADER_X86_COMMON_ELF_H
@ -56,3 +56,4 @@ struct user_regs64_struct
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
#endif /* __HEADER_S64FX_COMMON_ELF_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,94 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and definitions for ELF core file.
* Extracted from
* System V Application Binary Interface - DRAFT - 10 June 2013,
* http://www.sco.com/developers/gabi/latest/contents.html
*/
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Xword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
#define EI_NIDENT 16
typedef struct {
unsigned char e_ident[EI_NIDENT];
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry;
Elf64_Off e_phoff;
Elf64_Off e_shoff;
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr;
#define EI_MAG0 0
#define EI_MAG1 1
#define EI_MAG2 2
#define EI_MAG3 3
#define EI_CLASS 4
#define EI_DATA 5
#define EI_VERSION 6
#define EI_OSABI 7
#define EI_ABIVERSION 8
#define EI_PAD 9
#define ELFMAG0 0x7f
#define ELFMAG1 'E'
#define ELFMAG2 'L'
#define ELFMAG3 'F'
#define ELFCLASS64 2 /* 64-bit object */
#define ELFDATA2LSB 1 /* LSB */
#define El_VERSION 1 /* defined to be the same as EV_CURRENT */
#define ELFOSABI_NONE 0 /* unspecified */
#define El_ABIVERSION_NONE 0 /* unspecified */
#define ET_CORE 4 /* Core file */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_K10M 181 /* Intel K10M */
#define EV_CURRENT 1 /* Current version */
typedef struct {
Elf64_Word p_type;
Elf64_Word p_flags;
Elf64_Off p_offset;
Elf64_Addr p_vaddr;
Elf64_Addr p_paddr;
Elf64_Xword p_filesz;
Elf64_Xword p_memsz;
Elf64_Xword p_align;
} Elf64_Phdr;
#define PT_LOAD 1
#define PT_NOTE 4
#define PF_X 1 /* executable bit */
#define PF_W 2 /* writable bit */
#define PF_R 4 /* readable bit */
struct note {
Elf64_Word namesz;
Elf64_Word descsz;
Elf64_Word type;
/* name char[namesz] and desc[descsz] */
};
#define NT_PRSTATUS 1
#define NT_PRFRPREG 2
#define NT_PRPSINFO 3
#define NT_AUXV 6
#define NT_X86_STATE 0x202
#include "elfcoregpl.h"
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,96 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and defines from GPLed file.
*/
#define pid_t int
/* From /usr/include/linux/elfcore.h of Linux */
#define ELF_PRARGSZ (80)
/* From /usr/include/linux/elfcore.h of Linux */
struct elf_siginfo
{
int si_signo;
int si_code;
int si_errno;
};
/* From bfd/hosts/x86-64linux.h of gdb. */
typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t;
typedef a8_uint64_t elf_greg64_t;
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
struct prstatus64_timeval
{
a8_uint64_t tv_sec;
a8_uint64_t tv_usec;
};
struct elf_prstatus64
{
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
elf_gregset64_t pr_reg;
int pr_fpvalid;
};
struct elf_prpsinfo64
{
char pr_state;
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
};
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,5 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017-2018 */
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifdef POSTK_DEBUG_ARCH_DEP_65
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
@ -8,3 +9,4 @@ static unsigned long arch_get_hwcap(void)
}
#endif /* _UAPI__ASM_HWCAP_H */
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -13,8 +13,6 @@
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
#define HEADER_X86_COMMON_IHK_ATOMIC_H
#include <lwk/compiler.h>
/***********************************************************************
* ihk_atomic_t
*/
@ -116,7 +114,7 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
{
v->counter64 = i;
}
@ -126,22 +124,6 @@ static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
asm volatile ("lock incq %0" : "+m"(v->counter64));
}
static inline long ihk_atomic64_add_return(long i, ihk_atomic64_t *v)
{
long __i;
__i = i;
asm volatile("lock xaddq %0, %1"
: "+r" (i), "+m" (v->counter64)
: : "memory");
return i + __i;
}
static inline long ihk_atomic64_sub_return(long i, ihk_atomic64_t *v)
{
return ihk_atomic64_add_return(-i, v);
}
/***********************************************************************
* others
*/
@ -151,7 +133,7 @@ static inline long ihk_atomic64_sub_return(long i, ihk_atomic64_t *v)
* Note 2: xchg has side effect, so that attribute volatile is necessary,
* but generally the primitive is invalid, *ptr is output argument. --ANK
*/
#define __xg(x) ((volatile long *)(x))
#define __xg(x) ((volatile typeof(x))(x))
#define xchg4(ptr, x) \
({ \
@ -174,55 +156,43 @@ static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
return __x;
}
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#define __X86_CASE_Q 8
#define __xchg(x, ptr, size) \
({ \
__typeof(*(ptr)) __x = (x); \
switch (size) { \
case 1: \
asm volatile("xchgb %b0,%1" \
: "=q" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 2: \
asm volatile("xchgw %w0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 4: \
asm volatile("xchgl %k0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 8: \
asm volatile("xchgq %0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
default: \
panic("xchg for wrong size"); \
} \
__x; \
})
extern void __xchg_wrong_size(void)
__compiletime_error("Bad argument size for xchg");
/*
* An exchange-type operation, which takes a value and a pointer, and
* returns the old value.
*/
#define __xchg_op(ptr, arg, op, lock) \
({ \
__typeof__(*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
asm volatile (lock #op "b %b0, %1\n" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
asm volatile (lock #op "w %w0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
asm volatile (lock #op "l %0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
asm volatile (lock #op "q %q0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
default: \
__xchg_wrong_size(); \
} \
__ret; \
})
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
* Since this is generally used to protect other memory information, we
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
* information around.
*/
#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
#define xchg(ptr, v) \
__xchg((v), (ptr), sizeof(*ptr))
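As the comment above notes, xchg carries an implicit lock prefix on x86, so it doubles as an atomic publish. A hypothetical use of the xchg8() helper defined earlier in this file:

	/* Hypothetical lock-free head swap using xchg8(). */
	static unsigned long list_head;

	void publish_head(unsigned long new_node)
	{
		unsigned long old = xchg8(&list_head, new_node);
		/* old holds the previous head; link it behind new_node. */
		(void)old;
	}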
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
unsigned long oldval,
@ -271,66 +241,4 @@ static inline unsigned long ihk_atomic_add_long_return(long i, long *v) {
return i + __i;
}
extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile uint8_t *__ptr = (volatile uint8_t *)(ptr);\
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile uint16_t *__ptr = (volatile uint16_t *)(ptr);\
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile uint32_t *__ptr = (volatile uint32_t *)(ptr);\
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile uint64_t *__ptr = (volatile uint64_t *)(ptr);\
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#endif

View File

@ -1,4 +1,3 @@
/* types.h COPYRIGHT FUJITSU LIMITED 2018 */
/**
* \file types.h
* Licence details are found in the file LICENSE.
@ -30,11 +29,13 @@ typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
#define NULL ((void *)0)

View File

@ -9,9 +9,6 @@
#ifndef __ARCH_PRCTL_H
#define __ARCH_PRCTL_H
#define PR_SET_THP_DISABLE 41
#define PR_GET_THP_DISABLE 42
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003

View File

@ -71,7 +71,7 @@
#define MSR_PERF_CTL_0 0xc0010000
#define MSR_PERF_CTR_0 0xc0010004
static inline unsigned long xgetbv(unsigned int index)
static unsigned long xgetbv(unsigned int index)
{
unsigned int low, high;
@ -80,7 +80,7 @@ static inline unsigned long xgetbv(unsigned int index)
return low | ((unsigned long)high << 32);
}
static inline void xsetbv(unsigned int index, unsigned long val)
static void xsetbv(unsigned int index, unsigned long val)
{
unsigned int low, high;
@ -90,8 +90,7 @@ static inline void xsetbv(unsigned int index, unsigned long val)
asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
}
static inline void wrmsr(unsigned int idx, unsigned long value)
{
static void wrmsr(unsigned int idx, unsigned long value){
unsigned int high, low;
high = value >> 32;
@ -100,7 +99,7 @@ static inline void wrmsr(unsigned int idx, unsigned long value)
asm volatile("wrmsr" : : "c" (idx), "a" (low), "d" (high) : "memory");
}
static inline unsigned long rdpmc(unsigned int counter)
static unsigned long rdpmc(unsigned int counter)
{
unsigned int high, low;
@ -109,7 +108,7 @@ static inline unsigned long rdpmc(unsigned int counter)
return (unsigned long)high << 32 | low;
}
static inline unsigned long rdmsr(unsigned int index)
static unsigned long rdmsr(unsigned int index)
{
unsigned int high, low;
@ -118,7 +117,7 @@ static inline unsigned long rdmsr(unsigned int index)
return (unsigned long)high << 32 | low;
}
static inline unsigned long rdtsc(void)
static unsigned long rdtsc(void)
{
unsigned int high, low;
@ -127,7 +126,7 @@ static inline unsigned long rdtsc(void)
return (unsigned long)high << 32 | low;
}
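rdtsc() composes the 64-bit time-stamp counter from EDX:EAX. A hedged timing sketch; note that RDTSC is not a serializing instruction, so measurements are approximate without a fence:

	/* Approximate cycle measurement with the rdtsc() defined above. */
	unsigned long t0, cycles;

	t0 = rdtsc();
	/* ... code under measurement ... */
	cycles = rdtsc() - t0;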
static inline void set_perfctl(int counter, int event, int mask)
static void set_perfctl(int counter, int event, int mask)
{
unsigned long value;
@ -138,7 +137,7 @@ static inline void set_perfctl(int counter, int event, int mask)
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static inline void start_perfctr(int counter)
static void start_perfctr(int counter)
{
unsigned long value;
@ -146,7 +145,7 @@ static inline void start_perfctr(int counter)
value |= (1 << 22);
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static inline void stop_perfctr(int counter)
static void stop_perfctr(int counter)
{
unsigned long value;
@ -155,17 +154,17 @@ static inline void stop_perfctr(int counter)
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static inline void clear_perfctl(int counter)
static void clear_perfctl(int counter)
{
wrmsr(MSR_PERF_CTL_0 + counter, 0);
}
static inline void set_perfctr(int counter, unsigned long value)
static void set_perfctr(int counter, unsigned long value)
{
wrmsr(MSR_PERF_CTR_0 + counter, value);
}
static inline unsigned long read_perfctr(int counter)
static unsigned long read_perfctr(int counter)
{
return rdpmc(counter);
}

Some files were not shown because too many files have changed in this diff.