Compare commits

...

1321 Commits

Author SHA1 Message Date
0de6c6b8f9 spec: prerelease 0.91 for testing removal of mcexec -n option
Change-Id: I2b18b5fefec570bfb7a4aa0823fe97d9ea93e208
2020-08-12 13:12:06 +09:00
5ffad78b87 mcexec: use FLIB_NUM_PROCESS_ON_NODE when -n not specified (Fugaku specific)
Change-Id: I1668fecfac692d56076dd10e6e03fbf992e323ec
2020-08-12 07:30:11 +09:00
542418b1fc spec: prerelease 0.9 for testing libdwarf related package requirements
Change-Id: Iaaa116018505c4f89813883f5a99c8194cb4f99e
2020-07-29 12:22:08 +09:00
b95a2fcfab spec, README.md: fix libdwarf related package requirements
Change-Id: I460d440e33d0ff5e8ab3d4f7b328f7f2ea11bc16
2020-07-29 12:08:04 +09:00
1b11496f26 spec, README.md: add package dependency including libdwarf
Change-Id: Ie612c5dc642a9f5d6d2ba31747adb991cb568113
2020-07-22 06:59:37 +00:00
7c0e624b13 spec: prerelease 0.8 for testing mcexec -n issue
Change-Id: Ie54f7bc74097c8390f75ddbd0d6e58a8ea87ea7c
2020-07-21 13:31:45 +09:00
0b66bab992 Revert "mcexec: detect mismatch of mcexec -n and mpirun -ppn"
This reverts commit 1d135492c3.

Conflicts:
	executer/kernel/mcctrl/control.c

Change-Id: I224cced408aa4b77691a153c5e1d2fdf8043fa04
2020-07-21 13:08:21 +09:00
63ed4e7af0 spec: prerelease 0.7 for testing hugetlb map for stack
Change-Id: I4997340cd984ca8915e45749b91b1d72c1de85af
2020-07-20 08:11:40 +09:00
d7cf39883f Revert "shmobj: Support large page"
This reverts commit 9a60997ea0.

Change-Id: Id60959b4e03451987239faa0bbc2e780b72fafaa
2020-07-19 12:53:45 +00:00
40f8091fab stack: grow on page fault
The steps of the technique to replace the stack with a hugetlbfs map are
as follows:

(1) Prepare a hugetlbfs map with the size of rlim_cur
(2) Copy the active region of the stack to the hugetlbfs map.
    The range to copy is determined by reading /proc/[pid]/maps.
(3) Replace the stack map with the hugetlbfs map

Step (2) would have to copy a huge region if McKernel did not grow the
stack at run time.
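
A minimal user-space sketch of steps (1) and (2) only (illustrative, not the mcexec implementation; step (3) is omitted because the MAP_FIXED replacement cannot safely run on the very stack being copied, and the MAP_HUGETLB fallback and size cap are assumptions of this sketch):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/resource.h>

    int main(void)
    {
        struct rlimit rl;
        unsigned long start = 0, end = 0;
        char line[256];
        FILE *fp;
        void *huge;

        /* (1) size the replacement map by rlim_cur */
        if (getrlimit(RLIMIT_STACK, &rl))
            return 1;
        if (rl.rlim_cur == RLIM_INFINITY)
            rl.rlim_cur = 8UL << 20;    /* arbitrary cap for the sketch */

        huge = mmap(NULL, rl.rlim_cur, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (huge == MAP_FAILED)         /* no hugepage pool configured */
            huge = mmap(NULL, rl.rlim_cur, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        /* (2) determine the active stack range from /proc/self/maps */
        fp = fopen("/proc/self/maps", "r");
        while (fp && fgets(line, sizeof(line), fp)) {
            if (strstr(line, "[stack]")) {
                sscanf(line, "%lx-%lx", &start, &end);
                break;
            }
        }
        if (fp)
            fclose(fp);

        printf("stack %lx-%lx, replacement map at %p (%lu bytes)\n",
               start, end, huge, (unsigned long)rl.rlim_cur);
        return 0;
    }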

Change-Id: I5858c35b5c26dd0a42cccf9e3cc4c64b1a81f160
2020-07-19 12:53:31 +00:00
a20e1acf01 syscall: add prlimit64
Change-Id: Iad882813d54b439c236c0df74dc81508190e6707
2020-07-19 21:52:46 +09:00
b3d7bbda56 rus_vm_fault: compat: RHEL-8.2
This applies the following patch:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396
mm: change return type to vm_fault_t
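
The usual shape of such a compatibility shim, as a hedged sketch only (the handler name below is hypothetical and this is not the actual mcctrl code; enterprise kernels that backport the change may need distro-specific checks rather than LINUX_VERSION_CODE alone):

    #include <linux/version.h>
    #include <linux/mm.h>

    #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
    typedef int vm_fault_t;    /* older kernels return a plain int */
    #endif

    /* hypothetical handler name; the real fault forwarding is elided */
    static vm_fault_t example_vm_fault(struct vm_fault *vmf)
    {
        return VM_FAULT_SIGBUS;
    }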

Change-Id: I7189fc92824d21b4906f1033f1de5899bbad4680
2020-07-15 13:02:32 +09:00
9a60997ea0 shmobj: Support large page
Mixing page sizes is allowed by shmobj.

Change-Id: Ic48b71da2db6ce3f68fa3dbc8ad5ae96347d6018
Refs: #1381
Refs: #1458
2020-07-15 03:50:56 +00:00
4b66373813 mcexec: Don't forward SIGTSTP, SIGTTIN, SIGTTOU to McKernel
Change-Id: I72bb74d6b98e1f0bf519c8f0fef742624a2a699a
Refs: #1425
2020-07-14 08:34:11 +00:00
b44b11ace7 set_robust_list: Add error check
set_robust_list is not supported by McKernel.

Change-Id: I1f679e2e4df24139cceb1f2294bc072cb7956002
Refs: 1399
2020-07-14 01:06:49 +00:00
ebc91cea0e tgkill: Fix argument validation
Formerly, if tgid was specified as -1, tgkill() was equivalent to tkill().
Now this case is treated as an error (EINVAL).
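
A minimal sketch of the tightened check (the helper name is hypothetical and this is not the McKernel source):

    #include <errno.h>

    /* hypothetical helper: reject the formerly-accepted tgid == -1 case */
    static int tgkill_check_args(int tgid, int tid)
    {
        if (tgid <= 0 || tid <= 0)
            return -EINVAL;    /* no more silent fallback to tkill() */
        return 0;
    }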

Change-Id: I47bc75d439662a36dc6167c4446a5277422de507
Refs: 1380
2020-07-14 01:03:47 +00:00
58106d791a struct process: fix type of group_exit_status
Change-Id: Ib8492cbb077106cef1d0fa2d6d5e8e13bbb209c0
Refs: #1377
2020-07-13 08:33:07 +00:00
56b51d4f97 spec: prerelease 0.6 for testing cpuinfo and mmap overcommit
Change-Id: Iab5acc2c08ebe19251c37782cff87a4b5c914448
2020-07-13 10:14:23 +09:00
bafe540d86 mmap: allow unlimited overcommit
Change-Id: Iba07b5c504b4a202cd163ce682f3fc72a31284a0
2020-07-10 14:52:57 +09:00
d78a0fd05d sysinfo: support basic entries
Change-Id: I27f3e55058cc29f895831a1dddfafbc8585746a5
refs: #1389
2020-07-10 14:51:25 +09:00
999bc91b4f arch: Move some functions from arch-dependent to common part
Moved the rt_sigaction syscall and signal-related functions.

Change-Id: I39f619e008d9c6018d91099a76dfb30e48757673
Refs: 1487
2020-07-10 03:54:28 +00:00
b3bd2ea9b3 procfs cpuinfo: use sequence number as processor
Change-Id: Id54ea74c5fda198a0bb9c9b6a19e6799fee0ed3f
2020-07-09 13:10:08 +09:00
d3d9e2400d test: ihklib: syscall_list.h: add robust marker for patch
Change-Id: Ie5f72b4b296db4d44e9839f38fd9a68854be78c3
2020-07-06 16:25:11 +09:00
199407b2a1 spec: prerelease 0.5 for testing ppoll
Change-Id: I51deb1c1703a986ba0aa4e02da9f53009554dbb7
2020-07-01 08:49:08 +09:00
5973d66e2d Revert "epoll_wait(): make sure to schedule in offload"
This reverts commit 5e44c9c9f9.

Change-Id: I826336f1ece31a84072c3e62c6c6c68a641e8fb5
2020-06-30 17:11:26 +09:00
d7ef74659b Revert "epoll, ppoll: deschedule on offload, don't do it when exiting system call"
This reverts commit d4056acfc3.

Change-Id: I7df15b9d3957ca571f4b4e2d576799f8b97ae299
2020-06-30 17:11:23 +09:00
ac86affecc mcexec: fix FLIB_AFFINITY_ON_PROCESS mask for McKernel CPU numbers (Fugaku)
Change-Id: If42b139fb53866bcff0809d898d4a2a712946f0c
2020-06-30 16:29:03 +09:00
2026cf8dad mcexec: explicit CPU list in partitioned execution (for Fujitsu's FLIB_AFFINITY_ON_PROCESS)
Change-Id: I05c11f73553de8ccb5f79083ce2115ac57e62584
2020-06-30 16:29:00 +09:00
1d135492c3 mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0c42e3119143da40ea2e69cd9ec99bde78a0ad2a
Refs: #929
2020-06-30 16:28:08 +09:00
1cfc5ca71f spec: prerelease 0.4 for testing cross-compile
Change-Id: I26908b6b415483711f55338e45d7b2d862b5c028
2020-06-23 08:34:10 +00:00
7ee533d620 spec: remove unnecessary mcinspect*.debug file
Fixes: 612f364 "spec: include recently added debug tools"
Change-Id: I29779132567d18f9468e3cecf2c713ad1c51729b
2020-06-23 08:34:10 +00:00
28334c7a29 cmake: treat libdwarf as required library when cross-compiling
Change-Id: I23ffb46c867b05de0e732c96912d62c630ebb44c
2020-06-23 16:18:35 +09:00
697e9386b3 cmake: fix resolving dwarf.h
Fixes: 0e787b7 "cmake: fix resolving libdwarf"
Change-Id: Iccb491c8ad07db0f15f6b1798ee8a91edc808cf7
2020-06-22 13:33:50 +09:00
0e787b731e cmake: fix resolving libdwarf
Change-Id: I14573f1ac7d779b4c90ed44cc310d4f584374559
2020-06-19 17:24:21 +09:00
612f364e6a spec: include recently added debug tools
Change-Id: I0318fe3551a75c7da774d26bc834c099bb235b67
2020-06-19 13:37:52 +09:00
ceee4c379f spec: prerelease 0.3 for testing fixes related to Fujitsu TSC and ihkmond
Change-Id: I4b9fcac086a3567e6e797f3e7515949c9e214c36
2020-06-18 16:23:43 +09:00
36c981bc34 sync with ihk
Change-Id: I052394121016a030d8873296b4a17b1f038d6b13
2020-06-18 16:23:43 +09:00
fd941dad44 Revert "procfs cpuinfo: use sequence number as processor"
This reverts commit bb7e140655.

Change-Id: If0c1719986706511c1e57d06bc61923d1adfc0aa
2020-06-16 13:26:55 +09:00
5f5b9f94d1 Revert "get_one_cpu_topology: Renumber core_id (physical core id)"
This reverts commit 0a4e6b49b4.

Change-Id: Icd9f2cda63d0daf661a40b146c72608b82cf2061
2020-06-16 13:26:55 +09:00
3f3c4acd71 madvise: do nothing (workaround for Fugaku)
Change-Id: Id2265e7eca4ae296dd22a8e99a2294a9a8b4c4dc
2020-06-16 13:26:54 +09:00
00007dafaa mbind: do nothing (workaround for Fugaku)
Change-Id: Id9d018304e18ed52ea7b0a872e03675c903bce6e
2020-06-16 13:26:54 +09:00
cbe2b2149d Revert "sysinfo, procfs: Support memory info partially"
This reverts commit 8f74888f87.

Change-Id: I65530dd8a4e1af2ca47cb02c02f5c54a9b4595a5
2020-06-16 13:26:54 +09:00
4cecde3fba Revert "mcexec: detect mismatch of mcexec -n and mpirun -ppn"
This reverts commit 72af689e69.

Change-Id: I25bc56cd8ac9c877852fc1092c8349fe318fd25d
2020-06-16 13:26:54 +09:00
8022a2a8c0 treat libfj90 as helper thread spawner (Fugaku specific)
Change-Id: I1f6170c7ebbfae4f575f13ac1f3106d292cd5b6a
2020-06-16 13:26:53 +09:00
3328ce03d9 Record pthread routine address in clone(), keep helper threads on caller CPU core (workaround for Fugaku)
Change-Id: I29d1589e430dc1396558cdf3df4d068c27173612
2020-06-16 13:26:53 +09:00
97b107f61c treat /var/opt/FJSVtcs/ple/daemonif/ as device file (Fugaku specific)
Change-Id: I047ec793a082f2fede3f2bd9c5fb358a30b8ea84
2020-06-16 13:26:53 +09:00
6f3be17c19 do_process_vm_read_writev: don't check vm_range (workaround for Fugaku)
Change-Id: I4ce9b5397ed876dff651c67658e43811d83658dd
2020-06-16 13:26:53 +09:00
dea7d00545 force allow_oversubscribe (workaround for Fugaku)
Change-Id: I5288f5ccbd967004fabbe71bca267feed3b9c2f8
2020-06-16 13:26:53 +09:00
4512778569 force time_sharing (workaround for Fugaku)
Change-Id: Ie3e3a0bbf00ef4e988bdee40d9d4dc93258dd4be
2020-06-16 13:26:52 +09:00
a7adb266ff mcinspect: add read memory value by specifying physical address
Change-Id: I2f2d6cb981e883c5e2ae1e0c764e10e0fec76a46
2020-06-16 13:26:52 +09:00
2566f4f213 devobj_free: don't report error on release-offload failure
Change-Id: I4179dab8cc46557a72eb3447ff0803743a1ba1a2
2020-06-16 13:26:52 +09:00
ac0081eddd handle_interrupt_gicv3: don't take runq_lock
To avoid deadlock with functions taking the lock with
ihk_mc_spinlock_lock_noirq().

Change-Id: If689e8cc5fff81f627bcf98bfa7df7d4c13f4209
2020-06-16 13:26:52 +09:00
d4056acfc3 epoll, ppoll: deschedule on offload, don't do it when exiting system call
Change-Id: Ib1d0553ca5c50f4de055a1a5fe40b406c9c26dc7
2020-06-16 13:26:52 +09:00
1910543380 armv8pmu_write_counter: sign-extend properly
ihk_mc_event_set_period() calls armv8pmu_write_counter() by
cpu_pmu.write_counter(..., (uint64_t)(-left) & max_period)
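
A worked user-space example of the required sign extension, assuming a 32-bit counter with max_period == 0xffffffff (illustrative arithmetic only, not the PMU driver code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t max_period = 0xffffffffULL;        /* 32-bit counter */
        int64_t  left   = 1000;                           /* events until overflow */
        uint64_t masked = (uint64_t)(-left) & max_period; /* what the caller passes */
        /* sign-extend bit 31 into the upper half before programming */
        uint64_t value  = masked | ((masked & 0x80000000ULL) ?
                                    0xffffffff00000000ULL : 0);

        printf("masked=%#llx value=%#llx overflows after %llu events\n",
               (unsigned long long)masked, (unsigned long long)value,
               (unsigned long long)(0x100000000ULL - masked));
        return 0;
    }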

Change-Id: I2ac8fbe5957db044ac54946f620163e3c486cb5f
2020-06-16 13:26:51 +09:00
6332903f0d Revert "xpmem: Support large page attachment"
This reverts commit a8696d811d.

Conflicts:
	kernel/include/process.h
	kernel/syscall.c
	kernel/xpmem.c

Change-Id: I726e74450f6228d3fc78fc62dda15b2067732a53
2020-06-16 13:25:57 +09:00
29d27b7c8d Revert "xpmem: Use correct process_vm in xpmem functions"
This reverts commit 0c63a2a3cd.

Change-Id: I7a67def6c45a67396b15cc55e96ffb5fc5898f28
2020-06-16 13:25:51 +09:00
7136384384 Revert "xpmem: Make sure vm_range is used under memory_range_lock"
This reverts commit 91ea69cf8f.

Conflicts:
	kernel/xpmem.c

Change-Id: Iff3eed010ad3610d63e165f53484ac56528ce384
2020-06-16 13:22:49 +09:00
2fe5c8de2e Revert "xpmem: Fix deadlock in xpmem_remove_process_memory_range()"
This reverts commit d052acab1d.

Change-Id: I31e982465ef9e0936145f27c8d1587c01737ec81
2020-06-16 12:13:49 +09:00
e774e1b984 Revert "xpmem: fix mapping of attachment and segment"
This reverts commit a5fcc91656.

Change-Id: If29415369d724391b291939ecce76482138e82f5
2020-06-16 11:28:02 +09:00
33b7414615 Revert "xpmem: map only resident segment pages at attach time (workaround for Fugaku)"
This reverts commit 3c646e2485.

Change-Id: Ibae8100403586775a32d6eb36c74383131066ac9
2020-06-16 11:27:59 +09:00
3c646e2485 xpmem: map only resident segment pages at attach time (workaround for Fugaku)
Change-Id: I50ac8ba88b208608206b68b4c57e278041913503
2020-06-16 09:17:26 +09:00
a5fcc91656 xpmem: fix mapping of attachment and segment
* Mapping the attached part of a segment is done at attach time instead
  of make time, to work with runtimes (e.g. OpenMPI) that xpmem_make the
  entire user address space
* Mapping the attached part of a segment at attach time can be turned off
  by specifying xpmem_remote_on_demand in the kernel arguments
* Mapping an attachment chooses appropriate page sizes, i.e., the largest
  allowed by the memory range and the segment page boundary
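
A minimal sketch of the page-size selection, assuming a hypothetical helper and an x86-style page-size table (not the McKernel code):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* pick the largest page size whose alignment is satisfied by the
     * attachment address and that fits before the segment boundary */
    static size_t pick_attach_pagesize(uintptr_t vaddr, size_t remaining)
    {
        static const size_t sizes[] = { 1UL << 30, 2UL << 20, 1UL << 12 };
        size_t i;

        for (i = 0; i + 1 < sizeof(sizes) / sizeof(sizes[0]); i++)
            if ((vaddr & (sizes[i] - 1)) == 0 && remaining >= sizes[i])
                return sizes[i];
        return sizes[i];    /* base page size as the fallback */
    }

    int main(void)
    {
        printf("%zu\n", pick_attach_pagesize(0x200000, 4UL << 20));  /* 2 MiB */
        printf("%zu\n", pick_attach_pagesize(0x201000, 4UL << 20));  /* 4 KiB */
        return 0;
    }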

Fixes: a8696d8 "xpmem: Support large page attachment"
Change-Id: I44663865204036520e5f62fe22b9134ee4629f9b
2020-06-15 10:11:29 +09:00
d370e9241f Toggle preemption while faulting pages
Change-Id: I74201061bb3e7c7c4032e3884658ace87cb85948
2020-06-15 10:11:29 +09:00
3e254c06bf SCD_MSG_WAKE_UP_SYSCALL_THREAD: hold target thread through wake-up
Change-Id: I35b2c56f78430135b2d197d2a2cfe364dbd03947
2020-06-15 10:11:29 +09:00
07537cd2e7 eclair-dump-backtrace: expect script to dump backtrace on all CPUs
Change-Id: I358c5d5ca81903b0eaab88d227c36373164c0950
2020-06-15 10:11:29 +09:00
a37f72da0e futex_wake(): disable IRQs while iterating plist
Change-Id: I796794b2159816183c6487ef0048f42f97aac73b
2020-06-15 10:11:28 +09:00
ab11b168f0 ptrace_setoptions: debug msg
Change-Id: Iea5fdb26884c7af6e3d5aa26b5f71932f730cc9d
2020-06-15 10:11:28 +09:00
eac414d6d8 CPU read/write reg: use generic IHK messaging interface
Change-Id: Ia9637d1516d9329fdadf37822bfce7594d69105f
2020-06-15 10:11:28 +09:00
bb725f5f50 crash: print actual PTE in lookup mode
Change-Id: Ie2c1b97780347d6172ef8961ed62258117cbf115
2020-06-15 10:11:28 +09:00
5224551782 mcinspect: vtop (in progress)
Change-Id: I09f487e96edc7c4f59c97e6fb6dde28baf84c1e5
2020-06-15 10:11:28 +09:00
91146acfe5 Make struct ihk_os_rusage compatible with mckernel_rusage (workaround for Fugaku)
Change-Id: Iebae1e8b0aaf9c23cb1c9411aa1ad111b2e61028
2020-06-15 10:10:57 +09:00
f64731ab34 do_migrate: kick scheduler on target CPU
Change-Id: Ib5875ecf0c6a3118d32973329a6f1595a910562f
2020-06-15 09:58:55 +09:00
cd46cbd4b3 mcinspect and mcps: DWARF based LWK inspection
Change-Id: Ie9e209d8f77999b61afa39c38832bfc416a2c34f
2020-06-15 09:58:54 +09:00
39780917af libdwarf: compile locally if not present
Change-Id: I70d1f653f4fc4ee4daeaa2c9c6bdbf1416e43c9b
2020-06-15 09:58:52 +09:00
0f8f6d298e CMakeLists.txt: fail on missing libraries at config time
Change-Id: Ia7e4cf469d94f97fa1c565e59d2d4587f3a3d081
2020-06-13 17:18:10 +09:00
f8e8b21f04 /dev/shm: use Linux PFNs and populate mappings
Change-Id: I921c1f43c8411f896343be17e0ac6762a1bc26d1
2020-06-13 17:18:10 +09:00
5c2f9b8239 pager: prefetch all shared libraries
Change-Id: Ic62e1284d540362df817098b3926ac223245e3b6
2020-06-13 17:18:10 +09:00
1afc3d9b70 Keep track of number of context switches per CPU
Change-Id: I7a2194c8777a7efcd34e1ed7f4734da03fb4d433
2020-06-13 17:18:10 +09:00
17a8f68d60 set_timer(): treat spin wait as PS_RUNNING
Change-Id: Iea1ad5b0a49a12d5e1aef38ad68fccb8d789af5e
2020-06-13 17:18:10 +09:00
2b9a053504 syscall offload: avoid double IRQ enabling
Change-Id: I202c9f348b66672b1c9f8c146d4e28ec1d9c7658
2020-06-13 17:18:09 +09:00
6441aa1abb __sched_wakeup_thread(): check if timesharing needs to be enabled
Change-Id: I081d700f345abbbdb14dcac3b6246b79475d059b
2020-06-13 17:18:09 +09:00
9b55b68934 Allow other threads to run while waiting for I/O in page faults
Change-Id: I51e847a02a698b0ecf1e356d51599aa1c9400b15
2020-06-13 17:18:09 +09:00
83ef96a739 fileobj: disable IRQs while holding page hash locks, schedule() in I/O loop
Change-Id: Iaf72d55980f1a5df6c93c4a57fa57b0ae5b1d229
2020-06-13 17:18:09 +09:00
b5337358cf IKC: increase message queue sizes
Change-Id: Ib1eee4d26b8304cbee16fe50caabfc2c19e5c2e3
2020-06-13 17:18:09 +09:00
2db3717e57 handle_interrupt_gicv3(): check for CPU_FLAG_NEED_RESCHED as well
Change-Id: Id6ade08e4e572a6d837476de2872126442d3591c
2020-06-13 17:18:09 +09:00
5395891966 pager_req_map: fix printk
Change-Id: I98488169f02656c2df711b827d0002762de69f7a
2020-06-13 17:18:09 +09:00
c32a5e261b PF handler: print VM range's file path if available
Change-Id: I5ba55b19a0b874bc9f4b58e94bfc4afc440e6a8a
2020-06-13 17:18:09 +09:00
c0c80b71ca mmap and fileobj: handle MF_ZEROFILL properly
Change-Id: I6ee52b4cab212b1973339bc8d49065c1ec9263b0
2020-06-13 17:18:09 +09:00
d15a396d5a pager: use host physical for PMIx shared memory
Change-Id: Idfebc768ba03b5536a0e5eb1c6076769806fa7aa
2020-06-13 17:18:08 +09:00
e35ec09da1 UCX: fix page size for shared memory
Change-Id: I75b0beef8345b391e7619887765ed1a89d74c29b
2020-06-13 17:18:08 +09:00
5e44c9c9f9 epoll_wait(): make sure to schedule in offload
Change-Id: I435416cb0ac005a03cd995bf1aae75c9ce7b2082
2020-06-13 17:18:08 +09:00
0f6c36870c mcexec_syscall(): disable no per-process structure warning
Change-Id: I951575f0077054ebcfe4b3f7e29416799ab6ade8
2020-06-13 17:18:08 +09:00
2ec2112cc5 IKC: use atomic allocation during initialization
Change-Id: I5bb5d7040092d47e4cdbdad87f9d1dd5b2ceaee5
2020-06-13 17:18:08 +09:00
c86a38e18f physical memory: guard rbtree allocator with IHK_RBTREE_ALLOCATOR macro
Change-Id: I468c6bf1f641875c02b091704ef63f59fd390be5
2020-06-13 17:18:08 +09:00
6aa7b50e26 profile: refactor display code and fix ARM support
Change-Id: Ic48102c42abe17eed014f2bfe7523d0d6f03c2e9
2020-06-13 17:18:08 +09:00
c3c57940ba Memory ordering and usage of ASM cmpxchg() instead of compiler atomic intrinsics
Change-Id: I4dadebc32721744dad982f3fc5b3eea7ab7ca745
2020-06-13 17:18:08 +09:00
7aa2d64294 obtain_clone_cpuid(): avoid locking while partitioned execution
Change-Id: Iabb4784835be7dc9b2f555acc3a711fcc23ee7da
2020-06-13 17:18:08 +09:00
51fe77cdae mmap()/shmget(): use Linux huge page size when not specified
Fixes: 089b443 "mmap()/shmget(): use Linux default huge page size when not specified"
Change-Id: If8043a0993d1131ea0344aa6d500b35c7a291884
2020-06-13 17:18:08 +09:00
d5aafca1ae VM: use RW spinlock for vm_range_lock
Change-Id: Id4654084207d55bf77cc9f8b42795e0f9873cfa0
2020-06-12 03:07:33 +00:00
54b529c82d An arch independent RW spinlock implementation
Change-Id: I426d3f7b643660e6685b5c39c0ae849a9f08b9bb
2020-06-12 03:07:33 +00:00
232bc9c44b README.md: add how to check out a specific branch or version
Change-Id: Ie727c266d576e601f4901e2f84b98c07ff49aa24
2020-06-11 18:45:52 -04:00
f34373d1c0 README.md: add how to install with rpm
Change-Id: Ic3c0ff6971686d6d64dfcdd5850ae4a70f05f40f
2020-06-11 04:38:08 -04:00
4698ae166c spec: prerelease for testing hugefileobj premap fix
Test target: a2adb0a4 "hugefileobj: rewrite page allocation/handling"

Change-Id: Ibbae5222f54704248911da9f53ca8e4675627bc4
refs: #1475
2020-06-11 04:22:48 -04:00
db9ca358f9 sync with ihk
Change-Id: I769880c52c8cfd06523cea8d77cce5703e783532
2020-06-11 13:51:44 +09:00
16a6a1d08b mcexec: Fix LD_PRELOAD string manipulation (again)
Fixes: 8cf70900 "mcexec: Fix LD_PRELOAD string manipulation"
Change-Id: I6e0188bd60f8e3977beb22c1f9212baf37f37093
2020-06-05 09:25:15 +00:00
2e2e973d78 hugefileobj: rewrite page allocation/handling
* manage pages by an array
* fix mmap of fd created by memfd_create() populates the map
* refactor pgsize and pgshift handling

Change-Id: Icaf015b10afc35f2b95f93059adf1a1b6b92e14e
refs: #1475
2020-05-19 23:36:25 -04:00
c3c0b7197f test: perf: prevent overflow counter from stopping counter
Fixes: 1a204b6 "perf: overflow test"
Change-Id: I4d8e93b97f7a8d58ef7811f55b5c995b16c5af69
2020-05-14 01:10:14 +00:00
d086100b35 perf: REFRESH: Don't perform perf_start
Change-Id: I70194467d357770f982d90a6f9b132a61a817fc5
2020-05-14 01:09:52 +00:00
8f74888f87 sysinfo, procfs: Support memory info partially
Change-Id: I597dae4f82d64d3f23889cef960db18ae879ff06
refs: #1389
2020-05-14 00:53:25 +00:00
8e42c2a254 README.md: Add description of Utility Thread offloading Interface (UTI)
Change-Id: Ibeb6e6b91e5f280214e7f78049b6f35e648198c7
2020-05-12 14:14:08 +09:00
caf0f5ef63 cmake: do NOT install crash plugin sources
Fixes "Installed (but unpackaged) file(s) found" rpmbuild error.

Fixes: 04d17dd3 "Define MAP_KERNEL_START by resolving MODULES_END at cmake time"
Change-Id: I80df58ac3c581faf1c48080115b70724eac6aea5
2020-04-20 18:51:15 -04:00
3d030391e8 spec: Update version number to 1.7.0rc4
Change-Id: I1c999cfa632711195a9c8ec9de769075292c40b9
2020-04-17 11:57:53 +09:00
0aeab6b840 NEWS.md: Add 1.7.0rc4 updates
Change-Id: I66ccbe5e8454482155243b89d9b0398994186010
2020-04-17 02:43:56 +00:00
367bbda713 mcexec: Fix resolving library path for LD_PRELOAD
Fixes: 8ee1d61d "Revert "Detect hang of McKernel in mcexec""
Fixes: b87ac8b8 "reproductible builds: remove most install paths in c code"
Change-Id: I8ef9ab81cd0a41ccd0e227ebc3e45c0745c150e9
2020-04-16 20:46:46 +09:00
0082447043 mcctrl_get_request_os_cpu: Fix debug message
Change-Id: I0d2ae427b97b7284d61dd13825d4ba3d2130f26a
2020-04-16 07:44:36 +09:00
4f50c90f6e __mcctrl_os_read_write_cpu_register: Range-check cpu number
Change-Id: I9ef991e1f0a7e301430586c261bf55bf73a4bae9
2020-04-16 07:44:36 +09:00
79950e045e eclair: Improve error message
Change-Id: Ib8fe3df0a529a17a2e331b16cf396915ab6a3eb2
2020-04-16 07:44:36 +09:00
6cf7cebb2d __mcctrl_control: Check user privilege
Change-Id: Ia87ab241f980ea25df805bd31d66f07bf3681311
2020-04-16 07:44:36 +09:00
c9f05f238d Remove unused IHK_OS_STATUS_STOPPED
Change-Id: I4aad8dac06b79a85ca8951cc26c40981c64262bb
2020-04-16 07:28:20 +09:00
f1caaa9b74 freeze: arm64: use normal interrupt instead of NMI
Fixes: 55faba7 "dump: rewrite NMI handling (for resume) and fix PANIC register saving"
Fixes: ff982b8 "freeze: change freeze-thaw to normal interrupt"
Change-Id: I9445cac191f91d20357cae11b2839e4e9384ac6f
2020-04-15 01:04:20 +00:00
97cd379ee2 mcctrl_os_shutdown_notifier: Move wait for running state to ihk side
Change-Id: I363391c63d92d952fc9a60c1e88f964eb50687fd
2020-04-15 00:54:00 +00:00
8ee1d61d0f Revert "Detect hang of McKernel in mcexec"
Change-Id: Ie8a0cf725f84a2f5d85da8b8fb15b30a826ddfcb
2020-04-15 00:50:55 +00:00
04d17dd3e9 Define MAP_KERNEL_START by resolving MODULES_END at cmake time
Change-Id: Ib88fc045b64c4ad2dad6a4b13cb0372a735a26ab
2020-04-09 00:30:05 -04:00
33eef71133 spec: Update version number to 1.7.0rc3
Change-Id: Id07122ececb562ecb4e4cf91e4983b8273c96b34
2020-04-09 00:06:17 -04:00
c10b4a1c16 spec: fix mckernel-devel package
Also fixes a kernel-rpm-macros package resolution issue.

Fixes: 6d584fea "spec: Add mckernel-devel package"
Change-Id: Ide286753c89c3b931665f53dd8270427b19b39eb
2020-04-08 00:25:43 -04:00
8cf70900e7 mcexec: Fix LD_PRELOAD string manipulation
To suppress compiler warnings.

Change-Id: I4d6b5ce2d2a8fca3f2675a7fc309df40cfe3c04b
2020-04-01 01:18:10 -04:00
b2618a98f5 madvise: Support MADV_DONTDUMP and MADV_DODUMP on anonymous map
Change-Id: I231b62ed6803b797ec749ac70a66cdf8236204bd
refs: #1373
2020-03-23 13:06:26 +09:00
01d06cb218 madvise: Add locked-page check to MADV_REMOVE
Change-Id: I95465ef11aa4c772ad0ecf5d25f757192f31b93b
refs: #1372
2020-03-23 13:06:26 +09:00
c78803ac08 madvise: Support MADV_REMOVE on tmpfs
Change-Id: Ic99d374c4d2630944c7bc838937d7f45601783c6
refs: #1371
2020-03-23 13:06:26 +09:00
3300e65efc madvise: Support MADV_WIPEONFORK, MADV_KEEPONFORK and MADV_NORMAL
Change-Id: I1d4cf5affa580d7304dfdc34fa4f1707c0df617c
refs: #1374
2020-03-23 09:13:01 +09:00
d82ac31bc6 faccessat: Specify AT_SYMLINK_NOFOLLOW only when necessary.
- Specify AT_SYMLINK_NOFOLLOW in faccessat only when
   the symbolic-link is analyzed by overlay_path().

Change-Id: Ie3b1f7fedef7441fd4b39c5c8b2ef0f73cba770e
Refs: #1370
2020-03-20 00:22:50 +00:00
4946fbdd82 Fix "test: runq_lock and over-scheduling fix."
Change-Id: Iedd3b94d6ecd52b9ee67cc9b8a75735428c9fd84
Refs: #1400
2020-03-19 23:34:40 +00:00
33cba1ad48 test: ptrace: Record syscall return value before reporting
Change-Id: I8e9de3bb9bfa0b07eebe472131cc62b53ef5cc8b
Refs: #1287
2020-03-19 23:31:48 +00:00
7c69cfaf67 set_host_vma(): do NOT read protect Linux VMA
Change-Id: Id1e84464c9a06a3886b9cb16b35b1f2dda3c4c30
2020-03-19 02:15:29 +00:00
b3cbdeec84 Fix memory leak when a child exits without being wait()-ed
Change-Id: I8ad9e20e3f3e6f406548a6c4de2bf4dc07c40b0e
Refs: #1349
2020-03-16 04:26:54 +00:00
1d1ec39a27 exec: Correct wrong "=" to "+=".
Change-Id: Iec8c1bb7a12ad7f2e1d4ac07c75482e4d86a0ea2
Refs: #1382
2020-03-16 04:16:03 +00:00
0a4e6b49b4 get_one_cpu_topology: Renumber core_id (physical core id)
Change-Id: I4e4857e9a063d16d19d73adfabfc18a4b461bbfb
Refs: #1439
2020-03-12 05:19:25 +00:00
bb7e140655 procfs cpuinfo: use sequence number as processor
Change-Id: Idbfa48e9b60c03495d7ba72e962c55f0ffb8bec9
2020-03-12 05:19:25 +00:00
32b32f0c4a eclair: query phys memstart on arm64
Change-Id: I32db1153f5c1e4a217db69d8d55f0d0ccfa07c77
2020-03-12 10:53:41 +09:00
bf7fd81c1b Fix includes to handle module ref counter properly
Change-Id: If3f067a14e40c346f0455f8bfb8bbc8ab2934e88
2020-03-12 10:24:01 +09:00
92d191de9e xpmem: handle size 0xffffffffffffffff
Change-Id: I04fbe21966f8a831337576a14119afefe8a2ea4f
2020-03-09 16:26:09 +09:00
baf68f7e71 mcreboot: fix ETCDIR path (cmake 3.14.5 prepends etc)
Change-Id: Ib449ef294ddaf4a4d050d705fd05b8ede8b8150d
2020-03-09 07:21:10 +00:00
26bebb2749 sched_request_migrate(): fix race condition between migration req and IRQs
Make sure the caller thread holds the migration queue lock with IRQs
disabled until it notifies the target CPU, so that an interrupt cannot
deschedule it in the middle of the request.

Change-Id: I85995018ca1e8478ccc9723985b6e8efc9c3acfb
2020-03-09 07:05:15 +00:00
9e2196c9ce fix: memory leak due to forced termination during startup
Change-Id: Ide519f01702bfd17ae4576e04806b6d155ae846a
refs: #1397
2020-03-09 01:10:38 +00:00
93581cb142 test: runq_lock and over-scheduling fix.
Change-Id: I236ab585403076d716be350c8b51e8d352122f2b
Refs: #1400
2020-03-05 15:57:57 +09:00
67f5a1d4e0 migrate-cpu: Prevent migration target from calling schedule() twice
Symptom:
A thread could call schedule() twice.

Cause:
 (1) The migrator raises the rescheduling flag
 (2) The thread calls check_need_resched() for a reason other
     than the migrate IPI, e.g., a response to a system call
     offload. It finds that the flag is set and prepares to
     call schedule().
 (3) The thread is interrupted by the migrate IPI, finds that
     the flag is set, and calls schedule() in the interrupt context.
 (4) The thread resumes execution and calls schedule() again

Solution:
 (1) Reset the rescheduling flag when it is checked and found to be set
 (2) Set it again if it is decided not to call schedule()
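
A user-space analogue of the solution using C11 atomics (illustrative only; the kernel flag and code paths differ): the flag is cleared atomically at the moment it is checked and re-armed only if schedule() is not called, so two paths cannot both act on the same observation.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool need_resched = true;

    /* (1) reset the flag at the moment it is checked */
    static bool check_and_clear_resched(void)
    {
        return atomic_exchange(&need_resched, false);
    }

    int main(void)
    {
        if (check_and_clear_resched()) {
            bool may_schedule_now = true;    /* e.g. not inside the IPI path */
            if (may_schedule_now)
                puts("schedule() called once");
            else
                atomic_store(&need_resched, true);   /* (2) re-arm for later */
        }
        /* a second check (e.g. from the interrupt path) now finds it clear */
        printf("second check: %d\n", (int)check_and_clear_resched());
        return 0;
    }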

Change-Id: I5376662d0b02ca4ebb29b42732e347f3b82d766d
Refs: #1400
2020-03-05 15:51:28 +09:00
edf7b36669 runq_lock: Fix deadlock due to cpu migration.
Symptom and analysis:
The runq_lock of the migration source is acquired on
the migration destination CPU.

This happens in the following steps:
 (1) The thread stores the value of cpu_local_var(runq_lock)
     in a register when trying to perform
     ihk_mc_spinlock_lock() on the lock variable.
 (2) The thread takes an IPI and migrates to another CPU.
 (3) The thread resumes execution and acquires the wrong lock.

Solution:
* Disable interrupts before getting the value of
  cpu_local_var(runq_lock)

Change-Id: Ia0ea450b97f872dd6116252537e4a79f85adfc88
Refs: #1400
2020-03-05 01:51:40 +00:00
1a204b6674 perf: overflow test
Change-Id: Ic7aa0d99ae9a5b7d3ce4436129a360275e6937ca
refs: #1358
2020-03-03 15:55:13 +09:00
305511b48f perf: accumulate counter in overflow handler
Change-Id: If5f5a913e0fde889d1835ffb16c19ea0ad5e685a
2020-03-03 13:23:30 +09:00
606db376fd perf: fix perf_reset
Change-Id: I98122b0f9866bc1cc8713e7bd46fa879917ac6a0
2020-03-03 13:23:30 +09:00
5719b4c64a perf: update event structure
Change-Id: I5bc0fdd42db509b5d2daca7d97e29ad1f7d11f1a
2020-03-03 13:23:30 +09:00
343121c3d0 perf: set event period
Change-Id: Ibf569de7af8697e766c10b8d70905b8cdc4df083
2020-03-03 13:23:30 +09:00
86c45484e3 perf: add struct hw_perf_event
Change-Id: I0938e2b18064ad805a9edb6e15d26cf438bf0a59
2020-03-03 13:23:29 +09:00
767792808a perf: change count variable type to ihk_atomic64_t
Change-Id: I2bb6fab2c040683830b44fa6b963a86a233b883a
2020-03-03 13:23:29 +09:00
117f070fd6 perf: fix PERF_EVENT_IOC_REFRESH
Change-Id: Ia5d3fbe344346aabc3b5d40a801b3c21cfbaac97
2020-03-03 13:23:29 +09:00
a27909be88 ihk_atomic64_set argument to long
Change-Id: Ie9b5978028000236ae5846214a2ea14fcdffaf56
2020-03-03 13:23:29 +09:00
cec6f24559 PMU register support for cpufreq driver.
Change-Id: I11462d25ef83867ddf2e643798d1e3d0257f7f33
2020-03-02 07:14:27 +00:00
b3b8283f87 Add NEWS.md
Change-Id: Iecf193e3d5dac57f87ef8db2f43add5fb99f6a6e
2020-02-27 06:13:25 +00:00
d62f80a7c0 spec: Prevent rpmbuild from including build-id directories into package
Change-Id: Ie935d684eed3780f79f29a588233f5ab54a5f5d7
2020-02-25 10:44:08 +09:00
6d584feaef spec: Add mckernel-devel package
Change-Id: I51e9b88ed18b5a0662d1d77e344b84cb14e2189e
2020-02-25 10:44:08 +09:00
e2e015e120 spec: Remind that kernel-rpm-macros is no longer included in kernel-devel in RHEL-8
Change-Id: I4fb6a2d5f9114d9947b0eb848a21f772a2bece5e
2020-02-25 10:44:06 +09:00
5fb3abe87b spec: Relax Linux kernel version requirement for RHEL-8
Eliminate the need for rebuilding rpm for every RHEL-8 errata release.

Change-Id: I483c22d0b578809117a4f56881b11e51fcc608a7
2020-02-25 10:42:19 +09:00
37fd9e0cd2 test: rt_sigtimedwait: Add test cases for SIG_IGN and real-time signal
Change-Id: I4abafe73d81cfa77167289477ea8c5af701e7f2e
Refs: #1378
Refs: #1440
2020-02-20 04:31:08 +00:00
7e748b4ecb rt_sigtimedwait: could not wait for realtime signal
Change-Id: I341d2f0c9657c3b14eae89dddba074b68c654a12
Refs: #1440
2020-02-13 06:23:22 +00:00
cafb46efc7 rt_sigtimedwait: could not wait for ignored signal
Change-Id: I0f5a8e2eaae2b7c08a01f4ebb2c405b8972269a2
Refs: #1378
2020-02-13 06:23:22 +00:00
41ea9d16c4 mremap: Fix to work correctly when old_page is large_page
Change-Id: I5a589383644a8098d910e49cd7ade6df325e0366
Refs: #1383
2020-02-13 06:15:25 +00:00
4bbdee395e ptrace: fix execve and return value handling (fixes strace on aarch64)
Change-Id: Icb5cb7f7e99fdb74a8628bc6b550688df5fb056b
2020-02-10 07:45:06 +00:00
597baf8445 eclair: support for live debug
Change-Id: Ia9bc126e198ba4a80722529ce09de5eb0775d429
2020-02-10 07:45:06 +00:00
55faba77a5 dump: rewrite NMI handling (for resume) and fix PANIC register saving
Change-Id: I360e9aa8efa64b6ebd99b209a5dd4ee0dc7806cf
2020-02-10 07:45:01 +00:00
6bef773741 eclair and ldump2mcdump: obtain PHYS_OFFSET from dump_mem_chunks
Change-Id: I5dd5f9e7e6b5817e50b0a1855b67f163d3029f17
2020-02-10 07:42:23 +00:00
7882110e9f eclair: obtain MAP_KERNEL_START from kernel image
Change-Id: I946c640ddb2e2b32362760254a86c611517becf3
2020-02-10 07:16:06 +00:00
d1df17ffb7 eclair: fix register GDB response for descheduled threads
Change-Id: I0001d094b624bc03f2b178ec28a4cab51e2acaf0
2020-02-10 07:16:06 +00:00
72af689e69 mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: Iaf5cfb11c37bea6957b77a2114f783e9a46a48f2
Refs: #929
2020-02-05 06:39:57 +00:00
153d0609de ihk_os_{read,write}_cpu_register: Add async support
Change-Id: Ia2a2098550e856eeffbb20d8d0e0bcd57b85b6d7
2020-01-31 12:40:43 +09:00
83bbb87a0f mbind: fix processing when new range overlaps existing range(s)
Change-Id: I240a0205f0d836e4ff1a16b6739a3b366543bc06
Refs: #1384
2020-01-23 11:27:15 +09:00
f00d03445c epoll_pwait, ppoll, pselect: add to process sigmask
Change-Id: I6aa1db3b4c6ad81a8b5926fa87fc645269b103b6
Refs: #1361
2020-01-09 06:54:23 +00:00
911b07f507 fix: fork race condition caused by child and grandchild
Refs: #1329
Change-Id: Ia2d7641d1203f40155fef5db718d1bb2c583c1c5
2020-01-09 06:33:13 +00:00
5b26fe2956 do_process_vm_read_writev(): access local vector buffer using kernel virtual, PF if necessary
Change-Id: Ic90dca79e32d4151f585a5cbd5b2c7710534db0e
2019-12-23 02:54:52 +00:00
1db00ebc04 release_process_vm: free vm_range_numa_policy
Change-Id: I8084cd60a12b557b635b8e350f70d4e4f95d4c52
Refs: #1101
2019-12-20 07:12:16 +00:00
d5de68e97b eclair and crash: clean up architecture dependent codes and comply with Linux page_offset_base
Change-Id: Ie14ceb8bc9d816a9201dddd4020e2c21d6cfd686
Fujitsu: POSTK_DEBUG_ARCH_DEP_34
2019-12-18 01:53:29 +00:00
1526237bc6 x86 memory: use page_offset_base from linux
RHEL 7.5 and later kernels have a page offset that is no longer
necessarily 0xffff880000000000, leading to kernel panics if we
use the wrong address.

Change-Id: I3572fde1c31303a937855c23fbd3815ce0f96c64
2019-12-17 08:05:38 +00:00
b8d96a74ce Fix "arm64: Opt-out NMI for ThunderX2"
Change-Id: I95fabd17bfbae32320ed9e7a520c12e6f9527351
2019-12-17 14:48:10 +09:00
3c256e1a6c overlay: getdents: support lseek
Refs: #1421
Change-Id: Ife7ab1b50159a5897552ff695bb001ada27ec934
2019-12-13 03:49:20 +00:00
7fc4272b89 handle execveat system call on McKernel
Refs: #1366
Change-Id: I921e04a0df8d0d798fc94f675e5112dd2fec190a
2019-12-06 09:33:13 +09:00
d052acab1d xpmem: Fix deadlock in xpmem_remove_process_memory_range()
Refs: #1330
Change-Id: Ib62e3a7fe2811577ba8cabf174f64827e65c422c
2019-12-06 09:32:51 +09:00
91ea69cf8f xpmem: Make sure vm_range is used under memory_range_lock
Refs: #1330
Change-Id: I87a0d6042a2c388fbd260d8dff5d109106478872
2019-12-06 09:32:28 +09:00
0c63a2a3cd xpmem: Use correct process_vm in xpmem functions
Change-Id: I94c06ec69d0fe1e07d0b14bb44b448bbc63b9b63
2019-12-06 09:31:16 +09:00
a8696d811d xpmem: Support large page attachment
Change-Id: I4d672eee1c905160ece204d278f0afd9b6d7dc01
Refs: #1259
2019-12-06 09:30:51 +09:00
569dc33a9c mmap: fail and set -ENODEV when mapping an unmappable special file
The mappable special files are /dev/mem and /dev/zero.

Change-Id: Id1d4317104f901644e565007913e320d287e376f
2019-12-05 07:22:17 +00:00
4b252a990f SIGCONT: don't terminate process
Change-Id: Ib959a9e5341fda37bd055724ecb9319a469b7420
Refs: #1410
2019-12-05 07:13:56 +00:00
adb6cce3ce The process sending SIGCONT resumes the stopped process.
Change-Id: I64ee10172b99aa58540ffe8e9dd80fa0a64f4d01
Refs: #1420
2019-12-05 07:13:56 +00:00
ed21b6849d procfs: if memory_range_lock fails, process later
Change-Id: I3c5f24548455a63d8d5a4482f5081347f631885a
Refs: #452
2019-12-05 07:08:13 +00:00
37605740a4 support for backlog
Change-Id: Id8f503234e7afaa284e6b97dc264eb3a2af145c7
2019-12-05 07:08:13 +00:00
e069694c12 mem: Fix condition of whether in McKernel
Refs: #1324, #1329
Change-Id: I72bd69dbe65928f083b24513d50d29cabf3d6dff
2019-12-02 03:12:29 +00:00
dca1cb2625 arm64: Opt-out NMI for ThunderX2
Change-Id: I064da55e7e09e6d248c92ece5c56f9a9770c84a0
2019-11-28 02:22:55 +00:00
caac060684 mcctrl_getrusage: Round up cpuacct_stat_{system,user}
Change-Id: Ic1a236865fb3224dc9716c40a1eeb279c1fa1d70
2019-11-28 02:21:47 +00:00
d330721421 Rename struct cpu_topology to mcctrl_cpu_topology
To use a different name than the name in Linux kernel.

Change-Id: I44d10279195dfc9cfdc4788914b7d65b78292921
Fujitsu: POSTK_DEBUG_ARCH_DEP_40
2019-11-28 02:21:13 +00:00
157eeca41a README.md: Add contact
Change-Id: I3b038780ce91325151dfaef806e43eaaf71fe7e7
2019-11-28 02:09:41 +00:00
8ba725b225 mcstop+release.sh: Continue when releasing CPUs failed
Change-Id: Ib947843006ae9caa602e7b55309e68365edf4b2a
2019-11-28 02:09:01 +00:00
a563d780c1 munmap: fix deadlock with remote pagefault on vm range lock
Add similar protection to clear_host_pte as to set_host_vma (see #986)

Also make the page fault handler skip taking the lock only if the munmap
happened on the same CPU id

Change-Id: I6d9e68e8f8905b20bb2ccfa72848e04fe6404ab6
2019-11-28 02:07:45 +00:00
621533bbd3 Add ENABLE_PERF macros so that perf support can be toggled
Change-Id: Ic50c8b329af63e63579b6a60b9557344100eaac4
2019-11-26 09:15:05 +09:00
37ea770f8c mmap: Round up map size to the page size when MAP_HUGETLB is specified
To match the behavior of Linux.

Change-Id: I7bcc2cb3c1e678ffc28f6b825c7a55032441dded
2019-11-14 07:24:25 +00:00
edd3ea0103 Revert "memory_range_lock: Enable interrupt when trylock fails"
This reverts commit 0d3ef65092.

Reason for revert: This fix causes circular dependency with memory_range manipulation and TLB flush. See #1394.

Change-Id: I4774e81ff300c199629e283e538c0a30ad0eeaae
2019-11-11 15:28:08 +09:00
41d37bcd30 mcstop+release: argument for rmmod path specification
Change-Id: I80e4e7136a90bc65050ab8f7d39615581c47f317
2019-10-03 13:58:20 +09:00
309145587f perf_event_open: Add support for counting REF_CPU_CYCLES
Use the thread's TSC count instead of a performance counter

Refs: #1025
Change-Id: I1d7a18f1c52f1d52087002d31818638a6b206014
2019-09-26 07:38:04 +00:00
bc06d68d84 sigsuspend: Make sure to receive the correct sigevent from do_kill
Change-Id: Ife9cf36a81f353e0575f6802f1e56f7dd4cb0425
Fujitsu: POSTK_DEBUG_TEMP_FIX_33
Refs: #1350
2019-09-26 07:34:34 +00:00
18412616e1 munmap: Change permission of VMA back to RWX on unmap
Change-Id: Ic02098e7458dd8fa2961fb03dc32e37fb18c5dc5
Refs: #988
2019-09-26 03:49:50 +00:00
c371fbf13b file map: cause SIGBUS when accessing a page beyond EOF
Change-Id: Iaf7d792413e674267fd1c05c382212c8f67d8f5b
Refs: #1291
2019-09-26 03:41:23 +00:00
1492f16d67 make syscall_enter arch-dependent
Change-Id: I4317f3443902620ef5b3807ced05c80fa5eebbec
Fujitsu: POSTK_DEBUG_ARCH_DEP_90
Refs: #1357
2019-09-26 03:28:57 +00:00
fd38ab6fd0 Add test results for "syscall offload regardless of mcexec life and death"
Change-Id: Iee759ae8814aff4274ff81dc14f6d5d7a01494c5
Refs: #1321
2019-09-26 03:26:20 +00:00
f115bae8a7 include interrupt handling time into system time
Change-Id: If2ed2d488b4040d288d712f0a244505adbcec6f5
Refs: #1221
2019-09-26 03:21:28 +00:00
ba80dd8650 arm64: Fix for ptrace instruction rewrite on thunder-x2.
- Fixed the problem that instruction rewriting by PTRACE_POKETEXT was not
  reflected. The cause is that the instruction cache was not flushed.

- Add an instruction cache flush in ptrace_report_signal().
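
The user-space analogue of the same requirement, as a hedged sketch (hypothetical helper, not the McKernel/mcctrl code): after rewriting instructions that will later execute, the instruction-cache range must be invalidated, e.g. with GCC/Clang's __builtin___clear_cache().

    #include <string.h>

    /* hypothetical helper: patch a code buffer, then flush its I-cache range */
    static void patch_code(char *code, const char *new_insn, size_t len)
    {
        memcpy(code, new_insn, len);                 /* PTRACE_POKETEXT analogue */
        __builtin___clear_cache(code, code + len);   /* make the rewrite visible */
    }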

Change-Id: Ie9d34d3d33e1fd85aef5fe419345d82c6ca781fb
2019-09-26 02:57:07 +00:00
06960a41d9 test: signalonfork+wait: update error_injection.patch
Change-Id: Ia27e9b2fa6ec757bb05229ba3bf76e5e3bd43e5e
2019-09-26 02:34:46 +00:00
86a2aabb24 test: perf_event: add log of ThunderX2 machine (apollo)
Change-Id: I27aa1e30abdf4ed640a80b4016bcf108262ce9e3
2019-09-26 02:16:08 +00:00
b4101d9c36 brk: Fall back to demand-paging only when physically contiguous memory is unavailable
Change-Id: Id5d937b2cab7de1ad8925c9b95d85fcb620df9c6
Refs: #1353
Fujitsu: POSTK_DEBUG_ARCH_DEP_60
2019-09-26 02:16:08 +00:00
ec31d72483 freeze: add freeze_thaw test
Change-Id: I31db80b89adca9ac354a96ad21073b269d8a0e24
2019-09-26 02:13:23 +00:00
83ade5cdcd freeze: ignore multiple freeze request.
Change-Id: Ib7a7c4677137446cf7f7b387d016bacc7f0e9620
2019-09-26 02:13:23 +00:00
dec133c1dd freeze: restore state with thaw request
Change-Id: I7d6efd2c47020bedb716b6bd72d8a72b874c3cb2
2019-09-26 02:13:23 +00:00
04a528ab27 freeze: no process create in freeze state
Change-Id: Ia9cb7b8fb22d1c9d6c5a3fcdbd2873ef22f27c9f
2019-09-26 02:13:23 +00:00
8e4073c2ca freeze: allow interrupts in frozen state
Change-Id: I1d502f828ab9f9c0e1223d021979ac3dcf4d0c25
2019-09-26 02:13:23 +00:00
ff982b8594 freeze: change freeze-thaw to normal interrupt
Change-Id: Ib4dbac28f0074595e92ef316945b37ef4bc18327
2019-09-26 02:13:23 +00:00
299d47abf5 fork: memory leak detection test.
Change-Id: I9c64f8fdaee15642b3d1d2d7d869927b0bcd6511
2019-09-26 01:56:16 +00:00
f2460695c4 fork: do_fork: free resources when an error is detected
Change-Id: I0a29bb2cf886228effb088afe97d1b614728f517
2019-09-26 01:56:16 +00:00
6ce5c754f3 fork: settid: return error code.
Change-Id: I0678c266d8608b6d557b2b1e29e59bd6861314b8
2019-09-26 01:56:16 +00:00
e932f2e70c fork: release_thread: fix release of cloned thread
Change-Id: I390093bdb47a348cfec287cceaff22712df36bd9
2019-09-26 01:56:16 +00:00
bb08742467 fork: clone_thread: free resources when an error is detected
Change-Id: I922f3fddc35942ef2c67db6673980770731dced9
2019-09-26 01:56:16 +00:00
3e9fdfc0f1 fork: copy_user_ranges: rollback on error
Change-Id: Icdb8399cbce31835abcaeb783dde3ff14d30af6a
2019-09-26 01:56:16 +00:00
58f4593478 fork: fpregs: return error code.
Change-Id: I6ff150a39cd8952adad9b21d0c9f8514126ef957
2019-09-26 01:56:16 +00:00
de0e07f29e schedule: Skip save_fp_regs when the process ends
Change-Id: I32ff71a0dfcd7196d2c9e6cc1d68210933470bbb
Fujitsu: POSTK_DEBUG_ARCH_DEP_106
Refs: #1354
2019-09-25 06:43:08 +00:00
a4b83dc6d4 eclair: use snprintf instead of sprintf to prevent buffer-overrun
Change-Id: I2a27cffe303201e1738f115258f6e02058dbc63d
Refs: #1356
Fujitsu: POSTK_DEBUG_ARCH_DEP_38
2019-09-25 06:38:55 +00:00
beac6c3e80 make checking write-combine arch-dependent
Change-Id: I4c0fca7d34e69b4774141e115b8ebc03c5c1e8b3
Fujitsu: POSTK_DEBUG_ARCH_DEP_12
Refs: #1355
2019-09-23 16:42:26 +09:00
5d6715078f fix: madvise changes only the first one of vm_ranges
Change-Id: I83248c1162e28c3c24ca5f6b0933e1a8ca434d6b
Fujitsu: POSTK_DEBUG_TEMP_FIX_37
Refs: #1351
2019-09-08 14:22:00 +09:00
0615a0b00b procfs: mem: Change permission to 0600
It's 0400 in RHEL-5 and 6, but changed to 0600 in RHEL-7 and 8.

Change-Id: I9fb229e4c447eaa4570b1e2619c4fe039c07c86d
2019-08-19 01:17:03 +00:00
51cd7cbb6c arm64: rusage: Fix counting contiguous PTEs
Change-Id: I7e89c25d49dc1f6efe1c27c76c66c6fedd22af1f
Refs: #1342
2019-08-16 03:55:29 +00:00
0c1cae45fe coredump: Support signal number
Change-Id: If220bcd0865569a566e08aa53cae748fdc6317d0
Refs: #1340
2019-08-08 13:44:15 +09:00
11ef2f8092 coredump: Support threads
Change-Id: Id75ade6c87b15abcff5d772d90f77950376a32c1
Refs: #1219
2019-08-09 04:00:15 +00:00
12aef0b578 arm64: mcctrl: Fixed to search vdso_offset_sigtramp dynamically.
Change-Id: Iab5459194ca5281a1680a7fc26ae8bfaf1945a13
Refs: #1341
2019-08-08 00:48:22 +00:00
9b3450ee7e syscall offload regardless of mcexec life and death
Change-Id: I7db089993d3ee5ae6032f5085db2b67cef99fdfb
Refs: #1321
2019-08-08 00:39:26 +00:00
0d3ef65092 memory_range_lock: Enable interrupt when trylock fails
Also use read-write-lock

Change-Id: I03150b7208325ec1fe422dcd5f931e4e41c8e40e
Refs: #452
2019-08-08 00:38:55 +00:00
258156b57e support for read/write-lock and read/write-trylock
Change-Id: I609071c0f6234d0d413c8b312d8a8379abf6846e
Refs: #1323
2019-08-08 00:38:55 +00:00
8efced7bf7 mmap: Check if size exceeds available memory when MAP_HUGETLB
If it does, mmap fails and sets -ENOMEM

Change-Id: I4f0d6e18ee3a7c8e32e251b7ed07ee9f76305603
Refs: #1183
2019-08-08 00:31:36 +00:00
2dd8687974 flush instruction cache at context switch time if necessary
Change-Id: Ic09415ea772a9de6dca43a98168a8346ca86d3e7
2019-08-08 00:29:47 +00:00
f0bc1a6b07 cmake: Add option for "mem: per-CPU allocator cache (ThunderX2 workaround)"
Change-Id: I7156cf433b2081246d1d9b8e4fde489609676ef1
2019-08-08 00:29:34 +00:00
c52370b959 test perf_event: minor fixes (add signal handling, etc.)
Change-Id: I837d962bcaf13d3a523f80ff77f75b7fd51a98b7
2019-08-05 16:00:22 +09:00
9c78d4d249 pmu: define event validation in architecture dependent code.
Change-Id: Ia053af146ba3c89810892271cae93def6d9fd7c8
2019-07-31 16:18:50 +09:00
b6285c9aa9 pmu: Use bitmap instead of index to specify counters / events
Also make the software index (or number) the same as the hardware
index.

Change-Id: I847180e94bf2c57644ae2f8f571cdb4a84eac991
2019-07-31 16:17:20 +09:00
b945367c90 pmu: add ihk_mc_perfctr_value function
Change-Id: I88d25586dd470737a3eac4c3a4f1955ae6e41d64
2019-07-23 16:20:17 +09:00
0f434288e1 pmu: change to atomic register access.
Change-Id: Iebbdb8ca97e7a73f9d74138650ae18ce3a0f2605
2019-07-23 16:20:16 +09:00
b5cd813229 pmu: remove comment
Change-Id: If5819ce6f665c668f1f29724a814770957df0de0
2019-07-23 16:20:16 +09:00
7268942c35 pmu: implement ihk_mc_perf_get_num_counters.
Change-Id: I752103aedd9201fc00bda11228ca0bcf5103f12d
2019-07-23 16:20:16 +09:00
f8cad24a9a pmu: move cpu cycle event type comparison to arch dependent code.
Change-Id: If069f8893fe59e3517569b74b3a27b5267ebac03
2019-07-23 16:20:16 +09:00
2b6b3f31e5 pmu: remove pmc_{init|start|stop|reset} system call
Change-Id: I6eb65ed8c18558418c7aabfee75cd1974f4c03ff
2019-07-23 16:20:16 +09:00
ca19ee434a fix: Bug in perf_event_open error code (LTP: perf_event_open01)
Change-Id: Ia7c942cb3c94ad5e6a0d8640f321f427cd1cd5f9
2019-07-23 16:20:16 +09:00
bb2589bac4 uti: futex_wait: Use kmalloc area for wait queue
Change-Id: Ida994c87334f9613bbf5cbda45b6b5474fd4c6be
2019-07-23 04:53:51 +00:00
e1c6e17400 uti: Use only general registers in libmck_syscall_intercept.so
Change-Id: I8e8e98bdc7e621aa111c0940d915ebe1775a10c3
2019-07-23 04:53:06 +00:00
207eba93ea uti: syscall_backward: Use kmalloc area to pass syscall arguments
Change-Id: I478a9b40b75f3d1d68c4446810a6236fe2f3a96c
Fujitsu: POSTK_DEBUG_ARCH_DEP_106
Refs: #1320
2019-07-22 03:52:44 +00:00
06af2d62c6 pmu: implement event mapping function.
Change-Id: Iac1ec99152b17a19dba0bf1a35f07724b8abc5a1
2019-07-18 16:39:18 +09:00
3e267e24cb exec: Allocate necessary number of pages to argenv area
Change-Id: I298a0de2f4e34ed774e2db7d90167dbe0d35586e
Refs: #1174
2019-07-17 06:38:35 +00:00
e58e1c6e33 uti: cmake: Add include dir pointing to libsyscall_intercept_hook_point.h
Change-Id: Iaea58725a16722d867cb27ffb4d9347b8756f9f2
2019-07-16 04:25:51 +00:00
fb924ebb9d README.md: update packages and git URL
Change-Id: I895dbece58a0ea69b39d1e07d8a16a22a2fed9a7
2019-07-08 04:24:37 +00:00
ac61577414 test: rusage: Add test private-mapping device file
Change-Id: I8b298ce598c2a5560138a1b694ccc7204d4ebbde
2019-07-05 01:18:35 +00:00
4cee9b1a27 rusage: Add comment on counting COW-source pointed-to by only fileobj
Change-Id: I082f6738dd29257c05e8a0e4b0af23dd8ffab449
2019-07-05 01:15:47 +00:00
b55e164669 page_fault_process_memory_range: Disable COW for VM region with zeroobj
This fixes ostest-mem_limits.001, which tries to anonymously mmap 95% of
total memory. It reports a failure because:
(1) McKernel tries to allocate a physically contiguous area and
    fails
(2) It turns on demand paging
(3) It tries to obtain a page from zeroobj and fails
(4) It allocates a new page
(5) It performs COW on the page, which is unnecessary

Change-Id: Iddf0548bb9216f9bf91fb03fa21f890e599bfdad
2019-07-04 13:58:22 +09:00
aa66fe2cb1 extend_process_region: Fall back to demand paging when not contiguous enough
This fixes ostest-mem_limits.005, which tries to move brk by 95% of
total memory. It reports a failure because McKernel tries to allocate
a physically contiguous area and fails.

Change-Id: I50a61cb7103fdbdbe051f0ae276a79e8e2dcdda3
2019-07-03 07:49:45 +00:00
3b74b0a093 rusage: Move pgsize_to_pgshift to arch-memory.h
Change-Id: Ia10b6e5c7d078d345347a79a3e98c06c16d28d6a
2019-07-02 09:10:04 +00:00
0267a0c8ea procfs: Fix type of number of threads
Change-Id: I7d5d17ae1e619d789cdb843f183be640efdbe9e2
Refs: #1277
2019-06-11 16:51:31 +00:00
b3b7801d51 overlay: fix /proc/PID/task/ corner cases
Change-Id: I17086c684af4c665d0c228b4a65cdb232eccf602
2019-06-07 01:48:10 +00:00
10f1fe76db ARM: set_range_middle(): fix PT deallocation bug
Change-Id: Ic8c1e1193ae33d1ae81e0df362ae1a6944c6c3b2
2019-06-06 01:11:16 +00:00
089b443aaf mmap()/shmget(): use Linux default huge page size when not specified
Change-Id: I8a9e3bed65ac1902adfaeaa254597dd30f540319
2019-06-06 01:09:38 +00:00
e9955a4bba Make heap and stack private mapping
Change-Id: I4306566b3bbbe27d206c5518a2d36d117ba4ca9f
2019-06-05 15:21:20 +09:00
dc52c8a11a crash: use fix kernel mapping instead of module space on ARM
Change-Id: I2d32dac78fc241a89bc98f8c098d4e63c8593e79
2019-06-05 14:31:48 +09:00
bc4629dfb0 ARM: fix performance counters allocation
Change-Id: Ie6c8beacf268462064f59b063d9c7b635c906dc4
2019-06-05 14:31:43 +09:00
99fba2df1c mem: per-CPU allocator cache (ThunderX2 workaround)
Change-Id: I7694524c5e9674a6f7bfcd911f8b0dbbead7df5a
2019-06-03 01:22:03 +00:00
239c95449b x86: add SMP barriers
Change-Id: I7fb36bd3d26fa272697db7c92495ce5fba34aeba
2019-06-03 01:22:03 +00:00
9dfc139eae cmake: kmod: Fix cross compile decision
Consider "arm64" to be "aarch64".
It mistakenly considers cross-compilation when compiled through spack.

Change-Id: I914df482e21517adc1105512ea3d8919ef1577b1
2019-05-22 02:34:55 +00:00
bc81d362b4 madvise: MADV_HUGEPAGE, MADV_NOHUGEPAGE: Fix error check
* Returns -EINVAL except for hugeobj and shmobj
* Fixes ostest-madvise.012 and ostest-madvise.013

Change-Id: Id1f1d6cc0c81edd204228ce5f75b641985e70cee
2019-05-13 05:54:45 +00:00
90b6aec53d get_one_cpu_topology: Fix error-handling
Fix the error handling of the following two functions:
  ihk_device_get_cpu_topology: Returns NULL when not found,
                               valid non-NULL pointer when found
  get_cache_topology: Returns NULL when not found,
                      valid non-NULL pointer when found,
                      a negative error number on error
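
A hedged caller-side sketch of that convention (hypothetical function, not the mcctrl code; it assumes the usual Linux ERR_PTR encoding for the negative-error-number case):

    #include <linux/err.h>

    /* hypothetical caller: distinguish "not found", "error" and "found" */
    static int handle_topology(void *topo)
    {
        if (!topo)
            return 0;               /* not found: nothing to record */
        if (IS_ERR(topo))
            return PTR_ERR(topo);   /* propagate the negative error number */
        /* valid non-NULL pointer: safe to use */
        return 0;
    }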

Change-Id: Ied13a61d4ab0c314477c45ea659ff2b798ad97ee
Fujitsu: POSTK_DEBUG_TEMP_FIX_21
2019-04-25 01:53:30 +00:00
0887e0de6d x86_64: mcexec: Remove "#include <asm/prctl.h>" (again)
Change-Id: Iae78954d5b520907cd6a85058e3a9fc1b842999f
Fujitsu: POSTK_DEBUG_ARCH_DEP_77
2019-04-25 10:33:00 +09:00
2c5c47344d x86_64: mcexec: Remove "#include <asm/prctl.h>"
Change-Id: I441f7a1c2e23b927fcd065fefba3ef3617356c18
Fujitsu: POSTK_DEBUG_ARCH_DEP_77
2019-04-25 10:14:19 +09:00
b9f223ceca crash: mcvtop: print proper page sizes for ARM contiguous pages
Change-Id: I2f677e64c743776de491262613b1014fe2bb7a8e
2019-04-23 08:54:26 +00:00
6297181dcd crash: mcps: print both PID and TID
Change-Id: Iafac099b1d953642509711a972962894b6111984
2019-04-23 08:54:14 +00:00
80f964e44f rus_vm_fault(): cleanup and early exit on NULL access
Change-Id: I90b18988989d4e377ed9c35df6b2e6bcdddd13b6
2019-04-23 08:53:59 +00:00
cc07d6e017 mcctrl_get_per_thread_data: Un-inline
Change-Id: I881db244ca551b3ca232918cb0b4245776f17295
Fujitsu: POSTK_DEBUG_ARCH_DEP_56
2019-04-18 02:35:52 +00:00
07c517828d procfs: add number of threads to stat and status
Change-Id: I98dd0868b20e9a1725c7d6e4f8379a4d86769780
2019-04-18 02:20:27 +00:00
75e42badf4 procfs: pagemap: Return EINVAL for unaligned offset
Change-Id: I2297818b0b31790b5452cb6f80dcba4192a7d120
2019-04-12 20:19:14 +09:00
bdccbf7356 MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
Change-Id: I41470c082308c7c1ac91f88db2229958398d2e68
2019-04-10 20:26:13 +09:00
ad3ee26d36 Fix various issues in McKernel crash extension.
Determine V2PHYS_OFFSET dynamically.
Fix x86 hole handling in the 64-bit address space.
Fix ARM64 virtual address handling and support separate user-space
and kernel-space translation tables (i.e., TTBR0 and TTBR1).
Fix page table walker's lookup functionality.

Change-Id: I6b281693cdc88bd1b8fe3f4b8f40a6af3ca95cc0
2019-04-09 01:52:49 +00:00
16f8ccb35b mcreboot: do not embed sudo when run as root
Change-Id: I59ebb4c72c12af8600a6d6d0eb13f6459ccf5bc2
2019-04-09 01:52:49 +00:00
3fda54ece8 IHK: support for using Linux work IRQ as IKC interrupt (optional)
Change-Id: I2a0e59a47c229fd9271866199c3c4d30e1ddd7f9
2019-04-09 01:52:49 +00:00
4d252c2bb2 map_fixed_area(): disable debug msg
Change-Id: Id6b3d001d908432c1adb6bba875e158a1424850d
2019-04-09 01:52:49 +00:00
0cf89c5682 Linux lockless linked list implementation
Change-Id: I8bd6ee989cecac269b55b3a0ff10cf8543629001
2019-04-09 01:52:49 +00:00
0d902872a1 x86: fix xchg() and cmpxchg() macros
Change-Id: I6faf0fff8a8595734fca6247634cdae6b86483b3
2019-04-09 01:52:49 +00:00
9b6a88eeeb x86_64: Move arch-specific interrupt vector number to arch-dependent code
Change-Id: Ie3cc631ec351503a619b019432388a827d75334c
Fujitsu: POSTK_DEBUG_ARCH_DEP_75
2019-04-08 01:48:07 +00:00
96b4729cd5 ihk_mc_map_virtual: Release virtual address range on error
It was telling the vmap allocator to release a wrong address range
(physical address range).

Change-Id: I82236ac0086b5da24ac49219166abf363672d838
Refs: #985
Fujitsu: #11
2019-04-08 00:43:55 +00:00
3372bbfd23 crash extension: port for ARM64
Change-Id: I47a4f13e96718e94c08ee8bc3e9b0be38d7a8a55
2019-03-29 07:55:28 +00:00
f17c30da07 do_mmap: give addr argument a chance even if not MAP_FIXED
hugectl relies on that to check if a range is free

Change-Id: I97963eef15c866f642e884b063b5caf5d827c776
2019-03-29 07:52:57 +00:00
9a0eb915fb Test "QLMPI (qlmpi_testsuite)" on arm64
Change-Id: I079fda2231ffb19b41fe86436d51ce9f83436c9b
2019-03-29 07:48:05 +00:00
a5ded1fc06 Add KNOWN_BUGS file
Document known major bugs (e.g. Linux crashes) that have not been
fixed downstream and might require workarounds on specific
hardware configurations.

Change-Id: I51e5d23243afd4489ce1ae25e736afc27b2c8202
2019-03-29 07:47:28 +00:00
de042b2cb2 IPI: use logical CPU ids in ihk_mc_interrupt_cpu()
Also make remote TLB invalidation arch independent,
removes POSTK_DEBUG_ARCH_DEP_8.

Change-Id: I2b0fbcfa2bfe5da07607863e3e772d8e892e8525
2019-03-29 07:45:06 +00:00
2cee82673b test: perf_event_open: Fix test program
Change-Id: Ie5af8fb3ab7452078f2c35ec14c6369d86eedec3
2019-03-29 07:42:05 +00:00
dfb3bef96d irqbalance_mck: replace extra service with service drop-in
Using a drop-in instead of an extra service avoids having to juggle
between both services (especially since irqbalance_mck did not have a
Conflicts=irqbalance.service statement)

That way, we only have a single service to check for (irqbalance.service),
and system administrators should find this less confusing if they normally
rely on irqbalance.

The drop-in is also installed in /run, so it will automatically disappear in
the event of a Linux crash or a reboot without shutting down McKernel.

Change-Id: I004f4f25d9ca037e411e0bc91f4555db138ecfef
2019-03-27 15:54:25 +09:00
2dc51530f3 mcreboot/mcstop+release: support for regular user execution
Change-Id: I9088f9c49bea13826bbab6348aa5560e6d91071b
2019-03-27 14:31:08 +09:00
13758417c5 Make boot scripts arch independent and move them to scripts
Change-Id: I3f4c3e366b325df17208a41d5f842c1a2a888494
2019-03-26 09:47:38 +00:00
c32edff2bb uti: rename x86-specific 'fs' to 'tls' + arm implem
Note: the original Fujitsu implementation didn't rename the various
save_fs functions/descriptors to save_tls for some reason; might as well
go all the way, though...

Change-Id: Ic362c15c8b320c4d258d2ead8c5fd4eafd9d0ae9
Fujitsu: POSTK_DEBUG_ARCH_DEP_91
2019-03-22 16:38:29 +09:00
8356ef6c96 arm64: uti: Add arch-dependent helper for context switch
arm64 performs context-switch in kernel space instead of user space as in
x86_64.

Change-Id: Ib119b9ff014effb970183ee86cfac67fab773cba
Fujitsu: POSTK_DEBUG_ARCH_DEP_99
2019-03-22 06:52:21 +00:00
63d500515a mcexec: fix printf format warning
An old commit from before -Werror was enabled got merged,
blocking other builds. Quickly fix it before anyone notices

Change-Id: I5a034cef6f79e3e99b381bb1a5d97088e33a6718
2019-03-22 05:25:34 +00:00
791e8c2114 Remove mcoverlayfs code
The mcoverlayfs code is now unused (technically it should work on top of
the soft emulation but is not well tested, and untested unused code is bad).
Remove it.

Left the unshare/bind_mount_recursive code in mcexec behind a new
MCEXEC_BIND_MOUNT ifdef (only in config.h.in directly, to discourage use.
It disables the ioctl as well, but the main code is still compiled to
keep up to date with Linux API changes... although it uses a kallsyms
lookup, so it does not validate much more than "the symbol still exists")

I honestly think this should go as well (people who would want to use it
are root and could do it manually), but will give up for now.

Change-Id: I832b6a8ab19e24ed67a1a5044b1c6c32381ae0aa
2019-03-22 05:18:43 +00:00
0bb612caea Fix test of getrusage fixes
* fix: Bug for getrusage return incorrect ru_maxrss
* fix: Bug for getrusage(RUSAGE_CHILDREN) return parent info
       (POSTK_DEBUG_TEIX_72)
* fix: Bug for getrusage often return incorrect ru_stime

Refs: #1032
Refs: #1033
Refs: #1034

Change-Id: Ifba95e4cb48ae551839819eb3abe26b37da4b196
2019-03-22 05:15:00 +00:00
5e992bc195 arm64: test: Add Makefile that was left out of an earlier commit.
Target commits:
  Test "Direct access to McKernel memory from Linux." on arm64
  Test "Scalable Vector Extension (SVE) support." on arm64

Change-Id: Ia9dc97c5cf0c4cf223423b4257745ea2101bee1d
2019-03-22 05:08:25 +00:00
08f817a654 page fault: clear writable bit for non-dirtying access to shared ranges
Change-Id: I3f3212b2aac79587f04450dfbdee9cb8a56bee04
Fujitsu: POSTK_DEBUG_ARCH_DEP_21
2019-03-22 05:03:03 +00:00
b87ac8b8c0 reproducible builds: remove most install paths in C code
In order to speed up test bot work it would be helpful to check for
identical build outputs and skip tests if required.

This removes most use of the install path in C code:
 - ql_mpi uses /proc/self/exe and looks for talker/server in the same
directory as itself
 - mcexec looks for libihk.so in /proc/self/maps and uses that path as the
LD_PRELOAD prefix path
 - rootfsdir is not used right now, but until a better fix happens just
hardcode it; someone who wants to change it can set it through cmake

There is one last occurrence of the install directory, MCEXEC_PATH in
mcctrl's binfmt code, which the build system just overwrites with a
constant string at build time instead of trying too hard to remove it.
It would be possible to pass it as a kernel parameter or look for
mcexec in PATH, but this is too much work for now.
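
A minimal sketch of the /proc/self/maps lookup (illustrative only, not the mcexec source; it matches libc here simply because libihk.so is not mapped in a standalone example):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char line[512];
        FILE *fp = fopen("/proc/self/maps", "r");

        while (fp && fgets(line, sizeof(line), fp)) {
            char *path = strchr(line, '/');
            if (path && strstr(path, "libc")) {   /* mcexec looks for libihk.so */
                *strrchr(path, '/') = '\0';       /* keep only the directory  */
                printf("prefix: %s\n", path);     /* use as LD_PRELOAD prefix */
                break;
            }
        }
        if (fp)
            fclose(fp);
        return 0;
    }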

Change-Id: I5d1352bc5748a1ea10dcae4be630f30a07609296
2019-03-22 05:01:32 +00:00
a48a2cd3e8 add definition of util_register_desc system call number
Change-Id: I2047d33b5667761ce8399bad78eff6ab668b6ce4
2019-03-22 04:58:24 +00:00
7c238c27c9 uti: Check syscall number definition in hook()
Change-Id: I24d226199d03d23a12710ff1cad9fef29a6feedd
2019-03-22 04:58:04 +00:00
de77d2b061 add syscall_intercept.c to the mck_syscall_intercept
Change-Id: Iff8cfd2868118b6a9db7e24e4f00537251d1346c
2019-03-22 04:55:18 +00:00
52f89cf8fa add system call execution for uti
Change-Id: Ide79726b79964e72596ed78c87ec61d1eaf7e1c7
2019-03-22 04:54:34 +00:00
c96dfb0c68 mcstop: add -k to kill processes using /dev/mcos* before shutdown
Use lsof to check for processes that still have /dev/mcosX open at
shutdown time.
If lsof is not installed, the check is simply not done (empty PROCS
result)

If -k is not passed, print a message listing the PIDs of the users and exit
(taking bets someone will use that and sed to kill out of mcstop+release
and rerun the stop script instead of passing -k at some point)

Change-Id: Idba7486fdede4990d9885d23f8077f33839daeed
2019-03-22 04:33:33 +00:00
21c9e57646 page fault: use cow for private device mappings
Private device mappings still need copy-on-write to work, even if
there is no page.

Change-Id: I96e3e1eea81104f6b09bb7fda1105d9eeb489155
Refs: #1254
2019-03-22 04:30:55 +00:00
312b6c171b README.md: update package names
Change-Id: Ie4d37d724e60e8e473cb60db8a77b5b3a9681f4e
2019-03-19 02:20:38 +00:00
2ce695b47b proc: resurrect /proc/PID/stat and fix a few fields
Change-Id: I8ffcfde4db78c66ea10845a0451ae2610261f832
2019-03-18 20:33:29 +09:00
e5c1fdf129 MCS lock: make implementation arch independent
Change-Id: Ie5b2182555bbe1a11a005988db069d4b38f85401
2019-03-18 09:53:30 +00:00
9e3dd53c58 arm64: sve: coredump bug fix in non-sve environment.
Change-Id: I4cba5580b6367c67bef457c0273e9b70ad4a0756
2019-03-18 08:12:37 +00:00
fe53c6e0a5 Test "Process swap (swapout)" on arm64
Change-Id: I1eecb046575480966febbcb55e5f4ade6313275b
2019-03-18 08:12:14 +00:00
e988bfaf50 test: uti: Elaborate descriptions of CT12-20
Change-Id: Idfaa5fc3bfc7b65e24873f0c5e15c31a9d129420
2019-03-18 16:59:07 +09:00
f6f48b1210 Test "Direct access to McKernel memory from Linux." on arm64
Change-Id: I6e862146c3b591e671c526302bb1aad787f6bb83
2019-03-18 06:26:43 +00:00
70b42fde5d arm64: cmake: Add -mgeneral-regs-only option.
Change-Id: I0cbdc65c4b95195831344f4006bfc85b1ea58139
2019-03-12 17:26:18 +09:00
ccb36a5849 cmake: change how warning flags are added
Setting CMAKE_C_FLAGS_DEBUG does not work as first expected:
 - set(... CACHE) didn't do anything because the variables were
initialized previously
 - We could set with FORCE but then users could not change the value
 - There is a way to only do that on initial cmake run but it has the
same problem

Thus, use a new regular cache variable directly instead

Change-Id: I20741fb385c171c6c1088bbd6c25666067e07288
2019-03-08 17:22:20 +09:00
ea7f517e3d arm64: ptrace: Fix overwriting 1st argument with return value
Since arm64 shares the return value with the area of
the first argument, rewriting the return value before
the system call execution completes destroys the first argument.

Change-Id: I959944879254d8dd3a29489a65d8f274d45338e6
Fujitsu: POSTK_DEBUG_ARCH_DEP_110
2019-03-08 08:06:19 +00:00
ac18a24a27 arm64: fix phys_to_virt() calculation to be the same as Linux.
Change-Id: Ibbe17d33fd80eacff990b053fa17d8d320c227f1
2019-03-07 16:51:18 +09:00
8880710fad README.md: few minor updates
Change-Id: I7207ab2cf6ca5b69b464e0c41d2dd0ce3e80b674
2019-03-07 13:12:39 +09:00
03a85825ed copy_user_pte: base memobj copy on range & VR_PRIVATE
Some memobjs (e.g. devobj) will not be considered 'in memobj' by
page_is_in_memobj.
Instead of trying to play whack-a-mole with the non-fileobj memobjs,
base the copy check on range's memobj and VR_PRIVATE (do not copy
MAP_SHARED mappings, so the fault handler will do the right thing™
when required)

Change-Id: Ic32cdc7766754f6559753b34845eb8c5cff6ed13
Refs: #1255
2019-03-06 17:44:11 +09:00
940eeca6f5 x86 spinlock trylock: make next initializer old-gcc friendly
old gcc versions are stupid with nested structs and need us
to initialize .tickets.head and .tickets.tail in one go

Change-Id: I0d4caf8236066e7edf4a12e3270114132ced9585
2019-03-06 06:30:30 +00:00
19b02cf4ed arm64_cpu_capabilities: flatten struct
The midr_* part of the struct was never used, and confuses older gcc
with partially uninitialized assignments that were not correct.
Just flatten the struct

Change-Id: I7a9cfe064ab97cdcd5ac50ce4fb713c4d7983bd3
2019-03-06 06:30:30 +00:00
76a0cc71fc warnings: fix broken -Wmaybe-uninitialized
These variables cannot be used uninitialized, and newer gcc versions
correctly do not bring the warning up, but this will shut up older ones

Change-Id: I2b2ea9b557196a3e7eea1e04dd1f160bd12d6e54
2019-03-06 06:30:30 +00:00
ab39798181 send_syscall: remove unused variables
Change-Id: I0a350b8c7dbf27960544dd3651941d3905f93fc6
2019-03-06 06:30:30 +00:00
0cc3496747 warnings: fix missing field in initializer
use the generic struct zero initializer instead.
Older gcc versions used on arm also seem to have trouble with '{}',
so use '{ 0 }' instead

Change-Id: I83d43b05f8d1d44e1dd86502b48e28fe242e1db2
2019-03-06 06:30:30 +00:00
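
For reference, the two initializer spellings in question; the struct here is
made up, only the '{ 0 }' form is the point:

    struct example {        /* illustrative type only */
        int   id;
        void *data;
        char  name[16];
    };

    /* 'struct example a = {};' is what older gcc on arm chokes on. */

    /* Portable C90 form: first member set to 0, the rest zero-filled. */
    struct example b = { 0 };
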
10cca81401 arm64 vdso warning: fix non-void function not returning
arch_setup_vdso() needs to return something even on panic to please gcc.
In theory, flagging panic() with __attribute__((noreturn)) should work
just the same and is a much better solution but for some reason on older
gcc versions setting the flag leads to the weak memset() symbol not
being found !?

Change-Id: Ifed100df5440ca24bb495817db9afc79f0ba6751
2019-03-06 06:30:30 +00:00
0c79de67b4 warnings: disable override-init for arm perfctr arrays
The arrays first init every field to an invalid op and then override a few
fields; since this is not something we want to allow everywhere, use
a GCC pragma to only ignore the warning there.

Change-Id: I498546fe60d60d4b000d711e22e04c8c360b5b83
2019-03-06 06:30:30 +00:00
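
A sketch of the localized-pragma pattern described above; the table and
values are made up, only the pragma usage mirrors the commit:

    #define NUM_EVENTS 8
    #define INVALID_OP (-1)

    /* Fill every slot with "invalid", then override a few entries, which is
     * exactly what -Woverride-init complains about. */
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Woverride-init"
    static const int event_map[NUM_EVENTS] = {
        [0 ... NUM_EVENTS - 1] = INVALID_OP,  /* GNU range designator */
        [2] = 0x11,
        [5] = 0x24,
    };
    #pragma GCC diagnostic pop
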
3fbad79afb warnings: init pte in process.c functions
pte_make_fileoff() on arm does not always init the pte, so just
init it to PTE_NULL beforehand

Change-Id: If195c1aef5b1344f13f6c0c76bb431a5fa339265
2019-03-06 06:30:30 +00:00
1b76aaa7e1 unused function warnings: add inline to static function in header files
Change-Id: I5d9bb539712a2b3e51c3ab3433a04fbb0cb0b961
2019-03-06 06:30:30 +00:00
aa3c5e91db arm64: Direct access to Mckernel memory from Linux.
Change-Id: I1a096aa5232c56382ae19d8c4e4f41d4e3e9f660
2019-03-06 14:53:16 +09:00
20d5900c35 mcstat: fix ihklib.h location
ihklib.h moved since it is no longer a generated file

Change-Id: I1ad6ff4bb8ae8c536d9ad7ee3cbeaf670ebcd11c
2019-03-01 06:24:39 +00:00
414cffd95b tests: remove calls to ihk_os_create/destroy_pseudofs
Change-Id: I04910c6a258c841437463e098fb8e02116c4f711
2019-03-01 06:24:04 +00:00
9ec0aeeab5 debug.h: merge both instances into ihk/debug.h
We do not need two debug.h files.

Take Fujitsu's STATIC_ASSERT over BUILD_BUG_ON because it is more widely used

Change-Id: If04c17fbb7406ab15fe86267fed8d6da460cec62
Fujitsu: POSTK_DEBUG_ARCH_DEP_9
2019-03-01 05:10:35 +00:00
06e96005a6 mcexec: restore --enable-vdso/disable-vdso for x86
Fujitsu added this ifdef together with ifndef __arch64__ and thus disabled
the option for both archs in practice; it probably does not hurt to restore...

I'm not sure I see the point of disabling the option at mcexec level though,
but who am I to care.

Change-Id: I0d4bffb6ed325edac8ae577773e19c0fff6ca2ed
Fujitsu: POSTK_DEBUG_ARCH_DEP_53
2019-03-01 05:08:45 +00:00
4606714c07 process stack: use PAGE_SIZE in aux vector
Don't ask me why this shares POSTK_DEBUG_ARCH_DEP_50 with the ksym lookups...

Change-Id: Ic3db2cd77ca88be361cefec85d8ed9deb21ffcd8
Fujitsu: POSTK_DEBUG_ARCH_DEP_50
2019-03-01 05:08:16 +00:00
a5d5baf8a8 rus_vm_fault: always use a packet on the stack
There are valid use cases where a remote page fault has no thread
data/packet available to use, e.g. when device driver threads
need to access the data (BXI).

Do the per-thread data lookup to use the right channel/tid if available,
and use mcctrl_ikc_send_wait with a new message number directly.

The fault is no longer handled in the mckernel syscall forwarding code but
directly in the ikc handler in IRQ context; this should be OK because page
faults are interrupts anyway, so the code should be IRQ-safe.

Change-Id: Ie60f413cdaee6c1a824b4a2c93637899cb9bf9c9
2019-03-01 05:08:03 +00:00
8074445d59 README: fix background link in toc
Change-Id: Ief448fd99fddc310ea7f311798c94d0423ebf93a
2019-03-01 05:00:47 +00:00
6a456f11aa cmake: remove unused build-time symbol lookup
Everything already uses kallsyms_lookup_name or similar; this
was left over from when the build system was ported ages ago

Change-Id: I09dd0249845df90ab2e0adc28d0eb285c0ebb64b
Fujitsu: POSTK_DEBUG_ARCH_DEP_50
2019-03-01 13:49:01 +09:00
81e665cb48 init_process: add missing initializations to proc struct
Change-Id: I4ea386ba3a8745202745bd8e35cab00c38262f65
Fujitsu: POSTK_DEBUG_ARCH_DEP_63
2019-03-01 04:39:59 +00:00
e0b9c5deec nanosleep: add cpu_pause() in spinwait loop
Probably some energy consumption saving?

Change-Id: I888f50568db8f08751abd0a002137c3b475362dc
Fujitsu: POSTK_DEBUG_ARCH_DEP_43
2019-03-01 04:38:51 +00:00
62772c8a24 gencore: Allocate ELF header to heap instead of stack
coredump() proceeds as follows:

1. coredump() calls gencore()
2. gencore() allocates the ELF header on the stack
3. gencore() prepares the core table, records the address of the ELF
   header in the table, and returns to coredump()
4. coredump() offloads __NR_coredump with the address of the core
   table

This fix prevents the ELF header from getting destroyed when gencore()
returns at the end of the 3rd step.

Change-Id: I770418c1658a6fdb640bb491fc076a31dfd41c22
Fujitsu: POSTK_TEMP_FIX_39
2019-03-01 04:38:28 +00:00
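
The underlying bug pattern is generic C: the recorded address points into a
stack frame that is gone by the time step 4 reads it. A minimal sketch with
made-up names (core_entry, hdr), not the actual gencore() code:

    #include <stdlib.h>

    struct core_entry { void *addr; size_t len; };

    /* The broken variant did roughly:
     *     char hdr[64]; e->addr = hdr;
     * i.e. recorded the address of a stack buffer that is dead once the
     * function returns.  Allocating on the heap keeps the address valid
     * until the offloaded consumer (step 4) has read it. */
    int fill_entry(struct core_entry *e, size_t len)
    {
        char *hdr = calloc(1, len);   /* zero-filled, like the later fix */
        if (!hdr)
            return -1;
        e->addr = hdr;                /* caller frees after the offload */
        e->len = len;
        return 0;
    }
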
63d15f7dfc CMake Kbuild: fail at cmake time if KERNEL_DIR is missing
Change-Id: I66660718841d05003b87995d68bec728aa0db9ba
2019-03-01 04:38:05 +00:00
fb3f1c58a8 rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
CPack takes the source dir as is, so if it was used to build something
it will incorrectly grab the temporary CMakeCache file and cmake will
complain during rpmbuild later on.

The BuildRequires change should be a separate patch, but the logic behind it
is that the dependencies need to be installed in the sysroot, and
rpmbuild cannot test this, so just move them all to only enforce
BuildRequires for native builds.

And while we are here, also add a new kernel_dir specfile option.

Change-Id: Ie67932798f632e6d307f8ead93bdbe043e6e8898
2019-03-01 04:37:46 +00:00
69846345de gencore: Zero-clear ELF header and memory range table
Change-Id: I0ff38c1b0e1e6ef204cb3605c0178848dbe40bfb
Fujitsu: POSTK_TEMP_FIX_63
2019-03-01 04:36:00 +00:00
b8155cc618 ihk submodule update: cpu/mem ioctl user access fix
Change-Id: If230c1012af5c1220e5927efba97a2ae38da42a0
2019-03-01 02:12:39 +00:00
f07e20a381 copy_user_pte: vmap area not owned by McKernel
Refs: #1166
Fujitsu: POSTK_DEBUG_TEMP_FIX_14
Change-Id: Iae0f1145d58ec2c14cecc14409b08a1db3b067b7
2019-02-28 07:50:16 +00:00
764948b51f test: Fix test programs for #1195
Add chmod 666 /dev/mcos0 for fork after setuid()

Refs: #1195
Change-Id: I2bec6a9a8378d246f50a9fc08a345b3235096a06
2019-02-28 00:57:22 +00:00
7da5fede8b Test "Scalable Vector Extension (SVE) support." on arm64
Change-Id: I3abaca932985a06b06887b962e769f2eac96c738
2019-02-27 06:26:00 +00:00
6810506c3d rusage: Fix available page sizes
Change-Id: I418075ff4b5341e0f5c7ff317e96461879a60f87
2019-02-22 14:08:18 +09:00
c82c2c1231 uti: Redirect uti thread futex() to McKernel do_futex()
Change-Id: I8203d0b60236e3ec72e22615a52907e1fff2c73c
2019-02-22 04:14:14 +00:00
5bc54a3bbe Fixed time processing.
- arm64: Get TSC corresponding to boot time from IHK.

- x86_64: Calculate the current time using vdso.

Refs: #1186
Fujitsu: POSTK_DEBUG_ARCH_DEP_52
Change-Id: I293ba4bbe5390d50dea44b8a5b7471f59237daff
2019-02-22 04:13:13 +00:00
07aa96ef95 arm64: Scalable Vector Extension (SVE) support.
Change-Id: I3568687913f583edfaa297d5cf5ac91d319d97e9
2019-02-22 04:07:29 +00:00
dac99f708c test: Add test programs for #1195
Refs: #1195
Change-Id: I21339f2597caf1704cc7d104e4bc5835d5270af6
2019-02-19 16:29:00 +09:00
f3c9fbf4ea rusage: Don't count PF_PATCH change
Fujitsu: POSTK_DEBUG_TEMP_FIX_86

Change-Id: Ia23f2d95c67062be3390acafad3e87f087466cdc
2019-02-18 14:50:56 +09:00
54122360e8 CMake: move CONFIG_ARM64_64K_PAGES and VA_BITS up to main CMakeLists
user code also needs these defines; there was a hard-coded
definition left over from debugging that didn't get cleaned up

Change-Id: I951fcd6a3d6bc1d1f1c3e897058908167520f7bc
2019-02-18 10:09:21 +09:00
21cf953a03 x86: disable zero mapping and add a boot pt for ap trampoline
the application processor trampoline needs the trampoline physical
address to be mapped for the few instructions between loading the
page table and jumping to the normal memory area; set up a new pt for them.

Also make it use its stack where it needs to be directly.

With that, x86 can finally remove the 0 page from its init mapping

Change-Id: Iab3f33a2ed22570eeb47b5ab6e068c9a17c25413
2019-02-14 07:59:03 +00:00
c59d8db1b3 CMake: define RHEL_RELEASE_VERSION in config.h for non-rhel kernels
Change-Id: Iaa48e763be71e9cbc8dff6335810d3191bb3c177
2019-02-14 16:44:09 +09:00
abc0a7bdac mcs_rwlock: remove aligned(64) attribute if ENABLE_UBSAN
The attribute would impose a 64-byte alignment that we do not
respect later, because the enclosing structures (e.g. process/thread)
are allocated at 32-byte boundaries with kmalloc.

The alignments are however justified for performance reasons, as we do not
want these on the same cache line, so just accept slower performance for
UBSAN only

Change-Id: Ia28968257675b7ae97b0391471986e6bf6485b7b
2019-02-14 16:44:09 +09:00
2f456b8752 cmake: Add ENABLE_UBSAN for -fsanitize=undefined
Change-Id: I73db5f904a7d86052aae62e67b01281763c83561
2019-02-14 16:44:09 +09:00
2a63c962fc build system switch to cmake
Remove old build system at the same time

Change-Id: Ifdffe1fcd4cfece05f036d8de6e7cb74aca65f62
2019-02-14 16:44:09 +09:00
4bdd9cf512 ubsan: remove most sprintf calls
sprintf is implemented as snprintf(..., INT_MAX, ...), which overflows
the pointer argument used for the end and then fixes the end up to be -1.
This technically works, but we know the actual buffer size at all these
call sites, so we might as well do this properly

Change-Id: I807d09f46a0221f539063fda515e1c504e658d40
2019-02-14 16:44:09 +09:00
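
The "do it properly" version is just bounding by the real buffer size; a
generic illustration (the format string and values are arbitrary):

    #include <stdio.h>

    int main(void)
    {
        char buf[32];

        /* sprintf(buf, ...) gives the formatter no idea how big buf is;
         * bounding by sizeof(buf) can never overrun it. */
        int n = snprintf(buf, sizeof(buf), "cpu%d-status=%s", 3, "running");
        if (n < 0 || (size_t)n >= sizeof(buf))
            puts("output truncated");
        else
            puts(buf);
        return 0;
    }
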
bc2a444828 ubsan: fix undefined shifts
A signed integer cannot be shifted in a way that will flip the
sign bit; make such arguments unsigned to be safe

Change-Id: Iafc060f98f899ae3ffb876ba22fdd6183fbb6e57
2019-02-14 16:44:09 +09:00
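
A concrete instance of the undefined shift UBSAN flags, next to the
well-defined unsigned version:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        /* '1 << 31' shifts a 1 of type int into the sign bit: undefined. */
        uint32_t bit31 = UINT32_C(1) << 31;   /* unsigned operand: defined */

        printf("0x%08" PRIx32 "\n", bit31);   /* prints 0x80000000 */
        return 0;
    }
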
d9b2924249 Update patch for "Add test programs for large page"
Change-Id: I6ee96b677c65c5bf4b2312059abd689225c0581d
2019-02-14 16:26:20 +09:00
501531f3b3 shmobj: Don't page_unmap() when count isn't one in shmobj_destroy()
Change-Id: If9d567d61e1dc4db808a2aeee290034acf7be4b5
2019-02-14 16:26:19 +09:00
366e95856c Null-check ihk_os_t and mcctrl_usrdata pointers
Change-Id: I941c58d4ab6a0c1ce6bd53c24b552218a1716750
Refs: #1216
2019-02-14 16:26:19 +09:00
bdf5175d4c invalidate_one_page: Support shmobj and contiguous PTE
Change-Id: I15b74ee4afd8e2dc52c933925aae4a1e0d8bcc72
2019-02-14 16:26:18 +09:00
b174fb8099 move_pages: Check flags argument
Change-Id: Ia74aa463a060ecd43aa56ee08d622421f227dbfe
Fujitsu: POSTK_TEMP_FIX_78
2019-02-14 16:26:16 +09:00
e828398c8b do_mmap: don't pre-populate the whole file when asked for smaller segment
The linker maps parts of libs with different access flags,
so we cannot prepopulate the whole file.

[dominique.martinet@cea.fr: moved min and friends in compiler.h]
Change-Id: Ifbeddc0908699099cfae5ce9cc2adc578221db31
2019-02-14 16:26:15 +09:00
641d9f1b39 clear_range_l1, clear_range_middle: Fix handling contiguous PTE
Change-Id: I2609c94d7f9342fe25aa9a5cfc208375274d46fa
2019-02-14 16:26:14 +09:00
c1270cdf6d fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
Change-Id: I3ea50fc13ae5c090ba32aad4461f9741a4c35665
2019-02-14 16:26:00 +09:00
022e04b62b shmobj: Clean up code around memory_stat_rss_sub call
Change-Id: I6f678568c3c27799cd2a81f5574b96fd218e942f
2019-02-14 16:26:00 +09:00
9cfc373538 Refactor "do write back only MAP_SHARED pages"
* free_process_memory_range() always passes memobj to
  ihk_mc_pt_free_range()
* clear_range_*() don't flush page in fileobj with MF_PRIVATE flag

Fujitsu: POSTK_DEBUG_TEMP_FIX_87
Change-Id: I8d46d029b3fc51ca6f0e59d748a2fe93e324a374
2019-02-14 16:25:58 +09:00
fb24dcea2e unhandled_page_fault: Refactor architecture dependent parts
Fujitsu: REQ-12
Refs: #1012
Change-Id: I3c61f9cd3f514bdcd4a7f26e7c15043529269cf5
2019-02-14 16:25:57 +09:00
207d653b41 mcctrl: use vmf_insert_pfn for kernel >= 4.18
vmf_insert_pfn got added as a wrapper around vm_insert_pfn in 4.17's
1c8f422059ae5da ("mm: change return type to vm_fault_t") and totally
replaced the latter in 4.20's ae2b01f37044c ("mm: remove vm_insert_pfn()")

Compare with 4.18 here specifically to avoid troubles when rhel
backports this change later, and avoid adding a rhel version check down
the road.

Change-Id: Ibf108e2fb6f1199f89cde6a7973f4eb55447260b
2019-02-14 16:25:49 +09:00
0a49b6eca5 Add test programs for #1190
Change-Id: Icb63e898d5882e1fab18e6af7859af50448a1d60
2019-02-14 16:25:44 +09:00
950ea678dd Reject "setfsuid: Specify mcexec tid when asking mcexec for fsuid"
This fix is rejected because it only makes the setfsuid test in ostest
pass and doesn't fix the other issues, including the one in which file
I/O could be done with the old fsuid because an mcexec thread with an
arbitrary tid could handle the system-call offload request.

Explanation of the rejected fix:

  setfsuid() proceeds as follows:

  1. McKernel asks mcexec for __NR_setfsuid (set)
  2. mcexec calls setfsuid, reports the id to McKernel
  3. McKernel asks mcexec for __NR_setfsuid (get)
  4. mcexec calls mcexec_getcred(), reports the id to McKernel
  5. McKernel sets proc->fsuid to the obtained value

  The tid of mcexec in the 2nd and 4th steps could be different. So this
  fix lets mcexec report its tid in the 2nd step and McKernel specify
  it in the 3rd step.

Change-Id: Id5cfeed18c64430d576a56e961bbca1ecb2e39ad
Fujitsu: POSTK_DEBUG_TEMP_FIX_45
2019-02-14 04:42:32 +00:00
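
For context on the get/set round trip above: from userspace, setfsuid()
always returns the previous fsuid, and passing an invalid id such as -1 is
the conventional way to read it without changing it. A sketch (the uid 1000
is an arbitrary example; privileges are needed to actually change anything):

    #include <stdio.h>
    #include <sys/fsuid.h>

    int main(void)
    {
        /* setfsuid(-1) always fails, so it only reports the current fsuid. */
        int before = setfsuid(-1);

        setfsuid(1000);                 /* the "set" step (needs privileges) */

        int after = setfsuid(-1);       /* the "get" step mcexec performs */
        printf("fsuid: %d -> %d\n", before, after);
        return 0;
    }
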
cd42d186b7 uti: Report error of offloading ioctl if any
Change-Id: If4218b9fb89f34728c4aaf81bccab2dfbb0d4a87
2019-02-14 04:15:44 +00:00
66bc44f88a Readme.md: move figures to R-CCS server
Change-Id: I6a861c15402c8e925e3692b912a8df3f6f0ffce9
2019-02-13 18:26:18 +09:00
34a995d290 perfctr_stop: add flags to no 'disable_intens'
The original fujitsu code added a whole new ihk_mc_perfctr_stop_first
function, duplicating a lot of code - add a flag to the existing function
instead.

Change-Id: Ic9ce0236d68f967ff72cf88e5d9f1bda5c98aa1b
Fujitsu: POSTK_DEBUG_ARCH_DEP_107
2019-02-12 05:18:22 +00:00
d0d99adfb3 Readme.md for github
Change-Id: Ib5aa5cde10acb5f5956212f8c451baedc940d123
2019-02-12 02:37:09 +00:00
d78883c692 fix missing mutual exclusion between terminate() and
finalize_process().

The code that makes a child process a zombie and the code that sets
the parent of the child process to process ID 1 are now mutually
excluded.

Refs: #1257
Change-Id: Ic95d4d8ee92d6a4a63847e5eda20ec1ba92566ac
2019-02-08 10:25:20 +09:00
ff0395581c Register PPD and release_handler at the same time.
Fix the issue that a process would remain even if a signal was received
between PPD registration and release_handler registration.

Refs: #1201
Fujitsu: POSTK_DEBUG_TEMP_FIX_64
Change-Id: I571781963578df8cedb327f19298f595cfb137a3
2019-02-08 10:20:58 +09:00
f5023c9730 page fault handler: protect thread accesses
current cpu's thread can be NULL during init; we don't want null derefs
in the page fault handler

Change-Id: I0a2c22b39cae2c258d211317cffc2408e19f3bbf
2019-02-07 02:41:50 +00:00
fe08ac4a67 arm: turn off cpu on panic
Since interrupts are disabled on panic, linux cannot reset a
panic'd core when NMIs are disabled (e.g. for mcreboot/mcstop)

Just always offline it, so linux can get it back

Change-Id: If8107172375f2924e02bd4c36e24645ec38a8999
2019-02-07 02:37:31 +00:00
60dcd0e798 move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
Change-Id: I7fe86244c8707694b379e567b31de65ee2c56887
2019-02-07 10:43:47 +09:00
4d215de641 Separate mmap area from program loading (relocation) area
We need to separate the two because the heap of a PIE is created in
the area to which it is mapped.

Related commits:

b1309a5d: PIE is mapped at map_end instead of at
          user_start
c4219655: Interpreter is mapped to map_start to make a
          system call that dereferences a NULL pointer fail

[dominique.martinet@cea.fr: Also add ULONG_MAX and friend macros,
 used for data_min]
[ken.sato.ty@hitachi-solutions.com: fix execve]
Change-Id: I8ecaf22b7965090ab67bebece57c68283ba23664
2019-02-07 09:58:03 +09:00
97e0219f50 Make the Linux handler run on mmap to procfs.
Change-Id: I98a3d098c5c676f33c83fa4354c623988ee591f2
Refs: #1222
2019-02-06 11:54:50 +00:00
f9d8d98af1 sysfs: add missing symlinks for cpu/node
Add the following patterns of symlinks:
 - /sys/bus/cpu/drivers/processor/cpu*
 - /sys/bus/node/devices/node*

And slightly change how /sys/devices/system/cpu/cpu*/node* are created
to avoid duplicate lookups

Change-Id: Id94a4d157da06d75f6bd450d5bd9a9e7709a1414
2019-02-06 09:55:54 +00:00
3738b70ad3 git hooks: fix submodule check sloppy match
The submodule check used to match any file containing the submodule name
(e.g. lib/include/ihk/foo would match ihk and incorrectly be identified as
a submodule change) -- properly check for the full name with anchors instead

Change-Id: Ib4330aec97e9da713cd3ab9e791962f2e0c8d396
2019-02-06 08:34:27 +00:00
9bf225d193 mckernel overlay: replace mcoverlayfs with a soft userspace overlay
mcoverlayfs has a high maintenance burden and does not work on rhel8's 4.18
kernel (while it works on vanilla 4.18...); instead of debugging this further,
time is better spent making it independent of overlayfs.

Change-Id: I7454ae95b0fbb3373c256aa2fd83cdfec466c009
2019-02-06 08:27:25 +00:00
6fc9ec1c92 gencore: finish reintegration into arch-independent code
Change-Id: Ic2fc935aeec17c54931817bf43f67ef6da78adc8
Fujitsu: POSTK_DEBUG_ARCH_DEP_18
2019-02-06 17:23:54 +09:00
112ade484a page_table: Fix return value of lookup_pte when ptl4 is blank
Change-Id: I5926fedda182941a4b7a2fe480bffb12d4069713
2019-02-06 07:30:44 +00:00
be708674d3 Reject "do_migrate: Send IPI"
Change-Id: If77a51c9bc6a3caef502dd35a276b0dba22b4d24
Fujitsu: POSTK_TEMP_FIX_57
2019-02-06 04:11:16 +00:00
557f33a705 eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
Change-Id: I97b0e79acfd51b57eeaa6556eba880d231330f01
2019-02-06 02:47:31 +00:00
7dd0cbd9a6 ARM: eliminate zero page mapping (i.e, init_low_area())
Change-Id: I89bcce7fb286a4c5983a768534a0d3cea093040c
2019-02-04 04:22:24 +00:00
6ed2e5ffc1 Fix ThunderX2 write-combined PTE flag insanity
Change-Id: I59999a680b556acf3e22ac516f4758e3aee7f355
2019-02-01 21:03:19 +09:00
649059f2d2 contiguous PTE: Fix requested page-shift check
Change-Id: Iafc505457f7e10c94142070113870cd8b8c6922d
2019-02-01 21:01:27 +09:00
312c1168f3 test: XPMEM: Fix Makefile
Change-Id: If7b5887e9dc4d7f94bf18dc5ae95a549baa5fb58
2019-02-01 15:15:47 +09:00
d29419d336 test: Add test programs for #1242
Change-Id: Ib3b5d5b661e0cd027711a815d9da2e308cedeffc
Refs: #1242
2019-02-01 15:15:46 +09:00
9f7425c152 Add test programs for lage page
Tests arm64 specific, contiguous bit based large pages as well.

Change-Id: I09edad8cfde6c23a259f1f32cfc97974d9cb63c3
2019-02-01 15:15:44 +09:00
100754f556 test: add uti tests
Change-Id: Ib59f1c4dab7cec7e67ba35ec1988f6f968a2deaa
2019-02-01 15:15:14 +09:00
6d38c34993 Merge branch 'postk_topic-contiguous_pte' into development
* Merge cd7ab307fae9bc8aa49d23b32becf37368a1603e
* Merge commit is changed to one commit for gerrit

Change-Id: I75f0f4cf6b8b3286284638ac2c7816c5257551e4
2019-02-01 15:15:12 +09:00
7f1c17fc4c tests: add 'postk_master' branch tests
Change-Id: Ie0d4cfd0921aed89d2db6083c9eb068b1cfc1984
2019-02-01 15:15:00 +09:00
25ef4e9261 Merge branch 'postk_master' into development
* Merge 53e436ae7db1ed457692dbe16ccb15511aa6bc64
* Only arm64 stuff are left

Change-Id: I6b79de1f659fa61e75f44811b639d41f9a37d6cc
2019-02-01 15:14:58 +09:00
d4d78e9c61 Following arm64-support to development branch
This includes the following fixes:
* fix build of arch/arm64/kernel/vdso

Change-Id: I73b05034d29f7f8731ac17f9736edbba4fb2c639
2019-02-01 15:14:45 +09:00
e52d748744 new_mcos_handler_info: Propagate kmalloc failure
Change-Id: If484cf32cd0bf096ffd712561dd1f73046c60cd8
Fujitsu: POSTK_TEMP_FIX_64
2019-02-01 15:11:36 +09:00
39b21e7ba9 monitor_init: Use ihk_mc_cpu_info()
Its call site is moved before numa_init() as well, because
monitor_init() defines ihk_os_monitor, which is used in
rusage_total_memory_add() called from numa_init().
I didn't revert this modification because I don't want to touch the
working code.

Change-Id: I602467284581ce45989dd071cfe59d3fc4827e29
Fujitsu: POSTK_DEBUG_TEMP_FIX_73
2019-02-01 15:11:33 +09:00
8db2d3beec sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
Change-Id: I466ffbaf38fe5fd2b1ca0439fa7ea4a813e226ca
2019-02-01 15:08:49 +09:00
f5320fc2b4 overlayfs: make mcoverlayfs compile for 4.14.0-115 (el7 arm64)
Use the 4.18 module as a base

Change-Id: I6c9ef66399800828e1932573da5a97573545c5da
2019-02-01 15:08:47 +09:00
0fbdcc44b9 mcoverlayfs 4.18: re-define ovl_readlink
Apparently /proc needs it; it's normally implemented using get_link if
readlink isn't implemented but proc's get_link crashes the kernel in
this case (because nameidata is only defined for open* paths)

Change-Id: I1864d6c948db879d33ea29b1b281bf84ff8eeec6
2019-02-01 15:08:45 +09:00
351fdead3b kmalloc: Fix address order in free list
The order is expected by the merger.

Change-Id: I54338caaaa1a203ab5dd39a574a25aac324142a5
Fujitsu: POSTK_TEMP_FIX_46
2019-02-01 06:07:26 +00:00
859e976348 kernel/syscall.c: cleanup? pass virt_to_phys directly to do_futex
Change-Id: I196ebe5d5cdc577fce442bcd2247d07e85d2b9ff
2019-02-01 13:19:02 +09:00
49353e252b Added check of nohost to terminate_host().
Change-Id: I796a0d98b68783dad6ce04b3a80ca01db8f8eee2
Fujitsu: POSTK_DEBUG_TEMP_FIX_103
2019-02-01 13:19:00 +09:00
452d93f14d mcctrl_clear_pte_range: fix zap_page for kernel >= 4.18
zap_vma_ptes no longer returns an error code as of Linux's
27d036e33237e4 ("mm: Remove return value of zap_vma_ptes()"),
where they decided nobody is interested in it....

Just copy the check out of the function.

Change-Id: I2eda0f91ec55a34bba96f45cc3d887bc80132a82
Originally-by: Kagawa Kodai <fj1731iw@aa.jp.fujitsu.com>
2019-02-01 13:18:58 +09:00
9e5472bb94 Fix for PAGE_SIZE / PAGE_MASK magic number.
Change-Id: Icc00594d84a33495af774096ae13f830e29be39f
Fujitsu: POSTK_DEBUG_ARCH_DEP_116
2019-02-01 13:18:56 +09:00
516ab87ab9 Copyrights: fujitsu 2018 bump
Separate copyright bumps into a different commit.
A lot of files only had the copyright change at this point; these
were probably changes I added separately in other patches, but just
split these into a different commit instead to simplify git stats

Change-Id: I93cf3fc1c0fa04ee743a79c3fe9768933e6bd0d2
2019-02-01 13:18:52 +09:00
a9884453e2 vmcore2mckdump: make arm-compatible, 'fix' timeout
Change-Id: Icdb42ff47d9dff5c6a818cb8c9ae94d183b19569
Fujitsu: POSTK_DEBUG_ARCH_DEP_93
Fujitsu: POSTK_DEBUG_ARCH_DEP_102
2019-02-01 13:18:12 +09:00
0f01312040 configure.ac: remove duplicate executer/user/arch/x86_64/Makefile
Change-Id: I6b4b8e636f0194e390871600d6502d3cc94f042b
2019-02-01 13:18:10 +09:00
fb9832af6d perf counters: add arch-specific perf counters
arch perf counters are placed at the start, so offset all
other counters (because placing arch perf counters at the end
wouldn't have been intrusive enough?)

Change-Id: Ifab1047872384927d9cfa0a0212327ee73545c29
Fujitsu: POSTK_DEBUG_ARCH_DEP_86
2019-02-01 13:18:09 +09:00
0e895478a1 mcctrl rus_mmap: make vma->vm_flags arch-dependent
[Dominique: renamed arch_vm_flags to arch_rus_vm_flags]
Change-Id: I5ec89b3ff80af6bf0ede342eb5816df8c78de348
Fujitsu: POSTK_DEBUG_ARCH_DEP_100
2019-02-01 13:18:07 +09:00
19659aa908 mcctrl: move translate_rva_to_rpa to archdep
Change-Id: I0efa51468a7ff4d776d8340a612e6f44eac2ed53
Fujitsu: POSTK_DEBUG_ARCH_DEP_83
2019-02-01 13:18:06 +09:00
e5de0b81ca ldump2mcdump: move PAGE_SHIFT to arch-dependent includes
Change-Id: I42e49db87e375f2dc094926e21dfc00e50484855
Fujitsu: POSTK_DEBUG_ARCH_DEP_94
2019-02-01 13:18:04 +09:00
f299fff266 stack: add hwcap auxval
Fix the AUXV_LEN to account for hwcap and remove the ifdefs

Change-Id: I303fc2c5fa4c8cea7ec9823f8580b8a66de2f58f
Fujitsu: POSTK_DEBUG_ARCH_DEP_65
2019-02-01 13:17:58 +09:00
206df33658 perfctr: remove ihk_mc_perfctr_fixed_init from api
ihk_mc_perfctr_fixed_init is only used on x86

Change-Id: I6f25d4237d45b4455ccdaae03b850dd9e8edcc57
Fujitsu: POSTK_DEBUG_TEMP_FIX_31
2019-02-01 13:17:52 +09:00
ad8a3ae962 vsnprintf: reject POSTK_DEBUG_TEMP_FIX_28 return value fix
Change-Id: I23beeca094e1b0ee84211f3ed4c33ef7e2aa62c2
2019-02-01 13:16:45 +09:00
3c1fd54a92 kernel/mem: remove unused page_table struct
Change-Id: I3593bc08206d07d7c07421240f08ac3539ddc81d
Fujitsu: POSTK_DEBUG_ARCH_DEP_89
2019-02-01 13:16:42 +09:00
ca34154a43 mcexec: lookup page_size with sysconf
page size is not defined in sys/user.h on aarch64

Change-Id: Idbdaef2519792eeb1e1a2794be0a34d67e87907e
Fujitsu: POSTK_DEBUG_ARCH_DEP_35
2019-02-01 13:16:40 +09:00
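
The portable lookup the commit switches to, for reference:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* PAGE_SIZE from <sys/user.h> is not there on aarch64 glibc;
         * sysconf() works everywhere and returns 4096, 65536, ... */
        long page_size = sysconf(_SC_PAGESIZE);

        printf("page size: %ld bytes\n", page_size);
        return 0;
    }
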
a10f4b861c do_pageout: fix direct kernel-user access
Change-Id: Ie02faca93fdb0d52d72e1f2aa1384a214c84ebff
Fujitsu: POSTK_DEBUG_ARCH_DEP_46
2019-02-01 13:16:32 +09:00
36d473c5b5 pager linux_open/unlink: always use openat/unlinkat
some archs do not have the simple open/unlink variants, while the *at
versions are always available -- this is simpler than making these
arch-dependent functions

Change-Id: Ic16ae5683e6e375210b1744538d291585e67a2fa
Fujitsu: POSTK_DEBUG_ARCH_DEP_78
2019-02-01 13:16:30 +09:00
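
Anchored at AT_FDCWD the *at calls behave exactly like the legacy ones,
which is why the switch costs nothing; a small illustration (the file name
is arbitrary):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* openat/unlinkat relative to AT_FDCWD == open/unlink, and the *at
         * syscalls exist even on archs that never had the legacy ones. */
        int fd = openat(AT_FDCWD, "demo.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0) {
            perror("openat");
            return 1;
        }
        close(fd);

        if (unlinkat(AT_FDCWD, "demo.txt", 0) < 0) {
            perror("unlinkat");
            return 1;
        }
        return 0;
    }
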
342a2e1287 x86 syscalls: add a bunch of XXat() delegated syscalls
at least funlinkat is needed because these macros define __NR_x for mckernel
side and we will use funlinkat in a later commit

Change-Id: I6b6a2eee11e2fa1e42f97eab4b67e1128cd83ddf
2019-02-01 13:16:29 +09:00
238f563e88 perf: add arch-dependent counter_mask_check function
A later version would probably want to check some mask for arm64...

Change-Id: I67e13a852c3ed406fbf8ae1688539b9e069c0e81
Fujitsu: POSTK_DEBUG_ARCH_DEP_87
2019-02-01 13:16:28 +09:00
03cadbcba2 perf: add arch-dependent get_num_counters function
Change-Id: I2230af87e0c764d97115e833dccb1842946c1b94
Fujitsu: POSTK_DEBUG_ARCH_DEP_109
2019-02-01 13:16:28 +09:00
2b254f02f8 init_process_stack: change premapped stack size based on arch
Avoid consuming a large 512MB page on 64K base page arch

Change-Id: Ice491d43fd998b375ddc24f4eff7faf5d36d9f42
Fujitsu: POSTK_DEBUG_ARCH_DEP_104
2019-02-01 13:16:27 +09:00
960a6f5f90 prepare process: add magic header in program_load_desc
Check we mapped the correct region with a magic header in the struct

Original commit: d246b93a3bced92d0ac2a4a337118091b010658a

Fujitsu: POSTK_DEBUG_TEMP_FIX_76
Change-Id: If848be64af5d76844ba65b48493021637c8114f4
2019-02-01 13:16:25 +09:00
0cc3120a01 freeze(): add cpu_pause() to the frozen state loop
I guess cpu_halt is not enough on arm?... I don't get it.

Change-Id: Ic67113ae474e5b3af91734d763f1498a19f6a948
Fujitsu: POSTK_DEBUG_ARCH_DEP_82
2019-02-01 13:16:23 +09:00
9f31abf402 monitor_init: fix undetected hang on highest numbered core
Original commit: 7d38ead4f ("Fix for bug#99 Change setting value for
monitor->num_processors.")

Change-Id: I437c957fa319c014316a6064cc660e337668bb88
2019-01-29 09:32:25 +09:00
dfd23c3ebe prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
Change-Id: I04c5568a9eb78bcac632b734f34bba49cf602c4d
Refs: #1181
2019-01-22 05:40:56 +00:00
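
For reference, the userspace side of this knob; both operations are standard
Linux prctl(2) since 3.15:

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
        /* Disable transparent huge pages for this process, then read it back. */
        if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0) != 0) {
            perror("PR_SET_THP_DISABLE");
            return 1;
        }

        int disabled = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);
        printf("THP disabled: %d\n", disabled);
        return 0;
    }
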
eb184419ea shmget: Use transparent huge pages when page size isn't specified
Refs: #1241
Change-Id: Ia111bfeb67d224ad1ab77e5193eac7b7d14a6577
2019-01-22 05:40:56 +00:00
13e29c0da5 mcoverlayfs: fix disabled build
Change-Id: Ia40853432547084329fc034e3942e51954e1ddf5
2019-01-22 02:15:43 +00:00
8aaf0f8551 test: Add test programs for #1166
refs: #1166
Change-Id: I9b6dd8628e8a3dcb2281e31f4b8d116e9c7852d8
2019-01-08 15:15:34 +09:00
ef9fda23a9 mcexec: Set default heap extension amount to sysconf(_SC_PAGESIZE)
Change-Id: I3ac660d33918c1fa28093ab59f3a7ead65d337d7
2018-12-12 00:38:10 +00:00
cd5cb469eb Fix "Test "Error handling improvement" on arm64"
Change-Id: Ie3c835dfe65a9754628ca221f3f563b67b0eb1a0
Refs: #727
Refs: #873
Refs: #1011
Refs: #1232
Refs: #1233
2018-12-10 19:58:15 +09:00
7a8f5043c5 mcstat: Fix test description
Change-Id: I942b351146cabd259eb164b73375a547d0fd0c30
2018-12-10 09:27:28 +00:00
cf6514def9 test: Add descriptions to "user_space" test
Change-Id: Ic14ddbfbf6bfc12d40d3284ec08e040597356963
2018-12-10 12:59:20 +09:00
96b6d773a9 ARMv8.2-LPA support
Change-Id: I12a6eac55af2e7f6a643e4e04ed59a85769f4063
2018-12-07 17:41:50 +09:00
4ba4bbd711 ContiguousPTE[12/12] modify sys_shmget/sys_mmap
Change-Id: Icfbe9fbfa6216735ec20c55da95e5b62a25fdfea
2018-12-07 08:27:51 +00:00
410bf13367 ContiguousPTE[11/12] modify ihk_mc_pt_virt_to_pagemap
Change-Id: Iff0c77cdd08a76b55c2635c6b0163ef2caade71d
2018-12-07 08:24:22 +00:00
7c231928ab ContiguousPTE[10/12] modify split_largepage
Change-Id: I0a8385af9709b11d7917eb34e8612413fefe6931
2018-12-07 08:22:56 +00:00
50de3820ad ContiguousPTE[9/12] modify ihk_mc_pt_clear|free_range
Change-Id: I75d821b81d351f4fdfd504c791543db174634261
2018-12-07 08:21:44 +00:00
c4e5bf6d6b ContiguousPTE[8/12] modify page_fault_process_memory_range
Change-Id: I79ecd08cf83aeacd3e20a7720bad66ef19573402
2018-12-07 08:17:08 +00:00
c319fe08a4 ContiguousPTE[7/12] modify ihk_mc_pt_set_range
Change-Id: Ib38530ce64a01f21107e0a6a73de7c54f214eb5a
2018-12-07 08:12:44 +00:00
24d3da32ed ContiguousPTE[6/12] modify arch_get_smaller_page_size
Change-Id: I4fe8c36cf9561b3ee895f29b112f0ac6f2418f5e
2018-12-07 08:00:32 +00:00
c4fbbb6027 ContiguousPTE[5/12] modify lookup_pte
Change-Id: Ie5aa625e5a13596ff8294699d10114aeba9d991d
2018-12-07 07:59:12 +00:00
0449437c15 ContiguousPTE[4/12] modify invalidate_process_memory_range
Change-Id: Ib59f4c5d78580a1c4344ac632d3d8f68355d7058
2018-12-07 07:56:28 +00:00
639d0e496b ContiguousPTE[3/12] modify move_pte_range
Change-Id: I20878c97bea768d1f09ab0580d744a58c070be2c
2018-12-07 07:54:28 +00:00
b6de164e9a ContiguousPTE[2/12] modify copy_user_pte
Change-Id: Ie696245a8c09e87c48426bc3e74a6f049a085471
2018-12-07 07:52:17 +00:00
d1b36aab62 ContiguousPTE[1/12] add page table access functions
Change-Id: I3291c170e66592c871f316d78d71248d26748501
2018-12-07 07:51:01 +00:00
8a2f4be443 Test "user_space" on arm64
Test: Architecture dependent separation of user space access code.
Add arm64 result files.

Change-Id: I651992c0c8bcd1da8313a35eda03612405b55b89
2018-12-07 07:46:09 +00:00
8a684587fa Fix "Test "Error handling improvement" on arm64"
* Fix test to make mcexec fail to fork()

Change-Id: I9a696787b5d4ce44541a4651622e5be60f9ef355
2018-12-07 07:40:14 +00:00
05c315857c Test "Add mcstat tool" on arm64
Change-Id: I4bf1260e999c16fe7b9c339af3833ea007277889
2018-12-07 06:24:18 +00:00
1422838dd1 sysfs-meminfo: Add page size consideration other than 4KiB.
Change-Id: I88e3aa6b9537dfff21c72b4a247fda24873216cb
2018-12-06 18:45:56 +09:00
c9fc110fc6 do_kill(): fix pids table when nr of threads is larger than num_processors
Change-Id: I0f0120c67a9b0df1cdf7d3fed34dd9c656fd317a
Refs: #1235
2018-12-05 08:17:05 +00:00
ed3c138e1f test: Fix user_space, process_vm_writev01 expected value file.
Fix to check only TPASS. (Delete pagesize)
  test: pvw_003, pvw_012

Change-Id: I4f9c3c42b855d419f3db457fbb5e7865da85eee8
2018-12-05 15:51:52 +09:00
60c97d0e60 Test "mbind support" on arm64
Add arm64 result files.

Change-Id: I32e8d4e1346076683e7d55e8e928d168e439eaca
2018-12-05 11:27:03 +09:00
95e90c727e Test "Error handling improvement" on arm64
The following test set:
  execve: fix memory leak
  add: NULL check for master_channel at IKC interrupt_handler.
  Fix the check routine for elf sections (Fujitsu: POSTK_TEMP_FIX_77)

Change-Id: I16c2a341c48f6df10a4839be08b93ea16bda8fbe
Refs: #727
Refs: #873
Refs: #1011
2018-12-05 02:01:29 +00:00
ec844bb6e3 Test "fix: Bug for getrusage" on arm64
The following test set:
  fix: Bug where getrusage returns an incorrect ru_maxrss
  fix: Bug where getrusage(RUSAGE_CHILDREN) returns the parent's info (POSTK_DEBUG_TEMP_FIX_72)
  fix: Bug where getrusage often returns an incorrect ru_stime

Change-Id: I6734b1e34565d5d2715f9901a04ba5b6f0278032
Refs: #1032
Refs: #1033
Refs: #1034
2018-12-05 01:58:44 +00:00
a11d4d7a9d Test "mcexec_destroy_per_process_data: System calls delegation can not be terminated in error when the last process that closed /dev/mcos0 is a child process." on arm64
Change-Id: I6bc3023c1fa6089bc2ca6365b59bbab384b3e1d7
Refs: #882
2018-12-05 01:43:31 +00:00
0ee446923a Test "make sure to context-switch to idle thread when thread's status is PS_EXITED" on arm64
Change-Id: I757d529e49655e9010022f10414e4d6c9eb4c059
Refs: #1029
2018-12-05 01:21:48 +00:00
01b2a1d213 Tests: dust off x86_64 mem_dest_prev
Change-Id: I445ea0e8ae2cd631c775718a86a64fd2ecb90f35
Refs: #1228
2018-12-04 10:05:39 +00:00
52cd57fed2 memory/x86_64: fix linux safe_kernel_map
init_linux_kernel_mapping is called in setup_x86_phase1 way
before arguments are set up, but we can access the kernel boot args
directly and use those, so this is an ugly fix for now.

Change-Id: I285ecc31c6646d6d18566d411b09ae3190e8101e
Refs: #1228
2018-12-04 10:05:03 +00:00
bbc39480d2 Fix test programs for "execve: fix memory leak"
* Fix README

Change-Id: I90fe1fbb26569bbab5a34638b5f357d7000eda5d
Refs: #727
2018-12-04 10:02:42 +00:00
8521b98730 execve: Call preempt_enable() before error-exit
Fix "execve: fix execve with oversubscribing".

Change-Id: I4de3f5d44b1703db392f3da75196faa1e12d5845
Refs: #727
Refs: #1072
Refs: #1232
2018-12-04 09:43:19 +00:00
da02f76a25 mcexec: Fix error handling of init_worker_threads
Refs: #1233
Change-Id: Icce49c996d69b3cf64a71e7bd470421f329c881f
2018-12-04 09:40:24 +00:00
dbe5e99cf9 Fix test of "make sure to context-switch to idle thread when thread's status is PS_EXITED"
Change-Id: I62ea813656805b6250b0465853e8fa2918b0c86b
Refs: #1029
Refs: #1227
2018-12-04 08:17:54 +00:00
6b293409e5 mbind: Fix test programs
Refs: #1226
Change-Id: I12bf807812d93b7eca8f452e70e70e7c4e32f6a3
2018-12-04 08:17:13 +00:00
b94247c478 Test "signal: When the process receives a termination signal, it first terminates mcexec." on arm64
Change-Id: I1be32b991a45f0892146d93a9e6d6be9199faf59
Refs: #870
2018-12-04 05:07:32 +00:00
556a64ac5e Test "signal: When the process receives a termination signal, it first terminates mcexec." on arm64
Change-Id: I5c8ab90ffd5c5da30162d606f4d86dca9d387b5a
Refs: #863
2018-12-04 05:06:07 +00:00
3f11c1aee5 Test "Wait for LWK to run at shutdown." on arm64
Change-Id: I96785dda7a1a7eb36ceeb31401d71b4e40efb185
Refs: #898
Refs: #928
2018-12-03 20:06:37 +09:00
de70eac619 mcstat: Fix error propagation
Change-Id: Ib4a053d5b9ba5eb0d32c46be7c7fcd0be10cb97b
2018-11-30 14:29:14 +09:00
2ba3ec8a4c mcstat: Fix memory related stats
Refs: #1237
Change-Id: I0574cd71fe3b07aeda3ef981bd82d04ce5862f4f
2018-11-30 05:18:48 +00:00
394a1ef3c5 mcstat: Fix array of status strings
More error checks are added at the same time.

Refs: #1223
Change-Id: I406066a6ba0853584d6e1820dde74721ce2682dd
2018-11-30 14:05:21 +09:00
1954aec0ea perf_event_open: Propagate return value
Refs: #1236
Change-Id: I61a4683a533fb199a73a99bc7b2e6f2638212000
2018-11-30 04:10:54 +00:00
2b1b82b242 qlmpi: Refactor test programs
Change-Id: I3dd74eda1b77aea529f9cc044177b6c29185b6df
2018-11-29 10:33:11 +00:00
502463ed9e test: Fix user_space, testing use of copy_from_user / copy_to_user
Change-Id: I2caef1ba6597f693dc4f773ef8fedbd837c45ce6
2018-11-29 19:32:04 +09:00
715f67f32f mcreboot.sh: Fix error handling of BUILDID mismatch
Change-Id: I29d78c4739679e0b3229cc6fa28816f1ceee332c
2018-11-29 19:19:09 +09:00
82a57d5f55 test: Add MCK_DIR to mck_test_config.sample.in
Change-Id: I9ed1b0433fc6b8eeb1cb024be2d33263e3283ab7
2018-11-29 12:50:29 +09:00
56abe988f3 test: Fix user_space, testing use of copy_from_user / copy_to_user
Change-Id: I2caef1ba6597f693dc4f773ef8fedbd837c45ce6
2018-11-29 11:32:42 +09:00
68c581f721 test: Fix 898 and 928
1. Catch up with the interface change in
   ihk_os_destroy_pseudofs() and ihk_os_create_pseudofs()
2. Expect ihk_os_shutdown() to return zero when the OS had been shut
   down

Refs: #898
Refs: #928
Change-Id: Ic430550ebfd5cd21164eefaed155fe769adf8395
2018-11-28 02:19:37 +00:00
6ca5aaa1fc configure: Fix BUILDID (again)
The previous commit made BUILDID use git for submodules, but with a complex
git setup (e.g. worktree), an older git version, or a dead .git 'link', it
would blindly rely on the existence of the .git file even if git does not
actually find anything.
This could lead to an empty BUILDID, which would fail the build.

Just always run the git command, and echo the version string if it failed

Change-Id: Ied268d2150a30dc1146498e15fa8394afc8a8d0d
2018-11-27 17:15:27 +09:00
b2a58ce3e3 Test "Confirm build ID of mcexec, ihk, mckernel" on arm64
Change-Id: Ia5fa6d6d062e8d845c7fedca1b6cc50fbeab1860
2018-11-27 08:12:28 +00:00
cfcf0137eb Test "Exclude areas not assigned to Mckernel from direct map of all phys." on arm64
Change-Id: Ida0d1f13f4a14c2ee219325aaa4b2cac1476c991
2018-11-27 05:29:15 +00:00
00395d68d4 Test "mcexec additional options (h, m, n, O, stack-premap)" on arm64
Change-Id: I85d5deb0433cc1208e4b6837dcc6d6dc2a7b7b52
2018-11-27 05:12:43 +00:00
dc1f96fee3 Add set_cputime() kernel to kernel case and mode enum.
Change-Id: Id4584389f39f255335d3bf7b5606f054f108ad51
Fujitsu: POSTK_DEBUG_TEMP_FIX_84
2018-11-27 05:03:39 +00:00
c585a37440 move mcoverlayfs kernel version check from mcexec.c to configure
While we are here:
 - fix uname -r (single quote?!)
 - add compat for rhel8 (el kernel and version is 4.18)
 - also remove linux version check in mcreboot.sh, trust configure check

Change-Id: I14726d4374b0dfd941640096044ea1d5d88bfcb8
2018-11-26 12:09:00 +00:00
98aa633856 add attribute converted flag
Change-Id: I215e42fa87752d16b8c9744b02d063098cba0af7
2018-11-22 06:04:34 +00:00
ddde519263 Test "rus_vm_fault: If page fault occurs in a thread that has not processed system call offloading, incorrectly return to normal." on arm64
Change-Id: I3dc98d8994228ad27cfdf9ca96a0a76e544bc947
Refs: #923
2018-11-22 05:27:56 +00:00
f240671fc8 Test "ptrace: support for attaching child_process to parent" on arm64
Change-Id: I752542b6bfbf023d22e91f909518660afbff813c
Refs: #885
2018-11-22 04:54:29 +00:00
cf113d392a Test "/proc/PID/maps support add" on arm64.
Change-Id: I0585ae6257b5c0269760dd7f23ba75b83dd7ac2c
2018-11-22 04:53:04 +00:00
9e57db5427 Test "sigaction: support for SA_RESETHAND on x86_64" on arm64
Change-Id: I6154134d53d1ee0344e4bc344f302ffaf810c618
Refs: #1031
2018-11-22 04:51:36 +00:00
739472bd86 Test "xpmem: support for fork()" on arm64
Change-Id: I12c628312157f35e239d3c5e67fa38adf156406b
Refs #925
2018-11-22 04:50:58 +00:00
136b749349 configure.ac: Fix BUILDID
Change-Id: Id9717422c3d5d2de51570d4672864dbd271ad0fc
2018-11-21 17:02:45 +09:00
ae9a1f39df ihk_ikc_recv: Record channel to packet for release
ihk_ikc_release_packet takes the channel and puts the packet into its
free-list.  This fix makes it easy and safe to identify the proper
channel.

Change-Id: I5584b1e8a3ed675c2f9d68f0b5ed331b909197f6
Fujitsu: POSTK_DEBUG_TEMP_FIX_89
2018-11-21 17:01:58 +09:00
10dc87dd3f mcreboot: check on SELinux
Change-Id: I2c3706c04c7977ec22407358232d7c3a21abdc14
2018-11-21 07:52:10 +00:00
724e0eb7d0 mbind(): Fix memory_range_lock deadlock.
Fixed the problem of doing "return error/goto out" while
still holding the memory_range_lock in mbind().

Change-Id: I980a7a440f652b60379acae3cb3575211a749774
Fujitsu: POSTK_DEBUG_TEMP_FIX_100
2018-11-21 16:49:48 +09:00
04e0456232 set_mempolicy(): Add mode check.
Fix a problem where specifying MPOL_F_STATIC_NODES and
MPOL_F_RELATIVE_NODES simultaneously in the set_mempolicy()
mode did not result in an error.

Change-Id: I06e695baf869daee8bc64179748cac27b64e914b
Fujitsu: POSTK_DEBUG_TEMP_FIX_99
2018-11-21 16:49:40 +09:00
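
A userspace check of that rule, assuming libnuma's <numaif.h> wrapper
(compile with -lnuma); the set_mempolicy(2) man page documents this flag
combination as EINVAL:

    #include <errno.h>
    #include <numaif.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long nodemask = 1;   /* node 0 */

        /* MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES are mutually
         * exclusive; the kernel must reject the combination with EINVAL. */
        int mode = MPOL_BIND | MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES;

        if (set_mempolicy(mode, &nodemask, 8 * sizeof(nodemask)) == -1 &&
            errno == EINVAL)
            puts("rejected as expected (EINVAL)");
        else
            puts("unexpected: combination accepted");
        return 0;
    }
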
6626204c99 set_cputime(): interrupt enable/disable fix.
Check the interrupt-enabled state in set_cputime() instead of enabling
interrupts unconditionally on exit.

Change-Id: I99212855f33f5535f67f045665bf5e025c55b690
Fujitsu: POSTK_DEBUG_TEMP_FIX_98
2018-11-21 16:49:30 +09:00
190039f5d9 arch_cpu_read_write_register: error return fix.
Fixed an issue where errors generated in arch_cpu_read_write_register()
were not propagated to the caller.

Change-Id: I05d7d872eab834918220cf18f628aee37208a156
Fujitsu: POSTK_DEBUG_TEMP_FIX_94
2018-11-21 16:49:21 +09:00
583cb94667 mcctrl: remove in-kernel calls to syscalls
Since 4.17.0, kernel cannot call syscalls directly because the calling
convention can be different on x86_64, as explained in this email:
https://lore.kernel.org/lkml/20180325162527.GA17492@light.dominikbrodowski.net

Use the ksys_* alternatives instead when possible, or for readlink use
do_readlinkat (and use readlinkat all the time to simplify ifdefs)

It might be possible to change some of these without ifdefs, but for
example ksys_unshare was only introduced in 4.17, so we need to keep some
direct syscall calling...

Change-Id: Ic47e184b29ef8b21731b2eae6193b0af2548b872
2018-11-21 16:42:26 +09:00
db4d19e419 Add crash utility extension
Change-Id: Ia3dadecdd4605c3ee74d1b5242f67486c675faa7
2018-11-21 07:40:00 +00:00
04c11f35e9 xpmem: Add xpmem_openat
On arm64, glibc's open() of /dev/xpmem is hooked in sys_openat. This
commit adds xpmem_openat, which is called by sys_openat.
It also silently applies the copy_from_user fix to sys_open.

Change-Id: I3b4f7bf0e152c359250bb2b56910db9192390cb1
Fujitsu: POSTK_DEBUG_ARCH_DEP_46, POSTK_DEBUG_ARCH_DEP_62
2018-11-21 07:39:56 +00:00
e12d5ed341 Expose McKernel version in /proc/mckernel
Change-Id: Ica0fbb0ff70a4ff2559e92738926279a3ae78a21
2018-11-21 07:39:54 +00:00
1253f4d18c mcexec shebang: delete spaces *before* path as well
Apparently, a shebang '#! /bin/sh' should work.
Will add some ostests for these...

Change-Id: Iab8ba8e3cc7e434c98742f71fe7db3c425f08278
2018-11-21 07:39:51 +00:00
527adedaa3 madvise: Add MADV_HUGEPAGE support
Since McKernel allocates hugepages by default, we can consider a
madvise call with MADV_HUGEPAGE to be supported.

Change-Id: Ibdaa6f77416d029a1d17210773ef79539ba04b1c
2018-11-21 07:39:26 +00:00
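
What the supported call looks like from an application, for reference (the
mapping size is arbitrary):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 8UL << 20;   /* 8 MiB, enough for a 2 MiB huge page */

        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        /* Hint that the range should be backed by transparent huge pages;
         * on McKernel large pages are the default, so accepting it suffices. */
        if (madvise(p, len, MADV_HUGEPAGE) != 0)
            perror("madvise(MADV_HUGEPAGE)");

        munmap(p, len);
        return 0;
    }
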
525b90d028 flatten_string/process env: realign env and clear trailing bits
envs are placed right after args, which are now possibly unaligned, and are
used through a non-aligned pointer in prepare_process_ranges_args_envs (env)

The memory immediately after args/envs is copied anyway with memcpy_long,
so make sure those bits are initialized and realign env correctly

Fixes: 70e52faf36 ("flatten_strings: do not return unused trailing bits")
Change-Id: Ic747e947d151c0eea65dec36bc9c888cf6e0c394
2018-11-21 07:39:16 +00:00
38e68f358a Add kernel argument to turn on/off time sharing
Add "-T 0" to mcreboot.sh if you want to turn off time sharing.  When
it's turned off, McKernel doesn't activate the interval timer when the
length of the per-CPU run queue is larger than one.

Change-Id: I2cedc1b30a9cd9a0f4608a32ecec0a0d58c6225e
2018-11-21 07:37:01 +00:00
7a3f4d7501 mcctrl rhel8 compat: remove unneeded RHEL_RELEASE_CODE check
it was meant for 3.10 kernels, so the regular < 4.0.0 check
will work for el7 and older kernels as well

Change-Id: I807f030f6303c9c3d17b0d80de55c256a3479486
2018-11-21 07:36:50 +00:00
1a5b10277f mcexec: load_elf: disable execvp for within-mckernel execs
the libc takes care of trying execve as many times as needed for
execvp; it's not a kernel call.

Also, sneak in a double-free fix (desc was not reset properly in case
load_elf_desc_shebang failed)

Fixes: b1681f4a3affff ("mcexec/execve: fix shebangs handling")
Change-Id: If8e3d7ae53acdeffc0331ae8621e0832fcfa406f
2018-11-21 16:17:58 +09:00
a59c55c188 mcexec load_elf_desc: print error after returning
Running "mcexec dfsafds" did not print any message in normal use.
Rather than looking for which message shows up in debug mode and turning
it into an eprintf, add a single coherent message (more shell-like) at the
end and turn the other messages off.

There is a small loss of information, but this is equivalent to what
shells give (a single errno value with no details), and it is now easy
to add --debug to mcexec to see more information if required

Change-Id: Id2c3a47880b7d1d7467883351e6e7af561f91bbf
2018-11-21 16:17:58 +09:00
1d6a078afa mcexec: add --debug-mcexec
We already have debug statements compiled in; add a toggle for them.
Also fix the case indent for 's'

Change-Id: I1104ee57d571b82ec5e061f22cd44033a5c7fc39
2018-11-21 07:16:54 +00:00
fb98664f49 clone_thread: Add arch_clone_thread()
Fujitsu: POSTK_DEBUG_ARCH_DEP_23
Refs: #969
Change-Id: Ic15765b8c9e956c95fc50b333b01464d87450d3c
2018-11-21 07:10:01 +00:00
9db8d115d9 overlayfs: rhel8 compat for the 4.18 version
rhel8 is a 4.18 kernel but they've already backported some later fixes.
Instead of relying on the kernel version, note that the backported changes
removed some defines, so we can check for the presence of those defines
instead, which makes the code more robust to the kernel version wilderness

Change-Id: I6cf5548a7b73a7394405daf850f715a1e20ab0b4
2018-11-21 16:06:31 +09:00
e26e693e58 mcoverlayfs: update and compile new overlayfs for 4.18 kernels
This newer version is much simpler than the old ones:
 - the options are no-ops, which lets the code drop all the allocating
of a new option struct and passing it around
 - ovl_reset_ovl_entry was added and called all the time, but the
mechanism that made this necessary is gone in this kernel version

On the other hand, there is one new thing in this version:
 - newer kernels check the stacking depth of filesystems now, and we are
reaching the default limit of two with our setup. Bump it to three here.

Also, while we are here, make make fail if the requested directory does not
exist, instead of infinitely recursing into make modules in the mcoverlayfs
directory...

Change-Id: I45050d693a0aa6fd3027deaf417c29876ef6a1ea
2018-11-21 16:06:31 +09:00
fc2775c932 mcoverlayfs: add new base from 4.18.14
This just lays out new files so the next commit is easier to review;
nothing changes here

Change-Id: I66669877d2d10632f5436c0eeb32248cd4c8b996
2018-11-21 16:06:31 +09:00
6581f9b4b2 mcctrl syscall: compat for newer zap_vma_ptes
newer versions of this function no longer return an error, on the basis
that "no-one checks what it returns anyway"........

See linux 4.18's 27d036e33237e ("mm: Remove return value of zap_vma_ptes()")

Change-Id: I8fb9f060e3e145cc2db21738585c9ee7f1445f74
2018-11-21 16:06:31 +09:00
3a90521489 mcexec: fix strncat bounding
strncat must not be bounded by the appendee's length, but by how much
space is left where we're appending.
This API is stupid anyway; where is strlcat when we need it...

Change-Id: Icdf418083146420a06f8ba5ffdf882982610d39b
2018-11-21 16:06:31 +09:00
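
The correct strncat() bound, since it is easy to get backwards: the limit is
the space left in the destination, not the length of the source (the strings
here are arbitrary):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char path[32] = "/dev/";
        const char *name = "mcos0";

        /* Wrong: strncat(path, name, strlen(name)) bounds by the source and
         * protects nothing.  Bound by what remains in the destination,
         * keeping one byte for the terminating NUL. */
        strncat(path, name, sizeof(path) - strlen(path) - 1);

        puts(path);
        return 0;
    }
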
03802052ed mcctrl: add handling for one more level of page tables
newer linux has a 5-level page table now; try to handle that.

Some of the macros will be no-ops (e.g. loop for only one iteration) on
architectures/kernels with only 4 levels, but the code needs to be there
to compile

Change-Id: Ifc6304cbb066dce7d4e30962687ae05d7e034730
2018-11-21 07:03:24 +00:00
c21485d427 mcctrl: include linux/cred.h
The header defines __task_cred and other macros we use, and has always
existed; we must have gotten it indirectly on older kernels. It doesn't
hurt to always include it

Change-Id: Iacfff0365e7a21e6247eea42606bbbf1dfccc077
2018-11-21 06:38:08 +00:00
18d50e48dc mcctrl: lookup for alternate syscall names
on newer x64 kernels (config option?), syscalls can be renamed to allow
both x64 and ia32 versions to coexist. Look up either name

Change-Id: I2f55cc804d3eee948ee1ed6d18c69c75bd2f652c
2018-11-21 06:38:08 +00:00
a2be475ae4 mcctrl control: replace cpu_isset by cpumask_test_cpu for new kernels
Change-Id: I60635118e5ce7281de97e024c626ac40d1a4aa36
Fujitsu: POSTK_DEBUG_ARCH_DEP_54
2018-11-21 06:38:08 +00:00
38f683d1d0 mcctrl control: task start_time changed to u64 nsec
Change-Id: I1128c20cf836d20b6e84d7ec58cf8dfb075297da
Fujitsu: POSTK_DEBUG_ARCH_DEP_74
2018-11-21 06:38:08 +00:00
59828db5c9 mcctrl archdeps: rename vdso_image_64 to _vdso_image_64
The symbol appears in some header in some linux version;
it's still not exported so we need our own lookup anyway. Just rename it.

Change-Id: Ia4bce85988641c96fa3f5a0ae1d42c25c713b6c2
2018-11-21 06:38:08 +00:00
1a3c73468f shmobj: Fix rusage counting for large page
Fujitsu: POSTK_DEBUG_TEMP_FIX_88
Change-Id: I852fe804bddf6da5b93a2ac72b0461ee63c98d46
2018-11-21 04:51:57 +00:00
85c936a6cb mcexec: fix terminating zero after readlink()
Change-Id: Icb5432f157ceb2182d93e2d327cfa63ad02a8c0e
2018-11-08 17:01:22 +09:00
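
The pitfall here is a standard one: readlink(2) does not NUL-terminate its
output, so the caller must. A generic illustration:

    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        char buf[4096];
        ssize_t n;

        if (argc < 2)
            return 1;

        /* Read at most sizeof(buf) - 1 so there is room for our terminator. */
        n = readlink(argv[1], buf, sizeof(buf) - 1);
        if (n < 0) {
            perror("readlink");
            return 1;
        }
        buf[n] = '\0';   /* readlink() never writes this byte */

        printf("%s -> %s\n", argv[1], buf);
        return 0;
    }
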
6f9fef2b13 procfs: Make /proc/<PID>/mem unwritable
refs: #1177
Change-Id: Ibb319221155547febf9126e05a9e322bd9f140cc
2018-10-26 08:58:31 +00:00
cc1d39e55d mcctrl_perf_enable: Fix type of integer constant
Change-Id: Ib98eca85a9962520dafdd08b8fc223a6a83bafd0
2018-10-24 14:56:26 +09:00
fd8bed670e ihk_os_setperfevent: Return number of registered events
In addition to that, mcctrl_perf_set is modified so that it updates
usrdata->perf_event_num with number of registered events.

Change-Id: I3f343176f55b06d3baab0b0fe34e240f39706cf6
Fujitsu: POSTK_DEBUG_TEMP_FIX_80
2018-10-24 06:16:41 +00:00
24a3b236a0 Update .gitmodules to point IHK at github
Change-Id: I712f4cf2fb012d2b268f0881a156268024df57b9
2018-10-24 11:20:13 +09:00
27e55b8cf1 mcreboot.sh: Fix error reporting for missing argment
Change-Id: I3af99d7a117d4401c2e0a143fa74513094a53302
2018-10-18 12:06:58 +09:00
70e52faf36 flatten_strings: do not return unused trailing bits
Trailing bits ended up in proc->saved_cmdline, exposing
uninitialized data to the user in /proc/<pid>/cmdline

Change-Id: I74831c8c68dd2f2197b35e9b49aaaae29c4c1dd5
2018-10-15 08:35:50 +00:00
8db36c3828 mcexec: do not resolve links in lookup_exec_path
This would incorrectly make "mcexec sh -c './script.sh'" run with
/bin/bash instead of /bin/sh (which is important, because bash behaviour
changes depending on how it is invoked)

Change-Id: I80610cf442c6c3ecacfa23e8ed15652bc8d4e3f7
2018-10-15 08:35:41 +00:00
06dd71a7e0 Revert "procfs: add '/proc/pid/stat' to mckernel side and fix its comm"
This reverts commit b70d470e20.

That commit had landed too fast after a mistake during the migration
from the old to the new gerrit that didn't keep the -1 vote; it needs some fixes

Change-Id: Ifc8a23e42449dfe471049270b4706e9b137e096e
2018-10-12 10:54:14 +09:00
01fe83dcb3 do_mmap: change addr to uintptr_t
Change-Id: I7df45e125387083aef7e62b046c20b7422f60f22
2018-10-11 09:24:23 +00:00
c86d168165 procfs: handle 'comm' on mckernel side
Change-Id: Ie68514ba3e5161b931b88eeee9e8a2267ee69354
2018-10-11 09:19:42 +00:00
a032dc3d1b procfs: use length from snprintf instead of recomputing
Change-Id: I75ba4cf5c2e94798d183728c11bb34032cdddf5a
2018-10-11 09:17:58 +00:00
201fa7fb55 fork: copy saved_cmdline from parent process
This fixes empty children names for forked children.

Change-Id: I9512f0981d2a241c106ee3e8500f2084ef61a660
2018-10-11 09:14:14 +00:00
dd676f7149 saved_cmdline: only allocate necessary space
Change-Id: Ibb3fe66b46485a28c15e45dca9213f42f5afaa1c
2018-10-11 09:13:15 +00:00
a751e96b1a Add mck_num_processors symbol pointing to num_processors
the 'num_processors' symbol is also used by linux, so trying to load all
symbols from linux and mckernel at the same time renders one of the two
inaccessible (the first one seen is kept by default).

This provides an alternate name for the mckernel symbol, thus letting us
access both more easily if required.

Change-Id: I8074d4f9f9ac45717df9a8df16be710ff762e161
2018-10-11 09:12:04 +00:00
c3bfa3f6a9 move BUG_ON, panic and kprintf define to debug.h; add BUILD_BUG_ON
these functions are more logical to keep together there as they depend
on each other.

Also add a comment about the __printf attribute; if we have a quiet
period it would be useful to enable it and clear the thousands of
warnings...

Change-Id: I47d3891c9cd87da28b2883c29384959f5abd1459
2018-10-11 09:03:53 +00:00
1e1fa4f70d trivial warnings fixes (unused variable/function)
Change-Id: I71cedd2c09eeb5d2c2fd2e988dfdde0877627abc
2018-10-11 09:03:53 +00:00
39f9d7fdff Handle hugetlbfs file mapping
Hugetlbfs file mappings are handled differently than regular files:
 - pager_req_create will tell us the file is in a hugetlbfs
 - allocate memory upfront, we need to fail if not enough memory
 - the memory needs to be given again if another process maps the same
   file

This implementation still has some hacks, in particular, the memory
needs to be freed when all mappings are done and the file has been
deleted/closed by all processes.
We cannot know when the file is closed/unlinked easily, so clean up
memory when all processes have exited.

To test, install libhugetlbfs and link a program with the additional
LDFLAGS += -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align

Then run with HUGETLB_ELFMAP=RW set, you can check this works with
HUGETLB_DEBUG=1 HUGETLB_VERBOSE=2

Change-Id: I327920ff06efd82e91b319b27319f41912169af1
2018-10-11 08:54:13 +00:00
3e3ccf377c compiler.h: add READ_ONCE/WRITE_ONCE macro
These macros are needed to make sure the compiler does not optimize away
atomic constructs such as "while (!READ_ONCE(foo))" loops that do not
modify foo within the loop

Also move the barrier() define to where it belongs while we are here; it is
needed for READ_ONCE/WRITE_ONCE, and including ihk/cpu.h here causes
include loops

Change-Id: Ia533a849ed674719ccbc0495be47d22a3c47b8f8
2018-10-11 08:54:13 +00:00
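
A minimal sketch of what such macros boil down to (a volatile access via
typeof, a GCC extension); the actual compiler.h definitions may differ in
detail:

    #define READ_ONCE(x)       (*(const volatile __typeof__(x) *)&(x))
    #define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

    int flag;

    /* Without READ_ONCE the compiler may hoist the load out of the loop and
     * spin forever, since nothing in the body writes flag. */
    void wait_for_flag(void)
    {
        while (!READ_ONCE(flag))
            ;   /* another CPU eventually does WRITE_ONCE(flag, 1) */
    }
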
13e71ac9dc pager: minor cleanups
- remove unused MF_END (that only makes sense for enums without holes,
  this one is a set of bits masks)
- remove useless goto in pager_req_create()
- init maxprot to 0 from the start, it's not used in the error cases
  (except for debug print)

Change-Id: Ic56c0754824b99f8a7e45fa8e99b8fe3e7c7e592
2018-10-11 08:54:13 +00:00
b1681f4a3a mcexec/execve: fix shebangs handling
There were mainly two problems with shebangs:
 - handling of suffix arguments, e.g. '#!/bin/sh -x'
 - recursive handling, e.g. script1 fetches '#!/path/to/script2'
and script2 itself has a shebang
 - (did I say two?) running a shebang would replace argv[optind] instead
of appending, e.g. a script with '#!/bin/sh' run as './script -c'
would execute '/bin/sh -c' instead of '/bin/sh ./script -c'

There are also two places where this needs parsing:
 - starting a fresh program from mcexec
 - starting a new program from execve in mcexec

The first was easy to fix as we already had argv around, but the latter
required a new way to transfer the 'new argv elements from the script'
to mckernel to prepend to its argv -- it used to be 'desc->shell_path',
but that was no longer used at some point, and a single keyword is not
enough to handle this properly anyway.

This commit does the following:
 - Refactor the lookup_path + load_elf_desc sequence, which used to run
at most twice, into its own function that loops indefinitely, and use it
in both situations described above
 - Transmit the argv additions to mckernel after the desc; mckernel
allocates 4 pages (hardcoded) for the descs and we will hopefully have
room for the script arguments on top of that... (there is no guard!!!)
 - Change flatten_strings to allow prepending a flattened string instead
of a single string.
Note that the flatten_strings change also brought a difference in the
format: to have the full length embedded within the string, the last
slot that used to hold zeroes now contains the position of the end of the
buffer (where the last+1 string would be if there had been one).
This required a trivial change in mckernel's prepare-args function, which
relied on this property for no real reason.

Hopefully things work™; this probably warrants adding a couple of new
ostests:
 - create a couple of scripts with recursive invocation/arguments and
check their own argv
 - execute "mcexec script args" and "mcexec sh -c 'script args'"

Change-Id: I2cf9cde5c07c9293f730de89c9731bd93dbfa789
Refs: #1115
2018-10-04 14:31:02 +09:00
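
A rough sketch of the shebang parsing step described above: read the first line, extract the interpreter and at most one suffix argument, and prepend both (plus the script path) to the original argv. Function and buffer names here are illustrative, not the mcexec code.

```
 #include <stdio.h>
 #include <string.h>

 /*
  * Parse "#!interpreter [optional argument]" from the first line of a
  * script.  Illustrative only -- mcexec additionally loops so that an
  * interpreter which is itself a script gets handled recursively.
  */
 static int parse_shebang(char *line, char **interp, char **arg)
 {
 	char *p;

 	if (strncmp(line, "#!", 2) != 0)
 		return 0;
 	line[strcspn(line, "\n")] = '\0';	/* drop the trailing newline */

 	p = line + 2;
 	while (*p == ' ' || *p == '\t')		/* skip spaces before the path */
 		p++;
 	*interp = p;
 	while (*p && *p != ' ' && *p != '\t')	/* end of interpreter path */
 		p++;
 	if (*p) {
 		*p++ = '\0';
 		while (*p == ' ' || *p == '\t')
 			p++;
 	}
 	*arg = *p ? p : NULL;			/* rest of the line, if any */
 	return 1;
 }

 int main(void)
 {
 	char line[] = "#!/bin/sh -x\n";
 	char *interp, *arg;

 	if (parse_shebang(line, &interp, &arg))
 		/* new argv: interp, arg (if present), script path, old argv[1..] */
 		printf("interp='%s' arg='%s'\n", interp, arg ? arg : "(none)");
 	return 0;
 }
```
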
1226e692d9 mcstat: Install mcstat.1
Change-Id: Id5af2f56ef9cc9c444bfc0500190f52ffc779936
2018-10-04 02:52:18 +00:00
73ea4b1ce9 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC times out
Change the return value from -EINVAL to -ETIME.

Refs: #1167
Change-Id: I87fa57bb45d0036b7e4b25366aa7b7ce6fb2c764
2018-10-04 02:44:22 +00:00
09f663c246 mcctrl procfs: check entry was returned before using it
Change-Id: If66e95d217d1045e2e65bc5978bba020e3fa7c0d
Refs: #1116
2018-10-04 02:41:16 +00:00
9b77630c8b mcexec: readlink and use full path for reexec
This fixes comm on linux side, showing mcexec instead of 'exe'

Change-Id: I9345d7a23dccb36b3a1e17fd3e7491eaeca54e5b
2018-10-04 01:03:10 +00:00
b70d470e20 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
This lets ps show the proper executable name instead of mcexec's comm
on linux side

Change-Id: I62732037451f129fc2e905357ebdc351bf7f6d2d
Refs: #1114
2018-10-04 01:01:19 +00:00
ecc850dfef procfs/do_fork: wait until procfs entries are registered
Do not return from fork() until the mcctrl side has created McKernel's
procfs entries for the child PID.

This fixes programs that call fork() and immediately open
/proc/<child pid>/something, which used to get an error.

Refs: #1189
Change-Id: Ie10ea56b65c55f59e96a1ab6ef83a1070e36048d
2018-10-04 01:00:52 +00:00
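
The failing pattern looks roughly like this; a hypothetical standalone example, with /proc/<pid>/status chosen arbitrarily as the entry being opened.

```
 #include <stdio.h>
 #include <sys/wait.h>
 #include <unistd.h>

 int main(void)
 {
 	char path[64];
 	FILE *f;
 	pid_t pid = fork();

 	if (pid < 0) {
 		perror("fork");
 		return 1;
 	}
 	if (pid == 0) {		/* child: just linger briefly */
 		sleep(1);
 		_exit(0);
 	}
 	/* Parent: look at the child's procfs entry right away.  Before the
 	 * fix, the entry might not have been registered yet. */
 	snprintf(path, sizeof(path), "/proc/%d/status", (int)pid);
 	f = fopen(path, "r");
 	if (!f)
 		perror(path);
 	else
 		fclose(f);
 	waitpid(pid, NULL, 0);
 	return 0;
 }
```
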
b11377f2e9 Increase IKC master channel size
Change-Id: I183878bb22b848e1230f8028947cf46485293471
2018-10-03 06:23:17 +00:00
ed1edb152b ptrace supports threads
Fujitsu: POSTK_DEBUG_TEMP_FIX_53, POSTK_DEBUG_ARCH_DEP_44
Refs: #771, #1179, #1143
Change-Id: Ie17ece6864f0eeb0c0e550f4e369abb77980a0d0
2018-10-01 03:57:16 +00:00
28c434a230 test: Fix test for 898 and 928
Change-Id: If939dda7ccdcf568abfa42ccab7ff6be2b983cc2
2018-09-28 02:55:55 +00:00
daa234d8b9 mcexec_create_per_process_data: use copy_from_user
Refs: #1205
Change-Id: Idced73a7f88aada5fc2462b490d56603f8fe2472
2018-09-27 15:42:01 +00:00
e803698618 test: Refactor test programs
Change-Id: I77fec2f5f30f6fda3bda6f85ce00f1c2e7f7a9b3
2018-09-25 12:45:20 +09:00
c862b29d65 sched_setaffinity: Check migration after decrementing in_interrupt
refs: #1180
Change-Id: I2b3fb03066812ecc802406297084977e757092fe
2018-09-25 01:52:54 +00:00
dd58d366c3 procfs: Fix pread/pwrite to procfs failing when the specified size is bigger than 4MB
Fujitsu: POSTK_DEBUG_TEMP_FIX_43
Refs: #1018
Change-Id: I736ac69885695ef8eeababc3fcfe69a6258b4e16
2018-09-20 02:06:17 +00:00
ab284b0531 test: Add test programs for #1158
refs: #1158
Change-Id: I853dd84f5433a01da510813e9fb1276e5477f73f
2018-09-20 02:05:55 +00:00
42b9b31606 mcctrl: Propagate writecore()'s return value to caller
Fujitsu: POSTK_DEBUG_TEMP_FIX_62
Change-Id: I847dd520187cbf66fbad8140f79f62c6d5d9d5fc
2018-09-20 11:01:22 +09:00
29c5c68761 coredump: Change type of coretable.len to loff_t from int
Fujitsu: POSTK_DEBUG_TEMP_FIX_61
Change-Id: I6a27a8d477c3b3dcc12be772a15dfcff370bd2a8
2018-09-20 11:01:22 +09:00
38c08a6663 coredump: Add O_TRUNC to flags opening corefile
Fujitsu: POSTK_DEBUG_TEMP_FIX_59
Change-Id: I36c89fa894dfc0cdd170781e8ca4aab6149d4928
2018-09-20 11:01:20 +09:00
57258e7f59 coredump: Don't dump when MCK_RLIMIT_CORE is zero
Fujitsu: POSTK_DEBUG_ARCH_DEP_67
Change-Id: Ic85c793b052cde9d7fa4fe510c5daee303d370c4
2018-09-20 01:51:18 +00:00
8c33c92720 mcctrl: Switch Linux functions/structures according to the version
For get_user_pages_remote in binfmt_mcexec.c:
In 4.10 with 5b56d49fc31d ("mm: add locked parameter to
get_user_pages_remote()")
In 4.9 with 9beae1ea8930 ("mm: replace get_user_pages_remote()
write/force parameters with gup_flags")

For vmf in syscall.c, these two patches in 4.10:
82b0f8c39a38 ("mm: join struct fault_env and vm_fault")
1a29d85eb0f1 ("mm: use vmf->address instead of
vmf->virtual_address")

Fujitsu: POSTK_DEBUG_ARCH_DEP_41
Change-Id: I89a02d03169a2162ea186da1804bf48910446d11
2018-09-20 01:50:04 +00:00
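
A minimal sketch of the version-switching pattern for the vm_fault change mentioned above; the helper name is illustrative, this is not the actual mcctrl code.

```
 #include <linux/mm.h>
 #include <linux/version.h>

 /* Pick the right field for the faulting address depending on the kernel
  * version: 4.10 replaced vmf->virtual_address with vmf->address. */
 static inline unsigned long mc_fault_address(struct vm_fault *vmf)
 {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
 	return vmf->address;
 #else
 	return (unsigned long)vmf->virtual_address;
 #endif
 }
```
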
a269d96978 coredump: Exclude special areas
Fujitsu: POSTK_DEBUG_TEMP_FIX_38
Refs: #1005
Change-Id: I8934d2aecf06a09469afe131347e42b48b6f67f6
2018-09-20 01:48:17 +00:00
2910818f06 execve: Fix calling ptrace_report_signal after preemption is disabled
Change-Id: I451d28d985ab330d855501597713e982b8febf4e
Refs: 1194
2018-09-20 01:31:31 +00:00
3df82d61ce test: Fix tests of "user_space"
user_space/swapout/swapout_copy_to_01.sh:
* Use ~/.mck_test_config
* Fix checking if McKernel version is written in swap-file

user_space/futex/futex_test.sh:
* Use ~/.mck_test_config

user_space/perf_event_open/perf_event_open_test.sh
* Use ~/.mck_test_config

Change-Id: Id93b207ed0e3e9ebf307073db81b40335bc5b140
2018-09-19 08:54:08 +00:00
159092c58e rusage: Refactor test programs
Change-Id: I846a6416acf903f7fa19db98d4d937c51c10b4af
2018-09-18 18:42:19 +09:00
60011718d2 add common test framework
Add new file with common functions for tests to use.

 - loads config file
 - checks for mcexec etc
 - checks for LTP and OSTEST if required
 - handle mcstop / mcreboot if required, and provide function for it

At the same time, make a few changes to mck_test_config:
 - move to ~/.mck_test_config
 - add boot params to the config; tests that require specific params can
   overwrite it
 - make the config "set-if-variable-is-empty", so someone can overwrite
   any param by setting the environment variable, e.g. LTP=.... ./test.sh
   will use the value given

Change-Id: Ib04112043e3eb89615dc7afaa8842a98571fab93
2018-09-14 03:30:06 +00:00
7e342751a2 do_syscall: Delegate system calls to the mcexec with the same pid
This includes the following fix:
send_syscall, do_syscall: remove argument pid

Fujitsu: POSTK_TEMP_FIX_26
Refs: #1165
Change-Id: I702362c07a28f507a5e43dd751949aefa24bc8c0
2018-09-13 16:59:47 +09:00
c23bc8d401 syscall_time: Handle by McKernel
refs: #1036
Change-Id: Ifa81b613c7ee8d95ae7cdf3dd54643f60526fa73
2018-09-13 07:44:02 +00:00
5e760db417 syscall: the signal received during system call processing is not processed.
Refs: #1176
Fujitsu: POSTK_DEBUG_TEMP_FIX_56
Change-Id: I410160ccbcef3ef49a0e37611a608bc87c97e63b
2018-09-13 07:04:11 +00:00
e4da71010c check_signal: system call restart is done only once
Fujitsu: POSTK_TEMP_FIX_66
Refs: #1009
Change-Id: Ic0f04ac6b7f6c6bb01b55fb389bf9befd56b1dd9
2018-09-13 07:00:49 +00:00
c25fb2aa39 memobj: transform memobj lock to refcounting
We had a deadlock between:
 - free_process_memory_range (take lock) -> ihk_mc_pt_free_range ->
... -> remote_flush_tlb_array_cpumask -> "/* Wait for all cores */"
and
 - obj_list_lookup() under fileobj_list_lock that disabled irqs
and thus never ack'd the remote flush

The rework is quite big but removes the need for the big lock,
although devobj and shmobj needed a new smaller lock to be
introduced - the new locks are used much more locally and
should not cause problems.

On the bright side, moving refcounting to the memobj level means the
refcounting implemented separately in every object type could be
removed, which simplifies the code a bit.

Change-Id: I6bc8438a98b1d8edddc91c4ac33c11b88e097ebb
2018-09-12 18:03:25 +09:00
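
A generic sketch of the get/put refcounting pattern the commit moves to the memobj level, written with plain C11 atomics for illustration; the actual McKernel primitives and object layout differ.

```
 #include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>

 struct obj {
 	atomic_int refcount;	/* starts at 1 for the creator */
 	/* ... object payload ... */
 };

 static struct obj *obj_get(struct obj *o)
 {
 	atomic_fetch_add_explicit(&o->refcount, 1, memory_order_relaxed);
 	return o;
 }

 static void obj_put(struct obj *o)
 {
 	/* The last reference frees the object; no big lock around users. */
 	if (atomic_fetch_sub_explicit(&o->refcount, 1,
 				      memory_order_acq_rel) == 1)
 		free(o);
 }

 int main(void)
 {
 	struct obj *o = calloc(1, sizeof(*o));

 	if (!o)
 		return 1;
 	atomic_init(&o->refcount, 1);
 	obj_get(o);	/* e.g. a second mapping takes a reference */
 	obj_put(o);
 	obj_put(o);	/* last put frees the object */
 	printf("done\n");
 	return 0;
 }
```
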
b51886421e uti: Don't compile syscall_intercept related stuff when not specified with configure option
Change-Id: I9be8cb9b3fcae78d33a33b057c43caee23a81fc1
2018-09-05 16:29:20 +09:00
22c6c5c736 do_syscall: Call schedule() when runq_len > 1
This optimization makes the offloading thread quickly yield to
another thread. Without this, it yielded only after the interval timer
set the rescheduling flag.

Change-Id: Ida3b17ed94782d5d1af0185a96b1f50d9db8d244
2018-09-04 19:53:03 +09:00
cd00fc3a78 set_timer: Start timer when runnable thread count is bigger than one
Change-Id: Ie32799fff2936ffc057f166db5681edccdbf5920
2018-09-04 19:53:03 +09:00
00a34a8ba3 uti: util_thread: Hoist uti_desc check
Change-Id: I8c4b75140df2fe149dfe20e0a8f0bf323b5f1763
2018-09-04 19:53:03 +09:00
8900c2cec5 uti: mcexec_uti_attr: Fix CPU binding decision
Change-Id: I4047858895503ae912e5575bb232dbbb2f915722
2018-09-04 19:53:03 +09:00
fca02ee248 uti: Add error checks to kmalloc of struct uti_attr 2018-09-04 19:53:03 +09:00
781a69617b uti: Replace data types represented as arrays with C structures
Defining C structures for the following objects:
(1) Remote and local context
(2) Stack of system call arguments / return values

Change-Id: Iafbb6c795bd765e3c78c54a255d8a1e4d4536288
2018-09-04 19:53:03 +09:00
04d4145b3e uti: Replace dead uti thread with new mcexec thread in proc->tids
Change-Id: Ic6e906dd1bfac1b07f1317732cbe0a5191831cd8
2018-09-04 19:53:03 +09:00
96aab7e215 uti: Cosmetic change in util_thread
Change-Id: I8aa75efa4dbfb798e40e75f76bacbd184dae23b8
2018-09-04 19:53:02 +09:00
98ee584ab6 uti: Change field name of release_user_space_desc
Change-Id: I18ada86ec3835198c1a947d8ceb36075d6ff2e94
2018-09-04 19:53:02 +09:00
6b031c5472 uti: Fix condition for pthread_join of mcexec threads
Change-Id: Iaeee91c197b84436f84ce4380768aa79e7f9419e
2018-09-04 19:53:02 +09:00
e42c414454 uti: Hook system calls by binary-patching glibc
(1) Add --enable-uti option. The binary-patch library is
    preloaded with this option.
(2) Binary-patching is done by syscall_intercept developed by Intel

This commit includes the following fixes:

(1) Fix do_exit() and terminate() handling
(2) Fix timing of killing mcexec threads when McKernel thread calls terminate()

Change-Id: Iad885e1e5540ed79f0808debd372463e3b8fecea
2018-09-04 19:53:02 +09:00
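
For reference, this is roughly how a hook is installed with syscall_intercept when a library is preloaded; check the library's public header for the exact signature. The getpid handling below is purely illustrative and not what the uti library does.

```
 #include <libsyscall_intercept_hook_point.h>
 #include <syscall.h>

 /* Return 0 to tell the library the hook handled the call (the kernel is
  * skipped and *result is used); return non-zero to let it go through. */
 static int hook(long syscall_number,
 		long arg0, long arg1, long arg2,
 		long arg3, long arg4, long arg5,
 		long *result)
 {
 	(void)arg0; (void)arg1; (void)arg2;
 	(void)arg3; (void)arg4; (void)arg5;

 	if (syscall_number == SYS_getpid) {
 		*result = 12345;	/* fake answer, for illustration only */
 		return 0;
 	}
 	return 1;
 }

 /* Runs when the preloaded library is loaded, before main(). */
 static __attribute__((constructor)) void init(void)
 {
 	intercept_hook_point = hook;
 }
```
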
e613483bee uti: Add system call profile 2018-09-04 19:53:02 +09:00
c0271f4727 Add debug messages for per-process data 2018-09-04 19:53:02 +09:00
4969762f15 uti: Add usage of uti specific options to mcexec 2018-09-04 19:53:02 +09:00
09d3648e43 uti: Set PROT_EXEC to host VMA when PROT_READ is set
Set PROT_EXEC on the host VMA because uti needs PROT_EXEC for text VMAs.

The meanings of the prot bits of the host VMA have been changed as follows:
   RWX: No mapping or RW mapping
   RX: Read-only mapping
2018-09-04 19:53:02 +09:00
4e905cd412 uti: do_syscall: Don't warn when proxy is gone
This is now a normal case, since terminate() has been changed so that it
first kills all mcexec threads and then kills the McKernel threads.

Change-Id: I88380bf28b60645d361baded525d71105235c16f
2018-09-04 19:53:01 +09:00
8c11daf726 uti: Fix signal relay from mcexec to McKernel
Change-Id: I2ffd8049a0fb1637cfc6bab7fe24c6a85e5e53fc
2018-09-04 19:53:01 +09:00
5cb8a1f10f uti: Workaround not to share CPU with OpenMP threads
* Assign uti thread to the last idle CPU so that it's not shared with
  an OpenMP thread

Change-Id: Ia42cae056ce81fde9b6dab6286b39a52f3c9e172
2018-09-04 19:53:01 +09:00
dbba7dea18 uti: Allow only the first do_fork() call to create a uti thread 2018-09-04 19:53:01 +09:00
b6ab5911b7 uti: Identify uti thread by clone count
--uti-thread-count <count> is added to mcexec.

Change-Id: Id9ec464412a5bb71e4d9e87d05f79de22d35b067
2018-09-04 19:53:01 +09:00
b0d7f890d0 uti: Reverse-offload msync() 2018-09-04 19:53:01 +09:00
b9c0cdddab uti: Cosmetic change 2018-09-04 19:52:14 +09:00
7ee7dd5e2c uti: Allow tracer to call release_handler() for the main process
Change-Id: I934a6eefbcb87473e87c109d6b4d32c7ab486894
2018-09-04 19:52:14 +09:00
07db4a80a7 __do_in_kernel_syscall: Move ihk_ikc_release_packet from mcexec_wait_syscall
Change-Id: Ieeb5fda42dbddc9da27242f4b547c2143659f97a
2018-09-04 19:52:14 +09:00
f04e5c24ab uti: Don't call mcexec_terminate_thread() when McKernel asks mcexec to interrupt system call 2018-09-04 19:52:14 +09:00
b8bacdd2de Reference counting per-thread data
It is accompanied by the following fixes:
(1) Fix put ppd locations in mcexec_wait_syscall()
(2) Move put ptd to end of mcexec_terminate_thread_unsafe() and mcexec_ret_syscall()
(3) Add debug messages for ptd add/get/put
(4) Fix ptd-add/get/put matching in mcexec_wait_syscall()
    * Skip put when woken-up from wait_event_interruptible() by signal

Change-Id: Ib9be3f5e62a7a370197cb36c9fa7c4d79f44c314
2018-09-04 19:52:14 +09:00
a121ffc785 uti: Release packet of reply from McKernel in backward_offload() 2018-09-04 19:52:14 +09:00
88f9693390 uti: Return -ENOSYS without offloading for set_robust_list()
Change-Id: I43466e3850fd2ad68e5754d1d460438fa47f3ed4
2018-09-04 19:52:13 +09:00
124ec580a0 uti: Call do_exit when tracer isn't working and do_syscall returned -ERESTARTSYS 2018-09-04 19:52:13 +09:00
af7f61db49 uti: mcexec: Fix error check of pthread_detach
Change-Id: Idda8e060641bbd7b01c50163140a2c5f7466d193
2018-09-04 19:52:13 +09:00
ee299b5780 uti: Check size of syscall arguments for syscall_intercept
Change-Id: I747b90e1f521b08266cfc021ef4b23e2e3c7ba4c
2018-09-04 19:52:13 +09:00
c60a778c8d uti: Zero-clear struct mckernel_exec_file before initialization
Change-Id: I315008b7f5c9e66a93b80da87d1a6332d717c2aa
2018-09-04 19:52:13 +09:00
25a129ea6a uti: Disable jumping to McKernel futex code 2018-09-04 19:52:13 +09:00
8e9924c523 uti: Lock per_thread_data_hash_lock in mcctrl_put_per_proc_data() 2018-09-04 19:52:13 +09:00
c71291a429 mcctrl: Add mcexec_terminate_thread_unsafe()
Change-Id: I6ca54cdac2ab9449d40b22f7329f1a215e5aa33b
2018-09-04 19:52:13 +09:00
ba93b83d68 uti: Add __user to mcexec_terminate_thread argument
Change-Id: Ic96a91e6a892a1bd2f1d333580e28bced6a40dc0
2018-09-04 19:52:13 +09:00
c2f41ca9ad uti: Replace hand-made list of host_threads with Linux macro
Change-Id: Ib46cc9fcdd2854b7bbe21c2cc885beeb22d16dd2
2018-09-04 19:52:13 +09:00
062d7ecae3 uti: Use copy_from_user() in mcexec_terminate_thread() 2018-09-04 19:52:12 +09:00
58d038fcac uti: Fix wrong argument passed to ihk_ikc_release_packet() in mcexec_terminate_thread() 2018-09-04 19:52:12 +09:00
510310342c uti: Use fresh struct syscall_request instance when replying to syscall_backward() 2018-09-04 19:52:12 +09:00
a6198f267b uti: Offload set_robust_list to McKernel 2018-09-04 19:52:12 +09:00
5e78bd85ab uti: Fix tracer exit code for the case when create_tracer() isn't called 2018-09-04 19:52:12 +09:00
85c0c8a01f uti: Add debug messages for syscall
Change-Id: I2f96e71d5384f883f7dc568122c57d92bc1cd818
2018-09-04 19:52:12 +09:00
e29f579061 uti: Prevent user space vma from getting copied when forking 2018-09-04 19:52:12 +09:00
63703589e5 uti: Clear user space PTEs after first fork in create_tracer()
Change-Id: I60755f0cb5e84c3a5a5cd91515411a30f0995822
2018-09-04 19:52:12 +09:00
5c8c1986b5 uti: Add comment on ppd life cycle
Change-Id: Id16cf036b2d919444e8634b536fd701d996bcef2
2018-09-04 19:52:12 +09:00
e4370d235c uti: Make tracer not call mcexec_terminate_thread() when tracee is killed by signal
Change-Id: I5878c7d623ce182a7cb9578c9d5c430c1bee8e1e
2018-09-04 19:52:12 +09:00
31ac007cb5 uti: Increase CPU_HZ to 1000
Change-Id: I8619263845fd8ebabe6fc7de619a5b51ac04470a
2018-09-04 19:52:11 +09:00
56da7e2de9 uti: Allocate memory area directly to uti_desc->wp
Change-Id: Ia5a1dbf56b937d9d05cd7fa1c5eec4a5b4b7b196
2018-09-04 19:52:11 +09:00
35300e7b4f uti: Create tracer when forking
Change-Id: Ic66cf6289ac6f32a884ba1266e641ce61620a239
2018-09-04 19:52:11 +09:00
439dc0928b uti: Streamline syscall_backward() 2018-09-04 19:52:11 +09:00
4b3e58fd3d uti: Call terminate only when exit_group is called
Tracer tells McKernel side to call do_exit() in WIFSIGNALED case.

Change-Id: If85c6cbb4856036b406b11335f1384e57f26292d
2018-09-04 19:52:11 +09:00
b7cdbd6c42 uti: Enforce mcexec is destroyed and then McKernel process is destroyed 2018-09-04 19:52:11 +09:00
77f5cac2bf uti: Make tracer exit when not used
Change-Id: I3d3b2f92fa2b160ffce633c46d1b60e9079e7f1b
2018-09-04 19:52:11 +09:00
9102b176c4 uti: Make per_proc_data of tracee survive over the signal-kill of the tracee
Change-Id: I8ff1dddb526ef2fd948cfe1b8f3aa8403c2006d6
2018-09-04 19:52:11 +09:00
bb4317beaf uti: futex: Propagate -ERESTARTSYS returned by wait_event_interruptible()
Change-Id: Id36c4df0e0a8e1f64b12c635c0502f63552ba50b
2018-09-04 19:52:11 +09:00
d24b7585b7 uti: Make tracee pthread-detached
Change-Id: I672ee18739b956980901b63e55ee3ebc192b4e56
2018-09-04 19:52:11 +09:00
4438f994dc uti: Add/Modify test programs
Change-Id: I27a39d6b11af5243f93d07c31c2ef80f6727dd53
2018-09-04 19:52:11 +09:00
52afbbbc98 uti: Call into McKernel futex()
(1) Masquerade clv
(2) Fix timeout
(3) Let the mcexec thread with the same tid as the McKernel thread
    migrating to Linux handle the migration request
(4) Call create_tracer() before creating proxy related objects

Change-Id: I6b2689b70db49827f10aa7d5a4c581aa81319b55
2018-09-04 19:52:10 +09:00
460917c4a0 remote_page_fault,syscall_backward: Zero-clear waitq entry
Change-Id: I151a35004183e911aaba766a8749830e1768bfe6
2018-09-04 19:52:10 +09:00
7803468afe remote_page_fault,syscall_backward: Retry when interrupted by signal
Change-Id: Ic7d72ad9ca32bb3c8e3522e00fef1d98caf3c049
2018-09-04 19:52:10 +09:00
8f2c7d2265 Fix thread-safety issue in rus_vm_fault
Change-Id: I8640a8e0de8a0dfaee700b25e5f9e2941ac98fc8
2018-09-04 19:52:10 +09:00
c6c3a84a46 syscall: Add missing definition of thread to access thread->sigpending 2018-09-04 19:52:10 +09:00
5a7ca14fcc rus_vm_fault: Return VM_FAULT_SIGBUS when per-process data is not found 2018-09-04 19:52:10 +09:00
d7b882855a Correct comments in declaration of struct ikc_scd_packet 2018-09-04 19:52:10 +09:00
2337832e4c pager_req_release(): Correct debug messages 2018-09-04 19:52:10 +09:00
be635ceb19 terminate: Fix counting of non-leader threads
Change-Id: I8399ad553bb8e09bef508ac976e8cd56cdae8013
2018-09-04 19:51:11 +09:00
0b0b7b03d7 Prevent one CPU from getting chosen by concurrent forks
The same CPU could be chosen by concurrent forks because CPU selection
and runq addition were not done atomically, so this fix makes the two
steps atomic.

Change-Id: Ib6b75ad655789385d13207e0a47fa4717dec854a
2018-09-04 19:51:11 +09:00
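
A generic sketch of the fix's shape: hold one lock across CPU selection and runqueue insertion so two forks cannot both pick the same idle CPU. The names and data structures are illustrative, not the McKernel scheduler.

```
 #include <pthread.h>
 #include <stdio.h>

 #define NCPUS 4

 static pthread_mutex_t runq_lock = PTHREAD_MUTEX_INITIALIZER;
 static int runq_len[NCPUS];

 /* Pick the least loaded CPU and enqueue on it in one critical section. */
 static int place_new_thread(void)
 {
 	int cpu = 0, i;

 	pthread_mutex_lock(&runq_lock);
 	for (i = 1; i < NCPUS; i++)
 		if (runq_len[i] < runq_len[cpu])
 			cpu = i;
 	runq_len[cpu]++;	/* enqueue before anyone else can choose it */
 	pthread_mutex_unlock(&runq_lock);
 	return cpu;
 }

 int main(void)
 {
 	printf("placed on CPU %d\n", place_new_thread());
 	printf("placed on CPU %d\n", place_new_thread());
 	return 0;
 }
```
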
82914c6a2e remote_page_fault: Retry when interrupted
Change-Id: Ib71a87ad03420e1918dc97da43351cb93e7d0754
2018-09-04 19:51:11 +09:00
f127dfdf1e mcexec_create_per_process_data: Zero ppd on allocation
Change-Id: I06306f30ce30ad6ddc6e8b8cab46ee39be0e4940
2018-09-04 19:51:11 +09:00
567dcd3846 Fix deadlock involving mmap_sem and memory_range_lock
Change-Id: I187246271163e708af6542c057d0a8dfde5b211e
Fujitsu: TEMP_FIX_1
Refs: #986
2018-09-04 19:51:10 +09:00
b080e0f301 spinlock: Add trylock
Change-Id: If349d7c0065609615f5df229f70c59f92bf97adf
2018-09-04 19:51:10 +09:00
ff383d96ba spinlock: rewrite spinlock to use Linux ticket head/tail format
This is a cherry-pick of 2964302d094f035242d6257d8af5450f72f9b5a7.

Change-Id: Ie8b7e825b28415dd41cc232fbeceb4653251f9e3
2018-09-04 19:51:10 +09:00
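
A generic ticket-lock sketch using C11 atomics, to illustrate the head/tail idea; the Linux/McKernel version packs both counters into a single word and adds pause/backoff, so this is not that code.

```
 #include <stdatomic.h>
 #include <stdio.h>

 typedef struct {
 	atomic_uint head;	/* ticket currently being served */
 	atomic_uint tail;	/* next ticket to hand out */
 } ticket_lock_t;

 static void ticket_lock(ticket_lock_t *l)
 {
 	unsigned int mine = atomic_fetch_add_explicit(&l->tail, 1,
 						      memory_order_relaxed);
 	while (atomic_load_explicit(&l->head, memory_order_acquire) != mine)
 		;	/* spin: FIFO order, so no starvation */
 }

 static void ticket_unlock(ticket_lock_t *l)
 {
 	atomic_fetch_add_explicit(&l->head, 1, memory_order_release);
 }

 int main(void)
 {
 	ticket_lock_t l = { 0, 0 };

 	ticket_lock(&l);
 	printf("in critical section\n");
 	ticket_unlock(&l);
 	return 0;
 }
```
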
0bcd3d5de3 unimap: update ihk to unimap
Change-Id: I5b23270f9253d26031ad90bb38721a6234bd98e1
2018-09-04 19:51:10 +09:00
9d6e0319f7 atobytes(): restore postfix before return 2018-09-04 19:51:10 +09:00
0e50eb44a9 process/vm/access_ok: fix edge checks.
Add a check for start/end being larger than the range we're checking.
Fix a corner case where access_check() was done on the last vm range and
we would look beyond the last element (null deref).
2018-09-04 19:51:10 +09:00
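
A minimal sketch of an overflow-safe range check of the kind access_ok() needs; a generic illustration with assumed names, not the McKernel implementation.

```
 #include <stdio.h>

 /* Return 1 when [addr, addr + size) lies entirely inside [start, end). */
 static int range_ok(unsigned long addr, unsigned long size,
 		    unsigned long start, unsigned long end)
 {
 	if (size > end - start)			/* too big to fit at all */
 		return 0;
 	if (addr < start || addr > end - size)	/* avoids addr + size overflow */
 		return 0;
 	return 1;
 }

 int main(void)
 {
 	/* hypothetical user range, for illustration */
 	unsigned long start = 0x1000, end = 0x8000;

 	printf("%d\n", range_ok(0x2000, 0x100, start, end));	/* 1 */
 	printf("%d\n", range_ok(0x7f00, 0x200, start, end));	/* 0 */
 	return 0;
 }
```
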
2db69d0f24 process/vm: implement access_ok() 2018-09-04 19:51:10 +09:00
a697f5e98d partitioned execution: pass process rank to LWK
Cherry-pick of d2d134d5e6a4b16a34d55d31b14614a2a91ecf47

Conflicts:
	kernel/include/process.h
2018-09-04 19:51:10 +09:00
4439b04d9f ihk_mc_get_linux_kernel_pgt(): add declaration
Cherry-pick of caff967a442907dd75f8cd878b9f2ea7608c77b2
2018-09-04 19:51:10 +09:00
38c3b2358a Exclude areas not assigned to Mckernel from direct map of all phys. memory
It's enabled by adding -s to mcreboot.sh.

Cherry-pick of the following commit:

commit b5c13ce51a5a4926c2cf11c817cd0d369ac4402d
Author: Katsuya Horigome <katsuya.horigome.rj@ps.hitachi-solutions.com>
Date:   Mon Nov 20 09:40:41 2017 +0900

    Include measures to prevent memory destruction on Linux side (This is rebase commit for merging to development+hfi)
2018-09-04 19:51:10 +09:00
221ce34da2 eclair: fix MAP_KERNEL_START and apply Fujitsu's proposals
(1) Cherry-pick of 644afd8b45fc253ad7b90849e99aae354bac5b17
(2) Pass length to functions with arguments of variable length
    * POSTK_DEBUG_ARCH_DEP_38
(3) Separate architecture dependent functions/structures
    * POSTK_DEBUG_ARCH_DEP_34
(4) Fix include path
    * POSTK_DEBUG_ARCH_DEP_76
(5) Include config.h
    * POSTK_DEBUG_ARCH_DEP_33
2018-09-04 19:51:09 +09:00
4246d41007 kmalloc_header: use signed integer for target CPU id
Cherry-pick of bdb2d4d8fa94f9c0268cdfdb21af1a2a5c2bcae5
2018-09-04 19:51:09 +09:00
65df9c8084 ihk_mc_get_processor_id(): return -1 for non-McKernel CPUs
Cherry-pick of c45641e97add9fde467844d9272f2626cf4317de
2018-09-04 19:51:09 +09:00
7836aa0136 Map LWK TEXT to the end of Linux modules section (0xFFFFFFFFFE800000) 2018-09-04 19:51:09 +09:00
1cf7fad15a virt_to_phys(): fix debug messages
Cherry-pick of 46eb3b73dac75b28ead62476f017ad0f29ec4b0a
2018-09-04 19:51:09 +09:00
0076e1f5e0 mem: make McKernel kernel heap virtual addresses Linux compatible
Cherry-pick of e5334c646d2dc6fb11d419918d8139a0de583fde
2018-09-04 19:51:09 +09:00
cae6b9f154 move McKernel out of Linux kernel virtual 2018-09-04 19:51:09 +09:00
5fcbfa2eb5 page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
A device map with MAP_PRIVATE is copied when forking using copy_user_pte,
so the map isn't copied by those statements.

Fujitsu: POSTK_TEMP_FIX_14
Refs: #1039
Change-Id: I1a697ed2e003055d66a8eebd3e8d5e9e49d094ad
2018-08-30 02:21:42 +00:00
9a20cfaefb mem: Check if phys-mem is within the range of McKernel memory
Fujitsu: POSTK_DEBUG_TEMP_FIX_52
Refs: #1164
Change-Id: Idb9a6eac1d2e1df4c663c3171925c774421177fd
2018-08-30 02:18:37 +00:00
f57b0c5d4f wait: Delay wake-up parent within switch context
Fujitsu: POSTK_DEBUG_TEMP_FIX_41
Refs: #1006
Change-Id: Ia98e896505ad0f6549766604ade84550eee8bd2d
2018-08-30 02:13:51 +00:00
0fdeb254b3 switch context: Move to arch-dependent (arch_switch_context())
Fujitsu: POSTK_DEBUG_ARCH_DEP_22
Change-Id: I6faf8d9daa1e639350c2cd83db9bb27b9d37ba01
2018-08-30 02:13:34 +00:00
895a8c4099 procfs: Support multiple reads of e.g. /proc/*/maps
Refs: #1021
Change-Id: If36e1a0f3f41f0215868daf578e96775d96a59a3
2018-08-30 01:48:06 +00:00
e531ee626e mcctrl pager: handle pagers more properly
the pagers are all destroyed when linux thinks there is no process left,
but there is no synchronisation with mcexec on that and some new process
might have spawned and started using these pagers in the meantime,
leading to weird crashes because an invalid pager was used.

The reason we're cleaning up pagers when no process is left is that
mcctrl does not handle pager_req_release is the linux-side process got
killed or died before the mckernel one for some reason, so:
 - move pager_req_release to a new __do_in_kernel_irq_syscall() helper
 - have free_all_process_memory_range not set MF_HOST_RELEASED on the
memobj
 - just in case, clean up everything like before on mcctrl shutdown
instead of when no process is left.

Change-Id: I53b8b9b81b1e5b807593850af17b5ea5e8471174
Refs: #1154
2018-08-24 09:18:20 +09:00
94d093f058 fileobj_create: Suppress message on getting -ESRCH
-ESRCH from mcctrl doesn't indicate an error; it means the file is not a
regular file and mcctrl wants McKernel to treat it as a device file.

Change-Id: Ie121f0e6a8b1f0a29c2f2cf193a51f4f52337809
2018-08-23 04:01:20 +00:00
9b8424523a mcctrl: remove rus page cache
Change-Id: Ieed7a2a0077ffde3fec8a64d2051e56a53924a42
2018-08-23 02:10:44 +00:00
ebc702624b devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
Fujitsu: POSTK_DEBUG_TEMP_FIX_36
Change-Id: I5f020708f97b7468f19496b44c98e164d856598d
2018-08-22 07:26:50 +00:00
ea125cb58c checkpatch: remove warning on LINUX_KERNEL_VERSION and split strings
Change-Id: Ia22f3106208c6ddf46a767e142b8842373e9d6b5
2018-08-22 07:14:48 +00:00
689a799bb9 mcctrl prepare_image: return reserve_user_space error
Change-Id: I00556cb58b12acca888f9512c144a3ce3f5332b1
2018-08-22 07:14:40 +00:00
802b1ac14b ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
Report timeout when McKernel doesn't respond to prevent the caller
from waiting forever.

Refs: #1167
Change-Id: I8bd87e43aafffdd0952198224e44195af4368883
2018-08-22 06:43:27 +00:00
affe3e9010 do_fork: Increase tid table size when allowing oversubscription
The size of the tid table needs to be larger than the number of CPUs when
CPU oversubscription is needed.

Note that the maximum number of simultaneous threads is the minimum of the
following two:
(1) Number of mcexec worker threads
(2) NR_TID defined in kernel/syscall.c

Change-Id: I425189da415e1d3a763ad62567950d001850cf0d
2018-08-22 06:42:13 +00:00
0b2169964a futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
schedule_timeout() with idle_halt should use spin sleep because sleep
with timeout is not implemented.

Change-Id: Ia0bebcc10ddfb872bffeece7f13fb35a4791db18
2018-08-22 06:36:43 +00:00
f18d1f5383 __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
Change-Id: I438eb168f818eb5649857e22bdc7e68a145872f7
2018-08-22 06:33:23 +00:00
ea35954613 linux side: replace vfs_read by kernel_read
vfs_read has been unexported in bd8df82be66 ("fs: unexport vfs_read and vfs_write")
in kernel 4.14.
kernel_read has always™ existed and is actually more appropriate: we can
drop our own set_fs calls, since kernel_read takes care of that itself.

The downside is that the function prototype also changed in 4.14 with
bdd1d2d3d251 ("fs: fix kernel_read prototype")...
(same with kernel_write e13ec939e96b ("fs: fix kernel_write prototype"))

Change-Id: I6f76a6387ae02b4d33bd62952d995a90b1952fc9
2018-08-22 06:27:12 +00:00
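
A sketch of the kind of compat wrapper this change implies; the 4.14 cut-off matches the commits cited above, and the helper name is made up.

```
 #include <linux/fs.h>
 #include <linux/version.h>

 /* Read from a file at *pos regardless of the kernel_read() prototype in
  * use: 4.14+ takes (file, buf, count, pos); older kernels take
  * (file, offset, addr, count) and do not advance the position. */
 static ssize_t mc_kernel_read(struct file *file, void *buf,
 			      size_t count, loff_t *pos)
 {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
 	return kernel_read(file, buf, count, pos);
 #else
 	int ret = kernel_read(file, *pos, (char *)buf, count);

 	if (ret > 0)
 		*pos += ret;
 	return ret;
 #endif
 }
```
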
61a942acdc arm64 vdso/gettimeofday: add new includes for cpu_set_t and pte_t
Change-Id: I4035b179a173a6b29c34c73670d68a38d4dc5dc4
2018-08-22 06:17:56 +00:00
c4b4b7222e arm64: ihk_mc_perfctr_start/stop: fix prototype that was changed in x86
The functions now take a bitmask as argument since commit d7416c6f79
("perf_event: Specify counter by bit_mask on start/stop")...
Thankfully the change also induced a type modification, so it was easy
to notice.

(On the other hand I'm building with --disable-perf, so why the hell is
that file compiled?!)

Change-Id: Ie16367cc94e81068b70e1b80142a6394de896c4f
2018-08-22 06:14:15 +00:00
21af0351d1 arm64 syscall.c needs uio.h for struct iovec
Change-Id: I9d070d0e148636be1d9ecec8ec4dfb72f93c4ed6
2018-08-22 06:08:27 +00:00
1e1c91962e mcctrl: add missing sched_param include for newer linux
struct sched_param is defined differently since headers changed in
linux ae7e81c07 ("sched/headers...")

Change-Id: I22af79bf3d9df69d09903b2830d99426309cf911
2018-08-22 06:04:35 +00:00
b1aa94d417 arm64 arch-perfctr.h: remove duplicate enums
Some enums were redefined in lib/include/mc_perf_event.h in commit
1284060 ("support PERF_TYPE_{HARDWARE|HWCACHE} in perf_event_open")

Change-Id: I1a98699955ca7fd6135b2a7dde72ed4df77b1974
2018-08-22 06:04:08 +00:00
a6a9bac5b7 Protect more code by #ifdef PERF_ENABLE
Change-Id: I20a67c56c4d7817fdb87cc6a2aa47d68fe3eae8d
2018-08-22 06:03:12 +00:00
240a23a21b arch-lock: tentative implementation of irqflags_can_interrupt for arm64
Change-Id: I814e02e757039cab8c142c0b774ad470154454c1
2018-08-22 06:02:06 +00:00
d5108dba80 arm64 eclair build: add missing explicit libs
Change-Id: I5b6f8825430c2d495da50d868a3f54fc0b354d84
2018-08-22 05:56:20 +00:00
20368dd317 syscall: move sync_child_event up a bit
The function sat between two perf functions even though the perf
functions don't use it...
It seemed simpler to move the function than to add an extra ifdef.

Take the occasion to fix style warnings; no actual code changes were
made.

Change-Id: Ie8b5fa7968a3d5e54a690d079874db54f5e6c8c9
2018-08-22 05:55:26 +00:00
b93e14f695 arm64 signal.h: add valid_signal() function
This function was added for x86 by commit 140f813d77 ("fix:
differences in behavior of sigaction between Linux and Mckernel")

The x86 and arm files are actually pretty close and could use
factoring...

Change-Id: Ia8820fd2f824d898610b384a3e137c96aadbc911
2018-08-22 05:54:31 +00:00
3e3f3c5590 mcoverlayfs: vfs_readdir -> iterate_dir compat for el7.5
Also enable mcoverlay for new kernel version / actually build it

Change-Id: I80bc043c65cf99c3b41a54a5666ea7652e6c2bbd
2018-08-09 04:30:24 +00:00
e8f8660b73 mcctrl: lookup unexported symbols at runtime
Instead of parsing System.map, use kallsyms_lookup_name() to
get unexported symbols addresses at module loading time.

This lets mckernel work with kaslr enabled (it gets enabled by
default from el7.5 onwards)

Change-Id: Ie4349fc1145ebce44f37f1f40c16f9d75584074d
2018-08-08 06:00:20 +00:00
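
The lookup pattern looks roughly like this in a module; the symbol name below is purely illustrative, mcctrl resolves its own set of symbols. Note that kallsyms_lookup_name itself stopped being exported in later kernels (5.7+), but it was available for the kernels targeted here.

```
 #include <linux/kallsyms.h>
 #include <linux/module.h>

 static unsigned long resolved_addr;

 static int __init resolve_symbols(void)
 {
 	/* "sys_call_table" is only an example of an unexported symbol. */
 	resolved_addr = kallsyms_lookup_name("sys_call_table");
 	if (!resolved_addr)
 		return -ENOENT;
 	pr_info("resolved sys_call_table at %lx\n", resolved_addr);
 	return 0;
 }

 module_init(resolve_symbols);
 MODULE_LICENSE("GPL");
```
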
794684985f mcctrl syscall: remove unused walk page debug function
This saves looking up one symbol for a debug function that is not
used anywhere

Change-Id: I6a3a480ce8067b4f6f0faf9aa837119ea46888ad
2018-08-08 05:57:46 +00:00
625607e6db mcctrl sysfs_files: cleanup vfs_readdir -> iterate_dir compat
Cleanup the fix suggested by Fujitsu a bit

Change-Id: I95165b834e32a01f43eb3b4fcaca039e4d04fe86
2018-08-08 05:41:04 +00:00
05afa8b6dd mcctrl sysfs_files: vfs_readdir -> iterate_dir compat
vfs_readdir got removed in recent kernels

Change-Id: Iac9a9954afefa0f6dbcdc2c94786cf747e21e1fe
Fujitsu: POSTK_DEBUG_TEMP_FIX_22
2018-08-08 05:39:07 +00:00
6cf89076dc mcctrl handle_mm_fault compat: add el7.5 support
Change-Id: I8c7738b70ca914e857be119b7720cdc22e61ae0e
2018-08-08 05:36:35 +00:00
29a658716b configure: Create config file for test programs
Change-Id: I3ec90fed348ff535b24c8116416c6b89636c532c
2018-08-02 02:29:19 +00:00
a7c9988aeb schedule: Don't reschedule immediately when wake up on migrate
Refs: #1027
Change-Id: Ibe563c45c42611170273f1e437566c20fbef68d3
2018-08-02 02:28:25 +00:00
d4fa953975 test: Add testcase for #1001
Refs: #1001
Change-Id: I3edd750108bd3f887af1f0afe3f2651f1243062b
2018-08-02 02:24:41 +00:00
786649d2a3 perf_event: Move changing monitoring-status into perf_stop
Change-Id: I84a13c2a825de24bfdada533c7049e8770a07061
2018-08-02 02:23:38 +00:00
d7416c6f79 perf_event: Specify counter by bit_mask on start/stop
Fujitsu: POSTK_DEBUG_TEMP_FIX_30
Refs: #1002
Change-Id: Iea51e9aef78927a5033e3a226d5efc6298da056a
2018-08-02 11:22:28 +09:00
cb1522ca92 perf_event: Handle fixed-pmc in arch-dep part
Fujitsu: POSTK_DEBUG_TEMP_FIX_31
Refs: #1003
Change-Id: I66c7d18b9137894cf5764464482e2ebd5ecb9d52
2018-08-02 02:14:04 +00:00
14660a10c3 Fix procfs read returning EIO
Refs: #1152
Change-Id: I48b330953fd7674ba1a3ac35744f9f50a5712730
2018-08-02 01:48:51 +00:00
1387c9687b Add test cases for #765
Refs: #765
Change-Id: I50d70a15d5d5ce31227cacbed4eccd49b218713b
2018-08-02 01:42:46 +00:00
ec99adde4a Add test cases for #998 and #999
Refs: #998 #999
Change-Id: I86f8857594b2446c833c1e59d53b484ef022a9ee
2018-08-02 01:42:11 +00:00
c716e87c53 execve: Clear sigaltstack and fp_regs
Fujitsu: POSTK_DEBUG_TEMP_FIX_19
Refs: #976
Change-Id: I16895eab13eecbb47b7e6da961fae82ee5e570ee
2018-08-01 15:11:05 +09:00
d898f18293 mcexec: Do not close fd returned to mckernel side
Fixes: 9a79920ef9 ("Static analysis fixes")
Change-Id: I2b51d6e288e7bb2b0f4bff579fa237d575dcb026
Reported-by: Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com>
2018-07-30 23:27:17 +00:00
bc0759e2dc arm64 arch-lock: add missing include for cpu_set
Probably only needed for recent systems, see ihk's 3271b5e6 ("fix
compilation with recent glibc (cpu_set define change)")

The root of the problem really is that mckernel, which ought to be
independent, relies on system headers...

Change-Id: Ieb9a017e5a7697ad767087370ced7b615efc917e
2018-07-27 02:33:03 +00:00
1aa429d4f5 init_normal_area: fix warnings
- unused variable pt_phys
 - undeclared function set_pt_large_page (move definition lower)

Change-Id: I4625b70efe8e914160b17064078c42b86a461d3e
2018-07-27 02:32:23 +00:00
1543119139 mcctrl rus_vm_fault: type changed with kernel >= 4.11
vma is part of vmf and isn't needed, so the type changed (see linux 11bac80
("mm, fs: reduce fault, [...] to take only vmf"))

Change-Id: I4c023e23c7e7416ad2df2dcc0698a0032e574e4c
2018-07-27 02:31:39 +00:00
0a0a78ac2e mcctrl: replace GFP_TEMPORARY by GFP_KERNEL
See linux's commit 0ee931c4 ("mm: treewide: remove GFP_TEMPORARY
allocation flag") for a long explanation, but basically that flag
"is just cargo cult" and should be removed

Change-Id: I2147cd65b6b9ec509a72e11cc3abf1fe1561c10b
2018-07-27 02:31:00 +00:00
6999d0a3f9 bind_mount_recursive: Use lstat instead of d_type of readdir
Change-Id: I0eb8d6c7e1fa5df6dbc5962a639901546a159d04
2018-07-26 18:38:48 +09:00
f01a883971 devobj: fix out of bounds shift
Similarly, pgoff << PAGE_SHIFT would need pgoff to be unsigned to fit,
but off_t is signed.
The reason for this shift was to truncate the offset argument so it is
aligned to page boundaries; do that directly instead.
Change-Id: I36c3de34b1834fdb0503942a6f3212e94986effd
2018-07-26 05:20:19 +00:00
3185334c1c debug messages: implement dynamic debug
Heavily inspired by the Linux kernel's dynamic debug:
 * add a /sys/kernel/debug/dynamic_debug/control file
 (accessible from linux side in /sys/class/mcos/mcos0/sys/kernel/debug/dynamic_debug/control)
 * read from file to list debug statements (currently limited to 4k in size)
 * write to file with '[file foo ][func bar ][line [x][-[y]]] [+-]p' to change values

Side effects:
 * reindented all linker scripts, there is a new __verbose section
 * added string function strpbrk

Change-Id: I36d7707274dcc3ecaf200075a31a2f0f76021059
2018-07-26 14:16:31 +09:00
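
A guess at the general shape of such a mechanism, following Linux's dynamic debug: each debug call site drops a descriptor into the __verbose section and the control file flips its enabled flag per file/function/line. The structure and macro names below are assumptions, not the McKernel code.

```
 #include <stdio.h>

 #define kprintf printf	/* stand-in for the kernel printer */

 /* Illustrative only: one descriptor per debug call site, collected by the
  * linker into a dedicated "__verbose" section that the control file walks. */
 struct dyndbg_desc {
 	const char *file;
 	const char *func;
 	int line;
 	int enabled;	/* toggled by writes such as "func foo +p" */
 };

 #define ddprintf(fmt, ...)						\
 	do {								\
 		static struct dyndbg_desc __desc			\
 			__attribute__((section("__verbose"))) =		\
 			{ __FILE__, __func__, __LINE__, 1 };		\
 		if (__desc.enabled)					\
 			kprintf("%s:%d: " fmt, __desc.file,		\
 				__desc.line, ##__VA_ARGS__);		\
 	} while (0)

 int main(void)
 {
 	ddprintf("dynamic debug example, x=%d\n", 42);
 	return 0;
 }
```
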
bc887aab44 x86 futex: fix out of bounds shift
8 << 28 needs to be unsigned to fit; the other shifts were done to
truncate the input, so use a mask instead.

Change-Id: I81ba41595f4629f1df554e34392116440ff3b641
2018-07-26 05:10:36 +00:00
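
A small illustration of the undefined-behaviour pattern behind these shift fixes; the constants echo the commits above, the rest is a made-up example.

```
 #include <stdio.h>

 int main(void)
 {
 	/* 8 << 28 equals 1 << 31, which does not fit in a signed int and is
 	 * therefore undefined behaviour; an unsigned literal fixes it. */
 	unsigned int flags = 8U << 28;

 	/* Shifting left and back right to truncate low bits also risks
 	 * overflow; masking expresses the intent without the hazard. */
 	unsigned long value = 0x12345678UL;
 	unsigned long truncated = value & ~0xFFFUL;	/* drop low 12 bits */

 	printf("%x %lx\n", flags, truncated);
 	return 0;
 }
```
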
6f7c428a34 terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
Change-Id: I8c4fbdd3aab9d0567ce5457a4a6405490608925d
2018-07-26 05:02:13 +00:00
68c702d024 process_procfs_request: Add Pid to /proc/<PID>/status
ps, the standard UNIX tool for getting process information, needs to have
the process id inside /proc/<PID>/status.

Without a Pid field in /proc/<PID>/status, ps gives:

  PID TTY          TIME CMD
 2551 pts/0    00:00:00 bash
    0 pts/0    00:00:00 exe
    0 pts/0    00:00:00 exe

With this patch:
  PID TTY          TIME CMD
 2551 pts/0    00:00:00 bash
11966 pts/0    00:00:00 exe
12619 pts/0    00:00:00 exe

Change-Id: Ic9d255cbef4d49e49bdaedcfc8e3545d9c144325
2018-07-26 05:00:21 +00:00
97273adcc5 x86_64 move_pages_smp_handler: rework initialisation
- add missing break statement
- remove duplicate memset for mpsr->status

Change-Id: I1fd1a8b2bb7bbabb32db9e7d3fc84102d9b0ff82
2018-07-26 04:59:23 +00:00
ad2cb6375a kprintf: only call eventfd() if it is safe to interrupt
Missing ARM64 implementation, cannot test right now

Change-Id: Ia05e8b7952b19bcd8fdac1f920d9bfe341be8b97
2018-07-26 04:57:30 +00:00
6df4bd8f8c Fix a few more warnings
Some are important, e.g. the seemingly harmless warning about braces around
an if with dprintf: since that dprintf is defined as empty, it will screw
things up and grab the next line.

Change-Id: Ie5e1cf813178ad708ff42ae5e477fbc96034471c
2018-07-26 04:52:17 +00:00
0994c3300e search_free_space: remove POSTK_DEBUG_ARCH_DEP_27 side
search_free_space has changed since this was implemented and the code is
no longer compatible.
Looking at it again, the function is not used anywhere other than syscall.c,
and the second function does not seem to fix anything specific, so this
just removes the untested side.

Change-Id: If28d35ec4da083a40dc6936fcb21f05fb64e378a
Fujitsu: POSTK_DEBUG_ARCH_DEP_27
2018-07-26 04:43:05 +00:00
a5c3e48843 search_free_space(): manage region->map_end internally
Change-Id: If9176773868c44fa1eb801c0815c35cea9f4b54b
2018-07-26 04:43:05 +00:00
df2c993721 fileobj_create: only allocate new object if one wasn't found
Change-Id: I5e12439333bf0c9cc7dad6e3cf410bfee616f77e
2018-07-26 04:41:03 +00:00
dc8d6b740c pager_req_read: handle short read
Change-Id: Iff89046041e012a65c80a29b485ddbb636435dd0
2018-07-26 04:37:54 +00:00
c2e1b8d694 mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
do_frees is allowed to be NULL only if free_addrs_count is 0, but that
is increased to account for the wakeup_desc itself before this failure

Change-Id: Iab33712c76ae452df7044558a12745a89adb47ac
2018-07-26 04:34:03 +00:00
f6d8138e05 mcexec_wait_syscall: requeue potential request on interrupted wait
Change-Id: Id7a324f18ebb8c81f05bd8362e19d9314a445308
2018-07-26 04:31:34 +00:00
9d587dcbe8 fileobj_release: do not notify linux of surplus refs
Surplus refs on the linux side will not change anything, so spare
ourselves a message.
The final message will free all refs at once when the object is
destroyed.

Change-Id: Ie086b9dda663729962037c67e8233370509234a5
2018-07-26 04:08:43 +00:00
eb675818c7 x86 mmap: fix out of bounds shift
0x3F << MAP_HUGE_SHIFT is too big to fit in a signed int;
make it unsigned.

Change-Id: I0e476b80ff51a8e141c90da6f985ba18a3438752
2018-07-26 03:50:44 +00:00
3ce7763715 x86 mem init: do not map identity mapping
init_normal_area was mapping identity lookups (phys = virt) from 0,
leading to many null pointer dereferences going undetected in init_pt
(but not in new process page tables, leading to odd behaviour).

This also makes the code use the set_pt_large_page() function, cleaning
it up a bit.

Change-Id: I22889031de26a7e48501b0eb4d453ca62e671835
2018-07-26 03:50:44 +00:00
fd429ecc5b rusage_private: fix null pointer dereference
Change-Id: Id1f066699a41c249203073c5937e34012f5fe6c3
2018-07-26 03:50:44 +00:00
ed7f5abc28 schedule: fix null pointer dereferences
Change-Id: I1d4b0a2fabb5810a89cca4c6a0a837db3a9813ee
2018-07-26 03:50:44 +00:00
79e5026f01 x86 mem init: fix clearing of init_pt
memset(init_pt...) had the wrong size.

Change-Id: Idb5d0d53b3c70ee4a16a101dd265d0854cfd3b72
2018-07-26 03:50:31 +00:00
a1b50051ed mcexec: always compile debug statements
This helps catch errors like a debug print accessing a field that no
longer exists, which went unnoticed because the print wasn't compiled...

Change-Id: If6c862ea2b866f819195aae93c7fd68e610fe48e
2018-07-26 03:38:00 +00:00
9a79920ef9 Static analysis fixes
Change-Id: I7bc42545a1c497f704d7bfa6ea1b7e3893acc697
2018-07-26 03:36:50 +00:00
141fa5120e git hooks: use correct directory for submodule
Change-Id: I7a39021dc02212065612b21cafcb6c653e2280f0
2018-07-26 03:29:43 +00:00
699cb4f88c arm64/arch-lock: typedef mcs_lock_t
Was done in x86_64 for fileobj in commit 249bda4aef ("fileobj: use
MCS locks for per-file page hash")

Change-Id: I61957de336b6657687803e6288afed9360a42032
2018-07-26 03:28:40 +00:00
bc3e6ded65 disable sse for everyone
GCC optimizes big switches with SSE, so we could clobber users' floating
point registers when they do a syscall.

Reproducer:
```
 #include <stdio.h>
 #include <stdlib.h>

 union num {
 	float f;
 	unsigned long long i;
 };

 #define WORKSIZE (1024 * 1024 * 32)

 int main(int argc, char **argv) {
 	char *work = malloc(WORKSIZE);
 	char *fromaddr;
 	volatile char sink;	/* volatile: keep the read from being optimized out */
 	union num r;
 	unsigned long long int offset;

 	r.f = drand48();	/* FP use whose registers a syscall could clobber */
 	printf("r: %llx\n", r.i);
 	offset = (unsigned long long)(r.f * (double)WORKSIZE);
 	fromaddr = work + offset;
 	printf("%e %llx %p\n", r.f, offset, (void *)fromaddr);
 	sink = *fromaddr;

 	return 0;
 }
```

Change-Id: I7bb0883ec8ef2f245ab98064e308025422afc115
2018-07-26 03:26:25 +00:00
eae5c40f60 init_process_stack: Support "ulimit -s unlimited"
Refs: #1109
Change-Id: I395f012fd747cb6a2f93be71e34c7f6f3666ed67
2018-07-26 02:40:27 +00:00
0c7384f980 Add test cases for #840
Refs: #840
Change-Id: Ie29867d29ba6a25cfac77b95b8effc2f057aae14
2018-07-26 02:39:24 +00:00
67ebcca74d Fix to VMAP virtual address leak
Fujitsu: POSTK_DEBUG_TEMP_FIX_51
Refs: #1024
Change-Id: I1692ee4f004cb4d1f725baf47a8ed31fce1bf42a
2018-07-26 02:17:55 +00:00
3d365b0d7a add ihk as submodule
Change-Id: I512255a96d0d95795bd0d803289fffe4394eb7ec
2018-07-26 01:50:48 +00:00
94e96927a6 mremap: Do nothing when no size change and !MREMAP_FIXED
Behave in the same way as Linux which returns old_address when
old_size == new_size && !MREMAP_FIXED.

Refs: #1112
Change-Id: Ice1421a8a77f962d087de8475aa2cd40c59be5f7
2018-07-26 01:49:01 +00:00
3636c8e7e4 setrlimit: Check arguments in the same order as in Linux
(1) Check if rlim's address is valid
(2) Check if soft-limit does not exceed hard-limit

Fujitsu: POSTK_DEBUG_TEMP_FIX_3
Refs: #1050
Change-Id: I5bf1008ce172f9dff64ec89b1f97614926abaf13
2018-07-26 01:48:05 +00:00
b920da5103 execve: Use interp in shebang as is
Fujitsu: POSTK_DEBUG_TEMP_FIX_9
Refs: #995
Change-Id: I09751d13c4fecd68087d47815029c0b65e51f18a
2018-07-26 01:46:22 +00:00
f1a40a409f perf_event: Include list.h by itself
Fujitsu: POSTK_DEBUG_TEMP_FIX_32
Refs: #1004
Change-Id: I8670477cf498ac98df971f2c0288f335a989f675
2018-07-26 00:45:57 +00:00
4ce4c9f264 init_process: Inherit parent cpu_set
Fujitsu: POSTK_DEBUG_TEMP_FIX_69
Refs: #1028
Change-Id: I1628bb5bf35fa670bb0019e1f3ae295277b1566e
2018-07-26 00:44:41 +00:00
e770a22fa5 scripts: add checkpatch.pl & git hooks
Change-Id: I29e5f7a99e8dd92511c0b1d099f3e1a2f37d7a72
2018-07-12 00:55:58 +00:00
9bb8076dc0 shmget: Make a shmobj that underwent IPC_RMID invisible to shmget
Refs: #926
Change-Id: I16120623b581da5d5d484fd05d5111788c8ad5e2
2018-07-10 02:13:00 +00:00
229b041320 test: Add testcase for #1122
Refs: #1122
Change-Id: Ieafee7469d1397461abf05552ffad0bfea1dd6cd
2018-07-10 02:12:23 +00:00
e1f204de4a test: Add testcase for #1112
Refs: #1112
Change-Id: I0041366d8dcf035a09fbb59a5dbd5c94cae0d65e
2018-07-10 02:12:04 +00:00
c6cc0bf07a test: Add testcase for #1111
Refs: #1111
Change-Id: Ifdf25a9ce98ef495200daf1c24d7ac2c81b3ef17
2018-07-10 02:11:45 +00:00
04e54ead5d test: Add testcase for #1031
Refs: #1031
Change-Id: I6a51596b84a97329ba7d5b765c8471246dcf85df
2018-07-10 02:11:13 +00:00
992705d465 pager_get_path: Append \0 to path
Change-Id: Iaabd89a649bb20b37b35cd345da0f468fd5dd0b5
2018-07-10 02:10:19 +00:00
ae09d979b6 Add testcases for #1141
Refs: #1141
Change-Id: I50d1ac6248e9dfc33c372b825c10cf0bd8b61d3e
2018-07-10 02:09:38 +00:00
1cbe389879 do_fork: Propagate error code returned by mcexec
Refs: #731
Change-Id: I7eb52c1c76103d65d108b18b7beaf8041b51cd03
2018-07-03 09:19:54 +00:00
0758f6254e headers: declare void arguments for functions
In C, declaring a function without any arguments means that any arguments
are accepted, which is not what is meant here.

Change-Id: Ide651c1dec973d4b8709cf00646988f4c4f3acdd
2018-07-03 09:18:25 +00:00
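
The distinction, in a tiny example; the function names are arbitrary.

```
 /* Old-style declaration: says nothing about the parameters, so the
  * compiler accepts any call, e.g. probe(1, 2, 3). */
 int probe();

 /* Prototype with void: the function takes exactly no arguments, and the
  * compiler rejects probe_v(1). */
 int probe_v(void);
```
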
db732a245c execve: Reinitialize vm_regions's map area on execve
Reinitialize vm->region.map_end in sys_execve()
in the same way as when creating a new process.

Change-Id: I7fc048a187e619ba4b5a578976e2a6774d13a6a7
2018-07-03 08:58:50 +00:00
08f2840f7d procfs: Show file names in /proc/<PID>/maps
Refs: #1065
Change-Id: I2f1603b02d12e60972c8f2e5f059d0025f4ceaea
2018-07-03 08:56:44 +00:00
521bdc6181 mremap: Fix type of size arguments (from ssize_t to size_t)
Refs: #1112
Change-Id: I3987d3a20a1e7c4b60f3880e91a670bc0bdc240f
2018-07-03 08:54:14 +00:00
e7b6a3472b sched_getaffinity: Check arguments in the same order as in Linux
(1) Check if size is large enough
(2) Check if size is positive

Fujitsu: POSTK_DEBUG_TEMP_FIX_5
Refs: #1121
Change-Id: I3e41720c89ef89294820f7f4fa8df1a69a7011b0
2018-07-03 08:53:30 +00:00
11756d96ef mmap, mremap: Check arguments in the same order as in Linux
Refs: #1137
Change-Id: I4fd2ac83b013a2741a3facce4dd7e0c37b14fd25
2018-07-03 08:41:30 +00:00
f185be06eb mcoverlay-create.sh, mcoverlay-destroy.sh: Return -EINVAL on failure
Change-Id: I0561df33e8068327bf2d921c8facac7b18ac8866
2018-07-03 05:19:55 +00:00
854bc85602 mcctrl: convert send_signal to mcctrl_ihk_send_wait
Change-Id: Ibd2fc834444d83341a96579f0c9c22080a53e8fa
2018-07-02 16:11:01 +09:00
ab8fe0bbbf mcctrl: convert perf ctrl ioctls to mcctrl_ihk_send_wait
While we are here, also optimize code a bit: perf_desc does not need
to be allocated for every cpu; and fix coding style.

Change-Id: Iad19fed08205d38594fd3f1b7ddf2b19a9cf0d9d
2018-07-02 16:11:01 +09:00
b87c06cbcb mcctrl_ikc_send_wait: give possibility to use pre-allocated desc
Change-Id: I1afbabe792648bbf2c5a9a38ebbfba8ea9060d06
2018-07-02 16:11:01 +09:00
b939ca9370 mcctrl: refactor prepare_image into new generic ikc send&wait
Many ikc messages expecting a reply use wait_event_interruptible
incorrectly, freeing memory that could still be used on the other side.

This commit implements a generic ikc send and wait helper that helps
with memory management and ownership properly:
 - if the message succeeds and a reply comes back normally, the memory
is freed by the caller as usual
 - if the wait fails (signal before the reply comes or timeout) then the
memory is set as owner by ikc and will be free when the reply comes back
later
 - if the reply never comes, the memory is freed at shutdown when
destroying ikc channels

Refs: #1076
Change-Id: I7f348d9029a6ad56ba9a50c836105ec39fa14943
2018-07-02 04:34:44 +00:00
ec202a1ca9 execve: fix execve with oversubscribing
Issue: #1072
Change-Id: I88446e075b60de3c94cad2a19a4731e58037ea63
2018-07-02 13:31:23 +09:00
d4471df94e execve: use thread variable instead of cpu_local_var(current)
This fixes crashes _without_ oversubscribing with a process doing
fork() execve() / wait() in a loop

Issue: #1132
Change-Id: I98531f4643ad6b6a8f750a1a3f05b9ff3ebfd50f
2018-07-02 04:28:23 +00:00
a6ac4acf40 rusage: Fix initialization of rusage->num_processors
Refs: #1064
Change-Id: I4c04127a766b9c71f726113b8b7d6416ff971bff
2018-06-28 11:24:47 +09:00
8ff754c466 test: delete garbage files 2018-06-21 13:50:40 +09:00
90dba00742 fix return value of sched_getaffinity (POSTK_DEBUG_TEMP_FIX_58) refs#1122
Change-Id: I3d7b9b74eec268dd49b703600ca56df1d2933bd9
2018-06-21 09:15:22 +09:00
86ae1380e4 configure.ac: Move man directory to share/man
Change-Id: Idaa5c0f61fbbe3bda4697bc59487f562e09ff2d6
2018-06-11 13:13:13 +09:00
9bb48186e6 add testcases for #732 #1065 #1102 2018-06-07 10:11:23 +09:00
139123dc12 move test programs 2018-06-07 10:08:48 +09:00
6602cf442c add test cases 2018-06-07 10:04:33 +09:00
f148863586 pager_req_map(): do not take mmap_sem if not needed 2018-06-07 07:17:41 +09:00
ec375da27a pager_req_create(): prefetch libiomp, libpthread and libc 2018-06-07 07:17:31 +09:00
c50e7c1029 prepare_process_ranges_args_envs(): fix saving cmdline 2018-06-07 07:17:21 +09:00
5f4dbb2c71 mprotect: Fix early exit condition on page table attribute 2018-06-06 01:39:44 +09:00
328609269b Clean up "Detect hang of McKernel in mcexec"
* Clean up error checks
2018-06-01 14:51:07 +09:00
056fdb2633 Fix "Detect hang of McKernel in mcexec"
1. Call exit() when detecting hang
2. Clean up error checks
2018-06-01 14:21:19 +09:00
09d0a59e22 Detect hang of McKernel in mcexec
mcexec spawns a thread which detects hang of McKernel by using
ihk_os_get_eventfd().

Change-Id: I6cf0ee0c1f0c2c31a8422224b2105f64a9b9ab93
2018-06-01 10:44:34 +09:00
511555c8cb fix: /proc/<PID>/maps outputs an unnecessary NULL character 2018-05-30 16:38:28 +09:00
81699345cc mprotect: do not set page table writable for cow pages
Change-Id: If8b0bb56e7dae59aa9dc3d745a4cc4e43bf4bf9a
2018-05-30 13:29:55 +09:00
130751ff66 fileobj: avoid memory leak in path recording 2018-05-14 17:46:52 +09:00
f3d18eb9de fileobj/devobj: record path name (originally by Takagi-san) 2018-05-14 17:46:52 +09:00
249bda4aef fileobj: use MCS locks for per-file page hash 2018-05-14 17:46:52 +09:00
aaa246f86f mcexec: change debug printf macros to be more tolerant to trivial format
Enabling DEBUG fails to compile. It'd be easy to change the dprintf to
dprint, but this is just as generic and we can now use dprintf everywhere.
2018-05-11 09:23:46 +09:00
c52f7a5b49 syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
Needed by strace -f
2018-05-11 09:22:54 +09:00
90a34f54c9 mcreboot.sh,mcstop+release.sh: Disable irqbalance_mck forcefully 2018-04-26 15:06:53 +09:00
bfb5080b71 pager_req_unmap: Put per-process data at exit 2018-04-10 11:35:03 +09:00
641dfed37e configure.ac: Update version number 2018-04-06 09:14:27 +09:00
4572e6be3f fix mcctrl SMAP - everyone needs copy_to_user 2018-04-03 10:38:44 +09:00
12e44050c9 mcexec: drop READ_IMPLIES_EXEC from personality to avoid device file mapping failure 2018-04-02 20:12:54 +09:00
d5190990f5 mcreboot.sh,mcstop+release.sh: rm -rf /tmp/mcreboot when it's done 2018-03-27 23:25:44 +09:00
82822b1f16 mcreboot.sh: Fix error cases
(1) Restart irqbalance when error occurs after it's stopped
(2) Restore /proc/irq/*/smp_affinity when error occurs after
    they're modified
2018-03-27 22:20:25 +09:00
7f02889f76 mcreboot.sh,mcstop+release.sh: Save /proc/irq/*/smp_affinity to /tmp/mcreboot 2018-03-27 22:01:55 +09:00
9dc86869d8 test: Modify mng_mod/{863,870}/README 2018-03-27 19:36:07 +09:00
02bb127007 test: Modify mng_mod/*/README 2018-03-27 14:53:29 +09:00
c26c4aba4f test: Modify mng_mod/{863,870} 2018-03-13 10:24:52 +09:00
e8d8ad60c2 Modify README files of test/mng_mod/{863,870,882} 2018-03-13 05:04:06 +09:00
a7f645f7df terminate(): fix update_lock and threads_lock order to avoid deadlock 2018-03-25 08:29:53 +09:00
73731d2a0d ihk_mc_map/unmap_virtual(): do proper TLB invalidation 2018-03-24 07:58:08 +09:00
0f049c5ed7 Modify README of #863 and #870 2018-03-12 17:13:16 +09:00
8d5f95de04 schedule: Add comment on #1029
refs #1029
2018-03-12 17:11:20 +09:00
88fca2c0df issue/{863, 870}/README: update test items 2018-03-23 16:08:17 +09:00
81d18e35dd rename files 2018-03-23 15:35:24 +09:00
309da8fc53 issue/863: add 8 testcases 2018-03-23 14:48:18 +09:00
535e3f3af6 issue/863/CT300x: add timestamp and check 2018-03-23 13:28:19 +09:00
4c80dca479 issue/863/README: add how to execute stress_test 2018-03-23 12:26:13 +09:00
7bef1f5117 Remove debug-print from do_syscall() 2018-03-12 02:07:12 +09:00
bb8c8355c2 small fix: testcases for #1032, #1033, #1034 2018-03-19 16:28:18 +09:00
fab0641813 prepare_process_ranges_args_envs(): fix generating saved_cmdline to avoid PF in strlen() 2018-03-19 13:56:04 +09:00
ce3af4734a fix: dual hold_thread() in do_kill() 2018-03-19 11:12:50 +09:00
e2dea4e9f8 mcexec_start_image(): handle IKC send timeout 2018-03-17 21:33:17 +09:00
0d9c1df75a update: testcases and result for #1032, #1033, #1034 2018-03-16 11:14:29 +09:00
6a979cf4b8 add: testcases for #1032, #1033, #1034 2018-03-15 14:31:29 +09:00
c107d1fdf9 fix: Bug for measuring rss in fork()
refs: #1032
2018-03-15 14:29:16 +09:00
bc89a51e00 fix: getrusage's u|stime race-condition caused by release_thread() and getrusage() 2018-03-15 14:26:39 +09:00
9da9e755fa Issue#923: add test cases 2018-03-15 10:13:16 +09:00
fe42481d6f Add allow_oversubscribe kernel argument
Oversubscription is not allowed in the default setting.
Execute mcreboot.sh with the -O option to allow it.

refs #1072
2018-03-10 13:08:38 +09:00
b1ea6eb82a procfs: Show Linux /proc/self/cgroup
Support the case where McKernel process retrieves its job-id when running under
the Fujitsu TCS suite.
2018-03-10 11:58:45 +09:00
8c2e20c3aa uti: Fix uti thread on the McKernel side blocking others in do_syscall()
It could block other threads on the same CPU in do_syscall(), since it busy-waits after being
woken up because it's not allowed to sleep again.
2018-03-09 18:02:45 +09:00
65667709a8 Fix thread status race-condition caused by hold_thread() in do_kill() and terminate()
Conflicts:
	arch/x86_64/kernel/syscall.c
	kernel/syscall.c
2018-03-09 17:53:17 +09:00
51bc5fd61f uti: Fix wrong argument passed to ihk_ikc_release_packet() in mcexec_terminate_thread()
Conflicts:
	executer/kernel/mcctrl/control.c
2018-03-09 17:44:30 +09:00
3b277b2354 uti: Fix dead-lock of calling terminate() from terminate()
Conflicts:
	arch/x86_64/kernel/syscall.c
	kernel/syscall.c
2018-03-09 17:38:55 +09:00
3e4c9bdd90 Fix lock of struct wait_queue_head_list_node 2018-03-09 17:31:10 +09:00
06b1b4f8ab Fix deadlock on thread->times_update in getrusage()
Set thread->in_kernel properly on exiting interrupt handler when entering
it from kernel mode.

Conflicts:
	arch/x86_64/kernel/cpu.c
	kernel/mem.c
2018-03-09 17:26:31 +09:00
7b4de6e6c2 mcstat: Clean-up Makefile.in 2018-03-09 14:36:01 +09:00
1c266f4849 mcstat: Fix build error 2018-03-09 14:31:07 +09:00
b7a7281195 fix: Bug for getrusage often return incorrect ru_stime
refs #1034
2018-03-07 13:11:37 +09:00
b77732fb4f fix: Bug for getrusage(RUSAGE_CHILDREN) return parent info (POSTK_DEBUG_TEMP_FIX_72)
refs #1033
2018-03-07 13:10:45 +09:00
a224bf648a fix: Bug for getrusage return incorrect ru_maxrss
refs #1032
2018-03-07 13:09:24 +09:00
642520f80c rus_vm_fault: If a page fault occurs in a thread that has not processed system call offloading, it incorrectly returns as normal.
refs #923
2018-03-07 10:22:47 +09:00
5cb75b00c7 mcexec_destroy_per_process_data: System call delegation cannot be terminated on error when the last process that closed /dev/mcos0 is a child process.
refs #882
2018-03-07 09:11:37 +09:00
7dd0d1137f revert for fix git message
This reverts commit 840acd6021.
2018-03-07 09:09:28 +09:00
cb2fe29f06 fix build error 2018-03-05 10:57:10 +09:00
3432f46d8b fix & add: testcases for refs #885, refs #1031 2018-03-01 15:41:58 +09:00
afcf1a24aa add: testcases for refs #885, refs #1031 2018-03-01 10:24:21 +09:00
140f813d77 fix: differences in behavior of sigaction between Linux and Mckernel 2018-03-01 09:44:44 +09:00
7ad6f9595c fix: bug for ptrace_attach self pid 2018-03-01 09:37:12 +09:00
1796c20b88 A bug for not installing mcstat is fixed. 2018-02-25 11:46:16 +09:00
0da5b76916 Merge branch 'development' of postpeta.pccluster.org:mckernel into development 2018-02-25 11:03:13 +09:00
4ac1efae6c - mcstat is a tool to report McKernel statistics from the Linux side.
This is a response to a request from CEA.
	- The tools directory is created under the mckernel directory.
	- Some include files are now installed in the install directory,
	  but we should rethink this.
2018-02-25 10:57:28 +09:00
523a066245 sigaction: support for SA_RESETHAND on x86_64
refs #1031
2018-02-22 11:55:32 +09:00
98df469d29 Issue#882: add test cases 2018-02-22 11:42:43 +09:00
f46287a711 ptrace: support for attaching child_process to parent
refs #885
2018-02-22 09:47:59 +09:00
c260b5c6f3 xpmem: support for fork()
refs #925
2018-02-22 09:37:48 +09:00
c9157f273f do_fork: If mcexec's fork succeeds but McKernel's fork fails, the mcexec child process is left behind. 2018-02-14 16:37:38 +09:00
840acd6021 mcexec_destroy_per_process_data: System call delegation cannot be terminated on error when the last process that closed /dev/mcos0 is a child process.
refs #822
2018-02-14 16:34:08 +09:00
c949a894c6 Remove unnecessary files commited by mistake. 2018-02-06 10:43:21 +09:00
228f8f8533 Wait for LWK to run at shutdown.
refs #898
refs #928
2018-02-06 10:40:12 +09:00
8ee9eca74e issue 863: add test cases and test evidences 2018-02-05 16:07:00 +09:00
748429fc92 do_generic_syscall: Even if the system call is normal, if errno is not zero, it returns an error. (TEMP_FIX_75) 2018-02-03 21:37:12 +09:00
a9dfcd9a89 translate_rva_to_rpa(): use 2MB blocks in 1GB pages on x86 2018-01-31 11:16:44 +09:00
559fc9746c signal: check_signal must be called after check_need_resched. 2018-01-28 13:38:51 +09:00
54169bc3ea procfs: indicate heap in /proc/maps 2018-01-26 16:22:43 +09:00
142e923222 procfs: indicate VDSO, vsyscall and stack in /proc/maps 2018-01-26 16:02:32 +09:00
86efc86945 save_syscall_return_value(): separate from check_signal() and call from syscall() (for ARM64) 2018-01-26 14:43:18 +09:00
ebaafa95d8 settid(): clear syscal offload request before populating 2018-01-26 13:54:34 +09:00
b8ee144e67 do_fork(): return -ENOMEM when no more TIDs available 2018-01-26 13:53:05 +09:00
722ae0e7d5 ARM64 arch_clone_thread(): eliminate extra save_fp_regs() 2018-01-26 13:51:38 +09:00
f56e087208 init_process_stack(): fix stack alignment (align to 64 bytes) 2018-01-26 13:43:23 +09:00
f55f01cc11 signal: If the thread receiving the signal is not current, the signal is not processed. 2018-01-25 22:27:34 +09:00
1fa398cfab do_kill: fix to initialization leakage 2018-01-24 23:11:18 +09:00
8123cc413e Use version string in configure.ac when git repo is not found 2018-01-24 00:52:18 +09:00
d4459cf9f3 Add check to confirm IHK and McKernel with the same version are used 2018-01-24 00:20:57 +09:00
4bb65494e9 signal: When the process receives a termination signal, it first terminates mcexec.
refs #863
refs #870
2018-01-23 14:40:38 +09:00
2f2b3cdc6f signal: interrupt_syscall is called by the core executing the thread that received the signal.
refs #999
2018-01-23 14:31:04 +09:00
1e9f9d9809 update Test for Issue#1029 2018-01-14 14:58:19 +09:00
1b25379c02 small fix: reset switch_ctx flag in schedule() for redo 2018-01-14 14:50:31 +09:00
38bbb4e390 add Test programs for Issue#1029 2018-01-10 11:22:05 +09:00
0fa88f513f fix broken files 2017-12-27 15:28:13 +09:00
cd54c5983a fix openat 2017-12-27 14:59:13 +09:00
6084faeecd make McKernel's execve behave the same as Linux when argv or envp is set to NULL (fix for TEMP_FIX_21) 2017-12-26 17:43:17 +09:00
d209c00a30 part of Issue#994
mcexec: open syscall moves to arch_dep
do_fork: don't use __NR_fork. use __NR_clone
vfork: moves to arch_dep
2017-12-26 10:30:33 +09:00
9a5d5feb9c time(): Split into architecture dependent functions
This fixes the bug reported as POSTK_ARCH_DEP_13 and POSTK_DEBUG_ARCH_DEP_13.
2017-12-23 11:36:52 +09:00
0cda763f95 fix /proc/*/pagemap
refs #387
2017-12-25 16:08:51 +09:00
cc7be46b7d make sure to context-switch to the idle thread when the thread's status is PS_EXITED
refs #1029
2017-12-25 13:32:42 +09:00
589504dc33 mcreboot: -h to indicate halting CPU in idle threads (e.g., in futex_wait()) 2017-12-18 11:22:15 +09:00
bf2f38051b mcreboot-smp: offline/online MCDRAM in one go 2017-12-06 14:41:25 +09:00
2d2d0af6fb add test for Issue#873, 1011 2017-11-29 12:23:20 +09:00
7f47dc78a1 add Issue#727 test cases 2017-11-29 11:32:40 +09:00
c3c9187ed5 add test for portability (kahansei_kojo in dev_V) 2017-11-28 17:55:23 +09:00
aebacb243e User Space:swapout (this is a rebase commit to merge into development) 2017-11-28 09:16:00 +09:00
5a8d1f09e8 add test/dump/README 2017-11-27 19:39:16 +09:00
0e10b6d1ee test/strace: Fix permission 2017-11-22 06:31:32 +09:00
d649d6fc2d Include mbind support (this is a rebase commit to merge into development) 2017-11-27 11:16:53 +09:00
bad487cc07 add regression test result for strace 2017-11-25 18:30:51 +09:00
3b6056fb1a add strace test cases and test result 2017-11-25 17:37:10 +09:00
5cc738d6bd add test programs for strace 2017-11-25 14:35:17 +09:00
c9fa445f54 Merge branch 'development' of pccluster.org:mckernel into development 2017-11-22 10:53:33 +09:00
d273a2f58b add strace bundled test cases 2017-11-22 10:52:30 +09:00
4e7069d499 add: proc|sys fs format_checker (tool) 2017-11-22 09:39:48 +09:00
66f44e77af mcstop+release.sh: Allow ihkmond to flush kmsg buffer 2017-11-20 18:28:48 +09:00
35f908b75c mcexec: protect against incorrect partitioned execution argument (-n) using timeouts 2017-11-20 17:06:01 +09:00
2f0089dfb9 mcstop+release: use ihkconfig release mem all 2017-11-20 17:06:01 +09:00
2af6d5115a fix: depending arch futex_atomic_op_inuser() (a part of ARCH_DEP_8) 2017-11-20 16:42:47 +09:00
ac25c5e1e7 fix: depending arch in Makefile (POSTK_DEBUG_ARCH_DEP_1) 2017-11-20 14:45:18 +09:00
90c0355d90 add setting process of pgshift to remap_process_memory_range
refs #955
2017-11-20 14:17:03 +09:00
43230eb623 fix: checking the return code of fork() in Linux.
refs #906
2017-11-15 15:46:47 +09:00
f18dc8428d fix: error code of perf_event_open, when unsupported event is specified.
refs #1030
2017-11-15 12:49:56 +09:00
ab53c8e0a4 execve: fix memory leak
refs #727
2017-11-09 16:44:31 +09:00
6c33e236d7 mcreboot: Fix umask for /proc and /sys files 2017-10-27 04:57:44 +09:00
85d36f1469 mcexec: check kernel version <= 3.10 for RHEL mcoverlayfs 2017-10-31 13:39:31 +09:00
0ecf31d896 modify:User space memory access(arm64) 2017-10-24 10:29:11 +09:00
08a625cc0d modify:User space memory access
perf_event_open,futex,process_vm_readv,process_vm_writev,move_pages
2017-10-23 20:27:56 +09:00
12840601e1 support PERF_TYPE_{HARDWARE|HW_CACHE} in perf_event_open
refs #829
2017-10-20 23:10:20 +09:00
2ae6883a8b mcreboot.sh, mcstop+release.sh: Fix retry loop of shutdown 2017-10-19 01:54:46 +09:00
d5629606c5 mcexec: -m: interpret as numactl -m (i.e., MPOL_BIND)
Conflicts:
	executer/include/uprotocol.h
	executer/user/mcexec.c
	kernel/include/syscall.h
2017-10-18 16:54:34 +09:00
285059e504 mcexec: use -M for --mpol-threshold
Conflicts:
	executer/user/mcexec.c
2017-10-18 16:44:49 +09:00
5b6d0a887c Add ARM64 arch_rusage header 2017-10-18 09:23:08 +09:00
3573b8649e Guard call to gencore and freecore
The gencore() and freecore() code in gencore.c is guarded by
POSTK_DEBUG_ARCH_DEP_18, so the call to these functions should
also be guarded, otherwise linking fails.
2017-10-18 09:20:52 +09:00
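To illustrate the linking issue above: when function definitions are compiled only under a guard macro, their call sites must be guarded by the same macro or the link fails. A minimal stand-alone C sketch (the macro name follows the commit message; everything else is hypothetical):
```c
#include <stdio.h>

#ifdef POSTK_DEBUG_ARCH_DEP_18
/* Stand-ins for the guarded gencore()/freecore() definitions. */
static int gencore(void)   { printf("generating core image\n"); return 0; }
static void freecore(void) { printf("releasing core image\n"); }
#endif

int main(void)
{
#ifdef POSTK_DEBUG_ARCH_DEP_18
	/* Call sites guarded by the same macro as the definitions,
	 * so the link succeeds whether or not the macro is defined. */
	gencore();
	freecore();
#else
	printf("core dump support compiled out\n");
#endif
	return 0;
}
```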
d7523cdd84 Remove assignment of ns_per_tsc in struct monitor
struct member seems to have been removed or moved to struct
global_rusage
2017-10-18 09:20:52 +09:00
5753db5846 Add ihk_mc_syscall_number() for ARM by reading x8 2017-10-18 09:20:52 +09:00
2d7cb0af89 Add copy_fp_regs to ARM (same as for x86_64) 2017-10-18 09:20:52 +09:00
1cb9b435a9 Fix (?) build system
- disable -mno-red-zone for ARM
- add missing INCLUDEDIR
- make gencore.c compile
2017-10-18 09:20:52 +09:00
43ecf06e83 arch: x86 -> x86_64 and build system changes 2017-10-18 09:20:52 +09:00
51982de36b Handle return value of mcctrl_ikc_send in mcexec_handle_prepare_image 2017-10-18 09:20:51 +09:00
0a22320a3c Don't allocate memory for 0-page-sized requests
Previously the allocator would return all available memory for a
request of 0 pages. This is rather counter-intuitive and left no
memory for subsequent allocations.
2017-10-18 09:20:51 +09:00
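A hedged sketch of the fix described above, using a toy bump allocator invented for illustration (not the McKernel allocator): a request of zero pages now fails instead of handing back the remaining pool.
```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE  4096UL
#define POOL_PAGES 1024UL

static uintptr_t pool_next = 0x100000;                        /* arbitrary base */
static uintptr_t pool_end  = 0x100000 + POOL_PAGES * PAGE_SIZE;

/* Return the start address of npages contiguous pages, or 0 on failure. */
static uintptr_t alloc_pages(unsigned long npages)
{
	uintptr_t ret;

	if (npages == 0)                      /* guard: reject 0-page requests */
		return 0;
	if (pool_next + npages * PAGE_SIZE > pool_end)
		return 0;                     /* pool exhausted */
	ret = pool_next;
	pool_next += npages * PAGE_SIZE;
	return ret;
}

int main(void)
{
	printf("alloc_pages(0) -> %#lx (rejected)\n", (unsigned long)alloc_pages(0));
	printf("alloc_pages(2) -> %#lx\n", (unsigned long)alloc_pages(2));
	return 0;
}
```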
8813e890c5 Fix the check routine for elf sections 2017-10-18 09:20:51 +09:00
e664ffba18 Show context registers at the interrupt by SGI 6 2017-10-18 09:20:51 +09:00
3bd0137c25 Fix some race condition on arm64
* move barrier() to architecture depended region
* add barrier() in issue_ipi, kprintf, map_virtual
* enable the workaround for cavium thunderx
2017-10-18 09:20:51 +09:00
4f2b4aa402 Round the allocation for cpu-local variables up to PAGE_SIZE
Previously, this resulted in 0 pages being allocated.
2017-10-18 09:20:51 +09:00
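The rounding in question, as a minimal sketch (the size value is hypothetical): plain integer division by PAGE_SIZE yields zero pages for any size below one page, so the byte count is rounded up first.
```c
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Round a byte count up to whole pages; any non-zero size below 4096 -> 1 page. */
static unsigned long bytes_to_pages(unsigned long bytes)
{
	return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	unsigned long cpu_local_size = 1536;   /* hypothetical per-CPU data size */

	printf("naive:   %lu pages\n", cpu_local_size / PAGE_SIZE);     /* 0 */
	printf("rounded: %lu pages\n", bytes_to_pages(cpu_local_size)); /* 1 */
	return 0;
}
```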
682cd34b74 Make mcstop+release architecture independent 2017-10-18 09:20:51 +09:00
2bc4d06a48 Add empty definition of visit_pte_range_safe()
This is for linking only. visit_pte_range_safe() is required only
for memdump, as far as I can tell. Since memdump is disabled anyway
I think it's ok to leave this function empty for now.
2017-10-18 09:20:51 +09:00
4f2c1e07c1 Add ARCH variable to Makefiles
In some Makefiles the ARCH variable was not set, although it was used.
In executer/user/Makefile.in it was used before it was set.
2017-10-18 09:20:50 +09:00
77bb3038d3 Add PT_ENTRIES macro 2017-10-18 09:20:50 +09:00
931448a94d Fix typo in page_align_up 2017-10-18 09:20:50 +09:00
c51bbbabc6 Change x86 to @ARCH@ in mcreboot-smp-x86.sh.in
since it is used for smp-x86 and smp-arm64
2017-10-18 09:20:50 +09:00
2ddc52e1a4 setitimer(): Fix error handling of copy_from_user()
This fixes POSTK_TEMP_FIX_40 (POSTK_DEBUG_TEMP_FIX_40)
2017-10-13 04:59:50 +09:00
3c93958c48 extend_process_region(): fix align_shift (POSTK_DEBUG_TEMP_FIX_68) 2017-10-17 15:07:57 +09:00
9763c40f64 set_robust_list: returns 0
refs #977
2017-10-16 09:54:23 +09:00
3bf77446cc mcreboot-smp-x86.sh: add extra_kopts param
This lets one specify arbitrary kernel parameters, instead of manually
fiddling with the script.
Could ultimately replace params like -t (turbo) and -d (dump_level) that
do not have any side effect (logmode starts a userland daemon)
2017-10-13 10:02:11 +09:00
c3dfb1663d page_fault_handler: do not try to fault addresses < 4k
There is no good reason to map these low addresses (userspace could, with
mmap MAP_FIXED, but that is grounds for many exploits...);

the main advantage, however, is that if we do a NULL deref or something close to it (0->foo)
within a page fault, we get a panic stack instead of a hang
because we cannot take some locks.
2017-10-13 10:02:11 +09:00
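A user-space sketch of the policy described above (the handler signature and names are hypothetical, not the actual McKernel fault path): faults below 4 KiB are rejected rather than mapped.
```c
#include <stdio.h>
#include <stdint.h>

#define LOW_GUARD 4096UL   /* never map the first 4 KiB */

static int handle_page_fault(uintptr_t fault_addr)
{
	if (fault_addr < LOW_GUARD) {
		/* Almost certainly a NULL (or near-NULL) dereference:
		 * fail fast so the caller can report it instead of hanging. */
		fprintf(stderr, "refusing to map low address %#lx\n",
			(unsigned long)fault_addr);
		return -1;
	}
	/* ... the normal demand-paging path would go here ... */
	return 0;
}

int main(void)
{
	handle_page_fault(0x18);      /* e.g. a 0->foo style dereference */
	handle_page_fault(0x7f0000);  /* ordinary user address */
	return 0;
}
```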
217dd9c1e5 x86 set_signal: panic if interrupt came from kernel
This makes debugging errors (e.g. an FPE from the kernel) much easier;
we really shouldn't be taking a user-level coredump blaming the user
in that case anyway.
2017-10-13 10:02:11 +09:00
d4cd756a91 x86/cpu.c: unhandled page fault: print pre-fault stack
Do basic manual unwinding and print raw stack addresses, with a
suggested invocation of addr2line to pretty-print the result.
2017-10-13 10:02:11 +09:00
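A minimal user-space sketch of such frame-pointer unwinding, assuming a build with -fno-omit-frame-pointer (not the McKernel implementation): it prints raw return addresses plus the suggested addr2line invocation; the binary name is hypothetical.
```c
/* Build with: gcc -O0 -fno-omit-frame-pointer unwind.c -o unwind */
#include <stdio.h>
#include <stdint.h>

static void dump_stack(void)
{
	uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);
	int depth = 0;

	printf("call trace (raw return addresses):\n");
	while (fp && depth < 16) {
		uintptr_t ret = fp[1];                /* saved return address */
		uintptr_t *next = (uintptr_t *)fp[0]; /* saved frame pointer */

		if (!ret)
			break;
		printf("  [%2d] %#lx\n", depth, (unsigned long)ret);
		if (next <= fp)                       /* stop on a broken chain */
			break;
		fp = next;
		depth++;
	}
	printf("pretty-print with: addr2line -f -e ./unwind <addresses>\n");
}

static void level2(void) { dump_stack(); }
static void level1(void) { level2(); }

int main(void)
{
	level1();
	return 0;
}
```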
b894619d1b Speed up parallel builds
- make should be $(MAKE)
 - add + in front of rules spawning a long-lasting make process in a
subshell. (This would not be needed with $(MAKE) -C .. target, but our
makefiles do not handle that because they use $(PWD))
 - split the main 'all' rule as all 4 targets are independent
 - fix dependencies where appropriate for parallelism

Extra, not speed-related changes:
 - remove some double-colon for targets as they do not need it

This cuts build time from 5s to 1.5s on a laptop with -j4, and more
importantly from 85s to 35s on a KNL node.
As a bonus, the fixed dependencies remove the need to clean before
rebuilding all the time. Probably.
2017-10-13 10:02:11 +09:00
b962da700b do_signal: ignore SIGWINCH
McKernel would terminate() the running program on terminal resizing.
It actually looks like there is nothing for us to do when we
get that signal anyway (tested with `dialog`).
2017-10-13 10:02:11 +09:00
196379854b Fix a few more harmless compiler warnings:
- myfree in pager.c was called with an argument, so add one to the
dummy definition
- pgoff is offset_t (unsigned) and doesn't need to be compared to 0
- clang says '*(int *)0 = 0' will be optimized away instead of keeping
the segfault without a volatile hint (?! that is wrong!), but it causes
no harm to add anyway.
2017-10-13 10:02:11 +09:00
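The volatile hint mentioned in the last point, in isolation (a compile-only sketch; the helpers are deliberately never called):
```c
#include <stddef.h>

/* Deliberate-crash helpers; never called below, this is compile-only. */
static void crash_maybe_optimized_out(void)
{
	*(int *)NULL = 0;              /* a compiler may drop this store entirely */
}

static void crash_kept(void)
{
	*(volatile int *)NULL = 0;     /* volatile keeps the intentional faulting store */
}

int main(void)
{
	/* Reference the helpers so -Wunused-function stays quiet. */
	(void)crash_maybe_optimized_out;
	(void)crash_kept;
	return 0;
}
```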
d213efac79 mcctrl/sysfs: add parenthesis around SYSFS_UNLINK_KEEP_ANCESTOR check
! has higher precedence than &, so !flags & SYSFS_UNLINK_KEEP_ANCESTOR is
not very likely to be true. Change to !(flags & SYSFS_UNLINK_KEEP_ANCESTOR)
2017-10-13 10:02:11 +09:00
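The precedence issue in isolation (the flag value below is made up for the example): `!` binds tighter than `&`, so the unparenthesized form never tests the intended bit.
```c
#include <stdio.h>

#define SYSFS_UNLINK_KEEP_ANCESTOR 0x2   /* hypothetical value for illustration */

int main(void)
{
	int vals[] = { 0, SYSFS_UNLINK_KEEP_ANCESTOR };
	unsigned int i;

	for (i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
		int flags = vals[i];

		/* buggy: evaluates (!flags) & FLAG; with FLAG == 0x2 this is always 0 */
		int buggy = !flags & SYSFS_UNLINK_KEEP_ANCESTOR;
		/* fixed: test the bit first, then negate */
		int fixed = !(flags & SYSFS_UNLINK_KEEP_ANCESTOR);

		printf("flags=%#x  buggy=%d  fixed=%d\n", flags, buggy, fixed);
	}
	return 0;
}
```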
38910fe13d mc_perf_event.h: s/EVNET/EVENT/ in the guard (improper ifndef) 2017-10-13 10:02:11 +09:00
4d4279121b process/vm: replace vm_range list by an rbtree
This replaces the chained list used to keep track of all memory ranges
of a process by a standard rbtree (no need of interval tree here
because there is no overlap)

Accesses that were done directly through vm_range_list before were
replaced by lookup_process_memory_range, even full list scan (e.g.
coredump).
The full scans will thus be less efficient because calls to rb_next()
will not be inlined, but these calls are rarer and can probably afford
that cost in exchange for code simplicity.

The only reference to the actual backing structure left outside of
process.c is a call to rb_erase in xpmem_free_process_memory_range.

v2: fix lookup_process_memory_range with small start address

v3: make vm_range_insert error out properly

Panicking does not lead to easy debugging, so all error paths
are handled to just return something on error

v4: fix lookup_process_memory_range (again)

The optimistic descent to the left was a more serious bug than just
the last iteration: we could pass by a match and continue down
the tree if the match was not a leaf.

v5: some users actually needed leftmost match, so restore behavior
without the breakage (hopefully)
2017-10-13 10:00:27 +09:00
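A compact sketch of the leftmost-overlap lookup over an ordered tree of non-overlapping ranges. McKernel reuses the Linux rbtree; a plain unbalanced binary search tree is used here only to keep the example short, and the struct and function names are hypothetical.
```c
#include <stdio.h>

struct vm_range {
	unsigned long start, end;            /* [start, end), ranges never overlap */
	struct vm_range *left, *right;       /* ordered by start */
};

static struct vm_range *insert(struct vm_range *root, struct vm_range *r)
{
	if (!root)
		return r;
	if (r->start < root->start)
		root->left = insert(root->left, r);
	else
		root->right = insert(root->right, r);
	return root;
}

/* Return the leftmost range overlapping [start, end), or NULL if none. */
static struct vm_range *lookup_range(struct vm_range *root,
				     unsigned long start, unsigned long end)
{
	struct vm_range *match = NULL;

	while (root) {
		if (root->end <= start)
			root = root->right;   /* node lies entirely below the query */
		else if (root->start >= end)
			root = root->left;    /* node lies entirely above the query */
		else {
			match = root;         /* overlap: remember it, keep looking left */
			root = root->left;
		}
	}
	return match;
}

int main(void)
{
	static struct vm_range ranges[] = {
		{ 0x1000, 0x3000 }, { 0x5000, 0x6000 }, { 0x8000, 0xa000 },
	};
	struct vm_range *root = NULL, *r;
	unsigned long i;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
		root = insert(root, &ranges[i]);

	r = lookup_range(root, 0x2000, 0x9000);
	if (r)
		printf("leftmost overlap: [%#lx, %#lx)\n", r->start, r->end);
	return 0;
}
```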
99da5b6484 ptrace: unify flags PT_TRACE_SYSCALL_ENTER and PT_TRACE_SYSCALL_EXIT to PT_TRACE_SYSCALL
refs #961
2017-10-11 15:43:57 +09:00
6b60dee890 ihklib: Fix ihklib_rusage.h for x86 2017-10-04 05:06:17 +09:00
dd08a3151e mcreboot: Fix version check for mcoverlayfs 2017-10-04 00:37:01 +09:00
e1442bf12b mcexec: Fix usage 2017-10-03 15:34:00 +09:00
86f297ddc4 mcreboot: Fix change umask for /proc and /sys files 2017-10-03 15:21:44 +09:00
823b222af9 mcreboot: Change umask for /proc and /sys files 2017-10-03 06:03:44 +09:00
9c25eb8ef2 mcoverlayfs: Fix version check 2017-10-02 19:51:30 +09:00
665eead78b do_wait: delegate process status for ppid_parent if child process is tracee
refs #946
2017-09-29 14:59:34 +09:00
f8ef43c77d Merge branch 'development' of pccluster.org:mckernel into development 2017-09-29 14:59:10 +09:00
8f4afe410f Remove obsolete pc_init(), pc_ap_init(), pc_test() 2017-09-29 13:20:01 +09:00
da9bb421cc ptrace: call ptrace_syscall_exit before check_signal
refs #960
2017-09-29 10:03:44 +09:00
1e89796d3e Replace ihk_set_kmsg() with ihk_get_kmsg_buf() 2017-09-27 20:26:23 +09:00
a1a2900606 ptrace: Fix the timing of save_fp_regs, and Add copy fp_regs to child in clone_thread
refs #702
2017-09-27 17:02:30 +09:00
79b977ac06 Check xgetbv availability before use for machines without it (i.e. KVM) 2017-09-26 19:31:34 +09:00
37e3118df6 mcexec: Add --stack-premap=<premap_size>[,<max>] to man page 2017-09-26 18:45:52 +09:00
be4d84c0c1 mcexec: Add --stack-premap=<premap_size>[,<max>]
<premap_size> of the stack is pre-mapped when creating a process,
and the maximum stack size is set to <max>.
This replaces MCKERNEL_RLIMIT_STACK=<premap_size>,<max>.
2017-09-26 17:04:10 +09:00
c43c1b640a execve: call ptrace_syscall_exit if execve succeeded
refs #945
2017-09-26 14:31:07 +09:00
e294db7e53 syscall: set syscall_return before calling ptrace_syscall_exit
refs #944
2017-09-26 14:29:02 +09:00
df3f388e09 syscall: set -ENOSYS to syscall_return before calling ptrace_syscall_enter
refs #943
2017-09-26 14:25:49 +09:00
a2fbe99b60 madvise: support MADV_DONTDUMP/DODUMP
refs #661
2017-09-26 14:21:40 +09:00
9c847c0a8f Change permission of mcoverlay-create/destroy.sh from 600 to 755 2017-09-26 14:05:54 +09:00
58c1fd4512 Update test programs for qlmpi (do swap with using shared memory, ib_pingpong) 2017-09-25 16:56:52 +09:00
dae9a5ff13 mcexec: verify argument for -n/-t/-c 2017-09-25 16:43:47 +09:00
4d9a1628f2 Add test programs for ihk_os_getrusage() 2017-09-20 19:48:32 +09:00
47b4bd5aba Installing mcexec.1 man page 2017-09-20 16:37:05 +09:00
ea831c614e mcexec man page 2017-09-20 16:37:00 +09:00
5b51eb80a3 Redirect kmsg to /dev/log and detect hangup
1. ihkmond retrieves kmsg when the amount of kmsg exceeds the threshold and
   /dev/mcosX is deleted
2. ihkmond periodically monitors OS status change to detect hangup
2017-09-20 15:25:19 +09:00
daa7526127 rusage and ihklib: Fix out-of-memory reporting and cleanup
1. Fix OOM: Count memory usage only when allocation succeeded
2. Fix OOM: Make user allocation fail when memory is running out
3. Fix OOM: Move rusage_init() before numa_init()
4. Cleanup: Rename ihkconfig/ihkosctl functions
5. Cleanup: Pass event type to eventfd()
6. Cleanup: arch/.../rusage.h --> arch/.../arch_rusage.h
2017-09-20 15:11:57 +09:00
a1af7edd6e ihk_os_create_pseudofs(): Add a function to prepare /proc and /sys 2017-09-20 15:11:57 +09:00
c5d71c325d Modify copyright of files related to XPMEM 2017-09-20 15:11:57 +09:00
aa7cb970c4 ihk_os_getrusage(): Compile LWK-specific results in mcctrl
1. User asks mcctrl for the result via ihk_os_getrusage() with passing void *
2. mcctrl compiles the results and passes them to the user
3. User interprets it by using the type defined in the LWK-specific header
2017-09-20 15:03:45 +09:00
5664125e57 mcexec: verify number of processes for partitioned execution 2017-09-21 16:11:56 +09:00
203bfc2492 mcexec: limit nr. of threads for non-OpenMP partitioned execution 2017-09-21 15:30:37 +09:00
973d8ddd2c remove kernel/gencore.c
That file is not compiled (there are arch/*/kernel/gencore.c variants)
2017-09-12 18:27:28 +09:00
a491e49bbc syscall.c: fix misleading indent
This is a non-functional change, brought to attention by newer gcc warnings
2017-09-12 18:27:28 +09:00
2f9af42b2e configure: set KERNELSRC with double-quotes
This evals 4.12.5-300.fc26.x86_64 right away, which is necessary for e.g.
the variable used for System.map detection.
(looking at history, ihk always had double-quotes while mckernel always
had single quotes -- this looks like a manual copy typo?)
2017-09-12 18:27:28 +09:00
b3613e2535 configure: check for read access on system.map
This lets us fallback gracefully to /System.map, which is
more open by default and binary identical on rhel systems
2017-09-12 18:27:28 +09:00
2a46fd0b2d compiler.h: take in recent linux updates for newer gcc support
Had to remove from original compiler-gcc:
 - things that deal with types, e.g. READ_ONCE macro and friends;
 - #define barrier(). This one would be better there at some point.
2017-09-12 18:27:28 +09:00
230272438f init_fpu: only call xgetbv if we have XSAVE cpuid
xgetbv crashes on CPUs without AVX; the XSAVE bit seems to indicate
that it is ok to call xgetbv
2017-09-12 18:27:28 +09:00
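A user-space sketch of that check on x86_64, assuming GCC's <cpuid.h> and an assembler that knows the xgetbv mnemonic; from user space the OSXSAVE bit is the safe precondition for executing xgetbv, so both bits are tested here.
```c
/* x86_64 only. Build with: gcc -O2 xsave_check.c */
#include <stdio.h>
#include <stdint.h>
#include <cpuid.h>

#define CPUID1_ECX_XSAVE   (1u << 26)
#define CPUID1_ECX_OSXSAVE (1u << 27)   /* OS has enabled XSAVE/xgetbv */

static uint64_t xgetbv0(void)
{
	uint32_t lo, hi;

	__asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	/* The commit checks the XSAVE bit before touching xgetbv; in user
	 * mode OSXSAVE is the safe precondition, so check both here. */
	if ((ecx & CPUID1_ECX_XSAVE) && (ecx & CPUID1_ECX_OSXSAVE))
		printf("XCR0 = %#llx\n", (unsigned long long)xgetbv0());
	else
		printf("XSAVE not available, skipping xgetbv\n");
	return 0;
}
```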
99a45f20c2 eliminate POSTK_DEBUG_TEMP_FIX_55: fixes preemption bug 2017-09-12 18:21:56 +09:00
43db8e2d65 remove osnum from mckernel kargs. refs #338 2017-09-12 14:53:44 +09:00
4eed36f124 procfs: support /proc/pid/status State field
refs #445
2017-09-12 13:37:27 +09:00
cdfa4015b7 load_elf: check mckernel execution
refs #758
2017-09-12 13:15:22 +09:00
a05b6e1ba8 Expand dump-functions for excluding user/unused memory (This is rebase commit for merging to development) 2017-09-11 15:49:04 +09:00
325082a571 adapt "out of tree build" for arm64 2017-09-11 15:29:53 +09:00
0278a876db disable POSTK_DEBUG_* on x86_64 2017-09-07 22:20:22 +09:00
707b245009 disable swap out/in in qlmpi 2017-09-07 16:06:56 +09:00
8cc264d794 fix build error with "out of tree build"
- change include-path
- enable memdump by default
2017-09-06 21:54:43 +09:00
9a550b310c Add hwcap.h for x86 2017-09-06 11:10:32 +09:00
9989f41fd3 add arm64 support
- add arm64 dependent codes with GICv3 and SVE support
- fix bugs based on architecture separation requests
2017-09-05 15:06:27 +09:00
704096b139 profile: fix process level aggregation bug 2017-09-04 09:07:58 +09:00
99ca46663b mcctrl, mcexec: fix a bunch of warnings 2017-09-04 08:53:32 +09:00
90fbfd6f7d clear_range_l3(): remove debug message 2017-09-04 08:33:42 +09:00
f4c32e5507 qlmpi: add testcase to qlmpi (rusage for swap) 2017-08-31 15:43:28 +09:00
4b3f220659 qlmpi: fix debugging part of swap 2017-08-31 14:04:11 +09:00
82a0f155d8 qlmpilib.c:fix ql_init() to static 2017-08-30 17:00:37 +09:00
a2b8235e83 Add -rpath to mcexec 2017-08-30 16:54:48 +09:00
b53fb5f5cb qlmpi: export qlmpilib.h 2017-08-30 10:37:36 +09:00
236a072311 Add qlmpi and swap to mckernel (This is rebase commit for merging to development) 2017-08-29 15:04:58 +09:00
74f15783d2 ihk_os_getrusage(): Add per-page-size memory usage accounting 2017-08-17 12:49:34 +09:00
184c2d311c fileobj_flush_page(): Not flush when MF_HOST_RELEASED 2017-08-17 12:49:34 +09:00
75e2bb7793 mcctrl: Fix debug messages 2017-08-17 12:49:34 +09:00
6d4d6440aa terminate(): clean-up and formatting 2017-08-08 11:12:55 +09:00
9194742de8 do_mmap(): fix calculation of search_free_space() hint 2017-08-01 16:24:07 +09:00
831a0637a1 delete debug print 2017-08-01 15:27:51 +09:00
ac432504a7 uti_attr: move kmalloc after error check 2017-07-28 10:31:59 +09:00
b39fec1104 uti: remove unused functions 2017-07-26 13:14:30 +09:00
86dedc32fa Eliminate Japanese comments 2017-07-15 20:04:16 +09:00
effde241b9 support uti_attr for utility thread offloading 2017-07-25 13:03:48 +09:00
101cab5b0a remove debug print 2017-07-25 13:02:17 +09:00
4cd1c120fa profile: add PROFILE_remote_page_fault 2017-07-23 19:00:00 +09:00
bf5ac7afc8 remote_flush_tlb_array_cpumask(): bundle remote TLB invalidations 2017-07-21 15:34:48 +09:00
bc423255d9 mcctrl/mcexec: limit thread pool size when too many threads exist on Linux 2017-07-21 15:33:19 +09:00
6714161c25 profile remote TLB invalidations 2017-07-20 22:28:25 +09:00
992a292c08 profile: better time breakdown and exclusion of idle cycles 2017-07-20 17:36:34 +09:00
64c2e437c6 open: check filename address (re-commit) 2017-07-19 11:37:55 +09:00
dd9675d65e NUMA: only print a short summary at boot time 2017-07-19 09:11:44 +09:00
51ed8dce06 numa_init(): fix rusage memory counting 2017-07-19 08:23:05 +09:00
01f5e46865 revert 2d7890731e 2017-07-18 12:13:48 +09:00
38961fca78 Revert "do_fork(): RLIMIT_NPROC check"
This reverts commit 035e7913d8.
2017-07-13 04:13:41 +09:00
2d7890731e add_process_memory_range: do not initialize the page when the physical page is not present 2017-07-18 00:45:18 +09:00
7d181fccd9 open: check filename address 2017-07-18 00:09:39 +09:00
bd75e80df2 terminate: fix reference to a freed pointer 2017-07-17 19:32:08 +09:00
035e7913d8 do_fork(): RLIMIT_NPROC check
1. mcexec sets RLIMIT_NPROC to the number of mcexec threads.
2. do_fork() gets the current number of threads by calling rusage function.
3. do_fork() returns -EAGAIN when the limit is exceeded.
2017-07-12 20:42:38 +09:00
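A hedged user-space sketch of the check described in steps 1-3 above (the thread count is faked here; the real code queries McKernel's rusage accounting):
```c
#include <stdio.h>
#include <errno.h>
#include <sys/resource.h>

/* Hypothetical stand-in for the rusage-based thread count in the commit. */
static unsigned long current_thread_count(void)
{
	return 4;
}

static int do_fork_check(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NPROC, &rl))
		return -errno;

	/* Step 3 of the commit message: refuse to fork past the limit. */
	if (rl.rlim_cur != RLIM_INFINITY &&
	    current_thread_count() >= rl.rlim_cur)
		return -EAGAIN;

	return 0;        /* proceed with the real fork */
}

int main(void)
{
	int ret = do_fork_check();

	printf("do_fork_check() -> %d%s\n", ret,
	       ret == -EAGAIN ? " (EAGAIN: limit exceeded)" : "");
	return 0;
}
```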
7d38c7c147 delete debug print 2017-07-14 10:13:22 +09:00
a801bcc591 delete rusage.c 2017-07-14 09:52:33 +09:00
d7b8e7f4f4 fix to count user pages
refs #864
2017-07-14 09:51:39 +09:00
6afea4af48 mcexec: Fix debug/error messages 2017-07-12 14:30:21 +09:00
6415dcfdcc mcexec: Disable address space layout randomization
Move the code from mcreboot.sh to mcexec.c.
2017-07-12 14:17:38 +09:00
0f58e9e77d NUMA: expose correct /sys/devices/system/node/nodeX/meminfo 2017-07-07 00:59:32 +09:00
72e3f5ee50 ihk_mc_get_ikc_cpu(): Get IKC destination CPU 2017-07-11 20:20:40 +09:00
8d57ad9bc4 pmc_start, pmc_stop: Error check on counter number 2017-07-11 19:05:45 +09:00
35b36c2d33 move_pages_smp_handler(): more parallelization 2017-07-08 18:36:13 +09:00
632611d78c mbind(): debug msg 2017-07-08 18:36:13 +09:00
d48d44d365 move_pages(): fix barrier in parallel implementation 2017-07-08 18:36:13 +09:00
4c0f401424 move_pages(): parallel implementation v1 2017-07-08 18:36:05 +09:00
06f824c829 pte_update_phys(): update physical address of a PTE 2017-07-08 18:36:05 +09:00
7a606baad4 move_pages(): sequential implementation 2017-07-08 18:36:05 +09:00
4c6c66555e memset_smp(): parallel memset 2017-07-08 18:36:05 +09:00
8426cf589a ihk_pagealloc_free(): report double-free in bitmap based allocator 2017-07-08 18:36:05 +09:00
da7421e8ee memdebug: more detailed error report 2017-07-08 18:36:05 +09:00
209748d913 visit_pte_range(): visit L1 PTEs but don't free for MF_PREMAP files 2017-07-08 18:36:04 +09:00
f81722c63b __mckernel_free_pages_in_allocator(): fix deallocation of invalid physical range 2017-07-08 18:35:50 +09:00
2189c55d99 x86: ASM fast memset() 2017-07-08 18:26:51 +09:00
201a7e2595 Red-black tree based physical memory management 2017-07-08 18:26:51 +09:00
5cdd194856 Port Linux red-black trees 2017-07-08 18:12:01 +09:00
0061adadfb temporary fix for bug #889 2017-07-04 12:04:37 +09:00
67843151d3 fix how to count rss and num of threads
refs #864
refs #865
2017-07-03 16:27:46 +09:00
083cf3fcc9 rusage_max_memory is set to the sum of all memory chunks
refs #891
2017-07-03 14:49:35 +09:00
4236323661 add SCD_MSG_EVENT_SIGNAL
refs #862
2017-07-03 14:49:13 +09:00
5a9bee55c9 kill system call offloading from interrupt_syscall (tid == -1): change to one-sided communication
refs #889
2017-07-03 14:48:42 +09:00
6e23b07b20 disable context switch until thread termination completes
refs #888
2017-07-03 14:47:48 +09:00
e64bd49d9e Add comment for x86_sregs 2017-07-03 10:43:36 +09:00
72b8f99d3b Correct comment for do_page_fault_process_vm() 2017-07-03 10:43:36 +09:00
090937a5a3 fix out of tree build 2017-06-30 09:57:50 +09:00
2082acdf0d add executer/user/arch/x86_64/Makefile.in 2017-06-28 09:36:31 +09:00
a8f11634e6 remove debug print for uti tracer 2017-06-27 14:42:04 +09:00
4f9865cc8f clean up unused code 2017-06-27 13:46:38 +09:00
07efb3ab9a support to utility thread offloading 2017-06-27 13:27:09 +09:00
2afc9d37d1 fix config.h inclusion 2017-06-17 07:05:33 +09:00
fa6f20a3c4 Correct comments in gencore.c 2017-06-16 21:47:23 +09:00
52bc052e1a mcexec: recursively bind mount $prefix/rootfs/ on / 2017-06-16 18:01:25 +09:00
f84415c310 mcexec: use atobytes() for MCKERNEL_RLIMIT_STACK 2017-06-15 16:50:34 +09:00
1a853e07d7 rus_vm_fault(): fix misaligned address before accessing PTE 2017-06-14 20:32:03 +09:00
07b0954610 IKC: add ihk_ikc_direction to ihk_ikc_listen_param. refs #841 2017-06-13 16:33:15 +09:00
1f006b2381 remote_page_fault(): free remote PF response packet to avoid memory leak 2017-06-12 22:03:12 +09:00
4dfd806aa7 mcctrl: release syscall packets to LWK -> Linux channels 2017-06-12 22:02:32 +09:00
c6e3185246 mcctrl: clean up RUS page hash at job completion 2017-06-12 13:04:03 +09:00
d9e6ff235d mcctrl: track and clean up ikc2linux channels 2017-06-12 13:03:07 +09:00
b03f69783a mcctrl: cleanup devobj pagers in release_handle() to avoid memory leak 2017-06-11 19:13:31 +09:00
ab915f3331 mcctrl: clean up pagers for file objects to avoid memory leak 2017-06-11 19:11:54 +09:00
7773c4aef6 add log print for existing processes/threads
usage: ihkosctl 0 ioctl 40000000 [1-4]
1: print for existing processes
2: print for existing threads
3: print for existing processes without process lock
4: print for existing threads without thread lock
2017-06-11 15:19:24 +09:00
58e531eb58 mcreboot: add taskset -c 0 to insmod. refs #848 2017-06-09 17:18:45 +09:00
9beef7d901 sysfs: fix directory memory leak 2017-06-09 15:51:41 +09:00
0733592eb5 mcexec_open_exec() fix filename memory leak 2017-06-09 15:51:14 +09:00
4d0e0728f4 destroy_thread(): disable IRQ while holding update lock 2017-06-08 17:40:35 +09:00
66fad4c7a4 terminate(): do not iterate process hash if no children processes exist 2017-06-08 14:53:57 +09:00
5758dba7cf use spinlocks in MCS rwlock 2017-06-08 14:16:29 +09:00
1ca16b9693 rusage: add kernel/include/config.h.in 2017-06-08 09:02:52 +09:00
d29922c820 configure: re-autoreconf 2017-06-07 17:33:32 +09:00
46b48ac59b __return_syscall(): verify response structure 2017-06-07 17:21:55 +09:00
446ef0465b mcctrl: verify ihk_device_map_virtual()'d buffer before accessing 2017-06-07 17:21:55 +09:00
200fe9aec4 mcctrl/mcexec: fix per-process data reference counting 2017-06-07 17:21:55 +09:00
fedba28a93 extend_process_region(): fix alignment 2017-06-07 17:21:55 +09:00
b527503937 Fix rusage 2017-06-07 15:15:20 +09:00
6bdafbd33b Fix rusage 2017-06-07 09:30:42 +09:00
12e7ed644f fileobj_flush_page(): do not offload for files with MF_HOST_RELEASED flag set 2017-06-05 22:20:25 +09:00
edf059888d support rusage parameter of wait4
refs #857
2017-05-28 07:52:47 +09:00
a66fb96cd9 re-autoconf 2017-05-28 07:52:38 +09:00
dd2ef89997 SMP: generic function call facility for CPU sets 2017-05-28 07:41:48 +09:00
ba7edf1981 move out local IRQ vector definitions to shared header 2017-05-28 07:36:21 +09:00
a669fc5125 extend_process_region(): align to heap extension 2017-05-26 15:45:57 +09:00
c0cabc2d83 brk(): return old address if memory allocation fails 2017-05-26 15:41:38 +09:00
e306b1e838 fileobj_create(): fix --mpol-shm-premap for Quadrant mode 2017-05-31 08:33:29 +09:00
0c3b705f98 brk(): make aggressive heap extension optional 2017-05-24 01:41:54 +09:00
9f55263528 mcexec: atobytes() to convert size string to # of bytes 2017-05-24 01:41:54 +09:00
74c5f61fd5 mmap(): fix populate_len warning 2017-05-24 01:41:54 +09:00
cadb66e5c1 init_host_ikc2linux(): adjust minimum queue size 2017-05-23 20:00:09 +09:00
9b5ccb5a33 Pre-map file mappings from /dev/shm (--mpol-shm-premap mcexec argument) 2017-05-23 20:00:06 +09:00
c5079898c2 mckernel_allocate_aligned_pages_node(): support explicit NUMA node designation 2017-05-23 19:58:52 +09:00
746b459e7f profile: more detailed profiling of file PFs 2017-05-23 19:58:52 +09:00
4c42086154 profile: fix job level clearing 2017-05-23 19:58:52 +09:00
56ee0787c9 profiler: function to clear process level logs 2017-05-23 19:58:52 +09:00
e901d42fb6 mcexec: --extend-heap-by: argument to specify heap extension size 2017-05-23 19:58:49 +09:00
29ab087fa2 execve(): larger allocation for program descriptor 2017-05-23 19:57:08 +09:00
105d373765 PROFILE_page_fault_XXX: more detailed page PF profiling 2017-05-23 19:57:08 +09:00
0dd2fad33b brk(): more forceful heap extension 2017-05-23 19:57:08 +09:00
e554f4e2f9 mcexec: --disable-sched-yield: avoid kernel/user switch 2017-05-23 19:57:08 +09:00
a256280118 PROFILE_mmap_XXX: more detailed mmap profiling 2017-05-23 19:57:08 +09:00
d75be7228b PROFILE_mmap_anon_no_contig_phys: profile ANON mmap()s that couldn't be backed by contiguous physical memory 2017-05-23 02:42:06 +09:00
923dc4aa11 PROFILE_mpol_alloc_missed: profile allocations that fail to satisfy user requested memory policy 2017-05-23 02:42:06 +09:00
e3e0f6a174 mcexec: introduction of --profile 2017-05-23 02:42:06 +09:00
dd6f721e03 profile: job level event accumulation 2017-05-23 02:42:06 +09:00
9c25d47d9b mcexec: transfer job information to LWK 2017-05-23 02:42:06 +09:00
5a4148aaaf ___kfree(): disregard NULL pointer argument 2017-05-23 02:42:06 +09:00
32c8f6192d unhandled_page_fault(): print registers for kernel mode PF 2017-05-23 02:42:05 +09:00
e2f424846c profile: rewrite syscall tracker for generic profiling code 2017-05-23 02:42:05 +09:00
989af7e045 mcexec: RLIMIT_STACK handling 2017-05-23 02:39:42 +09:00
721cee05a2 MPOL default threshold to 0 2017-05-23 02:39:42 +09:00
86aa76e088 IKC: increase ikc2linux channels' queue size 2017-05-23 02:39:42 +09:00
ab113658f1 mcexec: --no-bind-ikc-map for optionally disabling binding 2017-05-23 02:39:42 +09:00
2d72042021 mcexec: bind to CPus according to ikc_map 2017-05-23 02:39:42 +09:00
610463ff39 sched_setaffinity(): respect process cpu_set 2017-05-23 02:39:42 +09:00
dfb0a37305 procfs: increase procfs request timeout 2017-05-23 02:39:42 +09:00
26b9484bae mcexec: --mpol-threshold to control MPOL_BIND/MPOL_PREFERRED 2017-05-23 02:39:42 +09:00
b4aecfd43c partitioned execution: order by process start time 2017-05-23 02:39:42 +09:00
bf036f19f7 mcreboot: offline/re-online RAM before IHK reserve 2017-05-23 02:39:42 +09:00
182202523e mcexec/mm: user memory policy control for heap, stack, etc. 2017-05-23 02:39:42 +09:00
afb7cb3a1e BSS/data: demand paging for non-file section and respect user requested NUMA allocation policy 2017-05-23 02:39:41 +09:00
fdbdcbd0ee VR_AP_USER: memory range flag to respect user mempolicy (e.g., in PF handler) 2017-05-23 02:39:41 +09:00
a18fd1f45c sched_yield(): optionally disable wait 2017-05-23 02:39:41 +09:00
d8170e292c init_process_stack(): debug msg format 2017-05-23 02:39:41 +09:00
fee5234c54 stack: force transparent large pages 2017-05-23 02:39:41 +09:00
6309095fd2 brk(): force transparent large pages 2017-05-23 02:39:41 +09:00
b005adc103 SCD_MSG_PERF_CTRL: use IKC3 channel for response packet 2017-05-20 12:43:08 +09:00
21373338cc mcctrl: IHK CPU register manipulation implementation 2017-05-20 12:38:14 +09:00
39352cd364 event_signal(): use IKC3 ikc2linux channel 2017-05-19 10:31:15 +09:00
84025cc9cb configure : add option --enable-rusage 2017-05-19 10:31:14 +09:00
04cbfbb025 xpmem: porting xpmem v2.6.3
implement xpmem_get, xpmem_release, xpmem_attach, xpmem_detach
2017-05-19 10:30:36 +09:00
ba58054c9d create rusage branch. 2017-05-19 10:30:36 +09:00
7fd55dc83f IKC: only CPU 0 checks the master channel 2017-05-19 10:26:30 +09:00
d66af42f7b Revert "IKC: separate IRQ between Master-channel and Regular-channel"
This reverts commit 3c98b9410966ceebe187ebae1038317b628fbb03.
2017-05-19 10:26:30 +09:00
4b964b8e0d IKC: allocate Linux channel table dynamically 2017-05-19 10:26:30 +09:00
65dc3440cb IKC: separate IRQ between Master-channel and Regular-channel 2017-05-19 10:26:30 +09:00
fbd9086ce5 IKC: delete receive channel list 2017-05-19 10:26:29 +09:00
c2b1d8e3ef IKC: delete the comments for review 2017-05-19 10:26:29 +09:00
e2d59e2cb9 mcreboot-smp: introduction of ikc_irq_start argument 2017-05-19 10:26:29 +09:00
3de0f5ea19 mcreboot-smp: introduction of ikc_map argument 2017-05-19 10:26:29 +09:00
373e9ea63c ap_wait(): init syscall channel with proper Linux remote CPU 2017-05-19 10:26:29 +09:00
8daffa939e IKC: distribute IKC interrupts to Linux CPUs. 2017-05-19 10:26:29 +09:00
eaa4d35fab do_migrate(): don't clear oversubscribed source CPUs from remote TLB mask 2017-05-17 11:22:29 +09:00
a968c935b5 Fix timing of save/restore of smp_affinity, and modification of /proc/irq/*/smp_affinity 2017-05-15 14:52:22 +09:00
e01f6dd6ea eclair: obtain kernel_base from dump_mem_chunks_t 2017-05-12 13:23:23 +09:00
a07d802cbe Fix manipulation of /proc/irq/*/smp_affinity
Fix the case where
(1) #CPUs % 32 == 0
(2) #CPUs % 4 != 0
2017-05-12 09:35:49 +09:00
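A sketch of rendering an all-CPUs mask in the comma-separated 32-bit hex-word format of /proc/irq/N/smp_affinity, covering the two corner cases named above; the digit padding of the leading word is an assumption based on the kernel's bitmap formatting.
```c
#include <stdio.h>

static void print_all_cpus_mask(unsigned int ncpus)
{
	unsigned int words = (ncpus + 31) / 32;          /* no extra word when ncpus % 32 == 0 */
	unsigned int lead_bits = ncpus % 32 ? ncpus % 32 : 32;
	unsigned int lead_digits = (lead_bits + 3) / 4;  /* partial hex digit when % 4 != 0 */
	unsigned int i;

	/* leading (most significant) word: only lead_bits bits are set */
	unsigned long lead = (lead_bits == 32) ? 0xffffffffUL
					       : ((1UL << lead_bits) - 1);

	printf("%0*lx", (int)lead_digits, lead);
	for (i = 1; i < words; i++)
		printf(",%08x", 0xffffffffu);
	printf("\n");
}

int main(void)
{
	print_all_cpus_mask(64);   /* % 32 == 0: two full words, no empty leading group */
	print_all_cpus_mask(68);   /* % 32 == 4, % 4 == 0: leading "f" */
	print_all_cpus_mask(66);   /* % 4 != 0: leading "3" */
	return 0;
}
```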
1e442cce10 mcklogd: fixed termination method of mcklogd 2017-05-09 16:28:21 +09:00
3f870b69a6 mcklogd: change the timing of start/stop. 2017-05-09 16:06:07 +09:00
0fef80cb19 SCD_MSG_CPU_RW_REG: use syscall channel for reply packet in CPU MSR read/write operation 2017-05-05 00:16:02 +09:00
9992fe0d72 mcctrl: support remote CPU MSR read/write operations 2017-05-05 00:01:43 +09:00
2d19ed9391 configure.ac: check NUMA development library 2017-04-29 05:30:27 +09:00
2f2f04d5a1 mcexec: ENABLE_MCOVERLAYFS on CentOS for up to version 7.3 2017-04-29 05:10:21 +09:00
1541b26086 ihklib: add pa_info functions. 2017-04-27 17:13:49 +09:00
e6c4d7731d Merge remote-tracking branch 'origin/rusage'
Conflicts:
	configure
	kernel/process.c
2017-04-27 15:10:38 +09:00
94b527e027 modified: lib/include/ihk/rusage.h 2017-04-27 14:47:21 +09:00
8c9b207557 configure : add option --enable-rusage 2017-04-27 14:00:59 +09:00
dacb05844b mcoverlayfs: support compile up to 3.10.0-514 2017-04-20 00:48:56 +09:00
c3ec5d20ca configure: --with-uname_r: optionally specify target kernel version string 2017-04-20 00:48:56 +09:00
92a40f92dd mcctrl_put_per_proc_data(): do not use task_pid_vnr() in IRQ context 2017-03-30 15:02:57 +09:00
45bddf3caa mcexec_syscall(): do not use task_pid_vnr() in IRQ context 2017-03-30 14:56:57 +09:00
b7671fedd3 mcctrl_per_proc_data: comments 2017-03-30 14:51:24 +09:00
c38d536aaa xpmem: porting xpmem v2.6.3
implement xpmem_get, xpmem_release, xpmem_attach, xpmem_detach
2017-03-29 18:20:53 +09:00
4ee0c05e08 mcoverlayfs: fix NULL pointer dereference on ovl_dentry_release() 2017-03-28 21:52:41 +09:00
f2ab0193e5 fix panic when thread exit and signal delivery overlap. 2017-03-28 11:31:27 +09:00
ef910fdf0e Discard outstanding system calls at the end of mcexec. 2017-03-28 11:23:54 +09:00
b97a8c5138 mcexec_open_exec(): use strncpy_from_user() before accessing file name 2017-03-21 20:13:12 +09:00
034d10b185 When receiving a signal during futex processing, the signal is not processed. 2017-03-21 20:37:17 +09:00
3fe2257929 create rusage branch. 2017-03-15 23:22:51 +09:00
eca4018ecb mcctrl: release syscall packets when mcexec termination
refs #835
2017-03-11 20:57:54 +09:00
e936b2ebe1 memobj_release: don't call syscall_generic_forwarding after process termination
refs #816
2017-03-10 12:58:47 +09:00
d8112f92f8 terminate(): don't call free_all_process_memory_range
refs #816
2017-03-08 14:30:28 +09:00
1076010de4 Boundary check in early_alloc_pages() 2017-03-04 17:21:57 +09:00
da4a5ec44b page_allocator_init(): move memory_nodes to BSS 2017-02-24 19:33:25 +09:00
d35aa9b100 page_allocator_init(): clean-up code, eliminate initial flag 2017-02-24 14:25:22 +09:00
ba8dbf1b19 Put kernel image and page table into one chunk 2017-02-24 14:21:32 +09:00
6213f0e488 mcctrl: fix cpumask macros for Linux 4.6 2017-02-02 15:49:39 +09:00
4ef82c2683 OFP-SNC-4: offline/online MCDRAM before memory reservation 2017-01-30 14:47:36 +09:00
e066a8798c IKC: adjust master channel queue size to nr. of CPUs 2017-01-30 07:24:09 +09:00
b702c9691e AP init: synchronize syscall channel initialization 2017-01-30 07:24:09 +09:00
addbe91e59 do_migrate(): signal migrated thread before releasing runq lock 2017-01-30 07:24:09 +09:00
b812848a0e eclair-dump-backtrace.exp: handle user space threads 2017-01-30 07:24:09 +09:00
ad214c8206 reserve_user_space(): mutual exclusion on mmap 2017-01-30 07:24:09 +09:00
1bc3218fc1 partitioned execution: bind mcexec to corresponding NUMA node 2017-01-30 07:24:09 +09:00
5cc420a6c3 syscall/offload tracker: clean-up and support process-wise aggregation 2017-01-30 07:24:09 +09:00
c7686fdf4e execve(): fix memory leak 2017-01-30 07:24:09 +09:00
c1dae4d8b0 mmap(): no physical memory pre-allocation for Intel 128MB mapping 2017-01-30 07:24:08 +09:00
2473025201 do_mmap(): remove codes for debug
refs #395
2017-01-16 15:53:27 +09:00
fa5c1b23ca eclair-dump-backtrace.exp: dump full backtrace of all mckernel threads 2017-01-15 10:46:07 +09:00
f2f499aace mcreboot/stop: toggle address-space layout randomization (ASLR) to avoid mcexec user-space reservation failure 2017-01-15 10:36:50 +09:00
bd47b909bf futex(): spin wait when CPU not oversubscribed and fix lost wake-up bug 2017-01-13 08:43:25 +09:00
d646c2a4b9 cpu_set/clear(): unsigned long for IRQ flags 2017-01-13 08:43:25 +09:00
865ada46bf IKC2: eliminate unused IKC structures 2017-01-13 08:43:25 +09:00
cdffc5e853 do_syscall(): eliminate centralized lock for exit/kill code path (use IKC2 thread pool) 2017-01-08 14:16:10 +09:00
0e67e9266b ap_init(): reformat AP cores report 2017-01-08 14:16:10 +09:00
1ff0afe6fb devobj/fileobj: do not try to free memory for device file mappings 2017-01-08 14:16:10 +09:00
d34884f9a4 numa_init(): error handling and propagation 2017-01-08 14:15:51 +09:00
7a0c204dc1 eclair: report PID for all threads 2017-01-08 14:15:44 +09:00
25f67c9ef8 mcreboot/mcstop-smp-x86: suppress libkmod warnings 2017-01-08 14:15:34 +09:00
a776464a7e mcreboot/mcstop: adjust swappiness 2017-01-03 09:02:41 +09:00
c40e7105e6 NUMA: order nodes by distance for MPOL_BIND / MPOL_PREFERRED policies as well 2017-01-03 09:02:29 +09:00
5bac38ce8b mmap()/stack/heap: follow user requested NUMA policy 2016-12-31 19:38:05 +09:00
e3f0662130 allocate_aligned_pages_node(): debug msg format 2016-12-31 16:25:14 +09:00
21df56b233 sched_wakeup_thread(): memory barrier after status update 2016-12-31 10:44:13 +09:00
393cec513c allocate_aligned_pages_node(): follow user policy only for user allocations 2016-12-31 10:10:42 +09:00
4437ecc69a do_mmap(): indicate user level allocations for anonymous mappings 2016-12-31 10:09:49 +09:00
40d75baca2 ihk_mc_ap_flag: rewrite flag type, intro for denoting user level allocations 2016-12-30 19:19:34 +09:00
00f3fe0840 ihk_mc_alloc_aligned_pages_node(): support for explicit indication of target NUMA node 2016-12-30 19:03:59 +09:00
47a8b5bda5 mmap(): faster pre-allocation for anonymous private mappings 2016-12-30 17:18:44 +09:00
ec75095073 add_process_memory_range(): optionally return range object 2016-12-30 15:51:17 +09:00
1794232989 irqbalance_mck: create environment file in /tmp to avoid race condition on PFS 2016-12-30 15:47:44 +09:00
40978d162e procfs_read/write(): rewrite synchronization for scalability and correctness 2016-12-28 14:17:17 +09:00
536ce9f927 process_procfs_request(): use IRQ save MCS locks while iterating thread list to avoid deadlock 2016-12-28 12:29:10 +09:00
4e5ec74ffe mmap(): fault in memory only up to file size for populated file mappings 2016-12-27 16:33:24 +09:00
a6d8125fd7 mcreboot-smp-x86: reserve memory first and then CPUs 2016-12-27 15:19:05 +09:00
15d3a0361e destroy_ikc_channels(): eliminate kprint from error free path 2016-12-27 11:52:24 +09:00
6ad84a96a3 mcexec_syscall(): avoid calling task_pid_nr_ns() in IRQ context 2016-12-26 20:43:17 +09:00
16e846e9b6 mcexec: report error in prepare_image() if wait queue interrupted 2016-12-26 20:42:31 +09:00
5bc7185f07 do_migrate(): update debug msg format 2016-12-25 17:34:26 +09:00
32462dfb2d eclair: fix CPU number display for non-active threads 2016-12-25 17:28:31 +09:00
e3ef88c0cf do_sigsuspend(): deschedule thread when necessary (fixes gdb deadlock) 2016-12-25 17:24:32 +09:00
829aae7b8d mcexec: PATH_MAX buffer length in do_generic_syscall() 2016-12-25 17:20:14 +09:00
b836b84825 mcexec_prepare_image(): use memory barrier when updating request status 2016-12-25 17:19:14 +09:00
3e1f154412 patch_process_vm(): eliminate kprintfs from error free code path 2016-12-25 17:18:20 +09:00
e7af537452 get_pid_cred(): proper locking around pid_task 2016-12-25 17:17:27 +09:00
3565959af7 eclair: fix compiler warnings 2016-12-23 09:57:50 +09:00
4667136a4c mcctrl: refcount per-process data to avoid corrupted syscall request lists 2016-12-23 09:54:15 +09:00
972d14611a mcctrl: move prepare waitqueue to per-process data 2016-12-22 10:15:31 +09:00
e90eef8910 eclair: support for direct memory inspection 2016-12-21 21:55:32 +09:00
f81927b85b Revert "brk(): larger allocation units internally"
This reverts commit c58ab0f648.
2016-12-20 11:11:09 +09:00
701cdcdab1 use MCS locks in physical memory allocator 2016-12-19 12:57:59 +09:00
9635a628a9 fileobj/shmobj/devobj: add file size to memobj 2016-12-19 12:55:12 +09:00
3e1b16f3fc syscall_channel: increase queue size to avoid deadlock in ikc_send() 2016-12-18 21:12:38 +09:00
ff37ff9ccf memobj: synch prefetch among processes 2016-12-18 21:12:38 +09:00
5b7bcb7170 fileobj: use read/write MCS locks in page hash 2016-12-18 21:12:37 +09:00
6a5fe90f98 mcexec_get_cpuset(): save CPU set and IKC target cpu in per-process data 2016-12-18 21:12:37 +09:00
91373337ba mcctrl: add IKC target CPU to OS file release_handler 2016-12-18 21:12:37 +09:00
56ed726a88 pager_req_create(): prefetch for MPI library and zerofill for shm 2016-12-18 21:12:37 +09:00
bce10e11e4 fileobj: rewrite for scalability using per-file page hash 2016-12-18 21:12:37 +09:00
91cdb16158 MCS lock: separate IRQ disable/enable versions 2016-12-18 21:12:37 +09:00
c58ab0f648 brk(): larger allocation units internally 2016-12-18 21:12:37 +09:00
f410af1cfc xpmem: porting xpmem v2.6.3
implement xpmem_make, xpmem_remove
2016-12-16 17:00:09 +09:00
aa15e5eea8 mcexec: -t option and OMP_NUM_THREADS for thread pool size 2016-12-14 18:56:30 +09:00
df9f1f8f78 allocate_aligned_pages(): take user set NUMA policy into account 2016-12-13 17:51:39 +09:00
7ace35d737 mcexec_get_cpuset(): fix NUMA search bug 2016-12-13 17:50:50 +09:00
551999ff6b NUMA: order nodes based on distances 2016-12-13 10:46:17 +09:00
052b3f44ca mcexec: -n: topology aware partitioned execution 2016-12-10 16:27:57 +09:00
fdcf766337 prepare_process(): pass cpu_set in program_load_desc 2016-12-09 16:32:20 +09:00
7d13bfb14e set_mempolicy(): limit maxnode to PROCESS_NUMA_MASK_BITS 2016-12-08 21:05:10 +09:00
202bfd9955 IHK-API: expand and fix for ver 1.2. 2016-12-08 17:28:53 +09:00
c99e36235b execve(): disable debug warnings 2016-12-08 16:33:24 +09:00
3cecafac59 obtain_clone_cpuid(): respect parent's CPU set 2016-12-08 16:01:30 +09:00
61fc4c5e55 show_context_stack(): fix warning 2016-12-07 11:42:09 +09:00
fad73cacc1 x86: display call stack for IRQ 133 (for debug) 2016-12-07 11:32:02 +09:00
8fced29978 page_fault_handler(): improved debug msg format 2016-12-07 11:25:02 +09:00
b0f4ae4890 ihk_mc_pt_set_pte(): double check phys address alignment 2016-12-07 11:23:45 +09:00
7070094a31 ihk_mc_pt_print_pte(): handle large pages correctly 2016-12-07 11:13:53 +09:00
011185e3f7 __ihk_pagealloc_large(): fix 1GB page alignment bug 2016-12-07 09:38:37 +09:00
461881e46a /proc/mckernel to indicate McKernel 2016-12-06 14:29:25 +09:00
ddc33821cf sched_yield(): avoid schedule for single thread 2016-12-05 18:10:20 +09:00
0ab7d02994 disable syscall tracker and eliminate interrupt_syscall debug msg 2016-12-05 18:10:20 +09:00
a8c4ab221b use MCS locks in signal handling code 2016-12-05 18:10:20 +09:00
87d36a7752 mcreboot-smp-x86: -t to enable turbo boost 2016-12-05 18:10:20 +09:00
998ded414c mcreboot-smp-x86: shorter sleep in waiting for /proc 2016-12-05 18:10:20 +09:00
f78d031e64 syscall and offload tracking (disabled by default) 2016-12-05 18:10:20 +09:00
4ab37dd34a schedule(): only load page table during context switch if it's different 2016-12-05 18:10:20 +09:00
8129dec2f7 Fix out-of-tree build
<build>/ihk/cokernel/Makefile.common is not found when
<build>/mckernel/kernel/Makefile tries to perform
"make -C <build>/ihk/{cokernel,ikc}" from mckernel/kernel
2016-12-01 16:44:01 +09:00
a1035a1878 fix out of tree build 2016-12-01 12:55:34 +09:00
db169c5f90 add gcc options (-ffreestanding -fno-tree-loop-distribute-patterns)
refs #299
2016-11-29 16:28:18 +09:00
bbb55ef261 sched_setparam: thread lock is necessary when updating another thread's data 2016-11-28 14:04:44 +09:00
1130cafe41 ptrace: fixed for threads. 2016-11-28 11:19:30 +09:00
a1cf27e232 sched_getaffinity(): fix error code for special invalid input 2016-11-28 05:50:01 +09:00
5a1ce99d87 mcexec: fix number of threads not to exceed thread_data array 2016-11-27 07:31:52 +09:00
c7db296e1b getcpu(): expose correct NUMA id 2016-11-26 09:29:09 +09:00
f634a750c5 sched_{set/get}affinity(): fix error codes (also fixes KMP_AFFINITY behavior) 2016-11-24 21:25:16 +09:00
d07a196c8e mcexec: enable the same number of threads as CPU cores 2016-11-24 16:40:52 +09:00
8c56c75d2c process_vm_read_writev(): fix base address check for EFAULT 2016-11-24 10:40:41 +09:00
e54895efde set_mempolicy(): debug msg 2016-11-23 08:53:26 +09:00
2f8cca2d6d memcpy(): faster version using ASM rep; movsl 2016-11-23 08:51:22 +09:00
64607152ee VM: introduction of range lookup cache 2016-11-23 08:48:44 +09:00
20383ad3d0 do_process_vm_read_writev(): page size awareness optimization 2016-11-23 08:47:32 +09:00
787d34f650 introduction of ihk_mc_pt_virt_to_phys_size() 2016-11-23 08:40:33 +09:00
ae618a0c68 mcexec: remount /proc in mcexec's file NS after exec() 2016-11-22 13:22:59 +09:00
f480376153 mcoverlayfs: supported Linux kernel 4.6
add mcoverlayfs(linux-4.6.7 base)
2016-11-17 18:09:27 +09:00
e4b3a88fc6 mcexec_sys_umount(): remove debug print 2016-11-10 15:05:45 +09:00
69a5c53074 NUMA: hide non-existing nodes from /sys/devices/system/node listing 2016-11-05 16:12:08 +09:00
259583e936 mcreboot-smp-x86.sh: more white out of invalid NUMA info 2016-11-05 13:35:53 +09:00
0f826290d0 NUMA: get_mempolicy(), set_mempolicy() and mbind() implementation 2016-11-05 13:32:02 +09:00
e46f027894 mcexec/mcctrl: unmount cgroups (privately) which expose invalid NUMA info 2016-11-04 17:02:48 +09:00
3e093f6a40 sysfs: fix /sys/devices/system/node/online value 2016-11-03 16:10:29 +09:00
00996b551f mcreboot: white out non-existing NUMA information 2016-11-03 16:09:27 +09:00
24d8697cef mcexec: workaround for overlayed /sys FS directory lseek() bug
lseek() on directories under the /sys filesystem that are part of an
overlayed filesystem behaves differently than in the original /sys.
This causes a segfault in libnuma when discovering topology
information. The patch fakes the return value as it is supposed to be,
which also fixes the Intel MPI 2017 MPI_Init() crash.
2016-11-03 13:41:25 +09:00
be4f6741f9 sysfs: fix /sys/devices/system/cpu/cpuXX/online value 2016-11-03 13:39:21 +09:00
7a2f67f5f0 sysfs: eliminate unnecessary new line from /sys/devices/system/node/nodeX/distance 2016-11-03 13:37:53 +09:00
bba0425267 sysfs: fix /sys/devices/system/cpu/online value 2016-11-03 13:36:29 +09:00
2447 changed files with 220004 additions and 21890 deletions

.gitignore (vendored, 40 diff lines)

@@ -1,3 +1,4 @@
*~
*.o
*.elf
*.bin
@@ -8,9 +9,36 @@
Module.symvers
*.order
.tmp_versions
elfboot/elfboot
elfboot/elfboot_test
linux/executer/mcexec
linux/mod_test*
linux/target
old_timestamp
CMakeFiles
CMakeCache.txt
Makefile
!test/*/*/Makefile
!test/signalonfork+wait/Makefile
!test/perf_overflow/Makefile
!test/*/*/*.cmd
Kbuild
cmake_install.cmake
config.h
mcstop+release.sh
mcreboot.sh
mcreboot.1
mcoverlay-destroy.sh
mcoverlay-create.sh
kernel/mckernel.img
kernel/include/swapfmt.h
executer/user/vmcore2mckdump
executer/user/ql_talker
executer/user/mcexec.1
executer/user/mcexec
executer/user/libsched_yield.so.1.0.0
executer/user/libsched_yield.so
executer/user/libmcexec.a
executer/user/libldump2mcdump.so
executer/user/eclair
tools/mcstat/mcstat
/_CPack_Packages
/CPackSourceConfig.cmake
CPackConfig.cmake
/build
mckernel-*.tar.gz

.gitmodules (vendored, new file, 6 lines)

@@ -0,0 +1,6 @@
[submodule "ihk"]
path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git
[submodule "executer/user/lib/libdwarf/libdwarf"]
path = executer/user/lib/libdwarf/libdwarf
url = https://github.com/bgerofi/libdwarf.git

CMakeLists.txt (new file, 262 lines)

@@ -0,0 +1,262 @@
cmake_minimum_required(VERSION 2.6)
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type: Debug Release..." FORCE)
endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM)
project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.0")
# See "Fedora Packaging Guidlines -- Versioning"
set(MCKERNEL_RELEASE "0.91")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
if(DEFINED SYSCONF_INSTALL_DIR)
set(CMAKE_INSTALL_SYSCONFDIR "${SYSCONF_INSTALL_DIR}")
endif()
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
set(BUILD_TARGET "smp-x86" CACHE STRING "Build target: smp-x86 | smp-arm64")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(BUILD_TARGET "smp-arm64" CACHE STRING "Build target: smp-x86 | smp-arm64")
endif()
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
endif()
include(GNUInstallDirs)
include(CMakeParseArguments)
include(Kbuild)
include(CheckCCompilerFlag)
include(AutoconfHelper)
CHECK_C_COMPILER_FLAG(-Wno-implicit-fallthrough IMPLICIT_FALLTHROUGH)
if(IMPLICIT_FALLTHROUGH)
set(EXTRA_WARNINGS "-Wno-implicit-fallthrough")
endif(IMPLICIT_FALLTHROUGH)
# build options
set(CFLAGS_WARNING "-Wall" "-Wextra" "-Wno-unused-parameter" "-Wno-sign-compare" "-Wno-unused-function" ${EXTRA_WARNINGS} CACHE STRING "Warning flags")
add_compile_options(${CFLAGS_WARNING})
option(ENABLE_WERROR "Enable -Werror" OFF)
if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
add_definitions(-DIHK_IKC_USE_LINUX_WORK_IRQ)
endif()
if (BUILD_TARGET STREQUAL "smp-arm64")
foreach(i RANGE 1 120)
add_definitions(-DPOSTK_DEBUG_ARCH_DEP_${i} -DPOSTK_DEBUG_TEMP_FIX_${i})
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPOSTK_DEBUG_ARCH_DEP_${i} -DPOSTK_DEBUG_TEMP_FIX_${i}")
endforeach()
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_VA_BITS\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_VA_BITS OUTPUT_STRIP_TRAILING_WHITESPACE)
message("Host kernel CONFIG_ARM64_64K_PAGES=${CONFIG_ARM64_64K_PAGES}")
message("Host kernel CONFIG_ARM64_VA_BITS=${CONFIG_ARM64_VA_BITS}")
if(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 42)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=2 -DCONFIG_ARM64_VA_BITS=42 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type3.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=48 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type4.lds")
endif()
else(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 39)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=39)
set(LINKER_SCRIPT "smp-arm64_type1.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=4 -DCONFIG_ARM64_VA_BITS=48)
set(LINKER_SCRIPT "smp-arm64_type2.lds")
endif()
endif(CONFIG_ARM64_64K_PAGES STREQUAL "y")
endif()
set_property(CACHE BUILD_TARGET PROPERTY STRINGS smp-x86 smp-arm64)
# define MAP_KERNEL_START
set(tmpdir ${CMAKE_CURRENT_BINARY_DIR}/tmp.resolve_MODULES_END)
file(REMOVE_RECURSE ${tmpdir})
file(MAKE_DIRECTORY ${tmpdir})
file(WRITE ${tmpdir}/driver.c "#include <linux/module.h>\n")
file(APPEND ${tmpdir}/driver.c "unsigned long MAP_KERNEL_START = MODULES_END - (1UL << 23);\n")
file(WRITE ${tmpdir}/Makefile "obj-m := driver.o\n")
file(APPEND ${tmpdir}/Makefile "all:\n")
file(APPEND ${tmpdir}/Makefile "\tmake ${KBUILD_MAKE_FLAGS_STR} -C ${KERNEL_DIR} M=${tmpdir} modules\n")
execute_process(COMMAND make -C ${tmpdir})
execute_process(COMMAND bash -c "offset=`readelf -S ${tmpdir}/driver.ko | grep .data | sed 's/.* //g'`; echo $((0x$offset))"
OUTPUT_VARIABLE MAP_KERNEL_START_OFFSET OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND bash -c "dd if=${tmpdir}/driver.ko bs=1 skip=${MAP_KERNEL_START_OFFSET} count=8 2>/dev/null | od -tx8 -Ax | head -1 | sed 's|.* |0x|g'"
OUTPUT_VARIABLE MAP_KERNEL_START OUTPUT_STRIP_TRAILING_WHITESPACE)
set(ENABLE_MEMDUMP ON)
option(ENABLE_PERF "Enable perf support" ON)
option(ENABLE_RUSAGE "Enable rusage support" ON)
option(ENABLE_QLMPI "Enable qlmpi programs" OFF)
option(ENABLE_UTI "Enable uti support" OFF)
option(ENABLE_UBSAN "Enable undefined behaviour sanitizer on mckernel size" OFF)
option(ENABLE_PER_CPU_ALLOC_CACHE "Enable per-CPU allocator cache (ThunderX2 workaround)" OFF)
find_package(PkgConfig REQUIRED)
set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON)
find_library(LIBRT rt)
if (NOT LIBRT)
message(FATAL_ERROR "error: couldn't find librt")
endif()
find_library(LIBNUMA numa)
if (NOT LIBNUMA)
message(FATAL_ERROR "error: couldn't find libnuma")
endif()
find_library(LIBBFD bfd)
if (NOT LIBBFD)
message(FATAL_ERROR "error: couldn't find libbfd")
endif()
find_library(LIBIBERTY iberty)
if (NOT LIBIBERTY)
message(FATAL_ERROR "error: couldn't find libiberty")
endif()
find_library(LIBDWARF dwarf)
if (NOT LIBDWARF)
if (CMAKE_CROSSCOMPILING)
message(FATAL_ERROR "Could not find libdwarf.so, install libdwarf-devel to ${CMAKE_FIND_ROOT_PATH}")
endif()
message("WARNING: libdwarf will be compiled locally")
enable_language(CXX)
else()
# Note that libdwarf-devel provides /usr/include/libdwarf/dwarf.h
# but elfutils-devel provides /usr/include/dwarf.h
# while mcinspect.c performs "#include <dwarf.h>"
find_path(DWARF_H dwarf.h PATH_SUFFIXES libdwarf)
endif()
if (ENABLE_QLMPI)
find_package(MPI REQUIRED)
endif()
if (ENABLE_UTI)
pkg_check_modules(LIBSYSCALL_INTERCEPT REQUIRED libsyscall_intercept)
link_directories(${LIBSYSCALL_INTERCEPT_LIBRARY_DIRS})
endif()
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-([0-9]+)(.*))?" "\\1;\\2;\\3;\\5;\\6" LINUX_VERSION ${UNAME_R})
list(GET LINUX_VERSION 0 LINUX_VERSION_MAJOR)
list(GET LINUX_VERSION 1 LINUX_VERSION_MINOR)
list(GET LINUX_VERSION 2 LINUX_VERSION_PATCH)
list(GET LINUX_VERSION 3 LINUX_VERSION_RELEASE)
math(EXPR LINUX_VERSION_CODE "${LINUX_VERSION_MAJOR} * 65536 + ${LINUX_VERSION_MINOR} * 256 + ${LINUX_VERSION_PATCH}")
# compat with various install paths
set(BINDIR ${CMAKE_INSTALL_FULL_BINDIR})
set(SBINDIR ${CMAKE_INSTALL_FULL_SBINDIR})
set(ETCDIR ${CMAKE_INSTALL_PREFIX}/etc)
set(ROOTFSDIR "/rootfs")
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
set(KMODDIR "/lib/modules/${UNAME_R}/extra/mckernel")
set(MCKERNELDIR "${CMAKE_INSTALL_FULL_DATADIR}/mckernel/${BUILD_TARGET}")
else()
set(KMODDIR "${CMAKE_INSTALL_PREFIX}/kmod")
set(MCKERNELDIR "${CMAKE_INSTALL_PREFIX}/${BUILD_TARGET}/kernel")
endif()
set(prefix ${CMAKE_INSTALL_PREFIX})
# set rpath for everyone
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
# ihk: ultimately should support extrnal build, but add as subproject for now
if (EXISTS ${PROJECT_SOURCE_DIR}/ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
elseif (EXISTS ${PROJECT_SOURCE_DIR}/../ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "../ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
else()
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
endif()
if (EXISTS ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR})
elseif (EXISTS /${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR /${IHK_SOURCE_DIR})
else()
message(FATAL_ERROR "Could not find ihk dir, or it does not contain CMakeLists.txt, either clone ihk or run git submodule update --init")
endif()
add_subdirectory(${IHK_SOURCE_DIR} ihk)
configure_file(config.h.in config.h)
# actual build section - just subdirs
add_subdirectory(executer/kernel/mcctrl)
add_subdirectory(executer/user)
add_subdirectory(kernel)
add_subdirectory(tools/mcstat)
add_subdirectory(tools/crash)
configure_file(scripts/mcreboot-smp.sh.in mcreboot.sh @ONLY)
configure_file(scripts/mcstop+release-smp.sh.in mcstop+release.sh @ONLY)
configure_file(scripts/mcreboot.1in mcreboot.1 @ONLY)
configure_file(scripts/eclair-dump-backtrace.exp.in eclair-dump-backtrace.exp @ONLY)
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/mcreboot.sh"
"${CMAKE_CURRENT_BINARY_DIR}/mcstop+release.sh"
DESTINATION "${CMAKE_INSTALL_SBINDIR}")
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/eclair-dump-backtrace.exp"
DESTINATION "${CMAKE_INSTALL_BINDIR}")
install(FILES "scripts/irqbalance_mck.in"
DESTINATION "${CMAKE_INSTALL_SYSCONFDIR}")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mcreboot.1"
DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
configure_file(scripts/mckernel.spec.in scripts/mckernel.spec @ONLY)
set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${MCKERNEL_VERSION}")
set(CPACK_SOURCE_IGNORE_FILES "/.git/;/build;/CMakeCache.txt$;/CMakeFiles$;/Makefile$")
set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${IHK_FULL_SOURCE_DIR};/ihk;${CMAKE_BINARY_DIR}/scripts;/scripts")
set(CPACK_SOURCE_GENERATOR "TGZ")
include(CPack)
add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
# config report
message("-------------------------------")
message("Option summary")
message("-------------------------------")
message("Build type: ${CMAKE_BUILD_TYPE}")
message("Build target: ${BUILD_TARGET}")
message("IHK_SOURCE_DIR: ${IHK_SOURCE_DIR} (relative to mckernel source tree)")
message("UNAME_R: ${UNAME_R}")
message("KERNEL_DIR: ${KERNEL_DIR}")
message("SYSTEM_MAP: ${SYSTEM_MAP}")
message("VMLINUX: ${VMLINUX}")
message("KBUILD_C_FLAGS: ${KBUILD_C_FLAGS}")
message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")
message("ENABLE_WERROR: ${ENABLE_WERROR}")
message("ENABLE_UBSAN: ${ENABLE_UBSAN}")
message("ENABLE_LINUX_WORK_IRQ_FOR_IKC: ${ENABLE_LINUX_WORK_IRQ_FOR_IKC}")
message("ENABLE_PER_CPU_ALLOC_CACHE: ${ENABLE_PER_CPU_ALLOC_CACHE}")
message("-------------------------------")

KNOWN_BUGS.md (new file)
@@ -0,0 +1,70 @@
Linux crash when offlining CPU (el7, hardware-specific)
=========================================================
On some hardware running an el7 kernel, Linux can crash due to a bug in the
IRQ handling when offlining CPUs (the CPU-reservation step of mcreboot).
Example stack trace:
```
[ 4147.052753] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
[ 4147.060677] IP: [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.068226] PGD 1057e44067 PUD 105f1e7067 PMD 0
[ 4147.072935] Oops: 0000 [#1] SMP
[ 4147.076230] Modules linked in: mcctrl(OE) ihk_smp_x86_64(OE) ihk(OE) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core
[ 4147.148619] dm_mirror dm_region_hash dm_log dm_mod sb_edac edac_core intel_powerclamp coretemp ext4 mbcache jbd2 intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ipmi_ssif glue_helper ablk_helper joydev iTCO_wdt iTCO_vendor_support cryptd ipmi_si ipmi_devintf ipmi_msghandler pcspkr wmi mei_me mei lpc_ich i2c_i801 sg ioatdma shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_en sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm isci igb drm mlx4_core libsas ahci libahci scsi_transport_sas libata crct10dif_pclmul ptp crct10dif_common pps_core crc32c_intel dca i2c_algo_bit i2c_core devlink [last unloaded: ihk]
[ 4147.215370] CPU: 6 PID: 38 Comm: migration/6 Tainted: G OE ------------ T 3.10.0-693.2.2.el7.x86_64 #1
[ 4147.225672] Hardware name: SGI.COM C1104G-RP5/X9DRG-HF, BIOS 3.0 10/25/2013
[ 4147.232747] task: ffff880174689fa0 ti: ffff8801746ac000 task.ti: ffff8801746ac000
[ 4147.240278] RIP: 0010:[<ffffffff8102ce26>] [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.250275] RSP: 0018:ffff8801746afd30 EFLAGS: 00010046
[ 4147.255608] RAX: 0000000000000000 RBX: 000000000000004e RCX: 0000000000000000
[ 4147.262770] RDX: 0000000000000020 RSI: 000000000000005f RDI: 0000000000000023
[ 4147.269936] RBP: ffff8801746afd58 R08: 0000000000000001 R09: ffff88017f800490
[ 4147.277103] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000006
[ 4147.284269] R13: 0000000000000000 R14: ffff88085ca82500 R15: 000000000000005f
[ 4147.291429] FS: 0000000000000000(0000) GS:ffff88085fb80000(0000) knlGS:0000000000000000
[ 4147.299556] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4147.305326] CR2: 0000000000000040 CR3: 0000001059704000 CR4: 00000000001407e0
[ 4147.312490] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 4147.319659] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 4147.326827] Stack:
[ 4147.328857] ffff8808f43078c8 ffff8808f4307850 0000000000000286 ffff8808f4307701
[ 4147.336384] 0000000000000000 ffff8801746afd70 ffffffff81052a82 0000000200000000
[ 4147.343915] ffff8801746afd88 ffffffff81693ca3 0000000000000003 ffff8801746afdc0
[ 4147.351447] Call Trace:
[ 4147.353921] [<ffffffff81052a82>] native_cpu_disable+0x12/0x40
[ 4147.359795] [<ffffffff81693ca3>] take_cpu_down+0x13/0x40
[ 4147.365236] [<ffffffff81116899>] multi_cpu_stop+0xd9/0x100
[ 4147.370850] [<ffffffff811167c0>] ? cpu_stop_should_run+0x50/0x50
[ 4147.376983] [<ffffffff81116ab7>] cpu_stopper_thread+0x97/0x150
[ 4147.382942] [<ffffffff816a8fad>] ? __schedule+0x39d/0x8b0
[ 4147.388461] [<ffffffff810b909f>] smpboot_thread_fn+0x12f/0x180
[ 4147.394406] [<ffffffff810b8f70>] ? lg_double_unlock+0x40/0x40
[ 4147.400276] [<ffffffff810b098f>] kthread+0xcf/0xe0
[ 4147.405182] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.411319] [<ffffffff816b4f58>] ret_from_fork+0x58/0x90
[ 4147.418893] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.426524] Code: 81 fb 00 01 00 00 0f 84 8a 00 00 00 89 d8 65 44 8b 3c 85 20 c6 00 00 45 85 ff 78 e1 44 89 ff e8 91 31 10 00 48 63 15 7e 10 af 00 <48> 8b 70 40 48 c7 c7 80 71 cf 81 49 89 c6 48 83 c2 3f 48 c1 fa
[ 4147.450352] RIP [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.460135] RSP <ffff8801746afd30>
[ 4147.465154] CR2: 0000000000000040
```
This bug has been fixed upstream, but Red Hat will not backport the fixes.
You can work around the problem with a kpatch that backports the following
three commits (see the sketch after the commit list):
x86: irq: Get correct available vectors for cpu disable
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ac2a55395eddccd6e3e39532df9869d61e97b2ee
x86/irq: Check for valid irq descriptor in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d97eb8966c91f2c9d05f0a22eb89ed5b76d966d1
x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cbb24dc761d95fe39a7a122bb1b298e9604cae15
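A hypothetical outline of the kpatch route is sketched below; the patch file
name, kernel source RPM version, and generated module name are placeholders,
and the exact kpatch-build options depend on the kpatch version in use:
```
# Placeholder sketch: the three commits above exported into irq-vector-fixes.patch,
# built into a live-patch module against the running el7 kernel, then loaded.
kpatch-build -r kernel-3.10.0-693.2.2.el7.src.rpm irq-vector-fixes.patch
sudo kpatch load kpatch-irq-vector-fixes.ko
```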
Alternatively, since the problem is related to the IRQ configuration, it might
be possible to mitigate the issue by setting the IRQ affinities early
on and making sure none of the CPUs that will be offlined have any IRQ
configured.
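The following is a minimal sketch of that mitigation, assuming CPUs 0-1 are
the ones left to Linux (adjust the mask to your partitioning); IRQs that
cannot be migrated simply reject the write:
```
# Pin every movable IRQ to the Linux CPUs (here 0-1) before mcreboot
# offlines the McKernel CPUs; errors from non-movable IRQs are ignored.
for irq in /proc/irq/[0-9]*; do
    echo 0-1 > "$irq/smp_affinity_list" 2>/dev/null || true
done
```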

LICENSE (new file)
@@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

(deleted file)
@@ -1,76 +0,0 @@
TARGET = @TARGET@
SBINDIR = @SBINDIR@
ETCDIR = @ETCDIR@
MANDIR = @MANDIR@
all::
@(cd executer/kernel/mcctrl; make modules)
@(cd executer/kernel/mcoverlayfs; make modules)
@(cd executer/user; make)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
install::
@(cd executer/kernel/mcctrl; make install)
@(cd executer/kernel/mcoverlayfs; make install)
@(cd executer/user; make install)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make install) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
@case "$(TARGET)" in \
attached-mic) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
builtin-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
smp-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
mkdir -p -m 755 $(ETCDIR); \
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
clean::
@(cd executer/kernel/mcctrl; make clean)
@(cd executer/kernel/mcoverlayfs; make clean)
@(cd executer/user; make clean)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make clean) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac

NEWS.md (new file)
@@ -0,0 +1,540 @@
=============================================
What's new in version 1.7.0rc4 (Apr 15, 2020)
=============================================
----------------------
McKernel major updates
----------------------
1. arm64: Contiguous PTE support
2. arm64: Scalable Vector Extension (SVE) support
3. arm64: PMU overflow interrupt support
4. xpmem: Support large page attachment
5. arm64 port: Direct access to McKernel memory from Linux
6. arm64 port: utility thread offloading, which spawns threads onto
Linux CPUs
7. eclair: support for live debug
8. Crash utility extension
9. Replace mcoverlayfs with a soft userspace overlay
10. Build system is switched to cmake
11. Core dump includes thread information
------------------------
McKernel major bug fixes
------------------------
1. shmobj: Fix rusage counting for large page
2. mcctrl control: task start_time changed to u64 nsec
3. mcctrl: add handling for one more level of page tables
4. Add kernel argument to turn on/off time sharing
5. flatten_string/process env: realign env and clear trailing bits
6. madvise: Add MADV_HUGEPAGE support
8. mcctrl: remove in-kernel calls to syscalls
9. arch_cpu_read_write_register: error return fix.
10. set_cputime(): interrupt enable/disable fix.
11. set_mempolicy(): Add mode check.
12. mbind(): Fix memory_range_lock deadlock.
13. ihk_ikc_recv: Record channel to packet for release
14. Add set_cputime() kernel to kernel case and mode enum.
15. execve: Call preempt_enable() before error-exit
16. memory/x86_64: fix linux safe_kernel_map
17. do_kill(): fix pids table when nr of threads is larger than num_processors
18. shmget: Use transparent huge pages when page size isn't specified
19. prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
20. monitor_init: fix undetected hang on highest numbered core
21. init_process_stack: change premapped stack size based on arch
22. x86 syscalls: add a bunch of XXat() delegated syscalls
23. do_pageout: fix direct kernel-user access
24. stack: add hwcap auxval
25. perf counters: add arch-specific perf counters
26. Added check of nohost to terminate_host().
27. kmalloc: Fix address order in free list
28. sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
29. monitor_init: Use ihk_mc_cpu_info()
30. Fix ThunderX2 write-combined PTE flag insanity
31. ARM: eliminate zero page mapping (i.e, init_low_area())
32. eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
33. page_table: Fix return value of lookup_pte when ptl4 is blank
34. sysfs: add missing symlinks for cpu/node
35. Make Linux handler run when mmap to procfs.
36. Separate mmap area from program loading (relocation) area
37. move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
38. arm: turn off cpu on panic
39. page fault handler: protect thread accesses
40. Register PPD and release_handler at the same time.
41. fix to missing exclusive processing between terminate() and
finalize_process().
42. perfctr_stop: add flags to no 'disable_intens'
43. fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
44. clear_range_l1, clear_range_middle: Fix handling contiguous PTE
45. do_mmap: don't pre-populate the whole file when asked for smaller segment
46. invalidate_one_page: Support shmobj and contiguous PTE
47. ubsan: fix undefined shifts
48. x86: disable zero mapping and add a boot pt for ap trampoline
49. rusage: Don't count PF_PATCH change
50. Fixed time processing.
51. copy_user_pte: vmap area not owned by McKernel
52. gencore: Zero-clear ELF header and memory range table
53. rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
54. gencore: Allocate ELF header to heap instead of stack
55. nanosleep: add cpu_pause() in spinwait loop
56. init_process: add missing initializations to proc struct
57. rus_vm_fault: always use a packet on the stack
58. process stack: use PAGE_SIZE in aux vector
59. copy_user_pte: base memobj copy on range & VR_PRIVATE
60. arm64: ptrace: Fix overwriting 1st argument with return value
61. page fault: use cow for private device mappings
62. reproductible builds: remove most install paths in c code
63. page fault: clear writable bit for non-dirtying access to shared ranges
64. mcreboot/mcstop+release: support for regular user execution
65. irqbalance_mck: replace extra service with service drop-in
66. do_mmap: give addr argument a chance even if not MAP_FIXED
67. x86: fix xchg() and cmpxchg() macros
68. IHK: support for using Linux work IRQ as IKC interrupt (optional)
69. MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
70. procfs: add number of threads to stat and status
71. memory_range_lock: Fix deadlock in procfs/sysfs handler
72. flush instruction cache at context switch time if necessary
73. arm64: Fix PMU related functions
74. page_fault_process_memory_range: Disable COW for VM region with zeroobj
75. extend_process_region: Fall back to demand paging when not contiguous
76. munmap: fix deadlock with remote pagefault on vm range lock
77. procfs: if memory_range_lock fails, process later
78. migrate-cpu: Prevent migration target from calling schedule() twice
79. sched_request_migrate(): fix race condition between migration req and IRQs
80. get_one_cpu_topology: Renumber core_id (physical core id)
81. bb7e140 procfs cpuinfo: use sequence number as processor
82. set_host_vma(): do NOT read protect Linux VMA
===========================================
What's new in V1.6.0 (Nov 11, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. McKernel and Linux share one unified kernel virtual address space.
That is, McKernel sections reside in the Linux sections set aside for
modules. In this way, Linux can access the McKernel kernel memory
area.
2. hugetlbfs support
3. IHK is now included as a git submodule
4. Debug messages are turned on/off on a per-source-file basis at run-time.
5. It's prohibited for McKernel to access physical memory ranges which
Linux didn't give to McKernel.
6. UTI (capability to spawn a thread on a Linux CPU) improvement:
* System calls issued from the thread are hooked by modifying the
binary in memory.
---------------------------
McKernel bug fixes (digest)
---------------------------
#<num> below corresponds to the redmine issue number
(https://postpeta.pccluster.org/redmine/).
1. #926: shmget: Hide object with IPC_RMID from shmget
2. #1028: init_process: Inherit parent cpu_set
3. #995: Fix shebang recorded in argv[0]
4. #1024: Fix VMAP virtual address leak
5. #1109: init_process_stack: Support "ulimit -s unlimited"
6. x86 mem init: do not map identity mapping
7. mcexec_wait_syscall: requeue potential request on interrupted wait
8. mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
9. pager_req_read: handle short read
10. kprintf: only call eventfd() if it is safe to interrupt
11. process_procfs_request: Add Pid to /proc/<PID>/status
12. terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
13. mcexec: Do not close fd returned to mckernel side
14. #976: execve: Clear sigaltstack and fp_regs
15. #1002: perf_event: Specify counter by bit_mask on start/stop
16. #1027: schedule: Don't reschedule immediately when wake up on migrate
17. mcctrl: lookup unexported symbols at runtime
18. __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
19. futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
20. #1167: ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
21. devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
22. mcctrl: remove rus page cache
23. #1021: procfs: Support multiple reads of e.g. /proc/*/maps
24. #1006: wait: Delay wake-up parent within switch context
25. #1164: mem: Check if phys-mem is within the range of McKernel memory
26. #1039: page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
27. partitioned execution: pass process rank to LWK
28. process/vm: implement access_ok()
29. spinlock: rewrite spinlock to use Linux ticket head/tail format
30. #986: Fix deadlock involving mmap_sem and memory_range_lock
31. Prevent one CPU from getting chosen by concurrent forks
32. #1009: check_signal: system call restart is done only once
33. #1176: syscall: the signal received during system call processing is not processed.
34. #1036 syscall_time: Handle by McKernel
35. #1165 do_syscall: Delegate system calls to the mcexec with the same pid
36. #1194 execve: Fix calling ptrace_report_signal after preemption is disabled
37. #1005 coredump: Exclude special areas
38. #1018 procfs: Fix pread/pwrite to procfs fail when specified size is bigger than 4MB
39. #1180 sched_setaffinity: Check migration after decrementing in_interrupt
40. #771, #1179, #1143 ptrace supports threads
41. #1189 procfs/do_fork: wait until procfs entries are registered
42. #1114 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
43. #1116 mcctrl procfs: check entry was returned before using it
44. #1167 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC timeouts
45. mcexec/execve: fix shebangs handling
46. procfs: handle 'comm' on mckernel side
47. ihk_os_setperfevent: Return number of registered events
48. mcexec: fix terminating zero after readlink()
===========================================
What's new in V1.5.1 (July 9, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. Watchdog timer to detect hang of McKernel
mcexec prints out the following line to its stderr when a hang of
McKernel is detected.
mcexec detected hang of McKernel
The watchdog timer is enabled by passing -i <timeout_in_sec> option
to mcreboot.sh. <timeout_in_sec> specifies the interval of checking
if McKernel is alive.
Example: mcreboot.sh -i 600: Detect the hang with a 10-minute interval
The detailed step of the hang detection is as follows.
(1) mcexec acquires eventfd for notification from IHK and perform
epoll() on it.
(2) A daemon called ihkmond monitors the state of McKernel periodically
with the interval specified by the -i option. It judges that
McKernel is hanging and notifies mcexec by the eventfd if its
state hasn't changed since the last check.
2. Documentation
man page: Installed directory is changed to <install_dir>/share/man
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #1146: pager_req_map(): do not take mmap_sem if not needed
2. #1135: prepare_process_ranges_args_envs(): fix saving cmdline
3. #1144: fileobj/devobj: record path name
4. #1145: fileobj: use MCS locks for per-file page hash
5. #1076: mcctrl: refactor prepare_image into new generic ikc send&wait
6. #1072: execve: fix execve with oversubscribing
7. #1132: execve: use thread variable instead of cpu_local_var(current)
8. #1117: mprotect: do not set page table writable for cow pages
9. #1143: syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
10. #1064: rusage: Fix initialization of rusage->num_processors
11. #1133: pager_req_unmap: Put per-process data at exit
12. #731: do_fork: Propagate error code returned by mcexec
13. #1149: execve: Reinitialize vm_regions's map area on execve
14. #1065: procfs: Show file names in /proc/<PID>/maps
15. #1112: mremap: Fix type of size arguments (from ssize_t to size_t)
16. #1121: sched_getaffinity: Check arguments in the same order as in Linux
17. #1137: mmap, mremap: Check arguments in the same order as in Linux
18. #1122: fix return value of sched_getaffinity
19. #732: fix: /proc/<PID>/maps outputs a unnecessary NULL character
===================================
What's new in V1.5.0 (Apr 5, 2018)
===================================
--------------------------------------
McKernel new features and improvements
--------------------------------------
1. Aid for Linux version migration: Detect /proc, /sys format change
between two kernel versions
2. Swap out
* Only swap-out anonymous pages for now
3. Improve support of /proc/maps
4. mcstat: Linux tool to show resource usage
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #727: execve: Fix memory leak when receiving SIGKILL
2. #829: perf_event_open: Support PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
3. #906: mcexec: Check return code of fork()
4. #1038: mcexec: Timeout when incorrect value is given to -n option
5. #943 #945 #946 #960 #961: mcexec: Support strace
6. #1029: struct thread is not released with stress-test involving signal
and futex
7. #863 #870: Respond immediately to terminating signal when
offloading system call
8. #1119: translate_rva_to_rpa(): use 2MB blocks in 1GB pages on x86
11. #898: Shutdown OS only after no in-flight IKC exist
12. #882: release_handler: Destroy objects as the process which opened it
13. #882: mcexec: Make child process exit if the parent is killed during
fork()
14. #925: XPMEM: Don't destroy per-process object of the parent
15. #885: ptrace: Support the case where a process attaches its child
16. #1031: sigaction: Support SA_RESETHAND
17. #923: rus_vm_fault: Return error when a thread not performing
system call offloading causes remote page fault
18. #1032 #1033 #1034: getrusage: Fix ru_maxrss, RUSAGE_CHILDREN,
ru_stime related bugs
19. #1120: getrusage: Fix deadlock on thread->times_update
20. #1123: Fix deadlock related to wait_queue_head_list_node
21. #1124: Fix deadlock of calling terminate() from terminate()
22. #1125: Fix deadlock related to thread status
* Related functions are: hold_thread(), do_kill() and terminate()
23. #1126: uti: Fix uti thread on the McKernel side blocks others in do_syscall()
24. #1066: procfs: Show Linux /proc/self/cgroup
25. #1127: prepare_process_ranges_args_envs(): fix generating saved_cmdline to
avoid PF in strlen()
26. #1128: ihk_mc_map/unmap_virtual(): do proper TLB invalidation
27. #1043: terminate(): fix update_lock and threads_lock order to avoid deadlock
28. #1129: mcreboot.sh: Save /proc/irq/*/smp_affinity to /tmp/mcreboot
29. #1130: mcexec: drop READ_IMPLIES_EXEC from personality
--------------------
McKernel workarounds
--------------------
1. Forbid CPU oversubscription
* It can be turned on by mcreboot.sh -O option
===================================
What's new in V1.4.0 (Oct 30, 2017)
===================================
-----------------------------------------------------------
Feature: Abstracted event type support in perf_event_open()
-----------------------------------------------------------
PERF_TYPE_HARDWARE and PERF_TYPE_CACHE types are supported.
----------------------------------
Clean-up: Direct user-space access
----------------------------------
Code lines using direct user-space access (e.g. passing user-space
pointer to memcpy()) becomes more portable across processor
architectures. The modification follows the following rules.
1. Move the code section as it is to the architecture dependent
directory if it is a part of the critical-path.
2. Otherwise, rewrite the code section by using the portable methods.
The methods include copy_from_user(), copy_to_user(),
pte_get_phys() and phys_to_virt().
--------------------------------
Test: MPI and OpenMP micro-bench
--------------------------------
The performance figures of MPI and OpenMP primitives are compared with
those of Linux by using Intel MPI Benchmarks and EPCC OpenMP Micro
Benchmark.
===================================
What's new in V1.3.0 (Sep 30, 2017)
===================================
--------------------
Feature: Kernel dump
--------------------
1. A dump level of "only kernel memory" is added.
The following two levels are available now:
0: Dump all
24: Dump only kernel memory
The dump level can be set by -d option in ihkosctl or the argument
for ihk_os_makedumpfile(), as shown in the following examples:
Command: ihkosctl 0 dump -d 24
Function call: ihk_os_makedumpfile(0, NULL, 24, 0);
2. Dump file is created when Linux panics.
The dump level can be set by dump_level kernel argument, as shown in the
following example:
ihkosctl 0 kargs "hidos dump_level=24"
The IHK dump function is registered to panic_notifier_list when creating
/dev/mcdX and called when Linux panics.
-----------------------------
Feature: Quick Process Launch
-----------------------------
MPI process launch time and some of the initialization time can be
reduced in application consisting of multiple MPI programs which are
launched in turn in the job script.
The following two steps should be performed to use this feature:
1. Replace mpiexec with ql_mpiexec_start and add some lines for
ql_mpiexec_finalize in the job script
2. Modify the app so that it can repeat calculations and wait for the
instructions from ql_mpiexec_{start,finalize} at the end of the
loop
The first step is explained using an example. Assume the original job
script looks like this:
/* Execute ensemble simulation and then data assimilation, and repeat this
ten times */
for i in {1..10}; do
/* Each ensemble simulation execution uses 100 nodes, launch ten of them
in parallel */
for j in {1..10}; do
mpiexec -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
/* Wait until the ten ensemble simulation programs finish */
for j in {1..10}; do wait ${pids[$j]}; done
/* Launch one data assimilation program using 1000 nodes */
mpiexec -n 1000 -machinefile ./list2 p2.out a2
done
The job script should be modified like this:
for i in {1..10}; do
for j in {1..10}; do
/* Replace mpiexec with ql_mpiexec_start */
ql_mpiexec_start -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
for j in {1..10}; do wait ${pids[$j]}; done
ql_mpiexec_start -n 1000 -machinefile ./list2 p2.out a2
done
/* p1.out and p2.out don't exit but are waiting for the next calculation.
So tell them to exit */
for j in {1..10}; do
ql_mpiexec_finalize -machinefile ./list1_$j p1.out a1;
done
ql_mpiexec_finalize -machinefile ./list2 p2.out a2;
The second step is explained using a pseudo-code.
MPI_Init();
Prepare data exchange with preceding / following MPI programs
loop:
foreach Fortran module
Initialize data using command-line arguments, parameter files,
environment variables
Input data from preceding MPI programs / Read snap-shot
Perform main calculation
Output data to following MPI programs / Write snap-shot
/* ql_client() waits for commands from ql_mpiexec_{start,finalize} */
if (ql_client() == QL_CONTINUE) { goto loop; }
MPI_Finalize();
qlmpilib.h should be included in the code and libql{mpi,fort}.so
should be linked to the executable file.
========================
Restrictions on McKernel
========================
1. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
correctly even if the mmap() returns a success. An access of their
mapping receives the SIGSEGV signal.
2. clone() supports only the following flags. All the other flags
cause clone() to return an error or are simply ignored.
* CLONE_CHILD_CLEARTID
* CLONE_CHILD_SETTID
* CLONE_PARENT_SETTID
* CLONE_SETTLS
* CLONE_SIGHAND
* CLONE_VM
3. PAPI has the following restriction.
* Number of counters a user can use at the same time is up to the
number of the physical counters in the processor.
4. msync writes back only the modified pages mapped by the calling process.
5. The following syscalls always return the ENOSYS error.
* migrate_pages()
* move_pages()
* set_robust_list()
6. The following syscalls always return the EOPNOTSUPP error.
* arch_prctl(ARCH_SET_GS)
* signalfd()
7. signalfd4() returns a fd, but signal is not notified through the
fd.
8. set_rlimit sets the limit values but they are not enforced.
9. Address randomization is not supported.
10. brk() extends the heap by more than requested when the -h
(--extend-heap-by=)<step> option of mcexec is used with a value
larger than 4 KiB. syscall_pwrite02 of LTP would fail for this
reason. This is because the test expects that the end of the heap
is set to the same address as the argument of sbrk() and expects a
segmentation violation to occur when it tries to access the memory
area right next to the boundary. However, the optimization sets
the end to a value larger than requested. Therefore, the
expected segmentation violation doesn't occur.
11. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no
fixed correspondence between a McKernel thread which issues the
system call and a mcexec thread which handles the offload request.
12. mbind() can set the policy but it is not used when allocating
physical pages.
13. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
14. The MPOL_BIND policy for set_mempolicy()/mbind() works as the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are
running out of pages but continues to search pages in the other
nodes.
15. Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and
later. In addition, crash_kexec_post_notifiers kernel argument
must be given to Linux kernel.
16. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, it changes that of the mcexec worker thread which takes
the system-call offload request.
17. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
18. Sticky bit on executable file has no effect.
19. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to the Linux bug, found in
Linux-3.10 and fixed in the later version. One way to circumvent
this is to always assign the same CPU set to McKernel.
20. madvise:
* MADV_HWPOISON and MADV_SOFT_OFFLINE always return -EPERM.
* MADV_MERGEABLE and MADV_UNMERGEABLE always return -EINVAL.
* MADV_HUGEPAGE and MADV_NOHUGEPAGE on file maps return -EINVAL
(they succeed on RHEL-8 for aarch64).
21. brk() and mmap() don't report out-of-memory through their return
values. Instead, page-fault reports the error.
22. Anonymous mmap pre-maps the requested number of pages when contiguous
pages are available. Demand paging is used when not available.
23. Mixing page sizes in anonymous shared mapping is not allowed. mmap
creates vm_range with one page size. And munmap or mremap that
needs the reduced page size changes the sizes of all the pages of
the vm_range.
24. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
25. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.

README.md (new file)
@@ -0,0 +1,290 @@
![McKernel Logo](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel-logo.png)
-------------------------
IHK/McKernel is a light-weight multi-kernel operating system designed for high-end supercomputing. It runs Linux and McKernel, a light-weight kernel (LWK), side-by-side inside compute nodes and aims at the following:
- Provide scalable and consistent execution of large-scale parallel scientific applications, but at the same time maintain the ability to rapidly adapt to new hardware features and emerging programming models
- Provide efficient memory and device management so that resource contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide jitter free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading (slow-path) system calls to Linux
## Contents
- [Background](#background-and-motivation)
- [Architectural Overview](#architectural-overview)
- [Installation](#installation)
- [The Team](#the-team)
## Background and Motivation
With the growing complexity of high-end supercomputers, the current system software stack faces significant challenges as we move forward to exascale and beyond. The necessity to deal with extreme degree of parallelism, heterogeneous architectures, multiple levels of memory hierarchy, power constraints, etc., advocates operating systems that can rapidly adapt to new hardware requirements, and that can support novel programming paradigms and runtime systems. On the other hand, a new class of more dynamic and complex applications are also on the horizon, with an increasing demand for application constructs such as in-situ analysis, workflows, elaborate monitoring and performance tools. This complexity relies not only on the rich features of POSIX, but also on the Linux APIs (such as the */proc*, */sys* filesystems, etc.) in particular.
##### Two Traditional HPC OS Approaches
Traditionally, light-weight operating systems specialized for HPC followed two approaches to tackle scalable execution of large-scale applications. In the full weight kernel (FWK) approach, a full Linux environment is taken as the basis, and features that inhibit attaining HPC scalability are removed, i.e., making it light-weight. The pure light-weight kernel (LWK) approach, on the other hand, starts from scratch and effort is undertaken to add sufficient functionality so that it provides a familiar API, typically something close to that of a general purpose OS, while at the same time it retains the desired scalability and reliability attributes. Neither of these approaches yields a fully Linux compatible environment.
##### The Multi-kernel Approach
A hybrid approach recognized recently by the system software community is to run Linux simultaneously with a lightweight kernel on compute nodes and multiple research projects are now pursuing this direction. The basic idea is that simulations run on an HPC tailored lightweight kernel, ensuring the necessary isolation for noiseless execution of parallel applications, but Linux is leveraged so that the full POSIX API is supported. Additionally, the small code base of the LWK can also facilitate rapid prototyping for new, exotic hardware features. Nevertheless, the questions of how to share node resources between the two types of kernels, where do device drivers execute, how exactly do the two kernels interact with each other and to what extent are they integrated, remain subjects of ongoing debate.
## Architectural Overview
At the heart of the stack is a low-level software infrastructure called Interface for Heterogeneous Kernels (IHK). IHK is a general framework that provides capabilities for partitioning resources in a many-core environment (e.g.,CPU cores and physical memory) and it enables management of lightweight kernels. IHK can allocate and release host resources dynamically and no reboot of the host machine is required when altering configuration. IHK also provides a low-level inter-kernel messaging infrastructure, called the Inter-Kernel Communication (IKC) layer. An architectural overview of the main system components is shown below.
![arch](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel.png)
McKernel is a lightweight kernel written from scratch. It is designed for HPC and is booted from IHK. McKernel retains a binary compatible ABI with Linux; however, it implements only a small set of performance-sensitive system calls and the rest are offloaded to Linux. Specifically, McKernel has its own memory management, it supports processes and multi-threading with a simple round-robin cooperative (tick-less) scheduler, and it implements signaling. It also allows inter-process memory mappings and it provides interfaces to hardware performance counters.
### Functionality
An overview of some of the principal functionalities of the IHK/McKernel stack is provided below.
#### System Call Offloading
System call forwarding in McKernel is implemented as follows. When an offloaded system call occurs, McKernel marshals the system call number along with its arguments and sends a message to Linux via a dedicated IKC channel. The corresponding proxy process running on Linux is by default waiting for system call requests through an ioctl() call into IHK's system call delegator kernel module. The delegator kernel module's IKC interrupt handler wakes up the proxy process, which returns to userspace and simply invokes the requested system call. Once it obtains the return value, it instructs the delegator module to send the result back to McKernel, which subsequently passes the value to user-space.
#### Unified Address Space
The unified address space model in IHK/McKernel ensures that offloaded system calls can seamlessly resolve arguments even in case of pointers. This mechanism is depicted below and is implemented as follows.
![unified_ap](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/img/unified_address_space_en.png)
First, the proxy process is compiled as a position independent binary, which enables us to map the code and data segments specific to the proxy process to an address range which is explicitly excluded from McKernel's user space. The grey box on the right side of the figure demonstrates the excluded region. Second, the entire valid virtual address range of McKernel's application user-space is covered by a special mapping in the proxy process for which we use a pseudo file mapping in Linux. This mapping is indicated by the blue box on the left side of the figure.
## Installation
For a smooth experience, we recommend the following combinations of OS distribution and platform:
- CentOS 7.3+ running on Intel Xeon, Xeon Phi, Fujitsu A64FX
##### 1. Change SELinux settings
Log in as root and disable SELinux:
~~~~
vim /etc/selinux/config
~~~~
In that file, set SELINUX=disabled.
##### 2. Reboot the host machine
~~~~
sudo reboot
~~~~
##### 3. Prepare packages and the kernel symbol table file
You will need the following packages installed:
~~~~
sudo yum install cmake kernel-devel binutils-devel systemd-devel numactl-devel gcc make nasm git libdwarf-devel
~~~~
Note that to install libdwarf-devel on RHEL-8.2, you need to enable the CodeReady Linux Builder repository and the EPEL repository with the following commands:
~~~~
sudo subscription-manager repos --enable codeready-builder-for-rhel-8-$(/bin/arch)-rpms
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
~~~~
Grant read permission to the System.map file of your kernel version:
~~~~
sudo chmod a+r /boot/System.map-`uname -r`
~~~~
##### 4. Obtain sources and compile the kernel
Clone the source code:
~~~~
mkdir -p ~/src/ihk+mckernel/
cd ~/src/ihk+mckernel/
git clone --recursive -b development https://github.com/RIKEN-SysSoft/mckernel.git
~~~~
(Optional) Check out a specific branch or version:
~~~~
cd mckernel
git checkout <pathspec>
git submodule update
~~~~
For example, if you want to try the development branch, use "development" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, use "1.7.0-0.2".
###### 4.1 Install with cmake
Configure and compile:
~~~~
mkdir -p build && cd build
cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel $HOME/src/ihk+mckernel/mckernel
make -j install
~~~~
The IHK kernel modules and McKernel kernel image should be installed under the **ihk+mckernel** folder in your home directory.
###### 4.2 Install with rpm
Build rpm:
~~~~
mkdir -p build && cd build
cmake $HOME/src/ihk+mckernel/mckernel
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
rpmbuild -ba scripts/mckernel.spec
sudo rpm -ivh <rpmbuild>/RPMS/<arch>/mckernel-<version>-<release>_<linux_kernel_ver>_<dist>.<arch>.rpm
~~~~
The IHK kernel modules and McKernel kernel image are installed under the system directory.
##### 5. Boot McKernel
A boot script called mcreboot.sh is provided under sbin in the install folder. To boot on logical CPU 1 with 512MB of memory, use the following invocation:
~~~~
export TOP=${HOME}/ihk+mckernel/
cd ${TOP}
sudo ./sbin/mcreboot.sh -c 1 -m 512m
~~~~
You should see something similar to this if you display McKernel's kernel message log:
~~~~
./sbin/ihkosctl 0 kmsg
IHK/McKernel started.
[ -1]: no_execute_available: 1
[ -1]: map_fixed: phys: 0xfee00000 => 0xffff860000009000 (1 pages)
[ -1]: setup_x86 done.
[ -1]: ns_per_tsc: 385
[ -1]: KCommand Line: hidos dump_level=24
[ -1]: Physical memory: 0x1ad3000 - 0x21000000, 525520896 bytes, 128301 pages available @ NUMA: 0
[ -1]: NUMA: 0, Linux NUMA: 0, type: 1, available bytes: 525520896, pages: 128301
[ -1]: NUMA 0 distances: 0 (10),
[ -1]: map_fixed: phys: 0x28000 => 0xffff86000000a000 (2 pages)
[ -1]: Trampoline area: 0x28000
[ -1]: map_fixed: phys: 0x0 => 0xffff86000000c000 (1 pages)
[ -1]: # of cpus : 1
[ -1]: locals = ffff880001af6000
[ 0]: BSP: 0 (HW ID: 1 @ NUMA 0)
[ 0]: BSP: booted 0 AP CPUs
[ 0]: Master channel init acked.
[ 0]: vdso is enabled
IHK/McKernel booted.
~~~~
##### 6. Run a simple program on McKernel
The mcexec command-line tool (which is also the Linux proxy process) can be used to execute applications on McKernel:
~~~~
./bin/mcexec hostname
centos-vm
~~~~
##### 7. Shutdown McKernel
Finally, to shut down McKernel and release CPU and memory resources back to Linux, use the following command:
~~~~
sudo ./sbin/mcstop+release.sh
~~~~
##### 8. Advanced: Enable Utility Thread offloading Interface (UTI)
UTI enables a runtime, such as an MPI runtime, to spawn utility threads (for example, MPI asynchronous progress threads) onto Linux-managed cores.
###### 8.1 Install capstone
Install EPEL capstone-devel:
~~~~
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo yum install capstone-devel
~~~~
###### 8.2 Install syscall_intercept
~~~~
git clone https://github.com/RIKEN-SysSoft/syscall_intercept.git
cd syscall_intercept && mkdir build && cd build
cmake ../arch/aarch64 -DCMAKE_INSTALL_PREFIX=<syscall-intercept-install> -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
make && make install
~~~~
###### 8.3 Install UTI for McKernel
Install:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<mckernel-install> --with-rm=mckernel
make && make install
~~~~
###### 8.4 Install McKernel
~~~~
CMAKE_PREFIX_PATH=<syscall-intercept-install> cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
make -j install
~~~~
###### 8.5 Run executable
~~~~
mcexec --enable-uti <command>
~~~~
###### 8.6 Install UTI for Linux for performance comparison
Install by make:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
make && make install
~~~~
Install by rpm:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
rm -f ~/rpmbuild/SOURCES/<version>.tar.gz
rpmbuild -ba ./scripts/uti.spec
rpm -Uvh uti-<version>-<release>-<arch>.rpm
~~~~
## The Team
The McKernel project was started at the University of Tokyo and is currently developed mainly at RIKEN.
Some of our collaborators include:
- Hitachi
- Fujitsu
- CEA (France)
- NEC
## License
McKernel is GPL-licensed; see the LICENSE file for details.
## Contact
Please send us your feedback via one of the following mailing lists. Subscription via [www.pccluster.org](http://www.pccluster.org/mailman/listinfo/mckernel-users) is required.
* English: mckernel-users@pccluster.org
* Japanese: mckernel-users-jp@pccluster.org

@@ -0,0 +1,27 @@
# Makefile.arch.in COPYRIGHT FUJITSU LIMITED 2015-2018
VDSO_SRCDIR = $(SRC)/../arch/$(IHKARCH)/kernel/vdso
VDSO_BUILDDIR = @abs_builddir@/vdso
VDSO_SO_O = $(O)/vdso.so.o
IHK_OBJS += assert.o cache.o cpu.o cputable.o context.o entry.o entry-fpsimd.o
IHK_OBJS += fault.o head.o hyp-stub.o local.o perfctr.o perfctr_armv8pmu.o proc.o proc-macros.o
IHK_OBJS += psci.o smp.o trampoline.o traps.o fpsimd.o
IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o timer.o
IHK_OBJS += $(notdir $(VDSO_SO_O)) memory.o syscall.o vdso.o
IHK_OBJS += irq-gic-v2.o irq-gic-v3.o
IHK_OBJS += memcpy.o memset.o
IHK_OBJS += cpufeature.o
IHK_OBJS += imp-sysreg.o
IHK_OBJS += coredump.o
$(VDSO_SO_O): $(VDSO_BUILDDIR)/vdso.so
$(VDSO_BUILDDIR)/vdso.so: FORCE
$(call echo_cmd,BUILD VDSO,$(TARGET))
mkdir -p $(O)/vdso
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS)
FORCE:

@@ -0,0 +1,56 @@
/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
#include <process.h>
#include <list.h>
#include <ihk/debug.h>
#include <ihk/context.h>
#include <asm-offsets.h>
#include <cputable.h>
#include <thread_info.h>
#include <smp.h>
#include <ptrace.h>
/* assert for struct pt_regs member offset & size define */
STATIC_ASSERT(offsetof(struct pt_regs, regs[0]) == S_X0);
STATIC_ASSERT(offsetof(struct pt_regs, regs[1]) == S_X1);
STATIC_ASSERT(offsetof(struct pt_regs, regs[2]) == S_X2);
STATIC_ASSERT(offsetof(struct pt_regs, regs[3]) == S_X3);
STATIC_ASSERT(offsetof(struct pt_regs, regs[4]) == S_X4);
STATIC_ASSERT(offsetof(struct pt_regs, regs[5]) == S_X5);
STATIC_ASSERT(offsetof(struct pt_regs, regs[6]) == S_X6);
STATIC_ASSERT(offsetof(struct pt_regs, regs[7]) == S_X7);
STATIC_ASSERT(offsetof(struct pt_regs, regs[30]) == S_LR);
STATIC_ASSERT(offsetof(struct pt_regs, sp) == S_SP);
STATIC_ASSERT(offsetof(struct pt_regs, pc) == S_PC);
STATIC_ASSERT(offsetof(struct pt_regs, pstate) == S_PSTATE);
STATIC_ASSERT(offsetof(struct pt_regs, orig_x0) == S_ORIG_X0);
STATIC_ASSERT(offsetof(struct pt_regs, orig_pc) == S_ORIG_PC);
STATIC_ASSERT(offsetof(struct pt_regs, syscallno) == S_SYSCALLNO);
STATIC_ASSERT(sizeof(struct pt_regs) == S_FRAME_SIZE);
/* assert for struct cpu_info member offset & size define */
STATIC_ASSERT(offsetof(struct cpu_info, cpu_setup) == CPU_INFO_SETUP);
STATIC_ASSERT(sizeof(struct cpu_info) == CPU_INFO_SZ);
/* assert for struct thread_info member offset define */
STATIC_ASSERT(offsetof(struct thread_info, flags) == TI_FLAGS);
STATIC_ASSERT(offsetof(struct thread_info, cpu_context) == TI_CPU_CONTEXT);
/* assert for arch depend kernel stack size and common kernel stack pages */
STATIC_ASSERT((KERNEL_STACK_SIZE * 2) < (KERNEL_STACK_NR_PAGES * PAGE_SIZE));
/* assert for struct secondary_data member offset define */
STATIC_ASSERT(offsetof(struct secondary_data, stack) == SECONDARY_DATA_STACK);
STATIC_ASSERT(offsetof(struct secondary_data, next_pc) == SECONDARY_DATA_NEXT_PC);
STATIC_ASSERT(offsetof(struct secondary_data, arg) == SECONDARY_DATA_ARG);
/* assert for sve defines */
/* @ref.impl arch/arm64/kernel/signal.c::BUILD_BUG_ON in the init_user_layout */
STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved) > ALIGN_UP(sizeof(struct _aarch64_ctx), 16));
STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved) -
ALIGN_UP(sizeof(struct _aarch64_ctx), 16) > sizeof(struct extra_context));
STATIC_ASSERT(SVE_PT_FPSIMD_OFFSET == sizeof(struct user_sve_header));
STATIC_ASSERT(SVE_PT_SVE_OFFSET == sizeof(struct user_sve_header));
/* assert for struct arm64_cpu_local_thread member offset define */
STATIC_ASSERT(offsetof(struct arm64_cpu_local_thread, panic_regs) == 168);

arch/arm64/kernel/cache.S
@@ -0,0 +1,39 @@
/* cache.S COPYRIGHT FUJITSU LIMITED 2015 */
#include <linkage.h>
#include "proc-macros.S"
/*
* __inval_cache_range(start, end)
* - start - start address of region
* - end - end address of region
*/
ENTRY(__inval_cache_range)
/* FALLTHROUGH */
/*
* __dma_inv_range(start, end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
__dma_inv_range:
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
bic x1, x1, x3
b.eq 1f
dc civac, x1 // clean & invalidate D / U line
1: tst x0, x3 // start cache line aligned?
bic x0, x0, x3
b.eq 2f
dc civac, x0 // clean & invalidate D / U line
b 3f
2: dc ivac, x0 // invalidate D / U line
3: add x0, x0, x2
cmp x0, x1
b.lo 2b
dsb sy
ret
ENDPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)

arch/arm64/kernel/context.c
@@ -0,0 +1,191 @@
/* context.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <ihk/context.h>
#include <ihk/debug.h>
#include <thread_info.h>
#include <cputype.h>
#include <mmu_context.h>
#include <arch-memory.h>
#include <irqflags.h>
#include <lwk/compiler.h>
#include <bitops.h>
/* @ref.impl arch/arm64/include/asm/mmu_context.h::MAX_ASID_BITS */
#define MAX_ASID_BITS 16
#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS)
#define ASID_MASK ((1 << MAX_ASID_BITS) - 1)
#define VERSION_MASK (0xFFFF << MAX_ASID_BITS)
/* @ref.impl arch/arm64/mm/context.c::asid_bits */
#define asid_bits(reg) \
(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
#define MAX_CTX_NR (1UL << MAX_ASID_BITS)
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR) = { 1 }; /* context number 0 reserved. */
/* cpu_asid lock */
static ihk_spinlock_t cpu_asid_lock = SPIN_LOCK_UNLOCKED;
/* last allocation ASID, initialized by 0x0001_0000 */
static unsigned int cpu_last_asid = ASID_FIRST_VERSION;
/* @ref.impl arch/arm64/mm/context.c::set_mm_context */
/* set asid for kernel_context_t.context */
static void set_mm_context(struct page_table *pgtbl, unsigned int asid)
{
unsigned int context = get_address_space_id(pgtbl);
if (likely((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
set_address_space_id(pgtbl, asid);
}
}
/* @ref.impl arch/arm64/mm/context.c::__new_context */
/* ASID allocation for new process function */
static inline void __new_context(struct page_table *pgtbl)
{
unsigned int asid;
unsigned int bits = asid_bits();
unsigned long flags;
unsigned int context = get_address_space_id(pgtbl);
unsigned long index = 0;
flags = ihk_mc_spinlock_lock(&cpu_asid_lock);
/* already assigned context number? */
if (!unlikely((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
/* true, unnecessary assigned context number */
ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
return;
}
/* false, necessary assigned context number */
/* search from the previous assigned number */
index = (cpu_last_asid & ASID_MASK) + 1;
asid = find_next_zero_bit(mmu_context_bmap, MAX_CTX_NR, index);
/* upper limit exceeded */
if (asid >= (1 << bits)) {
/* re assigned context number, search from 1 */
asid = find_next_zero_bit(mmu_context_bmap, index, 1);
/* upper previous assigned number, goto panic */
if (unlikely(asid >= index)) {
ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
panic("__new_context(): PANIC: Context Number Depletion.\n");
}
}
/* set assigned context number bitmap */
mmu_context_bmap[asid >> 6] |= (1UL << (asid & 63));
/* set previous assigned context number */
cpu_last_asid = asid | (cpu_last_asid & VERSION_MASK);
set_mm_context(pgtbl, cpu_last_asid);
ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
}
void free_mmu_context(struct page_table *pgtbl)
{
unsigned int context = get_address_space_id(pgtbl);
unsigned int nr = context & ASID_MASK;
unsigned long flags = ihk_mc_spinlock_lock(&cpu_asid_lock);
/* clear used context number bitmap */
mmu_context_bmap[nr >> 6] &= ~(1UL << (nr & 63));
ihk_mc_spinlock_unlock(&cpu_asid_lock, flags);
}
/* set ttbr0 assembler code extern */
/* in arch/arm64/kernel/proc.S */
extern void *cpu_do_switch_mm(translation_table_t* tt_pa, unsigned int asid);
/* @ref.impl arch/arm64/include/asm/mmu_context.h::switch_new_context */
/* ASID allocation for new process */
static inline void switch_new_context(struct page_table *pgtbl)
{
unsigned long flags;
translation_table_t* tt_pa;
unsigned int context;
/* ASID allocation */
__new_context(pgtbl);
context = get_address_space_id(pgtbl);
/* disable interrupt save */
flags = cpu_disable_interrupt_save();
tt_pa = get_translation_table_as_paddr(pgtbl);
cpu_do_switch_mm(tt_pa, context & ASID_MASK);
/* interrupt restore */
cpu_restore_interrupt(flags);
}
/* @ref.impl arch/arm64/include/asm/mmu_context.h::check_and_switch_context */
/* ASID allocation */
void switch_mm(struct page_table *pgtbl)
{
unsigned int context = get_address_space_id(pgtbl);
/* During switch_mm, you want to disable the TTBR */
cpu_set_reserved_ttbr0();
/* check new process or existing process */
if (!((context ^ cpu_last_asid) >> MAX_ASID_BITS)) {
translation_table_t* tt_pa;
/* for existing process */
tt_pa = get_translation_table_as_paddr(pgtbl);
cpu_do_switch_mm(tt_pa, context & ASID_MASK);
/* TODO: tif_switch_mm / after context switch */
// } else if (irqs_disabled()) {
// /*
// * Defer the new ASID allocation until after the context
// * switch critical region since __new_context() cannot be
// * called with interrupts disabled.
// */
// set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
} else {
/* for new process */
/* ASID allocation & set ttbr0 */
switch_new_context(pgtbl);
}
}
/* context switch assembler code extern */
/* in arch/arm64/kernel/entry.S */
extern void *cpu_switch_to(struct thread_info *prev, struct thread_info *next, void *prev_proc);
/* context switch C function */
/* TODO: fpreg etc.. save & restore */
static inline void *switch_to(struct thread_info *prev,
struct thread_info *next,
void *prev_proc)
{
void *last = NULL;
next->cpu = ihk_mc_get_processor_id();
last = cpu_switch_to(prev, next, prev_proc);
return last;
}
/* common unit I/F, for context switch */
void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx,
ihk_mc_kernel_context_t *new_ctx,
void *prev)
{
struct thread_info *prev_ti = NULL;
struct thread_info *next_ti = NULL;
/* get next thread_info addr */
next_ti = new_ctx->thread;
if (likely(old_ctx)) {
/* get prev thread_info addr */
prev_ti = old_ctx->thread;
}
/* switch next thread_info & process */
return switch_to(prev_ti, next_ti, prev);
}

@@ -0,0 +1,194 @@
/* copy_template.S COPYRIGHT FUJITSU LIMITED 2017 */
/*
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
*
* Parameters:
* x0 - dest
* x1 - src
* x2 - n
* Returns:
* x0 - dest
*/
dstin .req x0
src .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
dst .req x6
A_l .req x7
A_h .req x8
B_l .req x9
B_h .req x10
C_l .req x11
C_h .req x12
D_l .req x13
D_h .req x14
mov dst, dstin
cmp count, #16
/*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
neg tmp2, src
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned
sub count, count, tmp2
/*
* Copy the leading memory data from src to dst in an increasing
* address order.By this way,the risk of overwritting the source
* memory data is eliminated when the distance between src and
* dst is less than 16. The memory accesses here are alignment.
*/
tbz tmp2, #0, 1f
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
1:
tbz tmp2, #1, 2f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
2:
tbz tmp2, #2, 3f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
3:
tbz tmp2, #3, .LSrcAligned
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
.LSrcAligned:
cmp count, #64
b.ge .Lcpy_over64
/*
* Deal with small copies quickly by dropping straight into the
* exit block.
*/
.Ltail63:
/*
* Copy up to 48 bytes of data. At this point we only need the
* bottom 6 bits of count to be accurate.
*/
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
1:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
2:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
.Ltiny15:
/*
* Prefer to break one ldp/stp into several load/store to access
* memory in an increasing address order,rather than to load/store 16
* bytes from (src-16) to (dst-16) and to backward the src to aligned
* address,which way is used in original cortex memcpy. If keeping
* the original memcpy process here, memmove need to satisfy the
* precondition that src address is at least 16 bytes bigger than dst
* address,otherwise some source data will be overwritten when memove
* call memcpy directly. To make memmove simpler and decouple the
* memcpy's dependency on memmove, withdrew the original process.
*/
tbz count, #3, 1f
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
1:
tbz count, #2, 2f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
2:
tbz count, #1, 3f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
3:
tbz count, #0, .Lexitfunc
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
b .Lexitfunc
.Lcpy_over64:
subs count, count, #128
b.ge .Lcpy_body_large
/*
* Less than 128 bytes to copy, so handle 64 here and then jump
* to the tail.
*/
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
ldp1 D_l, D_h, src, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
b .Lexitfunc
/*
* Critical loop. Start at a new cache line boundary. Assuming
* 64 bytes per line this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large:
/* pre-get 64 bytes data. */
ldp1 A_l, A_h, src, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
ldp1 D_l, D_h, src, #16
1:
/*
* interlace the load of next 64 bytes data block with store of the last
* loaded 64 bytes data.
*/
stp1 A_l, A_h, dst, #16
ldp1 A_l, A_h, src, #16
stp1 B_l, B_h, dst, #16
ldp1 B_l, B_h, src, #16
stp1 C_l, C_h, dst, #16
ldp1 C_l, C_h, src, #16
stp1 D_l, D_h, dst, #16
ldp1 D_l, D_h, src, #16
subs count, count, #64
b.ge 1b
stp1 A_l, A_h, dst, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
.Lexitfunc:

@@ -0,0 +1,92 @@
/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
#include <process.h>
#include <elfcore.h>
#include <string.h>
#include <ptrace.h>
#include <cls.h>
#include <hwcap.h>
#define align32(x) ((((x) + 3) / 4) * 4)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct thread *thread, void *regs0, int sig)
{
struct pt_regs *regs = regs0;
struct elf_prstatus64 tmp_prstatus;
/*
We ignore following entries for now.
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
/* copy x0-30, sp, pc, pstate */
memcpy(&tmp_prstatus.pr_reg, &regs->user_regs, sizeof(tmp_prstatus.pr_reg));
tmp_prstatus.pr_fpvalid = 0; /* We assume no fp */
/* copy unaligned prstatus addr */
memcpy(prstatus, &tmp_prstatus, sizeof(*prstatus));
prstatus->pr_pid = thread->tid;
if (thread->proc->parent) {
prstatus->pr_ppid = thread->proc->parent->pid;
}
prstatus->pr_info.si_signo = sig;
prstatus->pr_cursig = sig;
}
int arch_get_thread_core_info_size(void)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return 0;
}
return sizeof(struct note) + align32(sizeof("LINUX"))
+ regset_size(cpu_local_var(current), regset);
}
void arch_fill_thread_core_info(struct note *head,
struct thread *thread, void *regs)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
/* pre saved registers */
save_fp_regs(thread);
if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(thread, regset))) {
int ret;
size_t size = regset_size(thread, regset);
void *namep;
void *descp;
namep = (void *) (head + 1);
descp = namep + align32(sizeof("LINUX"));
ret = regset->get(thread, regset, 0, size, descp, NULL);
if (ret) {
return;
}
head->namesz = sizeof("LINUX");
head->descsz = size;
head->type = NT_ARM_SVE;
memcpy(namep, "LINUX", sizeof("LINUX"));
}
}

arch/arm64/kernel/cpu.c (1902 lines): file diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,14 @@
/* cputable.c COPYRIGHT FUJITSU LIMITED 2015 */
#include <cputable.h>
extern unsigned long __cpu_setup(void);
struct cpu_info cpu_table[] = {
{
.cpu_id_val = 0x000f0000,
.cpu_id_mask = 0x000f0000,
.cpu_name = "AArch64 Processor",
.cpu_setup = __cpu_setup,
},
{ 0 },
};

@@ -0,0 +1,109 @@
/* debug-monitors.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
#include <cputype.h>
#include <irqflags.h>
#include <ihk/context.h>
#include <signal.h>
#include <errno.h>
#include <debug-monitors.h>
#include <cls.h>
#include <thread_info.h>
/* @ref.impl arch/arm64/kernel/debug-monitors.c::debug_monitors_arch */
/* Determine debug architecture. */
unsigned char debug_monitors_arch(void)
{
return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::mdscr_write */
void mdscr_write(unsigned int mdscr)
{
unsigned long flags = local_dbg_save();
asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
local_dbg_restore(flags);
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::mdscr_read */
unsigned int mdscr_read(void)
{
unsigned int mdscr;
asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
return mdscr;
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::clear_os_lock */
static void clear_os_lock(void)
{
asm volatile("msr oslar_el1, %0" : : "r" (0));
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::debug_monitors_init */
void debug_monitors_init(void)
{
clear_os_lock();
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::set_regs_spsr_ss */
void set_regs_spsr_ss(struct pt_regs *regs)
{
unsigned long spsr;
spsr = regs->pstate;
spsr &= ~DBG_SPSR_SS;
spsr |= DBG_SPSR_SS;
regs->pstate = spsr;
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::set_regs_spsr_ss */
void clear_regs_spsr_ss(struct pt_regs *regs)
{
unsigned long spsr;
spsr = regs->pstate;
spsr &= ~DBG_SPSR_SS;
regs->pstate = spsr;
}
extern int interrupt_from_user(void *);
extern void clear_single_step(struct thread *thread);
/* @ref.impl arch/arm64/kernel/debug-monitors.c::single_step_handler */
int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
siginfo_t info;
int ret = -EFAULT;
if (interrupt_from_user(regs)) {
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
info._sifields._sigfault.si_addr = (void *)regs->pc;
set_signal(SIGTRAP, regs, &info);
clear_single_step(cpu_local_var(current));
ret = 0;
} else {
kprintf("Unexpected kernel single-step exception at EL1\n");
}
return ret;
}
/* @ref.impl arch/arm64/kernel/debug-monitors.c::brk_handler */
int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
siginfo_t info;
int ret = -EFAULT;
if (interrupt_from_user(regs)) {
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
info._sifields._sigfault.si_addr = (void *)regs->pc;
set_signal(SIGTRAP, regs, &info);
ret = 0;
} else {
kprintf("Unexpected kernel BRK exception at EL1\n");
}
return ret;
}

@@ -0,0 +1,126 @@
/* entry-fpsimd.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <linkage.h>
#include <assembler.h>
#include <fpsimdmacros.h>
/*
* @ref.impl linux-linaro/arch/arm64/include/asm/fpsimdmacros.h
*/
/*
* FP/SIMD state saving and restoring macros
*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
.macro fpsimd_save state, tmpnr
stp q0, q1, [\state, #16 * 0]
stp q2, q3, [\state, #16 * 2]
stp q4, q5, [\state, #16 * 4]
stp q6, q7, [\state, #16 * 6]
stp q8, q9, [\state, #16 * 8]
stp q10, q11, [\state, #16 * 10]
stp q12, q13, [\state, #16 * 12]
stp q14, q15, [\state, #16 * 14]
stp q16, q17, [\state, #16 * 16]
stp q18, q19, [\state, #16 * 18]
stp q20, q21, [\state, #16 * 20]
stp q22, q23, [\state, #16 * 22]
stp q24, q25, [\state, #16 * 24]
stp q26, q27, [\state, #16 * 26]
stp q28, q29, [\state, #16 * 28]
stp q30, q31, [\state, #16 * 30]!
mrs x\tmpnr, fpsr
str w\tmpnr, [\state, #16 * 2]
mrs x\tmpnr, fpcr
str w\tmpnr, [\state, #16 * 2 + 4]
.endm
.macro fpsimd_restore_fpcr state, tmp
/*
* Writes to fpcr may be self-synchronising, so avoid restoring
* the register if it hasn't changed.
*/
mrs \tmp, fpcr
cmp \tmp, \state
b.eq 9999f
msr fpcr, \state
9999:
.endm
/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
ldp q0, q1, [\state, #16 * 0]
ldp q2, q3, [\state, #16 * 2]
ldp q4, q5, [\state, #16 * 4]
ldp q6, q7, [\state, #16 * 6]
ldp q8, q9, [\state, #16 * 8]
ldp q10, q11, [\state, #16 * 10]
ldp q12, q13, [\state, #16 * 12]
ldp q14, q15, [\state, #16 * 14]
ldp q16, q17, [\state, #16 * 16]
ldp q18, q19, [\state, #16 * 18]
ldp q20, q21, [\state, #16 * 20]
ldp q22, q23, [\state, #16 * 22]
ldp q24, q25, [\state, #16 * 24]
ldp q26, q27, [\state, #16 * 26]
ldp q28, q29, [\state, #16 * 28]
ldp q30, q31, [\state, #16 * 30]!
ldr w\tmpnr, [\state, #16 * 2]
msr fpsr, x\tmpnr
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
/*
* @ref.impl linux-linaro/arch/arm64/kernel/entry-fpsimd.S
*/
/*
* Save the FP registers.
*
* x0 - pointer to struct fpsimd_state
*/
ENTRY(fpsimd_save_state)
fpsimd_save x0, 8
ret
ENDPROC(fpsimd_save_state)
/*
* Load the FP registers.
*
* x0 - pointer to struct fpsimd_state
*/
ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
#ifdef CONFIG_ARM64_SVE
ENTRY(sve_save_state)
sve_save 0, x1, 2
ret
ENDPROC(sve_save_state)
ENTRY(sve_load_state)
sve_load 0, x1, x2, 3
ret
ENDPROC(sve_load_state)
ENTRY(sve_get_vl)
_zrdvl 0, 1
ret
ENDPROC(sve_get_vl)
#endif /* CONFIG_ARM64_SVE */

arch/arm64/kernel/entry.S
@@ -0,0 +1,566 @@
/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <linkage.h>
#include <assembler.h>
#include <asm-offsets.h>
#include <esr.h>
#include <thread_info.h>
#include <asm-syscall.h>
/*
* Bad Abort numbers
*-----------------
*/
#define BAD_SYNC 0
#define BAD_IRQ 1
#define BAD_FIQ 2
#define BAD_ERROR 3
.macro kernel_entry, el, regsize = 64
sub sp, sp, #S_FRAME_SIZE
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
stp x0, x1, [sp, #16 * 0]
stp x2, x3, [sp, #16 * 1]
stp x4, x5, [sp, #16 * 2]
stp x6, x7, [sp, #16 * 3]
stp x8, x9, [sp, #16 * 4]
stp x10, x11, [sp, #16 * 5]
stp x12, x13, [sp, #16 * 6]
stp x14, x15, [sp, #16 * 7]
stp x16, x17, [sp, #16 * 8]
stp x18, x19, [sp, #16 * 9]
stp x20, x21, [sp, #16 * 10]
stp x22, x23, [sp, #16 * 11]
stp x24, x25, [sp, #16 * 12]
stp x26, x27, [sp, #16 * 13]
stp x28, x29, [sp, #16 * 14]
.if \el == 0
mrs x21, sp_el0
get_thread_info tsk // Ensure MDSCR_EL1.SS is clear,
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
.else
add x21, sp, #S_FRAME_SIZE
.endif
mrs x22, elr_el1
mrs x23, spsr_el1
#if defined(CONFIG_HAS_NMI)
mrs_s x20, ICC_PMR_EL1 // Get PMR
and x20, x20, #ICC_PMR_EL1_G_BIT // Extract mask bit
lsl x20, x20, #PSR_G_PMR_G_SHIFT // Shift to a PSTATE RES0 bit
eor x20, x20, #PSR_G_BIT // Invert bit
orr x23, x20, x23 // Store PMR within PSTATE
mov x20, #ICC_PMR_EL1_MASKED
msr_s ICC_PMR_EL1, x20 // Mask normal interrupts at PMR
#endif /* defined(CONFIG_HAS_NMI) */
stp lr, x21, [sp, #S_LR]
stp x22, x23, [sp, #S_PC]
/*
* Set syscallno to -1 by default (overridden later if real syscall).
*/
.if \el == 0
mvn x21, xzr
str x21, [sp, #S_SYSCALLNO]
.endif
/*
* Registers that may be useful after this macro is invoked:
*
* x21 - aborted SP
* x22 - aborted PC
* x23 - aborted PSTATE
*/
.endm
.macro kernel_exit, el, need_enable_step = 0
.if \el == 0
bl check_sig_pending
bl check_need_resched // or reschedule is needed.
mov x0, #0
mov x1, sp
mov x2, #0
bl check_signal // check whether the signal is delivered
mov x0, #0
mov x1, sp
mov x2, #0
bl check_signal_irq_disabled // check whether the signal is delivered(for kernel_exit)
.endif
.if \el == 1
bl check_sig_pending
.endif
disable_irq x1 // disable interrupts
.if \need_enable_step == 1
ldr x1, [tsk, #TI_FLAGS]
enable_step_tsk x1, x2
.endif
disable_nmi
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
// ct_user_enter // McKernel, disable (debugcode?)
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
.endif
#if defined(CONFIG_HAS_NMI)
and x20, x22, #PSR_G_BIT // Get stolen PSTATE bit
and x22, x22, #~PSR_G_BIT // Clear stolen bit
lsr x20, x20, #PSR_G_PMR_G_SHIFT // Shift back to PMR mask
eor x20, x20, #ICC_PMR_EL1_UNMASKED // x20 gets 0xf0 or 0xb0
msr_s ICC_PMR_EL1, x20 // Write to PMR
#endif /* defined(CONFIG_HAS_NMI) */
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
ldp x2, x3, [sp, #16 * 1]
ldp x4, x5, [sp, #16 * 2]
ldp x6, x7, [sp, #16 * 3]
ldp x8, x9, [sp, #16 * 4]
ldp x10, x11, [sp, #16 * 5]
ldp x12, x13, [sp, #16 * 6]
ldp x14, x15, [sp, #16 * 7]
ldp x16, x17, [sp, #16 * 8]
ldp x18, x19, [sp, #16 * 9]
ldp x20, x21, [sp, #16 * 10]
ldp x22, x23, [sp, #16 * 11]
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
eret // return to kernel
.endm
.macro get_thread_info, rd
mov \rd, sp
and \rd, \rd, #~(KERNEL_STACK_SIZE - 1) // top of stack
.endm
/*
* These are the registers used in the syscall handler, and allow us to
* have in theory up to 7 arguments to a function - x0 to x6.
*
* x7 is reserved for the system call number in 32-bit mode.
*/
sc_nr .req x25 // number of system calls
scno .req x26 // syscall number
stbl .req x27 // syscall table pointer
tsk .req x28 // current thread_info
/*
* Interrupt handling.
*/
.macro irq_handler
adrp x1, handle_arch_irq
ldr x1, [x1, #:lo12:handle_arch_irq]
mov x0, sp
blr x1
.endm
.text
/*
* Exception vectors.
*/
.align 11
ENTRY(vectors)
ventry el1_sync_invalid // Synchronous EL1t
ventry el1_irq_invalid // IRQ EL1t
ventry el1_fiq_invalid // FIQ EL1t
ventry el1_error_invalid // Error EL1t
ventry el1_sync // Synchronous EL1h
ventry el1_irq // IRQ EL1h
ventry el1_fiq_invalid // FIQ EL1h
ventry el1_error_invalid // Error EL1h
ventry el0_sync // Synchronous 64-bit EL0
ventry el0_irq // IRQ 64-bit EL0
ventry el0_fiq_invalid // FIQ 64-bit EL0
ventry el0_error_invalid // Error 64-bit EL0
ventry el0_sync_invalid // Synchronous 32-bit EL0
ventry el0_irq_invalid // IRQ 32-bit EL0
ventry el0_fiq_invalid // FIQ 32-bit EL0
ventry el0_error_invalid // Error 32-bit EL0
END(vectors)
/*
* Invalid mode handlers
*/
.macro inv_entry, el, reason, regsize = 64
kernel_entry el, \regsize
mov x0, sp
mov x1, #\reason
mrs x2, esr_el1
enable_nmi
.if \el == 0
bl bad_mode
b ret_to_user
.else
b bad_mode
.endif
.endm
el0_sync_invalid:
inv_entry 0, BAD_SYNC
ENDPROC(el0_sync_invalid)
el0_irq_invalid:
inv_entry 0, BAD_IRQ
ENDPROC(el0_irq_invalid)
el0_fiq_invalid:
inv_entry 0, BAD_FIQ
ENDPROC(el0_fiq_invalid)
el0_error_invalid:
inv_entry 0, BAD_ERROR
ENDPROC(el0_error_invalid)
el1_sync_invalid:
inv_entry 1, BAD_SYNC
ENDPROC(el1_sync_invalid)
el1_irq_invalid:
inv_entry 1, BAD_IRQ
ENDPROC(el1_irq_invalid)
el1_fiq_invalid:
inv_entry 1, BAD_FIQ
ENDPROC(el1_fiq_invalid)
el1_error_invalid:
inv_entry 1, BAD_ERROR
ENDPROC(el1_error_invalid)
/*
* EL1 mode handlers.
*/
.align 6
el1_sync:
kernel_entry 1
mrs x1, esr_el1 // read the syndrome register
lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
b.eq el1_da
// cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
// b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
b.eq el1_sp_pc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
b.eq el1_sp_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
b.eq el1_undef
// cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
// b.ge el1_dbg
b el1_inv
el1_ia:
/*
* Fall through to the Data abort case
*/
el1_da:
/*
* Data abort handling
*/
mrs x0, far_el1
enable_nmi
enable_dbg
#if defined(CONFIG_HAS_NMI)
# define PSR_INTR_SHIFT PSR_G_SHIFT // PSR_G_BIT
#else /* defined(CONFIG_HAS_NMI) */
# define PSR_INTR_SHIFT 7 // PSR_I_BIT
#endif /* defined(CONFIG_HAS_NMI) */
// re-enable interrupts if they were enabled in the aborted context
tbnz x23, #PSR_INTR_SHIFT, 1f
enable_irq x2
1:
mov x2, sp // struct pt_regs
bl do_mem_abort
// disable interrupts before pulling preserved data off the stack
kernel_exit 1
el1_sp_pc:
/*
* Stack or PC alignment exception handling
*/
mrs x0, far_el1
enable_nmi
enable_dbg
mov x2, sp
b do_sp_pc_abort
el1_undef:
/*
* Undefined instruction
*/
enable_nmi
enable_dbg
mov x0, sp
b do_undefinstr
// el1_dbg:
// /*
// * Debug exception handling
// */
// cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
// cinc x24, x24, eq // set bit '0'
// tbz x24, #0, el1_inv // EL1 only
// mrs x0, far_el1
// mov x2, sp // struct pt_regs
// bl do_debug_exception
// kernel_exit 1
el1_inv:
// TODO: add support for undefined instructions in kernel mode
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
enable_nmi
enable_dbg
b bad_mode
ENDPROC(el1_sync)
/*
* EL1 mode handlers.
*/
.align 6
el1_irq:
kernel_entry 1
enable_dbg
irq_handler
kernel_exit 1
ENDPROC(el1_irq)
/*
* EL0 mode handlers.
*/
.align 6
el0_sync:
kernel_entry 0
mrs x25, esr_el1 // read the syndrome register
lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state
b.eq el0_svc
cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
b.eq el0_da
cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
b.eq el0_ia
cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
b.eq el0_fpsimd_acc
#ifdef CONFIG_ARM64_SVE
cmp x24, #ESR_ELx_EC_SVE // SVE access
b.eq el0_sve_acc
#endif
cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el0_undef
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
b.ge el0_dbg
b el0_inv
el0_svc:
uxtw scno, w8 // syscall number in w8
cmp scno, #__NR_rt_sigreturn
b.eq 1f
str x0, [sp, #S_ORIG_X0] // save the original x0
ldr x16, [sp, #S_PC]
str x16, [sp, #S_ORIG_PC] // save the original pc
1: str scno, [sp, #S_SYSCALLNO] // save syscall number
enable_nmi
enable_dbg_and_irq x0
adrp x16, __arm64_syscall_handler
ldr x16, [x16, #:lo12:__arm64_syscall_handler]
mov x0, scno
mov x1, sp
blr x16 // __arm64_syscall_handler(int, syscall_num, ihk_mc_user_context_t *uctx);
/* Signal check has been completed at the stage of came back. */
b ret_fast_syscall
el0_da:
/*
* Data abort handling
*/
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_nmi
enable_dbg_and_irq x0
// ct_user_exit
bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
bl do_mem_abort
b ret_to_user
el0_ia:
/*
* Instruction abort handling
*/
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_nmi
enable_dbg_and_irq x0
// ct_user_exit
mov x0, x26
mov x1, x25
mov x2, sp
bl do_mem_abort
b ret_to_user
el0_fpsimd_acc:
/*
* Floating Point or Advanced SIMD access
*/
enable_nmi
enable_dbg
// ct_user_exit
mov x0, x25
mov x1, sp
bl do_fpsimd_acc
b ret_to_user
#ifdef CONFIG_ARM64_SVE
/*
* Scalable Vector Extension access
*/
el0_sve_acc:
enable_nmi
enable_dbg
// ct_user_exit
mov x0, x25
mov x1, sp
bl do_sve_acc
b ret_to_user
#endif
el0_fpsimd_exc:
/*
* Floating Point, Advanced SIMD or SVE exception
*/
enable_nmi
enable_dbg
// ct_user_exit
mov x0, x25
mov x1, sp
bl do_fpsimd_exc
b ret_to_user
el0_sp_pc:
/*
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
// enable interrupts before calling the main handler
enable_nmi
enable_dbg_and_irq x0
mov x0, x26
mov x1, x25
mov x2, sp
bl do_sp_pc_abort
b ret_to_user
el0_undef:
/*
* Undefined instruction
*/
// enable interrupts before calling the main handler
enable_nmi
enable_dbg_and_irq x0
// ct_user_exit
mov x0, sp
bl do_undefinstr
b ret_to_user
el0_dbg:
/*
* Debug exception handling
*/
tbnz x24, #0, el0_inv // EL0 only
mrs x0, far_el1
mov x1, x25
mov x2, sp
enable_nmi
bl do_debug_exception
enable_dbg
// ct_user_exit
b ret_to_user
el0_inv:
enable_dbg
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
enable_nmi
bl bad_mode
b ret_to_user
ENDPROC(el0_sync)
.align 6
el0_irq:
kernel_entry 0
enable_dbg
irq_handler
b ret_to_user
ENDPROC(el0_irq)
/*
* Register switch for AArch64. The callee-saved registers need to be saved
* and restored. On entry:
* x0 = previous task_struct (must be preserved across the switch)
* x1 = next task_struct
* Previous and next are guaranteed not to be the same.
*
*/
ENTRY(cpu_switch_to)
cmp x0, xzr // for idle process branch(skip save)
b.eq 1f
add x8, x0, #TI_CPU_CONTEXT
mov x9, sp
stp x19, x20, [x8], #16 // store callee-saved registers
stp x21, x22, [x8], #16
stp x23, x24, [x8], #16
stp x25, x26, [x8], #16
stp x27, x28, [x8], #16
stp x29, x9, [x8], #16
str lr, [x8]
1: add x8, x1, #TI_CPU_CONTEXT
ldp x19, x20, [x8], #16 // restore callee-saved registers
ldp x21, x22, [x8], #16
ldp x23, x24, [x8], #16
ldp x25, x26, [x8], #16
ldp x27, x28, [x8], #16
ldp x29, x9, [x8], #16
ldr lr, [x8]
mov sp, x9
mov x0, x2 // return void *prev
ret
ENDPROC(cpu_switch_to)
ret_fast_syscall:
kernel_exit 0, 1
ENDPROC(ret_fast_syscall)
/*
* "slow" syscall return path.
*/
ret_to_user:
no_work_pending:
kernel_exit 0, 1
ENDPROC(ret_to_user)
/*
* This is how we return from a fork.
*/
ENTRY(ret_from_fork)
// bl schedule_tail
cbz x19, 1f // not a kernel thread
mov x0, x20
blr x19
1: get_thread_info tsk
bl release_runq_lock
bl utilthr_migrate
b ret_to_user
ENDPROC(ret_from_fork)

arch/arm64/kernel/fault.c
@@ -0,0 +1,295 @@
/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <ihk/context.h>
#include <ihk/debug.h>
#include <ptrace.h>
#include <esr.h>
#include <signal.h>
#include <arch-memory.h>
#include <thread_info.h>
#include <syscall.h>
#include <debug-monitors.h>
unsigned long __page_fault_handler_address;
extern int interrupt_from_user(void *);
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_alignment_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
int sig;
int code;
const char *name;
} fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
{ do_bad, SIGBUS, 0, "level 2 address size fault" },
{ do_bad, SIGBUS, 0, "level 3 address size fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
{ do_bad, SIGBUS, 0, "unknown 12" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
{ do_bad, SIGBUS, 0, "unknown 35" },
{ do_bad, SIGBUS, 0, "unknown 36" },
{ do_bad, SIGBUS, 0, "unknown 37" },
{ do_bad, SIGBUS, 0, "unknown 38" },
{ do_bad, SIGBUS, 0, "unknown 39" },
{ do_bad, SIGBUS, 0, "unknown 40" },
{ do_bad, SIGBUS, 0, "unknown 41" },
{ do_bad, SIGBUS, 0, "unknown 42" },
{ do_bad, SIGBUS, 0, "unknown 43" },
{ do_bad, SIGBUS, 0, "unknown 44" },
{ do_bad, SIGBUS, 0, "unknown 45" },
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
{ do_bad, SIGBUS, 0, "TLB conflict abort" },
{ do_bad, SIGBUS, 0, "unknown 49" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
{ do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" },
{ do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" },
{ do_bad, SIGBUS, 0, "unknown 54" },
{ do_bad, SIGBUS, 0, "unknown 55" },
{ do_bad, SIGBUS, 0, "unknown 56" },
{ do_bad, SIGBUS, 0, "unknown 57" },
{ do_bad, SIGBUS, 0, "unknown 58" },
{ do_bad, SIGBUS, 0, "unknown 59" },
{ do_bad, SIGBUS, 0, "unknown 60" },
{ do_bad, SIGBUS, 0, "section domain fault" },
{ do_bad, SIGBUS, 0, "page domain fault" },
{ do_bad, SIGBUS, 0, "unknown 63" },
};
static const char *fault_name(unsigned int esr)
{
const struct fault_info *inf = fault_info + (esr & 63);
return inf->name;
}
/*
* Dispatch a data abort to the relevant handler.
*/
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = fault_info + (esr & 63);
struct siginfo info;
const int from_user = interrupt_from_user(regs);
/* set_cputime called in inf->fn() */
if (!inf->fn(addr, esr, regs))
return;
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
kprintf("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
/*
* Handle stack alignment exceptions.
*/
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
*/
if (interrupt_from_user(regs)) {
kprintf("unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
fault_name(esr), SIGSEGV, addr, esr);
current_thread_info()->fault_address = addr;
current_thread_info()->fault_code = esr;
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
info._sifields._sigfault.si_addr = (void *)addr;
set_signal(SIGSEGV, regs, &info);
} else {
kprintf("Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr);
panic("OOps.");
}
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
static int is_el0_instruction_abort(unsigned int esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
static int do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
void (*page_fault_handler)(void *, uint64_t, void *);
uint64_t reason = 0;
int esr_ec_dfsc = (esr & 63);
if (interrupt_from_user(regs)) {
reason |= PF_USER;
}
if (is_el0_instruction_abort(esr)) {
reason |= PF_INSTR;
} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
reason |= PF_WRITE;
if (13 <= esr_ec_dfsc && esr_ec_dfsc <= 15 ) {
/* level [1-3] permission fault */
reason |= PF_PROT;
}
}
/* set_cputime() call in page_fault_handler() */
page_fault_handler = (void *)__page_fault_handler_address;
(*page_fault_handler)((void *)addr, reason, regs);
return 0;
}
/*
* First Level Translation Fault Handler
*
* We enter here because the first level page table doesn't contain a valid
* entry for the address.
*
* If the address is in kernel space (>= TASK_SIZE), then we are probably
* faulting in the vmalloc() area.
*
* If the init_task's first level page tables contains the relevant entry, we
* copy the it to this task. If not, we send the process a signal, fixup the
* exception, or oops the kernel.
*
* NOTE! We MUST NOT take any locks for this case. We may be in an interrupt
* or a critical region, and should only copy the information from the master
* page table, nothing more.
*/
static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
if (addr < USER_END)
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);
return 0;
}
static int do_alignment_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
do_bad_area(addr, esr, regs);
return 0;
}
extern int breakpoint_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs);
extern int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs);
extern int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs);
extern int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static struct fault_info debug_fault_info[] = {
{ breakpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-breakpoint handler" },
{ single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler" },
{ watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler" },
{ do_bad, SIGBUS, 0, "unknown 3" },
{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
{ do_bad, SIGTRAP, 0, "aarch32 vector catch" },
{ brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler" },
{ do_bad, SIGBUS, 0, "unknown 7" },
};
int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
struct siginfo info;
const int from_user = interrupt_from_user(regs);
int ret = -1;
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
if (!inf->fn(addr, esr, regs)) {
ret = 1;
goto out;
}
kprintf("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info._sifields._sigfault.si_addr = (void *)addr;
arm64_notify_die("", regs, &info, 0);
ret = 0;
out:
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
return ret;
}
/*
* This abort handler always returns "fault".
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
return 1;
}

arch/arm64/kernel/fpsimd.c
@@ -0,0 +1,426 @@
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
#include <thread_info.h>
#include <fpsimd.h>
#include <cpuinfo.h>
#include <lwk/compiler.h>
#include <ikc/ihk.h>
#include <hwcap.h>
#include <cls.h>
#include <prctl.h>
#include <cpufeature.h>
#include <kmalloc.h>
#include <ihk/debug.h>
#include <process.h>
#include <bitmap.h>
//#define DEBUG_PRINT_FPSIMD
#ifdef DEBUG_PRINT_FPSIMD
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef CONFIG_ARM64_SVE
/* Set of available vector lengths, as vq_to_bit(vq): */
static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Maximum supported vector length across all CPUs (initially poisoned) */
int sve_max_vl = -1;
/* Default VL for tasks that don't set it explicitly: */
int sve_default_vl = -1;
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
* _maximum_ VQ not exceeding a certain value.
*/
static unsigned int vq_to_bit(unsigned int vq)
{
return SVE_VQ_MAX - vq;
}
static unsigned int bit_to_vq(unsigned int bit)
{
if (bit >= SVE_VQ_MAX) {
bit = SVE_VQ_MAX - 1;
}
return SVE_VQ_MAX - bit;
}
/*
* All vector length selection from userspace comes through here.
* We're on a slow path, so some sanity-checks are included.
* If things go wrong there's a bug somewhere, but try to fall back to a
* safe choice.
*/
static unsigned int find_supported_vector_length(unsigned int vl)
{
int bit;
int max_vl = sve_max_vl;
if (!sve_vl_valid(vl)) {
vl = SVE_VL_MIN;
}
if (!sve_vl_valid(max_vl)) {
max_vl = SVE_VL_MIN;
}
if (vl > max_vl) {
vl = max_vl;
}
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(bit_to_vq(bit));
}
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
unsigned long zcr;
bitmap_zero(map, SVE_VQ_MAX);
zcr = ZCR_EL1_LEN_MASK;
zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
/* self-syncing */
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
vl = sve_get_vl();
/* skip intervening lengths */
vq = sve_vq_from_vl(vl);
set_bit(vq_to_bit(vq), map);
}
}
void sve_init_vq_map(void)
{
sve_probe_vqs(sve_vq_map);
}
size_t sve_state_size(struct thread const *thread)
{
unsigned int vl = thread->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(vl));
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
void sve_free(struct thread *thread)
{
if (thread->ctx.thread->sve_state) {
kfree(thread->ctx.thread->sve_state);
thread->ctx.thread->sve_state = NULL;
}
}
int sve_alloc(struct thread *thread)
{
if (thread->ctx.thread->sve_state) {
return 0;
}
thread->ctx.thread->sve_state =
kmalloc(sve_state_size(thread), IHK_MC_AP_NOWAIT);
if (thread->ctx.thread->sve_state == NULL) {
return -ENOMEM;
}
memset(thread->ctx.thread->sve_state, 0, sve_state_size(thread));
return 0;
}
static int get_nr_threads(struct process *proc)
{
struct thread *child;
struct mcs_rwlock_node_irqsave lock;
int nr_threads = 0;
mcs_rwlock_reader_lock(&proc->threads_lock, &lock);
list_for_each_entry(child, &proc->threads_list, siblings_list){
nr_threads++;
}
mcs_rwlock_reader_unlock(&proc->threads_lock, &lock);
return nr_threads;
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */
int sve_set_vector_length(struct thread *thread,
unsigned long vl, unsigned long flags)
{
struct thread_info *ti = thread->ctx.thread;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
return -EINVAL;
}
if (!sve_vl_valid(vl)) {
return -EINVAL;
}
/*
* Clamp to the maximum vector length that VL-agnostic SVE code can
* work with. A flag may be assigned in the future to allow setting
* of larger vector lengths without confusing older software.
*/
if (vl > SVE_VL_ARCH_MAX) {
vl = SVE_VL_ARCH_MAX;
}
vl = find_supported_vector_length(vl);
if (flags & (PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
ti->sve_vl_onexec = vl;
} else {
/* Reset VL to system default on next exec: */
ti->sve_vl_onexec = 0;
}
/* Only actually set the VL if not deferred: */
if (flags & PR_SVE_SET_VL_ONEXEC) {
goto out;
}
if (vl == ti->sve_vl) {
goto out;
}
if ((elf_hwcap & HWCAP_SVE)) {
fp_regs_struct fp_regs;
memset(&fp_regs, 0, sizeof(fp_regs));
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs();
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
restore_fp_regs(thread);
/* for target thread at ptrace */
} else {
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
}
}
ti->sve_vl = vl;
out:
ti->sve_flags = flags & PR_SVE_VL_INHERIT;
return 0;
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_prctl_status */
/*
* Encode the current vector length and flags for return.
* This is only required for prctl(): ptrace has separate fields
*/
static int sve_prctl_status(unsigned long flags)
{
int ret;
struct thread_info *ti = cpu_local_var(current)->ctx.thread;
if (flags & PR_SVE_SET_VL_ONEXEC) {
ret = ti->sve_vl_onexec;
}
else {
ret = ti->sve_vl;
}
if (ti->sve_flags & PR_SVE_VL_INHERIT) {
ret |= PR_SVE_VL_INHERIT;
}
return ret;
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_task_vl */
int sve_set_thread_vl(unsigned long arg)
{
unsigned long vl, flags;
int ret;
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return -EINVAL;
}
ret = sve_set_vector_length(cpu_local_var(current), vl, flags);
if (ret) {
return ret;
}
return sve_prctl_status(flags);
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_get_ti_vl */
int sve_get_thread_vl(void)
{
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return -EINVAL;
}
return sve_prctl_status(0);
}
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
kprintf("PANIC: CPU: %d PID: %d ESR: %x Trapped SVE access.\n",
ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid, esr);
panic("");
}
void sve_setup(void)
{
extern unsigned long ihk_param_default_vl;
uint64_t zcr;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
/* init sve_vq_map bitmap */
sve_init_vq_map();
/*
* The SVE architecture mandates support for 128-bit vectors,
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
if (!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)) {
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
}
zcr = read_system_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_EL1_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (sve_max_vl != find_supported_vector_length(sve_max_vl)) {
sve_max_vl = find_supported_vector_length(sve_max_vl);
}
sve_default_vl = ihk_param_default_vl;
if (ihk_param_default_vl !=
find_supported_vector_length(ihk_param_default_vl)) {
kprintf("SVE: Getting unsupported default VL = %d "
"from HOST-Linux.\n", sve_default_vl);
sve_default_vl = find_supported_vector_length(64);
kprintf("SVE: Using default vl(%d byte).\n",
sve_default_vl);
}
kprintf("SVE: maximum available vector length %u bytes per vector\n",
sve_max_vl);
kprintf("SVE: default vector length %u bytes per vector\n",
sve_default_vl);
}
#else /* CONFIG_ARM64_SVE */
void sve_setup(void)
{
/* nothing to do. */
}
#endif /* CONFIG_ARM64_SVE */
/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_pffr */
static void *__thread_pffr(struct thread *thread)
{
unsigned int vl = thread->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(vl));
return (char *)thread->ctx.thread->sve_state + 34 * vl;
}
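The 34 * vl offset follows from the sve_state layout: 32 Z registers of vl bytes each plus 16 predicate registers of vl/8 bytes each, which places the FFR slot at 34 * vl bytes. A standalone sketch of that arithmetic (the layout itself is the assumption here):
/* Illustrative sketch only, not part of this file: why the pffr pointer sits
 * at 34 * vl. Assumed layout: 32 Z registers of vl bytes, then 16 predicate
 * registers of vl/8 bytes, then FFR. */
#include <stdio.h>

int main(void)
{
	unsigned int vl;

	for (vl = 16; vl <= 256; vl *= 2) {
		unsigned int zregs = 32 * vl;		/* Z0..Z31 */
		unsigned int pregs = 16 * (vl / 8);	/* P0..P15 */

		printf("vl=%3u: Z block=%5u + P block=%4u -> offset %5u (34*vl=%u)\n",
		       vl, zregs, pregs, zregs + pregs, 34 * vl);
	}
	return 0;
}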
/* The caller must check the HWCAP_FP and HWCAP_ASIMD state before calling this. */
void thread_fpsimd_load(struct thread *thread)
{
if (likely(elf_hwcap & HWCAP_SVE)) {
unsigned int vl = thread->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(vl));
sve_load_state(__thread_pffr(thread), &thread->fp_regs->fpsr, sve_vq_from_vl(vl) - 1);
dkprintf("sve for TID %d restored\n", thread->tid);
} else {
// Load the current FPSIMD state to memory.
fpsimd_load_state(thread->fp_regs);
dkprintf("fp_regs for TID %d restored\n", thread->tid);
}
}
/* The caller must check the HWCAP_FP and HWCAP_ASIMD state before calling this. */
void thread_fpsimd_save(struct thread *thread)
{
if (likely(elf_hwcap & HWCAP_SVE)) {
sve_save_state(__thread_pffr(thread), &thread->fp_regs->fpsr);
dkprintf("sve for TID %d saved\n", thread->tid);
} else {
// Save the current FPSIMD state to memory.
fpsimd_save_state(thread->fp_regs);
dkprintf("fp_regs for TID %d saved\n", thread->tid);
}
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_fpsimd_to_sve */
static void __thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs, unsigned int vq)
{
struct fpsimd_sve_state(vq) *sst = thread->ctx.thread->sve_state;
unsigned int i;
for (i = 0; i < 32; i++) {
sst->zregs[i][0] = fp_regs->vregs[i];
}
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::task_fpsimd_to_sve */
void thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs)
{
unsigned int vl = thread->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(vl));
__thread_fpsimd_to_sve(thread, fp_regs, sve_vq_from_vl(vl));
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::__task_sve_to_fpsimd */
static void __thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs, unsigned int vq)
{
struct fpsimd_sve_state(vq) *sst = thread->ctx.thread->sve_state;
unsigned int i;
for (i = 0; i < 32; i++) {
fp_regs->vregs[i] = sst->zregs[i][0];
}
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::task_sve_to_fpsimd */
void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs)
{
unsigned int vl = thread->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(vl));
__thread_sve_to_fpsimd(thread, fp_regs, sve_vq_from_vl(vl));
}

arch/arm64/kernel/head.S Normal file
@ -0,0 +1,805 @@
/* head.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <linkage.h>
#include <ptrace.h>
#include <assembler.h>
#include <asm-offsets.h>
#include <virt.h>
#include <cache.h>
#include <arch-memory.h>
#include <smp.h>
#include <arm-gic-v3.h>
/* KERNEL_RAM_VADDR is defined by cmake */
//#ifndef CONFIG_SMP
//# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF
//# define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF
//#else
# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
# define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
//#endif /*CONFIG_SMP*/
#ifdef CONFIG_ARM64_64K_PAGES
# define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
#else
# define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
#endif
.macro pgtbl_init_core, name, dir, tbl, ents, virt_to_phys
ldr \tbl, =\name
ldr \ents, =\dir
add \tbl, \tbl, \virt_to_phys
str \ents, [\tbl]
add \tbl, \tbl, #8
add \ents, \ents, \virt_to_phys
str \ents, [\tbl]
.endm
.macro pgtbl_init, tbl, ents, virt_to_phys
pgtbl_init_core swapper_page_table, swapper_pg_dir, \tbl, \ents, \virt_to_phys
pgtbl_init_core idmap_page_table, idmap_pg_dir, \tbl, \ents, \virt_to_phys
.endm
.macro pgtbl, ttb0, ttb1, virt_to_phys
ldr \ttb1, =swapper_pg_dir
ldr \ttb0, =idmap_pg_dir
add \ttb1, \ttb1, \virt_to_phys
add \ttb0, \ttb0, \virt_to_phys
.endm
#define KERNEL_START KERNEL_RAM_VADDR
#define KERNEL_END _end
/* ihk param offset */
#define TRAMPOLINE_DATA_RESERVED_SIZE 0x08
#define TRAMPOLINE_DATA_PGTBL_SIZE 0x08
#define TRAMPOLINE_DATA_LOAD_SIZE 0x08
#define TRAMPOLINE_DATA_STACK_SIZE 0x08
#define TRAMPOLINE_DATA_BOOT_PARAM_SIZE 0x08
#define TRAMPOLINE_DATA_STARTUP_DATA_SIZE 0x08
#define TRAMPOLINE_DATA_ST_PHYS_BASE_SIZE 0x08
#define TRAMPOLINE_DATA_ST_PHYS_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_GIC_DIST_PA_SIZE 0x08
#define TRAMPOLINE_DATA_GIC_DIST_MAP_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_GIC_CPU_PA_SIZE 0x08
#define TRAMPOLINE_DATA_GIC_CPU_MAP_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_GIC_PERCPU_OFF_SIZE 0x04
#define TRAMPOLINE_DATA_GIC_VERSION_SIZE 0x04
#define TRAMPOLINE_DATA_LPJ_SIZE 0x08
#define TRAMPOLINE_DATA_HZ_SIZE 0x08
#define TRAMPOLINE_DATA_PSCI_METHOD_SIZE 0x08
#define TRAMPOLINE_DATA_USE_VIRT_TIMER_SIZE 0x08
#define TRAMPOLINE_DATA_EVTSTRM_TIMER_RATE_SIZE 0x08
#define TRAMPOLINE_DATA_DEFAULT_VL_SIZE 0x08
#define TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_CPU_MAP_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_DATA_RDISTS_PA_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE 0x08
#define TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE 0x04
#define TRAMPOLINE_DATA_PMU_AFF_SIZE (CONFIG_SMP_MAX_CORES * 4)
#define STARTUP_DATA_RESERVED 0x00
#define STARTUP_DATA_BASE 0x08
#define STARTUP_DATA_PGTBL 0x10
#define STARTUP_DATA_STACK 0x18
#define STARTUP_DATA_ARG2 0x20
#define STARTUP_DATA_TRAMPILINE 0x28
#define STARTUP_DATA_NEXT_PC 0x30
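The TRAMPOLINE_DATA_*_SIZE constants above, together with the load order in arch_start below, describe a sequentially packed boot-parameter area. A hedged C sketch of that implied layout follows; the struct name, field names, packed attribute, and the NR_CPUS / CONFIG_SMP_MAX_CORES placeholder values are assumptions for illustration, not definitions from this tree:
/* Illustrative sketch only, not part of head.S: layout implied by the
 * TRAMPOLINE_DATA_*_SIZE constants and the read order in arch_start.
 * All names and the placeholder array bounds are assumptions. */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define NR_CPUS              48	/* placeholder */
#define CONFIG_SMP_MAX_CORES 48	/* placeholder */

struct trampoline_data_sketch {
	uint64_t reserved;
	uint64_t header_pgtbl;
	uint64_t header_load;
	uint64_t stack_ptr;
	uint64_t notify_address;		/* -> ihk_param_param_addr */
	uint64_t startup_data;			/* PA of the startup data block */
	uint64_t st_phys_base;
	uint64_t st_phys_size;
	uint64_t gic_dist_base_pa;
	uint64_t gic_dist_map_size;
	uint64_t gic_cpu_base_pa;
	uint64_t gic_cpu_map_size;
	uint32_t gic_percpu_offset;
	uint32_t gic_version;
	uint64_t lpj;
	uint64_t hz;
	uint64_t psci_method;
	uint64_t use_virt_timer;
	uint64_t evtstrm_timer_rate;
	uint64_t default_vl;
	uint64_t cpu_logical_map_size;
	uint64_t cpu_logical_map[NR_CPUS];
	uint64_t gic_rdist_base_pa[NR_CPUS];
	uint64_t retention_state_flag_pa;
	uint32_t nr_pmu_irq_affi;
	uint32_t pmu_irq_affi[CONFIG_SMP_MAX_CORES];
} __attribute__((packed));

int main(void)
{
	printf("default_vl at byte %zu, cpu_logical_map at byte %zu\n",
	       offsetof(struct trampoline_data_sketch, default_vl),
	       offsetof(struct trampoline_data_sketch, cpu_logical_map));
	return 0;
}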
/* ihk param save area */
.globl ihk_param_head
.globl ihk_param_gic_dist_base_pa, ihk_param_gic_cpu_base_pa
.globl ihk_param_gic_dist_map_size, ihk_param_gic_cpu_map_size
.globl ihk_param_gic_percpu_offset, ihk_param_gic_version
.globl ihk_param_lpj, ihk_param_hz, ihk_param_psci_method
.globl ihk_param_cpu_logical_map, ihk_param_gic_rdist_base_pa
.globl ihk_param_pmu_irq_affi, ihk_param_nr_pmu_irq_affi
.globl ihk_param_use_virt_timer, ihk_param_evtstrm_timer_rate
.globl ihk_param_retention_state_flag_pa, ihk_param_default_vl
ihk_param_head:
ihk_param_param_addr:
.quad 0
ihk_param_phys_addr:
.quad 0
ihk_param_st_phys_base:
.quad 0
ihk_param_st_phys_size:
.quad 0
ihk_param_gic_dist_base_pa:
.quad 0
ihk_param_gic_dist_map_size:
.quad 0
ihk_param_gic_cpu_base_pa:
.quad 0
ihk_param_gic_cpu_map_size:
.quad 0
ihk_param_gic_percpu_offset:
.word 0
ihk_param_gic_version:
.word 0
ihk_param_lpj:
.quad 0 /* udelay loops value */
ihk_param_hz:
.quad 0 /* host HZ value */
ihk_param_psci_method:
.quad 0 /* hvc or smc ? */
ihk_param_use_virt_timer:
.quad 0 /* virt timer or phys timer ? */
ihk_param_evtstrm_timer_rate:
.quad 0 /* event stream timer rate */
ihk_param_default_vl:
.quad 0 /* SVE default VL */
ihk_param_cpu_logical_map:
.skip NR_CPUS * 8 /* array of the MPIDR and the core number */
ihk_param_gic_rdist_base_pa:
.skip NR_CPUS * 8 /* per-cpu re-distributer PA */
ihk_param_retention_state_flag_pa:
.quad 0
ihk_param_pmu_irq_affi:
.skip CONFIG_SMP_MAX_CORES * 4 /* array of the pmu affinity list */
ihk_param_nr_pmu_irq_affi:
.word 0 /* number of pmu affinity list elements. */
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
#define HCR_E2H (UL(1) << 34)
#define HCR_RW_SHIFT 31
#define HCR_RW (UL(1) << HCR_RW_SHIFT)
#define HCR_TGE (UL(1) << 27)
/*
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
.section ".text","ax"
ENTRY(arch_start)
/* store ihk param */
/* x4 = ihk_smp_trampoline_data PA */
add x0, x4, #TRAMPOLINE_DATA_RESERVED_SIZE
/* header_pgtbl */
add x0, x0, #TRAMPOLINE_DATA_PGTBL_SIZE
/* header_load */
add x0, x0, #TRAMPOLINE_DATA_LOAD_SIZE
/* stack_ptr */
add x0, x0, #TRAMPOLINE_DATA_STACK_SIZE
/* notify_address */
ldr x16, [x0], #TRAMPOLINE_DATA_BOOT_PARAM_SIZE
adr x15, ihk_param_param_addr
str x16, [x15]
/* startup_data */
ldr x16, [x0], #TRAMPOLINE_DATA_STARTUP_DATA_SIZE
ldr x15, [x16, #STARTUP_DATA_ARG2]
adr x17, ihk_param_phys_addr
str x15, [x17]
/* st_phys_base */
ldr x16, [x0], #TRAMPOLINE_DATA_ST_PHYS_BASE_SIZE
adr x15, ihk_param_st_phys_base
str x16, [x15]
/* st_phys_size */
ldr x16, [x0], #TRAMPOLINE_DATA_ST_PHYS_SIZE_SIZE
adr x15, ihk_param_st_phys_size
str x16, [x15]
/* dist_base_pa */
ldr x16, [x0], #TRAMPOLINE_DATA_GIC_DIST_PA_SIZE
adr x15, ihk_param_gic_dist_base_pa
str x16, [x15]
/* dist_map_size */
ldr x16, [x0], #TRAMPOLINE_DATA_GIC_DIST_MAP_SIZE_SIZE
adr x15, ihk_param_gic_dist_map_size
str x16, [x15]
/* cpu_base_pa */
ldr x16, [x0], #TRAMPOLINE_DATA_GIC_CPU_PA_SIZE
adr x15, ihk_param_gic_cpu_base_pa
str x16, [x15]
/* cpu_map_size */
ldr x16, [x0], #TRAMPOLINE_DATA_GIC_CPU_MAP_SIZE_SIZE
adr x15, ihk_param_gic_cpu_map_size
str x16, [x15]
/* percpu_offset */
ldr w16, [x0], #TRAMPOLINE_DATA_GIC_PERCPU_OFF_SIZE
adr x15, ihk_param_gic_percpu_offset
str w16, [x15]
/* gic_version */
ldr w16, [x0], #TRAMPOLINE_DATA_GIC_VERSION_SIZE
adr x15, ihk_param_gic_version
str w16, [x15]
/* loops_per_jiffy */
ldr x16, [x0], #TRAMPOLINE_DATA_LPJ_SIZE
adr x15, ihk_param_lpj
str x16, [x15]
/* hz */
ldr x16, [x0], #TRAMPOLINE_DATA_HZ_SIZE
adr x15, ihk_param_hz
str x16, [x15]
/* psci_method */
ldr x16, [x0], #TRAMPOLINE_DATA_PSCI_METHOD_SIZE
adr x15, ihk_param_psci_method
str x16, [x15]
/* use_virt_timer */
ldr x16, [x0], #TRAMPOLINE_DATA_USE_VIRT_TIMER_SIZE
adr x15, ihk_param_use_virt_timer
str x16, [x15]
/* evtstrm_timer_rate */
ldr x16, [x0], #TRAMPOLINE_DATA_EVTSTRM_TIMER_RATE_SIZE
adr x15, ihk_param_evtstrm_timer_rate
str x16, [x15]
/* SVE default VL */
ldr x16, [x0], #TRAMPOLINE_DATA_DEFAULT_VL_SIZE
adr x15, ihk_param_default_vl
str x16, [x15]
/* cpu_logical_map_size */
ldr x16, [x0], #TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE
mov x1, x16
/* cpu_logical_map */
adr x15, ihk_param_cpu_logical_map
mov x18, x0
1: ldr x17, [x18], #8
str x17, [x15], #8
sub x16, x16, #1
cmp x16, #0
b.ne 1b
mov x16, #NR_CPUS /* calc next data */
lsl x16, x16, 3
add x0, x0, x16
/* reset cpu_logical_map_size */
mov x16, x1
/* gic_rdist_base_pa */
adr x15, ihk_param_gic_rdist_base_pa
mov x18, x0
1: ldr x17, [x18], #8
str x17, [x15], #8
sub x16, x16, #1
cmp x16, #0
b.ne 1b
mov x16, #NR_CPUS /* calc next data */
lsl x16, x16, 3
add x0, x0, x16
/* retention_state_flag_pa */
ldr x16, [x0], #TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE
adr x15, ihk_param_retention_state_flag_pa
str x16, [x15]
/* nr_pmu_irq_affi */
ldr w16, [x0], #TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE
adr x15, ihk_param_nr_pmu_irq_affi
str w16, [x15]
/* pmu_irq_affi */
mov x18, x0
adr x15, ihk_param_pmu_irq_affi
b 2f
1: ldr w17, [x18], #4
str w17, [x15], #4
sub w16, w16, #1
2: cmp w16, #0
b.ne 1b
mov x16, #CONFIG_SMP_MAX_CORES /* calc next data */
lsl x16, x16, 2
add x0, x0, x16
/* */
bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-KERNEL_START
bl __create_page_tables // x25=TTBR0, x26=TTBR1
b secondary_entry_common
ENDPROC(arch_start)
ENTRY(arch_ap_start)
bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-KERNEL_START
b secondary_entry_common
ENDPROC(arch_ap_start)
/*
* Macro to create a table entry to the next page.
*
* tbl: page table address
* virt: virtual address
* shift: #imm page table shift
* ptrs: #imm pointers per table page
*
* Preserves: virt
* Corrupts: tmp1, tmp2
* Returns: tbl -> next level table page address
*/
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
lsr \tmp1, \virt, #\shift
and \tmp1, \tmp1, #\ptrs - 1 // table index
add \tmp2, \tbl, #PAGE_SIZE
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
str \tmp2, [\tbl, \tmp1, lsl #3]
add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm
/*
* Macro to populate the PGD (and possibly PUD) for the corresponding
* block entry in the next level (tbl) for the given virtual address.
*
* Preserves: tbl, next, virt
* Corrupts: tmp1, tmp2
*/
.macro create_pgd_entry, tbl, virt, tmp1, tmp2
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS == 3
create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif
.endm
/*
* Macro to populate block entries in the page table for the start..end
* virtual range (inclusive).
*
* Preserves: tbl, flags
* Corrupts: phys, start, end, pstate
*/
.macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #BLOCK_SHIFT
lsr \start, \start, #BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
lsr \end, \end, #BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
add \start, \start, #1 // next entry
add \phys, \phys, #BLOCK_SIZE // next block
cmp \start, \end
b.ls 9999b
.endm
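A C rendering of what create_block_map does may make the loop easier to follow; BLOCK_SHIFT, PTRS_PER_PTE, and the flags value in main() below are placeholder assumptions (2 MiB blocks, 512-entry tables), not values taken from this tree:
/* Illustrative sketch only, not part of head.S: a C rendering of the
 * create_block_map loop under the placeholder assumptions above. */
#include <stdio.h>
#include <stdint.h>

#define BLOCK_SHIFT  21
#define BLOCK_SIZE   (1UL << BLOCK_SHIFT)
#define PTRS_PER_PTE 512

static void create_block_map_c(uint64_t *tbl, uint64_t flags,
			       uint64_t phys, uint64_t start, uint64_t end)
{
	uint64_t entry = flags | (phys & ~(BLOCK_SIZE - 1));	/* table entry */
	uint64_t idx = (start >> BLOCK_SHIFT) & (PTRS_PER_PTE - 1);
	uint64_t last = (end >> BLOCK_SHIFT) & (PTRS_PER_PTE - 1);

	do {
		tbl[idx] = entry;	/* store the entry */
		entry += BLOCK_SIZE;	/* next block */
	} while (idx++ < last);		/* inclusive range, like b.ls */
}

int main(void)
{
	static uint64_t table[PTRS_PER_PTE];

	/* map VA 0x40000000..0x403fffff to PA 0x80000000 with dummy flags */
	create_block_map_c(table, 0x711, 0x80000000UL, 0x40000000UL, 0x403fffffUL);
	printf("table[0]=%#llx table[1]=%#llx\n",
	       (unsigned long long)table[0], (unsigned long long)table[1]);
	return 0;
}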
/*
* Setup the initial page tables. We only setup the barest amount which is
* required to get the kernel running. The following sections are required:
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled, including the FDT blob (TTBR1)
* - pgd entry for fixed mappings (TTBR1)
*/
__create_page_tables:
pgtbl_init x25, x26, x28
pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
mov x27, lr
/*
* Invalidate the idmap and swapper page tables to avoid potential
* dirty cache lines being evicted.
*/
mov x0, x25
add x1, x26, #SWAPPER_DIR_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
mov x0, x25
add x6, x26, #SWAPPER_DIR_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
cmp x0, x6
b.lo 1b
ldr x7, =MM_MMUFLAGS
/*
* Create the identity mapping.
*/
mov x0, x25 // idmap_pg_dir
ldr x3, =KERNEL_START
add x3, x3, x28 // __pa(KERNEL_START)
create_pgd_entry x0, x3, x5, x6
ldr x6, =KERNEL_END
mov x5, x3 // __pa(KERNEL_START)
add x6, x6, x28 // __pa(KERNEL_END)
create_block_map x0, x7, x3, x5, x6
/*
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
ldr x5, =KERNEL_START
create_pgd_entry x0, x5, x3, x6
ldr x6, =KERNEL_END
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
/*
* Map the early_alloc_pages area, kernel_img next block
*/
ldr x3, =KERNEL_END
add x3, x3, x28 // __pa(KERNEL_END)
add x3, x3, #BLOCK_SIZE
sub x3, x3, #1
bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc.
ldr x5, =KERNEL_END // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc.
mov x6, #MAP_EARLY_ALLOC_SIZE
add x6, x5, x6 // end VA calc
mov x23, x6 // save end VA
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
/*
* Map the boot_param area
*/
adr x3, ihk_param_param_addr
ldr x3, [x3] // get boot_param PA
mov x5, x23 // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc
mov x6, #MAP_BOOT_PARAM_SIZE
add x6, x5, x6 // end VA calc.
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
/*
* Map the FDT blob (maximum 2MB; must be within 512MB of
* PHYS_OFFSET).
*/
/* FDT disable for McKernel */
// mov x3, x21 // FDT phys address
// and x3, x3, #~((1 << 21) - 1) // 2MB aligned
// mov x6, #PAGE_OFFSET
// sub x5, x3, x24 // subtract PHYS_OFFSET
// tst x5, #~((1 << 29) - 1) // within 512MB?
// csel x21, xzr, x21, ne // zero the FDT pointer
// b.ne 1f
// add x5, x5, x6 // __va(FDT blob)
// add x6, x5, #1 << 21 // 2MB for the FDT blob
// sub x6, x6, #1 // inclusive range
// create_block_map x0, x7, x3, x5, x6
1:
/*
* Since the page tables have been populated with non-cacheable
* accesses (MMU disabled), invalidate the idmap and swapper page
* tables again to remove any speculatively loaded cache lines.
*/
mov x0, x25
add x1, x26, #SWAPPER_DIR_SIZE
bl __inval_cache_range
mov lr, x27
ret
ENDPROC(__create_page_tables)
.ltorg
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
* booted in EL1 or EL2 respectively.
*/
ENTRY(el2_setup)
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.ne 1f
mrs x0, sctlr_el2
CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2
CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2
msr sctlr_el2, x0
b 2f
1: mrs x0, sctlr_el1
CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1
CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
msr sctlr_el1, x0
mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
2:
#ifdef CONFIG_ARM64_VHE
/*
* Check for VHE being present. For the rest of the EL2 setup,
* x2 being non-zero indicates that we do have VHE, and that the
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
ubfx x2, x2, #8, #4
#else /* CONFIG_ARM64_VHE */
mov x2, xzr
#endif /* CONFIG_ARM64_VHE */
/* Hyp configuration. */
mov x0, #HCR_RW // 64-bit EL1
cbz x2, set_hcr
orr x0, x0, #HCR_TGE // Enable Host Extensions
orr x0, x0, #HCR_E2H
set_hcr:
msr hcr_el2, x0
isb
/* Generic timers. */
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
ubfx x0, x0, #24, #4
cmp x0, #1
b.ne 3f
mrs_s x0, ICC_SRE_EL2
orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
msr_s ICC_SRE_EL2, x0
isb // Make sure SRE is now set
msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
3:
#endif
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
msr vpidr_el2, x0
msr vmpidr_el2, x1
/*
* When VHE is not in use, early init of EL2 and EL1 needs to be
* done here.
* When VHE _is_ in use, EL1 will not be used in the host and
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
cbnz x2, 1f
/* sctlr_el1 */
mov x0, #0x0800 // Set/clear RES{1,0} bits
CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems
CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
msr sctlr_el1, x0
/* Coprocessor traps. */
mov x0, #0x33ff
/* SVE register access */
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, 4f
bic x0, x0, #CPTR_EL2_TZ // Disable SVE traps to EL2
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_EL1_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL1, x1 // length for EL1.
b 1f
4: msr cptr_el2, x0 // Disable copro. traps to EL2
1:
#ifdef CONFIG_COMPAT
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
/* Stage-2 translation */
msr vttbr_el2, xzr
cbz x2, install_el2_stub
mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
isb
ret
install_el2_stub:
/* Hypervisor stub */
adrp x0, __hyp_stub_vectors
add x0, x0, #:lo12:__hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, x0
msr elr_el2, lr
mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
eret
ENDPROC(el2_setup)
/*
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in x20. See arch/arm64/include/asm/virt.h for more info.
*/
ENTRY(set_cpu_boot_mode_flag)
ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode
add x1, x1, x28
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
1: str w20, [x1] // This CPU has booted in EL1
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
#if defined(CONFIG_HAS_NMI)
/*
* void maybe_switch_to_sysreg_gic_cpuif(void)
*
* Enable interrupt controller system register access if this feature
* has been detected by the alternatives system.
*
* Before we jump into generic code we must enable interrupt controller system
* register access because this is required by the irqflags macros. We must
* also mask interrupts at the PMR and unmask them within the PSR. That leaves
* us set up and ready for the kernel to make its first call to
* arch_local_irq_enable().
*
*/
ENTRY(maybe_switch_to_sysreg_gic_cpuif)
mrs_s x0, ICC_SRE_EL1
orr x0, x0, #1
msr_s ICC_SRE_EL1, x0 // Set ICC_SRE_EL1.SRE==1
isb // Make sure SRE is now set
mov x0, ICC_PMR_EL1_MASKED
msr_s ICC_PMR_EL1, x0 // Prepare for unmask of I bit
msr daifclr, #2 // Clear the I bit
ret
ENDPROC(maybe_switch_to_sysreg_gic_cpuif)
#else
ENTRY(maybe_switch_to_sysreg_gic_cpuif)
ret
ENDPROC(maybe_switch_to_sysreg_gic_cpuif)
#endif /* defined(CONFIG_HAS_NMI) */
/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
.pushsection .data..cacheline_aligned
ENTRY(__boot_cpu_mode)
.align L1_CACHE_SHIFT
.long BOOT_CPU_MODE_EL2
.long 0
.popsection
ENTRY(secondary_entry_common)
bl el2_setup // Drop to EL1
bl set_cpu_boot_mode_flag
b secondary_startup
ENDPROC(secondary_entry_common)
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
mrs x22, midr_el1 // x22=cpuid
mov x0, x22
bl lookup_processor_type
mov x23, x0 // x23=current cpu_table
cbz x23, __error_p // invalid processor (x23=0)?
pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
blr x12 // initialise processor
ldr x21, =secondary_data
ldr x27, =__secondary_switched // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
ENTRY(__secondary_switched)
ldr x0, [x21, #SECONDARY_DATA_STACK] // get secondary_data.stack
mov sp, x0
/*
* Conditionally switch to GIC PMR for interrupt masking (this
* will be a nop if we are using normal interrupt masking)
*/
bl maybe_switch_to_sysreg_gic_cpuif
mov x29, #0
adr x1, secondary_data
ldr x0, [x1, #SECONDARY_DATA_ARG] // get secondary_data.arg
ldr x27, [x1, #SECONDARY_DATA_NEXT_PC] // get secondary_data.next_pc
br x27 // secondary_data.next_pc(secondary_data.arg);
ENDPROC(__secondary_switched)
/*
* Setup common bits before finally enabling the MMU. Essentially this is just
* loading the page table pointer and vector base registers.
*
* On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
* the MMU.
*/
__enable_mmu:
ldr x5, =vectors
msr vbar_el1, x5
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
b __turn_mmu_on
ENDPROC(__enable_mmu)
/*
* Enable the MMU. This completely changes the structure of the visible memory
* space. You will not be able to trace execution through this.
*
* x0 = system control register
* x27 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
*
* We align the entire function to the smallest power of two larger than it to
* ensure it fits within a single block map entry. Otherwise, were PHYS_OFFSET
* close to the end of a 512MB or 1GB block, we might require an additional
* table to map the entire function.
*/
.align 4
__turn_mmu_on:
msr sctlr_el1, x0
isb
br x27
ENDPROC(__turn_mmu_on)
/*
* Calculate the start of physical memory.
*/
__calc_phys_offset:
adr x0, 1f
ldp x1, x2, [x0]
sub x28, x0, x1 // x28 = PHYS_OFFSET - KERNEL_START
add x24, x2, x28 // x24 = PHYS_OFFSET
ret
ENDPROC(__calc_phys_offset)
.align 3
1: .quad .
.quad KERNEL_START
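__calc_phys_offset works because adr yields the run-time (physical, MMU off) address of label 1, while the literal stored at that label holds its link-time (virtual) address; subtracting the two gives the virt-to-phys delta. A standalone sketch of the arithmetic with made-up placeholder addresses:
/* Illustrative sketch only, not part of head.S: the arithmetic behind
 * __calc_phys_offset. All addresses are assumed placeholders. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t label_va = 0xffffffffc0001000ULL;	/* assumed link-time address of "1:" */
	uint64_t label_pa = 0x0000000080001000ULL;	/* assumed load-time address of "1:" */
	uint64_t kernel_start = 0xffffffffc0000000ULL;	/* assumed KERNEL_RAM_VADDR */

	uint64_t x28 = label_pa - label_va;	/* PHYS_OFFSET - KERNEL_START (mod 2^64) */
	uint64_t x24 = kernel_start + x28;	/* PHYS_OFFSET */

	printf("virt-to-phys delta = %#llx, PHYS_OFFSET = %#llx\n",
	       (unsigned long long)x28, (unsigned long long)x24);
	return 0;
}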
/*
* Exception handling. Something went wrong and we can't proceed. We ought to
* tell the user, but since we don't have any guarantee that we're even
* running on the right architecture, we do virtually nothing.
*/
__error_p:
ENDPROC(__error_p)
__error:
1: nop
b 1b
ENDPROC(__error)
/*
* This function gets the processor ID in w0 and searches the cpu_table[] for
* a match. It returns a pointer to the struct cpu_info it found. The
* cpu_table[] must end with an empty (all zeros) structure.
*
* This routine can be called via C code and it needs to work with the MMU
* both disabled and enabled (the offset is calculated automatically).
*/
ENTRY(lookup_processor_type)
adr x1, __lookup_processor_type_data
ldp x2, x3, [x1]
sub x1, x1, x2 // get offset between VA and PA
add x3, x3, x1 // convert VA to PA
1:
ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask
cbz w5, 2f // end of list?
and w6, w6, w0
cmp w5, w6
b.eq 3f
add x3, x3, #CPU_INFO_SZ
b 1b
2:
mov x3, #0 // unknown processor
3:
mov x0, x3
ret
ENDPROC(lookup_processor_type)
.align 3
.type __lookup_processor_type_data, %object
__lookup_processor_type_data:
.quad .
.quad cpu_table
.size __lookup_processor_type_data, . - __lookup_processor_type_data

@ -0,0 +1,410 @@
/* hw_breakpoint.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <ihk/debug.h>
#include <cputype.h>
#include <errno.h>
#include <elfcore.h>
#include <ptrace.h>
#include <hw_breakpoint.h>
#include <arch-memory.h>
#include <signal.h>
#include <process.h>
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::core_num_[brps|wrps] */
/* Number of BRP/WRP registers on this CPU. */
int core_num_brps;
int core_num_wrps;
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::get_num_brps */
/* Determine number of BRP registers available. */
int get_num_brps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::get_num_wrps */
/* Determine number of WRP registers available. */
int get_num_wrps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
}
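Both counts come from ID_AA64DFR0_EL1, which reports the number of breakpoint and watchpoint register pairs minus one in bits [15:12] and [23:20] respectively. A standalone sketch with an assumed sample register value:
/* Illustrative sketch only: extracting the BRP/WRP counts from an assumed
 * ID_AA64DFR0_EL1 sample value (each field holds "count minus one"). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t id_aa64dfr0 = 0x0000000000505006ULL;	/* assumed sample value */
	int brps = (int)((id_aa64dfr0 >> 12) & 0xf) + 1;
	int wrps = (int)((id_aa64dfr0 >> 20) & 0xf) + 1;

	printf("breakpoint registers = %d, watchpoint registers = %d\n", brps, wrps);
	return 0;
}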
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::hw_breakpoint_slots */
int hw_breakpoint_slots(int type)
{
/*
* We can be called early, so don't rely on
* our static variables being initialised.
*/
switch (type) {
case TYPE_INST:
return get_num_brps();
case TYPE_DATA:
return get_num_wrps();
default:
kprintf("unknown slot type: %d\n", type);
return 0;
}
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::READ_WB_REG_CASE */
#define READ_WB_REG_CASE(OFF, N, REG, VAL) \
case (OFF + N): \
AARCH64_DBG_READ(N, REG, VAL); \
break
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::READ_WB_REG_CASE */
#define WRITE_WB_REG_CASE(OFF, N, REG, VAL) \
case (OFF + N): \
AARCH64_DBG_WRITE(N, REG, VAL); \
break
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::GEN_READ_WB_REG_CASES */
#define GEN_READ_WB_REG_CASES(OFF, REG, VAL) \
READ_WB_REG_CASE(OFF, 0, REG, VAL); \
READ_WB_REG_CASE(OFF, 1, REG, VAL); \
READ_WB_REG_CASE(OFF, 2, REG, VAL); \
READ_WB_REG_CASE(OFF, 3, REG, VAL); \
READ_WB_REG_CASE(OFF, 4, REG, VAL); \
READ_WB_REG_CASE(OFF, 5, REG, VAL); \
READ_WB_REG_CASE(OFF, 6, REG, VAL); \
READ_WB_REG_CASE(OFF, 7, REG, VAL); \
READ_WB_REG_CASE(OFF, 8, REG, VAL); \
READ_WB_REG_CASE(OFF, 9, REG, VAL); \
READ_WB_REG_CASE(OFF, 10, REG, VAL); \
READ_WB_REG_CASE(OFF, 11, REG, VAL); \
READ_WB_REG_CASE(OFF, 12, REG, VAL); \
READ_WB_REG_CASE(OFF, 13, REG, VAL); \
READ_WB_REG_CASE(OFF, 14, REG, VAL); \
READ_WB_REG_CASE(OFF, 15, REG, VAL)
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::GEN_WRITE_WB_REG_CASES */
#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL) \
WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \
WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::read_wb_reg */
unsigned long read_wb_reg(int reg, int n)
{
unsigned long val = 0;
switch (reg + n) {
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
kprintf("attempt to read from unknown breakpoint register %d\n", n);
}
return val;
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::write_wb_reg */
void write_wb_reg(int reg, int n, unsigned long val)
{
switch (reg + n) {
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
kprintf("attempt to write to unknown breakpoint register %d\n", n);
}
isb();
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::hw_breakpoint_reset */
void hw_breakpoint_reset(void)
{
int i = 0;
/* clear DBGBVR<n>_EL1 and DBGBCR<n>_EL1 (n=0-(core_num_brps-1)) */
for (i = 0; i < core_num_brps; i++) {
write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL);
write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL);
}
/* clear DBGWVR<n>_EL1 and DBGWCR<n>_EL1 (n=0-(core_num_wrps-1)) */
for (i = 0; i < core_num_wrps; i++) {
write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL);
}
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_hw_breakpoint_init */
void arch_hw_breakpoint_init(void)
{
struct user_hwdebug_state hws;
int max_hws_dbg_regs = sizeof(hws.dbg_regs) / sizeof(hws.dbg_regs[0]);
core_num_brps = get_num_brps();
core_num_wrps = get_num_wrps();
if (max_hws_dbg_regs < core_num_brps) {
kprintf("debugreg struct size is less than Determine number of BRP registers available.\n");
core_num_brps = max_hws_dbg_regs;
}
if (max_hws_dbg_regs < core_num_wrps) {
kprintf("debugreg struct size is less than Determine number of WRP registers available.\n");
core_num_wrps = max_hws_dbg_regs;
}
hw_breakpoint_reset();
}
struct arch_hw_breakpoint_ctrl {
unsigned int __reserved : 19,
len : 8,
type : 2,
privilege : 2,
enabled : 1;
};
static inline unsigned int encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
}
static inline void decode_ctrl_reg(unsigned int reg, struct arch_hw_breakpoint_ctrl *ctrl)
{
ctrl->enabled = reg & 0x1;
reg >>= 1;
ctrl->privilege = reg & 0x3;
reg >>= 2;
ctrl->type = reg & 0x3;
reg >>= 2;
ctrl->len = reg & 0xff;
}
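encode_ctrl_reg() and decode_ctrl_reg() are inverses over the enabled/privilege/type/len bitfields. A standalone round-trip sketch; the numeric constants below (type 2 = store, privilege 2 = EL0, len 0xff = 8 bytes) mirror typical ARM_BREAKPOINT_* / AARCH64_BREAKPOINT_* values and are assumptions here:
/* Illustrative sketch only: round-tripping a control word through the same
 * bitfield scheme as encode_ctrl_reg()/decode_ctrl_reg(). */
#include <stdio.h>

struct ctrl { unsigned int len, type, privilege, enabled; };

static unsigned int encode(struct ctrl c)
{
	return (c.len << 5) | (c.type << 3) | (c.privilege << 1) | c.enabled;
}

static struct ctrl decode(unsigned int reg)
{
	struct ctrl c;

	c.enabled = reg & 0x1;    reg >>= 1;
	c.privilege = reg & 0x3;  reg >>= 2;
	c.type = reg & 0x3;       reg >>= 2;
	c.len = reg & 0xff;
	return c;
}

int main(void)
{
	struct ctrl c = { .len = 0xff, .type = 2, .privilege = 2, .enabled = 1 };
	unsigned int reg = encode(c);
	struct ctrl back = decode(reg);

	printf("reg=%#x -> len=%#x type=%u privilege=%u enabled=%u\n",
	       reg, back.len, back.type, back.privilege, back.enabled);
	return 0;
}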
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_bp_generic_fields */
/*
* Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
* Hopefully this will disappear when ptrace can bypass the conversion
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
int *gen_len, int *gen_type)
{
/* Type */
switch (ctrl.type) {
case ARM_BREAKPOINT_EXECUTE:
*gen_type = HW_BREAKPOINT_X;
break;
case ARM_BREAKPOINT_LOAD:
*gen_type = HW_BREAKPOINT_R;
break;
case ARM_BREAKPOINT_STORE:
*gen_type = HW_BREAKPOINT_W;
break;
case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE:
*gen_type = HW_BREAKPOINT_RW;
break;
default:
return -EINVAL;
}
/* Len */
switch (ctrl.len) {
case ARM_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
case ARM_BREAKPOINT_LEN_2:
*gen_len = HW_BREAKPOINT_LEN_2;
break;
case ARM_BREAKPOINT_LEN_4:
*gen_len = HW_BREAKPOINT_LEN_4;
break;
case ARM_BREAKPOINT_LEN_8:
*gen_len = HW_BREAKPOINT_LEN_8;
break;
default:
return -EINVAL;
}
return 0;
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_check_bp_in_kernelspace */
/*
* Check whether bp virtual address is in kernel space.
*/
int arch_check_bp_in_kernelspace(unsigned long addr, unsigned int len)
{
return (addr >= USER_END) && ((addr + len - 1) >= USER_END);
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::arch_validate_hwbkpt_settings */
int arch_validate_hwbkpt_settings(long note_type, struct user_hwdebug_state *hws, size_t len)
{
int i;
unsigned long alignment_mask;
size_t cpysize, cpynum;
switch(note_type) {
case NT_ARM_HW_BREAK: /* breakpoint */
alignment_mask = 0x3;
break;
case NT_ARM_HW_WATCH: /* watchpoint */
alignment_mask = 0x7;
break;
default:
return -EINVAL;
}
cpysize = len - offsetof(struct user_hwdebug_state, dbg_regs[0]);
cpynum = cpysize / sizeof(hws->dbg_regs[0]);
for (i = 0; i < cpynum; i++) {
unsigned long addr = hws->dbg_regs[i].addr;
unsigned int uctrl = hws->dbg_regs[i].ctrl;
struct arch_hw_breakpoint_ctrl ctrl;
int err, len, type;
/* empty dbg_regs check skip */
if (addr == 0 && uctrl == 0) {
continue;
}
/* check address alignment */
if (addr & alignment_mask) {
return -EINVAL;
}
/* decode control bit */
decode_ctrl_reg(uctrl, &ctrl);
/* disabled, continue */
if (!ctrl.enabled) {
continue;
}
err = arch_bp_generic_fields(ctrl, &len, &type);
if (err) {
return err;
}
/* type check */
switch (note_type) {
case NT_ARM_HW_BREAK: /* breakpoint */
if ((type & HW_BREAKPOINT_X) != type) {
return -EINVAL;
}
break;
case NT_ARM_HW_WATCH: /* watchpoint */
if ((type & HW_BREAKPOINT_RW) != type) {
return -EINVAL;
}
break;
default:
return -EINVAL;
}
/* privilege generate */
if (arch_check_bp_in_kernelspace(addr, len)) {
/* kernel space breakpoint unsupported. */
return -EINVAL;
} else {
ctrl.privilege = AARCH64_BREAKPOINT_EL0;
}
/* ctrl check OK. */
hws->dbg_regs[i].ctrl = encode_ctrl_reg(ctrl);
}
return 0;
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::breakpoint_handler */
/*
* Debug exception handlers.
*/
int breakpoint_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs)
{
int i = 0;
unsigned long val;
unsigned int ctrl_reg;
struct arch_hw_breakpoint_ctrl ctrl;
siginfo_t info;
for (i = 0; i < core_num_brps; i++) {
/* Check if the breakpoint value matches. */
val = read_wb_reg(AARCH64_DBG_REG_BVR, i);
if (val != (regs->pc & ~0x3)) {
continue;
}
/* Possible match, check the byte address select to confirm. */
ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i);
decode_ctrl_reg(ctrl_reg, &ctrl);
if (!((1 << (regs->pc & 0x3)) & ctrl.len)) {
continue;
}
/* send SIGTRAP */
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
info._sifields._sigfault.si_addr = (void *)regs->pc;
set_signal(SIGTRAP, regs, &info);
}
return 0;
}
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::watchpoint_handler */
int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
int i = 0;
int access;
unsigned long val;
unsigned int ctrl_reg;
struct arch_hw_breakpoint_ctrl ctrl;
siginfo_t info;
for (i = 0; i < core_num_wrps; i++) {
/* Check if the watchpoint value matches. */
val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
if (val != (addr & ~0x7)) {
continue;
}
/* Possible match, check the byte address select to confirm. */
ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
decode_ctrl_reg(ctrl_reg, &ctrl);
if (!((1 << (addr & 0x7)) & ctrl.len)) {
continue;
}
/*
* Check that the access type matches.
* 0 => load, otherwise => store
*/
access = (esr & AARCH64_ESR_ACCESS_MASK) ? ARM_BREAKPOINT_STORE :
ARM_BREAKPOINT_LOAD;
if (!(access & ctrl.type)) {
continue;
}
/* send SIGTRAP */
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
info._sifields._sigfault.si_addr = (void *)addr;
set_signal(SIGTRAP, regs, &info);
}
return 0;
}

@ -0,0 +1,58 @@
/* hyp-stub.S COPYRIGHT FUJITSU LIMITED 2015 */
#include <linkage.h>
#include <assembler.h>
.text
.align 11
ENTRY(__hyp_stub_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
ventry el2_sync_invalid // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
ventry el1_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
ventry el1_sync_invalid // Synchronous 32-bit EL1
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
ENDPROC(__hyp_stub_vectors)
.align 11
el1_sync:
mrs x1, esr_el2
lsr x1, x1, #26
cmp x1, #0x16
b.ne 2f // Not an HVC trap
cbz x0, 1f
msr vbar_el2, x0 // Set vbar_el2
b 2f
1: mrs x0, vbar_el2 // Return vbar_el2
2: eret
ENDPROC(el1_sync)
.macro invalid_vector label
\label:
b \label
ENDPROC(\label)
.endm
invalid_vector el2_sync_invalid
invalid_vector el2_irq_invalid
invalid_vector el2_fiq_invalid
invalid_vector el2_error_invalid
invalid_vector el1_sync_invalid
invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid

@ -0,0 +1,131 @@
/* imp-sysreg.c COPYRIGHT FUJITSU LIMITED 2018 */
#include <sysreg.h>
/* hpc */
ACCESS_REG_FUNC(fj_tag_address_ctrl_el1, IMP_FJ_TAG_ADDRESS_CTRL_EL1);
ACCESS_REG_FUNC(pf_ctrl_el1, IMP_PF_CTRL_EL1);
ACCESS_REG_FUNC(pf_stream_detect_ctrl_el0, IMP_PF_STREAM_DETECT_CTRL_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl0_el0, IMP_PF_INJECTION_CTRL0_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl1_el0, IMP_PF_INJECTION_CTRL1_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl2_el0, IMP_PF_INJECTION_CTRL2_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl3_el0, IMP_PF_INJECTION_CTRL3_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl4_el0, IMP_PF_INJECTION_CTRL4_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl5_el0, IMP_PF_INJECTION_CTRL5_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl6_el0, IMP_PF_INJECTION_CTRL6_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl7_el0, IMP_PF_INJECTION_CTRL7_EL0);
ACCESS_REG_FUNC(pf_injection_distance0_el0, IMP_PF_INJECTION_DISTANCE0_EL0);
ACCESS_REG_FUNC(pf_injection_distance1_el0, IMP_PF_INJECTION_DISTANCE1_EL0);
ACCESS_REG_FUNC(pf_injection_distance2_el0, IMP_PF_INJECTION_DISTANCE2_EL0);
ACCESS_REG_FUNC(pf_injection_distance3_el0, IMP_PF_INJECTION_DISTANCE3_EL0);
ACCESS_REG_FUNC(pf_injection_distance4_el0, IMP_PF_INJECTION_DISTANCE4_EL0);
ACCESS_REG_FUNC(pf_injection_distance5_el0, IMP_PF_INJECTION_DISTANCE5_EL0);
ACCESS_REG_FUNC(pf_injection_distance6_el0, IMP_PF_INJECTION_DISTANCE6_EL0);
ACCESS_REG_FUNC(pf_injection_distance7_el0, IMP_PF_INJECTION_DISTANCE7_EL0);
static void hpc_prefetch_regs_init(void)
{
uint64_t reg = 0;
/* PF_CTRL_EL1 */
reg = IMP_PF_CTRL_EL1_EL1AE_ENABLE | IMP_PF_CTRL_EL1_EL0AE_ENABLE;
xos_access_pf_ctrl_el1(WRITE_ACCESS, &reg);
/* PF_STREAM_DETECT_CTRL */
reg = 0;
xos_access_pf_stream_detect_ctrl_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_CTRL */
reg = 0;
xos_access_pf_injection_ctrl0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl7_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_DISTANCE */
reg = 0;
xos_access_pf_injection_distance0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance7_el0(WRITE_ACCESS, &reg);
}
static void hpc_tag_address_regs_init(void)
{
uint64_t reg = IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK;
/* FJ_TAG_ADDRESS_CTRL */
xos_access_fj_tag_address_ctrl_el1(WRITE_ACCESS, &reg);
}
void hpc_registers_init(void)
{
hpc_prefetch_regs_init();
hpc_tag_address_regs_init();
}
/* vhbm */
ACCESS_REG_FUNC(barrier_ctrl_el1, IMP_BARRIER_CTRL_EL1);
ACCESS_REG_FUNC(barrier_bst_bit_el1, IMP_BARRIER_BST_BIT_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb0_el1, IMP_BARRIER_INIT_SYNC_BB0_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb1_el1, IMP_BARRIER_INIT_SYNC_BB1_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb2_el1, IMP_BARRIER_INIT_SYNC_BB2_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb3_el1, IMP_BARRIER_INIT_SYNC_BB3_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb4_el1, IMP_BARRIER_INIT_SYNC_BB4_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb5_el1, IMP_BARRIER_INIT_SYNC_BB5_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w0_el1, IMP_BARRIER_ASSIGN_SYNC_W0_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w1_el1, IMP_BARRIER_ASSIGN_SYNC_W1_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w2_el1, IMP_BARRIER_ASSIGN_SYNC_W2_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w3_el1, IMP_BARRIER_ASSIGN_SYNC_W3_EL1);
void vhbm_barrier_registers_init(void)
{
uint64_t reg = 0;
reg = IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE |
IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE;
xos_access_barrier_ctrl_el1(WRITE_ACCESS, &reg);
reg = 0;
xos_access_barrier_init_sync_bb0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb3_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb4_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb5_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w3_el1(WRITE_ACCESS, &reg);
}
/* sccr */
ACCESS_REG_FUNC(sccr_ctrl_el1, IMP_SCCR_CTRL_EL1);
ACCESS_REG_FUNC(sccr_assign_el1, IMP_SCCR_ASSIGN_EL1);
ACCESS_REG_FUNC(sccr_set0_l2_el1, IMP_SCCR_SET0_L2_EL1);
ACCESS_REG_FUNC(sccr_l1_el0, IMP_SCCR_L1_EL0);
void scdrv_registers_init(void)
{
uint64_t reg = 0;
reg = IMP_SCCR_CTRL_EL1_EL1AE_MASK;
xos_access_sccr_ctrl_el1(WRITE_ACCESS, &reg);
reg = 0;
xos_access_sccr_assign_el1(WRITE_ACCESS, &reg);
xos_access_sccr_l1_el0(WRITE_ACCESS, &reg);
reg = (14UL << IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT);
xos_access_sccr_set0_l2_el1(WRITE_ACCESS, &reg);
}

@ -0,0 +1,19 @@
/* arch-bitops.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_COMMON_BITOPS_H
#define __HEADER_ARM64_COMMON_BITOPS_H
#ifndef INCLUDE_BITOPS_H
# error only <bitops.h> can be included directly
#endif
#ifndef __ASSEMBLY__
#include "bitops-fls.h"
#include "bitops-__ffs.h"
#include "bitops-ffz.h"
#include "bitops-set_bit.h"
#include "bitops-clear_bit.h"
#endif /*__ASSEMBLY__*/
#endif /* !__HEADER_ARM64_COMMON_BITOPS_H */

@ -0,0 +1,146 @@
/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_FUTEX_H
#define __HEADER_ARM64_COMMON_ARCH_FUTEX_H
/*
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:__futex_atomic_op
*/
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
asm volatile( \
"1: ldxr %w1, %2\n" \
insn "\n" \
"2: stlxr %w3, %w0, %2\n" \
" cbnz %w3, 1b\n" \
" dmb ish\n" \
"3:\n" \
" .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"4: mov %w0, %w5\n" \
" b 3b\n" \
" .popsection\n" \
" .pushsection __ex_table,\"a\"\n" \
" .align 3\n" \
" .quad 1b, 4b, 2b, 4b\n" \
" .popsection\n" \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
: "r" (oparg), "Ir" (-EFAULT) \
: "memory")
/*
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_op_inuser
*/
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
#endif
// pagefault_disable(); /* implies preempt_disable() */
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("mov %w0, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ADD:
__futex_atomic_op("add %w0, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_OR:
__futex_atomic_op("orr %w0, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op("and %w0, %w1, %w4",
ret, oldval, uaddr, tmp, ~oparg);
break;
case FUTEX_OP_XOR:
__futex_atomic_op("eor %w0, %w1, %w4",
ret, oldval, uaddr, tmp, oparg);
break;
default:
ret = -ENOSYS;
}
// pagefault_enable(); /* subsumes preempt_enable() */
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
default: ret = -ENOSYS;
}
}
return ret;
}
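The field extraction at the top of futex_atomic_op_inuser() undoes the FUTEX_OP() packing used by callers (op in bits [31:28], cmp in [27:24], oparg in [23:12], cmparg in [11:0]). A standalone decoding sketch; the FUTEX_OP_ADD / FUTEX_OP_CMP_GT values mirror linux/futex.h and are assumptions here:
/* Illustrative sketch only: the FUTEX_OP() packing that the extraction
 * above undoes. */
#include <stdio.h>

#define FUTEX_OP(op, oparg, cmp, cmparg) \
	((((op) & 0xf) << 28) | (((cmp) & 0xf) << 24) | \
	 (((oparg) & 0xfff) << 12) | ((cmparg) & 0xfff))

int main(void)
{
	int encoded_op = FUTEX_OP(1 /* FUTEX_OP_ADD */, 1, 5 /* FUTEX_OP_CMP_GT */, 0);
	int op = (encoded_op >> 28) & 7;
	int cmp = (encoded_op >> 24) & 15;
	int oparg = (encoded_op & 0x00fff000) >> 12;
	int cmparg = encoded_op & 0xfff;

	printf("op=%d cmp=%d oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
	return 0;
}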
/*
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_cmpxchg_inatomic
* mckernel/kernel/include/futex.h:futex_atomic_cmpxchg_inatomic (x86 depend)
*/
static inline int
futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
{
int ret = 0;
int val, tmp;
if(uaddr == NULL) {
return -EFAULT;
}
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) {
return -EFAULT;
}
#endif
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
"1: ldxr %w1, %2\n"
" sub %w3, %w1, %w4\n"
" cbnz %w3, 3f\n"
"2: stlxr %w3, %w5, %2\n"
" cbnz %w3, 1b\n"
" dmb ish\n"
"3:\n"
" .pushsection .fixup,\"ax\"\n"
"4: mov %w0, %w6\n"
" b 3b\n"
" .popsection\n"
" .pushsection __ex_table,\"a\"\n"
" .align 3\n"
" .quad 1b, 4b, 2b, 4b\n"
" .popsection\n"
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
: "memory");
return ret;
}
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
*dest = *(volatile uint32_t *)from;
return 0;
}
#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */

@ -0,0 +1,760 @@
/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_LOCK_H
#define __HEADER_ARM64_COMMON_ARCH_LOCK_H
#define IHK_STATIC_SPINLOCK_FUNCS
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
int __kprintf(const char *format, ...);
#endif
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::TICKET_SHIFT */
#define TICKET_SHIFT 16
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::arch_spinlock_t */
typedef struct {
#ifdef __AARCH64EB__
uint16_t next;
uint16_t owner;
#else /* __AARCH64EB__ */
uint16_t owner;
uint16_t next;
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::__ARCH_SPIN_LOCK_UNLOCKED */
#define SPIN_LOCK_UNLOCKED { 0, 0 }
/* @ref.impl arch/arm64/include/asm/barrier.h::__nops */
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
/* @ref.impl ./arch/arm64/include/asm/lse.h::ARM64_LSE_ATOMIC_INSN */
/* else defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
/* initialized spinlock struct */
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = (ihk_spinlock_t)SPIN_LOCK_UNLOCKED;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) ({ \
int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", \
ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", \
ihk_mc_get_processor_id()); \
rc; \
})
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_trylock */
/* spinlock trylock */
static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
ihk_spinlock_t lockval;
int success;
preempt_disable();
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldaxr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 2f\n"
" add %w0, %w0, %3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:",
/* LSE atomics */
" ldr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
" sub %w1, %w1, %3\n"
" eor %w1, %w1, %w0\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
: "memory");
success = !tmp;
if (!success) {
preempt_enable();
}
return success;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ \
unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", \
ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", \
ihk_mc_get_processor_id()); \
rc; \
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
/* spinlock trylock & interrupt disable & PSTATE.DAIF save */
static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock,
int *result)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
*result = __ihk_mc_spinlock_trylock_noirq(lock);
return flags;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_lock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */
/* spinlock lock */
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
ihk_spinlock_t lockval, newval;
preempt_disable();
asm volatile(
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n"
"1: ldaxr %w0, %3\n"
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
__nops(3)
)
/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
/*
* No: spin on the owner. Send a local event to avoid missing an
* unlock before the exclusive load.
*/
" sevl\n"
"2: wfe\n"
" ldaxrh %w2, %4\n"
" eor %w1, %w2, %w0, lsr #16\n"
" cbnz %w1, 2b\n"
/* We got the lock. Critical section starts here. */
"3:"
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_lock(l);\
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
/* spinlock lock & interrupt disable & PSTATE.DAIF save */
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
__ihk_mc_spinlock_lock_noirq(lock);
return flags;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */
/* spinlock unlock */
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
unsigned long tmp;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" ldrh %w1, %0\n"
" add %w1, %w1, #1\n"
" stlrh %w1, %0",
/* LSE atomics */
" mov %w1, #1\n"
" staddlh %w1, %0\n"
__nops(1))
: "=Q" (lock->owner), "=&r" (tmp)
:
: "memory");
preempt_enable();
}
/* spinlock unlock & restore PSTATE.DAIF */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock(l, f) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock((l), (f)); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
__ihk_mc_spinlock_unlock_noirq(lock);
cpu_restore_interrupt(flags);
}
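These primitives implement a ticket lock: the acquirer atomically takes the current next value as its ticket and spins until owner reaches it, and unlock advances owner. A minimal C11-atomics sketch of the same discipline, busy-waiting instead of the wfe/sevl used in the assembly:
/* Illustrative sketch only: the next/owner ticket discipline behind the
 * primitives above, written with C11 atomics. */
#include <stdatomic.h>
#include <stdint.h>

struct ticket_lock {
	_Atomic uint16_t owner;	/* ticket currently being served */
	_Atomic uint16_t next;	/* next ticket to hand out */
};

static void ticket_lock(struct ticket_lock *l)
{
	uint16_t my = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);

	while (atomic_load_explicit(&l->owner, memory_order_acquire) != my)
		;	/* spin (the assembly waits with sevl/wfe instead) */
}

static void ticket_unlock(struct ticket_lock *l)
{
	atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
}

int main(void)
{
	struct ticket_lock l = { 0, 0 };

	ticket_lock(&l);
	/* critical section */
	ticket_unlock(&l);
	return 0;
}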
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
typedef struct mcs_rwlock_node {
ihk_atomic_t count; // num of readers (use only common reader)
char type; // lock type
#define MCS_RWLOCK_TYPE_COMMON_READER 0
#define MCS_RWLOCK_TYPE_READER 1
#define MCS_RWLOCK_TYPE_WRITER 2
char locked; // lock
#define MCS_RWLOCK_LOCKED 1
#define MCS_RWLOCK_UNLOCKED 0
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_t;
#else
} mcs_rwlock_node_t;
#endif
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_irqsave_t;
#else
} mcs_rwlock_node_irqsave_t;
#endif
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_spinlock_t slock;
#else
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_lock_t;
#else
} mcs_rwlock_lock_t;
#endif
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_init(&lock->slock);
#else
ihk_atomic_set(&lock->reader.count, 0);
lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
lock->node = NULL;
#endif
}
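As a usage sketch, each acquisition supplies its own queue node (the node goes unused in the SPINLOCK_IN_MCS_RWLOCK configuration, but the API shape is the same); example_lock, shared_counter, and the helper names below are hypothetical, and the lock is assumed to have been set up with mcs_rwlock_init():
/* Illustrative usage sketch only, using the reader/writer primitives
 * defined below; example_lock must be initialized with mcs_rwlock_init(). */
static struct mcs_rwlock_lock example_lock;
static int shared_counter;

static void example_reader(void)
{
	struct mcs_rwlock_node node;

	mcs_rwlock_reader_lock_noirq(&example_lock, &node);
	(void)shared_counter;		/* read-side critical section */
	mcs_rwlock_reader_unlock_noirq(&example_lock, &node);
}

static void example_writer(void)
{
	struct mcs_rwlock_node node;

	mcs_rwlock_writer_lock_noirq(&example_lock, &node);
	shared_counter++;		/* write-side critical section */
	mcs_rwlock_writer_unlock_noirq(&example_lock, &node);
}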
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
static void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_lock_noirq(&lock->slock);
#else
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_WRITER;
node->next = NULL;
pred = xchg8(&(lock->node), node);
if (pred) {
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
#endif
}
#ifndef SPINLOCK_IN_MCS_RWLOCK
static void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
struct mcs_rwlock_node *f = NULL;
struct mcs_rwlock_node *n;
int breakf = 0;
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
for(p = &lock->reader; p->next; p = n){
n = p->next;
if(p->next->type == MCS_RWLOCK_TYPE_READER){
p->next = n->next;
if(lock->node == n){
struct mcs_rwlock_node *old;
old = atomic_cmpxchg8(&(lock->node), n, p);
if(old != n){ // couldn't change
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
else{
breakf = 1;
}
}
else if(p->next == NULL){
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
if(f){
ihk_atomic_inc(&lock->reader.count);
n->locked = MCS_RWLOCK_UNLOCKED;
}
else
f = n;
n = p;
if(breakf)
break;
}
if(n->next == NULL && lock->node != n){
while (n->next == NULL && lock->node != n) {
cpu_pause();
}
}
}
f->locked = MCS_RWLOCK_UNLOCKED;
}
#endif
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
static void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock_noirq(&lock->slock);
#else
if (node->next == NULL) {
struct mcs_rwlock_node *old = atomic_cmpxchg8(&(lock->node), node, 0);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
}
if(node->next->type == MCS_RWLOCK_TYPE_READER){
lock->reader.next = node->next;
mcs_rwlock_unlock_readers(lock);
}
else{
node->next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
#endif
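/*
 * Atomically increment *v unless it is currently zero; returns the old
 * value, so a return of 0 means no increment was performed. Used by the
 * reader fast path below to join an already-granted reader group.
 */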
static inline unsigned int
atomic_inc_ifnot0(ihk_atomic_t *v)
{
unsigned int *p = (unsigned int *)(&(v)->counter);
unsigned int old;
unsigned int new;
unsigned int val;
do{
if(!(old = *p))
break;
new = old + 1;
val = atomic_cmpxchg4(p, old, new);
}while(val != old);
return old;
}
static void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_lock_noirq(&lock->slock);
#else
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_READER;
node->next = NULL;
node->dmy1 = ihk_mc_get_processor_id();
pred = xchg8(&(lock->node), node);
if (pred) {
if(pred == &lock->reader){
if(atomic_inc_ifnot0(&pred->count)){
struct mcs_rwlock_node *old;
old = atomic_cmpxchg8(&(lock->node), node, pred);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
node->locked = MCS_RWLOCK_LOCKED;
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
ihk_atomic_dec(&pred->count);
goto out;
}
}
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
else {
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
}
out:
return;
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
static void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock_noirq(&lock->slock);
#else
if(ihk_atomic_dec_return(&lock->reader.count))
goto out;
if (lock->reader.next == NULL) {
struct mcs_rwlock_node *old;
old = atomic_cmpxchg8(&(lock->node), &(lock->reader), 0);
if (old == &lock->reader) {
goto out;
}
while (lock->reader.next == NULL) {
cpu_pause();
}
}
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
mcs_rwlock_unlock_readers(lock);
}
else{
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
static void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
node->irqsave = ihk_mc_spinlock_lock(&lock->slock);
#else
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
static void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock(&lock->slock, node->irqsave);
#else
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
static void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
node->irqsave = ihk_mc_spinlock_lock(&lock->slock);
#else
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
static void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock(&lock->slock, node->irqsave);
#else
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
#endif
}
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
static inline int irqflags_can_interrupt(unsigned long flags)
{
return (flags == ICC_PMR_EL1_UNMASKED);
}
#else /* CONFIG_HAS_NMI */
static inline int irqflags_can_interrupt(unsigned long flags)
{
return !(flags & 0x2);
}
#endif /* CONFIG_HAS_NMI */
struct ihk_rwlock {
unsigned int lock;
};
static inline void ihk_mc_rwlock_init(struct ihk_rwlock *rw)
{
rw->lock = 0;
}
static inline void ihk_mc_read_lock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1b\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 2b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
static inline int ihk_mc_read_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2 = 1;
asm volatile(
" ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1f\n"
" stxr %w1, %w0, %2\n"
"1:\n"
: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
return !tmp2;
}
static inline void ihk_mc_read_unlock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
"1: ldxr %w0, %2\n"
" sub %w0, %w0, #1\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
static inline void ihk_mc_write_lock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %1\n"
" cbnz %w0, 1b\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 2b\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
}
static inline int ihk_mc_write_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" ldaxr %w0, %1\n"
" cbnz %w0, 1f\n"
" stxr %w0, %w2, %1\n"
"1:\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
return !tmp;
}
static inline void ihk_mc_write_unlock(struct ihk_rwlock *rw)
{
asm volatile(
" stlr %w1, %0\n"
: "=Q" (rw->lock) : "r" (0) : "memory");
}
#define ihk_mc_read_can_lock(rw) ((rw)->lock < 0x80000000)
#define ihk_mc_write_can_lock(rw) ((rw)->lock == 0)
#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */
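/*
 * Usage sketch (illustrative, not part of the original header): taking the
 * MCS read-write lock defined above from a writer and a reader. The caller
 * supplies the queue node; "shared_lock", "example_update" and
 * "example_lookup" are hypothetical names used only for this example, and
 * shared_lock must be initialized once with mcs_rwlock_init().
 */
static struct mcs_rwlock_lock shared_lock;

static void example_update(void)
{
	struct mcs_rwlock_node_irqsave node;

	mcs_rwlock_writer_lock(&shared_lock, &node);
	/* exclusive section: modify the shared state here */
	mcs_rwlock_writer_unlock(&shared_lock, &node);
}

static void example_lookup(void)
{
	struct mcs_rwlock_node_irqsave node;

	mcs_rwlock_reader_lock(&shared_lock, &node);
	/* shared section: read the shared state here */
	mcs_rwlock_reader_unlock(&shared_lock, &node);
}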


@ -0,0 +1,866 @@
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#define __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#include <const.h>
#include <errno.h>
#ifndef __ASSEMBLY__
#include <list.h>
#include <page.h>
void panic(const char *);
#endif /*__ASSEMBLY__*/
#define _SZ4KB (1UL<<12)
#define _SZ16KB (1UL<<14)
#define _SZ64KB (1UL<<16)
#ifdef CONFIG_ARM64_64K_PAGES
# define GRANULE_SIZE _SZ64KB
# define BLOCK_SHIFT PAGE_SHIFT
# define BLOCK_SIZE PAGE_SIZE
# define TABLE_SHIFT PMD_SHIFT
#else
# define GRANULE_SIZE _SZ4KB
# define BLOCK_SHIFT SECTION_SHIFT
# define BLOCK_SIZE SECTION_SIZE
# define TABLE_SHIFT PUD_SHIFT
#endif
#define VA_BITS CONFIG_ARM64_VA_BITS
/*
* Address define
*/
/* early alloc area address */
/* START: _end, SIZE: 1 << MAP_EARLY_ALLOC_SHIFT (= 32) pages */
#define MAP_EARLY_ALLOC_SHIFT 5
#define MAP_EARLY_ALLOC_SIZE (UL(1) << (PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT))
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
extern char _end[];
# define MAP_EARLY_ALLOC (ALIGN_UP((unsigned long)_end, BLOCK_SIZE))
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
#endif /* !__ASSEMBLY__ */
/* bootparam area address */
/* START:early alloc area end, SIZE:2MiB */
#define MAP_BOOT_PARAM_SHIFT 21
#define MAP_BOOT_PARAM_SIZE (UL(1) << MAP_BOOT_PARAM_SHIFT)
#ifndef __ASSEMBLY__
# define MAP_BOOT_PARAM (ALIGN_UP(MAP_EARLY_ALLOC_END, BLOCK_SIZE))
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE)
#endif /* !__ASSEMBLY__ */
/*
* MAP_KERNEL_START is HOST MODULES_END - 8MiB.
* It's defined by cmake.
*/
#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=1 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000000400000000)
# define TASK_UNMAPPED_BASE UL(0x0000000800000000)
# define USER_END UL(0x0000002000000000)
# define MAP_VMAP_START UL(0xffffffbdc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffffffbffbdfd000)
# define MAP_ST_START UL(0xffffffc000000000)
#
#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=3 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000002000000000)
# define TASK_UNMAPPED_BASE UL(0x0000004000000000)
# define USER_END UL(0x0000010000000000)
# define MAP_VMAP_START UL(0xfffffdfee0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xfffffdfffbdd0000)
# define MAP_ST_START UL(0xfffffe0000000000)
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=2 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
# define MAP_VMAP_START UL(0xffff7bffc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdfd000)
# define MAP_ST_START UL(0xffff800000000000)
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=4 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
# define MAP_VMAP_START UL(0xffff780000000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)
#
#else
# error address space is not defined.
#endif
#define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START)
#define STACK_TOP(region) ((region)->user_end)
/*
* pagetable define
*/
#if GRANULE_SIZE == _SZ4KB
# define __PTL4_SHIFT 39
# define __PTL3_SHIFT 30
# define __PTL2_SHIFT 21
# define __PTL1_SHIFT 12
# define PTL4_INDEX_MASK ((UL(1) << 9) - 1)
# define PTL3_INDEX_MASK PTL4_INDEX_MASK
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 4)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 4)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 4)
#elif GRANULE_SIZE == _SZ16KB
# define __PTL4_SHIFT 47
# define __PTL3_SHIFT 36
# define __PTL2_SHIFT 25
# define __PTL1_SHIFT 14
# define PTL4_INDEX_MASK ((UL(1) << 1) - 1)
# define PTL3_INDEX_MASK ((UL(1) << 11) - 1)
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 7)
#elif GRANULE_SIZE == _SZ64KB
# define __PTL4_SHIFT 55
# define __PTL3_SHIFT 42
# define __PTL2_SHIFT 29
# define __PTL1_SHIFT 16
# define PTL4_INDEX_MASK 0
# define PTL3_INDEX_MASK ((UL(1) << 6) - 1)
# define PTL2_INDEX_MASK ((UL(1) << 13) - 1)
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 5)
#else
# error granule size error.
#endif
#ifndef __ASSEMBLY__
extern int first_level_block_support;
#endif /* __ASSEMBLY__ */
# define __PTL4_SIZE (UL(1) << __PTL4_SHIFT)
# define __PTL3_SIZE (UL(1) << __PTL3_SHIFT)
# define __PTL2_SIZE (UL(1) << __PTL2_SHIFT)
# define __PTL1_SIZE (UL(1) << __PTL1_SHIFT)
# define __PTL4_MASK (~(__PTL4_SIZE - 1))
# define __PTL3_MASK (~(__PTL3_SIZE - 1))
# define __PTL2_MASK (~(__PTL2_SIZE - 1))
# define __PTL1_MASK (~(__PTL1_SIZE - 1))
# define __PTL4_CONT_SIZE (UL(1) << __PTL4_CONT_SHIFT)
# define __PTL3_CONT_SIZE (UL(1) << __PTL3_CONT_SHIFT)
# define __PTL2_CONT_SIZE (UL(1) << __PTL2_CONT_SHIFT)
# define __PTL1_CONT_SIZE (UL(1) << __PTL1_CONT_SHIFT)
# define __PTL4_CONT_MASK (~(__PTL4_CONT_SIZE - 1))
# define __PTL3_CONT_MASK (~(__PTL3_CONT_SIZE - 1))
# define __PTL2_CONT_MASK (~(__PTL2_CONT_SIZE - 1))
# define __PTL1_CONT_MASK (~(__PTL1_CONT_SIZE - 1))
# define __PTL4_CONT_COUNT (UL(1) << (__PTL4_CONT_SHIFT - __PTL4_SHIFT))
# define __PTL3_CONT_COUNT (UL(1) << (__PTL3_CONT_SHIFT - __PTL3_SHIFT))
# define __PTL2_CONT_COUNT (UL(1) << (__PTL2_CONT_SHIFT - __PTL2_SHIFT))
# define __PTL1_CONT_COUNT (UL(1) << (__PTL1_CONT_SHIFT - __PTL1_SHIFT))
/* calculate entries */
#if (CONFIG_ARM64_PGTABLE_LEVELS > 3) && (VA_BITS > __PTL4_SHIFT)
# define __PTL4_ENTRIES (UL(1) << (VA_BITS - __PTL4_SHIFT))
# define __PTL3_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
# define __PTL2_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
#elif (CONFIG_ARM64_PGTABLE_LEVELS > 2) && (VA_BITS > __PTL3_SHIFT)
# define __PTL4_ENTRIES 1
# define __PTL3_ENTRIES (UL(1) << (VA_BITS - __PTL3_SHIFT))
# define __PTL2_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
#elif (CONFIG_ARM64_PGTABLE_LEVELS > 1) && (VA_BITS > __PTL2_SHIFT)
# define __PTL4_ENTRIES 1
# define __PTL3_ENTRIES 1
# define __PTL2_ENTRIES (UL(1) << (VA_BITS - __PTL2_SHIFT))
# define __PTL1_ENTRIES (UL(1) << (__PTL1_SHIFT - 3))
#elif VA_BITS > __PTL1_SHIFT
# define __PTL4_ENTRIES 1
# define __PTL3_ENTRIES 1
# define __PTL2_ENTRIES 1
# define __PTL1_ENTRIES (UL(1) << (VA_BITS - __PTL1_SHIFT))
#else
# define __PTL4_ENTRIES 1
# define __PTL3_ENTRIES 1
# define __PTL2_ENTRIES 1
# define __PTL1_ENTRIES 1
#endif
#ifndef __ASSEMBLY__
static const unsigned int PTL4_SHIFT = __PTL4_SHIFT;
static const unsigned int PTL3_SHIFT = __PTL3_SHIFT;
static const unsigned int PTL2_SHIFT = __PTL2_SHIFT;
static const unsigned int PTL1_SHIFT = __PTL1_SHIFT;
static const unsigned long PTL4_SIZE = __PTL4_SIZE;
static const unsigned long PTL3_SIZE = __PTL3_SIZE;
static const unsigned long PTL2_SIZE = __PTL2_SIZE;
static const unsigned long PTL1_SIZE = __PTL1_SIZE;
static const unsigned long PTL4_MASK = __PTL4_MASK;
static const unsigned long PTL3_MASK = __PTL3_MASK;
static const unsigned long PTL2_MASK = __PTL2_MASK;
static const unsigned long PTL1_MASK = __PTL1_MASK;
static const unsigned int PTL4_ENTRIES = __PTL4_ENTRIES;
static const unsigned int PTL3_ENTRIES = __PTL3_ENTRIES;
static const unsigned int PTL2_ENTRIES = __PTL2_ENTRIES;
static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
static const unsigned int PTL4_CONT_SHIFT = __PTL4_CONT_SHIFT;
static const unsigned int PTL3_CONT_SHIFT = __PTL3_CONT_SHIFT;
static const unsigned int PTL2_CONT_SHIFT = __PTL2_CONT_SHIFT;
static const unsigned int PTL1_CONT_SHIFT = __PTL1_CONT_SHIFT;
static const unsigned long PTL4_CONT_SIZE = __PTL4_CONT_SIZE;
static const unsigned long PTL3_CONT_SIZE = __PTL3_CONT_SIZE;
static const unsigned long PTL2_CONT_SIZE = __PTL2_CONT_SIZE;
static const unsigned long PTL1_CONT_SIZE = __PTL1_CONT_SIZE;
static const unsigned long PTL4_CONT_MASK = __PTL4_CONT_MASK;
static const unsigned long PTL3_CONT_MASK = __PTL3_CONT_MASK;
static const unsigned long PTL2_CONT_MASK = __PTL2_CONT_MASK;
static const unsigned long PTL1_CONT_MASK = __PTL1_CONT_MASK;
static const unsigned int PTL4_CONT_COUNT = __PTL4_CONT_COUNT;
static const unsigned int PTL3_CONT_COUNT = __PTL3_CONT_COUNT;
static const unsigned int PTL2_CONT_COUNT = __PTL2_CONT_COUNT;
static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
#else
# define PTL4_SHIFT __PTL4_SHIFT
# define PTL3_SHIFT __PTL3_SHIFT
# define PTL2_SHIFT __PTL2_SHIFT
# define PTL1_SHIFT __PTL1_SHIFT
# define PTL4_SIZE __PTL4_SIZE
# define PTL3_SIZE __PTL3_SIZE
# define PTL2_SIZE __PTL2_SIZE
# define PTL1_SIZE __PTL1_SIZE
# define PTL4_MASK __PTL4_MASK
# define PTL3_MASK __PTL3_MASK
# define PTL2_MASK __PTL2_MASK
# define PTL1_MASK __PTL1_MASK
# define PTL4_ENTRIES __PTL4_ENTRIES
# define PTL3_ENTRIES __PTL3_ENTRIES
# define PTL2_ENTRIES __PTL2_ENTRIES
# define PTL1_ENTRIES __PTL1_ENTRIES
# define PTL4_CONT_SHIFT __PTL4_CONT_SHIFT
# define PTL3_CONT_SHIFT __PTL3_CONT_SHIFT
# define PTL2_CONT_SHIFT __PTL2_CONT_SHIFT
# define PTL1_CONT_SHIFT __PTL1_CONT_SHIFT
# define PTL4_CONT_SIZE __PTL4_CONT_SIZE
# define PTL3_CONT_SIZE __PTL3_CONT_SIZE
# define PTL2_CONT_SIZE __PTL2_CONT_SIZE
# define PTL1_CONT_SIZE __PTL1_CONT_SIZE
# define PTL4_CONT_MASK __PTL4_CONT_MASK
# define PTL3_CONT_MASK __PTL3_CONT_MASK
# define PTL2_CONT_MASK __PTL2_CONT_MASK
# define PTL1_CONT_MASK __PTL1_CONT_MASK
# define PTL4_CONT_COUNT __PTL4_CONT_COUNT
# define PTL3_CONT_COUNT __PTL3_CONT_COUNT
# define PTL2_CONT_COUNT __PTL2_CONT_COUNT
# define PTL1_CONT_COUNT __PTL1_CONT_COUNT
#endif/*__ASSEMBLY__*/
#define __page_size(pgshift) (UL(1) << (pgshift))
#define __page_mask(pgsize) (~((pgsize) - 1))
#define __page_offset(addr, size) ((unsigned long)(addr) & ((size) - 1))
#define __page_align(addr, size) ((unsigned long)(addr) & ~((size) - 1))
#define __page_align_up(addr, size) __page_align((unsigned long)(addr) + (size) - 1, size)
/*
 * normal page
*/
#define PAGE_SHIFT __PTL1_SHIFT
#define PAGE_SIZE __page_size(PAGE_SHIFT)
#define PAGE_MASK __page_mask(PAGE_SIZE)
#define PAGE_P2ALIGN 0
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
#define page_align(addr) __page_align(addr, PAGE_SIZE)
#define page_align_up(addr) __page_align_up(addr, PAGE_SIZE)
/*
* large page
*/
#define LARGE_PAGE_SHIFT __PTL2_SHIFT
#define LARGE_PAGE_SIZE __page_size(LARGE_PAGE_SHIFT)
#define LARGE_PAGE_MASK __page_mask(LARGE_PAGE_SIZE)
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define large_page_offset(addr) __page_offset(addr, LARGE_PAGE_SIZE)
#define large_page_align(addr) __page_align(addr, LARGE_PAGE_SIZE)
#define large_page_align_up(addr) __page_align_up(addr, LARGE_PAGE_SIZE)
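/*
 * Worked example (not in the original header), assuming a 4 KB granule:
 * PAGE_SIZE is 0x1000, so for addr = 0x12345
 *   page_offset(addr)   == 0x345
 *   page_align(addr)    == 0x12000
 *   page_align_up(addr) == 0x13000
 * The large_page_* variants do the same arithmetic at LARGE_PAGE_SIZE
 * (2 MiB with the 4 KB granule, 512 MiB with the 64 KB granule).
 */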
/*
*
*/
#define TTBR_ASID_SHIFT 48
#define TTBR_ASID_MASK (0xFFFFUL << TTBR_ASID_SHIFT)
#define TTBR_BADDR_MASK (~TTBR_ASID_MASK)
#include "pgtable-hwdef.h"
#define KERNEL_PHYS_OFFSET
#define PT_PHYSMASK PHYS_MASK
/* We allow user programs to access all the memory (D_Block, D_Page) */
#define PFL_KERN_BLK_ATTR PROT_SECT_NORMAL_EXEC
#define PFL_KERN_PAGE_ATTR PAGE_KERNEL_EXEC
/* for the page table entry that points another page table (D_Table) */
#define PFL_PDIR_TBL_ATTR PMD_TYPE_TABLE
#ifdef CONFIG_ARM64_64K_PAGES
# define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS)
#else
# define SWAPPER_PGTABLE_LEVELS (CONFIG_ARM64_PGTABLE_LEVELS - 1)
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
/* [Page level Write Through] page cache policy: 0 = write-back, 1 = write-through */
#define PFL1_PWT 0 //< DEBUG_ARCH_DEP, wrap the direct reference in devobj.c in a helper (is_pte_pwd)
/* [Page level Cache Disable] page caching: 0 = enabled, 1 = disabled */
#define PFL1_PCD 0 //< DEBUG_ARCH_DEP, wrap the direct reference in devobj.c in a helper (is_pte_pcd)
#define PTE_NULL (0)
#define PTE_FILEOFF PTE_SPECIAL
#ifdef CONFIG_ARM64_64K_PAGES
# define USER_STACK_PREPAGE_SIZE PAGE_SIZE
# define USER_STACK_PAGE_MASK PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT PAGE_SHIFT
#else
# define USER_STACK_PREPAGE_SIZE LARGE_PAGE_SIZE
# define USER_STACK_PAGE_MASK LARGE_PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN LARGE_PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT LARGE_PAGE_SHIFT
#endif
#define PT_ENTRIES (PAGE_SIZE >> 3)
#ifndef __ASSEMBLY__
#include <ihk/types.h>
typedef unsigned long pte_t;
/*
* pagemap kernel ABI bits
*/
#define PM_ENTRY_BYTES sizeof(uint64_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
/* For easy conversion, it is better to be the same as architecture's ones */
enum ihk_mc_pt_attribute {
/* page is resident in physical memory */
PTATTR_ACTIVE = PTE_VALID,
/* read/write flag */
PTATTR_WRITABLE = PTE_RDONLY, // note: meaning is inverted relative to the common definition
/* user/privileged flag */
PTATTR_USER = PTE_USER | PTE_NG,
/* page has been modified */
PTATTR_DIRTY = PTE_DIRTY,
/* large page */
PTATTR_LARGEPAGE = PMD_TABLE_BIT, // note: meaning is inverted relative to the common definition
/* remap_file_pages flag */
PTATTR_FILEOFF = PTE_FILEOFF,
/* no-execute flag */
PTATTR_NO_EXECUTE = PTE_UXN,
/* uncachable */
PTATTR_UNCACHABLE = PTE_ATTRINDX(1),
/* mapping is intended for user space */
PTATTR_FOR_USER = UL(1) << (PHYS_MASK_SHIFT - 1),
/* write-combined */
PTATTR_WRITE_COMBINED = PTE_ATTRINDX(2),
/* converted flag */
ARCH_PTATTR_FLIPPED = PTE_PROT_NONE,
};
extern enum ihk_mc_pt_attribute attr_mask;
static inline int pfn_is_write_combined(uintptr_t pfn)
{
return ((pfn & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC));
}
// bits whose meaning is inverted relative to the common definitions
#define attr_flip_bits (PTATTR_WRITABLE | PTATTR_LARGEPAGE)
static inline int pgsize_to_tbllv(size_t pgsize);
static inline int pte_is_type_page(const pte_t *ptep, size_t pgsize)
{
int ret = 0; //default D_TABLE
int level = pgsize_to_tbllv(pgsize);
switch (level) {
case 4:
case 3:
case 2:
// check D_BLOCK
ret = ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT);
break;
case 1:
// check D_PAGE
ret = ((*ptep & PTE_TYPE_MASK) == PTE_TYPE_PAGE);
break;
}
return ret;
}
static inline int pte_is_null(pte_t *ptep)
{
return (*ptep == PTE_NULL);
}
static inline int pte_is_present(pte_t *ptep)
{
return !!(*ptep & PMD_SECT_VALID);
}
static inline int pte_is_writable(pte_t *ptep)
{
extern int kprintf(const char *format, ...);
kprintf("ERROR: %s is not implemented. \n", __func__);
return 0;
}
static inline int pte_is_dirty(pte_t *ptep, size_t pgsize)
{
int ret = 0;
int do_check = pte_is_type_page(ptep, pgsize);
if (do_check) {
ret = !!(*ptep & PTE_DIRTY);
}
return ret;
}
static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize)
{
int ret = 0;
int do_check = pte_is_type_page(ptep, pgsize);
if (do_check) {
ret = !!(*ptep & PTE_FILEOFF);
}
return ret;
}
static inline void pte_update_phys(pte_t *ptep, unsigned long phys)
{
*ptep = (*ptep & ~PT_PHYSMASK) | (phys & PT_PHYSMASK);
}
static inline uintptr_t pte_get_phys(pte_t *ptep)
{
return (uintptr_t)(*ptep & PT_PHYSMASK);
}
static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
{
return (off_t)(*ptep & PHYS_MASK);
}
static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
{
enum ihk_mc_pt_attribute attr;
attr = *ptep & attr_mask;
attr ^= attr_flip_bits;
if ((*ptep & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_DEVICE_nGnRE)) {
attr |= PTATTR_UNCACHABLE;
} else if ((*ptep & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC)) {
attr |= PTATTR_WRITE_COMBINED;
}
if (((pgsize == PTL2_SIZE) || (pgsize == PTL3_SIZE))
&& ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT)) {
attr |= PTATTR_LARGEPAGE;
}
return attr;
}
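/*
 * Note on the flipped bits (illustrative, not in the original header):
 * PTATTR_WRITABLE maps to PTE_RDONLY and PTATTR_LARGEPAGE to PMD_TABLE_BIT,
 * both with inverted sense, so pte_get_attr() XORs the raw bits with
 * attr_flip_bits. Example: a writable PTE has PTE_RDONLY clear, and the
 * XOR turns that into PTATTR_WRITABLE being set in the returned attributes.
 */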
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
if (((PTL4_SIZE == pgsize || PTL4_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
((PTL3_SIZE == pgsize || PTL3_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize || PTL2_CONT_SIZE == pgsize) ||
(PTL1_SIZE == pgsize || PTL1_CONT_SIZE == pgsize)) {
*ptep = PTE_FILEOFF | off | PTE_TYPE_PAGE;
}
}
#if 0 /* XXX: workaround. cannot use panic() here */
static inline void pte_xchg(pte_t *ptep, pte_t *valp)
{
*valp = xchg(ptep, *valp);
}
#else
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
int do_clear = pte_is_type_page(ptep, pgsize);
if (do_clear) {
*ptep = *ptep & ~PTE_DIRTY;
}
}
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
int do_set = pte_is_type_page(ptep, pgsize);
if (do_set) {
*ptep |= PTE_DIRTY;
}
}
static inline int pte_is_contiguous(const pte_t *ptep)
{
return !!(*ptep & PTE_CONT);
}
static inline int pgsize_is_contiguous(size_t pgsize)
{
int ret = 0;
if ((pgsize == PTL4_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(pgsize == PTL3_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(pgsize == PTL2_CONT_SIZE) ||
(pgsize == PTL1_CONT_SIZE)) {
ret = 1;
}
return ret;
}
static inline int pgsize_to_tbllv(size_t pgsize)
{
int level = -EINVAL;
if ((pgsize == PTL4_CONT_SIZE || pgsize == PTL4_SIZE)
&& (CONFIG_ARM64_PGTABLE_LEVELS > 3)) {
level = 4;
} else if ((pgsize == PTL3_CONT_SIZE || pgsize == PTL3_SIZE)
&& (CONFIG_ARM64_PGTABLE_LEVELS > 2)) {
level = 3;
} else if (pgsize == PTL2_CONT_SIZE || pgsize == PTL2_SIZE) {
level = 2;
} else if (pgsize == PTL1_CONT_SIZE || pgsize == PTL1_SIZE) {
level = 1;
}
return level;
}
static inline int pgsize_to_pgshift(size_t pgsize)
{
/* We need to use if instead of switch because
* sometimes PTLX_CONT_SIZE == PTLX_SIZE
*/
if (pgsize == PTL4_CONT_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
return PTL4_CONT_SHIFT;
}
} else if (pgsize == PTL4_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
return PTL4_SHIFT;
}
} else if (pgsize == PTL3_CONT_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
return PTL3_CONT_SHIFT;
}
} else if (pgsize == PTL3_SIZE) {
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
return PTL3_SHIFT;
}
} else if (pgsize == PTL2_CONT_SIZE) {
return PTL2_CONT_SHIFT;
} else if (pgsize == PTL2_SIZE) {
return PTL2_SHIFT;
} else if (pgsize == PTL1_CONT_SIZE) {
return PTL1_CONT_SHIFT;
} else if (pgsize == PTL1_SIZE) {
return PTL1_SHIFT;
}
return -EINVAL;
}
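/*
 * Example mapping (illustrative, not in the original header), 4 KB granule:
 *   PTL1_SIZE      =  4 KB -> level 1, shift 12
 *   PTL1_CONT_SIZE = 64 KB -> level 1, shift 16 (16 contiguous PTEs)
 *   PTL2_SIZE      =  2 MB -> level 2, shift 21
 *   PTL2_CONT_SIZE = 32 MB -> level 2, shift 25 (16 contiguous entries)
 * so pgsize_to_tbllv(2 MB) == 2 and pgsize_to_pgshift(32 MB) == 25.
 */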
static inline size_t tbllv_to_pgsize(int level)
{
size_t pgsize = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
pgsize = PTL4_SIZE;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
pgsize = PTL3_SIZE;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
pgsize = PTL2_SIZE;
break;
case 1:
pgsize = PTL1_SIZE;
break;
default:
panic("page table level is invalid.");
}
return pgsize;
}
static inline size_t tbllv_to_contpgsize(int level)
{
size_t pgsize = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
pgsize = PTL4_CONT_SIZE;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
pgsize = PTL3_CONT_SIZE;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
pgsize = PTL2_CONT_SIZE;
break;
case 1:
pgsize = PTL1_CONT_SIZE;
break;
default:
panic("page table level is invalid.");
}
return pgsize;
}
static inline int tbllv_to_contpgshift(int level)
{
int ret = 0;
switch (level) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
ret = PTL4_CONT_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
ret = PTL3_CONT_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
ret = PTL2_CONT_SHIFT;
break;
case 1:
ret = PTL1_CONT_SHIFT;
break;
default:
panic("page table level is invalid.");
}
return ret;
}
static inline pte_t *get_contiguous_head(pte_t *__ptep, size_t __pgsize)
{
unsigned long align;
int shift = 0;
switch (pgsize_to_tbllv(__pgsize)) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
shift = PTL4_CONT_SHIFT - PTL4_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
shift = PTL3_CONT_SHIFT - PTL3_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
shift = PTL2_CONT_SHIFT - PTL2_SHIFT;
break;
case 1:
shift = PTL1_CONT_SHIFT - PTL1_SHIFT;
break;
default:
panic("page table level is invalid.");
}
align = sizeof(*__ptep) << shift;
return (pte_t *)__page_align(__ptep, align);
}
static inline pte_t *get_contiguous_tail(pte_t *__ptep, size_t __pgsize)
{
unsigned long align;
int shift = 0;
switch (pgsize_to_tbllv(__pgsize)) {
case 4:
if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
shift = PTL4_CONT_SHIFT - PTL4_SHIFT;
} else {
panic("page table level 4 is invalid.");
}
break;
case 3:
if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
shift = PTL3_CONT_SHIFT - PTL3_SHIFT;
} else {
panic("page table level 3 is invalid.");
}
break;
case 2:
shift = PTL2_CONT_SHIFT - PTL2_SHIFT;
break;
case 1:
shift = PTL1_CONT_SHIFT - PTL1_SHIFT;
break;
default:
panic("page table level is invalid.");
}
align = sizeof(*__ptep) << shift;
return (pte_t *)__page_align_up(__ptep + 1, align) - 1;
}
static inline int split_contiguous_pages(pte_t *ptep, size_t pgsize)
{
int ret;
pte_t *head = get_contiguous_head(ptep, pgsize);
pte_t *tail = get_contiguous_tail(ptep, pgsize);
pte_t *ptr;
uintptr_t phys;
struct page *page;
phys = pte_get_phys(head);
page = phys_to_page(phys);
if (page && (page_is_in_memobj(page)
|| page_is_multi_mapped(page))) {
ret = -EINVAL;
goto out;
}
for (ptr = head; ptr <= tail; ptr++) {
*ptr &= ~PTE_CONT;
}
ret = 0;
out:
return ret;
}
static inline int page_is_contiguous_head(pte_t *ptep, size_t pgsize)
{
pte_t *ptr = get_contiguous_head(ptep, pgsize);
return (ptr == ptep);
}
static inline int page_is_contiguous_tail(pte_t *ptep, size_t pgsize)
{
pte_t *ptr = get_contiguous_tail(ptep, pgsize);
return (ptr == ptep);
}
/* Return true if PTE doesn't belong to a contiguous PTE group or PTE
* is the head of a contiguous PTE group
*/
static inline int pte_is_head(pte_t *ptep, pte_t *old, size_t cont_size)
{
if (!pte_is_contiguous(old))
return 1;
return page_is_contiguous_head(ptep, cont_size);
}
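/*
 * Example (illustrative, not in the original header), 4 KB granule, level 1:
 * a contiguous group spans PTL1_CONT_COUNT == 16 PTEs, so
 * get_contiguous_head() aligns the PTE pointer down to
 * sizeof(pte_t) << 4 == 128 bytes and get_contiguous_tail() returns the
 * last of those 16 entries; split_contiguous_pages() clears PTE_CONT on
 * every entry in that 128-byte-aligned window.
 */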
struct page_table;
void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
void **pgaddrp,
size_t *pgsizep);
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
struct page_table *get_init_page_table(void);
void *early_alloc_pages(int nr_pages);
void *get_last_early_heap(void);
void flush_tlb(void);
void flush_tlb_single(unsigned long addr);
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
void set_address_space_id(struct page_table *pt, int asid);
int get_address_space_id(const struct page_table *pt);
typedef pte_t translation_table_t;
void set_translation_table(struct page_table *pt, translation_table_t* tt);
translation_table_t* get_translation_table(const struct page_table *pt);
translation_table_t* get_translation_table_as_paddr(const struct page_table *pt);
extern unsigned long ap_trampoline;
//#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x2000
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_ARCH_MEMORY_H */


@ -0,0 +1,63 @@
/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __ARCH_PERFCTR_H__
#define __ARCH_PERFCTR_H__
#include <ihk/types.h>
#include <ihk/cpu.h>
#include <bitops.h>
struct per_cpu_arm_pmu {
int num_events;
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
};
/* @ref.impl arch/arm64/include/asm/pmu.h */
struct arm_pmu {
struct ihk_mc_interrupt_handler* handler;
uint32_t (*read_counter)(int);
void (*write_counter)(int, uint32_t);
void (*reset)(void*);
int (*enable_pmu)(void);
void (*disable_pmu)(void);
int (*enable_counter)(unsigned long counter_mask);
int (*disable_counter)(unsigned long counter_mask);
int (*enable_intens)(unsigned long counter_mask);
int (*disable_intens)(unsigned long counter_mask);
int (*set_event_filter)(unsigned long*, int);
void (*write_evtype)(int, uint32_t);
int (*get_event_idx)(int num_events, unsigned long used_mask,
unsigned long config);
int (*map_event)(uint32_t, uint64_t);
int (*map_hw_event)(uint64_t config);
int (*map_cache_event)(uint64_t config);
int (*map_raw_event)(uint64_t config);
void (*enable_user_access_pmu_regs)(void);
void (*disable_user_access_pmu_regs)(void);
int (*counter_mask_valid)(unsigned long counter_mask);
struct per_cpu_arm_pmu *per_cpu;
};
static inline const struct arm_pmu* get_cpu_pmu(void)
{
extern struct arm_pmu cpu_pmu;
return &cpu_pmu;
}
static inline const struct per_cpu_arm_pmu *get_per_cpu_pmu(void)
{
const struct arm_pmu *cpu_pmu = get_cpu_pmu();
return &cpu_pmu->per_cpu[ihk_mc_get_processor_id()];
}
int arm64_init_perfctr(void);
void arm64_init_per_cpu_perfctr(void);
int arm64_enable_pmu(void);
void arm64_disable_pmu(void);
int armv8pmu_init(struct arm_pmu* cpu_pmu);
void armv8pmu_per_cpu_init(struct per_cpu_arm_pmu *per_cpu);
void arm64_enable_user_access_pmu_regs(void);
void arm64_disable_user_access_pmu_regs(void);
#endif
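/*
 * Usage sketch (illustrative, not part of the original header): reading a
 * hardware counter through the per-CPU PMU ops table declared above.
 * Counter index 0 and the function name are assumptions for the example.
 */
static inline uint32_t example_read_counter0(void)
{
	const struct arm_pmu *pmu = get_cpu_pmu();
	uint32_t value = 0;

	if (pmu->enable_counter && pmu->read_counter) {
		pmu->enable_counter(1UL << 0); /* enable counter 0 */
		value = pmu->read_counter(0);  /* read counter 0 */
	}
	return value;
}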


@ -0,0 +1,13 @@
/* arch-string.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __HEADER_ARM64_COMMON_ARCH_STRING_H
#define __HEADER_ARM64_COMMON_ARCH_STRING_H
#define ARCH_FAST_MEMCPY
extern void *__inline_memcpy(void *to, const void *from, size_t t);
#define ARCH_FAST_MEMSET
extern void *__inline_memset(void *s, unsigned long c, size_t count);
#endif /* __HEADER_ARM64_COMMON_ARCH_STRING_H */


@ -0,0 +1,31 @@
/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_TIMER_H
#define __HEADER_ARM64_COMMON_ARCH_TIMER_H
#include <ihk/cpu.h>
/* @ref.impl include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */
#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */
#define ARCH_TIMER_VIRT_EVT_EN (1 << 2)
#define ARCH_TIMER_EVT_TRIGGER_SHIFT (4)
#define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT)
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
/* @ref.impl linux4.10.16 */
/* include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_CTRL_ENABLE (1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
ARCH_TIMER_REG_TVAL,
};
extern int get_timer_intrid(void);
extern void arch_timer_init(void);
extern struct ihk_mc_interrupt_handler *get_timer_handler(void);
#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */


@ -0,0 +1,7 @@
/* auxvec.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_ARCH_AUXVEC_H
#define __HEADER_ARM64_ARCH_AUXVEC_H
#define AT_SYSINFO_EHDR 33
#endif /* __HEADER_ARM64_ARCH_AUXVEC_H */


@ -0,0 +1,107 @@
/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __HEADER_ARM64_ARCH_CPU_H
#define __HEADER_ARM64_ARCH_CPU_H
#ifndef __ASSEMBLY__
#define sev() asm volatile("sev" : : : "memory")
#define wfe() asm volatile("wfe" : : : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
#define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#include <registers.h>
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
#define dma_rmb() dmb(oshld)
#define dma_wmb() dmb(oshst)
//#ifndef CONFIG_SMP
//#else
#define smp_mb() dmb(ish)
#define smp_rmb() dmb(ishld)
#define smp_wmb() dmb(ishst)
#define arch_barrier() smp_mb()
#define smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("stlr %w1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
case 8: \
asm volatile ("stlr %1, %0" \
: "=Q" (*p) : "r" (v) : "memory"); \
break; \
} \
} while (0)
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1; \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 4: \
asm volatile ("ldar %w0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
case 8: \
asm volatile ("ldar %0, %1" \
: "=r" (___p1) : "Q" (*p) : "memory"); \
break; \
} \
___p1; \
})
//#endif /*CONFIG_SMP*/
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
#define nop() asm volatile("nop");
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
#define read_tsc() \
({ \
unsigned long cval; \
cval = rdtsc(); \
cval; \
})
void init_tod_data(void);
#if defined(CONFIG_HAS_NMI)
static inline void cpu_enable_nmi(void)
{
asm volatile("msr daifclr, #2": : : "memory");
}
static inline void cpu_disable_nmi(void)
{
asm volatile("msr daifset, #2": : : "memory");
}
#else/*defined(CONFIG_HAS_NMI)*/
static inline void cpu_enable_nmi(void)
{
}
static inline void cpu_disable_nmi(void)
{
}
#endif/*defined(CONFIG_HAS_NMI)*/
#endif /* __ASSEMBLY__ */
void arch_flush_icache_all(void);
#endif /* !__HEADER_ARM64_ARCH_CPU_H */
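/*
 * Usage sketch (illustrative, not part of the original header): publishing
 * data with the smp_store_release()/smp_load_acquire() macros defined above.
 * The variable and function names are hypothetical.
 */
static unsigned long example_payload;
static unsigned long example_ready;

static void example_publish(void)
{
	example_payload = 42;                   /* plain store */
	smp_store_release(&example_ready, 1UL); /* payload becomes visible first */
}

static unsigned long example_consume(void)
{
	while (!smp_load_acquire(&example_ready)) /* pairs with the release above */
		;                                 /* spin until the flag is set */
	return example_payload;                   /* guaranteed to observe 42 */
}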


@ -0,0 +1,17 @@
/* mm.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_ARCH_MM_H
#define __HEADER_ARM64_ARCH_MM_H
struct process_vm;
static inline void
flush_nfo_tlb(void)
{
}
static inline void
flush_nfo_tlb_mm(struct process_vm *vm)
{
}
#endif /* __HEADER_ARM64_ARCH_MM_H */


@ -0,0 +1,36 @@
/* mman.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* @ref.impl linux-linaro/include/uapi/asm-generic/mman.h */
#ifndef __HEADER_ARM64_ARCH_MMAN_H
#define __HEADER_ARM64_ARCH_MMAN_H
#include <arch-memory.h>
/*
* mapping flags
*/
#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
#define MAP_LOCKED 0x2000 /* pages are locked */
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << MAP_HUGE_SHIFT)
/*
* for mlockall()
*/
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
#endif /* __HEADER_ARM64_ARCH_MMAN_H */
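/*
 * Example (illustrative, not part of the original header): MAP_HUGE_*
 * encodes the page shift in mmap() flag bits [26:31]. With a 4 KB granule,
 * MAP_HUGE_SECOND_BLOCK is (21 << 26), i.e. it requests 2 MiB mappings:
 *
 *   mmap(NULL, len, PROT_READ | PROT_WRITE,
 *        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_SECOND_BLOCK,
 *        -1, 0);
 *
 * MAP_PRIVATE, MAP_ANONYMOUS and the PROT_* values come from the common
 * mman definitions, not from this header.
 */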


@ -0,0 +1,40 @@
/* shm.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_ARCH_SHM_H
#define __HEADER_ARM64_ARCH_SHM_H
#include <arch-memory.h>
/* shmflg */
#define SHM_HUGE_SHIFT 26
#define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << SHM_HUGE_SHIFT)
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
uint64_t shm_nattch;
uint8_t padding[12];
int init_pgshift;
};
#endif /* __HEADER_ARM64_ARCH_SHM_H */


@ -0,0 +1,106 @@
/* arm-gic-v2.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/*
* include/linux/irqchip/arm-gic.h
*
* Copyright (C) 2002 ARM Limited, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __LINUX_IRQCHIP_ARM_GIC_H
#define __LINUX_IRQCHIP_ARM_GIC_H
/* check config */
#if defined(CONFIG_HAS_NMI) && !defined(CONFIG_ARM_GIC_V3)
# error GICv2 does not support NMI
#endif
/* @ref.impl include/linux/irqchip/arm-gic.h */
#define GIC_CPU_CTRL 0x00
#define GIC_CPU_PRIMASK 0x04
#define GIC_CPU_BINPOINT 0x08
#define GIC_CPU_INTACK 0x0c
#define GIC_CPU_EOI 0x10
#define GIC_CPU_RUNNINGPRI 0x14
#define GIC_CPU_HIGHPRI 0x18
#define GIC_CPU_ALIAS_BINPOINT 0x1c
#define GIC_CPU_ACTIVEPRIO 0xd0
#define GIC_CPU_IDENT 0xfc
#define GICC_ENABLE 0x1
#define GICC_INT_PRI_THRESHOLD 0xf0
#define GICC_IAR_INT_ID_MASK 0x3ff
#define GICC_INT_SPURIOUS 1023
#define GICC_DIS_BYPASS_MASK 0x1e0
#define GIC_DIST_CTRL 0x000
#define GIC_DIST_CTR 0x004
#define GIC_DIST_IGROUP 0x080
#define GIC_DIST_ENABLE_SET 0x100
#define GIC_DIST_ENABLE_CLEAR 0x180
#define GIC_DIST_PENDING_SET 0x200
#define GIC_DIST_PENDING_CLEAR 0x280
#define GIC_DIST_ACTIVE_SET 0x300
#define GIC_DIST_ACTIVE_CLEAR 0x380
#define GIC_DIST_PRI 0x400
#define GIC_DIST_TARGET 0x800
#define GIC_DIST_CONFIG 0xc00
#define GIC_DIST_SOFTINT 0xf00
#define GIC_DIST_SGI_PENDING_CLEAR 0xf10
#define GIC_DIST_SGI_PENDING_SET 0xf20
#define GICD_ENABLE 0x1
#define GICD_DISABLE 0x0
#define GICD_INT_ACTLOW_LVLTRIG 0x0
#define GICD_INT_EN_CLR_X32 0xffffffff
#define GICD_INT_EN_SET_SGI 0x0000ffff
#define GICD_INT_EN_CLR_PPI 0xffff0000
#ifdef CONFIG_HAS_NMI
#define GICD_INT_NMI_PRI 0x40
#define GICD_INT_DEF_PRI 0xc0U
#else
#define GICD_INT_DEF_PRI 0xa0U
#endif
#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
(GICD_INT_DEF_PRI << 16) |\
(GICD_INT_DEF_PRI << 8) |\
GICD_INT_DEF_PRI)
#define GICH_HCR 0x0
#define GICH_VTR 0x4
#define GICH_VMCR 0x8
#define GICH_MISR 0x10
#define GICH_EISR0 0x20
#define GICH_EISR1 0x24
#define GICH_ELRSR0 0x30
#define GICH_ELRSR1 0x34
#define GICH_APR 0xf0
#define GICH_LR0 0x100
#define GICH_HCR_EN (1 << 0)
#define GICH_HCR_UIE (1 << 1)
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT)
#define GICH_LR_STATE (3 << 28)
#define GICH_LR_PENDING_BIT (1 << 28)
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
#define GICH_VMCR_CTRL_SHIFT 0
#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
#define GICH_VMCR_PRIMASK_SHIFT 27
#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
#define GICH_VMCR_BINPOINT_SHIFT 21
#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)
#endif /* __LINUX_IRQCHIP_ARM_GIC_H */


@ -0,0 +1,385 @@
/* arm-gic-v3.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/*
* Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
#define __LINUX_IRQCHIP_ARM_GIC_V3_H
#include <stringify.h>
/* @ref.impl include/linux/irqchip/arm-gic-v3.h */
#include <sysreg.h>
/*
* Distributor registers. We assume we're running non-secure, with ARE
* being set. Secure-only and non-ARE registers are not described.
*/
#define GICD_CTLR 0x0000
#define GICD_TYPER 0x0004
#define GICD_IIDR 0x0008
#define GICD_STATUSR 0x0010
#define GICD_SETSPI_NSR 0x0040
#define GICD_CLRSPI_NSR 0x0048
#define GICD_SETSPI_SR 0x0050
#define GICD_CLRSPI_SR 0x0058
#define GICD_SEIR 0x0068
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
#define GICD_ISPENDR 0x0200
#define GICD_ICPENDR 0x0280
#define GICD_ISACTIVER 0x0300
#define GICD_ICACTIVER 0x0380
#define GICD_IPRIORITYR 0x0400
#define GICD_ICFGR 0x0C00
#define GICD_IGRPMODR 0x0D00
#define GICD_NSACR 0x0E00
#define GICD_IROUTER 0x6000
#define GICD_IDREGS 0xFFD0
#define GICD_PIDR2 0xFFE8
/*
* Those registers are actually from GICv2, but the spec demands that they
* are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
*/
#define GICD_ITARGETSR 0x0800
#define GICD_SGIR 0x0F00
#define GICD_CPENDSGIR 0x0F10
#define GICD_SPENDSGIR 0x0F20
#define GICD_CTLR_RWP (1U << 31)
#define GICD_CTLR_DS (1U << 6)
#define GICD_CTLR_ARE_NS (1U << 4)
#define GICD_CTLR_ENABLE_G1A (1U << 1)
#define GICD_CTLR_ENABLE_G1 (1U << 0)
/*
* In systems with a single security state (what we emulate in KVM)
* the meaning of the interrupt group enable bits is slightly different
*/
#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
#define GICD_TYPER_LPIS (1U << 17)
#define GICD_TYPER_MBIS (1U << 16)
#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32)
#define GICD_TYPER_LPIS (1U << 17)
#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
#define GIC_PIDR2_ARCH_MASK 0xf0
#define GIC_PIDR2_ARCH_GICv3 0x30
#define GIC_PIDR2_ARCH_GICv4 0x40
#define GIC_V3_DIST_SIZE 0x10000
/*
* Re-Distributor registers, offsets from RD_base
*/
#define GICR_CTLR GICD_CTLR
#define GICR_IIDR 0x0004
#define GICR_TYPER 0x0008
#define GICR_STATUSR GICD_STATUSR
#define GICR_WAKER 0x0014
#define GICR_SETLPIR 0x0040
#define GICR_CLRLPIR 0x0048
#define GICR_SEIR GICD_SEIR
#define GICR_PROPBASER 0x0070
#define GICR_PENDBASER 0x0078
#define GICR_INVLPIR 0x00A0
#define GICR_INVALLR 0x00B0
#define GICR_SYNCR 0x00C0
#define GICR_MOVLPIR 0x0100
#define GICR_MOVALLR 0x0110
#define GICR_IDREGS GICD_IDREGS
#define GICR_PIDR2 GICD_PIDR2
#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
#define GICR_WAKER_ProcessorSleep (1U << 1)
#define GICR_WAKER_ChildrenAsleep (1U << 2)
#define GICR_PROPBASER_NonShareable (0U << 10)
#define GICR_PROPBASER_InnerShareable (1U << 10)
#define GICR_PROPBASER_OuterShareable (2U << 10)
#define GICR_PROPBASER_SHAREABILITY_MASK (3UL << 10)
#define GICR_PROPBASER_nCnB (0U << 7)
#define GICR_PROPBASER_nC (1U << 7)
#define GICR_PROPBASER_RaWt (2U << 7)
#define GICR_PROPBASER_RaWb (3U << 7)
#define GICR_PROPBASER_WaWt (4U << 7)
#define GICR_PROPBASER_WaWb (5U << 7)
#define GICR_PROPBASER_RaWaWt (6U << 7)
#define GICR_PROPBASER_RaWaWb (7U << 7)
#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7)
#define GICR_PROPBASER_IDBITS_MASK (0x1f)
#define GICR_PENDBASER_NonShareable (0U << 10)
#define GICR_PENDBASER_InnerShareable (1U << 10)
#define GICR_PENDBASER_OuterShareable (2U << 10)
#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10)
#define GICR_PENDBASER_nCnB (0U << 7)
#define GICR_PENDBASER_nC (1U << 7)
#define GICR_PENDBASER_RaWt (2U << 7)
#define GICR_PENDBASER_RaWb (3U << 7)
#define GICR_PENDBASER_WaWt (4U << 7)
#define GICR_PENDBASER_WaWb (5U << 7)
#define GICR_PENDBASER_RaWaWt (6U << 7)
#define GICR_PENDBASER_RaWaWb (7U << 7)
#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7)
/*
* Re-Distributor registers, offsets from SGI_base
*/
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
#define GICR_ISPENDR0 GICD_ISPENDR
#define GICR_ICPENDR0 GICD_ICPENDR
#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
#define GICR_ICFGR0 GICD_ICFGR
#define GICR_IGRPMODR0 GICD_IGRPMODR
#define GICR_NSACR GICD_NSACR
#define GICR_TYPER_PLPIS (1U << 0)
#define GICR_TYPER_VLPIS (1U << 1)
#define GICR_TYPER_LAST (1U << 4)
#define GIC_V3_REDIST_SIZE 0x20000
#define LPI_PROP_GROUP1 (1 << 1)
#define LPI_PROP_ENABLED (1 << 0)
/*
* ITS registers, offsets from ITS_base
*/
#define GITS_CTLR 0x0000
#define GITS_IIDR 0x0004
#define GITS_TYPER 0x0008
#define GITS_CBASER 0x0080
#define GITS_CWRITER 0x0088
#define GITS_CREADR 0x0090
#define GITS_BASER 0x0100
#define GITS_PIDR2 GICR_PIDR2
#define GITS_TRANSLATER 0x10040
#define GITS_CTLR_ENABLE (1U << 0)
#define GITS_CTLR_QUIESCENT (1U << 31)
#define GITS_TYPER_DEVBITS_SHIFT 13
#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
#define GITS_TYPER_PTA (1UL << 19)
#define GITS_CBASER_VALID (1UL << 63)
#define GITS_CBASER_nCnB (0UL << 59)
#define GITS_CBASER_nC (1UL << 59)
#define GITS_CBASER_RaWt (2UL << 59)
#define GITS_CBASER_RaWb (3UL << 59)
#define GITS_CBASER_WaWt (4UL << 59)
#define GITS_CBASER_WaWb (5UL << 59)
#define GITS_CBASER_RaWaWt (6UL << 59)
#define GITS_CBASER_RaWaWb (7UL << 59)
#define GITS_CBASER_CACHEABILITY_MASK (7UL << 59)
#define GITS_CBASER_NonShareable (0UL << 10)
#define GITS_CBASER_InnerShareable (1UL << 10)
#define GITS_CBASER_OuterShareable (2UL << 10)
#define GITS_CBASER_SHAREABILITY_MASK (3UL << 10)
#define GITS_BASER_NR_REGS 8
#define GITS_BASER_VALID (1UL << 63)
#define GITS_BASER_nCnB (0UL << 59)
#define GITS_BASER_nC (1UL << 59)
#define GITS_BASER_RaWt (2UL << 59)
#define GITS_BASER_RaWb (3UL << 59)
#define GITS_BASER_WaWt (4UL << 59)
#define GITS_BASER_WaWb (5UL << 59)
#define GITS_BASER_RaWaWt (6UL << 59)
#define GITS_BASER_RaWaWb (7UL << 59)
#define GITS_BASER_CACHEABILITY_MASK (7UL << 59)
#define GITS_BASER_TYPE_SHIFT (56)
#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1)
#define GITS_BASER_NonShareable (0UL << 10)
#define GITS_BASER_InnerShareable (1UL << 10)
#define GITS_BASER_OuterShareable (2UL << 10)
#define GITS_BASER_SHAREABILITY_SHIFT (10)
#define GITS_BASER_SHAREABILITY_MASK (3UL << GITS_BASER_SHAREABILITY_SHIFT)
#define GITS_BASER_PAGE_SIZE_SHIFT (8)
#define GITS_BASER_PAGE_SIZE_4K (0UL << GITS_BASER_PAGE_SIZE_SHIFT)
#define GITS_BASER_PAGE_SIZE_16K (1UL << GITS_BASER_PAGE_SIZE_SHIFT)
#define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT)
#define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT)
#define GITS_BASER_PAGES_MAX 256
#define GITS_BASER_TYPE_NONE 0
#define GITS_BASER_TYPE_DEVICE 1
#define GITS_BASER_TYPE_VCPU 2
#define GITS_BASER_TYPE_CPU 3
#define GITS_BASER_TYPE_COLLECTION 4
#define GITS_BASER_TYPE_RESERVED5 5
#define GITS_BASER_TYPE_RESERVED6 6
#define GITS_BASER_TYPE_RESERVED7 7
/*
* ITS commands
*/
#define GITS_CMD_MAPD 0x08
#define GITS_CMD_MAPC 0x09
#define GITS_CMD_MAPVI 0x0a
#define GITS_CMD_MOVI 0x01
#define GITS_CMD_DISCARD 0x0f
#define GITS_CMD_INV 0x0c
#define GITS_CMD_MOVALL 0x0e
#define GITS_CMD_INVALL 0x0d
#define GITS_CMD_INT 0x03
#define GITS_CMD_CLEAR 0x04
#define GITS_CMD_SYNC 0x05
/*
* CPU interface registers
*/
#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1)
#define ICC_CTLR_EL1_EOImode_drop (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
/*
* Hypervisor interface registers (SRE only)
*/
#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1)
#define ICH_LR_EOI (1UL << 41)
#define ICH_LR_GROUP (1UL << 60)
#define ICH_LR_STATE (3UL << 62)
#define ICH_LR_PENDING_BIT (1UL << 62)
#define ICH_LR_ACTIVE_BIT (1UL << 63)
#define ICH_MISR_EOI (1 << 0)
#define ICH_MISR_U (1 << 1)
#define ICH_HCR_EN (1 << 0)
#define ICH_HCR_UIE (1 << 1)
#define ICH_VMCR_CTLR_SHIFT 0
#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT)
#define ICH_VMCR_BPR1_SHIFT 18
#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)
#define ICH_VMCR_BPR0_SHIFT 21
#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)
#define ICH_VMCR_PMR_SHIFT 24
#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3)
#define ICC_IAR1_EL1_SPURIOUS 0x3ff
#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
#define ICC_SRE_EL2_SRE (1 << 0)
#define ICC_SRE_EL2_ENABLE (1 << 3)
#define ICC_SGI1R_TARGET_LIST_SHIFT 0
#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
#define ICC_SGI1R_AFFINITY_1_SHIFT 16
#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
#define ICC_SGI1R_SGI_ID_SHIFT 24
#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT)
#define ICC_SGI1R_AFFINITY_2_SHIFT 32
#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
#define ICC_SGI1R_AFFINITY_3_SHIFT 48
#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
#ifdef CONFIG_HAS_NMI
/* PMR values used to mask/unmask interrupts */
#define ICC_PMR_EL1_G_SHIFT 6
#define ICC_PMR_EL1_G_BIT (1 << ICC_PMR_EL1_G_SHIFT)
#define ICC_PMR_EL1_UNMASKED 0xf0
#define ICC_PMR_EL1_MASKED (ICC_PMR_EL1_UNMASKED ^ ICC_PMR_EL1_G_BIT)
/*
* This is the GIC interrupt mask bit. It is not actually part of the
* PSR and so does not appear in the user API, we are simply using some
* reserved bits in the PSR to store some state from the interrupt
* controller. The context save/restore functions will extract the
* ICC_PMR_EL1_G_BIT and save it as the PSR_G_BIT.
*/
#define PSR_G_BIT 0x00400000
#define PSR_G_SHIFT 22
#define PSR_G_PMR_G_SHIFT (PSR_G_SHIFT - ICC_PMR_EL1_G_SHIFT)
#define PSR_I_PMR_G_SHIFT (7 - ICC_PMR_EL1_G_SHIFT)
#endif /* CONFIG_HAS_NMI */
/*
* System register definitions
*/
#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0)
#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1)
#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2)
#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5)
#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x)
#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x)
#define ICH_LR0_EL2 __LR0_EL2(0)
#define ICH_LR1_EL2 __LR0_EL2(1)
#define ICH_LR2_EL2 __LR0_EL2(2)
#define ICH_LR3_EL2 __LR0_EL2(3)
#define ICH_LR4_EL2 __LR0_EL2(4)
#define ICH_LR5_EL2 __LR0_EL2(5)
#define ICH_LR6_EL2 __LR0_EL2(6)
#define ICH_LR7_EL2 __LR0_EL2(7)
#define ICH_LR8_EL2 __LR8_EL2(0)
#define ICH_LR9_EL2 __LR8_EL2(1)
#define ICH_LR10_EL2 __LR8_EL2(2)
#define ICH_LR11_EL2 __LR8_EL2(3)
#define ICH_LR12_EL2 __LR8_EL2(4)
#define ICH_LR13_EL2 __LR8_EL2(5)
#define ICH_LR14_EL2 __LR8_EL2(6)
#define ICH_LR15_EL2 __LR8_EL2(7)
#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define ICH_AP0R0_EL2 __AP0Rx_EL2(0)
#define ICH_AP0R1_EL2 __AP0Rx_EL2(1)
#define ICH_AP0R2_EL2 __AP0Rx_EL2(2)
#define ICH_AP0R3_EL2 __AP0Rx_EL2(3)
#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x)
#define ICH_AP1R0_EL2 __AP1Rx_EL2(0)
#define ICH_AP1R1_EL2 __AP1Rx_EL2(1)
#define ICH_AP1R2_EL2 __AP1Rx_EL2(2)
#define ICH_AP1R3_EL2 __AP1Rx_EL2(3)
#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_H */


@ -0,0 +1,28 @@
/* asm-offsets.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_COMMON_ASM_OFFSETS_H
#define __HEADER_ARM64_COMMON_ASM_OFFSETS_H
#define S_X0 0x00 /* offsetof(struct pt_regs, regs[0]) */
#define S_X1 0x08 /* offsetof(struct pt_regs, regs[1]) */
#define S_X2 0x10 /* offsetof(struct pt_regs, regs[2]) */
#define S_X3 0x18 /* offsetof(struct pt_regs, regs[3]) */
#define S_X4 0x20 /* offsetof(struct pt_regs, regs[4]) */
#define S_X5 0x28 /* offsetof(struct pt_regs, regs[5]) */
#define S_X6 0x30 /* offsetof(struct pt_regs, regs[6]) */
#define S_X7 0x38 /* offsetof(struct pt_regs, regs[7]) */
#define S_LR 0xf0 /* offsetof(struct pt_regs, regs[30]) */
#define S_SP 0xf8 /* offsetof(struct pt_regs, sp) */
#define S_PC 0x100 /* offsetof(struct pt_regs, pc) */
#define S_PSTATE 0x108 /* offsetof(struct pt_regs, pstate) */
#define S_ORIG_X0 0x110 /* offsetof(struct pt_regs, orig_x0) */
#define S_ORIG_PC 0x118 /* offsetof(struct pt_regs, orig_pc) */
#define S_SYSCALLNO 0x120 /* offsetof(struct pt_regs, syscallno) */
#define S_FRAME_SIZE 0x130 /* sizeof(struct pt_regs); must be 16-byte aligned */
#define CPU_INFO_SETUP 0x10 /* offsetof(struct cpu_info, cpu_setup) */
#define CPU_INFO_SZ 0x18 /* sizeof(struct cpu_info) */
#define TI_FLAGS 0x00 /* offsetof(struct thread_info, flags) */
#define TI_CPU_CONTEXT 0x10 /* offsetof(struct thread_info, cpu_context) */
#endif /* !__HEADER_ARM64_COMMON_ASM_OFFSETS_H */


@ -0,0 +1,20 @@
/* asm-syscall.h COPYRIGHT FUJITSU LIMITED 2018 */
#ifndef __HEADER_ARM64_ASM_SYSCALL_H
#define __HEADER_ARM64_ASM_SYSCALL_H
#ifdef __ASSEMBLY__
#define DECLARATOR(number, name) .equ __NR_##name, number
#define SYSCALL_HANDLED(number, name) DECLARATOR(number, name)
#define SYSCALL_DELEGATED(number, name) DECLARATOR(number, name)
#include <config.h>
#include <syscall_list.h>
#undef DECLARATOR
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
#endif /* __ASSEMBLY__ */
#endif /* !__HEADER_ARM64_ASM_SYSCALL_H */


@ -0,0 +1,149 @@
/* assembler.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_ASSEMBLER_H
#define __HEADER_ARM64_COMMON_ASSEMBLER_H
#include <thread_info.h>
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
#else /* defined(CONFIG_HAS_NMI) */
#include <sysreg.h>
#endif /* defined(CONFIG_HAS_NMI) */
#if defined(CONFIG_HAS_NMI)
/*
* Enable and disable pseudo NMI.
*/
.macro disable_nmi
msr daifset, #2
.endm
.macro enable_nmi
msr daifclr, #2
.endm
/*
* Enable and disable interrupts.
*/
.macro disable_irq, tmp
mov \tmp, #ICC_PMR_EL1_MASKED
msr_s ICC_PMR_EL1, \tmp
.endm
.macro enable_irq, tmp
mov \tmp, #ICC_PMR_EL1_UNMASKED
msr_s ICC_PMR_EL1, \tmp
.endm
#else /* defined(CONFIG_HAS_NMI) */
/*
* Enable and disable pseudo NMI.
*/
.macro disable_nmi
.endm
.macro enable_nmi
.endm
/*
* Enable and disable interrupts.
*/
.macro disable_irq, tmp
msr daifset, #2
.endm
.macro enable_irq, tmp
msr daifclr, #2
.endm
#endif /* defined(CONFIG_HAS_NMI) */
/*
* Enable and disable debug exceptions.
*/
.macro disable_dbg
msr daifset, #8
.endm
.macro enable_dbg
msr daifclr, #8
.endm
.macro disable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
bic \tmp, \tmp, #1
msr mdscr_el1, \tmp
isb // Synchronise with enable_dbg
9990:
.endm
.macro enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
disable_dbg
mrs \tmp, mdscr_el1
orr \tmp, \tmp, #1
msr mdscr_el1, \tmp
b 9991f
9990:
mrs \tmp, mdscr_el1
bic \tmp, \tmp, #1
msr mdscr_el1, \tmp
isb // Synchronise with enable_dbg
9991:
.endm
/*
* Enable both debug exceptions and interrupts. This is likely to be
* faster than two daifclr operations, since writes to this register
* are self-synchronising.
*/
#if defined(CONFIG_HAS_NMI)
.macro enable_dbg_and_irq, tmp
enable_dbg
enable_irq \tmp
.endm
#else /* defined(CONFIG_HAS_NMI) */
.macro enable_dbg_and_irq, tmp
msr daifclr, #(8 | 2)
.endm
#endif /* defined(CONFIG_HAS_NMI) */
/*
* Register aliases.
*/
lr .req x30 // link register
/*
* Vector entry
*/
.macro ventry label
.align 7
b \label
.endm
/*
* Select code when configured for BE.
*/
//#ifdef CONFIG_CPU_BIG_ENDIAN
//#define CPU_BE(code...) code
//#else
#define CPU_BE(code...)
//#endif
/*
* Select code when configured for LE.
*/
//#ifdef CONFIG_CPU_BIG_ENDIAN
//#define CPU_LE(code...)
//#else
#define CPU_LE(code...) code
//#endif
#define ENDPIPROC(x) \
.globl __pi_##x; \
.type __pi_##x, %function; \
.set __pi_##x, x; \
.size __pi_##x, . - x; \
ENDPROC(x)
#endif /* !__HEADER_ARM64_COMMON_ASSEMBLER_H */


@ -0,0 +1,7 @@
/* cache.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_CACHE_H
#define __HEADER_ARM64_COMMON_CACHE_H
#define L1_CACHE_SHIFT 6
#endif /* !__HEADER_ARM64_COMMON_CACHE_H */


@ -0,0 +1,32 @@
/* cas.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_COMMON_CAS_H
#define __HEADER_ARM64_COMMON_CAS_H
#include <arch/cpu.h>
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg (size == 8 case) */
/* 8 byte compare and swap, return 0:fail, 1:success */
static inline int
compare_and_swap(void *addr, unsigned long olddata, unsigned long newdata)
{
unsigned long oldval = 0, res = 0;
smp_mb();
do {
asm volatile("// __cmpxchg8\n"
" ldxr %1, %2\n"
" mov %w0, #0\n"
" cmp %1, %3\n"
" b.ne 1f\n"
" stxr %w0, %4, %2\n"
"1:\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned long *)addr)
: "Ir" (olddata), "r" (newdata)
: "cc");
} while (res);
smp_mb();
return (oldval == olddata);
}
#endif /* !__HEADER_ARM64_COMMON_CAS_H */
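A minimal usage sketch for the 8-byte CAS above: retry until the swap succeeds. The increment helper below is illustrative only and not part of the McKernel source.

/* Illustrative only: retry loop built on compare_and_swap() above. */
static inline void example_atomic_inc_ulong(unsigned long *ctr)
{
	unsigned long old;

	do {
		old = *ctr;				/* snapshot current value */
	} while (!compare_and_swap(ctr, old, old + 1));	/* retry if it changed */
}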


@ -0,0 +1,32 @@
/* compiler.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __ASM_COMPILER_H
#define __ASM_COMPILER_H
/* @ref.impl arch/arm64/include/asm/compiler.h::__asmeq(x,y) */
/*
* This is used to ensure the compiler did actually allocate the register we
* asked it for some inline assembly sequences. Apparently we can't trust the
* compiler from one version to another so a bit of paranoia won't hurt. This
* string is meant to be concatenated with the inline asm string and will
* cause compilation to stop on mismatch. (for details, see gcc PR 15089)
*/
#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
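A hedged usage sketch of __asmeq(): paired with explicit register variables it makes assembly fail if the compiler did not honour the requested register binding. The SVC wrapper below is hypothetical and not part of this header.

/* Illustrative only: hypothetical SVC wrapper guarded by __asmeq(). */
static inline long example_svc0(unsigned long nr)
{
	register unsigned long x8 asm("x8") = nr;	/* call number in x8 */
	register long x0 asm("x0");			/* result comes back in x0 */

	asm volatile(__asmeq("%0", "x0")
		     __asmeq("%1", "x8")
		     "svc #0"
		     : "=r" (x0)
		     : "r" (x8)
		     : "memory");
	return x0;
}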
/* @ref.impl include/linux/compiler.h::__section(S) */
/* Simple shorthand for a section definition */
# define __section(S) __attribute__ ((__section__(#S)))
/* @ref.impl include/linux/compiler.h::__aligned(x) */
/*
* From the GCC manual:
*
* Many functions have no effects except the return value and their
* return value depends only on the parameters and/or global
* variables. Such a function can be subject to common subexpression
* elimination and loop optimization just as an arithmetic operator
* would be.
* [...]
*/
#define __aligned(x) __attribute__((aligned(x)))
#endif /* __ASM_COMPILER_H */


@ -0,0 +1,23 @@
/* const.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_CONST_H
#define __HEADER_ARM64_COMMON_CONST_H
#ifndef __ASSEMBLY__
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
#define _AT(T,X) ((T)(X))
#else /* !__ASSEMBLY__ */
#define _AC(X,Y) X
#define _AT(T,X) X
#endif /* !__ASSEMBLY__ */
#define _BITUL(x) (_AC(1,UL) << (x))
#define _BITULL(x) (_AC(1,ULL) << (x))
/*
* Allow for constants defined here to be used from assembly code
* by prepending the UL suffix only with actual C code compilation.
*/
#define UL(x) _AC(x, UL)
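A small illustration of the expansion described in the comment above; the constant names are made up. The same definition is usable from both C and assembly because the UL suffix is only attached during C compilation.

/* Illustrative only: expands to (0x1UL << 21) in C and (0x1 << 21) in assembly. */
#define EXAMPLE_BLOCK_SIZE	(UL(0x1) << 21)
#define EXAMPLE_BLOCK_MASK	(~(EXAMPLE_BLOCK_SIZE - 1))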
#endif /* !__HEADER_ARM64_COMMON_CONST_H */


@ -0,0 +1,8 @@
/* context.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_CONTEXT_H
#define __HEADER_ARM64_COMMON_CONTEXT_H
void switch_mm(struct page_table *pgtbl);
void free_mmu_context(struct page_table *pgtbl);
#endif /*__HEADER_ARM64_COMMON_CONTEXT_H*/


@ -0,0 +1,182 @@
/* cpufeature.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
#include <types.h>
#include <cpuinfo.h>
#include <sysreg.h>
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
/* CPU feature register tracking */
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
FTR_HIGHER_SAFE,/* Bigger value is safe */
};
#define FTR_STRICT (1) /* SANITY check strict matching required */
#define FTR_NONSTRICT (0) /* SANITY check ignored */
#define FTR_SIGNED (1) /* Value should be treated as signed */
#define FTR_UNSIGNED (0) /* Value should be treated as unsigned */
#define FTR_VISIBLE (1) /* Feature visible to the user space */
#define FTR_HIDDEN (0) /* Feature is hidden from the user */
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
struct arm64_ftr_bits {
int sign; /* Value is signed ? */
int visible;
int strict; /* CPU Sanity check: strict matching required ? */
enum ftr_type type;
uint8_t shift;
uint8_t width;
int64_t safe_val; /* safe value for FTR_EXACT features */
};
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
/*
* @arm64_ftr_reg - Feature register
* @strict_mask Bits which should match across all CPUs for sanity.
* @sys_val Safe value across the CPUs (system view)
*/
struct arm64_ftr_reg {
const char *name;
uint64_t strict_mask;
uint64_t user_mask;
uint64_t sys_val;
uint64_t user_val;
const struct arm64_ftr_bits *ftr_bits;
};
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
/* scope of capability check */
enum {
SCOPE_SYSTEM,
SCOPE_LOCAL_CPU,
};
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
struct arm64_cpu_capabilities {
const char *desc;
uint16_t capability;
int def_scope;/* default scope */
int (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
int (*enable)(void *);/* Called on all active CPUs */
uint32_t sys_reg;
uint8_t field_pos;
uint8_t min_field_value;
uint8_t hwcap_type;
int sign;
unsigned long hwcap;
};
/* @ref.impl include/linux/bitops.h */
/*
* Create a contiguous bitmask starting at bit position @l and ending at
* position @h. For example
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
#define GENMASK(h, l) \
(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline uint64_t arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (uint64_t)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int
cpuid_feature_extract_signed_field_width(uint64_t features, int field, int width)
{
return (int64_t)(features << (64 - width - field)) >> (64 - width);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int
cpuid_feature_extract_signed_field(uint64_t features, int field)
{
return cpuid_feature_extract_signed_field_width(features, field, 4);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline unsigned int
cpuid_feature_extract_unsigned_field_width(uint64_t features, int field, int width)
{
return (uint64_t)(features << (64 - width - field)) >> (64 - width);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline unsigned int
cpuid_feature_extract_unsigned_field(uint64_t features, int field)
{
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline uint64_t arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
{
return (reg->user_val | (reg->sys_val & reg->user_mask));
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int
cpuid_feature_extract_field_width(uint64_t features, int field, int width, int sign)
{
return (sign) ?
cpuid_feature_extract_signed_field_width(features, field, width) :
cpuid_feature_extract_unsigned_field_width(features, field, width);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int
cpuid_feature_extract_field(uint64_t features, int field, int sign)
{
return cpuid_feature_extract_field_width(features, field, 4, sign);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int64_t arm64_ftr_value(const struct arm64_ftr_bits *ftrp, uint64_t val)
{
return (int64_t)cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width, ftrp->sign);
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int id_aa64pfr0_32bit_el0(uint64_t pfr0)
{
uint32_t val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
return val == ID_AA64PFR0_EL0_32BIT_64BIT;
}
/* @ref.impl arch/arm64/include/asm/cpufeature.h */
static inline int id_aa64pfr0_sve(uint64_t pfr0)
{
uint32_t val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
return val > 0;
}
void setup_cpu_features(void);
void update_cpu_features(int cpu,
struct cpuinfo_arm64 *info,
struct cpuinfo_arm64 *boot);
uint64_t read_system_reg(uint32_t id);
void init_cpu_features(struct cpuinfo_arm64 *info);
int enable_mrs_emulation(void);
/* @ref.impl arch/arm64/include/asm/hwcap.h */
enum {
CAP_HWCAP = 1,
#ifdef CONFIG_COMPAT
CAP_COMPAT_HWCAP,
CAP_COMPAT_HWCAP2,
#endif
};
#endif /* __ASM_CPUFEATURE_H */
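Editorial worked example of the field-extraction helpers above; the shift value 32 and the helper name are illustrative (the real field shifts come from sysreg.h). Extraction shifts the 4-bit field to the top of the 64-bit value and back down, so 0x0000002100000000 yields 0x1 for an unsigned field at shift 32, while an all-ones field sign-extends to -1 through the signed variant.

/* Illustrative only: "feature present" test on a 4-bit ID register field. */
static inline int example_field_is_present(uint64_t idreg)
{
	return cpuid_feature_extract_unsigned_field(idreg, 32) > 0;
}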


@ -0,0 +1,34 @@
/* cpuinfo.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __HEADER_ARM64_COMMON_CPUINFO_H
#define __HEADER_ARM64_COMMON_CPUINFO_H
#include <types.h>
/* @ref.impl arch/arm64/include/cpu.h */
/*
* Records attributes of an individual CPU.
*/
struct cpuinfo_arm64 {
uint32_t reg_midr;
unsigned int hwid; /* McKernel Original. */
uint32_t reg_ctr;
uint32_t reg_cntfrq;
uint32_t reg_dczid;
uint32_t reg_revidr;
uint64_t reg_id_aa64dfr0;
uint64_t reg_id_aa64dfr1;
uint64_t reg_id_aa64isar0;
uint64_t reg_id_aa64isar1;
uint64_t reg_id_aa64mmfr0;
uint64_t reg_id_aa64mmfr1;
uint64_t reg_id_aa64mmfr2;
uint64_t reg_id_aa64pfr0;
uint64_t reg_id_aa64pfr1;
uint64_t reg_id_aa64zfr0;
uint64_t reg_zcr;
};
#endif /* !__HEADER_ARM64_COMMON_CPUINFO_H */


@ -0,0 +1,13 @@
/* cpulocal.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_CPULOCAL_H
#define __HEADER_ARM64_COMMON_CPULOCAL_H
#include <types.h>
#include <registers.h>
#include <thread_info.h>
union arm64_cpu_local_variables *get_arm64_cpu_local_variable(int id);
union arm64_cpu_local_variables *get_arm64_this_cpu_local(void);
void *get_arm64_this_cpu_kstack(void);
#endif /* !__HEADER_ARM64_COMMON_CPULOCAL_H */


@ -0,0 +1,12 @@
/* cputable.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_CPUTABLE_H
#define __HEADER_ARM64_COMMON_CPUTABLE_H
struct cpu_info {
unsigned int cpu_id_val;
unsigned int cpu_id_mask;
const char *cpu_name;
unsigned long (*cpu_setup)(void);
};
#endif /* !__HEADER_ARM64_COMMON_CPUTABLE_H */


@ -0,0 +1,110 @@
/* cputype.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* @ref.impl arch/arm64/include/asm/cputype.h */
#ifndef __HEADER_ARM64_COMMON_CPUTYPE_H
#define __HEADER_ARM64_COMMON_CPUTYPE_H
#include <sysreg.h>
#define MPIDR_LEVEL_BITS_SHIFT 3
#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
#define MPIDR_LEVEL_SHIFT(level) \
(((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT)
#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
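As an editorial sketch of the macros just above (the decode helper is not in the original header): MPIDR_LEVEL_SHIFT(level) evaluates to 0, 8, 16 and 32 for levels 0-3, matching the Aff0-Aff3 layout of MPIDR_EL1.

/* Illustrative only: split an MPIDR value into its four affinity fields. */
static inline void example_decode_mpidr(unsigned long mpidr, unsigned int aff[4])
{
	aff[0] = MPIDR_AFFINITY_LEVEL(mpidr, 0);	/* bits [7:0]   */
	aff[1] = MPIDR_AFFINITY_LEVEL(mpidr, 1);	/* bits [15:8]  */
	aff[2] = MPIDR_AFFINITY_LEVEL(mpidr, 2);	/* bits [23:16] */
	aff[3] = MPIDR_AFFINITY_LEVEL(mpidr, 3);	/* bits [39:32] */
}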
#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
#define MIDR_REVISION_MASK 0xf
#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK)
#define MIDR_PARTNUM_SHIFT 4
#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT)
#define MIDR_PARTNUM(midr) \
(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
#define MIDR_ARCHITECTURE_SHIFT 16
#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT)
#define MIDR_ARCHITECTURE(midr) \
(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
(((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
#define MIDR_CPU_VAR_REV(var, rev) \
(((var) << MIDR_VARIANT_SHIFT) | (rev))
#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \
({ \
u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \
u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \
\
_model == (model) && rv >= (rv_min) && rv <= (rv_max); \
})
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
#define ARM_CPU_PART_CORTEX_A57 0xD07
#define ARM_CPU_PART_CORTEX_A72 0xD08
#define ARM_CPU_PART_CORTEX_A53 0xD03
#define ARM_CPU_PART_CORTEX_A73 0xD09
#define ARM_CPU_PART_CORTEX_A75 0xD0A
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#ifndef __ASSEMBLY__
static unsigned int read_cpuid_id(void)
{
return read_cpuid(MIDR_EL1);
}
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_CPUTYPE_H */


@ -0,0 +1,35 @@
/* debug-monitors.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
#ifndef __HEADER_ARM64_COMMON_DEBUG_MONITORS_H
#define __HEADER_ARM64_COMMON_DEBUG_MONITORS_H
/* Low-level stepping controls. */
#define DBG_MDSCR_SS (1 << 0)
#define DBG_SPSR_SS (1 << 21)
/* MDSCR_EL1 enabling bits */
#define DBG_MDSCR_KDE (1 << 13)
#define DBG_MDSCR_MDE (1 << 15)
#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
#define DBG_ESR_EVT_HWBP 0x0
#define DBG_ESR_EVT_HWSS 0x1
#define DBG_ESR_EVT_HWWP 0x2
#define DBG_ESR_EVT_BRK 0x6
#ifndef __ASSEMBLY__
unsigned char debug_monitors_arch(void);
void mdscr_write(unsigned int mdscr);
unsigned int mdscr_read(void);
void debug_monitors_init(void);
struct pt_regs;
void set_regs_spsr_ss(struct pt_regs *regs);
void clear_regs_spsr_ss(struct pt_regs *regs);
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_DEBUG_MONITORS_H */


@ -0,0 +1,28 @@
/* elf.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_ELF_H
#define __HEADER_ARM64_COMMON_ELF_H
#include <ihk/context.h>
/* ELF target machines defined */
#define EM_AARCH64 183
/* ELF header defined */
#define ELF_CLASS ELFCLASS64
#define ELF_DATA ELFDATA2LSB
#define ELF_OSABI ELFOSABI_NONE
#define ELF_ABIVERSION El_ABIVERSION_NONE
#define ELF_ARCH EM_AARCH64
#define ELF_NGREG64 (sizeof (struct user_pt_regs) / sizeof(elf_greg64_t))
/* PTRACE_GETREGSET and PTRACE_SETREGSET requests. */
#define NT_ARM_TLS 0x401 /* ARM TLS register */
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
#endif /* __HEADER_ARM64_COMMON_ELF_H */


@ -0,0 +1,60 @@
/* elfnote.h COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl include/linux/elfnote.h */
/*
* Helper macros to generate ELF Note structures, which are put into a
* PT_NOTE segment of the final vmlinux image. These are useful for
* including name-value pairs of metadata into the kernel binary (or
* modules?) for use by external programs.
*
* Each note has three parts: a name, a type and a desc. The name is
* intended to distinguish the note's originator, so it would be a
* company, project, subsystem, etc; it must be in a suitable form for
* use in a section name. The type is an integer which is used to tag
* the data, and is considered to be within the "name" namespace (so
* "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
* "desc" field is the actual data. There are no constraints on the
* desc field's contents, though typically they're fairly small.
*
* All notes from a given NAME are put into a section named
* .note.NAME. When the kernel image is finally linked, all the notes
* are packed into a single .notes section, which is mapped into the
* PT_NOTE segment. Because notes for a given name are grouped into
* the same section, they'll all be adjacent in the output file.
*
* This file defines macros for both C and assembler use. Their
* syntax is slightly different, but they're semantically similar.
*
* See the ELF specification for more detail about ELF notes.
*/
#ifndef __HEADER_ARM64_COMMON_ELFNOTE_H
#define __HEADER_ARM64_COMMON_ELFNOTE_H
#ifdef __ASSEMBLER__
/*
* Generate a structure with the same shape as Elf{32,64}_Nhdr (which
* turn out to be the same size and shape), followed by the name and
* desc data with appropriate padding. The 'desctype' argument is the
* assembler pseudo op defining the type of the data e.g. .asciz while
* 'descdata' is the data itself e.g. "hello, world".
*
* e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
* ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
*/
#define ELFNOTE_START(name, type, flags) \
.pushsection .note.name, flags,@note ; \
.balign 4 ; \
.long 2f - 1f /* namesz */ ; \
.long 4484f - 3f /* descsz */ ; \
.long type ; \
1:.asciz #name ; \
2:.balign 4 ; \
3:
#define ELFNOTE_END \
4484:.balign 4 ; \
.popsection ;
#endif /* __ASSEMBLER__ */
#endif /* !__HEADER_ARM64_COMMON_ELFNOTE_H */


@ -0,0 +1,112 @@
/* errno.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_COMMON_ERRNO_H
#define __HEADER_ARM64_COMMON_ERRNO_H
#include <generic-errno.h>
#define EDEADLK 35 /* Resource deadlock would occur */
#define ENAMETOOLONG 36 /* File name too long */
#define ENOLCK 37 /* No record locks available */
#define ENOSYS 38 /* Function not implemented */
#define ENOTEMPTY 39 /* Directory not empty */
#define ELOOP 40 /* Too many symbolic links encountered */
#define EWOULDBLOCK EAGAIN /* Operation would block */
#define ENOMSG 42 /* No message of desired type */
#define EIDRM 43 /* Identifier removed */
#define ECHRNG 44 /* Channel number out of range */
#define EL2NSYNC 45 /* Level 2 not synchronized */
#define EL3HLT 46 /* Level 3 halted */
#define EL3RST 47 /* Level 3 reset */
#define ELNRNG 48 /* Link number out of range */
#define EUNATCH 49 /* Protocol driver not attached */
#define ENOCSI 50 /* No CSI structure available */
#define EL2HLT 51 /* Level 2 halted */
#define EBADE 52 /* Invalid exchange */
#define EBADR 53 /* Invalid request descriptor */
#define EXFULL 54 /* Exchange full */
#define ENOANO 55 /* No anode */
#define EBADRQC 56 /* Invalid request code */
#define EBADSLT 57 /* Invalid slot */
#define EDEADLOCK EDEADLK
#define EBFONT 59 /* Bad font file format */
#define ENOSTR 60 /* Device not a stream */
#define ENODATA 61 /* No data available */
#define ETIME 62 /* Timer expired */
#define ENOSR 63 /* Out of streams resources */
#define ENONET 64 /* Machine is not on the network */
#define ENOPKG 65 /* Package not installed */
#define EREMOTE 66 /* Object is remote */
#define ENOLINK 67 /* Link has been severed */
#define EADV 68 /* Advertise error */
#define ESRMNT 69 /* Srmount error */
#define ECOMM 70 /* Communication error on send */
#define EPROTO 71 /* Protocol error */
#define EMULTIHOP 72 /* Multihop attempted */
#define EDOTDOT 73 /* RFS specific error */
#define EBADMSG 74 /* Not a data message */
#define EOVERFLOW 75 /* Value too large for defined data type */
#define ENOTUNIQ 76 /* Name not unique on network */
#define EBADFD 77 /* File descriptor in bad state */
#define EREMCHG 78 /* Remote address changed */
#define ELIBACC 79 /* Can not access a needed shared library */
#define ELIBBAD 80 /* Accessing a corrupted shared library */
#define ELIBSCN 81 /* .lib section in a.out corrupted */
#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
#define ELIBEXEC 83 /* Cannot exec a shared library directly */
#define EILSEQ 84 /* Illegal byte sequence */
#define ERESTART 85 /* Interrupted system call should be restarted */
#define ESTRPIPE 86 /* Streams pipe error */
#define EUSERS 87 /* Too many users */
#define ENOTSOCK 88 /* Socket operation on non-socket */
#define EDESTADDRREQ 89 /* Destination address required */
#define EMSGSIZE 90 /* Message too long */
#define EPROTOTYPE 91 /* Protocol wrong type for socket */
#define ENOPROTOOPT 92 /* Protocol not available */
#define EPROTONOSUPPORT 93 /* Protocol not supported */
#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
#define EPFNOSUPPORT 96 /* Protocol family not supported */
#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
#define EADDRINUSE 98 /* Address already in use */
#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
#define ENETDOWN 100 /* Network is down */
#define ENETUNREACH 101 /* Network is unreachable */
#define ENETRESET 102 /* Network dropped connection because of reset */
#define ECONNABORTED 103 /* Software caused connection abort */
#define ECONNRESET 104 /* Connection reset by peer */
#define ENOBUFS 105 /* No buffer space available */
#define EISCONN 106 /* Transport endpoint is already connected */
#define ENOTCONN 107 /* Transport endpoint is not connected */
#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
#define ETOOMANYREFS 109 /* Too many references: cannot splice */
#define ETIMEDOUT 110 /* Connection timed out */
#define ECONNREFUSED 111 /* Connection refused */
#define EHOSTDOWN 112 /* Host is down */
#define EHOSTUNREACH 113 /* No route to host */
#define EALREADY 114 /* Operation already in progress */
#define EINPROGRESS 115 /* Operation now in progress */
#define ESTALE 116 /* Stale NFS file handle */
#define EUCLEAN 117 /* Structure needs cleaning */
#define ENOTNAM 118 /* Not a XENIX named type file */
#define ENAVAIL 119 /* No XENIX semaphores available */
#define EISNAM 120 /* Is a named type file */
#define EREMOTEIO 121 /* Remote I/O error */
#define EDQUOT 122 /* Quota exceeded */
#define ENOMEDIUM 123 /* No medium found */
#define EMEDIUMTYPE 124 /* Wrong medium type */
#define ECANCELED 125 /* Operation Canceled */
#define ENOKEY 126 /* Required key not available */
#define EKEYEXPIRED 127 /* Key has expired */
#define EKEYREVOKED 128 /* Key has been revoked */
#define EKEYREJECTED 129 /* Key was rejected by service */
/* for robust mutexes */
#define EOWNERDEAD 130 /* Owner died */
#define ENOTRECOVERABLE 131 /* State not recoverable */
#define ERFKILL 132 /* Operation not possible due to RF-kill */
#endif


@ -0,0 +1,180 @@
/* esr.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/*
* Copyright (C) 2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_ESR_H
#define __ASM_ESR_H
#include <const.h>
#define ESR_ELx_EC_UNKNOWN (0x00)
#define ESR_ELx_EC_WFx (0x01)
/* Unallocated EC: 0x02 */
#define ESR_ELx_EC_CP15_32 (0x03)
#define ESR_ELx_EC_CP15_64 (0x04)
#define ESR_ELx_EC_CP14_MR (0x05)
#define ESR_ELx_EC_CP14_LS (0x06)
#define ESR_ELx_EC_FP_ASIMD (0x07)
#define ESR_ELx_EC_CP10_ID (0x08)
/* Unallocated EC: 0x09 - 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
/* Unallocated EC: 0x0d */
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
#define ESR_ELx_EC_HVC32 (0x12)
#define ESR_ELx_EC_SMC32 (0x13)
/* Unallocated EC: 0x14 */
#define ESR_ELx_EC_SVC64 (0x15)
#define ESR_ELx_EC_HVC64 (0x16)
#define ESR_ELx_EC_SMC64 (0x17)
#define ESR_ELx_EC_SYS64 (0x18)
#define ESR_ELx_EC_SVE (0x19)
/* Unallocated EC: 0x1A - 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f)
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
#define ESR_ELx_EC_PC_ALIGN (0x22)
/* Unallocated EC: 0x23 */
#define ESR_ELx_EC_DABT_LOW (0x24)
#define ESR_ELx_EC_DABT_CUR (0x25)
#define ESR_ELx_EC_SP_ALIGN (0x26)
/* Unallocated EC: 0x27 */
#define ESR_ELx_EC_FP_EXC32 (0x28)
/* Unallocated EC: 0x29 - 0x2B */
#define ESR_ELx_EC_FP_EXC64 (0x2C)
/* Unallocated EC: 0x2D - 0x2E */
#define ESR_ELx_EC_SERROR (0x2F)
#define ESR_ELx_EC_BREAKPT_LOW (0x30)
#define ESR_ELx_EC_BREAKPT_CUR (0x31)
#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
#define ESR_ELx_EC_WATCHPT_LOW (0x34)
#define ESR_ELx_EC_WATCHPT_CUR (0x35)
/* Unallocated EC: 0x36 - 0x37 */
#define ESR_ELx_EC_BKPT32 (0x38)
/* Unallocated EC: 0x39 */
#define ESR_ELx_EC_VECTOR32 (0x3A)
/* Unallocated EC: 0x3B */
#define ESR_ELx_EC_BRK64 (0x3C)
/* Unallocated EC: 0x3D - 0x3F */
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define ESR_ELx_IL (UL(1) << 25)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR (UL(1) << 6)
/* Shared ISS field definitions for Data/Instruction aborts */
#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_S1PTW (UL(1) << 7)
/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
#define ESR_ELx_FSC (0x3F)
#define ESR_ELx_FSC_TYPE (0x3C)
#define ESR_ELx_FSC_EXTABT (0x10)
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV (UL(1) << 24)
#define ESR_ELx_SAS_SHIFT (22)
#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
#define ESR_ELx_SSE (UL(1) << 21)
#define ESR_ELx_SRT_SHIFT (16)
#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
#define ESR_ELx_SF (UL(1) << 15)
#define ESR_ELx_AR (UL(1) << 14)
#define ESR_ELx_CM (UL(1) << 8)
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
/* ESR value templates for specific events */
/* BRK instruction trap from AArch64 state */
#define ESR_ELx_VAL_BRK64(imm) \
((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \
((imm) & 0xffff))
/* ISS field definitions for System instruction traps */
#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22
#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT)
#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1
#define ESR_ELx_SYS64_ISS_DIR_READ 0x1
#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0
#define ESR_ELx_SYS64_ISS_RT_SHIFT 5
#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT)
#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1
#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT)
#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10
#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT)
#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14
#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT)
#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17
#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT)
#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20
#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT)
#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
ESR_ELx_SYS64_ISS_OP1_MASK | \
ESR_ELx_SYS64_ISS_OP2_MASK | \
ESR_ELx_SYS64_ISS_CRN_MASK | \
ESR_ELx_SYS64_ISS_CRM_MASK)
#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \
(((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \
((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \
((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \
((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \
((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT))
#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \
ESR_ELx_SYS64_ISS_DIR_MASK)
/*
* User space cache operations have the following sysreg encoding
* in System instructions.
* op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
*/
#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5
#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
ESR_ELx_SYS64_ISS_OP1_MASK | \
ESR_ELx_SYS64_ISS_OP2_MASK | \
ESR_ELx_SYS64_ISS_CRN_MASK | \
ESR_ELx_SYS64_ISS_DIR_MASK)
#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \
(ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
ESR_ELx_SYS64_ISS_DIR_WRITE)
#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \
ESR_ELx_SYS64_ISS_DIR_READ)
#endif /* __ASM_ESR_H */
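A hedged sketch of how the EL0 cache-operation ISS encodings above get used; the classifier below is illustrative only, not McKernel's actual trap path.

/* Illustrative only: classify an EL0 cache-maintenance trap from its ESR.
 * Returns the CRM value (5, 10, 11 or 14 per the comment above), or 0 if
 * the ESR does not encode an EL0 cache operation. */
static inline unsigned int example_el0_cache_op_crm(unsigned int esr)
{
	if ((esr & ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK) !=
	    ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL)
		return 0;
	return (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
}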


@ -0,0 +1,102 @@
/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2019 */
#ifndef __HEADER_ARM64_COMMON_FPSIMD_H
#define __HEADER_ARM64_COMMON_FPSIMD_H
#include <ptrace.h>
#ifndef __ASSEMBLY__
/*
* FP/SIMD storage area has:
* - FPSR and FPCR
* - 32 128-bit data registers
*
* Note that user_fpsimd forms a prefix of this structure, which is
* relied upon in the ptrace FP/SIMD accessors.
*/
/* @ref.impl arch/arm64/include/asm/fpsimd.h::struct fpsimd_state */
struct fpsimd_state {
union {
struct user_fpsimd_state user_fpsimd;
struct {
__uint128_t vregs[32];
unsigned int fpsr;
unsigned int fpcr;
/*
* For ptrace compatibility, pad to next 128-bit
* boundary here if extending this struct.
*/
};
};
/* the id of the last cpu to have restored this state */
unsigned int cpu;
};
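The prefix property called out in the comment above can be spelled out as a compile-time check; the assertion below is editorial, not part of the original file.

/* Illustrative only: the ptrace accessors rely on user_fpsimd sitting at
 * offset zero of struct fpsimd_state. */
_Static_assert(__builtin_offsetof(struct fpsimd_state, user_fpsimd) == 0,
	       "user_fpsimd must be a prefix of struct fpsimd_state");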
/* need for struct process */
typedef struct fpsimd_state fp_regs_struct;
extern void thread_fpsimd_to_sve(struct thread *thread, fp_regs_struct *fp_regs);
extern void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs);
#ifdef CONFIG_ARM64_SVE
extern size_t sve_state_size(struct thread const *thread);
extern void sve_free(struct thread *thread);
extern int sve_alloc(struct thread *thread);
extern void sve_save_state(void *state, unsigned int *pfpsr);
extern void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern int sve_set_thread_vl(unsigned long arg);
extern int sve_get_thread_vl(void);
extern int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags);
#define SVE_SET_VL(arg) sve_set_thread_vl(arg)
#define SVE_GET_VL() sve_get_thread_vl()
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
#else /* CONFIG_ARM64_SVE */
#include <ihk/debug.h>
#include <errno.h>
static void sve_save_state(void *state, unsigned int *pfpsr)
{
panic("PANIC:sve_save_state() was called CONFIG_ARM64_SVE off.\n");
}
static void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1)
{
panic("PANIC:sve_load_state() was called CONFIG_ARM64_SVE off.\n");
}
static unsigned int sve_get_vl(void)
{
panic("PANIC:sve_get_vl() was called CONFIG_ARM64_SVE off.\n");
return (unsigned int)-1;
}
static int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags)
{
return -EINVAL;
}
/* for prctl syscall */
#define SVE_SET_VL(a) (-EINVAL)
#define SVE_GET_VL() (-EINVAL)
#endif /* CONFIG_ARM64_SVE */
extern void sve_setup(void);
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
extern void thread_fpsimd_save(struct thread *thread);
extern void thread_fpsimd_load(struct thread *thread);
extern int sve_max_vl;
extern int sve_default_vl;
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_FPSIMD_H */


@ -0,0 +1,151 @@
/* fpsimdmacros.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
.macro _check_reg nr
.if (\nr) < 0 || (\nr) > 31
.error "Bad register number \nr."
.endif
.endm
.macro _check_zreg znr
.if (\znr) < 0 || (\znr) > 31
.error "Bad Scalable Vector Extension vector register number \znr."
.endif
.endm
.macro _check_preg pnr
.if (\pnr) < 0 || (\pnr) > 15
.error "Bad Scalable Vector Extension predicate register number \pnr."
.endif
.endm
.macro _check_num n, min, max
.if (\n) < (\min) || (\n) > (\max)
.error "Number \n out of range [\min,\max]"
.endif
.endm
.macro _zstrv znt, nspb, ioff=0
_check_zreg \znt
_check_reg \nspb
_check_num (\ioff), -0x100, 0xff
.inst 0xe5804000 \
| (\znt) \
| ((\nspb) << 5) \
| (((\ioff) & 7) << 10) \
| (((\ioff) & 0x1f8) << 13)
.endm
.macro _zldrv znt, nspb, ioff=0
_check_zreg \znt
_check_reg \nspb
_check_num (\ioff), -0x100, 0xff
.inst 0x85804000 \
| (\znt) \
| ((\nspb) << 5) \
| (((\ioff) & 7) << 10) \
| (((\ioff) & 0x1f8) << 13)
.endm
.macro _zstrp pnt, nspb, ioff=0
_check_preg \pnt
_check_reg \nspb
_check_num (\ioff), -0x100, 0xff
.inst 0xe5800000 \
| (\pnt) \
| ((\nspb) << 5) \
| (((\ioff) & 7) << 10) \
| (((\ioff) & 0x1f8) << 13)
.endm
.macro _zldrp pnt, nspb, ioff=0
_check_preg \pnt
_check_reg \nspb
_check_num (\ioff), -0x100, 0xff
.inst 0x85800000 \
| (\pnt) \
| ((\nspb) << 5) \
| (((\ioff) & 7) << 10) \
| (((\ioff) & 0x1f8) << 13)
.endm
.macro _zrdvl nspd, is1
_check_reg \nspd
_check_num (\is1), -0x20, 0x1f
.inst 0x04bf5000 \
| (\nspd) \
| (((\is1) & 0x3f) << 5)
.endm
.macro _zrdffr pnd
_check_preg \pnd
.inst 0x2519f000 \
| (\pnd)
.endm
.macro _zwrffr pnd
_check_preg \pnd
.inst 0x25289000 \
| ((\pnd) << 5)
.endm
.macro for from, to, insn
.if (\from) >= (\to)
\insn (\from)
.exitm
.endif
for \from, ((\from) + (\to)) / 2, \insn
for ((\from) + (\to)) / 2 + 1, \to, \insn
.endm
.macro sve_save nb, xpfpsr, ntmp
.macro savez n
_zstrv \n, \nb, (\n) - 34
.endm
.macro savep n
_zstrp \n, \nb, (\n) - 16
.endm
for 0, 31, savez
for 0, 15, savep
_zrdffr 0
_zstrp 0, \nb
_zldrp 0, \nb, -16
mrs x\ntmp, fpsr
str w\ntmp, [\xpfpsr]
mrs x\ntmp, fpcr
str w\ntmp, [\xpfpsr, #4]
.purgem savez
.purgem savep
.endm
.macro sve_load nb, xpfpsr, xvqminus1, ntmp
mrs_s x\ntmp, SYS_ZCR_EL1
bic x\ntmp, x\ntmp, ZCR_EL1_LEN_MASK
orr x\ntmp, x\ntmp, \xvqminus1
msr_s SYS_ZCR_EL1, x\ntmp // self-synchronising
.macro loadz n
_zldrv \n, \nb, (\n) - 34
.endm
.macro loadp n
_zldrp \n, \nb, (\n) - 16
.endm
for 0, 31, loadz
_zldrp 0, \nb
_zwrffr 0
for 0, 15, loadp
ldr w\ntmp, [\xpfpsr]
msr fpsr, x\ntmp
ldr w\ntmp, [\xpfpsr, #4]
msr fpcr, x\ntmp
.purgem loadz
.purgem loadp
.endm


@ -0,0 +1,92 @@
/* hw_breakpoint.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_COMMON_HW_BREAKPOINT_H
#define __HEADER_ARM64_COMMON_HW_BREAKPOINT_H
#include <ihk/types.h>
int hw_breakpoint_slots(int type);
unsigned long read_wb_reg(int reg, int n);
void write_wb_reg(int reg, int n, unsigned long val);
void hw_breakpoint_reset(void);
void arch_hw_breakpoint_init(void);
struct user_hwdebug_state;
int arch_validate_hwbkpt_settings(long note_type, struct user_hwdebug_state *hws, size_t len);
extern int core_num_brps;
extern int core_num_wrps;
/* @ref.impl include/uapi/linux/hw_breakpoint.h::HW_BREAKPOINT_LEN_n, HW_BREAKPOINT_xxx, bp_type_idx */
enum {
HW_BREAKPOINT_LEN_1 = 1,
HW_BREAKPOINT_LEN_2 = 2,
HW_BREAKPOINT_LEN_4 = 4,
HW_BREAKPOINT_LEN_8 = 8,
};
enum {
HW_BREAKPOINT_EMPTY = 0,
HW_BREAKPOINT_R = 1,
HW_BREAKPOINT_W = 2,
HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W,
HW_BREAKPOINT_X = 4,
HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
};
enum bp_type_idx {
TYPE_INST = 0,
TYPE_DATA = 1,
TYPE_MAX
};
/* Breakpoint */
#define ARM_BREAKPOINT_EXECUTE 0
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
#define AARCH64_ESR_ACCESS_MASK (1 << 6)
/* Privilege Levels */
#define AARCH64_BREAKPOINT_EL1 1
#define AARCH64_BREAKPOINT_EL0 2
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
#define ARM_BREAKPOINT_LEN_4 0xf
#define ARM_BREAKPOINT_LEN_8 0xff
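Editorial sketch (the helper is not in the original header): the generic HW_BREAKPOINT_LEN_n values map onto the ARM byte-address-select encodings above as a contiguous low mask.

/* Illustrative only: 1 -> 0x1, 2 -> 0x3, 4 -> 0xf, 8 -> 0xff. */
static inline unsigned int example_len_to_bas(unsigned int len)
{
	return (1U << len) - 1;
}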
/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::ARM_MAX_[BRP|WRP] */
/*
* Limits.
* Changing these will require modifications to the register accessors.
*/
#define ARM_MAX_BRP 16
#define ARM_MAX_WRP 16
/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_REG_xxx */
/* Virtual debug register bases. */
#define AARCH64_DBG_REG_BVR 0
#define AARCH64_DBG_REG_BCR (AARCH64_DBG_REG_BVR + ARM_MAX_BRP)
#define AARCH64_DBG_REG_WVR (AARCH64_DBG_REG_BCR + ARM_MAX_BRP)
#define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP)
/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_REG_NAME_xxx */
/* Debug register names. */
#define AARCH64_DBG_REG_NAME_BVR "bvr"
#define AARCH64_DBG_REG_NAME_BCR "bcr"
#define AARCH64_DBG_REG_NAME_WVR "wvr"
#define AARCH64_DBG_REG_NAME_WCR "wcr"
/* @ref.impl arch/arm64/include/asm/hw_breakpoint.h::AARCH64_DBG_[READ|WRITE] */
/* Accessor macros for the debug registers. */
#define AARCH64_DBG_READ(N, REG, VAL) do {\
asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\
} while (0)
#define AARCH64_DBG_WRITE(N, REG, VAL) do {\
asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\
} while (0)
#endif /* !__HEADER_ARM64_COMMON_HW_BREAKPOINT_H */


@ -0,0 +1,26 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
/*
* HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
#define HWCAP_EVTSTRM (1 << 2)
#define HWCAP_AES (1 << 3)
#define HWCAP_PMULL (1 << 4)
#define HWCAP_SHA1 (1 << 5)
#define HWCAP_SHA2 (1 << 6)
#define HWCAP_CRC32 (1 << 7)
#define HWCAP_ATOMICS (1 << 8)
#define HWCAP_FPHP (1 << 9)
#define HWCAP_ASIMDHP (1 << 10)
#define HWCAP_CPUID (1 << 11)
#define HWCAP_ASIMDRDM (1 << 12)
#define HWCAP_SVE (1 << 13)
unsigned long arch_get_hwcap(void);
extern unsigned long elf_hwcap;
#endif /* _UAPI__ASM_HWCAP_H */


@ -0,0 +1,365 @@
/* atomic.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_IHK_ATOMIC_H
#define __HEADER_ARM64_IHK_ATOMIC_H
#include <arch/cpu.h>
/***********************************************************************
* ihk_atomic_t
*/
typedef struct {
int counter;
} ihk_atomic_t;
#define IHK_ATOMIC_INIT(i) { (i) }
static inline int ihk_atomic_read(const ihk_atomic_t *v)
{
return (*(volatile int *)&(v)->counter);
}
static inline void ihk_atomic_set(ihk_atomic_t *v, int i)
{
v->counter = i;
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_add (atomic_##op) */
static inline void ihk_atomic_add(int i, ihk_atomic_t *v)
{
unsigned long tmp;
int result;
asm volatile("// atomic_add\n"
"1: ldxr %w0, %2\n"
" add %w0, %w0, %w3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i));
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_sub (atomic_##op) */
static inline void ihk_atomic_sub(int i, ihk_atomic_t *v)
{
unsigned long tmp;
int result;
asm volatile("// atomic_sub\n"
"1: ldxr %w0, %2\n"
" sub %w0, %w0, %w3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i));
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc */
#define ihk_atomic_inc(v) ihk_atomic_add(1, v)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec */
#define ihk_atomic_dec(v) ihk_atomic_sub(1, v)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_add_return (atomic_##op##_return) */
static inline int ihk_atomic_add_return(int i, ihk_atomic_t *v)
{
unsigned long tmp;
int result;
asm volatile("// atomic_add_return\n"
"1: ldxr %w0, %2\n"
" add %w0, %w0, %w3\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
: "memory");
smp_mb();
return result;
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_sub_return (atomic_##op##_return) */
static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
{
unsigned long tmp;
int result;
asm volatile("// atomic_sub_return\n"
"1: ldxr %w0, %2\n"
" sub %w0, %w0, %w3\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: "Ir" (i)
: "memory");
smp_mb();
return result;
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc_and_test */
#define ihk_atomic_inc_and_test(v) (ihk_atomic_add_return(1, v) == 0)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec_and_test */
#define ihk_atomic_dec_and_test(v) (ihk_atomic_sub_return(1, v) == 0)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_inc_return */
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic_dec_return */
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
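A minimal usage sketch of the ihk_atomic_t helpers above, assuming a caller-supplied release callback; neither the helper nor the callback is part of this header.

/* Illustrative only: a reference counter built on ihk_atomic_dec_and_test(). */
static inline void example_put_ref(ihk_atomic_t *refs,
				   void (*release)(void *), void *obj)
{
	if (ihk_atomic_dec_and_test(refs))	/* dropped the last reference? */
		release(obj);
}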
/***********************************************************************
* ihk_atomic64_t
*/
typedef struct {
long counter64;
} ihk_atomic64_t;
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
{
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
{
v->counter64 = i;
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_add (atomic64_##op) */
static inline void ihk_atomic64_add(long i, ihk_atomic64_t *v)
{
long result;
unsigned long tmp;
asm volatile("// atomic64_add\n"
"1: ldxr %0, %2\n"
" add %0, %0, %3\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter64)
: "Ir" (i));
}
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_inc */
#define ihk_atomic64_inc(v) ihk_atomic64_add(1LL, (v))
#define ihk_atomic64_cmpxchg(p, o, n) cmpxchg(&((p)->counter64), o, n)
/***********************************************************************
* others
*/
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__xchg */
static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
{
unsigned long ret = 0, tmp;
switch (size) {
case 1:
asm volatile("// __xchg1\n"
"1: ldxrb %w0, %2\n"
" stlxrb %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned char *)ptr)
: "r" (x)
: "memory");
break;
case 2:
asm volatile("// __xchg2\n"
"1: ldxrh %w0, %2\n"
" stlxrh %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned short *)ptr)
: "r" (x)
: "memory");
break;
case 4:
asm volatile("// __xchg4\n"
"1: ldxr %w0, %2\n"
" stlxr %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned int *)ptr)
: "r" (x)
: "memory");
break;
case 8:
asm volatile("// __xchg8\n"
"1: ldxr %0, %2\n"
" stlxr %w1, %3, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)
: "r" (x)
: "memory");
break;
/*
default:
BUILD_BUG();
*/
}
smp_mb();
return ret;
}
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::xchg */
#define xchg(ptr,x) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
})
#define xchg4(ptr, x) xchg(ptr,x)
#define xchg8(ptr, x) xchg(ptr,x)
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg */
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
unsigned long oldval = 0, res;
switch (size) {
case 1:
do {
asm volatile("// __cmpxchg1\n"
" ldxrb %w1, %2\n"
" mov %w0, #0\n"
" cmp %w1, %w3\n"
" b.ne 1f\n"
" stxrb %w0, %w4, %2\n"
"1:\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned char *)ptr)
: "Ir" (old), "r" (new) : "cc");
} while (res);
break;
case 2:
do {
asm volatile("// __cmpxchg2\n"
" ldxrh %w1, %2\n"
" mov %w0, #0\n"
" cmp %w1, %w3\n"
" b.ne 1f\n"
" stxrh %w0, %w4, %2\n"
"1:\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned short *)ptr)
: "Ir" (old), "r" (new)
: "cc");
} while (res);
break;
case 4:
do {
asm volatile("// __cmpxchg4\n"
" ldxr %w1, %2\n"
" mov %w0, #0\n"
" cmp %w1, %w3\n"
" b.ne 1f\n"
" stxr %w0, %w4, %2\n"
"1:\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned int *)ptr)
: "Ir" (old), "r" (new)
: "cc");
} while (res);
break;
case 8:
do {
asm volatile("// __cmpxchg8\n"
" ldxr %1, %2\n"
" mov %w0, #0\n"
" cmp %1, %3\n"
" b.ne 1f\n"
" stxr %w0, %4, %2\n"
"1:\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*(unsigned long *)ptr)
: "Ir" (old), "r" (new)
: "cc");
} while (res);
break;
/*
default:
BUILD_BUG();
*/
}
return oldval;
}
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::__cmpxchg_mb */
static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
unsigned long ret;
smp_mb();
ret = __cmpxchg(ptr, old, new, size);
smp_mb();
return ret;
}
/* @ref.impl arch/arm64/include/asm/cmpxchg.h::cmpxchg */
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#define atomic_cmpxchg4(ptr, o, n) cmpxchg(ptr,o,n)
#define atomic_cmpxchg8(ptr, o, n) cmpxchg(ptr,o,n)
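A hedged usage sketch for cmpxchg() above: it returns the value it observed, so a mismatch means another CPU won the race and the loop retries against the value it left behind. The helper is illustrative only.

/* Illustrative only: lock-free "store maximum" built on cmpxchg(). */
static inline void example_store_max(unsigned long *p, unsigned long val)
{
	unsigned long old = *p;

	while (old < val) {
		unsigned long seen = cmpxchg(p, old, val);

		if (seen == old)	/* our update won */
			break;
		old = seen;		/* lost the race; retry against new value */
	}
}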
static inline void ihk_atomic_add_long(long i, long *v)
{
long result;
unsigned long tmp;
asm volatile("// atomic64_add\n"
"1: ldxr %0, %2\n"
" add %0, %0, %3\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (*v)
: "Ir" (i));
}
static inline void ihk_atomic_add_ulong(long i, unsigned long *v)
{
long result;
unsigned long tmp;
asm volatile("// atomic64_add\n"
"1: ldxr %0, %2\n"
" add %0, %0, %3\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (*v)
: "Ir" (i));
}
static inline unsigned long ihk_atomic_add_long_return(long i, long *v)
{
unsigned long result;
unsigned long tmp;
asm volatile("// atomic64_add_return\n"
"1: ldxr %0, %2\n"
" add %0, %0, %3\n"
" stlxr %w1, %0, %2\n"
" cbnz %w1, 1b"
: "=&r" (result), "=&r" (tmp), "+Q" (*v)
: "Ir" (i)
: "memory");
smp_mb();
return result;
}
#endif /* !__HEADER_ARM64_COMMON_IHK_ATOMIC_H */


@ -0,0 +1,83 @@
/* context.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_IHK_CONTEXT_H
#define __HEADER_ARM64_IHK_CONTEXT_H
#include <registers.h>
struct thread_info;
typedef struct {
struct thread_info *thread;
} ihk_mc_kernel_context_t;
struct user_pt_regs {
unsigned long regs[31];
unsigned long sp;
unsigned long pc;
unsigned long pstate;
};
struct pt_regs {
union {
struct user_pt_regs user_regs;
struct {
unsigned long regs[31];
unsigned long sp;
unsigned long pc;
unsigned long pstate;
};
};
unsigned long orig_x0;
unsigned long orig_pc;
unsigned long syscallno;
unsigned long __padding;
};
typedef struct pt_regs ihk_mc_user_context_t;
/* @ref.impl arch/arm64/include/asm/ptrace.h */
#define GET_IP(regs) ((unsigned long)(regs)->pc)
#define SET_IP(regs, value) ((regs)->pc = ((uint64_t) (value)))
/* @ref.impl arch/arm64/include/asm/ptrace.h */
/* AArch32 CPSR bits */
#define COMPAT_PSR_MODE_MASK 0x0000001f
/* @ref.impl include/asm-generic/ptrace.h */
static inline unsigned long instruction_pointer(struct pt_regs *regs)
{
return GET_IP(regs);
}
/* @ref.impl include/asm-generic/ptrace.h */
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
SET_IP(regs, val);
}
/* @ref.impl arch/arm64/include/asm/ptrace.h */
/*
* Write a register given an architectural register index r.
* This handles the common case where 31 means XZR, not SP.
*/
static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
unsigned long val)
{
if (r != 31)
regs->regs[r] = val;
}
/* temp */
#define ihk_mc_syscall_arg0(uc) ((uc)->regs[0])
#define ihk_mc_syscall_arg1(uc) ((uc)->regs[1])
#define ihk_mc_syscall_arg2(uc) ((uc)->regs[2])
#define ihk_mc_syscall_arg3(uc) ((uc)->regs[3])
#define ihk_mc_syscall_arg4(uc) ((uc)->regs[4])
#define ihk_mc_syscall_arg5(uc) ((uc)->regs[5])
#define ihk_mc_syscall_ret(uc) ((uc)->regs[0])
#define ihk_mc_syscall_number(uc) ((uc)->regs[8])
#define ihk_mc_syscall_pc(uc) ((uc)->pc)
#define ihk_mc_syscall_sp(uc) ((uc)->sp)
#endif /* !__HEADER_ARM64_IHK_CONTEXT_H */


@ -0,0 +1,14 @@
/* ikc.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_IHK_IKC_H
#define __HEADER_ARM64_IHK_IKC_H
#include <ikc/ihk.h>
#define IKC_PORT_IKC2MCKERNEL 501
#define IKC_PORT_IKC2LINUX 503
/* manycore side */
int ihk_mc_ikc_init_first(struct ihk_ikc_channel_desc *,
ihk_ikc_ph_t handler);
#endif /* !__HEADER_ARM64_IHK_IKC_H */


@ -0,0 +1,33 @@
/* types.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_IHK_TYPES_H
#define __HEADER_ARM64_IHK_TYPES_H
#ifndef __ASSEMBLY__
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long long int64_t;
typedef int64_t ptrdiff_t;
typedef int64_t intptr_t;
typedef uint64_t uintptr_t;
typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
#endif /* __ASSEMBLY__ */
#define NULL ((void *)0)
#endif /* !__HEADER_ARM64_IHK_TYPES_H */


@ -0,0 +1,103 @@
/* imp-sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __ASM_IMP_SYSREG_H
#define __ASM_IMP_SYSREG_H
#ifndef __ASSEMBLY__
/* register sys_reg list */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1 sys_reg(3, 0, 11, 2, 0)
#define IMP_SCCR_CTRL_EL1 sys_reg(3, 0, 11, 8, 0)
#define IMP_SCCR_ASSIGN_EL1 sys_reg(3, 0, 11, 8, 1)
#define IMP_SCCR_SET0_L2_EL1 sys_reg(3, 0, 15, 8, 2)
#define IMP_SCCR_SET1_L2_EL1 sys_reg(3, 0, 15, 8, 3)
#define IMP_SCCR_L1_EL0 sys_reg(3, 3, 11, 8, 2)
#define IMP_PF_CTRL_EL1 sys_reg(3, 0, 11, 4, 0)
#define IMP_PF_STREAM_DETECT_CTRL_EL0 sys_reg(3, 3, 11, 4, 0)
#define IMP_PF_INJECTION_CTRL0_EL0 sys_reg(3, 3, 11, 6, 0)
#define IMP_PF_INJECTION_CTRL1_EL0 sys_reg(3, 3, 11, 6, 1)
#define IMP_PF_INJECTION_CTRL2_EL0 sys_reg(3, 3, 11, 6, 2)
#define IMP_PF_INJECTION_CTRL3_EL0 sys_reg(3, 3, 11, 6, 3)
#define IMP_PF_INJECTION_CTRL4_EL0 sys_reg(3, 3, 11, 6, 4)
#define IMP_PF_INJECTION_CTRL5_EL0 sys_reg(3, 3, 11, 6, 5)
#define IMP_PF_INJECTION_CTRL6_EL0 sys_reg(3, 3, 11, 6, 6)
#define IMP_PF_INJECTION_CTRL7_EL0 sys_reg(3, 3, 11, 6, 7)
#define IMP_PF_INJECTION_DISTANCE0_EL0 sys_reg(3, 3, 11, 7, 0)
#define IMP_PF_INJECTION_DISTANCE1_EL0 sys_reg(3, 3, 11, 7, 1)
#define IMP_PF_INJECTION_DISTANCE2_EL0 sys_reg(3, 3, 11, 7, 2)
#define IMP_PF_INJECTION_DISTANCE3_EL0 sys_reg(3, 3, 11, 7, 3)
#define IMP_PF_INJECTION_DISTANCE4_EL0 sys_reg(3, 3, 11, 7, 4)
#define IMP_PF_INJECTION_DISTANCE5_EL0 sys_reg(3, 3, 11, 7, 5)
#define IMP_PF_INJECTION_DISTANCE6_EL0 sys_reg(3, 3, 11, 7, 6)
#define IMP_PF_INJECTION_DISTANCE7_EL0 sys_reg(3, 3, 11, 7, 7)
#define IMP_PF_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0)
#define IMP_BARRIER_CTRL_EL1 sys_reg(3, 0, 11, 12, 0)
#define IMP_BARRIER_BST_BIT_EL1 sys_reg(3, 0, 11, 12, 4)
#define IMP_BARRIER_INIT_SYNC_BB0_EL1 sys_reg(3, 0, 15, 13, 0)
#define IMP_BARRIER_INIT_SYNC_BB1_EL1 sys_reg(3, 0, 15, 13, 1)
#define IMP_BARRIER_INIT_SYNC_BB2_EL1 sys_reg(3, 0, 15, 13, 2)
#define IMP_BARRIER_INIT_SYNC_BB3_EL1 sys_reg(3, 0, 15, 13, 3)
#define IMP_BARRIER_INIT_SYNC_BB4_EL1 sys_reg(3, 0, 15, 13, 4)
#define IMP_BARRIER_INIT_SYNC_BB5_EL1 sys_reg(3, 0, 15, 13, 5)
#define IMP_BARRIER_ASSIGN_SYNC_W0_EL1 sys_reg(3, 0, 15, 15, 0)
#define IMP_BARRIER_ASSIGN_SYNC_W1_EL1 sys_reg(3, 0, 15, 15, 1)
#define IMP_BARRIER_ASSIGN_SYNC_W2_EL1 sys_reg(3, 0, 15, 15, 2)
#define IMP_BARRIER_ASSIGN_SYNC_W3_EL1 sys_reg(3, 0, 15, 15, 3)
#define IMP_SOC_STANDBY_CTRL_EL1 sys_reg(3, 0, 11, 0, 0)
#define IMP_FJ_CORE_UARCH_CTRL_EL2 sys_reg(3, 4, 11, 0, 4)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1 sys_reg(3, 0, 11, 0, 5)
/* macros */
#define PWR_REG_MASK(reg, field) (((UL(1) << ((reg##_##field##_MSB) - (reg##_##field##_LSB) + 1)) - 1) << (reg##_##field##_LSB))
/* IMP_FJ_TAG_ADDRESS_CTRL_EL1 */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT (0)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT (8)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT (9)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT)
/* IMP_SCCR_CTRL_EL1 */
#define IMP_SCCR_CTRL_EL1_EL1AE_SHIFT (63)
#define IMP_SCCR_CTRL_EL1_EL1AE_MASK (1UL << IMP_SCCR_CTRL_EL1_EL1AE_SHIFT)
/* IMP_SCCR_SET0_L2_EL1 */
#define IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT (0)
/* IMP_PF_CTRL_EL1 */
#define IMP_PF_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_PF_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_BARRIER_CTRL_EL1 */
#define IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_SOC_STANDBY_CTRL_EL1 */
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_MSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_LSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_MSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_LSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_MSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_LSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, ECO_MODE)
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, MODE_CHANGE)
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, RETENTION)
/* IMP_FJ_CORE_UARCH_RESTRECTION_EL1 */
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_MSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_LSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_MSB 9
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_LSB 8
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_MSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_LSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, FL_RESTRICT_TRANS)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, ISSUE_RESTRICTION)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, EX_RESTRICTION)
void scdrv_registers_init(void);
void hpc_registers_init(void);
void vhbm_barrier_registers_init(void);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_IMP_SYSREG_H */
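
The PWR_REG_MASK() macro above turns each *_MSB/*_LSB pair into a bit-field mask. A self-contained sketch of that arithmetic (UL() is assumed to cast to unsigned long, as elsewhere in the IHK/McKernel headers):

#include <stdio.h>

#define UL(x) ((unsigned long)(x))
#define PWR_REG_MASK(reg, field) \
    (((UL(1) << ((reg##_##field##_MSB) - (reg##_##field##_LSB) + 1)) - 1) << (reg##_##field##_LSB))

#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_MSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_LSB 2
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_MSB 9
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_LSB 8

int main(void)
{
    /* one-bit field [2:2]: ((1 << 1) - 1) << 2 == 0x4 */
    printf("ECO_MODE mask:          0x%lx\n",
           PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, ECO_MODE));
    /* two-bit field [9:8]: ((1 << 2) - 1) << 8 == 0x300 */
    printf("ISSUE_RESTRICTION mask: 0x%lx\n",
           PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, ISSUE_RESTRICTION));
    return 0;
}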

View File

@@ -0,0 +1,99 @@
/* io.h COPYRIGHT FUJITSU LIMITED 2015 */
/*
* Based on arch/arm/include/asm/io.h
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_IO_H
#define __ASM_IO_H
#include <ihk/types.h>
#ifdef __KERNEL__
/*
* Generic IO read/write. These perform native-endian accesses.
*/
static inline void __raw_writeb(uint8_t val, volatile void *addr)
{
asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr));
}
static inline void __raw_writew(uint16_t val, volatile void *addr)
{
asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr));
}
static inline void __raw_writel(uint32_t val, volatile void *addr)
{
asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr));
}
static inline void __raw_writeq(uint64_t val, volatile void *addr)
{
asm volatile("str %0, [%1]" : : "r" (val), "r" (addr));
}
static inline uint8_t __raw_readb(const volatile void *addr)
{
uint8_t val;
asm volatile("ldarb %w0, [%1]"
: "=r" (val) : "r" (addr));
return val;
}
static inline uint16_t __raw_readw(const volatile void *addr)
{
uint16_t val;
asm volatile("ldarh %w0, [%1]"
: "=r" (val) : "r" (addr));
return val;
}
static inline uint32_t __raw_readl(const volatile void *addr)
{
uint32_t val;
asm volatile("ldar %w0, [%1]"
: "=r" (val) : "r" (addr));
return val;
}
static inline uint64_t __raw_readq(const volatile void *addr)
{
uint64_t val;
asm volatile("ldar %0, [%1]"
: "=r" (val) : "r" (addr));
return val;
}
/*
* Relaxed I/O memory access primitives. These follow the Device memory
* ordering rules but do not guarantee any ordering relative to Normal memory
* accesses.
*/
#define readb_relaxed(c) ({ uint8_t __v = (uint8_t)__raw_readb(c); __v; })
#define readw_relaxed(c) ({ uint16_t __v = (uint16_t)__raw_readw(c); __v; })
#define readl_relaxed(c) ({ uint32_t __v = (uint32_t)__raw_readl(c); __v; })
#define readq_relaxed(c) ({ uint64_t __v = (uint64_t)__raw_readq(c); __v; })
#define writeb_relaxed(v,c) ((void)__raw_writeb((uint8_t)(v),(c)))
#define writew_relaxed(v,c) ((void)__raw_writew((uint16_t)(v),(c)))
#define writel_relaxed(v,c) ((void)__raw_writel((uint32_t)(v),(c)))
#define writeq_relaxed(v,c) ((void)__raw_writeq((uint64_t)(v),(c)))
#endif /* __KERNEL__ */
#endif /* __ASM_IO_H */
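
A usage sketch for the accessors above; the register offsets, the ready bit and the dev_base pointer are hypothetical, and mapping the device into the kernel address space is assumed to have happened elsewhere. Note that this port backs readl_relaxed() with an acquire load (ldar), so the poll below is ordered before the data store:

#define DEV_STATUS       0x00            /* hypothetical status register */
#define DEV_DATA         0x04            /* hypothetical data register */
#define DEV_STATUS_READY (1U << 0)       /* hypothetical "ready" bit */

static void dev_put_word(void *dev_base, uint32_t word)
{
    /* wait until the device reports ready ... */
    while (!(readl_relaxed((char *)dev_base + DEV_STATUS) & DEV_STATUS_READY))
        ;
    /* ... then post the payload with a relaxed (native-endian) store */
    writel_relaxed(word, (char *)dev_base + DEV_DATA);
}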

View File

@@ -0,0 +1,46 @@
/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_IRQ_H
#define __HEADER_ARM64_IRQ_H
#include <ihk/debug.h>
#include <ihk/context.h>
#include <sysreg.h>
#include <cputype.h>
/* use SGI interrupt number */
#define INTRID_CPU_NOTIFY 0
#define INTRID_IKC 1
#define INTRID_QUERY_FREE_MEM 2
#define INTRID_CPU_STOP 3
#define INTRID_TLB_FLUSH 4
#define INTRID_STACK_TRACE 5
#define INTRID_MULTI_INTR 6
#define INTRID_MULTI_NMI 7
/* use PPI interrupt number */
#define INTRID_PERF_OVF 23
#define INTRID_HYP_PHYS_TIMER 26 /* cnthp */
#define INTRID_VIRT_TIMER 27 /* cntv */
#define INTRID_HYP_VIRT_TIMER 28 /* cnthv */
#define INTRID_PHYS_TIMER 30 /* cntp */
/* Functions for GICv2 */
extern void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv2(void);
extern void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv2(struct pt_regs *regs);
/* Functions for GICv3 */
extern void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv3(void);
extern void arm64_issue_ipi_gicv3(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv3(struct pt_regs *regs);
void handle_IPI(unsigned int vector, struct pt_regs *regs);
#endif /* __HEADER_ARM64_IRQ_H */

View File

@@ -0,0 +1,31 @@
/* irqflags.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_IRQFLAGS_H
#define __HEADER_ARM64_COMMON_IRQFLAGS_H
#include <ptrace.h>
/*
* save and restore debug state
*/
static inline unsigned long local_dbg_save(void)
{
unsigned long flags;
asm volatile(
"mrs %0, daif // local_dbg_save\n"
"msr daifset, #8"
: "=r" (flags)
:
: "memory");
return flags;
}
static inline void local_dbg_restore(unsigned long flags)
{
asm volatile(
"msr daif, %0 // local_dbg_restore"
:
: "r" (flags)
: "memory");
}
#endif /* !__HEADER_ARM64_COMMON_IRQFLAGS_H */
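
A sketch of the usual pairing of the two helpers around a region that must not take debug exceptions; the body here is only a placeholder:

static void update_hw_breakpoints(void)
{
    unsigned long flags;

    flags = local_dbg_save();    /* set DAIF.D, remember the old DAIF value */
    /* ... reprogram hardware breakpoint/watchpoint registers here ... */
    local_dbg_restore(flags);    /* put DAIF back exactly as it was */
}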

View File

@@ -0,0 +1,25 @@
/* linkage.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef __HEADER_ARM64_COMMON_LINKAGE_H
#define __HEADER_ARM64_COMMON_LINKAGE_H
#include <arch-memory.h>
#include <compiler.h>
#define ASM_NL ;
#define __ALIGN .align 4
#define __ALIGN_STR ".align 4"
#define ENTRY(name) \
.globl name ASM_NL \
__ALIGN ASM_NL \
name:
#define END(name) \
.size name, .-name
#define ENDPROC(name) \
.type name, @function ASM_NL \
END(name)
#endif /* !__HEADER_ARM64_COMMON_LINKAGE_H */

View File

@@ -0,0 +1,22 @@
/* mmu_context.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_MMU_CONTEXT_H
#define __HEADER_ARM64_COMMON_MMU_CONTEXT_H
#include <pgtable.h>
#include <memory.h>
/*
* Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
*/
static inline void cpu_set_reserved_ttbr0(void)
{
unsigned long ttbr = virt_to_phys(empty_zero_page);
asm(
" msr ttbr0_el1, %0 // set TTBR0\n"
" isb"
:
: "r" (ttbr));
}
#endif /* !__HEADER_ARM64_COMMON_MMU_CONTEXT_H */

View File

@@ -0,0 +1,198 @@
/* pgtable-hwdef.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_PGTABLE_HWDEF_H
#define __HEADER_ARM64_COMMON_PGTABLE_HWDEF_H
#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H
# error arch-memory.h
#endif
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
*/
#if CONFIG_ARM64_PGTABLE_LEVELS > 2
# define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3)
# define PMD_SIZE (1UL << PMD_SHIFT)
# define PMD_MASK (~(PMD_SIZE-1))
# define PTRS_PER_PMD PTRS_PER_PTE
#endif
/*
* PUD_SHIFT determines the size a level 1 page table entry can map.
*/
#if CONFIG_ARM64_PGTABLE_LEVELS > 3
# define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3)
# define PUD_SIZE (1UL << PUD_SHIFT)
# define PUD_MASK (~(PUD_SIZE-1))
# define PTRS_PER_PUD PTRS_PER_PTE
#endif
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2).
*/
#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
/*
* Section address mask and size definitions.
*/
#define SECTION_SHIFT PMD_SHIFT
#define SECTION_SIZE (UL(1) << SECTION_SHIFT)
#define SECTION_MASK (~(SECTION_SIZE-1))
/*
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (UL(3) << 0)
#define PMD_TYPE_FAULT (UL(0) << 0)
#define PMD_TYPE_TABLE (UL(3) << 0)
#define PMD_TYPE_SECT (UL(1) << 0)
#define PMD_TABLE_BIT (UL(1) << 1)
/*
* Table (D_Block)
*/
#define PMD_TBL_PXNT (UL(1) << 59)
#define PMD_TBL_UXNT (UL(1) << 60)
#define PMD_TBL_APT_USER (UL(1) << 61) /* 0:Access at EL0 permitted, 1:Access at EL0 not permitted */
#define PMD_TBL_APT_RDONLY (UL(2) << 61) /* 0:read/write (EL0-3), 1:read only (EL0-3) */
#define PMD_TBL_NST (UL(1) << 63) /* 0:secure, 1:non-secure */
/*
* Section (D_Page)
*/
#define PMD_SECT_VALID (UL(1) << 0)
#define PMD_SECT_PROT_NONE (UL(1) << 58)
#define PMD_SECT_USER (UL(1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (UL(1) << 7) /* AP[2] */
#define PMD_SECT_S (UL(3) << 8)
#define PMD_SECT_AF (UL(1) << 10)
#define PMD_SECT_NG (UL(1) << 11)
#define PMD_SECT_CONT (UL(1) << 52)
#define PMD_SECT_PXN (UL(1) << 53)
#define PMD_SECT_UXN (UL(1) << 54)
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
*/
#define PMD_ATTRINDX(t) (UL(t) << 2)
#define PMD_ATTRINDX_MASK (UL(7) << 2)
/*
* Level 3 descriptor (PTE).
*/
#define PTE_TYPE_MASK (UL(3) << 0)
#define PTE_TYPE_FAULT (UL(0) << 0)
#define PTE_TYPE_PAGE (UL(3) << 0)
#define PTE_TABLE_BIT (UL(1) << 1)
#define PTE_USER (UL(1) << 6) /* AP[1] */
#define PTE_RDONLY (UL(1) << 7) /* AP[2] */
#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (UL(1) << 10) /* Access Flag */
#define PTE_NG (UL(1) << 11) /* nG */
#define PTE_CONT (UL(1) << 52) /* Contiguous range */
#define PTE_PXN (UL(1) << 53) /* Privileged XN */
#define PTE_UXN (UL(1) << 54) /* User XN */
/* Software defined PTE bits definition.*/
#define PTE_VALID (UL(1) << 0)
#define PTE_FILE (UL(1) << 2) /* only when !pte_present() */
#define PTE_DIRTY (UL(1) << 55)
#define PTE_SPECIAL (UL(1) << 56)
#define PTE_WRITE (UL(1) << 57)
#define PTE_PROT_NONE (UL(1) << 58) /* only when !PTE_VALID */
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
*/
#define PTE_ATTRINDX(t) (UL(t) << 2)
#define PTE_ATTRINDX_MASK (UL(7) << 2)
/*
* Highest possible physical address supported.
*/
#define PHYS_MASK_SHIFT (48)
#define PHYS_MASK (((UL(1) << PHYS_MASK_SHIFT) - 1) & PAGE_MASK)
/*
* TCR flags.
*/
#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24))
#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24))
#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26))
#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26))
#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26))
#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26))
#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26))
#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
#define TCR_TG0_4K (UL(0) << 14)
#define TCR_TG0_64K (UL(1) << 14)
#define TCR_TG0_16K (UL(2) << 14)
#define TCR_TG1_16K (UL(1) << 30)
#define TCR_TG1_4K (UL(2) << 30)
#define TCR_TG1_64K (UL(3) << 30)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
/*
* Memory types available.
*/
#define MT_DEVICE_nGnRnE 0
#define MT_DEVICE_nGnRE 1
#define MT_DEVICE_GRE 2
#define MT_NORMAL_NC 3
#define MT_NORMAL 4
/*
* page table entry attribute set.
*/
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define PAGE_KERNEL (_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC (_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_NONE (((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
#define PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_COPY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY
#define __P100 PAGE_READONLY_EXEC
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_COPY_EXEC
#define __P111 PAGE_COPY_EXEC
#define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
#define __S100 PAGE_READONLY_EXEC
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
#endif /* !__HEADER_ARM64_COMMON_PGTABLE_HWDEF_H */
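
To make the shift formulas above concrete, here is the arithmetic for one assumed configuration (4 KiB granule, 48-bit VA, 4 translation levels); the printed values are properties of that configuration, not of the header itself:

#include <stdio.h>

int main(void)
{
    const unsigned page_shift = 12;   /* 4 KiB granule */
    const unsigned levels = 4;        /* CONFIG_ARM64_PGTABLE_LEVELS */
    const unsigned va_bits = 48;      /* VA_BITS */

    unsigned ptrs_per_pte = 1u << (page_shift - 3);        /* 512 entries per table */
    unsigned pmd_shift = (page_shift - 3) * 2 + 3;         /* 21 -> 2 MiB blocks */
    unsigned pud_shift = (page_shift - 3) * 3 + 3;         /* 30 -> 1 GiB blocks */
    unsigned pgdir_shift = (page_shift - 3) * levels + 3;  /* 39 -> 512 GiB per PGD entry */
    unsigned ptrs_per_pgd = 1u << (va_bits - pgdir_shift); /* 512 top-level entries */

    printf("PTRS_PER_PTE=%u PMD_SHIFT=%u PUD_SHIFT=%u PGDIR_SHIFT=%u PTRS_PER_PGD=%u\n",
           ptrs_per_pte, pmd_shift, pud_shift, pgdir_shift, ptrs_per_pgd);
    return 0;
}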

View File

@@ -0,0 +1,7 @@
/* pgtable.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_PGTABLE_H
#define __HEADER_ARM64_COMMON_PGTABLE_H
extern char empty_zero_page[];
#endif /* !__HEADER_ARM64_COMMON_PGTABLE_H */

View File

@@ -0,0 +1,17 @@
/* prctl.h COPYRIGHT FUJITSU LIMITED 2017-2019 */
#ifndef __HEADER_ARM64_COMMON_PRCTL_H
#define __HEADER_ARM64_COMMON_PRCTL_H
#define PR_SET_THP_DISABLE 41
#define PR_GET_THP_DISABLE 42
/* arm64 Scalable Vector Extension controls */
/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
#define PR_SVE_SET_VL 50 /* set task vector length */
# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
#define PR_SVE_GET_VL 51 /* get task vector length */
/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
#endif /* !__HEADER_ARM64_COMMON_PRCTL_H */
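
A user-space sketch of how the PR_SVE_* values above are consumed; they mirror the Linux prctl() interface, and whether the request is accepted depends on the kernel and CPU actually supporting SVE:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SVE_SET_VL                     /* fall back if the libc headers are older */
#define PR_SVE_SET_VL        50
#define PR_SVE_SET_VL_ONEXEC (1 << 18)
#define PR_SVE_GET_VL        51
#define PR_SVE_VL_LEN_MASK   0xffff
#endif

int main(void)
{
    /* request a 32-byte (256-bit) vector length, effective at the next execve() */
    if (prctl(PR_SVE_SET_VL, (unsigned long)(32 | PR_SVE_SET_VL_ONEXEC)) < 0)
        perror("PR_SVE_SET_VL");

    int vl = prctl(PR_SVE_GET_VL, 0, 0, 0, 0);
    if (vl >= 0)
        printf("current vector length: %d bytes\n", vl & PR_SVE_VL_LEN_MASK);
    return 0;
}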

View File

@@ -0,0 +1,68 @@
/* psci.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* @ref.impl include/uapi/linux/psci.h */
/*
* ARM Power State and Coordination Interface (PSCI) header
*
* This header holds common PSCI defines and macros shared
* by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
*
* Copyright (C) 2014 Linaro Ltd.
* Author: Anup Patel <anup.patel@linaro.org>
*/
#ifndef __HEADER_ARM64_PSCI_H
#define __HEADER_ARM64_PSCI_H
/*
* PSCI v0.1 interface
*
* The PSCI v0.1 function numbers are implementation defined.
*
* Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
* INVALID_PARAMS, and DENIED defined below are applicable
* to PSCI v0.1.
*/
/* PSCI v0.2 interface */
#define PSCI_0_2_FN_BASE 0x84000000
#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n))
#define PSCI_0_2_64BIT 0x40000000
#define PSCI_0_2_FN64_BASE (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n))
#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0)
#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2)
#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3)
#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
#define PSCI_0_2_POWER_STATE_ID_SHIFT 0
#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
#define PSCI_0_2_POWER_STATE_TYPE_MASK \
(0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
#define PSCI_0_2_POWER_STATE_AFFL_MASK \
(0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
/* PSCI version decoding (independent of PSCI version) */
#define PSCI_VERSION_MAJOR_SHIFT 16
#define PSCI_VERSION_MINOR_MASK \
((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK
#define PSCI_VERSION_MAJOR(ver) \
(((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
#define PSCI_VERSION_MINOR(ver) \
((ver) & PSCI_VERSION_MINOR_MASK)
/* PSCI return values (inclusive of all PSCI versions) */
#define PSCI_RET_SUCCESS 0
#define PSCI_RET_NOT_SUPPORTED -1
#define PSCI_RET_INVALID_PARAMS -2
#define PSCI_RET_DENIED -3
int psci_init(void);
int psci_cpu_off(void);
int cpu_psci_cpu_boot(unsigned int cpu, unsigned long pc);
#endif /* __HEADER_ARM64_PSCI_H */
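
A small demonstration of the version-decoding macros above; the sample value is simply what a PSCI 1.1 implementation would return from PSCI_0_2_FN_PSCI_VERSION:

#include <stdio.h>
#include <stdint.h>

#define PSCI_VERSION_MAJOR_SHIFT 16
#define PSCI_VERSION_MINOR_MASK  ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
#define PSCI_VERSION_MAJOR_MASK  ~PSCI_VERSION_MINOR_MASK
#define PSCI_VERSION_MAJOR(ver)  (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
#define PSCI_VERSION_MINOR(ver)  ((ver) & PSCI_VERSION_MINOR_MASK)

int main(void)
{
    uint32_t ver = 0x00010001;   /* what PSCI 1.1 firmware would report */

    printf("PSCI %u.%u\n", PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver));
    return 0;
}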

View File

@@ -0,0 +1,268 @@
/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_PTRACE_H
#define __HEADER_ARM64_COMMON_PTRACE_H
/*
* PSR bits
*/
#define PSR_MODE_EL0t 0x00000000
#define PSR_MODE_EL1t 0x00000004
#define PSR_MODE_EL1h 0x00000005
#define PSR_MODE_EL2t 0x00000008
#define PSR_MODE_EL2h 0x00000009
#define PSR_MODE_EL3t 0x0000000c
#define PSR_MODE_EL3h 0x0000000d
#define PSR_MODE_MASK 0x0000000f
/* AArch32 CPSR bits */
#define PSR_MODE32_BIT 0x00000010
/* AArch64 SPSR bits */
#define PSR_F_BIT 0x00000040
#define PSR_I_BIT 0x00000080
#define PSR_A_BIT 0x00000100
#define PSR_D_BIT 0x00000200
#define PSR_Q_BIT 0x08000000
#define PSR_V_BIT 0x10000000
#define PSR_C_BIT 0x20000000
#define PSR_Z_BIT 0x40000000
#define PSR_N_BIT 0x80000000
/*
* Groups of PSR bits
*/
#define PSR_f 0xff000000 /* Flags */
#define PSR_s 0x00ff0000 /* Status */
#define PSR_x 0x0000ff00 /* Extension */
#define PSR_c 0x000000ff /* Control */
/* Current Exception Level values, as contained in CurrentEL */
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
/* thread->ptrace_debugreg lower-area and higher-area */
#define HWS_BREAK 0
#define HWS_WATCH 1
#ifndef __ASSEMBLY__
#include <lwk/compiler.h>
#include <ihk/types.h>
struct user_hwdebug_state {
uint32_t dbg_info;
uint32_t pad;
struct {
uint64_t addr;
uint32_t ctrl;
uint32_t pad;
} dbg_regs[16];
};
struct user_fpsimd_state {
__uint128_t vregs[32];
uint32_t fpsr;
uint32_t fpcr;
uint32_t __reserved[2];
};
extern unsigned int ptrace_hbp_get_resource_info(unsigned int note_type);
/* SVE/FP/SIMD state (NT_ARM_SVE) */
struct user_sve_header {
uint32_t size; /* total meaningful regset content in bytes */
uint32_t max_size; /* maximum possible size for this thread */
uint16_t vl; /* current vector length */
uint16_t max_vl; /* maximum possible vector length */
uint16_t flags;
uint16_t __reserved;
};
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_TLS,
REGSET_HW_BREAK,
REGSET_HW_WATCH,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif /* CONFIG_ARM64_SVE */
};
struct thread;
struct user_regset;
typedef int user_regset_active_fn(struct thread *target,
const struct user_regset *regset);
typedef long user_regset_get_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf);
typedef long user_regset_set_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf);
typedef int user_regset_writeback_fn(struct thread *target,
const struct user_regset *regset,
int immediate);
typedef unsigned int user_regset_get_size_fn(struct thread *target,
const struct user_regset *regset);
struct user_regset {
user_regset_get_fn *get;
user_regset_set_fn *set;
user_regset_active_fn *active;
user_regset_writeback_fn *writeback;
user_regset_get_size_fn *get_size;
unsigned int n;
unsigned int size;
unsigned int align;
unsigned int bias;
unsigned int core_note_type;
};
struct user_regset_view {
const char *name;
const struct user_regset *regsets;
unsigned int n;
uint32_t e_flags;
uint16_t e_machine;
uint8_t ei_osabi;
};
extern const struct user_regset_view *current_user_regset_view(void);
extern const struct user_regset *find_regset(
const struct user_regset_view *view,
unsigned int type);
extern unsigned int regset_size(struct thread *target,
const struct user_regset *regset);
/* Definitions for user_sve_header.flags: */
#define SVE_PT_REGS_MASK (1 << 0)
#define SVE_PT_REGS_FPSIMD 0
#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
#define SVE_PT_VL_THREAD PR_SVE_SET_VL_THREAD
#define SVE_PT_VL_INHERIT PR_SVE_VL_INHERIT
#define SVE_PT_VL_ONEXEC PR_SVE_SET_VL_ONEXEC
/*
* The remainder of the SVE state follows struct user_sve_header. The
* total size of the SVE state (including header) depends on the
* metadata in the header: SVE_PT_SIZE(vq, flags) gives the total size
* of the state in bytes, including the header.
*
* Refer to <asm/sigcontext.h> for details of how to pass the correct
* "vq" argument to these macros.
*/
/* Offset from the start of struct user_sve_header to the register data */
#define SVE_PT_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
/*
* The register data content and layout depends on the value of the
* flags field.
*/
/*
* (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
*
* The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
* struct user_fpsimd_state. Additional data might be appended in the
* future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
* SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
* sizeof(struct user_fpsimd_state).
*/
#define SVE_PT_FPSIMD_OFFSET SVE_PT_REGS_OFFSET
#define SVE_PT_FPSIMD_SIZE(vq, flags) (sizeof(struct user_fpsimd_state))
/*
* (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
*
* The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
* SVE_PT_SVE_SIZE(vq, flags).
*
* Additional macros describe the contents and layout of the payload.
* For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
* the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
* the size in bytes:
*
* x type description
* - ---- -----------
* ZREGS \
* ZREG |
* PREGS | refer to <asm/sigcontext.h>
* PREG |
* FFR /
*
* FPSR uint32_t FPSR
* FPCR uint32_t FPCR
*
* Additional data might be appended in the future.
*/
#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq)
#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq)
#define SVE_PT_SVE_FPSR_SIZE sizeof(uint32_t)
#define SVE_PT_SVE_FPCR_SIZE sizeof(uint32_t)
#define __SVE_SIG_TO_PT(offset) \
((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
#define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET
#define SVE_PT_SVE_ZREGS_OFFSET \
__SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
__SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
#define SVE_PT_SVE_ZREGS_SIZE(vq) \
(SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
#define SVE_PT_SVE_PREGS_OFFSET(vq) \
__SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
__SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
#define SVE_PT_SVE_PREGS_SIZE(vq) \
(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
SVE_PT_SVE_PREGS_OFFSET(vq))
#define SVE_PT_SVE_FFR_OFFSET(vq) \
__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
#define SVE_PT_SVE_FPSR_OFFSET(vq) \
((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \
(SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SVE_FPCR_OFFSET(vq) \
(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
/*
* Any future extension appended after FPCR must be aligned to the next
* 128-bit boundary.
*/
#define SVE_PT_SVE_SIZE(vq, flags) \
((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \
- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SIZE(vq, flags) \
(((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \
SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_PTRACE_H */
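
A sketch of how a consumer of the NT_ARM_SVE regset would use the header and the size macros above; it assumes the SVE_* definitions from this ptrace.h and the companion signal.h are in scope, and that buf already holds the regset data:

static unsigned int sve_regset_payload_size(const void *buf)
{
    const struct user_sve_header *hdr = buf;
    unsigned int vq = sve_vq_from_vl(hdr->vl);   /* quadwords per Z register */

    if ((hdr->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE)
        /* full SVE dump: Z/P/FFR plus FPSR/FPCR, padded to 16 bytes */
        return SVE_PT_SVE_SIZE(vq, hdr->flags);

    /* FPSIMD-only dump: just a struct user_fpsimd_state */
    return SVE_PT_FPSIMD_SIZE(vq, hdr->flags);
}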

View File

@@ -0,0 +1,127 @@
/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_REGISTERS_H
#define __HEADER_ARM64_COMMON_REGISTERS_H
#include <types.h>
#include <arch/cpu.h>
#include <sysreg.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
#define RFLAGS_AF (1 << 4)
#define RFLAGS_ZF (1 << 6)
#define RFLAGS_SF (1 << 7)
#define RFLAGS_TF (1 << 8)
#define RFLAGS_IF (1 << 9)
#define RFLAGS_DF (1 << 10)
#define RFLAGS_OF (1 << 11)
#define RFLAGS_IOPL (3 << 12)
#define RFLAGS_NT (1 << 14)
#define RFLAGS_RF (1 << 16)
#define RFLAGS_VM (1 << 17)
#define RFLAGS_AC (1 << 18)
#define RFLAGS_VIF (1 << 19)
#define RFLAGS_VIP (1 << 20)
#define RFLAGS_ID (1 << 21)
#define DB6_B0 (1 << 0)
#define DB6_B1 (1 << 1)
#define DB6_B2 (1 << 2)
#define DB6_B3 (1 << 3)
#define DB6_BD (1 << 13)
#define DB6_BS (1 << 14)
#define DB6_BT (1 << 15)
#define MSR_EFER 0xc0000080
#define MSR_STAR 0xc0000081
#define MSR_LSTAR 0xc0000082
#define MSR_FMASK 0xc0000084
#define MSR_FS_BASE 0xc0000100
#define MSR_GS_BASE 0xc0000101
#define MSR_IA32_APIC_BASE 0x000000001b
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_IA32_PERF_CTL 0x00000199
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IA32_CR_PAT 0x00000277
#define CVAL(event, mask) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
#define CVAL2(event, mask, inv, count) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff) | \
((inv & 1) << 23) | ((count & 0xff) << 24))
/* AMD */
#define MSR_PERF_CTL_0 0xc0010000
#define MSR_PERF_CTR_0 0xc0010004
static unsigned long xgetbv(unsigned int index)
{
return 0;
}
static void xsetbv(unsigned int index, unsigned long val)
{
}
static unsigned long rdpmc(unsigned int counter)
{
return 0;
}
static unsigned long rdmsr(unsigned int index)
{
return 0;
}
/* @ref.impl linux4.10.16 */
/* arch/arm64/include/asm/arch_timer.h:arch_counter_get_cntvct() */
static inline unsigned long rdtsc(void)
{
isb();
return read_sysreg(cntvct_el0);
}
static void set_perfctl(int counter, int event, int mask)
{
}
static void start_perfctr(int counter)
{
}
static void stop_perfctr(int counter)
{
}
static void clear_perfctl(int counter)
{
}
static void set_perfctr(int counter, unsigned long value)
{
}
static unsigned long read_perfctr(int counter)
{
return 0;
}
#define ihk_mc_mb() do {} while(0);
#define REGS_GET_STACK_POINTER(regs) (((struct pt_regs *)regs)->sp)
enum arm64_pf_error_code {
PF_PROT = 1 << 0,
PF_WRITE = 1 << 1,
PF_USER = 1 << 2,
PF_RSVD = 1 << 3,
PF_INSTR = 1 << 4,
PF_PATCH = 1 << 29,
PF_POPULATE = 1 << 30,
};
#endif /* !__HEADER_ARM64_COMMON_REGISTERS_H */
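
rdtsc() above is backed by the generic timer (cntvct_el0), so intervals it measures can be converted to time using the counter frequency; a sketch assuming read_sysreg() from sysreg.h is available and the interval is short enough for the scaling not to overflow:

static unsigned long ticks_to_ns(unsigned long start, unsigned long end)
{
    unsigned long freq = read_sysreg(cntfrq_el0);   /* timer ticks per second */

    /* scale cntvct_el0 ticks taken from two rdtsc() samples to nanoseconds */
    return (end - start) * 1000000000UL / freq;
}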

View File

@@ -0,0 +1,100 @@
/* rlimit.h COPYRIGHT FUJITSU LIMITED 2016 */
/**
* \file rlimit.h
* License details are found in the file LICENSE.
* \brief
* Kinds of resource limit
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef __HEADER_ARM64_COMMON_RLIMIT_H
#define __HEADER_ARM64_COMMON_RLIMIT_H
/* Kinds of resource limit. */
enum __rlimit_resource
{
/* Per-process CPU limit, in seconds. */
RLIMIT_CPU = 0,
#define RLIMIT_CPU RLIMIT_CPU
/* Largest file that can be created, in bytes. */
RLIMIT_FSIZE = 1,
#define RLIMIT_FSIZE RLIMIT_FSIZE
/* Maximum size of data segment, in bytes. */
RLIMIT_DATA = 2,
#define RLIMIT_DATA RLIMIT_DATA
/* Maximum size of stack segment, in bytes. */
RLIMIT_STACK = 3,
#define RLIMIT_STACK RLIMIT_STACK
/* Largest core file that can be created, in bytes. */
RLIMIT_CORE = 4,
#define RLIMIT_CORE RLIMIT_CORE
/* Largest resident set size, in bytes.
This affects swapping; processes that are exceeding their
resident set size will be more likely to have physical memory
taken from them. */
__RLIMIT_RSS = 5,
#define RLIMIT_RSS __RLIMIT_RSS
/* Number of open files. */
RLIMIT_NOFILE = 7,
__RLIMIT_OFILE = RLIMIT_NOFILE, /* BSD name for same. */
#define RLIMIT_NOFILE RLIMIT_NOFILE
#define RLIMIT_OFILE __RLIMIT_OFILE
/* Address space limit. */
RLIMIT_AS = 9,
#define RLIMIT_AS RLIMIT_AS
/* Number of processes. */
__RLIMIT_NPROC = 6,
#define RLIMIT_NPROC __RLIMIT_NPROC
/* Locked-in-memory address space. */
__RLIMIT_MEMLOCK = 8,
#define RLIMIT_MEMLOCK __RLIMIT_MEMLOCK
/* Maximum number of file locks. */
__RLIMIT_LOCKS = 10,
#define RLIMIT_LOCKS __RLIMIT_LOCKS
/* Maximum number of pending signals. */
__RLIMIT_SIGPENDING = 11,
#define RLIMIT_SIGPENDING __RLIMIT_SIGPENDING
/* Maximum bytes in POSIX message queues. */
__RLIMIT_MSGQUEUE = 12,
#define RLIMIT_MSGQUEUE __RLIMIT_MSGQUEUE
/* Maximum nice priority allowed to raise to.
Nice levels 19 .. -20 correspond to 0 .. 39
values of this resource limit. */
__RLIMIT_NICE = 13,
#define RLIMIT_NICE __RLIMIT_NICE
/* Maximum realtime priority allowed for non-privileged
processes. */
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
/* timeout for RT tasks in us */
__RLIMIT_RTTIME = 15,
#define RLIMIT_RTTIME __RLIMIT_RTTIME
__RLIMIT_NLIMITS = 16,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS
};
#include <generic-rlimit.h>
#endif

View File

@@ -0,0 +1,421 @@
/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_SIGNAL_H
#define __HEADER_ARM64_COMMON_SIGNAL_H
#include <fpsimd.h>
#include <ihk/types.h>
#define _NSIG 64
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
static inline int valid_signal(unsigned long sig)
{
return sig <= _NSIG ? 1 : 0;
}
typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))
typedef struct {
__sigset_t __val[_NSIG_WORDS];
} sigset_t;
#define SIG_BLOCK 0
#define SIG_UNBLOCK 1
#define SIG_SETMASK 2
struct sigaction {
void (*sa_handler)(int);
unsigned long sa_flags;
void (*sa_restorer)(int);
sigset_t sa_mask;
};
typedef void __sig_fn_t(int);
typedef __sig_fn_t *__sig_handler_t;
#define SIG_DFL (__sig_handler_t)0
#define SIG_IGN (__sig_handler_t)1
#define SIG_ERR (__sig_handler_t)-1
#define SA_NOCLDSTOP 0x00000001U
#define SA_NOCLDWAIT 0x00000002U
#define SA_NODEFER 0x40000000U
#define SA_ONSTACK 0x08000000U
#define SA_RESETHAND 0x80000000U
#define SA_RESTART 0x10000000U
#define SA_SIGINFO 0x00000004U
/* Required for AArch32 compatibility. */
#define SA_RESTORER 0x04000000U
struct k_sigaction {
struct sigaction sa;
};
typedef struct sigaltstack {
void *ss_sp;
int ss_flags;
size_t ss_size;
} stack_t;
#define MINSIGSTKSZ 5120
#define SS_ONSTACK 1
#define SS_DISABLE 2
typedef union sigval {
int sival_int;
void *sival_ptr;
} sigval_t;
#define __SI_MAX_SIZE 128
#define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4)
typedef struct siginfo {
int si_signo; /* Signal number. */
int si_errno; /* If non-zero, an errno value associated with
this signal, as defined in <errno.h>. */
int si_code; /* Signal code. */
#define SI_USER 0 /* sent by kill, sigsend, raise */
#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
#define SI_QUEUE -1 /* sent by sigqueue */
#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */
#define SI_ASYNCIO -4 /* sent by AIO completion */
#define SI_SIGIO -5 /* sent by queued SIGIO */
#define SI_TKILL -6 /* sent by tkill system call */
#define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */
#define ILL_ILLOPC 1 /* illegal opcode */
#define ILL_ILLOPN 2 /* illegal operand */
#define ILL_ILLADR 3 /* illegal addressing mode */
#define ILL_ILLTRP 4 /* illegal trap */
#define ILL_PRVOPC 5 /* privileged opcode */
#define ILL_PRVREG 6 /* privileged register */
#define ILL_COPROC 7 /* coprocessor error */
#define ILL_BADSTK 8 /* internal stack error */
#define FPE_INTDIV 1 /* integer divide by zero */
#define FPE_INTOVF 2 /* integer overflow */
#define FPE_FLTDIV 3 /* floating point divide by zero */
#define FPE_FLTOVF 4 /* floating point overflow */
#define FPE_FLTUND 5 /* floating point underflow */
#define FPE_FLTRES 6 /* floating point inexact result */
#define FPE_FLTINV 7 /* floating point invalid operation */
#define FPE_FLTSUB 8 /* subscript out of range */
#define SEGV_MAPERR 1 /* address not mapped to object */
#define SEGV_ACCERR 2 /* invalid permissions for mapped object */
#define BUS_ADRALN 1 /* invalid address alignment */
#define BUS_ADRERR 2 /* non-existent physical address */
#define BUS_OBJERR 3 /* object specific hardware error */
/* hardware memory error consumed on a machine check: action required */
#define BUS_MCEERR_AR 4
/* hardware memory error detected in process but not consumed: action optional */
#define BUS_MCEERR_AO 5
#define TRAP_BRKPT 1 /* process breakpoint */
#define TRAP_TRACE 2 /* process trace trap */
#define TRAP_BRANCH 3 /* process taken branch trap */
#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */
#define CLD_EXITED 1 /* child has exited */
#define CLD_KILLED 2 /* child was killed */
#define CLD_DUMPED 3 /* child terminated abnormally */
#define CLD_TRAPPED 4 /* traced child has trapped */
#define CLD_STOPPED 5 /* child has stopped */
#define CLD_CONTINUED 6 /* stopped child has continued */
#define POLL_IN 1 /* data input available */
#define POLL_OUT 2 /* output buffers available */
#define POLL_MSG 3 /* input message available */
#define POLL_ERR 4 /* i/o error */
#define POLL_PRI 5 /* high priority input available */
#define POLL_HUP 6 /* device disconnected */
#define SIGEV_SIGNAL 0 /* notify via signal */
#define SIGEV_NONE 1 /* other notification: meaningless */
#define SIGEV_THREAD 2 /* deliver via thread creation */
#define SIGEV_THREAD_ID 4 /* deliver to thread */
union {
int _pad[__SI_PAD_SIZE];
/* kill(). */
struct {
int si_pid;/* Sending process ID. */
int si_uid;/* Real user ID of sending process. */
} _kill;
/* POSIX.1b timers. */
struct {
int si_tid; /* Timer ID. */
int si_overrun; /* Overrun count. */
sigval_t si_sigval; /* Signal value. */
} _timer;
/* POSIX.1b signals. */
struct {
int si_pid; /* Sending process ID. */
int si_uid; /* Real user ID of sending process. */
sigval_t si_sigval; /* Signal value. */
} _rt;
/* SIGCHLD. */
struct {
int si_pid; /* Which child. */
int si_uid; /* Real user ID of sending process. */
int si_status; /* Exit value or signal. */
long si_utime;
long si_stime;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */
struct {
void *si_addr; /* Faulting insn/memory ref. */
} _sigfault;
/* SIGPOLL. */
struct {
long int si_band; /* Band event for SIGPOLL. */
int si_fd;
} _sigpoll;
} _sifields;
} siginfo_t;
struct signalfd_siginfo {
unsigned int ssi_signo;
int ssi_errno;
int ssi_code;
unsigned int ssi_pid;
unsigned int ssi_uid;
int ssi_fd;
unsigned int ssi_tid;
unsigned int ssi_band;
unsigned int ssi_overrun;
unsigned int ssi_trapno;
int ssi_status;
int ssi_int;
unsigned long ssi_ptr;
unsigned long ssi_utime;
unsigned long ssi_stime;
unsigned long ssi_addr;
unsigned short ssi_addr_lsb;
char __pad[46];
};
#define SIGHUP 1
#define SIGINT 2
#define SIGQUIT 3
#define SIGILL 4
#define SIGTRAP 5
#define SIGABRT 6
#define SIGIOT 6
#define SIGBUS 7
#define SIGFPE 8
#define SIGKILL 9
#define SIGUSR1 10
#define SIGSEGV 11
#define SIGUSR2 12
#define SIGPIPE 13
#define SIGALRM 14
#define SIGTERM 15
#define SIGSTKFLT 16
#define SIGCHLD 17
#define SIGCONT 18
#define SIGSTOP 19
#define SIGTSTP 20
#define SIGTTIN 21
#define SIGTTOU 22
#define SIGURG 23
#define SIGXCPU 24
#define SIGXFSZ 25
#define SIGVTALRM 26
#define SIGPROF 27
#define SIGWINCH 28
#define SIGIO 29
#define SIGPOLL SIGIO
#define SIGPWR 30
#define SIGSYS 31
#define SIGUNUSED 31
#define SIGRTMIN 32
#ifndef SIGRTMAX
#define SIGRTMAX _NSIG
#endif
#define PTRACE_EVENT_EXEC 4
/*
* @ref.impl linux-linaro/arch/arm64/include/uapi/asm/sigcontext.h
*/
struct sigcontext {
unsigned long fault_address;
/* AArch64 registers */
unsigned long regs[31];
unsigned long sp;
unsigned long pc;
unsigned long pstate;
/* 4K reserved for FP/SIMD state and future expansion */
unsigned char __reserved[4096] /*__attribute__((__aligned__(16)))*/;
};
/*
* Header to be used at the beginning of structures extending the user
* context. Such structures must be placed after the rt_sigframe on the stack
* and be 16-byte aligned. The last structure must be a dummy one with the
* magic and size set to 0.
*/
struct _aarch64_ctx {
unsigned int magic;
unsigned int size;
};
#define FPSIMD_MAGIC 0x46508001
struct fpsimd_context {
struct _aarch64_ctx head;
unsigned int fpsr;
unsigned int fpcr;
__uint128_t vregs[32];
};
/* ESR_EL1 context */
#define ESR_MAGIC 0x45535201
struct esr_context {
struct _aarch64_ctx head;
unsigned long esr;
};
#define EXTRA_MAGIC 0x45585401
struct extra_context {
struct _aarch64_ctx head;
void *data; /* 16-byte aligned pointer to the extra space */
uint32_t size; /* size in bytes of the extra space */
uint32_t __reserved[3];
};
#define SVE_MAGIC 0x53564501
#define fpsimd_sve_state(vq) { \
__uint128_t zregs[32][vq]; \
uint16_t pregs[16][vq]; \
uint16_t ffr[vq]; \
}
struct sve_context {
struct _aarch64_ctx head;
uint16_t vl;
uint16_t __reserved[3];
};
/*
* The SVE architecture leaves space for future expansion of the
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
* See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_BYTES 16 /* number of bytes per quadword */
#define SVE_VQ_MIN 1
#define SVE_VQ_MAX 512
#define SVE_VL_MIN (SVE_VQ_MIN * SVE_VQ_BYTES)
#define SVE_VL_MAX (SVE_VQ_MAX * SVE_VQ_BYTES)
#define SVE_NUM_ZREGS 32
#define SVE_NUM_PREGS 16
#define sve_vl_valid(vl) \
((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
#define sve_vq_from_vl(vl) ((vl) / SVE_VQ_BYTES)
#define sve_vl_from_vq(vq) ((vq) * SVE_VQ_BYTES)
/*
* The total size of meaningful data in the SVE context in bytes,
* including the header, is given by SVE_SIG_CONTEXT_SIZE(vq).
*
* Note: for all these macros, the "vq" argument denotes the SVE
* vector length in quadwords (i.e., units of 128 bits).
*
* The correct way to obtain vq is to use sve_vq_from_vl(vl). The
* result is valid if and only if sve_vl_valid(vl) is true. This is
* guaranteed for a struct sve_context written by the kernel.
*
*
* Additional macros describe the contents and layout of the payload.
* For each, SVE_SIG_x_OFFSET(args) is the start offset relative to
* the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the
* size in bytes:
*
*
* x type description
* - ---- -----------
* REGS the entire SVE context
*
* ZREGS __uint128_t[SVE_NUM_ZREGS][vq] all Z-registers
* ZREG __uint128_t[vq] individual Z-register Zn
*
* PREGS uint16_t[SVE_NUM_PREGS][vq] all P-registers
* PREG uint16_t[vq] individual P-register Pn
*
* FFR uint16_t[vq] first-fault status register
*
* Additional data might be appended in the future.
*/
#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * SVE_VQ_BYTES)
#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * (SVE_VQ_BYTES / 8))
#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
#define SVE_SIG_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET
#define SVE_SIG_ZREG_OFFSET(vq, n) \
(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
#define SVE_SIG_ZREGS_SIZE(vq) \
(SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
#define SVE_SIG_PREGS_OFFSET(vq) \
(SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
#define SVE_SIG_PREG_OFFSET(vq, n) \
(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
#define SVE_SIG_PREGS_SIZE(vq) \
(SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
#define SVE_SIG_FFR_OFFSET(vq) \
(SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
#define SVE_SIG_REGS_SIZE(vq) \
(SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
/*
* @ref.impl linux-linaro/arch/arm64/include/asm/ucontext.h
*/
struct ucontext {
unsigned long uc_flags;
struct ucontext *uc_link;
stack_t uc_stack;
sigset_t uc_sigmask;
/* glibc uses a 1024-bit sigset_t */
unsigned char __unused[1024 / 8 - sizeof(sigset_t)];
/* last for future expansion */
struct sigcontext uc_mcontext;
};
void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err);
void check_signal_irq_disabled(unsigned long rc, void *regs, int num);
#endif /* __HEADER_ARM64_COMMON_SIGNAL_H */
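
As the comment above says, the __reserved area of struct sigcontext carries a chain of _aarch64_ctx records terminated by an entry whose magic and size are both zero. A sketch of walking that chain to locate the FP/SIMD record, assuming the definitions above are in scope:

static struct fpsimd_context *find_fpsimd_context(struct sigcontext *sc)
{
    unsigned char *p = sc->__reserved;
    unsigned char *end = sc->__reserved + sizeof(sc->__reserved);

    while (p + sizeof(struct _aarch64_ctx) <= end) {
        struct _aarch64_ctx *head = (struct _aarch64_ctx *)p;

        if (head->magic == 0 && head->size == 0)
            break;                       /* terminator record */
        if (head->magic == FPSIMD_MAGIC)
            return (struct fpsimd_context *)head;
        if (head->size < sizeof(*head))
            break;                       /* malformed record, stop walking */
        p += head->size;                 /* records are 16-byte aligned */
    }
    return NULL;
}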

View File

@@ -0,0 +1,23 @@
/* smp.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef __HEADER_ARM64_COMMON_SMP_H
#define __HEADER_ARM64_COMMON_SMP_H
#ifndef __ASSEMBLY__
/*
* Initial data for bringing up a secondary CPU.
*/
struct secondary_data {
void *stack;
unsigned long next_pc;
unsigned long arg;
};
extern struct secondary_data secondary_data;
#endif /* __ASSEMBLY__ */
/* struct secondary_data offsets */
#define SECONDARY_DATA_STACK 0x00
#define SECONDARY_DATA_NEXT_PC 0x08
#define SECONDARY_DATA_ARG 0x10
#endif /* !__HEADER_ARM64_COMMON_SMP_H */
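
The SECONDARY_DATA_* constants mirror the field offsets of struct secondary_data (presumably for use from assembly), so they must track the C layout by hand; a compile-time cross-check of that correspondence could look like this (C11 _Static_assert, offsetof from <stddef.h>):

#include <stddef.h>

_Static_assert(offsetof(struct secondary_data, stack)   == SECONDARY_DATA_STACK,
               "SECONDARY_DATA_STACK out of sync");
_Static_assert(offsetof(struct secondary_data, next_pc) == SECONDARY_DATA_NEXT_PC,
               "SECONDARY_DATA_NEXT_PC out of sync");
_Static_assert(offsetof(struct secondary_data, arg)     == SECONDARY_DATA_ARG,
               "SECONDARY_DATA_ARG out of sync");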

View File

@@ -0,0 +1,17 @@
/* stringify.h COPYRIGHT FUJITSU LIMITED 2017 */
/**
* @ref.impl host-kernel/include/linux/stringify.h
*/
#ifndef __LINUX_STRINGIFY_H
#define __LINUX_STRINGIFY_H
/* Indirect stringification. Doing two levels allows the parameter to be a
* macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
* converts to "bar".
*/
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#endif /* !__LINUX_STRINGIFY_H */
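
A quick demonstration of why the two-level expansion matters: with a single level, the macro name itself would be stringified instead of its replacement.

#include <stdio.h>

#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#define __stringify_direct(x...) #x   /* one level, for contrast */

#define FOO bar

int main(void)
{
    printf("%s\n", __stringify(FOO));          /* prints "bar" */
    printf("%s\n", __stringify_direct(FOO));   /* prints "FOO" */
    return 0;
}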

View File

@@ -0,0 +1,152 @@
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
SYSCALL_DELEGATED(4, io_getevents)
SYSCALL_DELEGATED(17, getcwd)
SYSCALL_HANDLED(22, epoll_pwait)
SYSCALL_DELEGATED(25, fcntl)
SYSCALL_HANDLED(29, ioctl)
SYSCALL_DELEGATED(35, unlinkat)
SYSCALL_DELEGATED(43, statfs)
SYSCALL_DELEGATED(44, fstatfs)
SYSCALL_HANDLED(56, openat)
SYSCALL_HANDLED(57, close)
SYSCALL_DELEGATED(61, getdents64)
SYSCALL_DELEGATED(62, lseek)
SYSCALL_HANDLED(63, read)
SYSCALL_DELEGATED(64, write)
SYSCALL_DELEGATED(66, writev)
SYSCALL_DELEGATED(67, pread64)
SYSCALL_DELEGATED(68, pwrite64)
SYSCALL_HANDLED(72, pselect6)
SYSCALL_HANDLED(73, ppoll)
SYSCALL_HANDLED(74, signalfd4)
SYSCALL_DELEGATED(78, readlinkat)
SYSCALL_DELEGATED(80, fstat)
SYSCALL_HANDLED(93, exit)
SYSCALL_HANDLED(94, exit_group)
SYSCALL_HANDLED(95, waitid)
SYSCALL_HANDLED(96, set_tid_address)
SYSCALL_HANDLED(98, futex)
SYSCALL_HANDLED(99, set_robust_list)
SYSCALL_HANDLED(101, nanosleep)
SYSCALL_HANDLED(102, getitimer)
SYSCALL_HANDLED(103, setitimer)
SYSCALL_HANDLED(113, clock_gettime)
SYSCALL_DELEGATED(114, clock_getres)
SYSCALL_DELEGATED(115, clock_nanosleep)
SYSCALL_HANDLED(117, ptrace)
SYSCALL_HANDLED(118, sched_setparam)
SYSCALL_HANDLED(119, sched_setscheduler)
SYSCALL_HANDLED(120, sched_getscheduler)
SYSCALL_HANDLED(121, sched_getparam)
SYSCALL_HANDLED(122, sched_setaffinity)
SYSCALL_HANDLED(123, sched_getaffinity)
SYSCALL_HANDLED(124, sched_yield)
SYSCALL_HANDLED(125, sched_get_priority_max)
SYSCALL_HANDLED(126, sched_get_priority_min)
SYSCALL_HANDLED(127, sched_rr_get_interval)
SYSCALL_HANDLED(129, kill)
SYSCALL_HANDLED(130, tkill)
SYSCALL_HANDLED(131, tgkill)
SYSCALL_HANDLED(132, sigaltstack)
SYSCALL_HANDLED(133, rt_sigsuspend)
SYSCALL_HANDLED(134, rt_sigaction)
SYSCALL_HANDLED(135, rt_sigprocmask)
SYSCALL_HANDLED(136, rt_sigpending)
SYSCALL_HANDLED(137, rt_sigtimedwait)
SYSCALL_HANDLED(138, rt_sigqueueinfo)
SYSCALL_HANDLED(139, rt_sigreturn)
SYSCALL_HANDLED(143, setregid)
SYSCALL_HANDLED(144, setgid)
SYSCALL_HANDLED(145, setreuid)
SYSCALL_HANDLED(146, setuid)
SYSCALL_HANDLED(147, setresuid)
SYSCALL_HANDLED(148, getresuid)
SYSCALL_HANDLED(149, setresgid)
SYSCALL_HANDLED(150, getresgid)
SYSCALL_HANDLED(151, setfsuid)
SYSCALL_HANDLED(152, setfsgid)
SYSCALL_HANDLED(153, times)
SYSCALL_HANDLED(154, setpgid)
SYSCALL_DELEGATED(160, uname)
SYSCALL_HANDLED(163, getrlimit)
SYSCALL_HANDLED(164, setrlimit)
SYSCALL_HANDLED(165, getrusage)
SYSCALL_HANDLED(167, prctl)
SYSCALL_HANDLED(168, getcpu)
SYSCALL_HANDLED(169, gettimeofday)
SYSCALL_HANDLED(170, settimeofday)
SYSCALL_HANDLED(172, getpid)
SYSCALL_HANDLED(173, getppid)
SYSCALL_HANDLED(174, getuid)
SYSCALL_HANDLED(175, geteuid)
SYSCALL_HANDLED(176, getgid)
SYSCALL_HANDLED(177, getegid)
SYSCALL_HANDLED(178, gettid)
SYSCALL_HANDLED(179, sysinfo)
SYSCALL_DELEGATED(188, msgrcv)
SYSCALL_DELEGATED(189, msgsnd)
SYSCALL_DELEGATED(192, semtimedop)
SYSCALL_DELEGATED(193, semop)
SYSCALL_HANDLED(194, shmget)
SYSCALL_HANDLED(195, shmctl)
SYSCALL_HANDLED(196, shmat)
SYSCALL_HANDLED(197, shmdt)
SYSCALL_HANDLED(214, brk)
SYSCALL_HANDLED(215, munmap)
SYSCALL_HANDLED(216, mremap)
SYSCALL_HANDLED(220, clone)
SYSCALL_HANDLED(221, execve)
SYSCALL_HANDLED(222, mmap)
SYSCALL_HANDLED(226, mprotect)
SYSCALL_HANDLED(227, msync)
SYSCALL_HANDLED(228, mlock)
SYSCALL_HANDLED(229, munlock)
SYSCALL_HANDLED(230, mlockall)
SYSCALL_HANDLED(231, munlockall)
SYSCALL_HANDLED(232, mincore)
SYSCALL_HANDLED(233, madvise)
SYSCALL_HANDLED(234, remap_file_pages)
SYSCALL_HANDLED(235, mbind)
SYSCALL_HANDLED(236, get_mempolicy)
SYSCALL_HANDLED(237, set_mempolicy)
SYSCALL_HANDLED(238, migrate_pages)
SYSCALL_HANDLED(239, move_pages)
#ifdef ENABLE_PERF
SYSCALL_HANDLED(241, perf_event_open)
#else // ENABLE_PERF
SYSCALL_DELEGATED(241, perf_event_open)
#endif // ENABLE_PERF
SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(261, prlimit64)
SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
SYSCALL_HANDLED(733, util_register_desc)
/* McKernel Specific */
SYSCALL_HANDLED(801, swapout)
SYSCALL_HANDLED(802, linux_mlock)
SYSCALL_HANDLED(803, suspend_threads)
SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
SYSCALL_DELEGATED(1024, open)
SYSCALL_DELEGATED(1035, readlink)
SYSCALL_HANDLED(1045, signalfd)
SYSCALL_DELEGATED(1049, stat)
SYSCALL_DELEGATED(1060, getpgrp)
SYSCALL_HANDLED(1062, time)
SYSCALL_DELEGATED(1069, epoll_wait)
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
* robust marker for the autotest patch.
*/
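
syscall_list.h is an X-macro table: a consumer defines SYSCALL_HANDLED()/SYSCALL_DELEGATED() before including it, so one list can generate several different artifacts. The real consumers in the McKernel build are not part of this diff; the general pattern, with a purely illustrative name table, looks like this:

/* illustrative consumer: build a syscall-number -> name table */
#define SYSCALL_HANDLED(num, name)   [num] = #name,
#define SYSCALL_DELEGATED(num, name) [num] = #name " (delegated)",
static const char *syscall_names[] = {
#include "syscall_list.h"
};
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED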

View File

@@ -0,0 +1,403 @@
/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/*
* Macros for accessing system registers with older binutils.
*
* Copyright (C) 2014 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SYSREG_H
#define __ASM_SYSREG_H
#include <types.h>
#include <stringify.h>
#include <ihk/types.h>
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
* C5.2, version:ARM DDI 0487A.f)
* [20-19] : Op0
* [18-16] : Op1
* [15-12] : CRn
* [11-8] : CRm
* [7-5] : Op2
*/
#define Op0_shift 19
#define Op0_mask 0x3
#define Op1_shift 16
#define Op1_mask 0x7
#define CRn_shift 12
#define CRn_mask 0xf
#define CRm_shift 8
#define CRm_mask 0xf
#define Op2_shift 5
#define Op2_mask 0x7
#define sys_reg(op0, op1, crn, crm, op2) \
(((op0) << Op0_shift) | ((op1) << Op1_shift) | \
((crn) << CRn_shift) | ((crm) << CRm_shift) | \
((op2) << Op2_shift))
#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask)
#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask)
#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask)
#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask)
#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask)
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
/*
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM | \
(!!x)<<8 | 0x1f)
#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM | \
(!!x)<<8 | 0x1f)
*/
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
#define SCTLR_ELx_I (1 << 12)
#define SCTLR_ELx_SA (1 << 3)
#define SCTLR_ELx_C (1 << 2)
#define SCTLR_ELx_A (1 << 1)
#define SCTLR_ELx_M 1
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I)
/* SCTLR_EL1 specific flags. */
#define SCTLR_EL1_UCI (1 << 26)
#define SCTLR_EL1_SPAN (1 << 23)
#define SCTLR_EL1_UCT (1 << 15)
#define SCTLR_EL1_SED (1 << 8)
#define SCTLR_EL1_CP15BEN (1 << 5)
/* id_aa64isar0 */
#define ID_AA64ISAR0_RDM_SHIFT 28
#define ID_AA64ISAR0_ATOMICS_SHIFT 20
#define ID_AA64ISAR0_CRC32_SHIFT 16
#define ID_AA64ISAR0_SHA2_SHIFT 12
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64isar1 */
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
#define ID_AA64PFR0_EL3_SHIFT 12
#define ID_AA64PFR0_EL2_SHIFT 8
#define ID_AA64PFR0_EL1_SHIFT 4
#define ID_AA64PFR0_EL0_SHIFT 0
#define ID_AA64PFR0_SVE 0x1
#define ID_AA64PFR0_FP_NI 0xf
#define ID_AA64PFR0_FP_SUPPORTED 0x0
#define ID_AA64PFR0_ASIMD_NI 0xf
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
#define ID_AA64MMFR0_SNSMEM_SHIFT 12
#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
#define ID_AA64MMFR0_ASID_SHIFT 4
#define ID_AA64MMFR0_PARANGE_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#define ID_AA64MMFR0_PARANGE_48 0x5
#define ID_AA64MMFR0_PARANGE_52 0x6
#ifdef CONFIG_ARM64_PA_BITS_52
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
#else
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
#endif
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
#define ID_AA64MMFR1_LOR_SHIFT 16
#define ID_AA64MMFR1_HPD_SHIFT 12
#define ID_AA64MMFR1_VHE_SHIFT 8
#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
#define ID_AA64MMFR1_HADBS_SHIFT 0
#define ID_AA64MMFR1_VMIDBITS_8 0
#define ID_AA64MMFR1_VMIDBITS_16 2
/* id_aa64mmfr2 */
#define ID_AA64MMFR2_LVA_SHIFT 16
#define ID_AA64MMFR2_IESB_SHIFT 12
#define ID_AA64MMFR2_LSM_SHIFT 8
#define ID_AA64MMFR2_UAO_SHIFT 4
#define ID_AA64MMFR2_CNP_SHIFT 0
/* id_aa64dfr0 */
#define ID_AA64DFR0_PMSVER_SHIFT 32
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
#define ID_AA64DFR0_BRPS_SHIFT 12
#define ID_AA64DFR0_PMUVER_SHIFT 8
#define ID_AA64DFR0_TRACEVER_SHIFT 4
#define ID_AA64DFR0_DEBUGVER_SHIFT 0
#define ID_ISAR5_RDM_SHIFT 24
#define ID_ISAR5_CRC32_SHIFT 16
#define ID_ISAR5_SHA2_SHIFT 12
#define ID_ISAR5_SHA1_SHIFT 8
#define ID_ISAR5_AES_SHIFT 4
#define ID_ISAR5_SEVL_SHIFT 0
#define MVFR0_FPROUND_SHIFT 28
#define MVFR0_FPSHVEC_SHIFT 24
#define MVFR0_FPSQRT_SHIFT 20
#define MVFR0_FPDIVIDE_SHIFT 16
#define MVFR0_FPTRAP_SHIFT 12
#define MVFR0_FPDP_SHIFT 8
#define MVFR0_FPSP_SHIFT 4
#define MVFR0_SIMD_SHIFT 0
#define MVFR1_SIMDFMAC_SHIFT 28
#define MVFR1_FPHP_SHIFT 24
#define MVFR1_SIMDHP_SHIFT 20
#define MVFR1_SIMDSP_SHIFT 16
#define MVFR1_SIMDINT_SHIFT 12
#define MVFR1_SIMDLS_SHIFT 8
#define MVFR1_FPDNAN_SHIFT 4
#define MVFR1_FPFTZ_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED
#elif defined(CONFIG_ARM64_64K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
#endif
#define ZCR_EL1_LEN_SHIFT 0
#define ZCR_EL1_LEN_SIZE 9
#define ZCR_EL1_LEN_MASK 0x1ff
#define CPACR_EL1_ZEN_EL1EN (1 << 16)
#define CPACR_EL1_ZEN_EL0EN (1 << 17)
#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (1UL << 31)
/* SYS_MIDR_EL1 */
//mask
#define SYS_MIDR_EL1_IMPLEMENTER_MASK (0xFFUL)
#define SYS_MIDR_EL1_PPNUM_MASK (0xFFFUL)
//shift
#define SYS_MIDR_EL1_IMPLEMENTER_SHIFT (24)
#define SYS_MIDR_EL1_PPNUM_SHIFT (0x4)
//val
#define SYS_MIDR_EL1_IMPLEMENTER_FJ (0x46)
#define SYS_MIDR_EL1_PPNUM_TCHIP (0x1)
#define READ_ACCESS (0)
#define WRITE_ACCESS (1)
#define ACCESS_REG_FUNC(name, reg) \
static void xos_access_##name(uint8_t flag, uint64_t *reg_value) \
{ \
if (flag == READ_ACCESS) { \
__asm__ __volatile__("mrs_s %0," __stringify(reg) "\n\t" \
:"=&r"(*reg_value)::); \
} \
else if (flag == WRITE_ACCESS) { \
__asm__ __volatile__("msr_s" __stringify(reg) ", %0\n\t" \
::"r"(*reg_value):); \
} else { \
; \
} \
}
#define XOS_FALSE (0)
#define XOS_TRUE (1)
#ifdef __ASSEMBLY__
#define __emit_inst(x) .inst (x)
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__reg_num_x\num, \num
.endr
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
.endm
.macro msr_s, sreg, rt
__emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt))
.endm
#else
#define __emit_inst(x)".inst " __stringify((x)) "\n\t"
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
" .equ .L__reg_num_x\\num, \\num\n"
" .endr\n"
" .equ .L__reg_num_xzr, 31\n"
"\n"
" .macro mrs_s, rt, sreg\n"
__emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
"\n"
" .macro msr_s, sreg, rt\n"
__emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
);
ACCESS_REG_FUNC(midr_el1, SYS_MIDR_EL1);
static int xos_is_tchip(void)
{
uint64_t reg = 0;
int ret = 0, impl = 0, part = 0;
xos_access_midr_el1(READ_ACCESS, &reg);
impl = (reg >> SYS_MIDR_EL1_IMPLEMENTER_SHIFT) &
SYS_MIDR_EL1_IMPLEMENTER_MASK;
part = (reg >> SYS_MIDR_EL1_PPNUM_SHIFT) & SYS_MIDR_EL1_PPNUM_MASK;
if ((impl == SYS_MIDR_EL1_IMPLEMENTER_FJ) &&
(part == SYS_MIDR_EL1_PPNUM_TCHIP)) {
ret = XOS_TRUE;
}
else {
ret = XOS_FALSE;
}
return ret;
}
#endif
/*
* Unlike read_cpuid, calls to read_sysreg are never expected to be
* optimized away or replaced with synthetic values.
*/
#define read_sysreg(r) ({ \
uint64_t __val; \
asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
__val; \
})
/*
* The "Z" constraint normally means a zero immediate, but when combined with
* the "%x0" template means XZR.
*/
#define write_sysreg(v, r) do { \
uint64_t __val = (uint64_t)v; \
asm volatile("msr " __stringify(r) ", %x0" \
: : "rZ" (__val)); \
} while (0)
/*
* For registers without architectural names, or simply unsupported by
* GAS.
*/
#define read_sysreg_s(r) ({ \
uint64_t __val; \
asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
uint64_t __val = (uint64_t)v; \
asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
} while (0)
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
#define CPTR_EL2_TZ (1 << 8)
#include "imp-sysreg.h"
#endif /* __ASM_SYSREG_H */
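
The read_sysreg()/read_sysreg_s() macros above wrap mrs/mrs_s, and the SYS_MIDR_EL1_* masks and shifts describe how xos_is_tchip() decodes the implementer and part-number fields. A minimal, standalone sketch of the same bit-field decoding, using a hard-coded sample value instead of an actual mrs (which requires EL1); all values below are hypothetical:

/* Illustrative sketch only (not part of the repository): decode the
 * implementer and part-number fields of a MIDR_EL1 value the same way
 * xos_is_tchip() does, with a hard-coded sample value. */
#include <stdint.h>
#include <stdio.h>

#define IMPLEMENTER_SHIFT 24
#define IMPLEMENTER_MASK  0xFFUL
#define PARTNUM_SHIFT     4
#define PARTNUM_MASK      0xFFFUL

int main(void)
{
	/* Hypothetical MIDR_EL1: implementer 0x46 (Fujitsu), part number 0x001 */
	uint64_t midr = (0x46UL << IMPLEMENTER_SHIFT) | (0x001UL << PARTNUM_SHIFT);
	uint64_t impl = (midr >> IMPLEMENTER_SHIFT) & IMPLEMENTER_MASK;
	uint64_t part = (midr >> PARTNUM_SHIFT) & PARTNUM_MASK;

	printf("implementer=0x%02lx part=0x%03lx tchip=%d\n",
	       impl, part, impl == 0x46 && part == 0x1);
	return 0;
}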


@ -0,0 +1,107 @@
/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H
#define __HEADER_ARM64_COMMON_THREAD_INFO_H
#define MIN_KERNEL_STACK_SHIFT 15
#include <arch-memory.h>
#if (MIN_KERNEL_STACK_SHIFT < PAGE_SHIFT)
#define KERNEL_STACK_SHIFT PAGE_SHIFT
#else
#define KERNEL_STACK_SHIFT MIN_KERNEL_STACK_SHIFT
#endif
#define KERNEL_STACK_SIZE (UL(1) << KERNEL_STACK_SHIFT)
#define THREAD_START_SP (KERNEL_STACK_SIZE - 16)
#ifndef __ASSEMBLY__
#include <process.h>
#include <prctl.h>
struct cpu_context {
unsigned long x19;
unsigned long x20;
unsigned long x21;
unsigned long x22;
unsigned long x23;
unsigned long x24;
unsigned long x25;
unsigned long x26;
unsigned long x27;
unsigned long x28;
unsigned long fp;
unsigned long sp;
unsigned long pc;
};
struct thread_info {
unsigned long flags; /* low level flags */
// mm_segment_t addr_limit; /* address limit */
// struct task_struct *task; /* main task structure */
// struct exec_domain *exec_domain; /* execution domain */
// struct restart_block restart_block;
// int preempt_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
struct cpu_context cpu_context; /* kernel_context */
void *sve_state; /* SVE registers, if any */
unsigned int sve_vl; /* SVE vector length */
unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned long sve_flags; /* SVE related flags */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
};
/* Flags for sve_flags (intentionally defined to match the prctl flags) */
/* Inherit sve_vl and sve_flags across execve(): */
#define THREAD_VL_INHERIT PR_SVE_VL_INHERIT
struct arm64_cpu_local_thread {
struct thread_info thread_info;
unsigned long paniced;
uint64_t panic_regs[34];
};
union arm64_cpu_local_variables {
struct arm64_cpu_local_thread arm64_cpu_local_thread;
unsigned long stack[KERNEL_STACK_SIZE / sizeof(unsigned long)];
};
extern union arm64_cpu_local_variables init_thread_info;
/*
* how to get the current stack pointer from C
*/
register unsigned long current_stack_pointer asm ("sp");
/*
* how to get the thread information struct from C
*/
static inline struct thread_info *current_thread_info(void)
{
unsigned long ti = 0;
ti = ALIGN_DOWN(current_stack_pointer, KERNEL_STACK_SIZE);
return (struct thread_info *)ti;
}
/*
* how to get the pt_regs struct from C
*/
static inline struct pt_regs *current_pt_regs(void)
{
unsigned long regs = 0;
regs = ALIGN_DOWN(current_stack_pointer, KERNEL_STACK_SIZE);
regs += THREAD_START_SP - sizeof(struct pt_regs);
return (struct pt_regs *)regs;
}
#endif /* !__ASSEMBLY__ */
#define TIF_SINGLESTEP 21
#endif /* !__HEADER_ARM64_COMMON_THREAD_INFO_H */
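
current_thread_info() and current_pt_regs() above rely on the kernel stack being KERNEL_STACK_SIZE-aligned, so masking the stack pointer down to that alignment lands on the struct thread_info at the base of the stack. A small standalone sketch of that arithmetic, assuming a 32 KiB stack (KERNEL_STACK_SHIFT of 15) and a made-up stack-pointer value:

/* Illustrative sketch (not part of the repository): align a hypothetical
 * stack-pointer value down to the stack size to recover the stack base,
 * which is where struct thread_info lives. */
#include <stdint.h>
#include <stdio.h>

#define KERNEL_STACK_SIZE (1UL << 15)          /* assumed 32 KiB stack */
#define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))

int main(void)
{
	uint64_t sp = 0xffff000012345f80UL;       /* hypothetical current SP */
	uint64_t ti = ALIGN_DOWN(sp, KERNEL_STACK_SIZE);

	printf("sp=%#lx -> thread_info base=%#lx\n", sp, ti);
	return 0;
}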


@ -0,0 +1,28 @@
/* traps.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifndef __ASM_TRAP_H
#define __ASM_TRAP_H
#include <types.h>
#include <arch-lock.h>
struct pt_regs;
/* @ref.impl arch/arm64/include/asm/traps.h */
struct undef_hook {
struct list_head node;
uint32_t instr_mask;
uint32_t instr_val;
uint64_t pstate_mask;
uint64_t pstate_val;
int (*fn)(struct pt_regs *regs, uint32_t instr);
};
/* @ref.impl arch/arm64/include/asm/traps.h */
void register_undef_hook(struct undef_hook *hook);
/* @ref.impl arch/arm64/include/asm/traps.h */
void unregister_undef_hook(struct undef_hook *hook);
#endif /* __ASM_TRAP_H */


@ -0,0 +1,30 @@
/* vdso.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_COMMON_VDSO_H
#define __HEADER_ARM64_COMMON_VDSO_H
#ifdef __KERNEL__
/* @ref.impl arch/arm64/include/asm/vdso.h::VDSO_LBASE */
/*
* Default link address for the vDSO.
* Since we randomise the VDSO mapping, there's little point in trying
* to prelink this.
*/
#define VDSO_LBASE 0x0
#ifndef __ASSEMBLY__
#include <vdso-offsets.h>
/* @ref.impl arch/arm64/include/asm/vdso.h::VDSO_SYMBOL */
#define VDSO_SYMBOL(base, name) vdso_symbol_##name((unsigned long)(base))
void* vdso_symbol_sigtramp(unsigned long base);
int add_vdso_pages(struct thread *thread);
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* !__HEADER_ARM64_COMMON_VDSO_H */


@ -0,0 +1,22 @@
/* virt.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_VIRT_H
#define __HEADER_ARM64_COMMON_VIRT_H
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
#ifndef __ASSEMBLY__
#include <sysreg.h>
#include <ptrace.h>
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
static inline int is_kernel_in_hyp_mode(void)
{
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_VIRT_H */


@ -0,0 +1,176 @@
/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <ihk/cpu.h>
#include <irq.h>
#include <arm-gic-v2.h>
#include <io.h>
#include <arch/cpu.h>
#include <memory.h>
#include <affinity.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <arch-timer.h>
#include <cls.h>
// #define DEBUG_GICV2
#ifdef DEBUG_GICV2
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
void *dist_base;
void *cpu_base;
#define gic_hwid_to_affinity(hw_cpuid) (1UL << (hw_cpuid))
/**
* arm64_raise_sgi_gicv2
* @ref.impl drivers/irqchip/irq-gic.c:gic_raise_softirq
*
* @note Interrupt control is performed by a higher-level caller,
* so unlike gic_raise_softirq(), this function does not need to
* disable/enable interrupts itself.
*/
static void __arm64_raise_sgi_gicv2(unsigned int hw_cpuid, unsigned int vector)
{
/* Build interrupt destination of the target cpu */
uint8_t cpu_target_list = gic_hwid_to_affinity(hw_cpuid);
/*
* Ensure that stores to Normal memory are visible to the
* other CPUs before they observe us issuing the IPI.
*/
dmb(ishst);
/* write to GICD_SGIR */
writel_relaxed(
cpu_target_list << 16 | vector,
(void *)(dist_base + GIC_DIST_SOFTINT)
);
}
static void arm64_raise_sgi_gicv2(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target CPU */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
__arm64_raise_sgi_gicv2(hw_cpuid, vector);
}
static void arm64_raise_sgi_to_host_gicv2(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target Linux/host CPU */
uint32_t hw_cpuid = ihk_mc_get_apicid(cpuid);
__arm64_raise_sgi_gicv2(hw_cpuid, vector);
}
/**
* arm64_raise_spi_gicv2
* @ref.impl nothing.
*/
static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector)
{
uint64_t spi_reg_offset;
uint32_t spi_set_pending_bitpos;
/**
* Calculate the register offset and set-pending bit position corresponding to the interrupt vector number.
*
* For interrupt vector m,
* - the corresponding GICD_ISPENDR number, n, is given by n = m / 32
* - the offset of the required GICD_ISPENDR is (0x200 + (4*n))
* - the bit number of the required Set-pending bit in this register is m % 32.
*/
spi_reg_offset = vector / 32 * 4;
spi_set_pending_bitpos = vector % 32;
/* write to GICD_ISPENDR */
writel_relaxed(
1 << spi_set_pending_bitpos,
(void *)(dist_base + GIC_DIST_PENDING_SET + spi_reg_offset)
);
}
void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv2(cpuid, vector);
}
/**
* arm64_issue_ipi_gicv2
* @param cpuid : hardware cpu id
* @param vector : interrupt vector number
*/
void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector)
{
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
if(vector < 16){
// send SGI
arm64_raise_sgi_gicv2(cpuid, vector);
} else if (32 <= vector && vector < 1020) {
// send SPI (allow only to host)
arm64_raise_spi_gicv2(cpuid, vector);
} else {
ekprintf("#%d is bad irq number.", vector);
}
}
/**
* handle_interrupt_gicv2
* @ref.impl drivers/irqchip/irq-gic.c:gic_handle_irq
*/
extern int interrupt_from_user(void *);
void handle_interrupt_gicv2(struct pt_regs *regs)
{
unsigned int irqstat, irqnr;
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
do {
// get GICC_IAR.InterruptID
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
irqnr = irqstat & GICC_IAR_INT_ID_MASK;
if (irqnr < 32) {
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_IPI(irqnr, regs);
continue;
} else if (irqnr != 1023) {
panic("PANIC: handle_interrupt_gicv2(): catch invalid interrupt.");
}
/*
* If no other interrupt is pending, GICC_IAR.InterruptID
* returns 1023 (see GICv2 spec, chapter 4.4.4).
*/
break;
} while (1);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
/* for migration by IPI */
if (get_this_cpu_local_var()->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
}
void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size)
{
dist_base = map_fixed_area(dist_base_pa, size, 1 /* non-cacheable */);
}
void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size)
{
cpu_base = map_fixed_area(cpu_base_pa, size, 1 /* non-cacheable */);
}
void gic_enable_gicv2(void)
{
unsigned int enable_ppi_sgi = 0;
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
writel_relaxed(enable_ppi_sgi, dist_base + GIC_DIST_ENABLE_SET);
}
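
Both GIC drivers raise an SPI by computing the set-pending register index as n = vector / 32 (byte offset 4 * n from the GICD_ISPENDR base) and the bit position as vector % 32. A standalone worked example of that calculation for a hypothetical SPI number:

/* Illustrative sketch (not part of the repository): the GICD_ISPENDR
 * offset/bit computation used by arm64_raise_spi_gicv2/gicv3, applied to
 * a hypothetical SPI vector. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vector = 45;                    /* hypothetical SPI number */
	uint64_t reg_offset = vector / 32 * 4;   /* GICD_ISPENDR<n>, n = m / 32 */
	uint32_t bit = vector % 32;              /* set-pending bit in that register */

	/* For vector 45: n = 1, byte offset 4 from the GICD_ISPENDR base, bit 13 */
	printf("GICD_ISPENDR offset=+%#lx bit=%u value=%#x\n",
	       reg_offset, bit, 1U << bit);
	return 0;
}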


@ -0,0 +1,514 @@
/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <irq.h>
#include <arm-gic-v2.h>
#include <arm-gic-v3.h>
#include <io.h>
#include <cputype.h>
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <arch-timer.h>
#include <cls.h>
//#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef USE_CAVIUM_THUNDER_X
static char is_cavium_thunderx = 0;
#endif
void *dist_base;
void *rdist_base[NR_CPUS];
extern uint64_t ihk_param_cpu_logical_map;
static uint64_t *__cpu_logical_map = &ihk_param_cpu_logical_map;
extern uint64_t ihk_param_gic_rdist_base_pa[NR_CPUS];
#define cpu_logical_map(cpu) __cpu_logical_map[cpu]
/* Our default, arbitrary priority value. Linux only uses one anyway. */
#define DEFAULT_PMR_VALUE 0xf0
/**
* Low level accessors
* @ref.impl host-kernel/drivers/irqchip/irq-gic-v3.c
*/
static uint64_t gic_read_iar_common(void)
{
uint64_t irqstat;
#ifdef CONFIG_HAS_NMI
uint64_t daif;
uint64_t pmr;
uint64_t default_pmr_value = DEFAULT_PMR_VALUE;
/*
* The PMR may be configured to mask interrupts when this code is
* called, thus in order to acknowledge interrupts we must set the
* PMR to its default value before reading from the IAR.
*
* To do this without taking an interrupt we also ensure the I bit
* is set whilst we are interfering with the value of the PMR.
*/
asm volatile(
"mrs %1, daif\n\t" /* save I bit */
"msr daifset, #2\n\t" /* set I bit */
"mrs_s %2, " __stringify(ICC_PMR_EL1) "\n\t" /* save PMR */
"msr_s " __stringify(ICC_PMR_EL1) ",%3\n\t" /* set PMR */
"mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" /* ack int */
"msr_s " __stringify(ICC_PMR_EL1) ",%2\n\t" /* restore PMR */
"isb\n\t"
"msr daif, %1" /* restore I */
: "=r" (irqstat), "=&r" (daif), "=&r" (pmr)
: "r" (default_pmr_value));
#else /* CONFIG_HAS_NMI */
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
#endif /* CONFIG_HAS_NMI */
return irqstat;
}
#ifdef USE_CAVIUM_THUNDER_X
/* Cavium ThunderX erratum 23154 */
static uint64_t gic_read_iar_cavium_thunderx(void)
{
uint64_t irqstat;
#ifdef CONFIG_HAS_NMI
uint64_t daif;
uint64_t pmr;
uint64_t default_pmr_value = DEFAULT_PMR_VALUE;
/*
* The PMR may be configured to mask interrupts when this code is
* called, thus in order to acknowledge interrupts we must set the
* PMR to its default value before reading from the IAR.
*
* To do this without taking an interrupt we also ensure the I bit
* is set whilst we are interfering with the value of the PMR.
*/
asm volatile(
"mrs %1, daif\n\t" /* save I bit */
"msr daifset, #2\n\t" /* set I bit */
"mrs_s %2, " __stringify(ICC_PMR_EL1) "\n\t" /* save PMR */
"msr_s " __stringify(ICC_PMR_EL1) ",%3\n\t" /* set PMR */
"nop;nop;nop;nop\n\t"
"nop;nop;nop;nop\n\t"
"mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" /* ack int */
"nop;nop;nop;nop\n\t"
"msr_s " __stringify(ICC_PMR_EL1) ",%2\n\t" /* restore PMR */
"isb\n\t"
"msr daif, %1" /* restore I */
: "=r" (irqstat), "=&r" (daif), "=&r" (pmr)
: "r" (default_pmr_value));
#else /* CONFIG_HAS_NMI */
asm volatile("nop;nop;nop;nop;");
asm volatile("nop;nop;nop;nop;");
asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
asm volatile("nop;nop;nop;nop;");
#endif /* CONFIG_HAS_NMI */
mb();
return irqstat;
}
#endif
static uint64_t gic_read_iar(void)
{
#ifdef USE_CAVIUM_THUNDER_X
if (is_cavium_thunderx)
return gic_read_iar_cavium_thunderx();
else
#endif
return gic_read_iar_common();
}
static void gic_write_pmr(uint64_t val)
{
asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val));
}
static void gic_write_ctlr(uint64_t val)
{
asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val));
isb();
}
static void gic_write_grpen1(uint64_t val)
{
asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val));
isb();
}
static inline void gic_write_eoir(uint64_t irq)
{
asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq));
isb();
}
static void gic_write_sgi1r(uint64_t val)
{
asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
}
static inline uint32_t gic_read_sre(void)
{
uint64_t val;
asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
return val;
}
static inline void gic_write_sre(uint32_t val)
{
asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((uint64_t)val));
isb();
}
static uint32_t gic_enable_sre(void)
{
uint32_t val;
val = gic_read_sre();
if (val & ICC_SRE_EL1_SRE)
return 1; /*ok*/
val |= ICC_SRE_EL1_SRE;
gic_write_sre(val);
val = gic_read_sre();
return !!(val & ICC_SRE_EL1_SRE);
}
#ifdef CONFIG_HAS_NMI
static inline void gic_write_bpr1(uint32_t val)
{
asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
}
#endif
static void __arm64_raise_sgi_gicv3(uint32_t hw_cpuid, uint32_t vector)
{
uint64_t mpidr, cluster_id;
uint16_t tlist;
uint64_t val;
/*
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
*/
smp_wmb();
mpidr = cpu_logical_map(hw_cpuid);
if((mpidr & 0xffUL) < 16) {
cluster_id = cpu_logical_map(hw_cpuid) & ~0xffUL;
tlist = (uint16_t)(1 << (mpidr & 0xf));
#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \
(MPIDR_AFFINITY_LEVEL(cluster_id, level) \
<< ICC_SGI1R_AFFINITY_## level ##_SHIFT)
val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) |
MPIDR_TO_SGI_AFFINITY(cluster_id, 2) |
vector << ICC_SGI1R_SGI_ID_SHIFT |
MPIDR_TO_SGI_AFFINITY(cluster_id, 1) |
tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
dkprintf("CPU%d: ICC_SGI1R_EL1 %llx\n", ihk_mc_get_processor_id(), val);
gic_write_sgi1r(val);
/* Force the above writes to ICC_SGI1R_EL1 to be executed */
isb();
} else {
/*
* If we ever get a cluster of more than 16 CPUs, just
* scream and skip that CPU.
*/
ekprintf("GICv3 can't send SGI for TargetList=%d\n", (mpidr & 0xffUL));
}
}
static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target CPU */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
__arm64_raise_sgi_gicv3(hw_cpuid, vector);
}
static void arm64_raise_sgi_to_host_gicv3(uint32_t cpuid, uint32_t vector)
{
/* Build interrupt destination of the target Linux/host CPU */
uint32_t hw_cpuid = ihk_mc_get_apicid(cpuid);
__arm64_raise_sgi_gicv3(hw_cpuid, vector);
}
static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector)
{
uint64_t spi_reg_offset;
uint32_t spi_set_pending_bitpos;
/**
* Calculate the register offset and set-pending bit position corresponding to the interrupt vector number.
*
* For interrupt vector m,
* - the corresponding GICD_ISPENDR number, n, is given by n = m / 32
* - the offset of the required GICD_ISPENDR is (0x200 + (4*n))
* - the bit number of the required Set-pending bit in this register is m % 32.
*/
spi_reg_offset = vector / 32 * 4;
spi_set_pending_bitpos = vector % 32;
/* write to GICD_ISPENDR */
writel_relaxed(
1 << spi_set_pending_bitpos,
(void *)(dist_base + GICD_ISPENDR + spi_reg_offset)
);
}
static void arm64_raise_lpi_gicv3(uint32_t cpuid, uint32_t vector)
{
// @todo.impl
ekprintf("%s called.\n", __func__);
}
void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv3(cpuid, vector);
}
void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
barrier();
if(vector < 16){
// send SGI
arm64_raise_sgi_gicv3(cpuid, vector);
} else if (32 <= vector && vector < 1020) {
// send SPI (allow only to host)
arm64_raise_spi_gicv3(cpuid, vector);
} else if (8192 <= vector) {
// send LPI (allow only to host)
arm64_raise_lpi_gicv3(cpuid, vector);
} else {
ekprintf("#%d is bad irq number.\n", vector);
}
}
extern int interrupt_from_user(void *);
void handle_interrupt_gicv3(struct pt_regs *regs)
{
uint64_t irqnr;
const int from_user = interrupt_from_user(regs);
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
handle_IPI(irqnr, regs);
}
irqnr = gic_read_iar();
}
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
//irqflags = ihk_mc_spinlock_lock(&v->runq_lock);
/* For migration by IPI or by timesharing */
if (v->flags &
(CPU_FLAG_NEED_MIGRATE | CPU_FLAG_NEED_RESCHED)) {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
do_check = 1;
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
if (do_check) {
check_signal(0, regs, 0);
schedule();
}
}
static uint64_t gic_mpidr_to_affinity(unsigned long mpidr)
{
uint64_t aff;
aff = ((uint64_t)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
MPIDR_AFFINITY_LEVEL(mpidr, 0));
return aff;
}
static void init_spi_routing(uint32_t irq, uint32_t linux_cpu)
{
uint64_t spi_route_reg_val, spi_route_reg_offset;
if (irq < 32 || 1020 <= irq) {
ekprintf("%s: irq is not spi number. (irq=%d)\n",
__func__, irq);
return;
}
/* write to GICD_IROUTER */
spi_route_reg_offset = irq * 8;
spi_route_reg_val = gic_mpidr_to_affinity(cpu_logical_map(linux_cpu));
writeq_relaxed(spi_route_reg_val,
(void *)(dist_base + GICD_IROUTER +
spi_route_reg_offset));
}
void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
{
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
extern int spi_table[];
extern int nr_spi_table;
int i;
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
dist_base = map_fixed_area(dist_base_pa, size, 1 /* non-cacheable */);
#ifdef USE_CAVIUM_THUNDER_X
/* Cavium ThunderX erratum 23154 */
if (MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_CAVIUM) {
is_cavium_thunderx = 1;
}
#endif
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
/* initialize spi routing */
for (i = 0; i < nr_spi_table; i++) {
if (spi_table[i] == -1) {
continue;
}
init_spi_routing(spi_table[i], i);
}
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
}
void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size)
{
int32_t cpuid, hw_cpuid;
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
for(cpuid = 0; cpuid < cpu_info->ncpus; cpuid++) {
hw_cpuid = cpu_info->hw_ids[cpuid];
if(ihk_param_gic_rdist_base_pa[hw_cpuid] != 0) {
rdist_base[hw_cpuid] =
map_fixed_area(ihk_param_gic_rdist_base_pa[hw_cpuid], size, 1 /* non-cacheable */);
}
}
}
static void gic_do_wait_for_rwp(void *base)
{
uint32_t count = 1000000; /* 1s! */
while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
count--;
if (!count) {
ekprintf("RWP timeout, gone fishing\n");
return;
}
cpu_pause();
};
}
void gic_enable_gicv3(void)
{
void *rbase = rdist_base[ihk_mc_get_hardware_processor_id()];
void *rd_sgi_base = rbase + 0x10000 /* SZ_64K */;
int i;
unsigned int enable_ppi_sgi = GICD_INT_EN_SET_SGI;
extern int ihk_param_nr_pmu_irq_affi;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
if ((0 <= ihk_param_pmu_irq_affi[i]) &&
(ihk_param_pmu_irq_affi[i] <
sizeof(enable_ppi_sgi) * BITS_PER_BYTE)) {
enable_ppi_sgi |= GICD_ENABLE <<
ihk_param_pmu_irq_affi[i];
}
}
}
else {
enable_ppi_sgi |= GICD_ENABLE << INTRID_PERF_OVF;
}
/*
* Deal with the banked PPI and SGI interrupts - disable all
* PPI interrupts, ensure all SGI interrupts are enabled.
*/
writel_relaxed(~enable_ppi_sgi, rd_sgi_base + GIC_DIST_ENABLE_CLEAR);
writel_relaxed(enable_ppi_sgi, rd_sgi_base + GIC_DIST_ENABLE_SET);
/*
* Set priority on PPI and SGI interrupts
*/
for (i = 0; i < 32; i += 4) {
writel_relaxed(GICD_INT_DEF_PRI_X4,
rd_sgi_base + GIC_DIST_PRI + i);
}
/* sync wait */
gic_do_wait_for_rwp(rbase);
/*
* Need to check that the SRE bit has actually been set. If
* not, it means that SRE is disabled at EL2. We're going to
* die painfully, and there is nothing we can do about it.
*
* Kindly inform the luser.
*/
if (!gic_enable_sre())
panic("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
#ifndef CONFIG_HAS_NMI
/* Set priority mask register */
gic_write_pmr(DEFAULT_PMR_VALUE);
#endif
/* EOI deactivates interrupt too (mode 0) */
gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
/* ... and let's hit the road... */
gic_write_grpen1(1);
#ifdef CONFIG_HAS_NMI
/*
* Some firmwares hand over to the kernel with the BPR changed from
* its reset value (and with a value large enough to prevent
* any pre-emptive interrupts from working at all). Writing a zero
* to BPR restores its reset value.
*/
gic_write_bpr1(0);
/* Set specific IPI to NMI */
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_MULTI_NMI);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
/* sync wait */
gic_do_wait_for_rwp(rbase);
#endif /* CONFIG_HAS_NMI */
}
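
__arm64_raise_sgi_gicv3() packs the target CPU's MPIDR affinity levels, the SGI number, and a one-bit target list into ICC_SGI1R_EL1. A standalone sketch of that packing for a hypothetical MPIDR; the shift constants follow the GICv3 architecture and are assumed to match arm-gic-v3.h:

/* Illustrative sketch (not part of the repository): build an ICC_SGI1R_EL1
 * value for a hypothetical target CPU, mirroring __arm64_raise_sgi_gicv3(). */
#include <stdint.h>
#include <stdio.h>

#define AFF1_SHIFT   16
#define SGI_ID_SHIFT 24
#define AFF2_SHIFT   32
#define AFF3_SHIFT   48

int main(void)
{
	uint64_t mpidr  = 0x0000000000000103UL;  /* hypothetical: Aff1=1, Aff0=3 */
	uint32_t vector = 7;                     /* SGI number to send */

	uint64_t aff1  = (mpidr >> 8) & 0xff;
	uint64_t aff2  = (mpidr >> 16) & 0xff;
	uint64_t aff3  = (mpidr >> 32) & 0xff;
	uint64_t tlist = 1UL << (mpidr & 0xf);   /* one bit per CPU in the cluster */

	uint64_t sgi1r = (aff3 << AFF3_SHIFT) | (aff2 << AFF2_SHIFT) |
			 ((uint64_t)vector << SGI_ID_SHIFT) |
			 (aff1 << AFF1_SHIFT) | tlist;

	printf("ICC_SGI1R_EL1 = %#lx\n", sgi1r);
	return 0;
}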

arch/arm64/kernel/local.c

@ -0,0 +1,95 @@
/* local.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <cpulocal.h>
#include <ihk/atomic.h>
#include <ihk/mm.h>
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <registers.h>
#include <string.h>
/* BSP initialized stack area */
union arm64_cpu_local_variables init_thread_info __attribute__((aligned(KERNEL_STACK_SIZE)));
/* BSP/AP idle stack pointer head */
static union arm64_cpu_local_variables *locals;
size_t arm64_cpu_local_variables_span = KERNEL_STACK_SIZE; /* for debugger */
/* allocate & initialize BSP/AP idle stack */
void init_processors_local(int max_id)
{
int i = 0;
union arm64_cpu_local_variables *tmp;
const int npages = ((max_id + 1) *
(ALIGN_UP(KERNEL_STACK_SIZE, PAGE_SIZE) >>
PAGE_SHIFT));
if (npages < 1) {
panic("idle kernel stack allocation failed.");
}
/* allocate one more for alignment */
locals = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL);
if (locals == NULL) {
panic("idle kernel stack allocation failed.");
}
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
/* clear struct process, struct process_vm, struct thread_info area */
for (i = 0, tmp = locals; i < max_id; i++, tmp++) {
memset(tmp, 0, sizeof(struct thread_info));
}
kprintf("locals = %p\n", locals);
}
/* get id (logical processor id) local variable address */
union arm64_cpu_local_variables *get_arm64_cpu_local_variable(int id)
{
return locals + id;
}
/* get id (logical processor id) kernel stack address */
static void *get_arm64_cpu_local_kstack(int id)
{
return (char *)get_arm64_cpu_local_variable(id) + THREAD_START_SP;
}
/* get current cpu local variable address */
union arm64_cpu_local_variables *get_arm64_this_cpu_local(void)
{
int id = ihk_mc_get_processor_id();
return get_arm64_cpu_local_variable(id);
}
/* get current kernel stack address */
void *get_arm64_this_cpu_kstack(void)
{
int id = ihk_mc_get_processor_id();
return get_arm64_cpu_local_kstack(id);
}
/* assign logical processor id for current_thread_info.cpu */
/* logical processor id BSP:0, AP0:1, AP1:2, ... APn:n-1 */
static ihk_atomic_t last_processor_id = IHK_ATOMIC_INIT(-1);
void assign_processor_id(void)
{
int id;
union arm64_cpu_local_variables *v;
id = ihk_atomic_inc_return(&last_processor_id);
v = get_arm64_cpu_local_variable(id);
v->arm64_cpu_local_thread.thread_info.cpu = id;
}
/** IHK **/
/* get current logical processor id */
int ihk_mc_get_processor_id(void)
{
return current_thread_info()->cpu;
}
/* get current physical processor id (not equal AFFINITY !!) */
int ihk_mc_get_hardware_processor_id(void)
{
return ihk_mc_get_cpu_info()->hw_ids[ihk_mc_get_processor_id()];
}
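
init_processors_local() above sizes the idle-stack area as (max_id + 1) stacks' worth of pages, then rounds the allocation base up to KERNEL_STACK_SIZE; the extra stack pays for the alignment so max_id aligned stacks still fit. A standalone sketch of that arithmetic with assumed sizes (4 KiB pages, 32 KiB stacks, 4 CPUs):

/* Illustrative sketch (not part of the repository): the page-count and
 * alignment arithmetic of init_processors_local(), with assumed sizes. */
#include <stdio.h>

#define PAGE_SIZE         4096UL
#define KERNEL_STACK_SIZE (1UL << 15)   /* assumed 32 KiB per-CPU stack */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int max_id = 4;                      /* hypothetical number of CPUs */
	unsigned long npages =
		(max_id + 1) * (ALIGN_UP(KERNEL_STACK_SIZE, PAGE_SIZE) / PAGE_SIZE);
	unsigned long raw  = 0x40001000UL;   /* hypothetical allocation address */
	unsigned long base = ALIGN_UP(raw, KERNEL_STACK_SIZE);

	/* Five stacks' worth of pages are allocated so that, after rounding the
	 * base up to the stack alignment, four aligned stacks still fit. */
	printf("npages=%lu base=%#lx (alignment waste=%lu bytes)\n",
	       npages, base, base - raw);
	return 0;
}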


@ -0,0 +1,78 @@
/* memcpy.S COPYRIGHT FUJITSU LIMITED 2017 */
/*
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linkage.h>
#include <assembler.h>
#include <cache.h>
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
*
* Parameters:
* x0 - dest
* x1 - src
* x2 - n
* Returns:
* x0 - dest
*/
.macro ldrb1 ptr, regB, val
ldrb \ptr, [\regB], \val
.endm
.macro strb1 ptr, regB, val
strb \ptr, [\regB], \val
.endm
.macro ldrh1 ptr, regB, val
ldrh \ptr, [\regB], \val
.endm
.macro strh1 ptr, regB, val
strh \ptr, [\regB], \val
.endm
.macro ldr1 ptr, regB, val
ldr \ptr, [\regB], \val
.endm
.macro str1 ptr, regB, val
str \ptr, [\regB], \val
.endm
.macro ldp1 ptr, regB, regC, val
ldp \ptr, \regB, [\regC], \val
.endm
.macro stp1 ptr, regB, regC, val
stp \ptr, \regB, [\regC], \val
.endm
.weak memcpy
ENTRY(____inline_memcpy)
ENTRY(__inline_memcpy)
#include "copy_template.S"
ret
ENDPIPROC(__inline_memcpy)
ENDPROC(____inline_memcpy)

arch/arm64/kernel/memory.c (3810 lines; diff suppressed because it is too large)

arch/arm64/kernel/memset.S

@ -0,0 +1,220 @@
/* memset.S COPYRIGHT FUJITSU LIMITED 2017 */
/*
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linkage.h>
#include <assembler.h>
#include <cache.h>
/*
* Fill in the buffer with character c (alignment handled by the hardware)
*
* Parameters:
* x0 - buf
* x1 - c
* x2 - n
* Returns:
* x0 - buf
*/
dstin .req x0
val .req w1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
zva_len_x .req x5
zva_len .req w5
zva_bits_x .req x6
A_l .req x7
A_lw .req w7
dst .req x8
tmp3w .req w9
tmp3 .req x9
.weak memset
ENTRY(____inline_memset)
ENTRY(__inline_memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
orr A_lw, A_lw, A_lw, lsl #16
orr A_l, A_l, A_l, lsl #32
cmp count, #15
b.hi .Lover16_proc
/*All store maybe are non-aligned..*/
tbz count, #3, 1f
str A_l, [dst], #8
1:
tbz count, #2, 2f
str A_lw, [dst], #4
2:
tbz count, #1, 3f
strh A_lw, [dst], #2
3:
tbz count, #0, 4f
strb A_lw, [dst]
4:
ret
.Lover16_proc:
/*Whether the start address is aligned with 16.*/
neg tmp2, dst
ands tmp2, tmp2, #15
b.eq .Laligned
/*
* The count is not less than 16, so we can use stp to store the first
* 16 bytes, then advance dst to the next 16-byte boundary. This leaves
* the current memory address aligned.
*/
stp A_l, A_l, [dst] /*non-aligned store..*/
/*make the dst aligned..*/
sub count, count, tmp2
add dst, dst, tmp2
.Laligned:
cbz A_l, .Lzero_mem
.Ltail_maybe_long:
cmp count, #64
b.ge .Lnot_short
.Ltail63:
ands tmp1, count, #0x30
b.eq 3f
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
stp A_l, A_l, [dst], #16
1:
stp A_l, A_l, [dst], #16
2:
stp A_l, A_l, [dst], #16
/*
* The remaining length is less than 16; use stp to write the last 16 bytes.
* Some bytes will be written twice and the access may be unaligned.
*/
3:
ands count, count, #15
cbz count, 4f
add dst, dst, count
stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
4:
ret
/*
* Critical loop. Start at a new cache line boundary. Assuming
* 64 bytes per line, this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lnot_short:
sub dst, dst, #16/* Pre-bias. */
sub count, count, #64
1:
stp A_l, A_l, [dst, #16]
stp A_l, A_l, [dst, #32]
stp A_l, A_l, [dst, #48]
stp A_l, A_l, [dst, #64]!
subs count, count, #64
b.ge 1b
tst count, #0x3f
add dst, dst, #16
b.ne .Ltail63
.Lexitfunc:
ret
/*
* For zeroing memory, check to see if we can use the ZVA feature to
* zero entire 'cache' lines.
*/
.Lzero_mem:
cmp count, #63
b.le .Ltail63
/*
* For zeroing small amounts of memory, it's not worth setting up
* the line-clear code.
*/
cmp count, #128
b.lt .Lnot_short /*count is at least 128 bytes*/
mrs tmp1, dczid_el0
tbnz tmp1, #4, .Lnot_short
mov tmp3w, #4
and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
lsl zva_len, tmp3w, zva_len
ands tmp3w, zva_len, #63
/*
* ensure the zva_len is not less than 64.
* It is not meaningful to use ZVA if the block size is less than 64.
*/
b.ne .Lnot_short
.Lzero_by_line:
/*
* Compute how far we need to go to become suitably aligned. We're
* already at quad-word alignment.
*/
cmp count, zva_len_x
b.lt .Lnot_short /* Not enough to reach alignment. */
sub zva_bits_x, zva_len_x, #1
neg tmp2, dst
ands tmp2, tmp2, zva_bits_x
b.eq 2f /* Already aligned. */
/* Not aligned, check that there's enough to copy after alignment.*/
sub tmp1, count, tmp2
/*
* Guarantee that the remaining length to be zeroed with ZVA is larger than
* 64 bytes, so the code at 2f does not run past the end of the range.*/
cmp tmp1, #64
ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
b.lt .Lnot_short
/*
* We know that there's at least 64 bytes to zero and that it's safe
* to overrun by 64 bytes.
*/
mov count, tmp1
1:
stp A_l, A_l, [dst]
stp A_l, A_l, [dst, #16]
stp A_l, A_l, [dst, #32]
subs tmp2, tmp2, #64
stp A_l, A_l, [dst, #48]
add dst, dst, #64
b.ge 1b
/* We've overrun a bit, so adjust dst downwards.*/
add dst, dst, tmp2
2:
sub count, count, zva_len_x
3:
dc zva, dst
add dst, dst, zva_len_x
subs count, count, zva_len_x
b.ge 3b
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
ENDPIPROC(__inline_memset)
ENDPROC(____inline_memset)
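
The .Lzero_mem path above reads DCZID_EL0 and derives the DC ZVA block size as 4 << (DCZID_EL0 & 0xf) bytes, skipping the ZVA path when bit 4 (DZP) is set or the block is smaller than 64 bytes. A standalone sketch of that decode with a hypothetical register value:

/* Illustrative sketch (not part of the repository): how the .Lzero_mem path
 * derives the DC ZVA block size from DCZID_EL0. The register value here is
 * hypothetical (BS=4, DZP clear), which yields 64-byte blocks. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t dczid   = 0x4;                    /* hypothetical DCZID_EL0 */
	int      dzp     = (dczid >> 4) & 1;       /* 1 => DC ZVA prohibited */
	uint32_t zva_len = 4u << (dczid & 0xf);    /* block size in bytes */

	if (dzp || zva_len < 64)
		printf("ZVA not used (dzp=%d, block=%u bytes)\n", dzp, zva_len);
	else
		printf("ZVA block size: %u bytes\n", zva_len);
	return 0;
}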

arch/arm64/kernel/mikc.c

@ -0,0 +1,44 @@
/* mikc.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#include <ihk/ikc.h>
#include <ihk/lock.h>
#include <ikc/msg.h>
#include <memory.h>
#include <string.h>
extern int num_processors;
extern void arch_set_mikc_queue(void *r, void *w);
ihk_ikc_ph_t arch_master_channel_packet_handler;
int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
ihk_ikc_ph_t packet_handler)
{
struct ihk_ikc_queue_head *rq, *wq;
size_t mikc_queue_pages;
ihk_ikc_system_init(NULL);
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((8 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */
rq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
wq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
ihk_ikc_init_queue(rq, 0, 0,
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
ihk_ikc_init_queue(wq, 0, 0,
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
arch_master_channel_packet_handler = packet_handler;
ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
ihk_ikc_master_channel_packet_handler, channel);
ihk_ikc_enable_channel(channel);
/* Set boot parameter */
arch_set_mikc_queue(rq, wq);
return 0;
}
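
ihk_mc_ikc_init_first_local() sizes each master-channel queue with a ceiling division: eight packets per processor, rounded up to whole pages. A standalone sketch of that arithmetic; the packet size and CPU count below are assumptions, not values from the repository:

/* Illustrative sketch (not part of the repository): the queue-size
 * calculation in ihk_mc_ikc_init_first_local(), with assumed constants. */
#include <stdio.h>

#define PAGE_SIZE            4096UL
#define MASTER_IKCQ_PKTSIZE  128UL   /* assumed packet size */

int main(void)
{
	int num_processors = 48;         /* hypothetical CPU count */
	unsigned long pages =
		(8 * num_processors * MASTER_IKCQ_PKTSIZE + PAGE_SIZE - 1) / PAGE_SIZE;

	/* 8 * 48 * 128 = 49152 bytes -> 12 pages per queue */
	printf("mikc_queue_pages = %lu\n", pages);
	return 0;
}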

arch/arm64/kernel/perfctr.c

@ -0,0 +1,297 @@
/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <arch-perfctr.h>
#include <ihk/perfctr.h>
#include <mc_perf_event.h>
#include <errno.h>
#include <ihk/debug.h>
#include <registers.h>
#include <string.h>
#include <ihk/mm.h>
#include <irq.h>
#include <process.h>
/*
* @ref.impl arch/arm64/kernel/perf_event.c
* Set at runtime when we know what CPU type we are.
*/
struct arm_pmu cpu_pmu;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
extern int ihk_param_nr_pmu_irq_affi;
int arm64_init_perfctr(void)
{
int ret;
int i;
int pages;
const struct ihk_mc_cpu_info *cpu_info;
memset(&cpu_pmu, 0, sizeof(cpu_pmu));
ret = armv8pmu_init(&cpu_pmu);
if (ret) {
return ret;
}
cpu_info = ihk_mc_get_cpu_info();
pages = (sizeof(struct per_cpu_arm_pmu) * cpu_info->ncpus +
PAGE_SIZE - 1) >> PAGE_SHIFT;
cpu_pmu.per_cpu = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if (cpu_pmu.per_cpu == NULL) {
return -ENOMEM;
}
memset(cpu_pmu.per_cpu, 0, pages * PAGE_SIZE);
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affi[i],
cpu_pmu.handler);
if (ret) {
break;
}
}
}
else {
ret = ihk_mc_register_interrupt_handler(INTRID_PERF_OVF,
cpu_pmu.handler);
}
return ret;
}
void arm64_init_per_cpu_perfctr(void)
{
armv8pmu_per_cpu_init(&cpu_pmu.per_cpu[ihk_mc_get_processor_id()]);
}
int arm64_enable_pmu(void)
{
int ret;
if (cpu_pmu.reset) {
cpu_pmu.reset(&cpu_pmu);
}
ret = cpu_pmu.enable_pmu();
return ret;
}
void arm64_disable_pmu(void)
{
cpu_pmu.disable_pmu();
}
void arm64_enable_user_access_pmu_regs(void)
{
cpu_pmu.enable_user_access_pmu_regs();
}
void arm64_disable_user_access_pmu_regs(void)
{
cpu_pmu.disable_user_access_pmu_regs();
}
static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, int mode)
{
int ret = -1;
unsigned long config_base = 0;
ret = cpu_pmu.disable_counter(1UL << counter);
if (ret < 0) {
return ret;
}
ret = cpu_pmu.enable_intens(1UL << counter);
if (ret < 0) {
return ret;
}
ret = cpu_pmu.set_event_filter(&config_base, mode);
if (ret) {
return ret;
}
config_base |= config;
cpu_pmu.write_evtype(counter, config_base);
return ret;
}
int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
{
int ret;
ret = __ihk_mc_perfctr_init(counter, PERF_TYPE_RAW, config, mode);
return ret;
}
int ihk_mc_perfctr_start(unsigned long counter_mask)
{
return cpu_pmu.enable_counter(counter_mask);
}
int ihk_mc_perfctr_stop(unsigned long counter_mask, int flags)
{
return cpu_pmu.disable_counter(counter_mask);
}
int ihk_mc_perfctr_reset(int counter)
{
cpu_pmu.write_counter(counter, 0);
return 0;
}
int ihk_mc_perfctr_set(int counter, long val)
{
uint32_t v = val;
cpu_pmu.write_counter(counter, v);
return 0;
}
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
{
/* this function not used yet. */
panic("not implemented.");
return 0;
}
int ihk_mc_perfctr_alloc(struct thread *thread, struct mc_perf_event *event)
{
const int counters = ihk_mc_perf_get_num_counters();
return cpu_pmu.get_event_idx(counters,
thread->pmc_alloc_map,
event->hw_config);
}
unsigned long ihk_mc_perfctr_read(int counter)
{
unsigned long count;
count = cpu_pmu.read_counter(counter);
return count;
}
unsigned long ihk_mc_perfctr_value(int counter, unsigned long correction)
{
unsigned long count = ihk_mc_perfctr_read(counter) + correction;
count &= ((1UL << 32) - 1);
return count;
}
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config,
unsigned long pmc_status)
{
int ret;
if (*type == PERF_TYPE_HARDWARE) {
switch (*config) {
case PERF_COUNT_HW_INSTRUCTIONS:
ret = cpu_pmu.map_event(*type, *config);
if (ret < 0) {
return -1;
}
*type = PERF_TYPE_RAW;
break;
default:
// Unexpected config
return -1;
}
}
else if (*type != PERF_TYPE_RAW) {
return -1;
}
ret = cpu_pmu.get_event_idx(get_per_cpu_pmu()->num_events, pmc_status,
*config);
return ret;
}
int ihk_mc_perf_counter_mask_check(unsigned long counter_mask)
{
return cpu_pmu.counter_mask_valid(counter_mask);
}
int ihk_mc_perf_get_num_counters(void)
{
const struct per_cpu_arm_pmu *per_cpu_arm_pmu = get_per_cpu_pmu();
return per_cpu_arm_pmu->num_events;
}
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
/* Nothing to do. */
return 0;
}
static inline uint64_t arm_pmu_event_max_period(struct mc_perf_event *event)
{
return 0xFFFFFFFF;
}
int hw_perf_event_init(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (!is_sampling_event(event)) {
hwc->sample_period = arm_pmu_event_max_period(event) >> 1;
hwc->last_period = hwc->sample_period;
ihk_atomic64_set(&hwc->period_left, hwc->sample_period);
}
return 0;
}
int ihk_mc_event_set_period(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int64_t left = ihk_atomic64_read(&hwc->period_left);
int64_t period = hwc->sample_period;
uint64_t max_period;
int ret = 0;
max_period = arm_pmu_event_max_period(event);
if (unlikely(left <= -period)) {
left = period;
ihk_atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
ihk_atomic64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
/*
* Limit the maximum period to prevent the counter value
* from overtaking the one we are about to program. In
* effect we are reducing max_period to account for
* interrupt latency (and we are being very conservative).
*/
if (left > (max_period >> 1))
left = (max_period >> 1);
ihk_atomic64_set(&hwc->prev_count, (uint64_t)-left);
cpu_pmu.write_counter(event->counter_id,
(uint64_t)(-left) & max_period);
return ret;
}
uint64_t ihk_mc_event_update(struct mc_perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int64_t delta;
uint64_t prev_raw_count, new_raw_count;
uint64_t max_period = arm_pmu_event_max_period(event);
again:
prev_raw_count = ihk_atomic64_read(&hwc->prev_count);
new_raw_count = cpu_pmu.read_counter(event->counter_id);
if (ihk_atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count - prev_raw_count) & max_period;
ihk_atomic64_add(delta, &event->count);
ihk_atomic64_add(-delta, &hwc->period_left);
return new_raw_count;
}
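
ihk_mc_event_set_period() programs the counter with (-left & max_period) so that an overflow interrupt fires after roughly `left` more events, and ihk_mc_event_update() recovers the event count since the last read by masking the difference with the same max_period. A standalone sketch of that wrap-around arithmetic with made-up numbers:

/* Illustrative sketch (not part of the repository): the (-left & max_period)
 * programming and the masked-delta read-back used by ihk_mc_event_set_period()
 * and ihk_mc_event_update(). All values are hypothetical. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t max_period = 0xFFFFFFFFULL;     /* 32-bit counter */
	int64_t  left = 100000;                        /* events until overflow */

	/* Program the counter so it overflows after 'left' events. */
	uint64_t prev = (uint64_t)(-left) & max_period;

	/* Later: the hardware has counted 12345 more events (it may also wrap). */
	uint64_t now   = (prev + 12345) & max_period;
	int64_t  delta = (now - prev) & max_period;    /* events since 'prev' */

	printf("programmed=%#lx now=%#lx delta=%ld\n", prev, now, delta);
	return 0;
}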


@ -0,0 +1,910 @@
/* perfctr_armv8pmu.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
#include <arch-perfctr.h>
#include <mc_perf_event.h>
#include <ihk/perfctr.h>
#include <errno.h>
#include <ihk/debug.h>
#include <sysreg.h>
#include <virt.h>
#include <bitops.h>
#include <string.h>
#include <signal.h>
#include <cls.h>
#include <process.h>
#define BIT(nr) (1UL << (nr))
//#define DEBUG_PRINT_PMU
#ifdef DEBUG_PRINT_PMU
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*
* read pmevcntr<n>_el0 functions
*/
#define read_pmevcntrN_el0(N) \
static uint32_t read_pmevcntr##N##_el0(void) \
{ \
return read_sysreg(pmevcntr##N##_el0); \
}
read_pmevcntrN_el0(0)
read_pmevcntrN_el0(1)
read_pmevcntrN_el0(2)
read_pmevcntrN_el0(3)
read_pmevcntrN_el0(4)
read_pmevcntrN_el0(5)
read_pmevcntrN_el0(6)
read_pmevcntrN_el0(7)
read_pmevcntrN_el0(8)
read_pmevcntrN_el0(9)
read_pmevcntrN_el0(10)
read_pmevcntrN_el0(11)
read_pmevcntrN_el0(12)
read_pmevcntrN_el0(13)
read_pmevcntrN_el0(14)
read_pmevcntrN_el0(15)
read_pmevcntrN_el0(16)
read_pmevcntrN_el0(17)
read_pmevcntrN_el0(18)
read_pmevcntrN_el0(19)
read_pmevcntrN_el0(20)
read_pmevcntrN_el0(21)
read_pmevcntrN_el0(22)
read_pmevcntrN_el0(23)
read_pmevcntrN_el0(24)
read_pmevcntrN_el0(25)
read_pmevcntrN_el0(26)
read_pmevcntrN_el0(27)
read_pmevcntrN_el0(28)
read_pmevcntrN_el0(29)
read_pmevcntrN_el0(30)
static uint32_t (* const read_pmevcntr_el0[])(void) = {
read_pmevcntr0_el0, read_pmevcntr1_el0, read_pmevcntr2_el0,
read_pmevcntr3_el0, read_pmevcntr4_el0, read_pmevcntr5_el0,
read_pmevcntr6_el0, read_pmevcntr7_el0, read_pmevcntr8_el0,
read_pmevcntr9_el0, read_pmevcntr10_el0, read_pmevcntr11_el0,
read_pmevcntr12_el0, read_pmevcntr13_el0, read_pmevcntr14_el0,
read_pmevcntr15_el0, read_pmevcntr16_el0, read_pmevcntr17_el0,
read_pmevcntr18_el0, read_pmevcntr19_el0, read_pmevcntr20_el0,
read_pmevcntr21_el0, read_pmevcntr22_el0, read_pmevcntr23_el0,
read_pmevcntr24_el0, read_pmevcntr25_el0, read_pmevcntr26_el0,
read_pmevcntr27_el0, read_pmevcntr28_el0, read_pmevcntr29_el0,
read_pmevcntr30_el0,
};
/*
* write pmevcntr<n>_el0 functions
*/
#define write_pmevcntrN_el0(N) \
static void write_pmevcntr##N##_el0(uint32_t v) \
{ \
write_sysreg(v, pmevcntr##N##_el0); \
}
write_pmevcntrN_el0(0)
write_pmevcntrN_el0(1)
write_pmevcntrN_el0(2)
write_pmevcntrN_el0(3)
write_pmevcntrN_el0(4)
write_pmevcntrN_el0(5)
write_pmevcntrN_el0(6)
write_pmevcntrN_el0(7)
write_pmevcntrN_el0(8)
write_pmevcntrN_el0(9)
write_pmevcntrN_el0(10)
write_pmevcntrN_el0(11)
write_pmevcntrN_el0(12)
write_pmevcntrN_el0(13)
write_pmevcntrN_el0(14)
write_pmevcntrN_el0(15)
write_pmevcntrN_el0(16)
write_pmevcntrN_el0(17)
write_pmevcntrN_el0(18)
write_pmevcntrN_el0(19)
write_pmevcntrN_el0(20)
write_pmevcntrN_el0(21)
write_pmevcntrN_el0(22)
write_pmevcntrN_el0(23)
write_pmevcntrN_el0(24)
write_pmevcntrN_el0(25)
write_pmevcntrN_el0(26)
write_pmevcntrN_el0(27)
write_pmevcntrN_el0(28)
write_pmevcntrN_el0(29)
write_pmevcntrN_el0(30)
static void (* const write_pmevcntr_el0[])(uint32_t) = {
write_pmevcntr0_el0, write_pmevcntr1_el0, write_pmevcntr2_el0,
write_pmevcntr3_el0, write_pmevcntr4_el0, write_pmevcntr5_el0,
write_pmevcntr6_el0, write_pmevcntr7_el0, write_pmevcntr8_el0,
write_pmevcntr9_el0, write_pmevcntr10_el0, write_pmevcntr11_el0,
write_pmevcntr12_el0, write_pmevcntr13_el0, write_pmevcntr14_el0,
write_pmevcntr15_el0, write_pmevcntr16_el0, write_pmevcntr17_el0,
write_pmevcntr18_el0, write_pmevcntr19_el0, write_pmevcntr20_el0,
write_pmevcntr21_el0, write_pmevcntr22_el0, write_pmevcntr23_el0,
write_pmevcntr24_el0, write_pmevcntr25_el0, write_pmevcntr26_el0,
write_pmevcntr27_el0, write_pmevcntr28_el0, write_pmevcntr29_el0,
write_pmevcntr30_el0,
};
/*
* write pmevtyper<n>_el0 functions
*/
#define write_pmevtyperN_el0(N) \
static void write_pmevtyper##N##_el0(uint32_t v) \
{ \
write_sysreg(v, pmevtyper##N##_el0); \
}
write_pmevtyperN_el0(0)
write_pmevtyperN_el0(1)
write_pmevtyperN_el0(2)
write_pmevtyperN_el0(3)
write_pmevtyperN_el0(4)
write_pmevtyperN_el0(5)
write_pmevtyperN_el0(6)
write_pmevtyperN_el0(7)
write_pmevtyperN_el0(8)
write_pmevtyperN_el0(9)
write_pmevtyperN_el0(10)
write_pmevtyperN_el0(11)
write_pmevtyperN_el0(12)
write_pmevtyperN_el0(13)
write_pmevtyperN_el0(14)
write_pmevtyperN_el0(15)
write_pmevtyperN_el0(16)
write_pmevtyperN_el0(17)
write_pmevtyperN_el0(18)
write_pmevtyperN_el0(19)
write_pmevtyperN_el0(20)
write_pmevtyperN_el0(21)
write_pmevtyperN_el0(22)
write_pmevtyperN_el0(23)
write_pmevtyperN_el0(24)
write_pmevtyperN_el0(25)
write_pmevtyperN_el0(26)
write_pmevtyperN_el0(27)
write_pmevtyperN_el0(28)
write_pmevtyperN_el0(29)
write_pmevtyperN_el0(30)
static void (* const write_pmevtyper_el0[])(uint32_t) = {
write_pmevtyper0_el0, write_pmevtyper1_el0, write_pmevtyper2_el0,
write_pmevtyper3_el0, write_pmevtyper4_el0, write_pmevtyper5_el0,
write_pmevtyper6_el0, write_pmevtyper7_el0, write_pmevtyper8_el0,
write_pmevtyper9_el0, write_pmevtyper10_el0, write_pmevtyper11_el0,
write_pmevtyper12_el0, write_pmevtyper13_el0, write_pmevtyper14_el0,
write_pmevtyper15_el0, write_pmevtyper16_el0, write_pmevtyper17_el0,
write_pmevtyper18_el0, write_pmevtyper19_el0, write_pmevtyper20_el0,
write_pmevtyper21_el0, write_pmevtyper22_el0, write_pmevtyper23_el0,
write_pmevtyper24_el0, write_pmevtyper25_el0, write_pmevtyper26_el0,
write_pmevtyper27_el0, write_pmevtyper28_el0, write_pmevtyper29_el0,
write_pmevtyper30_el0,
};
#define ARMV8_IDX_CYCLE_COUNTER 31
#define ARMV8_IDX_COUNTER0 0
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* Per-CPU PMCR: config reg
*/
#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
#define ARMV8_PMU_PMCR_N_MASK 0x1f
#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* PMOVSR: counters overflow flag status reg
*/
#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* PMXEVTYPER: Event selection reg
*/
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* Event filters for PMUv3
*/
#define ARMV8_PMU_EXCLUDE_EL1 (1 << 31)
#define ARMV8_PMU_EXCLUDE_EL0 (1 << 30)
#define ARMV8_PMU_INCLUDE_EL2 (1 << 27)
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* PMUSERENR: user enable reg
*/
#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/perf_event.h
* PMUv3 event types: required events
*/
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c
* ARMv8 PMUv3 Performance Events handling code.
* Common event types (some are defined in asm/perf_event.h).
*/
/* At least one of the following is required. */
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
/* Common architectural events. */
#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
/* Common microarchitectural events. */
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
/* @ref.impl linux-v4.15-rc3 include/linux/perf/arm_pmu.h */
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
#define PERF_MAP_ALL_UNSUPPORTED \
[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
[0 ... C(MAX) - 1] = { \
[0 ... C(OP_MAX) - 1] = { \
[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
}, \
}
/* PMUv3 HW events mapping. */
/* disable -Woverride-init for the following initializations */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverride-init"
/*
* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c
* ARMv8 Architectural defined events, not all of these may
* be supported on any given implementation. Undefined events will
* be disabled at run-time.
*/
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
/* restore warnings */
#pragma GCC diagnostic pop
/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */
static int
armpmu_map_cache_event(const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX],
uint64_t config)
{
unsigned int cache_type, cache_op, cache_result, ret;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
if (!cache_map)
return -ENOENT;
ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
return -ENOENT;
return ret;
}
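
armpmu_map_cache_event() above unpacks the perf cache-event config as type in bits 0-7, op in bits 8-15, and result in bits 16-23, then looks the triple up in the cache map. A standalone sketch composing such a config for an L1D read miss; the numeric IDs follow the Linux perf ABI and are stated here as assumptions:

/* Illustrative sketch (not part of the repository): compose the packed
 * cache-event config that armpmu_map_cache_event() decodes. */
#include <stdint.h>
#include <stdio.h>

enum { CACHE_L1D = 0 };             /* PERF_COUNT_HW_CACHE_L1D (assumed) */
enum { CACHE_OP_READ = 0 };         /* PERF_COUNT_HW_CACHE_OP_READ (assumed) */
enum { CACHE_RESULT_MISS = 1 };     /* PERF_COUNT_HW_CACHE_RESULT_MISS (assumed) */

int main(void)
{
	/* type in bits 0-7, op in bits 8-15, result in bits 16-23 */
	uint64_t config = CACHE_L1D | (CACHE_OP_READ << 8) |
			  ((uint64_t)CACHE_RESULT_MISS << 16);

	/* The cache map above would resolve this triple to
	 * ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL (0x03). */
	printf("cache config = %#lx\n", config);
	return 0;
}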
/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */
static int
armpmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
uint64_t config)
{
int mapping;
if (config >= PERF_COUNT_HW_MAX)
return -EINVAL;
if (!event_map)
return -ENOENT;
mapping = (*event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}
/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */
static int
armpmu_map_raw_event(uint32_t raw_event_mask, uint64_t config)
{
return (int)(config & raw_event_mask);
}
/* @ref.impl linux-v4.15-rc3 drivers/perf/arm_pmu.c */
static int
armpmu_map_event(uint32_t type, uint64_t config,
const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
const unsigned int (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX],
uint32_t raw_event_mask)
{
switch (type) {
case PERF_TYPE_HARDWARE:
return armpmu_map_hw_event(event_map, config);
case PERF_TYPE_HW_CACHE:
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
}
static inline int armv8pmu_counter_mask_valid(unsigned long counter_mask)
{
int num;
unsigned long event;
unsigned long cycle;
unsigned long invalid_mask;
num = get_per_cpu_pmu()->num_events;
num--; /* Sub the CPU cycles counter */
event = ((1UL << num) - 1) << ARMV8_IDX_COUNTER0;
cycle = 1UL << ARMV8_IDX_CYCLE_COUNTER;
invalid_mask = ~(event | cycle);
return !(counter_mask & invalid_mask);
}
static inline int armv8pmu_counter_valid(int idx)
{
return armv8pmu_counter_mask_valid(1UL << idx);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline uint32_t armv8pmu_getreset_flags(void)
{
uint32_t value;
/* Read */
value = read_sysreg(pmovsclr_el0);
/* Write to clear flags */
value &= ARMV8_PMU_OVSR_MASK;
write_sysreg(value, pmovsclr_el0);
return value;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline int armv8pmu_has_overflowed(uint32_t pmovsr)
{
return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
}
static inline int armv8pmu_counter_has_overflowed(uint32_t pmnc, int idx)
{
return pmnc & BIT(idx);
}
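The overflow flags are read and cleared in one step: armv8pmu_getreset_flags() reads PMOVSCLR_EL0 and writes the same bits back, which clears them. A simulated sketch of that write-to-clear pattern and the per-counter test (the register is modelled by a plain variable here):
#include <stdint.h>
#include <stdio.h>
/* Simulated PMOVSCLR_EL0: reads return the pending flags, writing a bit
 * back clears it (write-to-clear). */
static uint32_t fake_pmovsclr = (1u << 2) | (1u << 0);
static uint32_t getreset_flags(void)
{
	uint32_t value = fake_pmovsclr;   /* read_sysreg(pmovsclr_el0) */
	fake_pmovsclr &= ~value;          /* write_sysreg(value, pmovsclr_el0) */
	return value;
}
int main(void)
{
	uint32_t pmovsr = getreset_flags();
	int idx;
	for (idx = 0; idx < 32; idx++)
		if (pmovsr & (1u << idx))
			printf("counter %d overflowed\n", idx);
	printf("pending flags afterwards: %#x\n", fake_pmovsclr);   /* 0 */
	return 0;
}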
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static int __armv8_pmuv3_map_event(uint32_t type, uint64_t config,
const unsigned int (*extra_event_map)
[PERF_COUNT_HW_MAX],
const unsigned int (*extra_cache_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX])
{
int hw_event_id;
hw_event_id = armpmu_map_event(type, config, &armv8_pmuv3_perf_map,
&armv8_pmuv3_perf_cache_map,
ARMV8_PMU_EVTYPE_EVENT);
/* Only expose micro/arch events supported by this PMU */
if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
&& test_bit(hw_event_id, get_per_cpu_pmu()->pmceid_bitmap)) {
return hw_event_id;
}
return armpmu_map_event(type, config, extra_event_map, extra_cache_map,
ARMV8_PMU_EVTYPE_EVENT);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static int armv8_pmuv3_map_event(uint32_t type, uint64_t config)
{
return __armv8_pmuv3_map_event(type, config, NULL, NULL);
}
static int armv8_pmuv3_map_hw_event(uint64_t config)
{
return __armv8_pmuv3_map_event(PERF_TYPE_HARDWARE, config, NULL, NULL);
}
static int armv8_pmuv3_map_cache_event(uint64_t config)
{
return __armv8_pmuv3_map_event(PERF_TYPE_HW_CACHE, config, NULL, NULL);
}
static int armv8_pmuv3_map_raw_event(uint64_t config)
{
return __armv8_pmuv3_map_event(PERF_TYPE_RAW, config, NULL, NULL);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline uint32_t armv8pmu_pmcr_read(void)
{
return read_sysreg(pmcr_el0);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline void armv8pmu_pmcr_write(uint32_t val)
{
val &= ARMV8_PMU_PMCR_MASK;
isb();
write_sysreg(val, pmcr_el0);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline uint32_t armv8pmu_read_counter(int idx)
{
uint32_t value = 0;
if (!armv8pmu_counter_valid(idx)) {
ekprintf("%s: The count_register#%d is not implemented.\n",
__func__, idx);
}
else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
value = read_sysreg(pmccntr_el0);
}
else {
value = read_pmevcntr_el0[idx]();
}
return value;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline void armv8pmu_write_counter(int idx, uint32_t value)
{
if (!armv8pmu_counter_valid(idx)) {
ekprintf("%s: The count_register#%d is not implemented.\n",
__func__, idx);
}
else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
/*
* Set the upper 32bits as this is a 64bit counter but we only
* count using the lower 32bits and we want an interrupt when
* it overflows.
*/
uint64_t value64 = (int32_t)value;
write_sysreg(value64, pmccntr_el0);
}
else {
write_pmevcntr_el0[idx](value);
}
}
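The (int32_t) cast above is what makes the cycle-counter trick work: a 32-bit start value is sign-extended, so the upper half of the 64-bit counter is written as all ones and the whole register overflows after the intended number of increments. A small worked example of that sign extension:
#include <stdint.h>
#include <stdio.h>
int main(void)
{
	uint32_t period = 100000;
	uint32_t start  = (uint32_t)-(int32_t)period;   /* 0xfffe7960 */
	/* Sign-extend exactly as armv8pmu_write_counter() does. */
	uint64_t value64 = (uint64_t)(int64_t)(int32_t)start;
	printf("32-bit start  : %#x\n", start);
	printf("64-bit written: %#llx\n", (unsigned long long)value64);
	/* 0xfffffffffffe7960: overflows after "period" increments. */
	return 0;
}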
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline int armv8pmu_enable_intens(unsigned long counter_mask)
{
if (!armv8pmu_counter_mask_valid(counter_mask)) {
ekprintf("%s: invalid counter mask(%#lx)\n",
__func__, counter_mask);
return -EINVAL;
}
write_sysreg(counter_mask, pmintenset_el1);
return 0;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline int armv8pmu_disable_intens(unsigned long counter_mask)
{
if (!armv8pmu_counter_mask_valid(counter_mask)) {
ekprintf("%s: invalid counter mask(%#lx)\n",
__func__, counter_mask);
return -EINVAL;
}
write_sysreg(counter_mask, pmintenclr_el1);
isb();
/* Clear the overflow flag in case an interrupt is pending. */
write_sysreg(counter_mask, pmovsclr_el0);
isb();
return 0;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static int armv8pmu_set_event_filter(unsigned long *config_base, int mode)
{
/* exclude_idle mode is not used here; unsupported */
// if (attr->exclude_idle)
// return -EPERM;
/*
* If we're running in hyp mode, then we *are* the hypervisor.
* Therefore we ignore exclude_hv in this configuration, since
* there's no hypervisor to sample anyway. This is consistent
* with other architectures (x86 and Power).
*/
if (is_kernel_in_hyp_mode()) {
if (mode & PERFCTR_KERNEL_MODE)
*config_base |= ARMV8_PMU_INCLUDE_EL2;
} else {
if (!(mode & PERFCTR_KERNEL_MODE))
*config_base |= ARMV8_PMU_EXCLUDE_EL1;
/* exclude_hv mode is not used here; unsupported */
// if (!attr->exclude_hv)
// config_base |= ARMV8_PMU_INCLUDE_EL2;
}
if (!(mode & PERFCTR_USER_MODE))
*config_base |= ARMV8_PMU_EXCLUDE_EL0;
return 0;
}
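A sketch of the resulting filter bits for the non-hyp branch above. The bit positions and mode flags below are assumptions for illustration: the ARMv8 event-type register has single "exclude EL1" / "exclude EL0" filter bits, and PERFCTR_USER_MODE / PERFCTR_KERNEL_MODE are modelled by placeholder flags:
#include <stdio.h>
#define PMU_EXCLUDE_EL1 (1UL << 31)   /* assumed bit positions */
#define PMU_EXCLUDE_EL0 (1UL << 30)
#define MODE_USER   0x1               /* placeholder mode flags */
#define MODE_KERNEL 0x2
static unsigned long event_filter(int mode)
{
	unsigned long config_base = 0;
	if (!(mode & MODE_KERNEL))
		config_base |= PMU_EXCLUDE_EL1;   /* don't count kernel (EL1) */
	if (!(mode & MODE_USER))
		config_base |= PMU_EXCLUDE_EL0;   /* don't count user (EL0) */
	return config_base;
}
int main(void)
{
	printf("user only   : %#lx\n", event_filter(MODE_USER));
	printf("kernel+user : %#lx\n", event_filter(MODE_USER | MODE_KERNEL));
	printf("kernel only : %#lx\n", event_filter(MODE_KERNEL));
	return 0;
}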
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline void armv8pmu_write_evtype(int idx, uint32_t val)
{
if (!armv8pmu_counter_valid(idx)) {
ekprintf("%s: The count_register#%d is not implemented.\n",
__func__, idx);
return;
} else if (idx != ARMV8_IDX_CYCLE_COUNTER) {
write_pmevtyper_el0[idx](val);
}
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline int armv8pmu_enable_counter(unsigned long counter_mask)
{
if (!armv8pmu_counter_mask_valid(counter_mask)) {
ekprintf("%s: invalid counter mask 0x%lx.\n",
__func__, counter_mask);
return -EINVAL;
}
write_sysreg(counter_mask, pmcntenset_el0);
return 0;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static inline int armv8pmu_disable_counter(unsigned long counter_mask)
{
if (!armv8pmu_counter_mask_valid(counter_mask)) {
ekprintf("%s: invalid counter mask 0x%lx.\n",
__func__, counter_mask);
return -EINVAL;
}
write_sysreg(counter_mask, pmcntenclr_el0);
return 0;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static ihk_spinlock_t pmu_lock = SPIN_LOCK_UNLOCKED;
static int armv8pmu_start(void)
{
unsigned long flags;
flags = ihk_mc_spinlock_lock(&pmu_lock);
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
ihk_mc_spinlock_unlock(&pmu_lock, flags);
return 0;
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static void armv8pmu_stop(void)
{
unsigned long flags;
flags = ihk_mc_spinlock_lock(&pmu_lock);
/* Disable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
ihk_mc_spinlock_unlock(&pmu_lock, flags);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
uint32_t nb_cnt =
cpu_pmu->per_cpu[ihk_mc_get_processor_id()].num_events;
nb_cnt--; /* Subtract the CPU cycles counter */
unsigned long event = ((1UL << nb_cnt) - 1) << ARMV8_IDX_COUNTER0;
unsigned long cycle = 1UL << ARMV8_IDX_CYCLE_COUNTER;
unsigned long valid_mask = event | cycle;
/* The counter and interrupt enable registers are unknown at reset. */
armv8pmu_disable_counter(valid_mask);
armv8pmu_disable_intens(valid_mask);
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
*/
armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
ARMV8_PMU_PMCR_LC);
}
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c */
static int armv8pmu_get_event_idx(int num_events, unsigned long used_mask,
unsigned long config)
{
int idx, end;
unsigned long evtype = config & ARMV8_PMU_EVTYPE_EVENT;
/* Always prefer to place a cycle counter into the cycle counter. */
if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
if (!(used_mask & (1UL << ARMV8_IDX_CYCLE_COUNTER)))
return ARMV8_IDX_CYCLE_COUNTER;
}
/*
* Otherwise use the event counters
*/
end = ARMV8_IDX_COUNTER0 + num_events;
end--; /* Subtract the CPU cycles counter */
for (idx = ARMV8_IDX_COUNTER0; idx < end; ++idx) {
if (!(used_mask & (1UL << idx)))
return idx;
}
/* The counters are all in use. */
return -EAGAIN;
}
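A standalone sketch of the allocation policy above: the CPU-cycles event prefers the dedicated cycle counter, everything else takes the first free generic counter (index layout assumed as before; pick_idx is an illustrative stand-in for armv8pmu_get_event_idx):
#include <stdio.h>
#define IDX_CYCLE_COUNTER 0   /* assumed layout */
#define IDX_COUNTER0      1
static int pick_idx(int num_counters, unsigned long used, int is_cycles)
{
	int idx;
	if (is_cycles && !(used & (1UL << IDX_CYCLE_COUNTER)))
		return IDX_CYCLE_COUNTER;
	for (idx = IDX_COUNTER0; idx < IDX_COUNTER0 + num_counters; idx++)
		if (!(used & (1UL << idx)))
			return idx;
	return -1;   /* all busy; the kernel code returns -EAGAIN */
}
int main(void)
{
	unsigned long used = 0;
	used |= 1UL << pick_idx(6, used, 1);   /* cycles -> index 0 */
	used |= 1UL << pick_idx(6, used, 0);   /* event  -> index 1 */
	printf("used mask after two events: %#lx\n", used);   /* 0x3 */
	return 0;
}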
/* @ref.impl linux-v4.15-rc3 arch/arm64/kernel/perf_event.c:__armv8pmu_probe_pmu() */
/* Extract get num_events processing. */
static uint32_t armv8pmu_read_num_pmnc_events(void)
{
uint32_t num_events = 0;
/* Read the nb of CNTx counters supported from PMNC */
num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
& ARMV8_PMU_PMCR_N_MASK;
/* Add the CPU cycles counter */
num_events += 1;
return num_events;
}
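The counter count comes from the PMCR_EL0.N field. A decode of a representative PMCR value, with the field position assumed to be bits [15:11] as in the referenced driver:
#include <stdint.h>
#include <stdio.h>
#define PMCR_N_SHIFT 11     /* assumed field position */
#define PMCR_N_MASK  0x1f
int main(void)
{
	uint32_t pmcr = 0x41033000;   /* sample PMCR_EL0 value, N = 6 */
	uint32_t n = (pmcr >> PMCR_N_SHIFT) & PMCR_N_MASK;
	printf("event counters: %u, plus cycle counter: %u\n", n, n + 1);
	return 0;
}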
static void armv8pmu_handle_irq(void *priv)
{
uint32_t pmovsr;
struct thread *thread = cpu_local_var(current);
struct process *proc = thread->proc;
const struct per_cpu_arm_pmu *cpu_pmu = get_per_cpu_pmu();
int idx;
/*
* Get and reset the IRQ flags
*/
pmovsr = armv8pmu_getreset_flags();
/*
* Did an overflow occur?
*/
if (!armv8pmu_has_overflowed(pmovsr))
return;
if (!proc->monitoring_event) {
return;
}
/*
* Handle the counter(s) overflow(s)
*/
for (idx = 0; idx < cpu_pmu->num_events; idx++) {
struct mc_perf_event *event = NULL;
struct mc_perf_event *sub;
if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) {
continue;
}
if (proc->monitoring_event->counter_id == idx) {
event = proc->monitoring_event;
} else {
list_for_each_entry(sub,
&proc->monitoring_event->sibling_list,
group_entry) {
if (sub->counter_id == idx) {
event = sub;
break;
}
}
}
if (!event) {
continue;
}
ihk_mc_event_update(event);
ihk_mc_event_set_period(event);
}
return;
}
static void armv8pmu_enable_user_access_pmu_regs(void)
{
uint32_t value = 0;
value = read_sysreg(pmuserenr_el0);
write_sysreg(value | (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR),
pmuserenr_el0);
}
static void armv8pmu_disable_user_access_pmu_regs(void)
{
uint32_t value = 0;
value = read_sysreg(pmuserenr_el0);
write_sysreg(value & ~(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR),
pmuserenr_el0);
}
static void armv8pmu_create_pmceid_bitmap(unsigned long *bitmap, uint32_t nbits)
{
uint32_t pmceid[2];
memset(bitmap, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
pmceid[0] = read_sysreg(pmceid0_el0);
bitmap[0] = (unsigned long)pmceid[0];
pmceid[1] = read_sysreg(pmceid1_el0);
bitmap[0] |= (unsigned long)pmceid[1] << 32;
}
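The two 32-bit ID registers are folded into one 64-bit bitmap: PMCEID0_EL0 advertises common events 0x00-0x1f and PMCEID1_EL0 events 0x20-0x3f, one bit per event number. A sketch with sample register values (the values themselves are made up for illustration):
#include <stdint.h>
#include <stdio.h>
int main(void)
{
	uint32_t pmceid0 = 0x000fffff;   /* sample: events 0x00-0x13 implemented */
	uint32_t pmceid1 = 0x00000001;   /* sample: event 0x20 implemented */
	uint64_t bitmap = (uint64_t)pmceid0 | ((uint64_t)pmceid1 << 32);
	unsigned int ev = 0x11;          /* CPU_CYCLES */
	printf("event %#x supported: %d\n", ev, (int)((bitmap >> ev) & 1));
	return 0;
}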
static struct ihk_mc_interrupt_handler armv8pmu_handler = {
.func = armv8pmu_handle_irq,
.priv = NULL,
};
int armv8pmu_init(struct arm_pmu* cpu_pmu)
{
cpu_pmu->read_counter = armv8pmu_read_counter;
cpu_pmu->write_counter = armv8pmu_write_counter;
cpu_pmu->reset = armv8pmu_reset;
cpu_pmu->enable_pmu = armv8pmu_start;
cpu_pmu->disable_pmu = armv8pmu_stop;
cpu_pmu->enable_counter = armv8pmu_enable_counter;
cpu_pmu->disable_counter = armv8pmu_disable_counter;
cpu_pmu->enable_intens = armv8pmu_enable_intens;
cpu_pmu->disable_intens = armv8pmu_disable_intens;
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->write_evtype = armv8pmu_write_evtype;
cpu_pmu->get_event_idx = armv8pmu_get_event_idx;
cpu_pmu->map_event = armv8_pmuv3_map_event;
cpu_pmu->map_hw_event = armv8_pmuv3_map_hw_event;
cpu_pmu->map_cache_event = armv8_pmuv3_map_cache_event;
cpu_pmu->map_raw_event = armv8_pmuv3_map_raw_event;
cpu_pmu->enable_user_access_pmu_regs =
armv8pmu_enable_user_access_pmu_regs;
cpu_pmu->disable_user_access_pmu_regs =
armv8pmu_disable_user_access_pmu_regs;
cpu_pmu->handler = &armv8pmu_handler;
cpu_pmu->counter_mask_valid = &armv8pmu_counter_mask_valid;
return 0;
}
void armv8pmu_per_cpu_init(struct per_cpu_arm_pmu *per_cpu)
{
per_cpu->num_events = armv8pmu_read_num_pmnc_events();
armv8pmu_create_pmceid_bitmap(per_cpu->pmceid_bitmap,
ARMV8_PMUV3_MAX_COMMON_EVENTS);
}

postk_print_sysreg.c (new file, 311 lines)
@@ -0,0 +1,311 @@
/* postk_print_sysreg.c COPYRIGHT FUJITSU LIMITED 2016 */
/*
* usage:
* (gdb) call/x postk_debug_sysreg_ttbr1_el1()
* $1 = 0x4e64f000
*/
#define postk_debug_sysreg(sysreg) __postk_debug_sysreg(sysreg, sysreg)
#define __postk_debug_sysreg(fname, regname) \
unsigned long postk_debug_sysreg_ ## fname (void) \
{ \
unsigned long sysreg; \
asm volatile( \
"mrs %0, " # regname "\n" \
: "=r" (sysreg) \
: \
: "memory"); \
return sysreg; \
}
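For clarity, this is what the macro generates for a single register; postk_debug_sysreg(ttbr1_el1) expands to the following function (AArch64 only, since it issues an mrs instruction):
unsigned long postk_debug_sysreg_ttbr1_el1(void)
{
	unsigned long sysreg;
	asm volatile(
		"mrs %0, ttbr1_el1\n"
		: "=r" (sysreg)
		:
		: "memory");
	return sysreg;
}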
/*
* ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile (Errata markup, Beta)
* - Table J-5 Alphabetical index of AArch64 Registers
*/
postk_debug_sysreg(actlr_el1)
postk_debug_sysreg(actlr_el2)
postk_debug_sysreg(actlr_el3)
postk_debug_sysreg(afsr0_el1)
postk_debug_sysreg(afsr0_el2)
postk_debug_sysreg(afsr0_el3)
postk_debug_sysreg(afsr1_el1)
postk_debug_sysreg(afsr1_el2)
postk_debug_sysreg(afsr1_el3)
postk_debug_sysreg(aidr_el1)
postk_debug_sysreg(amair_el1)
postk_debug_sysreg(amair_el2)
postk_debug_sysreg(amair_el3)
/*postk_debug_sysreg(at s12e0r)*/
/*postk_debug_sysreg(at s12e0w)*/
/*postk_debug_sysreg(at s12e1r)*/
/*postk_debug_sysreg(at s12e1w)*/
/*postk_debug_sysreg(at s1e0r)*/
/*postk_debug_sysreg(at s1e0w)*/
/*postk_debug_sysreg(at s1e1r)*/
/*postk_debug_sysreg(at s1e1w)*/
/*postk_debug_sysreg(at s1e2r)*/
/*postk_debug_sysreg(at s1e2w)*/
/*postk_debug_sysreg(at s1e3r)*/
/*postk_debug_sysreg(at s1e3w)*/
postk_debug_sysreg(ccsidr_el1)
postk_debug_sysreg(clidr_el1)
postk_debug_sysreg(cntfrq_el0)
postk_debug_sysreg(cnthctl_el2)
postk_debug_sysreg(cnthp_ctl_el2)
postk_debug_sysreg(cnthp_cval_el2)
postk_debug_sysreg(cnthp_tval_el2)
postk_debug_sysreg(cntkctl_el1)
postk_debug_sysreg(cntp_ctl_el0)
postk_debug_sysreg(cntp_cval_el0)
postk_debug_sysreg(cntp_tval_el0)
postk_debug_sysreg(cntpct_el0)
postk_debug_sysreg(cntps_ctl_el1)
postk_debug_sysreg(cntps_cval_el1)
postk_debug_sysreg(cntps_tval_el1)
postk_debug_sysreg(cntv_ctl_el0)
postk_debug_sysreg(cntv_cval_el0)
postk_debug_sysreg(cntv_tval_el0)
postk_debug_sysreg(cntvct_el0)
postk_debug_sysreg(cntvoff_el2)
postk_debug_sysreg(contextidr_el1)
postk_debug_sysreg(cpacr_el1)
postk_debug_sysreg(cptr_el2)
postk_debug_sysreg(cptr_el3)
postk_debug_sysreg(csselr_el1)
postk_debug_sysreg(ctr_el0)
postk_debug_sysreg(currentel)
postk_debug_sysreg(dacr32_el2)
postk_debug_sysreg(daif)
postk_debug_sysreg(dbgauthstatus_el1)
/*postk_debug_sysreg(dbgbcr<n>_el1)*/
/*postk_debug_sysreg(dbgbvr<n>_el1)*/
postk_debug_sysreg(dbgclaimclr_el1)
postk_debug_sysreg(dbgclaimset_el1)
postk_debug_sysreg(dbgdtr_el0)
postk_debug_sysreg(dbgdtrrx_el0)
postk_debug_sysreg(dbgdtrtx_el0)
postk_debug_sysreg(dbgprcr_el1)
postk_debug_sysreg(dbgvcr32_el2)
/*postk_debug_sysreg(dbgwcr<n>_el1)*/
/*postk_debug_sysreg(dbgwvr<n>_el1)*/
/*postk_debug_sysreg(dc cisw)*/
/*postk_debug_sysreg(dc civac)*/
/*postk_debug_sysreg(dc csw)*/
/*postk_debug_sysreg(dc cvac)*/
/*postk_debug_sysreg(dc cvau)*/
/*postk_debug_sysreg(dc isw)*/
/*postk_debug_sysreg(dc ivac)*/
/*postk_debug_sysreg(dc zva)*/
postk_debug_sysreg(dczid_el0)
postk_debug_sysreg(dlr_el0)
postk_debug_sysreg(dspsr_el0)
postk_debug_sysreg(elr_el1)
postk_debug_sysreg(elr_el2)
postk_debug_sysreg(elr_el3)
postk_debug_sysreg(esr_el1)
postk_debug_sysreg(esr_el2)
postk_debug_sysreg(esr_el3)
postk_debug_sysreg(far_el1)
postk_debug_sysreg(far_el2)
postk_debug_sysreg(far_el3)
postk_debug_sysreg(fpcr)
postk_debug_sysreg(fpexc32_el2)
postk_debug_sysreg(fpsr)
postk_debug_sysreg(hacr_el2)
postk_debug_sysreg(hcr_el2)
postk_debug_sysreg(hpfar_el2)
postk_debug_sysreg(hstr_el2)
/*postk_debug_sysreg(ic iallu)*/
/*postk_debug_sysreg(ic ialluis)*/
/*postk_debug_sysreg(ic ivau)*/
/*postk_debug_sysreg(icc_ap0r0_el1)*/
/*postk_debug_sysreg(icc_ap0r1_el1)*/
/*postk_debug_sysreg(icc_ap0r2_el1)*/
/*postk_debug_sysreg(icc_ap0r3_el1)*/
/*postk_debug_sysreg(icc_ap1r0_el1)*/
/*postk_debug_sysreg(icc_ap1r1_el1)*/
/*postk_debug_sysreg(icc_ap1r2_el1)*/
/*postk_debug_sysreg(icc_ap1r3_el1)*/
/*postk_debug_sysreg(icc_asgi1r_el1)*/
/*postk_debug_sysreg(icc_bpr0_el1)*/
/*postk_debug_sysreg(icc_bpr1_el1)*/
/*postk_debug_sysreg(icc_ctlr_el1)*/
/*postk_debug_sysreg(icc_ctlr_el3)*/
/*postk_debug_sysreg(icc_dir_el1)*/
/*postk_debug_sysreg(icc_eoir0_el1)*/
/*postk_debug_sysreg(icc_eoir1_el1)*/
/*postk_debug_sysreg(icc_hppir0_el1)*/
/*postk_debug_sysreg(icc_hppir1_el1)*/
/*postk_debug_sysreg(icc_iar0_el1)*/
/*postk_debug_sysreg(icc_iar1_el1)*/
/*postk_debug_sysreg(icc_igrpen0_el1)*/
/*postk_debug_sysreg(icc_igrpen1_el1)*/
/*postk_debug_sysreg(icc_igrpen1_el3)*/
/*postk_debug_sysreg(icc_pmr_el1)*/
/*postk_debug_sysreg(icc_rpr_el1)*/
/*postk_debug_sysreg(icc_seien_el1)*/
/*postk_debug_sysreg(icc_sgi0r_el1)*/
/*postk_debug_sysreg(icc_sgi1r_el1)*/
/*postk_debug_sysreg(icc_sre_el1)*/
/*postk_debug_sysreg(icc_sre_el2)*/
/*postk_debug_sysreg(icc_sre_el3)*/
/*postk_debug_sysreg(ich_ap0r0_el2)*/
/*postk_debug_sysreg(ich_ap0r1_el2)*/
/*postk_debug_sysreg(ich_ap0r2_el2)*/
/*postk_debug_sysreg(ich_ap0r3_el2)*/
/*postk_debug_sysreg(ich_ap1r0_el2)*/
/*postk_debug_sysreg(ich_ap1r1_el2)*/
/*postk_debug_sysreg(ich_ap1r2_el2)*/
/*postk_debug_sysreg(ich_ap1r3_el2)*/
/*postk_debug_sysreg(ich_eisr_el2)*/
/*postk_debug_sysreg(ich_elsr_el2)*/
/*postk_debug_sysreg(ich_hcr_el2)*/
/*postk_debug_sysreg(ich_lr<n>_el2)*/
/*postk_debug_sysreg(ich_misr_el2)*/
/*postk_debug_sysreg(ich_vmcr_el2)*/
/*postk_debug_sysreg(ich_vseir_el2)*/
/*postk_debug_sysreg(ich_vtr_el2)*/
postk_debug_sysreg(id_aa64afr0_el1)
postk_debug_sysreg(id_aa64afr1_el1)
postk_debug_sysreg(id_aa64dfr0_el1)
postk_debug_sysreg(id_aa64dfr1_el1)
postk_debug_sysreg(id_aa64isar0_el1)
postk_debug_sysreg(id_aa64isar1_el1)
postk_debug_sysreg(id_aa64mmfr0_el1)
postk_debug_sysreg(id_aa64mmfr1_el1)
postk_debug_sysreg(id_aa64pfr0_el1)
postk_debug_sysreg(id_aa64pfr1_el1)
postk_debug_sysreg(id_afr0_el1)
postk_debug_sysreg(id_dfr0_el1)
postk_debug_sysreg(id_isar0_el1)
postk_debug_sysreg(id_isar1_el1)
postk_debug_sysreg(id_isar2_el1)
postk_debug_sysreg(id_isar3_el1)
postk_debug_sysreg(id_isar4_el1)
postk_debug_sysreg(id_isar5_el1)
postk_debug_sysreg(id_mmfr0_el1)
postk_debug_sysreg(id_mmfr1_el1)
postk_debug_sysreg(id_mmfr2_el1)
postk_debug_sysreg(id_mmfr3_el1)
postk_debug_sysreg(id_pfr0_el1)
postk_debug_sysreg(id_pfr1_el1)
postk_debug_sysreg(ifsr32_el2)
postk_debug_sysreg(isr_el1)
postk_debug_sysreg(mair_el1)
postk_debug_sysreg(mair_el2)
postk_debug_sysreg(mair_el3)
postk_debug_sysreg(mdccint_el1)
postk_debug_sysreg(mdccsr_el0)
postk_debug_sysreg(mdcr_el2)
postk_debug_sysreg(mdcr_el3)
postk_debug_sysreg(mdrar_el1)
postk_debug_sysreg(mdscr_el1)
postk_debug_sysreg(midr_el1)
postk_debug_sysreg(mpidr_el1)
postk_debug_sysreg(mvfr0_el1)
postk_debug_sysreg(mvfr1_el1)
postk_debug_sysreg(mvfr2_el1)
postk_debug_sysreg(nzcv)
postk_debug_sysreg(osdlr_el1)
postk_debug_sysreg(osdtrrx_el1)
postk_debug_sysreg(osdtrtx_el1)
postk_debug_sysreg(oseccr_el1)
postk_debug_sysreg(oslar_el1)
postk_debug_sysreg(oslsr_el1)
postk_debug_sysreg(par_el1)
postk_debug_sysreg(pmccfiltr_el0)
postk_debug_sysreg(pmccntr_el0)
postk_debug_sysreg(pmceid0_el0)
postk_debug_sysreg(pmceid1_el0)
postk_debug_sysreg(pmcntenclr_el0)
postk_debug_sysreg(pmcntenset_el0)
postk_debug_sysreg(pmcr_el0)
/*postk_debug_sysreg(pmevcntr<n>_el0)*/
/*postk_debug_sysreg(pmevtyper<n>_el0)*/
postk_debug_sysreg(pmintenclr_el1)
postk_debug_sysreg(pmintenset_el1)
postk_debug_sysreg(pmovsclr_el0)
postk_debug_sysreg(pmovsset_el0)
postk_debug_sysreg(pmselr_el0)
postk_debug_sysreg(pmswinc_el0)
postk_debug_sysreg(pmuserenr_el0)
postk_debug_sysreg(pmxevcntr_el0)
postk_debug_sysreg(pmxevtyper_el0)
postk_debug_sysreg(revidr_el1)
postk_debug_sysreg(rmr_el1)
postk_debug_sysreg(rmr_el2)
postk_debug_sysreg(rmr_el3)
postk_debug_sysreg(rvbar_el1)
postk_debug_sysreg(rvbar_el2)
postk_debug_sysreg(rvbar_el3)
/*postk_debug_sysreg(s3_<op1>_<cn>_<cm>_<op2>)*/
postk_debug_sysreg(scr_el3)
postk_debug_sysreg(sctlr_el1)
postk_debug_sysreg(sctlr_el2)
postk_debug_sysreg(sctlr_el3)
postk_debug_sysreg(sder32_el3)
postk_debug_sysreg(sp_el0)
postk_debug_sysreg(sp_el1)
postk_debug_sysreg(sp_el2)
/*postk_debug_sysreg(sp_el3)*/
postk_debug_sysreg(spsel)
postk_debug_sysreg(spsr_abt)
postk_debug_sysreg(spsr_el1)
postk_debug_sysreg(spsr_el2)
postk_debug_sysreg(spsr_el3)
postk_debug_sysreg(spsr_fiq)
postk_debug_sysreg(spsr_irq)
postk_debug_sysreg(spsr_und)
postk_debug_sysreg(tcr_el1)
postk_debug_sysreg(tcr_el2)
postk_debug_sysreg(tcr_el3)
postk_debug_sysreg(teecr32_el1)
postk_debug_sysreg(teehbr32_el1)
/*postk_debug_sysreg(tlbi alle1)*/
/*postk_debug_sysreg(tlbi alle1is)*/
/*postk_debug_sysreg(tlbi alle2)*/
/*postk_debug_sysreg(tlbi alle2is)*/
/*postk_debug_sysreg(tlbi alle3)*/
/*postk_debug_sysreg(tlbi alle3is)*/
/*postk_debug_sysreg(tlbi aside1)*/
/*postk_debug_sysreg(tlbi aside1is)*/
/*postk_debug_sysreg(tlbi ipas2e1)*/
/*postk_debug_sysreg(tlbi ipas2e1is)*/
/*postk_debug_sysreg(tlbi ipas2le1)*/
/*postk_debug_sysreg(tlbi ipas2le1is)*/
/*postk_debug_sysreg(tlbi vaae1)*/
/*postk_debug_sysreg(tlbi vaae1is)*/
/*postk_debug_sysreg(tlbi vaale1)*/
/*postk_debug_sysreg(tlbi vaale1is)*/
/*postk_debug_sysreg(tlbi vae1)*/
/*postk_debug_sysreg(tlbi vae1is)*/
/*postk_debug_sysreg(tlbi vae2)*/
/*postk_debug_sysreg(tlbi vae2is)*/
/*postk_debug_sysreg(tlbi vae3)*/
/*postk_debug_sysreg(tlbi vae3is)*/
/*postk_debug_sysreg(tlbi vale1)*/
/*postk_debug_sysreg(tlbi vale1is)*/
/*postk_debug_sysreg(tlbi vale2)*/
/*postk_debug_sysreg(tlbi vale2is)*/
/*postk_debug_sysreg(tlbi vale3)*/
/*postk_debug_sysreg(tlbi vale3is)*/
/*postk_debug_sysreg(tlbi vmalle1)*/
/*postk_debug_sysreg(tlbi vmalle1is)*/
/*postk_debug_sysreg(tlbi vmalls12e1)*/
/*postk_debug_sysreg(tlbi vmalls12e1is)*/
postk_debug_sysreg(tpidr_el0)
postk_debug_sysreg(tpidr_el1)
postk_debug_sysreg(tpidr_el2)
postk_debug_sysreg(tpidr_el3)
postk_debug_sysreg(tpidrro_el0)
postk_debug_sysreg(ttbr0_el1)
postk_debug_sysreg(ttbr0_el2)
postk_debug_sysreg(ttbr0_el3)
postk_debug_sysreg(ttbr1_el1)
postk_debug_sysreg(vbar_el1)
postk_debug_sysreg(vbar_el2)
postk_debug_sysreg(vbar_el3)
postk_debug_sysreg(vmpidr_el2)
postk_debug_sysreg(vpidr_el2)
postk_debug_sysreg(vtcr_el2)
postk_debug_sysreg(vttbr_el2)

proc-macros.S (new file, 13 lines)
@@ -0,0 +1,13 @@
/* proc-macros.S COPYRIGHT FUJITSU LIMITED 2015 */
#include <arch-memory.h>
/*
* dcache_line_size - get the minimum D-cache line size from the CTR register.
*/
.macro dcache_line_size, reg, tmp
mrs \tmp, ctr_el0 // read CTR
ubfm \tmp, \tmp, #16, #19 // cache line size encoding
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm

arch/arm64/kernel/proc.S (new file, 148 lines)
@@ -0,0 +1,148 @@
/* proc.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
#include <linkage.h>
#include <arch-memory.h>
#include <sysreg.h>
#include <assembler.h>
#include "proc-macros.S"
#ifdef CONFIG_ARM64_64K_PAGES
# define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
#else
# define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
#endif
//#ifdef CONFIG_SMP
#define TCR_SMP_FLAGS TCR_SHARED
//#else
//#define TCR_SMP_FLAGS 0
//#endif
/* PTWs cacheable, inner/outer WBWA */
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
#define MAIR(attr, mt) ((attr) << ((mt) * 8))
/*
* cpu_do_idle()
*
* Idle the processor (wait for interrupt).
*/
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
ENTRY(__cpu_do_idle)
mrs x0, daif // save I bit
msr daifset, #2 // set I bit
mrs_s x1, ICC_PMR_EL1 // save PMR
mov x2, #ICC_PMR_EL1_UNMASKED
msr_s ICC_PMR_EL1, x2 // unmask at PMR
dsb sy // WFI may enter a low-power mode
wfi
msr_s ICC_PMR_EL1, x1 // restore PMR
msr daif, x0 // restore I bit
ret
ENDPROC(__cpu_do_idle)
#else /* defined(CONFIG_HAS_NMI) */
ENTRY(__cpu_do_idle)
dsb sy // WFI may enter a low-power mode
wfi
ret
ENDPROC(__cpu_do_idle)
#endif /* defined(CONFIG_HAS_NMI) */
/*
* cpu_do_switch_mm(pgd_phys, tsk)
*
* Set the translation table base pointer to be pgd_phys.
*
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
//mmid w1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
ret
ENDPROC(cpu_do_switch_mm)
.section ".text.init", #alloc, #execinstr
/*
* __cpu_setup
*
* Initialise the processor for turning the MMU on. Return in x0 the
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
mov x0, #3 << 20
/* SVE */
mrs x5, id_aa64pfr0_el1
ubfx x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x5, 1f
orr x0, x0, #CPACR_EL1_ZEN // SVE: disable trapping at EL1 and EL0
1: msr cpacr_el1, x0 // Enable FP/ASIMD
mov x0, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x0 // access to the DCC from EL0
isb // Unmask debug exceptions now,
enable_dbg // since this is per-cpu
/*
* Memory region attributes for LPAE:
*
* n = AttrIndx[2:0]
* n MAIR
* DEVICE_nGnRnE 000 00000000
* DEVICE_nGnRE 001 00000100
* DEVICE_GRE 010 00001100
* NORMAL_NC 011 01000100
* NORMAL 100 11111111
*/
ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
MAIR(0x04, MT_DEVICE_nGnRE) | \
MAIR(0x0c, MT_DEVICE_GRE) | \
MAIR(0x44, MT_NORMAL_NC) | \
MAIR(0xff, MT_NORMAL)
msr mair_el1, x5
/*
* Prepare SCTLR
*/
adr x5, crval
ldp w5, w6, [x5]
mrs x0, sctlr_el1
bic x0, x0, x5 // clear bits
orr x0, x0, x6 // set bits
/*
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
* TCR_EL1.
*/
mrs x9, ID_AA64MMFR0_EL1
bfi x10, x9, #32, #3
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)
/*
*      n       n                       T
*      U E     WT      T UD    US      IHBS
*      CE0     XWHW    CZ      ME      TEEA    S
* .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
* 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved
* .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings
*/
.type crval, #object
crval:
.word 0x000802e2 // clear
.word 0x0405d11d // set

Some files were not shown because too many files have changed in this diff.