Compare commits

...

1012 Commits

Author SHA1 Message Date
5594a4a4a9 The build is successful, testing is required 2025-08-23 12:12:22 +08:00
9ae3a3f374 todo: modpost undefined errors 2025-08-23 11:02:25 +08:00
64dbb93260 Still need to port some kernel modules 2025-08-23 00:35:29 +08:00
015a64039d Try to port mckernel to rhel9 2025-08-22 22:02:50 +08:00
7afd1c87f6 Update IHK submodule commit
Change-Id: I503233b393e5bfec003d407512d1028de3a60946
2022-05-31 13:39:07 +09:00
27b3f59031 Update IHK submodule
Change-Id: I01bb44d3cf40e431090785ea261926e89d835e66
2021-06-14 06:05:58 -04:00
a1b9721772 RHEL8.4: make mcinspect and eclair fPIE for RPM, clear build_ldflags in mckernel.spec
Change-Id: I06f09628629c2afb0d36ad6ab2e2ed2cc716a980
2021-06-14 06:04:30 -04:00
69187ea0fd Update IHK submodule commit
Change-Id: Ic952ff15e2269452ce0693a2a96653659431372b
2021-06-13 22:43:32 -04:00
0353fc1a0a RHEL8.4: support VDSO changes for aarch64 (by Fujitsu)
Change-Id: I1148d2e56eab52ee0264995dd32b9fd2f0d661f0
2021-06-13 22:33:29 -04:00
1a71203872 release: 1.8.0: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I171c87f0f49cf2f791693e397a1d94b1bc2d0440
2021-03-23 01:49:46 +00:00
03d99a0ed1 submodule: migrate to github.com/ihkmckernel
Change-Id: I64ee7c89e7316bb98b31833b5c15af9cf371b0ff
2021-03-23 01:12:25 +00:00
8fb42631f2 profile: fix infinite recursion for allocation miss event
Change-Id: I248c2abc7d02a9d9bffce20b3183724ddc8c2c1c
2021-03-21 15:26:39 +09:00
ba04c8a7b9 Fugaku: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I74a0d0e50af0b6c60a6f9a4389ef3ab0534deda2
2021-03-21 15:25:15 +09:00
1bb8dcef05 release: 1.7.10: detect hungup via device-ioctl
Change-Id: I6531a159a44683085004ad3e90d7b4e67f51422c
2021-03-18 15:42:24 +09:00
ceb55d53b1 mcreboot-smp.sh: sudo ihkmond for /dev/kmsg log
Change-Id: I47aa483e6f787b8392b4b33b0fb10e4728157253
2021-03-18 06:36:33 +00:00
002f36c7f5 docs: add limitation about Linux kernel dump
Change-Id: Ic007f2f1915e37981955ad2160ea6614b1c36ec1
2021-03-17 21:39:07 +09:00
90c1ceef45 release: 1.7.9: fix smp_ihk_os_shutdown()-related double free
Change-Id: I408dc69b41d9643548226c15c67fcbd8197acb92
2021-03-17 18:21:25 +09:00
4f1b505550 docs: migrate to github.com/ihkmckernel
Change-Id: Idd8fed88545231b4aca290e1b54cbc2d2dff2e9e
2021-03-17 08:43:02 +00:00
051c0dcdd8 overlay_path: Fix resolution of symbolic link under /sys/
Change-Id: I650e72fb335aa72256d3b129a65c09bbd7cf26d3
Refs: #1463
2021-03-17 08:18:46 +00:00
09173d353c mcctrl_wakeup_desc: refcount and fix timeouts
Change-Id: I14b34f031ffb10bfac6cef07d81f53a8dece767b
2021-03-17 03:36:35 +00:00
d5c5023bf8 epoll/epoll_wait/ppoll: special handling in syscall offload
Change-Id: I792eb91c349d0ce942179996328c6f89f186ba31
2021-03-17 03:36:35 +00:00
e3493bd0be docs: lift limitations and fix ppn example
Change-Id: Id78e7db09767d5dd8a3dc5b9f911b9026608b021
2021-03-17 03:31:12 +00:00
44261678f7 cmake: fix condition to turn on/off ENABLE_KRM_WORKAROUND
Change-Id: I1a8efe88ffb1283d0343571f340a3b5715318e7d
2021-03-17 02:57:19 +00:00
6e4a29a422 docs: spec: fix description of IHK_RESERVE_MEM_MAX_SIZE_RATIO_ALL
Change-Id: I7af95524d87721fa1ce34bc560eddc947117f5f8
2021-03-15 15:32:08 +09:00
2039139380 release: 1.7.8: fix ihklib/ihk_reserve_cpu when using krm
Change-Id: I57235d51f51ae7327cb08a9e3ae56be995157100
2021-03-12 12:54:56 +09:00
c80b112ce7 release: 1.7.7: fix fput and mckernel.spec
Change-Id: I74f7530b067d44790e3f014479f580867387584a
2021-03-11 08:09:07 +00:00
4a05024656 spec: cmake-config cmake parameters
Change-Id: Ic0e7f62d9172f31afe90297bdd22b8e50cc6fc9e
2021-03-11 07:19:04 +00:00
7a04c6eb5c ihkmond: redirect kmsg to /dev/kmsg line by line
Change-Id: Iafc9d0eb47696073434dcc869a29336a51b8c50e
2021-03-11 16:11:17 +09:00
3e00189de0 kprintf: fix checking if interrupt is disabled
Change-Id: I2ee1a1e2438ae761c4136593953ede2738bc6f74
2021-03-11 07:03:04 +00:00
c94cf8e6f0 mcexec: fput executable just after its contents is transferred
Change-Id: I3fae841bd7341bca030fd6b7eceffa068c9e0f4e
2021-03-11 07:03:04 +00:00
ee974b200d mcexec_open_exec: fix missing fput on error
Change-Id: I3ac94e336dc54ec313e69c0fa85c17086dc256fd
2021-03-11 07:03:04 +00:00
546cafe6bc release: 1.7.6: fix ihk_reserve_mem_conf
Change-Id: I767f8eac655af9200f733c21353b1e141007df17
2021-03-11 15:22:36 +09:00
9dd4d99a1a docs: spec: ihk_reserve_mem_conf*: apply change only to the next reservation
Change-Id: Iaafd2ca4d96f227d03e9910a36b27801fb1e3da4
2021-03-11 15:17:44 +09:00
3a6273777a test: uti/tofu, issues/1507+1519: fix README
Change-Id: I3060e1273c8ef6a1b392a2c678da3bc02a25a4f8
2021-03-11 03:59:57 +00:00
daed585347 release: 1.7.5: fix ihk_*str() functions
Change-Id: Ic412029f856f34a10724f03e36f211f6026acd8e
2021-03-11 12:18:43 +09:00
11d7229525 docs: spec: ihk_reserve_mem_conf_str: use defaults for those not specified
Change-Id: I7cfddd3203b952cabb919ea6401e226e151e696a
2021-03-11 02:26:18 +00:00
e43d52df20 Revert "mcexec_open_exec: make fput and add to mckernel_exec_files atomic"
This reverts commit c80ea0ed23.

Change-Id: I0541e8af5157c7128f8774f6581cc207d13b649a
2021-03-10 14:21:57 +09:00
1c0da3c5b9 Revert "mcexec_open_exec: guard fput and add to mckernel_exec_files with spin_lock_irqsave"
This reverts commit cba263ff12.

Change-Id: Ifcd03a2048a3f9d6c155dd8ecd522081b5dde276
2021-03-10 14:21:49 +09:00
3084db8b26 release: 1.7.4: fix missing fput of executable
Change-Id: If3e2bb10bd21515876c5a37839cd9fcf12774329
2021-03-07 17:44:53 +09:00
cba263ff12 mcexec_open_exec: guard fput and add to mckernel_exec_files with spin_lock_irqsave
Change-Id: Id5dae8cb7f947d4e9939bf9c6762c2d1dcdd3776
2021-03-07 17:39:16 +09:00
43a6f0d41d release: 1.7.3: fix missing fput executable
Change-Id: Ib369a7803d5ef944fec4c42c5e9b20f8655e47f3
2021-03-05 15:20:16 +09:00
c80ea0ed23 mcexec_open_exec: make fput and add to mckernel_exec_files atomic
Change-Id: Iff4ac8bb9b4ebfcb9c77e84ed3f0a40e6b9efb6a
2021-03-05 14:14:30 +09:00
73d028de77 docs: move uti installation / usage into install.rst / users.rst
Change-Id: Ie1fe1593bb957e5c5fb6085543ab647ec43fd8f6
2021-03-05 11:52:38 +09:00
d812e4dedb cmake: fix POSTK_DEBUG definitions (third trial)
Change-Id: I60341362b1e17acdb7c7f9ac8c036604aec89885
2021-03-05 11:16:43 +09:00
b8cc962843 release: 1.7.2: xpmem is fixed, uti is integrated
Change-Id: I97ff528cbddaf06bb28fa064df54601308a06bca
2021-03-05 09:26:52 +09:00
c593faea89 MM: handle zero_at_free in page faults
Change-Id: Ib2b37c73936a365173d84a2a806a17374ccc05d4
2021-03-04 04:04:13 +00:00
7d69f15101 docs: describe fork starvation caused by a flood of system call offloads
Change-Id: I9826525ca582a61c6e94f44adab3068d80a699cd
Refs: #1398
Refs: #1408
2021-03-04 03:59:56 +00:00
687eae3a11 cmake: fix POSTK_DEBUG definitions (again)
Change-Id: I064b81115102bcbd2c3787c5743009be5032469e
2021-03-04 03:47:00 +00:00
eba2131f34 clv: Change no_preempt from int to ihk_atomic_t.
Fix the issue where no_preempt holds an unexpected value
(-1, 1, 2 etc.) after the process ends when running the UTI tests.

Change-Id: I7d9c08b754a171ea3fdec20ab2e635df3b607cbd
2021-03-04 03:40:15 +00:00
1070387ed2 SCD_MSG_SCHEDULE_PROCESS: stay on current CPU if set in process' CPU mask
Change-Id: I347fe61f8123792648747ec9bf856a9340ea9d9b
2021-03-04 11:29:21 +09:00
eca107f52d __mcctrl_os_read_write_cpu_register: spin timeout in mcctrl_ikc_send_wait()
Change-Id: I918be366c81be96cd76df659e3181a194d440dd8
2021-03-04 11:29:21 +09:00
71c333965c mcexec: introduction of --flags argument
Change-Id: If5b70bf06460fc709444d8135511c3699304d31f
2021-03-04 11:29:21 +09:00
5664f54390 hugefileobj_get_page: suppress debug message
Change-Id: I2784f5bba12f961cccb17cf302f1b463721ac855
2021-03-03 05:07:49 +00:00
720b0c06d8 test: xpmem: fix test program for #1259
Change-Id: I8885e518a0202df6d20a88524564aeb648df89ab
2021-03-03 05:07:49 +00:00
e6ec52dfbd test: Add test program for #1507, #1519
Change-Id: I04927e6dd1bfe1d0b210ec0b7e9d86c449e6daca
Refs: #1507
Refs: #1519
2021-03-03 05:07:49 +00:00
b380f0790d test: shmobj: fix test program for #1381
Limitations tested in #1381 have been removed in #1458

Change-Id: I35b7ee058a75abc98f2a2ad783fdba46087b0716
Refs: #1519
2021-03-03 05:07:49 +00:00
47aec70f5f shmobj: support large page
Change-Id: I104c1b8551b87f5cbfedb13262e77c00c38e9643
2021-03-03 05:07:49 +00:00
d2db639853 xpmem: support large page
1. try to use as large page as possible on attach
2. pre-map resident remote pages on attach

Change-Id: I5580682a4199e94085a9bad9ce3958a0f14cdcea
2021-03-03 05:07:49 +00:00
3aaa5350f0 xpmem: replace mcs_rwlock with ihk_rwspinlock
Change-Id: I2d29f37f590f55db009f522395ede4c6494f8c89
2021-03-03 05:07:49 +00:00
865eb37b11 xpmem: Fix deadlock in xpmem_remove_process_memory_range()
This reverts commit 2fe5c8de2e.

Change-Id: I8ec84f654aeee7cf83603abb2aca5e90b735e977
2021-03-03 05:07:49 +00:00
5dd989450d xpmem: Make sure vm_range is used under memory_range_lock
Change-Id: I856ee9d401e7dcfd74559bad5b22a69d53a61e2f
2021-03-03 05:07:49 +00:00
4ac9dcdccd xpmem: Use correct process_vm in xpmem functions
This reverts commit 29d27b7c8d.

Change-Id: I1863cddcffa67c60e5f93d874447db9919e519dc
2021-03-03 05:07:49 +00:00
63443383e9 xpmem: truncates the size of xpmem_attach at the page boundary (workaround for fjmpi)
Fujitsu MPI tries to attach a segment with the size of the source range size plus one.

Change-Id: Iab3801727f938dfb6242b6b90c88e4986b84d08e
Refs: #1507
2021-03-03 05:07:49 +00:00
4d1d53b335 docs: integrate spec tex files
"make html" makes spec pdf files as well.

Change-Id: I0535ae97d924c15efed948dadb135210ad18f956
2021-03-03 04:20:46 +00:00
422a399f20 coredump: fix behavior when gencore fail
Change-Id: Ifdddf867b514f42a6f1ce374ec06b7550a9cdad5
Refs: #1511
2021-03-03 02:27:01 +00:00
7efb394905 remote_page_fault is handled by the offloaded thread.
Change-Id: I9cfad509260cceada74abdf39ca6a1822561e0d9
Refs: #1474
2021-03-03 02:25:56 +00:00
9c7d0cfaec getrusage: Fix memory_stat_mapped_file when SIGBUS occurs in file map
Change-Id: Ia4686f32a3c888d5c886ab6cc6c2b510885447f5
Refs: #1422
2021-03-01 05:55:37 +00:00
baa7a6adcb add ENABLE_GCOV option for cmake
Change-Id: Ic473dc52d748207e49800d0cd340918a4dce0971
2021-03-01 13:19:28 +09:00
86e12fa90c do_execveat: kill instead of panic when init_process_stack fails
Change-Id: I0845440260f04ab8e524e118c21d5a137cddb4aa
2021-03-01 12:04:50 +09:00
ae9827f628 cmake: fix missing prefix in mcreboot.sh
Change-Id: I93ee7ba1acf1269472bea40ff4e9c5bc6b3b98f8
2021-03-01 11:22:58 +09:00
79b590d732 docs: remove "mcexec -n <ppn>" requirement for Fujitsu MPI
Change-Id: I2d24fa5e11c005a7c7c61f7b351c196e920e6cef
2021-03-01 01:20:13 +00:00
1bfa339ccf man: mcexec: add "-n <ppn>" option mandatory when using MPI (except Fujitsu MPI)
Change-Id: I6d2d0337fdab13325642344529401f86bbadfdcd
2021-03-01 01:19:23 +00:00
c55a02ffba cmake: trim POSTK_DEBUG definitions
Change-Id: Ic302cd936e975abb07b998f266f7c3feecfde85c
2021-02-28 20:05:04 -05:00
dd7b7dbd0e uti: fix rpmbuild
Change-Id: I0cc60d2b38b184168a99a4778fbb5a437278da29
2021-02-27 23:06:32 -05:00
2585c8afaa prerelease: 0.95: add ihk_*_str() functions
Change-Id: I0dc2ff3c8a2b21d167cfff04ccf6d1533555ee1c
2021-02-26 11:24:48 +09:00
82056961cd uti: integrate libuti and redirect to mck/libuti.so
Change-Id: I74e0f677ea8e1cd06e8ab05d92f1d38f9be8fd7a
2021-02-26 11:03:16 +09:00
0848b64c1d uti: integrate syscall_intercept
Change-Id: Ide14341acdca1450b0ad4f8a16cc078d0743afc8
2021-02-26 10:37:56 +09:00
8a9b43fee0 cmake: add -Wno-stringop-truncation
Change-Id: I43d9ba731d0feaf8934d2724ff98072df88a902d
2021-02-26 10:37:56 +09:00
19cb302d5f uti: util_indicate_clone: check --enable-uti mcexec option
Change-Id: Ic7474d01c18acd1edbc07844d7a7b010b2175f71
2021-02-26 10:37:56 +09:00
90895cfb1f test: uti: add tofu examples
Change-Id: I1c55c872d125201e60b4fe744af74106e1c5d3a4
2021-02-26 10:37:55 +09:00
32afa80718 uti: fix handling UTI_CPU_SET env
Change-Id: Icbf8dc7e82bd6983374aefdd0d5b89ad4152c9aa
2021-02-26 10:24:19 +09:00
e3927a0b95 uti: futex: McKernel waker sends IPI to Linux waiter CPU
Change-Id: I6f725b3a6b1b26b9f553d8c58132c0c0a4416683
2021-02-26 10:24:19 +09:00
adc5b7102f uti: futex: cache remote va to remote pa result
Change-Id: Idbbb3f2981b76a0235615fceaa6281d2c7134ca2
2021-02-26 10:24:19 +09:00
5d16ce9dcc uti: identify UTI thread by thread local variable
Change-Id: I64372a932378e4ead09ea27fbf5b52062a109756
2021-02-26 10:24:19 +09:00
a9973e913d uti: futex call function in mcctrl
Previously, the futex code of McKernel was called by mccontrol,
but there were some problems with this method.
(Mainly, the location of the McKernel image in memory)

Call futex code in mcctrl instead of the one in McKernel image,
giving the following benefits:
1. Not relying on shared kernel virtual address space with Linux any more
2. The cpu id store / retrieve is not needed and resulting in the code

Change-Id: Ic40929b64a655b270c435859fa287fedb713ee5c
Refs: #1428
2021-02-26 10:24:19 +09:00
35296c8210 uti: fix syscall response is mis-consumed by __do_in_kernel_irq_syscall
Refs: #1617
Change-Id: Iddd8ccd81d7f692f1f45ec888d31c2a87ec521ce
2021-02-25 01:42:29 +00:00
afea6af667 Send a signal to mcexec after switching to that process.
Change-Id: Ia882ef5027931009ee65febd0cbe22022a755c4a
Refs: #1505
2021-02-19 02:28:29 +00:00
b0bd1feefb remap_file_pages: check file mapping
Change-Id: Ibf145a20181938a9825214253337a423fcd53064
Refs: #1521
2021-02-19 02:23:39 +00:00
e6e66e0392 shmget: make small free numbers reusable.
Change-Id: Ic6670214fa31a309e96794361e3ec2dcc6375f4a
Refs: #1531
2021-02-19 02:22:50 +00:00
b3ddd60277 shmget: don't update refcount when shmid is found.
Change-Id: I3eac47cd67d27efd838190f5a4c21b5d682c5fe9
Refs: #1379
2021-02-19 02:22:33 +00:00
6dce9a2bf9 add_process_memory_range: Change order of update page and insert range.
An unintended page update occurred when inserting the range failed.

Change-Id: I3d117b8613c5fbb64463c759b5fcc81db22bd624
refs: #1512
2021-02-18 16:02:30 +09:00
93dafc5f79 migrate: Don't migrate on in-kernel interrupt
Change-Id: I9c07e0d633687ce232ec3cd0c80439ca2e856293
Refs: #1555
2021-02-18 15:30:22 +09:00
583319125a prerelease: 0.94: fix __mcctrl_os_read_write_cpu_register
Change-Id: Ibcfbe7796347cc9c2148cdea2519fe6c7ca9e97e
2021-02-18 15:23:01 +09:00
9f39d1cd88 move_pages: Fix and support some specs for LTP.
1. When nodes array is NULL, move_pages doesn't move any pages,
 instead will return the node where each page
 currently resides by status array.
2. Check whether all specified nodes are online.

Change-Id: Ie3534997833d797e2a9f595d1107b07d46e1c6cf
Refs: #1523
2021-02-18 06:16:17 +00:00
a0d446b27f smp: make smp_call_func() arch independent
Change-Id: Ib60604ceb3274b173bd7f96cf57c8c35c1889e44
2021-02-18 06:16:17 +00:00
f3c875b8e6 mbind: Use range_policy's numamask as priority on MPOL_BIND
Change-Id: Iaaa7998945c6e2b42d91d34a2f7b05db1f4d696d
2021-02-18 06:16:17 +00:00
9f1e6d707c get_mempolicy: Support (MPOL_F_NODE | MPOL_F_ADDR) specified
If flags specifies both MPOL_F_NODE and MPOL_F_ADDR,
get_mempolicy() will return the node ID of the node on
which the address addr is allocated into the location pointed to by mode.

Change-Id: Id485e3f4838e3679d877a95e53b21e3421cac88a
2021-02-18 06:16:17 +00:00
aef50d710c mempolicy: Support MPOL_INTERLEAVE
Change-Id: I6357892d792b2de8ea859a0a6799250f05066713
Refs: #959
2021-02-18 06:16:17 +00:00
7f0594d784 TO RESET: mbind: do nothing
Fixes: 00007daf ("mbind: do nothing (workaround for Fugaku)")

Change-Id: Id41940bebd2cbcc3e8637eadd4847984627b1c72
2021-02-18 06:16:17 +00:00
866f5c51a0 docs: add limitation of system calls that call copy_to_user()
Change-Id: If449c73f8d5949ab5526ea598b0f713ed4431157
Refs: #1514
2021-02-18 13:04:53 +09:00
48b1d548f2 __mcctrl_os_read_write_cpu_register: fix timeout
Change-Id: Id5a7d316d793bd535f24fd353b214aa12af1dab4
2021-02-15 08:56:04 +00:00
822b64b03c docs: add limitation related to Fujitsu TCS xos_hwb
Change-Id: I83a1ecd7a0b6d3bcde2b902cd526dfd4feb9e23a
2021-02-15 16:03:52 +09:00
aca83bcd3d Tofu: fault stack area if VM range doesn't exist in STAG registration
Change-Id: I407a8954ccaf22019b3082fd6eee68e772d1cb26
2021-02-15 14:46:58 +09:00
c7145c4b38 xpmem: fault stack area of remote process if VM range doesn't yet exist
Change-Id: I2bbb745cc9b79ab4f9ea81b242f35f1b88ad531e
2021-02-15 14:46:58 +09:00
a82d161be8 prerelease: 0.93: investigate smp_ihk_os_panic_notifier
Change-Id: I997b41f80038603261de2e8232b6b8ca200cd8cd
2021-02-09 21:39:49 -05:00
7152269a59 spec: create one rpm including .ko and binaries
Don't use kernel_module_package, so as not to create a separate
kmod-mckernel-*.rpm containing .ko files.

Change-Id: I25b7ff662476bfc735d319b57cdf2da82f2c6aa7
2021-02-09 20:55:38 -05:00
31c08bcb7d spec, docs: update cmake options
Change-Id: Ib8277413a413b5ce956a48f7e3d9922311937ea8
2021-02-09 20:55:38 -05:00
dffb0918a2 docs: add capstone installation options
Change-Id: I96aa9a6405c17f8d9653f3d3894f0e71a57ab460
2021-02-09 06:10:32 +00:00
23cd14af7d __mcctrl_os_read_write_cpu_register: timeout in 1 sec for when McKernel can't respond
Change-Id: Ia2d5f64e107697dda1f3bae499eb3afb8a7aedba
2021-02-09 06:09:11 +00:00
a5cf2019bc cmake: fix detection of Fugaku native compilation
Change-Id: I4210e9b57223c3869464caea10c2d414e9484e14
2021-02-09 06:06:13 +00:00
11b9fe0377 page_fault_handler: fix missing increment of in_page_fault on SEGV
This integrates some of the changes of the following commit:
1cf0bd5a ("TO RESET: add debug instruments, map Linux areas for tofu")

Change-Id: Iffd8432d5a7b35f20bd45829a125583a0363dbf0
2021-02-09 00:56:15 -05:00
4905c8e638 mcexec: propagate error in __NR_gettid handler
Change-Id: I0e0f06199970fe839065567dcd5418d017b6ec00
2021-02-03 18:53:33 -05:00
3d71c6a8eb mcexec_transfer_image(): map exact size of remote memory (instead of forcing PAGE_SIZE)
Change-Id: Ic66770af6cdb15b7a2e18a08cbcd1736e5558bdf
2021-02-03 18:53:33 -05:00
1cea75dd51 mcexec: fix strncat missing NULL and pclose of uninitialized
Change-Id: I9ce4004580845a983949caa5668b2f950880cd24
2021-02-02 01:51:57 +00:00
661ba0ce4a docs: add editing spec file when building rpm
Change-Id: Ic8dc9d8c6aef6d2180844891d743a09f4a3bdd9d
2021-01-29 01:23:35 +00:00
7e82adc761 prerelease: 0.92: fix uninitialized usrdata->cpu_topology_list
Change-Id: Ia12970bda1225898823a67c2d0461144fc62ebb9
2021-01-29 09:50:53 +09:00
1f9fbe82db mcctrl: fix access to uninitialized usrdata->cpu_topology_list
Change-Id: I25a9182b9b470bb069f4f755a67fb50b88817cd2
2021-01-29 09:34:24 +09:00
aa3d4ba7bd spec: prerelease 0.91 for 4.18.0-240.8.1.el8_3.aarch64 support
Change-Id: I8b33714157b1c68c1fc1eadf0b9d072a3ee59608
2021-01-26 02:34:35 -05:00
c89ac042f9 spec: prerelease 0.9 for testing hidos and cgroup check
Change-Id: I3b04fbf3a1ffa10df9c76da7b2730b9a2521bf98
2021-01-20 13:03:16 +09:00
0f1fc88ce9 spec: prerelease 0.8 for testing hidos and cgroup check
Change-Id: I6261380ab8e99d39191cbd8aac851038cdeb5ce2
2021-01-19 17:34:45 +09:00
bbc6565e7e docs: users: add how to specify boot parameters with Fujitsu TCS
Change-Id: I0216603388780d0e5497373598c3151812238932
2021-01-19 04:03:05 +00:00
1a29f8213f spec: prerelease 0.7 for testing hidos and cgroup check
Change-Id: I17f1608051a8f8ca33d2ba7385b75b8b492d1886
2021-01-19 12:25:06 +09:00
fd21fe7411 copy_user_ranges: copy straight_start of struct vm_range
This fixes the panic in ihk_os_set_ikc_map01 of the ihklib test suite.

Change-Id: Ic03efc81c5ca2c4deaeb06673afef8cef7a1cf92
2021-01-19 00:59:46 +00:00
2460228052 mcctrl: abort on invalid addr in mcexec_transfer_image()
Change-Id: Ic064b6ffc30368ff1d3dfb14403e524cbb837ce5
2021-01-19 00:55:20 +00:00
bf926f234a Tofu: manage stag ranges in VM range split and misc cleanup
Conflicts:
	kernel/process.c

Change-Id: I480850fe93a7963a5bd4d1687fb1e5c43f58057f
2021-01-19 00:55:20 +00:00
507b937509 Tofu: mcctrl side MMU notifier and CQ/BCH cleanup
Conflicts:
	executer/kernel/mcctrl/arch/arm64/archdeps.c
	executer/kernel/mcctrl/syscall.c

Change-Id: Ided8172331a5469c6ced68fa98a42302812efe71
2021-01-19 00:55:20 +00:00
a99cf99396 cmake: add switch to turn on/off krm workaround
Change-Id: I2dfd3d7f3373cce714247f9fc36bf5040a2a8fad
2021-01-19 00:52:53 +00:00
6f373186bf docs: add specifications of IHK and McKernel
Change-Id: I523ad68c5627ca1081c0c8684606a08101982ec9
2021-01-18 08:24:37 +00:00
6667321dc1 spec: prerelease 0.6 for testing capped best-effort memory reservation
Change-Id: Iaa91b311ee6879e84ce862aeabb4bd1fcd95d35f
2021-01-07 11:14:22 +09:00
f849745b60 spec: prerelease 0.5 for testing capped best-effort memory reservation
Change-Id: I139d6e24fbadb7313116029005e115053f31a899
2021-01-07 10:56:27 +09:00
78bc06d998 cmake: set default value of ENABLE_FUGAKU_DEBUG to OFF
Change-Id: I70703410922aa1d1440d61ead6e225d92cf60003
2021-01-07 10:42:36 +09:00
d726bd3d11 profile: fix definition of PROFILE_ENABLE and __NR_profile
Change-Id: I3f9f5870f8380d3668e1ccb06fd0f6d3307e3fa4
2021-01-06 01:03:17 +00:00
df37d6867f docs: add scheduling limitations
Change-Id: Ida4a16efa4d47f448da7417a3b4bdb5fb5304fcd
2021-01-06 09:58:38 +09:00
a4b5410d0c docs: add mlockall/munlockall limitations
Change-Id: I01d1c4eb6955baee89f6827748ac8ce4082884da
2021-01-04 12:57:32 +09:00
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn is added with the following commit.
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
To prevent "TO RESET: send SIGSTOP instead of SIGV in PF" from making
some tests expecting SIGSEGV fail.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
2fa1c053d7 spec: prerelease 0.3 for testing ihk_reserve_mem and memory policy
Change-Id: I4fbcfa1f93522fd01af42d1ef13d0be075086773
2020-12-24 15:11:01 +09:00
530110e3a9 Tofu: fix ENABLE_TOFU switching
Change-Id: Ib33323d4b59ea8fb4f5f40dff7ea25a36773d5e2
2020-12-24 15:00:14 +09:00
f6ed44aeec spec: prerelease 0.2 for testing ihk_reserve_mem and memory policy
Change-Id: I9ff171c5d65b5f465ce7a2767be1a710de0a0400
2020-12-24 11:23:17 +09:00
33dd2e60b1 mcexec: memory policy control by environmental variable
Refs: #1470
Change-Id: I3d556cae90d31d81572b1c4e5c680e826577d428
2020-12-24 11:18:01 +09:00
ed670c03af spec: prerelease 0.1 for testing ihk_create_os_str
Change-Id: I3c9bbc6f3c9e8951c0ad700b9c02fcdec65018ff
2020-12-23 11:33:31 +09:00
e5f4a4e87d Tofu: proper cleanup of device files when mcexec gets killed
Change-Id: I6cb0290f72d96682700f945b29585e132e525ac1
2020-12-09 13:05:54 +09:00
1918df7765 Tofu: support for barrier gate, kmalloc cache
Change-Id: I6f4cfec2ec404efd03b332fc3f449a775816230e
2020-12-09 13:05:54 +09:00
5d784f3ea4 kernel: increase stack size
Change-Id: I27698149e9206138402dcc65db0078d5dbf548cb
2020-12-09 13:05:53 +09:00
10c09aa10e MM: generic lockless kmalloc and page cache
Change-Id: I71ad498fdd10136d9c72ffe2b16b9122d1bc9673
2020-12-09 13:05:53 +09:00
41f5c0bdde MM: deferred zero cleaning on Linux CPUs
Change-Id: Icdb8ac807688533be7a95b7101edfd904250cd02
2020-12-09 13:05:53 +09:00
e7b8aeb4f7 Tofu: per-fd path memory leak fix
Change-Id: I451472365806333adfac6dae32746195e3c30694
2020-12-09 13:05:53 +09:00
1b3dd45dbc MM: straight mapping memory leak fix
Change-Id: I7d841fbedb1db498b5994eb69b0350df7a5cefb0
2020-12-09 13:05:53 +09:00
623d6f8bc3 arm64: record register state at kernel mode page fault (for eclair)
Change-Id: I066bceecc0377110faaca0b21d45a476d000e684
2020-12-09 13:05:53 +09:00
92902d36fc Tofu: initial version
Change-Id: I9c464d5af883c18715a97ca9e9981cf73b260f90
2020-12-09 13:03:01 +09:00
fe83deb3db profile: make header user-space includable
Change-Id: I4a88d9be7c169f29ef6f6328e8576a3fe3b6e34f
2020-12-08 12:32:10 +09:00
e056cb799f memclear: non-temporal memory clean (arm64)
Change-Id: I8f80ff20e98bc01088450282e1790c27c67c16eb
2020-12-08 12:32:10 +09:00
201f5ce500 MM: straight mapping
Change-Id: I70871f8c382fb00aa719ed501cc5de436d916d7f
2020-12-08 12:32:10 +09:00
100bbe6231 MM: zero memory at free and deferred zero
Change-Id: Ib0055d6f2bdd10d05d749dcd1f3d5c3d318f22f3
2020-12-08 12:32:10 +09:00
fbd121d28c mmap: return -EINVAL for non-anonymous, MAP_HUGETLB map
Change-Id: I2bcbbf0ee9c0f47160eabac4a8d09991c71fe852
2020-12-07 15:23:38 +09:00
d1d93d90cc mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0ce1b2d48cda10713920cb88692e107b8c4d3bab
Refs: #929
2020-12-07 15:23:34 +09:00
45bc6a617a __return_syscall: check input & fix unmap memory in error cases
Change-Id: I5de3ab3acd46770518b79bdc6f1c2e00c1cd5096
2020-11-25 01:58:47 +00:00
924ba7fd65 mcctrl_ikc_send_wait: free desc only if we allocated it internally
Change-Id: I4710ea6bb31f098451347c53ac0ff0be422aec06
2020-11-25 01:58:47 +00:00
2814f7cac4 mcctrl_get_request_os_cpu: check os instance & ret_cpu
Change-Id: I4d3f6fd93eaa183d560c874ba33add83c4308c5a
2020-11-25 01:58:47 +00:00
b510de7bd5 mcctrl_perf_get: check os instance & cpu info
Change-Id: Ic4f9d818b7d58f8ae651e43175fb1c478baec9c1
2020-11-25 01:58:47 +00:00
3e927f61dc mcctrl_perf_disable: check os instance & cpu info
Change-Id: I7195272a65b31db72158f5e5bbfc490bac547b91
2020-11-25 01:58:47 +00:00
64579830dd mcctrl_perf_enable: check os instance & cpu info
Change-Id: I31ab829d63833f924af17445fd9b8488d6eb454f
2020-11-25 01:58:47 +00:00
3cc98883f5 delete_procfs_entries: fix possible crash if top entry has no children
Change-Id: I209842699615f9bb58c12ccd262ae4b17f8f558c
2020-11-25 01:58:47 +00:00
442045a320 mcctrl_ikc_send: validate os and check input packet
Change-Id: I1f8c2228043841685617b665eeeaf2ce15a08703
2020-11-25 01:58:47 +00:00
fe5d8fc71f mcctrl_getrusage: validate os input
Change-Id: I97908069f8bc4703b99f9ffca94f3dd33eb64cc4
2020-11-25 01:58:47 +00:00
550c6cc5fb mcctrl_perf_set : validate os input & check cpu info
Change-Id: If308013746ff6dce03fa8e0eb1ebaca1cb2a4a64
2020-11-25 01:58:47 +00:00
8c0b2ab6ce mcctrl_perf_num: check "os" argument
Change-Id: I13c8b0c337cac9bbb240667808e871defce34aab
2020-11-25 01:58:47 +00:00
239b1b265f release 1.7.0
Change-Id: I8413aa2d051c6164235816bae2823187870efe49
2020-11-25 10:51:40 +09:00
f646fd141b prerelease 0.96: ihk_reserve_mem: balanced, capped best effort
Change-Id: Ia98c87e651d8dd34dfd36bc0c45f1d23e245330d
2020-11-24 03:40:01 +00:00
734d1cc056 ihk submodule update: ihklib: ihk_create_os_str: add ihk_reserve_mem_conf equivalent
Change-Id: Iede1a043b0316d6541656e86091f2288fd299383
2020-11-24 03:40:01 +00:00
040a9c0c7f cmake: set QEMU_LD_PREFIX when cross-compiling
Change-Id: Ie7b86ddba344e02d6f739225e44f3ad4927f5a2f
2020-11-20 07:59:55 +00:00
8784ee4710 spec: prerelease 0.95 for testing /dev/mcosN related fix
Change-Id: I02397984cd5c4c3a3e83968ff03cf9a68e84d200
2020-09-07 16:12:09 +09:00
3a761c138e ihk submodule update: ihklib, ihkmond: fix /dev/mcosN related issues
Change-Id: I533b277f249dc4afc84929dd2bf22c19648e21d1
2020-09-07 16:11:36 +09:00
e21a3a5af3 spec: prerelease 0.94 for testing ihk_create_os_str
Change-Id: If30f6ccf269dbdbbd564498318b741a88d46a2a1
2020-09-04 12:04:01 +09:00
cd33c88025 ihk submodule update: ihklib: turn off debug messages
Change-Id: I9adc4843bd4e2d2606e0100f855c83b47a144863
2020-09-04 12:03:58 +09:00
d78a0fb74c docs: NEWS.rst: add 1.7.0-0.93 updates
Change-Id: If4f41f6d26c2da60711568f02444cf033d82a3d5
2020-09-02 01:47:20 +00:00
9f815324a4 spec: prerelease 0.93 for testing ihk_create_os_str
Change-Id: Id31646c88da0640a3d58e7805fa61f0e0583ff1c
2020-09-01 15:53:10 +09:00
2748f06c1f ihk submodule update: ihklib: add ihk_create_os_str
Change-Id: Ia219a4463562de3b9d94f8b57ba52ff19f07e721
2020-09-01 15:06:32 +09:00
a7f892113a spec: prerelease 0.92 for testing RHEL-8.3 compat
Change-Id: Ie4dbfb253aa3ddd384ed1ad481e87e5f0e042e03
2020-08-31 02:11:09 -04:00
89c696afc5 ihk submodule update: gic_chip_data: compat: RHEL-8.3
Change-Id: Ibdf67f012d66c01ed3f6a486624e6a32a42ba0e7
2020-08-31 02:04:34 -04:00
e17e86840b docs: switch to https://ihkmckernel.readthedocs.io and add contents
Change-Id: I9515034ac372dbe554e1010f646b382c5dc94458
2020-08-19 12:44:03 +09:00
0de6c6b8f9 spec: prerelease 0.91 for testing removal of mcexec -n option
Change-Id: I2b18b5fefec570bfb7a4aa0823fe97d9ea93e208
2020-08-12 13:12:06 +09:00
5ffad78b87 mcexec: use FLIB_NUM_PROCESS_ON_NODE when -n not specified (Fugaku specific)
Change-Id: I1668fecfac692d56076dd10e6e03fbf992e323ec
2020-08-12 07:30:11 +09:00
542418b1fc spec: prerelease 0.9 for testing libdwarf related package requirements
Change-Id: Iaaa116018505c4f89813883f5a99c8194cb4f99e
2020-07-29 12:22:08 +09:00
b95a2fcfab spec, README.md: fix libdwarf related package requirements
Change-Id: I460d440e33d0ff5e8ab3d4f7b328f7f2ea11bc16
2020-07-29 12:08:04 +09:00
1b11496f26 spec, README.md: add package dependency including libdwarf
Change-Id: Ie612c5dc642a9f5d6d2ba31747adb991cb568113
2020-07-22 06:59:37 +00:00
7c0e624b13 spec: prerelease 0.8 for testing mcexec -n issue
Change-Id: Ie54f7bc74097c8390f75ddbd0d6e58a8ea87ea7c
2020-07-21 13:31:45 +09:00
0b66bab992 Revert "mcexec: detect mismatch of mcexec -n and mpirun -ppn"
This reverts commit 1d135492c3.

Conflicts:
	executer/kernel/mcctrl/control.c

Change-Id: I224cced408aa4b77691a153c5e1d2fdf8043fa04
2020-07-21 13:08:21 +09:00
63ed4e7af0 spec: prerelease 0.7 for testing hugetlb map for stack
Change-Id: I4997340cd984ca8915e45749b91b1d72c1de85af
2020-07-20 08:11:40 +09:00
d7cf39883f Revert "shmobj: Support large page"
This reverts commit 9a60997ea0.

Change-Id: Id60959b4e03451987239faa0bbc2e780b72fafaa
2020-07-19 12:53:45 +00:00
40f8091fab stack: grow on page fault
The steps of the technique to replace stack with hugetlbfs map are as
follows:

(1) Prepare a hugetlbfs map with the size of rlim_cur
(2) Copy the active region of the stack to the hugetlbfs map.
    The range to copy is determined by reading /proc/[pid]/maps.
(3) Replace the stack map with the hugetlbfs map

The step (2) tries to copy a huge region if McKernel doesn't grow the
stack at run-time.

Change-Id: I5858c35b5c26dd0a42cccf9e3cc4c64b1a81f160
2020-07-19 12:53:31 +00:00
a20e1acf01 syscall: add prlimit64
Change-Id: Iad882813d54b439c236c0df74dc81508190e6707
2020-07-19 21:52:46 +09:00
b3d7bbda56 rus_vm_fault: compat: RHEL-8.2
This applies the following patch:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396
mm: change return type to vm_fault_t

Change-Id: I7189fc92824d21b4906f1033f1de5899bbad4680
2020-07-15 13:02:32 +09:00
9a60997ea0 shmobj: Support large page
Mixing page sizes is allowed by shmobj.

Change-Id: Ic48b71da2db6ce3f68fa3dbc8ad5ae96347d6018
Refs: #1381
Refs: #1458
2020-07-15 03:50:56 +00:00
4b66373813 mcexec: Don't forward SIGTSTP SIGTTIN SIGTTOU to mckernel
Change-Id: I72bb74d6b98e1f0bf519c8f0fef742624a2a699a
Refs: #1425
2020-07-14 08:34:11 +00:00
b44b11ace7 set_robust_list: Add error check
set_robust_list is not supported by McKernel.

Change-Id: I1f679e2e4df24139cceb1f2294bc072cb7956002
Refs: 1399
2020-07-14 01:06:49 +00:00
ebc91cea0e tgkill: Fix argument validation
Formerly, if tgid is specified as -1, tgkill() was equivalent to tkill().
Now it is treated as an error EINVAL.

Change-Id: I47bc75d439662a36dc6167c4446a5277422de507
Refs: 1380
2020-07-14 01:03:47 +00:00
58106d791a struct process: fix type of group_exit_status
Change-Id: Ib8492cbb077106cef1d0fa2d6d5e8e13bbb209c0
Refs: #1377
2020-07-13 08:33:07 +00:00
56b51d4f97 spec: prerelease 0.6 for testing cpuinfo and mmap overcommit
Change-Id: Iab5acc2c08ebe19251c37782cff87a4b5c914448
2020-07-13 10:14:23 +09:00
bafe540d86 mmap: allow unlimited overcommit
Change-Id: Iba07b5c504b4a202cd163ce682f3fc72a31284a0
2020-07-10 14:52:57 +09:00
d78a0fd05d sysinfo: support basic entries
Change-Id: I27f3e55058cc29f895831a1dddfafbc8585746a5
refs: #1389
2020-07-10 14:51:25 +09:00
999bc91b4f arch: Move some functions from arch-dependent to common part
Moved syscall rt_sigaction and functions related to signal.

Change-Id: I39f619e008d9c6018d91099a76dfb30e48757673
Refs: 1487
2020-07-10 03:54:28 +00:00
b3bd2ea9b3 procfs cpuinfo: use sequence number as processor
Change-Id: Id54ea74c5fda198a0bb9c9b6a19e6799fee0ed3f
2020-07-09 13:10:08 +09:00
d3d9e2400d test: ihklib: syscall_list.h: add robust marker for patch
Change-Id: Ie5f72b4b296db4d44e9839f38fd9a68854be78c3
2020-07-06 16:25:11 +09:00
199407b2a1 spec: prerelease 0.5 for testing ppoll
Change-Id: I51deb1c1703a986ba0aa4e02da9f53009554dbb7
2020-07-01 08:49:08 +09:00
5973d66e2d Revert "epoll_wait(): make sure to schedule in offload"
This reverts commit 5e44c9c9f9.

Change-Id: I826336f1ece31a84072c3e62c6c6c68a641e8fb5
2020-06-30 17:11:26 +09:00
d7ef74659b Revert "epoll, ppoll: deschedule on offload, don't do it when exiting system call"
This reverts commit d4056acfc3.

Change-Id: I7df15b9d3957ca571f4b4e2d576799f8b97ae299
2020-06-30 17:11:23 +09:00
ac86affecc mcexec: fix FLIB_AFFINITY_ON_PROCESS mask for McKernel CPU numbers (Fugaku)
Change-Id: If42b139fb53866bcff0809d898d4a2a712946f0c
2020-06-30 16:29:03 +09:00
2026cf8dad mcexec: explicit CPU list in partitioned execution (for Fujitsu's FLIB_AFFINITY_ON_PROCESS)
Change-Id: I05c11f73553de8ccb5f79083ce2115ac57e62584
2020-06-30 16:29:00 +09:00
1d135492c3 mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0c42e3119143da40ea2e69cd9ec99bde78a0ad2a
Refs: #929
2020-06-30 16:28:08 +09:00
1cfc5ca71f spec: prerelease 0.4 for testing cross-compile
Change-Id: I26908b6b415483711f55338e45d7b2d862b5c028
2020-06-23 08:34:10 +00:00
7ee533d620 spec: remove unnecessary mcinspect*.debug file
Fixes: 612f364 "spec: include recently added debug tools"
Change-Id: I29779132567d18f9468e3cecf2c713ad1c51729b
2020-06-23 08:34:10 +00:00
28334c7a29 cmake: treat libdwarf as required library when cross-compiling
Change-Id: I23ffb46c867b05de0e732c96912d62c630ebb44c
2020-06-23 16:18:35 +09:00
697e9386b3 cmake: fix resolving dwarf.h
Fixes: 0e787b7 "cmake: fix resolving libdwarf"
Change-Id: Iccb491c8ad07db0f15f6b1798ee8a91edc808cf7
2020-06-22 13:33:50 +09:00
0e787b731e cmake: fix resolving libdwarf
Change-Id: I14573f1ac7d779b4c90ed44cc310d4f584374559
2020-06-19 17:24:21 +09:00
612f364e6a spec: include recently added debug tools
Change-Id: I0318fe3551a75c7da774d26bc834c099bb235b67
2020-06-19 13:37:52 +09:00
ceee4c379f spec: prerelease 0.3 for testing fixes related to Fujitsu TSC and ihkmond
Change-Id: I4b9fcac086a3567e6e797f3e7515949c9e214c36
2020-06-18 16:23:43 +09:00
36c981bc34 sync with ihk
Change-Id: I052394121016a030d8873296b4a17b1f038d6b13
2020-06-18 16:23:43 +09:00
fd941dad44 Revert "procfs cpuinfo: use sequence number as processor"
This reverts commit bb7e140655.

Change-Id: If0c1719986706511c1e57d06bc61923d1adfc0aa
2020-06-16 13:26:55 +09:00
5f5b9f94d1 Revert "get_one_cpu_topology: Renumber core_id (physical core id)"
This reverts commit 0a4e6b49b4.

Change-Id: Icd9f2cda63d0daf661a40b146c72608b82cf2061
2020-06-16 13:26:55 +09:00
3f3c4acd71 madvise: do nothing (workaround for Fugaku)
Change-Id: Id2265e7eca4ae296dd22a8e99a2294a9a8b4c4dc
2020-06-16 13:26:54 +09:00
00007dafaa mbind: do nothing (workaround for Fugaku)
Change-Id: Id9d018304e18ed52ea7b0a872e03675c903bce6e
2020-06-16 13:26:54 +09:00
cbe2b2149d Revert "sysinfo, procfs: Support memory info partially"
This reverts commit 8f74888f87.

Change-Id: I65530dd8a4e1af2ca47cb02c02f5c54a9b4595a5
2020-06-16 13:26:54 +09:00
4cecde3fba Revert "mcexec: detect mismatch of mcexec -n and mpirun -ppn"
This reverts commit 72af689e69.

Change-Id: I25bc56cd8ac9c877852fc1092c8349fe318fd25d
2020-06-16 13:26:54 +09:00
8022a2a8c0 treat libfj90 as helper thread spawner (Fugaku specific)
Change-Id: I1f6170c7ebbfae4f575f13ac1f3106d292cd5b6a
2020-06-16 13:26:53 +09:00
3328ce03d9 Record pthread routine address in clone(), keep helper threads on caller CPU core (workaround for Fugaku)
Change-Id: I29d1589e430dc1396558cdf3df4d068c27173612
2020-06-16 13:26:53 +09:00
97b107f61c treat /var/opt/FJSVtcs/ple/daemonif/ as device file (Fugaku specific)
Change-Id: I047ec793a082f2fede3f2bd9c5fb358a30b8ea84
2020-06-16 13:26:53 +09:00
6f3be17c19 do_process_vm_read_writev: don't check vm_range (workaround for Fugaku)
Change-Id: I4ce9b5397ed876dff651c67658e43811d83658dd
2020-06-16 13:26:53 +09:00
dea7d00545 force allow_oversubscribe (workaround for Fugaku)
Change-Id: I5288f5ccbd967004fabbe71bca267feed3b9c2f8
2020-06-16 13:26:53 +09:00
4512778569 force time_sharing (workaround for Fugaku)
Change-Id: Ie3e3a0bbf00ef4e988bdee40d9d4dc93258dd4be
2020-06-16 13:26:52 +09:00
a7adb266ff mcinspect: add read memory value by specifying physical address
Change-Id: I2f2d6cb981e883c5e2ae1e0c764e10e0fec76a46
2020-06-16 13:26:52 +09:00
2566f4f213 devobj_free: don't report error on release-offload failure
Change-Id: I4179dab8cc46557a72eb3447ff0803743a1ba1a2
2020-06-16 13:26:52 +09:00
ac0081eddd handle_interrupt_gicv3: don't take runq_lock
To avoid dead-lock with the function taking the lock with
ihk_mc_spinlock_lock_noirq().

Change-Id: If689e8cc5fff81f627bcf98bfa7df7d4c13f4209
2020-06-16 13:26:52 +09:00
d4056acfc3 epoll, ppoll: deschedule on offload, don't do it when exiting system call
Change-Id: Ib1d0553ca5c50f4de055a1a5fe40b406c9c26dc7
2020-06-16 13:26:52 +09:00
1910543380 armv8pmu_write_counter: sign-extend properly
ihk_mc_event_set_period() calls armv8pmu_write_counter() by
cpu_pmu.write_counter(..., (uint64_t)(-left) & max_period)

Change-Id: I2ac8fbe5957db044ac54946f620163e3c486cb5f
2020-06-16 13:26:51 +09:00
6332903f0d Revert "xpmem: Support large page attachment"
This reverts commit a8696d811d.

Conflicts:
	kernel/include/process.h
	kernel/syscall.c
	kernel/xpmem.c

Change-Id: I726e74450f6228d3fc78fc62dda15b2067732a53
2020-06-16 13:25:57 +09:00
29d27b7c8d Revert "xpmem: Use correct process_vm in xpmem functions"
This reverts commit 0c63a2a3cd.

Change-Id: I7a67def6c45a67396b15cc55e96ffb5fc5898f28
2020-06-16 13:25:51 +09:00
7136384384 Revert "xpmem: Make sure vm_range is used under memory_range_lock"
This reverts commit 91ea69cf8f.

Conflicts:
	kernel/xpmem.c

Change-Id: Iff3eed010ad3610d63e165f53484ac56528ce384
2020-06-16 13:22:49 +09:00
2fe5c8de2e Revert "xpmem: Fix deadlock in xpmem_remove_process_memory_range()"
This reverts commit d052acab1d.

Change-Id: I31e982465ef9e0936145f27c8d1587c01737ec81
2020-06-16 12:13:49 +09:00
e774e1b984 Revert "xpmem: fix mapping of attachment and segment"
This reverts commit a5fcc91656.

Change-Id: If29415369d724391b291939ecce76482138e82f5
2020-06-16 11:28:02 +09:00
33b7414615 Revert "xpmem: map only resident segment pages at attach time (workaround for Fugaku)"
This reverts commit 3c646e2485.

Change-Id: Ibae8100403586775a32d6eb36c74383131066ac9
2020-06-16 11:27:59 +09:00
3c646e2485 xpmem: map only resident segment pages at attach time (workaround for Fugaku)
Change-Id: I50ac8ba88b208608206b68b4c57e278041913503
2020-06-16 09:17:26 +09:00
a5fcc91656 xpmem: fix mapping of attachment and segment
* Mapping attached part of segment is done at attach time instead of
  make time to work with runtimes (e.g. OpenMPI) xpmem_make-ing the
  entire user-space
* Mapping attached part of segment at attach time can be turned off by
  specifying xpmem_remote_on_demand in kernel argument
* Mapping attachment chooses appropriate page-sizes, i.e., largest
  allowed by memory range and segment page boundary

Fixes: a8696d8 "xpmem: Support large page attachment"
Change-Id: I44663865204036520e5f62fe22b9134ee4629f9b
2020-06-15 10:11:29 +09:00
d370e9241f Toggle preemption while faulting pages
Change-Id: I74201061bb3e7c7c4032e3884658ace87cb85948
2020-06-15 10:11:29 +09:00
3e254c06bf SCD_MSG_WAKE_UP_SYSCALL_THREAD: hold target thread through wake-up
Change-Id: I35b2c56f78430135b2d197d2a2cfe364dbd03947
2020-06-15 10:11:29 +09:00
07537cd2e7 eclair-dump-backtrace: expect script to dump backtrace on all CPUs
Change-Id: I358c5d5ca81903b0eaab88d227c36373164c0950
2020-06-15 10:11:29 +09:00
a37f72da0e futex_wake(): disable IRQs while iterating plist
Change-Id: I796794b2159816183c6487ef0048f42f97aac73b
2020-06-15 10:11:28 +09:00
ab11b168f0 ptrace_setoptions: debug msg
Change-Id: Iea5fdb26884c7af6e3d5aa26b5f71932f730cc9d
2020-06-15 10:11:28 +09:00
eac414d6d8 CPU read/write reg: use generic IHK messaging interface
Change-Id: Ia9637d1516d9329fdadf37822bfce7594d69105f
2020-06-15 10:11:28 +09:00
bb725f5f50 crash: print actual PTE in lookup mode
Change-Id: Ie2c1b97780347d6172ef8961ed62258117cbf115
2020-06-15 10:11:28 +09:00
5224551782 mcinspect: vtop (in progress)
Change-Id: I09f487e96edc7c4f59c97e6fb6dde28baf84c1e5
2020-06-15 10:11:28 +09:00
91146acfe5 Make struct ihk_os_rusage compatible with mckernel_rusage (workaround for Fugaku)
Change-Id: Iebae1e8b0aaf9c23cb1c9411aa1ad111b2e61028
2020-06-15 10:10:57 +09:00
f64731ab34 do_migrate: kick scheduler on target CPU
Change-Id: Ib5875ecf0c6a3118d32973329a6f1595a910562f
2020-06-15 09:58:55 +09:00
cd46cbd4b3 mcinspect and mcps: DWARF based LWK inspection
Change-Id: Ie9e209d8f77999b61afa39c38832bfc416a2c34f
2020-06-15 09:58:54 +09:00
39780917af libdwarf: compile locally if not present
Change-Id: I70d1f653f4fc4ee4daeaa2c9c6bdbf1416e43c9b
2020-06-15 09:58:52 +09:00
0f8f6d298e CMakeLists.txt: fail on missing libraries at config time
Change-Id: Ia7e4cf469d94f97fa1c565e59d2d4587f3a3d081
2020-06-13 17:18:10 +09:00
f8e8b21f04 /dev/shm: use Linux PFNs and populate mappings
Change-Id: I921c1f43c8411f896343be17e0ac6762a1bc26d1
2020-06-13 17:18:10 +09:00
5c2f9b8239 pager: prefetch all shared libraries
Change-Id: Ic62e1284d540362df817098b3926ac223245e3b6
2020-06-13 17:18:10 +09:00
1afc3d9b70 Keep track of number of context switches per CPU
Change-Id: I7a2194c8777a7efcd34e1ed7f4734da03fb4d433
2020-06-13 17:18:10 +09:00
17a8f68d60 set_timer(): treat spin wait as PS_RUNNING
Change-Id: Iea1ad5b0a49a12d5e1aef38ad68fccb8d789af5e
2020-06-13 17:18:10 +09:00
2b9a053504 syscall offload: avoid double IRQ enabling
Change-Id: I202c9f348b66672b1c9f8c146d4e28ec1d9c7658
2020-06-13 17:18:09 +09:00
6441aa1abb __sched_wakeup_thread(): check if timesharing needs to be enabled
Change-Id: I081d700f345abbbdb14dcac3b6246b79475d059b
2020-06-13 17:18:09 +09:00
9b55b68934 Allow other threads to run while waiting for I/O in page faults
Change-Id: I51e847a02a698b0ecf1e356d51599aa1c9400b15
2020-06-13 17:18:09 +09:00
83ef96a739 fileobj: disable IRQs while holding page hash locks, schedule() in I/O loop
Change-Id: Iaf72d55980f1a5df6c93c4a57fa57b0ae5b1d229
2020-06-13 17:18:09 +09:00
b5337358cf IKC: increase message queue sizes
Change-Id: Ib1eee4d26b8304cbee16fe50caabfc2c19e5c2e3
2020-06-13 17:18:09 +09:00
2db3717e57 handle_interrupt_gicv3(): check for CPU_FLAG_NEED_RESCHED as well
Change-Id: Id6ade08e4e572a6d837476de2872126442d3591c
2020-06-13 17:18:09 +09:00
5395891966 pager_req_map: fix printk
Change-Id: I98488169f02656c2df711b827d0002762de69f7a
2020-06-13 17:18:09 +09:00
c32a5e261b PF handler: print VM range's file path if available
Change-Id: I5ba55b19a0b874bc9f4b58e94bfc4afc440e6a8a
2020-06-13 17:18:09 +09:00
c0c80b71ca mmap and fileobj: handle MF_ZEROFILL properly
Change-Id: I6ee52b4cab212b1973339bc8d49065c1ec9263b0
2020-06-13 17:18:09 +09:00
d15a396d5a pager: use host physical for PMIx shared memory
Change-Id: Idfebc768ba03b5536a0e5eb1c6076769806fa7aa
2020-06-13 17:18:08 +09:00
e35ec09da1 UCX: fix page size for shared memory
Change-Id: I75b0beef8345b391e7619887765ed1a89d74c29b
2020-06-13 17:18:08 +09:00
5e44c9c9f9 epoll_wait(): make sure to schedule in offload
Change-Id: I435416cb0ac005a03cd995bf1aae75c9ce7b2082
2020-06-13 17:18:08 +09:00
0f6c36870c mcexec_syscall(): disable no per-process structure warning
Change-Id: I951575f0077054ebcfe4b3f7e29416799ab6ade8
2020-06-13 17:18:08 +09:00
2ec2112cc5 IKC: use atomic allocation during initialization
Change-Id: I5bb5d7040092d47e4cdbdad87f9d1dd5b2ceaee5
2020-06-13 17:18:08 +09:00
c86a38e18f physical memory: guard rbtree allocator with IHK_RBTREE_ALLOCATOR macro
Change-Id: I468c6bf1f641875c02b091704ef63f59fd390be5
2020-06-13 17:18:08 +09:00
6aa7b50e26 profile: refactor display code and fix ARM support
Change-Id: Ic48102c42abe17eed014f2bfe7523d0d6f03c2e9
2020-06-13 17:18:08 +09:00
c3c57940ba Memory ordering and usage of ASM cmpxchg() instead of compiler atomic intrinsics
Change-Id: I4dadebc32721744dad982f3fc5b3eea7ab7ca745
2020-06-13 17:18:08 +09:00
7aa2d64294 obtain_clone_cpuid(): avoid locking while partitioned execution
Change-Id: Iabb4784835be7dc9b2f555acc3a711fcc23ee7da
2020-06-13 17:18:08 +09:00
51fe77cdae mmap()/shmget(): use Linux huge page size when not specified
Fixes: 089b443 "mmap()/shmget(): use Linux default huge page size when not specified"
Change-Id: If8043a0993d1131ea0344aa6d500b35c7a291884
2020-06-13 17:18:08 +09:00
d5aafca1ae VM: use RW spinlock for vm_range_lock
Change-Id: Id4654084207d55bf77cc9f8b42795e0f9873cfa0
2020-06-12 03:07:33 +00:00
54b529c82d An arch independent RW spinlock implementation
Change-Id: I426d3f7b643660e6685b5c39c0ae849a9f08b9bb
2020-06-12 03:07:33 +00:00
232bc9c44b README.md: add how to checkout to specific branch or version
Change-Id: Ie727c266d576e601f4901e2f84b98c07ff49aa24
2020-06-11 18:45:52 -04:00
f34373d1c0 README.md: add how to install with rpm
Change-Id: Ic3c0ff6971686d6d64dfcdd5850ae4a70f05f40f
2020-06-11 04:38:08 -04:00
4698ae166c spec: prerelease for testing hugefileobj premap fix
Test target: a2adb0a4 "hugefileobj: rewrite page allocation/handling"

Change-Id: Ibbae5222f54704248911da9f53ca8e4675627bc4
refs: #1475
2020-06-11 04:22:48 -04:00
db9ca358f9 sync with ihk
Change-Id: I769880c52c8cfd06523cea8d77cce5703e783532
2020-06-11 13:51:44 +09:00
16a6a1d08b mcexec: Fix LD_PRELOAD string manipulation (again)
Fixes: 8cf70900 "mcexec: Fix LD_PRELOAD string manipulation"
Change-Id: I6e0188bd60f8e3977beb22c1f9212baf37f37093
2020-06-05 09:25:15 +00:00
2e2e973d78 hugefileobj: rewrite page allocation/handling
* manage pages by an array
* fix mmap of fd created by memfd_create() populates the map
* refactor pgsize and pgshift handling

Change-Id: Icaf015b10afc35f2b95f93059adf1a1b6b92e14e
refs: #1475
2020-05-19 23:36:25 -04:00
c3c0b7197f test: perf: prevent overflow counter from stopping counter
Fixes: 1a204b6 "perf: overflow test"
Change-Id: I4d8e93b97f7a8d58ef7811f55b5c995b16c5af69
2020-05-14 01:10:14 +00:00
d086100b35 perf: REFRESH: Don't perform perf_start
Change-Id: I70194467d357770f982d90a6f9b132a61a817fc5
2020-05-14 01:09:52 +00:00
8f74888f87 sysinfo, procfs: Support memory info partially
Change-Id: I597dae4f82d64d3f23889cef960db18ae879ff06
refs: #1389
2020-05-14 00:53:25 +00:00
8e42c2a254 README.md: Add description of Utility Thread offloading Interface (UTI)
Change-Id: Ibeb6e6b91e5f280214e7f78049b6f35e648198c7
2020-05-12 14:14:08 +09:00
caf0f5ef63 cmake: do NOT install crash plugin sources
Fixes "Installed (but unpackaged) file(s) found" rpmbuild error.

Fixes: 04d17dd3 "Define MAP_KERNEL_START by resolving MODULES_END at cmake time"
Change-Id: I80df58ac3c581faf1c48080115b70724eac6aea5
2020-04-20 18:51:15 -04:00
3d030391e8 spec: Update version number to 1.7.0rc4
Change-Id: I1c999cfa632711195a9c8ec9de769075292c40b9
2020-04-17 11:57:53 +09:00
0aeab6b840 NEWS.md: Add 1.7.0rc4 updates
Change-Id: I66ccbe5e8454482155243b89d9b0398994186010
2020-04-17 02:43:56 +00:00
367bbda713 mcexec: Fix resolving library path for LD_PRELOAD
Fixes: 8ee1d61d "Revert "Detect hang of McKernel in mcexec""
Fixes: b87ac8b8 "reproductible builds: remove most install paths in c code"
Change-Id: I8ef9ab81cd0a41ccd0e227ebc3e45c0745c150e9
2020-04-16 20:46:46 +09:00
0082447043 mcctrl_get_request_os_cpu: Fix debug message
Change-Id: I0d2ae427b97b7284d61dd13825d4ba3d2130f26a
2020-04-16 07:44:36 +09:00
4f50c90f6e __mcctrl_os_read_write_cpu_register: Range-check cpu number
Change-Id: I9ef991e1f0a7e301430586c261bf55bf73a4bae9
2020-04-16 07:44:36 +09:00
79950e045e eclair: Improve error message
Change-Id: Ib8fe3df0a529a17a2e331b16cf396915ab6a3eb2
2020-04-16 07:44:36 +09:00
6cf7cebb2d __mcctrl_control: Check user privilege
Change-Id: Ia87ab241f980ea25df805bd31d66f07bf3681311
2020-04-16 07:44:36 +09:00
c9f05f238d Remove unused IHK_OS_STATUS_STOPPED
Change-Id: I4aad8dac06b79a85ca8951cc26c40981c64262bb
2020-04-16 07:28:20 +09:00
f1caaa9b74 freeze: arm64: use normal interrupt instead of NMI
Fixes: 55faba7 "dump: rewrite NMI handling (for resume) and fix PANIC register saving"
Fixes: ff982b8 "freeze: change freeze-thaw to normal interrupt"
Change-Id: I9445cac191f91d20357cae11b2839e4e9384ac6f
2020-04-15 01:04:20 +00:00
97cd379ee2 mcctrl_os_shutdown_notifier: Move wait for running state to ihk side
Change-Id: I363391c63d92d952fc9a60c1e88f964eb50687fd
2020-04-15 00:54:00 +00:00
8ee1d61d0f Revert "Detect hang of McKernel in mcexec"
Change-Id: Ie8a0cf725f84a2f5d85da8b8fb15b30a826ddfcb
2020-04-15 00:50:55 +00:00
04d17dd3e9 Define MAP_KERNEL_START by resolving MODULES_END at cmake time
Change-Id: Ib88fc045b64c4ad2dad6a4b13cb0372a735a26ab
2020-04-09 00:30:05 -04:00
33eef71133 spec: Update version number to 1.7.0rc3
Change-Id: Id07122ececb562ecb4e4cf91e4983b8273c96b34
2020-04-09 00:06:17 -04:00
c10b4a1c16 spec: fix mckernel-devel package
Also fixes kernel-rpm-macro package resolution issue.

Fixes: 6d584fea "spec: Add mckernel-devel package"
Change-Id: Ide286753c89c3b931665f53dd8270427b19b39eb
2020-04-08 00:25:43 -04:00
8cf70900e7 mcexec: Fix LD_PRELOAD string manipulation
To suppress compiler warnings.

Change-Id: I4d6b5ce2d2a8fca3f2675a7fc309df40cfe3c04b
2020-04-01 01:18:10 -04:00
b2618a98f5 madvise: Support MADV_DONTDUMP and MADV_DODUMP on anonymous map
Change-Id: I231b62ed6803b797ec749ac70a66cdf8236204bd
refs: #1373
2020-03-23 13:06:26 +09:00
01d06cb218 madvise: Add locked-page check to MADV_REMOVE
Change-Id: I95465ef11aa4c772ad0ecf5d25f757192f31b93b
refs: #1372
2020-03-23 13:06:26 +09:00
c78803ac08 madvise: Support MADV_REMOVE on tmpfs
Change-Id: Ic99d374c4d2630944c7bc838937d7f45601783c6
refs: #1371
2020-03-23 13:06:26 +09:00
3300e65efc madvise: Support MADV_WIPEONFORK, MADV_KEEPONFORK and MADV_NORMAL
Change-Id: I1d4cf5affa580d7304dfdc34fa4f1707c0df617c
refs: #1374
2020-03-23 09:13:01 +09:00
d82ac31bc6 faccessat: Specify AT_SYMLINK_NOFOLLOW only when necessary.
- Specify AT_SYMLINK_NOFOLLOW in faccessat only when
   the symbolic-link is analyzed by overlay_path().

Change-Id: Ie3b1f7fedef7441fd4b39c5c8b2ef0f73cba770e
Refs: #1370
2020-03-20 00:22:50 +00:00
4946fbdd82 Fix "test: runq_lock and over-scheduling fix."
Change-Id: Iedd3b94d6ecd52b9ee67cc9b8a75735428c9fd84
Refs: #1400
2020-03-19 23:34:40 +00:00
33cba1ad48 test: ptrace: Record syscall return value before reporting
Change-Id: I8e9de3bb9bfa0b07eebe472131cc62b53ef5cc8b
Refs: #1287
2020-03-19 23:31:48 +00:00
7c69cfaf67 set_host_vma(): do NOT read protect Linux VMA
Change-Id: Id1e84464c9a06a3886b9cb16b35b1f2dda3c4c30
2020-03-19 02:15:29 +00:00
b3cbdeec84 Fix memory leak when a child exits without wait()-ed
Change-Id: I8ad9e20e3f3e6f406548a6c4de2bf4dc07c40b0e
Refs: #1349
2020-03-16 04:26:54 +00:00
1d1ec39a27 exec: Correct wrong "=" to "+=".
Change-Id: Iec8c1bb7a12ad7f2e1d4ac07c75482e4d86a0ea2
Refs: #1382
2020-03-16 04:16:03 +00:00
0a4e6b49b4 get_one_cpu_topology: Renumber core_id (physical core id)
Change-Id: I4e4857e9a063d16d19d73adfabfc18a4b461bbfb
Refs: #1439
2020-03-12 05:19:25 +00:00
bb7e140655 procfs cpuinfo: use sequence number as processor
Change-Id: Idbfa48e9b60c03495d7ba72e962c55f0ffb8bec9
2020-03-12 05:19:25 +00:00
32b32f0c4a eclair: query phys memstart on arm64
Change-Id: I32db1153f5c1e4a217db69d8d55f0d0ccfa07c77
2020-03-12 10:53:41 +09:00
bf7fd81c1b Fix includes to handle module ref counter properly
Change-Id: If3f067a14e40c346f0455f8bfb8bbc8ab2934e88
2020-03-12 10:24:01 +09:00
92d191de9e xpmem: handle size 0xffffffffffffffff
Change-Id: I04fbe21966f8a831337576a14119afefe8a2ea4f
2020-03-09 16:26:09 +09:00
baf68f7e71 mcreboot: fix ETCDIR path (cmake 3.14.5 prepends etc)
Change-Id: Ib449ef294ddaf4a4d050d705fd05b8ede8b8150d
2020-03-09 07:21:10 +00:00
26bebb2749 sched_request_migrate(): fix race condition between migration req and IRQs
make sure the caller thread holds migration queue lock with IRQs disabled
until it notifies the target CPU so that an interrupt can not deschedule
it in the middle of the request.

Change-Id: I85995018ca1e8478ccc9723985b6e8efc9c3acfb
2020-03-09 07:05:15 +00:00
9e2196c9ce fix: memory leak due to forced termination during startup
Change-Id: Ide519f01702bfd17ae4576e04806b6d155ae846a
refs: #1397
2020-03-09 01:10:38 +00:00
93581cb142 test: runq_lock and over-scheduling fix.
Change-Id: I236ab585403076d716be350c8b51e8d352122f2b
Refs: #1400
2020-03-05 15:57:57 +09:00
67f5a1d4e0 migrate-cpu: Prevent migration target from calling schedule() twice
Symptom:
A thread could call schedule() twice.

Cause:
 (1) The migrator raises rescheduling flag
 (2) The thread calls check_need_resched() for other
     reason than the migrate IPI, e.g., response to system call
     offload. And it finds that the flag is set and it's trying to
     call schedule().
 (3) The thread is interrupted by the migrate IPI and it finds that
     the flag is set and calls schedule() in the interrupt context.
 (4) The thread resumes the execution and calls schedule()

Solution:
 (1) Reset the rescheduling flag when checking it and it's set
 (2) Set it again if it's decided not to call schedule()

Change-Id: I5376662d0b02ca4ebb29b42732e347f3b82d766d
Refs: #1400
2020-03-05 15:51:28 +09:00
edf7b36669 runq_lock: Fix deadlock due to cpu migration.
Symptom and analysis:
runq_lock of the migration source is acquired on
the migration destination CPU.

This happens in the following steps:
 (1) The thread stores value of cpu_local_var(runq_lock)
     to its register when trying to perform
     ihk_mc_spinlock_lock() on the lock variable.
 (2) The thread takes IPI and migrates to another CPU.
 (3) The thread resumes execution and acquires the wrong lock.

Solution:
* Disable interrupts before getting the value of
  cpu_local_var(runq_lock)

Change-Id: Ia0ea450b97f872dd6116252537e4a79f85adfc88
Refs: #1400
2020-03-05 01:51:40 +00:00
1a204b6674 perf: overflow test
Change-Id: Ic7aa0d99ae9a5b7d3ce4436129a360275e6937ca
refs: #1358
2020-03-03 15:55:13 +09:00
305511b48f perf: accumulate counter in overflow handler
Change-Id: If5f5a913e0fde889d1835ffb16c19ea0ad5e685a
2020-03-03 13:23:30 +09:00
606db376fd perf: fix perf_reset
Change-Id: I98122b0f9866bc1cc8713e7bd46fa879917ac6a0
2020-03-03 13:23:30 +09:00
5719b4c64a perf: update event structure
Change-Id: I5bc0fdd42db509b5d2daca7d97e29ad1f7d11f1a
2020-03-03 13:23:30 +09:00
343121c3d0 perf: set event period
Change-Id: Ibf569de7af8697e766c10b8d70905b8cdc4df083
2020-03-03 13:23:30 +09:00
86c45484e3 perf: add struct hw_perf_event
Change-Id: I0938e2b18064ad805a9edb6e15d26cf438bf0a59
2020-03-03 13:23:29 +09:00
767792808a perf: change count variable type to ihk_atomic64_t
Change-Id: I2bb6fab2c040683830b44fa6b963a86a233b883a
2020-03-03 13:23:29 +09:00
117f070fd6 perf: fix PERF_EVENT_IOC_REFRESH
Change-Id: Ia5d3fbe344346aabc3b5d40a801b3c21cfbaac97
2020-03-03 13:23:29 +09:00
a27909be88 ihk_atomic64_set argument to long
Change-Id: Ie9b5978028000236ae5846214a2ea14fcdffaf56
2020-03-03 13:23:29 +09:00
cec6f24559 PMU register support for cpufreq driver.
Change-Id: I11462d25ef83867ddf2e643798d1e3d0257f7f33
2020-03-02 07:14:27 +00:00
b3b8283f87 Add NEWS.md
Change-Id: Iecf193e3d5dac57f87ef8db2f43add5fb99f6a6e
2020-02-27 06:13:25 +00:00
d62f80a7c0 spec: Prevent rpmbuild from including build-id directories into package
Change-Id: Ie935d684eed3780f79f29a588233f5ab54a5f5d7
2020-02-25 10:44:08 +09:00
6d584feaef spec: Add mckernel-devel package
Change-Id: I51e9b88ed18b5a0662d1d77e344b84cb14e2189e
2020-02-25 10:44:08 +09:00
e2e015e120 spec: Remind that kernel-rpm-macros is no longer included in kernel-devel in RHEL-8
Change-Id: I4fb6a2d5f9114d9947b0eb848a21f772a2bece5e
2020-02-25 10:44:06 +09:00
5fb3abe87b spec: Relax Linux kernel version requirement for RHEL-8
Eliminate the need for rebuilding rpm for every RHEL-8 errata release.

Change-Id: I483c22d0b578809117a4f56881b11e51fcc608a7
2020-02-25 10:42:19 +09:00
37fd9e0cd2 test: rt_sigtimedwait: Add test cases for SIG_IGN and real-time signal
Change-Id: I4abafe73d81cfa77167289477ea8c5af701e7f2e
Refs: #1378
Refs: #1440
2020-02-20 04:31:08 +00:00
7e748b4ecb rt_sigtimedwait: could not wait for realtime signal
Change-Id: I341d2f0c9657c3b14eae89dddba074b68c654a12
Refs: #1440
2020-02-13 06:23:22 +00:00
cafb46efc7 rt_sigtimedwait: could not wait for ignored signal
Change-Id: I0f5a8e2eaae2b7c08a01f4ebb2c405b8972269a2
Refs: #1378
2020-02-13 06:23:22 +00:00
41ea9d16c4 mremap: Fix to work correctly when old_page is large_page
Change-Id: I5a589383644a8098d910e49cd7ade6df325e0366
Refs: #1383
2020-02-13 06:15:25 +00:00
4bbdee395e ptrace: fix execve and return value handling (fixes strace on aarch64)
Change-Id: Icb5cb7f7e99fdb74a8628bc6b550688df5fb056b
2020-02-10 07:45:06 +00:00
597baf8445 eclair: support for live debug
Change-Id: Ia9bc126e198ba4a80722529ce09de5eb0775d429
2020-02-10 07:45:06 +00:00
55faba77a5 dump: rewrite NMI handling (for resume) and fix PANIC register saving
Change-Id: I360e9aa8efa64b6ebd99b209a5dd4ee0dc7806cf
2020-02-10 07:45:01 +00:00
6bef773741 eclair and ldump2mcdump: obtain PHYS_OFFSET from dump_mem_chunks
Change-Id: I5dd5f9e7e6b5817e50b0a1855b67f163d3029f17
2020-02-10 07:42:23 +00:00
7882110e9f eclair: obtain MAP_KERNEL_START from kernel image
Change-Id: I946c640ddb2e2b32362760254a86c611517becf3
2020-02-10 07:16:06 +00:00
d1df17ffb7 eclair: fix register GDB response for descheduled threads
Change-Id: I0001d094b624bc03f2b178ec28a4cab51e2acaf0
2020-02-10 07:16:06 +00:00
72af689e69 mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: Iaf5cfb11c37bea6957b77a2114f783e9a46a48f2
Refs: #929
2020-02-05 06:39:57 +00:00
153d0609de ihk_os_{read,write}_cpu_register: Add async support
Change-Id: Ia2a2098550e856eeffbb20d8d0e0bcd57b85b6d7
2020-01-31 12:40:43 +09:00
83bbb87a0f mbind: fix processing when new range overlaps existing range(s)
Change-Id: I240a0205f0d836e4ff1a16b6739a3b366543bc06
Refs: #1384
2020-01-23 11:27:15 +09:00
f00d03445c epoll_pwait, ppoll, pselect: add to process sigmask
Change-Id: I6aa1db3b4c6ad81a8b5926fa87fc645269b103b6
Refs: #1361
2020-01-09 06:54:23 +00:00
911b07f507 fix: fork's race-condition caused by child and grand-child
Refs: #1329
Change-Id: Ia2d7641d1203f40155fef5db718d1bb2c583c1c5
2020-01-09 06:33:13 +00:00
5b26fe2956 do_process_vm_read_writev(): access local vector buffer using kernel virtual, PF if necessary
Change-Id: Ic90dca79e32d4151f585a5cbd5b2c7710534db0e
2019-12-23 02:54:52 +00:00
1db00ebc04 release_process_vm: free vm_range_numa_policy
Change-Id: I8084cd60a12b557b635b8e350f70d4e4f95d4c52
Refs: #1101
2019-12-20 07:12:16 +00:00
d5de68e97b eclair and crash: clean up architecture dependent codes and comply with Linux page_offset_base
Change-Id: Ie14ceb8bc9d816a9201dddd4020e2c21d6cfd686
Fujitsu: POSTK_DEBUG_ARCH_DEP_34
2019-12-18 01:53:29 +00:00
1526237bc6 x86 memory: use page_offset_base from linux
rhel 7.5 and later kernels have a page offset that is no longer
necessarily 0xffff880000000000, leading to kernel panics if we
use the wrong address

Change-Id: I3572fde1c31303a937855c23fbd3815ce0f96c64
2019-12-17 08:05:38 +00:00
b8d96a74ce Fix "arm64: Opt-out NMI for ThunderX2"
Change-Id: I95fabd17bfbae32320ed9e7a520c12e6f9527351
2019-12-17 14:48:10 +09:00
3c256e1a6c overlay: getdents: support lseek
Refs: #1421
Change-Id: Ife7ab1b50159a5897552ff695bb001ada27ec934
2019-12-13 03:49:20 +00:00
7fc4272b89 handle execveat systemcall on McKernel
Refs: #1366
Change-Id: I921e04a0df8d0d798fc94f675e5112dd2fec190a
2019-12-06 09:33:13 +09:00
d052acab1d xpmem: Fix deadlock in xpmem_remove_process_memory_range()
Refs: #1330
Change-Id: Ib62e3a7fe2811577ba8cabf174f64827e65c422c
2019-12-06 09:32:51 +09:00
91ea69cf8f xpmem: Make sure vm_range is used under memory_range_lock
Refs: #1330
Change-Id: I87a0d6042a2c388fbd260d8dff5d109106478872
2019-12-06 09:32:28 +09:00
0c63a2a3cd xpmem: Use correct process_vm in xpmem functions
Change-Id: I94c06ec69d0fe1e07d0b14bb44b448bbc63b9b63
2019-12-06 09:31:16 +09:00
a8696d811d xpmem: Support large page attachment
Change-Id: I4d672eee1c905160ece204d278f0afd9b6d7dc01
Refs: #1259
2019-12-06 09:30:51 +09:00
569dc33a9c mmap: fail and set -ENODEV when map to unmappable special file
mappable special files are /dev/mem and /dev/zero

Change-Id: Id1d4317104f901644e565007913e320d287e376f
2019-12-05 07:22:17 +00:00
4b252a990f SIGCONT: don't terminate process
Change-Id: Ib959a9e5341fda37bd055724ecb9319a469b7420
Refs: #1410
2019-12-05 07:13:56 +00:00
adb6cce3ce The process sending SIGCONT resumes the stopped process.
Change-Id: I64ee10172b99aa58540ffe8e9dd80fa0a64f4d01
Refs: #1420
2019-12-05 07:13:56 +00:00
ed21b6849d procfs: if memory_range_lock fails, process later
Change-Id: I3c5f24548455a63d8d5a4482f5081347f631885a
Refs: #452
2019-12-05 07:08:13 +00:00
37605740a4 support for backlog
Change-Id: Id8f503234e7afaa284e6b97dc264eb3a2af145c7
2019-12-05 07:08:13 +00:00
e069694c12 mem: Fix condition of whether in McKernel
Refs: #1324, #1329
Change-Id: I72bd69dbe65928f083b24513d50d29cabf3d6dff
2019-12-02 03:12:29 +00:00
dca1cb2625 arm64: Opt-out NMI for ThunderX2
Change-Id: I064da55e7e09e6d248c92ece5c56f9a9770c84a0
2019-11-28 02:22:55 +00:00
caac060684 mcctrl_getrusage: Round up cpuacct_stat_{system,user}
Change-Id: Ic1a236865fb3224dc9716c40a1eeb279c1fa1d70
2019-11-28 02:21:47 +00:00
d330721421 Rename struct cpu_topology to mcctrl_cpu_topology
To use a different name than the name in Linux kernel.

Change-Id: I44d10279195dfc9cfdc4788914b7d65b78292921
Fujitsu: POSTK_DEBUG_ARCH_DEP_40
2019-11-28 02:21:13 +00:00
157eeca41a README.md: Add contact
Change-Id: I3b038780ce91325151dfaef806e43eaaf71fe7e7
2019-11-28 02:09:41 +00:00
8ba725b225 mcstop+release.sh: Continue when releasing CPUs failed
Change-Id: Ib947843006ae9caa602e7b55309e68365edf4b2a
2019-11-28 02:09:01 +00:00
a563d780c1 munmap: fix deadlock with remote pagefault on vm range lock
Add protection to clear_host_pte similar to that of set_host_vma (see #986)

Also make the page fault handler only skip taking lock if the munmap
happened on the same cpu id

Change-Id: I6d9e68e8f8905b20bb2ccfa72848e04fe6404ab6
2019-11-28 02:07:45 +00:00
621533bbd3 Add ENABLE_PERF macros so that perf support can be toggled
Change-Id: Ic50c8b329af63e63579b6a60b9557344100eaac4
2019-11-26 09:15:05 +09:00
37ea770f8c mmap: Round up map size by pagesize when specified MAP_HUGETLB
To match the behavior of Linux.

Change-Id: I7bcc2cb3c1e678ffc28f6b825c7a55032441dded
2019-11-14 07:24:25 +00:00
edd3ea0103 Revert "memory_range_lock: Enable interrupt when trylock fails"
This reverts commit 0d3ef65092.

Reason for revert: This fix causes circular dependency with memory_range manipulation and TLB flush. See #1394.

Change-Id: I4774e81ff300c199629e283e538c0a30ad0eeaae
2019-11-11 15:28:08 +09:00
41d37bcd30 mcstop+release: argument for rmmod path specification
Change-Id: I80e4e7136a90bc65050ab8f7d39615581c47f317
2019-10-03 13:58:20 +09:00
309145587f perf_event_open: Add support for counting REF_CPU_CYCLES
Using thread's tsc count instead of performance counter

Refs: #1025
Change-Id: I1d7a18f1c52f1d52087002d31818638a6b206014
2019-09-26 07:38:04 +00:00
bc06d68d84 sigsuspend: Make sure receive correct sigevent from do_kill
Change-Id: Ife9cf36a81f353e0575f6802f1e56f7dd4cb0425
Fujitsu: POSTK_DEBUG_TEMP_FIX_33
Refs: #1350
2019-09-26 07:34:34 +00:00
18412616e1 munmap: Change permission of VMA back to RWX on unmap
Change-Id: Ic02098e7458dd8fa2961fb03dc32e37fb18c5dc5
Refs: #988
2019-09-26 03:49:50 +00:00
c371fbf13b file map: cause SIGBUS when access to a page beyond EOF
Change-Id: Iaf7d792413e674267fd1c05c382212c8f67d8f5b
Refs: #1291
2019-09-26 03:41:23 +00:00
1492f16d67 make syscall_enter arch-dependent
Change-Id: I4317f3443902620ef5b3807ced05c80fa5eebbec
Fujitsu: POSTK_DEBUG_ARCH_DEP_90
Refs: #1357
2019-09-26 03:28:57 +00:00
fd38ab6fd0 Add test results for "syscall offload regardless of mcexec life and death"
Change-Id: Iee759ae8814aff4274ff81dc14f6d5d7a01494c5
Refs: #1321
2019-09-26 03:26:20 +00:00
f115bae8a7 include interrupt handling time into system time
Change-Id: If2ed2d488b4040d288d712f0a244505adbcec6f5
Refs: #1221
2019-09-26 03:21:28 +00:00
ba80dd8650 arm64: Fix for ptrace instruction rewrite on thunder-x2.
- Fixed the problem that instruction rewriting by PTRACE_POKETEXT is not reflected.
   The cause is that the instruction cache was not flushed.

 - Add instruction cache flush in ptrace_report_signal().

Change-Id: Ie9d34d3d33e1fd85aef5fe419345d82c6ca781fb
2019-09-26 02:57:07 +00:00
06960a41d9 test: signalonfork+wait: update error_injection.patch
Change-Id: Ia27e9b2fa6ec757bb05229ba3bf76e5e3bd43e5e
2019-09-26 02:34:46 +00:00
86a2aabb24 test: perf_event: add log of ThunderX2 machine (apollo)
Change-Id: I27aa1e30abdf4ed640a80b4016bcf108262ce9e3
2019-09-26 02:16:08 +00:00
b4101d9c36 brk: Fall back to demand-paging only when physically contiguous memory is unavailable
Change-Id: Id5d937b2cab7de1ad8925c9b95d85fcb620df9c6
Refs: #1353
Fujitsu: POSTK_DEBUG_ARCH_DEP_60
2019-09-26 02:16:08 +00:00
ec31d72483 freeze: add freeze_thaw test
Change-Id: I31db80b89adca9ac354a96ad21073b269d8a0e24
2019-09-26 02:13:23 +00:00
83ade5cdcd freeze: ignore multiple freeze request.
Change-Id: Ib7a7c4677137446cf7f7b387d016bacc7f0e9620
2019-09-26 02:13:23 +00:00
dec133c1dd freeze: restore state with thaw request
Change-Id: I7d6efd2c47020bedb716b6bd72d8a72b874c3cb2
2019-09-26 02:13:23 +00:00
04a528ab27 freeze: no process create in freeze state
Change-Id: Ia9cb7b8fb22d1c9d6c5a3fcdbd2873ef22f27c9f
2019-09-26 02:13:23 +00:00
8e4073c2ca freeze: allow interrupts in frozen state
Change-Id: I1d502f828ab9f9c0e1223d021979ac3dcf4d0c25
2019-09-26 02:13:23 +00:00
ff982b8594 freeze: change freeze-thaw to normal interrupt
Change-Id: Ib4dbac28f0074595e92ef316945b37ef4bc18327
2019-09-26 02:13:23 +00:00
299d47abf5 fork: memory leak detection test.
Change-Id: I9c64f8fdaee15642b3d1d2d7d869927b0bcd6511
2019-09-26 01:56:16 +00:00
f2460695c4 fork: do_fork: free resources when an error is detected
Change-Id: I0a29bb2cf886228effb088afe97d1b614728f517
2019-09-26 01:56:16 +00:00
6ce5c754f3 fork: settid: return error code.
Change-Id: I0678c266d8608b6d557b2b1e29e59bd6861314b8
2019-09-26 01:56:16 +00:00
e932f2e70c fork: release_thread: fix release of cloned thread
Change-Id: I390093bdb47a348cfec287cceaff22712df36bd9
2019-09-26 01:56:16 +00:00
bb08742467 fork: clone_thread: free resources when an error is detected
Change-Id: I922f3fddc35942ef2c67db6673980770731dced9
2019-09-26 01:56:16 +00:00
3e9fdfc0f1 fork: copy_user_ranges: rollback on error
Change-Id: Icdb8399cbce31835abcaeb783dde3ff14d30af6a
2019-09-26 01:56:16 +00:00
58f4593478 fork: fpregs: return error code.
Change-Id: I6ff150a39cd8952adad9b21d0c9f8514126ef957
2019-09-26 01:56:16 +00:00
de0e07f29e schedule: Skip save_fp_regs when the process ends
Change-Id: I32ff71a0dfcd7196d2c9e6cc1d68210933470bbb
Fujitsu: POSTK_DEBUG_ARCH_DEP_106
Refs: #1354
2019-09-25 06:43:08 +00:00
a4b83dc6d4 eclair: use snprintf instead of sprintf to prevent buffer-overrun
Change-Id: I2a27cffe303201e1738f115258f6e02058dbc63d
Refs: #1356
Fujitsu: POSTK_DEBUG_ARCH_DEP_38
2019-09-25 06:38:55 +00:00
beac6c3e80 make checking write-combine arch-dependent
Change-Id: I4c0fca7d34e69b4774141e115b8ebc03c5c1e8b3
Fujitsu: POSTK_DEBUG_ARCH_DEP_12
Refs: #1355
2019-09-23 16:42:26 +09:00
5d6715078f fix: madvise changes only the first one of vm_ranges
Change-Id: I83248c1162e28c3c24ca5f6b0933e1a8ca434d6b
Fujitsu: POSTK_DEBUG_TEMP_FIX_37
Refs: #1351
2019-09-08 14:22:00 +09:00
0615a0b00b procfs: mem: Change permission to 0600
It's 0400 in RHEL-5 and 6, but changed to 0600 in RHEL-7 and 8.

Change-Id: I9fb229e4c447eaa4570b1e2619c4fe039c07c86d
2019-08-19 01:17:03 +00:00
51cd7cbb6c arm64: rusage: Fix counting contiguous PTEs
Change-Id: I7e89c25d49dc1f6efe1c27c76c66c6fedd22af1f
Refs: #1342
2019-08-16 03:55:29 +00:00
0c1cae45fe coredump: Support signal number
Change-Id: If220bcd0865569a566e08aa53cae748fdc6317d0
Refs: #1340
2019-08-08 13:44:15 +09:00
11ef2f8092 coredump: Support threads
Change-Id: Id75ade6c87b15abcff5d772d90f77950376a32c1
Refs: #1219
2019-08-09 04:00:15 +00:00
12aef0b578 arm64: mcctrl: Fixed to search vdso_offset_sigtramp dynamically.
Change-Id: Iab5459194ca5281a1680a7fc26ae8bfaf1945a13
Refs: #1341
2019-08-08 00:48:22 +00:00
9b3450ee7e syscall offload regardless of mcexec life and death
Change-Id: I7db089993d3ee5ae6032f5085db2b67cef99fdfb
Refs: #1321
2019-08-08 00:39:26 +00:00
0d3ef65092 memory_range_lock: Enable interrupt when trylock fails
Also use read-write-lock

Change-Id: I03150b7208325ec1fe422dcd5f931e4e41c8e40e
Refs: #452
2019-08-08 00:38:55 +00:00
258156b57e support for read/write-lock and read/write-trylock
Change-Id: I609071c0f6234d0d413c8b312d8a8379abf6846e
Refs: #1323
2019-08-08 00:38:55 +00:00
8efced7bf7 mmap: Check if size exceeds available memory when MAP_HUGETLB
If size exceeds, mmap fails and set -ENOMEM

Change-Id: I4f0d6e18ee3a7c8e32e251b7ed07ee9f76305603
Refs: #1183
2019-08-08 00:31:36 +00:00
2dd8687974 flush instruction cache at context switch time if necessary
Change-Id: Ic09415ea772a9de6dca43a98168a8346ca86d3e7
2019-08-08 00:29:47 +00:00
f0bc1a6b07 cmake: Add option for "mem: per-CPU allocator cache (ThunderX2 workaround)"
Change-Id: I7156cf433b2081246d1d9b8e4fde489609676ef1
2019-08-08 00:29:34 +00:00
c52370b959 test perf_event: minor fixes(add signal handling. etc.)
Change-Id: I837d962bcaf13d3a523f80ff77f75b7fd51a98b7
2019-08-05 16:00:22 +09:00
9c78d4d249 pmu: define event validation in architecture dependent code.
Change-Id: Ia053af146ba3c89810892271cae93def6d9fd7c8
2019-07-31 16:18:50 +09:00
b6285c9aa9 pmu: Use bitmap instead of index to specify counters / events
Let the software index (or number) same as the hardware index at the
same time.

Change-Id: I847180e94bf2c57644ae2f8f571cdb4a84eac991
2019-07-31 16:17:20 +09:00
b945367c90 pmu: add ihk_mc_perfctr_value function
Change-Id: I88d25586dd470737a3eac4c3a4f1955ae6e41d64
2019-07-23 16:20:17 +09:00
0f434288e1 pmu: change to atomic register access.
Change-Id: Iebbdb8ca97e7a73f9d74138650ae18ce3a0f2605
2019-07-23 16:20:16 +09:00
b5cd813229 pmu: remove comment
Change-Id: If5819ce6f665c668f1f29724a814770957df0de0
2019-07-23 16:20:16 +09:00
7268942c35 pmu: implement ihk_mc_perf_get_num_counters.
Change-Id: I752103aedd9201fc00bda11228ca0bcf5103f12d
2019-07-23 16:20:16 +09:00
f8cad24a9a pmu: move cpu cycle event type comparison to arch dependent code.
Change-Id: If069f8893fe59e3517569b74b3a27b5267ebac03
2019-07-23 16:20:16 +09:00
2b6b3f31e5 pmu: remove pmc_{init|start|stop|reset} system call
Change-Id: I6eb65ed8c18558418c7aabfee75cd1974f4c03ff
2019-07-23 16:20:16 +09:00
ca19ee434a fix: Bug for perf_event_open error code.(LTP:perf_event_open01)
Change-Id: Ia7c942cb3c94ad5e6a0d8640f321f427cd1cd5f9
2019-07-23 16:20:16 +09:00
bb2589bac4 uti: futex_wait: Use kmalloc area for wait queue
Change-Id: Ida994c87334f9613bbf5cbda45b6b5474fd4c6be
2019-07-23 04:53:51 +00:00
e1c6e17400 uti: Use only general registers in libmck_syscall_intercept.so
Change-Id: I8e8e98bdc7e621aa111c0940d915ebe1775a10c3
2019-07-23 04:53:06 +00:00
207eba93ea uti: syscall_backward: Use kmalloc area to pass syscall arguments
Change-Id: I478a9b40b75f3d1d68c4446810a6236fe2f3a96c
Fujitsu: POSTK_DEBUG_ARCH_DEP_106
Refs: #1320
2019-07-22 03:52:44 +00:00
06af2d62c6 pmu: implement event mapping function.
Change-Id: Iac1ec99152b17a19dba0bf1a35f07724b8abc5a1
2019-07-18 16:39:18 +09:00
3e267e24cb exec: Allocate necessary number of pages to argenv area
Change-Id: I298a0de2f4e34ed774e2db7d90167dbe0d35586e
Refs: #1174
2019-07-17 06:38:35 +00:00
e58e1c6e33 uti: cmake: Add include dir pointing to libsyscall_intercept_hook_point.h
Change-Id: Iaea58725a16722d867cb27ffb4d9347b8756f9f2
2019-07-16 04:25:51 +00:00
fb924ebb9d README.md: update packages and git URL
Change-Id: I895dbece58a0ea69b39d1e07d8a16a22a2fed9a7
2019-07-08 04:24:37 +00:00
ac61577414 test: rusage: Add test private-mapping device file
Change-Id: I8b298ce598c2a5560138a1b694ccc7204d4ebbde
2019-07-05 01:18:35 +00:00
4cee9b1a27 rusage: Add comment on counting COW-source pointed-to by only fileobj
Change-Id: I082f6738dd29257c05e8a0e4b0af23dd8ffab449
2019-07-05 01:15:47 +00:00
b55e164669 page_fault_process_memory_range: Disable COW for VM region with zeroobj
This fixes ostest-mem_limits.001 which tries to anonymous-mmap 95% of
total memory. It reports a failure because:
(1) McKernel tries to allocate physically contiguous area and
    fails
(2) It turns on demand-paging
(3) It tries to obtain a page from zeroobj and fails
(4) It allocates a new page
(5) It performs COW on the page, which is unnecessary

Change-Id: Iddf0548bb9216f9bf91fb03fa21f890e599bfdad
2019-07-04 13:58:22 +09:00
aa66fe2cb1 extend_process_region: Fall back to demand paging when not contiguous enough
This fixes ostest-mem_limits.005 which tries to move brk by 95% of
total memory. It reports a failure because McKernel tries to allocate
physically contiguous area and fails.

Change-Id: I50a61cb7103fdbdbe051f0ae276a79e8e2dcdda3
2019-07-03 07:49:45 +00:00
3b74b0a093 rusage: Move pgsize_to_pgshift to arch-memory.h
Change-Id: Ia10b6e5c7d078d345347a79a3e98c06c16d28d6a
2019-07-02 09:10:04 +00:00
0267a0c8ea procfs: Fix type of number of threads
Change-Id: I7d5d17ae1e619d789cdb843f183be640efdbe9e2
Refs: #1277
2019-06-11 16:51:31 +00:00
b3b7801d51 overlay: fix /proc/PID/task/ corner cases
Change-Id: I17086c684af4c665d0c228b4a65cdb232eccf602
2019-06-07 01:48:10 +00:00
10f1fe76db ARM: set_range_middle(): fix PT deallocation bug
Change-Id: Ic8c1e1193ae33d1ae81e0df362ae1a6944c6c3b2
2019-06-06 01:11:16 +00:00
089b443aaf mmap()/shmget(): use Linux default huge page size when not specified
Change-Id: I8a9e3bed65ac1902adfaeaa254597dd30f540319
2019-06-06 01:09:38 +00:00
e9955a4bba Make heap and stack private mapping
Change-Id: I4306566b3bbbe27d206c5518a2d36d117ba4ca9f
2019-06-05 15:21:20 +09:00
dc52c8a11a crash: use fix kernel mapping instead of module space on ARM
Change-Id: I2d32dac78fc241a89bc98f8c098d4e63c8593e79
2019-06-05 14:31:48 +09:00
bc4629dfb0 ARM: fix performance counters allocation
Change-Id: Ie6c8beacf268462064f59b063d9c7b635c906dc4
2019-06-05 14:31:43 +09:00
99fba2df1c mem: per-CPU allocator cache (ThunderX2 workaround)
Change-Id: I7694524c5e9674a6f7bfcd911f8b0dbbead7df5a
2019-06-03 01:22:03 +00:00
239c95449b x86: add SMP barriers
Change-Id: I7fb36bd3d26fa272697db7c92495ce5fba34aeba
2019-06-03 01:22:03 +00:00
9dfc139eae cmake: kmod: Fix cross compile decision
Consider "arm64" to be "aarch64".
It mistakenly considers cross-compilation when compiled through spack.

Change-Id: I914df482e21517adc1105512ea3d8919ef1577b1
2019-05-22 02:34:55 +00:00
bc81d362b4 madvise: MADV_HUGEPAGE, MADV_NOHUGEPAGE: Fix error check
* Returns -EINVAL except for hugeobj and shmobj
* Fixes ostest-madvise.012 and ostest-madvise.013

Change-Id: Id1f1d6cc0c81edd204228ce5f75b641985e70cee
2019-05-13 05:54:45 +00:00
90b6aec53d get_one_cpu_topology: Fix error-handling
Fix the error handling of the following two functions:
  ihk_device_get_cpu_topology: Returns NULL when not found,
                               valid non-NULL pointer when found
  get_cache_topology: Returns NULL when not found,
                      valid non-NULL pointer when found,
                      minus error number on error

Change-Id: Ied13a61d4ab0c314477c45ea659ff2b798ad97ee
Fujitsu: POSTK_DEBUG_TEMP_FIX_21
2019-04-25 01:53:30 +00:00
0887e0de6d x86_64: mcexec: Remove "#include <asm/prctl.h>" (again)
Change-Id: Iae78954d5b520907cd6a85058e3a9fc1b842999f
Fujitsu: POSTK_DEBUG_ARCH_DEP_77
2019-04-25 10:33:00 +09:00
2c5c47344d x86_64: mcexec: Remove "#include <asm/prctl.h>"
Change-Id: I441f7a1c2e23b927fcd065fefba3ef3617356c18
Fujitsu: POSTK_DEBUG_ARCH_DEP_77
2019-04-25 10:14:19 +09:00
b9f223ceca crash: mcvtop: print proper page sizes for ARM contiguous pages
Change-Id: I2f677e64c743776de491262613b1014fe2bb7a8e
2019-04-23 08:54:26 +00:00
6297181dcd crash: mcps: print both PID and TID
Change-Id: Iafac099b1d953642509711a972962894b6111984
2019-04-23 08:54:14 +00:00
80f964e44f rus_vm_fault(): cleanup and early exit on NULL access
Change-Id: I90b18988989d4e377ed9c35df6b2e6bcdddd13b6
2019-04-23 08:53:59 +00:00
cc07d6e017 mcctrl_get_per_thread_data: Un-inline
Change-Id: I881db244ca551b3ca232918cb0b4245776f17295
Fujitsu: POSTK_DEBUG_ARCH_DEP_56
2019-04-18 02:35:52 +00:00
07c517828d procfs: add number of threads to stat and status
Change-Id: I98dd0868b20e9a1725c7d6e4f8379a4d86769780
2019-04-18 02:20:27 +00:00
75e42badf4 procfs: pagemap: Return EINVAL for unaligned offset
Change-Id: I2297818b0b31790b5452cb6f80dcba4192a7d120
2019-04-12 20:19:14 +09:00
bdccbf7356 MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
Change-Id: I41470c082308c7c1ac91f88db2229958398d2e68
2019-04-10 20:26:13 +09:00
ad3ee26d36 Fix various issues in McKernel crash extension.
Determine V2PHYS_OFFSET dynamically.
Fix x86 hole handling in 64 bit address space.
Fix ARM64 virtual address handling and support separate user-space
and kernel-space translation tables (i.e., TTRB0 and TTRB1).
Fix page table walker's lookup functionality.

Change-Id: I6b281693cdc88bd1b8fe3f4b8f40a6af3ca95cc0
2019-04-09 01:52:49 +00:00
16f8ccb35b mcreboot: do not embed sudo when run as root
Change-Id: I59ebb4c72c12af8600a6d6d0eb13f6459ccf5bc2
2019-04-09 01:52:49 +00:00
3fda54ece8 IHK: support for using Linux work IRQ as IKC interrupt (optional)
Change-Id: I2a0e59a47c229fd9271866199c3c4d30e1ddd7f9
2019-04-09 01:52:49 +00:00
4d252c2bb2 map_fixed_area(): disable debug msg
Change-Id: Id6b3d001d908432c1adb6bba875e158a1424850d
2019-04-09 01:52:49 +00:00
0cf89c5682 Linux lockless linked list implementation
Change-Id: I8bd6ee989cecac269b55b3a0ff10cf8543629001
2019-04-09 01:52:49 +00:00
0d902872a1 x86: fix xchg() and cmpxchg() macros
Change-Id: I6faf0fff8a8595734fca6247634cdae6b86483b3
2019-04-09 01:52:49 +00:00
9b6a88eeeb x86_64: Move arch-specific interrupt vector number to arch-dependent code
Change-Id: Ie3cc631ec351503a619b019432388a827d75334c
Fujitsu: POSTK_DEBUG_ARCH_DEP_75
2019-04-08 01:48:07 +00:00
96b4729cd5 ihk_mc_map_virtual: Release virtual address range on error
It was telling the vmap allocator to release a wrong address range
(physical address range).

Change-Id: I82236ac0086b5da24ac49219166abf363672d838
Refs: #985
Fujitsu: #11
2019-04-08 00:43:55 +00:00
3372bbfd23 crash extension: port for ARM64
Change-Id: I47a4f13e96718e94c08ee8bc3e9b0be38d7a8a55
2019-03-29 07:55:28 +00:00
f17c30da07 do_mmap: give addr argument a chance even if not MAP_FIXED
hugectl relies on that to check if a range is free

Change-Id: I97963eef15c866f642e884b063b5caf5d827c776
2019-03-29 07:52:57 +00:00
9a0eb915fb Test "QLMPI (qlmpi_testsuite)" on arm64
Change-Id: I079fda2231ffb19b41fe86436d51ce9f83436c9b
2019-03-29 07:48:05 +00:00
a5ded1fc06 Add KNOWN_BUGS file
Document known major (e.g. linux crash) bugs that have not been
fixed downstream and might require workarounds on specific
hardware configurations

Change-Id: I51e5d23243afd4489ce1ae25e736afc27b2c8202
2019-03-29 07:47:28 +00:00
de042b2cb2 IPI: use logical CPU ids in ihk_mc_interrupt_cpu()
Also make remote TLB invalidation arch independent,
removes POSTK_DEBUG_ARCH_DEP_8.

Change-Id: I2b0fbcfa2bfe5da07607863e3e772d8e892e8525
2019-03-29 07:45:06 +00:00
2cee82673b test: perf_event_open: Fix test program
Change-Id: Ie5af8fb3ab7452078f2c35ec14c6369d86eedec3
2019-03-29 07:42:05 +00:00
dfb3bef96d irqbalance_mck: replace extra service with service drop-in
Using a drop-in instead of an extra service avoids having to juggle
between both services (especially since irqbalance_mck did not have a
Conflict=irqbalance.service statement)

That way, we only have a single service to check for (irqbalance.service),
and system administrators should find this less confusing if they normally
rely on irqbalance.

The drop-in is also installed in /run so will automatically disappear in
the event of a linux crash or a reboot without shutting down mckernel

Change-Id: I004f4f25d9ca037e411e0bc91f4555db138ecfef
2019-03-27 15:54:25 +09:00
2dc51530f3 mcreboot/mcstop+release: support for regular user execution
Change-Id: I9088f9c49bea13826bbab6348aa5560e6d91071b
2019-03-27 14:31:08 +09:00
13758417c5 Make boot scripts arch independent and move them to scripts
Change-Id: I3f4c3e366b325df17208a41d5f842c1a2a888494
2019-03-26 09:47:38 +00:00
c32edff2bb uti: rename x86-specific 'fs' to 'tls' + arm implem
Note: the original fujitsu implementation didn't rename the various
save_fs function/desc to save_tls for some reason, might as well go all
the way though...

Change-Id: Ic362c15c8b320c4d258d2ead8c5fd4eafd9d0ae9
Fujitsu: POSTK_DEBUG_ARCH_DEP_91
2019-03-22 16:38:29 +09:00
8356ef6c96 arm64: uti: Add arch-dependent helper for context switch
arm64 performs context-switch in kernel space instead of user space as in
x86_64.

Change-Id: Ib119b9ff014effb970183ee86cfac67fab773cba
Fujitsu: POSTK_DEBUG_ARCH_DEP_99
2019-03-22 06:52:21 +00:00
63d500515a mcexec: fix printf format warning
Some old commit before -Werror was enabled got merged,
blocking other builds. Quickly fix before anyone notices

Change-Id: I5a034cef6f79e3e99b381bb1a5d97088e33a6718
2019-03-22 05:25:34 +00:00
791e8c2114 Remove mcoverlayfs code
mcoverlayfs code is now unused (technically should work on top of the
soft emulation but not well tested, and untested unused code is bad).
Remove it.

Left the unshare/bind_mount_recursive code in mcexec in a new
MCEXEC_BIND_MOUNT ifdef (only in config.h.in directly to discourage use.
it disables the ioctl as well, but the main code is still compiled to
keep up to date with linux api changes... although it's using kallsyms
lookup so it does not validate much more than "the symbol still exists")

I honestly think this should go as well (people who would want to use it
are root and could do it manually), but will give up for now.

Change-Id: I832b6a8ab19e24ed67a1a5044b1c6c32381ae0aa
2019-03-22 05:18:43 +00:00
0bb612caea Fix test of getrusage fixes
* fix: Bug for getrusage return incorrect ru_maxrss
* fix: Bug for getrusage(RUSAGE_CHILDREN) return parent info
       (POSTK_DEBUG_TEMP_FIX_72)
* fix: Bug for getrusage often return incorrect ru_stime

Refs: #1032
Refs: #1033
Refs: #1034

Change-Id: Ifba95e4cb48ae551839819eb3abe26b37da4b196
2019-03-22 05:15:00 +00:00
5e992bc195 arm64: test: Add Makefile that was left out of the commit.
Target commit:
  Test "Direct access to McKernel memory from Linux." on arm64
  Test "Scalable Vector Extension (SVE) support." on arm64

Change-Id: Ia9dc97c5cf0c4cf223423b4257745ea2101bee1d
2019-03-22 05:08:25 +00:00
08f817a654 page fault: clear writable bit for non-dirtying access to shared ranges
Change-Id: I3f3212b2aac79587f04450dfbdee9cb8a56bee04
Fujitsu: POSTK_DEBUG_ARCH_DEP_21
2019-03-22 05:03:03 +00:00
b87ac8b8c0 reproducible builds: remove most install paths in c code
In order to speed up test bot work it would be helpful to check for
identical build outputs and skip tests if required.

This removes most use of the install path in c code:
 - ql_mpi uses /proc/self/exe and looks for talker/server in same
directory as itself
 - mcexec looks for libihk.so in /proc/self/maps and use that path for
LD_PRELOAD prefix path
 - rootfsdir is not used right now but until a better fix happens just
hardcode it, someone who wants to change it can set it through cmake

There is one last occurrence of the install directory, MCEXEC_PATH in
mcctrl's binfmt code, for which the build system will just overwrite it
to a constant string at build time instead of trying to remove it too
hard. It would be possible to pass it as a kernel parameter or look for
mcexec in PATH but this is too much work for now.

Change-Id: I5d1352bc5748a1ea10dcae4be630f30a07609296
2019-03-22 05:01:32 +00:00
a48a2cd3e8 add definition of util_register_desc system call number
Change-Id: I2047d33b5667761ce8399bad78eff6ab668b6ce4
2019-03-22 04:58:24 +00:00
7c238c27c9 uti: Check syscall number definition in hook()
Change-Id: I24d226199d03d23a12710ff1cad9fef29a6feedd
2019-03-22 04:58:04 +00:00
de77d2b061 add syscall_intercept.c to the mck_syscall_intercept
Change-Id: Iff8cfd2868118b6a9db7e24e4f00537251d1346c
2019-03-22 04:55:18 +00:00
52f89cf8fa add system call execution for uti
Change-Id: Ide79726b79964e72596ed78c87ec61d1eaf7e1c7
2019-03-22 04:54:34 +00:00
c96dfb0c68 mcstop: add -k to kill processes using /dev/mcos* before shutdown
Use lsof to check for processes that still open /dev/mcosX at shutdown
time.
If lsof is not installed then the check is just not done (empty PROCS
result)

If -k is not passed, print a message listing pids of users and exit
(taking bets someone will use that and sed to kill out of mcstop+release
and rerun the stop script instead of passing -k at some point)

Change-Id: Idba7486fdede4990d9885d23f8077f33839daeed
2019-03-22 04:33:33 +00:00
21c9e57646 page fault: use cow for private device mappings
Private device mappings still need copy-on-write to work, even if
there is no page.

Change-Id: I96e3e1eea81104f6b09bb7fda1105d9eeb489155
Refs: #1254
2019-03-22 04:30:55 +00:00
312b6c171b README.md: update package names
Change-Id: Ie4d37d724e60e8e473cb60db8a77b5b3a9681f4e
2019-03-19 02:20:38 +00:00
2ce695b47b proc: resurrect /proc/PID/stat and fix a few fields
Change-Id: I8ffcfde4db78c66ea10845a0451ae2610261f832
2019-03-18 20:33:29 +09:00
e5c1fdf129 MCS lock: make implementation arch independent
Change-Id: Ie5b2182555bbe1a11a005988db069d4b38f85401
2019-03-18 09:53:30 +00:00
9e3dd53c58 arm64: sve: coredump bug fix in non-sve environment.
Change-Id: I4cba5580b6367c67bef457c0273e9b70ad4a0756
2019-03-18 08:12:37 +00:00
fe53c6e0a5 Test "Process swap (swapout)" on arm64
Change-Id: I1eecb046575480966febbcb55e5f4ade6313275b
2019-03-18 08:12:14 +00:00
e988bfaf50 test: uti: Elaborate descriptions of CT12-20
Change-Id: Idfaa5fc3bfc7b65e24873f0c5e15c31a9d129420
2019-03-18 16:59:07 +09:00
f6f48b1210 Test "Direct access to McKernel memory from Linux." on arm64
Change-Id: I6e862146c3b591e671c526302bb1aad787f6bb83
2019-03-18 06:26:43 +00:00
70b42fde5d arm64: cmake: Add -mgeneral-regs-only option.
Change-Id: I0cbdc65c4b95195831344f4006bfc85b1ea58139
2019-03-12 17:26:18 +09:00
ccb36a5849 cmake: change how warning flags are added
Setting CMAKE_C_FLAGS_DEBUG does not work as first expected:
 - set(... CACHE) didn't do anything because the variables were
initialized previously
 - We could set with FORCE but then users could not change the value
 - There is a way to only do that on initial cmake run but it has the
same problem

Thus, use a new regular cache variable directly instead

Change-Id: I20741fb385c171c6c1088bbd6c25666067e07288
2019-03-08 17:22:20 +09:00
ea7f517e3d arm64: ptrace: Fix overwriting 1st argument with return value
Since arm64 shares the return value with the area of
the first argument, rewriting the return value before
the system call execution completes destroys the first argument.

Change-Id: I959944879254d8dd3a29489a65d8f274d45338e6
Fujitsu: POSTK_DEBUG_ARCH_DEP_110
2019-03-08 08:06:19 +00:00
ac18a24a27 arm64: fix phys_to_virt() calculation to be the same as Linux.
Change-Id: Ibbe17d33fd80eacff990b053fa17d8d320c227f1
2019-03-07 16:51:18 +09:00
8880710fad README.md: few minor updates
Change-Id: I7207ab2cf6ca5b69b464e0c41d2dd0ce3e80b674
2019-03-07 13:12:39 +09:00
03a85825ed copy_user_pte: base memobj copy on range & VR_PRIVATE
Some memobjs (e.g. devobj) will not be considered 'in memobj' by
page_is_in_memobj.
Instead of trying to play whack-a-mole with the non-fileobj memobjs,
base the copy check on range's memobj and VR_PRIVATE (do not copy
MAP_SHARED mappings, so the fault handler will do the right thing™
when required)

Change-Id: Ic32cdc7766754f6559753b34845eb8c5cff6ed13
Refs: #1255
2019-03-06 17:44:11 +09:00
940eeca6f5 x86 spinlock trylock: make next initializer old-gcc friendly
old gcc versions are stupid with nested structs and need us
to initialize .tickets.head and .tickets.tail in one go

Change-Id: I0d4caf8236066e7edf4a12e3270114132ced9585
2019-03-06 06:30:30 +00:00
19b02cf4ed arm64_cpu_capabilities: flatten struct
The midr_* part of the struct was never used, and confuses older gcc
with partially uninitialized assignments that were not correct.
Just flatten the struct

Change-Id: I7a9cfe064ab97cdcd5ac50ce4fb713c4d7983bd3
2019-03-06 06:30:30 +00:00
76a0cc71fc warnings: fix broken -Wmaybe-uninitialized
These variables cannot be used uninitialized, and newer gcc versions
correctly do not bring the warning up, but this will shut up older ones

Change-Id: I2b2ea9b557196a3e7eea1e04dd1f160bd12d6e54
2019-03-06 06:30:30 +00:00
ab39798181 send_syscall: remove unused variables
Change-Id: I0a350b8c7dbf27960544dd3651941d3905f93fc6
2019-03-06 06:30:30 +00:00
0cc3496747 warnings: fix missing field in initializer
use generic struct zero initializer instead.
Older gcc used on arm also seem to have trouble with '{}',
so use '{ 0 }' instead

Change-Id: I83d43b05f8d1d44e1dd86502b48e28fe242e1db2
2019-03-06 06:30:30 +00:00
10cca81401 arm64 vdso warning: fix non-void function not returning
arch_setup_vdso() needs to return something even on panic to please gcc.
In theory, flagging panic() with __attribute__((noreturn)) should work
just the same and is a much better solution but for some reason on older
gcc versions setting the flag leads to the weak memset() symbol not
being found !?

Change-Id: Ifed100df5440ca24bb495817db9afc79f0ba6751
2019-03-06 06:30:30 +00:00
0c79de67b4 warnings: disable override-init for arm perfctr arrays
The arrays first init every field to invalid op then override a few
fields, since this is not something we want to allow everywhere use
a GCC pragma to only ignore the warning there.

Change-Id: I498546fe60d60d4b000d711e22e04c8c360b5b83
2019-03-06 06:30:30 +00:00
3fbad79afb warnings: init pte in process.c functions
pte_make_fileoff() on arm does not always init the pte, so just
init it to PTE_NULL firsthand

Change-Id: If195c1aef5b1344f13f6c0c76bb431a5fa339265
2019-03-06 06:30:30 +00:00
1b76aaa7e1 unused function warnings: add inline to static function in header files
Change-Id: I5d9bb539712a2b3e51c3ab3433a04fbb0cb0b961
2019-03-06 06:30:30 +00:00
aa3c5e91db arm64: Direct access to Mckernel memory from Linux.
Change-Id: I1a096aa5232c56382ae19d8c4e4f41d4e3e9f660
2019-03-06 14:53:16 +09:00
20d5900c35 mcstat: fix ihklib.h location
ihklib.h moved since it is no longer a generated file

Change-Id: I1ad6ff4bb8ae8c536d9ad7ee3cbeaf670ebcd11c
2019-03-01 06:24:39 +00:00
414cffd95b tests: remove calls to ihk_os_create/destroy_pseudofs
Change-Id: I04910c6a258c841437463e098fb8e02116c4f711
2019-03-01 06:24:04 +00:00
9ec0aeeab5 debug.h: merge both instances into ihk/debug.h
We do not need two debug.h files.

Take Fujitsu's STATIC_ASSERT over BUILD_BUG_ON because it is more used

Change-Id: If04c17fbb7406ab15fe86267fed8d6da460cec62
Fujitsu: POSTK_DEBUG_ARCH_DEP_9
2019-03-01 05:10:35 +00:00
06e96005a6 mcexec: restore --enable-vdso/disable-vdso for x86
Fujitsu added this ifdef together with ifndef __arch64__ and thus disabled
the option for both archs in practice; it probably does not hurt to restore...

I'm not sure I see the point of disabling the option at mcexec level though,
but who am I to care.

Change-Id: I0d4bffb6ed325edac8ae577773e19c0fff6ca2ed
Fujitsu: POSTK_DEBUG_ARCH_DEP_53
2019-03-01 05:08:45 +00:00
4606714c07 process stack: use PAGE_SIZE in aux vector
Don't ask me why this shares POSTK_DEBUG_ARCH_DEP_50 with the ksym lookups...

Change-Id: Ic3db2cd77ca88be361cefec85d8ed9deb21ffcd8
Fujitsu: POSTK_DEBUG_ARCH_DEP_50
2019-03-01 05:08:16 +00:00
a5d5baf8a8 rus_vm_fault: always use a packet on the stack
There are valid use cases where a remote page fault has no available
thread data/packet available to use, e.g. when device driver threads
need to access the data (BXI).

Do the per thread data lookup to use the right channel/tid if available,
and use mcctrl_ikc_send_wait with a new message number directly.

The fault is no longer handled in mckernel syscall forwarding code but
in the ikc handler directly in irq, this should be ok because page
faults are interrupts anyway so the code should be irq-safe.

Change-Id: Ie60f413cdaee6c1a824b4a2c93637899cb9bf9c9
2019-03-01 05:08:03 +00:00
8074445d59 README: fix background link in toc
Change-Id: Ief448fd99fddc310ea7f311798c94d0423ebf93a
2019-03-01 05:00:47 +00:00
6a456f11aa cmake: remove unused build-time symbol lookup
Everything already uses kallsyms_lookup_name or similar, this
was leftover from when the build system was ported ages ago

Change-Id: I09dd0249845df90ab2e0adc28d0eb285c0ebb64b
Fujitsu: POSTK_DEBUG_ARCH_DEP_50
2019-03-01 13:49:01 +09:00
81e665cb48 init_process: add missing initializations to proc struct
Change-Id: I4ea386ba3a8745202745bd8e35cab00c38262f65
Fujitsu: POSTK_DEBUG_ARCH_DEP_63
2019-03-01 04:39:59 +00:00
e0b9c5deec nanosleep: add cpu_pause() in spinwait loop
Probably some energy consumption saving?

Change-Id: I888f50568db8f08751abd0a002137c3b475362dc
Fujitsu: POSTK_DEBUG_ARCH_DEP_43
2019-03-01 04:38:51 +00:00
62772c8a24 gencore: Allocate ELF header to heap instead of stack
coredump() proceeds as follows:

1. coredump() calls gencore()
2. gencore() allocates ELF header to stack
3. gencore() prepares the core table and record the address of the ELF
   header to the table and return to coredump()
4. coredump() offloads __NR_coredump with the address of the core
   table

This fix prevents the ELF header from getting destroyed in the 3rd
step.

Change-Id: I770418c1658a6fdb640bb491fc076a31dfd41c22
Fujitsu: POSTK_TEMP_FIX_39
2019-03-01 04:38:28 +00:00
63d15f7dfc CMake Kbuild: fail at cmake time if KERNEL_DIR is missing
Change-Id: I66660718841d05003b87995d68bec728aa0db9ba
2019-03-01 04:38:05 +00:00
fb3f1c58a8 rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
CPack takes the source dir as is, so if it was used to build something
it will incorrectly grab the temporary CMakeCache file and cmake will
complain during rpmbuild later on.

The BuildRequires should be a separate patch but logic behind the change
is that the dependencies need to be installed in the sysroot, and
rpmbuild cannot test this, so just move them all to only enforce
BuildRequires for native build.

And while we are here, also add a new kernel_dir specfile option.

Change-Id: Ie67932798f632e6d307f8ead93bdbe043e6e8898
2019-03-01 04:37:46 +00:00
69846345de gencore: Zero-clear ELF header and memory range table
Change-Id: I0ff38c1b0e1e6ef204cb3605c0178848dbe40bfb
Fujitsu: POSTK_TEMP_FIX_63
2019-03-01 04:36:00 +00:00
b8155cc618 ihk submodule update: cpu/mem ioctl user access fix
Change-Id: If230c1012af5c1220e5927efba97a2ae38da42a0
2019-03-01 02:12:39 +00:00
f07e20a381 copy_user_pte: vmap area not owned by McKernel
Refs: #1166
Fujitsu: POSTK_DEBUG_TEMP_FIX_14
Change-Id: Iae0f1145d58ec2c14cecc14409b08a1db3b067b7
2019-02-28 07:50:16 +00:00
764948b51f test: Fix test programs for #1195
Add chmod 666 /dev/mcos0 for fork after setuid()

Refs: #1195
Change-Id: I2bec6a9a8378d246f50a9fc08a345b3235096a06
2019-02-28 00:57:22 +00:00
7da5fede8b Test "Scalable Vector Extension (SVE) support." on arm64
Change-Id: I3abaca932985a06b06887b962e769f2eac96c738
2019-02-27 06:26:00 +00:00
6810506c3d rusage: Fix available page sizes
Change-Id: I418075ff4b5341e0f5c7ff317e96461879a60f87
2019-02-22 14:08:18 +09:00
c82c2c1231 uti: Redirect uti thread futex() to McKernel do_futex()
Change-Id: I8203d0b60236e3ec72e22615a52907e1fff2c73c
2019-02-22 04:14:14 +00:00
5bc54a3bbe Fixed time processing.
- arm64: Get TSC corresponding to boot time from IHK.

- x86_64: Calculate the current time using vdso.

Refs: #1186
Fujitsu: POSTK_DEBUG_ARCH_DEP_52
Change-Id: I293ba4bbe5390d50dea44b8a5b7471f59237daff
2019-02-22 04:13:13 +00:00
07aa96ef95 arm64: Scalable Vector Extension (SVE) support.
Change-Id: I3568687913f583edfaa297d5cf5ac91d319d97e9
2019-02-22 04:07:29 +00:00
dac99f708c test: Add test programs for #1195
Refs: #1195
Change-Id: I21339f2597caf1704cc7d104e4bc5835d5270af6
2019-02-19 16:29:00 +09:00
f3c9fbf4ea rusage: Don't count PF_PATCH change
Fujitsu: POSTK_DEBUG_TEMP_FIX_86

Change-Id: Ia23f2d95c67062be3390acafad3e87f087466cdc
2019-02-18 14:50:56 +09:00
54122360e8 CMake: move CONFIG_ARM64_64K_PAGES and VA_BITS up to main CMakeLists
user code also needs these defines; there was a hard-coded
definition left out from debugging that didn't get cleaned up

Change-Id: I951fcd6a3d6bc1d1f1c3e897058908167520f7bc
2019-02-18 10:09:21 +09:00
21cf953a03 x86: disable zero mapping and add a boot pt for ap trampoline
the application processor trampoline needs the trampoline physical
address to be mapped for the few instructions between loading the
page table and jumping to normal memory area; setup a new pt for them.

Also make it use its stack where it needs to be directly.

With that, x86 can finally remove the 0 page from its init mapping

Change-Id: Iab3f33a2ed22570eeb47b5ab6e068c9a17c25413
2019-02-14 07:59:03 +00:00
c59d8db1b3 CMake: define RHEL_RELEASE_VERSION in config.h for non-rhel kernels
Change-Id: Iaa48e763be71e9cbc8dff6335810d3191bb3c177
2019-02-14 16:44:09 +09:00
abc0a7bdac mcs_rwlock: remove aligned(64) attribute if ENABLE_UBSAN
The attribute would impose 64-bytes alignment that we do not
respect later because the whole structures (e.g. process/thread)
are allocated at 32bytes boundaries with kmalloc

These are however justified for performance reason as we do not want
them on same page cache line, so just accept slower performance for
UBSAN only

Change-Id: Ia28968257675b7ae97b0391471986e6bf6485b7b
2019-02-14 16:44:09 +09:00
2f456b8752 cmake: Add ENABLE_UBSAN for -fsanitize=undefined
Change-Id: I73db5f904a7d86052aae62e67b01281763c83561
2019-02-14 16:44:09 +09:00
2a63c962fc build system switch to cmake
Remove old build system at the same time

Change-Id: Ifdffe1fcd4cfece05f036d8de6e7cb74aca65f62
2019-02-14 16:44:09 +09:00
4bdd9cf512 ubsan: remove most sprintf calls
sprintf is implemented as snprintf(..., INT_MAX, ...) which will overflow
the argument pointer for the end, then fix the end to be -1.
This technically works but we know the actual buffer size in all these
call sites, might as well do this properly

Change-Id: I807d09f46a0221f539063fda515e1c504e658d40
2019-02-14 16:44:09 +09:00
bc2a444828 ubsan: fix undefined shifts
A signed integer cannot be shifted in a way that will flip the
sign bit; make such arguments unsigned to be safe

Change-Id: Iafc060f98f899ae3ffb876ba22fdd6183fbb6e57
2019-02-14 16:44:09 +09:00
d9b2924249 Update patch for "Add test programs for large page"
Change-Id: I6ee96b677c65c5bf4b2312059abd689225c0581d
2019-02-14 16:26:20 +09:00
501531f3b3 shmobj: Don't page_unmap() when count isn't one in shmobj_destroy()
Change-Id: If9d567d61e1dc4db808a2aeee290034acf7be4b5
2019-02-14 16:26:19 +09:00
366e95856c Null-check ihk_os_t and mcctrl_usrdata pointers
Change-Id: I941c58d4ab6a0c1ce6bd53c24b552218a1716750
Refs: #1216
2019-02-14 16:26:19 +09:00
bdf5175d4c invalidate_one_page: Support shmobj and contiguous PTE
Change-Id: I15b74ee4afd8e2dc52c933925aae4a1e0d8bcc72
2019-02-14 16:26:18 +09:00
b174fb8099 move_pages: Check flags argument
Change-Id: Ia74aa463a060ecd43aa56ee08d622421f227dbfe
Fujitsu: POSTK_TEMP_FIX_78
2019-02-14 16:26:16 +09:00
e828398c8b do_mmap: don't pre-populate the whole file when asked for smaller segment
The linker maps parts of libs with different access flags,
so we cannot prepopulate the whole file.

[dominique.martinet@cea.fr: moved min and friends in compiler.h]
Change-Id: Ifbeddc0908699099cfae5ce9cc2adc578221db31
2019-02-14 16:26:15 +09:00
641d9f1b39 clear_range_l1, clear_range_middle: Fix handling contiguous PTE
Change-Id: I2609c94d7f9342fe25aa9a5cfc208375274d46fa
2019-02-14 16:26:14 +09:00
c1270cdf6d fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
Change-Id: I3ea50fc13ae5c090ba32aad4461f9741a4c35665
2019-02-14 16:26:00 +09:00
022e04b62b shmobj: Clean up code around memory_stat_rss_sub call
Change-Id: I6f678568c3c27799cd2a81f5574b96fd218e942f
2019-02-14 16:26:00 +09:00
9cfc373538 Refactor "do write back only MAP_SHARED pages"
* free_process_memory_range() always passes memobj to
  ihk_mc_pt_free_range()
* clear_range_*() don't flush page in fileobj with MF_PRIVATE flag

Fujitsu: POSTK_DEBUG_TEMP_FIX_87
Change-Id: I8d46d029b3fc51ca6f0e59d748a2fe93e324a374
2019-02-14 16:25:58 +09:00
fb24dcea2e unhandled_page_fault: Refactor architecture dependent parts
Fujitsu: REQ-12
Refs: #1012
Change-Id: I3c61f9cd3f514bdcd4a7f26e7c15043529269cf5
2019-02-14 16:25:57 +09:00
207d653b41 mcctrl: use vmf_insert_pfn for kernel >= 4.18
vmf_insert_pfn got added as a wrapper around vm_insert_pfn in 4.17
1c8f422059ae5da ("mm: change return type to vm_fault_t") and totally
replaced the latter in 4.20 ae2b01f37044c ("mm: remove vm_insert_pfn()")

Compare with 4.18 here specifically to avoid troubles when rhel
backports this change later, and avoid adding a rhel version check down
the road.

Change-Id: Ibf108e2fb6f1199f89cde6a7973f4eb55447260b
2019-02-14 16:25:49 +09:00
0a49b6eca5 Add test programs for #1190
Change-Id: Icb63e898d5882e1fab18e6af7859af50448a1d60
2019-02-14 16:25:44 +09:00
950ea678dd Reject "setfsuid: Specify mcexec tid when asking mcexec for fsuid"
This fix is rejected because it only makes the setfsuid test in ostest
pass and doesn't fix the other issues including the one in which file
I/O could be done with the old fsuid because an mcexec thread with an
arbitrary tid could handle the system-call offload request.

Explanation of the rejected fix:

  setfsuid() proceeds as follows:

  1. McKernel asks mcexec for __NR_setfsuid (set)
  2. mcexec calls setfsuid, reports the id to McKernel
  3. McKernel asks mcexec for __NR_setfsuid (get)
  4. mcexec calls mcexec_getcred(), reports the id to Mckernel
  5. McKernel sets proc->fsuid to the obtained value

  tid of mcexec on the 2nd and 4th step could be different. So this
  fix lets mcexec report its tid on the 2nd step and McKernel specify
  it in the 3rd step.

Change-Id: Id5cfeed18c64430d576a56e961bbca1ecb2e39ad
Fujitsu: POSTK_DEBUG_TEMP_FIX_45
2019-02-14 04:42:32 +00:00
cd42d186b7 uti: Report error of offloading ioctl if any
Change-Id: If4218b9fb89f34728c4aaf81bccab2dfbb0d4a87
2019-02-14 04:15:44 +00:00
66bc44f88a Readme.md: move figures to R-CCS server
Change-Id: I6a861c15402c8e925e3692b912a8df3f6f0ffce9
2019-02-13 18:26:18 +09:00
34a995d290 perfctr_stop: add flags to no 'disable_intens'
The original fujitsu code added a whole new ihk_mc_perfctr_stop_first
function, duplicating a lot of code - add a flag to existing function
instead.

Change-Id: Ic9ce0236d68f967ff72cf88e5d9f1bda5c98aa1b
Fujitsu: POSTK_DEBUG_ARCH_DEP_107
2019-02-12 05:18:22 +00:00
d0d99adfb3 Readme.md for github
Change-Id: Ib5aa5cde10acb5f5956212f8c451baedc940d123
2019-02-12 02:37:09 +00:00
d78883c692 fix to missing exclusive processing between terminate() and
finalize_process().

The process of making a child process zombie and the process of setting
the parent of the child process to process ID 1 are excluded.

Refs: #1257
Change-Id: Ic95d4d8ee92d6a4a63847e5eda20ec1ba92566ac
2019-02-08 10:25:20 +09:00
ff0395581c Register PPD and release_handler at the same time.
Fix that process will remain even if signal is received between PPD
registration and release_handler registration.

Refs: #1201
Fujitsu: POSTK_DEBUG_TEMP_FIX_64
Change-Id: I571781963578df8cedb327f19298f595cfb137a3
2019-02-08 10:20:58 +09:00
f5023c9730 page fault handler: protect thread accesses
current cpu's thread can be NULL during init, we don't want null derefs
in the page fault handler

Change-Id: I0a2c22b39cae2c258d211317cffc2408e19f3bbf
2019-02-07 02:41:50 +00:00
fe08ac4a67 arm: turn off cpu on panic
Since interrupts are disabled on panic, linux cannot reset a
panic'd core when NMI are disabled (for e.g. mcreboot/mcstop)

Just always offline it, so linux can get it back

Change-Id: If8107172375f2924e02bd4c36e24645ec38a8999
2019-02-07 02:37:31 +00:00
60dcd0e798 move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
Change-Id: I7fe86244c8707694b379e567b31de65ee2c56887
2019-02-07 10:43:47 +09:00
4d215de641 Separate mmap area from program loading (relocation) area
We need to separate the two because the heap of a PIE is created in
the area to which it is mapped.

Related commits:

b1309a5d: PIE is mapped at map_end instead of at
          user_start
c4219655: Interpreter is mapped to map_start to make a
          system call that dereferences a NULL pointer fail

[dominique.martinet@cea.fr: Also add ULONG_MAX and friend macros,
 used for data_min]
[ken.sato.ty@hitachi-solutions.com: fix execve]
Change-Id: I8ecaf22b7965090ab67bebece57c68283ba23664
2019-02-07 09:58:03 +09:00
97e0219f50 Make Linux handler run when mmap to procfs.
Change-Id: I98a3d098c5c676f33c83fa4354c623988ee591f2
Refs: #1222
2019-02-06 11:54:50 +00:00
f9d8d98af1 sysfs: add missing symlinks for cpu/node
Add the following patterns of symlinks:
 - /sys/bus/cpu/drivers/processor/cpu*
 - /sys/bus/node/devices/node*

And slightly change how /sys/devices/system/cpu/cpu*/node* are created
to avoid duplicate lookups

Change-Id: Id94a4d157da06d75f6bd450d5bd9a9e7709a1414
2019-02-06 09:55:54 +00:00
3738b70ad3 git hooks: fix submodule check sloppy match
Submodule check used to match any file containing submodule name (e.g.
lib/include/ihk/foo would match ihk and incorrectly be identified as
a submodule change) -- properly check for full name with anchors instead

Change-Id: Ib4330aec97e9da713cd3ab9e791962f2e0c8d396
2019-02-06 08:34:27 +00:00
9bf225d193 mckernel overlay: replace mcoverlayfs with a soft userspace overlay
mcoverlayfs has a high maintenance burden and does not work on rhel8's 4.18
kernel (while it works on vanilla 4.18...); instead of debugging this further
time is better spent making it independent from overlayfs.

Change-Id: I7454ae95b0fbb3373c256aa2fd83cdfec466c009
2019-02-06 08:27:25 +00:00
6fc9ec1c92 gencore: finish reintegration into arch-independent code
Change-Id: Ic2fc935aeec17c54931817bf43f67ef6da78adc8
Fujitsu: POSTK_DEBUG_ARCH_DEP_18
2019-02-06 17:23:54 +09:00
112ade484a page_table: Fix return value of lookup_pte when ptl4 is blank
Change-Id: I5926fedda182941a4b7a2fe480bffb12d4069713
2019-02-06 07:30:44 +00:00
be708674d3 Reject "do_migrate: Send IPI"
Change-Id: If77a51c9bc6a3caef502dd35a276b0dba22b4d24
Fujitsu: POSTK_TEMP_FIX_57
2019-02-06 04:11:16 +00:00
557f33a705 eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
Change-Id: I97b0e79acfd51b57eeaa6556eba880d231330f01
2019-02-06 02:47:31 +00:00
7dd0cbd9a6 ARM: eliminate zero page mapping (i.e, init_low_area())
Change-Id: I89bcce7fb286a4c5983a768534a0d3cea093040c
2019-02-04 04:22:24 +00:00
6ed2e5ffc1 Fix ThunderX2 write-combined PTE flag insanity
Change-Id: I59999a680b556acf3e22ac516f4758e3aee7f355
2019-02-01 21:03:19 +09:00
649059f2d2 contiguous PTE: Fix requested page-shift check
Change-Id: Iafc505457f7e10c94142070113870cd8b8c6922d
2019-02-01 21:01:27 +09:00
312c1168f3 test: XPMEM: Fix Makefile
Change-Id: If7b5887e9dc4d7f94bf18dc5ae95a549baa5fb58
2019-02-01 15:15:47 +09:00
d29419d336 test: Add test programs for #1242
Change-Id: Ib3b5d5b661e0cd027711a815d9da2e308cedeffc
Refs: #1242
2019-02-01 15:15:46 +09:00
9f7425c152 Add test programs for large page
Tests arm64 specific, contiguous bit based large pages as well.

Change-Id: I09edad8cfde6c23a259f1f32cfc97974d9cb63c3
2019-02-01 15:15:44 +09:00
100754f556 test: add uti tests
Change-Id: Ib59f1c4dab7cec7e67ba35ec1988f6f968a2deaa
2019-02-01 15:15:14 +09:00
6d38c34993 Merge branch 'postk_topic-contiguous_pte' into development
* Merge cd7ab307fae9bc8aa49d23b32becf37368a1603e
* Merge commit is changed to one commit for gerrit

Change-Id: I75f0f4cf6b8b3286284638ac2c7816c5257551e4
2019-02-01 15:15:12 +09:00
7f1c17fc4c tests: add 'postk_master' branch tests
Change-Id: Ie0d4cfd0921aed89d2db6083c9eb068b1cfc1984
2019-02-01 15:15:00 +09:00
25ef4e9261 Merge branch 'postk_master' into development
* Merge 53e436ae7db1ed457692dbe16ccb15511aa6bc64
* Only arm64 stuff are left

Change-Id: I6b79de1f659fa61e75f44811b639d41f9a37d6cc
2019-02-01 15:14:58 +09:00
d4d78e9c61 Following arm64-support to development branch
This includes the following fixes:
* fix build of arch/arm64/kernel/vdso

Change-Id: I73b05034d29f7f8731ac17f9736edbba4fb2c639
2019-02-01 15:14:45 +09:00
e52d748744 new_mcos_handler_info: Propagate kmalloc failure
Change-Id: If484cf32cd0bf096ffd712561dd1f73046c60cd8
Fujitsu: POSTK_TEMP_FIX_64
2019-02-01 15:11:36 +09:00
39b21e7ba9 monitor_init: Use ihk_mc_cpu_info()
Its call site is moved before numa_init() as well because
monitor_init() defines ihk_os_monitor that was used in
rusage_total_memory_add() called from numa_init().
I didn't revert this modification because I don't want to touch the
working code.

Change-Id: I602467284581ce45989dd071cfe59d3fc4827e29
Fujitsu: POSTK_DEBUG_TEMP_FIX_73
2019-02-01 15:11:33 +09:00
8db2d3beec sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
Change-Id: I466ffbaf38fe5fd2b1ca0439fa7ea4a813e226ca
2019-02-01 15:08:49 +09:00
f5320fc2b4 overlayfs: make mcoverlayfs compile for 4.14.0-115 (el7 arm64)
Use the 4.18 module as a base

Change-Id: I6c9ef66399800828e1932573da5a97573545c5da
2019-02-01 15:08:47 +09:00
0fbdcc44b9 mcoverlayfs 4.18: re-define ovl_readlink
Apparently /proc needs it; it's normally implemented using get_link if
readlink isn't implemented but proc's get_link crashes the kernel in
this case (because nameidata is only defined for open* paths)

Change-Id: I1864d6c948db879d33ea29b1b281bf84ff8eeec6
2019-02-01 15:08:45 +09:00
351fdead3b kmalloc: Fix address order in free list
The order is expected by the merger.

Change-Id: I54338caaaa1a203ab5dd39a574a25aac324142a5
Fujitsu: POSTK_TEMP_FIX_46
2019-02-01 06:07:26 +00:00
859e976348 kernel/syscall.c: cleanup? pass virt_to_phys directly to do_futex
Change-Id: I196ebe5d5cdc577fce442bcd2247d07e85d2b9ff
2019-02-01 13:19:02 +09:00
49353e252b Added check of nohost to terminate_host().
Change-Id: I796a0d98b68783dad6ce04b3a80ca01db8f8eee2
Fujitsu: POSTK_DEBUG_TEMP_FIX_103
2019-02-01 13:19:00 +09:00
452d93f14d mcctrl_clear_pte_range: fix zap_page for kernel >= 4.18
zap_vma_ptes no longer returns an error code as of Linux's
27d036e33237e4 ("mm: Remove return value of zap_vma_ptes()"),
where they decided nobody is interested in it....

Just copy the check out of the function.

Change-Id: I2eda0f91ec55a34bba96f45cc3d887bc80132a82
Originally-by: Kagawa Kodai <fj1731iw@aa.jp.fujitsu.com>
2019-02-01 13:18:58 +09:00
9e5472bb94 Fix for PAGE_SIZE / PAGE_MASK magic number.
Change-Id: Icc00594d84a33495af774096ae13f830e29be39f
Fujitsu: POSTK_DEBUG_ARCH_DEP_116
2019-02-01 13:18:56 +09:00
516ab87ab9 Copyrights: fujitsu 2018 bump
Separate copyright bumps in a different commit.
A lot of files only had the copyright change at this point; these
were probably changes I added separately in other patches but just
split these in a different commit instead to simplify git stats

Change-Id: I93cf3fc1c0fa04ee743a79c3fe9768933e6bd0d2
2019-02-01 13:18:52 +09:00
a9884453e2 vmcore2mckdump: make arm-compatible, 'fix' timeout
Change-Id: Icdb42ff47d9dff5c6a818cb8c9ae94d183b19569
Fujitsu: POSTK_DEBUG_ARCH_DEP_93
Fujitsu: POSTK_DEBUG_ARCH_DEP_102
2019-02-01 13:18:12 +09:00
0f01312040 configure.ac: remove duplicate executer/user/arch/x86_64/Makefile
Change-Id: I6b4b8e636f0194e390871600d6502d3cc94f042b
2019-02-01 13:18:10 +09:00
fb9832af6d perf counters: add arch-specific perf counters
arch perf counters are placed at start, so offset all
other counters (because placing arch perf counters at the end
wouldn't have been intrusive enough?)

Change-Id: Ifab1047872384927d9cfa0a0212327ee73545c29
Fujitsu: POSTK_DEBUG_ARCH_DEP_86
2019-02-01 13:18:09 +09:00
0e895478a1 mcctrl rus_mmap: make vma->vm_flags arch-dependent
[Dominique: renamed arch_vm_flags to arch_rus_vm_flags]
Change-Id: I5ec89b3ff80af6bf0ede342eb5816df8c78de348
Fujitsu: POSTK_DEBUG_ARCH_DEP_100
2019-02-01 13:18:07 +09:00
19659aa908 mcctrl: move translate_rva_to_rpa to archdep
Change-Id: I0efa51468a7ff4d776d8340a612e6f44eac2ed53
Fujitsu: POSTK_DEBUG_ARCH_DEP_83
2019-02-01 13:18:06 +09:00
e5de0b81ca ldump2mcdump: move PAGE_SHIFT to arch-dependent includes
Change-Id: I42e49db87e375f2dc094926e21dfc00e50484855
Fujitsu: POSTK_DEBUG_ARCH_DEP_94
2019-02-01 13:18:04 +09:00
f299fff266 stack: add hwcap auxval
Fix the AUXV_LEN to account for hwcap and remove the ifdefs

Change-Id: I303fc2c5fa4c8cea7ec9823f8580b8a66de2f58f
Fujitsu: POSTK_DEBUG_ARCH_DEP_65
2019-02-01 13:17:58 +09:00
206df33658 perfctr: remove ihk_mc_perfctr_fixed_init from api
ihk_mc_perfctr_fixed_init is only used on x86

Change-Id: I6f25d4237d45b4455ccdaae03b850dd9e8edcc57
Fujitsu: POSTK_DEBUG_TEMP_FIX_31
2019-02-01 13:17:52 +09:00
ad8a3ae962 vsnprintf: reject POSTK_DEBUG_TEMP_FIX_28 return value fix
Change-Id: I23beeca094e1b0ee84211f3ed4c33ef7e2aa62c2
2019-02-01 13:16:45 +09:00
3c1fd54a92 kernel/mem: remove unused page_table struct
Change-Id: I3593bc08206d07d7c07421240f08ac3539ddc81d
Fujitsu: POSTK_DEBUG_ARCH_DEP_89
2019-02-01 13:16:42 +09:00
ca34154a43 mcexec: lookup page_size with sysconf
page size is not defined in sys/user.h on aarch64

Change-Id: Idbdaef2519792eeb1e1a2794be0a34d67e87907e
Fujitsu: POSTK_DEBUG_ARCH_DEP_35
2019-02-01 13:16:40 +09:00
a10f4b861c do_pageout: fix direct kernel-user access
Change-Id: Ie02faca93fdb0d52d72e1f2aa1384a214c84ebff
Fujitsu: POSTK_DEBUG_ARCH_DEP_46
2019-02-01 13:16:32 +09:00
36d473c5b5 pager linux_open/unlink: always use openat/unlinkat
some archs do not have the simple open/unlink variants, while the *at
is always available -- this is simpler than making these arch-dependent
functions

Change-Id: Ic16ae5683e6e375210b1744538d291585e67a2fa
Fujitsu: POSTK_DEBUG_ARCH_DEP_78
2019-02-01 13:16:30 +09:00
342a2e1287 x86 syscalls: add a bunch of XXat() delegated syscalls
at least funlinkat is needed because these macros define __NR_x for mckernel
side and we will use funlinkat in a later commit

Change-Id: I6b6a2eee11e2fa1e42f97eab4b67e1128cd83ddf
2019-02-01 13:16:29 +09:00
238f563e88 perf: add arch-dependent counter_mask_check function
A later version would probably want to check some mask for arm64...

Change-Id: I67e13a852c3ed406fbf8ae1688539b9e069c0e81
Fujitsu: POSTK_DEBUG_ARCH_DEP_87
2019-02-01 13:16:28 +09:00
03cadbcba2 perf: add arch-dependent get_num_counters function
Change-Id: I2230af87e0c764d97115e833dccb1842946c1b94
Fujitsu: POSTK_DEBUG_ARCH_DEP_109
2019-02-01 13:16:28 +09:00
2b254f02f8 init_process_stack: change premapped stack size based on arch
Avoid consuming a large 512MB page on 64K base page arch

Change-Id: Ice491d43fd998b375ddc24f4eff7faf5d36d9f42
Fujitsu: POSTK_DEBUG_ARCH_DEP_104
2019-02-01 13:16:27 +09:00
960a6f5f90 prepare process: add magic header in program_load_desc
Check we mapped the correct region with a magic header in the struct

Original commit: d246b93a3bced92d0ac2a4a337118091b010658a

Fujitsu: POSTK_DEBUG_TEMP_FIX_76
Change-Id: If848be64af5d76844ba65b48493021637c8114f4
2019-02-01 13:16:25 +09:00
0cc3120a01 freeze(): add cpu_pause() to the frozen state loop
I guess cpu_halt is not enough on arm?... I don't get it.

Change-Id: Ic67113ae474e5b3af91734d763f1498a19f6a948
Fujitsu: POSTK_DEBUG_ARCH_DEP_82
2019-02-01 13:16:23 +09:00
9f31abf402 monitor_init: fix undetected hang on highest numbered core
Original commit: 7d38ead4f ("Fix for bug#99 Change setting value for
monitor->num_processors.")

Change-Id: I437c957fa319c014316a6064cc660e337668bb88
2019-01-29 09:32:25 +09:00
dfd23c3ebe prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
Change-Id: I04c5568a9eb78bcac632b734f34bba49cf602c4d
Refs: #1181
2019-01-22 05:40:56 +00:00
eb184419ea shmget: Use transparent huge pages when page size isn't specified
Refs: #1241
Change-Id: Ia111bfeb67d224ad1ab77e5193eac7b7d14a6577
2019-01-22 05:40:56 +00:00
13e29c0da5 mcoverlayfs: fix disabled build
Change-Id: Ia40853432547084329fc034e3942e51954e1ddf5
2019-01-22 02:15:43 +00:00
8aaf0f8551 test: Add test programs for #1166
refs: #1166
Change-Id: I9b6dd8628e8a3dcb2281e31f4b8d116e9c7852d8
2019-01-08 15:15:34 +09:00
ef9fda23a9 mcexec: Set default heap extension amount to sysconf(_SC_PAGESIZE)
Change-Id: I3ac660d33918c1fa28093ab59f3a7ead65d337d7
2018-12-12 00:38:10 +00:00
cd5cb469eb Fix "Test "Error handling improvement" on arm64"
Change-Id: Ie3c835dfe65a9754628ca221f3f563b67b0eb1a0
Refs: #727
Refs: #873
Refs: #1011
Refs: #1232
Refs: #1233
2018-12-10 19:58:15 +09:00
7a8f5043c5 mcstat: Fix test description
Change-Id: I942b351146cabd259eb164b73375a547d0fd0c30
2018-12-10 09:27:28 +00:00
cf6514def9 test: Add descriptions to "user_space" test
Change-Id: Ic14ddbfbf6bfc12d40d3284ec08e040597356963
2018-12-10 12:59:20 +09:00
96b6d773a9 ARMv8.2-LPA support
Change-Id: I12a6eac55af2e7f6a643e4e04ed59a85769f4063
2018-12-07 17:41:50 +09:00
4ba4bbd711 ContiguousPTE[12/12] modify sys_shmget/sys_mmap
Change-Id: Icfbe9fbfa6216735ec20c55da95e5b62a25fdfea
2018-12-07 08:27:51 +00:00
410bf13367 ContiguousPTE[11/12] modify ihk_mc_pt_virt_to_pagemap
Change-Id: Iff0c77cdd08a76b55c2635c6b0163ef2caade71d
2018-12-07 08:24:22 +00:00
7c231928ab ContiguousPTE[10/12] modify split_largepage
Change-Id: I0a8385af9709b11d7917eb34e8612413fefe6931
2018-12-07 08:22:56 +00:00
50de3820ad ContiguousPTE[9/12] modify ihk_mc_pt_clear|free_range
Change-Id: I75d821b81d351f4fdfd504c791543db174634261
2018-12-07 08:21:44 +00:00
c4e5bf6d6b ContiguousPTE[8/12] modify page_fault_process_memory_range
Change-Id: I79ecd08cf83aeacd3e20a7720bad66ef19573402
2018-12-07 08:17:08 +00:00
c319fe08a4 ContiguousPTE[7/12] modify ihk_mc_pt_set_range
Change-Id: Ib38530ce64a01f21107e0a6a73de7c54f214eb5a
2018-12-07 08:12:44 +00:00
24d3da32ed ContiguousPTE[6/12] modify arch_get_smaller_page_size
Change-Id: I4fe8c36cf9561b3ee895f29b112f0ac6f2418f5e
2018-12-07 08:00:32 +00:00
c4fbbb6027 ContiguousPTE[5/12] modify lookup_pte
Change-Id: Ie5aa625e5a13596ff8294699d10114aeba9d991d
2018-12-07 07:59:12 +00:00
0449437c15 ContiguousPTE[4/12] modify invalidate_process_memory_range
Change-Id: Ib59f4c5d78580a1c4344ac632d3d8f68355d7058
2018-12-07 07:56:28 +00:00
639d0e496b ContiguousPTE[3/12] modify move_pte_range
Change-Id: I20878c97bea768d1f09ab0580d744a58c070be2c
2018-12-07 07:54:28 +00:00
b6de164e9a ContiguousPTE[2/12] modify copy_user_pte
Change-Id: Ie696245a8c09e87c48426bc3e74a6f049a085471
2018-12-07 07:52:17 +00:00
d1b36aab62 ContiguousPTE[1/12] add page table access functions
Change-Id: I3291c170e66592c871f316d78d71248d26748501
2018-12-07 07:51:01 +00:00
8a2f4be443 Test "user_space" on arm64
Test: Architecture dependent separation of user space access code.
Add arm64 result files.

Change-Id: I651992c0c8bcd1da8313a35eda03612405b55b89
2018-12-07 07:46:09 +00:00
8a684587fa Fix "Test "Error handling improvement" on arm64"
* Fix test to make mcexec fail to fork()

Change-Id: I9a696787b5d4ce44541a4651622e5be60f9ef355
2018-12-07 07:40:14 +00:00
05c315857c Test "Add mcstat tool" on arm64
Change-Id: I4bf1260e999c16fe7b9c339af3833ea007277889
2018-12-07 06:24:18 +00:00
1422838dd1 sysfs-meminfo: Add page size consideration other than 4KiB.
Change-Id: I88e3aa6b9537dfff21c72b4a247fda24873216cb
2018-12-06 18:45:56 +09:00
c9fc110fc6 do_kill(): fix pids table when nr of threads is larger than num_processors
Change-Id: I0f0120c67a9b0df1cdf7d3fed34dd9c656fd317a
Refs: #1235
2018-12-05 08:17:05 +00:00
ed3c138e1f test: Fix user_space, process_vm_writev01 expected value file.
Fix to check only TPASS. (Delete pagesize)
  test: pvw_003, pvw_012

Change-Id: I4f9c3c42b855d419f3db457fbb5e7865da85eee8
2018-12-05 15:51:52 +09:00
60c97d0e60 Test "mbind support" on arm64
Add arm64 result files.

Change-Id: I32e8d4e1346076683e7d55e8e928d168e439eaca
2018-12-05 11:27:03 +09:00
95e90c727e Test "Error handling improvement" on arm64
The following test set:
  execve: fix memory leak
  add: NULL check for master_channel at IKC interrupt_handler.
  Fix the check routine for elf sections (Fujitsu: POSTK_TEMP_FIX_77)

Change-Id: I16c2a341c48f6df10a4839be08b93ea16bda8fbe
Refs: #727
Refs: #873
Refs: #1011
2018-12-05 02:01:29 +00:00
ec844bb6e3 Test "fix: Bug for getrusage" on arm64
The following test set:
  fix: Bug for getrusage return incorrect ru_maxrss
  fix: Bug for getrusage(RUSAGE_CHILDREN) return parent info (POSTK_DEBUG_TEMP_FIX_72)
  fix: Bug for getrusage often return incorrect ru_stime

Change-Id: I6734b1e34565d5d2715f9901a04ba5b6f0278032
Refs: #1032
Refs: #1033
Refs: #1034
2018-12-05 01:58:44 +00:00
a11d4d7a9d Test "mcexec_destroy_per_process_data: System calls delegation can not be terminated in error when the last process that closed /dev/mcos0 is a child process." on arm64
Change-Id: I6bc3023c1fa6089bc2ca6365b59bbab384b3e1d7
Refs: #882
2018-12-05 01:43:31 +00:00
0ee446923a Test "make sure to context-switch to idle thread when thread's status is PS_EXITED" on arm64
Change-Id: I757d529e49655e9010022f10414e4d6c9eb4c059
Refs: #1029
2018-12-05 01:21:48 +00:00
01b2a1d213 Tests: dust off x86_64 mem_dest_prev
Change-Id: I445ea0e8ae2cd631c775718a86a64fd2ecb90f35
Refs: #1228
2018-12-04 10:05:39 +00:00
52cd57fed2 memory/x86_64: fix linux safe_kernel_map
init_linux_kernel_mapping is called in setup_x86_phase1 way
before arguments are setup, but we can access kernel boot args
directly and use that, so ugly fix for now.

Change-Id: I285ecc31c6646d6d18566d411b09ae3190e8101e
Refs: #1228
2018-12-04 10:05:03 +00:00
bbc39480d2 Fix test programs for "execve: fix memory leak"
* Fix README

Change-Id: I90fe1fbb26569bbab5a34638b5f357d7000eda5d
Refs: #727
2018-12-04 10:02:42 +00:00
8521b98730 execve: Call preempt_enable() before error-exit
Fix "execve: fix execve with oversubscribing".

Change-Id: I4de3f5d44b1703db392f3da75196faa1e12d5845
Refs: #727
Refs: #1072
Refs: #1232
2018-12-04 09:43:19 +00:00
da02f76a25 mcexec: Fix error handling of init_worker_threads
Refs: #1233
Change-Id: Icce49c996d69b3cf64a71e7bd470421f329c881f
2018-12-04 09:40:24 +00:00
dbe5e99cf9 Fix test of "make sure to context-switch to idle thread when thread's status is PS_EXITED"
Change-Id: I62ea813656805b6250b0465853e8fa2918b0c86b
Refs: #1029
Refs: #1227
2018-12-04 08:17:54 +00:00
6b293409e5 mbind: Fix test programs
Refs: #1226
Change-Id: I12bf807812d93b7eca8f452e70e70e7c4e32f6a3
2018-12-04 08:17:13 +00:00
b94247c478 Test "signal: When the process receives a termination signal, it first terminates mcexec." on arm64
Change-Id: I1be32b991a45f0892146d93a9e6d6be9199faf59
Refs: #870
2018-12-04 05:07:32 +00:00
556a64ac5e Test "signal: When the process receives a termination signal, it first terminates mcexec." on arm64
Change-Id: I5c8ab90ffd5c5da30162d606f4d86dca9d387b5a
Refs: #863
2018-12-04 05:06:07 +00:00
3f11c1aee5 Test "Wait for LWK to run at shutdown." on arm64
Change-Id: I96785dda7a1a7eb36ceeb31401d71b4e40efb185
Refs: #898
Refs: #928
2018-12-03 20:06:37 +09:00
de70eac619 mcstat: Fix error propagation
Change-Id: Ib4a053d5b9ba5eb0d32c46be7c7fcd0be10cb97b
2018-11-30 14:29:14 +09:00
2ba3ec8a4c mcstat: Fix memory related stats
Refs: #1237
Change-Id: I0574cd71fe3b07aeda3ef981bd82d04ce5862f4f
2018-11-30 05:18:48 +00:00
394a1ef3c5 mcstat: Fix array of status strings
More error checks are added at the same time.

Refs: #1223
Change-Id: I406066a6ba0853584d6e1820dde74721ce2682dd
2018-11-30 14:05:21 +09:00
1954aec0ea perf_event_open: Propagate return value
Refs: #1236
Change-Id: I61a4683a533fb199a73a99bc7b2e6f2638212000
2018-11-30 04:10:54 +00:00
2b1b82b242 qlmpi: Refactor test programs
Change-Id: I3dd74eda1b77aea529f9cc044177b6c29185b6df
2018-11-29 10:33:11 +00:00
502463ed9e test: Fix user_space, testing use of copy_from_user / copy_to_user
Change-Id: I2caef1ba6597f693dc4f773ef8fedbd837c45ce6
2018-11-29 19:32:04 +09:00
715f67f32f mcreboot.sh: Fix error handling of BUILDID mismatch
Change-Id: I29d78c4739679e0b3229cc6fa28816f1ceee332c
2018-11-29 19:19:09 +09:00
82a57d5f55 test: Add MCK_DIR to mck_test_config.sample.in
Change-Id: I9ed1b0433fc6b8eeb1cb024be2d33263e3283ab7
2018-11-29 12:50:29 +09:00
56abe988f3 test: Fix user_space, testing use of copy_from_user / copy_to_user
Change-Id: I2caef1ba6597f693dc4f773ef8fedbd837c45ce6
2018-11-29 11:32:42 +09:00
68c581f721 test: Fix 898 and 928
1. Catch up with the interface change in
   ihk_os_destroy_pseudofs() and ihk_os_create_pseudofs()
2. Expect ihk_os_shutdown() to return zero when the OS had been shut
   down

Refs: #898
Refs: #928
Change-Id: Ic430550ebfd5cd21164eefaed155fe769adf8395
2018-11-28 02:19:37 +00:00
6ca5aaa1fc configure: Fix BUILDID (again)
The previous commit made BUILDID use git for submodule, but for complex
git setup (e.g. worktree) and older git version or dead .git 'link' it
would blindly rely on the existence of the .git file even if git does not
actually find anything.
This would lead to possibly empty BUILDID which would fail building.

Just always run the git command, and echo the version string if it failed

Change-Id: Ied268d2150a30dc1146498e15fa8394afc8a8d0d
2018-11-27 17:15:27 +09:00
b2a58ce3e3 Test "Confirm build ID of mcexec, ihk, mckernel" on arm64
Change-Id: Ia5fa6d6d062e8d845c7fedca1b6cc50fbeab1860
2018-11-27 08:12:28 +00:00
cfcf0137eb Test "Exclude areas not assigned to Mckernel from direct map of all phys." on arm64
Change-Id: Ida0d1f13f4a14c2ee219325aaa4b2cac1476c991
2018-11-27 05:29:15 +00:00
00395d68d4 Test "mcexec additional options (h, m, n, O, stack-premap)" on arm64
Change-Id: I85d5deb0433cc1208e4b6837dcc6d6dc2a7b7b52
2018-11-27 05:12:43 +00:00
dc1f96fee3 Add set_cputime() kernel to kernel case and mode enum.
Change-Id: Id4584389f39f255335d3bf7b5606f054f108ad51
Fujitsu: POSTK_DEBUG_TEMP_FIX_84
2018-11-27 05:03:39 +00:00
c585a37440 move mcoverlayfs kernel version check from mcexec.c to configure
While we are here:
 - fix uname -r (single quote?!)
 - add compat for rhel8 (el kernel and version is 4.18)
 - also remove linux version check in mcreboot.sh, trust configure check

Change-Id: I14726d4374b0dfd941640096044ea1d5d88bfcb8
2018-11-26 12:09:00 +00:00
98aa633856 add attribute converted flag
Change-Id: I215e42fa87752d16b8c9744b02d063098cba0af7
2018-11-22 06:04:34 +00:00
ddde519263 Test "rus_vm_fault: If page fault occurs in a thread that has not processed system call offloading, incorrectly return to normal." on arm64
Change-Id: I3dc98d8994228ad27cfdf9ca96a0a76e544bc947
Refs: #923
2018-11-22 05:27:56 +00:00
f240671fc8 Test "ptrace: support for attaching child_process to parent" on arm64
Change-Id: I752542b6bfbf023d22e91f909518660afbff813c
Refs: #885
2018-11-22 04:54:29 +00:00
cf113d392a Test "/proc/PID/maps support add" on arm64.
Change-Id: I0585ae6257b5c0269760dd7f23ba75b83dd7ac2c
2018-11-22 04:53:04 +00:00
9e57db5427 Test "sigaction: support for SA_RESETHAND on x86_64" on arm64
Change-Id: I6154134d53d1ee0344e4bc344f302ffaf810c618
Refs: #1031
2018-11-22 04:51:36 +00:00
739472bd86 Test "xpmem: support for fork()" on arm64
Change-Id: I12c628312157f35e239d3c5e67fa38adf156406b
Refs: #925
2018-11-22 04:50:58 +00:00
136b749349 configure.ac: Fix BUILDID
Change-Id: Id9717422c3d5d2de51570d4672864dbd271ad0fc
2018-11-21 17:02:45 +09:00
ae9a1f39df ihk_ikc_recv: Record channel to packet for release
ihk_ikc_release_packet takes the channel and puts the packet into its
free-list.  This fix makes it easy and safe to identify the proper
channel.

Change-Id: I5584b1e8a3ed675c2f9d68f0b5ed331b909197f6
Fujitsu: POSTK_DEBUG_TEMP_FIX_89
2018-11-21 17:01:58 +09:00
10dc87dd3f mcreboot: check on SELinux
Change-Id: I2c3706c04c7977ec22407358232d7c3a21abdc14
2018-11-21 07:52:10 +00:00
724e0eb7d0 mbind(): Fix memory_range_lock deadlock.
Fixed the problem of "return error/goto out" while
locking the memory_range_lock in mbind().

Change-Id: I980a7a440f652b60379acae3cb3575211a749774
Fujitsu: POSTK_DEBUG_TEMP_FIX_100
2018-11-21 16:49:48 +09:00
04e0456232 set_mempolicy(): Add mode check.
Fix a problem where no error is returned even
if MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES are
simultaneously specified in set_mempolicy() mode.

Change-Id: I06e695baf869daee8bc64179748cac27b64e914b
Fujitsu: POSTK_DEBUG_TEMP_FIX_99
2018-11-21 16:49:40 +09:00
6626204c99 set_cputime(): interrupt enable/disable fix.
Check interrupt enabled state in set_cputime() instead of enabling
them unconditionally on exit.

Change-Id: I99212855f33f5535f67f045665bf5e025c55b690
Fujitsu: POSTK_DEBUG_TEMP_FIX_98
2018-11-21 16:49:30 +09:00
190039f5d9 arch_cpu_read_write_register: error return fix.
Fixed an issue where errors generated in arch_cpu_read_write_register()
are not transmitted to the caller.

Change-Id: I05d7d872eab834918220cf18f628aee37208a156
Fujitsu: POSTK_DEBUG_TEMP_FIX_94
2018-11-21 16:49:21 +09:00
583cb94667 mcctrl: remove in-kernel calls to syscalls
Since 4.17.0, kernel cannot call syscalls directly because the calling
convention can be different on x86_64, as explained in this email:
https://lore.kernel.org/lkml/20180325162527.GA17492@light.dominikbrodowski.net

Use the ksys_* alternatives instead when possible, or for readlink use
do_readlinkat (and use readlinkat all the time to simplify ifdefs)

It might be possible to change some of these without ifdefs, but for
example ksys_unshare only got introduced in 4.17 so we need to keep some
syscall calling...

Change-Id: Ic47e184b29ef8b21731b2eae6193b0af2548b872
2018-11-21 16:42:26 +09:00
db4d19e419 Add crash utility extension
Change-Id: Ia3dadecdd4605c3ee74d1b5242f67486c675faa7
2018-11-21 07:40:00 +00:00
04c11f35e9 xpmem: Add xpmem_openat
In arm64, glibc-open of /dev/xpmem is hooked in sys_openat. This
commit adds xpmem_openat which is called by sys_openat.
This commit silently applies copy_from_user fix to sys_open as well.

Change-Id: I3b4f7bf0e152c359250bb2b56910db9192390cb1
Fujitsu: POSTK_DEBUG_ARCH_DEP_46, POSTK_DEBUG_ARCH_DEP_62
2018-11-21 07:39:56 +00:00
e12d5ed341 Expose McKernel version in /proc/mckernel
Change-Id: Ica0fbb0ff70a4ff2559e92738926279a3ae78a21
2018-11-21 07:39:54 +00:00
1253f4d18c mcexec shebang: delete spaces *before* path as well
Apparently, a shebang '#! /bin/sh' should work.
Will add some ostests for these...

Change-Id: Iab8ba8e3cc7e434c98742f71fe7db3c425f08278
2018-11-21 07:39:51 +00:00
527adedaa3 madvise: Add MADV_HUGEPAGE support
Since McKernel allocates hugepages by default, we could consider that
madvise call with MADV_HUGEPAGE is supported.

Change-Id: Ibdaa6f77416d029a1d17210773ef79539ba04b1c
2018-11-21 07:39:26 +00:00
525b90d028 flatten_string/process env: realign env and clear trailing bits
envs are stuck after args which are now possibly unaligned, and used
from a non-aligned pointer in prepare_process_ranges_args_envs (env)

The memory immediately after args/envs is copied anyway with memcpy_long,
so make sure the bits are initialized and realign env correctly

Fixes: 70e52faf36 ("flatten_strings: do not return unused trailing bits")
Change-Id: Ic747e947d151c0eea65dec36bc9c888cf6e0c394
2018-11-21 07:39:16 +00:00
38e68f358a Add kernel argument to turn on/off time sharing
Add "-T 0" to mcreboot.sh if you want to turn off time sharing.  When
it's turned off, McKernel doesn't activate interval timer when the
length of per-CPU run-queue is larger than one.

Change-Id: I2cedc1b30a9cd9a0f4608a32ecec0a0d58c6225e
2018-11-21 07:37:01 +00:00
7a3f4d7501 mcctrl rhel8 compat: remove unneeded RHEL_RELEASE_CODE check
it was meant for 3.10 kernels, so the regular < 4.0.0 check
will work for el7 and older kernels as well

Change-Id: I807f030f6303c9c3d17b0d80de55c256a3479486
2018-11-21 07:36:50 +00:00
1a5b10277f mcexec: load_elf: disable execvp for within-mckernel execs
the libc takes care of trying execve as many times as needed for
execvp, it's not a kernel call.

Also, sneak a double-free fix (desc was not reset properly in case
load_elf_desc_shebang failed)

Fixes: b1681f4a3affff ("mcexec/execve: fix shebangs handling")
Change-Id: If8e3d7ae53acdeffc0331ae8621e0832fcfa406f
2018-11-21 16:17:58 +09:00
a59c55c188 mcexec load_elf_desc: print error after returning
Running "mcexec dfsafds" did not print any message in normal use.
Rather than looking for which message shows in debug and turning it into
eprintf, add a single coherent message (more shell-like) at the end and
turn other messages off.

There is a small loss of information but this is equivalent to what
shells give (a single errno value with no details), and it is now easy
to add --debug to mcexec to see more information if required

Change-Id: Id2c3a47880b7d1d7467883351e6e7af561f91bbf
2018-11-21 16:17:58 +09:00
1d6a078afa mcexec: add --debug-mcexec
We already have debug statements compiled in, add a toggle for it
Also fix case indent for 's'

Change-Id: I1104ee57d571b82ec5e061f22cd44033a5c7fc39
2018-11-21 07:16:54 +00:00
fb98664f49 clone_thread: Add arch_clone_thread()
Fujitsu: POSTK_DEBUG_ARCH_DEP_23
Refs: #969
Change-Id: Ic15765b8c9e956c95fc50b333b01464d87450d3c
2018-11-21 07:10:01 +00:00
9db8d115d9 overlayfs: rhel8 compat for the 4.18 version
rhel8 is a 4.18 kernel but they've already backported some later fixes.
Instead of relying on the kernel version, the changes removed some defines so
we can check for the define presence to make the code more robust to kernel
version wilderness instead

Change-Id: I6cf5548a7b73a7394405daf850f715a1e20ab0b4
2018-11-21 16:06:31 +09:00
e26e693e58 mcoverlayfs: update and compile new overlayfs for 4.18 kernels
This newer version is much simpler than the old ones:
 - the options are noop, this lets the code simplify all the allocating
of a new option struct and passing it around
 - ovl_reset_ovl_entry was added and called all the time, but the
mechanism that made this required is gone in this kernel version

On the other hand, one new thing in this version:
 - newer kernels check the stacking depth of filesystems now, and we are
reaching the default limit of two with our setup. Bump it to three here.

Also, while we are here, make `make` fail if the requested directory does not
exist, instead of infinitely recursing into make modules in the mcoverlayfs
directory...

Change-Id: I45050d693a0aa6fd3027deaf417c29876ef6a1ea
2018-11-21 16:06:31 +09:00
fc2775c932 mcoverlayfs: add new base from 4.18.14
This just lays out new files so the next commit is easier to review;
nothing changes here

Change-Id: I66669877d2d10632f5436c0eeb32248cd4c8b996
2018-11-21 16:06:31 +09:00
6581f9b4b2 mcctrl syscall: compat for newer zap_vma_ptes
newer versions of this function no longer return an error on the basis
that "no-one checks what it returns anyway"........

See linux 4.18's 27d036e33237e ("mm: Remove return value of zap_vma_ptes()")

Change-Id: I8fb9f060e3e145cc2db21738585c9ee7f1445f74
2018-11-21 16:06:31 +09:00
3a90521489 mcexec: fix strncat bounding
strncat must not look at the appendee's length, but at how much
is left where we're appending.
This API is stupid anyway, where is strlcat when we need it...

Change-Id: Icdf418083146420a06f8ba5ffdf882982610d39b
2018-11-21 16:06:31 +09:00
03802052ed mcctrl: add handling for one more level of page tables
newer linux got a 5 level page table now, try to handle that.

Some of the macros will be no-op (e.g. loop only on one iteration) on
architecture/kernels with only 4 levels but the code needs to be there
to compile

Change-Id: Ifc6304cbb066dce7d4e30962687ae05d7e034730
2018-11-21 07:03:24 +00:00
c21485d427 mcctrl: include linux/cred.h
The header defines __task_cred and other macros we use, and has always
existed; we must have gotten it indirectly on older kernels, it doesn't
hurt to always include

Change-Id: Iacfff0365e7a21e6247eea42606bbbf1dfccc077
2018-11-21 06:38:08 +00:00
18d50e48dc mcctrl: lookup for alternate syscall names
on newer x64 kernels (config option?), syscalls can be renamed to allow
both x64 and ia32 versions to coexist. Look up either name

Change-Id: I2f55cc804d3eee948ee1ed6d18c69c75bd2f652c
2018-11-21 06:38:08 +00:00
a2be475ae4 mcctrl control: replace cpu_isset by cpumask_test_cpu for new kernels
Change-Id: I60635118e5ce7281de97e024c626ac40d1a4aa36
Fujitsu: POSTK_DEBUG_ARCH_DEP_54
2018-11-21 06:38:08 +00:00
38f683d1d0 mcctrl control: task start_time changed to u64 nsec
Change-Id: I1128c20cf836d20b6e84d7ec58cf8dfb075297da
Fujitsu: POSTK_DEBUG_ARCH_DEP_74
2018-11-21 06:38:08 +00:00
59828db5c9 mcctrl archdeps: rename vdso_image_64 to _vdso_image_64
The symbol appears in some header in some linux version,
it's still not exported so we need our own lookup anyway; just rename it.

Change-Id: Ia4bce85988641c96fa3f5a0ae1d42c25c713b6c2
2018-11-21 06:38:08 +00:00
1a3c73468f shmobj: Fix rusage counting for large page
Fujitsu: POSTK_DEBUG_TEMP_FIX_88
Change-Id: I852fe804bddf6da5b93a2ac72b0461ee63c98d46
2018-11-21 04:51:57 +00:00
85c936a6cb mcexec: fix terminating zero after readlink()
Change-Id: Icb5432f157ceb2182d93e2d327cfa63ad02a8c0e
2018-11-08 17:01:22 +09:00
6f9fef2b13 procfs: Make /proc/<PID>/mem unwritable
refs: #1177
Change-Id: Ibb319221155547febf9126e05a9e322bd9f140cc
2018-10-26 08:58:31 +00:00
cc1d39e55d mcctrl_perf_enable: Fix type of integer constant
Change-Id: Ib98eca85a9962520dafdd08b8fc223a6a83bafd0
2018-10-24 14:56:26 +09:00
fd8bed670e ihk_os_setperfevent: Return number of registered events
In addition to that, mcctrl_perf_set is modified so that it updates
usrdata->perf_event_num with number of registered events.

Change-Id: I3f343176f55b06d3baab0b0fe34e240f39706cf6
Fujitsu: POSTK_DEBUG_TEMP_FIX_80
2018-10-24 06:16:41 +00:00
24a3b236a0 Update .gitmodules to point IHK at github
Change-Id: I712f4cf2fb012d2b268f0881a156268024df57b9
2018-10-24 11:20:13 +09:00
27e55b8cf1 mcreboot.sh: Fix error reporting for missing argument
Change-Id: I3af99d7a117d4401c2e0a143fa74513094a53302
2018-10-18 12:06:58 +09:00
70e52faf36 flatten_strings: do not return unused trailing bits
Trailing bits were displayed in proc->saved_cmdline, displaying
uninitialized data to the user in /proc/<pid>/cmdline

Change-Id: I74831c8c68dd2f2197b35e9b49aaaae29c4c1dd5
2018-10-15 08:35:50 +00:00
8db36c3828 mcexec: do not resolve links in lookup_exec_path
This would incorrectly make "mcexec sh -c './script.sh'" run with
/bin/bash instead of /bin/sh (which is important, because bash behaviour
changes depending on how it is invoked)

Change-Id: I80610cf442c6c3ecacfa23e8ed15652bc8d4e3f7
2018-10-15 08:35:41 +00:00
06dd71a7e0 Revert "procfs: add '/proc/pid/stat' to mckernel side and fix its comm"
This reverts commit b70d470e20.

That commit had been landed too fast after a mistake during migration
from old to new gerrit that didn't keep -1 vote ; it needs some fix

Change-Id: Ifc8a23e42449dfe471049270b4706e9b137e096e
2018-10-12 10:54:14 +09:00
01fe83dcb3 do_mmap: change addr to uintptr_t
Change-Id: I7df45e125387083aef7e62b046c20b7422f60f22
2018-10-11 09:24:23 +00:00
c86d168165 procfs: handle 'comm' on mckernel side
Change-Id: Ie68514ba3e5161b931b88eeee9e8a2267ee69354
2018-10-11 09:19:42 +00:00
a032dc3d1b procfs: use length from snprintf instead of recomputing
Change-Id: I75ba4cf5c2e94798d183728c11bb34032cdddf5a
2018-10-11 09:17:58 +00:00
201fa7fb55 fork: copy saved_cmdline from parent process
This fixes empty children names for forked children.

Change-Id: I9512f0981d2a241c106ee3e8500f2084ef61a660
2018-10-11 09:14:14 +00:00
dd676f7149 saved_cmdline: only allocate necessary space
Change-Id: Ibb3fe66b46485a28c15e45dca9213f42f5afaa1c
2018-10-11 09:13:15 +00:00
a751e96b1a Add mck_num_processors symbol pointing to num_processors
the 'num_processors' symbol is also used by linux, so trying to load all
symbols from linux and mckernel at the same time renders either symbol
inaccessible (the first to be seen is kept by default).

This provides an alternate name for the mckernel symbol, thus letting us
access both more easily if required.

Change-Id: I8074d4f9f9ac45717df9a8df16be710ff762e161
2018-10-11 09:12:04 +00:00
c3bfa3f6a9 move BUG_ON, panic and kprintf define to debug.h; add BUILD_BUG_ON
these functions are more logical to keep together there as they depend
on each other.

Also add a comment about the __printf attribute, if we have a quiet
period it would be useful to enable and clear the thousands of
warnings...

Change-Id: I47d3891c9cd87da28b2883c29384959f5abd1459
2018-10-11 09:03:53 +00:00
1e1fa4f70d trivial warnings fixes (unused variable/function)
Change-Id: I71cedd2c09eeb5d2c2fd2e988dfdde0877627abc
2018-10-11 09:03:53 +00:00
39f9d7fdff Handle hugetlbfs file mapping
Hugetlbfs file mappings are handled differently than regular files:
 - pager_req_create will tell us the file is in a hugetlbfs
 - allocate memory upfront, we need to fail if not enough memory
 - the memory needs to be given again if another process maps the same
   file

This implementation still has some hacks, in particular, the memory
needs to be freed when all mappings are done and the file has been
deleted/closed by all processes.
We cannot know when the file is closed/unlinked easily, so clean up
memory when all processes have exited.

To test, install libhugetlbfs and link a program with the additional
LDFLAGS += -B /usr/share/libhugetlbfs -Wl,--hugetlbfs-align

Then run with HUGETLB_ELFMAP=RW set, you can check this works with
HUGETLB_DEBUG=1 HUGETLB_VERBOSE=2

Change-Id: I327920ff06efd82e91b319b27319f41912169af1
2018-10-11 08:54:13 +00:00
3e3ccf377c compiler.h: add READ_ONCE/WRITE_ONCE macro
These macros are needed to make sure the compiler does not optimize away
atomic constructs such as "while (!READ_ONCE(foo))" loops that do not
modify foo within the loop

Also move the barrier() define where it belongs while we are here, it is
needed for READ_ONCE/WRITE_ONCE and including ihk/cpu.h here causes
include loops

Change-Id: Ia533a849ed674719ccbc0495be47d22a3c47b8f8
2018-10-11 08:54:13 +00:00
13e71ac9dc pager: minor cleanups
- remove unused MF_END (that only makes sense for enums without holes,
  this one is a set of bits masks)
- remove useless goto in pager_req_create()
- init maxprot to 0 from the start, it's not used in the error cases
  (except for debug print)

Change-Id: Ic56c0754824b99f8a7e45fa8e99b8fe3e7c7e592
2018-10-11 08:54:13 +00:00
b1681f4a3a mcexec/execve: fix shebangs handling
There were mainly two problems with shebangs:
 - Suffix arguments handling e.g. '#!/bin/sh -x'
 - Recursive handling e.g. script1 fetches '#!/path/to/script2'
and script2 itself has a shebang
 - (did I say two?) running shebang would replace argv[optind] instead
of appending e.g. script with '#!/bin/sh' and running './script -c'
would run '/bin/sh -c' instead of '/bin/sh ./script -c'

There also are two places where this needs parsing:
 - starting a fresh program from mcexec
 - starting a new program from execve in mcexec

The first was easy to fix as we already had argv around, but the latter
required a new way to transfer the 'new argv elements from the script'
to mckernel to append before its argv -- it used to be 'desc->shell_path'
but that was no longer used at some point and just one keyword is not
enough to handle this properly.

This commit does:
 - Refactors the lookup_path + load_elf_desc that was only done at most
twice in its own function that loops indefinitely and use that in both
situations described above
 - Transmits the argv addition in the transfer to mckernel after the
desc; mckernel allocates 4 pages (hardcoded) for the descs and we will
hopefully have room for the script arguments on top of that... (there is
no guard!!!)
 - Change flatten_strings to allow prepending a flattened string instead
of a single string.
Note that the flatten_string change also brought in a difference in the
format, to have the full length embedded within the string, the latest
slot that used to be zeroes now contains the position of the end of the
buffer (where the last+1 string would be if there had been one)
This required a trivial change in mckernel prepare args function that
used this property for no real reason.

Hopefully things work™, this probably warrants adding a couple of new
ostests...
 - create a couple of scripts with recursive invocation/arguments and
check their own argv.
 - execute "mcexec script args" and "mcexec sh -c 'script args'"

Change-Id: I2cf9cde5c07c9293f730de89c9731bd93dbfa789
Refs: #1115
2018-10-04 14:31:02 +09:00
1226e692d9 mcstat: Install mcstat.1
Change-Id: Id5af2f56ef9cc9c444bfc0500190f52ffc779936
2018-10-04 02:52:18 +00:00
73ea4b1ce9 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC timeouts
Change the return value from -EINVAL to -ETIME.

Refs: #1167
Change-Id: I87fa57bb45d0036b7e4b25366aa7b7ce6fb2c764
2018-10-04 02:44:22 +00:00
09f663c246 mcctrl procfs: check entry was returned before using it
Change-Id: If66e95d217d1045e2e65bc5978bba020e3fa7c0d
Refs: #1116
2018-10-04 02:41:16 +00:00
9b77630c8b mcexec: readlink and use full path for reexec
This fixes comm on linux side, showing mcexec instead of 'exe'

Change-Id: I9345d7a23dccb36b3a1e17fd3e7491eaeca54e5b
2018-10-04 01:03:10 +00:00
b70d470e20 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
This lets ps show the proper executable name instead of mcexec's comm
on linux side

Change-Id: I62732037451f129fc2e905357ebdc351bf7f6d2d
Refs: #1114
2018-10-04 01:01:19 +00:00
ecc850dfef procfs/do_fork: wait until procfs entries are registered
Do not return from fork() until mcctrl side has created mckernel's
procfs entries for the child PID.

This fixes programs doing fork() immediately followed by opening
/proc/<child pid>/something, and would get some error

Refs: #1189
Change-Id: Ie10ea56b65c55f59e96a1ab6ef83a1070e36048d
2018-10-04 01:00:52 +00:00
b11377f2e9 Increase IKC master channel size
Change-Id: I183878bb22b848e1230f8028947cf46485293471
2018-10-03 06:23:17 +00:00
ed1edb152b ptrace supports threads
Fujitsu: POSTK_DEBUG_TEMP_FIX_53, POSTK_DEBUG_ARCH_DEP_44
Refs: #771, #1179, #1143
Change-Id: Ie17ece6864f0eeb0c0e550f4e369abb77980a0d0
2018-10-01 03:57:16 +00:00
28c434a230 test: Fix test for 898 and 928
Change-Id: If939dda7ccdcf568abfa42ccab7ff6be2b983cc2
2018-09-28 02:55:55 +00:00
daa234d8b9 mcexec_create_per_process_data: use copy_from_user
Refs: #1205
Change-Id: Idced73a7f88aada5fc2462b490d56603f8fe2472
2018-09-27 15:42:01 +00:00
e803698618 test: Refactor test programs
Change-Id: I77fec2f5f30f6fda3bda6f85ce00f1c2e7f7a9b3
2018-09-25 12:45:20 +09:00
c862b29d65 sched_setaffinity: Check migration after decrementing in_interrupt
refs: #1180
Change-Id: I2b3fb03066812ecc802406297084977e757092fe
2018-09-25 01:52:54 +00:00
dd58d366c3 procfs: Fix pread/pwrite to procfs fail when specified size is bigger than 4MB
Fujitsu: POSTK_DEBUG_TEMP_FIX_43
Refs: #1018
Change-Id: I736ac69885695ef8eeababc3fcfe69a6258b4e16
2018-09-20 02:06:17 +00:00
ab284b0531 test: Add test programs for #1158
refs: #1158
Change-Id: I853dd84f5433a01da510813e9fb1276e5477f73f
2018-09-20 02:05:55 +00:00
42b9b31606 mcctrl: Propagate writecore()'s return value to caller
Fujitsu: POSTK_DEBUG_TEMP_FIX_62
Change-Id: I847dd520187cbf66fbad8140f79f62c6d5d9d5fc
2018-09-20 11:01:22 +09:00
29c5c68761 coredump: Change type of coretable.len to loff_t from int
Fujitsu: POSTK_DEBUG_TEMP_FIX_61
Change-Id: I6a27a8d477c3b3dcc12be772a15dfcff370bd2a8
2018-09-20 11:01:22 +09:00
38c08a6663 coredump: Add O_TRUNC to flags opening corefile
Fujitsu: POSTK_DEBUG_TEMP_FIX_59
Change-Id: I36c89fa894dfc0cdd170781e8ca4aab6149d4928
2018-09-20 11:01:20 +09:00
57258e7f59 coredump: Don't dump when MCK_RLIMIT_CORE is zero
Fujitsu: POSTK_DEBUG_ARCH_DEP_67
Change-Id: Ic85c793b052cde9d7fa4fe510c5daee303d370c4
2018-09-20 01:51:18 +00:00
8c33c92720 mcctrl: Switch Linux functions/structures according to the version
For get_user_pages_remote in binfmt_mcexec.c:
In 4.10 with 5b56d49fc31d ("mm: add locked parameter to
get_user_pages_remote()")
In 4.9 with 9beae1ea8930 ("mm: replace get_user_pages_remote()
write/force parameters with gup_flags")

For vmf in syscall.c, these two patches in 4.10:
82b0f8c39a38 ("mm: join struct fault_env and vm_fault")
1a29d85eb0f1 ("mm: use vmf->address instead of
vmf->virtual_address")

Fujitsu: POSTK_DEBUG_ARCH_DEP_41
Change-Id: I89a02d03169a2162ea186da1804bf48910446d11
2018-09-20 01:50:04 +00:00
a269d96978 coredump: Exclude special areas
Fujitsu: POSTK_DEBUG_TEMP_FIX_38
Refs: #1005
Change-Id: I8934d2aecf06a09469afe131347e42b48b6f67f6
2018-09-20 01:48:17 +00:00
2910818f06 execve: Fix calling ptrace_report_signal after preemption is disabled
Change-Id: I451d28d985ab330d855501597713e982b8febf4e
Refs: #1194
2018-09-20 01:31:31 +00:00
3df82d61ce test: Fix tests of "user_space"
user_space/swapout/swapout_copy_to_01.sh:
* Use ~/.mck_test_config
* Fix checking if McKernel version is written in swap-file

user_space/futex/futex_test.sh:
* Use ~/.mck_test_config

user_space/perf_event_open/perf_event_open_test.sh
* Use ~/.mck_test_config

Change-Id: Id93b207ed0e3e9ebf307073db81b40335bc5b140
2018-09-19 08:54:08 +00:00
159092c58e rusage: Refactor test programs
Change-Id: I846a6416acf903f7fa19db98d4d937c51c10b4af
2018-09-18 18:42:19 +09:00
60011718d2 add common test framework
Add new file with common functions for tests to use.

 - loads config file
 - checks for mcexec etc
 - checks for LTP and OSTEST if required
 - handle mcstop / mcreboot if required, and provide function for it

At the same time, make a few changes to mck_test_config:
 - move to ~/.mck_test_config
 - add boot params to the config; tests that require specific params can
   overwrite it
 - make the config "set-if-variable-is-empty", so someone can overwrite
   any param by setting the environment value e.g. LTP=.... ./test.sh
   will use the value given

Change-Id: Ib04112043e3eb89615dc7afaa8842a98571fab93
2018-09-14 03:30:06 +00:00
7e342751a2 do_syscall: Delegate system calls to the mcexec with the same pid
This includes the following fix:
send_syscall, do_syscall: remove argument pid

Fujitsu: POSTK_TEMP_FIX_26
Refs: #1165
Change-Id: I702362c07a28f507a5e43dd751949aefa24bc8c0
2018-09-13 16:59:47 +09:00
c23bc8d401 syscall_time: Handle by McKernel
refs: #1036
Change-Id: Ifa81b613c7ee8d95ae7cdf3dd54643f60526fa73
2018-09-13 07:44:02 +00:00
5e760db417 syscall: the signal received during system call processing is not processed.
Refs: #1176
Fujitsu: POSTK_DEBUG_TEMP_FIX_56
Change-Id: I410160ccbcef3ef49a0e37611a608bc87c97e63b
2018-09-13 07:04:11 +00:00
e4da71010c check_signal: system call restart is done only once
Fujitsu: POSTK_TEMP_FIX_66
Refs: #1009
Change-Id: Ic0f04ac6b7f6c6bb01b55fb389bf9befd56b1dd9
2018-09-13 07:00:49 +00:00
c25fb2aa39 memobj: transform memobj lock to refcounting
We had a deadlock between:
 - free_process_memory_range (take lock) -> ihk_mc_pt_free_range ->
... -> remote_flush_tlb_array_cpumask -> "/* Wait for all cores */"
and
 - obj_list_lookup() under fileobj_list_lock that disabled irqs
and thus never ack'd the remote flush

The rework is quite big but removes the need for the big lock,
although devobj and shmobj needed a new smaller lock to be
introduced - the new locks are used much more locally and
should not cause problems.

On the bright side, refcounting being moved to memobj level means
we could remove refcounting implemented separately in all object
types and simplifies code a bit.

Change-Id: I6bc8438a98b1d8edddc91c4ac33c11b88e097ebb
2018-09-12 18:03:25 +09:00
b51886421e uti: Don't compile syscall_intercept related stuff when not specified with configure option
Change-Id: I9be8cb9b3fcae78d33a33b057c43caee23a81fc1
2018-09-05 16:29:20 +09:00
22c6c5c736 do_syscall: Call schedule() when runq_len > 1
This optimization makes the offloading thread quickly yield to
another thread. Without this, it yielded only after the interval timer
set the rescheduling flag.

Change-Id: Ida3b17ed94782d5d1af0185a96b1f50d9db8d244
2018-09-04 19:53:03 +09:00
cd00fc3a78 set_timer: Start timer when runnable thread count is bigger than one
Change-Id: Ie32799fff2936ffc057f166db5681edccdbf5920
2018-09-04 19:53:03 +09:00
00a34a8ba3 uti: util_thread: Hoist uti_desc check
Change-Id: I8c4b75140df2fe149dfe20e0a8f0bf323b5f1763
2018-09-04 19:53:03 +09:00
8900c2cec5 uti: mcexec_uti_attr: Fix CPU binding decision
Change-Id: I4047858895503ae912e5575bb232dbbb2f915722
2018-09-04 19:53:03 +09:00
fca02ee248 uti: Add error checks to kmalloc of struct uti_attr 2018-09-04 19:53:03 +09:00
781a69617b uti: Replace data types represented as arrays with C structures
Defining C structures for the following objects:
(1) Remote and local context
(2) Stack of system call arguments / return values

Change-Id: Iafbb6c795bd765e3c78c54a255d8a1e4d4536288
2018-09-04 19:53:03 +09:00
04d4145b3e uti: Replace dead uti thread with new mcexec thread in proc->tids
Change-Id: Ic6e906dd1bfac1b07f1317732cbe0a5191831cd8
2018-09-04 19:53:03 +09:00
96aab7e215 uti: Cosmetic change in util_thread
Change-Id: I8aa75efa4dbfb798e40e75f76bacbd184dae23b8
2018-09-04 19:53:02 +09:00
98ee584ab6 uti: Change field name of release_user_space_desc
Change-Id: I18ada86ec3835198c1a947d8ceb36075d6ff2e94
2018-09-04 19:53:02 +09:00
6b031c5472 uti: Fix condition for pthread_join of mcexec threads
Change-Id: Iaeee91c197b84436f84ce4380768aa79e7f9419e
2018-09-04 19:53:02 +09:00
e42c414454 uti: Hook system calls by binary-patching glibc
(1) Add --enable-uti option. The binary-patch library is
    preloaded with this option.
(2) Binary-patching is done by syscall_intercept developed by Intel

This commit includes the following fixes:

(1) Fix do_exit() and terminate() handling
(2) Fix timing of killing mcexec threads when McKernel thread calls terminate()

Change-Id: Iad885e1e5540ed79f0808debd372463e3b8fecea
2018-09-04 19:53:02 +09:00
e613483bee uti: Add system call profile 2018-09-04 19:53:02 +09:00
c0271f4727 Add debug messages for per-process data 2018-09-04 19:53:02 +09:00
4969762f15 uti: Add usage of uti specific options to mcexec 2018-09-04 19:53:02 +09:00
09d3648e43 uti: Set PROT_EXEC to host VMA when PROT_READ is set
Set PROT_EXEC to host VMA because uti needs PROT_EXEC for text VMAs.

Meanings of prot bits of Host VMA have been changed as follows.
   RWX: No mapping or RW mapping
   RX: Read only mapping
2018-09-04 19:53:02 +09:00
4e905cd412 uti: do_syscall: Don't warn when proxy is gone
This is because this is a normal case since terminate() is changed so
that it first kills all mcexec threads and then kills McKernel threads.

Change-Id: I88380bf28b60645d361baded525d71105235c16f
2018-09-04 19:53:01 +09:00
8c11daf726 uti: Fix signal relay from mcexec to McKernel
Change-Id: I2ffd8049a0fb1637cfc6bab7fe24c6a85e5e53fc
2018-09-04 19:53:01 +09:00
5cb8a1f10f uti: Workaround not to share CPU with OpenMP threads
* Assign uti thread to the last idle CPU so that it's not shared with
  an OpenMP thread

Change-Id: Ia42cae056ce81fde9b6dab6286b39a52f3c9e172
2018-09-04 19:53:01 +09:00
dbba7dea18 uti: Allow only the first do_fork() call to create a uti thread 2018-09-04 19:53:01 +09:00
b6ab5911b7 uti: Identify uti thread by clone count
--uti-thread-count <count> is added to mcexec.

Change-Id: Id9ec464412a5bb71e4d9e87d05f79de22d35b067
2018-09-04 19:53:01 +09:00
b0d7f890d0 uti: Reverse-offload msync() 2018-09-04 19:53:01 +09:00
b9c0cdddab uti: Cosmetic change 2018-09-04 19:52:14 +09:00
7ee7dd5e2c uti: Allow tracer to call release_handler() for the main process
Change-Id: I934a6eefbcb87473e87c109d6b4d32c7ab486894
2018-09-04 19:52:14 +09:00
07db4a80a7 __do_in_kernel_syscall: Move ihk_ikc_release_packet from mcexec_wait_syscall
Change-Id: Ieeb5fda42dbddc9da27242f4b547c2143659f97a
2018-09-04 19:52:14 +09:00
f04e5c24ab uti: Don't call mcexec_terminate_thread() when McKernel asks mcexec to interrupt system call 2018-09-04 19:52:14 +09:00
b8bacdd2de Reference counting per-thread data
It is accompanied by the following fixes:
(1) Fix put ppd locations in mcexec_wait_syscall()
(2) Move put ptd to end of mcexec_terminate_thread_unsafe() and mcexec_ret_syscall()
(3) Add debug messages for ptd add/get/put
(4) Fix ptd-add/get/put matching in mcexec_wait_syscall()
    * Skip put when woken-up from wait_event_interruptible() by signal

Change-Id: Ib9be3f5e62a7a370197cb36c9fa7c4d79f44c314
2018-09-04 19:52:14 +09:00
a121ffc785 uti: Release packet of reply from McKernel in backward_offload() 2018-09-04 19:52:14 +09:00
88f9693390 uti: Return -ENOSYS without offloading for set_robust_list()
Change-Id: I43466e3850fd2ad68e5754d1d460438fa47f3ed4
2018-09-04 19:52:13 +09:00
124ec580a0 uti: Call do_exit when tracer isn't working and do_syscall returned -ERESTARTSYS 2018-09-04 19:52:13 +09:00
af7f61db49 uti: mcexec: Fix error check of pthread_detach
Change-Id: Idda8e060641bbd7b01c50163140a2c5f7466d193
2018-09-04 19:52:13 +09:00
ee299b5780 uti: Check size of syscall arguments for syscall_intercept
Change-Id: I747b90e1f521b08266cfc021ef4b23e2e3c7ba4c
2018-09-04 19:52:13 +09:00
c60a778c8d uti: Zero-clear struct mckernel_exec_file before initialization
Change-Id: I315008b7f5c9e66a93b80da87d1a6332d717c2aa
2018-09-04 19:52:13 +09:00
25a129ea6a uti: Disable jumping to McKernel futex code 2018-09-04 19:52:13 +09:00
8e9924c523 uti: Lock per_thread_data_hash_lock in mcctrl_put_per_proc_data() 2018-09-04 19:52:13 +09:00
c71291a429 mcctrl: Add mcexec_terminate_thread_unsafe()
Change-Id: I6ca54cdac2ab9449d40b22f7329f1a215e5aa33b
2018-09-04 19:52:13 +09:00
ba93b83d68 uti: Add __user to mcexec_terminate_thread argument
Change-Id: Ic96a91e6a892a1bd2f1d333580e28bced6a40dc0
2018-09-04 19:52:13 +09:00
c2f41ca9ad uti: Replace hand-made list of host_threads with Linux macro
Change-Id: Ib46cc9fcdd2854b7bbe21c2cc885beeb22d16dd2
2018-09-04 19:52:13 +09:00
062d7ecae3 uti: Use copy_from_user() in mcexec_terminate_thread() 2018-09-04 19:52:12 +09:00
58d038fcac uti: Fix wrong argument passed to ihk_ikc_release_packet() in mcexec_terminate_thread() 2018-09-04 19:52:12 +09:00
510310342c uti: Use fresh struct syscall_request instance when replying to syscall_backward() 2018-09-04 19:52:12 +09:00
a6198f267b uti: Offload set_robust_list to McKernel 2018-09-04 19:52:12 +09:00
5e78bd85ab uti: Fix tracer exit code for the case when create_tracer() isn't called 2018-09-04 19:52:12 +09:00
85c0c8a01f uti: Add debug messages for syscall
Change-Id: I2f96e71d5384f883f7dc568122c57d92bc1cd818
2018-09-04 19:52:12 +09:00
e29f579061 uti: Prevent user space vma from getting copied when forking 2018-09-04 19:52:12 +09:00
63703589e5 uti: Clear user space PTEs after first fork in create_tracer()
Change-Id: I60755f0cb5e84c3a5a5cd91515411a30f0995822
2018-09-04 19:52:12 +09:00
5c8c1986b5 uti: Add comment on ppd life cycle
Change-Id: Id16cf036b2d919444e8634b536fd701d996bcef2
2018-09-04 19:52:12 +09:00
e4370d235c uti: Make tracer not call mcexec_terminate_thread() when tracee is killed by signal
Change-Id: I5878c7d623ce182a7cb9578c9d5c430c1bee8e1e
2018-09-04 19:52:12 +09:00
31ac007cb5 uti: Increase CPU_HZ to 1000
Change-Id: I8619263845fd8ebabe6fc7de619a5b51ac04470a
2018-09-04 19:52:11 +09:00
56da7e2de9 uti: Allocate memory area directly to uti_desc->wp
Change-Id: Ia5a1dbf56b937d9d05cd7fa1c5eec4a5b4b7b196
2018-09-04 19:52:11 +09:00
35300e7b4f uti: Create tracer when forking
Change-Id: Ic66cf6289ac6f32a884ba1266e641ce61620a239
2018-09-04 19:52:11 +09:00
439dc0928b uti: Streamline syscall_backward() 2018-09-04 19:52:11 +09:00
4b3e58fd3d uti: Call terminate only when exit_group is called
Tracer tells McKernel side to call do_exit() in WIFSIGNALED case.

Change-Id: If85c6cbb4856036b406b11335f1384e57f26292d
2018-09-04 19:52:11 +09:00
b7cdbd6c42 uti: Enforce mcexec is destroyed and then McKernel process is destroyed 2018-09-04 19:52:11 +09:00
77f5cac2bf uti: Make tracer exit when not used
Change-Id: I3d3b2f92fa2b160ffce633c46d1b60e9079e7f1b
2018-09-04 19:52:11 +09:00
9102b176c4 uti: Make per_proc_data of tracee survive over the signal-kill of the tracee
Change-Id: I8ff1dddb526ef2fd948cfe1b8f3aa8403c2006d6
2018-09-04 19:52:11 +09:00
bb4317beaf uti: futex: Propagate -ERESTARTSYS returned by wait_event_interruptible()
Change-Id: Id36c4df0e0a8e1f64b12c635c0502f63552ba50b
2018-09-04 19:52:11 +09:00
d24b7585b7 uti: Make tracee pthread-detached
Change-Id: I672ee18739b956980901b63e55ee3ebc192b4e56
2018-09-04 19:52:11 +09:00
4438f994dc uti: Add/Modify test programs
Change-Id: I27a39d6b11af5243f93d07c31c2ef80f6727dd53
2018-09-04 19:52:11 +09:00
52afbbbc98 uti: Call into McKernel futex()
(1) Masquerade clv
(2) Fix timeout
(3) Let mcexec thread with the same tid as McKernel thread migrating
    to Linux handles the migration request
(4) Call create_tracer() before creating proxy related objects

Change-Id: I6b2689b70db49827f10aa7d5a4c581aa81319b55
2018-09-04 19:52:10 +09:00
460917c4a0 remote_page_fault,syscall_backward: Zero-clear waitq entry
Change-Id: I151a35004183e911aaba766a8749830e1768bfe6
2018-09-04 19:52:10 +09:00
7803468afe remote_page_fault,syscall_backward: Retry when interrupted by signal
Change-Id: Ic7d72ad9ca32bb3c8e3522e00fef1d98caf3c049
2018-09-04 19:52:10 +09:00
8f2c7d2265 Fix thread-safety issue in rus_vm_fault
Change-Id: I8640a8e0de8a0dfaee700b25e5f9e2941ac98fc8
2018-09-04 19:52:10 +09:00
c6c3a84a46 syscall: Add missing definition of thread to access thread->sigpending 2018-09-04 19:52:10 +09:00
5a7ca14fcc rus_vm_fault: Return VM_FAULT_SIGBUS when per-process data is not found 2018-09-04 19:52:10 +09:00
d7b882855a Correct comments in declaration of struct ikc_scd_packet 2018-09-04 19:52:10 +09:00
2337832e4c pager_req_release(): Correct debug messages 2018-09-04 19:52:10 +09:00
be635ceb19 terminate: Fix counting of non-leader threads
Change-Id: I8399ad553bb8e09bef508ac976e8cd56cdae8013
2018-09-04 19:51:11 +09:00
0b0b7b03d7 Prevent one CPU from getting chosen by concurrent forks
One CPU could be chosen by concurrent forks because CPU selection and
runq addition are not done atomically. So this fix makes the two steps
atomic.

Change-Id: Ib6b75ad655789385d13207e0a47fa4717dec854a
2018-09-04 19:51:11 +09:00
82914c6a2e remote_page_fault: Retry when interrupted
Change-Id: Ib71a87ad03420e1918dc97da43351cb93e7d0754
2018-09-04 19:51:11 +09:00
f127dfdf1e mcexec_create_per_process_data: Zero ppd on allocation
Change-Id: I06306f30ce30ad6ddc6e8b8cab46ee39be0e4940
2018-09-04 19:51:11 +09:00
567dcd3846 Fix deadlock involving mmap_sem and memory_range_lock
Change-Id: I187246271163e708af6542c057d0a8dfde5b211e
Fujitsu: TEMP_FIX_1
Refs: #986
2018-09-04 19:51:10 +09:00
b080e0f301 spinlock: Add trylock
Change-Id: If349d7c0065609615f5df229f70c59f92bf97adf
2018-09-04 19:51:10 +09:00
ff383d96ba spinlock: rewrite spinlock to use Linux ticket head/tail format
This is a cherry-pick of 2964302d094f035242d6257d8af5450f72f9b5a7.

Change-Id: Ie8b7e825b28415dd41cc232fbeceb4653251f9e3
2018-09-04 19:51:10 +09:00
0bcd3d5de3 unimap: update ihk to unimap
Change-Id: I5b23270f9253d26031ad90bb38721a6234bd98e1
2018-09-04 19:51:10 +09:00
9d6e0319f7 atobytes(): restore postfix before return 2018-09-04 19:51:10 +09:00
0e50eb44a9 process/vm/access_ok: fix edge checks.
Add check for start/end being larger than the range we're checking.
Fix corner case where the access_check() was done on last vm range, and
we would be looking beyond last element (null deref)
2018-09-04 19:51:10 +09:00
2db69d0f24 process/vm: implement access_ok() 2018-09-04 19:51:10 +09:00
a697f5e98d partitioned execution: pass process rank to LWK
Cherry-pick of d2d134d5e6a4b16a34d55d31b14614a2a91ecf47

Conflicts:
	kernel/include/process.h
2018-09-04 19:51:10 +09:00
4439b04d9f ihk_mc_get_linux_kernel_pgt(): add declaration
Cherry-pick of caff967a442907dd75f8cd878b9f2ea7608c77b2
2018-09-04 19:51:10 +09:00
38c3b2358a Exclude areas not assigned to Mckernel from direct map of all phys. memory
It's enabled by adding -s to mcreboot.sh.

Cherry-pick of the following commit:

commit b5c13ce51a5a4926c2cf11c817cd0d369ac4402d
Author: Katsuya Horigome <katsuya.horigome.rj@ps.hitachi-solutions.com>
Date:   Mon Nov 20 09:40:41 2017 +0900

    Include measures to prevent memory destruction on Linux side (This is rebase commit for merging to development+hfi)
2018-09-04 19:51:10 +09:00
221ce34da2 eclair: fix MAP_KERNEL_START and apply Fujitsu's proposals
(1) Cherry-pick of 644afd8b45fc253ad7b90849e99aae354bac5b17
(2) Pass length to functions with arguments of variable length
    * POSTK_DEBUG_ARCH_DEP_38
(3) Separate architecture dependent functions/structures
    * POSTK_DEBUG_ARCH_DEP_34
(4) Fix include path
    * POSTK_DEBUG_ARCH_DEP_76
(5) Include config.h
    * POSTK_DEBUG_ARCH_DEP_33
2018-09-04 19:51:09 +09:00
4246d41007 kmalloc_header: use signed integer for target CPU id
Cherry-pick of bdb2d4d8fa94f9c0268cdfdb21af1a2a5c2bcae5
2018-09-04 19:51:09 +09:00
65df9c8084 ihk_mc_get_processor_id(): return -1 for non-McKernel CPUs
Cherry-pick of c45641e97add9fde467844d9272f2626cf4317de
2018-09-04 19:51:09 +09:00
7836aa0136 Map LWK TEXT to the end of Linux modules section (0xFFFFFFFFFE800000) 2018-09-04 19:51:09 +09:00
1cf7fad15a virt_to_phys(): fix debug messages
Cherry-pick of 46eb3b73dac75b28ead62476f017ad0f29ec4b0a
2018-09-04 19:51:09 +09:00
0076e1f5e0 mem: make McKernel kernel heap virtual addresses Linux compatible
Cherry-pick of e5334c646d2dc6fb11d419918d8139a0de583fde
2018-09-04 19:51:09 +09:00
cae6b9f154 move McKernel out of Linux kernel virtual 2018-09-04 19:51:09 +09:00
5fcbfa2eb5 page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
Device map with MAP_PRIVATE is copied when forking using copy_user_pte.
So the map isn't copied by those statements.

Fujitsu: POSTK_TEMP_FIX_14
Refs: #1039
Change-Id: I1a697ed2e003055d66a8eebd3e8d5e9e49d094ad
2018-08-30 02:21:42 +00:00
9a20cfaefb mem: Check if phys-mem is within the range of McKernel memory
Fujitsu: POSTK_DEBUG_TEMP_FIX_52
Refs: #1164
Change-Id: Idb9a6eac1d2e1df4c663c3171925c774421177fd
2018-08-30 02:18:37 +00:00
f57b0c5d4f wait: Delay wake-up parent within switch context
Fujitsu: POSTK_DEBUG_TEMP_FIX_41
Refs: #1006
Change-Id: Ia98e896505ad0f6549766604ade84550eee8bd2d
2018-08-30 02:13:51 +00:00
0fdeb254b3 switch context: Move to arch-dependent (arch_switch_context())
Fujitsu: POSTK_DEBUG_ARCH_DEP_22
Change-Id: I6faf8d9daa1e639350c2cd83db9bb27b9d37ba01
2018-08-30 02:13:34 +00:00
895a8c4099 procfs: Support multiple reads of e.g. /proc/*/maps
Refs: #1021
Change-Id: If36e1a0f3f41f0215868daf578e96775d96a59a3
2018-08-30 01:48:06 +00:00
e531ee626e mcctrl pager: handle pagers more properly
the pagers are all destroyed when linux thinks there is no process left,
but there is no synchronisation with mcexec on that and some new process
might have spawned and started using these pagers in the meantime,
leading to weird crashes because an invalid pager was used.

The reason we're cleaning up pagers when no process is left is that
mcctrl does not handle pager_req_release if the linux-side process got
killed or died before the mckernel one for some reason, so:
 - move pager_req_release to a new __do_in_kernel_irq_syscall() helper
 - have free_all_process_memory_range not set MF_HOST_RELEASED on the
memobj
 - just in case, clean up everything like before on mcctrl shutdown
instead of when no process is left.

Change-Id: I53b8b9b81b1e5b807593850af17b5ea5e8471174
Refs: #1154
2018-08-24 09:18:20 +09:00
94d093f058 fileobj_create: Suppress message on getting -ESRCH
-ESRCH from mcctrl doesn't mean an error but the file is not a regular
file and mcctrl wants McKernel to treat it as a device file.

Change-Id: Ie121f0e6a8b1f0a29c2f2cf193a51f4f52337809
2018-08-23 04:01:20 +00:00
9b8424523a mcctrl: remove rus page cache
Change-Id: Ieed7a2a0077ffde3fec8a64d2051e56a53924a42
2018-08-23 02:10:44 +00:00
ebc702624b devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
Fujitsu: POSTK_DEBUG_TEMP_FIX_36
Change-Id: I5f020708f97b7468f19496b44c98e164d856598d
2018-08-22 07:26:50 +00:00
ea125cb58c checkpatch: remove warning on LINUX_KERNEL_VERSION and split strings
Change-Id: Ia22f3106208c6ddf46a767e142b8842373e9d6b5
2018-08-22 07:14:48 +00:00
689a799bb9 mcctrl prepare_image: return reserve_user_space error
Change-Id: I00556cb58b12acca888f9512c144a3ce3f5332b1
2018-08-22 07:14:40 +00:00
802b1ac14b ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
Report timeout when McKernel doesn't respond to prevent the caller
from waiting forever.

Refs: #1167
Change-Id: I8bd87e43aafffdd0952198224e44195af4368883
2018-08-22 06:43:27 +00:00
affe3e9010 do_fork: Increase tid table size when allowing oversubscription
The size of tid table needs to be more than #CPUs when CPU oversubscription
is needed.

Note that the max number of simultaneous threads is the min of the
following two:
(1) Number of mcexec worker threads
(2) NR_TID defined in kernel/syscall.c

Change-Id: I425189da415e1d3a763ad62567950d001850cf0d
2018-08-22 06:42:13 +00:00
0b2169964a futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
schedule_timeout() with idle_halt should use spin sleep because sleep
with timeout is not implemented.

Change-Id: Ia0bebcc10ddfb872bffeece7f13fb35a4791db18
2018-08-22 06:36:43 +00:00
f18d1f5383 __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
Change-Id: I438eb168f818eb5649857e22bdc7e68a145872f7
2018-08-22 06:33:23 +00:00
ea35954613 linux side: replace vfs_read by kernel_read
vfs_read has been unexported in bd8df82be66 ("fs: unexport vfs_read and vfs_write")
in kernel 4.14.
kernel_read has always™ existed and is actually more appropriate: we can
remove the set_fs calls that are done in kernel_read.

The downside is that the function prototype also changed in 4.14 with
bdd1d2d3d251 ("fs: fix kernel_read prototype")...
(same with kernel_write e13ec939e96b ("fs: fix kernel_write prototype"))

Change-Id: I6f76a6387ae02b4d33bd62952d995a90b1952fc9
2018-08-22 06:27:12 +00:00
61a942acdc arm64 vdso/gettimeofday: add new includes for cpu_set_t and pte_t
Change-Id: I4035b179a173a6b29c34c73670d68a38d4dc5dc4
2018-08-22 06:17:56 +00:00
c4b4b7222e arm64: ihk_mc_perfctr_start/stop: fix prototype that was changed in x86
The functions now take a bitmask in argument since commit d7416c6f79
("perf_event: Specify counter by bit_mask on start/stop")...
Thankfully the change also induced a type modification so it was easy
to notice.

(On the other hand I'm building with --disable-perf so why the hell is
that file compiled?!)

Change-Id: Ie16367cc94e81068b70e1b80142a6394de896c4f
2018-08-22 06:14:15 +00:00
21af0351d1 arm64 syscall.c needs uio.h for struct iovec
Change-Id: I9d070d0e148636be1d9ecec8ec4dfb72f93c4ed6
2018-08-22 06:08:27 +00:00
1e1c91962e mcctrl: add missing sched_param include for newer linux
struct sched_param is defined differently since headers changed in
linux ae7e81c07 ("sched/headers...")

Change-Id: I22af79bf3d9df69d09903b2830d99426309cf911
2018-08-22 06:04:35 +00:00
b1aa94d417 arm64 arch-perfctr.h: remove duplicate enums
Some enums were redefined in lib/include/mc_perf_event.h in commit
1284060 ("support PERF_TYPE_{HARDWARE|HWCACHE} in perf_event_open")

Change-Id: I1a98699955ca7fd6135b2a7dde72ed4df77b1974
2018-08-22 06:04:08 +00:00
a6a9bac5b7 Protect more code by #ifdef PERF_ENABLE
Change-Id: I20a67c56c4d7817fdb87cc6a2aa47d68fe3eae8d
2018-08-22 06:03:12 +00:00
240a23a21b arch-lock: tentative implementation of irqflags_can_interrupt for arm64
Change-Id: I814e02e757039cab8c142c0b774ad470154454c1
2018-08-22 06:02:06 +00:00
d5108dba80 arm64 eclair build: add missing explicit libs
Change-Id: I5b6f8825430c2d495da50d868a3f54fc0b354d84
2018-08-22 05:56:20 +00:00
20368dd317 syscall: move sync_child_event up a bit
The function was between two perf functions when perf functions don't
use it...
It seemed simpler to move the function than to add an extra ifdef

Use that occasion to fix style warnings, no actual code changes were
made.

Change-Id: Ie8b5fa7968a3d5e54a690d079874db54f5e6c8c9
2018-08-22 05:55:26 +00:00
b93e14f695 arm64 signal.h: add valid_signal() function
This function was added for x86 by commit 140f813d77 ("fix:
differences in behavior of sigaction between Linux and Mckernel")

The x86 and arm files are actually pretty close and could use
factoring...

Change-Id: Ia8820fd2f824d898610b384a3e137c96aadbc911
2018-08-22 05:54:31 +00:00
3e3f3c5590 mcoverlayfs: vfs_readdir -> iterate_dir compat for el7.5
Also enable mcoverlay for new kernel version / actually build it

Change-Id: I80bc043c65cf99c3b41a54a5666ea7652e6c2bbd
2018-08-09 04:30:24 +00:00
e8f8660b73 mcctrl: lookup unexported symbols at runtime
Instead of parsing System.map, use kallsyms_lookup_name() to
get unexported symbols addresses at module loading time.

This lets mckernel work with kaslr enabled (it gets enabled by
default from el7.5 onwards)

Change-Id: Ie4349fc1145ebce44f37f1f40c16f9d75584074d
2018-08-08 06:00:20 +00:00
794684985f mcctrl syscall: remove unused walk page debug function
This saves looking up one symbol for a debug function that is not
used anywhere

Change-Id: I6a3a480ce8067b4f6f0faf9aa837119ea46888ad
2018-08-08 05:57:46 +00:00
625607e6db mcctrl sysfs_files: cleanup vfs_readdir -> iterate_dir compat
Cleanup the fix suggested by Fujitsu a bit

Change-Id: I95165b834e32a01f43eb3b4fcaca039e4d04fe86
2018-08-08 05:41:04 +00:00
05afa8b6dd mcctrl sysfs_files: vfs_readdir -> iterate_dir compat
vfs_readdir got removed in recent kernels

Change-Id: Iac9a9954afefa0f6dbcdc2c94786cf747e21e1fe
Fujitsu: POSTK_DEBUG_TEMP_FIX_22
2018-08-08 05:39:07 +00:00
6cf89076dc mcctrl handle_mm_fault compat: add el7.5 support
Change-Id: I8c7738b70ca914e857be119b7720cdc22e61ae0e
2018-08-08 05:36:35 +00:00
29a658716b configure: Create config file for test programs
Change-Id: I3ec90fed348ff535b24c8116416c6b89636c532c
2018-08-02 02:29:19 +00:00
a7c9988aeb schedule: Don't reschedule immediately when wake up on migrate
Refs: #1027
Change-Id: Ibe563c45c42611170273f1e437566c20fbef68d3
2018-08-02 02:28:25 +00:00
d4fa953975 test: Add testcase for #1001
Refs: #1001
Change-Id: I3edd750108bd3f887af1f0afe3f2651f1243062b
2018-08-02 02:24:41 +00:00
786649d2a3 perf_event: Move changing monitoring-status into perf_stop
Change-Id: I84a13c2a825de24bfdada533c7049e8770a07061
2018-08-02 02:23:38 +00:00
d7416c6f79 perf_event: Specify counter by bit_mask on start/stop
Fujitsu: POSTK_DEBUG_TEMP_FIX_30
Refs: #1002
Change-Id: Iea51e9aef78927a5033e3a226d5efc6298da056a
2018-08-02 11:22:28 +09:00
cb1522ca92 perf_event: Handle fixed-pmc in arch-dep part
Fujitsu: POSTK_DEBUG_TEMP_FIX_31
Refs: #1003
Change-Id: I66c7d18b9137894cf5764464482e2ebd5ecb9d52
2018-08-02 02:14:04 +00:00
14660a10c3 Fix to procfs read returns EIO
Refs: #1152
Change-Id: I48b330953fd7674ba1a3ac35744f9f50a5712730
2018-08-02 01:48:51 +00:00
1387c9687b Add test cases for #765
Refs: #765
Change-Id: I50d70a15d5d5ce31227cacbed4eccd49b218713b
2018-08-02 01:42:46 +00:00
ec99adde4a Add test cases for #998 and #999
Refs: #998 #999
Change-Id: I86f8857594b2446c833c1e59d53b484ef022a9ee
2018-08-02 01:42:11 +00:00
c716e87c53 execve: Clear sigaltstack and fp_regs
Fujitsu: POSTK_DEBUG_TEMP_FIX_19
Refs: #976
Change-Id: I16895eab13eecbb47b7e6da961fae82ee5e570ee
2018-08-01 15:11:05 +09:00
d898f18293 mcexec: Do not close fd returned to mckernel side
Fixes: 9a79920ef9 ("Static analysis fixes")
Change-Id: I2b51d6e288e7bb2b0f4bff579fa237d575dcb026
Reported-by: Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com>
2018-07-30 23:27:17 +00:00
bc0759e2dc arm64 arch-lock: add missing include for cpu_set
Probably only needed for recent system, see ihk's 3271b5e6 ("fix
compilation with recent glibc (cpu_set define change)")

The root of the problem really is that we rely on system headers for
mckernel that ought to be independent...

Change-Id: Ieb9a017e5a7697ad767087370ced7b615efc917e
2018-07-27 02:33:03 +00:00
1aa429d4f5 init_normal_area: fix warnings
- unused variable pt_phys
 - undeclared function set_pt_large_page (move definition lower)

Change-Id: I4625b70efe8e914160b17064078c42b86a461d3e
2018-07-27 02:32:23 +00:00
1543119139 mcctrl rus_vm_fault: type changed with kernel >= 4.11
vma is part of vmf and isn't needed, so type changed (see linux 11bac80
("mm, fs: reduce fault, [...] to take only vmf"))

Change-Id: I4c023e23c7e7416ad2df2dcc0698a0032e574e4c
2018-07-27 02:31:39 +00:00
0a0a78ac2e mcctrl: replace GFP_TEMPORARY by GFP_KERNEL
See linux's commit 0ee931c4 ("mm: treewide: remove GFP_TEMPORARY
allocation flag") for a long explanation, but basically that flag
"is just cargo cult" and should be removed

Change-Id: I2147cd65b6b9ec509a72e11cc3abf1fe1561c10b
2018-07-27 02:31:00 +00:00
6999d0a3f9 bind_mount_recursive: Use lstat instead of d_type of readdir
Change-Id: I0eb8d6c7e1fa5df6dbc5962a639901546a159d04
2018-07-26 18:38:48 +09:00
f01a883971 devobj: fix out of bounds shift
Similarly, pgoff << PAGE_SHIFT would need pgoff to be unsigned to fit,
but off_t is signed.
The reason for this shift was to truncate the offset argument to be
aligned to page boundaries, do that instead

Change-Id: I36c3de34b1834fdb0503942a6f3212e94986effd
2018-07-26 05:20:19 +00:00
3185334c1c debug messages: implement dynamic debug
Heavily inspired off linux kernel's dynamic debug:
 * add a /sys/kernel/debug/dynamic_debug/control file
 (accessible from linux side in /sys/class/mcos/mcos0/sys/kernel/debug/dynamic_debug/control)
 * read from file to list debug statements (currently limited to 4k in size)
 * write to file with '[file foo ][func bar ][line [x][-[y]]] [+-]p' to change values

Side effects:
 * reindented all linker scripts, there is a new __verbose section
 * added string function strpbrk

Change-Id: I36d7707274dcc3ecaf200075a31a2f0f76021059
2018-07-26 14:16:31 +09:00
bc887aab44 x86 futex: fix out of bounds shift
8 << 28 needs unsigned to fit, other shifts were done to truncate
the input, use a mask instead

Change-Id: I81ba41595f4629f1df554e34392116440ff3b641
2018-07-26 05:10:36 +00:00
6f7c428a34 terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
Change-Id: I8c4fbdd3aab9d0567ce5457a4a6405490608925d
2018-07-26 05:02:13 +00:00
68c702d024 process_procfs_request: Add Pid to /proc/<PID>/status
The standard UNIX tools for getting process information need to have the
process ID inside /proc/<PID>/status.

Using ps without PID in /proc/<PID>/status gives :

  PID TTY          TIME CMD
 2551 pts/0    00:00:00 bash
    0 pts/0    00:00:00 exe
    0 pts/0    00:00:00 exe

With this patch:
  PID TTY          TIME CMD
 2551 pts/0    00:00:00 bash
11966 pts/0    00:00:00 exe
12619 pts/0    00:00:00 exe

Change-Id: Ic9d255cbef4d49e49bdaedcfc8e3545d9c144325
2018-07-26 05:00:21 +00:00
97273adcc5 x86_64 move_pages_smp_handler: rework initialisation
- add missing break statement
- remove duplicate memset for mpsr->status

Change-Id: I1fd1a8b2bb7bbabb32db9e7d3fc84102d9b0ff82
2018-07-26 04:59:23 +00:00
ad2cb6375a kprintf: only call eventfd() if it is safe to interrupt
Missing ARM64 implementation, cannot test right now

Change-Id: Ia05e8b7952b19bcd8fdac1f920d9bfe341be8b97
2018-07-26 04:57:30 +00:00
6df4bd8f8c Fix a few more warnings
Some are important, e.g. the seemingly harmless braces around if with dprintf,
since that dprintf is defined as empty, will screw things up and grab the next
line

Change-Id: Ie5e1cf813178ad708ff42ae5e477fbc96034471c
2018-07-26 04:52:17 +00:00
0994c3300e search_free_space: remove POSTK_DEBUG_ARCH_DEP_27 side
search_free_space changed since this was implemented and the code is
no longer compatible
Looking at it again, the function is not used anywhere other than syscall.c
and the second function does not seem to fix anything specific so this
just removes the untested side.

Change-Id: If28d35ec4da083a40dc6936fcb21f05fb64e378a
Fujitsu: POSTK_DEBUG_ARCH_DEP_27
2018-07-26 04:43:05 +00:00
a5c3e48843 search_free_space(): manage region->map_end internally
Change-Id: If9176773868c44fa1eb801c0815c35cea9f4b54b
2018-07-26 04:43:05 +00:00
df2c993721 fileobj_create: only allocate new object if one wasn't found
Change-Id: I5e12439333bf0c9cc7dad6e3cf410bfee616f77e
2018-07-26 04:41:03 +00:00
dc8d6b740c pager_req_read: handle short read
Change-Id: Iff89046041e012a65c80a29b485ddbb636435dd0
2018-07-26 04:37:54 +00:00
c2e1b8d694 mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
do_frees is allowed to be NULL only if free_addrs_count is 0, but that
is increased to account for the wakeup_desc itself before this failure

Change-Id: Iab33712c76ae452df7044558a12745a89adb47ac
2018-07-26 04:34:03 +00:00
f6d8138e05 mcexec_wait_syscall: requeue potential request on interrupted wait
Change-Id: Id7a324f18ebb8c81f05bd8362e19d9314a445308
2018-07-26 04:31:34 +00:00
9d587dcbe8 fileobj_release: do not notify linux of surplus refs
Surplus refs on the linux side will not change anything, so spare
ourselves a message.
The final message will free all refs at once when the object is
destroyed.

Change-Id: Ie086b9dda663729962037c67e8233370509234a5
2018-07-26 04:08:43 +00:00
eb675818c7 x86 mmap: fix out of bounds shift
0x3F << MAP_HUGE_SHIFT is too big to fit in signed int,
make it unsigned

Change-Id: I0e476b80ff51a8e141c90da6f985ba18a3438752
2018-07-26 03:50:44 +00:00
3ce7763715 x86 mem init: do not map identity mapping
init_normal_area was mapping identity lookups (phys = virt) from 0,
leading to many undetected null pointer dereferences in init_pt (but
not in new process page tables leading to odd behaviour)

This also makes the code use the set_pt_large_page() function, cleaning
it up a bit

Change-Id: I22889031de26a7e48501b0eb4d453ca62e671835
2018-07-26 03:50:44 +00:00
fd429ecc5b rusage_private: fix null pointer dereference
Change-Id: Id1f066699a41c249203073c5937e34012f5fe6c3
2018-07-26 03:50:44 +00:00
ed7f5abc28 schedule: fix null pointer dereferences
Change-Id: I1d4b0a2fabb5810a89cca4c6a0a837db3a9813ee
2018-07-26 03:50:44 +00:00
79e5026f01 x86 mem init: fix clearing of init_pt
memset(init_pt...) had the wrong size.

Change-Id: Idb5d0d53b3c70ee4a16a101dd265d0854cfd3b72
2018-07-26 03:50:31 +00:00
a1b50051ed mcexec: always compile debug statements
This helps catching errors like accessing a field that no longer exists
in a debug print that wasn't compiled...

Change-Id: If6c862ea2b866f819195aae93c7fd68e610fe48e
2018-07-26 03:38:00 +00:00
9a79920ef9 Static analysis fixes
Change-Id: I7bc42545a1c497f704d7bfa6ea1b7e3893acc697
2018-07-26 03:36:50 +00:00
141fa5120e git hooks: use correct directory for submodule
Change-Id: I7a39021dc02212065612b21cafcb6c653e2280f0
2018-07-26 03:29:43 +00:00
699cb4f88c arm64/arch-lock: typedef mcs_lock_t
Was done in x86_64 for fileobj in commit 249bda4aef ("fileobj: use
MCS locks for per-file page hash")

Change-Id: I61957de336b6657687803e6288afed9360a42032
2018-07-26 03:28:40 +00:00
bc3e6ded65 disable sse for everyone
GCC optimizes big switches with sse so we could clobber users floating
point registers when they would do a syscall

Reproducer:
```
 #include <stdio.h>
 #include <stdlib.h>

 union num {
 	float f;
 	unsigned long long i;
 };

 #define WORKSIZE (1024 * 1024 * 32)

 int main(int argc, char **argv) {
 	char *work = malloc(WORKSIZE);
 	char *fromaddr;
 	char sink;
 	union num r;
 	unsigned long long int offset;

 	r.f = drand48();
 	printf("r: %llx\n", (long long)r.i);
 	offset = (long long int)(r.f * (double)WORKSIZE);
 	fromaddr = work + offset;
 	printf("%e %llx %llx\n", r.f, offset, fromaddr);
 	sink = *fromaddr;

 	return 0;
 }
```

Change-Id: I7bb0883ec8ef2f245ab98064e308025422afc115
2018-07-26 03:26:25 +00:00
eae5c40f60 init_process_stack: Support "ulimit -s unlimited"
Refs: #1109
Change-Id: I395f012fd747cb6a2f93be71e34c7f6f3666ed67
2018-07-26 02:40:27 +00:00
0c7384f980 Add test cases for #840
Refs: #840
Change-Id: Ie29867d29ba6a25cfac77b95b8effc2f057aae14
2018-07-26 02:39:24 +00:00
67ebcca74d Fix to VMAP virtual address leak
Fujitsu: POSTK_DEBUG_TEMP_FIX_51
Refs: #1024
Change-Id: I1692ee4f004cb4d1f725baf47a8ed31fce1bf42a
2018-07-26 02:17:55 +00:00
3d365b0d7a add ihk as submodule
Change-Id: I512255a96d0d95795bd0d803289fffe4394eb7ec
2018-07-26 01:50:48 +00:00
94e96927a6 mremap: Do nothing when no size change and !MREMAP_FIXED
Behave in the same way as Linux which returns old_address when
old_size == new_size && !MREMAP_FIXED.

Refs: #1112
Change-Id: Ice1421a8a77f962d087de8475aa2cd40c59be5f7
2018-07-26 01:49:01 +00:00
3636c8e7e4 setrlimit: Check arguments in the same order as in Linux
(1) Check if rlim's address is valid
(2) Check if soft-limit does not exceed hard-limit

Fujitsu: POSTK_DEBUG_TEMP_FIX_3
Refs: #1050
Change-Id: I5bf1008ce172f9dff64ec89b1f97614926abaf13
2018-07-26 01:48:05 +00:00
b920da5103 execve: Use interp in shebang as is
Fujitsu: POSTK_DEBUG_TEMP_FIX_9
Refs: #995
Change-Id: I09751d13c4fecd68087d47815029c0b65e51f18a
2018-07-26 01:46:22 +00:00
f1a40a409f perf_event: Include list.h by itself
Fujitsu: POSTK_DEBUG_TEMP_FIX_32
Refs: #1004
Change-Id: I8670477cf498ac98df971f2c0288f335a989f675
2018-07-26 00:45:57 +00:00
4ce4c9f264 init_process: Inherit parent cpu_set
Fujitsu: POSTK_DEBUG_TEMP_FIX_69
Refs: #1028
Change-Id: I1628bb5bf35fa670bb0019e1f3ae295277b1566e
2018-07-26 00:44:41 +00:00
e770a22fa5 scripts: add checkpatch.pl & git hooks
Change-Id: I29e5f7a99e8dd92511c0b1d099f3e1a2f37d7a72
2018-07-12 00:55:58 +00:00
9bb8076dc0 shmget: Make shmobj that underwent IPC_RMID invisible to shmget
Refs: #926
Change-Id: I16120623b581da5d5d484fd05d5111788c8ad5e2
2018-07-10 02:13:00 +00:00
229b041320 test: Add testcase for #1122
Refs: #1122
Change-Id: Ieafee7469d1397461abf05552ffad0bfea1dd6cd
2018-07-10 02:12:23 +00:00
e1f204de4a test: Add testcase for #1112
Refs: #1112
Change-Id: I0041366d8dcf035a09fbb59a5dbd5c94cae0d65e
2018-07-10 02:12:04 +00:00
c6cc0bf07a test: Add testcase for #1111
Refs: #1111
Change-Id: Ifdf25a9ce98ef495200daf1c24d7ac2c81b3ef17
2018-07-10 02:11:45 +00:00
04e54ead5d test: Add testcase for #1031
Refs: #1031
Change-Id: I6a51596b84a97329ba7d5b765c8471246dcf85df
2018-07-10 02:11:13 +00:00
992705d465 pager_get_path: Append \0 to path
Change-Id: Iaabd89a649bb20b37b35cd345da0f468fd5dd0b5
2018-07-10 02:10:19 +00:00
ae09d979b6 Add testcases for #1141
Refs: #1141
Change-Id: I50d1ac6248e9dfc33c372b825c10cf0bd8b61d3e
2018-07-10 02:09:38 +00:00
1cbe389879 do_fork: Propagate error code returned by mcexec
Refs: #731
Change-Id: I7eb52c1c76103d65d108b18b7beaf8041b51cd03
2018-07-03 09:19:54 +00:00
0758f6254e headers: declare void arguments for functions
Not giving any argument means that any argument is OK,
this is not what is meant here.

Change-Id: Ide651c1dec973d4b8709cf00646988f4c4f3acdd
2018-07-03 09:18:25 +00:00
db732a245c execve: Reinitialize vm_regions's map area on execve
Reinitialize vm->region.map_end in sys_execve()
in the same way as when creating a new process.

Change-Id: I7fc048a187e619ba4b5a578976e2a6774d13a6a7
2018-07-03 08:58:50 +00:00
08f2840f7d procfs: Show file names in /proc/<PID>/maps
Refs: #1065
Change-Id: I2f1603b02d12e60972c8f2e5f059d0025f4ceaea
2018-07-03 08:56:44 +00:00
521bdc6181 mremap: Fix type of size arguments (from ssize_t to size_t)
Refs: #1112
Change-Id: I3987d3a20a1e7c4b60f3880e91a670bc0bdc240f
2018-07-03 08:54:14 +00:00
e7b6a3472b sched_getaffinity: Check arguments in the same order as in Linux
(1) Check if size is large enough
(2) Check if size is positive

Fujitsu: POSTK_DEBUG_TEMP_FIX_5
Refs: #1121
Change-Id: I3e41720c89ef89294820f7f4fa8df1a69a7011b0
2018-07-03 08:53:30 +00:00
11756d96ef mmap, mremap: Check arguments in the same order as in Linux
Refs: #1137
Change-Id: I4fd2ac83b013a2741a3facce4dd7e0c37b14fd25
2018-07-03 08:41:30 +00:00
f185be06eb mcoverlay-create.sh, mcoverlay-destroy.sh: Return -EINVAL on failure
Change-Id: I0561df33e8068327bf2d921c8facac7b18ac8866
2018-07-03 05:19:55 +00:00
854bc85602 mcctrl: convert send_signal to mcctrl_ihk_send_wait
Change-Id: Ibd2fc834444d83341a96579f0c9c22080a53e8fa
2018-07-02 16:11:01 +09:00
ab8fe0bbbf mcctrl: convert perf ctrl ioctls to mcctrl_ihk_send_wait
While we are here, also optimize code a bit: perf_desc does not need
to be allocated for every cpu; and fix coding style.

Change-Id: Iad19fed08205d38594fd3f1b7ddf2b19a9cf0d9d
2018-07-02 16:11:01 +09:00
b87c06cbcb mcctrl_ikc_send_wait: give possibility to use pre-allocated desc
Change-Id: I1afbabe792648bbf2c5a9a38ebbfba8ea9060d06
2018-07-02 16:11:01 +09:00
b939ca9370 mcctrl: refactor prepare_image into new generic ikc send&wait
Many ikc messages expecting a reply use wait_event_interruptible
incorrectly, freeing memory that could still be used on the other side.

This commit implements a generic ikc send and wait helper that helps
with memory management and ownership properly:
 - if the message succeeds and a reply comes back normally, the memory
is freed by the caller as usual
 - if the wait fails (signal before the reply comes or timeout) then the
memory is set as owner by ikc and will be free when the reply comes back
later
 - if the reply never comes, the memory is freed at shutdown when
destroying ikc channels

Refs: #1076
Change-Id: I7f348d9029a6ad56ba9a50c836105ec39fa14943
2018-07-02 04:34:44 +00:00
ec202a1ca9 execve: fix execve with oversubscribing
Issue: #1072
Change-Id: I88446e075b60de3c94cad2a19a4731e58037ea63
2018-07-02 13:31:23 +09:00
d4471df94e execve: use thread variable instead of cpu_local_var(current)
This fixes crashes _without_ oversubscribing with a process doing
fork() execve() / wait() in a loop

Issue: #1132
Change-Id: I98531f4643ad6b6a8f750a1a3f05b9ff3ebfd50f
2018-07-02 04:28:23 +00:00
a6ac4acf40 rusage: Fix initialization of rusage->num_processors
Refs: #1064
Change-Id: I4c04127a766b9c71f726113b8b7d6416ff971bff
2018-06-28 11:24:47 +09:00
8ff754c466 test: delete garbage files 2018-06-21 13:50:40 +09:00
90dba00742 fix return value of sched_getaffinity (POSTK_DEBUG_TEMP_FIX_58) refs#1122
Change-Id: I3d7b9b74eec268dd49b703600ca56df1d2933bd9
2018-06-21 09:15:22 +09:00
86ae1380e4 configure.ac: Move man directory to share/man
Change-Id: Idaa5c0f61fbbe3bda4697bc59487f562e09ff2d6
2018-06-11 13:13:13 +09:00
9bb48186e6 add testcases for #732 #1065 #1102 2018-06-07 10:11:23 +09:00
139123dc12 move test programs 2018-06-07 10:08:48 +09:00
6602cf442c add test cases 2018-06-07 10:04:33 +09:00
f148863586 pager_req_map(): do not take mmap_sem if not needed 2018-06-07 07:17:41 +09:00
ec375da27a pager_req_create(): prefetch libiomp, libpthread and libc 2018-06-07 07:17:31 +09:00
c50e7c1029 prepare_process_ranges_args_envs(): fix saving cmdline 2018-06-07 07:17:21 +09:00
5f4dbb2c71 mprotect: Fix early exit condition on page table attribute 2018-06-06 01:39:44 +09:00
328609269b Clean up "Detect hang of McKernel in mcexec"
* Clean up error checks
2018-06-01 14:51:07 +09:00
056fdb2633 Fix "Detect hang of McKernel in mcexec"
1. Call exit() when detecting hang
2. Clean up error checks
2018-06-01 14:21:19 +09:00
09d0a59e22 Detect hang of McKernel in mcexec
mcexec spawns a thread which detects hang of McKernel by using
ihk_os_get_eventfd().

Change-Id: I6cf0ee0c1f0c2c31a8422224b2105f64a9b9ab93
2018-06-01 10:44:34 +09:00
511555c8cb fix: /proc/<PID>/maps outputs a unnecessary NULL character 2018-05-30 16:38:28 +09:00
81699345cc mprotect: do not set page table writable for cow pages
Change-Id: If8b0bb56e7dae59aa9dc3d745a4cc4e43bf4bf9a
2018-05-30 13:29:55 +09:00
130751ff66 fileobj: avoid memory leak in path recording 2018-05-14 17:46:52 +09:00
f3d18eb9de fileobj/devobj: record path name (originally by Takagi-san) 2018-05-14 17:46:52 +09:00
249bda4aef fileobj: use MCS locks for per-file page hash 2018-05-14 17:46:52 +09:00
aaa246f86f mcexec: change debug printf macros to be more tolerant to trivial format
Enabling DEBUG fails to compile. It'd be easy to fix the dprintf to dprint
but this is just as generic and we can now use dprintf everywhere
2018-05-11 09:23:46 +09:00
c52f7a5b49 syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
Needed by strace -f
2018-05-11 09:22:54 +09:00
90a34f54c9 mcreboot.sh,mcstop+release.sh: Disable irqbalance_mck forcefully 2018-04-26 15:06:53 +09:00
bfb5080b71 pager_req_unmap: Put per-process data at exit 2018-04-10 11:35:03 +09:00
2482 changed files with 206397 additions and 36795 deletions

40
.gitignore vendored
View File

@ -1,3 +1,4 @@
*~
*.o
*.elf
*.bin
@ -8,9 +9,36 @@
Module.symvers
*.order
.tmp_versions
elfboot/elfboot
elfboot/elfboot_test
linux/executer/mcexec
linux/mod_test*
linux/target
old_timestamp
CMakeFiles
CMakeCache.txt
Makefile
!test/*/*/Makefile
!test/signalonfork+wait/Makefile
!test/perf_overflow/Makefile
!test/*/*/*.cmd
Kbuild
cmake_install.cmake
config.h
mcstop+release.sh
mcreboot.sh
mcreboot.1
mcoverlay-destroy.sh
mcoverlay-create.sh
kernel/mckernel.img
kernel/include/swapfmt.h
executer/user/vmcore2mckdump
executer/user/ql_talker
executer/user/mcexec.1
executer/user/mcexec
executer/user/libsched_yield.so.1.0.0
executer/user/libsched_yield.so
executer/user/libmcexec.a
executer/user/libldump2mcdump.so
executer/user/eclair
tools/mcstat/mcstat
/_CPack_Packages
/CPackSourceConfig.cmake
CPackConfig.cmake
/build
mckernel-*.tar.gz

12
.gitmodules vendored Normal file
View File

@ -0,0 +1,12 @@
[submodule "ihk"]
path = ihk
url = https://github.com/ihkmckernel/ihk.git
[submodule "executer/user/lib/libdwarf/libdwarf"]
path = executer/user/lib/libdwarf/libdwarf
url = https://github.com/bgerofi/libdwarf.git
[submodule "executer/user/lib/syscall_intercept"]
path = executer/user/lib/syscall_intercept
url = https://github.com/ihkmckernel/syscall_intercept.git
[submodule "executer/user/lib/uti"]
path = executer/user/lib/uti
url = https://github.com/ihkmckernel/uti.git

361
CMakeLists.txt Normal file
View File

@ -0,0 +1,361 @@
# Build configuration for the mckernel project (C and assembly sources).
cmake_minimum_required(VERSION 3.11)
# Fall back to a Debug build when no build type was requested.
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type: Debug Release..." FORCE)
endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM)
project(mckernel C ASM)
set(MCKERNEL_VERSION "1.8.0")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "")
# Additional search path for modules loaded with include().
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
if(DEFINED SYSCONF_INSTALL_DIR)
set(CMAKE_INSTALL_SYSCONFDIR "${SYSCONF_INSTALL_DIR}")
endif()
# Choose the default BUILD_TARGET from the detected processor. It is a
# cache entry, so a value already present in the cache is kept. No default
# is provided here for any other processor.
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
set(BUILD_TARGET "smp-x86" CACHE STRING "Build target: smp-x86 | smp-arm64")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(BUILD_TARGET "smp-arm64" CACHE STRING "Build target: smp-x86 | smp-arm64")
endif()
# Map the build target to ARCH; ARCH stays unset for any other
# BUILD_TARGET value.
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
endif()
# Helper modules. Kbuild and AutoconfHelper are not stock CMake modules;
# presumably they are resolved through CMAKE_MODULE_PATH -- TODO confirm.
include(GNUInstallDirs)
include(CMakeParseArguments)
include(Kbuild)
include(CheckCCompilerFlag)
include(AutoconfHelper)
# Probe the compiler for two warning-suppression flags and collect only
# the ones it accepts into EXTRA_WARNINGS.
CHECK_C_COMPILER_FLAG(-Wno-implicit-fallthrough IMPLICIT_FALLTHROUGH)
if(IMPLICIT_FALLTHROUGH)
set(EXTRA_WARNINGS "-Wno-implicit-fallthrough")
endif(IMPLICIT_FALLTHROUGH)
CHECK_C_COMPILER_FLAG(-Wno-stringop-truncation STRINGOP_TRUNCATION)
if(STRINGOP_TRUNCATION)
list(APPEND EXTRA_WARNINGS "-Wno-stringop-truncation")
endif(STRINGOP_TRUNCATION)
# build options
# CFLAGS_WARNING is a cache entry; its contents are applied to every
# target through add_compile_options().
set(CFLAGS_WARNING "-Wall" "-Wextra" "-Wno-unused-parameter" "-Wno-sign-compare" "-Wno-unused-function" ${EXTRA_WARNINGS} CACHE STRING "Warning flags")
add_compile_options(${CFLAGS_WARNING})
# -Werror is opt-in.
option(ENABLE_WERROR "Enable -Werror" OFF)
if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
# Tofu driver support: the default is ON when /proc/tofu/ exists on the
# build host (the ls | wc -l pipeline prints 1 in that case), OFF
# otherwise.
execute_process(COMMAND bash -c "ls -ld /proc/tofu/ 2>/dev/null | wc -l"
OUTPUT_VARIABLE PROC_TOFU OUTPUT_STRIP_TRAILING_WHITESPACE)
if(PROC_TOFU STREQUAL "1")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
else()
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
endif()
# The define is passed both to the regular compile and, through
# KBUILD_C_FLAGS, to the kbuild-based compile.
if(ENABLE_TOFU)
add_definitions(-DENABLE_TOFU)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
# when compiling on a compute-node
# Looks up this host's name in the FEFS node list and checks whether the
# second comma-separated field of the matching line contains "CN".
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2 | grep -o CN"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)
else()
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" OFF)
endif()
if(ENABLE_FUGAKU_HACKS)
add_definitions(-DENABLE_FUGAKU_HACKS)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_HACKS")
endif()
# Fujitsu MPI tries to xpmem-attach segment with size of range size + 1?
#set(FJMPI_VERSION_COMMAND "a=\$(which mpifcc); b=\${a%/*/*}; c=\${b##*/}; d=\${c#*-}; echo \$d")
# Derive a version string from the install path of mpifort: drop the last
# two path components, keep the final remaining component, and strip
# everything up to and including its first '-'.
set(FJMPI_VERSION_COMMAND "a=\$(which mpifort); b=\${a%/*/*}; c=\${b##*/}; d=\${c#*-}; echo \$d")
execute_process(COMMAND bash -c "${FJMPI_VERSION_COMMAND}"
OUTPUT_VARIABLE FJMPI_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
message("FJMPI_VERSION: ${FJMPI_VERSION}")
# The workaround defaults to ON whenever the command above printed
# anything.
if(NOT "${FJMPI_VERSION}" STREQUAL "")
option(ENABLE_FJMPI_WORKAROUND "Fujitsu MPI workaround" ON)
else()
option(ENABLE_FJMPI_WORKAROUND "Fujitsu MPI workaround" OFF)
endif()
if(ENABLE_FJMPI_WORKAROUND)
add_definitions(-DENABLE_FJMPI_WORKAROUND)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FJMPI_WORKAROUND")
endif()
# krm that mandates reserved memory amount >= available at boot time?
# Extract the Version and Release fields of the
# FJSVpxkrm-plugin-mckernel package from the rpm -qi output.
execute_process(COMMAND bash -c "rpm -qi FJSVpxkrm-plugin-mckernel | awk '$1 == \"Version\" && $2 == \":\" { print $3 }'"
OUTPUT_VARIABLE KRM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
message("KRM_VERSION: ${KRM_VERSION}")
execute_process(COMMAND bash -c "rpm -qi FJSVpxkrm-plugin-mckernel | awk '$1 == \"Release\" && $2 == \":\" { print $3 }'"
OUTPUT_VARIABLE KRM_RELEASE OUTPUT_STRIP_TRAILING_WHITESPACE)
message("KRM_RELEASE: ${KRM_RELEASE}")
# Workaround default: OFF when no version was found, OFF for version
# >= 4.0.2 or for 4.0.1 with release >= 25.13.1.0, ON for anything else.
if("${KRM_VERSION}" STREQUAL "")
option(ENABLE_KRM_WORKAROUND "krm workaround" OFF)
elseif("${KRM_VERSION}" VERSION_GREATER_EQUAL 4.0.2 OR
("${KRM_VERSION}" VERSION_EQUAL 4.0.1 AND "${KRM_RELEASE}" VERSION_GREATER_EQUAL 25.13.1.0))
option(ENABLE_KRM_WORKAROUND "krm workaround" OFF)
else()
option(ENABLE_KRM_WORKAROUND "krm workaround" ON)
endif()
if(ENABLE_KRM_WORKAROUND)
add_definitions(-DENABLE_KRM_WORKAROUND)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_KRM_WORKAROUND")
endif()
# Each option below, when enabled, adds its preprocessor define both to
# the regular compile (add_definitions) and to KBUILD_C_FLAGS.
# SIGSTOP instead of SIGSEGV, additional IHK Linux kmsg
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" OFF)
if(ENABLE_FUGAKU_DEBUG)
add_definitions(-DENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
# redirect kernel messages to Linux's /dev/kmsg
option(ENABLE_KMSG_REDIRECT "Redirect kernel message to Linux's /dev/kmsg" OFF)
if(ENABLE_KMSG_REDIRECT)
add_definitions(-DENABLE_KMSG_REDIRECT)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_KMSG_REDIRECT")
endif()
# System call profiling is enabled by default.
option(PROFILE_ENABLE "System call profile" ON)
if(PROFILE_ENABLE)
add_definitions(-DPROFILE_ENABLE)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPROFILE_ENABLE")
endif()
# Enabled by default; note the define is named IHK_IKC_USE_LINUX_WORK_IRQ,
# not after the option.
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
add_definitions(-DIHK_IKC_USE_LINUX_WORK_IRQ)
endif()
# arm64 only: add the POSTK_DEBUG defines, then read the page size and VA
# width from the host kernel's .config and select the matching page-table
# depth and linker script.
if (BUILD_TARGET STREQUAL "smp-arm64")
set(POSTK_DEBUG_DEFINES "-DPOSTK_DEBUG_ARCH_DEP_100 -DPOSTK_DEBUG_ARCH_DEP_42 -DPOSTK_DEBUG_TEMP_FIX_29 -DPOSTK_DEBUG_TEMP_FIX_49")
add_definitions("${POSTK_DEBUG_DEFINES}")
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} ${POSTK_DEBUG_DEFINES}")
# awk prints the value of the first matching KEY=VALUE line and exits.
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_64K_PAGES\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_64K_PAGES OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND awk -F= "$1 == \"CONFIG_ARM64_VA_BITS\" { print $2; exit; }" "${KERNEL_DIR}/.config"
OUTPUT_VARIABLE CONFIG_ARM64_VA_BITS OUTPUT_STRIP_TRAILING_WHITESPACE)
message("Host kernel CONFIG_ARM64_64K_PAGES=${CONFIG_ARM64_64K_PAGES}")
message("Host kernel CONFIG_ARM64_VA_BITS=${CONFIG_ARM64_VA_BITS}")
# Supported combinations:
#   64K pages, 42-bit VA -> 2 levels, smp-arm64_type3.lds
#   64K pages, 48-bit VA -> 3 levels, smp-arm64_type4.lds
#   other,     39-bit VA -> 3 levels, smp-arm64_type1.lds
#   other,     48-bit VA -> 4 levels, smp-arm64_type2.lds
# Any other combination leaves LINKER_SCRIPT unset and adds no
# page-table definitions.
if(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 42)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=2 -DCONFIG_ARM64_VA_BITS=42 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type3.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=48 -DCONFIG_ARM64_64K_PAGES)
set(LINKER_SCRIPT "smp-arm64_type4.lds")
endif()
else(CONFIG_ARM64_64K_PAGES STREQUAL "y")
if(CONFIG_ARM64_VA_BITS STREQUAL 39)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=3 -DCONFIG_ARM64_VA_BITS=39)
set(LINKER_SCRIPT "smp-arm64_type1.lds")
elseif(CONFIG_ARM64_VA_BITS STREQUAL 48)
add_definitions(-DCONFIG_ARM64_PGTABLE_LEVELS=4 -DCONFIG_ARM64_VA_BITS=48)
set(LINKER_SCRIPT "smp-arm64_type2.lds")
endif()
endif(CONFIG_ARM64_64K_PAGES STREQUAL "y")
endif()
# Advertise the allowed BUILD_TARGET values on the cache entry.
set_property(CACHE BUILD_TARGET PROPERTY STRINGS smp-x86 smp-arm64)
# define MAP_KERNEL_START
# MODULES_END is only known to the kernel headers, so a throwaway kernel
# module is generated and built at configure time. Its single variable is
# initialised to MODULES_END - (1UL << 23), and that value is then read
# back out of the resulting driver.ko.
set(tmpdir ${CMAKE_CURRENT_BINARY_DIR}/tmp.resolve_MODULES_END)
file(REMOVE_RECURSE ${tmpdir})
file(MAKE_DIRECTORY ${tmpdir})
file(WRITE ${tmpdir}/driver.c "#include <linux/module.h>\n")
file(APPEND ${tmpdir}/driver.c "unsigned long MAP_KERNEL_START = MODULES_END - (1UL << 23);\n")
file(APPEND ${tmpdir}/driver.c "MODULE_LICENSE(\"GPL\");\n")
file(WRITE ${tmpdir}/Makefile "obj-m := driver.o\n")
file(APPEND ${tmpdir}/Makefile "all:\n")
file(APPEND ${tmpdir}/Makefile "\tmake ${KBUILD_MAKE_FLAGS_STR} -C ${KERNEL_DIR} M=${tmpdir} modules\n")
# The result of this make invocation is not checked here.
execute_process(COMMAND make -C ${tmpdir})
# Take the last whitespace-separated field of the readelf -S line that
# matches ".data" and convert it from hex to decimal (the section's file
# offset).
# NOTE(review): ".data" is an unanchored regex, so it also matches other
# section names containing "data"; this presumably relies on the probe
# module having only one such section -- confirm.
execute_process(COMMAND bash -c "offset=`readelf -S ${tmpdir}/driver.ko | grep .data | sed 's/.* //g'`; echo $((0x$offset))"
OUTPUT_VARIABLE MAP_KERNEL_START_OFFSET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Read 8 bytes at that offset and format them as one 0x-prefixed hex word.
execute_process(COMMAND bash -c "dd if=${tmpdir}/driver.ko bs=1 skip=${MAP_KERNEL_START_OFFSET} count=8 2>/dev/null | od -tx8 -Ax | head -1 | sed 's|.* |0x|g'"
OUTPUT_VARIABLE MAP_KERNEL_START OUTPUT_STRIP_TRAILING_WHITESPACE)
# ENABLE_MEMDUMP is set unconditionally; the remaining features are
# user-selectable options.
set(ENABLE_MEMDUMP ON)
option(ENABLE_PERF "Enable perf support" ON)
option(ENABLE_RUSAGE "Enable rusage support" ON)
option(ENABLE_QLMPI "Enable qlmpi programs" OFF)
option(ENABLE_UTI "Enable uti support" OFF)
option(ENABLE_UBSAN "Enable undefined behaviour sanitizer on mckernel size" OFF)
option(ENABLE_PER_CPU_ALLOC_CACHE "Enable per-CPU allocator cache (ThunderX2 workaround)" OFF)
find_package(PkgConfig REQUIRED)
set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH ON)
# Required libraries: configuration aborts when any of them is missing.
find_library(LIBRT rt)
if (NOT LIBRT)
message(FATAL_ERROR "error: couldn't find librt")
endif()
find_library(LIBNUMA numa)
if (NOT LIBNUMA)
message(FATAL_ERROR "error: couldn't find libnuma")
endif()
find_library(LIBBFD bfd)
if (NOT LIBBFD)
message(FATAL_ERROR "error: couldn't find libbfd")
endif()
find_library(LIBIBERTY iberty)
if (NOT LIBIBERTY)
message(FATAL_ERROR "error: couldn't find libiberty")
endif()
# libdwarf: a missing library is fatal only when cross-compiling;
# otherwise a notice is printed and C++ is enabled for the local build
# announced by that message.
find_library(LIBDWARF dwarf)
if (NOT LIBDWARF)
if (CMAKE_CROSSCOMPILING)
message(FATAL_ERROR "Could not find libdwarf.so, install libdwarf-devel to ${CMAKE_FIND_ROOT_PATH}")
endif()
message("WARNING: libdwarf will be compiled locally")
enable_language(CXX)
else()
# Note that libdwarf-devel provides /usr/include/libdwarf/dwarf.h
# but elfutils-devel provides /usr/include/dwarf.h
# while mcinspect.c performs "#include <dwarf.h>"
find_path(DWARF_H dwarf.h PATH_SUFFIXES libdwarf)
endif()
# MPI is needed only when the qlmpi programs are enabled.
if (ENABLE_QLMPI)
find_package(MPI REQUIRED)
endif()
# Split UNAME_R into the list "major;minor;patch;release;rest", where
# release is the run of digits after the first '-' and rest is whatever
# follows it. UNAME_R is not set in this file; presumably it is provided
# by an included module or on the command line -- TODO confirm.
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-([0-9]+)(.*))?" "\\1;\\2;\\3;\\5;\\6" LINUX_VERSION ${UNAME_R})
list(GET LINUX_VERSION 0 LINUX_VERSION_MAJOR)
list(GET LINUX_VERSION 1 LINUX_VERSION_MINOR)
list(GET LINUX_VERSION 2 LINUX_VERSION_PATCH)
list(GET LINUX_VERSION 3 LINUX_VERSION_RELEASE)
# Single comparable integer: major * 65536 + minor * 256 + patch.
math(EXPR LINUX_VERSION_CODE "${LINUX_VERSION_MAJOR} * 65536 + ${LINUX_VERSION_MINOR} * 256 + ${LINUX_VERSION_PATCH}")
# compat with various install paths
set(BINDIR ${CMAKE_INSTALL_FULL_BINDIR})
set(SBINDIR ${CMAKE_INSTALL_FULL_SBINDIR})
set(LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR})
set(ETCDIR ${CMAKE_INSTALL_PREFIX}/etc)
set(ROOTFSDIR "/rootfs")
# With a /usr prefix the kernel modules go under /lib/modules/<UNAME_R>
# and the kernel image under the data directory; with any other prefix
# both live below the prefix itself.
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
set(KMODDIR "/lib/modules/${UNAME_R}/extra/mckernel")
set(MCKERNELDIR "${CMAKE_INSTALL_FULL_DATADIR}/mckernel/${BUILD_TARGET}")
else()
set(KMODDIR "${CMAKE_INSTALL_PREFIX}/kmod")
set(MCKERNELDIR "${CMAKE_INSTALL_PREFIX}/${BUILD_TARGET}/kernel")
endif()
set(prefix ${CMAKE_INSTALL_PREFIX})
# set rpath for everyone
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
# ihk: ultimately should support external build, but add as subproject for now
# Default for the IHK_SOURCE_DIR cache entry: "ihk" inside this source
# tree when it contains a CMakeLists.txt, else "../ihk" when that one
# does, else "ihk" again. A value already in the cache is kept.
if (EXISTS ${PROJECT_SOURCE_DIR}/ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "ihk" CACHE STRING "path to ihk source directory from mckernel sources")
elseif (EXISTS ${PROJECT_SOURCE_DIR}/../ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "../ihk" CACHE STRING "path to ihk source directory from mckernel sources")
else()
set(IHK_SOURCE_DIR "ihk" CACHE STRING "path to ihk source directory from mckernel sources")
endif()
# Resolve IHK_SOURCE_DIR first relative to the project source directory,
# then relative to the filesystem root; abort when neither location
# holds a CMakeLists.txt.
if (EXISTS ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR})
elseif (EXISTS /${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR /${IHK_SOURCE_DIR})
else()
message(FATAL_ERROR "Could not find ihk dir, or it does not contain CMakeLists.txt, either clone ihk or run git submodule update --init")
endif()
# The second argument names the binary directory used for the ihk build.
add_subdirectory(${IHK_SOURCE_DIR} ihk)
# Generate config.h from the config.h.in template.
configure_file(config.h.in config.h)
# actual build section - just subdirs
add_subdirectory(executer/kernel/mcctrl)
add_subdirectory(executer/user)
add_subdirectory(kernel)
add_subdirectory(tools/mcstat)
add_subdirectory(tools/crash)
# Generate the scripts and the man page from their templates. @ONLY
# limits substitution to @VAR@ references, so ${...} text in the
# templates is left untouched.
configure_file(scripts/mcreboot-smp.sh.in mcreboot.sh @ONLY)
configure_file(scripts/mcstop+release-smp.sh.in mcstop+release.sh @ONLY)
configure_file(scripts/mcreboot.1in mcreboot.1 @ONLY)
configure_file(scripts/eclair-dump-backtrace.exp.in eclair-dump-backtrace.exp @ONLY)
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/mcreboot.sh"
"${CMAKE_CURRENT_BINARY_DIR}/mcstop+release.sh"
DESTINATION "${CMAKE_INSTALL_SBINDIR}")
install(PROGRAMS
"${CMAKE_CURRENT_BINARY_DIR}/eclair-dump-backtrace.exp"
DESTINATION "${CMAKE_INSTALL_BINDIR}")
# irqbalance_mck.in is installed as-is from the source tree; it is not
# passed through configure_file here.
install(FILES "scripts/irqbalance_mck.in"
DESTINATION "${CMAKE_INSTALL_SYSCONFDIR}")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mcreboot.1"
DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
configure_file(scripts/mckernel.spec.in scripts/mckernel.spec @ONLY)
# Source packaging: a .tar.gz containing this source tree, the ihk
# sources (placed under /ihk) and the generated scripts directory
# (placed under /scripts).
set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CMAKE_PROJECT_NAME}-${MCKERNEL_VERSION}")
set(CPACK_SOURCE_IGNORE_FILES "/.git/;/build;/CMakeCache.txt$;/CMakeFiles$;/Makefile$")
set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${IHK_FULL_SOURCE_DIR};/ihk;${CMAKE_BINARY_DIR}/scripts;/scripts")
set(CPACK_SOURCE_GENERATOR "TGZ")
include(CPack)
# "dist" is a convenience target that runs the package_source target.
add_custom_target(dist COMMAND ${CMAKE_MAKE_PROGRAM} package_source)
# config report
# Prints the effective configuration at the end of the configure step so
# that auto-detected defaults are visible to the user.
message("-------------------------------")
message("Option summary")
message("-------------------------------")
message("Build type: ${CMAKE_BUILD_TYPE}")
message("Build target: ${BUILD_TARGET}")
message("IHK_SOURCE_DIR: ${IHK_SOURCE_DIR} (relative to mckernel source tree)")
message("UNAME_R: ${UNAME_R}")
message("KERNEL_DIR: ${KERNEL_DIR}")
message("SYSTEM_MAP: ${SYSTEM_MAP}")
message("VMLINUX: ${VMLINUX}")
message("KBUILD_C_FLAGS: ${KBUILD_C_FLAGS}")
message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("ENABLE_KRM_WORKAROUND: ${ENABLE_KRM_WORKAROUND}")
message("ENABLE_FJMPI_WORKAROUND: ${ENABLE_FJMPI_WORKAROUND}")
message("PROFILE_ENABLE: ${PROFILE_ENABLE}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")
message("ENABLE_WERROR: ${ENABLE_WERROR}")
message("ENABLE_UBSAN: ${ENABLE_UBSAN}")
message("ENABLE_LINUX_WORK_IRQ_FOR_IKC: ${ENABLE_LINUX_WORK_IRQ_FOR_IKC}")
message("ENABLE_PER_CPU_ALLOC_CACHE: ${ENABLE_PER_CPU_ALLOC_CACHE}")
message("ENABLE_KMSG_REDIRECT: ${ENABLE_KMSG_REDIRECT}")
message("-------------------------------")

70
KNOWN_BUGS.md Normal file
View File

@ -0,0 +1,70 @@
Linux crash when offlining CPU (el7, hardware-specific)
=========================================================
On some hardware running an el7 kernel, Linux can crash due to a bug in the
IRQ handling when offlining CPUs (the CPU reservation step of mcreboot).
Example stack trace:
```
[ 4147.052753] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
[ 4147.060677] IP: [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.068226] PGD 1057e44067 PUD 105f1e7067 PMD 0
[ 4147.072935] Oops: 0000 [#1] SMP
[ 4147.076230] Modules linked in: mcctrl(OE) ihk_smp_x86_64(OE) ihk(OE) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core
[ 4147.148619] dm_mirror dm_region_hash dm_log dm_mod sb_edac edac_core intel_powerclamp coretemp ext4 mbcache jbd2 intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul ipmi_ssif glue_helper ablk_helper joydev iTCO_wdt iTCO_vendor_support cryptd ipmi_si ipmi_devintf ipmi_msghandler pcspkr wmi mei_me mei lpc_ich i2c_i801 sg ioatdma shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_en sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm isci igb drm mlx4_core libsas ahci libahci scsi_transport_sas libata crct10dif_pclmul ptp crct10dif_common pps_core crc32c_intel dca i2c_algo_bit i2c_core devlink [last unloaded: ihk]
[ 4147.215370] CPU: 6 PID: 38 Comm: migration/6 Tainted: G OE ------------ T 3.10.0-693.2.2.el7.x86_64 #1
[ 4147.225672] Hardware name: SGI.COM C1104G-RP5/X9DRG-HF, BIOS 3.0 10/25/2013
[ 4147.232747] task: ffff880174689fa0 ti: ffff8801746ac000 task.ti: ffff8801746ac000
[ 4147.240278] RIP: 0010:[<ffffffff8102ce26>] [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.250275] RSP: 0018:ffff8801746afd30 EFLAGS: 00010046
[ 4147.255608] RAX: 0000000000000000 RBX: 000000000000004e RCX: 0000000000000000
[ 4147.262770] RDX: 0000000000000020 RSI: 000000000000005f RDI: 0000000000000023
[ 4147.269936] RBP: ffff8801746afd58 R08: 0000000000000001 R09: ffff88017f800490
[ 4147.277103] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000006
[ 4147.284269] R13: 0000000000000000 R14: ffff88085ca82500 R15: 000000000000005f
[ 4147.291429] FS: 0000000000000000(0000) GS:ffff88085fb80000(0000) knlGS:0000000000000000
[ 4147.299556] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4147.305326] CR2: 0000000000000040 CR3: 0000001059704000 CR4: 00000000001407e0
[ 4147.312490] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 4147.319659] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 4147.326827] Stack:
[ 4147.328857] ffff8808f43078c8 ffff8808f4307850 0000000000000286 ffff8808f4307701
[ 4147.336384] 0000000000000000 ffff8801746afd70 ffffffff81052a82 0000000200000000
[ 4147.343915] ffff8801746afd88 ffffffff81693ca3 0000000000000003 ffff8801746afdc0
[ 4147.351447] Call Trace:
[ 4147.353921] [<ffffffff81052a82>] native_cpu_disable+0x12/0x40
[ 4147.359795] [<ffffffff81693ca3>] take_cpu_down+0x13/0x40
[ 4147.365236] [<ffffffff81116899>] multi_cpu_stop+0xd9/0x100
[ 4147.370850] [<ffffffff811167c0>] ? cpu_stop_should_run+0x50/0x50
[ 4147.376983] [<ffffffff81116ab7>] cpu_stopper_thread+0x97/0x150
[ 4147.382942] [<ffffffff816a8fad>] ? __schedule+0x39d/0x8b0
[ 4147.388461] [<ffffffff810b909f>] smpboot_thread_fn+0x12f/0x180
[ 4147.394406] [<ffffffff810b8f70>] ? lg_double_unlock+0x40/0x40
[ 4147.400276] [<ffffffff810b098f>] kthread+0xcf/0xe0
[ 4147.405182] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.411319] [<ffffffff816b4f58>] ret_from_fork+0x58/0x90
[ 4147.418893] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[ 4147.426524] Code: 81 fb 00 01 00 00 0f 84 8a 00 00 00 89 d8 65 44 8b 3c 85 20 c6 00 00 45 85 ff 78 e1 44 89 ff e8 91 31 10 00 48 63 15 7e 10 af 00 <48> 8b 70 40 48 c7 c7 80 71 cf 81 49 89 c6 48 83 c2 3f 48 c1 fa
[ 4147.450352] RIP [<ffffffff8102ce26>] check_irq_vectors_for_cpu_disable+0x86/0x1c0
[ 4147.460135] RSP <ffff8801746afd30>
[ 4147.465154] CR2: 0000000000000040
```
This bug has been fixed upstream, but Red Hat will not backport the fixes.
You can work around the problem with a kpatch by backporting the three
following commits:
x86: irq: Get correct available vectors for cpu disable
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ac2a55395eddccd6e3e39532df9869d61e97b2ee
x86/irq: Check for valid irq descriptor in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d97eb8966c91f2c9d05f0a22eb89ed5b76d966d1
x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable()
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cbb24dc761d95fe39a7a122bb1b298e9604cae15
Alternatively, since the crash is related to the IRQ configuration, it might
be possible to mitigate the issue by setting the IRQ affinities early
on and making sure that none of the CPUs that will be offlined have any IRQ
assigned.

339
LICENSE Normal file
View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@ -1,81 +0,0 @@
TARGET = @TARGET@
SBINDIR = @SBINDIR@
INCDIR = @INCDIR@
ETCDIR = @ETCDIR@
MANDIR = @MANDIR@
all: executer-mcctrl executer-mcoverlayfs executer-user mckernel mck-tools
executer-mcctrl:
+@(cd executer/kernel/mcctrl; $(MAKE) modules)
executer-mcoverlayfs:
+@(cd executer/kernel/mcoverlayfs; $(MAKE) modules)
executer-user:
+@(cd executer/user; $(MAKE))
mckernel:
+@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; $(MAKE)) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
mck-tools:
+@(cd tools/mcstat; $(MAKE))
install:
@(cd executer/kernel/mcctrl; $(MAKE) install)
@(cd executer/kernel/mcoverlayfs; $(MAKE) install)
@(cd executer/user; $(MAKE) install)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; $(MAKE) install) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
@case "$(TARGET)" in \
smp-x86 | smp-arm64) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
mkdir -p -m 755 $(ETCDIR); \
install -m 644 arch/x86_64/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
install -m 644 arch/x86_64/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
mkdir -p -m 755 $(INCDIR); \
install -m 644 kernel/include/swapfmt.h $(INCDIR); \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
@(cd tools/mcstat/; $(MAKE) install)
clean:
@(cd executer/kernel/mcctrl; $(MAKE) clean)
@(cd executer/kernel/mcoverlayfs; $(MAKE) clean)
@(cd executer/user; $(MAKE) clean)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic | smp-x86 | smp-arm64) \
(cd kernel; $(MAKE) clean) \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
@(cd tools/mcstat; $(MAKE) clean)

23
README.rst Normal file
View File

@ -0,0 +1,23 @@
.. figure:: docs/mckernel-logo.png
IHK/McKernel is a light-weight multi-kernel operating system designed
for high-end supercomputing. It runs Linux and McKernel, a light-weight
kernel (LWK), side-by-side inside compute nodes and aims at the
following:
- Provide scalable and consistent execution of large-scale parallel
scientific applications, but at the same time maintain the ability to
rapidly adapt to new hardware features and emerging programming
models
- Provide efficient memory and device management so that resource
contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide
jitter-free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading
(slow-path) system calls to Linux
Documentation
=============
Documentation is available
`here <https://ihkmckernel.readthedocs.io>`__.

View File

@ -1,4 +1,4 @@
# Makefile.arch COPYRIGHT FUJITSU LIMITED 2015-2017
# Makefile.arch.in COPYRIGHT FUJITSU LIMITED 2015-2018
VDSO_SRCDIR = $(SRC)/../arch/$(IHKARCH)/kernel/vdso
VDSO_BUILDDIR = @abs_builddir@/vdso
VDSO_SO_O = $(O)/vdso.so.o
@ -6,23 +6,22 @@ VDSO_SO_O = $(O)/vdso.so.o
IHK_OBJS += assert.o cache.o cpu.o cputable.o context.o entry.o entry-fpsimd.o
IHK_OBJS += fault.o head.o hyp-stub.o local.o perfctr.o perfctr_armv8pmu.o proc.o proc-macros.o
IHK_OBJS += psci.o smp.o trampoline.o traps.o fpsimd.o
IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o
IHK_OBJS += debug-monitors.o hw_breakpoint.o ptrace.o timer.o
IHK_OBJS += $(notdir $(VDSO_SO_O)) memory.o syscall.o vdso.o
IHK_OBJS += irq-gic-v2.o irq-gic-v3.o
IHK_OBJS += memcpy.o memset.o
IHK_OBJS += cpufeature.o
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
# IHK_OBJS added coredump.o
IHK_OBJS += imp-sysreg.o
IHK_OBJS += coredump.o
$(VDSO_SO_O): $(VDSO_BUILDDIR)/vdso.so
$(VDSO_BUILDDIR)/vdso.so: FORCE
$(call echo_cmd,BUILD VDSO,$(TARGET))
@mkdir -p $(O)/vdso
@TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare
@TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS)
mkdir -p $(O)/vdso
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS) prepare
TARGETDIR="$(TARGETDIR)" $(submake) -C $(VDSO_BUILDDIR) $(SUBOPTS)
FORCE:

View File

@ -1,4 +1,4 @@
/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* assert.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
#include <process.h>
#include <list.h>
@ -24,6 +24,7 @@ STATIC_ASSERT(offsetof(struct pt_regs, sp) == S_SP);
STATIC_ASSERT(offsetof(struct pt_regs, pc) == S_PC);
STATIC_ASSERT(offsetof(struct pt_regs, pstate) == S_PSTATE);
STATIC_ASSERT(offsetof(struct pt_regs, orig_x0) == S_ORIG_X0);
STATIC_ASSERT(offsetof(struct pt_regs, orig_pc) == S_ORIG_PC);
STATIC_ASSERT(offsetof(struct pt_regs, syscallno) == S_SYSCALLNO);
STATIC_ASSERT(sizeof(struct pt_regs) == S_FRAME_SIZE);
@ -50,3 +51,6 @@ STATIC_ASSERT(sizeof(struct sigcontext) - offsetof(struct sigcontext, __reserved
ALIGN_UP(sizeof(struct _aarch64_ctx), 16) > sizeof(struct extra_context));
STATIC_ASSERT(SVE_PT_FPSIMD_OFFSET == sizeof(struct user_sve_header));
STATIC_ASSERT(SVE_PT_SVE_OFFSET == sizeof(struct user_sve_header));
/* assert for struct arm64_cpu_local_thread member offset define */
STATIC_ASSERT(offsetof(struct arm64_cpu_local_thread, panic_regs) == 168);

View File

@ -1,10 +1,15 @@
/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/* coredump.c COPYRIGHT FUJITSU LIMITED 2015-2019 */
#include <process.h>
#include <elfcore.h>
#include <string.h>
#include <ptrace.h>
#include <cls.h>
#include <hwcap.h>
void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
#define align32(x) ((((x) + 3) / 4) * 4)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct thread *thread, void *regs0, int sig)
{
struct pt_regs *regs = regs0;
struct elf_prstatus64 tmp_prstatus;
@ -15,8 +20,6 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread,
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
@ -24,12 +27,66 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread,
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
/* copy x0-30, sp, pc, pstate */
memcpy(&tmp_prstatus.pr_reg, &regs->user_regs, sizeof(tmp_prstatus.pr_reg));
tmp_prstatus.pr_fpvalid = 0; /* We assume no fp */
/* copy unaligned prstatus addr */
memcpy(prstatus, &tmp_prstatus, sizeof(*prstatus));
prstatus->pr_pid = thread->tid;
if (thread->proc->parent) {
prstatus->pr_ppid = thread->proc->parent->pid;
}
prstatus->pr_info.si_signo = sig;
prstatus->pr_cursig = sig;
}
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
/*
 * Size in bytes of the architecture-specific core-dump note for a thread:
 * a struct note header, the name "LINUX" (terminating NUL included) rounded
 * up to a multiple of 4 by align32(), and the NT_ARM_SVE regset payload as
 * reported by regset_size().
 *
 * Returns 0 when elf_hwcap does not advertise HWCAP_SVE.
 *
 * NOTE(review): the payload is sized for cpu_local_var(current), whereas
 * arch_fill_thread_core_info() sizes it for the thread it is handed --
 * confirm the two always agree.
 */
int arch_get_thread_core_info_size(void)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
/* SVE not advertised: no extra note space is requested. */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return 0;
}
/* header + padded name + descriptor (register data) */
return sizeof(struct note) + align32(sizeof("LINUX"))
+ regset_size(cpu_local_var(current), regset);
}
/*
 * Write the NT_ARM_SVE note for @thread starting at @head.
 *
 * Layout produced: the struct note header at @head, immediately followed by
 * the name "LINUX" (NUL included, padded to a multiple of 4 by align32()),
 * then the regset contents fetched through regset->get().
 *
 * @head:   destination note header; name and descriptor are stored directly
 *          behind it, so the caller must have reserved enough room
 *          (presumably arch_get_thread_core_info_size() bytes -- confirm
 *          against the caller).
 * @thread: thread whose registers are dumped.
 * @regs:   not referenced in this function.
 *
 * The header and name are left unwritten when HWCAP_SVE is absent, when the
 * regset is not eligible (no core note type, no getter, or reported
 * inactive), or when the getter returns non-zero. No error is reported to
 * the caller in any of these cases.
 */
void arch_fill_thread_core_info(struct note *head,
struct thread *thread, void *regs)
{
const struct user_regset_view *view = current_user_regset_view();
const struct user_regset *regset = find_regset(view, NT_ARM_SVE);
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
/*
 * Save the thread's FP registers before reading the regset
 * (presumably so the getter sees up-to-date state -- confirm).
 */
save_fp_regs(thread);
/* Emit only if the regset has a note type, a getter, and is active. */
if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(thread, regset))) {
int ret;
size_t size = regset_size(thread, regset);
void *namep;
void *descp;
/* Name starts right after the header; descriptor after the padded name. */
namep = (void *) (head + 1);
/* Arithmetic on void * relies on the GNU C extension (byte-sized steps). */
descp = namep + align32(sizeof("LINUX"));
/* Fetch the register data straight into the descriptor area. */
ret = regset->get(thread, regset, 0, size, descp, NULL);
if (ret) {
return;
}
/* Fill in the header and name only after the payload was read. */
head->namesz = sizeof("LINUX");
head->descsz = size;
head->type = NT_ARM_SVE;
memcpy(namep, "LINUX", sizeof("LINUX"));
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* cpufeature.c COPYRIGHT FUJITSU LIMITED 2017 */
/* cpufeature.c COPYRIGHT FUJITSU LIMITED 2017-2018 */
#include <cpufeature.h>
#include <ihk/debug.h>
@ -10,9 +10,7 @@
#include <ptrace.h>
#include <hwcap.h>
#ifdef POSTK_DEBUG_ARCH_DEP_65
unsigned long elf_hwcap;
#endif /* POSTK_DEBUG_ARCH_DEP_65 */
/* @ref.impl arch/arm64/kernel/cpufeature.c */
#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
@ -54,6 +52,19 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_END,
};
/* @ref.impl linux4.16.0 arch/arm64/kernel/cpufeature.c */
/*
 * Feature-field descriptors for the ID_AA64ISAR1_EL1 register, used as the
 * table for SYS_ID_AA64ISAR1_EL1 in this file's feature-register list.
 * Describes the LRCPC, FCMA, JSCVT and DPB fields. Each entry is declared
 * FTR_VISIBLE, FTR_STRICT and FTR_LOWER_SAFE; the trailing "4, 0" are
 * presumably the field width and safe value, following the parameter order
 * of __ARM64_FTR_BITS -- confirm against the ARM64_FTR_BITS definition.
 * The list ends with the ARM64_FTR_END sentinel.
 */
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
/* @ref.impl arch/arm64/kernel/cpufeature.c */
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
@ -304,7 +315,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
@ -959,7 +970,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SVE),
#endif
{},
{ 0 },
};
/* @ref.impl arch/arm64/kernel/cpufeature.c */
@ -997,9 +1008,7 @@ void setup_cpu_features(void)
setup_elf_hwcaps(arm64_elf_hwcaps);
}
#ifdef POSTK_DEBUG_ARCH_DEP_65
/*
 * Return the current value of the file-level elf_hwcap bitmask
 * (the HWCAP_* capability flags; presumably filled in by
 * setup_elf_hwcaps() during setup_cpu_features() -- confirm).
 */
unsigned long arch_get_hwcap(void)
{
return elf_hwcap;
}
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -10,5 +10,5 @@ struct cpu_info cpu_table[] = {
.cpu_name = "AArch64 Processor",
.cpu_setup = __cpu_setup,
},
{ /* Empty */ },
{ 0 },
};

View File

@ -2,7 +2,6 @@
#include <cputype.h>
#include <irqflags.h>
#include <ihk/context.h>
#include <ihk/debug.h>
#include <signal.h>
#include <errno.h>
#include <debug-monitors.h>

View File

@ -1,10 +1,11 @@
/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* entry.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <linkage.h>
#include <assembler.h>
#include <asm-offsets.h>
#include <esr.h>
#include <thread_info.h>
#include <asm-syscall.h>
/*
* Bad Abort numbers
@ -77,11 +78,12 @@
.macro kernel_exit, el, need_enable_step = 0
.if \el == 0
bl check_sig_pending
bl check_need_resched // or reschedule is needed.
mov x0, #0
mov x1, sp
mov x2, #0
bl check_signal // check whether the signal is delivered
bl check_need_resched // or reschedule is needed.
mov x0, #0
mov x1, sp
mov x2, #0
@ -367,7 +369,12 @@ el0_sync:
b el0_inv
el0_svc:
uxtw scno, w8 // syscall number in w8
stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
cmp scno, #__NR_rt_sigreturn
b.eq 1f
str x0, [sp, #S_ORIG_X0] // save the original x0
ldr x16, [sp, #S_PC]
str x16, [sp, #S_ORIG_PC] // save the original pc
1: str scno, [sp, #S_SYSCALLNO] // save syscall number
enable_nmi
enable_dbg_and_irq x0
adrp x16, __arm64_syscall_handler
@ -550,9 +557,7 @@ ENTRY(ret_from_fork)
blr x19
1: get_thread_info tsk
bl release_runq_lock
bl utilthr_migrate
b ret_to_user
ENDPROC(ret_from_fork)
/* TODO: skeleton for rusage */
ENTRY(__freeze)
ENDPROC(__freeze)

View File

@ -1,4 +1,4 @@
/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* fault.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <ihk/context.h>
#include <ihk/debug.h>
@ -13,7 +13,6 @@
unsigned long __page_fault_handler_address;
extern int interrupt_from_user(void *);
void set_signal(int sig, void *regs, struct siginfo *info);
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs);
@ -105,12 +104,13 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = fault_info + (esr & 63);
struct siginfo info;
const int from_user = interrupt_from_user(regs);
/* set_cputime called in inf->fn() */
if (!inf->fn(addr, esr, regs))
return;
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
kprintf("Unhandled fault: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
@ -118,7 +118,7 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
/*
@ -127,21 +127,24 @@ void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const int from_user = interrupt_from_user(regs);
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
info._sifields._sigfault.si_addr = (void*)addr;
arm64_notify_die("", regs, &info, esr);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
set_cputime(interrupt_from_user(regs) ? 1: 2);
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
@ -163,7 +166,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
(addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr);
panic("OOps.");
}
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
static int is_el0_instruction_abort(unsigned int esr)
@ -192,6 +195,7 @@ static int do_page_fault(unsigned long addr, unsigned int esr,
}
}
/* set_cputime() call in page_fault_handler() */
page_fault_handler = (void *)__page_fault_handler_address;
(*page_fault_handler)((void *)addr, reason, regs);
@ -219,7 +223,12 @@ static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
// XXX: Handle kernel space page faults for Tofu driver
//if (addr < USER_END)
#else
if (addr < USER_END)
#endif
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);
@ -252,10 +261,10 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
struct siginfo info;
int from_user = interrupt_from_user(regs);
const int from_user = interrupt_from_user(regs);
int ret = -1;
set_cputime(from_user ? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
if (!inf->fn(addr, esr, regs)) {
ret = 1;
@ -274,7 +283,7 @@ int do_debug_exception(unsigned long addr, unsigned int esr, struct pt_regs *reg
ret = 0;
out:
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
return ret;
}
@ -283,7 +292,9 @@ out:
*/
static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
set_cputime(interrupt_from_user(regs) ? 1: 2);
set_cputime(0);
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
return 1;
}

View File

@ -1,4 +1,4 @@
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
#include <thread_info.h>
#include <fpsimd.h>
#include <cpuinfo.h>
@ -9,27 +9,100 @@
#include <prctl.h>
#include <cpufeature.h>
#include <kmalloc.h>
#include <ihk/debug.h>
#include <process.h>
#include <bitmap.h>
//#define DEBUG_PRINT_FPSIMD
#ifdef DEBUG_PRINT_FPSIMD
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
#ifdef CONFIG_ARM64_SVE
/* Set of available vector lengths, as vq_to_bit(vq): */
static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Maximum supported vector length across all CPUs (initially poisoned) */
int sve_max_vl = -1;
/* Default VL for tasks that don't set it explicitly: */
int sve_default_vl = -1;
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
* _maximum_ VQ not exceeding a certain value.
*/
static unsigned int vq_to_bit(unsigned int vq)
{
	/* Larger VQ values map to smaller bit indices in sve_vq_map. */
	const unsigned int bit = SVE_VQ_MAX - vq;

	return bit;
}
static unsigned int bit_to_vq(unsigned int bit)
{
	/* An index past the end of the map is treated as the last bit. */
	const unsigned int idx = (bit < SVE_VQ_MAX) ? bit : SVE_VQ_MAX - 1;

	return SVE_VQ_MAX - idx;
}
/*
* All vector length selection from userspace comes through here.
* We're on a slow path, so some sanity-checks are included.
* If things go wrong there's a bug somewhere, but try to fall back to a
* safe choice.
*/
static unsigned int find_supported_vector_length(unsigned int vl)
{
int bit;
int max_vl = sve_max_vl;
if (!sve_vl_valid(vl)) {
vl = SVE_VL_MIN;
}
if (!sve_vl_valid(max_vl)) {
max_vl = SVE_VL_MIN;
}
if (vl > max_vl) {
vl = max_vl;
}
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(bit_to_vq(bit));
}
/*
 * Probe which vector lengths can be selected and record them in map.
 *
 * map is cleared first.  Then, for each candidate VQ from SVE_VQ_MAX
 * downwards, the candidate is written to ZCR_EL1 (combined with the
 * register bits outside ZCR_EL1_LEN_MASK as read at entry), the
 * resulting vector length is read back with sve_get_vl(), and the bit
 * for the length read back is set in map.
 *
 * NOTE(review): ZCR_EL1 is not restored here; it keeps the last value
 * written by the loop.
 */
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
unsigned long zcr;
bitmap_zero(map, SVE_VQ_MAX);
/* Keep every ZCR_EL1 bit except those in ZCR_EL1_LEN_MASK */
zcr = ZCR_EL1_LEN_MASK;
zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
/* self-syncing */
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
vl = sve_get_vl();
/*
 * skip intervening lengths: continue from the VQ that was read
 * back, not from the one that was requested
 */
vq = sve_vq_from_vl(vl);
set_bit(vq_to_bit(vq), map);
}
}
/* Fill the file-scope sve_vq_map bitmap by probing with sve_probe_vqs(). */
void sve_init_vq_map(void)
{
sve_probe_vqs(sve_vq_map);
}
size_t sve_state_size(struct thread const *thread)
{
unsigned int vl = thread->ctx.thread->sve_vl;
@ -46,17 +119,19 @@ void sve_free(struct thread *thread)
}
}
void sve_alloc(struct thread *thread)
int sve_alloc(struct thread *thread)
{
if (thread->ctx.thread->sve_state) {
return;
return 0;
}
thread->ctx.thread->sve_state =
kmalloc(sve_state_size(thread), IHK_MC_AP_NOWAIT);
BUG_ON(!thread->ctx.thread->sve_state);
if (thread->ctx.thread->sve_state == NULL) {
return -ENOMEM;
}
memset(thread->ctx.thread->sve_state, 0, sve_state_size(thread));
return 0;
}
static int get_nr_threads(struct process *proc)
@ -73,28 +148,13 @@ static int get_nr_threads(struct process *proc)
return nr_threads;
}
extern void save_fp_regs(struct thread *thread);
extern void clear_fp_regs(struct thread *thread);
extern void restore_fp_regs(struct thread *thread);
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_vector_length */
int sve_set_vector_length(struct thread *thread,
unsigned long vl, unsigned long flags)
{
struct thread_info *ti = thread->ctx.thread;
BUG_ON(thread == cpu_local_var(current) && cpu_local_var(no_preempt) == 0);
/*
* To avoid accidents, forbid setting for individual threads of a
* multithreaded process. User code that knows what it's doing can
* pass PR_SVE_SET_VL_THREAD to override this restriction:
*/
if (!(flags & PR_SVE_SET_VL_THREAD) && get_nr_threads(thread->proc) != 1) {
return -EINVAL;
}
flags &= ~(unsigned long)PR_SVE_SET_VL_THREAD;
if (flags & ~(unsigned long)(PR_SVE_SET_VL_INHERIT |
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
return -EINVAL;
}
@ -103,13 +163,19 @@ int sve_set_vector_length(struct thread *thread,
return -EINVAL;
}
if (vl > sve_max_vl) {
BUG_ON(!sve_vl_valid(sve_max_vl));
vl = sve_max_vl;
/*
* Clamp to the maximum vector length that VL-agnostic SVE code can
* work with. A flag may be assigned in the future to allow setting
* of larger vector lengths without confusing older software.
*/
if (vl > SVE_VL_ARCH_MAX) {
vl = SVE_VL_ARCH_MAX;
}
if (flags & (PR_SVE_SET_VL_ONEXEC |
PR_SVE_SET_VL_INHERIT)) {
vl = find_supported_vector_length(vl);
if (flags & (PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC)) {
ti->sve_vl_onexec = vl;
} else {
/* Reset VL to system default on next exec: */
@ -121,39 +187,42 @@ int sve_set_vector_length(struct thread *thread,
goto out;
}
if (vl != ti->sve_vl) {
if ((elf_hwcap & HWCAP_SVE)) {
fp_regs_struct fp_regs;
memset(&fp_regs, 0, sizeof(fp_regs));
if (vl == ti->sve_vl) {
goto out;
}
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs(thread);
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
if ((elf_hwcap & HWCAP_SVE)) {
fp_regs_struct fp_regs;
ti->sve_vl = vl;
memset(&fp_regs, 0, sizeof(fp_regs));
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
restore_fp_regs(thread);
/* for target thread at ptrace */
} else {
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
/* for self at prctl syscall */
if (thread == cpu_local_var(current)) {
save_fp_regs(thread);
clear_fp_regs();
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
}
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
restore_fp_regs(thread);
/* for target thread at ptrace */
} else {
thread_sve_to_fpsimd(thread, &fp_regs);
sve_free(thread);
ti->sve_vl = vl;
sve_alloc(thread);
thread_fpsimd_to_sve(thread, &fp_regs);
}
}
ti->sve_vl = vl;
out:
ti->sve_flags = flags & PR_SVE_SET_VL_INHERIT;
ti->sve_flags = flags & PR_SVE_VL_INHERIT;
return 0;
}
@ -163,44 +232,53 @@ out:
* Encode the current vector length and flags for return.
* This is only required for prctl(): ptrace has separate fields
*/
static int sve_prctl_status(const struct thread_info *ti)
static int sve_prctl_status(unsigned long flags)
{
int ret = ti->sve_vl;
int ret;
struct thread_info *ti = cpu_local_var(current)->ctx.thread;
ret |= ti->sve_flags << 16;
if (flags & PR_SVE_SET_VL_ONEXEC) {
ret = ti->sve_vl_onexec;
}
else {
ret = ti->sve_vl;
}
if (ti->sve_flags & PR_SVE_VL_INHERIT) {
ret |= PR_SVE_VL_INHERIT;
}
return ret;
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_task_vl */
int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length,
const unsigned long flags)
int sve_set_thread_vl(unsigned long arg)
{
unsigned long vl, flags;
int ret;
if (!(elf_hwcap & HWCAP_SVE)) {
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return -EINVAL;
}
BUG_ON(thread != cpu_local_var(current));
preempt_disable();
ret = sve_set_vector_length(thread, vector_length, flags);
preempt_enable();
ret = sve_set_vector_length(cpu_local_var(current), vl, flags);
if (ret) {
return ret;
}
return sve_prctl_status(thread->ctx.thread);
return sve_prctl_status(flags);
}
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_get_ti_vl */
int sve_get_thread_vl(const struct thread *thread)
int sve_get_thread_vl(void)
{
if (!(elf_hwcap & HWCAP_SVE)) {
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return -EINVAL;
}
return sve_prctl_status(thread->ctx.thread);
return sve_prctl_status(0);
}
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
@ -210,25 +288,48 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
panic("");
}
void init_sve_vl(void)
void sve_setup(void)
{
extern unsigned long ihk_param_default_vl;
uint64_t zcr;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return;
}
zcr = read_system_reg(SYS_ZCR_EL1);
BUG_ON(((zcr & ZCR_EL1_LEN_MASK) + 1) * 16 > sve_max_vl);
/* init sve_vq_map bitmap */
sve_init_vq_map();
/*
* The SVE architecture mandates support for 128-bit vectors,
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
if (!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)) {
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
}
zcr = read_system_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_EL1_LEN_MASK) + 1);
/*
* Sanity-check that the max VL we determined through CPU features
* corresponds properly to sve_vq_map. If not, do our best:
*/
if (sve_max_vl != find_supported_vector_length(sve_max_vl)) {
sve_max_vl = find_supported_vector_length(sve_max_vl);
}
sve_max_vl = ((zcr & ZCR_EL1_LEN_MASK) + 1) * 16;
sve_default_vl = ihk_param_default_vl;
if (sve_default_vl == 0) {
kprintf("SVE: Getting default VL = 0 from HOST-Linux.\n");
sve_default_vl = sve_max_vl > 64 ? 64 : sve_max_vl;
kprintf("SVE: Using default vl(%d byte).\n", sve_default_vl);
if (ihk_param_default_vl !=
find_supported_vector_length(ihk_param_default_vl)) {
kprintf("SVE: Getting unsupported default VL = %d "
"from HOST-Linux.\n", sve_default_vl);
sve_default_vl = find_supported_vector_length(64);
kprintf("SVE: Using default vl(%d byte).\n",
sve_default_vl);
}
kprintf("SVE: maximum available vector length %u bytes per vector\n",
@ -239,7 +340,7 @@ void init_sve_vl(void)
#else /* CONFIG_ARM64_SVE */
void init_sve_vl(void)
void sve_setup(void)
{
/* nothing to do. */
}

View File

@ -1,472 +0,0 @@
/* gencore.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <ihk/debug.h>
#include <kmalloc.h>
#include <cls.h>
#include <list.h>
#include <process.h>
#include <string.h>
#include <elfcore.h>
#define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
/*
* Generate a core file image, which consists of many chunks.
* Returns an allocated table, each entry of which is a pair of the address
* of a chunk and its length.
*/
/**
 * \brief Populate the ELF file header of a core image.
 *
 * Only the identification bytes assigned below and the regular header
 * fields are written; any other byte of e_ident is left as it was.
 *
 * \param eh   Header to fill in.
 * \param segs Number of program headers (segments) of the core file.
 */
void fill_elf_header(Elf64_Ehdr *eh, int segs)
{
	/* Identification: magic number first, then class/encoding/ABI. */
	eh->e_ident[EI_MAG0] = 0x7f;
	eh->e_ident[EI_MAG1] = 'E';
	eh->e_ident[EI_MAG2] = 'L';
	eh->e_ident[EI_MAG3] = 'F';
	eh->e_ident[EI_CLASS] = ELFCLASS64;
	eh->e_ident[EI_DATA] = ELFDATA2LSB;
	eh->e_ident[EI_VERSION] = El_VERSION;
	eh->e_ident[EI_OSABI] = ELFOSABI_NONE;
	eh->e_ident[EI_ABIVERSION] = El_ABIVERSION_NONE;

	/* File type and target. */
	eh->e_type = ET_CORE;
#if defined(CONFIG_MIC)
	eh->e_machine = EM_K10M;
#else
	eh->e_machine = EM_X86_64;
#endif
	eh->e_version = EV_CURRENT;
	eh->e_flags = 0;
	eh->e_entry = 0;	/* Do we really need this? */

	/* The program header table directly follows this header. */
	eh->e_ehsize = 64;	/* fixed */
	eh->e_phoff = 64;	/* fixed */
	eh->e_phentsize = 56;	/* fixed */
	eh->e_phnum = segs;

	/* A core file carries no section headers. */
	eh->e_shoff = 0;
	eh->e_shentsize = 0;
	eh->e_shnum = 0;
	eh->e_shstrndx = 0;
}
/**
 * \brief Size in bytes of the prstatus entry of the NOTE segment.
 *
 * The entry is a note header, the 4-byte aligned name "CORE" and the
 * 4-byte aligned struct elf_prstatus64.
 */
int get_prstatus_size(void)
{
	const unsigned long name_len = align32(sizeof("CORE"));
	const unsigned long desc_len = align32(sizeof(struct elf_prstatus64));

	return sizeof(struct note) + name_len + desc_len;
}
/**
 * \brief Fill a prstatus structure.
 *
 * Currently a stub: the body is empty, so nothing is written to head
 * and none of the parameters is used.
 *
 * \param head A pointer to a note structure.
 * \param thread A pointer to the current thread structure.
 * \param regs0 A pointer to a x86_regs structure.
 */
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
/* TODO(pka_idle) */
}
/**
 * \brief Size in bytes of the prpsinfo entry of the NOTE segment.
 *
 * The entry is a note header, the 4-byte aligned name "CORE" and the
 * 4-byte aligned struct elf_prpsinfo64.
 */
int get_prpsinfo_size(void)
{
	const unsigned long name_len = align32(sizeof("CORE"));
	const unsigned long desc_len = align32(sizeof(struct elf_prpsinfo64));

	return sizeof(struct note) + name_len + desc_len;
}
/**
 * \brief Write the prpsinfo note: header, name "CORE" and descriptor.
 *
 * Only pr_state and pr_pid of the descriptor are assigned.  The other
 * fields (pr_sname, pr_zomb, pr_nice, pr_flag, pr_uid, pr_gid, pr_ppid,
 * pr_pgrp, pr_sid, pr_fname, pr_psargs) are not written here.
 *
 * \param head   Start of the note; name and descriptor follow it.
 * \param thread Thread whose status and pid are recorded.
 * \param regs   Unused.
 */
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
	char *name = (char *)(head + 1);
	struct elf_prpsinfo64 *info =
		(struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));

	/* note header */
	head->namesz = sizeof("CORE");
	head->descsz = sizeof(struct elf_prpsinfo64);
	head->type = NT_PRPSINFO;

	/* name, including its terminating NUL */
	memcpy(name, "CORE", sizeof("CORE"));

	/* descriptor */
	info->pr_state = thread->status;
	info->pr_pid = thread->proc->pid;
}
/**
 * \brief Size in bytes of the AUXV entry of the NOTE segment.
 *
 * The entry is a note header, the 4-byte aligned name "CORE" and
 * AUXV_LEN unsigned long values.
 */
int get_auxv_size(void)
{
	const unsigned long name_len = align32(sizeof("CORE"));
	const unsigned long desc_len = sizeof(unsigned long) * AUXV_LEN;

	return sizeof(struct note) + name_len + desc_len;
}
/**
 * \brief Write the AUXV note: header, name "CORE" and the saved auxv.
 *
 * \param head   Start of the note; name and descriptor follow it.
 * \param thread Thread whose process provides saved_auxv.
 * \param regs   Unused.
 */
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
	const unsigned long desc_len = sizeof(unsigned long) * AUXV_LEN;
	char *name = (char *)(head + 1);
	char *desc = name + align32(sizeof("CORE"));

	/* note header */
	head->namesz = sizeof("CORE");
	head->descsz = desc_len;
	head->type = NT_AUXV;

	/* name (with its NUL), then AUXV_LEN words copied from the process */
	memcpy(name, "CORE", sizeof("CORE"));
	memcpy(desc, thread->proc->saved_auxv, desc_len);
}
/**
 * \brief Size in bytes of the whole NOTE segment.
 *
 * Sum of the prstatus, prpsinfo and AUXV entries.
 */
int get_note_size(void)
{
	int total = get_prstatus_size();

	total += get_prpsinfo_size();
	total += get_auxv_size();

	return total;
}
/**
 * \brief Fill the NOTE segment.
 *
 * The prstatus, prpsinfo and AUXV entries are written back to back, in
 * that order, each one starting where the previous one's size ends.
 *
 * \param note   Start of the buffer for the NOTE segment.
 * \param thread A pointer to the current thread structure.
 * \param regs   Register set handed on to each fill function.
 */
void fill_note(void *note, struct thread *thread, void *regs)
{
	char *cursor = note;

	fill_prstatus((struct note *)cursor, thread, regs);
	cursor += get_prstatus_size();

	fill_prpsinfo((struct note *)cursor, thread, regs);
	cursor += get_prpsinfo_size();

	fill_auxv((struct note *)cursor, thread, regs);
}
/**
* \brief Generate an image of the core file.
*
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of the entires of coretable.
*
* A core chunk is represented by a pair of a physical
* address of memory region and its size. If there are
* no corresponding physical address for a VM area
* (an unallocated demand-paging page, e.g.), the address
* should be zero.
*/
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
struct coretable *ct = NULL;
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
int i;
*chunks = 3; /* Elf header , header table and NOTE segment */
if (vm == NULL) {
dkprintf("no vm found.\n");
return -1;
}
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
/* We need a chunk for each page for a demand paging area.
This can be optimized for spacial complexity but we would
lose simplicity instead. */
if (range->flag & VR_DEMAND_PAGING) {
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
if (prevzero == 1)
(*chunks)++;
(*chunks)++;
prevzero = 0;
}
}
if (prevzero == 1)
(*chunks)++;
} else {
(*chunks)++;
}
segs++;
}
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end);
dkprintf("map: %lx-%lx\n", region.map_start, region.map_end);
dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end);
dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end);
}
dkprintf("now generate a core file image\n");
offset += sizeof(eh);
fill_elf_header(&eh, segs);
/* program header table */
phsize = sizeof(Elf64_Phdr) * segs;
ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
if (ph == NULL) {
dkprintf("could not alloc a program header table.\n");
goto fail;
}
memset(ph, 0, phsize);
offset += phsize;
/* NOTE segment
* To align the next segment page-sized, we prepare a padded
* region for our NOTE segment.
*/
notesize = get_note_size();
alignednotesize = alignpage(notesize + offset) - offset;
note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
if (note == NULL) {
dkprintf("could not alloc NOTE for core.\n");
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, thread, regs);
/* prgram header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
ph[0].p_flags = 0;
ph[0].p_offset = offset;
ph[0].p_vaddr = 0;
ph[0].p_paddr = 0;
ph[0].p_filesz = notesize;
ph[0].p_memsz = notesize;
ph[0].p_align = 0;
offset += alignednotesize;
/* program header for each memory chunk */
i = 1;
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
if (range->flag & VR_RESERVED)
continue;
ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
| ((flag & VR_PROT_WRITE) ? PF_W : 0)
| ((flag & VR_PROT_EXEC) ? PF_X : 0);
ph[i].p_offset = offset;
ph[i].p_vaddr = range->start;
ph[i].p_paddr = 0;
ph[i].p_filesz = size;
ph[i].p_memsz = size;
ph[i].p_align = PAGE_SIZE;
i++;
offset += size;
}
/* coretable to send to host */
ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
if (!ct) {
dkprintf("could not alloc a coretable.\n");
goto fail;
}
ct[0].addr = virt_to_phys(&eh); /* ELF header */
ct[0].len = 64;
dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh);
ct[1].addr = virt_to_phys(ph); /* program header table */
ct[1].len = phsize;
dkprintf("coretable[1]: %lx@%lx(%lx)\n", ct[1].len, ct[1].addr, ph);
ct[2].addr = virt_to_phys(note); /* NOTE segment */
ct[2].len = alignednotesize;
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
i = 3; /* memory segments */
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long phys;
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;
int prevzero = 0;
unsigned long size = 0;
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* We begin a new chunk */
size = PAGE_SIZE;
start = p;
} else {
/* We extend the previous chunk */
size += PAGE_SIZE;
}
prevzero = 1;
} else {
if (prevzero == 1) {
/* Flush out an empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
ct[i].addr = phys;
ct[i].len = PAGE_SIZE;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, p);
i++;
prevzero = 0;
}
}
if (prevzero == 1) {
/* An empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
} else {
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
"to physical address", range->start);
goto fail;
}
} else {
phys = virt_to_phys((void *)range->start);
}
ct[i].addr = phys;
ct[i].len = range->end - range->start;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, range->start);
i++;
}
}
*coretable = ct;
return 0;
fail:
if (ct)
kfree(ct);
if (ph)
kfree(ph);
if (note)
kfree(note);
return -1;
}
/**
* \brief Free all the allocated spaces for an image of the core file.
*
* \param coretable An array of core chunks.
*/
void freecore(struct coretable **coretable)
{
struct coretable *ct = *coretable;
kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */
kfree(phys_to_virt(ct[1].addr)); /* ph */
kfree(*coretable);
}
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,4 @@
/* head.S COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* head.S COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <linkage.h>
#include <ptrace.h>
@ -10,9 +10,7 @@
#include <smp.h>
#include <arm-gic-v3.h>
#define KERNEL_RAM_VADDR MAP_KERNEL_START
#define EARLY_ALLOC_VADDR MAP_EARLY_ALLOC
#define BOOT_PARAM_VADDR MAP_BOOT_PARAM
/* KERNEL_RAM_VADDR is defined by cmake */
//#ifndef CONFIG_SMP
//# define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF
@ -50,16 +48,6 @@
add \ttb0, \ttb0, \virt_to_phys
.endm
#ifdef CONFIG_ARM64_64K_PAGES
# define BLOCK_SHIFT PAGE_SHIFT
# define BLOCK_SIZE PAGE_SIZE
# define TABLE_SHIFT PMD_SHIFT
#else
# define BLOCK_SHIFT SECTION_SHIFT
# define BLOCK_SIZE SECTION_SIZE
# define TABLE_SHIFT PUD_SHIFT
#endif
#define KERNEL_START KERNEL_RAM_VADDR
#define KERNEL_END _end
@ -87,6 +75,7 @@
#define TRAMPOLINE_DATA_CPU_MAP_SIZE_SIZE 0x08
#define TRAMPOLINE_DATA_CPU_MAP_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_DATA_RDISTS_PA_SIZE (NR_CPUS * 8)
#define TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE 0x08
#define TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE 0x04
#define TRAMPOLINE_DATA_PMU_AFF_SIZE (CONFIG_SMP_MAX_CORES * 4)
@ -105,9 +94,9 @@
.globl ihk_param_gic_percpu_offset, ihk_param_gic_version
.globl ihk_param_lpj, ihk_param_hz, ihk_param_psci_method
.globl ihk_param_cpu_logical_map, ihk_param_gic_rdist_base_pa
.globl ihk_param_pmu_irq_affiniry, ihk_param_nr_pmu_irq_affiniry
.globl ihk_param_pmu_irq_affi, ihk_param_nr_pmu_irq_affi
.globl ihk_param_use_virt_timer, ihk_param_evtstrm_timer_rate
.globl ihk_param_default_vl
.globl ihk_param_retention_state_flag_pa, ihk_param_default_vl
ihk_param_head:
ihk_param_param_addr:
.quad 0
@ -145,9 +134,11 @@ ihk_param_cpu_logical_map:
.skip NR_CPUS * 8 /* array of the MPIDR and the core number */
ihk_param_gic_rdist_base_pa:
.skip NR_CPUS * 8 /* per-cpu re-distributer PA */
ihk_param_pmu_irq_affiniry:
ihk_param_retention_state_flag_pa:
.quad 0
ihk_param_pmu_irq_affi:
.skip CONFIG_SMP_MAX_CORES * 4 /* array of the pmu affinity list */
ihk_param_nr_pmu_irq_affiniry:
ihk_param_nr_pmu_irq_affi:
.word 0 /* number of pmu affinity list elements. */
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
@ -265,13 +256,17 @@ ENTRY(arch_start)
mov x16, #NR_CPUS /* calc next data */
lsl x16, x16, 3
add x0, x0, x16
/* nr_pmu_irq_affiniry */
/* retention_state_flag_pa */
ldr x16, [x0], #TRAMPOLINE_DATA_RETENTION_STATE_FLAG_PA_SIZE
adr x15, ihk_param_retention_state_flag_pa
str x16, [x15]
/* nr_pmu_irq_affi */
ldr w16, [x0], #TRAMPOLINE_DATA_NR_PMU_AFFI_SIZE
adr x15, ihk_param_nr_pmu_irq_affiniry
adr x15, ihk_param_nr_pmu_irq_affi
str w16, [x15]
/* pmu_irq_affiniry */
/* pmu_irq_affi */
mov x18, x0
adr x15, ihk_param_pmu_irq_affiniry
adr x15, ihk_param_pmu_irq_affi
b 2f
1: ldr w17, [x18], #4
str w17, [x15], #4
@ -410,14 +405,17 @@ __create_page_tables:
* Map the early_alloc_pages area, kernel_img next block
*/
ldr x3, =KERNEL_END
add x3, x3, x28 // __pa(KERNEL_END)
add x3, x3, x28 // __pa(KERNEL_END)
add x3, x3, #BLOCK_SIZE
sub x3, x3, #1
bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc.
ldr x5, =EARLY_ALLOC_VADDR // get start VA
mov x6, #1
lsl x6, x6, #(PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT)
sub x3, x3, #1
bic x3, x3, #(BLOCK_SIZE - 1) // start PA calc.
ldr x5, =KERNEL_END // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc.
mov x6, #MAP_EARLY_ALLOC_SIZE
add x6, x5, x6 // end VA calc
mov x23, x6 // save end VA
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
@ -425,11 +423,13 @@ __create_page_tables:
* Map the boot_param area
*/
adr x3, ihk_param_param_addr
ldr x3, [x3] // get boot_param PA
ldr x5, =BOOT_PARAM_VADDR // get boot_param VA
mov x6, #1
lsl x6, x6, #MAP_BOOT_PARAM_SHIFT
add x6, x5, x6 // end VA calc
ldr x3, [x3] // get boot_param PA
mov x5, x23 // get start VA
add x5, x5, #BLOCK_SIZE
sub x5, x5, #1
bic x5, x5, #(BLOCK_SIZE - 1) // start VA calc
mov x6, #MAP_BOOT_PARAM_SIZE
add x6, x5, x6 // end VA calc.
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6

View File

@ -7,6 +7,7 @@
#include <hw_breakpoint.h>
#include <arch-memory.h>
#include <signal.h>
#include <process.h>
/* @ref.impl arch/arm64/kernel/hw_breakpoint.c::core_num_[brps|wrps] */
/* Number of BRP/WRP registers on this CPU. */

View File

@ -0,0 +1,131 @@
/* imp-sysreg.c COPYRIGHT FUJITSU LIMITED 2018 */
#include <sysreg.h>
/*
 * hpc
 *
 * One ACCESS_REG_FUNC() per register written by the hpc init functions
 * below.  The macro is not defined in this file (presumably it comes
 * from sysreg.h); judging by the calls below it provides
 * xos_access_<name>() for the register given as second argument --
 * TODO confirm against the macro definition.
 */
ACCESS_REG_FUNC(fj_tag_address_ctrl_el1, IMP_FJ_TAG_ADDRESS_CTRL_EL1);
ACCESS_REG_FUNC(pf_ctrl_el1, IMP_PF_CTRL_EL1);
ACCESS_REG_FUNC(pf_stream_detect_ctrl_el0, IMP_PF_STREAM_DETECT_CTRL_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl0_el0, IMP_PF_INJECTION_CTRL0_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl1_el0, IMP_PF_INJECTION_CTRL1_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl2_el0, IMP_PF_INJECTION_CTRL2_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl3_el0, IMP_PF_INJECTION_CTRL3_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl4_el0, IMP_PF_INJECTION_CTRL4_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl5_el0, IMP_PF_INJECTION_CTRL5_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl6_el0, IMP_PF_INJECTION_CTRL6_EL0);
ACCESS_REG_FUNC(pf_injection_ctrl7_el0, IMP_PF_INJECTION_CTRL7_EL0);
ACCESS_REG_FUNC(pf_injection_distance0_el0, IMP_PF_INJECTION_DISTANCE0_EL0);
ACCESS_REG_FUNC(pf_injection_distance1_el0, IMP_PF_INJECTION_DISTANCE1_EL0);
ACCESS_REG_FUNC(pf_injection_distance2_el0, IMP_PF_INJECTION_DISTANCE2_EL0);
ACCESS_REG_FUNC(pf_injection_distance3_el0, IMP_PF_INJECTION_DISTANCE3_EL0);
ACCESS_REG_FUNC(pf_injection_distance4_el0, IMP_PF_INJECTION_DISTANCE4_EL0);
ACCESS_REG_FUNC(pf_injection_distance5_el0, IMP_PF_INJECTION_DISTANCE5_EL0);
ACCESS_REG_FUNC(pf_injection_distance6_el0, IMP_PF_INJECTION_DISTANCE6_EL0);
ACCESS_REG_FUNC(pf_injection_distance7_el0, IMP_PF_INJECTION_DISTANCE7_EL0);
/*
 * Write the initial values of the prefetch (PF_*) registers.
 *
 * PF_CTRL_EL1 gets the EL1AE and EL0AE enable bits; the stream-detect
 * control register and all eight injection control/distance registers
 * are written with 0.
 */
static void hpc_prefetch_regs_init(void)
{
uint64_t reg = 0;
/* PF_CTRL_EL1 */
reg = IMP_PF_CTRL_EL1_EL1AE_ENABLE | IMP_PF_CTRL_EL1_EL0AE_ENABLE;
xos_access_pf_ctrl_el1(WRITE_ACCESS, &reg);
/* PF_STREAM_DETECT_CTRL */
reg = 0;
xos_access_pf_stream_detect_ctrl_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_CTRL */
reg = 0;
xos_access_pf_injection_ctrl0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_ctrl7_el0(WRITE_ACCESS, &reg);
/* PF_INJECTION_DISTANCE */
reg = 0;
xos_access_pf_injection_distance0_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance1_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance2_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance3_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance4_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance5_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance6_el0(WRITE_ACCESS, &reg);
xos_access_pf_injection_distance7_el0(WRITE_ACCESS, &reg);
}
/*
 * Write FJ_TAG_ADDRESS_CTRL_EL1 with the TBO0, SEC0 and PFE0 mask bits
 * set and every other bit clear.
 */
static void hpc_tag_address_regs_init(void)
{
uint64_t reg = IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK |
IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK;
/* FJ_TAG_ADDRESS_CTRL */
xos_access_fj_tag_address_ctrl_el1(WRITE_ACCESS, &reg);
}
/*
 * Initialize the hpc registers: the prefetch registers first, then the
 * tag address control register.
 */
void hpc_registers_init(void)
{
hpc_prefetch_regs_init();
hpc_tag_address_regs_init();
}
/*
 * vhbm
 *
 * Accessors for the barrier registers.  ACCESS_REG_FUNC() is defined
 * outside this file; see the calls in vhbm_barrier_registers_init()
 * for how the resulting xos_access_<name>() functions are used.
 * NOTE(review): barrier_bst_bit_el1 gets an accessor but is not
 * written by vhbm_barrier_registers_init() -- confirm this is intended.
 */
ACCESS_REG_FUNC(barrier_ctrl_el1, IMP_BARRIER_CTRL_EL1);
ACCESS_REG_FUNC(barrier_bst_bit_el1, IMP_BARRIER_BST_BIT_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb0_el1, IMP_BARRIER_INIT_SYNC_BB0_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb1_el1, IMP_BARRIER_INIT_SYNC_BB1_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb2_el1, IMP_BARRIER_INIT_SYNC_BB2_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb3_el1, IMP_BARRIER_INIT_SYNC_BB3_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb4_el1, IMP_BARRIER_INIT_SYNC_BB4_EL1);
ACCESS_REG_FUNC(barrier_init_sync_bb5_el1, IMP_BARRIER_INIT_SYNC_BB5_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w0_el1, IMP_BARRIER_ASSIGN_SYNC_W0_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w1_el1, IMP_BARRIER_ASSIGN_SYNC_W1_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w2_el1, IMP_BARRIER_ASSIGN_SYNC_W2_EL1);
ACCESS_REG_FUNC(barrier_assign_sync_w3_el1, IMP_BARRIER_ASSIGN_SYNC_W3_EL1);
/*
 * Write the initial values of the barrier registers.
 *
 * BARRIER_CTRL_EL1 gets the EL1AE and EL0AE enable bits; the six
 * INIT_SYNC_BB registers and the four ASSIGN_SYNC_W registers are
 * written with 0.
 */
void vhbm_barrier_registers_init(void)
{
uint64_t reg = 0;
/* BARRIER_CTRL_EL1 */
reg = IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE |
IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE;
xos_access_barrier_ctrl_el1(WRITE_ACCESS, &reg);
/* BARRIER_INIT_SYNC_BB and BARRIER_ASSIGN_SYNC_W: all zero */
reg = 0;
xos_access_barrier_init_sync_bb0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb3_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb4_el1(WRITE_ACCESS, &reg);
xos_access_barrier_init_sync_bb5_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w0_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w1_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w2_el1(WRITE_ACCESS, &reg);
xos_access_barrier_assign_sync_w3_el1(WRITE_ACCESS, &reg);
}
/*
 * sccr
 *
 * Accessors for the SCCR registers written by scdrv_registers_init().
 * ACCESS_REG_FUNC() is defined outside this file.
 */
ACCESS_REG_FUNC(sccr_ctrl_el1, IMP_SCCR_CTRL_EL1);
ACCESS_REG_FUNC(sccr_assign_el1, IMP_SCCR_ASSIGN_EL1);
ACCESS_REG_FUNC(sccr_set0_l2_el1, IMP_SCCR_SET0_L2_EL1);
ACCESS_REG_FUNC(sccr_l1_el0, IMP_SCCR_L1_EL0);
/*
 * Write the initial values of the SCCR registers.
 *
 * SCCR_CTRL_EL1 gets the EL1AE mask bit, SCCR_ASSIGN_EL1 and
 * SCCR_L1_EL0 are written with 0, and SCCR_SET0_L2_EL1 gets the value
 * 14 placed at the L2_SEC0 shift position.
 */
void scdrv_registers_init(void)
{
uint64_t reg = 0;
/* SCCR_CTRL_EL1 */
reg = IMP_SCCR_CTRL_EL1_EL1AE_MASK;
xos_access_sccr_ctrl_el1(WRITE_ACCESS, &reg);
/* SCCR_ASSIGN_EL1 and SCCR_L1_EL0: all zero */
reg = 0;
xos_access_sccr_assign_el1(WRITE_ACCESS, &reg);
xos_access_sccr_l1_el0(WRITE_ACCESS, &reg);
/* NOTE(review): the meaning of the constant 14 is not visible here */
reg = (14UL << IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT);
xos_access_sccr_set0_l2_el1(WRITE_ACCESS, &reg);
}

View File

@ -1,4 +1,4 @@
/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015 */
/* arch-futex.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_FUTEX_H
#define __HEADER_ARM64_COMMON_ARCH_FUTEX_H
@ -7,7 +7,8 @@
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:__futex_atomic_op
*/
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
#define ___futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
do { \
asm volatile( \
"1: ldxr %w1, %2\n" \
insn "\n" \
@ -26,18 +27,36 @@
" .popsection\n" \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
: "r" (oparg), "Ir" (-EFAULT) \
: "memory")
: "memory"); \
} while (0);
#ifndef IHK_OS_MANYCORE
#include <linux/uaccess.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
do { \
uaccess_enable(); \
___futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
uaccess_disable(); \
} while (0);
#else
/*
 * IHK_OS_MANYCORE build: forward straight to ___futex_atomic_op() without
 * the uaccess_enable()/uaccess_disable() bracket used in the other branch.
 * The last line of the macro must not end in a backslash, otherwise the
 * following preprocessor line is spliced into the macro body.
 */
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
	___futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg)
#endif
/*
* @ref.impl
* linux-linaro/arch/arm64/include/asm/futex.h:futex_atomic_op_inuser
*/
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
@ -134,12 +153,4 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
return ret;
}
/*
 * Copy the 32-bit futex word at @from into @dest.
 *
 * The source is read exactly once through a volatile-qualified pointer.
 * Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *src = (volatile uint32_t *)from;
	uint32_t value = *src;

	*dest = value;
	return 0;
}
#endif /* !__HEADER_ARM64_COMMON_ARCH_FUTEX_H */

View File

@ -1,4 +1,4 @@
/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* arch-lock.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_LOCK_H
#define __HEADER_ARM64_COMMON_ARCH_LOCK_H
@ -6,6 +6,12 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -19,14 +25,18 @@ int __kprintf(const char *format, ...);
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::arch_spinlock_t */
typedef struct {
//#ifdef __AARCH64EB__
// uint16_t next;
// uint16_t owner;
//#else /* __AARCH64EB__ */
#ifdef __AARCH64EB__
uint16_t next;
uint16_t owner;
#else /* __AARCH64EB__ */
uint16_t owner;
uint16_t next;
//#endif /* __AARCH64EB__ */
} ihk_spinlock_t;
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -34,14 +44,112 @@ extern void preempt_disable(void);
/* @ref.impl arch/arm64/include/asm/spinlock_types.h::__ARCH_SPIN_LOCK_UNLOCKED */
#define SPIN_LOCK_UNLOCKED { 0, 0 }
/* @ref.impl arch/arm64/include/asm/barrier.h::__nops */
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
/* @ref.impl ./arch/arm64/include/asm/lse.h::ARM64_LSE_ATOMIC_INSN */
/* else defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
/* Put *lock into the unlocked state described by SPIN_LOCK_UNLOCKED. */
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
	const ihk_spinlock_t unlocked = SPIN_LOCK_UNLOCKED;

	*lock = unlocked;
}
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */
/* spinlock lock */
#ifdef DEBUG_SPINLOCK
/*
 * DEBUG_SPINLOCK wrapper around __ihk_mc_spinlock_trylock_noirq(): logs the
 * calling CPU, lock address and call site before the attempt and logs again
 * afterwards.
 *
 * Written as a GNU statement expression "({ ... })" so that the macro
 * evaluates to rc, the trylock result, and can be used wherever the
 * non-debug alias (a plain function call) is used as an expression. A bare
 * "{ ... }" block has no value and would not compile in such a context.
 */
#define ihk_mc_spinlock_trylock_noirq(l) ({ \
	int rc; \
	__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", \
		  ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
	rc = __ihk_mc_spinlock_trylock_noirq(l); \
	__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", \
		  ihk_mc_get_processor_id()); \
	rc; \
})
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_trylock */
/*
 * spinlock trylock
 *
 * Disables preemption and tries to take the ticket lock without waiting
 * for a current holder.
 *
 * Returns non-zero if the lock was taken; preemption is then left disabled.
 * Returns 0 if the lock was not taken; preemption is re-enabled first.
 */
static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
ihk_spinlock_t lockval;
int success;
preempt_disable();
/*
 * ARM64_LSE_ATOMIC_INSN(llsc, lse) is defined in this header to expand to
 * its first argument, so the LL/SC sequence is the one that is assembled.
 *
 * LL/SC sequence: load the 32-bit lock word and XOR it with itself rotated
 * by 16 bits. A non-zero result means the two 16-bit halves (owner/next)
 * differ; in that case branch to "2:" with tmp != 0 and store nothing.
 * Otherwise add (1 << TICKET_SHIFT) and store-exclusive, retrying from "1:"
 * while the exclusive store reports failure; tmp is 0 once it succeeds.
 */
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldaxr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 2f\n"
" add %w0, %w0, %3\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
"2:",
/* LSE atomics */
" ldr %w0, %2\n"
" eor %w1, %w0, %w0, ror #16\n"
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
" sub %w1, %w1, %3\n"
" eor %w1, %w1, %w0\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
: "memory");
/* tmp == 0 exactly when the sequence above stored the updated lock word. */
success = !tmp;
/* Diagnostic for the run-queue lock; currently compiled out by "#if 0". */
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
/* Undo the preempt_disable() above when the lock was not obtained. */
if (!success) {
preempt_enable();
}
return success;
}
#ifdef DEBUG_SPINLOCK
/*
 * Debug variant: statement expression that logs the CPU id, lock address and
 * call site, calls __ihk_mc_spinlock_trylock(l, result), logs again, and
 * evaluates to the value returned by that call.
 */
#define ihk_mc_spinlock_trylock(l, result) ({ \
unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", \
ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", \
ihk_mc_get_processor_id()); \
rc; \
})
#else
/* Non-debug variant: plain alias for __ihk_mc_spinlock_trylock(). */
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
/*
 * spinlock trylock & interrupt disable & PSTATE.DAIF save
 *
 * Disables interrupts via cpu_disable_interrupt_save(), then attempts the
 * lock with __ihk_mc_spinlock_trylock_noirq().
 *
 * @lock:   lock to try.
 * @result: receives the trylock outcome (non-zero when the lock was taken).
 *
 * Returns the value obtained from cpu_disable_interrupt_save(). Interrupts
 * are not restored here, regardless of the outcome stored in *result.
 */
static unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock,
					       int *result)
{
	const unsigned long saved_flags = cpu_disable_interrupt_save();

	*result = __ihk_mc_spinlock_trylock_noirq(lock);

	return saved_flags;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -52,6 +160,8 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_lock */
/* spinlock lock */
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
unsigned int tmp;
@ -61,11 +171,19 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
asm volatile(
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n"
"1: ldaxr %w0, %3\n"
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
__nops(3)
)
/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
@ -83,9 +201,16 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
/* spinlock lock & interrupt disable & PSTATE.DAIF save */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -95,6 +220,8 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
/* spinlock lock & interrupt disable & PSTATE.DAIF save */
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
@ -106,8 +233,6 @@ static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
return flags;
}
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */
/* spinlock unlock */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
@ -117,12 +242,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
/* @ref.impl arch/arm64/include/asm/spinlock.h::arch_spin_unlock */
/* spinlock unlock */
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
asm volatile(
" stlrh %w1, %0\n"
: "=Q" (lock->owner)
: "r" (lock->owner + 1)
unsigned long tmp;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" ldrh %w1, %0\n"
" add %w1, %w1, #1\n"
" stlrh %w1, %0",
/* LSE atomics */
" mov %w1, #1\n"
" staddlh %w1, %0\n"
__nops(1))
: "=Q" (lock->owner), "=&r" (tmp)
:
: "memory");
preempt_enable();
@ -145,84 +282,6 @@ static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
cpu_restore_interrupt(flags);
}
/*
 * An implementation of the Mellor-Crummey Scott (MCS) lock
 *
 * The same node type is used both for the lock object itself and for the
 * per-acquirer queue node passed to the lock/unlock functions.
 */
typedef struct mcs_lock_node {
/* Set to 1 by a waiter that found a predecessor; it spins until this is 0. */
unsigned long locked;
/*
 * In a queue node: the successor to hand the lock to, or NULL.
 * In the lock object: the node most recently swapped in by __mcs_lock_lock().
 */
struct mcs_lock_node *next;
/* Interrupt flags saved by mcs_lock_lock() and restored by mcs_lock_unlock(). */
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t;
/* Initialise an MCS node: no successor linked, not marked as waiting. */
static void mcs_lock_init(struct mcs_lock_node *node)
{
	node->next = NULL;
	node->locked = 0;
}
/*
 * Acquire the MCS lock @lock using the caller-supplied queue node @node.
 *
 * The node is swapped into lock->next. If a previous node was there, this
 * node marks itself as waiting, links itself behind that predecessor and
 * spins until its locked field is cleared.
 */
static void __mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
/* Publish this node in lock->next and fetch whatever was there before. */
pred = xchg8(&(lock->next), node);
if (pred) {
/* Mark as waiting before becoming reachable through pred->next. */
node->locked = 1;
pred->next = node;
/* Wait for the predecessor's unlock to clear our flag. */
while (node->locked != 0) {
cpu_pause();
}
}
}
/*
 * Release the MCS lock @lock held through queue node @node.
 *
 * With no successor linked, try to swing lock->next from @node back to 0;
 * if that succeeds nobody is waiting and the function returns. Otherwise a
 * new waiter has already swapped itself into lock->next, so wait until it
 * has linked itself as node->next, then clear its locked flag.
 */
static void __mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = atomic_cmpxchg8(&(lock->next), node, 0);
/* lock->next still pointed at us: no waiter, lock is now free. */
if (old == node) {
return;
}
/* A waiter exists but has not yet stored itself in node->next. */
while (node->next == NULL) {
cpu_pause();
}
}
/* Hand the lock to the successor. */
node->next->locked = 0;
}
/*
 * Take the MCS lock without touching the interrupt state:
 * preemption is disabled first, then the lock is acquired.
 */
static void mcs_lock_lock_noirq(struct mcs_lock_node *lock,
				struct mcs_lock_node *node)
{
	preempt_disable();

	__mcs_lock_lock(lock, node);
}
/*
 * Release the MCS lock without touching the interrupt state:
 * the lock is released first, then preemption is re-enabled.
 */
static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock,
				  struct mcs_lock_node *node)
{
	__mcs_lock_unlock(lock, node);

	preempt_enable();
}
/*
 * Take the MCS lock with interrupts disabled. The flags returned by
 * cpu_disable_interrupt_save() are kept in node->irqsave.
 */
static void mcs_lock_lock(struct mcs_lock_node *lock,
			  struct mcs_lock_node *node)
{
	const unsigned long saved_flags = cpu_disable_interrupt_save();

	node->irqsave = saved_flags;
	mcs_lock_lock_noirq(lock, node);
}
/*
 * Release the MCS lock, then restore the interrupt state from
 * node->irqsave. The field is read only after the lock has been released,
 * as in the original statement order.
 */
static void mcs_lock_unlock(struct mcs_lock_node *lock,
			    struct mcs_lock_node *node)
{
	mcs_lock_unlock_noirq(lock, node);

	cpu_restore_interrupt(node->irqsave);
}
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
@ -238,14 +297,22 @@ typedef struct mcs_rwlock_node {
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
} __attribute__((aligned(64))) mcs_rwlock_node_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_t;
#else
} mcs_rwlock_node_t;
#endif
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_irqsave_t;
#else
} mcs_rwlock_node_irqsave_t;
#endif
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -254,7 +321,11 @@ typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_lock_t;
#else
} mcs_rwlock_lock_t;
#endif
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
@ -602,4 +673,115 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
/*
 * CONFIG_HAS_NMI variant: returns 1 when @flags equals
 * ICC_PMR_EL1_UNMASKED, 0 otherwise.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
	return (flags == ICC_PMR_EL1_UNMASKED) ? 1 : 0;
}
#else /* CONFIG_HAS_NMI */
/*
 * Non-NMI variant: returns 1 when bit 1 (mask 0x2) of @flags is clear,
 * 0 when it is set.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
	return (flags & 0x2) == 0;
}
#endif /* CONFIG_HAS_NMI */
/*
 * Reader/writer lock held in a single 32-bit word.
 * A value of 0 means the lock is free (see ihk_mc_rwlock_init()).
 */
struct ihk_rwlock {
	unsigned int lock;
};

/* Reset @rw to the free state by storing 0 into its lock word. */
static inline void ihk_mc_rwlock_init(struct ihk_rwlock *rw)
{
	*rw = (struct ihk_rwlock){ .lock = 0 };
}
/*
 * Acquire @rw for reading; does not return until it has succeeded.
 *
 * Loop: load-acquire-exclusive the lock word and add 1. If bit 31 of the
 * incremented value is set, go back to "1:" (wfe) and try again; otherwise
 * store-exclusive the incremented value and retry from "2:" if that store
 * did not succeed. The initial sevl makes the first wfe fall through.
 */
static inline void ihk_mc_read_lock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1b\n"
" stxr %w1, %w0, %2\n"
" cbnz %w1, 2b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
/*
 * Try once to acquire @rw for reading.
 *
 * tmp2 starts at 1. If bit 31 of (lock + 1) is set the store is skipped and
 * tmp2 keeps that 1. Otherwise the store-exclusive writes its status into
 * tmp2 (0 on success). There is no retry when the exclusive store fails.
 *
 * Returns 1 if the incremented value was stored, 0 otherwise.
 */
static inline int ihk_mc_read_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2 = 1;
asm volatile(
" ldaxr %w0, %2\n"
" add %w0, %w0, #1\n"
" tbnz %w0, #31, 1f\n"
" stxr %w1, %w0, %2\n"
"1:\n"
: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
return !tmp2;
}
/*
 * Drop one read hold on @rw: load-exclusive the lock word, subtract 1 and
 * write it back with a store-release-exclusive, retrying until the
 * exclusive store succeeds.
 */
static inline void ihk_mc_read_unlock(struct ihk_rwlock *rw)
{
unsigned int tmp, tmp2;
asm volatile(
"1: ldxr %w0, %2\n"
" sub %w0, %w0, #1\n"
" stlxr %w1, %w0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
:
: "cc", "memory");
}
/*
 * Acquire @rw for writing; does not return until it has succeeded.
 *
 * Loop: load-acquire-exclusive the lock word; while it is non-zero go back
 * to "1:" (wfe) and try again. Once it reads as 0, store-exclusive
 * 0x80000000 and retry from "2:" if that store did not succeed.
 */
static inline void ihk_mc_write_lock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" sevl\n"
"1: wfe\n"
"2: ldaxr %w0, %1\n"
" cbnz %w0, 1b\n"
" stxr %w0, %w2, %1\n"
" cbnz %w0, 2b\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
}
/*
 * Try once to acquire @rw for writing.
 *
 * If the loaded lock word is non-zero the store is skipped and tmp keeps
 * that non-zero value. Otherwise 0x80000000 is store-exclusived and tmp
 * receives the store status (0 on success). There is no retry when the
 * exclusive store fails.
 *
 * Returns 1 if 0x80000000 was stored, 0 otherwise.
 */
static inline int ihk_mc_write_trylock(struct ihk_rwlock *rw)
{
unsigned int tmp;
asm volatile(
" ldaxr %w0, %1\n"
" cbnz %w0, 1f\n"
" stxr %w0, %w2, %1\n"
"1:\n"
: "=&r" (tmp), "+Q" (rw->lock)
: "r" (0x80000000)
: "cc", "memory");
return !tmp;
}
/* Release a write hold on @rw by store-releasing 0 into the lock word. */
static inline void ihk_mc_write_unlock(struct ihk_rwlock *rw)
{
asm volatile(
" stlr %w1, %0\n"
: "=Q" (rw->lock) : "r" (0) : "memory");
}
/*
 * Plain (non-atomic) reads of the lock word:
 *  - read_can_lock:  true while the value is below 0x80000000, the value
 *                    stored by ihk_mc_write_lock()/ihk_mc_write_trylock();
 *  - write_can_lock: true only while the value is 0.
 */
#define ihk_mc_read_can_lock(rw) ((rw)->lock < 0x80000000)
#define ihk_mc_write_can_lock(rw) ((rw)->lock == 0)
#endif /* !__HEADER_ARM64_COMMON_ARCH_LOCK_H */

View File

@ -1,96 +1,114 @@
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#define __HEADER_ARM64_COMMON_ARCH_MEMORY_H
#include <const.h>
#include <errno.h>
#ifndef __ASSEMBLY__
#include <list.h>
#include <page.h>
void panic(const char *);
#endif /*__ASSEMBLY__*/
#define _SZ4KB (1UL<<12)
#define _SZ16KB (1UL<<14)
#define _SZ64KB (1UL<<16)
#ifdef CONFIG_ARM64_64K_PAGES
# define GRANULE_SIZE _SZ64KB
# define GRANULE_SIZE _SZ64KB
# define BLOCK_SHIFT PAGE_SHIFT
# define BLOCK_SIZE PAGE_SIZE
# define TABLE_SHIFT PMD_SHIFT
#else
# define GRANULE_SIZE _SZ4KB
# define GRANULE_SIZE _SZ4KB
# define BLOCK_SHIFT SECTION_SHIFT
# define BLOCK_SIZE SECTION_SIZE
# define TABLE_SHIFT PUD_SHIFT
#endif
#define VA_BITS CONFIG_ARM64_VA_BITS
/*
* Address define
*/
#define MAP_KERNEL_SHIFT 21
#define MAP_KERNEL_SIZE (UL(1) << MAP_KERNEL_SHIFT)
#define MAP_EARLY_ALLOC_SHIFT 9
/* early alloc area address */
/* START:_end, SIZE:512 pages */
#define MAP_EARLY_ALLOC_SHIFT 5
#define MAP_EARLY_ALLOC_SIZE (UL(1) << (PAGE_SHIFT + MAP_EARLY_ALLOC_SHIFT))
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
extern char _end[];
# define MAP_EARLY_ALLOC (ALIGN_UP((unsigned long)_end, BLOCK_SIZE))
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
#endif /* !__ASSEMBLY__ */
/* bootparam area address */
/* START:early alloc area end, SIZE:2MiB */
#define MAP_BOOT_PARAM_SHIFT 21
#define MAP_BOOT_PARAM_SIZE (UL(1) << MAP_BOOT_PARAM_SHIFT)
#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB)
#ifndef __ASSEMBLY__
# define MAP_BOOT_PARAM (ALIGN_UP(MAP_EARLY_ALLOC_END, BLOCK_SIZE))
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE)
#endif /* !__ASSEMBLY__ */
/*
* MAP_KERNEL_START is HOST MODULES_END - 8MiB.
* It's defined by cmake.
*/
#if (VA_BITS == 39 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=1 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000000400000000)
# define TASK_UNMAPPED_BASE UL(0x0000000800000000)
# define USER_END UL(0x0000002000000000)
# define MAP_VMAP_START UL(0xffffffbdc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffffffbffbdfd000)
# define MAP_ST_START UL(0xffffffc000000000)
# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_003f_ff80_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000
#
#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB)
#elif (VA_BITS == 42 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=3 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000002000000000)
# define TASK_UNMAPPED_BASE UL(0x0000004000000000)
# define USER_END UL(0x0000010000000000)
# define MAP_VMAP_START UL(0xfffffdfee0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xfffffdfffbdd0000)
# define MAP_ST_START UL(0xfffffe0000000000)
# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_01ff_e000_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB)
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ4KB) /* ARM64_MEMORY_LAYOUT=2 */
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
# define MAP_VMAP_START UL(0xffff7bffc0000000)
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdfd000)
# define MAP_ST_START UL(0xffff800000000000)
# define MAP_KERNEL_START UL(0xffffffffff800000) // 0xffff_ffff_ff80_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_ff80_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_ffa0_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_ffc0_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_ffe0_0000
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB) /* ARM64_MEMORY_LAYOUT=4 */
#
#elif (VA_BITS == 48 && GRANULE_SIZE == _SZ64KB)
#
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
#ifdef ENABLE_TOFU
# define MAP_VMAP_START UL(0xffff7bdfffff0000)
#else
# define MAP_VMAP_START UL(0xffff780000000000)
#endif
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)
# define MAP_KERNEL_START UL(0xffffffffe0000000) // 0xffff_ffff_e000_0000
# define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START) // 0x0000_7fff_e000_0000
# define MAP_EARLY_ALLOC (MAP_KERNEL_START + MAP_KERNEL_SIZE) // 0xffff_ffff_e020_0000
# define MAP_EARLY_ALLOC_END (MAP_EARLY_ALLOC + MAP_EARLY_ALLOC_SIZE)
# define MAP_BOOT_PARAM (MAP_EARLY_ALLOC_END) // 0xffff_ffff_e220_0000
# define MAP_BOOT_PARAM_END (MAP_BOOT_PARAM + MAP_BOOT_PARAM_SIZE) // 0xffff_ffff_e240_0000
#
#else
# error address space is not defined.
#endif
#define STACK_TOP(region) ((region)->user_end)
#define MAP_ST_SIZE (MAP_KERNEL_START - MAP_ST_START)
#define STACK_TOP(region) ((region)->user_end)
/*
* pagetable define
@ -104,7 +122,10 @@
# define PTL3_INDEX_MASK PTL4_INDEX_MASK
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define FIRST_LEVEL_BLOCK_SUPPORT 1
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 4)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 4)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 4)
#elif GRANULE_SIZE == _SZ16KB
# define __PTL4_SHIFT 47
# define __PTL3_SHIFT 36
@ -114,29 +135,53 @@
# define PTL3_INDEX_MASK ((UL(1) << 11) - 1)
# define PTL2_INDEX_MASK PTL3_INDEX_MASK
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define FIRST_LEVEL_BLOCK_SUPPORT 0
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 7)
#elif GRANULE_SIZE == _SZ64KB
# define __PTL4_SHIFT 0
# define __PTL4_SHIFT 55
# define __PTL3_SHIFT 42
# define __PTL2_SHIFT 29
# define __PTL1_SHIFT 16
# define PTL4_INDEX_MASK 0
# define PTL3_INDEX_MASK ((UL(1) << 6) - 1)
# define PTL3_INDEX_MASK_LINUX ((UL(1) << 10) - 1)
# define PTL2_INDEX_MASK ((UL(1) << 13) - 1)
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define FIRST_LEVEL_BLOCK_SUPPORT 0
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
# define __PTL3_CONT_SHIFT (__PTL3_SHIFT + 0)
# define __PTL2_CONT_SHIFT (__PTL2_SHIFT + 5)
# define __PTL1_CONT_SHIFT (__PTL1_SHIFT + 5)
#else
# error granule size error.
#endif
#ifndef __ASSEMBLY__
extern int first_level_block_support;
#endif /* __ASSEMBLY__ */
# define __PTL4_SIZE (UL(1) << __PTL4_SHIFT)
# define __PTL3_SIZE (UL(1) << __PTL3_SHIFT)
# define __PTL2_SIZE (UL(1) << __PTL2_SHIFT)
# define __PTL1_SIZE (UL(1) << __PTL1_SHIFT)
# define __PTL4_MASK (~__PTL4_SIZE - 1)
# define __PTL3_MASK (~__PTL3_SIZE - 1)
# define __PTL2_MASK (~__PTL2_SIZE - 1)
# define __PTL1_MASK (~__PTL1_SIZE - 1)
# define __PTL4_MASK (~(__PTL4_SIZE - 1))
# define __PTL3_MASK (~(__PTL3_SIZE - 1))
# define __PTL2_MASK (~(__PTL2_SIZE - 1))
# define __PTL1_MASK (~(__PTL1_SIZE - 1))
# define __PTL4_CONT_SIZE (UL(1) << __PTL4_CONT_SHIFT)
# define __PTL3_CONT_SIZE (UL(1) << __PTL3_CONT_SHIFT)
# define __PTL2_CONT_SIZE (UL(1) << __PTL2_CONT_SHIFT)
# define __PTL1_CONT_SIZE (UL(1) << __PTL1_CONT_SHIFT)
# define __PTL4_CONT_MASK (~(__PTL4_CONT_SIZE - 1))
# define __PTL3_CONT_MASK (~(__PTL3_CONT_SIZE - 1))
# define __PTL2_CONT_MASK (~(__PTL2_CONT_SIZE - 1))
# define __PTL1_CONT_MASK (~(__PTL1_CONT_SIZE - 1))
# define __PTL4_CONT_COUNT (UL(1) << (__PTL4_CONT_SHIFT - __PTL4_SHIFT))
# define __PTL3_CONT_COUNT (UL(1) << (__PTL3_CONT_SHIFT - __PTL3_SHIFT))
# define __PTL2_CONT_COUNT (UL(1) << (__PTL2_CONT_SHIFT - __PTL2_SHIFT))
# define __PTL1_CONT_COUNT (UL(1) << (__PTL1_CONT_SHIFT - __PTL1_SHIFT))
/* calculate entries */
#if (CONFIG_ARM64_PGTABLE_LEVELS > 3) && (VA_BITS > __PTL4_SHIFT)
@ -183,6 +228,22 @@ static const unsigned int PTL4_ENTRIES = __PTL4_ENTRIES;
static const unsigned int PTL3_ENTRIES = __PTL3_ENTRIES;
static const unsigned int PTL2_ENTRIES = __PTL2_ENTRIES;
static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
static const unsigned int PTL4_CONT_SHIFT = __PTL4_CONT_SHIFT;
static const unsigned int PTL3_CONT_SHIFT = __PTL3_CONT_SHIFT;
static const unsigned int PTL2_CONT_SHIFT = __PTL2_CONT_SHIFT;
static const unsigned int PTL1_CONT_SHIFT = __PTL1_CONT_SHIFT;
static const unsigned long PTL4_CONT_SIZE = __PTL4_CONT_SIZE;
static const unsigned long PTL3_CONT_SIZE = __PTL3_CONT_SIZE;
static const unsigned long PTL2_CONT_SIZE = __PTL2_CONT_SIZE;
static const unsigned long PTL1_CONT_SIZE = __PTL1_CONT_SIZE;
static const unsigned long PTL4_CONT_MASK = __PTL4_CONT_MASK;
static const unsigned long PTL3_CONT_MASK = __PTL3_CONT_MASK;
static const unsigned long PTL2_CONT_MASK = __PTL2_CONT_MASK;
static const unsigned long PTL1_CONT_MASK = __PTL1_CONT_MASK;
static const unsigned int PTL4_CONT_COUNT = __PTL4_CONT_COUNT;
static const unsigned int PTL3_CONT_COUNT = __PTL3_CONT_COUNT;
static const unsigned int PTL2_CONT_COUNT = __PTL2_CONT_COUNT;
static const unsigned int PTL1_CONT_COUNT = __PTL1_CONT_COUNT;
#else
# define PTL4_SHIFT __PTL4_SHIFT
# define PTL3_SHIFT __PTL3_SHIFT
@ -200,8 +261,26 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
# define PTL3_ENTRIES __PTL3_ENTRIES
# define PTL2_ENTRIES __PTL2_ENTRIES
# define PTL1_ENTRIES __PTL1_ENTRIES
# define PTL4_CONT_SHIFT __PTL4_CONT_SHIFT
# define PTL3_CONT_SHIFT __PTL3_CONT_SHIFT
# define PTL2_CONT_SHIFT __PTL2_CONT_SHIFT
# define PTL1_CONT_SHIFT __PTL1_CONT_SHIFT
# define PTL4_CONT_SIZE __PTL4_CONT_SIZE
# define PTL3_CONT_SIZE __PTL3_CONT_SIZE
# define PTL2_CONT_SIZE __PTL2_CONT_SIZE
# define PTL1_CONT_SIZE __PTL1_CONT_SIZE
# define PTL4_CONT_MASK __PTL4_CONT_MASK
# define PTL3_CONT_MASK __PTL3_CONT_MASK
# define PTL2_CONT_MASK __PTL2_CONT_MASK
# define PTL1_CONT_MASK __PTL1_CONT_MASK
# define PTL4_CONT_COUNT __PTL4_CONT_COUNT
# define PTL3_CONT_COUNT __PTL3_CONT_COUNT
# define PTL2_CONT_COUNT __PTL2_CONT_COUNT
# define PTL1_CONT_COUNT __PTL1_CONT_COUNT
#endif/*__ASSEMBLY__*/
#define __page_size(pgshift) (UL(1) << (pgshift))
#define __page_mask(pgsize) (~((pgsize) - 1))
#define __page_offset(addr, size) ((unsigned long)(addr) & ((size) - 1))
#define __page_align(addr, size) ((unsigned long)(addr) & ~((size) - 1))
#define __page_align_up(addr, size) __page_align((unsigned long)(addr) + (size) - 1, size)
@ -210,8 +289,8 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
* normal page
*/
#define PAGE_SHIFT __PTL1_SHIFT
#define PAGE_SIZE (UL(1) << __PTL1_SHIFT)
#define PAGE_MASK (~(PTL1_SIZE - 1))
#define PAGE_SIZE __page_size(PAGE_SHIFT)
#define PAGE_MASK __page_mask(PAGE_SIZE)
#define PAGE_P2ALIGN 0
#define page_offset(addr) __page_offset(addr, PAGE_SIZE)
#define page_align(addr) __page_align(addr, PAGE_SIZE)
@ -221,8 +300,8 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
* large page
*/
#define LARGE_PAGE_SHIFT __PTL2_SHIFT
#define LARGE_PAGE_SIZE (UL(1) << __PTL2_SHIFT)
#define LARGE_PAGE_MASK (~(PTL2_SIZE - 1))
#define LARGE_PAGE_SIZE __page_size(LARGE_PAGE_SHIFT)
#define LARGE_PAGE_MASK __page_mask(LARGE_PAGE_SIZE)
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define large_page_offset(addr) __page_offset(addr, LARGE_PAGE_SIZE)
#define large_page_align(addr) __page_align(addr, LARGE_PAGE_SIZE)
@ -263,6 +342,18 @@ static const unsigned int PTL1_ENTRIES = __PTL1_ENTRIES;
#define PTE_FILEOFF PTE_SPECIAL
#ifdef CONFIG_ARM64_64K_PAGES
# define USER_STACK_PREPAGE_SIZE PAGE_SIZE
# define USER_STACK_PAGE_MASK PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT PAGE_SHIFT
#else
# define USER_STACK_PREPAGE_SIZE LARGE_PAGE_SIZE
# define USER_STACK_PAGE_MASK LARGE_PAGE_MASK
# define USER_STACK_PAGE_P2ALIGN LARGE_PAGE_P2ALIGN
# define USER_STACK_PAGE_SHIFT LARGE_PAGE_SHIFT
#endif
#define PT_ENTRIES (PAGE_SIZE >> 3)
#ifndef __ASSEMBLY__
@ -312,6 +403,8 @@ enum ihk_mc_pt_attribute {
PTATTR_FOR_USER = UL(1) << (PHYS_MASK_SHIFT - 1),
/* WriteCombine */
PTATTR_WRITE_COMBINED = PTE_ATTRINDX(2),
/* converted flag */
ARCH_PTATTR_FLIPPED = PTE_PROT_NONE,
};
extern enum ihk_mc_pt_attribute attr_mask;
@ -323,18 +416,23 @@ static inline int pfn_is_write_combined(uintptr_t pfn)
// Bit definitions whose meaning is inverted relative to the common (arch-independent) part
#define attr_flip_bits (PTATTR_WRITABLE | PTATTR_LARGEPAGE)
static inline int pgsize_to_tbllv(size_t pgsize);
static inline int pte_is_type_page(const pte_t *ptep, size_t pgsize)
{
int ret = 0; //default D_TABLE
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize)) {
int level = pgsize_to_tbllv(pgsize);
switch (level) {
case 4:
case 3:
case 2:
// check D_BLOCK
ret = ((*ptep & PMD_TYPE_MASK) == PMD_TYPE_SECT);
}
else if (PTL1_SIZE == pgsize) {
break;
case 1:
// check D_PAGE
ret = ((*ptep & PTE_TYPE_MASK) == PTE_TYPE_PAGE);
break;
}
return ret;
}
@ -413,21 +511,18 @@ static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize) ||
(PTL1_SIZE == pgsize)) {
*ptep = PTE_NULL;
}
*ptep = PTE_NULL;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
if ((PTL4_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
(PTL3_SIZE == pgsize && CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize) ||
(PTL1_SIZE == pgsize)) {
if (((PTL4_SIZE == pgsize || PTL4_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 3) ||
((PTL3_SIZE == pgsize || PTL3_CONT_SIZE == pgsize)
&& CONFIG_ARM64_PGTABLE_LEVELS > 2) ||
(PTL2_SIZE == pgsize || PTL2_CONT_SIZE == pgsize) ||
(PTL1_SIZE == pgsize || PTL1_CONT_SIZE == pgsize)) {
*ptep = PTE_FILEOFF | off | PTE_TYPE_PAGE;
}
}
@ -457,7 +552,276 @@ static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
}
}
struct page_table;
/* Returns 1 if the PTE_CONT bit is set in *ptep, 0 otherwise. */
static inline int pte_is_contiguous(const pte_t *ptep)
{
	return (*ptep & PTE_CONT) ? 1 : 0;
}
/*
 * Returns 1 if @pgsize equals the contiguous-range size of any page table
 * level that is in use (levels 3 and 4 only count when
 * CONFIG_ARM64_PGTABLE_LEVELS is large enough), 0 otherwise.
 */
static inline int pgsize_is_contiguous(size_t pgsize)
{
	if (pgsize == PTL1_CONT_SIZE || pgsize == PTL2_CONT_SIZE) {
		return 1;
	}
	if (pgsize == PTL3_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 2) {
		return 1;
	}
	if (pgsize == PTL4_CONT_SIZE && CONFIG_ARM64_PGTABLE_LEVELS > 3) {
		return 1;
	}
	return 0;
}
/*
 * Map a page size to its page table level (4..1).
 *
 * Both the plain and the contiguous size of a level map to that level.
 * Levels are tested from 4 down to 1 and the first match wins; levels 4
 * and 3 are only considered when CONFIG_ARM64_PGTABLE_LEVELS exceeds 3
 * and 2 respectively. Returns -EINVAL when nothing matches.
 */
static inline int pgsize_to_tbllv(size_t pgsize)
{
	if ((CONFIG_ARM64_PGTABLE_LEVELS > 3) &&
	    (pgsize == PTL4_CONT_SIZE || pgsize == PTL4_SIZE)) {
		return 4;
	}
	if ((CONFIG_ARM64_PGTABLE_LEVELS > 2) &&
	    (pgsize == PTL3_CONT_SIZE || pgsize == PTL3_SIZE)) {
		return 3;
	}
	if (pgsize == PTL2_CONT_SIZE || pgsize == PTL2_SIZE) {
		return 2;
	}
	if (pgsize == PTL1_CONT_SIZE || pgsize == PTL1_SIZE) {
		return 1;
	}
	return -EINVAL;
}
/*
 * Map a page size to its shift (log2).
 *
 * Sizes are tested in a fixed order (L4 cont, L4, L3 cont, L3, L2 cont,
 * L2, L1 cont, L1) and only the first matching size is considered. A chain
 * of comparisons is used rather than a switch because PTLx_CONT_SIZE can
 * equal PTLx_SIZE, which would produce duplicate case labels.
 *
 * Returns -EINVAL when no size matches, or when the first matching size
 * belongs to level 3/4 and CONFIG_ARM64_PGTABLE_LEVELS does not include
 * that level.
 */
static inline int pgsize_to_pgshift(size_t pgsize)
{
	const int has_l4 = (CONFIG_ARM64_PGTABLE_LEVELS > 3);
	const int has_l3 = (CONFIG_ARM64_PGTABLE_LEVELS > 2);

	if (pgsize == PTL4_CONT_SIZE) {
		return has_l4 ? (int)PTL4_CONT_SHIFT : -EINVAL;
	}
	if (pgsize == PTL4_SIZE) {
		return has_l4 ? (int)PTL4_SHIFT : -EINVAL;
	}
	if (pgsize == PTL3_CONT_SIZE) {
		return has_l3 ? (int)PTL3_CONT_SHIFT : -EINVAL;
	}
	if (pgsize == PTL3_SIZE) {
		return has_l3 ? (int)PTL3_SHIFT : -EINVAL;
	}
	if (pgsize == PTL2_CONT_SIZE) {
		return PTL2_CONT_SHIFT;
	}
	if (pgsize == PTL2_SIZE) {
		return PTL2_SHIFT;
	}
	if (pgsize == PTL1_CONT_SIZE) {
		return PTL1_CONT_SHIFT;
	}
	if (pgsize == PTL1_SIZE) {
		return PTL1_SHIFT;
	}
	return -EINVAL;
}
/*
 * Return the page size mapped by one entry at page table @level (1..4).
 *
 * panic() is called for a level outside 1..4, and for level 3/4 when
 * CONFIG_ARM64_PGTABLE_LEVELS does not include it; should panic() return,
 * the result is 0.
 */
static inline size_t tbllv_to_pgsize(int level)
{
	switch (level) {
	case 1:
		return PTL1_SIZE;
	case 2:
		return PTL2_SIZE;
	case 3:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
			return PTL3_SIZE;
		}
		panic("page table level 3 is invalid.");
		break;
	case 4:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
			return PTL4_SIZE;
		}
		panic("page table level 4 is invalid.");
		break;
	default:
		panic("page table level is invalid.");
		break;
	}
	/* Only reached if panic() returned. */
	return 0;
}
/*
 * Return the contiguous-range size for page table @level (1..4).
 *
 * panic() is called for a level outside 1..4, and for level 3/4 when
 * CONFIG_ARM64_PGTABLE_LEVELS does not include it; should panic() return,
 * the result is 0.
 */
static inline size_t tbllv_to_contpgsize(int level)
{
	switch (level) {
	case 1:
		return PTL1_CONT_SIZE;
	case 2:
		return PTL2_CONT_SIZE;
	case 3:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
			return PTL3_CONT_SIZE;
		}
		panic("page table level 3 is invalid.");
		break;
	case 4:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
			return PTL4_CONT_SIZE;
		}
		panic("page table level 4 is invalid.");
		break;
	default:
		panic("page table level is invalid.");
		break;
	}
	/* Only reached if panic() returned. */
	return 0;
}
/*
 * Return the contiguous-range shift for page table @level (1..4).
 *
 * panic() is called for a level outside 1..4, and for level 3/4 when
 * CONFIG_ARM64_PGTABLE_LEVELS does not include it; should panic() return,
 * the result is 0.
 */
static inline int tbllv_to_contpgshift(int level)
{
	switch (level) {
	case 1:
		return PTL1_CONT_SHIFT;
	case 2:
		return PTL2_CONT_SHIFT;
	case 3:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
			return PTL3_CONT_SHIFT;
		}
		panic("page table level 3 is invalid.");
		break;
	case 4:
		if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
			return PTL4_CONT_SHIFT;
		}
		panic("page table level 4 is invalid.");
		break;
	default:
		panic("page table level is invalid.");
		break;
	}
	/* Only reached if panic() returned. */
	return 0;
}
/*
 * Return the first entry of the contiguous group that contains @__ptep.
 *
 * The table level is derived from @__pgsize. A group at that level holds
 * 2^(PTLn_CONT_SHIFT - PTLn_SHIFT) entries, so the entry pointer is rounded
 * down to a multiple of (entry size * group length).
 *
 * panic() is called when @__pgsize maps to no valid level.
 */
static inline pte_t *get_contiguous_head(pte_t *__ptep, size_t __pgsize)
{
	const int level = pgsize_to_tbllv(__pgsize);
	unsigned long group_bytes;
	int cont_order = 0;	/* log2 of entries per contiguous group */

	if (level == 1) {
		cont_order = PTL1_CONT_SHIFT - PTL1_SHIFT;
	} else if (level == 2) {
		cont_order = PTL2_CONT_SHIFT - PTL2_SHIFT;
	} else if (level == 3) {
		if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
			cont_order = PTL3_CONT_SHIFT - PTL3_SHIFT;
		} else {
			panic("page table level 3 is invalid.");
		}
	} else if (level == 4) {
		if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
			cont_order = PTL4_CONT_SHIFT - PTL4_SHIFT;
		} else {
			panic("page table level 4 is invalid.");
		}
	} else {
		panic("page table level is invalid.");
	}

	group_bytes = sizeof(*__ptep) << cont_order;
	return (pte_t *)__page_align(__ptep, group_bytes);
}
/*
 * Return the last entry of the contiguous group that contains @__ptep.
 *
 * The table level is derived from @__pgsize. The address just past
 * @__ptep is rounded up to the group boundary (entry size * group length)
 * and one entry is stepped back from there.
 *
 * panic() is called when @__pgsize maps to no valid level.
 */
static inline pte_t *get_contiguous_tail(pte_t *__ptep, size_t __pgsize)
{
	const int level = pgsize_to_tbllv(__pgsize);
	unsigned long group_bytes;
	int cont_order = 0;	/* log2 of entries per contiguous group */

	if (level == 1) {
		cont_order = PTL1_CONT_SHIFT - PTL1_SHIFT;
	} else if (level == 2) {
		cont_order = PTL2_CONT_SHIFT - PTL2_SHIFT;
	} else if (level == 3) {
		if (CONFIG_ARM64_PGTABLE_LEVELS > 2) {
			cont_order = PTL3_CONT_SHIFT - PTL3_SHIFT;
		} else {
			panic("page table level 3 is invalid.");
		}
	} else if (level == 4) {
		if (CONFIG_ARM64_PGTABLE_LEVELS > 3) {
			cont_order = PTL4_CONT_SHIFT - PTL4_SHIFT;
		} else {
			panic("page table level 4 is invalid.");
		}
	} else {
		panic("page table level is invalid.");
	}

	group_bytes = sizeof(*__ptep) << cont_order;
	return (pte_t *)__page_align_up(__ptep + 1, group_bytes) - 1;
}
int split_contiguous_pages(pte_t *ptep, size_t pgsize,
uint32_t memobj_flags);
/*
 * Return non-zero when ptep is itself the entry that
 * get_contiguous_head() reports for its contiguous group.
 */
static inline int page_is_contiguous_head(pte_t *ptep, size_t pgsize)
{
	return get_contiguous_head(ptep, pgsize) == ptep;
}
/*
 * Return non-zero when ptep is itself the entry that
 * get_contiguous_tail() reports for its contiguous group.
 */
static inline int page_is_contiguous_tail(pte_t *ptep, size_t pgsize)
{
	return get_contiguous_tail(ptep, pgsize) == ptep;
}
/*
 * Decide whether a PTE should be treated as a "head" entry.
 *
 * @ptep:      location of the entry inside its page table
 * @old:       entry value that is tested with pte_is_contiguous()
 * @cont_size: page size forwarded to page_is_contiguous_head()
 *
 * Returns 1 when *old is not part of a contiguous group; otherwise
 * returns whether ptep is the first entry of its contiguous group.
 */
static inline int pte_is_head(pte_t *ptep, pte_t *old, size_t cont_size)
{
	return pte_is_contiguous(old)
		? page_is_contiguous_head(ptep, cont_size)
		: 1;
}
typedef pte_t translation_table_t;
/*
 * Page table descriptor: two translation table pointers plus an address
 * space ID.
 */
struct page_table {
translation_table_t* tt; /* translation table pointer (presumably the kernel-virtual address -- TODO confirm) */
translation_table_t* tt_pa; /* presumably the physical address of the same table -- TODO confirm */
int asid; /* address space ID (see set_address_space_id() / get_address_space_id() prototypes) */
};
void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
void **pgaddrp,
size_t *pgsizep);
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
@ -472,7 +836,6 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
void set_address_space_id(struct page_table *pt, int asid);
int get_address_space_id(const struct page_table *pt);
typedef pte_t translation_table_t;
void set_translation_table(struct page_table *pt, translation_table_t* tt);
translation_table_t* get_translation_table(const struct page_table *pt);
translation_table_t* get_translation_table_as_paddr(const struct page_table *pt);

View File

@ -1,9 +1,16 @@
/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* arch-perfctr.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __ARCH_PERFCTR_H__
#define __ARCH_PERFCTR_H__
#include <ihk/types.h>
#include <ihk/cpu.h>
#include <bitops.h>
/*
 * Per-CPU PMU information.  struct arm_pmu holds a pointer to an array of
 * these, which get_per_cpu_pmu() indexes by processor id.
 */
struct per_cpu_arm_pmu {
int num_events; /* presumably the number of event counters on this CPU -- TODO confirm */
/* Width, in bits, of pmceid_bitmap below. */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
/* One bit per common event number; presumably filled from the PMCEID registers -- TODO confirm */
DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
};
/* @ref.impl arch/arm64/include/asm/pmu.h */
struct arm_pmu {
@ -13,15 +20,22 @@ struct arm_pmu {
void (*reset)(void*);
int (*enable_pmu)(void);
void (*disable_pmu)(void);
int (*enable_counter)(int);
int (*disable_counter)(int);
int (*enable_intens)(int);
int (*disable_intens)(int);
int (*enable_counter)(unsigned long counter_mask);
int (*disable_counter)(unsigned long counter_mask);
int (*enable_intens)(unsigned long counter_mask);
int (*disable_intens)(unsigned long counter_mask);
int (*set_event_filter)(unsigned long*, int);
void (*write_evtype)(int, uint32_t);
int (*get_event_idx)(int, unsigned long);
int (*get_event_idx)(int num_events, unsigned long used_mask,
unsigned long config);
int (*map_event)(uint32_t, uint64_t);
int num_events;
int (*map_hw_event)(uint64_t config);
int (*map_cache_event)(uint64_t config);
int (*map_raw_event)(uint64_t config);
void (*enable_user_access_pmu_regs)(void);
void (*disable_user_access_pmu_regs)(void);
int (*counter_mask_valid)(unsigned long counter_mask);
struct per_cpu_arm_pmu *per_cpu;
};
static inline const struct arm_pmu* get_cpu_pmu(void)
@ -29,44 +43,21 @@ static inline const struct arm_pmu* get_cpu_pmu(void)
extern struct arm_pmu cpu_pmu;
return &cpu_pmu;
}
static inline const struct per_cpu_arm_pmu *get_per_cpu_pmu(void)
{
const struct arm_pmu *cpu_pmu = get_cpu_pmu();
return &cpu_pmu->per_cpu[ihk_mc_get_processor_id()];
}
int arm64_init_perfctr(void);
void arm64_init_per_cpu_perfctr(void);
int arm64_enable_pmu(void);
void arm64_disable_pmu(void);
int armv8pmu_init(struct arm_pmu* cpu_pmu);
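/* TODO[PMU]: these definitions could just as well live in the common (architecture-independent) code; watch how things develop and then remove the definitions here. */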
/*
* Generalized hardware cache events:
*
* { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
* { read, write, prefetch } x
* { accesses, misses }
*/
/*
 * First axis of a generalized hardware cache event: which cache-like unit
 * is being counted (L1-D, L1-I, LLC, DTLB, ITLB, BPU, NODE).
 */
enum perf_hw_cache_id {
PERF_COUNT_HW_CACHE_L1D = 0, /* L1-D */
PERF_COUNT_HW_CACHE_L1I = 1, /* L1-I */
PERF_COUNT_HW_CACHE_LL = 2, /* LLC */
PERF_COUNT_HW_CACHE_DTLB = 3, /* DTLB */
PERF_COUNT_HW_CACHE_ITLB = 4, /* ITLB */
PERF_COUNT_HW_CACHE_BPU = 5, /* BPU */
PERF_COUNT_HW_CACHE_NODE = 6, /* NODE */
PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};
/*
 * Second axis of a generalized hardware cache event: the kind of
 * operation (read, write, prefetch).
 */
enum perf_hw_cache_op_id {
PERF_COUNT_HW_CACHE_OP_READ = 0, /* read */
PERF_COUNT_HW_CACHE_OP_WRITE = 1, /* write */
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, /* prefetch */
PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};
/*
 * Third axis of a generalized hardware cache event: whether accesses or
 * misses are counted.
 */
enum perf_hw_cache_op_result_id {
PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, /* accesses */
PERF_COUNT_HW_CACHE_RESULT_MISS = 1, /* misses */
PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
void armv8pmu_per_cpu_init(struct per_cpu_arm_pmu *per_cpu);
void arm64_enable_user_access_pmu_regs(void);
void arm64_disable_user_access_pmu_regs(void);
#endif

View File

@ -10,4 +10,13 @@ extern void *__inline_memcpy(void *to, const void *from, size_t t);
extern void *__inline_memset(void *s, unsigned long c, size_t count);
#define ARCH_MEMCLEAR
extern void __memclear(void *addr, unsigned long len, void *tmp);
/*
 * Convenience wrapper around __memclear(): supplies a 32-byte on-stack
 * scratch buffer as the third ("tmp") argument so callers only pass the
 * address and length.  The buffer is presumably a save area for SIMD
 * registers q0/q1 -- confirm against the __memclear implementation.
 */
static inline void memclear(void *addr, unsigned long len)
{
	uint64_t q0q1_save[4];

	__memclear(addr, len, q0q1_save);
}
#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */

View File

@ -1,7 +1,9 @@
/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016 */
/* arch-timer.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __HEADER_ARM64_COMMON_ARCH_TIMER_H
#define __HEADER_ARM64_COMMON_ARCH_TIMER_H
#include <ihk/cpu.h>
/* @ref.impl include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */
#define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */
@ -11,4 +13,19 @@
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
/* @ref.impl linux4.10.16 */
/* include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_CTRL_ENABLE (1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK (1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT (1 << 2)
/* Selects which architected timer register an access refers to. */
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL, /* control register (bits: ARCH_TIMER_CTRL_*) */
ARCH_TIMER_REG_TVAL, /* timer value register -- presumably CNTx_TVAL; TODO confirm */
};
extern int get_timer_intrid(void);
extern void arch_timer_init(void);
extern struct ihk_mc_interrupt_handler *get_timer_handler(void);
#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */

View File

@ -1,4 +1,4 @@
/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* cpu.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __HEADER_ARM64_ARCH_CPU_H
#define __HEADER_ARM64_ARCH_CPU_H
@ -12,6 +12,8 @@
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#include <registers.h>
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
@ -69,12 +71,10 @@ do { \
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */
#define read_tsc() \
({ \
unsigned long cval; \
isb(); \
asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); \
cval = rdtsc(); \
cval; \
})
@ -102,4 +102,6 @@ static inline void cpu_disable_nmi(void)
#endif /* __ASSEMBLY__ */
void arch_flush_icache_all(void);
#endif /* !__HEADER_ARM64_ARCH_CPU_H */

View File

@ -21,12 +21,11 @@
/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
#define MAP_HUGE_SHIFT 26
#if FIRST_LEVEL_BLOCK_SUPPORT
# define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT)
#else
# define MAP_HUGE_FIRST_BLOCK -1 /* not supported */
#endif
#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_FIRST_BLOCK (__PTL3_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_BLOCK (__PTL2_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << MAP_HUGE_SHIFT)
#define MAP_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << MAP_HUGE_SHIFT)
/*
* for mlockall()

View File

@ -1,60 +0,0 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct ihk_os_monitor *monitor;
extern int sprintf(char * buf, const char *fmt, ...);
#define DEBUG_ARCH_RUSAGE
/*
 * rusage debug/error print helpers.
 *
 * eprintf() formats its arguments into a 1024-byte on-stack buffer with
 * sprintf() and emits "<function>,<message>" through kprintf().  It is
 * always active.  dprintf() does the same when DEBUG_ARCH_RUSAGE is
 * defined and expands to an empty statement otherwise.
 *
 * The definitions deliberately do NOT end in a semicolon: the caller's own
 * ';' completes the do { } while (0) statement, which keeps
 * "if (x) eprintf(...); else ..." well-formed.
 *
 * NOTE(review): sprintf() is unbounded, so a formatted message longer than
 * 1023 characters overruns the buffer; switch to a bounded formatter if
 * one is available in this kernel.
 */
#define eprintf(...) \
	do { \
		char rusage_msg_[1024]; \
		sprintf(rusage_msg_, __VA_ARGS__); \
		kprintf("%s,%s", __FUNCTION__, rusage_msg_); \
	} while (0)

#ifdef DEBUG_ARCH_RUSAGE
#define dprintf(...) eprintf(__VA_ARGS__)
#else
#define dprintf(...) do { } while (0)
#endif
/*
 * Map a page size to an IHK_OS_PGSIZE_* page-type index.
 *
 * NOTE: the size switch below is compiled out by "#if 0" (marked
 * postk-TODO), so this function currently returns IHK_OS_PGSIZE_4KB for
 * every pgsize.
 */
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
#if 0 /* postk-TODO */
switch (pgsize) {
case PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case PTL2_SIZE:
ret = IHK_OS_PGSIZE_2MB;
break;
case PTL3_SIZE:
ret = IHK_OS_PGSIZE_1GB;
break;
default:
eprintf("unknown pgsize=%ld\n", pgsize);
break;
}
#endif
return ret;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -6,12 +6,11 @@
/* shmflg */
#define SHM_HUGE_SHIFT 26
#if FIRST_LEVEL_BLOCK_SUPPORT
# define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT)
#else
# define SHM_HUGE_FIRST_BLOCK -1 /* not supported */
#endif
#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_FIRST_BLOCK (__PTL3_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_FIRST_CONT_BLOCK (__PTL3_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_BLOCK (__PTL2_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_SECOND_CONT_BLOCK (__PTL2_CONT_SHIFT << SHM_HUGE_SHIFT)
#define SHM_HUGE_THIRD_CONT_BLOCK (__PTL1_CONT_SHIFT << SHM_HUGE_SHIFT)
struct ipc_perm {
key_t key;

View File

@ -1,34 +0,0 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#include <arch-memory.h>
//#define DEBUG_RUSAGE
extern struct rusage_global *rusage;
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_16KB 1
#define IHK_OS_PGSIZE_64KB 2
/*
 * Map a page size to an IHK_OS_PGSIZE_* page-type index.
 *
 * __PTL1_SIZE, __PTL2_SIZE and __PTL3_SIZE map to IHK_OS_PGSIZE_4KB,
 * IHK_OS_PGSIZE_16KB and IHK_OS_PGSIZE_64KB respectively.  Any other size
 * is reported through kprintf() and falls back to IHK_OS_PGSIZE_4KB.
 */
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
	if (pgsize == __PTL1_SIZE) {
		return IHK_OS_PGSIZE_4KB;
	}
	if (pgsize == __PTL2_SIZE) {
		return IHK_OS_PGSIZE_16KB;
	}
	if (pgsize == __PTL3_SIZE) {
		return IHK_OS_PGSIZE_64KB;
	}

	kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
	return IHK_OS_PGSIZE_4KB;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -60,9 +60,9 @@
#ifdef CONFIG_HAS_NMI
#define GICD_INT_NMI_PRI 0x40
#define GICD_INT_DEF_PRI 0xc0
#define GICD_INT_DEF_PRI 0xc0U
#else
#define GICD_INT_DEF_PRI 0xa0
#define GICD_INT_DEF_PRI 0xa0U
#endif
#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
(GICD_INT_DEF_PRI << 16) |\

View File

@ -19,6 +19,7 @@
#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H
#define __LINUX_IRQCHIP_ARM_GIC_V3_H
#include <stringify.h>
/* @ref.impl include/linux/irqchip/arm-gic-v3.h */
#include <sysreg.h>
@ -381,11 +382,4 @@
#define ICH_AP1R2_EL2 __AP1Rx_EL2(2)
#define ICH_AP1R3_EL2 __AP1Rx_EL2(3)
/**
* @ref.impl host-kernel/include/linux/stringify.h
*/
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_H */

View File

@ -15,8 +15,9 @@
#define S_PC 0x100 /* offsetof(struct pt_regs, pc) */
#define S_PSTATE 0x108 /* offsetof(struct pt_regs, pstate) */
#define S_ORIG_X0 0x110 /* offsetof(struct pt_regs, orig_x0) */
#define S_SYSCALLNO 0x118 /* offsetof(struct pt_regs, syscallno) */
#define S_FRAME_SIZE 0x120 /* sizeof(struct pt_regs) */
#define S_ORIG_PC 0x118 /* offsetof(struct pt_regs, orig_pc) */
#define S_SYSCALLNO 0x120 /* offsetof(struct pt_regs, syscallno) */
#define S_FRAME_SIZE 0x130 /* sizeof(struct pt_regs) must be 16 byte align */
#define CPU_INFO_SETUP 0x10 /* offsetof(struct cpu_info, cpu_setup) */
#define CPU_INFO_SZ 0x18 /* sizeof(struct cpu_info) */

View File

@ -0,0 +1,20 @@
/* asm-syscall.h COPYRIGHT FUJITSU LIMITED 2018 */
#ifndef __HEADER_ARM64_ASM_SYSCALL_H
#define __HEADER_ARM64_ASM_SYSCALL_H
#ifdef __ASSEMBLY__
#define DECLARATOR(number, name) .equ __NR_##name, number
#define SYSCALL_HANDLED(number, name) DECLARATOR(number, name)
#define SYSCALL_DELEGATED(number, name) DECLARATOR(number, name)
#include <config.h>
#include <syscall_list.h>
#undef DECLARATOR
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
#endif /* __ASSEMBLY__ */
#endif /* !__HEADER_ARM64_ASM_SYSCALL_H */

View File

@ -67,21 +67,12 @@ struct arm64_cpu_capabilities {
int def_scope;/* default scope */
int (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
int (*enable)(void *);/* Called on all active CPUs */
union {
struct {/* To be used for erratum handling only */
uint32_t midr_model;
uint32_t midr_range_min, midr_range_max;
};
struct {/* Feature register checking */
uint32_t sys_reg;
uint8_t field_pos;
uint8_t min_field_value;
uint8_t hwcap_type;
int sign;
unsigned long hwcap;
};
};
uint32_t sys_reg;
uint8_t field_pos;
uint8_t min_field_value;
uint8_t hwcap_type;
int sign;
unsigned long hwcap;
};
/* @ref.impl include/linux/bitops.h */

View File

@ -25,17 +25,78 @@
#define MIDR_PARTNUM(midr) \
(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
#define MIDR_ARCHITECTURE_SHIFT 16
#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT)
#define MIDR_ARCHITECTURE(midr) \
(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
#define MIDR_VARIANT_SHIFT 20
#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define ARM_CPU_IMP_CAVIUM 0x43
#define MIDR_CPU_MODEL(imp, partnum) \
(((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
#define MIDR_CPU_VAR_REV(var, rev) \
(((var) << MIDR_VARIANT_SHIFT) | (rev))
#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
/*
 * Evaluates to true when a MIDR value identifies the given CPU model and
 * its variant/revision bits fall inside an inclusive range.
 *
 *   midr:           MIDR value to test (evaluated twice)
 *   model:          expected value of midr masked with MIDR_CPU_MODEL_MASK
 *                   (implementor, part number and architecture fields)
 *   rv_min, rv_max: inclusive bounds for midr masked with
 *                   (MIDR_REVISION_MASK | MIDR_VARIANT_MASK)
 *
 * Implemented as a statement expression, so it requires a compiler that
 * supports that extension.
 */
#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \
({ \
u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \
u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \
\
_model == (model) && rv >= (rv_min) && rv <= (rv_max); \
})
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_APM 0x50
#define ARM_CPU_IMP_CAVIUM 0x43
#define ARM_CPU_IMP_BRCM 0x42
#define ARM_CPU_IMP_QCOM 0x51
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
#define ARM_CPU_PART_CORTEX_A57 0xD07
#define ARM_CPU_PART_CORTEX_A72 0xD08
#define ARM_CPU_PART_CORTEX_A53 0xD03
#define ARM_CPU_PART_CORTEX_A73 0xD09
#define ARM_CPU_PART_CORTEX_A75 0xD0A
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#ifndef __ASSEMBLY__

View File

@ -1,92 +0,0 @@
/* elfcore.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_ARM64_COMMON_ELFCORE_H
#define __HEADER_ARM64_COMMON_ELFCORE_H
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Xword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
#define EI_NIDENT 16
/*
 * ELF64 file header.  e_ident[] is indexed with the EI_* constants; the
 * field names follow the ELF-64 object file format.
 */
typedef struct {
unsigned char e_ident[EI_NIDENT]; /* identification bytes (magic, class, data encoding, ...) */
Elf64_Half e_type; /* object file type, e.g. ET_CORE */
Elf64_Half e_machine; /* machine architecture, EM_* */
Elf64_Word e_version; /* object file version, e.g. EV_CURRENT */
Elf64_Addr e_entry; /* entry point address */
Elf64_Off e_phoff; /* file offset of the program header table */
Elf64_Off e_shoff; /* file offset of the section header table */
Elf64_Word e_flags; /* processor-specific flags */
Elf64_Half e_ehsize; /* size of this header */
Elf64_Half e_phentsize; /* size of one program header entry */
Elf64_Half e_phnum; /* number of program header entries */
Elf64_Half e_shentsize; /* size of one section header entry */
Elf64_Half e_shnum; /* number of section header entries */
Elf64_Half e_shstrndx; /* section header index of the section name string table */
} Elf64_Ehdr;
#define EI_MAG0 0
#define EI_MAG1 1
#define EI_MAG2 2
#define EI_MAG3 3
#define EI_CLASS 4
#define EI_DATA 5
#define EI_VERSION 6
#define EI_OSABI 7
#define EI_ABIVERSION 8
#define EI_PAD 9
#define ELFMAG0 0x7f
#define ELFMAG1 'E'
#define ELFMAG2 'L'
#define ELFMAG3 'F'
#define ELFCLASS64 2 /* 64-bit object */
#define ELFDATA2LSB 1 /* LSB */
#define El_VERSION 1 /* defined to be the same as EV CURRENT */
#define ELFOSABI_NONE 0 /* unspecied */
#define El_ABIVERSION_NONE 0 /* unspecied */
#define ET_CORE 4 /* Core file */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_K10M 181 /* Intel K10M */
#define EV_CURRENT 1 /* Current version */
/*
 * ELF64 program header (one segment description).  The field names follow
 * the ELF-64 object file format.
 */
typedef struct {
Elf64_Word p_type; /* segment type, e.g. PT_LOAD or PT_NOTE */
Elf64_Word p_flags; /* segment permission bits, PF_X | PF_W | PF_R */
Elf64_Off p_offset; /* file offset of the segment */
Elf64_Addr p_vaddr; /* virtual address of the segment */
Elf64_Addr p_paddr; /* physical address of the segment */
Elf64_Xword p_filesz; /* size of the segment in the file */
Elf64_Xword p_memsz; /* size of the segment in memory */
Elf64_Xword p_align; /* segment alignment */
} Elf64_Phdr;
#define PT_LOAD 1
#define PT_NOTE 4
#define PF_X 1 /* executable bit */
#define PF_W 2 /* writable bit */
#define PF_R 4 /* readable bit */
/*
 * Fixed-size header of a note entry.  The variable-length name and
 * descriptor bytes follow it directly and are not part of the struct.
 */
struct note {
Elf64_Word namesz; /* length of the name that follows */
Elf64_Word descsz; /* length of the descriptor that follows the name */
Elf64_Word type; /* note type, e.g. one of the NT_* constants */
/* name char[namesz] and desc[descsz] */
};
#define NT_PRSTATUS 1
#define NT_PRFRPREG 2
#define NT_PRPSINFO 3
#define NT_AUXV 6
#define NT_X86_STATE 0x202
#include "elfcoregpl.h"
#endif /* !__HEADER_ARM64_COMMON_ELFCORE_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,98 +0,0 @@
/* elfcoregpl.h COPYRIGHT FUJITSU LIMITED 2015 */
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_ARM64_COMMON_ELFCOREGPL_H
#define __HEADER_ARM64_COMMON_ELFCOREGPL_H
#define pid_t int
/* From /usr/include/linux/elfcore.h of Linux */
#define ELF_PRARGSZ (80)
/* From /usr/include/linux/elfcore.h of Linux */
/* Signal information stored at the start of struct elf_prstatus64. */
struct elf_siginfo
{
int si_signo; /* signal number */
int si_code; /* signal code */
int si_errno; /* errno value associated with the signal */
};
/* From bfd/hosts/x86-64linux.h of gdb. */
typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t;
typedef a8_uint64_t elf_greg64_t;
/*
 * x86-64 general-purpose register set, taken from gdb's
 * bfd/hosts/x86-64linux.h.  Every member is an 8-byte-aligned 64-bit
 * value; ELF_NGREG64 is derived from the size of this struct, so members
 * must not be added, removed or reordered.
 */
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
/* Time value with 64-bit fields, used for the pr_*time members of struct elf_prstatus64. */
struct prstatus64_timeval
{
a8_uint64_t tv_sec; /* seconds */
a8_uint64_t tv_usec; /* microseconds */
};
/*
 * Process status record (descriptor of an NT_PRSTATUS note).  The layout
 * mirrors the prstatus definition of Linux's elfcore.h with 64-bit
 * fields; do not reorder members.
 */
struct elf_prstatus64
{
struct elf_siginfo pr_info; /* signal information */
short int pr_cursig; /* current signal */
a8_uint64_t pr_sigpend; /* pending signals */
a8_uint64_t pr_sighold; /* held signals */
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime; /* user time */
struct prstatus64_timeval pr_stime; /* system time */
struct prstatus64_timeval pr_cutime; /* cumulative user time */
struct prstatus64_timeval pr_cstime; /* cumulative system time */
elf_gregset64_t pr_reg; /* general-purpose registers (ELF_NGREG64 entries) */
int pr_fpvalid; /* presumably non-zero when floating-point state is valid -- TODO confirm */
};
/*
 * Process information record (descriptor of an NT_PRPSINFO note).  The
 * layout mirrors the prpsinfo definition of Linux's elfcore.h; do not
 * reorder members.
 */
struct elf_prpsinfo64
{
char pr_state; /* numeric process state */
char pr_sname; /* character representing pr_state */
char pr_zomb; /* zombie flag */
char pr_nice; /* nice value */
a8_uint64_t pr_flag; /* process flags */
unsigned int pr_uid;
unsigned int pr_gid;
int pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16]; /* executable file name */
char pr_psargs[ELF_PRARGSZ]; /* initial part of the argument list (ELF_PRARGSZ bytes) */
};
#endif /* !__HEADER_ARM64_COMMON_ELFCOREGPL_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,4 +1,4 @@
/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* fpsimd.h COPYRIGHT FUJITSU LIMITED 2016-2019 */
#ifndef __HEADER_ARM64_COMMON_FPSIMD_H
#define __HEADER_ARM64_COMMON_FPSIMD_H
@ -42,16 +42,19 @@ extern void thread_sve_to_fpsimd(struct thread *thread, fp_regs_struct *fp_regs)
extern size_t sve_state_size(struct thread const *thread);
extern void sve_free(struct thread *thread);
extern void sve_alloc(struct thread *thread);
extern int sve_alloc(struct thread *thread);
extern void sve_save_state(void *state, unsigned int *pfpsr);
extern void sve_load_state(void const *state, unsigned int const *pfpsr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length, const unsigned long flags);
extern int sve_get_thread_vl(const struct thread *thread);
extern int sve_set_thread_vl(unsigned long arg);
extern int sve_get_thread_vl(void);
extern int sve_set_vector_length(struct thread *thread, unsigned long vl, unsigned long flags);
#define SVE_SET_VL(thread, vector_length, flags) sve_set_thread_vl(thread, vector_length, flags)
#define SVE_GET_VL(thread) sve_get_thread_vl(thread)
#define SVE_SET_VL(arg) sve_set_thread_vl(arg)
#define SVE_GET_VL() sve_get_thread_vl()
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
#else /* CONFIG_ARM64_SVE */
@ -80,12 +83,12 @@ static int sve_set_vector_length(struct thread *thread, unsigned long vl, unsign
}
/* for prctl syscall */
#define SVE_SET_VL(a,b,c) (-EINVAL)
#define SVE_GET_VL(a) (-EINVAL)
#define SVE_SET_VL(a) (-EINVAL)
#define SVE_GET_VL() (-EINVAL)
#endif /* CONFIG_ARM64_SVE */
extern void init_sve_vl(void);
extern void sve_setup(void);
extern void fpsimd_save_state(struct fpsimd_state *state);
extern void fpsimd_load_state(struct fpsimd_state *state);
extern void thread_fpsimd_save(struct thread *thread);

View File

@ -1,5 +1,4 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifdef POSTK_DEBUG_ARCH_DEP_65
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
@ -25,4 +24,3 @@ unsigned long arch_get_hwcap(void);
extern unsigned long elf_hwcap;
#endif /* _UAPI__ASM_HWCAP_H */
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -124,7 +124,7 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
{
v->counter64 = i;
}
@ -147,6 +147,8 @@ static inline void ihk_atomic64_add(long i, ihk_atomic64_t *v)
/* @ref.impl arch/arm64/include/asm/atomic.h::atomic64_inc */
#define ihk_atomic64_inc(v) ihk_atomic64_add(1LL, (v))
#define ihk_atomic64_cmpxchg(p, o, n) cmpxchg(&((p)->counter64), o, n)
/***********************************************************************
* others
*/

View File

@ -1,4 +1,4 @@
/* context.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* context.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_IHK_CONTEXT_H
#define __HEADER_ARM64_IHK_CONTEXT_H
@ -27,7 +27,9 @@ struct pt_regs {
};
};
unsigned long orig_x0;
unsigned long orig_pc;
unsigned long syscallno;
unsigned long __padding;
};
typedef struct pt_regs ihk_mc_user_context_t;
@ -65,17 +67,17 @@ static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
}
/* temp */
#define ihk_mc_syscall_arg0(uc) (uc)->regs[0]
#define ihk_mc_syscall_arg1(uc) (uc)->regs[1]
#define ihk_mc_syscall_arg2(uc) (uc)->regs[2]
#define ihk_mc_syscall_arg3(uc) (uc)->regs[3]
#define ihk_mc_syscall_arg4(uc) (uc)->regs[4]
#define ihk_mc_syscall_arg5(uc) (uc)->regs[5]
#define ihk_mc_syscall_arg0(uc) ((uc)->regs[0])
#define ihk_mc_syscall_arg1(uc) ((uc)->regs[1])
#define ihk_mc_syscall_arg2(uc) ((uc)->regs[2])
#define ihk_mc_syscall_arg3(uc) ((uc)->regs[3])
#define ihk_mc_syscall_arg4(uc) ((uc)->regs[4])
#define ihk_mc_syscall_arg5(uc) ((uc)->regs[5])
#define ihk_mc_syscall_ret(uc) (uc)->regs[0]
#define ihk_mc_syscall_number(uc) (uc)->regs[8]
#define ihk_mc_syscall_ret(uc) ((uc)->regs[0])
#define ihk_mc_syscall_number(uc) ((uc)->regs[8])
#define ihk_mc_syscall_pc(uc) (uc)->pc
#define ihk_mc_syscall_sp(uc) (uc)->sp
#define ihk_mc_syscall_pc(uc) ((uc)->pc)
#define ihk_mc_syscall_sp(uc) ((uc)->sp)
#endif /* !__HEADER_ARM64_IHK_CONTEXT_H */

View File

@ -20,13 +20,11 @@ typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
#endif /* __ASSEMBLY__ */

View File

@ -0,0 +1,103 @@
/* imp-sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
#ifndef __ASM_IMP_SYSREG_H
#define __ASM_IMP_SYSREG_H
#ifndef __ASSEMBLY__
/* register sys_reg list */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1 sys_reg(3, 0, 11, 2, 0)
#define IMP_SCCR_CTRL_EL1 sys_reg(3, 0, 11, 8, 0)
#define IMP_SCCR_ASSIGN_EL1 sys_reg(3, 0, 11, 8, 1)
#define IMP_SCCR_SET0_L2_EL1 sys_reg(3, 0, 15, 8, 2)
#define IMP_SCCR_SET1_L2_EL1 sys_reg(3, 0, 15, 8, 3)
#define IMP_SCCR_L1_EL0 sys_reg(3, 3, 11, 8, 2)
#define IMP_PF_CTRL_EL1 sys_reg(3, 0, 11, 4, 0)
#define IMP_PF_STREAM_DETECT_CTRL_EL0 sys_reg(3, 3, 11, 4, 0)
#define IMP_PF_INJECTION_CTRL0_EL0 sys_reg(3, 3, 11, 6, 0)
#define IMP_PF_INJECTION_CTRL1_EL0 sys_reg(3, 3, 11, 6, 1)
#define IMP_PF_INJECTION_CTRL2_EL0 sys_reg(3, 3, 11, 6, 2)
#define IMP_PF_INJECTION_CTRL3_EL0 sys_reg(3, 3, 11, 6, 3)
#define IMP_PF_INJECTION_CTRL4_EL0 sys_reg(3, 3, 11, 6, 4)
#define IMP_PF_INJECTION_CTRL5_EL0 sys_reg(3, 3, 11, 6, 5)
#define IMP_PF_INJECTION_CTRL6_EL0 sys_reg(3, 3, 11, 6, 6)
#define IMP_PF_INJECTION_CTRL7_EL0 sys_reg(3, 3, 11, 6, 7)
#define IMP_PF_INJECTION_DISTANCE0_EL0 sys_reg(3, 3, 11, 7, 0)
#define IMP_PF_INJECTION_DISTANCE1_EL0 sys_reg(3, 3, 11, 7, 1)
#define IMP_PF_INJECTION_DISTANCE2_EL0 sys_reg(3, 3, 11, 7, 2)
#define IMP_PF_INJECTION_DISTANCE3_EL0 sys_reg(3, 3, 11, 7, 3)
#define IMP_PF_INJECTION_DISTANCE4_EL0 sys_reg(3, 3, 11, 7, 4)
#define IMP_PF_INJECTION_DISTANCE5_EL0 sys_reg(3, 3, 11, 7, 5)
#define IMP_PF_INJECTION_DISTANCE6_EL0 sys_reg(3, 3, 11, 7, 6)
#define IMP_PF_INJECTION_DISTANCE7_EL0 sys_reg(3, 3, 11, 7, 7)
#define IMP_PF_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0)
#define IMP_BARRIER_CTRL_EL1 sys_reg(3, 0, 11, 12, 0)
#define IMP_BARRIER_BST_BIT_EL1 sys_reg(3, 0, 11, 12, 4)
#define IMP_BARRIER_INIT_SYNC_BB0_EL1 sys_reg(3, 0, 15, 13, 0)
#define IMP_BARRIER_INIT_SYNC_BB1_EL1 sys_reg(3, 0, 15, 13, 1)
#define IMP_BARRIER_INIT_SYNC_BB2_EL1 sys_reg(3, 0, 15, 13, 2)
#define IMP_BARRIER_INIT_SYNC_BB3_EL1 sys_reg(3, 0, 15, 13, 3)
#define IMP_BARRIER_INIT_SYNC_BB4_EL1 sys_reg(3, 0, 15, 13, 4)
#define IMP_BARRIER_INIT_SYNC_BB5_EL1 sys_reg(3, 0, 15, 13, 5)
#define IMP_BARRIER_ASSIGN_SYNC_W0_EL1 sys_reg(3, 0, 15, 15, 0)
#define IMP_BARRIER_ASSIGN_SYNC_W1_EL1 sys_reg(3, 0, 15, 15, 1)
#define IMP_BARRIER_ASSIGN_SYNC_W2_EL1 sys_reg(3, 0, 15, 15, 2)
#define IMP_BARRIER_ASSIGN_SYNC_W3_EL1 sys_reg(3, 0, 15, 15, 3)
#define IMP_SOC_STANDBY_CTRL_EL1 sys_reg(3, 0, 11, 0, 0)
#define IMP_FJ_CORE_UARCH_CTRL_EL2 sys_reg(3, 4, 11, 0, 4)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1 sys_reg(3, 0, 11, 0, 5)
/* macros */
/*
 * Build the bit mask of a register field from its <reg>_<field>_MSB and
 * <reg>_<field>_LSB constants: (MSB - LSB + 1) one-bits, shifted up to
 * bit position LSB.
 */
#define PWR_REG_MASK(reg, field) \
	(((UL(1) << ((reg##_##field##_MSB) - (reg##_##field##_LSB) + 1)) - 1) \
	 << (reg##_##field##_LSB))
/* IMP_FJ_TAG_ADDRESS_CTRL_EL1 */
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT (0)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT (8)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT (9)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_TBO0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_SEC0_SHIFT)
#define IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_MASK (1UL << IMP_FJ_TAG_ADDRESS_CTRL_EL1_PFE0_SHIFT)
/* IMP_SCCR_CTRL_EL1 */
#define IMP_SCCR_CTRL_EL1_EL1AE_SHIFT (63)
#define IMP_SCCR_CTRL_EL1_EL1AE_MASK (1UL << IMP_SCCR_CTRL_EL1_EL1AE_SHIFT)
/* IMP_SCCR_SET0_L2_EL1 */
#define IMP_SCCR_SET0_L2_EL1_L2_SEC0_SHIFT (0)
/* IMP_PF_CTRL_EL1 */
#define IMP_PF_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_PF_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_BARRIER_CTRL_EL1 */
#define IMP_BARRIER_CTRL_EL1_EL1AE_ENABLE (1UL << 63)
#define IMP_BARRIER_CTRL_EL1_EL0AE_ENABLE (1UL << 62)
/* IMP_SOC_STANDBY_CTRL_EL1 */
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_MSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE_LSB 2
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_MSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE_LSB 1
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_MSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION_LSB 0
#define IMP_SOC_STANDBY_CTRL_EL1_ECO_MODE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, ECO_MODE)
#define IMP_SOC_STANDBY_CTRL_EL1_MODE_CHANGE PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, MODE_CHANGE)
#define IMP_SOC_STANDBY_CTRL_EL1_RETENTION PWR_REG_MASK(IMP_SOC_STANDBY_CTRL_EL1, RETENTION)
/* IMP_FJ_CORE_UARCH_RESTRECTION_EL1 */
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_MSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS_LSB 33
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_MSB 9
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION_LSB 8
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_MSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION_LSB 0
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_FL_RESTRICT_TRANS PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, FL_RESTRICT_TRANS)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_ISSUE_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, ISSUE_RESTRICTION)
#define IMP_FJ_CORE_UARCH_RESTRECTION_EL1_EX_RESTRICTION PWR_REG_MASK(IMP_FJ_CORE_UARCH_RESTRECTION_EL1, EX_RESTRICTION)
void scdrv_registers_init(void);
void hpc_registers_init(void);
void vhbm_barrier_registers_init(void);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_IMP_SYSREG_H */

View File

@ -80,6 +80,10 @@ static inline uint64_t __raw_readq(const volatile void *addr)
return val;
}
/* IO barriers */
#define __iormb() rmb()
#define __iowmb() wmb()
/*
* Relaxed I/O memory access primitives. These follow the Device memory
* ordering rules but do not guarantee any ordering relative to Normal memory
@ -95,5 +99,20 @@ static inline uint64_t __raw_readq(const volatile void *addr)
#define writel_relaxed(v,c) ((void)__raw_writel((uint32_t)(v),(c)))
#define writeq_relaxed(v,c) ((void)__raw_writeq((uint64_t)(v),(c)))
/*
* I/O memory access primitives. Reads are ordered relative to any
* following Normal memory access. Writes are ordered relative to any prior
* Normal memory access.
*/
#define readb(c) ({ uint8_t __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c) ({ uint16_t __v = readw_relaxed(c); __iormb(); __v; })
#define readl(c) ({ uint32_t __v = readl_relaxed(c); __iormb(); __v; })
#define readq(c) ({ uint64_t __v = readq_relaxed(c); __iormb(); __v; })
#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
#endif /* __KERNEL__ */
#endif /* __ASM_IO_H */

View File

@ -1,4 +1,4 @@
/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* irq.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_IRQ_H
#define __HEADER_ARM64_IRQ_H
@ -14,48 +14,24 @@
#define INTRID_QUERY_FREE_MEM 2
#define INTRID_CPU_STOP 3
#define INTRID_TLB_FLUSH 4
#define INTRID_STACK_TRACE 6
#define INTRID_MEMDUMP 7
#define INTRID_STACK_TRACE 5
#define INTRID_MULTI_INTR 6
#define INTRID_MULTI_NMI 7
#define LOCAL_SMP_FUNC_CALL_VECTOR 1 /* same as IKC */
/* use PPI interrupt number */
#define INTRID_PERF_OVF 23
#define INTRID_HYP_PHYS_TIMER 26 /* cnthp */
#define INTRID_VIRT_TIMER 27 /* cntv */
#define INTRID_HYP_VIRT_TIMER 28 /* cnthv */
#define INTRID_PHYS_TIMER 30 /* cntp */
/* timer intrid getter */
/*
 * Select the interrupt ID used for the virtual timer.
 *
 * Returns INTRID_HYP_VIRT_TIMER when built with CONFIG_ARM64_VHE and the
 * VHE bit of ID_AA64MMFR1_EL1 is set; otherwise INTRID_VIRT_TIMER.
 */
static int get_virt_timer_intrid(void)
{
	int intrid = INTRID_VIRT_TIMER;
#ifdef CONFIG_ARM64_VHE
	const unsigned long mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);

	if (((mmfr1 >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) != 0) {
		intrid = INTRID_HYP_VIRT_TIMER;
	}
#endif /* CONFIG_ARM64_VHE */
	return intrid;
}
/*
 * Select the interrupt ID used for the physical timer.
 *
 * Returns INTRID_HYP_PHYS_TIMER when built with CONFIG_ARM64_VHE and the
 * VHE bit of ID_AA64MMFR1_EL1 is set; otherwise INTRID_PHYS_TIMER.
 */
static int get_phys_timer_intrid(void)
{
	int intrid = INTRID_PHYS_TIMER;
#ifdef CONFIG_ARM64_VHE
	const unsigned long mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);

	if (((mmfr1 >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) != 0) {
		intrid = INTRID_HYP_PHYS_TIMER;
	}
#endif /* CONFIG_ARM64_VHE */
	return intrid;
}
/* use timer checker */
extern unsigned long is_use_virt_timer(void);
/* Functions for GICv2 */
extern void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv2(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv2(void);
extern void arm64_issue_ipi_gicv2(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv2(struct pt_regs *regs);
/* Functions for GICv3 */
@ -63,6 +39,7 @@ extern void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size);
extern void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size);
extern void gic_enable_gicv3(void);
extern void arm64_issue_ipi_gicv3(unsigned int cpuid, unsigned int vector);
extern void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector);
extern void handle_interrupt_gicv3(struct pt_regs *regs);
void handle_IPI(unsigned int vector, struct pt_regs *regs);

View File

@ -72,6 +72,7 @@
#define PMD_SECT_S (UL(3) << 8)
#define PMD_SECT_AF (UL(1) << 10)
#define PMD_SECT_NG (UL(1) << 11)
#define PMD_SECT_CONT (UL(1) << 52)
#define PMD_SECT_PXN (UL(1) << 53)
#define PMD_SECT_UXN (UL(1) << 54)
@ -93,6 +94,7 @@
#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (UL(1) << 10) /* Access Flag */
#define PTE_NG (UL(1) << 11) /* nG */
#define PTE_CONT (UL(1) << 52) /* Contiguous range */
#define PTE_PXN (UL(1) << 53) /* Privileged XN */
#define PTE_UXN (UL(1) << 54) /* User XN */
/* Software defined PTE bits definition.*/

View File

@ -1,17 +1,17 @@
/* prctl.h COPYRIGHT FUJITSU LIMITED 2017 */
/* prctl.h COPYRIGHT FUJITSU LIMITED 2017-2019 */
#ifndef __HEADER_ARM64_COMMON_PRCTL_H
#define __HEADER_ARM64_COMMON_PRCTL_H
/* arm64 Scalable Vector Extension controls */
#define PR_SVE_SET_VL 48 /* set task vector length */
#define PR_SVE_SET_VL_THREAD (1 << 1) /* set just this thread */
#define PR_SVE_SET_VL_INHERIT (1 << 2) /* inherit across exec */
#define PR_SVE_SET_VL_ONEXEC (1 << 3) /* defer effect until exec */
#define PR_SET_THP_DISABLE 41
#define PR_GET_THP_DISABLE 42
#define PR_SVE_GET_VL 49 /* get task vector length */
/* Decode helpers for the return value from PR_SVE_GET_VL: */
#define PR_SVE_GET_VL_LEN(ret) ((ret) & 0x3fff) /* vector length */
#define PR_SVE_GET_VL_INHERIT (PR_SVE_SET_VL_INHERIT << 16)
/* For convenience, PR_SVE_SET_VL returns the result in the same encoding */
/* arm64 Scalable Vector Extension controls */
/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
#define PR_SVE_SET_VL 50 /* set task vector length */
# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
#define PR_SVE_GET_VL 51 /* get task vector length */
/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
#endif /* !__HEADER_ARM64_COMMON_PRCTL_H */

View File

@ -1,4 +1,4 @@
/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* ptrace.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_PTRACE_H
#define __HEADER_ARM64_COMMON_PTRACE_H
@ -46,6 +46,7 @@
#ifndef __ASSEMBLY__
#include <lwk/compiler.h>
#include <ihk/types.h>
struct user_hwdebug_state {
@ -78,6 +79,70 @@ struct user_sve_header {
uint16_t __reserved;
};
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_TLS,
REGSET_HW_BREAK,
REGSET_HW_WATCH,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif /* CONFIG_ARM64_SVE */
};
struct thread;
struct user_regset;
typedef int user_regset_active_fn(struct thread *target,
const struct user_regset *regset);
typedef long user_regset_get_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf);
typedef long user_regset_set_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf);
typedef int user_regset_writeback_fn(struct thread *target,
const struct user_regset *regset,
int immediate);
typedef unsigned int user_regset_get_size_fn(struct thread *target,
const struct user_regset *regset);
struct user_regset {
user_regset_get_fn *get;
user_regset_set_fn *set;
user_regset_active_fn *active;
user_regset_writeback_fn *writeback;
user_regset_get_size_fn *get_size;
unsigned int n;
unsigned int size;
unsigned int align;
unsigned int bias;
unsigned int core_note_type;
};
struct user_regset_view {
const char *name;
const struct user_regset *regsets;
unsigned int n;
uint32_t e_flags;
uint16_t e_machine;
uint8_t ei_osabi;
};
extern const struct user_regset_view *current_user_regset_view(void);
extern const struct user_regset *find_regset(
const struct user_regset_view *view,
unsigned int type);
extern unsigned int regset_size(struct thread *target,
const struct user_regset *regset);
/* Definitions for user_sve_header.flags: */
#define SVE_PT_REGS_MASK (1 << 0)
@ -85,7 +150,7 @@ struct user_sve_header {
#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
#define SVE_PT_VL_THREAD PR_SVE_SET_VL_THREAD
#define SVE_PT_VL_INHERIT PR_SVE_SET_VL_INHERIT
#define SVE_PT_VL_INHERIT PR_SVE_VL_INHERIT
#define SVE_PT_VL_ONEXEC PR_SVE_SET_VL_ONEXEC
/*
@ -99,7 +164,9 @@ struct user_sve_header {
*/
/* Offset from the start of struct user_sve_header to the register data */
#define SVE_PT_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
#define SVE_PT_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
/*
* The register data content and layout depends on the value of the
@ -174,8 +241,10 @@ struct user_sve_header {
#define SVE_PT_SVE_FFR_OFFSET(vq) \
__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
#define SVE_PT_SVE_FPSR_OFFSET(vq) \
((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + 15) / 16 * 16)
#define SVE_PT_SVE_FPSR_OFFSET(vq) \
((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + \
(SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SVE_FPCR_OFFSET(vq) \
(SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
@ -184,9 +253,10 @@ struct user_sve_header {
* 128-bit boundary.
*/
#define SVE_PT_SVE_SIZE(vq, flags) \
((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE - \
SVE_PT_SVE_OFFSET + 15) / 16 * 16)
#define SVE_PT_SVE_SIZE(vq, flags) \
((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE \
- SVE_PT_SVE_OFFSET + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_PT_SIZE(vq, flags) \
(((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \

View File

@ -1,9 +1,10 @@
/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* registers.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
#ifndef __HEADER_ARM64_COMMON_REGISTERS_H
#define __HEADER_ARM64_COMMON_REGISTERS_H
#include <types.h>
#include <arch/cpu.h>
#include <sysreg.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
@ -76,15 +77,12 @@ static unsigned long rdmsr(unsigned int index)
return 0;
}
/* @ref.impl linux-linaro/arch/arm64/include/asm/arch_timer.h::arch_counter_get_cntvct */
static unsigned long rdtsc(void)
/* @ref.impl linux4.10.16 */
/* arch/arm64/include/asm/arch_timer.h:arch_counter_get_cntvct() */
static inline unsigned long rdtsc(void)
{
unsigned long cval;
isb();
asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
return cval;
return read_sysreg(cntvct_el0);
}
static void set_perfctl(int counter, int event, int mask)

View File

@ -85,7 +85,11 @@ enum __rlimit_resource
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
__RLIMIT_NLIMITS = 15,
/* timeout for RT tasks in us */
__RLIMIT_RTTIME = 15,
#define RLIMIT_RTTIME __RLIMIT_RTTIME
__RLIMIT_NLIMITS = 16,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS

View File

@ -1,4 +1,4 @@
/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* signal.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_SIGNAL_H
#define __HEADER_ARM64_COMMON_SIGNAL_H
@ -9,6 +9,11 @@
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
/*
 * Range check for a signal number.
 *
 * Returns 1 when sig is not greater than _NSIG, 0 otherwise.
 * Note that 0 is accepted by this check.
 */
static inline int valid_signal(unsigned long sig)
{
	return (sig <= _NSIG);
}
typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))
@ -293,6 +298,7 @@ struct extra_context {
struct _aarch64_ctx head;
void *data; /* 16-byte aligned pointer to the extra space */
uint32_t size; /* size in bytes of the extra space */
uint32_t __reserved[3];
};
#define SVE_MAGIC 0x53564501
@ -313,19 +319,25 @@ struct sve_context {
* The SVE architecture leaves space for future expansion of the
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
* See linux/Documentation/arm64/sve.txt for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_MIN 1
#define SVE_VQ_MAX 0x200
#define SVE_VQ_BYTES 16 /* number of bytes per quadword */
#define SVE_VL_MIN (SVE_VQ_MIN * 0x10)
#define SVE_VL_MAX (SVE_VQ_MAX * 0x10)
#define SVE_VQ_MIN 1
#define SVE_VQ_MAX 512
#define SVE_VL_MIN (SVE_VQ_MIN * SVE_VQ_BYTES)
#define SVE_VL_MAX (SVE_VQ_MAX * SVE_VQ_BYTES)
#define SVE_NUM_ZREGS 32
#define SVE_NUM_PREGS 16
#define sve_vl_valid(vl) \
((vl) % 0x10 == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
#define sve_vq_from_vl(vl) ((vl) / 0x10)
((vl) % SVE_VQ_BYTES == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
#define sve_vq_from_vl(vl) ((vl) / SVE_VQ_BYTES)
#define sve_vl_from_vq(vq) ((vq) * SVE_VQ_BYTES)
/*
* The total size of meaningful data in the SVE context in bytes,
@ -360,11 +372,13 @@ struct sve_context {
* Additional data might be appended in the future.
*/
#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * 16)
#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * 2)
#define SVE_SIG_ZREG_SIZE(vq) ((uint32_t)(vq) * SVE_VQ_BYTES)
#define SVE_SIG_PREG_SIZE(vq) ((uint32_t)(vq) * (SVE_VQ_BYTES / 8))
#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
#define SVE_SIG_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
#define SVE_SIG_REGS_OFFSET \
((sizeof(struct sve_context) + (SVE_VQ_BYTES - 1)) \
/ SVE_VQ_BYTES * SVE_VQ_BYTES)
#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET
#define SVE_SIG_ZREG_OFFSET(vq, n) \
@ -402,8 +416,6 @@ struct ucontext {
};
void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs, int num);
void check_signal_irq_disabled(unsigned long rc, void *regs, int num);
#endif /* __HEADER_ARM64_COMMON_SIGNAL_H */

View File

@ -1,17 +1,14 @@
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2015-2018 */
SYSCALL_DELEGATED(4, io_getevents)
SYSCALL_DELEGATED(17, getcwd)
SYSCALL_DELEGATED(22, epoll_pwait)
SYSCALL_HANDLED(22, epoll_pwait)
SYSCALL_DELEGATED(25, fcntl)
SYSCALL_HANDLED(29, ioctl)
SYSCALL_DELEGATED(35, unlinkat)
SYSCALL_DELEGATED(43, statfs)
SYSCALL_DELEGATED(44, fstatfs)
#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
SYSCALL_HANDLED(56, openat)
#else /* POSTK_DEBUG_ARCH_DEP_62 */
SYSCALL_DELEGATED(56, openat)
#endif /* POSTK_DEBUG_ARCH_DEP_62 */
SYSCALL_HANDLED(57, close)
SYSCALL_DELEGATED(61, getdents64)
SYSCALL_DELEGATED(62, lseek)
@ -20,8 +17,8 @@ SYSCALL_DELEGATED(64, write)
SYSCALL_DELEGATED(66, writev)
SYSCALL_DELEGATED(67, pread64)
SYSCALL_DELEGATED(68, pwrite64)
SYSCALL_DELEGATED(72, pselect6)
SYSCALL_DELEGATED(73, ppoll)
SYSCALL_HANDLED(72, pselect6)
SYSCALL_HANDLED(73, ppoll)
SYSCALL_HANDLED(74, signalfd4)
SYSCALL_DELEGATED(78, readlinkat)
SYSCALL_DELEGATED(80, fstat)
@ -86,6 +83,7 @@ SYSCALL_HANDLED(175, geteuid)
SYSCALL_HANDLED(176, getgid)
SYSCALL_HANDLED(177, getegid)
SYSCALL_HANDLED(178, gettid)
SYSCALL_HANDLED(179, sysinfo)
SYSCALL_DELEGATED(188, msgrcv)
SYSCALL_DELEGATED(189, msgsnd)
SYSCALL_DELEGATED(192, semtimedop)
@ -114,21 +112,24 @@ SYSCALL_HANDLED(236, get_mempolicy)
SYSCALL_HANDLED(237, set_mempolicy)
SYSCALL_HANDLED(238, migrate_pages)
SYSCALL_HANDLED(239, move_pages)
#ifdef ENABLE_PERF
SYSCALL_HANDLED(241, perf_event_open)
#else // PERF_ENABLE
SYSCALL_DELEGATED(241, perf_event_open)
#endif // PERF_ENABLE
SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(261, prlimit64)
SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
SYSCALL_HANDLED(733, util_register_desc)
/* McKernel Specific */
SYSCALL_HANDLED(801, swapout)
@ -138,9 +139,14 @@ SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
SYSCALL_DELEGATED(1024, open)
SYSCALL_DELEGATED(1026, unlink)
SYSCALL_DELEGATED(1035, readlink)
SYSCALL_HANDLED(1045, signalfd)
SYSCALL_DELEGATED(1049, stat)
SYSCALL_DELEGATED(1060, getpgrp)
SYSCALL_DELEGATED(1062, time)
SYSCALL_HANDLED(1062, time)
SYSCALL_DELEGATED(1069, epoll_wait)
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
* robust marker for the autotest patch.
*/

View File

@ -1,4 +1,4 @@
/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* sysreg.h COPYRIGHT FUJITSU LIMITED 2016-2018 */
/*
* Macros for accessing system registers with older binutils.
*
@ -23,6 +23,7 @@
#include <types.h>
#include <stringify.h>
#include <ihk/types.h>
/*
* ARMv8 ARM reserves the following encoding for system registers:
@ -56,12 +57,6 @@
#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask)
#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask)
#ifdef __ASSEMBLY__
#define __emit_inst(x).inst (x)
#else
#define __emit_inst(x)".inst " __stringify((x)) "\n\t"
#endif
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@ -143,6 +138,12 @@
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64isar1 */
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_GIC_SHIFT 24
@ -178,6 +179,14 @@
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#define ID_AA64MMFR0_PARANGE_48 0x5
#define ID_AA64MMFR0_PARANGE_52 0x6
#ifdef CONFIG_ARM64_PA_BITS_52
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
#else
#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
#endif
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
@ -264,15 +273,46 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (1UL << 31)
#ifdef __ASSEMBLY__
/* SYS_MIDR_EL1 */
//mask
#define SYS_MIDR_EL1_IMPLEMENTER_MASK (0xFFUL)
#define SYS_MIDR_EL1_PPNUM_MASK (0xFFFUL)
//shift
#define SYS_MIDR_EL1_IMPLEMENTER_SHIFT (24)
#define SYS_MIDR_EL1_PPNUM_SHIFT (0x4)
//val
#define SYS_MIDR_EL1_IMPLEMENTER_FJ (0x46)
#define SYS_MIDR_EL1_PPNUM_TCHIP (0x1)
#define READ_ACCESS (0)
#define WRITE_ACCESS (1)
/*
 * ACCESS_REG_FUNC(name, reg) - define a static accessor for a system register.
 *
 * Expands to:
 *   static void xos_access_<name>(uint8_t flag, uint64_t *reg_value)
 *
 * flag == READ_ACCESS  : read 'reg' with the mrs_s assembler macro and
 *                        store the value in *reg_value.
 * flag == WRITE_ACCESS : write *reg_value to 'reg' with the msr_s macro.
 * any other flag       : no operation.
 *
 * 'reg' is stringified into the instruction text, so the mnemonic must be
 * followed by a space before the operand in both directions.
 */
#define ACCESS_REG_FUNC(name, reg)					\
	static void xos_access_##name(uint8_t flag, uint64_t *reg_value) \
	{								\
		if (flag == READ_ACCESS) {				\
			__asm__ __volatile__("mrs_s %0," __stringify(reg) "\n\t" \
					     :"=&r"(*reg_value)::);	\
		}							\
		else if (flag == WRITE_ACCESS) {			\
			__asm__ __volatile__("msr_s " __stringify(reg) ", %0\n\t" \
					     ::"r"(*reg_value):);	\
		} else {						\
			;						\
		}							\
	}
#define XOS_FALSE (0)
#define XOS_TRUE (1)
#ifdef __ASSEMBLY__
#define __emit_inst(x).inst (x)
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__reg_num_x\num, \num
.endr
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
__emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
.endm
.macro msr_s, sreg, rt
@ -280,7 +320,7 @@
.endm
#else
#define __emit_inst(x)".inst " __stringify((x)) "\n\t"
asm(
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
" .equ .L__reg_num_x\\num, \\num\n"
@ -296,6 +336,28 @@ asm(
" .endm\n"
);
ACCESS_REG_FUNC(midr_el1, SYS_MIDR_EL1);
/*
 * Check the implementer and part-number fields of MIDR_EL1.
 *
 * Returns XOS_TRUE when the implementer field equals
 * SYS_MIDR_EL1_IMPLEMENTER_FJ and the part-number field equals
 * SYS_MIDR_EL1_PPNUM_TCHIP; XOS_FALSE otherwise.
 */
static int xos_is_tchip(void)
{
	uint64_t midr = 0;
	int implementer;
	int part_num;

	xos_access_midr_el1(READ_ACCESS, &midr);

	implementer = (midr >> SYS_MIDR_EL1_IMPLEMENTER_SHIFT) &
		      SYS_MIDR_EL1_IMPLEMENTER_MASK;
	part_num = (midr >> SYS_MIDR_EL1_PPNUM_SHIFT) & SYS_MIDR_EL1_PPNUM_MASK;

	if (implementer != SYS_MIDR_EL1_IMPLEMENTER_FJ) {
		return XOS_FALSE;
	}
	if (part_num != SYS_MIDR_EL1_PPNUM_TCHIP) {
		return XOS_FALSE;
	}
	return XOS_TRUE;
}
#endif
/*
@ -336,4 +398,6 @@ asm(
/* @ref.impl arch/arm64/include/asm/kvm_arm.h */
#define CPTR_EL2_TZ (1 << 8)
#include "imp-sysreg.h"
#endif /* __ASM_SYSREG_H */

View File

@ -1,15 +1,22 @@
/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* thread_info.h COPYRIGHT FUJITSU LIMITED 2015-2019 */
#ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H
#define __HEADER_ARM64_COMMON_THREAD_INFO_H
#define KERNEL_STACK_SIZE 32768 /* 8 page */
#define MIN_KERNEL_STACK_SHIFT 18
#include <arch-memory.h>
#if (MIN_KERNEL_STACK_SHIFT < PAGE_SHIFT)
#define KERNEL_STACK_SHIFT PAGE_SHIFT
#else
#define KERNEL_STACK_SHIFT MIN_KERNEL_STACK_SHIFT
#endif
#define KERNEL_STACK_SIZE (UL(1) << KERNEL_STACK_SHIFT)
/*
 * Initial stack pointer offset within a kernel stack: 16 bytes below the top.
 * Parenthesized so the macro expands safely inside larger expressions.
 */
#define THREAD_START_SP (KERNEL_STACK_SIZE - 16)
#ifndef __ASSEMBLY__
#define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
#define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#include <process.h>
#include <prctl.h>
@ -39,9 +46,9 @@ struct thread_info {
int cpu; /* cpu */
struct cpu_context cpu_context; /* kernel_context */
void *sve_state; /* SVE registers, if any */
uint16_t sve_vl; /* SVE vector length */
uint16_t sve_vl_onexec; /* SVE vl after next exec */
uint16_t sve_flags; /* SVE related flags */
unsigned int sve_vl; /* SVE vector length */
unsigned int sve_vl_onexec; /* SVE vl after next exec */
unsigned long sve_flags; /* SVE related flags */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
};
@ -49,12 +56,12 @@ struct thread_info {
/* Flags for sve_flags (intentionally defined to match the prctl flags) */
/* Inherit sve_vl and sve_flags across execve(): */
#define THREAD_VL_INHERIT PR_SVE_SET_VL_INHERIT
#define THREAD_VL_INHERIT PR_SVE_VL_INHERIT
struct arm64_cpu_local_thread {
struct thread_info thread_info;
unsigned long paniced; /* 136 */
uint64_t panic_regs[34]; /* 144 */
unsigned long paniced;
uint64_t panic_regs[34];
};
union arm64_cpu_local_variables {

View File

@ -4,6 +4,7 @@
#define __ASM_TRAP_H
#include <types.h>
#include <arch-lock.h>
struct pt_regs;

View File

@ -1,8 +1,22 @@
/* virt.h COPYRIGHT FUJITSU LIMITED 2015 */
/* virt.h COPYRIGHT FUJITSU LIMITED 2015-2017 */
#ifndef __HEADER_ARM64_COMMON_VIRT_H
#define __HEADER_ARM64_COMMON_VIRT_H
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
#ifndef __ASSEMBLY__
#include <sysreg.h>
#include <ptrace.h>
/* @ref.impl linux-v4.15-rc3 arch/arm64/include/asm/virt.h */
static inline int is_kernel_in_hyp_mode(void)
{
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
#endif /* !__ASSEMBLY__ */
#endif /* !__HEADER_ARM64_COMMON_VIRT_H */

View File

@ -1,21 +1,21 @@
/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* irq-gic-v2.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <ihk/cpu.h>
#include <irq.h>
#include <arm-gic-v2.h>
#include <io.h>
#include <arch/cpu.h>
#include <memory.h>
#include <affinity.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <arch-timer.h>
#include <cls.h>
// #define DEBUG_GICV2
#ifdef DEBUG_GICV2
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
void *dist_base;
@ -31,10 +31,9 @@ void *cpu_base;
* function, it is not necessary to perform the disable/enable
* interrupts in this function as gic_raise_softirq() .
*/
static void arm64_raise_sgi_gicv2(unsigned int cpuid, unsigned int vector)
static void __arm64_raise_sgi_gicv2(unsigned int hw_cpuid, unsigned int vector)
{
/* Build interrupt destination of the target cpu */
unsigned int hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
uint8_t cpu_target_list = gic_hwid_to_affinity(hw_cpuid);
/*
@ -50,21 +49,32 @@ static void arm64_raise_sgi_gicv2(unsigned int cpuid, unsigned int vector)
);
}
/*
 * Raise an SGI on the CPU with index 'cpuid'.
 *
 * The hardware ID is looked up in the hw_ids[] table returned by
 * ihk_mc_get_cpu_info() and handed to __arm64_raise_sgi_gicv2().
 */
static void arm64_raise_sgi_gicv2(uint32_t cpuid, uint32_t vector)
{
	__arm64_raise_sgi_gicv2(ihk_mc_get_cpu_info()->hw_ids[cpuid], vector);
}
/*
 * Raise an SGI on a Linux/host CPU.
 *
 * 'cpuid' is translated with ihk_mc_get_apicid() and the result is handed
 * to __arm64_raise_sgi_gicv2() as the hardware CPU ID.
 */
static void arm64_raise_sgi_to_host_gicv2(uint32_t cpuid, uint32_t vector)
{
	__arm64_raise_sgi_gicv2(ihk_mc_get_apicid(cpuid), vector);
}
/**
* arm64_raise_spi_gicv2
* @ref.impl nothing.
*/
extern unsigned int ihk_ikc_irq_apicid;
static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector)
{
uint64_t spi_reg_offset;
uint32_t spi_set_pending_bitpos;
if (cpuid != ihk_ikc_irq_apicid) {
ekprintf("SPI(irq#%d) cannot send other than the host.\n", vector);
return;
}
/**
* calculates register offset and bit position corresponding to the numbers.
*
@ -83,6 +93,11 @@ static void arm64_raise_spi_gicv2(unsigned int cpuid, unsigned int vector)
);
}
/*
 * GICv2 entry point for sending an IPI to a host CPU.
 * Forwards both arguments unchanged to arm64_raise_sgi_to_host_gicv2().
 */
void arm64_issue_host_ipi_gicv2(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv2(cpuid, vector);
}
/**
* arm64_issue_ipi_gicv2
* @param cpuid : hardware cpu id
@ -111,8 +126,9 @@ extern int interrupt_from_user(void *);
void handle_interrupt_gicv2(struct pt_regs *regs)
{
unsigned int irqstat, irqnr;
const int from_user = interrupt_from_user(regs);
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
do {
// get GICC_IAR.InterruptID
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
@ -132,7 +148,13 @@ void handle_interrupt_gicv2(struct pt_regs *regs)
*/
break;
} while (1);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
/* for migration by IPI */
if (get_this_cpu_local_var()->flags & CPU_FLAG_NEED_MIGRATE) {
schedule();
check_signal(0, regs, 0);
}
}
void gic_dist_init_gicv2(unsigned long dist_base_pa, unsigned long size)
@ -149,10 +171,6 @@ void gic_enable_gicv2(void)
{
unsigned int enable_ppi_sgi = 0;
if (is_use_virt_timer()) {
enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid();
} else {
enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid();
}
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
writel_relaxed(enable_ppi_sgi, dist_base + GIC_DIST_ENABLE_SET);
}

View File

@ -1,5 +1,4 @@
/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* irq-gic-v3.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <irq.h>
#include <arm-gic-v2.h>
#include <arm-gic-v3.h>
@ -7,17 +6,20 @@
#include <cputype.h>
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
//#define DEBUG_GICV3
#define USE_CAVIUM_THUNDER_X
#ifdef DEBUG_GICV3
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef USE_CAVIUM_THUNDER_X
@ -196,15 +198,12 @@ static inline void gic_write_bpr1(uint32_t val)
}
#endif
static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
static void __arm64_raise_sgi_gicv3(uint32_t hw_cpuid, uint32_t vector)
{
uint64_t mpidr, cluster_id;
uint16_t tlist;
uint64_t val;
/* Build interrupt destination of the target cpu */
uint32_t hw_cpuid = ihk_mc_get_cpu_info()->hw_ids[cpuid];
/*
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI.
@ -240,6 +239,22 @@ static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
}
}
/*
 * Raise an SGI on the CPU with index 'cpuid'.
 *
 * The hardware ID is looked up in the hw_ids[] table returned by
 * ihk_mc_get_cpu_info() and handed to __arm64_raise_sgi_gicv3().
 */
static void arm64_raise_sgi_gicv3(uint32_t cpuid, uint32_t vector)
{
	__arm64_raise_sgi_gicv3(ihk_mc_get_cpu_info()->hw_ids[cpuid], vector);
}
/*
 * Raise an SGI on a Linux/host CPU.
 *
 * 'cpuid' is translated with ihk_mc_get_apicid() and the result is handed
 * to __arm64_raise_sgi_gicv3() as the hardware CPU ID.
 */
static void arm64_raise_sgi_to_host_gicv3(uint32_t cpuid, uint32_t vector)
{
	__arm64_raise_sgi_gicv3(ihk_mc_get_apicid(cpuid), vector);
}
static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector)
{
uint64_t spi_reg_offset;
@ -266,8 +281,14 @@ static void arm64_raise_spi_gicv3(uint32_t cpuid, uint32_t vector)
static void arm64_raise_lpi_gicv3(uint32_t cpuid, uint32_t vector)
{
// @todo.impl
ekprintf("%s called.\n", __func__);
}
/*
 * GICv3 entry point for sending an IPI to a host CPU.
 * Forwards both arguments unchanged to arm64_raise_sgi_to_host_gicv3().
 */
void arm64_issue_host_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
arm64_raise_sgi_to_host_gicv3(cpuid, vector);
}
void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
{
dkprintf("Send irq#%d to cpuid=%d\n", vector, cpuid);
@ -283,7 +304,7 @@ void arm64_issue_ipi_gicv3(uint32_t cpuid, uint32_t vector)
// send LPI (allow only to host)
arm64_raise_lpi_gicv3(cpuid, vector);
} else {
ekprintf("#%d is bad irq number.", vector);
ekprintf("#%d is bad irq number.\n", vector);
}
}
@ -291,22 +312,97 @@ extern int interrupt_from_user(void *);
void handle_interrupt_gicv3(struct pt_regs *regs)
{
uint64_t irqnr;
const int from_user = interrupt_from_user(regs);
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once paniced, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
//irqflags = ihk_mc_spinlock_lock(&v->runq_lock);
/* For migration by IPI or by timesharing */
if (v->flags & CPU_FLAG_NEED_RESCHED) {
if (v->flags & CPU_FLAG_NEED_MIGRATE && !from_user) {
// Don't migrate on K2K schedule
} else {
v->flags &= ~CPU_FLAG_NEED_RESCHED;
do_check = 1;
}
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}
}
/*
 * Pack the four affinity levels of an MPIDR value into one 64-bit word:
 * level 3 at bit 32, level 2 at bit 16, level 1 at bit 8, level 0 at bit 0.
 */
static uint64_t gic_mpidr_to_affinity(unsigned long mpidr)
{
	return ((uint64_t)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32) |
	       (MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16) |
	       (MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8) |
	       MPIDR_AFFINITY_LEVEL(mpidr, 0);
}
/*
 * Route SPI 'irq' to the CPU 'linux_cpu'.
 *
 * Only IDs in the range 32..1019 are accepted; anything else is logged and
 * ignored. The affinity derived from cpu_logical_map(linux_cpu) is written
 * to the 8-byte GICD_IROUTER slot for 'irq' in the distributor.
 */
static void init_spi_routing(uint32_t irq, uint32_t linux_cpu)
{
	const int is_spi = (irq >= 32) && (irq < 1020);
	uint64_t affinity;
	void *irouter;

	if (!is_spi) {
		ekprintf("%s: irq is not spi number. (irq=%d)\n",
			 __func__, irq);
		return;
	}

	/* write to GICD_IROUTER: one 64-bit register per interrupt ID */
	affinity = gic_mpidr_to_affinity(cpu_logical_map(linux_cpu));
	irouter = (void *)(dist_base + GICD_IROUTER + irq * 8);
	writeq_relaxed(affinity, irouter);
}
void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
{
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
extern int spi_table[];
extern int nr_spi_table;
int i;
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
dist_base = map_fixed_area(dist_base_pa, size, 1 /*non chachable*/);
#ifdef USE_CAVIUM_THUNDER_X
@ -315,6 +411,16 @@ void gic_dist_init_gicv3(unsigned long dist_base_pa, unsigned long size)
is_cavium_thunderx = 1;
}
#endif
#ifndef IHK_IKC_USE_LINUX_WORK_IRQ
/* initialize spi routing */
for (i = 0; i < nr_spi_table; i++) {
if (spi_table[i] == -1) {
continue;
}
init_spi_routing(spi_table[i], i);
}
#endif // !IHK_IKC_USE_LINUX_WORK_IRQ
}
void gic_cpu_init_gicv3(unsigned long cpu_base_pa, unsigned long size)
@ -351,11 +457,23 @@ void gic_enable_gicv3(void)
void *rd_sgi_base = rbase + 0x10000 /* SZ_64K */;
int i;
unsigned int enable_ppi_sgi = GICD_INT_EN_SET_SGI;
extern int ihk_param_nr_pmu_irq_affi;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
if (is_use_virt_timer()) {
enable_ppi_sgi |= GICD_ENABLE << get_virt_timer_intrid();
} else {
enable_ppi_sgi |= GICD_ENABLE << get_phys_timer_intrid();
enable_ppi_sgi |= GICD_ENABLE << get_timer_intrid();
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
if ((0 <= ihk_param_pmu_irq_affi[i]) &&
(ihk_param_pmu_irq_affi[i] <
sizeof(enable_ppi_sgi) * BITS_PER_BYTE)) {
enable_ppi_sgi |= GICD_ENABLE <<
ihk_param_pmu_irq_affi[i];
}
}
}
else {
enable_ppi_sgi |= GICD_ENABLE << INTRID_PERF_OVF;
}
/*
@ -368,9 +486,10 @@ void gic_enable_gicv3(void)
/*
* Set priority on PPI and SGI interrupts
*/
for (i = 0; i < 32; i += 4)
for (i = 0; i < 32; i += 4) {
writel_relaxed(GICD_INT_DEF_PRI_X4,
rd_sgi_base + GIC_DIST_PRI + i * 4 / 4);
rd_sgi_base + GIC_DIST_PRI + i);
}
/* sync wait */
gic_do_wait_for_rwp(rbase);
@ -406,9 +525,12 @@ void gic_enable_gicv3(void)
gic_write_bpr1(0);
/* Set specific IPI to NMI */
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_MEMDUMP);
writeb_relaxed(GICD_INT_NMI_PRI, rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_CPU_STOP);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_MULTI_NMI);
writeb_relaxed(GICD_INT_NMI_PRI,
rd_sgi_base + GIC_DIST_PRI + INTRID_STACK_TRACE);
/* sync wait */
gic_do_wait_for_rwp(rbase);

View File

@ -1,4 +1,4 @@
/* local.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* local.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <cpulocal.h>
#include <ihk/atomic.h>
#include <ihk/mm.h>
@ -7,24 +7,31 @@
#include <registers.h>
#include <string.h>
#define LOCALS_SPAN (8 * PAGE_SIZE)
/* BSP initialized stack area */
union arm64_cpu_local_variables init_thread_info __attribute__((aligned(KERNEL_STACK_SIZE)));
/* BSP/AP idle stack pointer head */
static union arm64_cpu_local_variables *locals;
size_t arm64_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
size_t arm64_cpu_local_variables_span = KERNEL_STACK_SIZE; /* for debugger */
/* allocate & initialize BSP/AP idle stack */
void init_processors_local(int max_id)
{
int i = 0;
const int sz = (max_id + 1) * KERNEL_STACK_SIZE;
union arm64_cpu_local_variables *tmp;
const int npages = ((max_id + 1) *
(ALIGN_UP(KERNEL_STACK_SIZE, PAGE_SIZE) >>
PAGE_SHIFT));
if (npages < 1) {
panic("idle kernel stack allocation failed.");
}
/* allocate one more for alignment */
locals = ihk_mc_alloc_pages(((sz + PAGE_SIZE - 1) / PAGE_SIZE), IHK_MC_AP_CRITICAL);
locals = ihk_mc_alloc_pages(npages, IHK_MC_AP_CRITICAL);
if (locals == NULL) {
panic("idle kernel stack allocation failed.");
}
locals = (union arm64_cpu_local_variables *)ALIGN_UP((unsigned long)locals, KERNEL_STACK_SIZE);
/* clear struct process, struct process_vm, struct thread_info area */

File diff suppressed because it is too large Load Diff

View File

@ -218,3 +218,41 @@ ENTRY(__inline_memset)
ret
ENDPIPROC(__inline_memset)
ENDPROC(____inline_memset)
/*
* Non-temporal vector memory clear
*
* Zeroes the buffer 512 bytes per loop iteration using non-temporal
* pair stores (stnp) of two zeroed 128-bit vector registers.
*
* Parameters:
* x0 - buf (assumed to be aligned to page size)
* x1 - n (assumed to be at least page size); the loop exits only when
*      the remaining count reaches exactly zero, so n must be a
*      non-zero multiple of 512
* x2 - address of a 32-byte scratch area: q0/q1 are saved there on
*      entry and reloaded from there before returning
*
* On return x0 has been advanced past the cleared region and x1 is 0.
*/
ENTRY(__memclear)
stp q0, q1, [x2] /* Preserve two 128 bit vector regs */
/* Zero both vector registers (x ^ x == 0) */
eor v0.16B, v0.16B, v0.16B
eor v1.16B, v1.16B, v1.16B
1:
/* 16 pair stores x 32 bytes each = 512 bytes per iteration */
stnp q0, q1, [x0, #32 * 0]
stnp q0, q1, [x0, #32 * 1]
stnp q0, q1, [x0, #32 * 2]
stnp q0, q1, [x0, #32 * 3]
stnp q0, q1, [x0, #32 * 4]
stnp q0, q1, [x0, #32 * 5]
stnp q0, q1, [x0, #32 * 6]
stnp q0, q1, [x0, #32 * 7]
stnp q0, q1, [x0, #32 * 8]
stnp q0, q1, [x0, #32 * 9]
stnp q0, q1, [x0, #32 * 10]
stnp q0, q1, [x0, #32 * 11]
stnp q0, q1, [x0, #32 * 12]
stnp q0, q1, [x0, #32 * 13]
stnp q0, q1, [x0, #32 * 14]
stnp q0, q1, [x0, #32 * 15]
add x0, x0, #512
subs x1, x1, #512
/* NOTE(review): subs above already sets Z for b.ne; this cmp looks redundant */
cmp x1, #0
b.ne 1b
ldp q0, q1, [x2] /* Restore vector regs */
ret
ENDPROC(__memclear)

View File

@ -19,7 +19,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE)
mikc_queue_pages = ((8 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */

View File

@ -1,4 +1,4 @@
/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* perfctr.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <arch-perfctr.h>
#include <ihk/perfctr.h>
#include <mc_perf_event.h>
@ -6,32 +6,61 @@
#include <ihk/debug.h>
#include <registers.h>
#include <string.h>
#include <ihk/mm.h>
#include <irq.h>
#include <process.h>
/*
* @ref.impl arch/arm64/kernel/perf_event.c
* Set at runtime when we know what CPU type we are.
*/
struct arm_pmu cpu_pmu;
extern int ihk_param_pmu_irq_affiniry[CONFIG_SMP_MAX_CORES];
extern int ihk_param_nr_pmu_irq_affiniry;
extern int ihk_param_pmu_irq_affi[CONFIG_SMP_MAX_CORES];
extern int ihk_param_nr_pmu_irq_affi;
int arm64_init_perfctr(void)
{
int ret;
int i;
int pages;
const struct ihk_mc_cpu_info *cpu_info;
memset(&cpu_pmu, 0, sizeof(cpu_pmu));
ret = armv8pmu_init(&cpu_pmu);
if (!ret) {
if (ret) {
return ret;
}
for (i = 0; i < ihk_param_nr_pmu_irq_affiniry; i++) {
ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affiniry[i], cpu_pmu.handler);
cpu_info = ihk_mc_get_cpu_info();
pages = (sizeof(struct per_cpu_arm_pmu) * cpu_info->ncpus +
PAGE_SIZE - 1) >> PAGE_SHIFT;
cpu_pmu.per_cpu = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if (cpu_pmu.per_cpu == NULL) {
return -ENOMEM;
}
memset(cpu_pmu.per_cpu, 0, pages * PAGE_SIZE);
if (0 < ihk_param_nr_pmu_irq_affi) {
for (i = 0; i < ihk_param_nr_pmu_irq_affi; i++) {
ret = ihk_mc_register_interrupt_handler(ihk_param_pmu_irq_affi[i],
cpu_pmu.handler);
if (ret) {
break;
}
}
}
else {
ret = ihk_mc_register_interrupt_handler(INTRID_PERF_OVF,
cpu_pmu.handler);
}
return ret;
}
/*
 * Run the per-CPU PMU initialisation on the calling CPU's slot of
 * cpu_pmu.per_cpu[], indexed by ihk_mc_get_processor_id().
 */
void arm64_init_per_cpu_perfctr(void)
{
	const int cpu = ihk_mc_get_processor_id();

	armv8pmu_per_cpu_init(cpu_pmu.per_cpu + cpu);
}
int arm64_enable_pmu(void)
{
int ret;
@ -47,34 +76,36 @@ void arm64_disable_pmu(void)
cpu_pmu.disable_pmu();
}
extern unsigned int *arm64_march_perfmap;
/* Forward to the cpu_pmu hook that enables user access to PMU registers. */
void arm64_enable_user_access_pmu_regs(void)
{
	(*cpu_pmu.enable_user_access_pmu_regs)();
}

/* Forward to the cpu_pmu hook that disables user access to PMU registers. */
void arm64_disable_user_access_pmu_regs(void)
{
	(*cpu_pmu.disable_user_access_pmu_regs)();
}
static int __ihk_mc_perfctr_init(int counter, uint32_t type, uint64_t config, int mode)
{
int ret;
int ret = -1;
unsigned long config_base = 0;
int mapping;
mapping = cpu_pmu.map_event(type, config);
if (mapping < 0) {
return mapping;
}
ret = cpu_pmu.disable_counter(counter);
if (!ret) {
ret = cpu_pmu.disable_counter(1UL << counter);
if (ret < 0) {
return ret;
}
ret = cpu_pmu.enable_intens(counter);
if (!ret) {
ret = cpu_pmu.enable_intens(1UL << counter);
if (ret < 0) {
return ret;
}
ret = cpu_pmu.set_event_filter(&config_base, mode);
if (!ret) {
if (ret) {
return ret;
}
config_base |= (unsigned long)mapping;
config_base |= config;
cpu_pmu.write_evtype(counter, config_base);
return ret;
}
@ -86,41 +117,24 @@ int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
return ret;
}
int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
int ihk_mc_perfctr_start(unsigned long counter_mask)
{
int ret;
ret = __ihk_mc_perfctr_init(counter, PERF_TYPE_RAW, config, mode);
return ret;
return cpu_pmu.enable_counter(counter_mask);
}
int ihk_mc_perfctr_start(int counter)
int ihk_mc_perfctr_stop(unsigned long counter_mask, int flags)
{
int ret;
ret = cpu_pmu.enable_counter(counter);
return ret;
}
int ihk_mc_perfctr_stop(int counter)
{
cpu_pmu.disable_counter(counter);
// ihk_mc_perfctr_startが呼ばれるときには、
// init系関数が呼ばれるのでdisableにする。
cpu_pmu.disable_intens(counter);
return 0;
return cpu_pmu.disable_counter(counter_mask);
}
int ihk_mc_perfctr_reset(int counter)
{
// TODO[PMU]: ihk_mc_perfctr_setと同様にサンプリングレートの共通部実装の扱いを見てから本実装。
cpu_pmu.write_counter(counter, 0);
return 0;
}
//int ihk_mc_perfctr_set(int counter, unsigned long val)
int ihk_mc_perfctr_set(int counter, long val) /* 0416_patchtemp */
int ihk_mc_perfctr_set(int counter, long val)
{
// TODO[PMU]: 共通部でサンプリングレートの計算をして、設定するカウンタ値をvalに渡してくるようになると想定。サンプリングレートの扱いを見てから本実装。
uint32_t v = val;
cpu_pmu.write_counter(counter, v);
return 0;
@ -133,6 +147,15 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
return 0;
}
/*
 * Ask the PMU backend for a counter index for `event`.
 *
 * Passes the number of counters on this CPU, the thread's
 * pmc_alloc_map and the event's hw_config to cpu_pmu.get_event_idx()
 * and returns whatever that hook returns.
 */
int ihk_mc_perfctr_alloc(struct thread *thread, struct mc_perf_event *event)
{
	return cpu_pmu.get_event_idx(ihk_mc_perf_get_num_counters(),
				     thread->pmc_alloc_map,
				     event->hw_config);
}
unsigned long ihk_mc_perfctr_read(int counter)
{
unsigned long count;
@ -140,17 +163,135 @@ unsigned long ihk_mc_perfctr_read(int counter)
return count;
}
//int ihk_mc_perfctr_alloc_counter(unsigned long pmc_status)
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status) /* 0416_patchtemp */
/*
 * Read `counter`, add `correction`, and return the low 32 bits of the
 * sum.
 */
unsigned long ihk_mc_perfctr_value(int counter, unsigned long correction)
{
	const unsigned long low32_mask = (1UL << 32) - 1;
	const unsigned long raw = ihk_mc_perfctr_read(counter);

	return (raw + correction) & low32_mask;
}
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config,
unsigned long pmc_status)
{
int ret;
ret = cpu_pmu.get_event_idx(cpu_pmu.num_events, pmc_status);
if (*type == PERF_TYPE_HARDWARE) {
switch (*config) {
case PERF_COUNT_HW_INSTRUCTIONS:
ret = cpu_pmu.map_event(*type, *config);
if (ret < 0) {
return -1;
}
*type = PERF_TYPE_RAW;
break;
default:
// Unexpected config
return -1;
}
}
else if (*type != PERF_TYPE_RAW) {
return -1;
}
ret = cpu_pmu.get_event_idx(get_per_cpu_pmu()->num_events, pmc_status,
*config);
return ret;
}
/* 0416_patchtemp */
/* ihk_mc_perfctr_fixed_init() stub added. */
int ihk_mc_perfctr_fixed_init(int counter, int mode)
int ihk_mc_perf_counter_mask_check(unsigned long counter_mask)
{
return -1;
return cpu_pmu.counter_mask_valid(counter_mask);
}
int ihk_mc_perf_get_num_counters(void)
{
const struct per_cpu_arm_pmu *per_cpu_arm_pmu = get_per_cpu_pmu();
return per_cpu_arm_pmu->num_events;
}
/*
 * No extra per-event setup is performed here: the event is not touched
 * and 0 is always returned.
 */
int ihk_mc_perfctr_set_extra(struct mc_perf_event *event)
{
	(void)event;	/* intentionally unused */

	return 0;
}
/*
 * Maximum period for an event: all ones in the low 32 bits.  The value
 * does not depend on `event`.  Callers in this file also use it as a
 * mask on raw counter values.
 */
static inline uint64_t arm_pmu_event_max_period(struct mc_perf_event *event)
{
	const uint64_t period_mask = 0xFFFFFFFF;

	return period_mask;
}
/*
 * Initialise the period bookkeeping of a non-sampling event.
 *
 * For events that are not sampling events, sample_period, last_period
 * and period_left are all set to half of the maximum period.  Sampling
 * events are left untouched.  Always returns 0.
 */
int hw_perf_event_init(struct mc_perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (is_sampling_event(event)) {
		return 0;
	}

	hwc->sample_period = arm_pmu_event_max_period(event) >> 1;
	hwc->last_period = hwc->sample_period;
	ihk_atomic64_set(&hwc->period_left, hwc->sample_period);

	return 0;
}
/*
 * Program the event's counter so that it overflows after the remaining
 * period has elapsed.
 *
 * Returns 1 when period_left had run out and a new period was started
 * (last_period is refreshed in that case), 0 otherwise.
 */
int ihk_mc_event_set_period(struct mc_perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int64_t remaining = ihk_atomic64_read(&hwc->period_left);
	const int64_t period = hwc->sample_period;
	const uint64_t max_period = arm_pmu_event_max_period(event);
	int new_period_started = 0;

	/* Fell behind by a whole period or more: restart from a full one. */
	if (unlikely(remaining <= -period)) {
		remaining = period;
		ihk_atomic64_set(&hwc->period_left, remaining);
		hwc->last_period = period;
		new_period_started = 1;
	}

	/* Period exhausted: carry the overshoot into the next period. */
	if (unlikely(remaining <= 0)) {
		remaining += period;
		ihk_atomic64_set(&hwc->period_left, remaining);
		hwc->last_period = period;
		new_period_started = 1;
	}

	/*
	 * Cap the programmed distance at half of max_period so that the
	 * running counter cannot overtake the value about to be written;
	 * this is a deliberately conservative allowance for interrupt
	 * latency.
	 */
	if (remaining > (max_period >> 1)) {
		remaining = (max_period >> 1);
	}

	/* The counter counts up, so start it at -remaining. */
	ihk_atomic64_set(&hwc->prev_count, (uint64_t)-remaining);
	cpu_pmu.write_counter(event->counter_id,
			      (uint64_t)(-remaining) & max_period);

	return new_period_started;
}
/*
 * Fold the counter's progress since the last update into the event.
 *
 * Reads the hardware counter, swaps it into hwc->prev_count with a
 * compare-and-exchange (retrying if prev_count changed in between),
 * then adds the masked difference to event->count and subtracts it
 * from hwc->period_left.  Returns the raw counter value that was read.
 */
uint64_t ihk_mc_event_update(struct mc_perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	const uint64_t max_period = arm_pmu_event_max_period(event);
	uint64_t before, now;
	int64_t delta;

	do {
		before = ihk_atomic64_read(&hwc->prev_count);
		now = cpu_pmu.read_counter(event->counter_id);
	} while (ihk_atomic64_cmpxchg(&hwc->prev_count, before, now) !=
		 before);

	/* Masking with max_period makes the difference wrap-safe. */
	delta = (now - before) & max_period;

	ihk_atomic64_add(delta, &event->count);
	ihk_atomic64_add(-delta, &hwc->period_left);

	return now;
}

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,7 @@
*/
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
ENTRY(cpu_do_idle)
ENTRY(__cpu_do_idle)
mrs x0, daif // save I bit
msr daifset, #2 // set I bit
mrs_s x1, ICC_PMR_EL1 // save PMR
@ -41,13 +41,13 @@ ENTRY(cpu_do_idle)
msr_s ICC_PMR_EL1, x1 // restore PMR
msr daif, x0 // restore I bit
ret
ENDPROC(cpu_do_idle)
ENDPROC(__cpu_do_idle)
#else /* defined(CONFIG_HAS_NMI) */
ENTRY(cpu_do_idle)
ENTRY(__cpu_do_idle)
dsb sy // WFI may enter a low-power mode
wfi
ret
ENDPROC(cpu_do_idle)
ENDPROC(__cpu_do_idle)
#endif /* defined(CONFIG_HAS_NMI) */
/*

View File

@ -1,4 +1,4 @@
/* psci.c COPYRIGHT FUJITSU LIMITED 2015-2016 */
/* psci.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
/* @ref.impl arch/arm64/kernel/psci.c */
/*
* This program is free software; you can redistribute it and/or modify
@ -18,18 +18,15 @@
#include <psci.h>
#include <errno.h>
#include <ihk/types.h>
#include <ihk/debug.h>
#include <compiler.h>
#include <lwk/compiler.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_PSCI
#ifdef DEBUG_PRINT_PSCI
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1

View File

@ -1,4 +1,4 @@
/* ptrace.c COPYRIGHT FUJITSU LIMITED 2016-2017 */
/* ptrace.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
#include <errno.h>
#include <debug-monitors.h>
#include <hw_breakpoint.h>
@ -11,57 +11,21 @@
#include <hwcap.h>
#include <string.h>
#include <thread_info.h>
#include <ptrace.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_SC
#ifdef DEBUG_PRINT_SC
#define dkprintf kprintf
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#define NOT_IMPLEMENTED() do { kprintf("%s is not implemented\n", __func__); while(1);} while(0)
#define BUG_ON(condition) do { if (condition) { kprintf("PANIC: %s: %s(line:%d)\n",\
__FILE__, __FUNCTION__, __LINE__); panic(""); } } while(0)
extern void save_debugreg(unsigned long *debugreg);
extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont);
extern int interrupt_from_user(void *);
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_TLS,
REGSET_HW_BREAK,
REGSET_HW_WATCH,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif /* CONFIG_ARM64_SVE */
};
struct user_regset;
typedef long user_regset_get_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf);
typedef long user_regset_set_fn(struct thread *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf);
struct user_regset {
user_regset_get_fn *get;
user_regset_set_fn *set;
unsigned int n;
unsigned int size;
unsigned int core_note_type;
};
long ptrace_read_user(struct thread *thread, long addr, unsigned long *value)
{
return -EIO;
@ -279,6 +243,17 @@ static inline long copy_regset_from_user(struct thread *target,
return regset->set(target, regset, offset, size, NULL, data);
}
/*
 * Size in bytes of `regset` for `target`.
 *
 * Uses the regset's get_size hook when one is provided; otherwise the
 * size is the fixed n * size product.
 */
unsigned int regset_size(struct thread *target,
			 const struct user_regset *regset)
{
	if (regset->get_size) {
		return regset->get_size(target, regset);
	}

	return regset->n * regset->size;
}
/*
* Bits which are always architecturally RES0 per ARM DDI 0487A.h
* Userspace cannot use these until they have an architectural meaning.
@ -630,6 +605,48 @@ out:
#ifdef CONFIG_ARM64_SVE
/*
 * Fill `header` from the SVE state recorded for `target`.
 *
 * The header is zeroed first.  flags always carries SVE_PT_REGS_SVE,
 * plus SVE_PT_VL_INHERIT when the thread has that flag set; vl comes
 * from the thread and max_vl from sve_max_vl; size and max_size are
 * derived from those with SVE_PT_SIZE().
 */
static void sve_init_header_from_thread(struct user_sve_header *header,
					struct thread *target)
{
	unsigned int cur_vq, max_vq;

	memset(header, 0, sizeof(*header));

	/* McKernel processes always enable SVE. */
	header->flags = SVE_PT_REGS_SVE;
	if (target->ctx.thread->sve_flags & SVE_PT_VL_INHERIT) {
		header->flags |= SVE_PT_VL_INHERIT;
	}

	header->vl = target->ctx.thread->sve_vl;
	cur_vq = sve_vq_from_vl(header->vl);
	header->max_vl = sve_max_vl;

	header->size = SVE_PT_SIZE(cur_vq, header->flags);

	max_vq = sve_vq_from_vl(header->max_vl);
	header->max_size = SVE_PT_SIZE(max_vq, SVE_PT_REGS_SVE);
}
/* Return header->size aligned with ALIGN() to SVE_VQ_BYTES. */
static unsigned int sve_size_from_header(struct user_sve_header const *header)
{
	const unsigned int aligned_size = ALIGN(header->size, SVE_VQ_BYTES);

	return aligned_size;
}
static unsigned int sve_get_size(struct thread *target,
const struct user_regset *regset)
{
struct user_sve_header header;
/* Instead of system_supports_sve() */
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
return 0;
}
sve_init_header_from_thread(&header, target);
return sve_size_from_header(&header);
}
/* read NT_ARM_SVE */
static long sve_get(struct thread *target,
const struct user_regset *regset,
@ -652,23 +669,9 @@ static long sve_get(struct thread *target,
}
/* Header */
memset(&header, 0, sizeof(header));
header.vl = target->ctx.thread->sve_vl;
BUG_ON(!sve_vl_valid(header.vl));
sve_init_header_from_thread(&header, target);
vq = sve_vq_from_vl(header.vl);
BUG_ON(!sve_vl_valid(sve_max_vl));
header.max_vl = sve_max_vl;
/* McKernel processes always enable SVE. */
header.flags = SVE_PT_REGS_SVE;
header.size = SVE_PT_SIZE(vq, header.flags);
header.max_size = SVE_PT_SIZE(sve_vq_from_vl(header.max_vl),
SVE_PT_REGS_SVE);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
0, sizeof(header));
if (ret) {
@ -682,11 +685,9 @@ static long sve_get(struct thread *target,
*/
/* Otherwise: full SVE case */
start = SVE_PT_SVE_OFFSET;
end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
BUG_ON(end < start);
BUG_ON(end - start > sve_state_size(target));
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
target->ctx.thread->sve_state,
start, end);
@ -696,24 +697,18 @@ static long sve_get(struct thread *target,
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
BUG_ON(end < start);
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
start, end);
if (ret) {
goto out;
}
/*
* Copy fpsr, and fpcr which must follow contiguously in
* struct fpsimd_state:
*/
start = end;
end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
BUG_ON((char *)(&target->fp_regs->fpcr + 1) <
(char *)&target->fp_regs->fpsr);
BUG_ON(end < start);
BUG_ON((char *)(&target->fp_regs->fpcr + 1) -
(char *)&target->fp_regs->fpsr !=
end - start);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
@ -722,9 +717,7 @@ static long sve_get(struct thread *target,
}
start = end;
end = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16 * 16;
BUG_ON(end < start);
end = sve_size_from_header(&header);
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
start, end);
out:
@ -768,13 +761,12 @@ static long sve_set(struct thread *target,
* sve_set_vector_length(), which will also validate them for us:
*/
ret = sve_set_vector_length(target, header.vl,
header.flags & ~SVE_PT_REGS_MASK);
((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
if (ret) {
goto out;
}
/* Actual VL set may be less than the user asked for: */
BUG_ON(!sve_vl_valid(target->ctx.thread->sve_vl));
vq = sve_vq_from_vl(target->ctx.thread->sve_vl);
/* Registers: FPSIMD-only case */
@ -785,11 +777,19 @@ static long sve_set(struct thread *target,
}
/* Otherwise: full SVE case */
/*
* If setting a different VL from the requested VL and there is
* register data, the data layout will be wrong: don't even
* try to set the registers in this case.
*/
if (count && vq != sve_vq_from_vl(header.vl)) {
ret = -EIO;
goto out;
}
start = SVE_PT_SVE_OFFSET;
end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
BUG_ON(end < start);
BUG_ON(end - start > sve_state_size(target));
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
target->ctx.thread->sve_state,
start, end);
@ -799,27 +799,21 @@ static long sve_set(struct thread *target,
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
BUG_ON(end < start);
ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
start, end);
if (ret) {
goto out;
}
/*
* Copy fpsr, and fpcr which must follow contiguously in
* struct fpsimd_state:
*/
start = end;
end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
BUG_ON((char *)(&target->fp_regs->fpcr + 1) <
(char *)&target->fp_regs->fpsr);
BUG_ON(end < start);
BUG_ON((char *)(&target->fp_regs->fpcr + 1) -
(char *)&target->fp_regs->fpsr !=
end - start);
user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->fp_regs->fpsr,
start, end);
out:
return ret;
}
@ -831,8 +825,9 @@ static const struct user_regset aarch64_regsets[] = {
.core_note_type = NT_PRSTATUS,
.n = sizeof(struct user_pt_regs) / sizeof(uint64_t),
.size = sizeof(uint64_t),
.align = sizeof(uint64_t),
.get = gpr_get,
.set = gpr_set
.set = gpr_set,
},
[REGSET_FPR] = {
.core_note_type = NT_PRFPREG,
@ -842,56 +837,75 @@ static const struct user_regset aarch64_regsets[] = {
* fpcr are 32-bits wide.
*/
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = fpr_get,
.set = fpr_set
.set = fpr_set,
},
[REGSET_TLS] = {
.core_note_type = NT_ARM_TLS,
.n = 1,
.size = sizeof(void *),
.align = sizeof(void *),
.get = tls_get,
.set = tls_set
.set = tls_set,
},
[REGSET_HW_BREAK] = {
.core_note_type = NT_ARM_HW_BREAK,
.n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t),
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = hw_break_get,
.set = hw_break_set
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
.core_note_type = NT_ARM_HW_WATCH,
.n = sizeof(struct user_hwdebug_state) / sizeof(uint32_t),
.size = sizeof(uint32_t),
.align = sizeof(uint32_t),
.get = hw_break_get,
.set = hw_break_set
.set = hw_break_set,
},
[REGSET_SYSTEM_CALL] = {
.core_note_type = NT_ARM_SYSTEM_CALL,
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
.get = system_call_get,
.set = system_call_set
.set = system_call_set,
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
.n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16,
.size = 16,
.n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) +
(SVE_VQ_BYTES - 1)) / SVE_VQ_BYTES,
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
.get = sve_get,
.set = sve_set
.set = sve_set,
.get_size = sve_get_size,
},
#endif /* CONFIG_ARM64_SVE */
};
static const struct user_regset *
find_regset(const struct user_regset *regset, unsigned int type, int n)
/* The single regset view: every entry of aarch64_regsets[]. */
static const struct user_regset_view user_aarch64_view = {
	.name = "aarch64",
	.e_machine = EM_AARCH64,
	.regsets = aarch64_regsets,
	.n = sizeof(aarch64_regsets) / sizeof(aarch64_regsets[0]),
};

/* Return the regset view in use; it is always the aarch64 view. */
const struct user_regset_view *current_user_regset_view(void)
{
	const struct user_regset_view *view = &user_aarch64_view;

	return view;
}
const struct user_regset *find_regset(const struct user_regset_view *view,
unsigned int type)
{
int i = 0;
for (i = 0; i < n; i++) {
if (regset[i].core_note_type == type) {
return &regset[i];
for (i = 0; i < view->n; i++) {
if (view->regsets[i].core_note_type == type) {
return &view->regsets[i];
}
}
return NULL;
@ -900,8 +914,8 @@ find_regset(const struct user_regset *regset, unsigned int type, int n)
static long ptrace_regset(struct thread *thread, int req, long type, struct iovec *iov)
{
long rc = -EINVAL;
const struct user_regset *regset = find_regset(aarch64_regsets, type,
sizeof(aarch64_regsets) / sizeof(aarch64_regsets[0]));
const struct user_regset *regset =
find_regset(&user_aarch64_view, type);
if (!regset) {
kprintf("%s: not supported type 0x%x\n", __FUNCTION__, type);
@ -950,30 +964,43 @@ void ptrace_report_signal(struct thread *thread, int sig)
/* save thread_info, if called by ptrace_report_exec() */
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) {
memcpy(&tinfo, thread->ctx.thread, sizeof(struct thread_info));
thread->uctx->user_regs.regs[0] = 0;
}
mcs_rwlock_writer_lock(&proc->update_lock, &lock);
if(!(proc->ptrace & PT_TRACED)){
if (!(thread->ptrace & PT_TRACED)) {
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
return;
}
thread->exit_status = sig;
/* Transition thread state */
#ifdef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
proc->status = PS_DELAY_TRACED;
#else /* POSTK_DEBUG_TEMP_FIX_41 */
proc->status = PS_TRACED;
#endif /* POSTK_DEBUG_TEMP_FIX_41 */
thread->exit_status = sig;
thread->status = PS_TRACED;
proc->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
proc->signal_flags |= SIGNAL_STOP_STOPPED;
} else {
proc->signal_flags &= ~SIGNAL_STOP_STOPPED;
thread->ptrace &= ~PT_TRACE_SYSCALL;
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8))) &&
thread->ptrace & PTRACE_O_TRACEEXEC) {
/* PTRACE_O_TRACEEXEC: since Linux 3.0, the former
* thread ID can be retrieved with PTRACE_GETEVENTMSG.
* Report no change. */
thread->ptrace_eventmsg = thread->tid;
}
parent_pid = proc->parent->pid;
save_debugreg(thread->ptrace_debugreg);
if (sig == SIGSTOP || sig == SIGTSTP ||
sig == SIGTTIN || sig == SIGTTOU) {
thread->signal_flags |= SIGNAL_STOP_STOPPED;
}
else {
thread->signal_flags &= ~SIGNAL_STOP_STOPPED;
}
if (thread == proc->main_thread) {
proc->status = PS_DELAY_TRACED;
parent_pid = proc->parent->pid;
}
else {
parent_pid = thread->report_proc->pid;
waitq_wakeup(&thread->report_proc->waitpid_q);
}
mcs_rwlock_writer_unlock(&proc->update_lock, &lock);
memset(&info, '\0', sizeof info);
@ -982,10 +1009,6 @@ void ptrace_report_signal(struct thread *thread, int sig)
info._sifields._sigchld.si_pid = thread->tid;
info._sifields._sigchld.si_status = thread->exit_status;
do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0);
#ifndef POSTK_DEBUG_TEMP_FIX_41 /* early to wait4() wakeup for ptrace, fix. */
/* Wake parent (if sleeping in wait4()) */
waitq_wakeup(&proc->parent->waitpid_q);
#endif /* !POSTK_DEBUG_TEMP_FIX_41 */
dkprintf("ptrace_report_signal,sleeping\n");
/* Sleep */
@ -996,6 +1019,7 @@ void ptrace_report_signal(struct thread *thread, int sig)
if (sig == ((SIGTRAP | (PTRACE_EVENT_EXEC << 8)))) {
memcpy(thread->ctx.thread, &tinfo, sizeof(struct thread_info));
}
arch_flush_icache_all();
}
long

File diff suppressed because it is too large Load Diff

203
arch/arm64/kernel/timer.c Normal file
View File

@ -0,0 +1,203 @@
/* timer.c COPYRIGHT FUJITSU LIMITED 2018 */
#include <ihk/types.h>
#include <ihk/cpu.h>
#include <ihk/lock.h>
#include <sysreg.h>
#include <kmalloc.h>
#include <cls.h>
#include <cputype.h>
#include <irq.h>
#include <arch-timer.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_TIMER
#ifdef DEBUG_PRINT_TIMER
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
/*
 * Per-CPU timer interval: set to the requested clocks by
 * lapic_timer_enable(), cleared to 0 by lapic_timer_disable(), and
 * written back to TVAL by timer_handler() to re-arm the timer.
 */
static unsigned int per_cpu_timer_val[NR_CPUS] = { 0 };
/* Interrupt ID of the timer in use; re-selected in arch_timer_init(). */
static int timer_intrid = INTRID_VIRT_TIMER;
/*
 * Register accessors for the timer in use.  They start out pointing at
 * the virtual-timer implementations and are re-selected in
 * arch_timer_init().
 */
static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val);
static void (*arch_timer_reg_write)(enum arch_timer_reg, uint32_t) =
arch_timer_virt_reg_write;
static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg);
static uint32_t (*arch_timer_reg_read)(enum arch_timer_reg) =
arch_timer_virt_reg_read;
/*
 * Write `val` to a physical-timer system register, then issue isb().
 *
 * ARCH_TIMER_REG_CTRL selects cntp_ctl_el0 and ARCH_TIMER_REG_TVAL
 * selects cntp_tval_el0.  Any other selector writes nothing, but the
 * isb() is still executed.
 */
static void arch_timer_phys_reg_write(enum arch_timer_reg reg, uint32_t val)
{
	if (reg == ARCH_TIMER_REG_CTRL) {
		write_sysreg(val, cntp_ctl_el0);
	} else if (reg == ARCH_TIMER_REG_TVAL) {
		write_sysreg(val, cntp_tval_el0);
	}

	isb();
}
/*
 * Write `val` to a virtual-timer system register, then issue isb().
 *
 * ARCH_TIMER_REG_CTRL selects cntv_ctl_el0 and ARCH_TIMER_REG_TVAL
 * selects cntv_tval_el0.  Any other selector writes nothing, but the
 * isb() is still executed.
 */
static void arch_timer_virt_reg_write(enum arch_timer_reg reg, uint32_t val)
{
	if (reg == ARCH_TIMER_REG_CTRL) {
		write_sysreg(val, cntv_ctl_el0);
	} else if (reg == ARCH_TIMER_REG_TVAL) {
		write_sysreg(val, cntv_tval_el0);
	}

	isb();
}
/*
 * Read a physical-timer system register.
 *
 * ARCH_TIMER_REG_CTRL reads cntp_ctl_el0 and ARCH_TIMER_REG_TVAL reads
 * cntp_tval_el0.  Any other selector yields 0.
 */
static uint32_t arch_timer_phys_reg_read(enum arch_timer_reg reg)
{
	if (reg == ARCH_TIMER_REG_CTRL) {
		return read_sysreg(cntp_ctl_el0);
	}
	if (reg == ARCH_TIMER_REG_TVAL) {
		return read_sysreg(cntp_tval_el0);
	}

	return 0;
}
/*
 * Read a virtual-timer system register.
 *
 * ARCH_TIMER_REG_CTRL reads cntv_ctl_el0 and ARCH_TIMER_REG_TVAL reads
 * cntv_tval_el0.  Any other selector yields 0.
 */
static uint32_t arch_timer_virt_reg_read(enum arch_timer_reg reg)
{
	if (reg == ARCH_TIMER_REG_CTRL) {
		return read_sysreg(cntv_ctl_el0);
	}
	if (reg == ARCH_TIMER_REG_TVAL) {
		return read_sysreg(cntv_tval_el0);
	}

	return 0;
}
/*
 * Timer interrupt handler.
 *
 * When the control register reports a pending timer condition
 * (ARCH_TIMER_CTRL_IT_STAT), the handler flags the current CPU for
 * rescheduling under its runq_lock, re-arms the timer with the interval
 * saved in per_cpu_timer_val[] (making it periodic), and then runs
 * do_backlog().  Otherwise it does nothing.  `priv` is unused.
 */
static void timer_handler(void *priv)
{
	const int cpu = ihk_mc_get_processor_id();
	struct cpu_local_var *v;
	unsigned long irqstate;
	unsigned long ctrl;
	unsigned int clocks;

	dkprintf("CPU%d: catch %s timer\n", cpu,
		 ((timer_intrid == INTRID_PHYS_TIMER) ||
		  (timer_intrid == INTRID_HYP_PHYS_TIMER)) ?
		 "physical" : "virtual");

	ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL);
	if (!(ctrl & ARCH_TIMER_CTRL_IT_STAT)) {
		return;
	}

	clocks = per_cpu_timer_val[cpu];
	v = get_this_cpu_local_var();

	/* Request a reschedule on this CPU. */
	irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
	v->flags |= CPU_FLAG_NEED_RESCHED;
	ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);

	/* Control value to write back, with the status bit cleared. */
	ctrl &= ~ARCH_TIMER_CTRL_IT_STAT;

	/* Reload the interval so the timer keeps firing periodically. */
	arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks);
	arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl);

	do_backlog();
}
/*
 * Return the timer selection from ihk_param_use_virt_timer:
 * 0 selects the physical timer, 1 the virtual timer.  Any other value
 * is invalid and triggers a panic.
 */
static unsigned long is_use_virt_timer(void)
{
	extern unsigned long ihk_param_use_virt_timer;

	if (ihk_param_use_virt_timer > 1) {
		/* neither physical (0) nor virtual (1) */
		panic("PANIC: is_use_virt_timer(): timer select neither phys-timer nor virt-timer.\n");
	}

	return ihk_param_use_virt_timer;
}
/* Handler descriptor for the timer interrupt; no private data. */
static struct ihk_mc_interrupt_handler timer_interrupt_handler = {
	.priv = NULL,
	.func = timer_handler,
};

/* Accessor for code outside this file: returns the timer handler descriptor. */
struct ihk_mc_interrupt_handler *get_timer_handler(void)
{
	struct ihk_mc_interrupt_handler *handler = &timer_interrupt_handler;

	return handler;
}
/*
 * Arm the timer in use on the calling CPU so that it fires after
 * `clocks` ticks, and remember that interval so timer_handler() can
 * re-arm the timer periodically.
 *
 * The interval is stored in per_cpu_timer_val[] BEFORE the timer is
 * programmed.  timer_handler() reloads TVAL from that array, so if the
 * interrupt were taken between enabling the timer and storing the
 * interval, the handler would re-arm with the previous value (0 after
 * boot or after lapic_timer_disable()), causing an immediate spurious
 * expiry.
 */
void
lapic_timer_enable(unsigned int clocks)
{
	unsigned long ctrl;

	/* Publish the reload value first (see function comment). */
	per_cpu_timer_val[ihk_mc_get_processor_id()] = clocks;

	/* Build the control value: enabled, unmasked, status bit cleared. */
	ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL);
	ctrl |= ARCH_TIMER_CTRL_ENABLE;
	ctrl &= ~(ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_IT_STAT);

	/* Program the interval, then switch the timer on. */
	arch_timer_reg_write(ARCH_TIMER_REG_TVAL, clocks);
	arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl);
}
/*
 * Stop the timer in use on the calling CPU: clear the enable bit in
 * its control register and reset the saved interval to 0.
 */
void
lapic_timer_disable()
{
	const unsigned long enabled_ctrl =
		arch_timer_reg_read(ARCH_TIMER_REG_CTRL);

	arch_timer_reg_write(ARCH_TIMER_REG_CTRL,
			     enabled_ctrl & ~ARCH_TIMER_CTRL_ENABLE);
	per_cpu_timer_val[ihk_mc_get_processor_id()] = 0;
}
/* Return the interrupt ID of the timer in use (see arch_timer_init()). */
int get_timer_intrid(void)
{
	const int intrid = timer_intrid;

	return intrid;
}
/*
 * Select the timer in use from the boot parameter.
 *
 * Points the register accessors at either the virtual or the physical
 * timer implementation and records the matching interrupt ID.  With
 * CONFIG_ARM64_VHE, if the VHE field of ID_AA64MMFR1_EL1 is set, the
 * interrupt ID is replaced by the corresponding HYP timer ID; the
 * register accessors stay as selected.
 */
void arch_timer_init(void)
{
	const unsigned long is_virt = is_use_virt_timer();
#ifdef CONFIG_ARM64_VHE
	const unsigned long mmfr = read_cpuid(ID_AA64MMFR1_EL1);
#endif /* CONFIG_ARM64_VHE */

	arch_timer_reg_write = is_virt ? arch_timer_virt_reg_write
				       : arch_timer_phys_reg_write;
	arch_timer_reg_read = is_virt ? arch_timer_virt_reg_read
				      : arch_timer_phys_reg_read;
	timer_intrid = is_virt ? INTRID_VIRT_TIMER : INTRID_PHYS_TIMER;

#ifdef CONFIG_ARM64_VHE
	if ((mmfr >> ID_AA64MMFR1_VHE_SHIFT) & 1UL) {
		timer_intrid = is_virt ? INTRID_HYP_VIRT_TIMER
				       : INTRID_HYP_PHYS_TIMER;
	}
#endif /* CONFIG_ARM64_VHE */
}

View File

@ -1,4 +1,4 @@
/* traps.c COPYRIGHT FUJITSU LIMITED 2015-2017 */
/* traps.c COPYRIGHT FUJITSU LIMITED 2015-2018 */
#include <ihk/context.h>
#include <ihk/debug.h>
#include <traps.h>
@ -29,12 +29,14 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *inf
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
const int from_user = interrupt_from_user(regs);
// /* TODO: implement lazy context saving/restoring */
set_cputime(1);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
// WARN_ON(1);
kprintf("WARNING: CPU: %d PID: %d Trapped FP/ASIMD access.\n",
ihk_mc_get_processor_id(), cpu_local_var(current)->proc->pid);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
/*
@ -51,7 +53,9 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
siginfo_t info;
unsigned int si_code = 0;
set_cputime(1);
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
if (esr & FPEXC_IOF)
si_code = FPE_FLTINV;
@ -70,7 +74,7 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
info._sifields._sigfault.si_addr = (void*)regs->pc;
set_signal(SIGFPE, regs, &info);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
/* @ref.impl arch/arm64/kernel/traps.c */
@ -133,8 +137,9 @@ exit:
void do_undefinstr(struct pt_regs *regs)
{
siginfo_t info;
const int from_user = interrupt_from_user(regs);
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
if (call_undef_hook(regs) == 0) {
goto out;
@ -147,7 +152,7 @@ void do_undefinstr(struct pt_regs *regs)
arm64_notify_die("Oops - undefined instruction", regs, &info, 0);
out:
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
/*
@ -157,7 +162,9 @@ out:
void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
siginfo_t info;
set_cputime(interrupt_from_user(regs)? 1: 2);
const int from_user = interrupt_from_user(regs);
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
kprintf("entering bad_mode !! (regs:0x%p, reason:%d, esr:0x%x)\n", regs, reason, esr);
kprintf("esr Analyse:\n");
@ -167,11 +174,16 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
arch_show_interrupt_context(regs);
#ifdef ENABLE_TOFU
info.si_signo = SIGSTOP;
info.si_errno = 0;
#else
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
#endif
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);
set_cputime(0);
set_cputime(from_user ? CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}

View File

@ -1,4 +1,4 @@
/* vdso.c COPYRIGHT FUJITSU LIMITED 2016 */
/* vdso.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
/* @ref.impl arch/arm64/kernel/vdso.c */
#include <arch-memory.h>
@ -11,21 +11,17 @@
#include <process.h>
#include <string.h>
#include <syscall.h>
#include <ihk/debug.h>
#include <ikc/queue.h>
#include <vdso.h>
#include <ihk/debug.h>
//#define DEBUG_PRINT_VDSO
#ifdef DEBUG_PRINT_VDSO
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 1
struct vdso {
long busy;
@ -36,9 +32,7 @@ struct vdso {
long lbase;
long offset_sigtramp;
};
#endif /*POSTK_DEBUG_ARCH_DEP_52*/
extern char vdso_start, vdso_end;
static struct vdso vdso;
struct tod_data_s tod_data
@ -90,26 +84,9 @@ int arch_setup_vdso(void)
kprintf("Enable Host mapping vDSO.\n");
return 0;
}
kprintf("Enable McK mapping vDSO.\n");
if (memcmp(&vdso_start, "\177ELF", 4)) {
panic("vDSO is not a valid ELF object!\n");
}
vdso.vdso_npages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
dkprintf("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
vdso.vdso_npages + 1, vdso.vdso_npages, &vdso_start, 1L, &tod_data);
if (vdso.vdso_npages != 1) {
panic("vDSO is not a valid number of pages!\n");
}
vdso.vvar_phys = virt_to_phys((void *)&tod_data);
vdso.vdso_physlist[0] = virt_to_phys((void *)&vdso_start);
vdso.lbase = VDSO_LBASE;
vdso.offset_sigtramp = vdso_offset_sigtramp;
return 0;
panic("Only support host mapping vDSO");
return -1;
}
static int get_free_area(struct process_vm *vm, size_t len, intptr_t hint,
@ -158,6 +135,7 @@ int arch_map_vdso(struct process_vm *vm)
unsigned long start, end;
unsigned long flag;
int ret;
struct vm_range *range;
vdso_text_len = vdso.vdso_npages << PAGE_SHIFT;
/* Be sure to map the data page */
@ -176,7 +154,7 @@ int arch_map_vdso(struct process_vm *vm)
flag = VR_REMOTE | VR_PROT_READ;
flag |= VRFLAG_PROT_TO_MAXPROT(flag);
ret = add_process_memory_range(vm, start, end, vdso.vvar_phys, flag,
NULL, 0, PAGE_SHIFT, NULL);
NULL, 0, PAGE_SHIFT, NULL, &range);
if (ret != 0){
dkprintf("ERROR: adding memory range for tod_data\n");
goto exit;
@ -188,7 +166,7 @@ int arch_map_vdso(struct process_vm *vm)
flag = VR_REMOTE | VR_PROT_READ | VR_PROT_EXEC;
flag |= VRFLAG_PROT_TO_MAXPROT(flag);
ret = add_process_memory_range(vm, start, end, vdso.vdso_physlist[0], flag,
NULL, 0, PAGE_SHIFT, NULL);
NULL, 0, PAGE_SHIFT, NULL, &range);
if (ret != 0) {
dkprintf("ERROR: adding memory range for vdso_text\n");

View File

@ -1,33 +0,0 @@
/* vdso.so.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/vdso.S */
/*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <arch-memory.h>
#include <vdso-so-path.h>
.section ".vdso.txet", "aw"
.globl vdso_start, vdso_end
.balign PAGE_SIZE
vdso_start:
.incbin VDSO_SO_PATH
.balign PAGE_SIZE
vdso_end:
.previous

View File

@ -1,131 +0,0 @@
# Makefile.in COPYRIGHT FUJITSU LIMITED 2016
# @ref.impl arch/arm64/kernel/vdso/Makefile
# Building a vDSO image for AArch64.
HOST_DIR=@KDIR@
HOST_CONFIG=$(HOST_DIR)/.config
HOST_KERNEL_CONFIG_ARM64_4K_PAGES=$(shell grep -E "^CONFIG_ARM64_4K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_4K_PAGES=||g')
HOST_KERNEL_CONFIG_ARM64_16K_PAGES=$(shell grep -E "^CONFIG_ARM64_16K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_16K_PAGES=||g')
HOST_KERNEL_CONFIG_ARM64_64K_PAGES=$(shell grep -E "^CONFIG_ARM64_64K_PAGES=y" $(HOST_CONFIG) | sed 's|CONFIG_ARM64_64K_PAGES=||g')
VDSOSRC = @abs_srcdir@
VDSOBUILD = @abs_builddir@
INCDIR = $(VDSOSRC)/../include
ECHO_SUFFIX = [VDSO]
VDSOOBJS := gettimeofday.o
DESTOBJS = $(addprefix $(VDSOBUILD)/, $(VDSOOBJS))
VDSOASMOBJS := note.o sigreturn.o
DESTASMOBJS = $(addprefix $(VDSOBUILD)/, $(VDSOASMOBJS))
$(if $(VDSOSRC),,$(error IHK output directory is not specified))
$(if $(TARGET),,$(error Target is not specified))
#CFLAGS := -nostdinc -mlittle-endian -Wall -mabi=lp64 -Wa,-gdwarf-2
CFLAGS := -nostdinc -mlittle-endian -Wall -Wa,-gdwarf-2
CFLAGS += -D__KERNEL__ -I$(SRC)/include
CFLAGS += -I$(SRC)/../lib/include -I$(INCDIR) -I$(IHKBASE)/smp/arm64/include
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_ARCH_DEP_, $(i)))
CFLAGS += $(foreach i, $(shell seq 1 100), $(addprefix -DPOSTK_DEBUG_TEMP_FIX_, $(i)))
LDFLAGS := -nostdinc -mlittle-endian -Wall -Wundef -Wstrict-prototypes
LDFLAGS += -Wno-trigraphs -fno-strict-aliasing -fno-common
LDFLAGS += -Werror-implicit-function-declaration -Wno-format-security
#LDFLAGS += -std=gnu89 -mgeneral-regs-only -mabi=lp64 -O2
LDFLAGS += -std=gnu89 -mgeneral-regs-only -O2
LDFLAGS += -Wframe-larger-than=2048 -fno-stack-protector
LDFLAGS += -fno-delete-null-pointer-checks -Wno-unused-but-set-variable
LDFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
LDFLAGS += -fno-var-tracking-assignments -g -Wdeclaration-after-statement
LDFLAGS += -Wno-pointer-sign -fno-strict-overflow -fconserve-stack
LDFLAGS += -Werror=implicit-int -Werror=strict-prototypes -Werror=date-time
LDFLAGS += -shared -fno-common -fno-builtin -nostdlib
LDFLAGS += -Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv -Wl,-n -Wl,-T
LDFLAGS += --param=allow-store-data-races=0 -DCC_HAVE_ASM_GOTO
LDFLAGS += -D"KBUILD_STR(s)=\#s" -D"KBUILD_BASENAME=KBUILD_STR(vdso.so)"
LDFLAGS += -D"KBUILD_MODNAME=KBUILD_STR(vdso.so)" -D__KERNEL__
DEPSRCS = $(wildcard $(VDSOSRC)/*.c $(VDSOSRC)/*.S)
CFLAGS_lds := -E -P -C -U$(ARCH)
CFLAGS_lds += -nostdinc
CFLAGS_lds += -mlittle-endian
CFLAGS_lds += -D__KERNEL__
CFLAGS_lds += -D__ASSEMBLY__
CFLAGS_lds += -DLINKER_SCRIPT
CFLAGS_lds += -DVDSO_LBASE=0
ifeq ($(HOST_KERNEL_CONFIG_ARM64_4K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x1000
endif
ifeq ($(HOST_KERNEL_CONFIG_ARM64_16K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x4000
endif
ifeq ($(HOST_KERNEL_CONFIG_ARM64_64K_PAGES), y)
CFLAGS_lds += -DPAGE_SIZE=0x10000
endif
#load mckernel config (append CPPFLAGS)
include @abs_top_builddir@/../ihk/cokernel/$(TARGETDIR)/Makefile.predefines
default: all
.PHONY: all clean depend prepare
all: depend $(VDSOBUILD)/vdso.so $(VDSOBUILD)/../include/vdso-offsets.h $(VDSOBUILD)/../include/vdso-so-path.h
# Strip rule for the .so file
$(VDSOBUILD)/vdso.so: OBJCOPYFLAGS := -S
$(VDSOBUILD)/vdso.so: $(VDSOBUILD)/vdso.so.dbg
$(objcopy_cmd)
# Generate VDSO offsets using helper script
$(VDSOBUILD)/../include/vdso-offsets.h: $(VDSOBUILD)/vdso.so.dbg
$(call echo_cmd,VDSOSYM,$<)
@mkdir -p $(VDSOBUILD)/../include
@nm $< | sh $(VDSOSRC)/gen_vdso_offsets.sh | LC_ALL=C sort > $@
$(VDSOBUILD)/../include/vdso-so-path.h:
@echo "#define VDSO_SO_PATH \"@abs_builddir@/vdso.so\"" > $@
# Link rule for the .so file, .lds has to be first
$(VDSOBUILD)/vdso.so.dbg: $(VDSOBUILD)/vdso.lds $(DESTOBJS) $(DESTASMOBJS)
$(ld_cmd)
$(VDSOBUILD)/vdso.lds: $(VDSOSRC)/vdso.lds.S
$(lds_cmd)
clean:
$(rm_cmd) $(DESTOBJS) $(DESTASMOBJS) $(VDSOBUILD)/Makefile.dep $(VDSOBUILD)/vdso.* -r $(VDSOBUILD)/../include
depend: $(VDSOBUILD)/Makefile.dep
$(VDSOBUILD)/Makefile.dep:
$(call dep_cmd,$(DEPSRCS))
prepare:
@$(RM) $(VDSOBUILD)/Makefile.dep
-include $(VDSOBUILD)/Makefile.dep
# Actual build commands
ifeq ($(V),1)
echo_cmd =
submake = make
else
echo_cmd = @echo ' ($(TARGET))' $1 $(ECHO_SUFFIX) $2;
submake = make --no-print-directory
endif
cc_cmd = $(call echo_cmd,CC,$<)$(CC) $(CFLAGS) -c -o $@
ld_cmd = $(call echo_cmd,LD,$@)$(CC) $(LDFLAGS) $^ -o $@
dep_cmd = $(call echo_cmd,DEPEND,)$(CC) $(CFLAGS) -MM $1 > $@
rm_cmd = $(call echo_cmd,CLEAN,)$(RM)
objcopy_cmd = $(call echo_cmd,OBJCOPY,$<)$(OBJCOPY) $(OBJCOPYFLAGS) $< $@
lds_cmd = $(call echo_cmd,LDS,$<)$(CC) $(CFLAGS_lds) -c -o $@ $<
$(DESTOBJS):
$(cc_cmd) $(addprefix $(VDSOSRC)/, $(notdir $(@:.o=.c)))
$(DESTASMOBJS):
$(cc_cmd) $(addprefix $(VDSOSRC)/, $(notdir $(@:.o=.S))) -D__ASSEMBLY__

View File

@ -1,17 +0,0 @@
#!/bin/sh
# gen_vdso_offsets.sh COPYRIGHT FUJITSU LIMITED 2016
# @ref.impl arch/arm64/kernel/vdso/gen_vdso_offsets.sh
#
# Match symbols in the DSO that look like VDSO_*; produce a header file
# of constant offsets into the shared object.
#
# Doing this inside the Makefile will break the $(filter-out) function,
# causing Kbuild to rebuild the vdso-offsets header file every time.
#
# Author: Will Deacon <will.deacon@arm.com>
#
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
's/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'

View File

@ -1,205 +0,0 @@
/* gettimeofday.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <time.h>
#include <syscall.h>
#include <registers.h>
#include <ihk/atomic.h>
extern int __kernel_gettimeofday(struct timeval *tv, void *tz);
/*
 * Busy-wait helper: executes the "yield" instruction.  The "memory" clobber
 * keeps the compiler from caching memory values across the call.
 */
static inline void cpu_pause_for_vsyscall(void)
{
	__asm__ __volatile__("yield" : : : "memory");
}
/*
 * Compute the current time into *ts from the shared time-of-day data.
 *
 * tod_data->origin is copied under a version check: an odd version means an
 * update is in progress, and a version that changed across the copy means
 * the snapshot cannot be trusted, so the copy is retried.  The counter value
 * from rdtsc(), scaled by tod_data->clocks_per_sec, is then added on top and
 * tv_nsec is normalised once.
 */
static inline void calculate_time_from_tsc(struct timespec *ts,
					   struct tod_data_s *tod_data)
{
	long seq;
	unsigned long now;
	__time_t whole_secs;
	long frac_ns;

	for (;;) {
		seq = ihk_atomic64_read(&tod_data->version);
		if (seq & 1) {
			/* settimeofday() is in progress */
			cpu_pause_for_vsyscall();
			continue;
		}

		rmb();
		*ts = tod_data->origin;
		rmb();

		if (seq == ihk_atomic64_read(&tod_data->version)) {
			break;
		}

		/* settimeofday() has intervened */
		cpu_pause_for_vsyscall();
	}

	now = rdtsc();
	whole_secs = now / tod_data->clocks_per_sec;
	/* calc. of frac_ns overflows if clocks_per_sec exceeds 18.44 GHz */
	frac_ns = NS_PER_SEC * (now % tod_data->clocks_per_sec)
		/ tod_data->clocks_per_sec;

	ts->tv_sec += whole_secs;
	ts->tv_nsec += frac_ns;
	if (ts->tv_nsec >= NS_PER_SEC) {
		/* carry one second out of the nanosecond field */
		ts->tv_nsec -= NS_PER_SEC;
		++ts->tv_sec;
	}
}
/*
 * Return the address of the _tod_data symbol, obtained PC-relatively with
 * the "adr" instruction.
 */
static inline struct tod_data_s *get_tod_data_addr(void)
{
	unsigned long va;

	__asm__ __volatile__("adr %0, _tod_data\n"
			     : "=r" (va)
			     :
			     : "memory");

	return (struct tod_data_s *)va;
}
/*
 * vDSO entry point for gettimeofday.
 *
 * tv: destination for the time, may be NULL.
 * tz: timezone argument, may be NULL.
 *
 * Returns 0 immediately when both pointers are NULL.  When tz is NULL and
 * tod_data->do_local is set, the time is computed locally from the shared
 * time-of-day data; otherwise the request is forwarded with "svc #0".
 * A non-zero syscall result deliberately faults the caller.
 */
int __kernel_gettimeofday(struct timeval *tv, void *tz)
{
	long ret;
	struct tod_data_s *tod_data;
	struct timespec ats;

	if (!tv && !tz) {
		/* nothing to do */
		return 0;
	}

	tod_data = get_tod_data_addr();

	/* Do it locally if supported (tv is non-NULL here because tz is NULL) */
	if (!tz && tod_data->do_local) {
		calculate_time_from_tsc(&ats, tod_data);

		tv->tv_sec = ats.tv_sec;
		tv->tv_usec = ats.tv_nsec / 1000;

		return 0;
	}

	/*
	 * Otherwise syscall.
	 * The template overwrites x0, x1 and x8, so they must be declared as
	 * clobbers; otherwise the compiler may keep an input operand (or any
	 * live value) in one of them and have it destroyed before use.
	 */
	asm volatile("mov w8, %w1\n"
		     "mov x0, %2\n"
		     "mov x1, %3\n"
		     "svc #0\n"
		     "mov %0, x0\n"
		     : "=r" (ret)
		     : "r" (__NR_gettimeofday), "r"(tv), "r"(tz)
		     : "x0", "x1", "x8", "memory");

	if (ret) {
		*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
	}

	return (int)ret;
}
/*
* The IDs of the various system clocks (for POSIX.1b interval timers):
* @ref.impl include/uapi/linux/time.h
*/
// #define CLOCK_REALTIME 0
// #define CLOCK_MONOTONIC 1
// #define CLOCK_PROCESS_CPUTIME_ID 2
// #define CLOCK_THREAD_CPUTIME_ID 3
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
#define CLOCK_BOOTTIME 7
#define CLOCK_REALTIME_ALARM 8
#define CLOCK_BOOTTIME_ALARM 9
#define CLOCK_SGI_CYCLE 10 /* Hardware specific */
#define CLOCK_TAI 11
#define HIGH_RES_NSEC 1 /* nsec. */
#define CLOCK_REALTIME_RES HIGH_RES_NSEC
#define CLOCK_COARSE_RES ((NS_PER_SEC+CONFIG_HZ/2)/CONFIG_HZ) /* 10,000,000 nsec*/
typedef int clockid_t;
/*
 * vDSO entry point for clock_gettime.
 *
 * clk_id: clock to read.
 * tp:     destination for the time; when NULL the function returns 0
 *         without doing anything.
 *
 * CLOCK_REALTIME is served locally from the shared time-of-day data when
 * tod_data->do_local is set.  Every other case is forwarded with "svc #0"
 * and the value left in x0 is returned.
 */
int __kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
{
	long ret;
	struct tod_data_s *tod_data;
	struct timespec ats;

	if (!tp) {
		/* nothing to do */
		return 0;
	}

	tod_data = get_tod_data_addr();

	/* Do it locally if supported */
	if (tod_data->do_local && clk_id == CLOCK_REALTIME) {
		calculate_time_from_tsc(&ats, tod_data);

		tp->tv_sec = ats.tv_sec;
		tp->tv_nsec = ats.tv_nsec;

		return 0;
	}

	/*
	 * Otherwise syscall.
	 * The template overwrites x0, x1 and x8, so they must be declared as
	 * clobbers; otherwise the compiler may keep an input operand (or any
	 * live value) in one of them and have it destroyed before use.
	 */
	asm volatile("mov w8, %w1\n"
		     "mov x0, %2\n"
		     "mov x1, %3\n"
		     "svc #0\n"
		     "mov %0, x0\n"
		     : "=r" (ret)
		     : "r" (__NR_clock_gettime), "r"(clk_id), "r"(tp)
		     : "x0", "x1", "x8", "memory");

	return (int)ret;
}
/*
 * vDSO entry point for clock_getres.
 *
 * clk_id: clock whose resolution is queried.
 * res:    destination for the resolution; when NULL the function returns 0
 *         without doing anything.
 *
 * CLOCK_REALTIME / CLOCK_MONOTONIC report CLOCK_REALTIME_RES and the two
 * *_COARSE clocks report CLOCK_COARSE_RES without entering the kernel.
 * Any other clock is forwarded with "svc #0" and the value left in x0 is
 * returned.
 */
int __kernel_clock_getres(clockid_t clk_id, struct timespec *res)
{
	long ret;

	if (!res) {
		/* nothing to do */
		return 0;
	}

	switch (clk_id) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
		res->tv_sec = 0;
		res->tv_nsec = CLOCK_REALTIME_RES;
		return 0;
	case CLOCK_REALTIME_COARSE:
	case CLOCK_MONOTONIC_COARSE:
		res->tv_sec = 0;
		res->tv_nsec = CLOCK_COARSE_RES;
		return 0;
	default:
		break;
	}

	/*
	 * Otherwise syscall.
	 * The template overwrites x0, x1 and x8, so they must be declared as
	 * clobbers; otherwise the compiler may keep an input operand (or any
	 * live value) in one of them and have it destroyed before use.
	 */
	asm volatile("mov w8, %w1\n"
		     "mov x0, %2\n"
		     "mov x1, %3\n"
		     "svc #0\n"
		     "mov %0, x0\n"
		     : "=r" (ret)
		     : "r" (__NR_clock_getres), "r"(clk_id), "r"(res)
		     : "x0", "x1", "x8", "memory");

	return (int)ret;
}

View File

@ -1,28 +0,0 @@
/* note.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/note.S */
/*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
*/
#include <elfnote.h>
ELFNOTE_START(McKernel, 0, "a")
.long 0x10000 /* MCKERNEL_VERSION_CODE */
ELFNOTE_END

View File

@ -1,39 +0,0 @@
/* sigreturn.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/sigreturn.S */
/*
* Sigreturn trampoline for returning from a signal when the SA_RESTORER
* flag is not set.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linkage.h>
#include "syscall.h"
.text
nop
ENTRY(__kernel_rt_sigreturn)
.cfi_startproc
.cfi_signal_frame
.cfi_def_cfa x29, 0
.cfi_offset x29, 0 * 8
.cfi_offset x30, 1 * 8
mov x8, #__NR_rt_sigreturn
svc #0
.cfi_endproc
ENDPROC(__kernel_rt_sigreturn)

View File

@ -1,15 +0,0 @@
/* syscall.h COPYRIGHT FUJITSU LIMITED 2016 */
#ifndef __HEADER_ARM64_VDSO_SYSCALL_H
#define __HEADER_ARM64_VDSO_SYSCALL_H
/*
 * Turn the syscall list into assembler constants: each entry expands to
 * ".equ __NR_<name>, <number>".  Handled and delegated entries are treated
 * identically here.
 * NOTE(review): presumably syscall_list.h consists only of SYSCALL_HANDLED()
 * and SYSCALL_DELEGATED() invocations -- confirm against that header.
 */
#define DECLARATOR(number,name) .equ __NR_##name, number
#define SYSCALL_HANDLED(number,name) DECLARATOR(number,name)
#define SYSCALL_DELEGATED(number,name) DECLARATOR(number,name)
#include <syscall_list.h>
/* The helper macros are only needed while syscall_list.h is expanded. */
#undef DECLARATOR
#undef SYSCALL_HANDLED
#undef SYSCALL_DELEGATED
#endif /* !__HEADER_ARM64_VDSO_SYSCALL_H */

View File

@ -1,96 +0,0 @@
/* vdso.lds.S COPYRIGHT FUJITSU LIMITED 2016 */
/* @ref.impl arch/arm64/kernel/vdso/vdso.lds.S */
/*
* GNU linker script for the VDSO library.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
* Heavily based on the vDSO linker scripts for other archs.
*/
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
SECTIONS
{
PROVIDE(_tod_data = . - PAGE_SIZE);
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.note : { *(.note.*) } :text :note
. = ALIGN(16);
.text : { *(.text*) } :text =0xd503201f
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.dynamic : { *(.dynamic) } :text :dynamic
.rodata : { *(.rodata*) } :text
_end = .;
PROVIDE(end = .);
/DISCARD/ : {
*(.note.GNU-stack)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
}
}
/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
}
/*
* This controls what symbols we export from the DSO.
*/
VERSION
{
LINUX_2.6.39 {
global:
__kernel_rt_sigreturn;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
local: *;
};
}
/*
* Make the sigreturn code visible to the kernel.
*/
VDSO_sigtramp = __kernel_rt_sigreturn;

View File

@ -9,29 +9,29 @@ PHDRS
SECTIONS
{
. = SIZEOF_HEADERS;
. = ALIGN(4096);
. = ALIGN(4096);
.text : {
*(.text)
*(.text)
} :text
.data : {
*(.data)
*(.data.*)
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
*(.rodata .rodata.*)
} :data
. = ALIGN(8);
.bss : {
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
*(.eh_frame)
*(.note.gnu.build-id)
}
}
}

View File

@ -18,7 +18,7 @@ extern char data_start[], data_end[];
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define MAP_ST_START 0xffff800000000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL
/* MAP_KERNEL_START is defined by cmake */
#define PTL4_SHIFT 39
#define PTL3_SHIFT 30

View File

@ -1,2 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o coredump.o

View File

@ -1,8 +1,9 @@
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/* coredump.c COPYRIGHT FUJITSU LIMITED 2018-2019 */
#include <process.h>
#include <elfcore.h>
void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
void arch_fill_prstatus(struct elf_prstatus64 *prstatus,
struct thread *thread, void *regs0, int sig)
{
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
@ -18,8 +19,6 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread,
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
@ -28,6 +27,14 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread,
struct prstatus64_timeval pr_cstime;
*/
prstatus->pr_pid = thread->tid;
if (thread->proc->parent) {
prstatus->pr_ppid = thread->proc->parent->pid;
}
prstatus->pr_info.si_signo = sig;
prstatus->pr_cursig = sig;
prstatus->pr_reg[0] = _r15;
prstatus->pr_reg[1] = _r14;
prstatus->pr_reg[2] = _r13;
@ -56,4 +63,12 @@ void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread,
prstatus->pr_fpvalid = 0; /* We assume no fp */
}
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
/*
 * Empty stub in this file: nothing is written to head and neither thread
 * nor regs is read.
 */
void arch_fill_thread_core_info(struct note *head,
struct thread *thread, void *regs)
{
}
/*
 * Always returns 0, matching the empty arch_fill_thread_core_info() in this
 * file.
 */
int arch_get_thread_core_info_size(void)
{
return 0;
}

View File

@ -1,3 +1,4 @@
/* cpu.c COPYRIGHT FUJITSU LIMITED 2018-2019 */
/**
* \file cpu.c
* License details are found in the file LICENSE.
@ -15,7 +16,6 @@
*/
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <ihk/mm.h>
#include <types.h>
#include <errno.h>
@ -31,6 +31,7 @@
#include <prctl.h>
#include <page.h>
#include <kmalloc.h>
#include <ihk/debug.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@ -43,11 +44,9 @@
#define LAPIC_ICR0 0x300
#define LAPIC_ICR2 0x310
#define LAPIC_ESR 0x280
#ifdef POSTK_DEBUG_ARCH_DEP_75 /* x86 depend hide */
#define LOCAL_TIMER_VECTOR 0xef
#define LOCAL_PERF_VECTOR 0xf0
#define LOCAL_SMP_FUNC_CALL_VECTOR 0xf1
#endif /* POSTK_DEBUG_ARCH_DEP_75 */
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
@ -69,11 +68,8 @@
//#define DEBUG_PRINT_CPU
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
#endif
static void *lapic_vp;
@ -84,7 +80,11 @@ static void (*lapic_icr_write)(unsigned int h, unsigned int l);
static void (*lapic_wait_icr_idle)(void);
void (*x86_issue_ipi)(unsigned int apicid, unsigned int low);
int running_on_kvm(void);
static void smp_func_call_handler(void);
void smp_func_call_handler(void);
/*
 * Return LOCAL_SMP_FUNC_CALL_VECTOR, the vector number this file defines
 * for SMP function-call interrupts.
 */
int ihk_mc_get_smp_handler_irq(void)
{
return LOCAL_SMP_FUNC_CALL_VECTOR;
}
void init_processors_local(int max_id);
void assign_processor_id(void);
@ -93,9 +93,10 @@ void x86_set_warm_reset(unsigned long ip, char *first_page_va);
void x86_init_perfctr(void);
int gettime_local_support = 0;
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
extern int kprintf(const char *format, ...);
extern int interrupt_from_user(void *);
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
static struct idt_entry{
uint32_t desc[4];
@ -148,7 +149,7 @@ void reload_idt(void)
}
static struct list_head handlers[256 - 32];
extern char nmi[];
extern char nmi_handler[];
extern char page_fault[], general_protection_exception[];
extern char debug_exception[], int3_exception[];
@ -175,7 +176,7 @@ static void init_idt(void)
set_idt_entry(i, generic_common_handlers[i]);
}
set_idt_entry(2, (uintptr_t)nmi);
set_idt_entry(2, (uintptr_t)nmi_handler);
set_idt_entry(13, (unsigned long)general_protection_exception);
set_idt_entry(14, (unsigned long)page_fault);
@ -824,11 +825,14 @@ void call_ap_func(void (*next_func)(void))
next_func();
}
struct page_table *get_init_page_table(void);
void setup_x86_ap(void (*next_func)(void))
{
unsigned long rsp;
cpu_disable_interrupt();
ihk_mc_load_page_table(get_init_page_table());
assign_processor_id();
init_smp_processor();
@ -847,9 +851,6 @@ void setup_x86_ap(void (*next_func)(void))
}
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long, void *, int);
void check_sig_pending();
extern void tlb_flush_handler(int vector);
void __show_stack(uintptr_t *sp) {
@ -871,16 +872,56 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a call-stack trace for the given thread by following a chain of
 * frame records starting at sp.
 *
 * thread:         thread whose tid is printed in the header line.
 * pc, sp:         starting program counter and stack pointer.
 * kprintf_locked: non-zero when the caller already holds the kprintf lock;
 *                 otherwise the lock is taken and released here.
 *
 * Each record is read as two words: the word at sp is the next frame
 * address and the word at sp + 8 is a return address.  The walk stops when
 * the next frame is not above the current one, lies beyond the top of the
 * KERNEL_STACK_SIZE-aligned stack, or the return address is outside
 * [_head, _end].
 */
void __show_context_stack(struct thread *thread,
		unsigned long pc, uintptr_t sp, int kprintf_locked)
{
	extern char _head[], _end[];
	const uintptr_t stack_top =
		ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
	unsigned long irqflags = 0;

	if (!kprintf_locked) {
		irqflags = kprintf_lock();
	}

	__kprintf("TID: %d, call stack (most recent first):\n",
		  thread->tid);
	__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);

	for (;;) {
		const uintptr_t next_frame = *(uintptr_t *)sp;
		uintptr_t ret_addr;

		/* the chain must move strictly upwards and stay on this stack */
		if (next_frame <= sp || next_frame > stack_top) {
			break;
		}

		/* the return address must fall inside the kernel image */
		ret_addr = *(uintptr_t *)(sp + 8);
		if (ret_addr < (unsigned long)_head ||
		    ret_addr > (unsigned long)_end) {
			break;
		}

		__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", ret_addr - 4, sp, next_frame);
		sp = next_frame;
	}

	if (!kprintf_locked) {
		kprintf_unlock(irqflags);
	}
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
cpu_enable_interrupt();
check_sig_pending();
check_need_resched();
check_signal(0, regs, 0);
}
else {
check_sig_pending();
check_signal(0, regs, -1);
}
}
@ -888,11 +929,13 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
{
struct ihk_mc_interrupt_handler *h;
struct cpu_local_var *v = get_this_cpu_local_var();
int from_user = interrupt_from_user(regs);
lapic_ack();
++v->in_interrupt;
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(from_user ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
@ -954,6 +997,8 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
do_backlog();
}
else if (vector == LOCAL_PERF_VECTOR) {
struct siginfo info;
@ -1007,14 +1052,25 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
}
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs)? 0: 1);
set_cputime(from_user ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
--v->in_interrupt;
/* for migration by IPI */
if (v->flags & CPU_FLAG_NEED_MIGRATE) {
// Don't migrate on K2K schedule
if (from_user) {
schedule();
check_signal(0, regs, 0);
}
}
}
void gpe_handler(struct x86_user_context *regs)
{
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
@ -1023,7 +1079,8 @@ void gpe_handler(struct x86_user_context *regs)
}
set_signal(SIGSEGV, regs, NULL);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs)? 0: 1);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
panic("GPF");
}
@ -1033,7 +1090,8 @@ void debug_handler(struct x86_user_context *regs)
int si_code = 0;
struct siginfo info;
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
#ifdef DEBUG_PRINT_CPU
kprintf("debug exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
@ -1052,14 +1110,16 @@ void debug_handler(struct x86_user_context *regs)
info.si_code = si_code;
set_signal(SIGTRAP, regs, &info);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs)? 0: 1);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
void int3_handler(struct x86_user_context *regs)
{
struct siginfo info;
set_cputime(interrupt_from_user(regs)? 1: 2);
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
#ifdef DEBUG_PRINT_CPU
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
@ -1070,59 +1130,8 @@ void int3_handler(struct x86_user_context *regs)
info.si_code = TRAP_BRKPT;
set_signal(SIGTRAP, regs, &info);
interrupt_exit(regs);
set_cputime(interrupt_from_user(regs)? 0: 1);
}
void
unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
{
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = thread->vm;
struct vm_range *range;
unsigned long irqflags;
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
irqflags = kprintf_lock();
__kprintf("Page fault for 0x%lx\n", address);
__kprintf("%s for %s access in %s mode (reserved bit %s set), "
"it %s an instruction fetch\n",
(error & PF_PROT ? "protection fault" : "no page found"),
(error & PF_WRITE ? "write" : "read"),
(error & PF_USER ? "user" : "kernel"),
(error & PF_RSVD ? "was" : "wasn't"),
(error & PF_INSTR ? "was" : "wasn't"));
range = lookup_process_memory_range(vm, address, address+1);
if (range) {
__kprintf("address is in range, flag: 0x%lx\n",
range->flag);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
} else {
__kprintf("address is out of range! \n");
}
kprintf_unlock(irqflags);
/* TODO */
ihk_mc_debug_show_interrupt_context(regs);
if (!(error & PF_USER)) {
panic("panic: kernel mode PF");
}
//dkprintf("now dump a core file\n");
//coredump(proc, regs);
#ifdef DEBUG_PRINT_MEM
{
uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs);
kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
sp[0], sp[1], sp[2], sp[3]);
}
#endif
return;
set_cputime(interrupt_from_user(regs) ?
CPUTIME_MODE_K2U : CPUTIME_MODE_K2K_OUT);
}
static void outb(uint8_t v, uint16_t port)
@ -1176,6 +1185,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
/* In this file the panic variant simply forwards to cpu_halt(). */
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@ -1244,6 +1264,24 @@ unsigned long cpu_disable_interrupt_save(void)
return flags;
}
/*
 * Enable interrupts and return the flags register as it was beforehand.
 * The flags are captured with pushf/pop before "sti" executes.
 */
unsigned long cpu_enable_interrupt_save(void)
{
	unsigned long prev_flags;

	__asm__ __volatile__("pushf; pop %0; sti"
			     : "=r"(prev_flags)
			     :
			     : "memory", "cc");

	return prev_flags;
}
/*
 * Report whether interrupts are currently disabled on this CPU.
 * Returns 1 when bit 0x200 (the x86 interrupt-enable flag) of the flags
 * register is clear, 0 when it is set.
 */
int cpu_interrupt_disabled(void)
{
	unsigned long cur_flags;

	__asm__ __volatile__("pushf; pop %0"
			     : "=r"(cur_flags)
			     :
			     : "memory", "cc");

	return (cur_flags & 0x200) == 0;
}
/*@
@ behavior valid_vector:
@ assumes 32 <= vector <= 255;
@ -1287,7 +1325,7 @@ void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
}
extern char trampoline_code_data[], trampoline_code_data_end[];
struct page_table *get_init_page_table(void);
struct page_table *get_boot_page_table(void);
unsigned long get_transit_page_table(void);
/* reusable, but not reentrant */
@ -1311,9 +1349,10 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
memcpy(p, trampoline_code_data,
trampoline_code_data_end - trampoline_code_data);
p[1] = (unsigned long)virt_to_phys(get_init_page_table());
p[1] = (unsigned long)virt_to_phys(get_boot_page_table());
p[2] = (unsigned long)setup_x86_ap;
p[3] = pc;
p[4] = (unsigned long)get_x86_cpu_local_kstack(cpuid);
p[6] = (unsigned long)get_transit_page_table();
if (!p[6]) {
p[6] = p[1];
@ -1431,13 +1470,11 @@ long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int
}
#endif /* POSTK_DEBUG_ARCH_DEP_42 */
#ifdef POSTK_DEBUG_ARCH_DEP_23 /* add arch dep. clone_thread() function */
/* No architecture-specific work is done when a thread is cloned here. */
void arch_clone_thread(struct thread *othread, unsigned long pc,
		       unsigned long sp, struct thread *nthread)
{
}
#endif /* POSTK_DEBUG_ARCH_DEP_23 */
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
{
@ -1541,7 +1578,8 @@ void arch_print_pre_interrupt_stack(const struct x86_basic_regs *regs) {
__print_stack(rbp, regs->rip);
}
void arch_print_stack() {
void arch_print_stack(void)
{
struct stack *rbp;
__kprintf("Approximative stack trace:\n");
@ -1551,6 +1589,16 @@ void arch_print_stack() {
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the saved instruction pointer (gpr.rip) from an interrupt context.
 * reg must point to a struct x86_user_context.
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	return ((const struct x86_user_context *)reg)->gpr.rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;
@ -1589,6 +1637,13 @@ return;
kprintf_unlock(irqflags);
}
/* Park the calling CPU: halt again every time cpu_halt() returns. */
void arch_cpu_stop(void)
{
	for (;;) {
		cpu_halt();
	}
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ -1632,24 +1687,31 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
}
}
/*
 * Return the apic_id stored in the x86 per-CPU local variable of the given
 * CPU.
 * NOTE(review): cpu is not range-checked here, unlike ihk_mc_interrupt_cpu()
 * -- confirm that callers only pass valid logical CPU ids.
 */
int ihk_mc_get_interrupt_id(int cpu)
{
return get_x86_cpu_local_variable(cpu)->apic_id;
}
/*@
@ requires \valid_apicid(cpu); // valid APIC ID or not
@ requires \valid_cpuid(cpu); // valid CPU logical ID
@ ensures \result == 0
@*/
int ihk_mc_interrupt_cpu(int cpu, int vector)
{
if (cpu < 0 || cpu >= num_processors) {
kprintf("%s: invalid CPU id: %d\n", __func__, cpu);
return -1;
}
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
x86_issue_ipi(cpu, vector);
x86_issue_ipi(get_x86_cpu_local_variable(cpu)->apic_id, vector);
return 0;
}
#ifdef POSTK_DEBUG_ARCH_DEP_22
extern void perf_start(struct mc_perf_event *event);
extern void perf_reset(struct mc_perf_event *event);
struct thread *arch_switch_context(struct thread *prev, struct thread *next)
{
struct thread *last;
struct mcs_rwlock_node_irqsave lock;
dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
ihk_mc_get_processor_id(), next->tlsblock_base);
@ -1657,6 +1719,7 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
/* Set up new TLS.. */
ihk_mc_init_user_tlsbase(next->uctx, next->tlsblock_base);
#ifdef ENABLE_PERF
/* Performance monitoring inherit */
if(next->proc->monitoring_event) {
if(next->proc->perf_status == PP_RESET)
@ -1666,9 +1729,10 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
perf_start(next->proc->monitoring_event);
}
}
#endif
#ifdef PROFILE_ENABLE
if (prev->profile && prev->profile_start_ts != 0) {
if (prev && prev->profile && prev->profile_start_ts != 0) {
prev->profile_elapsed_ts +=
(rdtsc() - prev->profile_start_ts);
prev->profile_start_ts = 0;
@ -1680,6 +1744,28 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
#endif
if (prev) {
mcs_rwlock_writer_lock(&prev->proc->update_lock, &lock);
if (prev->proc->status & (PS_DELAY_STOPPED | PS_DELAY_TRACED)) {
switch (prev->proc->status) {
case PS_DELAY_STOPPED:
prev->proc->status = PS_STOPPED;
break;
case PS_DELAY_TRACED:
prev->proc->status = PS_TRACED;
break;
default:
break;
}
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
/* Wake up the parent who tried wait4 and sleeping */
waitq_wakeup(&prev->proc->parent->waitpid_q);
} else {
mcs_rwlock_writer_unlock(&prev->proc->update_lock,
&lock);
}
last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev);
}
else {
@ -1687,7 +1773,6 @@ struct thread *arch_switch_context(struct thread *prev, struct thread *next)
}
return last;
}
#endif
/*@
@ requires \valid(thread);
@ -1720,7 +1805,7 @@ check_and_allocate_fp_regs(struct thread *thread)
if (!thread->fp_regs) {
kprintf("error: allocating fp_regs pages\n");
result = 1;
result = -ENOMEM;
goto out;
}
@ -1733,12 +1818,14 @@ out:
/*@
@ requires \valid(thread);
@*/
void
int
save_fp_regs(struct thread *thread)
{
if (check_and_allocate_fp_regs(thread) != 0) {
// alloc error
return;
int ret = 0;
ret = check_and_allocate_fp_regs(thread);
if (ret) {
goto out;
}
if (xsave_available) {
@ -1753,23 +1840,25 @@ save_fp_regs(struct thread *thread)
dkprintf("fp_regs for TID %d saved\n", thread->tid);
}
out:
return ret;
}
void copy_fp_regs(struct thread *from, struct thread *to)
int copy_fp_regs(struct thread *from, struct thread *to)
{
if ((from->fp_regs != NULL) && (check_and_allocate_fp_regs(to) == 0)) {
memcpy(to->fp_regs, from->fp_regs, sizeof(fp_regs_struct));
int ret = 0;
if (from->fp_regs != NULL) {
ret = check_and_allocate_fp_regs(to);
if (!ret) {
memcpy(to->fp_regs,
from->fp_regs,
sizeof(fp_regs_struct));
}
}
return ret;
}
#ifdef POSTK_DEBUG_TEMP_FIX_19
void
clear_fp_regs(struct thread *thread)
{
return;
}
#endif /* POSTK_DEBUG_TEMP_FIX_19 */
/*@
@ requires \valid(thread);
@ assigns thread->fp_regs;
@ -1777,8 +1866,11 @@ clear_fp_regs(struct thread *thread)
void
restore_fp_regs(struct thread *thread)
{
if (!thread->fp_regs)
if (!thread->fp_regs) {
// only clear fpregs.
clear_fp_regs();
return;
}
if (xsave_available) {
unsigned int low, high;
@ -1797,6 +1889,13 @@ restore_fp_regs(struct thread *thread)
//release_fp_regs(thread);
}
void clear_fp_regs(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
restore_fp_regs(&v->idle);
}
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{
ihk_mc_user_context_t *uctx = thread->uctx;
@ -1830,6 +1929,10 @@ ihk_mc_init_user_tlsbase(ihk_mc_user_context_t *ctx,
do_arch_prctl(ARCH_SET_FS, tls_base_addr);
}
void arch_flush_icache_all(void)
{
return;
}
/*@
@ assigns \nothing;
@ -1983,6 +2086,92 @@ mod_nmi_ctx(void *nmi_ctx, void (*func)())
l[i++] = 0x28; // KERNEL DS
}
/*
 * Record this CPU's register state in its x86 per-CPU panic_regs[] area
 * and mark the CPU as paniced.
 *
 * @irq_regs: register frame of the interrupted context, interpreted as a
 *            struct x86_user_context.
 *
 * Layout written:
 *   panic_regs[0..15]  rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, r8..r15
 *   panic_regs[16]     instruction pointer
 *   panic_regs[17]...  rflags, cs, ss, ds, es, fs, gs as 32-bit values
 *
 * If the interrupted rip is above USER_END the frame itself is saved.
 * Otherwise (interrupted in user space) the kernel context of the current
 * thread is saved: the registers kept in current->ctx, r12-r15 from the
 * frame, zero for the remaining general registers, and the address of
 * enter_user_mode as the instruction pointer.
 */
void arch_save_panic_regs(void *irq_regs)
{
	struct thread *current = cpu_local_var(current);
	struct x86_user_context *regs =
		(struct x86_user_context *)irq_regs;
	struct x86_cpu_local_variables *x86v =
		get_x86_cpu_local_variable(ihk_mc_get_processor_id());
	struct segment_regs {
		uint32_t rflags;
		uint32_t cs;
		uint32_t ss;
		uint32_t ds;
		uint32_t es;
		uint32_t fs;
		uint32_t gs;
	} *sregs;

	/* Kernel space? */
	if (regs->gpr.rip > USER_END) {
		x86v->panic_regs[0] = regs->gpr.rax;
		x86v->panic_regs[1] = regs->gpr.rbx;
		x86v->panic_regs[2] = regs->gpr.rcx;
		x86v->panic_regs[3] = regs->gpr.rdx;
		x86v->panic_regs[4] = regs->gpr.rsi;
		x86v->panic_regs[5] = regs->gpr.rdi;
		x86v->panic_regs[6] = regs->gpr.rbp;
		x86v->panic_regs[7] = regs->gpr.rsp;
		x86v->panic_regs[8] = regs->gpr.r8;
		x86v->panic_regs[9] = regs->gpr.r9;
		x86v->panic_regs[10] = regs->gpr.r10;
		x86v->panic_regs[11] = regs->gpr.r11;
		x86v->panic_regs[12] = regs->gpr.r12;
		x86v->panic_regs[13] = regs->gpr.r13;
		x86v->panic_regs[14] = regs->gpr.r14;
		x86v->panic_regs[15] = regs->gpr.r15;
		x86v->panic_regs[16] = regs->gpr.rip;
	}
	/* User-space, show kernel context */
	else {
		/* %p expects a pointer argument, so convert the register value */
		kprintf("%s: in user-space: %p\n", __func__,
			(void *)regs->gpr.rip);
		x86v->panic_regs[0] = 0;
		x86v->panic_regs[1] = current->ctx.rbx;
		x86v->panic_regs[2] = 0;
		x86v->panic_regs[3] = 0;
		x86v->panic_regs[4] = current->ctx.rsi;
		x86v->panic_regs[5] = current->ctx.rdi;
		x86v->panic_regs[6] = current->ctx.rbp;
		x86v->panic_regs[7] = current->ctx.rsp;
		x86v->panic_regs[8] = 0;
		x86v->panic_regs[9] = 0;
		x86v->panic_regs[10] = 0;
		x86v->panic_regs[11] = 0;
		x86v->panic_regs[12] = regs->gpr.r12;
		x86v->panic_regs[13] = regs->gpr.r13;
		x86v->panic_regs[14] = regs->gpr.r14;
		x86v->panic_regs[15] = regs->gpr.r15;
		x86v->panic_regs[16] = (unsigned long)enter_user_mode;
	}

	/*
	 * Flags and segment selectors always come from the interrupt frame,
	 * whichever branch filled the general registers above.
	 */
	sregs = (struct segment_regs *)&x86v->panic_regs[17];
	sregs->rflags = regs->gpr.rflags;
	sregs->cs = regs->gpr.cs;
	sregs->ss = regs->gpr.ss;
	sregs->ds = regs->sr.ds;
	sregs->es = regs->sr.es;
	sregs->fs = regs->sr.fs;
	sregs->gs = regs->sr.gs;

	x86v->paniced = 1;
}
/* Clear the paniced flag in the x86 per-CPU variables of the calling CPU. */
void arch_clear_panic(void)
{
	struct x86_cpu_local_variables *self;

	self = get_x86_cpu_local_variable(ihk_mc_get_processor_id());
	self->paniced = 0;
}
int arch_cpu_read_write_register(
struct ihk_os_cpu_register *desc,
enum mcctrl_os_cpu_operation op)
@ -2000,144 +2189,48 @@ int arch_cpu_read_write_register(
return 0;
}
/*
* Generic remote CPU function invocation facility.
*/
static void smp_func_call_handler(void)
extern int nmi_mode;
extern long freeze_thaw(void *nmi_ctx);
void multi_nm_interrupt_handler(void *irq_regs)
{
int irq_flags;
struct smp_func_call_request *req;
int reqs_left;
dkprintf("%s: ...\n", __func__);
switch (nmi_mode) {
case 1:
case 2:
/* mode == 1 or 2, for FREEZER NMI */
dkprintf("%s: freeze mode NMI catch. (nmi_mode=%d)\n",
__func__, nmi_mode);
freeze_thaw(NULL);
break;
reiterate:
req = NULL;
reqs_left = 0;
case 0:
/* mode == 0, for MEMDUMP NMI */
arch_save_panic_regs(irq_regs);
ihk_mc_query_mem_areas();
/* memdump-nmi is halted McKernel, break is unnecessary. */
/* fall through */
case 3:
/* mode == 3, for SHUTDOWN-WAIT NMI */
kprintf("%s: STOP\n", __func__);
while (nmi_mode != 4)
cpu_halt();
break;
irq_flags = ihk_mc_spinlock_lock(
&cpu_local_var(smp_func_req_lock));
/* Take requests one-by-one */
if (!list_empty(&cpu_local_var(smp_func_req_list))) {
req = list_first_entry(&cpu_local_var(smp_func_req_list),
struct smp_func_call_request, list);
list_del(&req->list);
reqs_left = !list_empty(&cpu_local_var(smp_func_req_list));
}
ihk_mc_spinlock_unlock(&cpu_local_var(smp_func_req_lock),
irq_flags);
if (req) {
req->ret = req->sfcd->func(req->cpu_index,
req->sfcd->nr_cpus, req->sfcd->arg);
ihk_atomic_dec(&req->sfcd->cpus_left);
}
if (reqs_left)
goto reiterate;
}
int smp_call_func(cpu_set_t *__cpu_set, smp_func_t __func, void *__arg)
{
int cpu, nr_cpus = 0;
int cpu_index = 0;
int this_cpu_index = 0;
struct smp_func_call_data sfcd;
struct smp_func_call_request *reqs;
int ret = 0;
int call_on_this_cpu = 0;
cpu_set_t cpu_set;
/* Sanity checks */
if (!__cpu_set || !__func) {
return -EINVAL;
}
/* Make sure it won't change in between */
cpu_set = *__cpu_set;
for_each_set_bit(cpu, (unsigned long *)&cpu_set,
sizeof(cpu_set) * BITS_PER_BYTE) {
if (cpu == ihk_mc_get_processor_id()) {
call_on_this_cpu = 1;
case 4:
/* mode == 4, continue NMI */
arch_clear_panic();
if (!ihk_mc_get_processor_id()) {
ihk_mc_clear_dump_page_completion();
}
++nr_cpus;
kprintf("%s: RESUME, nmi_mode: %d\n", __func__, nmi_mode);
break;
default:
ekprintf("%s: Unknown nmi-mode(%d) detected.\n",
__func__, nmi_mode);
break;
}
if (!nr_cpus) {
return -EINVAL;
}
reqs = kmalloc(sizeof(*reqs) * nr_cpus, IHK_MC_AP_NOWAIT);
if (!reqs) {
ret = -ENOMEM;
goto free_out;
}
sfcd.nr_cpus = nr_cpus;
sfcd.func = __func;
sfcd.arg = __arg;
ihk_atomic_set(&sfcd.cpus_left,
call_on_this_cpu ? nr_cpus - 1 : nr_cpus);
/* Add requests and send IPIs */
cpu_index = 0;
for_each_set_bit(cpu, (unsigned long *)&cpu_set,
sizeof(cpu_set) * BITS_PER_BYTE) {
unsigned long irq_flags;
reqs[cpu_index].cpu_index = cpu_index;
reqs[cpu_index].ret = 0;
if (cpu == ihk_mc_get_processor_id()) {
this_cpu_index = cpu_index;
++cpu_index;
continue;
}
reqs[cpu_index].sfcd = &sfcd;
irq_flags =
ihk_mc_spinlock_lock(&get_cpu_local_var(cpu)->smp_func_req_lock);
list_add_tail(&reqs[cpu_index].list,
&get_cpu_local_var(cpu)->smp_func_req_list);
ihk_mc_spinlock_unlock(&get_cpu_local_var(cpu)->smp_func_req_lock,
irq_flags);
ihk_mc_interrupt_cpu(
get_x86_cpu_local_variable(cpu)->apic_id,
LOCAL_SMP_FUNC_CALL_VECTOR);
++cpu_index;
}
/* Is this CPU involved? */
if (call_on_this_cpu) {
reqs[this_cpu_index].ret =
__func(this_cpu_index, nr_cpus, __arg);
}
/* Wait for the rest of the CPUs */
while (ihk_atomic_read(&sfcd.cpus_left) > 0) {
cpu_pause();
}
/* Check return values, if error, report the first non-zero */
for (cpu_index = 0; cpu_index < nr_cpus; ++cpu_index) {
if (reqs[cpu_index].ret != 0) {
ret = reqs[cpu_index].ret;
goto free_out;
}
}
ret = 0;
free_out:
kfree(reqs);
return ret;
}
/*** end of file ***/

View File

@ -64,12 +64,13 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return oldval;
}
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
static inline int futex_atomic_op_inuser(int encoded_op,
int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oparg = (encoded_op & 0x00fff000) >> 12;
int cmparg = encoded_op & 0xfff;
int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
@ -128,12 +129,4 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
/*
 * Load the 32-bit value at @from with one volatile read and store it in
 * *@dest. Always returns 0.
 */
static inline int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
	volatile uint32_t *src = (volatile uint32_t *)from;

	*dest = *src;
	return 0;
}
#endif

View File

@ -6,6 +6,8 @@
#include <ihk/cpu.h>
#include <ihk/atomic.h>
#include <lwk/compiler.h>
#include "config.h"
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -14,18 +16,84 @@
int __kprintf(const char *format, ...);
#endif
typedef int ihk_spinlock_t;
typedef unsigned short __ticket_t;
typedef unsigned int __ticketpair_t;
typedef struct ihk_spinlock {
union {
__ticketpair_t head_tail;
struct __raw_tickets {
__ticket_t head, tail;
} tickets;
};
} ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
#define IHK_STATIC_SPINLOCK_FUNCS
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
static inline void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = 0;
lock->head_tail = 0;
}
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
/*
 * With DEBUG_SPINLOCK, trace every trylock attempt. The wrapper must be a
 * GNU statement expression "({ ... })" so that it evaluates to rc and can be
 * used wherever the plain __ihk_mc_spinlock_trylock_noirq() call is used
 * (e.g. as the right-hand side of an assignment); a bare "{ ... }" block
 * has no value.
 */
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock_noirq(l) ({ int rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock_noirq\n", ihk_mc_get_processor_id()); rc; \
})
#else
#define ihk_mc_spinlock_trylock_noirq __ihk_mc_spinlock_trylock_noirq
#endif
/*
 * Try to take the ticket spinlock once, without spinning.
 *
 * Returns non-zero when the lock was taken (preemption is then left
 * disabled) and 0 when it was not (preemption state is unchanged on
 * return).
 */
static inline int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
{
/* Snapshot of the lock word; all decisions below are based on it. */
ihk_spinlock_t cur = { .head_tail = lock->head_tail };
/* Value to install on success: same head, tail advanced by one ticket (2). */
ihk_spinlock_t next = { .tickets = {
.head = cur.tickets.head,
.tail = cur.tickets.tail + 2
} };
int success;
/* head != tail in the snapshot: give up without touching the lock. */
if (cur.tickets.head != cur.tickets.tail) {
return 0;
}
preempt_disable();
/* Use the same increment amount as other functions! */
/* Install 'next' only if the lock word still equals the snapshot. */
success = __sync_bool_compare_and_swap((__ticketpair_t*)lock, cur.head_tail, next.head_tail);
if (!success) {
/* The lock word changed since the snapshot: undo preempt_disable(). */
preempt_enable();
}
return success;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_trylock(l, result) ({ unsigned long rc; \
__kprintf("[%d] call ihk_mc_spinlock_trylock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_trylock(l, result); \
__kprintf("[%d] ret ihk_mc_spinlock_trylock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_trylock __ihk_mc_spinlock_trylock
#endif
/*
 * Interrupt-disabling variant of the trylock.
 *
 * Calls cpu_disable_interrupt_save(), then attempts the lock once and
 * stores the outcome of __ihk_mc_spinlock_trylock_noirq() in *@result.
 * The value obtained from cpu_disable_interrupt_save() is returned in
 * every case; this function does not restore interrupts itself, even
 * when the attempt fails.
 */
static inline unsigned long __ihk_mc_spinlock_trylock(ihk_spinlock_t *lock,
int *result)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
*result = __ihk_mc_spinlock_trylock_noirq(lock);
return flags;
}
#define SPIN_LOCK_UNLOCKED 0
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
@ -37,42 +105,26 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
static inline void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
#if 0
asm volatile("lock ; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
register struct __raw_tickets inc = { .tail = 0x0002 };
preempt_disable();
asm volatile("lock; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
:
: "memory", "cc");
asm volatile ("lock xaddl %0, %1\n"
: "+r" (inc), "+m" (*(lock)) : : "memory", "cc");
if (inc.head == inc.tail)
goto out;
for (;;) {
if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail)
goto out;
cpu_pause();
}
out:
barrier(); /* make sure nothing creeps before the lock is taken */
}
#ifdef DEBUG_SPINLOCK
@ -84,7 +136,7 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
static inline unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
@ -104,10 +156,13 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
static inline void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
__ticket_t inc = 0x0002;
asm volatile ("lock addw %1, %0\n"
: "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc");
preempt_enable();
}
@ -120,94 +175,14 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
static inline void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock,
unsigned long flags)
{
__ihk_mc_spinlock_unlock_noirq(lock);
cpu_restore_interrupt(flags);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
/*
 * Queue node of the MCS lock. The same type is used both for the lock
 * itself (only 'next' is used, as the queue tail) and for each waiter.
 */
typedef struct mcs_lock_node {
	unsigned long locked;		/* non-zero while the waiter must spin */
	struct mcs_lock_node *next;	/* successor in the queue, or NULL */
	unsigned long irqsave;		/* value saved by mcs_lock_lock() */
} __attribute__((aligned(64))) mcs_lock_node_t;

/* Reset @node to "not locked, no successor"; irqsave is left untouched. */
static void mcs_lock_init(struct mcs_lock_node *node)
{
	node->next = NULL;
	node->locked = 0;
}
/*
 * Acquire the MCS lock @lock using the caller-provided queue node @node.
 *
 * @node is reset, then swapped into lock->next with xchg8() (presumably an
 * atomic exchange -- confirm against its definition). If the previous
 * value was non-NULL there is a predecessor: @node is marked locked,
 * linked behind it and the caller spins until node->locked becomes 0.
 */
static void __mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
/* Make @node the new queue tail and fetch the old tail. */
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
(unsigned long)node);
if (pred) {
/* Set locked before publishing the node through pred->next. */
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
}
/*
 * Release the MCS lock @lock held through queue node @node.
 *
 * With no visible successor, lock->next is compare-and-swapped from @node
 * to 0; if that returns @node the queue was empty and the function
 * returns. Otherwise a successor is in the middle of linking itself, so
 * wait for node->next to appear. Finally the successor's 'locked' field
 * is cleared.
 */
static void __mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = (struct mcs_lock_node *)
atomic_cmpxchg8((unsigned long *)&lock->next,
(unsigned long)node, (unsigned long)0);
if (old == node) {
return;
}
/* Tail was not @node: wait until the successor sets node->next. */
while (node->next == NULL) {
cpu_pause();
}
}
node->next->locked = 0;
}
/* Call preempt_disable(), then acquire @lock with queue node @node. */
static void mcs_lock_lock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
preempt_disable();
__mcs_lock_lock(lock, node);
}
/* Release @lock held through @node, then call preempt_enable(). */
static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
__mcs_lock_unlock(lock, node);
preempt_enable();
}
/*
 * Acquire @lock with interrupts disabled: the value returned by
 * cpu_disable_interrupt_save() is kept in node->irqsave for the matching
 * mcs_lock_unlock().
 */
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
node->irqsave = cpu_disable_interrupt_save();
mcs_lock_lock_noirq(lock, node);
}
/*
 * Release @lock, then pass node->irqsave (stored by mcs_lock_lock()) to
 * cpu_restore_interrupt().
 */
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
mcs_lock_unlock_noirq(lock, node);
cpu_restore_interrupt(node->irqsave);
}
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
@ -223,14 +198,22 @@ typedef struct mcs_rwlock_node {
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
} __attribute__((aligned(64))) mcs_rwlock_node_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_t;
#else
} mcs_rwlock_node_t;
#endif
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_node_irqsave_t;
#else
} mcs_rwlock_node_irqsave_t;
#endif
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -239,9 +222,13 @@ typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_rwlock_lock_t;
#else
} mcs_rwlock_lock_t;
#endif
static void
static inline void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -262,7 +249,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id());
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
static void
static inline void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -289,7 +276,7 @@ __mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_n
}
#ifndef SPINLOCK_IN_MCS_RWLOCK
static void
static inline void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
@ -356,7 +343,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
static void
static inline void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -416,7 +403,7 @@ atomic_inc_ifnot0(ihk_atomic_t *v)
return old;
}
static void
static inline void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -482,7 +469,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
static void
static inline void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -529,7 +516,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
static void
static inline void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -549,7 +536,7 @@ __kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
static void
static inline void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -569,7 +556,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
static void
static inline void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -589,7 +576,7 @@ __kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
static void
static inline void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
@ -600,4 +587,95 @@ __mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_
#endif
}
/*
 * Report whether bit 9 (mask 0x200, the interrupt-enable flag of x86
 * RFLAGS) is set in a saved flags word. Returns 1 if set, 0 otherwise.
 */
static inline int irqflags_can_interrupt(unsigned long flags)
{
	return (flags & 0x200) != 0;
}
/*
 * Reader/writer lock word. The same storage can be accessed as one long
 * ('lock') or as the pair 'read' / 'write'.
 */
struct ihk_rwlock {
	union {
		long lock;
		struct {
			unsigned int read;
			int write;
		};
	} lock;
};

/* Put @rw into its initial state: read == 0, write == 1. */
static inline void ihk_mc_rwlock_init(struct ihk_rwlock *rw)
{
	rw->lock.write = 1;
	rw->lock.read = 0;
}
/*
 * Acquire @rw for reading, spinning until it succeeds.
 *
 * The 64-bit lock word is decremented with a locked decq. A non-negative
 * result (jns) means the read lock is held. Otherwise the decrement is
 * undone and the loop at label 2 executes pause until "cmpq $1, lock"
 * leaves the sign flag clear, after which the decrement is retried.
 */
static inline void ihk_mc_read_lock(struct ihk_rwlock *rw)
{
asm volatile("1:\t"
"lock; decq %0\n\t"
"jns 3f\n\t"
"lock incq %0\n\t"
"2:\t"
"pause\n\t"
"cmpq $0x1, %0\n\t"
"jns 1b\n\t"
"jmp 2b\n\t"
"3:"
: "+m" (rw->lock.lock) : : "memory");
}
/*
 * Acquire @rw for writing, spinning until it succeeds.
 *
 * The 32-bit 'write' field is decremented with a locked decl. Reaching
 * zero (je) means the write lock is held. Otherwise the decrement is
 * undone and the loop at label 2 executes pause until 'write' compares
 * equal to 1, after which the decrement is retried.
 *
 * NOTE(review): the "i" input operand (1L << 32) is never referenced by
 * the template; it looks like a leftover -- confirm before removing.
 */
static inline void ihk_mc_write_lock(struct ihk_rwlock *rw)
{
asm volatile("1:\t"
"lock; decl %0\n\t"
"je 3f\n\t"
"lock; incl %0\n\t"
"2:\t"
"pause\n\t"
"cmpl $0x1,%0\n\t"
"je 1b\n\t"
"jmp 2b\n\t"
"3:"
: "+m" (rw->lock.write) : "i" (((1L) << 32)) : "memory");
}
/*
 * Single attempt to take @rw for reading. The lock storage is treated as
 * an ihk_atomic64_t and decremented by one; a negative result is undone
 * with an increment. Returns 1 on success, 0 on failure.
 */
static inline int ihk_mc_read_trylock(struct ihk_rwlock *rw)
{
	ihk_atomic64_t *word = (ihk_atomic64_t *)rw;

	if (ihk_atomic64_sub_return(1, word) < 0) {
		/* Went negative: give the count back and report failure. */
		ihk_atomic64_inc(word);
		return 0;
	}
	return 1;
}
/*
 * Single attempt to take @rw for writing. The 'write' field is treated as
 * an ihk_atomic_t and passed to ihk_atomic_dec_and_test(); when that
 * reports failure the decrement is undone with an increment.
 * Returns 1 on success, 0 on failure.
 */
static inline int ihk_mc_write_trylock(struct ihk_rwlock *rw)
{
	ihk_atomic_t *writer = (ihk_atomic_t *)&rw->lock.write;

	if (!ihk_atomic_dec_and_test(writer)) {
		/* Not acquired: restore the previous value. */
		ihk_atomic_inc(writer);
		return 0;
	}
	return 1;
}
/*
 * Drop a read lock on @rw: one locked incq on the 64-bit lock word, the
 * inverse of the decq performed by ihk_mc_read_lock().
 */
static inline void ihk_mc_read_unlock(struct ihk_rwlock *rw)
{
asm volatile("lock; incq %0" : "+m" (rw->lock.lock) : : "memory");
}
/*
 * Drop the write lock on @rw: one locked incl on the 32-bit 'write'
 * field, the inverse of the decl performed by ihk_mc_write_lock().
 *
 * NOTE(review): the "i" input operand (1L << 32) is never referenced by
 * the template; it looks like a leftover -- confirm before removing.
 */
static inline void ihk_mc_write_unlock(struct ihk_rwlock *rw)
{
asm volatile("lock; incl %0"
: "+m" (rw->lock.write) : "i" (((1L) << 32)) : "memory");
}
/*
 * Return 1 when the 'write' field of @rw holds 1, 0 otherwise. The field
 * is read with a plain, non-atomic load.
 */
static inline int ihk_mc_write_can_lock(struct ihk_rwlock *rw)
{
	int writer_word = rw->lock.write;

	return writer_word == 1;
}
/*
 * Return 1 when the 64-bit lock word of @rw is positive, 0 otherwise.
 * The word is read with a plain, non-atomic load.
 */
static inline int ihk_mc_read_can_lock(struct ihk_rwlock *rw)
{
	long lock_word = rw->lock.lock;

	return lock_word > 0;
}
#endif

View File

@ -1,3 +1,4 @@
/* arch-memory.h COPYRIGHT FUJITSU LIMITED 2018 */
/**
* \file arch-memory.h
* License details are found in the file LICENSE.
@ -16,7 +17,9 @@
#define __HEADER_X86_COMMON_ARCH_MEMORY_H
#include <ihk/types.h>
#include <errno.h>
struct memobj;
#define KERNEL_CS_ENTRY 4
#define KERNEL_DS_ENTRY 5
#define USER_CS_ENTRY 6
@ -40,18 +43,37 @@
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define USER_END 0x0000800000000000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
#define USER_END 0x0000800000000000UL
#define LD_TASK_UNMAPPED_BASE 0x0000155555500000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
/*
* Canonical negative addresses (i.e., the smallest kernel virtual address)
* on x86 64 bit mode (in its most restricted 48 bit format) starts from
* 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000.
* The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables)
* is used for Xen hypervisor (see arch/x86/include/asm/page.h) and that is
* what we utilize for McKernel.
* This gives us the benefit of being able to use Linux kernel virtual
* addresses identically as in Linux.
*
* NOTE: update these also in eclair.c when modified!
*/
#define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xfffff00000000000UL
#define MAP_FIXED_START 0xffffffff70000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL
#define MAP_VMAP_START 0xffff850000000000UL
#define MAP_FIXED_START 0xffff860000000000UL
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
/*
* MAP_KERNEL_START is 8MB below MODULES_END in Linux.
* Placing the LWK image in the virtual address space at the end of
* the Linux modules section enables us to map the LWK TEXT in Linux
* as well, so that Linux can also call into LWK text.
* It's defined by cmake.
*/
#define STACK_TOP(region) ((region)->user_end)
#define MAP_VMAP_SIZE 0x0000000100000000UL
#define KERNEL_PHYS_OFFSET MAP_ST_START
#define PTL4_SHIFT 39
#define PTL4_SIZE (1UL << PTL4_SHIFT)
#define PTL3_SHIFT 30
@ -142,6 +164,10 @@ typedef unsigned long pte_t;
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
#define USER_STACK_PREPAGE_SIZE LARGE_PAGE_SIZE
#define USER_STACK_PAGE_MASK LARGE_PAGE_MASK
#define USER_STACK_PAGE_P2ALIGN LARGE_PAGE_P2ALIGN
#define USER_STACK_PAGE_SHIFT LARGE_PAGE_SHIFT
/* For easy conversion, it is better to be the same as architecture's ones */
enum ihk_mc_pt_attribute {
@ -157,14 +183,12 @@ enum ihk_mc_pt_attribute {
PTATTR_WRITE_COMBINED = 0x40000,
};
enum ihk_mc_pt_attribute attr_mask;
extern enum ihk_mc_pt_attribute attr_mask;
#ifdef POSTK_DEBUG_ARCH_DEP_12
/*
 * Return 1 when @pfn has PFL1_PWT set and PFL1_PCD clear, 0 otherwise.
 */
static inline int pfn_is_write_combined(uintptr_t pfn)
{
	if (!(pfn & PFL1_PWT))
		return 0;

	return !(pfn & PFL1_PCD);
}
#endif /* #ifdef POSTK_DEBUG_ARCH_DEP_12 */
static inline int pte_is_null(pte_t *ptep)
{
@ -314,7 +338,102 @@ static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
return;
}
/* Stub: always returns 0; @ptep is not examined. */
static inline int pte_is_contiguous(pte_t *ptep)
{
return 0;
}
/* Stub: always returns 0, whatever @pgsize is. */
static inline int pgsize_is_contiguous(size_t pgsize)
{
return 0;
}
/*
 * Map a page size to its page-table level: PTL1_SIZE..PTL4_SIZE give
 * 1..4. Any other size yields 0.
 */
static inline int pgsize_to_tbllv(size_t pgsize)
{
	int level;

	switch (pgsize) {
	case PTL1_SIZE:
		level = 1;
		break;
	case PTL2_SIZE:
		level = 2;
		break;
	case PTL3_SIZE:
		level = 3;
		break;
	case PTL4_SIZE:
		level = 4;
		break;
	default:
		/* XXX: workaround. cannot use panic() here */
		level = 0;
		break;
	}

	return level;
}
/*
 * Map a page size to its shift: PTLn_SIZE gives PTLn_SHIFT for n = 1..4.
 * Any other size yields -EINVAL.
 */
static inline int pgsize_to_pgshift(size_t pgsize)
{
	int shift = -EINVAL;

	switch (pgsize) {
	case PTL1_SIZE:
		shift = PTL1_SHIFT;
		break;
	case PTL2_SIZE:
		shift = PTL2_SHIFT;
		break;
	case PTL3_SIZE:
		shift = PTL3_SHIFT;
		break;
	case PTL4_SIZE:
		shift = PTL4_SHIFT;
		break;
	default:
		break;
	}

	return shift;
}
/*
 * Map a page-table level to its page size: levels 1..4 give
 * PTL1_SIZE..PTL4_SIZE. Any other level yields 0.
 */
static inline size_t tbllv_to_pgsize(int level)
{
	size_t pgsize;

	switch (level) {
	case 1:
		pgsize = PTL1_SIZE;
		break;
	case 2:
		pgsize = PTL2_SIZE;
		break;
	case 3:
		pgsize = PTL3_SIZE;
		break;
	case 4:
		pgsize = PTL4_SIZE;
		break;
	default:
		/* XXX: workaround. cannot use panic() here */
		pgsize = 0;
		break;
	}

	return pgsize;
}
/* Stub: always returns 0, whatever @level is. */
static inline size_t tbllv_to_contpgsize(int level)
{
return 0;
}
/* Stub: always returns 0, whatever @level is. */
static inline int tbllv_to_contpgshift(int level)
{
return 0;
}
/* Stub: returns @__ptep unchanged; @__pgsize is ignored. */
static inline pte_t *get_contiguous_head(pte_t *__ptep, size_t __pgsize)
{
return __ptep;
}
/* Stub: returns @__ptep unchanged; @__pgsize is ignored. */
static inline pte_t *get_contiguous_tail(pte_t *__ptep, size_t __pgsize)
{
return __ptep;
}
int split_contiguous_pages(pte_t *ptep, size_t pgsize,
uint32_t memobj_flags);
/* Stub: always returns 0; neither argument is examined. */
static inline int page_is_contiguous_head(pte_t *ptep, size_t pgsize)
{
return 0;
}
/* Stub: always returns 0; neither argument is examined. */
static inline int page_is_contiguous_tail(pte_t *ptep, size_t pgsize)
{
return 0;
}
struct page_table;
/*
 * Empty hook: the body does nothing, so *@pgaddrp and *@pgsizep are left
 * exactly as the caller passed them.
 */
static inline void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
void **pgaddrp,
size_t *pgsizep)
{
}
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
@ -331,4 +450,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
/*
 * Round x up / down to a multiple of align (align must be a power of two).
 * The mask is built in the type of x: if it were built in the type of
 * align, a 32-bit unsigned align would zero-extend and wipe the upper
 * half of a 64-bit x.
 */
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((__typeof__(x))(align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif

View File

@ -13,19 +13,17 @@
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory")
#define wmb() asm volatile("sfence" ::: "memory")
#define smp_mb() mb()
#define smp_rmb() rmb()
#define smp_wmb() barrier()
#define arch_barrier() asm volatile("" : : : "memory")
static inline void rmb(void)
{
arch_barrier();
}
static inline void wmb(void)
{
arch_barrier();
}
static unsigned long read_tsc(void)
static inline unsigned long read_tsc(void)
{
unsigned int low, high;
@ -34,4 +32,21 @@ static unsigned long read_tsc(void)
return (low | ((unsigned long)high << 32));
}
/*
 * smp_load_acquire(p): read *p once through ACCESS_ONCE(), then issue
 * barrier() and evaluate to the value read. compiletime_assert_atomic_type()
 * is applied to *p (presumably rejecting types that cannot be loaded in a
 * single access -- confirm against its definition).
 */
#define smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = ACCESS_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
})
/*
 * smp_store_release(p, v): issue barrier(), then store v to *p through
 * WRITE_ONCE(). compiletime_assert_atomic_type() is applied to *p
 * (presumably rejecting types that cannot be stored in a single access --
 * confirm against its definition).
 */
#define smp_store_release(p, v) \
({ \
compiletime_assert_atomic_type(*p); \
barrier(); \
WRITE_ONCE(*p, v); \
})
void arch_flush_icache_all(void);
#endif /* ARCH_CPU_H */

View File

@ -1,32 +0,0 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct rusage_global *rusage;
/*
 * Map a page size to its IHK_OS_PGSIZE_* index: PTL1_SIZE -> 4KB,
 * PTL2_SIZE -> 2MB, PTL3_SIZE -> 1GB. Any other size is reported with
 * kprintf() and mapped to IHK_OS_PGSIZE_4KB.
 */
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
	switch (pgsize) {
	case PTL1_SIZE:
		return IHK_OS_PGSIZE_4KB;
	case PTL2_SIZE:
		return IHK_OS_PGSIZE_2MB;
	case PTL3_SIZE:
		return IHK_OS_PGSIZE_1GB;
	default:
		break;
	}

	kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
	return IHK_OS_PGSIZE_4KB;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -50,7 +50,12 @@ struct x86_cpu_local_variables {
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@ -1,4 +1,4 @@
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/* elf.h COPYRIGHT FUJITSU LIMITED 2018 */
#ifndef __HEADER_X86_COMMON_ELF_H
#define __HEADER_X86_COMMON_ELF_H
@ -56,4 +56,3 @@ struct user_regs64_struct
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
#endif /* __HEADER_S64FX_COMMON_ELF_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,94 +0,0 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and definitions for ELF core file.
* Extracted from
* System V Application Binary Interface - DRAFT - 10 June 2013,
* http://www.sco.com/developers/gabi/latest/contents.html
*/
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Xword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
#define EI_NIDENT 16
typedef struct {
unsigned char e_ident[EI_NIDENT];
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry;
Elf64_Off e_phoff;
Elf64_Off e_shoff;
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr;
#define EI_MAG0 0
#define EI_MAG1 1
#define EI_MAG2 2
#define EI_MAG3 3
#define EI_CLASS 4
#define EI_DATA 5
#define EI_VERSION 6
#define EI_OSABI 7
#define EI_ABIVERSION 8
#define EI_PAD 9
#define ELFMAG0 0x7f
#define ELFMAG1 'E'
#define ELFMAG2 'L'
#define ELFMAG3 'F'
#define ELFCLASS64 2 /* 64-bit object */
#define ELFDATA2LSB 1 /* LSB */
#define El_VERSION 1 /* defined to be the same as EV CURRENT */
#define ELFOSABI_NONE 0 /* unspecified */
#define El_ABIVERSION_NONE 0 /* unspecified */
#define ET_CORE 4 /* Core file */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_K10M 181 /* Intel K10M */
#define EV_CURRENT 1 /* Current version */
typedef struct {
Elf64_Word p_type;
Elf64_Word p_flags;
Elf64_Off p_offset;
Elf64_Addr p_vaddr;
Elf64_Addr p_paddr;
Elf64_Xword p_filesz;
Elf64_Xword p_memsz;
Elf64_Xword p_align;
} Elf64_Phdr;
/* Segment type constants (named for Elf64_Phdr.p_type). */
#define PT_LOAD 1
#define PT_NOTE 4
/* Segment permission bits (named for Elf64_Phdr.p_flags); may be OR-ed together. */
#define PF_X 1 /* executable bit */
#define PF_W 2 /* writable bit */
#define PF_R 4 /* readable bit */
/*
 * Fixed-size header of one ELF note entry. The variable-length name and
 * descriptor data follow it and are not part of this struct.
 */
struct note {
Elf64_Word namesz; /* length of the name that follows */
Elf64_Word descsz; /* length of the descriptor that follows the name */
Elf64_Word type; /* note type; presumably one of the NT_* values — confirm against the writer */
/* name char[namesz] and desc[descsz] */
};
/* Note type values for core-file notes. */
#define NT_PRSTATUS 1
/* NOTE(review): Linux's elf.h spells this constant NT_PRFPREG — confirm the spelling here is intended. */
#define NT_PRFRPREG 2
#define NT_PRPSINFO 3
#define NT_AUXV 6
#define NT_X86_STATE 0x202
#include "elfcoregpl.h"
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,96 +0,0 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and defines from GPLed file.
*/
/*
 * pid_t is provided as a macro expanding to int; the structures in this
 * file use it for their process-id fields.
 */
#define pid_t int
/* From /usr/include/linux/elfcore.h of Linux */
/* Size in bytes of elf_prpsinfo64.pr_psargs. */
#define ELF_PRARGSZ (80)
/* From /usr/include/linux/elfcore.h of Linux */
/* Signal information embedded at the start of struct elf_prstatus64. */
struct elf_siginfo
{
int si_signo;
int si_code;
int si_errno;
};
/* From bfd/hosts/x86-64linux.h of gdb. */
/* 64-bit unsigned integer with its alignment forced to 8 bytes. */
typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t;
/* Type of one entry in the general-purpose register set. */
typedef a8_uint64_t elf_greg64_t;
/*
 * x86-64 general-purpose register image. Every member is an a8_uint64_t,
 * so the struct can also be viewed as an array of elf_greg64_t
 * (see elf_gregset64_t below). Member order defines the layout; do not
 * reorder.
 */
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
/* Number of elf_greg64_t entries that make up struct user_regs64_struct. */
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
/* The register set expressed as an array of ELF_NGREG64 entries. */
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
/*
 * Seconds/microseconds pair with both members as 8-byte-aligned 64-bit
 * values; used for the time fields of struct elf_prstatus64.
 */
struct prstatus64_timeval
{
a8_uint64_t tv_sec;
a8_uint64_t tv_usec;
};
/*
 * Per-thread status record for a core file: signal information, process
 * ids, accumulated times and the general-purpose register set.
 * NOTE(review): presumably the payload of an NT_PRSTATUS note — confirm
 * against the code that writes it. Member order defines the layout.
 */
struct elf_prstatus64
{
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
elf_gregset64_t pr_reg; /* general-purpose registers, ELF_NGREG64 entries */
int pr_fpvalid;
};
/*
 * Process-level information record for a core file: state characters,
 * ids, and fixed-size name/argument buffers.
 * NOTE(review): presumably the payload of an NT_PRPSINFO note — confirm
 * against the code that writes it. Member order defines the layout.
 */
struct elf_prpsinfo64
{
char pr_state;
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16]; /* fixed 16-byte buffer */
char pr_psargs[ELF_PRARGSZ]; /* fixed ELF_PRARGSZ-byte buffer */
};
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -1,5 +1,4 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifdef POSTK_DEBUG_ARCH_DEP_65
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017-2018 */
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
@ -9,4 +8,3 @@ static unsigned long arch_get_hwcap(void)
}
#endif /* _UAPI__ASM_HWCAP_H */
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -13,6 +13,8 @@
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
#define HEADER_X86_COMMON_IHK_ATOMIC_H
#include <lwk/compiler.h>
/***********************************************************************
* ihk_atomic_t
*/
@ -114,7 +116,7 @@ static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
static inline void ihk_atomic64_set(ihk_atomic64_t *v, long i)
{
v->counter64 = i;
}
@ -124,6 +126,22 @@ static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
asm volatile ("lock incq %0" : "+m"(v->counter64));
}
/*
 * Atomically add i to v->counter64 and return the resulting value.
 *
 * @i: amount to add
 * @v: atomic 64-bit counter to update
 *
 * Returns the counter value after the addition.
 */
static inline long ihk_atomic64_add_return(long i, ihk_atomic64_t *v)
{
	/* Keep the addend: the asm below uses i as an in/out operand. */
	const long addend = i;

	asm volatile("lock xaddq %0, %1"
		     : "+r" (i), "+m" (v->counter64)
		     :
		     : "memory");

	/* xadd leaves the counter's previous value in i. */
	return i + addend;
}
/*
 * Atomically subtract i from v->counter64 and return the resulting value.
 * Implemented as an atomic add of the negated amount.
 *
 * @i: amount to subtract
 * @v: atomic 64-bit counter to update
 */
static inline long ihk_atomic64_sub_return(long i, ihk_atomic64_t *v)
{
	const long negated = -i;

	return ihk_atomic64_add_return(negated, v);
}
/***********************************************************************
* others
*/
@ -156,43 +174,55 @@ static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
return __x;
}
/*
 * __xchg(x, ptr, size) - exchange x with the object at ptr.
 *
 * @x:    value to store
 * @ptr:  pointer to the object to exchange with
 * @size: operand size in bytes; selects the xchgb/xchgw/xchgl/xchgq form
 *
 * Evaluates to the local __x after the exchange; __x is both the input and
 * the output operand of the xchg instruction. Any size other than 1, 2, 4
 * or 8 reaches the default case and calls panic() at run time.
 * __xg() is defined outside this excerpt.
 */
#define __xchg(x, ptr, size) \
({ \
__typeof(*(ptr)) __x = (x); \
switch (size) { \
case 1: \
asm volatile("xchgb %b0,%1" \
: "=q" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 2: \
asm volatile("xchgw %w0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 4: \
asm volatile("xchgl %k0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 8: \
asm volatile("xchgq %0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
default: \
panic("xchg for wrong size"); \
} \
__x; \
})
/* Operand sizes in bytes used as case labels by the size-dispatching macros. */
#define __X86_CASE_B 1
#define __X86_CASE_W 2
#define __X86_CASE_L 4
#define __X86_CASE_Q 8
/*
 * Declared but called only from the default case of __xchg_op().
 * NOTE(review): __compiletime_error is not defined in this excerpt;
 * presumably it makes any surviving call a build error — confirm in
 * <lwk/compiler.h>.
 */
extern void __xchg_wrong_size(void)
__compiletime_error("Bad argument size for xchg");
/*
 * xchg(ptr, v) - store v at *ptr via __xchg(), passing sizeof(*ptr) as the
 * operand size; evaluates to whatever __xchg() evaluates to.
 */
#define xchg(ptr, v) \
__xchg((v), (ptr), sizeof(*ptr))
/*
 * An exchange-type operation, which takes a value and a pointer, and
 * returns the old value.
 *
 * @ptr:  pointer to the object operated on; sizeof(*(ptr)) selects the
 *        b/w/l/q instruction suffix
 * @arg:  value loaded into the register operand before the instruction
 * @op:   instruction mnemonic without size suffix (stringified with #op)
 * @lock: string literal placed in front of the mnemonic (may be "")
 *
 * Evaluates to __ret, which is the in/out register operand of the
 * instruction. An unsupported size reaches __xchg_wrong_size().
 */
#define __xchg_op(ptr, arg, op, lock) \
({ \
__typeof__(*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
asm volatile (lock #op "b %b0, %1\n" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
asm volatile (lock #op "w %w0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
asm volatile (lock #op "l %0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
asm volatile (lock #op "q %q0, %1\n" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
default: \
__xchg_wrong_size(); \
} \
__ret; \
})
/*
 * xchg(ptr, v) - store v at *ptr and evaluate to the value __xchg_op()
 * yields for the "xchg" instruction.
 *
 * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
 * Since this is generally used to protect other memory information, we
 * use "asm volatile" and "memory" clobbers to prevent gcc from moving
 * information around.
 */
#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
unsigned long oldval,
@ -241,4 +271,66 @@ static inline unsigned long ihk_atomic_add_long_return(long i, long *v) {
return i + __i;
}
/*
 * Declared but called only from the default case of __raw_cmpxchg().
 * NOTE(review): __compiletime_error is not defined in this excerpt;
 * presumably it makes any surviving call a build error — confirm in
 * <lwk/compiler.h>.
 */
extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 *
 * @ptr:  pointer to the object operated on
 * @old:  expected value; tied to the same register as the result
 *        (the "a" constraint)
 * @new:  value to store when the comparison succeeds
 * @size: operand size in bytes; selects cmpxchgb/w/l/q via __X86_CASE_*
 * @lock: string literal placed in front of the instruction
 *
 * old and new are first converted to the type of *(ptr). Each case
 * accesses the object through a volatile pointer of the matching
 * fixed-width unsigned type. An unsupported size reaches
 * __cmpxchg_wrong_size().
 */
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case __X86_CASE_B: \
{ \
volatile uint8_t *__ptr = (volatile uint8_t *)(ptr);\
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_W: \
{ \
volatile uint16_t *__ptr = (volatile uint16_t *)(ptr);\
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_L: \
{ \
volatile uint32_t *__ptr = (volatile uint32_t *)(ptr);\
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case __X86_CASE_Q: \
{ \
volatile uint64_t *__ptr = (volatile uint64_t *)(ptr);\
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#endif

View File

@ -1,3 +1,4 @@
/* types.h COPYRIGHT FUJITSU LIMITED 2018 */
/**
* \file types.h
* Licence details are found in the file LICENSE.
@ -29,13 +30,11 @@ typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/* Fixed-width definitions of the POSIX-style id, key and time types. */
typedef int32_t key_t; /* signed 32-bit */
typedef uint32_t uid_t; /* unsigned 32-bit */
typedef uint32_t gid_t; /* unsigned 32-bit */
typedef int64_t time_t; /* signed 64-bit */
typedef int32_t pid_t; /* signed 32-bit */
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
#define NULL ((void *)0)

View File

@ -9,6 +9,9 @@
#ifndef __ARCH_PRCTL_H
#define __ARCH_PRCTL_H
/* NOTE(review): presumably prctl() option numbers matching Linux's <linux/prctl.h> — confirm. */
#define PR_SET_THP_DISABLE 41
#define PR_GET_THP_DISABLE 42
/* NOTE(review): presumably arch_prctl() sub-function codes for the FS/GS base — confirm. */
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003

Some files were not shown because too many files have changed in this diff Show More