Compare commits

...

134 Commits

Author SHA1 Message Date
a82d161be8 prerelease: 0.93: investigate smp_ihk_os_panic_notifier
Change-Id: I997b41f80038603261de2e8232b6b8ca200cd8cd
2021-02-09 21:39:49 -05:00
7152269a59 spec: create one rpm including .ko and binaries
Don't use kernel_module_package, so that a separate
kmod-mckernel-*.rpm containing .ko files is not created.

Change-Id: I25b7ff662476bfc735d319b57cdf2da82f2c6aa7
2021-02-09 20:55:38 -05:00
31c08bcb7d spec, docs: update cmake options
Change-Id: Ib8277413a413b5ce956a48f7e3d9922311937ea8
2021-02-09 20:55:38 -05:00
dffb0918a2 docs: add capstone installation options
Change-Id: I96aa9a6405c17f8d9653f3d3894f0e71a57ab460
2021-02-09 06:10:32 +00:00
23cd14af7d __mcctrl_os_read_write_cpu_register: timeout in 1 sec for when McKernel can't respond
Change-Id: Ia2d5f64e107697dda1f3bae499eb3afb8a7aedba
2021-02-09 06:09:11 +00:00
a5cf2019bc cmake: fix detection of Fugaku native compilation
Change-Id: I4210e9b57223c3869464caea10c2d414e9484e14
2021-02-09 06:06:13 +00:00
11b9fe0377 page_fault_handler: fix missing increment of in_page_fault on SEGV
This integrates some of the changes of the following commit:
1cf0bd5a ("TO RESET: add debug instruments, map Linux areas for tofu")

Change-Id: Iffd8432d5a7b35f20bd45829a125583a0363dbf0
2021-02-09 00:56:15 -05:00
4905c8e638 mcexec: propagate error in __NR_gettid handler
Change-Id: I0e0f06199970fe839065567dcd5418d017b6ec00
2021-02-03 18:53:33 -05:00
3d71c6a8eb mcexec_transfer_image(): map exact size of remote memory (instead of forcing PAGE_SIZE)
Change-Id: Ic66770af6cdb15b7a2e18a08cbcd1736e5558bdf
2021-02-03 18:53:33 -05:00
1cea75dd51 mcexec: fix strncat missing NULL and pclose of uninitialized
Change-Id: I9ce4004580845a983949caa5668b2f950880cd24
2021-02-02 01:51:57 +00:00
661ba0ce4a docs: add editing spec file when building rpm
Change-Id: Ic8dc9d8c6aef6d2180844891d743a09f4a3bdd9d
2021-01-29 01:23:35 +00:00
7e82adc761 prerelease: 0.92: fix uninitialized usrdata->cpu_topology_list
Change-Id: Ia12970bda1225898823a67c2d0461144fc62ebb9
2021-01-29 09:50:53 +09:00
1f9fbe82db mcctrl: fix access to uninitialized usrdata->cpu_topology_list
Change-Id: I25a9182b9b470bb069f4f755a67fb50b88817cd2
2021-01-29 09:34:24 +09:00
aa3d4ba7bd spec: prerelease 0.91 for 4.18.0-240.8.1.el8_3.aarch64 support
Change-Id: I8b33714157b1c68c1fc1eadf0b9d072a3ee59608
2021-01-26 02:34:35 -05:00
c89ac042f9 spec: prerelease 0.9 for testing hidos and cgroup check
Change-Id: I3b04fbf3a1ffa10df9c76da7b2730b9a2521bf98
2021-01-20 13:03:16 +09:00
0f1fc88ce9 spec: prerelease 0.8 for testing hidos and cgroup check
Change-Id: I6261380ab8e99d39191cbd8aac851038cdeb5ce2
2021-01-19 17:34:45 +09:00
bbc6565e7e docs: users: add how to specify boot parameters with Fujitsu TCS
Change-Id: I0216603388780d0e5497373598c3151812238932
2021-01-19 04:03:05 +00:00
1a29f8213f spec: prerelease 0.7 for testing hidos and cgroup check
Change-Id: I17f1608051a8f8ca33d2ba7385b75b8b492d1886
2021-01-19 12:25:06 +09:00
fd21fe7411 copy_user_ranges: copy straight_start of struct vm_range
This fixes the panic in ihk_os_set_ikc_map01 of the ihklib test suite.

Change-Id: Ic03efc81c5ca2c4deaeb06673afef8cef7a1cf92
2021-01-19 00:59:46 +00:00
2460228052 mcctrl: abort on invalid addr in mcexec_transfer_image()
Change-Id: Ic064b6ffc30368ff1d3dfb14403e524cbb837ce5
2021-01-19 00:55:20 +00:00
bf926f234a Tofu: manage stag ranges in VM range split and misc cleanup
Conflicts:
	kernel/process.c

Change-Id: I480850fe93a7963a5bd4d1687fb1e5c43f58057f
2021-01-19 00:55:20 +00:00
507b937509 Tofu: mcctrl side MMU notifier and CQ/BCH cleanup
Conflicts:
	executer/kernel/mcctrl/arch/arm64/archdeps.c
	executer/kernel/mcctrl/syscall.c

Change-Id: Ided8172331a5469c6ced68fa98a42302812efe71
2021-01-19 00:55:20 +00:00
a99cf99396 cmake: add switch to turn on/off krm workaround
Change-Id: I2dfd3d7f3373cce714247f9fc36bf5040a2a8fad
2021-01-19 00:52:53 +00:00
6f373186bf docs: add specifications of IHK and McKernel
Change-Id: I523ad68c5627ca1081c0c8684606a08101982ec9
2021-01-18 08:24:37 +00:00
6667321dc1 spec: prerelease 0.6 for testing capped best-effort memory reservation
Change-Id: Iaa91b311ee6879e84ce862aeabb4bd1fcd95d35f
2021-01-07 11:14:22 +09:00
f849745b60 spec: prerelease 0.5 for testing capped best-effort memory reservation
Change-Id: I139d6e24fbadb7313116029005e115053f31a899
2021-01-07 10:56:27 +09:00
78bc06d998 cmake: set default value of ENABLE_FUGAKU_DEBUG to OFF
Change-Id: I70703410922aa1d1440d61ead6e225d92cf60003
2021-01-07 10:42:36 +09:00
d726bd3d11 profile: fix definition of PROFILE_ENABLE and __NR_profile
Change-Id: I3f9f5870f8380d3668e1ccb06fd0f6d3307e3fa4
2021-01-06 01:03:17 +00:00
df37d6867f docs: add scheduling limitations
Change-Id: Ida4a16efa4d47f448da7417a3b4bdb5fb5304fcd
2021-01-06 09:58:38 +09:00
a4b5410d0c docs: add mlockall/munlockall limitations
Change-Id: I01d1c4eb6955baee89f6827748ac8ce4082884da
2021-01-04 12:57:32 +09:00
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn is added with the following commit.
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
To prevent "TO RESET: send SIGSTOP instead of SIGV in PF" from making
some tests expecting SIGSEGV fail.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
2fa1c053d7 spec: prerelease 0.3 for testing ihk_reserve_mem and memory policy
Change-Id: I4fbcfa1f93522fd01af42d1ef13d0be075086773
2020-12-24 15:11:01 +09:00
530110e3a9 Tofu: fix ENABLE_TOFU switching
Change-Id: Ib33323d4b59ea8fb4f5f40dff7ea25a36773d5e2
2020-12-24 15:00:14 +09:00
f6ed44aeec spec: prerelease 0.2 for testing ihk_reserve_mem and memory policy
Change-Id: I9ff171c5d65b5f465ce7a2767be1a710de0a0400
2020-12-24 11:23:17 +09:00
33dd2e60b1 mcexec: memory policy control by environmental variable
Refs: #1470
Change-Id: I3d556cae90d31d81572b1c4e5c680e826577d428
2020-12-24 11:18:01 +09:00
ed670c03af spec: prerelease 0.1 for testing ihk_create_os_str
Change-Id: I3c9bbc6f3c9e8951c0ad700b9c02fcdec65018ff
2020-12-23 11:33:31 +09:00
e5f4a4e87d Tofu: proper cleanup of device files when mcexec gets killed
Change-Id: I6cb0290f72d96682700f945b29585e132e525ac1
2020-12-09 13:05:54 +09:00
1918df7765 Tofu: support for barrier gate, kmalloc cache
Change-Id: I6f4cfec2ec404efd03b332fc3f449a775816230e
2020-12-09 13:05:54 +09:00
5d784f3ea4 kernel: increase stack size
Change-Id: I27698149e9206138402dcc65db0078d5dbf548cb
2020-12-09 13:05:53 +09:00
10c09aa10e MM: generic lockless kmalloc and page cache
Change-Id: I71ad498fdd10136d9c72ffe2b16b9122d1bc9673
2020-12-09 13:05:53 +09:00
41f5c0bdde MM: deferred zero cleaning on Linux CPUs
Change-Id: Icdb8ac807688533be7a95b7101edfd904250cd02
2020-12-09 13:05:53 +09:00
e7b8aeb4f7 Tofu: per-fd path memory leak fix
Change-Id: I451472365806333adfac6dae32746195e3c30694
2020-12-09 13:05:53 +09:00
1b3dd45dbc MM: straight mapping memory leak fix
Change-Id: I7d841fbedb1db498b5994eb69b0350df7a5cefb0
2020-12-09 13:05:53 +09:00
623d6f8bc3 arm64: record register state at kernel mode page fault (for eclair)
Change-Id: I066bceecc0377110faaca0b21d45a476d000e684
2020-12-09 13:05:53 +09:00
92902d36fc Tofu: initial version
Change-Id: I9c464d5af883c18715a97ca9e9981cf73b260f90
2020-12-09 13:03:01 +09:00
fe83deb3db profile: make header user-space includable
Change-Id: I4a88d9be7c169f29ef6f6328e8576a3fe3b6e34f
2020-12-08 12:32:10 +09:00
e056cb799f memclear: non-temporal memory clean (arm64)
Change-Id: I8f80ff20e98bc01088450282e1790c27c67c16eb
2020-12-08 12:32:10 +09:00
201f5ce500 MM: straight mapping
Change-Id: I70871f8c382fb00aa719ed501cc5de436d916d7f
2020-12-08 12:32:10 +09:00
100bbe6231 MM: zero memory at free and deferred zero
Change-Id: Ib0055d6f2bdd10d05d749dcd1f3d5c3d318f22f3
2020-12-08 12:32:10 +09:00
fbd121d28c mmap: return -EINVAL for non-anonymous, MAP_HUGETLB map
Change-Id: I2bcbbf0ee9c0f47160eabac4a8d09991c71fe852
2020-12-07 15:23:38 +09:00
d1d93d90cc mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0ce1b2d48cda10713920cb88692e107b8c4d3bab
Refs: #929
2020-12-07 15:23:34 +09:00
45bc6a617a __return_syscall: check input & fix unmap memory in error cases
Change-Id: I5de3ab3acd46770518b79bdc6f1c2e00c1cd5096
2020-11-25 01:58:47 +00:00
924ba7fd65 mcctrl_ikc_send_wait: free desc only if we allocated it internally
Change-Id: I4710ea6bb31f098451347c53ac0ff0be422aec06
2020-11-25 01:58:47 +00:00
2814f7cac4 mcctrl_get_request_os_cpu: check os instance & ret_cpu
Change-Id: I4d3f6fd93eaa183d560c874ba33add83c4308c5a
2020-11-25 01:58:47 +00:00
b510de7bd5 mcctrl_perf_get: check os instance & cpu info
Change-Id: Ic4f9d818b7d58f8ae651e43175fb1c478baec9c1
2020-11-25 01:58:47 +00:00
3e927f61dc mcctrl_perf_disable: check os instance & cpu info
Change-Id: I7195272a65b31db72158f5e5bbfc490bac547b91
2020-11-25 01:58:47 +00:00
64579830dd mcctrl_perf_enable: check os instance & cpu info
Change-Id: I31ab829d63833f924af17445fd9b8488d6eb454f
2020-11-25 01:58:47 +00:00
3cc98883f5 delete_procfs_entries: fix possible crash if top entry has no children
Change-Id: I209842699615f9bb58c12ccd262ae4b17f8f558c
2020-11-25 01:58:47 +00:00
442045a320 mcctrl_ikc_send: validate os and check input packet
Change-Id: I1f8c2228043841685617b665eeeaf2ce15a08703
2020-11-25 01:58:47 +00:00
fe5d8fc71f mcctrl_getrusage: validate os input
Change-Id: I97908069f8bc4703b99f9ffca94f3dd33eb64cc4
2020-11-25 01:58:47 +00:00
550c6cc5fb mcctrl_perf_set : validate os input & check cpu info
Change-Id: If308013746ff6dce03fa8e0eb1ebaca1cb2a4a64
2020-11-25 01:58:47 +00:00
8c0b2ab6ce mcctrl_perf_num: check "os" argument
Change-Id: I13c8b0c337cac9bbb240667808e871defce34aab
2020-11-25 01:58:47 +00:00
239b1b265f release 1.7.0
Change-Id: I8413aa2d051c6164235816bae2823187870efe49
2020-11-25 10:51:40 +09:00
f646fd141b prerelease 0.96: ihk_reserve_mem: balanced, capped best effort
Change-Id: Ia98c87e651d8dd34dfd36bc0c45f1d23e245330d
2020-11-24 03:40:01 +00:00
734d1cc056 ihk submodule update: ihklib: ihk_create_os_str: add ihk_reserve_mem_conf equivalent
Change-Id: Iede1a043b0316d6541656e86091f2288fd299383
2020-11-24 03:40:01 +00:00
040a9c0c7f cmake: set QEMU_LD_PREFIX when cross-compiling
Change-Id: Ie7b86ddba344e02d6f739225e44f3ad4927f5a2f
2020-11-20 07:59:55 +00:00
8784ee4710 spec: prerelease 0.95 for testing /dev/mcosN related fix
Change-Id: I02397984cd5c4c3a3e83968ff03cf9a68e84d200
2020-09-07 16:12:09 +09:00
3a761c138e ihk submodule update: ihklib, ihkmond: fix /dev/mcosN related issues
Change-Id: I533b277f249dc4afc84929dd2bf22c19648e21d1
2020-09-07 16:11:36 +09:00
e21a3a5af3 spec: prerelease 0.94 for testing ihk_create_os_str
Change-Id: If30f6ccf269dbdbbd564498318b741a88d46a2a1
2020-09-04 12:04:01 +09:00
cd33c88025 ihk submodule update: ihklib: turn off debug messages
Change-Id: I9adc4843bd4e2d2606e0100f855c83b47a144863
2020-09-04 12:03:58 +09:00
d78a0fb74c docs: NEWS.rst: add 1.7.0-0.93 updates
Change-Id: If4f41f6d26c2da60711568f02444cf033d82a3d5
2020-09-02 01:47:20 +00:00
9f815324a4 spec: prerelease 0.93 for testing ihk_create_os_str
Change-Id: Id31646c88da0640a3d58e7805fa61f0e0583ff1c
2020-09-01 15:53:10 +09:00
2748f06c1f ihk submodule update: ihklib: add ihk_create_os_str
Change-Id: Ia219a4463562de3b9d94f8b57ba52ff19f07e721
2020-09-01 15:06:32 +09:00
a7f892113a spec: prerelease 0.92 for testing RHEL-8.3 compat
Change-Id: Ie4dbfb253aa3ddd384ed1ad481e87e5f0e042e03
2020-08-31 02:11:09 -04:00
89c696afc5 ihk submodule update: gic_chip_data: compat: RHEL-8.3
Change-Id: Ibdf67f012d66c01ed3f6a486624e6a32a42ba0e7
2020-08-31 02:04:34 -04:00
e17e86840b docs: switch to https://ihkmckernel.readthedocs.io and add contents
Change-Id: I9515034ac372dbe554e1010f646b382c5dc94458
2020-08-19 12:44:03 +09:00
0de6c6b8f9 spec: prerelease 0.91 for testing removal of mcexec -n option
Change-Id: I2b18b5fefec570bfb7a4aa0823fe97d9ea93e208
2020-08-12 13:12:06 +09:00
5ffad78b87 mcexec: use FLIB_NUM_PROCESS_ON_NODE when -n not specified (Fugaku specific)
Change-Id: I1668fecfac692d56076dd10e6e03fbf992e323ec
2020-08-12 07:30:11 +09:00
542418b1fc spec: prerelease 0.9 for testing libdwarf related package requirements
Change-Id: Iaaa116018505c4f89813883f5a99c8194cb4f99e
2020-07-29 12:22:08 +09:00
b95a2fcfab spec, README.md: fix libdwarf related package requirements
Change-Id: I460d440e33d0ff5e8ab3d4f7b328f7f2ea11bc16
2020-07-29 12:08:04 +09:00
1b11496f26 spec, README.md: add package dependency including libdwarf
Change-Id: Ie612c5dc642a9f5d6d2ba31747adb991cb568113
2020-07-22 06:59:37 +00:00
7c0e624b13 spec: prerelease 0.8 for testing mcexec -n issue
Change-Id: Ie54f7bc74097c8390f75ddbd0d6e58a8ea87ea7c
2020-07-21 13:31:45 +09:00
0b66bab992 Revert "mcexec: detect mismatch of mcexec -n and mpirun -ppn"
This reverts commit 1d135492c3.

Conflicts:
	executer/kernel/mcctrl/control.c

Change-Id: I224cced408aa4b77691a153c5e1d2fdf8043fa04
2020-07-21 13:08:21 +09:00
63ed4e7af0 spec: prerelease 0.7 for testing hugetlb map for stack
Change-Id: I4997340cd984ca8915e45749b91b1d72c1de85af
2020-07-20 08:11:40 +09:00
d7cf39883f Revert "shmobj: Support large page"
This reverts commit 9a60997ea0.

Change-Id: Id60959b4e03451987239faa0bbc2e780b72fafaa
2020-07-19 12:53:45 +00:00
40f8091fab stack: grow on page fault
The steps of the technique to replace stack with hugetlbfs map are as
follows:

(1) Prepare a hugetlbfs map with the size of rlim_cur
(2) Copy the active region of the stack to the hugetlbfs map.
    The range to copy is determined by reading /proc/[pid]/maps.
(3) Replace the stack map with the hugetlbfs map

The step (2) tries to copy a huge region if McKernel doesn't grow the
stack at run-time.

Change-Id: I5858c35b5c26dd0a42cccf9e3cc4c64b1a81f160
2020-07-19 12:53:31 +00:00
a20e1acf01 syscall: add prlimit64
Change-Id: Iad882813d54b439c236c0df74dc81508190e6707
2020-07-19 21:52:46 +09:00
b3d7bbda56 rus_vm_fault: compat: RHEL-8.2
This applies the following patch:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396
mm: change return type to vm_fault_t

Change-Id: I7189fc92824d21b4906f1033f1de5899bbad4680
2020-07-15 13:02:32 +09:00
9a60997ea0 shmobj: Support large page
Mixing page sizes is allowed by shmobj.

Change-Id: Ic48b71da2db6ce3f68fa3dbc8ad5ae96347d6018
Refs: #1381
Refs: #1458
2020-07-15 03:50:56 +00:00
4b66373813 mcexec: Don't forward SIGTSTP SIGTTIN SIGTTOUT to mckernel
Change-Id: I72bb74d6b98e1f0bf519c8f0fef742624a2a699a
Refs: #1425
2020-07-14 08:34:11 +00:00
b44b11ace7 set_robust_list: Add error check
set_robust_list is not supported by McKernel.

Change-Id: I1f679e2e4df24139cceb1f2294bc072cb7956002
Refs: 1399
2020-07-14 01:06:49 +00:00
ebc91cea0e tgkill: Fix argument validation
Formerly, if tgid is specified as -1, tgkill() was equivalent to tkill().
Now it is treated as an error EINVAL.

Change-Id: I47bc75d439662a36dc6167c4446a5277422de507
Refs: 1380
2020-07-14 01:03:47 +00:00
58106d791a struct process: fix type of group_exit_status
Change-Id: Ib8492cbb077106cef1d0fa2d6d5e8e13bbb209c0
Refs: #1377
2020-07-13 08:33:07 +00:00
56b51d4f97 spec: prerelease 0.6 for testing cpuinfo and mmap overcommit
Change-Id: Iab5acc2c08ebe19251c37782cff87a4b5c914448
2020-07-13 10:14:23 +09:00
bafe540d86 mmap: allow unlimited overcommit
Change-Id: Iba07b5c504b4a202cd163ce682f3fc72a31284a0
2020-07-10 14:52:57 +09:00
d78a0fd05d sysinfo: support basic entries
Change-Id: I27f3e55058cc29f895831a1dddfafbc8585746a5
refs: #1389
2020-07-10 14:51:25 +09:00
999bc91b4f arch: Move some functions from arch-dependent to common part
Moved syscall rt_sigaction and functions related to signal.

Change-Id: I39f619e008d9c6018d91099a76dfb30e48757673
Refs: 1487
2020-07-10 03:54:28 +00:00
b3bd2ea9b3 procfs cpuinfo: use sequence number as processor
Change-Id: Id54ea74c5fda198a0bb9c9b6a19e6799fee0ed3f
2020-07-09 13:10:08 +09:00
d3d9e2400d test: ihklib: syscall_list.h: add robust marker for patch
Change-Id: Ie5f72b4b296db4d44e9839f38fd9a68854be78c3
2020-07-06 16:25:11 +09:00
199407b2a1 spec: prerelease 0.5 for testing ppoll
Change-Id: I51deb1c1703a986ba0aa4e02da9f53009554dbb7
2020-07-01 08:49:08 +09:00
5973d66e2d Revert "epoll_wait(): make sure to schedule in offload"
This reverts commit 5e44c9c9f9.

Change-Id: I826336f1ece31a84072c3e62c6c6c68a641e8fb5
2020-06-30 17:11:26 +09:00
d7ef74659b Revert "epoll, ppoll: deschedule on offload, don't do it when exiting system call"
This reverts commit d4056acfc3.

Change-Id: I7df15b9d3957ca571f4b4e2d576799f8b97ae299
2020-06-30 17:11:23 +09:00
ac86affecc mcexec: fix FLIB_AFFINITY_ON_PROCESS mask for McKernel CPU numbers (Fugaku)
Change-Id: If42b139fb53866bcff0809d898d4a2a712946f0c
2020-06-30 16:29:03 +09:00
2026cf8dad mcexec: explicit CPU list in partitioned execution (for Fujitsu's FLIB_AFFINITY_ON_PROCESS)
Change-Id: I05c11f73553de8ccb5f79083ce2115ac57e62584
2020-06-30 16:29:00 +09:00
1d135492c3 mcexec: detect mismatch of mcexec -n and mpirun -ppn
Change-Id: I0c42e3119143da40ea2e69cd9ec99bde78a0ad2a
Refs: #929
2020-06-30 16:28:08 +09:00
1cfc5ca71f spec: prerelease 0.4 for testing cross-compile
Change-Id: I26908b6b415483711f55338e45d7b2d862b5c028
2020-06-23 08:34:10 +00:00
7ee533d620 spec: remove unnecessary mcinspect*.debug file
Fixes: 612f364 "spec: include recently added debug tools"
Change-Id: I29779132567d18f9468e3cecf2c713ad1c51729b
2020-06-23 08:34:10 +00:00
28334c7a29 cmake: treat libdwarf as required library when cross-compiling
Change-Id: I23ffb46c867b05de0e732c96912d62c630ebb44c
2020-06-23 16:18:35 +09:00
697e9386b3 cmake: fix resolving dwarf.h
Fixes: 0e787b7 "cmake: fix resolving libdwarf"
Change-Id: Iccb491c8ad07db0f15f6b1798ee8a91edc808cf7
2020-06-22 13:33:50 +09:00
212 changed files with 21657 additions and 1695 deletions

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.6)
cmake_minimum_required(VERSION 3.11)
if (NOT CMAKE_BUILD_TYPE)
set (CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type: Debug Release..." FORCE)
@ -7,10 +7,10 @@ endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM)
project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.0")
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidlines -- Versioning"
set(MCKERNEL_RELEASE "0.3")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.93")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@ -50,6 +50,64 @@ if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
execute_process(COMMAND bash -c "ls -ld /proc/tofu/ 2>/dev/null | wc -l"
OUTPUT_VARIABLE PROC_TOFU OUTPUT_STRIP_TRAILING_WHITESPACE)
if(PROC_TOFU STREQUAL "1")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
else()
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
endif()
if(ENABLE_TOFU)
add_definitions(-DENABLE_TOFU)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
# when compiling on a compute-node
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2 | grep -o CN"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)
else()
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" OFF)
endif()
if(ENABLE_FUGAKU_HACKS)
add_definitions(-DENABLE_FUGAKU_HACKS)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_HACKS")
endif()
# krm that mandates reserved memory amount >= available at boot time?
execute_process(COMMAND bash -c "rpm -qi FJSVpxkrm-plugin-mckernel | awk '$1 == \"Version\" && $2 == \":\" { print $3 }'"
OUTPUT_VARIABLE KRM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
message("KRM_VERSION: ${KRM_VERSION}")
if(NOT "${KRM_VERSION}" STREQUAL "" AND "${KRM_VERSION}" VERSION_LESS_EQUAL 4.0.1)
option(ENABLE_KRM_WORKAROUND "krm workaround" ON)
else()
option(ENABLE_KRM_WORKAROUND "krm workaround" OFF)
endif()
if(ENABLE_KRM_WORKAROUND)
add_definitions(-DENABLE_KRM_WORKAROUND)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_KRM_WORKAROUND")
endif()
# SIGSTOP instead of SIGSEGV, additional IHK Linux kmsg
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" OFF)
if(ENABLE_FUGAKU_DEBUG)
add_definitions(-DENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
option(PROFILE_ENABLE "System call profile" ON)
if(PROFILE_ENABLE)
add_definitions(-DPROFILE_ENABLE)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DPROFILE_ENABLE")
endif()
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
@ -136,17 +194,19 @@ if (NOT LIBIBERTY)
message(FATAL_ERROR "error: couldn't find libiberty")
endif()
# libdwarf-devel provides /usr/lib64/libdwarf.so
find_library(LIBDWARF dwarf)
# elfutils-devel provides /usr/include/dwarf.h
find_library(LIBEBL ebl)
if ((NOT LIBDWARF) OR (NOT LIBEBL))
if (NOT LIBDWARF)
if (CMAKE_CROSSCOMPILING)
message(FATAL_ERROR "Could not find libdwarf.so, install libdwarf-devel to ${CMAKE_FIND_ROOT_PATH}")
endif()
message("WARNING: libdwarf will be compiled locally")
set(LIBDWARF LIBDWARF-NOTFOUND)
set(LIBEBL LIBEBL-NOTFOUND)
enable_language(CXX)
else()
# Note that libdwarf-devel provides /usr/include/libdwarf/dwarf.h
# but elfutils-devel provides /usr/include/dwarf.h
# while mcinspect.c performs "#include <dwarf.h>"
find_path(DWARF_H dwarf.h PATH_SUFFIXES libdwarf)
endif()
if (ENABLE_QLMPI)
@ -250,6 +310,11 @@ message("KBUILD_C_FLAGS: ${KBUILD_C_FLAGS}")
message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("ENABLE_KRM_WORKAROUND: ${ENABLE_KRM_WORKAROUND}")
message("PROFILE_ENABLE: ${PROFILE_ENABLE}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")

537
NEWS.md
View File

@ -1,537 +0,0 @@
=============================================
What's new in version 1.7.0rc4 (Apr 15, 2020)
=============================================
----------------------
McKernel major updates
----------------------
1. arm64: Contiguous PTE support
2. arm64: Scalable Vector Extension (SVE) support
3. arm64: PMU overflow interrupt support
4. xpmem: Support large page attachment
5. arm64 port: Direct access to Mckernel memory from Linux
6. arm64 port: utility thread offloading, which spawns thread onto
Linux CPU
7. eclair: support for live debug
8. Crash utility extension
9. Replace mcoverlayfs with a soft userspace overlay
10. Build system is switched to cmake
11. Core dump includes thread information
------------------------
McKernel major bug fixes
------------------------
1. shmobj: Fix rusage counting for large page
2. mcctrl control: task start_time changed to u64 nsec
3. mcctrl: add handling for one more level of page tables
4. Add kernel argument to turn on/off time sharing
5. flatten_string/process env: realign env and clear trailing bits
6. madvise: Add MADV_HUGEPAGE support
8. mcctrl: remove in-kernel calls to syscalls
9. arch_cpu_read_write_register: error return fix.
10. set_cputime(): interrupt enable/disable fix.
11. set_mempolicy(): Add mode check.
12. mbind(): Fix memory_range_lock deadlock.
13. ihk_ikc_recv: Record channel to packet for release
14. Add set_cputime() kernel to kernel case and mode enum.
15. execve: Call preempt_enable() before error-exit
16. memory/x86_64: fix linux safe_kernel_map
17. do_kill(): fix pids table when nr of threads is larger than num_processors
18. shmget: Use transparent huge pages when page size isn't specified
19. prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
20. monitor_init: fix undetected hang on highest numbered core
21. init_process_stack: change premapped stack size based on arch
22. x86 syscalls: add a bunch of XXat() delegated syscalls
23. do_pageout: fix direct kernel-user access
24. stack: add hwcap auxval
25. perf counters: add arch-specific perf counters
26. Added check of nohost to terminate_host().
27. kmalloc: Fix address order in free list
28. sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
29. monitor_init: Use ihk_mc_cpu_info()
30. Fix ThunderX2 write-combined PTE flag insanity
31. ARM: eliminate zero page mapping (i.e, init_low_area())
32. eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
33. page_table: Fix return value of lookup_pte when ptl4 is blank
34. sysfs: add missing symlinks for cpu/node
35. Make Linux handler run when mmap to procfs.
36. Separate mmap area from program loading (relocation) area
37. move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
38. arm: turn off cpu on panic
39. page fault handler: protect thread accesses
40. Register PPD and release_handler at the same time.
41. fix to missing exclusive processing between terminate() and
finalize_process().
42. perfctr_stop: add flags to no 'disable_intens'
43. fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
44. clear_range_l1, clear_range_middle: Fix handling contiguous PTE
45. do_mmap: don't pre-populate the whole file when asked for smaller segment
46. invalidate_one_page: Support shmobj and contiguous PTE
47. ubsan: fix undefined shifts
48. x86: disable zero mapping and add a boot pt for ap trampoline
49. rusage: Don't count PF_PATCH change
50. Fixed time processing.
51. copy_user_pte: vmap area not owned by McKernel
52. gencore: Zero-clear ELF header and memory range table
53. rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
54. gencore: Allocate ELF header to heap instead of stack
55. nanosleep: add cpu_pause() in spinwait loop
56. init_process: add missing initializations to proc struct
57. rus_vm_fault: always use a packet on the stack
58. process stack: use PAGE_SIZE in aux vector
59. copy_user_pte: base memobj copy on range & VR_PRIVATE
60. arm64: ptrace: Fix overwriting 1st argument with return value
61. page fault: use cow for private device mappings
62. reproducible builds: remove most install paths in c code
63. page fault: clear writable bit for non-dirtying access to shared ranges
64. mcreboot/mcstop+release: support for regular user execution
65. irqbalance_mck: replace extra service with service drop-in
66. do_mmap: give addr argument a chance even if not MAP_FIXED
67. x86: fix xchg() and cmpxchg() macros
68. IHK: support for using Linux work IRQ as IKC interrupt (optional)
69. MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
70. procfs: add number of threads to stat and status
71. memory_range_lock: Fix deadlock in procfs/sysfs handler
72. flush instruction cache at context switch time if necessary
73. arm64: Fix PMU related functions
74. page_fault_process_memory_range: Disable COW for VM region with zeroobj
75. extend_process_region: Fall back to demand paging when not contiguous
76. munmap: fix deadlock with remote pagefault on vm range lock
77. procfs: if memory_range_lock fails, process later
78. migrate-cpu: Prevent migration target from calling schedule() twice
79. sched_request_migrate(): fix race condition between migration req and IRQs
80. get_one_cpu_topology: Renumber core_id (physical core id)
81. bb7e140 procfs cpuinfo: use sequence number as processor
82. set_host_vma(): do NOT read protect Linux VMA
===========================================
What's new in V1.6.0 (Nov 11, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. McKernel and Linux share one unified kernel virtual address space.
That is, McKernel sections reside in Linux sections spared for
modules. In this way, Linux can access the McKernel kernel memory
area.
2. hugetlbfs support
3. IHK is now included as a git submodule
4. Debug messages are turned on/off on a per-source-file basis at run-time.
5. It's prohibited for McKernel to access physical memory ranges which
Linux didn't give to McKernel.
6. UTI (capability to spawn a thread on Linux CPU) improvement:
* System calls issued from the thread are hooked by modifying
binary in memory.
---------------------------
McKernel bug fixes (digest)
---------------------------
#<num> below corresponds to the redmine issue number
(https://postpeta.pccluster.org/redmine/).
1. #926: shmget: Hide object with IPC_RMID from shmget
2. #1028: init_process: Inherit parent cpu_set
3. #995: Fix shebang recorded in argv[0]
4. #1024: Fix VMAP virtual address leak
5. #1109: init_process_stack: Support "ulimit -s unlimited"
6. x86 mem init: do not map identity mapping
7. mcexec_wait_syscall: requeue potential request on interrupted wait
8. mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
9. pager_req_read: handle short read
10. kprintf: only call eventfd() if it is safe to interrupt
11. process_procfs_request: Add Pid to /proc/<PID>/status
12. terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
13. mcexec: Do not close fd returned to mckernel side
14. #976: execve: Clear sigaltstack and fp_regs
15. #1002: perf_event: Specify counter by bit_mask on start/stop
16. #1027: schedule: Don't reschedule immediately when wake up on migrate
17. mcctrl: lookup unexported symbols at runtime
18. __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
19. futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
20. #1167: ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
21. devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
22. mcctrl: remove rus page cache
23. #1021: procfs: Support multiple reads of e.g. /proc/*/maps
24. #1006: wait: Delay wake-up parent within switch context
25. #1164: mem: Check if phys-mem is within the range of McKernel memory
26. #1039: page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
27. partitioned execution: pass process rank to LWK
28. process/vm: implement access_ok()
29. spinlock: rewrite spinlock to use Linux ticket head/tail format
30. #986: Fix deadlock involving mmap_sem and memory_range_lock
31. Prevent one CPU from getting chosen by concurrent forks
32. #1009: check_signal: system call restart is done only once
33. #1176: syscall: the signal received during system call processing is not processed.
34. #1036 syscall_time: Handle by McKernel
35. #1165 do_syscall: Delegate system calls to the mcexec with the same pid
36. #1194 execve: Fix calling ptrace_report_signal after preemption is disabled
37. #1005 coredump: Exclude special areas
38. #1018 procfs: Fix pread/pwrite to procfs fail when specified size is bigger than 4MB
39. #1180 sched_setaffinity: Check migration after decrementing in_interrupt
40. #771, #1179, #1143 ptrace supports threads
41. #1189 procfs/do_fork: wait until procfs entries are registered
42. #1114 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
43. #1116 mcctrl procfs: check entry was returned before using it
44. #1167 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC timeouts
45. mcexec/execve: fix shebangs handling
46. procfs: handle 'comm' on mckernel side
47. ihk_os_setperfevent: Return number of registered events
48. mcexec: fix terminating zero after readlink()
===========================================
What's new in V1.5.1 (July 9, 2018)
===========================================
-----------------------------------------------
McKernel new features, improvements and changes
-----------------------------------------------
1. Watchdog timer to detect hang of McKernel
mcexec prints out the following line to its stderr when a hang of
McKernel is detected.
mcexec detected hang of McKernel
The watchdog timer is enabled by passing -i <timeout_in_sec> option
to mcreboot.sh. <timeout_in_sec> specifies the interval of checking
if McKernel is alive.
Example: mcreboot.sh -i 600: Detect the hang with 10 minutes interval
The detailed steps of the hang detection are as follows.
(1) mcexec acquires eventfd for notification from IHK and performs
epoll() on it.
(2) A daemon called ihkmond monitors the state of McKernel periodically
with the interval specified by the -i option. It judges that
McKernel is hanging and notifies mcexec by the eventfd if its
state hasn't changed since the last check.
2. Documentation
man page: Installed directory is changed to <install_dir>/share/man
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #1146: pager_req_map(): do not take mmap_sem if not needed
2. #1135: prepare_process_ranges_args_envs(): fix saving cmdline
3. #1144: fileobj/devobj: record path name
4. #1145: fileobj: use MCS locks for per-file page hash
5. #1076: mcctrl: refactor prepare_image into new generic ikc send&wait
6. #1072: execve: fix execve with oversubscribing
7. #1132: execve: use thread variable instead of cpu_local_var(current)
8. #1117: mprotect: do not set page table writable for cow pages
9. #1143: syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
10. #1064: rusage: Fix initialization of rusage->num_processors
11. #1133: pager_req_unmap: Put per-process data at exit
12. #731: do_fork: Propagate error code returned by mcexec
13. #1149: execve: Reinitialize vm_regions's map area on execve
14. #1065: procfs: Show file names in /proc/<PID>/maps
15. #1112: mremap: Fix type of size arguments (from ssize_t to size_t)
16. #1121: sched_getaffinity: Check arguments in the same order as in Linux
17. #1137: mmap, mremap: Check arguments in the same order as in Linux
18. #1122: fix return value of sched_getaffinity
19. #732: fix: /proc/<PID>/maps outputs an unnecessary NULL character
===================================
What's new in V1.5.0 (Apr 5, 2018)
===================================
--------------------------------------
McKernel new features and improvements
--------------------------------------
1. Aid for Linux version migration: Detect /proc, /sys format change
between two kernel versions
2. Swap out
* Only swap-out anonymous pages for now
3. Improve support of /proc/maps
4. mcstat: Linux tool to show resource usage
---------------------------
McKernel bug fixes (digest)
---------------------------
1. #727: execve: Fix memory leak when receiving SIGKILL
2. #829: perf_event_open: Support PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
3. #906: mcexec: Check return code of fork()
4. #1038: mcexec: Timeout when incorrect value is given to -n option
5. #943 #945 #946 #960 #961: mcexec: Support strace
6. #1029: struct thread is not released with stress-test involving signal
and futex
7. #863 #870: Respond immediately to terminating signal when
offloading system call
8. #1119: translate_rva_to_rpa(): use 2MB blocks in 1GB pages on x86
11. #898: Shutdown OS only after no in-flight IKC exist
12. #882: release_handler: Destroy objects as the process which opened it
13. #882: mcexec: Make child process exit if the parent is killed during
fork()
14. #925: XPMEM: Don't destroy per-process object of the parent
15. #885: ptrace: Support the case where a process attaches its child
16. #1031: sigaction: Support SA_RESETHAND
17. #923: rus_vm_fault: Return error when a thread not performing
system call offloading causes remote page fault
18. #1032 #1033 #1034: getrusage: Fix ru_maxrss, RUSAGE_CHILDREN,
ru_stime related bugs
19. #1120: getrusage: Fix deadlock on thread->times_update
20. #1123: Fix deadlock related to wait_queue_head_list_node
21. #1124: Fix deadlock of calling terminate() from terminate()
22. #1125: Fix deadlock related to thread status
* Related functions are: hold_thread(), do_kill() and terminate()
23. #1126: uti: Fix uti thread on the McKernel side blocks others in do_syscall()
24. #1066: procfs: Show Linux /proc/self/cgroup
25. #1127: prepare_process_ranges_args_envs(): fix generating saved_cmdline to
avoid PF in strlen()
26. #1128: ihk_mc_map/unmap_virtual(): do proper TLB invalidation
27. #1043: terminate(): fix update_lock and threads_lock order to avoid deadlock
28. #1129: mcreboot.sh: Save /proc/irq/*/smp_affinity to /tmp/mcreboot
29. #1130: mcexec: drop READ_IMPLIES_EXEC from personality
--------------------
McKernel workarounds
--------------------
1. Forbid CPU oversubscription
* It can be turned on by mcreboot.sh -O option
===================================
What's new in V1.4.0 (Oct 30, 2017)
===================================
-----------------------------------------------------------
Feature: Abstracted event type support in perf_event_open()
-----------------------------------------------------------
PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types are supported.
----------------------------------
Clean-up: Direct user-space access
----------------------------------
Code lines using direct user-space access (e.g. passing user-space
pointer to memcpy()) become more portable across processor
architectures. The modification follows the following rules.
1. Move the code section as it is to the architecture dependent
directory if it is a part of the critical-path.
2. Otherwise, rewrite the code section by using the portable methods.
The methods include copy_from_user(), copy_to_user(),
pte_get_phys() and phys_to_virt().
--------------------------------
Test: MPI and OpenMP micro-bench
--------------------------------
The performance figures of MPI and OpenMP primitives are compared with
those of Linux by using Intel MPI Benchmarks and EPCC OpenMP Micro
Benchmark.
===================================
What's new in V1.3.0 (Sep 30, 2017)
===================================
--------------------
Feature: Kernel dump
--------------------
1. A dump level of "only kernel memory" is added.
The following two levels are available now:
0: Dump all
24: Dump only kernel memory
The dump level can be set by -d option in ihkosctl or the argument
for ihk_os_makedumpfile(), as shown in the following examples:
Command: ihkosctl 0 dump -d 24
Function call: ihk_os_makedumpfile(0, NULL, 24, 0);
2. Dump file is created when Linux panics.
The dump level can be set by dump_level kernel argument, as shown in the
following example:
ihkosctl 0 kargs "hidos dump_level=24"
The IHK dump function is registered to panic_notifier_list when creating
/dev/mcdX and called when Linux panics.
-----------------------------
Feature: Quick Process Launch
-----------------------------
MPI process launch time and some of the initialization time can be
reduced in application consisting of multiple MPI programs which are
launched in turn in the job script.
The following two steps should be performed to use this feature:
1. Replace mpiexec with ql_mpiexec_start and add some lines for
ql_mpiexec_finalize in the job script
2. Modify the app so that it can repeat calculations and wait for the
instructions from ql_mpiexec_{start,finalize} at the end of the
loop
The first step is explained using an example. Assume the original job
script looks like this:
/* Execute ensemble simulation and then data assimilation, and repeat this
ten times */
for i in {1..10}; do
/* Each ensemble simulation execution uses 100 nodes, launch ten of them
in parallel */
for j in {1..10}; do
mpiexec -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
/* Wait until the ten ensemble simulation programs finish */
for j in {1..10}; do wait ${pids[$j]}; done
/* Launch one data assimilation program using 1000 nodes */
mpiexec -n 1000 -machinefile ./list2 p2.out a2
done
The job script should be modified like this:
for i in {1..10}; do
for j in {1..10}; do
/* Replace mpiexec with ql_mpiexec_start */
ql_mpiexec_start -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
for j in {1..10}; do wait ${pids[$j]}; done
ql_mpiexec_start -n 1000 -machinefile ./list2 p2.out a2
done
/* p1.out and p2.out don't exit but are waiting for the next calculation.
So tell them to exit */
for j in {1..10}; do
ql_mpiexec_finalize -machinefile ./list1_$j p1.out a1;
done
ql_mpiexec_finalize -machinefile ./list2 p2.out a2;
The second step is explained using a pseudo-code.
MPI_Init();
Prepare data exchange with preceding / following MPI programs
loop:
foreach Fortran module
Initialize data using command-line arguments, parameter files,
environment variables
Input data from preceding MPI programs / Read snap-shot
Perform main calculation
Output data to following MPI programs / Write snap-shot
/* ql_client() waits for command of ql_mpiexec_{start,finalize} */
if (ql_client() == QL_CONTINUE) { goto loop; }
MPI_Finalize();
qlmpilib.h should be included in the code and libql{mpi,fort}.so
should be linked to the executable file.
========================
Restrictions on McKernel
========================
1. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
correctly even if the mmap() returns a success. An access of their
mapping receives the SIGSEGV signal.
2. clone() supports only the following flags. All the other flags
cause clone() to return error or are simply ignored.
* CLONE_CHILD_CLEARTID
* CLONE_CHILD_SETTID
* CLONE_PARENT_SETTID
* CLONE_SETTLS
* CLONE_SIGHAND
* CLONE_VM
3. PAPI has the following restriction.
* Number of counters a user can use at the same time is up to the
number of the physical counters in the processor.
4. msync writes back only the modified pages mapped by the calling process.
5. The following syscalls always return the ENOSYS error.
* migrate_pages()
* move_pages()
* set_robust_list()
6. The following syscalls always return the EOPNOTSUPP error.
* arch_prctl(ARCH_SET_GS)
* signalfd()
7. signalfd4() returns a fd, but signal is not notified through the
fd.
8. set_rlimit sets the limit values but they are not enforced.
9. Address randomization is not supported.
10. brk() extends the heap more than requested when -h
(--extend-heap-by=)<step> option of mcexec is used with the value
larger than 4 KiB. syscall_pwrite02 of LTP would fail for this
reason. This is because the test expects that the end of the heap
is set to the same address as the argument of sbrk() and expects a
segmentation violation occurs when it tries to access the memory
area right next to the boundary. However, the optimization sets
the end to a value larger than the requested. Therefore, the
expected segmentation violation doesn't occur.
11. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no
fixed correspondence between a McKernel thread which issues the
system call and a mcexec thread which handles the offload request.
12. mbind() can set the policy but it is not used when allocating
physical pages.
13. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
14. The MPOL_BIND policy for set_mempolicy()/mbind() works the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are
running out of pages but continues to search pages in the other
nodes.
15. Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and
later. In addition, crash_kexec_post_notifiers kernel argument
must be given to Linux kernel.
16. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, it changes that of the mcexec worker thread which takes
the system-call offload request.
17. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
18. Sticky bit on executable file has no effect.
19. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to the Linux bug, found in
Linux-3.10 and fixed in the later version. One way to circumvent
this is to always assign the same CPU set to McKernel.
20. madvise:
* MADV_HWPOISON and MADV_SOFT_OFFLINE always return -EPERM.
* MADV_MERGEABLE and MADV_UNMERGEABLE always return -EINVAL.
* MADV_HUGEPAGE and MADV_NOHUGEPAGE on file map returns -EINVAL
(It succeeds on RHEL-8 for aarch64).
21. brk() and mmap() don't report out-of-memory through their return
values. Instead, page-fault reports the error.
22. Anonymous mmap pre-maps requested number of pages when contiguous
pages are available. Demand paging is used when not available.
23. Mixing page sizes in anonymous shared mapping is not allowed. mmap
creates vm_range with one page size. And munmap or mremap that
needs the reduced page size changes the sizes of all the pages of
the vm_range.
24. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
25. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.

285
README.md
View File

@ -1,285 +0,0 @@
![McKernel Logo](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel-logo.png)
-------------------------
IHK/McKernel is a light-weight multi-kernel operating system designed for high-end supercomputing. It runs Linux and McKernel, a light-weight kernel (LWK), side-by-side inside compute nodes and aims at the following:
- Provide scalable and consistent execution of large-scale parallel scientific applications, but at the same time maintain the ability to rapidly adapt to new hardware features and emerging programming models
- Provide efficient memory and device management so that resource contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide jitter free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading (slow-path) system calls to Linux
## Contents
- [Background](#background-and-motivation)
- [Architectural Overview](#architectural-overview)
- [Installation](#installation)
- [The Team](#the-team)
## Background and Motivation
With the growing complexity of high-end supercomputers, the current system software stack faces significant challenges as we move forward to exascale and beyond. The necessity to deal with extreme degree of parallelism, heterogeneous architectures, multiple levels of memory hierarchy, power constraints, etc., advocates operating systems that can rapidly adapt to new hardware requirements, and that can support novel programming paradigms and runtime systems. On the other hand, a new class of more dynamic and complex applications are also on the horizon, with an increasing demand for application constructs such as in-situ analysis, workflows, elaborate monitoring and performance tools. This complexity relies not only on the rich features of POSIX, but also on the Linux APIs (such as the */proc*, */sys* filesystems, etc.) in particular.
##### Two Traditional HPC OS Approaches
Traditionally, light-weight operating systems specialized for HPC followed two approaches to tackle scalable execution of large-scale applications. In the full weight kernel (FWK) approach, a full Linux environment is taken as the basis, and features that inhibit attaining HPC scalability are removed, i.e., making it light-weight. The pure light-weight kernel (LWK) approach, on the other hand, starts from scratch and effort is undertaken to add sufficient functionality so that it provides a familiar API, typically something close to that of a general purpose OS, while at the same time it retains the desired scalability and reliability attributes. Neither of these approaches yields a fully Linux compatible environment.
##### The Multi-kernel Approach
A hybrid approach recognized recently by the system software community is to run Linux simultaneously with a lightweight kernel on compute nodes and multiple research projects are now pursuing this direction. The basic idea is that simulations run on an HPC tailored lightweight kernel, ensuring the necessary isolation for noiseless execution of parallel applications, but Linux is leveraged so that the full POSIX API is supported. Additionally, the small code base of the LWK can also facilitate rapid prototyping for new, exotic hardware features. Nevertheless, the questions of how to share node resources between the two types of kernels, where do device drivers execute, how exactly do the two kernels interact with each other and to what extent are they integrated, remain subjects of ongoing debate.
## Architectural Overview
At the heart of the stack is a low-level software infrastructure called Interface for Heterogeneous Kernels (IHK). IHK is a general framework that provides capabilities for partitioning resources in a many-core environment (e.g.,CPU cores and physical memory) and it enables management of lightweight kernels. IHK can allocate and release host resources dynamically and no reboot of the host machine is required when altering configuration. IHK also provides a low-level inter-kernel messaging infrastructure, called the Inter-Kernel Communication (IKC) layer. An architectural overview of the main system components is shown below.
![arch](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/mckernel.png)
McKernel is a lightweight kernel written from scratch. It is designed for HPC and is booted from IHK. McKernel retains a binary compatible ABI with Linux, however, it implements only a small set of performance sensitive system calls and the rest are offloaded to Linux. Specifically, McKernel has its own memory management, it supports processes and multi-threading with a simple round-robin cooperative (tick-less) scheduler, and it implements signaling. It also allows inter-process memory mappings and it provides interfaces to hardware performance counters.
### Functionality
An overview of some of the principal functionalities of the IHK/McKernel stack is provided below.
#### System Call Offloading
System call forwarding in McKernel is implemented as follows. When an offloaded system call occurs, McKernel marshals the system call number along with its arguments and sends a message to Linux via a dedicated IKC channel. The corresponding proxy process running on Linux is by default waiting for system call requests through an ioctl() call into IHK's system call delegator kernel module. The delegator kernel module's IKC interrupt handler wakes up the proxy process, which returns to userspace and simply invokes the requested system call. Once it obtains the return value, it instructs the delegator module to send the result back to McKernel, which subsequently passes the value to user-space.
#### Unified Address Space
The unified address space model in IHK/McKernel ensures that offloaded system calls can seamlessly resolve arguments even in case of pointers. This mechanism is depicted below and is implemented as follows.
![unified_ap](https://www.sys.r-ccs.riken.jp/members_files/bgerofi/img/unified_address_space_en.png)
First, the proxy process is compiled as a position independent binary, which enables us to map the code and data segments specific to the proxy process to an address range which is explicitly excluded from McKernel's user space. The grey box on the right side of the figure demonstrates the excluded region. Second, the entire valid virtual address range of McKernel's application user-space is covered by a special mapping in the proxy process for which we use a pseudo file mapping in Linux. This mapping is indicated by the blue box on the left side of the figure.
## Installation
For a smooth experience, we recommend the following combination of OS distributions and platforms:
- CentOS 7.3+ running on Intel Xeon / Xeon Phi
##### 1. Change SELinux settings
Log in as the root and disable SELinux:
~~~~
vim /etc/selinux/config
~~~~
Change the file to SELINUX=disabled
##### 2. Reboot the host machine
~~~~
sudo reboot
~~~~
##### 3. Prepare packages, kernel symbol table file
You will need the following packages installed:
~~~~
sudo yum install cmake kernel-devel binutils-devel systemd-devel numactl-devel gcc make nasm git
~~~~
Grant read permission to the System.map file of your kernel version:
~~~~
sudo chmod a+r /boot/System.map-`uname -r`
~~~~
##### 4. Obtain sources and compile the kernel
Clone the source code:
~~~~
mkdir -p ~/src/ihk+mckernel/
cd ~/src/ihk+mckernel/
git clone --recursive -b development https://github.com/RIKEN-SysSoft/mckernel.git
~~~~
(Optional) Checkout to the specific branch or version:
~~~~
cd mckernel
git checkout <pathspec>
git submodule update
~~~~
For example, if you want to try the development branch, use "development" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, use "1.7.0-0.2".
###### 4.1 Install with cmake
Configure and compile:
~~~~
mkdir -p build && cd build
cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel $HOME/src/ihk+mckernel/mckernel
make -j install
~~~~
The IHK kernel modules and McKernel kernel image should be installed under the **ihk+mckernel** folder in your home directory.
###### 4.2 Install with rpm
Configure, compile and build rpm:
~~~~
mkdir -p build && cd build
cmake $HOME/src/ihk+mckernel/mckernel
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
rpmbuild -ba scripts/mckernel.spec
sudo rpm -ivh <rpmbuild>/RPMS/<arch>/mckernel-<version>-<release>_<linux_kernel_ver>_<dist>.<arch>.rpm
~~~~
The IHK kernel modules and McKernel kernel image are installed under the system directory.
##### 5. Boot McKernel
A boot script called mcreboot.sh is provided under sbin in the install folder. To boot on logical CPU 1 with 512MB of memory, use the following invocation:
~~~~
export TOP=${HOME}/ihk+mckernel/
cd ${TOP}
sudo ./sbin/mcreboot.sh -c 1 -m 512m
~~~~
You should see something similar to this if you display McKernel's kernel message log:
~~~~
./sbin/ihkosctl 0 kmsg
IHK/McKernel started.
[ -1]: no_execute_available: 1
[ -1]: map_fixed: phys: 0xfee00000 => 0xffff860000009000 (1 pages)
[ -1]: setup_x86 done.
[ -1]: ns_per_tsc: 385
[ -1]: KCommand Line: hidos dump_level=24
[ -1]: Physical memory: 0x1ad3000 - 0x21000000, 525520896 bytes, 128301 pages available @ NUMA: 0
[ -1]: NUMA: 0, Linux NUMA: 0, type: 1, available bytes: 525520896, pages: 128301
[ -1]: NUMA 0 distances: 0 (10),
[ -1]: map_fixed: phys: 0x28000 => 0xffff86000000a000 (2 pages)
[ -1]: Trampoline area: 0x28000
[ -1]: map_fixed: phys: 0x0 => 0xffff86000000c000 (1 pages)
[ -1]: # of cpus : 1
[ -1]: locals = ffff880001af6000
[ 0]: BSP: 0 (HW ID: 1 @ NUMA 0)
[ 0]: BSP: booted 0 AP CPUs
[ 0]: Master channel init acked.
[ 0]: vdso is enabled
IHK/McKernel booted.
~~~~
##### 5. Run a simple program on McKernel
The mcexec command line tool (which is also the Linux proxy process) can be used for executing applications on McKernel:
~~~~
./bin/mcexec hostname
centos-vm
~~~~
##### 6. Shutdown McKernel
Finally, to shut down McKernel and release CPU/memory resources back to Linux, use the following command:
~~~~
sudo ./sbin/mcstop+release.sh
~~~~
##### 7. Advanced: Enable Utility Thread offloading Interface (UTI)
UTI enables a runtime, such as an MPI runtime, to spawn utility threads (for example, MPI asynchronous progress threads) onto Linux cores.
1. Install capstone
Install EPEL capstone-devel:
~~~~
sudo yum install epel-release
sudo yum install capstone-devel
~~~~
2. Install syscall_intercept
~~~~
git clone https://github.com/RIKEN-SysSoft/syscall_intercept.git
cmake ../arch/aarch64 -DCMAKE_INSTALL_PREFIX=<syscall-intercept-install> -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
~~~~
3. Install UTI for McKernel
Install:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<mckernel-install> --with-rm=mckernel
make && make install
~~~~
4. Install McKernel
~~~~
CMAKE_PREFIX_PATH=<syscall-intercept-install> cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
~~~~
5. Run executable
~~~~
mcexec --enable-uti <command>
~~~~
6. Install UTI for Linux for performance comparison
Install by make:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
make && make install
~~~~
Install by rpm:
~~~~
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
rm -f ~/rpmbuild/SOURCES/<version>.tar.gz
rpmbuild -ba ./scripts/uti.spec
rpm -Uvh uti-<version>-<release>-<arch>.rpm
~~~~
## The Team
The McKernel project was started at The University of Tokyo and currently it is mainly developed at RIKEN.
Some of our collaborators include:
- Hitachi
- Fujitsu
- CEA (France)
- NEC
## License
McKernel is GPL licensed, as found in the LICENSE file.
## Contact
Please give your feedback to us via one of the following mailing lists. Subscription via [www.pccluster.org](http://www.pccluster.org/mailman/listinfo/mckernel-users) is needed.
* English: mckernel-users@pccluster.org
* Japanese: mckernel-users-jp@pccluster.org

23
README.rst Normal file
View File

@ -0,0 +1,23 @@
.. figure:: docs/mckernel-logo.png
IHK/McKernel is a light-weight multi-kernel operating system designed
for high-end supercomputing. It runs Linux and McKernel, a light-weight
kernel (LWK), side-by-side inside compute nodes and aims at the
following:
- Provide scalable and consistent execution of large-scale parallel
scientific applications, but at the same time maintain the ability to
rapidly adapt to new hardware features and emerging programming
models
- Provide efficient memory and device management so that resource
contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide
jitter free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading
(slow-path) system calls to Linux
Documentation
=============
Documentation is available
`here <https://ihkmckernel.readthedocs.io>`__.

View File

@ -143,6 +143,11 @@ void arch_save_panic_regs(void *irq_regs)
clv = get_arm64_this_cpu_local();
/* If kernel mode PF occurred, unroll the causing call stack */
if (cpu_local_var(kernel_mode_pf_regs)) {
regs = cpu_local_var(kernel_mode_pf_regs);
}
/* For user-space, use saved kernel context */
if (regs->pc < USER_END) {
memset(clv->arm64_cpu_local_thread.panic_regs,
@ -725,6 +730,49 @@ static void show_context_stack(struct pt_regs *regs)
}
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a call-stack trace for @thread, starting from @pc / @sp.
 *
 * @sp is treated as the address of a frame record: the word at @sp holds the
 * next frame address and the word at @sp + 8 holds the return address.
 * The walk stops as soon as the next frame does not lie strictly above the
 * current one, lies beyond the KERNEL_STACK_SIZE-aligned top of the stack,
 * or the return address falls outside [_head, _end].
 *
 * @kprintf_locked: non-zero when the caller already holds the kprintf lock;
 *                  otherwise the lock is taken for the duration of the dump.
 */
void __show_context_stack(struct thread *thread,
		unsigned long pc, uintptr_t sp, int kprintf_locked)
{
	extern char _head[], _end[];
	const uintptr_t stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
	unsigned long irqflags = 0;

	if (!kprintf_locked)
		irqflags = kprintf_lock();

	__kprintf("TID: %d, call stack (most recent first):\n",
		  thread->tid);
	__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);

	while (1) {
		const uintptr_t *frame = (const uintptr_t *)sp;
		uintptr_t next_fp = frame[0];
		uintptr_t ret_addr;

		/* The chain must move strictly upwards and stay on this stack */
		if (next_fp <= sp || next_fp > stack_top)
			break;

		/* Only follow return addresses inside the kernel image */
		ret_addr = frame[1];
		if (ret_addr < (unsigned long)_head ||
		    ret_addr > (unsigned long)_end)
			break;

		__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", ret_addr - 4, sp, next_fp);
		sp = next_fp;
	}

	if (!kprintf_locked)
		kprintf_unlock(irqflags);
}
#endif
void handle_IPI(unsigned int vector, struct pt_regs *regs)
{
struct ihk_mc_interrupt_handler *h;
@ -786,6 +834,19 @@ void cpu_safe_halt(void)
cpu_enable_interrupt();
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
/*
 * Halt variant for the panic path (going by the name; callers are not
 * visible here): enables interrupts, then idles via __cpu_do_idle().
 */
void cpu_halt_panic(void)
{
/* Declared locally; the definition lives outside this file section */
extern void __cpu_do_idle(void);
cpu_enable_interrupt();
__cpu_do_idle();
}
#endif
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
@ -851,6 +912,21 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Report whether interrupts are currently masked on this CPU.
 *
 * Reads ICC_PMR_EL1 and compares it with ICC_PMR_EL1_MASKED.
 * Returns 1 when the register equals the masked value, 0 otherwise.
 */
int cpu_interrupt_disabled(void)
{
	unsigned long pmr;

	asm volatile(
		"mrs_s %0, " __stringify(ICC_PMR_EL1)
		: "=&r" (pmr)
		:
		: "memory");

	return (pmr == (unsigned long)ICC_PMR_EL1_MASKED);
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
@ -1279,7 +1355,7 @@ long ihk_mc_show_cpuinfo(char *buf, size_t buf_size, unsigned long read_off, int
/* generate strings */
loff += scnprintf(lbuf + loff, lbuf_size - loff,
"processor\t: %d\n", cpuinfo->hwid);
"processor\t: %d\n", i);
loff += scnprintf(lbuf + loff, lbuf_size - loff, "Features\t:");
for (j = 0; hwcap_str[j]; j++) {
@ -1372,6 +1448,14 @@ void arch_print_stack(void)
{
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the program counter stored in a register frame.
 *
 * @reg is interpreted as a struct pt_regs; its pc field is returned.
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	return ((const struct pt_regs *)reg)->pc;
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;

View File

@ -223,7 +223,12 @@ static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
// XXX: Handle kernel space page faults for Tofu driver
//if (addr < USER_END)
#else
if (addr < USER_END)
#endif
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);

View File

@ -9,6 +9,9 @@
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@ -31,6 +34,10 @@ typedef struct {
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@ -98,6 +105,18 @@ static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
: "memory");
success = !tmp;
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
if (!success) {
preempt_enable();
}
@ -182,6 +201,14 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
#ifdef DEBUG_SPINLOCK

View File

@ -94,7 +94,11 @@ extern char _end[];
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
#ifdef ENABLE_TOFU
# define MAP_VMAP_START UL(0xffff7bdfffff0000)
#else
# define MAP_VMAP_START UL(0xffff780000000000)
#endif
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)
@ -142,6 +146,7 @@ extern char _end[];
# define __PTL1_SHIFT 16
# define PTL4_INDEX_MASK 0
# define PTL3_INDEX_MASK ((UL(1) << 6) - 1)
# define PTL3_INDEX_MASK_LINUX ((UL(1) << 10) - 1)
# define PTL2_INDEX_MASK ((UL(1) << 13) - 1)
# define PTL1_INDEX_MASK PTL2_INDEX_MASK
# define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0)
@ -829,7 +834,13 @@ static inline int pte_is_head(pte_t *ptep, pte_t *old, size_t cont_size)
return page_is_contiguous_head(ptep, cont_size);
}
struct page_table;
typedef pte_t translation_table_t;
/* Page table descriptor passed around as struct page_table *. */
struct page_table {
/* Translation table pointer (NOTE(review): presumably the kernel-virtual address; confirm against set_translation_table()) */
translation_table_t* tt;
/* NOTE(review): presumably the physical address of tt, cf. get_translation_table_as_paddr(); confirm */
translation_table_t* tt_pa;
/* Address space ID (see set_address_space_id() / get_address_space_id()) */
int asid;
};
void arch_adjust_allocate_page_size(struct page_table *pt,
uintptr_t fault_addr,
pte_t *ptep,
@ -849,7 +860,6 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
void set_address_space_id(struct page_table *pt, int asid);
int get_address_space_id(const struct page_table *pt);
typedef pte_t translation_table_t;
void set_translation_table(struct page_table *pt, translation_table_t* tt);
translation_table_t* get_translation_table(const struct page_table *pt);
translation_table_t* get_translation_table_as_paddr(const struct page_table *pt);

View File

@ -10,4 +10,13 @@ extern void *__inline_memcpy(void *to, const void *from, size_t t);
extern void *__inline_memset(void *s, unsigned long c, size_t count);
#define ARCH_MEMCLEAR
extern void __memclear(void *addr, unsigned long len, void *tmp);
/*
 * Clear @len bytes at @addr using __memclear().
 *
 * A 32-byte on-stack scratch area is handed to __memclear() as its third
 * (tmp) argument.
 */
inline static void memclear(void *addr, unsigned long len)
{
	uint64_t scratch[4];

	__memclear(addr, len, scratch);
}
#endif /* __HEADER_ARM64_COMMON_ARCH_TIMER_H */

View File

@ -80,6 +80,10 @@ static inline uint64_t __raw_readq(const volatile void *addr)
return val;
}
/* IO barriers */
#define __iormb() rmb()
#define __iowmb() wmb()
/*
* Relaxed I/O memory access primitives. These follow the Device memory
* ordering rules but do not guarantee any ordering relative to Normal memory
@ -95,5 +99,20 @@ static inline uint64_t __raw_readq(const volatile void *addr)
#define writel_relaxed(v,c) ((void)__raw_writel((uint32_t)(v),(c)))
#define writeq_relaxed(v,c) ((void)__raw_writeq((uint64_t)(v),(c)))
/*
* I/O memory access primitives. Reads are ordered relative to any
* following Normal memory access. Writes are ordered relative to any prior
* Normal memory access.
*/
/* Read accessors: relaxed read, then __iormb(); evaluate to the value read */
#define readb(c) ({ uint8_t __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c) ({ uint16_t __v = readw_relaxed(c); __iormb(); __v; })
#define readl(c) ({ uint32_t __v = readl_relaxed(c); __iormb(); __v; })
#define readq(c) ({ uint64_t __v = readq_relaxed(c); __iormb(); __v; })
/* Write accessors: __iowmb() first, then the relaxed write of v to c */
#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
#endif /* __KERNEL__ */
#endif /* __ASM_IO_H */

View File

@ -85,7 +85,11 @@ enum __rlimit_resource
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
__RLIMIT_NLIMITS = 15,
/* timeout for RT tasks in us */
__RLIMIT_RTTIME = 15,
#define RLIMIT_RTTIME __RLIMIT_RTTIME
__RLIMIT_NLIMITS = 16,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS

View File

@ -83,6 +83,7 @@ SYSCALL_HANDLED(175, geteuid)
SYSCALL_HANDLED(176, getgid)
SYSCALL_HANDLED(177, getegid)
SYSCALL_HANDLED(178, gettid)
SYSCALL_HANDLED(179, sysinfo)
SYSCALL_DELEGATED(188, msgrcv)
SYSCALL_DELEGATED(189, msgsnd)
SYSCALL_DELEGATED(192, semtimedop)
@ -117,12 +118,13 @@ SYSCALL_HANDLED(241, perf_event_open)
SYSCALL_DELEGATED(241, perf_event_open)
#endif // PERF_ENABLE
SYSCALL_HANDLED(260, wait4)
SYSCALL_HANDLED(261, prlimit64)
SYSCALL_HANDLED(270, process_vm_readv)
SYSCALL_HANDLED(271, process_vm_writev)
SYSCALL_HANDLED(281, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
@ -143,3 +145,8 @@ SYSCALL_DELEGATED(1049, stat)
SYSCALL_DELEGATED(1060, getpgrp)
SYSCALL_HANDLED(1062, time)
SYSCALL_DELEGATED(1069, epoll_wait)
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
* robust marker for the autotest patch.
*/

View File

@ -2,7 +2,7 @@
#ifndef __HEADER_ARM64_COMMON_THREAD_INFO_H
#define __HEADER_ARM64_COMMON_THREAD_INFO_H
#define MIN_KERNEL_STACK_SHIFT 15
#define MIN_KERNEL_STACK_SHIFT 18
#include <arch-memory.h>

View File

@ -7,6 +7,9 @@
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
@ -313,14 +316,27 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once panicked, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
@ -335,7 +351,12 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}

View File

@ -150,12 +150,6 @@ void flush_tlb_single(unsigned long addr)
arch_flush_tlb_single(asid, addr);
}
struct page_table {
translation_table_t* tt;
translation_table_t* tt_pa;
int asid;
};
extern struct page_table swapper_page_table;
static struct page_table *init_pt = &swapper_page_table;
static ihk_spinlock_t init_pt_lock;
@ -223,6 +217,13 @@ static inline int ptl4_index(unsigned long addr)
int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK;
return idx;
}
#ifdef ENABLE_TOFU
/*
 * Index of @addr within a level-3 table, computed with
 * PTL3_INDEX_MASK_LINUX rather than PTL3_INDEX_MASK.
 */
static inline int ptl3_index_linux(unsigned long addr)
{
	return (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK_LINUX;
}
#endif
static inline int ptl3_index(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK;
@ -281,6 +282,40 @@ static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long
}
return ptep;
}
#ifdef ENABLE_TOFU
static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
{
pte_t* ptep = NULL;
pte_t pte = 0;
unsigned long phys = 0;
translation_table_t* ptl3 = NULL;
int idx = 0;
switch (CONFIG_ARM64_PGTABLE_LEVELS)
{
case 4:
pte = ptl4_val(l4p);
phys = pte & PT_PHYSMASK;
ptl3 = phys_to_virt(phys);
idx = ptl3_index_linux(addr);
ptep = (pte_t*)ptl3 + idx;
break;
case 3:
ptl3 = (translation_table_t*)l4p;
idx = ptl3_index_linux(addr);
ptep = (pte_t*)ptl3 + idx;
break;
case 2:
case 1:
/* PTL3が無いときにはエントリではなくページテーブルのアドレスを引渡していく。*/
ptep = (pte_t*)l4p;
break;
}
return ptep;
}
#endif
static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr)
{
pte_t* ptep = NULL;
@ -959,7 +994,14 @@ static void init_normal_area(struct page_table *pt)
int i;
tt = get_translation_table(pt);
#ifdef ENABLE_TOFU
setup(tt,
arm64_st_phys_base,
arm64_st_phys_base + (1UL << 40));
return;
#endif
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) {
unsigned long map_start, map_end;
int numa_id;
@ -1287,6 +1329,58 @@ out:
return ret;
}
#ifdef ENABLE_TOFU
/*
 * Translate @virt through @pt, using ptl3_offset_linux() for the level-3
 * lookup.
 *
 * @phys: receives the physical address (mapping base | offset in mapping).
 * @size: optional; when non-NULL receives the size of the mapping that was
 *        hit (PTL3_SIZE, PTL2_SIZE or PTL1_SIZE).
 *
 * Returns 0 on success, -EFAULT when an entry on the walk is not present.
 */
int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt,
		const void *virt,
		unsigned long *phys,
		unsigned long *size)
{
	const unsigned long vaddr = (unsigned long)virt;
	unsigned long map_base;
	unsigned long map_size;
	pte_t *entry;

	entry = ptl4_offset(get_translation_table(pt), vaddr);
	if (!ptl4_present(entry))
		return -EFAULT;

	entry = ptl3_offset_linux(entry, vaddr);
	if (!ptl3_present(entry))
		return -EFAULT;

	if (ptl3_type_block(entry)) {
		/* Level-3 block mapping */
		map_base = ptl3_phys(entry);
		map_size = PTL3_SIZE;
	} else {
		entry = ptl2_offset(entry, vaddr);
		if (!ptl2_present(entry))
			return -EFAULT;

		if (ptl2_type_block(entry)) {
			/* Level-2 block mapping */
			map_base = ptl2_phys(entry);
			map_size = PTL2_SIZE;
		} else {
			/* Regular level-1 page */
			entry = ptl1_offset(entry, vaddr);
			if (!ptl1_present(entry))
				return -EFAULT;

			map_base = ptl1_phys(entry);
			map_size = PTL1_SIZE;
		}
	}

	*phys = map_base | (vaddr & (map_size - 1));
	if (size)
		*size = map_size;
	return 0;
}
#endif
int ihk_mc_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,
@ -1348,7 +1442,6 @@ int ihk_mc_pt_virt_to_phys(struct page_table *pt,
return ihk_mc_pt_virt_to_phys_size(pt, virt, phys, NULL);
}
int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
{
const unsigned long v = (unsigned long)virt;
@ -1360,6 +1453,15 @@ int ihk_mc_pt_print_pte(struct page_table *pt, void *virt)
}
tt = get_translation_table(pt);
__kprintf("%s: 0x%lx, CONFIG_ARM64_PGTABLE_LEVELS: %d, ptl4_index: %ld, ptl3_index: %ld, ptl2_index: %ld, ptl1_index: %ld\n",
__func__,
v,
CONFIG_ARM64_PGTABLE_LEVELS,
ptl4_index(v),
ptl3_index(v),
ptl2_index(v),
ptl1_index(v));
ptep = ptl4_offset(tt, v);
__kprintf("l4 table: 0x%lX l4idx: %d\n", virt_to_phys(tt), ptl4_index(v));
if (!(ptl4_present(ptep))) {
@ -2147,6 +2249,198 @@ static void unmap_free_stat(struct page *page, unsigned long phys,
}
}
/*
* Kernel space page table clearing functions.
*/
/* Argument block handed to the clear_kernel_range_l*() walk callbacks. */
struct clear_kernel_range_args {
/* Non-zero: also release the unmapped physical pages via ihk_mc_free_pages() */
int free_physical;
};
static int clear_kernel_range_middle(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end, int level);
/*
 * Walk callback for level-1 entries: atomically replace *@ptep with
 * PTE_NULL and flush the TLB entry for @base.
 *
 * @args0: struct clear_kernel_range_args; when free_physical is set the
 *         backing pages are released as well.
 * @start, @end: unused at this level.
 *
 * Returns -ENOENT when the entry is already null, 0 otherwise.
 */
static int clear_kernel_range_l1(void *args0, pte_t *ptep, uint64_t base,
		uint64_t start, uint64_t end)
{
	struct clear_kernel_range_args *args = args0;
	pte_t prev;
	size_t unmapped;

	if (ptl1_null(ptep))
		return -ENOENT;

	prev = xchg(ptep, PTE_NULL);
	if (!pte_is_present(&prev))
		return 0;

	arch_flush_tlb_single(0, base);

	/* A contiguous entry accounts for the whole contiguous span */
	if (pte_is_contiguous(&prev))
		unmapped = PTL1_CONT_SIZE;
	else
		unmapped = PTL1_SIZE;

	dkprintf("%s: 0x%lx:%lu unmapped\n",
		 __func__, base, unmapped);

	if (args->free_physical) {
		uint64_t phys = ptl1_phys(&prev);

		ihk_mc_free_pages(phys_to_virt(phys), unmapped >> PAGE_SHIFT);
	}

	return 0;
}
/* Walk callback for level-2 entries; forwards to clear_kernel_range_middle() with level 2. */
static int clear_kernel_range_l2(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end)
{
return clear_kernel_range_middle(args0, ptep, base, start, end, 2);
}
/* Walk callback for level-3 entries; forwards to clear_kernel_range_middle() with level 3. */
static int clear_kernel_range_l3(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end)
{
return clear_kernel_range_middle(args0, ptep, base, start, end, 3);
}
/* Walk callback for level-4 entries; forwards to clear_kernel_range_middle() with level 4. */
static int clear_kernel_range_l4(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end)
{
return clear_kernel_range_middle(args0, ptep, base, start, end, 4);
}
/*
 * Clear the part of [@start, @end) covered by the level-@level entry @ptep,
 * whose mapping begins at @base.
 *
 * @args0: struct clear_kernel_range_args.
 * @level: 2, 3 or 4 (used to index the per-level table below).
 *
 * - Null entry: returns -ENOENT.
 * - Page/block entry only partially inside the range: returns -EINVAL
 *   (splitting is not supported).
 * - Page/block entry fully inside the range: the entry is replaced by
 *   PTE_NULL, the TLB entry is flushed and, if free_physical is set, the
 *   backing pages are released.
 * - Table entry: the next lower level is walked; -ENOENT from the walk is
 *   ignored. The lower-level table itself is released only when
 *   free_physical is set and the whole span of this entry lies inside the
 *   range.
 *
 * Returns 0 on success or a negative errno.
 */
static int clear_kernel_range_middle(void *args0, pte_t *ptep, uint64_t base,
		uint64_t start, uint64_t end, int level)
{
	const struct table {
		walk_pte_t* walk;
		walk_pte_fn_t* callback;
		unsigned long pgsize;
		unsigned long cont_pgsize;
	} table[] = {
		{walk_pte_l1, clear_kernel_range_l1, PTL2_SIZE, PTL2_CONT_SIZE}, /*PTL2*/
		{walk_pte_l2, clear_kernel_range_l2, PTL3_SIZE, PTL3_CONT_SIZE}, /*PTL3*/
		{walk_pte_l3, clear_kernel_range_l3, PTL4_SIZE, PTL4_CONT_SIZE}, /*PTL4*/
	};
	const struct table tbl = table[level-2];

	struct clear_kernel_range_args *args = args0;
	uint64_t phys = 0;
	translation_table_t *tt;
	int error;
	pte_t old;
	size_t clear_size;

	if (ptl_null(ptep, level)) {
		return -ENOENT;
	}

	dkprintf("%s(level: %d): 0x%lx in 0x%lx-0x%lx\n",
		 __func__, level, base, start, end);

	/* A page/block mapping cannot be cleared partially */
	if (ptl_type_page(ptep, level)
	    && ((base < start) || (end < (base + tbl.pgsize)))) {
		error = -EINVAL;
		/* Report under this function's real name, not the user-space variant's */
		ekprintf("%s(%p,%p,%lx,%lx,%lx,%d):"
			 "split page. %d\n",
			 __func__, args0, ptep, base, start, end, level, error);
		return error;
	}

	if (ptl_type_page(ptep, level)) {
		old = xchg(ptep, PTE_NULL);

		if (!ptl_present(&old, level)) {
			return 0;
		}

		arch_flush_tlb_single(0, base);

		clear_size = pte_is_contiguous(&old) ?
			tbl.cont_pgsize : tbl.pgsize;

		dkprintf("%s(level: %d): 0x%lx:%lu unmapped\n",
			 __func__, level, base, clear_size);

		if (args->free_physical) {
			phys = ptl_phys(&old, level);
			ihk_mc_free_pages(phys_to_virt(phys), clear_size >> PAGE_SHIFT);
		}

		return 0;
	}

	/* Table entry: descend into the next lower level */
	tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level));
	error = tbl.walk(tt, base, start, end, tbl.callback, args0);
	if (error && (error != -ENOENT)) {
		return error;
	}

	if (args->free_physical) {
		/* Drop the lower-level table only when it is covered entirely */
		if ((start <= base) && ((base + tbl.pgsize) <= end)) {
			ptl_clear(ptep, level);
			arch_flush_tlb_single(0, base);
			ihk_mc_free_pages(tt, 1);
		}
	}

	return 0;
}
static int clear_kernel_range(uintptr_t start, uintptr_t end, int free_physical)
{
const struct table {
walk_pte_t* walk;
walk_pte_fn_t* callback;
} tables[] = {
{walk_pte_l2, clear_kernel_range_l2}, /*second*/
{walk_pte_l3, clear_kernel_range_l3}, /*first*/
{walk_pte_l4, clear_kernel_range_l4}, /*zero*/
};
const struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2];
int error;
struct clear_kernel_range_args args;
translation_table_t* tt;
unsigned long irqflags;
dkprintf("%s: start: 0x%lx, end: 0x%lx, free phys: %d\n",
__func__, start, end, free_physical);
if (start <= USER_END)
return -EINVAL;
args.free_physical = free_physical;
irqflags = ihk_mc_spinlock_lock(&init_pt_lock);
tt = get_translation_table(get_init_page_table());
error = initial_lookup.walk(tt, 0,
(start & ~(0xffff000000000000)),
(end & ~(0xffff000000000000)),
initial_lookup.callback, &args);
dkprintf("%s: start: 0x%lx, end: 0x%lx, free phys: %d, ret: %d\n",
__func__, start, end, free_physical, error);
ihk_mc_spinlock_unlock(&init_pt_lock, irqflags);
return error;
}
/*
 * Unmap the kernel virtual range [start, end) without freeing the backing
 * physical pages (free_physical is passed as 0).
 * Returns the result of clear_kernel_range().
 */
int ihk_mc_clear_kernel_range(void *start, void *end)
{
#define KEEP_PHYSICAL 0
return clear_kernel_range((uintptr_t)start, (uintptr_t)end, KEEP_PHYSICAL);
}
/*
* User space page table clearing functions.
*/
struct clear_range_args {
int free_physical;
struct memobj *memobj;
@ -2344,6 +2638,14 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
if (memobj && ((memobj->flags & MF_PREMAP))) {
args.free_physical = 0;
}
if (vm->proc->straight_va &&
(void *)start == vm->proc->straight_va &&
(void *)end == (vm->proc->straight_va +
vm->proc->straight_len)) {
args.free_physical = 0;
}
args.memobj = memobj;
args.vm = vm;

View File

@ -218,3 +218,41 @@ ENTRY(__inline_memset)
ret
ENDPIPROC(__inline_memset)
ENDPROC(____inline_memset)
/*
 * Non-temporal vector memory clear
 *
 * Parameters:
 *	x0 - buf (assumed to be aligned to page size)
 *	x1 - n (assumed to be at least page size; cleared in 512-byte steps,
 *	     so it is expected to be a multiple of 512)
 *	x2 - tmp (32-byte scratch area used to preserve q0/q1 across the call)
 */
ENTRY(__memclear)
	stp	q0, q1, [x2]		/* Preserve two 128 bit vector regs */
	eor	v0.16B, v0.16B, v0.16B
	eor	v1.16B, v1.16B, v1.16B
1:
	stnp	q0, q1, [x0, #32 * 0]
	stnp	q0, q1, [x0, #32 * 1]
	stnp	q0, q1, [x0, #32 * 2]
	stnp	q0, q1, [x0, #32 * 3]
	stnp	q0, q1, [x0, #32 * 4]
	stnp	q0, q1, [x0, #32 * 5]
	stnp	q0, q1, [x0, #32 * 6]
	stnp	q0, q1, [x0, #32 * 7]
	stnp	q0, q1, [x0, #32 * 8]
	stnp	q0, q1, [x0, #32 * 9]
	stnp	q0, q1, [x0, #32 * 10]
	stnp	q0, q1, [x0, #32 * 11]
	stnp	q0, q1, [x0, #32 * 12]
	stnp	q0, q1, [x0, #32 * 13]
	stnp	q0, q1, [x0, #32 * 14]
	stnp	q0, q1, [x0, #32 * 15]
	add	x0, x0, #512
	/*
	 * subs already sets the flags, so no separate cmp is needed.
	 * Loop only while bytes remain and the count did not underflow:
	 * a length that is not a multiple of 512 ends the loop instead of
	 * wrapping around and clearing memory indefinitely.
	 */
	subs	x1, x1, #512
	b.hi	1b
	ldp	q0, q1, [x2]		/* Restore vector regs */
	ret
ENDPROC(__memclear)

View File

@ -142,17 +142,6 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last)
return min_cpu;
}
/*
 * Call clear_host_pte() over the current thread's whole user region
 * (region.user_start .. region.user_end). Always returns 0.
 */
int
arch_clear_host_user_space()
{
struct thread *th = cpu_local_var(current);
/* XXX: might be unnecessary */
clear_host_pte(th->vm->region.user_start,
(th->vm->region.user_end - th->vm->region.user_start), 0);
return 0;
}
/* architecture-dependent syscall handlers */
extern unsigned long do_fork(int clone_flags, unsigned long newsp,
unsigned long parent_tidptr, unsigned long child_tidptr,
@ -185,33 +174,6 @@ SYSCALL_DECLARE(clone)
return ret;
}
/*
 * rt_sigaction(sig, act, oact, sigsetsize)
 *
 * Copies the new action from user space (when @act is given), applies it
 * with do_sigaction() and, on success, copies the previous action back to
 * @oact (when given).
 *
 * Returns -EINVAL when sigsetsize != sizeof(sigset_t), -EFAULT when a user
 * copy fails, otherwise the result of do_sigaction().
 */
SYSCALL_DECLARE(rt_sigaction)
{
	int sig = ihk_mc_syscall_arg0(ctx);
	const struct sigaction *act = (const struct sigaction *)ihk_mc_syscall_arg1(ctx);
	struct sigaction *oact = (struct sigaction *)ihk_mc_syscall_arg2(ctx);
	size_t sigsetsize = ihk_mc_syscall_arg3(ctx);
	struct k_sigaction new_sa, old_sa;
	int rc;

	if (sigsetsize != sizeof(sigset_t))
		return -EINVAL;

	if (act && copy_from_user(&new_sa.sa, act, sizeof new_sa.sa))
		return -EFAULT;

	rc = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);

	/* The old action is only reported when the update itself succeeded */
	if (rc == 0 && oact &&
	    copy_to_user(oact, &old_sa.sa, sizeof old_sa.sa))
		return -EFAULT;

	return rc;
}
SYSCALL_DECLARE(prctl)
{
struct process *proc = cpu_local_var(current)->proc;
@ -1109,6 +1071,9 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart,
if (k->sa.sa_flags & SA_RESTORER){
regs->regs[30] = (unsigned long)k->sa.sa_restorer;
#ifdef ENABLE_FUGAKU_HACKS
kprintf("%s: SA_RESTORER: 0x%lx\n", __func__, regs->regs[30]);
#endif
} else {
regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp);
}
@ -1368,70 +1333,6 @@ out:
return restart;
}
/*
 * Find the first pending signal of @thread that is not blocked by word 0 of
 * its signal mask.
 *
 * The list thread->sigcommon->sigpending is searched first (under
 * sigcommon->lock), then thread->sigpending (under thread->sigpendinglock).
 *
 * @delflag: non-zero to unlink the entry that is found; the list is then
 *           searched under the writer lock instead of the reader lock.
 *
 * Returns the matching entry, or NULL when neither list has one.
 */
static struct sig_pending *
getsigpending(struct thread *thread, int delflag){
struct list_head *head;
mcs_rwlock_lock_t *lock;
struct mcs_rwlock_node_irqsave mcs_rw_node;
struct sig_pending *next;
struct sig_pending *pending;
__sigset_t w;
/* Blocked-signal mask used to filter the pending entries */
w = thread->sigmask.__val[0];
/* First pass: the list shared through sigcommon */
lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending;
for(;;) {
if (delflag) {
mcs_rwlock_writer_lock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
}
list_for_each_entry_safe(pending, next, head, list){
if(!(pending->sigmask.__val[0] & w)){
if(delflag)
list_del(&pending->list);
/* Release with the same lock flavour that was taken above */
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
return pending;
}
}
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
/* Both lists searched without a match */
if(lock == &thread->sigpendinglock)
return NULL;
/* Second pass: the thread's own list */
lock = &thread->sigpendinglock;
head = &thread->sigpending;
}
return NULL;
}
/*
 * Return a deliverable (unblocked) pending signal of @thread without
 * removing it from its list, or NULL when there is none.
 */
struct sig_pending *
hassigpending(struct thread *thread)
{
	/* Only take the list locks when at least one list has entries */
	if (!list_empty(&thread->sigpending) ||
	    !list_empty(&thread->sigcommon->sigpending))
		return getsigpending(thread, 0);

	return NULL;
}
int
interrupt_from_user(void *regs0)
{
@ -1455,191 +1356,6 @@ void save_syscall_return_value(int num, unsigned long rc)
}
}
/* Process pending signals via __check_signal() with irq_disabled = 0. */
void
check_signal(unsigned long rc, void *regs0, int num)
{
__check_signal(rc, regs0, num, 0);
}
/* Process pending signals via __check_signal() with irq_disabled = 1. */
void
check_signal_irq_disabled(unsigned long rc, void *regs0, int num)
{
__check_signal(rc, regs0, num, 1);
}
/*
 * Deliver the pending signals of the current thread.
 *
 * @rc, @num:      forwarded to do_signal(); @num becomes -1 once
 *                 do_signal() has returned non-zero.
 * @regs0:         user context; when non-NULL and not from user mode,
 *                 nothing is delivered.
 * @irq_disabled:  1 when called through check_signal_irq_disabled().
 *
 * Does nothing when clv is NULL. When there is no current thread or its pid
 * is 0, the run queue is scanned instead and the first PS_INTERRUPTIBLE
 * thread (pid > 0) with a pending signal is switched to PS_RUNNING.
 */
static void
__check_signal(unsigned long rc, void *regs0, int num, int irq_disabled)
{
ihk_mc_user_context_t *regs = regs0;
struct thread *thread;
struct sig_pending *pending;
int irqstate;
if(clv == NULL)
return;
thread = cpu_local_var(current);
if(thread == NULL || thread->proc->pid == 0){
struct thread *t;
/* Scan the run queue with interrupts off and runq_lock held */
irqstate = cpu_disable_interrupt_save();
ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){
if(t->proc->pid <= 0)
continue;
if(t->status == PS_INTERRUPTIBLE &&
hassigpending(t)){
t->status = PS_RUNNING;
break;
}
}
ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
cpu_restore_interrupt(irqstate);
goto out;
}
if(regs != NULL && !interrupt_from_user(regs)) {
goto out;
}
if (list_empty(&thread->sigpending) &&
list_empty(&thread->sigcommon->sigpending)) {
goto out;
}
for(;;){
/* When called from check_signal_irq_disabled(), interrupts are
 * disabled before each dequeue and restored only if a signal was
 * found, so the function returns with interrupts still disabled
 * once the queue is empty. This is to eliminate signal loss.
 */
if (irq_disabled == 1) {
irqstate = cpu_disable_interrupt_save();
}
/* Dequeue (delflag = 1) the next deliverable signal */
pending = getsigpending(thread, 1);
if(!pending) {
dkprintf("check_signal,queue is empty\n");
goto out;
}
if (irq_disabled == 1) {
cpu_restore_interrupt(irqstate);
}
if (do_signal(rc, regs, thread, pending, num)) {
num = -1;
}
}
out:
return;
}
/*
 * Check @thread for a pending, unblocked signal that has not yet been
 * marked as interrupted, and act on it.
 *
 * Both pending lists are scanned under their reader locks (the sigcommon
 * list first, then the thread's own list). Matching entries get
 * interrupted = 1.
 *
 * Returns 1 after releasing this CPU's runq_lock (with v->runq_irqstate)
 * and calling either terminate_mcexec() or interrupt_syscall(); returns 0
 * with the lock untouched when nothing was found.
 * NOTE(review): the caller is expected to hold runq_lock - see
 * check_sig_pending().
 */
static int
check_sig_pending_thread(struct thread *thread)
{
/* 0: nothing found, 1: interrupt the syscall, 2: terminate */
int found = 0;
struct list_head *head;
mcs_rwlock_lock_t *lock;
struct mcs_rwlock_node_irqsave mcs_rw_node;
struct sig_pending *next;
struct sig_pending *pending;
__sigset_t w;
__sigset_t x;
int sig = 0;
struct k_sigaction *k;
struct cpu_local_var *v;
v = get_this_cpu_local_var();
w = thread->sigmask.__val[0];
lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending;
for (;;) {
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
list_for_each_entry_safe(pending, next, head, list) {
/* sig = 1-based position of the highest bit set in the entry's mask */
for (x = pending->sigmask.__val[0], sig = 0; x;
sig++, x >>= 1)
;
k = thread->sigcommon->action + sig - 1;
/* SIGCHLD/SIGURG/SIGCONT only count when a real handler is installed */
if ((sig != SIGCHLD &&
sig != SIGURG &&
sig != SIGCONT) ||
(k->sa.sa_handler != SIG_IGN &&
k->sa.sa_handler != NULL)) {
if (!(pending->sigmask.__val[0] & w)) {
if (pending->interrupted == 0) {
pending->interrupted = 1;
found = 1;
/* No handler installed: flag for termination and stop scanning */
if (sig != SIGCHLD &&
sig != SIGURG &&
sig != SIGCONT &&
!k->sa.sa_handler) {
found = 2;
break;
}
}
}
}
}
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
if (found == 2) {
break;
}
if (lock == &thread->sigpendinglock) {
break;
}
lock = &thread->sigpendinglock;
head = &thread->sigpending;
}
if (found == 2) {
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
terminate_mcexec(0, sig);
return 1;
}
else if (found == 1) {
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
interrupt_syscall(thread, 0);
return 1;
}
return 0;
}
/*
 * Scan this CPU's run queue for threads that are in an offloaded system
 * call and have a signal to act on (see check_sig_pending_thread()).
 *
 * The scan runs under runq_lock. Whenever check_sig_pending_thread()
 * returns non-zero it has already dropped the lock, so the scan restarts
 * from the top.
 */
void
check_sig_pending(void)
{
struct thread *thread;
struct cpu_local_var *v;
if (clv == NULL)
return;
v = get_this_cpu_local_var();
repeat:
v->runq_irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
list_for_each_entry(thread, &(v->runq), sched_list) {
/* Skip the idle thread */
if (thread == NULL || thread == &cpu_local_var(idle)) {
continue;
}
/* Only threads currently in an offloaded syscall are considered */
if (thread->in_syscall_offload == 0) {
continue;
}
/* Skip processes with bit 32 of group_exit_status set */
if (thread->proc->group_exit_status & 0x0000000100000000L) {
continue;
}
if (check_sig_pending_thread(thread))
goto repeat;
}
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
}
unsigned long
do_kill(struct thread * thread, int pid, int tid, int sig, siginfo_t *info, int ptracecont)
{
@ -2010,9 +1726,18 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
#ifndef ENABLE_FUGAKU_HACKS
if (flags & MAP_HUGETLB) {
int hugeshift = flags & (0x3F << MAP_HUGE_SHIFT);
/* OpenMPI expects -EINVAL when trying to map
* /dev/shm/ file with MAP_SHARED | MAP_HUGETLB
*/
if (!(flags & MAP_ANONYMOUS)) {
error = -EINVAL;
goto out;
}
if (hugeshift == 0) {
/* default hugepage size */
flags |= ihk_mc_get_linux_default_huge_page_shift() <<
@ -2042,6 +1767,11 @@ SYSCALL_DECLARE(mmap)
goto out;
}
}
#else
if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB);
}
#endif
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;

View File

@ -174,9 +174,14 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
arch_show_interrupt_context(regs);
#ifdef ENABLE_TOFU
info.si_signo = SIGSTOP;
info.si_errno = 0;
#else
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
#endif
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);

View File

@ -868,6 +868,49 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Print a frame-pointer based call trace for @thread.
 *
 * @thread:         thread whose TID is printed in the header line
 * @pc:             program counter printed for the innermost frame
 * @sp:             address of the innermost frame record; the saved frame
 *                  pointer is read at sp and the return address at sp + 8
 * @kprintf_locked: non-zero when the caller already holds the kprintf lock
 *
 * The walk ends as soon as the saved frame pointer does not move strictly
 * upwards, leaves the stack (sp rounded up to KERNEL_STACK_SIZE), or the
 * return address lies outside [_head, _end].
 */
void __show_context_stack(struct thread *thread,
		unsigned long pc, uintptr_t sp, int kprintf_locked)
{
	extern char _head[], _end[];
	const uintptr_t limit = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
	unsigned long flags = 0;

	if (!kprintf_locked) {
		flags = kprintf_lock();
	}

	__kprintf("TID: %d, call stack (most recent first):\n",
		  thread->tid);
	__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);

	while (1) {
		uintptr_t next_fp = *(uintptr_t *)sp;
		uintptr_t ret;

		/* The next frame must be above the current one and on-stack. */
		if (next_fp <= sp || next_fp > limit) {
			break;
		}

		/* Read the return address only after the frame looks sane. */
		ret = *(uintptr_t *)(sp + 8);
		if (ret < (unsigned long)_head || ret > (unsigned long)_end) {
			break;
		}

		__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", ret - 4, sp, next_fp);
		sp = next_fp;
	}

	if (!kprintf_locked) {
		kprintf_unlock(flags);
	}
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
@ -1137,6 +1180,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Halt entry point that is only built under ENABLE_FUGAKU_HACKS; in this
 * file it simply forwards to cpu_halt().
 * NOTE(review): presumably intended for the panic path -- confirm with callers.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@ -1521,6 +1575,16 @@ void arch_print_stack(void)
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
/*
 * Return the instruction pointer (rip) stored in the general purpose
 * register area of the x86 user context that @reg points to.
 */
unsigned long arch_get_instruction_address(const void *reg)
{
	return ((const struct x86_user_context *)reg)->gpr.rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;

View File

@ -451,4 +451,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
/*
 * ALIGN_UP / ALIGN_DOWN round x up / down to a multiple of align.
 * The mask arithmetic is only correct when align is a power of two.
 * NOTE(review): ~((align) - 1) is evaluated in align's own type, so align
 * should be at least as wide as x (cast it at the call site if needed).
 */
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif

View File

@ -53,5 +53,9 @@ struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@ -84,7 +84,11 @@ enum __rlimit_resource
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
__RLIMIT_NLIMITS = 15,
/* timeout for RT tasks in us */
__RLIMIT_RTTIME = 15,
#define RLIMIT_RTTIME __RLIMIT_RTTIME
__RLIMIT_NLIMITS = 16,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS

View File

@ -74,6 +74,7 @@ SYSCALL_DELEGATED(89, readlink)
SYSCALL_HANDLED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(98, getrusage)
SYSCALL_HANDLED(99, sysinfo)
SYSCALL_HANDLED(100, times)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_HANDLED(102, getuid)
@ -157,6 +158,7 @@ SYSCALL_HANDLED(289, signalfd4)
#ifdef ENABLE_PERF
SYSCALL_HANDLED(298, perf_event_open)
#endif
SYSCALL_HANDLED(302, prlimit64)
#ifdef DCFA_KMOD
SYSCALL_HANDLED(303, mod_call)
#endif
@ -166,7 +168,7 @@ SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(322, execveat)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
SYSCALL_HANDLED(PROFILE_EVENT_MAX, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
@ -179,4 +181,8 @@ SYSCALL_HANDLED(802, linux_mlock)
SYSCALL_HANDLED(803, suspend_threads)
SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
/**** End of File ****/
/* Do not edit the lines including this comment and
* EOF just after it because those are used as a
* robust marker for the autotest patch.
*/

View File

@ -21,7 +21,9 @@
#include <registers.h>
#include <string.h>
#ifndef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#endif
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */

View File

@ -1651,6 +1651,14 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
if (memobj && ((memobj->flags & MF_PREMAP))) {
args.free_physical = 0;
}
if (vm->proc->straight_va &&
(void *)start == vm->proc->straight_va &&
(void *)end == (vm->proc->straight_va +
vm->proc->straight_len)) {
args.free_physical = 0;
}
args.memobj = memobj;
args.vm = vm;

View File

@ -147,44 +147,6 @@ int obtain_clone_cpuid(cpu_set_t *cpu_set, int use_last) {
return min_cpu;
}
/*
 * Ask clear_host_pte() to drop the whole user range
 * [region.user_start, region.user_end) of the current thread's address
 * space. Always returns 0.
 */
int
arch_clear_host_user_space()
{
	struct thread *cur = cpu_local_var(current);

	/* XXX: might be unnecessary */
	clear_host_pte(cur->vm->region.user_start,
		       (cur->vm->region.user_end -
			cur->vm->region.user_start),
		       0);

	return 0;
}
/*
 * rt_sigaction system call: install and/or query the action for a signal.
 *
 * arg0: signal number
 * arg1: user pointer to the new action (may be NULL)
 * arg2: user pointer receiving the previous action (may be NULL)
 * arg3: size of the signal set, must equal sizeof(sigset_t)
 *
 * Returns -EINVAL on a size mismatch, -EFAULT when a user copy fails,
 * otherwise the result of do_sigaction().
 */
SYSCALL_DECLARE(rt_sigaction)
{
	int sig = ihk_mc_syscall_arg0(ctx);
	const struct sigaction *act = (const struct sigaction *)ihk_mc_syscall_arg1(ctx);
	struct sigaction *oact = (struct sigaction *)ihk_mc_syscall_arg2(ctx);
	size_t sigsetsize = ihk_mc_syscall_arg3(ctx);
	struct k_sigaction new_sa, old_sa;
	int rc;

	if (sigsetsize != sizeof(sigset_t)) {
		return -EINVAL;
	}

	/* Fetch the requested action from user space, if one was given. */
	if (act && copy_from_user(&new_sa.sa, act, sizeof new_sa.sa)) {
		return -EFAULT;
	}

	rc = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);

	/* Hand the previous action back only when the update succeeded. */
	if (rc == 0 && oact &&
	    copy_to_user(oact, &old_sa.sa, sizeof old_sa.sa)) {
		return -EFAULT;
	}

	return rc;
}
SYSCALL_DECLARE(prctl)
{
struct process *proc = cpu_local_var(current)->proc;
@ -1039,82 +1001,6 @@ out:
return restart;
}
/*
 * Find the first pending signal entry of @thread that is not blocked by
 * the first word of the thread's signal mask.
 *
 * The shared list (thread->sigcommon->sigpending) is searched first, then
 * the per-thread list (thread->sigpending), each under its own lock.
 *
 * @delflag: non-zero takes the lists' writer lock and unlinks the entry
 *           that is returned; zero takes the reader lock, leaves the entry
 *           queued, and skips SIGCHLD/SIGURG/SIGCONT entries whose handler
 *           is NULL or (void *)1.
 *
 * Returns the matching entry, or NULL when neither list has one.
 */
static struct sig_pending *
getsigpending(struct thread *thread, int delflag){
struct list_head *head;
mcs_rwlock_lock_t *lock;
struct mcs_rwlock_node_irqsave mcs_rw_node;
struct sig_pending *next;
struct sig_pending *pending;
__sigset_t w;
__sigset_t x;
int sig;
struct k_sigaction *k;
/* Only the first word of the blocked mask is consulted. */
w = thread->sigmask.__val[0];
lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending;
for(;;) {
if (delflag) {
mcs_rwlock_writer_lock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
}
list_for_each_entry_safe(pending, next, head, list){
/* sig becomes the 1-based position of the highest bit set in the entry's mask. */
for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1);
k = thread->sigcommon->action + sig - 1;
/* NOTE(review): NULL / (void *)1 presumably correspond to SIG_DFL / SIG_IGN -- confirm. */
if(delflag ||
(sig != SIGCHLD &&
sig != SIGURG &&
sig != SIGCONT) ||
(k->sa.sa_handler != (void *)1 &&
k->sa.sa_handler != NULL)){
/* Entry qualifies only if its signal is not blocked. */
if(!(pending->sigmask.__val[0] & w)){
if(delflag)
list_del(&pending->list);
/* Release in the same mode the lock was taken in. */
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
return pending;
}
}
}
if (delflag) {
mcs_rwlock_writer_unlock(lock, &mcs_rw_node);
}
else {
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
}
/* Second pass (per-thread list) is done: nothing matched. */
if(lock == &thread->sigpendinglock)
return NULL;
/* Switch from the shared list to the per-thread list. */
lock = &thread->sigpendinglock;
head = &thread->sigpending;
}
/* Not reached: the loop above only exits through return. */
return NULL;
}
/*
 * Report whether @thread has a pending signal entry that
 * getsigpending(thread, 0) would select, without dequeuing it.
 *
 * Both pending lists are first checked for emptiness (without taking
 * their locks) so the common "nothing queued" case returns NULL early.
 */
struct sig_pending *
hassigpending(struct thread *thread)
{
	int any_queued = !list_empty(&thread->sigpending) ||
			 !list_empty(&thread->sigcommon->sigpending);

	return any_queued ? getsigpending(thread, 0) : NULL;
}
int
interrupt_from_user(void *regs0)
{
@ -1129,175 +1015,6 @@ void save_syscall_return_value(int num, unsigned long rc)
return;
}
/** \brief Deliver the signals that have arrived for the current thread
 *
 * When no real thread is current (NULL or the idle thread), the run queue
 * is scanned instead and the first PS_INTERRUPTIBLE thread that has a
 * pending signal is switched to PS_RUNNING.
 *
 * @param rc return value of syscall
 * @param regs0 context (signals are not processed when it is non-NULL
 *              and the interrupt did not come from user mode)
 * @param num syscall number (-1: Not called on exiting system call)
 */
void
check_signal(unsigned long rc, void *regs0, int num)
{
	struct x86_user_context *regs = regs0;
	struct thread *thread;
	struct sig_pending *pending;
	int irqstate;

	if (clv == NULL) {
		return;
	}

	thread = cpu_local_var(current);
	if (thread == NULL || thread == &cpu_local_var(idle)) {
		struct thread *t;

		irqstate = cpu_disable_interrupt_save();
		ihk_mc_spinlock_lock_noirq(&(cpu_local_var(runq_lock)));
		list_for_each_entry(t, &(cpu_local_var(runq)), sched_list) {
			if (t != &cpu_local_var(idle) &&
			    t->status == PS_INTERRUPTIBLE &&
			    hassigpending(t)) {
				/* Make the first such sleeper runnable and stop. */
				t->status = PS_RUNNING;
				break;
			}
		}
		ihk_mc_spinlock_unlock_noirq(&(cpu_local_var(runq_lock)));
		cpu_restore_interrupt(irqstate);
		return;
	}

	if (regs != NULL && !interrupt_from_user(regs)) {
		return;
	}

	if (list_empty(&thread->sigpending) &&
	    list_empty(&thread->sigcommon->sigpending)) {
		return;
	}

	/* Drain every deliverable entry, dequeuing each one as we go. */
	while ((pending = getsigpending(thread, 1)) != NULL) {
		if (do_signal(rc, regs, thread, pending, num)) {
			num = -1;
		}
	}
	dkprintf("check_signal,queue is empty\n");
}
/*
 * Look for pending, unblocked signal entries of @thread that have not
 * been flagged yet (pending->interrupted == 0), flag them, and react.
 *
 * The caller must hold this CPU's runq_lock, with the saved IRQ state in
 * v->runq_irqstate.
 *
 * Returns 1 after RELEASING that runq_lock and either
 *   - calling terminate_mcexec(0, sig) when the signal's handler is NULL
 *     and the signal is not SIGCHLD/SIGURG/SIGCONT (found == 2), or
 *   - calling interrupt_syscall(thread, 0) for any other newly flagged
 *     entry (found == 1).
 * Returns 0, with runq_lock still held, when nothing new was found.
 */
static int
check_sig_pending_thread(struct thread *thread)
{
int found = 0;
struct list_head *head;
mcs_rwlock_lock_t *lock;
struct mcs_rwlock_node_irqsave mcs_rw_node;
struct sig_pending *next;
struct sig_pending *pending;
__sigset_t w;
__sigset_t x;
int sig = 0;
struct k_sigaction *k;
struct cpu_local_var *v;
v = get_this_cpu_local_var();
/* Only the first word of the blocked mask is consulted. */
w = thread->sigmask.__val[0];
/* Start with the shared list; the per-thread list follows. */
lock = &thread->sigcommon->lock;
head = &thread->sigcommon->sigpending;
for (;;) {
mcs_rwlock_reader_lock(lock, &mcs_rw_node);
list_for_each_entry_safe(pending, next, head, list){
/* sig becomes the 1-based position of the highest bit set in the entry's mask. */
for (x = pending->sigmask.__val[0], sig = 0; x;
sig++, x >>= 1);
k = thread->sigcommon->action + sig - 1;
/* NOTE(review): NULL / (void *)1 presumably correspond to SIG_DFL / SIG_IGN -- confirm. */
if ((sig != SIGCHLD &&
sig != SIGURG &&
sig != SIGCONT) ||
(k->sa.sa_handler != (void *)1 &&
k->sa.sa_handler != NULL)) {
if (!(pending->sigmask.__val[0] & w)) {
/* NOTE(review): 'interrupted' is written while only the reader lock is held -- confirm this is intended. */
if (pending->interrupted == 0) {
pending->interrupted = 1;
found = 1;
if (sig != SIGCHLD &&
sig != SIGURG &&
sig != SIGCONT &&
!k->sa.sa_handler) {
/* Stop scanning right away so that sig still names this signal below. */
found = 2;
break;
}
}
}
}
}
mcs_rwlock_reader_unlock(lock, &mcs_rw_node);
if (found == 2) {
break;
}
if (lock == &thread->sigpendinglock) {
break;
}
lock = &thread->sigpendinglock;
head = &thread->sigpending;
}
if (found == 2) {
/* Drop the caller's runq_lock before calling out. */
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
terminate_mcexec(0, sig);
return 1;
}
else if (found == 1) {
/* Drop the caller's runq_lock before calling out. */
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
interrupt_syscall(thread, 0);
return 1;
}
return 0;
}
/*
 * Walk this CPU's run queue and hand every thread that is currently
 * offloading a system call to check_sig_pending_thread().
 *
 * check_sig_pending_thread() releases v->runq_lock itself before it
 * returns non-zero, so after such a return the lock is taken again and
 * the scan restarts from the head of the queue.
 */
void
check_sig_pending(void)
{
struct thread *thread;
struct cpu_local_var *v;
/* NOTE(review): clv == NULL presumably means the per-CPU area is not set up yet -- confirm. */
if (clv == NULL)
return;
v = get_this_cpu_local_var();
repeat:
/* The saved IRQ state is kept in *v so that the callee can unlock with it. */
v->runq_irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
list_for_each_entry(thread, &(v->runq), sched_list) {
if (thread == NULL || thread == &cpu_local_var(idle)) {
continue;
}
/* Only threads with a system call offload in flight are examined. */
if (thread->in_syscall_offload == 0) {
continue;
}
/* NOTE(review): bit 32 of group_exit_status presumably flags a process
 * that is already exiting -- confirm where this bit is set.
 */
if (thread->proc->group_exit_status & 0x0000000100000000L) {
continue;
}
/* Non-zero: runq_lock was already dropped by the callee, so rescan. */
if (check_sig_pending_thread(thread))
goto repeat;
}
ihk_mc_spinlock_unlock(&v->runq_lock, v->runq_irqstate);
}
unsigned long
do_kill(struct thread *thread, int pid, int tid, int sig, siginfo_t *info,
int ptracecont)
@ -1713,6 +1430,14 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
if (flags & MAP_HUGETLB) {
/* OpenMPI expects -EINVAL when trying to map
* /dev/shm/ file with MAP_SHARED | MAP_HUGETLB
*/
if (!(flags & MAP_ANONYMOUS)) {
error = -EINVAL;
goto out;
}
switch (flags & (0x3F << MAP_HUGE_SHIFT)) {
case 0:
/* default hugepage size */

View File

@ -30,6 +30,9 @@ endif ()
# The target Linux architecture differs from the build host's processor:
# pass cross-build settings on to the kernel build.
if (NOT "${LINUX_ARCH}" STREQUAL "${CMAKE_HOST_SYSTEM_PROCESSOR}")
# Toolchain prefix = linker path with its trailing "ld" removed.
string(REGEX REPLACE "ld$" "" CROSS_COMPILE "${CMAKE_LINKER}")
if (CMAKE_CROSSCOMPILING)
# NOTE(review): presumably lets qemu-user locate the target's loader and
# libraries under CMAKE_FIND_ROOT_PATH during the kbuild run -- confirm.
list(APPEND KBUILD_MAKE_FLAGS "QEMU_LD_PREFIX=${CMAKE_FIND_ROOT_PATH}")
endif()
list(APPEND KBUILD_MAKE_FLAGS "ARCH=${ARCH}")
list(APPEND KBUILD_MAKE_FLAGS "CROSS_COMPILE=${CROSS_COMPILE}")
endif()

29
docs/Makefile Normal file
View File

@ -0,0 +1,29 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
# SPHINXINTL may be overridden when sphinx-intl is not on PATH, e.g.
#   make po SPHINXINTL=/path/to/sphinx-intl
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXINTL = sphinx-intl
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# po and ja produce no file of that name, so they must always run.
.PHONY: help po ja Makefile

# Extract translatable messages and refresh the Japanese message catalogs.
po:
	$(SPHINXBUILD) -b gettext "$(SOURCEDIR)" "$(BUILDDIR)/gettext"
	$(SPHINXINTL) update -p "$(BUILDDIR)/gettext" -l ja

# Build the Japanese HTML documentation.
ja:
	$(SPHINXBUILD) -b html -D language=ja "$(SOURCEDIR)" "$(BUILDDIR)/html/ja"

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# The top-level README.rst is regenerated from summary.rst and doc.rst first.
%: Makefile
	echo ".. figure:: docs/mckernel-logo.png" > ../README.rst; echo >> ../README.rst
	cat summary.rst >> ../README.rst; echo >> ../README.rst
	cat doc.rst >> ../README.rst
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

446
docs/NEWS.rst Normal file
View File

@ -0,0 +1,446 @@
=============================================
Version 1.7.0 (Nov 25, 2020)
=============================================
----------------------
IHK major updates
----------------------
#. ihklib: add ihk_create_os_str
#. ihklib: ihk_reserve_mem: add capped best effort to balanced
------------------------
IHK major bug fixes
------------------------
#. make /dev/mcdN sharable
#. acpi: compat: RHEL-8.2
#. gic_chip_data: compat: RHEL-8.3
----------------------
McKernel major updates
----------------------
#. arm64: Contiguous PTE support
#. arm64: Scalable Vector Extension (SVE) support
#. arm64: PMU overflow interrupt support
#. arm64 port: Direct access to Mckernel memory from Linux
#. arm64 port: utility thread offloading, which spawns thread onto Linux CPU
#. eclair: support for live debug
#. Crash utility extension
#. Replace mcoverlayfs with a soft userspace overlay
#. Build system is switched to cmake
#. Core dump includes thread information
#. mcinspect and mcps: DWARF based LWK inspection
------------------------
McKernel major bug fixes
------------------------
#. shmobj: Fix rusage counting for large page
#. mcctrl control: task start_time changed to u64 nsec
#. mcctrl: add handling for one more level of page tables
#. Add kernel argument to turn on/off time sharing
#. flatten_string / process env: realign env and clear trailing bits
#. madvise: Add MADV_HUGEPAGE support
#. mcctrl: remove in-kernel calls to syscalls
#. arch_cpu_read_write_register: error return fix.
#. set_cputime(): interrupt enable/disable fix.
#. set_mempolicy(): Add mode check.
#. mbind(): Fix memory_range_lock deadlock.
#. ihk_ikc_recv: Record channel to packet for release
#. Add set_cputime() kernel to kernel case and mode enum.
#. execve: Call preempt_enable() before error-exit
#. memory/x86_64: fix linux safe_kernel_map
#. do_kill(): fix pids table when nr of threads is larger than num_processors
#. shmget: Use transparent huge pages when page size isn't specified
#. prctl: Add support for PR_SET_THP_DISABLE and PR_GET_THP_DISABLE
#. monitor_init: fix undetected hang on highest numbered core
#. init_process_stack: change premapped stack size based on arch
#. x86 syscalls: add a bunch of XXat() delegated syscalls
#. do_pageout: fix direct kernel-user access
#. stack: add hwcap auxval
#. perf counters: add arch-specific perf counters
#. Added check of nohost to terminate_host().
#. kmalloc: Fix address order in free list
#. sysfs: use nr_cpu_ids for cpumasks (fixes libnuma parsing error on ARM)
#. monitor_init: Use ihk_mc_cpu_info()
#. Fix ThunderX2 write-combined PTE flag insanity
#. ARM: eliminate zero page mapping (i.e., init_low_area())
#. eliminate futex_cmpxchg_enabled check (not used and dereffed a NULL pointer)
#. page_table: Fix return value of lookup_pte when ptl4 is blank
#. sysfs: add missing symlinks for cpu/node
#. Make Linux handler run when mmap to procfs.
#. Separate mmap area from program loading (relocation) area
#. move rusage into kernel ELF image (avoid dynamic alloc before NUMA init)
#. arm: turn off cpu on panic
#. page fault handler: protect thread accesses
#. Register PPD and release_handler at the same time.
#. fix to missing exclusive processing between terminate() and finalize_process().
#. perfctr_stop: add flags to no 'disable_intens'
#. fileobj, shmobj: free pages in object destructor (as opposed to page_unmap())
#. clear_range_l1, clear_range_middle: Fix handling contiguous PTE
#. do_mmap: don't pre-populate the whole file when asked for smaller segment
#. invalidate_one_page: Support shmobj and contiguous PTE
#. ubsan: fix undefined shifts
#. x86: disable zero mapping and add a boot pt for ap trampoline
#. rusage: Don't count PF_PATCH change
#. Fixed time processing.
#. copy_user_pte: vmap area not owned by McKernel
#. gencore: Zero-clear ELF header and memory range table
#. rpm: ignore CMakeCache.txt in dist and relax BuildRequires on cross build
#. gencore: Allocate ELF header to heap instead of stack
#. nanosleep: add cpu_pause() in spinwait loop
#. init_process: add missing initializations to proc struct
#. rus_vm_fault: always use a packet on the stack
#. process stack: use PAGE_SIZE in aux vector
#. copy_user_pte: base memobj copy on range & VR_PRIVATE
#. arm64: ptrace: Fix overwriting 1st argument with return value
#. page fault: use cow for private device mappings
#. reproducible builds: remove most install paths in c code
#. page fault: clear writable bit for non-dirtying access to shared ranges
#. mcreboot/mcstop+release: support for regular user execution
#. irqbalance_mck: replace extra service with service drop-in
#. do_mmap: give addr argument a chance even if not MAP_FIXED
#. x86: fix xchg() and cmpxchg() macros
#. IHK: support for using Linux work IRQ as IKC interrupt (optional)
#. MCS: fix ARM64 issue by using smp_XXX() functions (i.e., barrier()s)
#. procfs: add number of threads to stat and status
#. memory_range_lock: Fix deadlock in procfs/sysfs handler
#. flush instruction cache at context switch time if necessary
#. arm64: Fix PMU related functions
#. page_fault_process_memory_range: Disable COW for VM region with zeroobj
#. extend_process_region: Fall back to demand paging when not contiguous
#. munmap: fix deadlock with remote pagefault on vm range lock
#. procfs: if memory_range_lock fails, process later
#. migrate-cpu: Prevent migration target from calling schedule() twice
#. sched_request_migrate(): fix race condition between migration req and IRQs
#. get_one_cpu_topology: Renumber core_id (physical core id)
#. bb7e140 procfs cpuinfo: use sequence number as processor
#. set_host_vma(): do NOT read protect Linux VMA
#. hugefileobj: rewrite page allocation/handling
#. VM: use RW spinlock for vm_range_lock
#. /dev/shm: use Linux PFNs and populate mappings
#. Make struct ihk_os_rusage compatible with mckernel_rusage (workaround for Fugaku)
#. Record pthread routine address in clone(), keep helper threads on caller CPU core (workaround for Fugaku)
#. struct process: fix type of group_exit_status
#. tgkill: Fix argument validation
#. set_robust_list: Add error check
#. mcexec: Don't forward SIGTSTP, SIGTTIN and SIGTTOU to mckernel
#. syscall: add prlimit64
#. stack: grow on page fault
#. mcexec: use FLIB_NUM_PROCESS_ON_NODE when -n not specified (Fugaku specific)
===========================================
Version 1.6.0 (Nov 11, 2018)
===========================================
-----------------------------------------------
McKernel major updates
-----------------------------------------------
#. McKernel and Linux share one unified kernel virtual address space.
That is, McKernel sections reside in the Linux address range reserved for
modules. In this way, Linux can access the McKernel kernel memory area.
#. hugetlbfs support
#. IHK is now included as a git submodule
#. Debug messages can be turned on/off on a per-source-file basis at run-time.
#. It's prohibited for McKernel to access physical memory ranges which Linux didn't give to McKernel.
#. UTI (capability to spawn a thread on Linux CPU) improvement:
* System calls issued from the thread are hooked by modifying binary in memory.
---------------------------
McKernel major bug fixes
---------------------------
#<digits> below denotes the redmine issue number (https://postpeta.pccluster.org/redmine/).
1. #926: shmget: Hide object with IPC_RMID from shmget
2. #1028: init_process: Inherit parent cpu_set
3. #995: Fix shebang recorded in argv[0]
4. #1024: Fix VMAP virtual address leak
5. #1109: init_process_stack: Support "ulimit -s unlimited"
6. x86 mem init: do not map identity mapping
7. mcexec_wait_syscall: requeue potential request on interrupted wait
8. mcctrl_ikc_send_wait: fix interrupt with do_frees == NULL
9. pager_req_read: handle short read
10. kprintf: only call eventfd() if it is safe to interrupt
11. process_procfs_request: Add Pid to /proc/<PID>/status
12. terminate: fix oversubscribe hang when waiting for other threads on same CPU to die
13. mcexec: Do not close fd returned to mckernel side
14. #976: execve: Clear sigaltstack and fp_regs
15. #1002: perf_event: Specify counter by bit_mask on start/stop
16. #1027: schedule: Don't reschedule immediately when wake up on migrate
17. mcctrl: look up unexported symbols at runtime
18. __sched_wakeup_thread: Notify interrupt_exit() of re-schedule
19. futex_wait_queue_me: Spin-sleep when timeout and idle_halt is specified
20. #1167: ihk_os_getperfevent,setperfevent: Timeout IKC sent by mcctrl
21. devobj: fix object size (POSTK_DEBUG_TEMP_FIX_36)
22. mcctrl: remove rus page cache
23. #1021: procfs: Support multiple reads of e.g. ``/proc/*/maps``
24. #1006: wait: Delay wake-up parent within switch context
25. #1164: mem: Check if phys-mem is within the range of McKernel memory
26. #1039: page_fault_process_memory_range: Remove ihk_mc_map_virtual for CoW of device map
27. partitioned execution: pass process rank to LWK
28. process/vm: implement access_ok()
29. spinlock: rewrite spinlock to use Linux ticket head/tail format
30. #986: Fix deadlock involving mmap_sem and memory_range_lock
31. Prevent one CPU from getting chosen by concurrent forks
32. #1009: check_signal: system call restart is done only once
33. #1176: syscall: the signal received during system call processing is not processed.
34. #1036 syscall_time: Handle by McKernel
35. #1165 do_syscall: Delegate system calls to the mcexec with the same pid
36. #1194 execve: Fix calling ptrace_report_signal after preemption is disabled
37. #1005 coredump: Exclude special areas
38. #1018 procfs: Fix pread/pwrite to procfs fail when specified size is bigger than 4MB
39. #1180 sched_setaffinity: Check migration after decrementing in_interrupt
40. #771, #1179, #1143 ptrace supports threads
41. #1189 procfs/do_fork: wait until procfs entries are registered
42. #1114 procfs: add '/proc/pid/stat' to mckernel side and fix its comm
43. #1116 mcctrl procfs: check entry was returned before using it
44. #1167 ihk_os_getperfevent,setperfevent: Return -ETIME when IKC timeouts
45. mcexec/execve: fix shebangs handling
46. procfs: handle 'comm' on mckernel side
47. ihk_os_setperfevent: Return number of registered events
48. mcexec: fix terminating zero after readlink()
===========================================
Version 1.5.1 (July 9, 2018)
===========================================
-----------------------------------------------
McKernel major updates
-----------------------------------------------
Watchdog timer to detect hang of McKernel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mcexec prints out the following line to its stderr when a hang of McKernel is detected.
::
mcexec detected hang of McKernel
The watchdog timer is enabled by passing -i <timeout_in_sec> option to mcreboot.sh. <timeout_in_sec> specifies the interval of checking if McKernel is alive.
For example, specify ``-i 600`` to detect the hang with 10 minutes interval:
::
mcreboot.sh -i 600
The detailed step of the hang detection is as follows.
#. mcexec acquires eventfd for notification from IHK and perform epoll() on it.
#. A daemon called ihkmond monitors the state of McKernel periodically with the interval specified by the -i option. It judges that McKernel is hanging and notifies mcexec by the eventfd if its state hasn't changed since the last check.
---------------------------
McKernel major bug fixes
---------------------------
1. #1146: pager_req_map(): do not take mmap_sem if not needed
2. #1135: prepare_process_ranges_args_envs(): fix saving cmdline
3. #1144: fileobj/devobj: record path name
4. #1145: fileobj: use MCS locks for per-file page hash
5. #1076: mcctrl: refactor prepare_image into new generic ikc send&wait
6. #1072: execve: fix execve with oversubscribing
7. #1132: execve: use thread variable instead of cpu_local_var(current)
8. #1117: mprotect: do not set page table writable for cow pages
9. #1143: syscall wait4: add _WALL (POSTK_DEBUG_ARCH_DEP_44)
10. #1064: rusage: Fix initialization of rusage->num_processors
11. #1133: pager_req_unmap: Put per-process data at exit
12. #731: do_fork: Propagate error code returned by mcexec
13. #1149: execve: Reinitialize vm_regions's map area on execve
14. #1065: procfs: Show file names in /proc/<PID>/maps
15. #1112: mremap: Fix type of size arguments (from ssize_t to size_t)
16. #1121: sched_getaffinity: Check arguments in the same order as in Linux
17. #1137: mmap, mremap: Check arguments in the same order as in Linux
18. #1122: fix return value of sched_getaffinity
19. #732: fix: /proc/<PID>/maps outputs an unnecessary NUL character
===================================
Version 1.5.0 (Apr 5, 2018)
===================================
--------------------------------------
McKernel major updates
--------------------------------------
1. Aid for Linux version migration: Detect /proc, /sys format change
between two kernel versions
2. Swap out
* Only swap-out anonymous pages for now
3. Improve support of /proc/maps
4. mcstat: Linux tool to show resource usage
---------------------------
McKernel major bug fixes
---------------------------
#. #727: execve: Fix memory leak when receiving SIGKILL
#. #829: perf_event_open: Support PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
#. #906: mcexec: Check return code of fork()
#. #1038: mcexec: Timeout when incorrect value is given to -n option
#. #943 #945 #946 #960 #961: mcexec: Support strace
#. #1029: struct thread is not released with stress-test involving signal and futex
#. #863 #870 : Respond immediately to terminating signal when offloading system call
#. #1119: translate_rva_to_rpa(): use 2MB blocks in 1GB pages on x86
#. #898: Shutdown OS only after no in-flight IKC exist
#. #882: release_handler: Destroy objects as the process which opened it
#. #882: mcexec: Make child process exit if the parent is killed during fork()
#. #925: XPMEM: Don't destroy per-process object of the parent
#. #885: ptrace: Support the case where a process attaches its child
#. #1031: sigaction: Support SA_RESETHAND
#. #923: rus_vm_fault: Return error when a thread not performing system call offloading causes remote page fault
#. #1032 #1033 #1034: getrusage: Fix ru_maxrss, RUSAGE_CHILDREN, ru_stime related bugs
#. #1120: getrusage: Fix deadlock on thread->times_update
#. #1123: Fix deadlock related to wait_queue_head_list_node
#. #1124: Fix deadlock of calling terminate() from terminate()
#. #1125: Fix deadlock related to thread status
* Related functions are: hold_thread(), do_kill() and terminate()
#. #1126: uti: Fix uti thread on the McKernel side blocks others in do_syscall()
#. #1066: procfs: Show Linux /proc/self/cgroup
#. #1127: prepare_process_ranges_args_envs(): fix generating saved_cmdline to avoid PF in strlen()
#. #1128: ihk_mc_map/unmap_virtual(): do proper TLB invalidation
#. #1043: terminate(): fix update_lock and threads_lock order to avoid deadlock
#. #1129: mcreboot.sh: Save ``/proc/irq/*/smp_affinity`` to ``/tmp/mcreboot``
#. #1130: mcexec: drop READ_IMPLIES_EXEC from personality
--------------------
McKernel workarounds
--------------------
#. Forbid CPU oversubscription
* It can be turned on by mcreboot.sh -O option
===================================
Version 1.4.0 (Oct 30, 2017)
===================================
-----------------------------------------------------------
Abstracted event type support in perf_event_open()
-----------------------------------------------------------
PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE types are supported.
----------------------------------
Direct user-space access
----------------------------------
Code lines using direct user-space access (e.g. passing a user-space
pointer to memcpy()) become more portable across processor
architectures. The modification follows these rules.
1. Move the code section as it is to the architecture dependent
directory if it is a part of the critical-path.
2. Otherwise, rewrite the code section by using the portable methods.
The methods include copy_from_user(), copy_to_user(),
pte_get_phys() and phys_to_virt().
--------------------------------
MPI and OpenMP micro-bench tests
--------------------------------
The performance figures of MPI and OpenMP primitives are compared with
those of Linux by using Intel MPI Benchmarks and EPCC OpenMP Micro
Benchmark.
===================================
Version 1.3.0 (Sep 30, 2017)
===================================
--------------------
Kernel dump
--------------------
#. A dump level of "only kernel memory" is added.
The following two levels are available now:
+--+-----------------------+
| 0|Dump all |
+--+-----------------------+
|24|Dump only kernel memory|
+--+-----------------------+
The dump level can be set by -d option in ihkosctl or the argument
for ihk_os_makedumpfile(), as shown in the following examples:
::
Command: ihkosctl 0 dump -d 24
Function call: ihk_os_makedumpfile(0, NULL, 24, 0);
#. Dump file is created when Linux panics.
The dump level can be set by dump_level kernel argument, as shown in the
following example:
::
ihkosctl 0 kargs "hidos dump_level=24"
The IHK dump function is registered to panic_notifier_list when creating /dev/mcdX and called when Linux panics.
-----------------------------
Quick Process Launch
-----------------------------
MPI process launch time and some of the initialization time can be
reduced in application consisting of multiple MPI programs which are
launched in turn in the job script.
The following two steps should be performed to use this feature:
#. Replace mpiexec with ql_mpiexec_start and add some lines for ql_mpiexec_finalize in the job script
#. Modify the app so that it can repeat calculations and wait for the instructions from ql_mpiexec_{start,finalize} at the end of the loop
The first step is explained using an example. Assume the original job script looks like this:
.. code-block:: none
/* Execute ensemble simulation and then data assimilation, and repeat this ten times */
for i in {1..10}; do
/* Each ensemble simulation execution uses 100 nodes, launch ten of them in parallel */
for j in {1..10}; do
mpiexec -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
/* Wait until the ten ensemble simulation programs finish */
for j in {1..10}; do wait ${pids[$j]}; done
/* Launch one data assimilation program using 1000 nodes */
mpiexec -n 1000 -machinefile ./list2 p2.out a2
done
The job script should be modified like this:
.. code-block:: none
for i in {1..10}; do
for j in {1..10}; do
/* Replace mpiexec with ql_mpiexec_start */
ql_mpiexec_start -n 100 -machinefile ./list1_$j p1.out a1 & pids[$j]=$!;
done
for j in {1..10}; do wait ${pids[$j]}; done
ql_mpiexec_start -n 1000 -machinefile ./list2 p2.out a2
done
/* p1.out and p2.out don't exit but are waiting for the next calculation. So tell them to exit */
for j in {1..10}; do
ql_mpiexec_finalize -machinefile ./list1_$j p1.out a1;
done
ql_mpiexec_finalize -machinefile ./list2 p2.out a2;
The second step is explained using a pseudo-code.
.. code-block:: none
MPI_Init();
Prepare data exchange with preceding / following MPI programs
loop:
foreach Fortran module
Initialize data using command-line arguments, parameter files, environment variables
Input data from preceding MPI programs / Read snap-shot
Perform main calculation
Output data to following MPI programs / Write snap-shot
/* ql_client() waits for a command from ql_mpiexec_{start,finalize} */
if (ql_client() == QL_CONTINUE) { goto loop; }
MPI_Finalize();
qlmpilib.h should be included in the code and libql{mpi,fort}.so should be linked to the executable file.

65
docs/archtecture.rst Normal file
View File

@ -0,0 +1,65 @@
Architectural Overview
======================
At the heart of the stack is a low-level software infrastructure called
Interface for Heterogeneous Kernels (IHK). IHK is a general framework
that provides capabilities for partitioning resources in a many-core
environment (e.g., CPU cores and physical memory) and it enables
management of lightweight kernels. IHK can allocate and release host
resources dynamically and no reboot of the host machine is required when
altering configuration. IHK also provides a low-level inter-kernel
messaging infrastructure, called the Inter-Kernel Communication (IKC)
layer. An architectural overview of the main system components is shown
below.
.. figure:: mckernel.png
:alt: arch
McKernel is a lightweight kernel written from scratch. It is designed
for HPC and is booted from IHK. McKernel retains a binary compatible ABI
with Linux, however, it implements only a small set of performance
sensitive system calls and the rest are offloaded to Linux.
Specifically, McKernel has its own memory management, it supports
processes and multi-threading with a simple round-robin cooperative
(tick-less) scheduler, and it implements signaling. It also allows
inter-process memory mappings and it provides interfaces to hardware
performance counters.
Functionality
-------------
An overview of some of the principal functionalities of the IHK/McKernel
stack is provided below.
System Call Offloading
~~~~~~~~~~~~~~~~~~~~~~
System call forwarding in McKernel is implemented as follows. When an
offloaded system call occurs, McKernel marshals the system call number
along with its arguments and sends a message to Linux via a dedicated
IKC channel. The corresponding proxy process running on Linux is by
default waiting for system call requests through an ioctl() call into
IHK's system call delegator kernel module. The delegator kernel module's
IKC interrupt handler wakes up the proxy process, which returns to
userspace and simply invokes the requested system call. Once it obtains
the return value, it instructs the delegator module to send the result
back to McKernel, which subsequently passes the value to user-space.
Unified Address Space
~~~~~~~~~~~~~~~~~~~~~
The unified address space model in IHK/McKernel ensures that offloaded
system calls can seamlessly resolve arguments even in case of pointers.
This mechanism is depicted below and is implemented as follows.
.. figure:: unified_address_space_en.png
:alt: unified_ap
First, the proxy process is compiled as a position independent binary,
which enables us to map the code and data segments specific to the proxy
process to an address range which is explicitly excluded from McKernel's
user space. The grey box on the right side of the figure demonstrates
the excluded region. Second, the entire valid virtual address range of
McKernel's application user-space is covered by a special mapping in the
proxy process for which we use a pseudo file mapping in Linux. This
mapping is indicated by the blue box on the left side of the figure.

47
docs/background.rst Normal file
View File

@ -0,0 +1,47 @@
Background and Motivation
=========================
With the growing complexity of high-end supercomputers, the current
system software stack faces significant challenges as we move forward to
exascale and beyond. The necessity to deal with extreme degree of
parallelism, heterogeneous architectures, multiple levels of memory
hierarchy, power constraints, etc., advocates operating systems that can
rapidly adapt to new hardware requirements, and that can support novel
programming paradigms and runtime systems. On the other hand, a new
class of more dynamic and complex applications are also on the horizon,
with an increasing demand for application constructs such as in-situ
analysis, workflows, elaborate monitoring and performance tools. This
complexity relies not only on the rich features of POSIX, but also on
the Linux APIs (such as the */proc*, */sys* filesystems, etc.) in
particular.
Two Traditional HPC OS Approaches
---------------------------------
Traditionally, light-weight operating systems specialized for HPC
followed two approaches to tackle scalable execution of large-scale
applications. In the full weight kernel (FWK) approach, a full Linux
environment is taken as the basis, and features that inhibit attaining
HPC scalability are removed, i.e., making it light-weight. The pure
light-weight kernel (LWK) approach, on the other hand, starts from
scratch and effort is undertaken to add sufficient functionality so that
it provides a familiar API, typically something close to that of a
general purpose OS, while at the same time it retains the desired
scalability and reliability attributes. Neither of these approaches
yields a fully Linux compatible environment.
The Multi-kernel Approach
-------------------------
A hybrid approach recognized recently by the system software community
is to run Linux simultaneously with a lightweight kernel on compute
nodes and multiple research projects are now pursuing this direction.
The basic idea is that simulations run on an HPC tailored lightweight
kernel, ensuring the necessary isolation for noiseless execution of
parallel applications, but Linux is leveraged so that the full POSIX API
is supported. Additionally, the small code base of the LWK can also
facilitate rapid prototyping for new, exotic hardware features.
Nevertheless, the questions of how to share node resources between the
two types of kernels, where do device drivers execute, how exactly do
the two kernels interact with each other and to what extent are they
integrated, remain subjects of ongoing debate.

View File

@ -0,0 +1,60 @@
Boot McKernel
----------------
A boot script called ``mcreboot.sh`` is provided under ``sbin`` in the install
folder. To boot on logical CPU 1 with 512MB of memory, use the following
invocation:
::
export TOP=${HOME}/ihk+mckernel/
cd ${TOP}
sudo ./sbin/mcreboot.sh -c 1 -m 512m
You should see something similar to this if you display McKernel's
kernel message log:
.. code-block:: none
./sbin/ihkosctl 0 kmsg
IHK/McKernel started.
[ -1]: no_execute_available: 1
[ -1]: map_fixed: phys: 0xfee00000 => 0xffff860000009000 (1 pages)
[ -1]: setup_x86 done.
[ -1]: ns_per_tsc: 385
[ -1]: KCommand Line: hidos dump_level=24
[ -1]: Physical memory: 0x1ad3000 - 0x21000000, 525520896 bytes, 128301 pages available @ NUMA: 0
[ -1]: NUMA: 0, Linux NUMA: 0, type: 1, available bytes: 525520896, pages: 128301
[ -1]: NUMA 0 distances: 0 (10),
[ -1]: map_fixed: phys: 0x28000 => 0xffff86000000a000 (2 pages)
[ -1]: Trampoline area: 0x28000
[ -1]: map_fixed: phys: 0x0 => 0xffff86000000c000 (1 pages)
[ -1]: # of cpus : 1
[ -1]: locals = ffff880001af6000
[ 0]: BSP: 0 (HW ID: 1 @ NUMA 0)
[ 0]: BSP: booted 0 AP CPUs
[ 0]: Master channel init acked.
[ 0]: vdso is enabled
IHK/McKernel booted.
Run a simple program on McKernel
-----------------------------------
The mcexec command line tool (which is also the Linux proxy process) can
be used for executing applications on McKernel:
::
./bin/mcexec hostname
centos-vm
Shutdown McKernel
--------------------
Finally, to shutdown McKernel and release CPU/memory resources back to
Linux use the following command:
::
sudo ./sbin/mcstop+release.sh

173
docs/conf.py Normal file
View File

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = u'IHK/McKernel'
copyright = u'2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa'
author = u'Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa'
# The short X.Y version
version = u''
# The full version, including alpha/beta/rc tags
release = u''
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'recommonmark',
]
numfig = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
source_suffix = ['.rst', '.md']
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'IHKMcKerneldoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'IHKMcKernel.tex', u'IHK/McKernel Documentation',
u'Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'ihkmckernel', u'IHK/McKernel Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'IHKMcKernel', u'IHK/McKernel Documentation',
author, 'IHKMcKernel', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

11
docs/contact.rst Normal file
View File

@ -0,0 +1,11 @@
Contact
=======
Please give your feedback to us via one of the following mailing lists.
Subscription via
`www.pccluster.org <http://www.pccluster.org/mailman/listinfo/mckernel-users>`__
is needed.
- English: mckernel-users@pccluster.org
- Japanese: mckernel-users-jp@pccluster.org

5
docs/cover.rst Normal file
View File

@ -0,0 +1,5 @@
.. include:: logo.rst
.. include:: summary.rst
See `Quick Guide -- Installation <quick.html#installation>`__ for a jump start.

5
docs/doc.rst Normal file
View File

@ -0,0 +1,5 @@
Documentation
=============
Documentation is available
`here <https://ihkmckernel.readthedocs.io>`__.

18
docs/ihk_developers.rst Normal file
View File

@ -0,0 +1,18 @@
.. sectnum::
:suffix: .
:depth: 3
External Specs
==============
Overview
--------
Function Specs
--------------
Command / Daemon Specs
----------------------
Booting LWK
===========

58
docs/index.rst Normal file
View File

@ -0,0 +1,58 @@
.. IHK/McKernel documentation master file, created by
sphinx-quickstart on Mon Jul 27 15:57:24 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
===================
IHK/McKernel
===================
.. include:: cover.rst
.. toctree::
:maxdepth: 2
:caption: Quick Guide
quick
.. toctree::
:maxdepth: 2
:caption: Users' Guide
users
.. toctree::
:maxdepth: 2
:caption: Operators' Guide
operators
.. toctree::
:maxdepth: 2
:caption: IHK Developers' Guide
ihk_developers
.. toctree::
:maxdepth: 2
:caption: McKernel Developers' Guide
mckernel_developers
.. toctree::
:maxdepth: 2
:caption: IHK Specifications
spec/ihk
.. toctree::
:maxdepth: 2
:caption: McKernel Specifications
spec/mckernel
.. toctree::
:maxdepth: 2
:caption: What's New
NEWS

220
docs/install.rst Normal file
View File

@ -0,0 +1,220 @@
.. highlight:: bash
Installation
============
The following OS distributions and platforms are recommended:
* OS distribution
* CentOS 7.3 or later
* RHEL 7.3 or later
* Platform
* Intel Xeon
* Intel Xeon Phi
* Fujitsu A64FX
Prepare files for building McKernel
-----------------------------------
Grant read permission to the System.map file of your kernel version on the build machine:
::
sudo chmod a+r /boot/System.map-`uname -r`
Install the following packages to the build machine:
::
cmake kernel-devel binutils-devel systemd-devel numactl-devel gcc make nasm git libdwarf-devel
When having access to repositories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:
::
sudo subscription-manager repos --enable codeready-builder-for-rhel-8-$(/bin/arch)-rpms
On CentOS 8, enable the PowerTools repository:
::
sudo dnf config-manager --set-enabled PowerTools
Install with yum:
::
sudo yum install cmake kernel-devel binutils-devel systemd-devel numactl-devel gcc make nasm git libdwarf-devel
When not having access to repositories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Ask the system administrator to install them. Note that ``libdwarf-devel`` is in the CodeReady Linux Builder repository on RHEL 8 or in the PowerTools repository on CentOS 8.
Clone, compile, install
--------------------------
Clone the source code:
::
mkdir -p ~/src/ihk+mckernel/
cd ~/src/ihk+mckernel/
git clone --recursive -b development https://github.com/RIKEN-SysSoft/mckernel.git
(Optional) Checkout to the specific branch or version:
::
cd mckernel
git checkout <pathspec>
git submodule update
For example, if you want to try the development branch, use
“development” as the pathspec. If you want to try the prerelease version
1.7.0-0.2, use “1.7.0-0.2”.
Move to build directory:
::
mkdir -p ~/src/ihk+mckernel/build && cd ~/src/ihk+mckernel/build
Run cmake:
When not cross-compiling:
~~~~~~~~~~~~~~~~~~~~~~~~~
::
cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel ../mckernel
When cross-compiling:
~~~~~~~~~~~~~~~~~~~~~
::
cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel \
-DUNAME_R=<target_uname_r> \
-DKERNEL_DIR=<kernel_dir> \
-DBUILD_TARGET=smp-arm64 \
-DCMAKE_TOOLCHAIN_FILE=../mckernel/cmake/cross-aarch64.cmake \
../mckernel
Install with cmake
~~~~~~~~~~~~~~~~~~~~~~
Install with make:
::
make -j install
The kernel modules and McKernel kernel image should be installed
under the **ihk+mckernel** folder in your home directory.
Install with rpm
~~~~~~~~~~~~~~~~~~~~
Create the tarball and the spec file:
::
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
(optional) Edit the following line in ``scripts/mckernel.spec`` to change
cmake options. For example:
::
%cmake -DCMAKE_BUILD_TYPE=Release \
-DUNAME_R=%{kernel_version} \
-DKERNEL_DIR=%{kernel_dir} \
%{?cmake_libdir:-DCMAKE_INSTALL_LIBDIR=%{cmake_libdir}} \
%{?build_target:-DBUILD_TARGET=%{build_target}} \
%{?toolchain_file:-DCMAKE_TOOLCHAIN_FILE=%{toolchain_file}} \
-DENABLE_TOFU=ON -DENABLE_FUGAKU_HACKS=ON \
-DENABLE_KRM_WORKAROUND=OFF -DWITH_KRM=ON \
-DENABLE_FUGAKU_DEBUG=OFF \
.
Create the rpm package:
When not cross-compiling:
"""""""""""""""""""""""""
Then build the rpm:
::
rpmbuild -ba scripts/mckernel.spec
When cross-compiling:
"""""""""""""""""""""
::
rpmbuild -ba scripts/mckernel.spec --target <target_uname_m> -D 'kernel_version <target_uname_r>' -D 'kernel_dir <kernel_source>'
Install the rpm package:
::
sudo rpm -ivh <rpmbuild>/RPMS/<arch>/mckernel-<version>-<release>_<linux_kernel_ver>_<dist>.<arch>.rpm
The kernel modules and McKernel kernel image are installed under the
standard system directories.
Prepare files and change settings for installing McKernel
---------------------------------------------------------
Disable SELinux of the compute nodes:
::
sudo vim /etc/selinux/config
Change the file to SELINUX=disabled. And then reboot the compute nodes:
::
sudo reboot
Install the following packages to the compute nodes:
::
systemd-libs numactl-libs libdwarf
When having access to repositories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:
::
sudo subscription-manager repos --enable codeready-builder-for-rhel-8-$(/bin/arch)-rpms
On CentOS 8, enable the PowerTools repository:
::
sudo dnf config-manager --set-enabled PowerTools
Install with yum:
::
sudo yum install systemd-libs numactl-libs libdwarf
When not having access to repositories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Ask the system administrator to install them. Note that ``libdwarf`` is in the CodeReady Linux Builder repository on RHEL 8 or in the PowerTools repository on CentOS 8.

5
docs/license.rst Normal file
View File

@ -0,0 +1,5 @@
License
=======
McKernel is GPL licensed, as found in the LICENSE file.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../README_.rst:2
msgid "|McKernel Logo|"
msgstr ""
#: ../../README_.rst:4
msgid ""
"IHK/McKernel is a light-weight multi-kernel operating system designed for"
" high-end supercomputing. It runs Linux and McKernel, a light-weight "
"kernel (LWK), side-by-side inside compute nodes and aims at the "
"following:"
msgstr ""
#: ../../README_.rst:9
msgid ""
"Provide scalable and consistent execution of large-scale parallel "
"scientific applications, but at the same time maintain the ability to "
"rapidly adapt to new hardware features and emerging programming models"
msgstr ""
#: ../../README_.rst:13
msgid ""
"Provide efficient memory and device management so that resource "
"contention and data movement are minimized at the system level"
msgstr ""
#: ../../README_.rst:15
msgid ""
"Eliminate OS noise by isolating OS services in Linux and provide jitter "
"free execution on the LWK"
msgstr ""
#: ../../README_.rst:17
msgid ""
"Support the full POSIX/Linux APIs by selectively offloading (slow-path) "
"system calls to Linux"
msgstr ""
#: ../../README_.rst:21
msgid "Documentation"
msgstr ""
#: ../../README_.rst:23
msgid "Documentation is available `here <https://ihkmckernel.readthedocs.io>`__."
msgstr ""
#: ../../README_.rst:27
msgid "Contents"
msgstr ""
#: ../../README_.rst:29
msgid "`Background <#background-and-motivation>`__"
msgstr ""
#: ../../README_.rst:30
msgid "`Architectural Overview <#architectural-overview>`__"
msgstr ""
#: ../../README_.rst:31
msgid "`Installation <#installation>`__"
msgstr ""
#: ../../README_.rst:32
msgid "`The Team <#the-team>`__"
msgstr ""
#: ../../README_.rst:35
msgid "Background and Motivation"
msgstr ""
#: ../../README_.rst:37
msgid ""
"With the growing complexity of high-end supercomputers, the current "
"system software stack faces significant challenges as we move forward to "
"exascale and beyond. The necessity to deal with extreme degree of "
"parallelism, heterogeneous architectures, multiple levels of memory "
"hierarchy, power constraints, etc., advocates operating systems that can "
"rapidly adapt to new hardware requirements, and that can support novel "
"programming paradigms and runtime systems. On the other hand, a new class"
" of more dynamic and complex applications are also on the horizon, with "
"an increasing demand for application constructs such as in-situ analysis,"
" workflows, elaborate monitoring and performance tools. This complexity "
"relies not only on the rich features of POSIX, but also on the Linux APIs"
" (such as the */proc*, */sys* filesystems, etc.) in particular."
msgstr ""
#: ../../README_.rst:52
msgid "Two Traditional HPC OS Approaches"
msgstr ""
#: ../../README_.rst:54
msgid ""
"Traditionally, light-weight operating systems specialized for HPC "
"followed two approaches to tackle scalable execution of large-scale "
"applications. In the full weight kernel (FWK) approach, a full Linux "
"environment is taken as the basis, and features that inhibit attaining "
"HPC scalability are removed, i.e., making it light-weight. The pure "
"light-weight kernel (LWK) approach, on the other hand, starts from "
"scratch and effort is undertaken to add sufficient functionality so that "
"it provides a familiar API, typically something close to that of a "
"general purpose OS, while at the same time it retains the desired "
"scalability and reliability attributes. Neither of these approaches "
"yields a fully Linux compatible environment."
msgstr ""
#: ../../README_.rst:67
msgid "The Multi-kernel Approach"
msgstr ""
#: ../../README_.rst:69
msgid ""
"A hybrid approach recognized recently by the system software community is"
" to run Linux simultaneously with a lightweight kernel on compute nodes "
"and multiple research projects are now pursuing this direction. The basic"
" idea is that simulations run on an HPC tailored lightweight kernel, "
"ensuring the necessary isolation for noiseless execution of parallel "
"applications, but Linux is leveraged so that the full POSIX API is "
"supported. Additionally, the small code base of the LWK can also "
"facilitate rapid prototyping for new, exotic hardware features. "
"Nevertheless, the questions of how to share node resources between the "
"two types of kernels, where do device drivers execute, how exactly do the"
" two kernels interact with each other and to what extent are they "
"integrated, remain subjects of ongoing debate."
msgstr ""
#: ../../README_.rst:83
msgid "Architectural Overview"
msgstr ""
#: ../../README_.rst:85
msgid ""
"At the heart of the stack is a low-level software infrastructure called "
"Interface for Heterogeneous Kernels (IHK). IHK is a general framework "
"that provides capabilities for partitioning resources in a many-core "
"environment (e.g.,CPU cores and physical memory) and it enables "
"management of lightweight kernels. IHK can allocate and release host "
"resources dynamically and no reboot of the host machine is required when "
"altering configuration. IHK also provides a low-level inter-kernel "
"messaging infrastructure, called the Inter-Kernel Communication (IKC) "
"layer. An architectural overview of the main system components is shown "
"below."
msgstr ""
#: ../../README_.rst:99
msgid "arch"
msgstr ""
#: ../../README_.rst:101
msgid ""
"McKernel is a lightweight kernel written from scratch. It is designed for"
" HPC and is booted from IHK. McKernel retains a binary compatible ABI "
"with Linux, however, it implements only a small set of performance "
"sensitive system calls and the rest are offloaded to Linux. Specifically,"
" McKernel has its own memory management, it supports processes and multi-"
"threading with a simple round-robin cooperative (tick-less) scheduler, "
"and it implements signaling. It also allows inter-process memory mappings"
" and it provides interfaces to hardware performance counters."
msgstr ""
#: ../../README_.rst:112
msgid "Functionality"
msgstr ""
#: ../../README_.rst:114
msgid ""
"An overview of some of the principal functionalities of the IHK/McKernel "
"stack is provided below."
msgstr ""
#: ../../README_.rst:118
msgid "System Call Offloading"
msgstr ""
#: ../../README_.rst:120
msgid ""
"System call forwarding in McKernel is implemented as follows. When an "
"offloaded system call occurs, McKernel marshals the system call number "
"along with its arguments and sends a message to Linux via a dedicated IKC"
" channel. The corresponding proxy process running on Linux is by default "
"waiting for system call requests through an ioctl() call into IHKs "
"system call delegator kernel module. The delegator kernel modules IKC "
"interrupt handler wakes up the proxy process, which returns to userspace "
"and simply invokes the requested system call. Once it obtains the return "
"value, it instructs the delegator module to send the result back to "
"McKernel, which subsequently passes the value to user-space."
msgstr ""
#: ../../README_.rst:132
msgid "Unified Address Space"
msgstr ""
#: ../../README_.rst:134
msgid ""
"The unified address space model in IHK/McKernel ensures that offloaded "
"system calls can seamlessly resolve arguments even in case of pointers. "
"This mechanism is depicted below and is implemented as follows."
msgstr ""
#: ../../README_.rst:141
msgid "unified_ap"
msgstr ""
#: ../../README_.rst:143
msgid ""
"First, the proxy process is compiled as a position independent binary, "
"which enables us to map the code and data segments specific to the proxy "
"process to an address range which is explicitly excluded from McKernels "
"user space. The grey box on the right side of the figure demonstrates the"
" excluded region. Second, the entire valid virtual address range of "
"McKernels application user-space is covered by a special mapping in the "
"proxy process for which we use a pseudo file mapping in Linux. This "
"mapping is indicated by the blue box on the left side of the figure."
msgstr ""
#: ../../README_.rst:153
msgid "Installation"
msgstr ""
#: ../../README_.rst:155
msgid ""
"For a smooth experience, we recommend the following combination of OS "
"distributions and platforms:"
msgstr ""
#: ../../README_.rst:158
msgid "CentOS 7.3+ running on Intel Xeon, Xeon Phi, Fujitsu A64FX"
msgstr ""
#: ../../README_.rst:161
msgid "1. Change SELinux settings"
msgstr ""
#: ../../README_.rst:163
msgid "Log in as the root and disable SELinux:"
msgstr ""
#: ../../README_.rst:169
msgid "Change the file to SELINUX=disabled"
msgstr ""
#: ../../README_.rst:172
msgid "2. Reboot the host machine"
msgstr ""
#: ../../README_.rst:179
msgid "3. Prepare packages, kernel symbol table file"
msgstr ""
#: ../../README_.rst:181
msgid "You will need the following packages installed:"
msgstr ""
#: ../../README_.rst:187
msgid ""
"Note that to install libdwarf-devel to RHEL-8.2, you need to enable the "
"CodeReady Linux Builder (CLB) repository and the EPEL repository with the"
" following commands:"
msgstr ""
#: ../../README_.rst:195
msgid "Grant read permission to the System.map file of your kernel version:"
msgstr ""
#: ../../README_.rst:202
msgid "4. Obtain sources and compile the kernel"
msgstr ""
#: ../../README_.rst:204
msgid "Clone the source code:"
msgstr ""
#: ../../README_.rst:212
msgid "(Optional) Checkout to the specific branch or version:"
msgstr ""
#: ../../README_.rst:220
msgid ""
"Foe example, if you want to try the development branch, use “development”"
" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, "
"use “1.7.0-0.2”."
msgstr ""
#: ../../README_.rst:225
msgid "4.1 Install with cmake"
msgstr ""
#: ../../README_.rst:227
msgid "Configure and compile:"
msgstr ""
#: ../../README_.rst:235
msgid ""
"The IHK kernel modules and McKernel kernel image should be installed "
"under the **ihk+mckernel** folder in your home directory."
msgstr ""
#: ../../README_.rst:239
msgid "4.2 Install with rpm"
msgstr ""
#: ../../README_.rst:241
msgid "Build rpm:"
msgstr ""
#: ../../README_.rst:252
msgid ""
"The IHK kernel modules and McKernel kernel image are installed under the "
"system directory."
msgstr ""
#: ../../README_.rst:256
msgid "5. Boot McKernel"
msgstr ""
#: ../../README_.rst:258
msgid ""
"A boot script called mcreboot.sh is provided under sbin in the install "
"folder. To boot on logical CPU 1 with 512MB of memory, use the following "
"invocation:"
msgstr ""
#: ../../README_.rst:268
msgid ""
"You should see something similar like this if you display the McKernels "
"kernel message log:"
msgstr ""
#: ../../README_.rst:296
msgid "6. Run a simple program on McKernel"
msgstr ""
#: ../../README_.rst:298
msgid ""
"The mcexec command line tool (which is also the Linux proxy process) can "
"be used for executing applications on McKernel:"
msgstr ""
#: ../../README_.rst:307
msgid "7. Shutdown McKernel"
msgstr ""
#: ../../README_.rst:309
msgid ""
"Finally, to shutdown McKernel and release CPU/memory resources back to "
"Linux use the following command:"
msgstr ""
#: ../../README_.rst:317
msgid "8. Advanced: Enable Utility Thread offloading Interface (UTI)"
msgstr ""
#: ../../README_.rst:319
msgid ""
"UTI enables a runtime such as MPI runtime to spawn utility threads such "
"as MPI asynchronous progress threads to Linux cores."
msgstr ""
#: ../../README_.rst:323
msgid "8.1 Install capstone"
msgstr ""
#: ../../README_.rst:325
msgid "Install EPEL capstone-devel:"
msgstr ""
#: ../../README_.rst:333
msgid "8.2 Install syscall_intercept"
msgstr ""
#: ../../README_.rst:342
msgid "8.3 Install UTI for McKernel"
msgstr ""
#: ../../README_.rst:344
msgid "Install:"
msgstr ""
#: ../../README_.rst:354
msgid "8.4 Install McKernel"
msgstr ""
#: ../../README_.rst:361
msgid "8.5 Run executable"
msgstr ""
#: ../../README_.rst:368
msgid "8.6 Install UTI for Linux for performance comparison"
msgstr ""
#: ../../README_.rst:370
msgid "Install by make:"
msgstr ""
#: ../../README_.rst:379
msgid "Install by rpm:"
msgstr ""
#: ../../README_.rst:391
msgid "The Team"
msgstr ""
#: ../../README_.rst:393
msgid ""
"The McKernel project was started at The University of Tokyo and currently"
" it is mainly developed at RIKEN. Some of our collaborators include:"
msgstr ""
#: ../../README_.rst:397
msgid "Hitachi"
msgstr ""
#: ../../README_.rst:398
msgid "Fujitsu"
msgstr ""
#: ../../README_.rst:399
msgid "CEA (France)"
msgstr ""
#: ../../README_.rst:400
msgid "NEC"
msgstr ""
#: ../../README_.rst:403
msgid "License"
msgstr ""
#: ../../README_.rst:405
msgid "McKernel is GPL licensed, as found in the LICENSE file."
msgstr ""
#: ../../README_.rst:408
msgid "Contact"
msgstr ""
#: ../../README_.rst:410
msgid ""
"Please give your feedback to us via one of the following mailing lists. "
"Subscription via `www.pccluster.org "
"<http://www.pccluster.org/mailman/listinfo/mckernel-users>`__ is needed."
msgstr ""
#: ../../README_.rst:415
msgid "English: mckernel-users@pccluster.org"
msgstr ""
#: ../../README_.rst:416
msgid "Japanese: mckernel-users-jp@pccluster.org"
msgstr ""

View File

@ -0,0 +1,101 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../archtecture.rst:2
msgid "Architectural Overview"
msgstr ""
#: ../../archtecture.rst:4
msgid ""
"At the heart of the stack is a low-level software infrastructure called "
"Interface for Heterogeneous Kernels (IHK). IHK is a general framework "
"that provides capabilities for partitioning resources in a many-core "
"environment (e.g.,CPU cores and physical memory) and it enables "
"management of lightweight kernels. IHK can allocate and release host "
"resources dynamically and no reboot of the host machine is required when "
"altering configuration. IHK also provides a low-level inter-kernel "
"messaging infrastructure, called the Inter-Kernel Communication (IKC) "
"layer. An architectural overview of the main system components is shown "
"below."
msgstr ""
#: ../../archtecture.rst:18
msgid ""
"McKernel is a lightweight kernel written from scratch. It is designed for"
" HPC and is booted from IHK. McKernel retains a binary compatible ABI "
"with Linux, however, it implements only a small set of performance "
"sensitive system calls and the rest are offloaded to Linux. Specifically,"
" McKernel has its own memory management, it supports processes and multi-"
"threading with a simple round-robin cooperative (tick-less) scheduler, "
"and it implements signaling. It also allows inter-process memory mappings"
" and it provides interfaces to hardware performance counters."
msgstr ""
#: ../../archtecture.rst:29
msgid "Functionality"
msgstr ""
#: ../../archtecture.rst:31
msgid ""
"An overview of some of the principal functionalities of the IHK/McKernel "
"stack is provided below."
msgstr ""
#: ../../archtecture.rst:35
msgid "System Call Offloading"
msgstr ""
#: ../../archtecture.rst:37
msgid ""
"System call forwarding in McKernel is implemented as follows. When an "
"offloaded system call occurs, McKernel marshals the system call number "
"along with its arguments and sends a message to Linux via a dedicated IKC"
" channel. The corresponding proxy process running on Linux is by default "
"waiting for system call requests through an ioctl() call into IHKs "
"system call delegator kernel module. The delegator kernel modules IKC "
"interrupt handler wakes up the proxy process, which returns to userspace "
"and simply invokes the requested system call. Once it obtains the return "
"value, it instructs the delegator module to send the result back to "
"McKernel, which subsequently passes the value to user-space."
msgstr ""
#: ../../archtecture.rst:49
msgid "Unified Address Space"
msgstr ""
#: ../../archtecture.rst:51
msgid ""
"The unified address space model in IHK/McKernel ensures that offloaded "
"system calls can seamlessly resolve arguments even in case of pointers. "
"This mechanism is depicted below and is implemented as follows."
msgstr ""
#: ../../archtecture.rst:58
msgid ""
"First, the proxy process is compiled as a position independent binary, "
"which enables us to map the code and data segments specific to the proxy "
"process to an address range which is explicitly excluded from McKernels "
"user space. The grey box on the right side of the figure demonstrates the"
" excluded region. Second, the entire valid virtual address range of "
"McKernels application user-space is covered by a special mapping in the "
"proxy process for which we use a pseudo file mapping in Linux. This "
"mapping is indicated by the blue box on the left side of the figure."
msgstr ""

View File

@ -0,0 +1,79 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../background.rst:2
msgid "Background and Motivation"
msgstr ""
#: ../../background.rst:4
msgid ""
"With the growing complexity of high-end supercomputers, the current "
"system software stack faces significant challenges as we move forward to "
"exascale and beyond. The necessity to deal with extreme degree of "
"parallelism, heterogeneous architectures, multiple levels of memory "
"hierarchy, power constraints, etc., advocates operating systems that can "
"rapidly adapt to new hardware requirements, and that can support novel "
"programming paradigms and runtime systems. On the other hand, a new class"
" of more dynamic and complex applications are also on the horizon, with "
"an increasing demand for application constructs such as in-situ analysis,"
" workflows, elaborate monitoring and performance tools. This complexity "
"relies not only on the rich features of POSIX, but also on the Linux APIs"
" (such as the */proc*, */sys* filesystems, etc.) in particular."
msgstr ""
#: ../../background.rst:19
msgid "Two Traditional HPC OS Approaches"
msgstr ""
#: ../../background.rst:21
msgid ""
"Traditionally, light-weight operating systems specialized for HPC "
"followed two approaches to tackle scalable execution of large-scale "
"applications. In the full weight kernel (FWK) approach, a full Linux "
"environment is taken as the basis, and features that inhibit attaining "
"HPC scalability are removed, i.e., making it light-weight. The pure "
"light-weight kernel (LWK) approach, on the other hand, starts from "
"scratch and effort is undertaken to add sufficient functionality so that "
"it provides a familiar API, typically something close to that of a "
"general purpose OS, while at the same time it retains the desired "
"scalability and reliability attributes. Neither of these approaches "
"yields a fully Linux compatible environment."
msgstr ""
#: ../../background.rst:34
msgid "The Multi-kernel Approach"
msgstr ""
#: ../../background.rst:36
msgid ""
"A hybrid approach recognized recently by the system software community is"
" to run Linux simultaneously with a lightweight kernel on compute nodes "
"and multiple research projects are now pursuing this direction. The basic"
" idea is that simulations run on an HPC tailored lightweight kernel, "
"ensuring the necessary isolation for noiseless execution of parallel "
"applications, but Linux is leveraged so that the full POSIX API is "
"supported. Additionally, the small code base of the LWK can also "
"facilitate rapid prototyping for new, exotic hardware features. "
"Nevertheless, the questions of how to share node resources between the "
"two types of kernels, where do device drivers execute, how exactly do the"
" two kernels interact with each other and to what extent are they "
"integrated, remain subjects of ongoing debate."
msgstr ""

View File

@ -0,0 +1,57 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../boot_run_shutdown.rst:2
msgid "Boot McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:4
msgid ""
"A boot script called ``mcreboot.sh`` is provided under ``sbin`` in the "
"install folder. To boot on logical CPU 1 with 512MB of memory, use the "
"following invocation:"
msgstr ""
#: ../../boot_run_shutdown.rst:14
msgid ""
"You should see something similar like this if you display the McKernels "
"kernel message log:"
msgstr ""
#: ../../boot_run_shutdown.rst:42
msgid "Run a simple program on McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:44
msgid ""
"The mcexec command line tool (which is also the Linux proxy process) can "
"be used for executing applications on McKernel:"
msgstr ""
#: ../../boot_run_shutdown.rst:53
msgid "Shutdown McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:55
msgid ""
"Finally, to shutdown McKernel and release CPU/memory resources back to "
"Linux use the following command:"
msgstr ""

View File

@ -0,0 +1,39 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../contact.rst:2
msgid "Contact"
msgstr ""
#: ../../contact.rst:4
msgid ""
"Please give your feedback to us via one of the following mailing lists. "
"Subscription via `www.pccluster.org "
"<http://www.pccluster.org/mailman/listinfo/mckernel-users>`__ is needed."
msgstr ""
#: ../../contact.rst:9
msgid "English: mckernel-users@pccluster.org"
msgstr ""
#: ../../contact.rst:10
msgid "Japanese: mckernel-users-jp@pccluster.org"
msgstr ""

View File

@ -0,0 +1,59 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../summary.rst:1
msgid ""
"IHK/McKernel is a light-weight multi-kernel operating system designed for"
" high-end supercomputing. It runs Linux and McKernel, a light-weight "
"kernel (LWK), side-by-side inside compute nodes and aims at the "
"following:"
msgstr ""
#: ../../summary.rst:6
msgid ""
"Provide scalable and consistent execution of large-scale parallel "
"scientific applications, but at the same time maintain the ability to "
"rapidly adapt to new hardware features and emerging programming models"
msgstr ""
#: ../../summary.rst:10
msgid ""
"Provide efficient memory and device management so that resource "
"contention and data movement are minimized at the system level"
msgstr ""
#: ../../summary.rst:12
msgid ""
"Eliminate OS noise by isolating OS services in Linux and provide jitter "
"free execution on the LWK"
msgstr ""
#: ../../summary.rst:14
msgid ""
"Support the full POSIX/Linux APIs by selectively offloading (slow-path) "
"system calls to Linux"
msgstr ""
#: ../../cover.rst:5
msgid ""
"See `Quick Guide -- Installation <quick.html#installation>`__ for jump "
"start."
msgstr ""

View File

@ -0,0 +1,28 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../doc.rst:2
msgid "Documentation"
msgstr ""
#: ../../doc.rst:4
msgid "Documentation is available `here <https://ihkmckernel.readthedocs.io>`__."
msgstr ""

View File

@ -0,0 +1,40 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../ihk_developers.rst:6
msgid "External Specs"
msgstr ""
#: ../../ihk_developers.rst:9
msgid "Overview"
msgstr ""
#: ../../ihk_developers.rst:12
msgid "Function Specs"
msgstr ""
#: ../../ihk_developers.rst:15
msgid "Command / Daemon Specs"
msgstr ""
#: ../../ihk_developers.rst:18
msgid "Booting LWK"
msgstr ""

View File

@ -0,0 +1,95 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-07 10:00+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../index.rst:12
msgid "Quick Guide"
msgstr "クイックガイド"
#: ../../index.rst:18
msgid "Users' Guide"
msgstr "ユーザガイド"
#: ../../index.rst:24
msgid "Operators' Guide"
msgstr "運用ガイド"
#: ../../index.rst:30
msgid "IHK Developers' Guide"
msgstr ""
#: ../../index.rst:36
msgid "McKernel Developers' Guide"
msgstr ""
#: ../../index.rst:42
msgid "IHK Specifications"
msgstr ""
#: ../../index.rst:48
msgid "McKernel Specifications"
msgstr ""
#: ../../index.rst:54
msgid "What's New"
msgstr ""
#: ../../index.rst:8
msgid "IHK/McKernel"
msgstr ""
#: ../../summary.rst:1
msgid ""
"IHK/McKernel is a light-weight multi-kernel operating system designed for"
" high-end supercomputing. It runs Linux and McKernel, a light-weight "
"kernel (LWK), side-by-side inside compute nodes and aims at the "
"following:"
msgstr ""
#: ../../summary.rst:6
msgid ""
"Provide scalable and consistent execution of large-scale parallel "
"scientific applications, but at the same time maintain the ability to "
"rapidly adapt to new hardware features and emerging programming models"
msgstr ""
#: ../../summary.rst:10
msgid ""
"Provide efficient memory and device management so that resource "
"contention and data movement are minimized at the system level"
msgstr ""
#: ../../summary.rst:12
msgid ""
"Eliminate OS noise by isolating OS services in Linux and provide jitter "
"free execution on the LWK"
msgstr ""
#: ../../summary.rst:14
msgid ""
"Support the full POSIX/Linux APIs by selectively offloading (slow-path) "
"system calls to Linux"
msgstr ""
#: ../../cover.rst:5
msgid ""
"See `Quick Guide -- Installation <quick.html#installation>`__ for jump "
"start."
msgstr ""

View File

@ -0,0 +1,191 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../install.rst:4
msgid "Installation"
msgstr "インストール"
#: ../../install.rst:6
msgid "The following OS distributions and platforms are recommended:"
msgstr "推奨OSディストリビューションとプロセッサは以下の通り。"
#: ../../install.rst:8
msgid "OS distribution"
msgstr ""
#: ../../install.rst:10
msgid "CentOS 7.3 or later"
msgstr ""
#: ../../install.rst:11
msgid "RHEL 7.3 or later"
msgstr ""
#: ../../install.rst:13
msgid "Platform"
msgstr ""
#: ../../install.rst:15
msgid "Intel Xeon"
msgstr ""
#: ../../install.rst:16
msgid "Intel Xeon Phi"
msgstr ""
#: ../../install.rst:17
msgid "Fujitsu A64FX"
msgstr ""
#: ../../install.rst:20
msgid "Prepare files for building McKernel"
msgstr ""
#: ../../install.rst:22
msgid ""
"Grant read permission to the System.map file of your kernel version on "
"the build machine:"
msgstr ""
#: ../../install.rst:28
msgid "Install the following packages to the build machine:"
msgstr ""
#: ../../install.rst:35 ../../install.rst:179
msgid "When having access to repositories"
msgstr ""
#: ../../install.rst:37 ../../install.rst:181
msgid "On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:"
msgstr ""
#: ../../install.rst:43 ../../install.rst:187
msgid "On CentOS 8, enable the PowerTools repository:"
msgstr ""
#: ../../install.rst:49 ../../install.rst:193
msgid "Install with yum:"
msgstr ""
#: ../../install.rst:56 ../../install.rst:200
msgid "When not having access to repositories"
msgstr ""
#: ../../install.rst:58
msgid ""
"Ask the system administrator to install them. Note that ``libdwarf-"
"devel`` is in the CodeReady Linux Builder repository on RHEL 8 or in the "
"PowerTools repository on CentOS 8."
msgstr ""
#: ../../install.rst:61
msgid "Clone, compile, install"
msgstr ""
#: ../../install.rst:63
msgid "Clone the source code:"
msgstr ""
#: ../../install.rst:71
msgid "(Optional) Checkout to the specific branch or version:"
msgstr ""
#: ../../install.rst:79
msgid ""
"Foe example, if you want to try the development branch, use “development”"
" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, "
"use “1.7.0-0.2”."
msgstr ""
#: ../../install.rst:83
msgid "Move to build directory:"
msgstr ""
#: ../../install.rst:89
msgid "Run cmake:"
msgstr ""
#: ../../install.rst:92 ../../install.rst:135
msgid "When not cross-compiling:"
msgstr ""
#: ../../install.rst:99 ../../install.rst:142
msgid "When cross-compiling:"
msgstr ""
#: ../../install.rst:111
msgid "Install with cmake"
msgstr ""
#: ../../install.rst:113
msgid "Install with make:"
msgstr ""
#: ../../install.rst:119
msgid ""
"The kernel modules and McKernel kernel image should be installed under "
"the **ihk+mckernel** folder in your home directory."
msgstr ""
#: ../../install.rst:123
msgid "Install with rpm"
msgstr ""
#: ../../install.rst:125
msgid "Create the tarball and the spec file:"
msgstr ""
#: ../../install.rst:132
msgid "Create the rpm package:"
msgstr ""
#: ../../install.rst:148
msgid "Install the rpm package:"
msgstr ""
#: ../../install.rst:154
msgid ""
"The kernel modules and McKernel kernel image are installed under the "
"standard system directories."
msgstr ""
#: ../../install.rst:158
msgid "Prepare files and change settings for installing McKernel"
msgstr ""
#: ../../install.rst:160
msgid "Disable SELinux of the compute nodes:"
msgstr ""
#: ../../install.rst:166
msgid "Change the file to SELINUX=disabled. And then reboot the compute nodes:"
msgstr ""
#: ../../install.rst:172
msgid "Install the following packages to the compute nodes:"
msgstr ""
#: ../../install.rst:202
msgid ""
"Ask the system administrator to install them. Note that ``libdwarf`` is "
"in the CodeReady Linux Builder repository on RHEL 8 or in the PowerTools "
"repository on CentOS 8."
msgstr ""

View File

@ -0,0 +1,28 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../license.rst:2
msgid "License"
msgstr ""
#: ../../license.rst:4
msgid "McKernel is GPL licensed, as found in the LICENSE file."
msgstr ""

View File

@ -0,0 +1,20 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"

View File

@ -0,0 +1,28 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../mckernel_developers.rst:6
msgid "Interfaces"
msgstr ""
#: ../../mckernel_developers.rst:9
msgid "Interface details"
msgstr ""

View File

@ -0,0 +1,886 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-07 10:26+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../operators.rst:5
#, fuzzy
msgid "This document will explain how to operate system with McKernel."
msgstr "McKernelを用いたシステムを運用するシステム管理者を対象として、運用手順を説明する。"
#: ../../operators.rst:8
msgid "Installation"
msgstr "インストール"
#: ../../operators.rst:10
msgid "See `Quick Guide -- Installation <quick.html#installation>`__."
msgstr "`クイックガイド ― インストール <quick.html#installation>`__ に記載する。"
#: ../../uti.rst:2
msgid "Advanced: Enable Utility Thread offloading Interface (UTI)"
msgstr ""
#: ../../uti.rst:4
msgid ""
"UTI enables a runtime such as MPI runtime to spawn utility threads such "
"as MPI asynchronous progress threads to Linux cores."
msgstr ""
#: ../../uti.rst:8
msgid "Install capstone"
msgstr ""
#: ../../uti.rst:11 ../../uti.rst:22
msgid "When compute nodes don't have access to repositories"
msgstr ""
#: ../../uti.rst:13
msgid "Install EPEL capstone-devel:"
msgstr ""
#: ../../uti.rst:24
msgid ""
"Ask the system administrator to install ``capstone-devel``. Note that it "
"is in the EPEL repository."
msgstr ""
#: ../../uti.rst:28
msgid "Install syscall_intercept"
msgstr ""
#: ../../uti.rst:37
msgid "Install UTI for McKernel"
msgstr ""
#: ../../uti.rst:39
msgid "Install:"
msgstr ""
#: ../../uti.rst:49
msgid "Install McKernel"
msgstr ""
#: ../../uti.rst:51
msgid "Add ``-DENABLE_UTI=ON`` option to ``cmake``:"
msgstr ""
#: ../../uti.rst:58
msgid "Run programs"
msgstr ""
#: ../../uti.rst:60
msgid "Add ``--enable-uti`` option to ``mcexec``:"
msgstr ""
#: ../../uti.rst:67
msgid "Install UTI for Linux"
msgstr ""
#: ../../uti.rst:69
msgid ""
"You should skip this step if it's already installed as with, for example,"
" Fujitsu Technical Computing Suite."
msgstr ""
#: ../../uti.rst:72
msgid "Install by make"
msgstr ""
#: ../../uti.rst:82
msgid "Install by rpm"
msgstr ""
#: ../../operators.rst:15
msgid "Boot and Shut-down"
msgstr "起動停止"
#: ../../operators.rst:18
msgid "Related files"
msgstr "関連ファイル"
#: ../../operators.rst:20
msgid ""
"In the followings, the install directory of IHK/McKernel is shown as "
"``<install>`` . The kernel modules and their locations are as follows."
msgstr "以降、IHK/McKernelのインストールディレクトリを ``<install>`` とする。カーネルモジュールとその場所は以下の通り。"
#: ../../operators.rst:24
msgid "<install>/kmod/ihk.ko"
msgstr ""
#: ../../operators.rst:24
msgid "IHK-master core"
msgstr ""
#: ../../operators.rst:26
msgid "|ihk-smp|"
msgstr ""
#: ../../operators.rst:26
msgid "IHK-master driver"
msgstr ""
#: ../../operators.rst:29
msgid "|mcctrl|"
msgstr ""
#: ../../operators.rst:29
msgid "Delegator module"
msgstr ""
#: ../../operators.rst:32
msgid "|mckernel.img|"
msgstr ""
#: ../../operators.rst:32
msgid "Kernel Image"
msgstr "カーネルイメージ"
#: ../../operators.rst:40
msgid "The commands and daemons for operation and their locations are as follows."
msgstr "運用向けコマンド・デーモンのファイルの場所は以下の通り。"
#: ../../operators.rst:43
msgid "|mcreboot|"
msgstr ""
#: ../../operators.rst:43
msgid "Boot script"
msgstr "起動スクリプト"
#: ../../operators.rst:46
msgid "|mcstop|"
msgstr ""
#: ../../operators.rst:46
msgid "Shutdown script"
msgstr "シャットダウンスクリプト"
#: ../../operators.rst:49
msgid "<install>/bin/mcexec"
msgstr ""
#: ../../operators.rst:49
msgid "Process invocation command"
msgstr "プロセス起動コマンド"
#: ../../operators.rst:51
msgid "<install>/bin/eclair"
msgstr ""
#: ../../operators.rst:51
msgid "Kernel dump analysis tool"
msgstr "カーネルダンプ解析ツール"
#: ../../operators.rst:53
msgid "|vmcore2mckdump|"
msgstr ""
#: ../../operators.rst:53
msgid "Kernel dump format conversion tool"
msgstr "カーネルダンプ形式変換ツール"
#: ../../operators.rst:60
msgid "以下、関連コマンドおよび関連関数のインターフェイスを説明する。"
msgstr ""
#: ../../operators.rst:63
msgid "インターフェイス"
msgstr ""
#: ../../operators.rst:66
msgid "カーネル引数"
msgstr ""
#: ../../operators.rst:68
msgid "McKernelのカーネル引数を表 :numref:`tab-kargs` に示す。"
msgstr ""
#: ../../operators.rst:72
msgid "McKernelのカーネル引数"
msgstr ""
#: ../../operators.rst:75
msgid "hidos"
msgstr ""
#: ../../operators.rst:75
msgid "IKCを有効にする。"
msgstr ""
#: ../../operators.rst:77
msgid "|dlv|"
msgstr ""
#: ../../operators.rst
msgid "Linuxのpanicハンドラ経由でダンプを行った場合の、ダ"
msgstr ""
#: ../../operators.rst
msgid "ンプ対象とするメモリ領域の種類を<level>に設定する。"
msgstr ""
#: ../../operators.rst
msgid "設定可能な値は以下の通り。"
msgstr ""
#: ../../operators.rst:83 ../../operators.rst:164 ../../operators.rst:196
#: ../../operators.rst:234
msgid "0"
msgstr ""
#: ../../operators.rst:84 ../../operators.rst:165
msgid "IHKがMcKernelに割り当てたメモリ領域を出力する"
msgstr ""
#: ../../operators.rst:85 ../../operators.rst:166
msgid "24"
msgstr ""
#: ../../operators.rst:86 ../../operators.rst:167
msgid "カーネルが使用しているメモリ領域を出力する"
msgstr ""
#: ../../operators.rst:88 ../../operators.rst:169
msgid "指定がなかった場合は24が用いられる。"
msgstr ""
#: ../../operators.rst:90
msgid "|allow|"
msgstr ""
#: ../../operators.rst
msgid "McKernelに割り当てられたCPU数より大きい数のスレッド"
msgstr ""
#: ../../operators.rst
msgid "またはプロセスの生成を許可する。この引数が指定され"
msgstr ""
#: ../../operators.rst
msgid "ない場合に、CPU数より大きい数のスレッドまたはプロセ"
msgstr ""
#: ../../operators.rst
msgid "スをclone(), fork(), vfork()などで生成しようとする"
msgstr ""
#: ../../operators.rst
msgid "と、当該システムコールがEINVALエラーを返す。"
msgstr ""
#: ../../operators.rst:102
msgid "ブートスクリプト"
msgstr "ブートスクリプト"
#: ../../operators.rst:105 ../../operators.rst:207
msgid "書式"
msgstr ""
#: ../../operators.rst:113 ../../operators.rst:217
msgid "オプション"
msgstr ""
#: ../../operators.rst:116
msgid "|opt-c|"
msgstr ""
#: ../../operators.rst
msgid "McKernelに割り当てるCPUのリストを指定する。フォー"
msgstr ""
#: ../../operators.rst
msgid "マットは以下の通り。"
msgstr ""
#: ../../operators.rst
msgid "<CPU logical id>,<CPU logical id>...または"
msgstr ""
#: ../../operators.rst
msgid "<CPU logical id>-<CPU logical id>,<CPU logical id>"
msgstr ""
#: ../../operators.rst
msgid "-<CPU logical id>...または両者の混合。"
msgstr ""
#: ../../operators.rst:122
msgid "|opt-r|"
msgstr ""
#: ../../operators.rst
msgid "McKernelのCPUがIKCメッセージを送るLinux"
msgstr ""
#: ../../operators.rst
msgid "CPUを指定する。フォーマットは以下の通り。"
msgstr ""
#: ../../operators.rst
msgid "<CPU list>:<CPU id>+<CPU list>:<CPU id>..."
msgstr ""
#: ../../operators.rst
msgid "<CPU list>のフォーマットは-cオプションにおけるもの"
msgstr ""
#: ../../operators.rst
msgid "と同じである。"
msgstr ""
#: ../../operators.rst
msgid "各<CPU list>:<CPU id>は<CPU list>で示されるMcKernel"
msgstr ""
#: ../../operators.rst
msgid "のCPUが<CPU logical id>で示されるLinuxのCPUにIKC"
msgstr ""
#: ../../operators.rst
msgid "メッセージを送信することを意味する。"
msgstr ""
#: ../../operators.rst:131
msgid "|opt-m|"
msgstr ""
#: ../../operators.rst
msgid "McKernelに割り当てるメモリ領域を指定する。フォーマッ"
msgstr ""
#: ../../operators.rst
msgid "トは以下の通り。"
msgstr ""
#: ../../operators.rst
msgid "<size>@<NUMA-id>, <size>@<NUMA-id>..."
msgstr ""
#: ../../operators.rst:135
msgid "|opt-f|"
msgstr ""
#: ../../operators.rst
msgid "ihkmondが使用するsyslogプロトコルのfacilityを指定す"
msgstr ""
#: ../../operators.rst
msgid "る。デフォルトはLOG_LOCAL6。"
msgstr ""
#: ../../operators.rst:138
msgid "|opt-o|"
msgstr ""
#: ../../operators.rst
msgid "IHKのデバイスファイル(/dev/mcd*, /dev/mcos*)のオー"
msgstr ""
#: ../../operators.rst
msgid "ナーとグループの値を<user>[:<group>]の形式で指定す"
msgstr ""
#: ../../operators.rst
msgid "る。デフォルトはmcreboot.shを実行したユーザ。"
msgstr ""
#: ../../operators.rst:142
msgid "|opt-i|"
msgstr ""
#: ../../operators.rst
msgid "ihkmondがハングアップ検知のためにOS状態を確認する時"
msgstr ""
#: ../../operators.rst
msgid "間間隔を秒単位で指定する。-1が指定された場合はハン"
msgstr ""
#: ../../operators.rst
msgid "グアップ検知を行わない。指定がない場合はハングアッ"
msgstr ""
#: ../../operators.rst
msgid "プ検知を行わない。"
msgstr ""
#: ../../operators.rst:147
msgid "|opt-k|"
msgstr ""
#: ../../operators.rst
msgid "カーネルメッセージの/dev/logへのリダイレクト有無を"
msgstr ""
#: ../../operators.rst
msgid "指定する。0が指定された場合はリダイレクトを行わず、"
msgstr ""
#: ../../operators.rst
msgid "0以外が指定された場合はリダイレクトを行う。指定がな"
msgstr ""
#: ../../operators.rst
msgid "い場合はリダイレクトを行わない。"
msgstr ""
#: ../../operators.rst:152
msgid "-q <irq>"
msgstr ""
#: ../../operators.rst
msgid "IHKが使用するIRQ番号を指定する。指定がない場合は"
msgstr ""
#: ../../operators.rst
msgid "64-255の範囲で空いているものを使用する。"
msgstr ""
#: ../../operators.rst:155
msgid "-t"
msgstr ""
#: ../../operators.rst
msgid "x86_64アーキテクチャのみTurbo"
msgstr ""
#: ../../operators.rst
msgid "Boostをオンにする。デフォルトはオフ。"
msgstr ""
#: ../../operators.rst:158
msgid "-d <level>"
msgstr ""
#: ../../operators.rst:171
msgid "-O"
msgstr ""
#: ../../operators.rst
msgid "またはプロセスの生成を許可する。指定がない場合は許可"
msgstr ""
#: ../../operators.rst
msgid "しない。すなわち、CPU数より大きい数のスレッドまたは"
msgstr ""
#: ../../operators.rst
msgid "プロセスを生成しようとするとエラーとなる。"
msgstr ""
#: ../../operators.rst:187 ../../operators.rst:224
msgid "説明"
msgstr ""
#: ../../operators.rst:189
msgid ""
"McKernel関連カーネルモジュールをinsmodし、<cpulist>で指定されたCPUと<memlist>で指定されたメモリ領域からなるパーティションを作成し、IKC"
" mapを<ikcmap>に設定し、前記パーティションにMcKernelをブートする。"
msgstr ""
#: ../../operators.rst:193 ../../operators.rst:231
msgid "戻り値"
msgstr ""
#: ../../operators.rst:196 ../../operators.rst:234
msgid "正常終了"
msgstr ""
#: ../../operators.rst:198 ../../operators.rst:236
msgid "0以外"
msgstr ""
#: ../../operators.rst:198 ../../operators.rst:236
msgid "エラー"
msgstr ""
#: ../../operators.rst:202
msgid "シャットダウンスクリプト"
msgstr "シャットダウンスクリプト"
#: ../../operators.rst:219
msgid "なし"
msgstr ""
#: ../../operators.rst:226
msgid "McKernelをシャットダウンし、McKernel用パーティションを削除し、関連カーネルモジュールをrmmodする。"
msgstr ""
#: ../../operators.rst:240
msgid "プロセス起動コマンド"
msgstr ""
#: ../../operators.rst:245
msgid "ダンプ解析コマンド"
msgstr ""
#: ../../operators.rst:250
msgid "ダンプ形式変換コマンド"
msgstr ""
#: ../../operators.rst:255
msgid "ブート手順"
msgstr ""
#: ../../operators.rst:257
msgid "mcreboot.shを用いてブート手順を説明する。"
msgstr ""
#: ../../operators.rst:259 ../../operators.rst:906
msgid "スクリプトは以下の通り。"
msgstr ""
#: ../../operators.rst:852 ../../operators.rst:1050
msgid "手順は以下の通り。"
msgstr ""
#: ../../operators.rst:854
msgid "ihkmondを起動する。ihkmondは任意のタイミングで起動してよい。これは、ihkmondはOSインスタンスの作成を検知して動作を開始するためである。83行目"
msgstr ""
#: ../../operators.rst:856
msgid "Linuxのカーネルバージョンが、mcoverlayfsが動作するものであるかを確認する。200–216行目"
msgstr ""
#: ../../operators.rst:858
msgid "irqbalanceを停止する。251–257行目"
msgstr ""
#: ../../operators.rst:860
msgid ""
"/proc/irq/[n]/affinityの設定を保存した上でMcKernel "
"CPUを担当から外す。担当CPUが無くなる場合は、全てのLinux CPUを指定する。269–303行目"
msgstr ""
#: ../../operators.rst:864
msgid "ihk.koをinsmodする。307行目"
msgstr ""
#: ../../operators.rst:866
msgid "Linuxによるメモリフラグメンテーションを緩和するために以下を実施する。313–320行目"
msgstr ""
#: ../../operators.rst:868
msgid "アクティブでないプロセスを積極的にスワップアウトするように設定する"
msgstr ""
#: ../../operators.rst:870
msgid "クリーンなページキャッシュを無効化し、またdentriesやinodeのslabオブジェクトのうち可能なものを破棄する"
msgstr ""
#: ../../operators.rst:872
msgid "連続する空き領域を結合してより大きな空き領域にまとめる"
msgstr ""
#: ../../operators.rst:874
msgid ""
"ihk-smp-x86.koをinsmodする。340行目ihk-smp-x86.koは関数をihk.koに登録する。このため、ihk-"
"smp-x86.koはihk.koをinsmodした後にinsmodする必要がある。"
msgstr ""
#: ../../operators.rst:876
msgid "メモリを予約する。370行目"
msgstr ""
#: ../../operators.rst:878
msgid "CPUを予約する。374行目"
msgstr ""
#: ../../operators.rst:880
msgid "McKernelのカーネルモジュールmcctrl.koをinsmodする。382行目mcctrl.koはMcKernelブート時に呼ばれる関数をihk.koに登録する。このため、mcctrl.koのinsmodはihk.koのinsmodの後に、またブートの前に行う必要がある。"
msgstr ""
#: ../../operators.rst:882
msgid "OSインスタンスを作成する。406行目"
msgstr ""
#: ../../operators.rst:884
msgid "OSインスタンスにCPUを割り当てる。412行目"
msgstr ""
#: ../../operators.rst:886
msgid "McKernel CPUのIKCメッセージ送信先のLinux CPUを設定する。419行目"
msgstr ""
#: ../../operators.rst:888
msgid "OSインスタンスにメモリを割り当てる。426行目"
msgstr ""
#: ../../operators.rst:890
msgid "カーネルイメージをロードする。432行目"
msgstr ""
#: ../../operators.rst:892
msgid "カーネル引数をカーネルに渡す。438行目"
msgstr ""
#: ../../operators.rst:894
msgid "カーネルをブートする。444行目"
msgstr ""
#: ../../operators.rst:896
msgid ""
"/proc, "
"/sysファイルの準備をする。また、その中でmcoverlayfs.koをinsmodする。mcoverlayfs.koは他モジュールとの依存関係を持たない。454行目から567行目なお、関数インターフェイスでの対応関数はihk_os_create_pseudofs()である。"
msgstr ""
#: ../../operators.rst:898
msgid "irqbalanceを、Linux CPUのみを対象とする設定で開始する。569–587行目"
msgstr ""
#: ../../operators.rst:902
msgid "シャットダウン手順"
msgstr ""
#: ../../operators.rst:904
msgid "mcstop+release.shを用いてシャットダウン手順を説明する。"
msgstr ""
#: ../../operators.rst:1052
msgid "ブート時にLinux CPUのみを対象とする設定で開始されたirqbalanceを停止する。24–33行目"
msgstr ""
#: ../../operators.rst:1055
msgid "全てのOSインスタンスを破壊する。OSインスタンスに割り当てられていた資源はIHKがLWKのために予約した状態に移行する。35–50行目"
msgstr ""
#: ../../operators.rst:1057
msgid "IHKがLWKのために予約していた資源を解放する。52–77行目"
msgstr ""
#: ../../operators.rst:1059
msgid "mcctrl.koをrmmodする。81行目"
msgstr ""
#: ../../operators.rst:1061
msgid ""
"/proc, "
"/sysファイルの準備をする。また、その中でmcoverlayfs.koをrmmodする。87–100行目なお、関数インターフェイスでの対応関数はihk_os_destroy_pseudofs()である。"
msgstr ""
#: ../../operators.rst:1063
msgid "ihk-smp-x86.koをrmmodする。104行目"
msgstr ""
#: ../../operators.rst:1065
msgid "ihk.koをrmmodする。112行目"
msgstr ""
#: ../../operators.rst:1067
msgid "ihkmondを停止する。121行目"
msgstr ""
#: ../../operators.rst:1069
msgid "/proc/irq/[n]/affinityの設定をブート時に保存しておいたものに戻し、ブート前の設定でirqbalanceを開始する。124–135行目"
msgstr ""
#: ../../operators.rst:1071
msgid "Linuxカーネルのスワップアウト積極度の設定をデフォルトの値に戻す。138行目"
msgstr ""
#~ msgid "The following OS distributions and platforms are recommended:"
#~ msgstr ""
#~ msgid "OS distribution"
#~ msgstr ""
#~ msgid "CentOS 7.3 or later"
#~ msgstr ""
#~ msgid "RHEL 7.3 or later"
#~ msgstr ""
#~ msgid "Platform"
#~ msgstr ""
#~ msgid "Intel Xeon"
#~ msgstr ""
#~ msgid "Intel Xeon Phi"
#~ msgstr ""
#~ msgid "Fujitsu A64FX"
#~ msgstr ""
#~ msgid "Prepare files for building McKernel"
#~ msgstr ""
#~ msgid ""
#~ "Grant read permission to the System.map"
#~ " file of your kernel version on "
#~ "the build machine:"
#~ msgstr ""
#~ msgid "Install the following packages to the build machine:"
#~ msgstr ""
#~ msgid "When having access to repositories"
#~ msgstr ""
#~ msgid "On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:"
#~ msgstr ""
#~ msgid "On CentOS 8, enable the PowerTools repository:"
#~ msgstr ""
#~ msgid "Install with yum:"
#~ msgstr ""
#~ msgid "When not having access to repositories"
#~ msgstr ""
#~ msgid ""
#~ "Ask the system administrator to install"
#~ " them. Note that ``libdwarf-devel`` "
#~ "is in the CodeReady Linux Builder "
#~ "repository on RHEL 8 or in the "
#~ "PowerTools repository on CentOS 8."
#~ msgstr ""
#~ msgid "Clone, compile, install"
#~ msgstr ""
#~ msgid "Clone the source code:"
#~ msgstr ""
#~ msgid "(Optional) Checkout to the specific branch or version:"
#~ msgstr ""
#~ msgid ""
#~ "Foe example, if you want to try"
#~ " the development branch, use “development”"
#~ " as the pathspec. If you want "
#~ "to try the prerelease version 1.7.0-0.2,"
#~ " use “1.7.0-0.2”."
#~ msgstr ""
#~ msgid "Move to build directory:"
#~ msgstr ""
#~ msgid "Run cmake:"
#~ msgstr ""
#~ msgid "When not cross-compiling:"
#~ msgstr ""
#~ msgid "When cross-compiling:"
#~ msgstr ""
#~ msgid "Install with cmake"
#~ msgstr ""
#~ msgid "Install with make:"
#~ msgstr ""
#~ msgid ""
#~ "The kernel modules and McKernel kernel"
#~ " image should be installed under the"
#~ " **ihk+mckernel** folder in your home "
#~ "directory."
#~ msgstr ""
#~ msgid "Install with rpm"
#~ msgstr ""
#~ msgid "Create the tarball and the spec file:"
#~ msgstr ""
#~ msgid "Create the rpm package:"
#~ msgstr ""
#~ msgid "Install the rpm package:"
#~ msgstr ""
#~ msgid ""
#~ "The kernel modules and McKernel kernel"
#~ " image are installed under the "
#~ "standard system directories."
#~ msgstr ""
#~ msgid "Prepare files and change settings for installing McKernel"
#~ msgstr ""
#~ msgid "Disable SELinux of the compute nodes:"
#~ msgstr ""
#~ msgid "Change the file to SELINUX=disabled. And then reboot the compute nodes:"
#~ msgstr ""
#~ msgid "Install the following packages to the compute nodes:"
#~ msgstr ""
#~ msgid ""
#~ "Ask the system administrator to install"
#~ " them. Note that ``libdwarf`` is in"
#~ " the CodeReady Linux Builder repository "
#~ "on RHEL 8 or in the PowerTools "
#~ "repository on CentOS 8."
#~ msgstr ""
#~ msgid "起動停止"
#~ msgstr "起動停止"
#~ msgid "関連ファイル"
#~ msgstr "関連ファイル"
#~ msgid "McKernelを用いたシステムを運用するシステム管理者を対象として、運用手順を説明する。"
#~ msgstr ""
#~ msgid ""
#~ "SMPプロセッサ向け、x86_64アーキ向けの関連ファイルの場所は以下の通り。 "
#~ "なお、IHK/McKernelのインストールディレクトリを<install>とする。"
#~ msgstr "運用向けコマンド・デーモンのファイルの場所は以下の通り。 なお、IHK/McKernelのインストールディレクトリを<install>とする。"
#~ msgid ""
#~ "SMPプロセッサ向け、x86_64アーキ向けの関連ファイルの場所は以下の通り。 "
#~ "なお、IHK/McKernelのインストールディレクトリを<install>とする。 The related "
#~ "files and their locations are as "
#~ "follows. Denote by ``<install>`` the "
#~ "install directory of IHK/McKernel."
#~ msgstr ""
#~ msgid ""
#~ "The related files and their locations"
#~ " are as follows. Denote by "
#~ "``<install>`` the install directory of "
#~ "IHK/McKernel."
#~ msgstr ""
#~ "SMPプロセッサ向け、x86_64アーキ向けの関連ファイルの場所は以下の通り。 "
#~ "なお、IHK/McKernelのインストールディレクトリを<install>とする。"
#~ msgid "運用向けコマンド・デーモンのファイルの場所は以下の通り。 なお、IHK/McKernelのインストールディレクトリを<install>とする。"
#~ msgstr "運用向けコマンド・デーモンのファイルの場所は以下の通り。 なお、IHK/McKernelのインストールディレクトリを<install>とする。"
#~ msgid "運用向けコマンド・デーモンのファイルの場所は以下の通り。"
#~ msgstr ""
#~ "The commands and daemons for operation"
#~ " and their locations are as follows."
#~ msgid "カーネルイメージ"
#~ msgstr ""
#~ msgid "ダンプ解析ツール"
#~ msgstr ""
#~ msgid "ダンプ形式変換ツール"
#~ msgstr ""
#~ msgid "tool"
#~ msgstr "ツール"

View File

@ -0,0 +1,821 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-06 10:10+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../ops.rst:5
msgid "McKernelを用いたシステムを運用するシステム管理者を対象として、運用手順を説明する。"
msgstr ""
#: ../../ops.rst:8
msgid "Installation"
msgstr "インストール"
#: ../../ops.rst:10
msgid "See `Quick Guide -- Installation <quick.html#installation>`__."
msgstr ""
#: ../../uti.rst:2
msgid "Advanced: Enable Utility Thread offloading Interface (UTI)"
msgstr ""
#: ../../uti.rst:4
msgid ""
"UTI enables a runtime such as MPI runtime to spawn utility threads such "
"as MPI asynchronous progress threads to Linux cores."
msgstr ""
#: ../../uti.rst:8
msgid "Install capstone"
msgstr ""
#: ../../uti.rst:11 ../../uti.rst:22
msgid "When compute nodes don't have access to repositories"
msgstr ""
#: ../../uti.rst:13
msgid "Install EPEL capstone-devel:"
msgstr ""
#: ../../uti.rst:24
msgid ""
"Ask the system administrator to install ``capstone-devel``. Note that it "
"is in the EPEL repository."
msgstr ""
#: ../../uti.rst:28
msgid "Install syscall_intercept"
msgstr ""
#: ../../uti.rst:37
msgid "Install UTI for McKernel"
msgstr ""
#: ../../uti.rst:39
msgid "Install:"
msgstr ""
#: ../../uti.rst:49
msgid "Install McKernel"
msgstr ""
#: ../../uti.rst:51
msgid "Add ``-DENABLE_UTI=ON`` option to ``cmake``:"
msgstr ""
#: ../../uti.rst:58
msgid "Run programs"
msgstr ""
#: ../../uti.rst:60
msgid "Add ``--enable-uti`` option to ``mcexec``:"
msgstr ""
#: ../../uti.rst:67
msgid "Install UTI for Linux"
msgstr ""
#: ../../uti.rst:69
msgid ""
"You should skip this step if it's already installed as with, for example,"
" Fujitsu Technical Computing Suite."
msgstr ""
#: ../../uti.rst:72
msgid "Install by make"
msgstr ""
#: ../../uti.rst:82
msgid "Install by rpm"
msgstr ""
#: ../../ops.rst:15
msgid "起動停止"
msgstr ""
#: ../../ops.rst:18
msgid "関連ファイル"
msgstr ""
#: ../../ops.rst:20
msgid ""
"SMPプロセッサ向け、x86_64アーキ向けの関連ファイルの場所は以下の通り。 "
"なお、IHK/McKernelのインストールディレクトリを<install>とする。"
msgstr ""
#: ../../ops.rst:24
msgid "<install>/kmod/ihk.ko"
msgstr ""
#: ../../ops.rst:24
msgid "IHK-master core"
msgstr ""
#: ../../ops.rst:26
msgid "|ihk-smp|"
msgstr ""
#: ../../ops.rst:26
msgid "IHK-master driver"
msgstr ""
#: ../../ops.rst:29
msgid "|mcctrl|"
msgstr ""
#: ../../ops.rst:29
msgid "Delegator module"
msgstr ""
#: ../../ops.rst:32
msgid "|mckernel.img|"
msgstr ""
#: ../../ops.rst:32
msgid "カーネルイメージ"
msgstr ""
#: ../../ops.rst:40
msgid "運用向けコマンド・デーモンのファイルの場所は以下の通り。 なお、IHK/McKernelのインストールディレクトリを<install>とする。"
msgstr ""
#: ../../ops.rst:44
msgid "|mcreboot|"
msgstr ""
#: ../../ops.rst:44 ../../ops.rst:104
msgid "ブートスクリプト"
msgstr ""
#: ../../ops.rst:47
msgid "|mcstop|"
msgstr ""
#: ../../ops.rst:47 ../../ops.rst:204
msgid "シャットダウンスクリプト"
msgstr ""
#: ../../ops.rst:50
msgid "<install>/bin/mcexec"
msgstr ""
#: ../../ops.rst:50 ../../ops.rst:242
msgid "プロセス起動コマンド"
msgstr ""
#: ../../ops.rst:52
msgid "<install>/bin/eclair"
msgstr ""
#: ../../ops.rst:52
msgid "ダンプ解析ツール"
msgstr ""
#: ../../ops.rst:54
msgid "|vmcore2mckdump|"
msgstr ""
#: ../../ops.rst:54
msgid "ダンプ形式変換ツール"
msgstr ""
#: ../../ops.rst:62
msgid "以下、関連コマンドおよび関連関数のインターフェイスを説明する。"
msgstr ""
#: ../../ops.rst:65
msgid "インターフェイス"
msgstr ""
#: ../../ops.rst:68
msgid "カーネル引数"
msgstr ""
#: ../../ops.rst:70
msgid "McKernelのカーネル引数を表 :numref:`tab-kargs` に示す。"
msgstr ""
#: ../../ops.rst:74
msgid "McKernelのカーネル引数"
msgstr ""
#: ../../ops.rst:77
msgid "hidos"
msgstr ""
#: ../../ops.rst:77
msgid "IKCを有効にする。"
msgstr ""
#: ../../ops.rst:79
msgid "|dlv|"
msgstr ""
#: ../../ops.rst
msgid "Linuxのpanicハンドラ経由でダンプを行った場合の、ダ"
msgstr ""
#: ../../ops.rst
msgid "ンプ対象とするメモリ領域の種類を<level>に設定する。"
msgstr ""
#: ../../ops.rst
msgid "設定可能な値は以下の通り。"
msgstr ""
#: ../../ops.rst:85 ../../ops.rst:166 ../../ops.rst:198 ../../ops.rst:236
msgid "0"
msgstr ""
#: ../../ops.rst:86 ../../ops.rst:167
msgid "IHKがMcKernelに割り当てたメモリ領域を出力する"
msgstr ""
#: ../../ops.rst:87 ../../ops.rst:168
msgid "24"
msgstr ""
#: ../../ops.rst:88 ../../ops.rst:169
msgid "カーネルが使用しているメモリ領域を出力する"
msgstr ""
#: ../../ops.rst:90 ../../ops.rst:171
msgid "指定がなかった場合は24が用いられる。"
msgstr ""
#: ../../ops.rst:92
msgid "|allow|"
msgstr ""
#: ../../ops.rst
msgid "McKernelに割り当てられたCPU数より大きい数のスレッド"
msgstr ""
#: ../../ops.rst
msgid "またはプロセスの生成を許可する。この引数が指定され"
msgstr ""
#: ../../ops.rst
msgid "ない場合に、CPU数より大きい数のスレッドまたはプロセ"
msgstr ""
#: ../../ops.rst
msgid "スをclone(), fork(), vfork()などで生成しようとする"
msgstr ""
#: ../../ops.rst
msgid "と、当該システムコールがEINVALエラーを返す。"
msgstr ""
#: ../../ops.rst:107 ../../ops.rst:209
msgid "書式"
msgstr ""
#: ../../ops.rst:115 ../../ops.rst:219
msgid "オプション"
msgstr ""
#: ../../ops.rst:118
msgid "|opt-c|"
msgstr ""
#: ../../ops.rst
msgid "McKernelに割り当てるCPUのリストを指定する。フォー"
msgstr ""
#: ../../ops.rst
msgid "マットは以下の通り。"
msgstr ""
#: ../../ops.rst
msgid "<CPU logical id>,<CPU logical id>...または"
msgstr ""
#: ../../ops.rst
msgid "<CPU logical id>-<CPU logical id>,<CPU logical id>"
msgstr ""
#: ../../ops.rst
msgid "-<CPU logical id>...または両者の混合。"
msgstr ""
#: ../../ops.rst:124
msgid "|opt-r|"
msgstr ""
#: ../../ops.rst
msgid "McKernelのCPUがIKCメッセージを送るLinux"
msgstr ""
#: ../../ops.rst
msgid "CPUを指定する。フォーマットは以下の通り。"
msgstr ""
#: ../../ops.rst
msgid "<CPU list>:<CPU id>+<CPU list>:<CPU id>..."
msgstr ""
#: ../../ops.rst
msgid "<CPU list>のフォーマットは-cオプションにおけるもの"
msgstr ""
#: ../../ops.rst
msgid "と同じである。"
msgstr ""
#: ../../ops.rst
msgid "各<CPU list>:<CPU id>は<CPU list>で示されるMcKernel"
msgstr ""
#: ../../ops.rst
msgid "のCPUが<CPU logical id>で示されるLinuxのCPUにIKC"
msgstr ""
#: ../../ops.rst
msgid "メッセージを送信することを意味する。"
msgstr ""
#: ../../ops.rst:133
msgid "|opt-m|"
msgstr ""
#: ../../ops.rst
msgid "McKernelに割り当てるメモリ領域を指定する。フォーマッ"
msgstr ""
#: ../../ops.rst
msgid "トは以下の通り。"
msgstr ""
#: ../../ops.rst
msgid "<size>@<NUMA-id>, <size>@<NUMA-id>..."
msgstr ""
#: ../../ops.rst:137
msgid "|opt-f|"
msgstr ""
#: ../../ops.rst
msgid "ihkmondが使用するsyslogプロトコルのfacilityを指定す"
msgstr ""
#: ../../ops.rst
msgid "る。デフォルトはLOG_LOCAL6。"
msgstr ""
#: ../../ops.rst:140
msgid "|opt-o|"
msgstr ""
#: ../../ops.rst
msgid "IHKのデバイスファイル(/dev/mcd*, /dev/mcos*)のオー"
msgstr ""
#: ../../ops.rst
msgid "ナーとグループの値を<user>[:<group>]の形式で指定す"
msgstr ""
#: ../../ops.rst
msgid "る。デフォルトはmcreboot.shを実行したユーザ。"
msgstr ""
#: ../../ops.rst:144
msgid "|opt-i|"
msgstr ""
#: ../../ops.rst
msgid "ihkmondがハングアップ検知のためにOS状態を確認する時"
msgstr ""
#: ../../ops.rst
msgid "間間隔を秒単位で指定する。-1が指定された場合はハン"
msgstr ""
#: ../../ops.rst
msgid "グアップ検知を行わない。指定がない場合はハングアッ"
msgstr ""
#: ../../ops.rst
msgid "プ検知を行わない。"
msgstr ""
#: ../../ops.rst:149
msgid "|opt-k|"
msgstr ""
#: ../../ops.rst
msgid "カーネルメッセージの/dev/logへのリダイレクト有無を"
msgstr ""
#: ../../ops.rst
msgid "指定する。0が指定された場合はリダイレクトを行わず、"
msgstr ""
#: ../../ops.rst
msgid "0以外が指定された場合はリダイレクトを行う。指定がな"
msgstr ""
#: ../../ops.rst
msgid "い場合はリダイレクトを行わない。"
msgstr ""
#: ../../ops.rst:154
msgid "-q <irq>"
msgstr ""
#: ../../ops.rst
msgid "IHKが使用するIRQ番号を指定する。指定がない場合は"
msgstr ""
#: ../../ops.rst
msgid "64-255の範囲で空いているものを使用する。"
msgstr ""
#: ../../ops.rst:157
msgid "-t"
msgstr ""
#: ../../ops.rst
msgid "x86_64アーキテクチャのみTurbo"
msgstr ""
#: ../../ops.rst
msgid "Boostをオンにする。デフォルトはオフ。"
msgstr ""
#: ../../ops.rst:160
msgid "-d <level>"
msgstr ""
#: ../../ops.rst:173
msgid "-O"
msgstr ""
#: ../../ops.rst
msgid "またはプロセスの生成を許可する。指定がない場合は許可"
msgstr ""
#: ../../ops.rst
msgid "しない。すなわち、CPU数より大きい数のスレッドまたは"
msgstr ""
#: ../../ops.rst
msgid "プロセスを生成しようとするとエラーとなる。"
msgstr ""
#: ../../ops.rst:189 ../../ops.rst:226
msgid "説明"
msgstr ""
#: ../../ops.rst:191
msgid ""
"McKernel関連カーネルモジュールをinsmodし、<cpulist>で指定されたCPUと<memlist>で指定されたメモリ領域からなるパーティションを作成し、IKC"
" mapを<ikcmap>に設定し、前記パーティションにMcKernelをブートする。"
msgstr ""
#: ../../ops.rst:195 ../../ops.rst:233
msgid "戻り値"
msgstr ""
#: ../../ops.rst:198 ../../ops.rst:236
msgid "正常終了"
msgstr ""
#: ../../ops.rst:200 ../../ops.rst:238
msgid "0以外"
msgstr ""
#: ../../ops.rst:200 ../../ops.rst:238
msgid "エラー"
msgstr ""
#: ../../ops.rst:221
msgid "なし"
msgstr ""
#: ../../ops.rst:228
msgid "McKernelをシャットダウンし、McKernel用パーティションを削除し、関連カーネルモジュールをrmmodする。"
msgstr ""
#: ../../ops.rst:247
msgid "ダンプ解析コマンド"
msgstr ""
#: ../../ops.rst:252
msgid "ダンプ形式変換コマンド"
msgstr ""
#: ../../ops.rst:257
msgid "ブート手順"
msgstr ""
#: ../../ops.rst:259
msgid "mcreboot.shを用いてブート手順を説明する。"
msgstr ""
#: ../../ops.rst:261 ../../ops.rst:908
msgid "スクリプトは以下の通り。"
msgstr ""
#: ../../ops.rst:854 ../../ops.rst:1052
msgid "手順は以下の通り。"
msgstr ""
#: ../../ops.rst:856
msgid "ihkmondを起動する。ihkmondは任意のタイミングで起動してよい。これは、ihkmondはOSインスタンスの作成を検知して動作を開始するためである。83行目"
msgstr ""
#: ../../ops.rst:858
msgid "Linuxのカーネルバージョンが、mcoverlayfsが動作するものであるかを確認する。200–216行目"
msgstr ""
#: ../../ops.rst:860
msgid "irqbalanceを停止する。251–257行目"
msgstr ""
#: ../../ops.rst:862
msgid ""
"/proc/irq/[n]/affinityの設定を保存した上でMcKernel "
"CPUを担当から外す。担当CPUが無くなる場合は、全てのLinux CPUを指定する。269–303行目"
msgstr ""
#: ../../ops.rst:866
msgid "ihk.koをinsmodする。307行目"
msgstr ""
#: ../../ops.rst:868
msgid "Linuxによるメモリフラグメンテーションを緩和するために以下を実施する。313–320行目"
msgstr ""
#: ../../ops.rst:870
msgid "アクティブでないプロセスを積極的にスワップアウトするように設定する"
msgstr ""
#: ../../ops.rst:872
msgid "クリーンなページキャッシュを無効化し、またdentriesやinodeのslabオブジェクトのうち可能なものを破棄する"
msgstr ""
#: ../../ops.rst:874
msgid "連続する空き領域を結合してより大きな空き領域にまとめる"
msgstr ""
#: ../../ops.rst:876
msgid ""
"ihk-smp-x86.koをinsmodする。340行目ihk-smp-x86.koは関数をihk.koに登録する。このため、ihk-"
"smp-x86.koはihk.koをinsmodした後にinsmodする必要がある。"
msgstr ""
#: ../../ops.rst:878
msgid "メモリを予約する。370行目"
msgstr ""
#: ../../ops.rst:880
msgid "CPUを予約する。374行目"
msgstr ""
#: ../../ops.rst:882
msgid "McKernelのカーネルモジュールmcctrl.koをinsmodする。382行目mcctrl.koはMcKernelブート時に呼ばれる関数をihk.koに登録する。このため、mcctrl.koのinsmodはihk.koのinsmodの後に、またブートの前に行う必要がある。"
msgstr ""
#: ../../ops.rst:884
msgid "OSインスタンスを作成する。406行目"
msgstr ""
#: ../../ops.rst:886
msgid "OSインスタンスにCPUを割り当てる。412行目"
msgstr ""
#: ../../ops.rst:888
msgid "McKernel CPUのIKCメッセージ送信先のLinux CPUを設定する。419行目"
msgstr ""
#: ../../ops.rst:890
msgid "OSインスタンスにメモリを割り当てる。426行目"
msgstr ""
#: ../../ops.rst:892
msgid "カーネルイメージをロードする。432行目"
msgstr ""
#: ../../ops.rst:894
msgid "カーネル引数をカーネルに渡す。438行目"
msgstr ""
#: ../../ops.rst:896
msgid "カーネルをブートする。444行目"
msgstr ""
#: ../../ops.rst:898
msgid ""
"/proc, "
"/sysファイルの準備をする。また、その中でmcoverlayfs.koをinsmodする。mcoverlayfs.koは他モジュールとの依存関係を持たない。454行目から567行目なお、関数インターフェイスでの対応関数はihk_os_create_pseudofs()である。"
msgstr ""
#: ../../ops.rst:900
msgid "irqbalanceを、Linux CPUのみを対象とする設定で開始する。569–587行目"
msgstr ""
#: ../../ops.rst:904
msgid "シャットダウン手順"
msgstr ""
#: ../../ops.rst:906
msgid "mcstop+release.shを用いてシャットダウン手順を説明する。"
msgstr ""
#: ../../ops.rst:1054
msgid "ブート時にLinux CPUのみを対象とする設定で開始されたirqbalanceを停止する。24–33行目"
msgstr ""
#: ../../ops.rst:1057
msgid "全てのOSインスタンスを破壊する。OSインスタンスに割り当てられていた資源はIHKがLWKのために予約した状態に移行する。35–50行目"
msgstr ""
#: ../../ops.rst:1059
msgid "IHKがLWKのために予約していた資源を解放する。52–77行目"
msgstr ""
#: ../../ops.rst:1061
msgid "mcctrl.koをrmmodする。81行目"
msgstr ""
#: ../../ops.rst:1063
msgid ""
"/proc, "
"/sysファイルの準備をする。また、その中でmcoverlayfs.koをrmmodする。87–100行目なお、関数インターフェイスでの対応関数はihk_os_destroy_pseudofs()である。"
msgstr ""
#: ../../ops.rst:1065
msgid "ihk-smp-x86.koをrmmodする。104行目"
msgstr ""
#: ../../ops.rst:1067
msgid "ihk.koをrmmodする。112行目"
msgstr ""
#: ../../ops.rst:1069
msgid "ihkmondを停止する。121行目"
msgstr ""
#: ../../ops.rst:1071
msgid "/proc/irq/[n]/affinityの設定をブート時に保存しておいたものに戻し、ブート前の設定でirqbalanceを開始する。124–135行目"
msgstr ""
#: ../../ops.rst:1073
msgid "Linuxカーネルのスワップアウト積極度の設定をデフォルトの値に戻す。138行目"
msgstr ""
#~ msgid "The following OS distributions and platforms are recommended:"
#~ msgstr "推奨OSディストリビューションとプロセッサは以下の通り。"
#~ msgid "OS distribution"
#~ msgstr ""
#~ msgid "CentOS 7.3 or later"
#~ msgstr ""
#~ msgid "RHEL 7.3 or later"
#~ msgstr ""
#~ msgid "Platform"
#~ msgstr ""
#~ msgid "Intel Xeon"
#~ msgstr ""
#~ msgid "Intel Xeon Phi"
#~ msgstr ""
#~ msgid "Fujitsu A64FX"
#~ msgstr ""
#~ msgid "Prepare files for building McKernel"
#~ msgstr ""
#~ msgid ""
#~ "Grant read permission to the System.map"
#~ " file of your kernel version on "
#~ "the build machine:"
#~ msgstr ""
#~ msgid "Install the following packages to the build machine:"
#~ msgstr ""
#~ msgid "When having access to repositories"
#~ msgstr ""
#~ msgid "On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:"
#~ msgstr ""
#~ msgid "On CentOS 8, enable the PowerTools repository:"
#~ msgstr ""
#~ msgid "Install with yum:"
#~ msgstr ""
#~ msgid "When not having access to repositories"
#~ msgstr ""
#~ msgid ""
#~ "Ask the system administrator to install"
#~ " them. Note that ``libdwarf-devel`` "
#~ "is in the CodeReady Linux Builder "
#~ "repository on RHEL 8 or in the "
#~ "PowerTools repository on CentOS 8."
#~ msgstr ""
#~ msgid "Clone, compile, install"
#~ msgstr ""
#~ msgid "Clone the source code:"
#~ msgstr ""
#~ msgid "(Optional) Checkout to the specific branch or version:"
#~ msgstr ""
#~ msgid ""
#~ "Foe example, if you want to try"
#~ " the development branch, use “development”"
#~ " as the pathspec. If you want "
#~ "to try the prerelease version 1.7.0-0.2,"
#~ " use “1.7.0-0.2”."
#~ msgstr ""
#~ msgid "Move to build directory:"
#~ msgstr ""
#~ msgid "Run cmake:"
#~ msgstr ""
#~ msgid "When not cross-compiling:"
#~ msgstr ""
#~ msgid "When cross-compiling:"
#~ msgstr ""
#~ msgid "Install with cmake"
#~ msgstr ""
#~ msgid "Install with make:"
#~ msgstr ""
#~ msgid ""
#~ "The kernel modules and McKernel kernel"
#~ " image should be installed under the"
#~ " **ihk+mckernel** folder in your home "
#~ "directory."
#~ msgstr ""
#~ msgid "Install with rpm"
#~ msgstr ""
#~ msgid "Create the tarball and the spec file:"
#~ msgstr ""
#~ msgid "Create the rpm package:"
#~ msgstr ""
#~ msgid "Install the rpm package:"
#~ msgstr ""
#~ msgid ""
#~ "The kernel modules and McKernel kernel"
#~ " image are installed under the "
#~ "standard system directories."
#~ msgstr ""
#~ msgid "Prepare files and change settings for installing McKernel"
#~ msgstr ""
#~ msgid "Disable SELinux of the compute nodes:"
#~ msgstr ""
#~ msgid "Change the file to SELINUX=disabled. And then reboot the compute nodes:"
#~ msgstr ""
#~ msgid "Install the following packages to the compute nodes:"
#~ msgstr ""
#~ msgid ""
#~ "Ask the system administrator to install"
#~ " them. Note that ``libdwarf`` is in"
#~ " the CodeReady Linux Builder repository "
#~ "on RHEL 8 or in the PowerTools "
#~ "repository on CentOS 8."
#~ msgstr ""

View File

@ -0,0 +1,458 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../quick.rst:6
msgid "Introduction"
msgstr ""
#: ../../summary.rst:1
msgid ""
"IHK/McKernel is a light-weight multi-kernel operating system designed for"
" high-end supercomputing. It runs Linux and McKernel, a light-weight "
"kernel (LWK), side-by-side inside compute nodes and aims at the "
"following:"
msgstr ""
#: ../../summary.rst:6
msgid ""
"Provide scalable and consistent execution of large-scale parallel "
"scientific applications, but at the same time maintain the ability to "
"rapidly adapt to new hardware features and emerging programming models"
msgstr ""
#: ../../summary.rst:10
msgid ""
"Provide efficient memory and device management so that resource "
"contention and data movement are minimized at the system level"
msgstr ""
#: ../../summary.rst:12
msgid ""
"Eliminate OS noise by isolating OS services in Linux and provide jitter "
"free execution on the LWK"
msgstr ""
#: ../../summary.rst:14
msgid ""
"Support the full POSIX/Linux APIs by selectively offloading (slow-path) "
"system calls to Linux"
msgstr ""
#: ../../background.rst:2
msgid "Background and Motivation"
msgstr ""
#: ../../background.rst:4
msgid ""
"With the growing complexity of high-end supercomputers, the current "
"system software stack faces significant challenges as we move forward to "
"exascale and beyond. The necessity to deal with extreme degree of "
"parallelism, heterogeneous architectures, multiple levels of memory "
"hierarchy, power constraints, etc., advocates operating systems that can "
"rapidly adapt to new hardware requirements, and that can support novel "
"programming paradigms and runtime systems. On the other hand, a new class"
" of more dynamic and complex applications are also on the horizon, with "
"an increasing demand for application constructs such as in-situ analysis,"
" workflows, elaborate monitoring and performance tools. This complexity "
"relies not only on the rich features of POSIX, but also on the Linux APIs"
" (such as the */proc*, */sys* filesystems, etc.) in particular."
msgstr ""
#: ../../background.rst:19
msgid "Two Traditional HPC OS Approaches"
msgstr ""
#: ../../background.rst:21
msgid ""
"Traditionally, light-weight operating systems specialized for HPC "
"followed two approaches to tackle scalable execution of large-scale "
"applications. In the full weight kernel (FWK) approach, a full Linux "
"environment is taken as the basis, and features that inhibit attaining "
"HPC scalability are removed, i.e., making it light-weight. The pure "
"light-weight kernel (LWK) approach, on the other hand, starts from "
"scratch and effort is undertaken to add sufficient functionality so that "
"it provides a familiar API, typically something close to that of a "
"general purpose OS, while at the same time it retains the desired "
"scalability and reliability attributes. Neither of these approaches "
"yields a fully Linux compatible environment."
msgstr ""
#: ../../background.rst:34
msgid "The Multi-kernel Approach"
msgstr ""
#: ../../background.rst:36
msgid ""
"A hybrid approach recognized recently by the system software community is"
" to run Linux simultaneously with a lightweight kernel on compute nodes "
"and multiple research projects are now pursuing this direction. The basic"
" idea is that simulations run on an HPC tailored lightweight kernel, "
"ensuring the necessary isolation for noiseless execution of parallel "
"applications, but Linux is leveraged so that the full POSIX API is "
"supported. Additionally, the small code base of the LWK can also "
"facilitate rapid prototyping for new, exotic hardware features. "
"Nevertheless, the questions of how to share node resources between the "
"two types of kernels, where do device drivers execute, how exactly do the"
" two kernels interact with each other and to what extent are they "
"integrated, remain subjects of ongoing debate."
msgstr ""
#: ../../archtecture.rst:2
msgid "Architectural Overview"
msgstr ""
#: ../../archtecture.rst:4
msgid ""
"At the heart of the stack is a low-level software infrastructure called "
"Interface for Heterogeneous Kernels (IHK). IHK is a general framework "
"that provides capabilities for partitioning resources in a many-core "
"environment (e.g., CPU cores and physical memory) and it enables "
"management of lightweight kernels. IHK can allocate and release host "
"resources dynamically and no reboot of the host machine is required when "
"altering configuration. IHK also provides a low-level inter-kernel "
"messaging infrastructure, called the Inter-Kernel Communication (IKC) "
"layer. An architectural overview of the main system components is shown "
"below."
msgstr ""
#: ../../archtecture.rst:18
msgid ""
"McKernel is a lightweight kernel written from scratch. It is designed for"
" HPC and is booted from IHK. McKernel retains a binary compatible ABI "
"with Linux, however, it implements only a small set of performance "
"sensitive system calls and the rest are offloaded to Linux. Specifically,"
" McKernel has its own memory management, it supports processes and multi-"
"threading with a simple round-robin cooperative (tick-less) scheduler, "
"and it implements signaling. It also allows inter-process memory mappings"
" and it provides interfaces to hardware performance counters."
msgstr ""
#: ../../archtecture.rst:29
msgid "Functionality"
msgstr ""
#: ../../archtecture.rst:31
msgid ""
"An overview of some of the principal functionalities of the IHK/McKernel "
"stack is provided below."
msgstr ""
#: ../../archtecture.rst:35
msgid "System Call Offloading"
msgstr ""
#: ../../archtecture.rst:37
msgid ""
"System call forwarding in McKernel is implemented as follows. When an "
"offloaded system call occurs, McKernel marshals the system call number "
"along with its arguments and sends a message to Linux via a dedicated IKC"
" channel. The corresponding proxy process running on Linux is by default "
"waiting for system call requests through an ioctl() call into IHK's "
"system call delegator kernel module. The delegator kernel module's IKC "
"interrupt handler wakes up the proxy process, which returns to userspace "
"and simply invokes the requested system call. Once it obtains the return "
"value, it instructs the delegator module to send the result back to "
"McKernel, which subsequently passes the value to user-space."
msgstr ""
#: ../../archtecture.rst:49
msgid "Unified Address Space"
msgstr ""
#: ../../archtecture.rst:51
msgid ""
"The unified address space model in IHK/McKernel ensures that offloaded "
"system calls can seamlessly resolve arguments even in case of pointers. "
"This mechanism is depicted below and is implemented as follows."
msgstr ""
#: ../../archtecture.rst:58
msgid ""
"First, the proxy process is compiled as a position independent binary, "
"which enables us to map the code and data segments specific to the proxy "
"process to an address range which is explicitly excluded from McKernel's "
"user space. The grey box on the right side of the figure demonstrates the"
" excluded region. Second, the entire valid virtual address range of "
"McKernel's application user-space is covered by a special mapping in the "
"proxy process for which we use a pseudo file mapping in Linux. This "
"mapping is indicated by the blue box on the left side of the figure."
msgstr ""
#: ../../install.rst:4
msgid "Installation"
msgstr "インストール"
#: ../../install.rst:6
msgid "The following OS distributions and platforms are recommended:"
msgstr "推奨OSディストリビューションとプロセッサは以下の通り。"
#: ../../install.rst:8
msgid "OS distribution"
msgstr ""
#: ../../install.rst:10
msgid "CentOS 7.3 or later"
msgstr ""
#: ../../install.rst:11
msgid "RHEL 7.3 or later"
msgstr ""
#: ../../install.rst:13
msgid "Platform"
msgstr ""
#: ../../install.rst:15
msgid "Intel Xeon"
msgstr ""
#: ../../install.rst:16
msgid "Intel Xeon Phi"
msgstr ""
#: ../../install.rst:17
msgid "Fujitsu A64FX"
msgstr ""
#: ../../install.rst:20
msgid "Prepare files for building McKernel"
msgstr ""
#: ../../install.rst:22
msgid ""
"Grant read permission to the System.map file of your kernel version on "
"the build machine:"
msgstr ""
#: ../../install.rst:28
msgid "Install the following packages to the build machine:"
msgstr ""
#: ../../install.rst:35 ../../install.rst:179
msgid "When having access to repositories"
msgstr ""
#: ../../install.rst:37 ../../install.rst:181
msgid "On RHEL 8, enable the CodeReady Linux Builder (CLB) repository:"
msgstr ""
#: ../../install.rst:43 ../../install.rst:187
msgid "On CentOS 8, enable the PowerTools repository:"
msgstr ""
#: ../../install.rst:49 ../../install.rst:193
msgid "Install with yum:"
msgstr ""
#: ../../install.rst:56 ../../install.rst:200
msgid "When not having access to repositories"
msgstr ""
#: ../../install.rst:58
msgid ""
"Ask the system administrator to install them. Note that ``libdwarf-"
"devel`` is in the CodeReady Linux Builder repository on RHEL 8 or in the "
"PowerTools repository on CentOS 8."
msgstr ""
#: ../../install.rst:61
msgid "Clone, compile, install"
msgstr ""
#: ../../install.rst:63
msgid "Clone the source code:"
msgstr ""
#: ../../install.rst:71
msgid "(Optional) Checkout to the specific branch or version:"
msgstr ""
#: ../../install.rst:79
msgid ""
"Foe example, if you want to try the development branch, use “development”"
" as the pathspec. If you want to try the prerelease version 1.7.0-0.2, "
"use “1.7.0-0.2”."
msgstr ""
#: ../../install.rst:83
msgid "Move to build directory:"
msgstr ""
#: ../../install.rst:89
msgid "Run cmake:"
msgstr ""
#: ../../install.rst:92 ../../install.rst:135
msgid "When not cross-compiling:"
msgstr ""
#: ../../install.rst:99 ../../install.rst:142
msgid "When cross-compiling:"
msgstr ""
#: ../../install.rst:111
msgid "Install with cmake"
msgstr ""
#: ../../install.rst:113
msgid "Install with make:"
msgstr ""
#: ../../install.rst:119
msgid ""
"The kernel modules and McKernel kernel image should be installed under "
"the **ihk+mckernel** folder in your home directory."
msgstr ""
#: ../../install.rst:123
msgid "Install with rpm"
msgstr ""
#: ../../install.rst:125
msgid "Create the tarball and the spec file:"
msgstr ""
#: ../../install.rst:132
msgid "Create the rpm package:"
msgstr ""
#: ../../install.rst:148
msgid "Install the rpm package:"
msgstr ""
#: ../../install.rst:154
msgid ""
"The kernel modules and McKernel kernel image are installed under the "
"standard system directories."
msgstr ""
#: ../../install.rst:158
msgid "Prepare files and change settings for installing McKernel"
msgstr ""
#: ../../install.rst:160
msgid "Disable SELinux of the compute nodes:"
msgstr ""
#: ../../install.rst:166
msgid "Change the file to SELINUX=disabled. And then reboot the compute nodes:"
msgstr ""
#: ../../install.rst:172
msgid "Install the following packages to the compute nodes:"
msgstr ""
#: ../../install.rst:202
msgid ""
"Ask the system administrator to install them. Note that ``libdwarf`` is "
"in the CodeReady Linux Builder repository on RHEL 8 or in the PowerTools "
"repository on CentOS 8."
msgstr ""
#: ../../boot_run_shutdown.rst:2
msgid "Boot McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:4
msgid ""
"A boot script called ``mcreboot.sh`` is provided under ``sbin`` in the "
"install folder. To boot on logical CPU 1 with 512MB of memory, use the "
"following invocation:"
msgstr ""
#: ../../boot_run_shutdown.rst:14
msgid ""
"You should see something similar like this if you display the McKernels "
"kernel message log:"
msgstr ""
#: ../../boot_run_shutdown.rst:42
msgid "Run a simple program on McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:44
msgid ""
"The mcexec command line tool (which is also the Linux proxy process) can "
"be used for executing applications on McKernel:"
msgstr ""
#: ../../boot_run_shutdown.rst:53
msgid "Shutdown McKernel"
msgstr ""
#: ../../boot_run_shutdown.rst:55
msgid ""
"Finally, to shutdown McKernel and release CPU/memory resources back to "
"Linux use the following command:"
msgstr ""
#: ../../team.rst:2
msgid "The Team"
msgstr ""
#: ../../team.rst:4
msgid ""
"The McKernel project was started at The University of Tokyo and currently"
" it is mainly developed at RIKEN. Some of our collaborators include:"
msgstr ""
#: ../../team.rst:8
msgid "Hitachi"
msgstr ""
#: ../../team.rst:9
msgid "Fujitsu"
msgstr ""
#: ../../team.rst:10
msgid "CEA (France)"
msgstr ""
#: ../../team.rst:11
msgid "NEC"
msgstr ""
#: ../../license.rst:2
msgid "License"
msgstr ""
#: ../../license.rst:4
msgid "McKernel is GPL licensed, as found in the LICENSE file."
msgstr ""
#: ../../contact.rst:2
msgid "Contact"
msgstr ""
#: ../../contact.rst:4
msgid ""
"Please give your feedback to us via one of the following mailing lists. "
"Subscription via `www.pccluster.org "
"<http://www.pccluster.org/mailman/listinfo/mckernel-users>`__ is needed."
msgstr ""
#: ../../contact.rst:9
msgid "English: mckernel-users@pccluster.org"
msgstr ""
#: ../../contact.rst:10
msgid "Japanese: mckernel-users-jp@pccluster.org"
msgstr ""

View File

@ -0,0 +1,28 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../spec-ihk.md:1
msgid "hi"
msgstr ""
#: ../../spec-ihk.md:3
msgid ":download:IHK Spec <ihk.pdf>"
msgstr ""

View File

@ -0,0 +1,24 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../spec-mckernel.md:1
msgid "Hello"
msgstr ""

View File

@ -0,0 +1,48 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-07 10:00+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../spec/ihk.rst:6
msgid "External Specs"
msgstr ""
#: ../../spec/ihk.rst:9
msgid "Overview"
msgstr ""
#: ../../spec/ihk.rst:12
msgid "Function Specs"
msgstr ""
#: ../../spec/ihk.rst:15
msgid "Command / Daemon Specs"
msgstr ""
#: ../../spec/ihk.rst:18
msgid "Booting LWK"
msgstr ""
#: ../../spec/mckernel.rst:6
msgid "Interfaces"
msgstr ""
#: ../../spec/mckernel.rst:9
msgid "Interface details"
msgstr ""

View File

@ -0,0 +1,53 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../summary.rst:1
msgid ""
"IHK/McKernel is a light-weight multi-kernel operating system designed for"
" high-end supercomputing. It runs Linux and McKernel, a light-weight "
"kernel (LWK), side-by-side inside compute nodes and aims at the "
"following:"
msgstr ""
#: ../../summary.rst:6
msgid ""
"Provide scalable and consistent execution of large-scale parallel "
"scientific applications, but at the same time maintain the ability to "
"rapidly adapt to new hardware features and emerging programming models"
msgstr ""
#: ../../summary.rst:10
msgid ""
"Provide efficient memory and device management so that resource "
"contention and data movement are minimized at the system level"
msgstr ""
#: ../../summary.rst:12
msgid ""
"Eliminate OS noise by isolating OS services in Linux and provide jitter "
"free execution on the LWK"
msgstr ""
#: ../../summary.rst:14
msgid ""
"Support the full POSIX/Linux APIs by selectively offloading (slow-path) "
"system calls to Linux"
msgstr ""

View File

@ -0,0 +1,46 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../team.rst:2
msgid "The Team"
msgstr ""
#: ../../team.rst:4
msgid ""
"The McKernel project was started at The University of Tokyo and currently"
" it is mainly developed at RIKEN. Some of our collaborators include:"
msgstr ""
#: ../../team.rst:8
msgid "Hitachi"
msgstr ""
#: ../../team.rst:9
msgid "Fujitsu"
msgstr ""
#: ../../team.rst:10
msgid "CEA (France)"
msgstr ""
#: ../../team.rst:11
msgid "NEC"
msgstr ""

View File

@ -0,0 +1,454 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-07 10:00+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../users.rst:6
msgid "Architectural Overview"
msgstr ""
#: ../../users.rst:8
msgid ""
"See `Quick Guide -- Architectural Overview <quick.html#architectural-"
"overview>`__."
msgstr ""
#: ../../users.rst:11
msgid "Running Programs"
msgstr ""
#: ../../users.rst:13
msgid ""
"You need to check if your application and pre-/post-processing programs "
"are suited to run with McKernel. Follow the guide below to choose to run "
"the whole on McKernel, or run the whole on Linux, or run pre-/post-"
"processing on Linux and the application on McKernel:"
msgstr ""
#: ../../users.rst:16
msgid "Application"
msgstr ""
#: ../../users.rst:18
msgid ""
"Run the whole on Linux if it issues system calls frequently and becoming "
"the bottleneck with McKernel. For example, it's better to run on Linux "
"those programs performing many file I/O operations."
msgstr ""
#: ../../users.rst:19
msgid "Otherwise, run it on McKernel."
msgstr ""
#: ../../users.rst:21
msgid "Pre-/Post-processing"
msgstr ""
#: ../../users.rst:23
msgid ""
"Run it on McKernel if it consumes a large amount of memory or the "
"execution time isn't prolonged prohivitively with McKernel. The reason "
"for the first condition is that the resource could be limited for Linux "
"CPUs in the nodes for McKernel."
msgstr ""
#: ../../users.rst:24
msgid "Otherwise, run it on Linux."
msgstr ""
#: ../../users.rst:28
msgid "Modify job script"
msgstr ""
#: ../../users.rst:30
msgid ""
"When using job submission system, you need to modify the job scripts so "
"that the job script itself is going to run on Linux. For example, with "
"Fujitsu Technical Computing Suite (TCS), you need to specify "
"``jobenv=linux`` by inserting the following line into the job script:"
msgstr ""
#: ../../users.rst:38
msgid "Insert ``mcexec`` into the command line"
msgstr ""
#: ../../users.rst:40
msgid ""
"You need to insert ``mcexec`` into the command lines invoking the "
"programs that you chose to run on McKernel:"
msgstr ""
#: ../../users.rst:43
msgid "Non-MPI programs"
msgstr ""
#: ../../users.rst:45
msgid "Insert ``mcexec`` before an executable:"
msgstr ""
#: ../../users.rst:52
msgid "MPI programs"
msgstr ""
#: ../../users.rst:54
msgid ""
"Insert ``mcexec -n <processes-per-node>`` **after mpirun** and before an "
"executable:"
msgstr ""
#: ../../users.rst:61
msgid ""
"``<processes-per-node>`` is the number of the processes per node and "
"calculated by (number of MPI processes) / (number of nodes)."
msgstr ""
#: ../../users.rst:64
msgid ""
"For example, ``<processes-per-node>`` equals to 4 (=32/8) when specifying"
" the number of processes and nodes as follows with Fujitsu Technical "
"Computing Suite."
msgstr ""
#: ../../users.rst:74
msgid "Limitations"
msgstr ""
#: ../../users.rst:76
msgid ""
"Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed correctly "
"even if the mmap() returns a success. An access of their mapping receives"
" the SIGSEGV signal."
msgstr ""
#: ../../users.rst:80
msgid ""
"clone() supports only the following flags. All the other flags cause "
"clone() to return error or are simply ignored."
msgstr ""
#: ../../users.rst:83
msgid "CLONE_CHILD_CLEARTID"
msgstr ""
#: ../../users.rst:84
msgid "CLONE_CHILD_SETTID"
msgstr ""
#: ../../users.rst:85
msgid "CLONE_PARENT_SETTID"
msgstr ""
#: ../../users.rst:86
msgid "CLONE_SETTLS"
msgstr ""
#: ../../users.rst:87
msgid "CLONE_SIGHAND"
msgstr ""
#: ../../users.rst:88
msgid "CLONE_VM"
msgstr ""
#: ../../users.rst:90
msgid "PAPI has the following restriction."
msgstr ""
#: ../../users.rst:92
msgid ""
"Number of counters a user can use at the same time is up to the number of"
" the physical counters in the processor."
msgstr ""
#: ../../users.rst:95
msgid "msync writes back only the modified pages mapped by the calling process."
msgstr ""
#: ../../users.rst:98
msgid "The following syscalls always return the ENOSYS error."
msgstr ""
#: ../../users.rst:100
msgid "migrate_pages()"
msgstr ""
#: ../../users.rst:101
msgid "move_pages()"
msgstr ""
#: ../../users.rst:102
msgid "set_robust_list()"
msgstr ""
#: ../../users.rst:104
msgid "The following syscalls always return the EOPNOTSUPP error."
msgstr ""
#: ../../users.rst:106
msgid "arch_prctl(ARCH_SET_GS)"
msgstr ""
#: ../../users.rst:107
msgid "signalfd()"
msgstr ""
#: ../../users.rst:109
msgid "signalfd4() returns a fd, but signal is not notified through the fd."
msgstr ""
#: ../../users.rst:111
msgid "set_rlimit sets the limit values but they are not enforced."
msgstr ""
#: ../../users.rst:113
msgid "Address randomization is not supported."
msgstr ""
#: ../../users.rst:115
msgid ""
"brk() extends the heap more than requestd when -h (extend-heap-by=) "
"option of mcexec is used with the value larger than 4 KiB. "
"syscall_pwrite02 of LTP would fail for this reason. This is because the "
"test expects that the end of the heap is set to the same address as the "
"argument of sbrk() and expects a segmentation violation occurs when it "
"tries to access the memory area right next to the boundary. However, the "
"optimization sets the end to a value larger than the requested. "
"Therefore, the expected segmentation violation doesnt occur."
msgstr ""
#: ../../users.rst:125
msgid ""
"setpriority()/getpriority() wont work. They might set/get the priority "
"of a random mcexec thread. This is because theres no fixed "
"correspondence between a McKernel thread which issues the system call and"
" a mcexec thread which handles the offload request."
msgstr ""
#: ../../users.rst:130
msgid ""
"mbind() can set the policy but it is not used when allocating physical "
"pages."
msgstr ""
#: ../../users.rst:133
msgid ""
"MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for "
"set_mempolicy()/mbind() are not supported."
msgstr ""
#: ../../users.rst:136
msgid ""
"The MPOL_BIND policy for set_mempolicy()/mbind() works as the same as the"
" MPOL_PREFERRED policy. That is, the physical page allocator doesnt give"
" up the allocation when the specified nodes are running out of pages but "
"continues to search pages in the other nodes."
msgstr ""
#: ../../users.rst:141
msgid ""
"Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and later. In"
" addition, crash_kexec_post_notifiers kernel argument must be given to "
"Linux kernel."
msgstr ""
#: ../../users.rst:145
msgid ""
"setfsuid()/setfsgid() cannot change the id of the calling thread. "
"Instead, it changes that of the mcexec worker thread which takes the "
"system-call offload request."
msgstr ""
#: ../../users.rst:149
msgid ""
"mmap (hugeTLBfs): The physical pages corresponding to a map are released "
"when no McKernel process exist. The next map gets fresh physical pages."
msgstr ""
#: ../../users.rst:153
msgid "Sticky bit on executable file has no effect."
msgstr ""
#: ../../users.rst:155
msgid ""
"Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the process "
"of booting McKernel due to the Linux bug, found in Linux-3.10 and fixed "
"in the later version. One way to circumvent this is to always assign the "
"same CPU set to McKernel."
msgstr ""
#: ../../users.rst:160
msgid "madvise:"
msgstr ""
#: ../../users.rst:162
msgid "MADV_HWPOISON and MADV_SOFT_OFFLINE always returns -EPERM."
msgstr ""
#: ../../users.rst:163
msgid "MADV_MERGEABLE and MADV_UNMERGEABLE always returns -EINVAL."
msgstr ""
#: ../../users.rst:164
msgid ""
"MADV_HUGEPAGE and MADV_NOHUGEPAGE on file map returns -EINVAL except on "
"RHEL-8 for aarch64."
msgstr ""
#: ../../users.rst:167
msgid ""
"brk() and mmap() doesnt report out-of-memory through its return value. "
"Instead, page-fault reports the error."
msgstr ""
#: ../../users.rst:170
msgid ""
"Anonymous mmap pre-maps requested number of pages when contiguous pages "
"are available. Demand paging is used when not available."
msgstr ""
#: ../../users.rst:173
msgid ""
"Mixing page sizes in anonymous shared mapping is not allowed. mmap "
"creates vm_range with one page size. And munmap or mremap that needs the "
"reduced page size changes the sizes of all the pages of the vm_range."
msgstr ""
#: ../../users.rst:178
msgid ""
"ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS (job-"
"scheduler)."
msgstr ""
#: ../../users.rst:181
msgid ""
"The behaviors of madvise and mbind are changed to do nothing and report "
"success as a workaround for Fugaku."
msgstr ""
#: ../../users.rst:184
msgid ""
"mmap() allows unlimited overcommit. Note that it corresponds to setting "
"sysctl ``vm.overcommit_memory`` to 1."
msgstr ""
#~ msgid ""
#~ "At the heart of the stack is "
#~ "a low-level software infrastructure "
#~ "called Interface for Heterogeneous Kernels "
#~ "(IHK). IHK is a general framework "
#~ "that provides capabilities for partitioning"
#~ " resources in a many-core environment"
#~ " (e.g.,CPU cores and physical memory) "
#~ "and it enables management of lightweight"
#~ " kernels. IHK can allocate and "
#~ "release host resources dynamically and "
#~ "no reboot of the host machine is"
#~ " required when altering configuration. IHK"
#~ " also provides a low-level inter-"
#~ "kernel messaging infrastructure, called the"
#~ " Inter-Kernel Communication (IKC) layer."
#~ " An architectural overview of the "
#~ "main system components is shown below."
#~ msgstr ""
#~ msgid ""
#~ "McKernel is a lightweight kernel written"
#~ " from scratch. It is designed for "
#~ "HPC and is booted from IHK. "
#~ "McKernel retains a binary compatible ABI"
#~ " with Linux, however, it implements "
#~ "only a small set of performance "
#~ "sensitive system calls and the rest "
#~ "are offloaded to Linux. Specifically, "
#~ "McKernel has its own memory management,"
#~ " it supports processes and multi-"
#~ "threading with a simple round-robin "
#~ "cooperative (tick-less) scheduler, and "
#~ "it implements signaling. It also allows"
#~ " inter-process memory mappings and it"
#~ " provides interfaces to hardware "
#~ "performance counters."
#~ msgstr ""
#~ msgid "Functionality"
#~ msgstr ""
#~ msgid ""
#~ "An overview of some of the "
#~ "principal functionalities of the IHK/McKernel"
#~ " stack is provided below."
#~ msgstr ""
#~ msgid "System Call Offloading"
#~ msgstr ""
#~ msgid ""
#~ "System call forwarding in McKernel is"
#~ " implemented as follows. When an "
#~ "offloaded system call occurs, McKernel "
#~ "marshals the system call number along"
#~ " with its arguments and sends a "
#~ "message to Linux via a dedicated "
#~ "IKC channel. The corresponding proxy "
#~ "process running on Linux is by "
#~ "default waiting for system call requests"
#~ " through an ioctl() call into IHKs"
#~ " system call delegator kernel module. "
#~ "The delegator kernel modules IKC "
#~ "interrupt handler wakes up the proxy "
#~ "process, which returns to userspace and"
#~ " simply invokes the requested system "
#~ "call. Once it obtains the return "
#~ "value, it instructs the delegator module"
#~ " to send the result back to "
#~ "McKernel, which subsequently passes the "
#~ "value to user-space."
#~ msgstr ""
#~ msgid "Unified Address Space"
#~ msgstr ""
#~ msgid ""
#~ "The unified address space model in "
#~ "IHK/McKernel ensures that offloaded system "
#~ "calls can seamlessly resolve arguments "
#~ "even in case of pointers. This "
#~ "mechanism is depicted below and is "
#~ "implemented as follows."
#~ msgstr ""
#~ msgid ""
#~ "First, the proxy process is compiled "
#~ "as a position independent binary, which"
#~ " enables us to map the code and"
#~ " data segments specific to the proxy"
#~ " process to an address range which"
#~ " is explicitly excluded from McKernels "
#~ "user space. The grey box on the"
#~ " right side of the figure "
#~ "demonstrates the excluded region. Second, "
#~ "the entire valid virtual address range"
#~ " of McKernels application user-space "
#~ "is covered by a special mapping in"
#~ " the proxy process for which we "
#~ "use a pseudo file mapping in "
#~ "Linux. This mapping is indicated by "
#~ "the blue box on the left side "
#~ "of the figure."
#~ msgstr ""

View File

@ -0,0 +1,94 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2020, Masamichi Takagi, Balazs Gerofi, Yutaka Ishikawa
# This file is distributed under the same license as the IHK/McKernel
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2020.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: IHK/McKernel \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 16:40+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.7.0\n"
#: ../../uti.rst:2
msgid "Advanced: Enable Utility Thread offloading Interface (UTI)"
msgstr ""
#: ../../uti.rst:4
msgid ""
"UTI enables a runtime such as MPI runtime to spawn utility threads such "
"as MPI asynchronous progress threads to Linux cores."
msgstr ""
#: ../../uti.rst:8
msgid "Install capstone"
msgstr ""
#: ../../uti.rst:11 ../../uti.rst:22
msgid "When compute nodes don't have access to repositories"
msgstr ""
#: ../../uti.rst:13
msgid "Install EPEL capstone-devel:"
msgstr ""
#: ../../uti.rst:24
msgid ""
"Ask the system administrator to install ``capstone-devel``. Note that it "
"is in the EPEL repository."
msgstr ""
#: ../../uti.rst:28
msgid "Install syscall_intercept"
msgstr ""
#: ../../uti.rst:37
msgid "Install UTI for McKernel"
msgstr ""
#: ../../uti.rst:39
msgid "Install:"
msgstr ""
#: ../../uti.rst:49
msgid "Install McKernel"
msgstr ""
#: ../../uti.rst:51
msgid "Add ``-DENABLE_UTI=ON`` option to ``cmake``:"
msgstr ""
#: ../../uti.rst:58
msgid "Run programs"
msgstr ""
#: ../../uti.rst:60
msgid "Add ``--enable-uti`` option to ``mcexec``:"
msgstr ""
#: ../../uti.rst:67
msgid "Install UTI for Linux"
msgstr ""
#: ../../uti.rst:69
msgid ""
"You should skip this step if it's already installed as with, for example,"
" Fujitsu Technical Computing Suite."
msgstr ""
#: ../../uti.rst:72
msgid "Install by make"
msgstr ""
#: ../../uti.rst:82
msgid "Install by rpm"
msgstr ""

1
docs/logo.rst Normal file
View File

@ -0,0 +1 @@
.. figure:: mckernel-logo.png

BIN
docs/mckernel-logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

BIN
docs/mckernel.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

View File

@ -0,0 +1,9 @@
.. sectnum::
:suffix: .
:depth: 3
Interfaces
==========
Interface details
=================

1071
docs/operators.rst Normal file

File diff suppressed because it is too large Load Diff

22
docs/quick.rst Normal file
View File

@ -0,0 +1,22 @@
.. sectnum::
:suffix: .
:depth: 3
Introduction
============
.. include:: summary.rst
.. include:: background.rst
.. include:: archtecture.rst
.. include:: install.rst
.. include:: boot_run_shutdown.rst
.. include:: team.rst
.. include:: license.rst
.. include:: contact.rst

30
docs/requirements.txt Normal file
View File

@ -0,0 +1,30 @@
alabaster==0.7.12
attrs==19.3.0
Babel==2.7.0
certifi==2019.11.28
chardet==3.0.4
commonmark==0.9.1
docutils==0.15
idna==2.8
imagesize==1.1.0
Jinja2==2.10.3
MarkupSafe==1.1.1
mistune==0.8.4
packaging==19.2
Pygments==2.5.2
pyparsing==2.4.5
pytz==2019.3
recommonmark==0.6.0
requests==2.22.0
roman==3.2
six==1.13.0
snowballstemmer==2.0.0
Sphinx==2.2.2
sphinx-rtd-theme==0.5.0
sphinxcontrib-applehelp==1.0.1
sphinxcontrib-devhelp==1.0.1
sphinxcontrib-htmlhelp==1.0.2
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.2
sphinxcontrib-serializinghtml==1.1.3
urllib3==1.25.7

3
docs/spec-ihk.md Normal file
View File

@ -0,0 +1,3 @@
## hi
:download:`IHK Spec <ihk.pdf>`

1
docs/spec-mckernel.md Normal file
View File

@ -0,0 +1 @@
## Hello

BIN
docs/spec/ihk.pdf Normal file

Binary file not shown.

7
docs/spec/ihk.rst Normal file
View File

@ -0,0 +1,7 @@
.. sectnum::
:suffix: .
:depth: 3
Specifications
==============
The specifications pdf is :download:`here <ihk.pdf>`

BIN
docs/spec/mckernel.pdf Normal file

Binary file not shown.

7
docs/spec/mckernel.rst Normal file
View File

@ -0,0 +1,7 @@
.. sectnum::
:suffix: .
:depth: 3
Specifications
==============
The specifications pdf is :download:`here <mckernel.pdf>`

15
docs/summary.rst Normal file
View File

@ -0,0 +1,15 @@
IHK/McKernel is a light-weight multi-kernel operating system designed
for high-end supercomputing. It runs Linux and McKernel, a light-weight
kernel (LWK), side-by-side inside compute nodes and aims at the
following:
- Provide scalable and consistent execution of large-scale parallel
scientific applications, but at the same time maintain the ability to
rapidly adapt to new hardware features and emerging programming
models
- Provide efficient memory and device management so that resource
contention and data movement are minimized at the system level
- Eliminate OS noise by isolating OS services in Linux and provide
jitter free execution on the LWK
- Support the full POSIX/Linux APIs by selectively offloading
(slow-path) system calls to Linux

12
docs/team.rst Normal file
View File

@ -0,0 +1,12 @@
The Team
========
The McKernel project was started at The University of Tokyo and
currently it is mainly developed at RIKEN. Some of our collaborators
include:
- Hitachi
- Fujitsu
- CEA (France)
- NEC

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

204
docs/users.rst Normal file
View File

@ -0,0 +1,204 @@
.. sectnum::
:suffix: .
:depth: 3
Architectural Overview
======================
See `Quick Guide -- Architectural Overview <quick.html#architectural-overview>`__.
Running Programs
================
You need to check if your application and pre-/post-processing programs are suited to run with McKernel.
Follow the guide below to choose to run the whole on McKernel, or run the whole on Linux, or run pre-/post-processing on Linux and the application on McKernel:
* Application
- Run the whole on Linux if it issues system calls so frequently that they become the bottleneck with McKernel, e.g., programs performing many file I/O operations.
- Otherwise, run it on McKernel.
* Pre-/Post-processing
- Run it on McKernel if it consumes a large amount of memory or the execution time isn't prolonged prohibitively with McKernel. The reason for the first condition is that the resource could be limited for Linux CPUs in the nodes for McKernel.
- Otherwise, run it on Linux.
Modify job script
-----------------
When using a job submission system, you need to modify the job scripts so that the job script itself is going to run on Linux.
For example, with Fujitsu Technical Computing Suite (TCS), you need to specify ``jobenv=mck1`` by inserting the following line into the job script:
.. code-block:: none
#PJM -L jobenv=mck1
(Optional, Fujitsu TCS only) Specify boot parameters
----------------------------------------------------
You can specify the boot parameters by defining environment variables and passing them to Fujitsu TCS.
The parameters include the resource reservation settings, resource reservation amount, kernel arguments and routing of message channels between McKernel CPUs and Linux CPUs.
See `IHK Specifications - ihk_create_os_str() <spec/ihk.html>`__ for the parameter names and allowed values.
The example of setting the memory amount is shown below.
.. code-block:: none
export IHK_MEM="7G@4,7G@5,7G@6,7G@7"
pjsub -X run.sh
Insert ``mcexec`` into the command line
---------------------------------------
You need to insert ``mcexec`` into the command lines invoking the programs that you chose to run on McKernel:
Non-MPI programs
~~~~~~~~~~~~~~~~
Insert ``mcexec`` before an executable:
::
mcexec ./a.out
MPI programs
~~~~~~~~~~~~
Insert ``mcexec -n <processes-per-node>`` **after mpirun** and before an
executable:
::
mpirun -n <number-of-MPI-processes> mcexec -n <processes-per-node> ./a.out
``<processes-per-node>`` is the number of the processes per node and
calculated by (number of MPI processes) / (number of nodes).
For example, ``<processes-per-node>`` equals 4 (=32/8) when
specifying the number of processes and nodes as follows with
Fujitsu Technical Computing Suite.
.. code-block:: none
#PJM --mpi "proc=32"
#PJM -L "node=8"
Limitations
===========
1. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
correctly even if the mmap() returns a success. An access of their
mapping receives the SIGSEGV signal.
2. clone() supports only the following flags. All the other flags cause
clone() to return error or are simply ignored.
- CLONE_CHILD_CLEARTID
- CLONE_CHILD_SETTID
- CLONE_PARENT_SETTID
- CLONE_SETTLS
- CLONE_SIGHAND
- CLONE_VM
3. PAPI has the following restriction.
- Number of counters a user can use at the same time is up to the
number of the physical counters in the processor.
4. msync writes back only the modified pages mapped by the calling
process.
5. The following syscalls always return the ENOSYS error.
- migrate_pages()
- move_pages()
- set_robust_list()
6. The following syscalls always return the EOPNOTSUPP error.
- arch_prctl(ARCH_SET_GS)
- signalfd()
7. signalfd4() returns a fd, but signal is not notified through the fd.
8. set_rlimit sets the limit values but they are not enforced.
9. Address randomization is not supported.
10. brk() extends the heap more than requested when -h (extend-heap-by=)
option of mcexec is used with the value larger than 4 KiB.
syscall_pwrite02 of LTP would fail for this reason. This is because
the test expects that the end of the heap is set to the same address
as the argument of sbrk() and expects a segmentation violation
occurs when it tries to access the memory area right next to the
boundary. However, the optimization sets the end to a value larger
than the requested. Therefore, the expected segmentation violation
doesn't occur.
11. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no fixed
correspondence between a McKernel thread which issues the system
call and a mcexec thread which handles the offload request.
12. mbind() can set the policy but it is not used when allocating
physical pages.
13. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
14. The MPOL_BIND policy for set_mempolicy()/mbind() works the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are running
out of pages but continues to search pages in the other nodes.
15. Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and
later. In addition, crash_kexec_post_notifiers kernel argument must
be given to Linux kernel.
16. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, it changes that of the mcexec worker thread which takes the
system-call offload request.
17. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
18. Sticky bit on executable file has no effect.
19. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to the Linux bug, found in
Linux-3.10 and fixed in the later version. One way to circumvent
this is to always assign the same CPU set to McKernel.
20. madvise:
- MADV_HWPOISON and MADV_SOFT_OFFLINE always return -EPERM.
- MADV_MERGEABLE and MADV_UNMERGEABLE always return -EINVAL.
- MADV_HUGEPAGE and MADV_NOHUGEPAGE on file map return -EINVAL
except on RHEL-8 for aarch64.
21. brk() and mmap() don't report out-of-memory through their return
values. Instead, page-fault reports the error.
22. Anonymous mmap pre-maps requested number of pages when contiguous
pages are available. Demand paging is used when not available.
23. Mixing page sizes in anonymous shared mapping is not allowed. mmap
creates vm_range with one page size. And munmap or mremap that needs
the reduced page size changes the sizes of all the pages of the
vm_range.
24. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
25. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.
27. mlockall() is not supported and returns -EPERM.
28. munlockall() is not supported and returns zero.
29. Scheduling behavior is not Linux compatible. For example, sometimes one of the two processes on the same CPU continues to run after yielding.

126
docs/uti.rst Normal file
View File

@ -0,0 +1,126 @@
Advanced: Enable Utility Thread offloading Interface (UTI)
-------------------------------------------------------------
UTI enables a runtime such as MPI runtime to spawn utility threads such
as MPI asynchronous progress threads to Linux cores.
Install ``capstone`` and ``capstone-devel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When compute nodes have access to EPEL repository
"""""""""""""""""""""""""""""""""""""""""""""""""""""""
Install EPEL ``capstone`` and ``capstone-devel``:
::
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo yum install capstone capstone-devel
When compute nodes don't have access to EPEL repository
"""""""""""""""""""""""""""""""""""""""""""""""""""""""
A. Ask the system administrator to install ``capstone`` and ``capstone-devel``. Note that it is in the EPEL repository.
B. Download the rpm with the machine in which you are the administrator:
::
sudo yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
sudo yum install yum-utils
yumdownloader capstone capstone-devel
and then install it to your home directory of the login node:
::
cd $HOME/$(uname -p)
rpm2cpio capstone-4.0.1-9.el8.aarch64.rpm | cpio -idv
rpm2cpio capstone-devel-4.0.1-9.el8.aarch64.rpm | cpio -idv
sed -i 's#/usr/#'"$HOME"'/'"$(uname -p)"'/usr/#' $HOME/$(uname -p)/usr/lib64/pkgconfig/capstone.pc
Install syscall_intercept
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
git clone https://github.com/RIKEN-SysSoft/syscall_intercept.git
mkdir build && cd build
When ``capstone`` and ``capstone-devel`` are installed into the system directory:
::
cmake ../syscall_intercept/arch/aarch64 -DCMAKE_INSTALL_PREFIX=${HOME}/$(uname -p)/usr -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
When ``capstone`` and ``capstone-devel`` are installed into your home directory:
::
CMAKE_PREFIX_PATH=${HOME}/$(uname -p)/usr cmake ../syscall_intercept/arch/aarch64 -DCMAKE_INSTALL_PREFIX=${HOME}/$(uname -p)/usr -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DTREAT_WARNINGS_AS_ERRORS=OFF
Install:
::
make && make install && make test
Install UTI for McKernel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Install:
.. code-block:: none
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<mckernel-install> --with-rm=mckernel
make && make install
Install McKernel
~~~~~~~~~~~~~~~~~~~~
``cmake`` with the additional options:
::
CMAKE_PREFIX_PATH=${HOME}/$(uname -p)/usr cmake -DCMAKE_INSTALL_PREFIX=${HOME}/ihk+mckernel -DENABLE_UTI=ON $HOME/src/ihk+mckernel/mckernel
make -j install
Run programs
~~~~~~~~~~~~~~~~
``mcexec`` with ``--enable-uti`` option:
::
mcexec --enable-uti <command>
Install UTI for Linux
~~~~~~~~~~~~~~~~~~~~~~~~~
You should skip this step if it's already installed as with, for example, Fujitsu Technical Computing Suite.
Install by make
"""""""""""""""
.. code-block:: none
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
make && make install
Install by rpm
""""""""""""""
.. code-block:: none
git clone https://github.com/RIKEN-SysSoft/uti.git
mkdir build && cd build
../uti/configure --prefix=<uti-install> --with-rm=linux
rm -f ~/rpmbuild/SOURCES/<version>.tar.gz
rpmbuild -ba ./scripts/uti.spec
rpm -Uvh uti-<version>-<release>-<arch>.rpm

View File

@ -4671,7 +4671,7 @@ void cmd_ipcs(void); /* ipcs.c */
/*
* main.c
*/
void main_loop(void);
//void main_loop(void);
void exec_command(void);
struct command_table_entry *get_command_table_entry(char *);
void program_usage(int);

View File

@ -91,7 +91,10 @@ struct program_image_section {
struct get_cpu_set_arg {
int nr_processes;
char *req_cpu_list; // Requested by user-space
int req_cpu_list_len; // Lenght of request string
int *process_rank;
pid_t ppid;
void *cpu_set;
size_t cpu_set_size; // Size in bytes
int *target_core;
@ -110,6 +113,18 @@ typedef unsigned long __cpu_set_unit;
#define MPOL_NO_BSS 0x04
#define MPOL_SHM_PREMAP 0x08
/* should be the same as process.h */
#define PLD_PROCESS_NUMA_MASK_BITS 256
/*
 * Memory policy mode identifiers. The values are consecutive and start at
 * zero; PLD_MPOL_MAX is the number of real modes and must stay last.
 */
enum {
	PLD_MPOL_DEFAULT = 0,
	PLD_MPOL_PREFERRED = 1,
	PLD_MPOL_BIND = 2,
	PLD_MPOL_INTERLEAVE = 3,
	PLD_MPOL_LOCAL = 4,
	PLD_MPOL_MAX = 5, /* always last member of enum */
};
#define PLD_MAGIC 0xcafecafe44332211UL
struct program_load_desc {
@ -144,9 +159,18 @@ struct program_load_desc {
unsigned long heap_extension;
long stack_premap;
unsigned long mpol_bind_mask;
int mpol_mode;
unsigned long mpol_nodemask[PLD_PROCESS_NUMA_MASK_BITS /
(sizeof(unsigned long) * 8)];
int thp_disable;
int uti_thread_rank; /* N-th clone() spawns a thread on Linux CPU */
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
@ -193,6 +217,9 @@ struct syscall_response {
unsigned long req_thread_status;
long ret;
unsigned long fault_address;
#ifdef ENABLE_TOFU
void *pde_data;
#endif
};
struct syscall_ret_desc {

View File

@ -2,11 +2,16 @@
#include <linux/version.h>
#include <linux/mm_types.h>
#include <linux/kallsyms.h>
#include <linux/delay.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/task_stack.h>
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <linux/mmu_notifier.h>
#include <linux/kref.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <asm/vdso.h>
#include "config.h"
#include "../../mcctrl.h"
@ -27,6 +32,39 @@ void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef ENABLE_TOFU
/*
 * Pointers to Tofu driver objects and functions. They are filled in by
 * arch_symbols_init() through kallsyms_lookup_name(), using the driver
 * symbol of the same name without the "mcctrl_" prefix.
 */
/* Tofu CQ and barrier gate release functions */
/* File operations of the CQ device; its release() hook gets redirected. */
struct file_operations *mcctrl_tof_utofu_procfs_ops_cq;
/* Original CQ release() of the Tofu driver. */
int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
struct file *filp);
/* File operations of the BCH device; its release() hook gets redirected. */
struct file_operations *mcctrl_tof_utofu_procfs_ops_bch;
/* Original BCH release() of the Tofu driver. */
int (*mcctrl_tof_utofu_release_bch)(struct inode *inode,
struct file *filp);
int (*mcctrl_tof_core_cq_cacheflush)(int tni, int cqid);
int (*mcctrl_tof_core_disable_bch)(int tni, int bgid);
int (*mcctrl_tof_core_unset_bg)(int tni, int bgid);
typedef void (*tof_core_signal_handler)(int, int, uint64_t, uint64_t);
void (*mcctrl_tof_core_register_signal_bg)(int tni, int bgid,
tof_core_signal_handler handler);
struct tof_utofu_bg;
/* Base of the driver's tof_utofu_bg array, indexed via tof_utofu_bg_get(). */
struct tof_utofu_bg *mcctrl_tof_utofu_bg;
/* Tofu MMU notifier */
/* The driver's own notifier operations. */
struct mmu_notifier_ops *mcctrl_tof_utofu_mn_ops;
/* mcctrl's modified copy of the notifier operations (see arch_symbols_init()). */
struct mmu_notifier_ops __mcctrl_tof_utofu_mn_ops;
/* The driver's real handler, called when mcctrl does not skip the event. */
static void (*mcctrl_tof_utofu_mn_invalidate_range_end)(
struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end);
/* mcctrl replacement handler, defined later in this file. */
void __mcctrl_tof_utofu_mn_invalidate_range_end(
struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end);
#endif
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
@ -43,6 +81,71 @@ int arch_symbols_init(void)
return -EFAULT;
#endif
#ifdef ENABLE_TOFU
mcctrl_tof_utofu_procfs_ops_cq =
(void *)kallsyms_lookup_name("tof_utofu_procfs_ops_cq");
if (WARN_ON(!mcctrl_tof_utofu_procfs_ops_cq))
return -EFAULT;
mcctrl_tof_utofu_procfs_ops_bch =
(void *)kallsyms_lookup_name("tof_utofu_procfs_ops_bch");
if (WARN_ON(!mcctrl_tof_utofu_procfs_ops_bch))
return -EFAULT;
mcctrl_tof_utofu_release_cq =
(void *)kallsyms_lookup_name("tof_utofu_release_cq");
if (WARN_ON(!mcctrl_tof_utofu_release_cq))
return -EFAULT;
mcctrl_tof_utofu_release_bch =
(void *)kallsyms_lookup_name("tof_utofu_release_bch");
if (WARN_ON(!mcctrl_tof_utofu_release_bch))
return -EFAULT;
mcctrl_tof_core_cq_cacheflush =
(void *)kallsyms_lookup_name("tof_core_cq_cacheflush");
if (WARN_ON(!mcctrl_tof_core_cq_cacheflush))
return -EFAULT;
mcctrl_tof_core_disable_bch =
(void *)kallsyms_lookup_name("tof_core_disable_bch");
if (WARN_ON(!mcctrl_tof_core_disable_bch))
return -EFAULT;
mcctrl_tof_core_unset_bg =
(void *)kallsyms_lookup_name("tof_core_unset_bg");
if (WARN_ON(!mcctrl_tof_core_unset_bg))
return -EFAULT;
mcctrl_tof_core_register_signal_bg =
(void *)kallsyms_lookup_name("tof_core_register_signal_bg");
if (WARN_ON(!mcctrl_tof_core_register_signal_bg))
return -EFAULT;
mcctrl_tof_utofu_bg =
(void *)kallsyms_lookup_name("tof_utofu_bg");
if (WARN_ON(!mcctrl_tof_utofu_bg))
return -EFAULT;
mcctrl_tof_utofu_mn_ops =
(void *)kallsyms_lookup_name("tof_utofu_mn_ops");
if (WARN_ON(!mcctrl_tof_utofu_mn_ops))
return -EFAULT;
/*
* Copy original content and update redirected function,
* CQ will be pointed to this structure after init ioctl()
*/
memcpy(&__mcctrl_tof_utofu_mn_ops, mcctrl_tof_utofu_mn_ops,
sizeof(*mcctrl_tof_utofu_mn_ops));
__mcctrl_tof_utofu_mn_ops.invalidate_range =
__mcctrl_tof_utofu_mn_invalidate_range_end;
mcctrl_tof_utofu_mn_invalidate_range_end =
(void *)kallsyms_lookup_name("tof_utofu_mn_invalidate_range_end");
if (WARN_ON(!mcctrl_tof_utofu_mn_invalidate_range_end))
return -EFAULT;
#endif
return 0;
}
@ -331,6 +434,15 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
// page table to translation_table.
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE);
#ifdef ENABLE_FUGAKU_HACKS
if (!phys) {
pr_err("%s(): ERROR: VA: 0x%lx, rpt is NULL for PID %d\n",
__func__, rva, task_tgid_vnr(current));
error = -EFAULT;
goto out;
}
#endif
tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
rpa = (unsigned long)tbl->tt_pa;
@ -417,3 +529,488 @@ long arch_switch_ctx(struct uti_switch_ctx_desc *desc)
out:
return rc;
}
#ifdef ENABLE_TOFU
/*
* Tofu CQ and BCH release handlers
*/
int __mcctrl_tof_utofu_release_cq(struct inode *inode, struct file *filp);
int __mcctrl_tof_utofu_release_bch(struct inode *inode, struct file *filp);
void mcctrl_tofu_hijack_release_handlers(void)
{
mcctrl_tof_utofu_procfs_ops_cq->release =
__mcctrl_tof_utofu_release_cq;
mcctrl_tof_utofu_procfs_ops_bch->release =
__mcctrl_tof_utofu_release_bch;
wmb();
}
void mcctrl_tofu_restore_release_handlers(void)
{
mcctrl_tof_utofu_procfs_ops_cq->release =
mcctrl_tof_utofu_release_cq;
mcctrl_tof_utofu_procfs_ops_bch->release =
mcctrl_tof_utofu_release_bch;
wmb();
}
/*
* Tofu cleanup functions
*/
#include <tofu/tof_uapi.h>
#include <tofu/tof_icc.h>
#include <tofu/tofu_generated-tof_core_cq.h>
#include <tofu/tofu_generated-tof_utofu_device.h>
#include <tofu/tofu_generated-tof_utofu_cq.h>
#include <tofu/tofu_generated-tof_utofu_mbpt.h>
#include <tofu/tofu_generated-tof_utofu_bg.h>
/*
 * Constants of the Tofu user-space driver layer.
 * NOTE(review): these presumably mirror the definitions inside the Tofu
 * driver and must stay in sync with it -- confirm against the driver source.
 */
#define TOF_UTOFU_VERSION TOF_UAPI_VERSION
#define TOF_UTOFU_NUM_STAG_NTYPES 3
/* Number of STags for a given size class is 2^(size + 13). */
#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13)
#define TOF_UTOFU_NUM_STAG(size) ((uint64_t)1 << TOF_UTOFU_NUM_STAG_BITS(size))
#define TOF_UTOFU_STAG_TRANS_BITS 3
#define TOF_UTOFU_STAG_TRANS_SIZE ((uint64_t)1 << TOF_UTOFU_STAG_TRANS_BITS)
/* Table lengths: number of STags times the size of one table entry. */
#define TOF_UTOFU_STAG_TRANS_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_UTOFU_STAG_TRANS_SIZE)
#define TOF_UTOFU_STEERING_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_ICC_STEERING_SIZE)
#define TOF_UTOFU_MB_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_ICC_MB_SIZE)
#define TOF_UTOFU_STAG_MEM_LEN(size) (TOF_UTOFU_STEERING_TABLE_LEN(size) * 4)
#define TOF_UTOFU_SPECIAL_STAG 4096
#define TOF_UTOFU_ICC_COMMON_REGISTER (tof_icc_reg_pa + 0x0B000000)
#define TOF_UTOFU_REG_START tof_icc_reg_pa
#define TOF_UTOFU_REG_END (TOF_UTOFU_ICC_COMMON_REGISTER + 0x000FFFFF)
#define TOF_UTOFU_SET_SUBNET_TNI 0 /* This number is kernel TNIs number in setting subnet */
#define TOF_UTOFU_KCQ 11
#define TOF_UTOFU_LINKDOWN_PORT_MASK 0x000003FF
#define TOF_UTOFU_ALLOC_STAG_LPG 0x2
#define TOF_UTOFU_BLANK_MBVA (-1)
/* Value of the prev/next links of an MRU entry that is not on the list. */
#define TOF_UTOFU_MRU_EMPTY (-1)
/*
 * Per-STag bookkeeping entry. tof_utofu_trans_mru_delete() treats prev and
 * next as indices into the same array (ucq->trans.mru), linking the entries
 * into a list whose head index is ucq->trans.mruhead.
 */
struct tof_utofu_trans_list {
int16_t prev; /* index of the previous entry, or TOF_UTOFU_MRU_EMPTY */
int16_t next; /* index of the next entry, or TOF_UTOFU_MRU_EMPTY */
uint8_t pgszbits; /* presumably log2 of the page size -- TODO confirm */
struct tof_utofu_mbpt *mbpt; /* MBPT of this STag; reset to NULL by tof_utofu_free_stag() */
};
/*
* Bit 30 marks a kref as McKernel internal.
* This can be used to distinguish krefs from Linux and
* it also ensures that a non deallocated kref will not
* crash the Linux allocator.
*/
#define MCKERNEL_KREF_MARK (1U << 30)
/*
 * Test whether a kref carries the McKernel marker bit.
 * Returns a non-zero value (MCKERNEL_KREF_MARK) when the bit is set in the
 * reference counter and 0 otherwise.
 */
static inline unsigned int mcctrl_kref_is_mckernel(const struct kref *kref)
{
	unsigned int count = refcount_read(&kref->refcount);

	return count & MCKERNEL_KREF_MARK;
}
/**
 * mcctrl_kref_put - drop one reference from a McKernel-marked kref.
 * @kref: object.
 * @release: function that cleans up the object once the last reference
 *           is gone.
 *
 * Unlike the regular kref_put(), the counter of a McKernel kref also holds
 * MCKERNEL_KREF_MARK. The counter is decremented and release() is called
 * when the new value equals MCKERNEL_KREF_MARK, i.e. when only the marker
 * bit is left.
 *
 * Return 1 if release() was called, otherwise return 0.
 */
static inline int mcctrl_kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
	if (atomic_dec_return(&kref->refcount.refs) != MCKERNEL_KREF_MARK)
		return 0;

	release(kref);
	return 1;
}
/*
 * Invoke the Tofu core CQ cache flush for the TNI/CQ pair of @ucq and pass
 * its return value through.
 */
static int tof_utofu_cq_cacheflush(struct tof_utofu_cq *ucq)
{
	int rc;

	rc = mcctrl_tof_core_cq_cacheflush(ucq->tni, ucq->cqid);
	return rc;
}
/*
 * Unlink the entry of @stag from the MRU list of @ucq.
 * An entry whose prev or next link is TOF_UTOFU_MRU_EMPTY is not on the
 * list and is left untouched.
 */
static void tof_utofu_trans_mru_delete(struct tof_utofu_cq *ucq, int stag)
{
	struct tof_utofu_trans_list *list = ucq->trans.mru;
	struct tof_utofu_trans_list *entry = &list[stag];
	int before = entry->prev;
	int after = entry->next;

	/* already deleted */
	if (before == TOF_UTOFU_MRU_EMPTY || after == TOF_UTOFU_MRU_EMPTY)
		return;

	if (before != stag) {
		/* Move the head on if it pointed at the removed entry. */
		if (ucq->trans.mruhead == stag)
			ucq->trans.mruhead = after;
		list[before].next = after;
		list[after].prev = before;
	} else {
		/* a single entry */
		ucq->trans.mruhead = TOF_UTOFU_MRU_EMPTY;
	}

	entry->prev = TOF_UTOFU_MRU_EMPTY;
	entry->next = TOF_UTOFU_MRU_EMPTY;
}
static void tof_utofu_trans_disable(struct tof_utofu_cq *ucq, int stag){
struct tof_trans_table *table = ucq->trans.table;
atomic64_set((atomic64_t *)&table[stag], 0);
tof_utofu_trans_mru_delete(ucq, stag);
}
/* McKernel scatterlist is simply a contiguous buffer. */
/*
 * NOTE(review): this layout presumably has to match the definition used on
 * the McKernel side -- confirm before changing any field.
 */
struct scatterlist {
void *pages; /* base address; tof_utofu_disable_mbpt() indexes it in PAGE_SIZE steps */
unsigned int offset;
unsigned int length;
unsigned long dma_address;
unsigned int dma_length;
};
/*
 * Disable MBPT entry number @idx of @mbpt.
 *
 * Returns 0 if the entry was not enabled. Otherwise the entry's enable flag
 * and ipa field are cleared and the previous ipa value shifted left by 12
 * bits is returned.
 */
static uintptr_t tof_utofu_disable_mbpt(struct tof_utofu_mbpt *mbpt, int idx)
{
	struct tof_icc_mbpt_entry *page_ents;
	struct tof_icc_mbpt_entry *entry;
	uintptr_t ipa;
	int page_no, slot;

	/* Split the flat index into a page number and a slot in that page. */
	page_no = idx / (PAGE_SIZE / TOF_ICC_MBPT_SIZE);
	slot = idx - page_no * (PAGE_SIZE / TOF_ICC_MBPT_SIZE);

	/*
	 * The McKernel scatterlist is one contiguous buffer, so the page is
	 * located by offset rather than via sg_virt(&mbpt->sg[page_no]).
	 */
	page_ents = mbpt->sg->pages + (page_no * PAGE_SIZE);
	entry = &page_ents[slot];

	if (!entry->enable)
		return 0;

	entry->enable = 0;
	ipa = (uint64_t)entry->ipa << 12;
	entry->ipa = 0;

	return ipa;
}
/*
 * Disable every entry of @mbpt. Nothing is released or freed here; the
 * "#if 0" sections keep the operations that are intentionally not performed
 * for McKernel managed STags.
 */
static void tof_utofu_free_mbpt(struct tof_utofu_cq *ucq, struct tof_utofu_mbpt *mbpt)
{
	int i = 0;

	while (i < mbpt->nsgents * PAGE_SIZE / sizeof(struct tof_icc_mbpt_entry)) {
		uintptr_t iova;

		iova = tof_utofu_disable_mbpt(mbpt, i);
#if 0
		/*
		 * NOTE: Not performed for McKernel managed stags.
		 */
		if(iova){
			tof_smmu_release_ipa_cq(ucq->tni, ucq->cqid, iova, mbpt->pgsz);
		}
#endif
		i++;
	}
#if 0
	/*
	 * NOTE: Everything below has been allocated in McKernel, do nothing here!!
	 * This leaks memory in McKernel, but it doesn't crash Linux.
	 * Memory will be released once McKernel is unbooted.
	 */
	tof_smmu_iova_unmap_sg(ucq->tni, ucq->cqid, mbpt->sg, mbpt->nsgents);
	for(i = 0; i < mbpt->nsgents; i++){
		tof_util_free_pages((unsigned long)sg_virt(&mbpt->sg[i]), 0);
	}
	tof_util_free(mbpt->sg);
	tof_util_free(mbpt);
#endif
}
/*
 * kref release callback: recover the MBPT that embeds @kref and hand it to
 * tof_utofu_free_mbpt() together with its owning CQ.
 */
static void tof_utofu_mbpt_release(struct kref *kref)
{
	struct tof_utofu_mbpt *owner;

	owner = container_of(kref, struct tof_utofu_mbpt, kref);
	tof_utofu_free_mbpt(owner->ucq, owner);
}
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag){
if(stag < 0 || stag >= TOF_UTOFU_NUM_STAG(ucq->num_stag) ||
ucq->steering == NULL){
return -EINVAL;
}
if(!(ucq->steering[stag].enable)){
return -ENOENT;
}
if (!mcctrl_kref_is_mckernel(&ucq->trans.mru[stag].mbpt->kref)) {
printk("%s: stag: %d is not an McKernel kref\n", __func__, stag);
return -EINVAL;
}
ucq->steering[stag].enable = 0;
ucq->mb[stag].enable = 0;
tof_utofu_trans_disable(ucq, stag);
dma_wmb();
tof_utofu_cq_cacheflush(ucq);
mcctrl_kref_put(&ucq->trans.mru[stag].mbpt->kref, tof_utofu_mbpt_release);
ucq->trans.mru[stag].mbpt = NULL;
dprintk("%s: TNI: %d, CQ: %d: stag %d deallocated\n",
__func__, ucq->tni, ucq->cqid, stag);
return 0;
}
/*
 * Release all STags of the CQ whose embedded tof_utofu_device is
 * @pde_data. Does nothing when the CQ is not enabled.
 */
void mcctrl_mckernel_tof_utofu_release_cq(void *pde_data)
{
	struct tof_utofu_device *dev = (struct tof_utofu_device *)pde_data;
	struct tof_utofu_cq *ucq = container_of(dev, struct tof_utofu_cq, common);
	unsigned long irqflags;
	int stag;

	if (!ucq->common.enabled)
		return;

	dprintk("%s: UCQ (PDE: 0x%lx) TNI %d CQ %d\n",
		__func__, (unsigned long)pde_data, ucq->tni, ucq->cqid);

	/*
	 * Only release stags here, actual cleanup is still performed
	 * in the Tofu driver
	 */
	stag = 0;
	while (stag < TOF_UTOFU_NUM_STAG(ucq->num_stag)) {
		/* The MRU lock is taken and dropped for every single STag. */
		spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
		tof_utofu_free_stag(ucq, stag);
		spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
		stag++;
	}
}
/*
 * Unregister the barrier gate signal handler of (@tni, @bgid) by
 * registering a NULL handler.
 */
static inline void tof_core_unregister_signal_bg(int tni, int bgid)
{
	/* A void function must not return an expression (C11 6.8.6.4). */
	mcctrl_tof_core_register_signal_bg(tni, bgid, NULL);
}
/*
 * Look up the barrier gate object of (@tni, @bgid).
 * Returns NULL when either index is out of range; the unsigned comparison
 * also rejects negative values.
 */
static struct tof_utofu_bg *tof_utofu_bg_get(int tni, int bgid)
{
	if ((unsigned int)tni < TOF_ICC_NTNIS &&
	    (unsigned int)bgid < TOF_ICC_NBGS) {
		/*
		 * Equivalent of &tof_utofu_bg[tni][bgid], written as pointer
		 * arithmetic on the looked-up base address.
		 */
		return mcctrl_tof_utofu_bg + (tni * TOF_ICC_NBGS) + bgid;
	}

	return NULL;
}
/*
 * Unset barrier gate @ubg if it is enabled: unset it in the Tofu core,
 * mark it disabled and unregister its signal handler. Always returns 0.
 */
static int __tof_utofu_unset_bg(struct tof_utofu_bg *ubg)
{
	if (!ubg->common.enabled)
		return 0;

	mcctrl_tof_core_unset_bg(ubg->tni, ubg->bgid);
	ubg->common.enabled = false;
	tof_core_unregister_signal_bg(ubg->tni, ubg->bgid);

	return 0;
}
/*
 * Disable the barrier channel of @ubg and unset every barrier gate that is
 * selected in its per-TNI gate masks.
 *
 * Returns -EPERM if the channel is not enabled, the negative error of
 * mcctrl_tof_core_disable_bch() or __tof_utofu_unset_bg() on failure, and
 * 0 on success.
 */
static int mcctrl_tof_utofu_disable_bch(struct tof_utofu_bg *ubg)
{
	int rc;
	int tni;

	if (!ubg->bch.enabled)
		return -EPERM;

	rc = mcctrl_tof_core_disable_bch(ubg->tni, ubg->bgid);
	if (rc < 0)
		return rc;

	for (tni = 0; tni < TOF_ICC_NTNIS; tni++) {
		uint64_t mask = ubg->bch.bgmask[tni];
		int bgid;

		for (bgid = 0; bgid < TOF_ICC_NBGS; bgid++) {
			/* Skip gates that are not part of this channel. */
			if (!((mask >> bgid) & 1))
				continue;

			rc = __tof_utofu_unset_bg(tof_utofu_bg_get(tni, bgid));
			if (rc < 0) {
				/* OK? */
				//BUG();
				return rc;
			}
		}
	}

	/* Not performed in McKernel handler */
	//tof_smmu_release_ipa_bg(ubg->tni, ubg->bgid, ubg->bch.iova, TOF_ICC_BCH_DMA_ALIGN);
	//put_page(ubg->bch.page);

	ubg->bch.enabled = false;
	smp_mb();

	dprintk("%s: tni=%d bgid=%d\n", __func__, ubg->tni, ubg->bgid);
	return 0;
}
/*
 * Disable the barrier channel of the barrier gate whose embedded
 * tof_utofu_device is @pde_data. The result of the disable call is ignored.
 */
void mcctrl_mckernel_tof_utofu_release_bch(void *pde_data)
{
	struct tof_utofu_device *dev;
	struct tof_utofu_bg *ubg;

	dev = (struct tof_utofu_device *)pde_data;
	ubg = container_of(dev, struct tof_utofu_bg, common);

	dprintk("%s: tni=%d bgid=%d\n", __func__, ubg->tni, ubg->bgid);
	mcctrl_tof_utofu_disable_bch(ubg);
}
/*
 * Run the CQ or BCH cleanup for a tracked Tofu file. The kind is taken from
 * the device path: a path containing "cq" is handled as a CQ, otherwise a
 * path containing "bch" as a BCH; any other path is ignored.
 */
void mcctrl_tofu_cleanup_file(struct mcctrl_file_to_pidfd *f2pfd)
{
	const char *path = f2pfd->tofu_dev_path;

	/* Figure out whether CQ or BCH */
	if (strstr(path, "cq")) {
		dprintk("%s: PID: %d, fd: %d (%s) -> release CQ\n",
			__func__, f2pfd->pid, f2pfd->fd, f2pfd->tofu_dev_path);
		mcctrl_mckernel_tof_utofu_release_cq(f2pfd->pde_data);
		return;
	}

	if (strstr(path, "bch")) {
		dprintk("%s: PID: %d, fd: %d (%s) -> release BCH\n",
			__func__, f2pfd->pid, f2pfd->fd, f2pfd->tofu_dev_path);
		mcctrl_mckernel_tof_utofu_release_bch(f2pfd->pde_data);
	}
}
/*
 * Common body of the hijacked Tofu release() callbacks.
 *
 * If @filp is tracked for the current process, McKernel is asked to clean up
 * the corresponding fd, remaining STags/BCH are disabled on the Linux side
 * and the tracking entry is removed. In every case the driver's original
 * release function @__release_func is called last and its result returned.
 */
int __mcctrl_tof_utofu_release_handler(struct inode *inode, struct file *filp,
		int (*__release_func)(struct inode *inode, struct file *filp))
{
	struct mcctrl_usrdata *usrdata;
	struct mcctrl_file_to_pidfd *f2pfd;
	struct mcctrl_per_proc_data *ppd;
	struct ikc_scd_packet isp;
	int ret;

	dprintk("%s: current PID: %d, comm: %s \n",
		__func__, task_tgid_vnr(current), current->comm);

	/* Files that are not tracked go straight to the original handler. */
	f2pfd = mcctrl_file_to_pidfd_hash_lookup(filp, current->group_leader);
	if (!f2pfd) {
		goto out;
	}

	dprintk("%s: current PID: %d, PID: %d, fd: %d ...\n",
		__func__, task_tgid_vnr(current), f2pfd->pid, f2pfd->fd);
	usrdata = ihk_host_os_get_usrdata(f2pfd->os);

	/* Look up per-process structure */
	ppd = mcctrl_get_per_proc_data(usrdata, f2pfd->pid);
	if (!ppd) {
		pr_err("%s: PID: %d, fd: %d no PPD\n",
			__func__, f2pfd->pid, f2pfd->fd);
		goto out;
	}

	dprintk("%s: PID: %d, fd: %d PPD OK\n",
		__func__, f2pfd->pid, f2pfd->fd);

	/*
	 * We are in release() due to the process being killed,
	 * or because the application didn't close the file properly.
	 * Ask McKernel to clean up this fd.
	 */
	/* Zero the packet so no uninitialized stack data is sent over IKC. */
	memset(&isp, 0, sizeof(isp));
	isp.msg = SCD_MSG_CLEANUP_FD;
	isp.pid = f2pfd->pid;
	isp.arg = f2pfd->fd;

	/* NOTE(review): -20 is presumably a timeout argument -- confirm. */
	ret = mcctrl_ikc_send_wait(f2pfd->os, ppd->ikc_target_cpu,
			&isp, -20, NULL, NULL, 0);
	if (ret != 0) {
		pr_err("%s: WARNING: IKC req for PID: %d, fd: %d failed\n",
			__func__, f2pfd->pid, f2pfd->fd);
	}

	/* Disable any remaining STAGs/BCH in mcctrl anyway */
	mcctrl_tofu_cleanup_file(f2pfd);

	mcctrl_file_to_pidfd_hash_remove(filp, f2pfd->os,
			current->group_leader, f2pfd->fd);

	mcctrl_put_per_proc_data(ppd);

out:
	dprintk("%s: current PID: %d, comm: %s -> calling release\n",
		__func__, task_tgid_vnr(current), current->comm);

	return __release_func(inode, filp);
}
/*
 * release() replacement for the Tofu CQ device: runs the common handler and
 * chains to the driver's original CQ release function.
 */
int __mcctrl_tof_utofu_release_cq(struct inode *inode, struct file *filp)
{
	int rc;

	rc = __mcctrl_tof_utofu_release_handler(inode, filp,
			mcctrl_tof_utofu_release_cq);
	return rc;
}
/*
 * release() replacement for the Tofu BCH device: runs the common handler and
 * chains to the driver's original BCH release function.
 */
int __mcctrl_tof_utofu_release_bch(struct inode *inode, struct file *filp)
{
	int rc;

	rc = __mcctrl_tof_utofu_release_handler(inode, filp,
			mcctrl_tof_utofu_release_bch);
	return rc;
}
/*
 * Tofu MMU notifier functions
 */

/*
 * Replacement MMU notifier callback.
 *
 * The driver's real handler is skipped when the call comes from a task
 * named "mcexec" operating on its own mm; in every other case the call is
 * forwarded unchanged to the real handler.
 */
void __mcctrl_tof_utofu_mn_invalidate_range_end(
		struct mmu_notifier *mn,
		struct mm_struct *mm,
		unsigned long start,
		unsigned long end)
{
	char tmpname[TASK_COMM_LEN];

	/* Not an offloaded syscall? */
	if (current->mm != mm) {
		goto out_call_real;
	}

	/* Not mcexec? Just in case.. */
	get_task_comm(tmpname, current);
	if (strncmp(tmpname, "mcexec", TASK_COMM_LEN)) {
		goto out_call_real;
	}

	/* This is only called for Tofu enabled mcexec processes */
	dprintk("%s: skipping tof_utofu_mn_invalidate_range_end() "
		"for mcexec PID %d\n",
		__func__, task_tgid_vnr(current));
	return;

out_call_real:
	/* A void function must not return an expression (C11 6.8.6.4). */
	mcctrl_tof_utofu_mn_invalidate_range_end(mn, mm, start, end);
}
/*
 * Point the MMU notifier of the CQ that embeds @dev at mcctrl's copy of the
 * notifier operations. @arg is not used.
 *
 * Returns 0 on success and -EINVAL if the CQ is not enabled.
 */
int __mcctrl_tof_utofu_ioctl_init_cq(struct tof_utofu_device *dev,
		unsigned long arg)
{
	struct tof_utofu_cq *ucq = container_of(dev, struct tof_utofu_cq, common);

	if (ucq->common.enabled) {
		dprintk("%s: Tofu TNI %d CQ %d (PDE: 0x%lx) MMU notifier to be hijacked\n",
			__func__, ucq->tni, ucq->cqid, (unsigned long)dev);

		/* Override the MMU notifier */
		ucq->mn.ops = &__mcctrl_tof_utofu_mn_ops;
		return 0;
	}

	return -EINVAL;
}
/*
 * mcctrl side hook for ioctl() calls on a Tofu CQ.
 * Only TOF_IOCTL_INIT_CQ is acted upon; every other command returns 0.
 */
long __mcctrl_tof_utofu_unlocked_ioctl_cq(void *pde_data, unsigned int cmd,
		unsigned long arg)
{
	struct tof_utofu_device *dev = (struct tof_utofu_device *)pde_data;

	switch (cmd) {
	/* We only care about init, where we hijack the MMU notifier */
	case TOF_IOCTL_INIT_CQ:
		return __mcctrl_tof_utofu_ioctl_init_cq(dev, arg);
	default:
		return 0;
	}
}
#endif

View File

@ -0,0 +1,41 @@
#!/bin/bash
#
# Regenerate the tofu_generated-*.h headers next to this script by extracting
# selected structure members from the DWARF debug information of the Tofu
# driver module (tof_utofu.ko). The most recently modified tof_module.tar.gz
# found under /lib/modules/ is used as the source of the module.

SCRIPT="$(readlink -f "${BASH_SOURCE[0]:-}")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"

# All generated files are written into the script's own directory.
cd "${SCRIPT_DIR}" || exit 1

DWARF_TOOL="${SCRIPT_DIR}/../../../../../../../tools/dwarf-extract-struct/dwarf-extract-struct"
if [ ! -x "${DWARF_TOOL}" ]; then
	echo "error: couldn't find DWARF extractor executable (${DWARF_TOOL}), have you compiled it?"
	cd -
	exit 1
fi

echo "Looking for Tofu driver debug symbols..."
# Run the search once. -r keeps ls from listing the current directory when
# nothing matches; -print0/-0 keep paths with whitespace intact.
MODULE_TAR_GZ="$(find /lib/modules/ -name "tof_module.tar.gz" -print0 | xargs -0 -r ls -t | head -n 1)"
if [ -z "${MODULE_TAR_GZ}" ]; then
	echo "error: couldn't find Tofu modules with debug symbols"
	cd -
	exit 1
fi

echo "Using Tofu driver debug symbols: ${MODULE_TAR_GZ}"

KMODULE=tof_utofu.ko
if ! tar zxvf "${MODULE_TAR_GZ}" "${KMODULE}" 2>&1 > /dev/null; then
	echo "error: uncompressing kernel module with debug symbols"
	cd -
	exit 1
fi

# Members that the extractor cannot describe are emitted as
# "struct FILL_IN_MANUALLY <name>;" and replaced by hand-written headers.
"${DWARF_TOOL}" "${KMODULE}" tof_utofu_device enabled subnet gpid > tofu_generated-tof_utofu_device.h
"${DWARF_TOOL}" "${KMODULE}" tof_utofu_cq common tni cqid mn trans steering mb num_stag | sed "s/struct FILL_IN_MANUALLY trans;/#include \"tof_utofu_cq_trans.h\"/g" > tofu_generated-tof_utofu_cq.h
"${DWARF_TOOL}" "${KMODULE}" tof_utofu_mbpt ucq iova sg nsgents mbptstart pgsz kref > tofu_generated-tof_utofu_mbpt.h
"${DWARF_TOOL}" "${KMODULE}" tof_utofu_bg common tni bgid bch | sed "s/struct FILL_IN_MANUALLY bch;/#include \"tof_utofu_bg_bch.h\"/g" > tofu_generated-tof_utofu_bg.h

rm "${KMODULE}"

#cat tofu_generated*.h
cd - > /dev/null

View File

@ -0,0 +1,831 @@
#ifndef _TOF_ICC_H_
#define _TOF_ICC_H_
#include <linux/types.h>
#ifdef __KERNEL__
#include <linux/bitops.h>
#else
#include <stdint.h>
typedef uint64_t phys_addr_t;
#endif
/* constants related to the Tofu Interconnect D */
#define TOF_ICC_NTNIS 6
#define TOF_ICC_NCQS 12
#define TOF_ICC_NBGS 48
#define TOF_ICC_NBCHS 16
#define TOF_ICC_NPORTS 10
#define TOF_ICC_NVMSIDS 16
#define TOF_ICC_RH_LEN 8
#define TOF_ICC_ECRC_LEN 4
#define TOF_ICC_FRAME_ALIGN 32
#define TOF_ICC_TLP_LEN(len) (((len) + 1) * TOF_ICC_FRAME_ALIGN)
#define TOF_ICC_TLP_PAYLOAD_MAX (TOF_ICC_TLP_LEN(61) - TOF_ICC_ECRC_LEN)
#define TOF_ICC_FRAME_LEN(len) (TOF_ICC_RH_LEN + TOF_ICC_TLP_LEN(len))
#define TOF_ICC_FRAME_LEN_MIN TOF_ICC_FRAME_LEN(2)
#define TOF_ICC_FRAME_LEN_MAX TOF_ICC_FRAME_LEN(61)
#define TOF_ICC_FRAME_BUF_SIZE_BITS 11
#define TOF_ICC_FRAME_BUF_SIZE (1 << TOF_ICC_FRAME_BUF_SIZE_BITS)
#define TOF_ICC_FRAME_BUF_ALIGN_BITS 8
#define TOF_ICC_FRAME_BUF_ALIGN (1 << TOF_ICC_FRAME_BUF_ALIGN_BITS)
#define TOF_ICC_PB_SIZE_BITS 11
#define TOF_ICC_PB_SIZE (1 << TOF_ICC_PB_SIZE_BITS)
#define TOF_ICC_PB_ALIGN_BITS 11
#define TOF_ICC_PB_ALIGN (1 << TOF_ICC_PB_ALIGN_BITS)
#define TOF_ICC_ST_ALIGN_BITS 8
#define TOF_ICC_ST_ALIGN (1 << TOF_ICC_ST_ALIGN_BITS)
#define TOF_ICC_MBT_ALIGN_BITS 8
#define TOF_ICC_MBT_ALIGN (1 << TOF_ICC_MBT_ALIGN_BITS)
#define TOF_ICC_MBPT_ALIGN_BITS 8
#define TOF_ICC_MBPT_ALIGN (1 << TOF_ICC_MBPT_ALIGN_BITS)
#define TOF_ICC_BG_BSEQ_SIZE_BITS 24
#define TOF_ICC_BG_BSEQ_SIZE (1 << TOF_ICC_BG_BSEQ_SIZE_BITS)
#define TOF_ICC_BCH_DMA_ALIGN_BITS 8
#define TOF_ICC_BCH_DMA_ALIGN (1 << TOF_ICC_BCH_DMA_ALIGN_BITS)
/* this is a CPU-specific constant, but referred in the ICC spec. */
#define TOF_ICC_CACHE_LINE_SIZE_BITS 8
#define TOF_ICC_CACHE_LINE_SIZE (1 << TOF_ICC_CACHE_LINE_SIZE_BITS)
#define TOF_ICC_TOQ_DESC_SIZE_BITS 5
#define TOF_ICC_TOQ_DESC_SIZE (1 << TOF_ICC_TOQ_DESC_SIZE_BITS)
#define TOF_ICC_TCQ_DESC_SIZE_BITS 3
#define TOF_ICC_TCQ_DESC_SIZE (1 << TOF_ICC_TCQ_DESC_SIZE_BITS)
#define TOF_ICC_TCQ_NLINE_BITS (TOF_ICC_CACHE_LINE_SIZE_BITS - TOF_ICC_TCQ_DESC_SIZE_BITS)
#define TOF_ICC_MRQ_DESC_SIZE_BITS 5
#define TOF_ICC_MRQ_DESC_SIZE (1 << TOF_ICC_MRQ_DESC_SIZE_BITS)
#define TOF_ICC_PBQ_DESC_SIZE_BITS 3
#define TOF_ICC_PBQ_DESC_SIZE (1 << TOF_ICC_PBQ_DESC_SIZE_BITS)
#define TOF_ICC_PRQ_DESC_SIZE_BITS 3
#define TOF_ICC_PRQ_DESC_SIZE (1 << TOF_ICC_PRQ_DESC_SIZE_BITS)
/* log2 of the number of PRQ descriptors that fit into one cache line */
#define TOF_ICC_PRQ_NLINE_BITS (TOF_ICC_CACHE_LINE_SIZE_BITS - TOF_ICC_PRQ_DESC_SIZE_BITS)
/*
 * Queue sizing helpers. "size" is a small selector: the entry count is
 * 2^(size * 2 + 11) and *_LEN is that count times the descriptor size.
 * *_SIZE_NTYPES is 6 for every queue (presumably the number of valid
 * selector values, 0..5 -- confirm against the ICC spec).
 */
#define TOF_ICC_TOQ_SIZE_NTYPES 6
#define TOF_ICC_TOQ_SIZE_BITS(size) ((size) * 2 + 11)
#define TOF_ICC_TOQ_SIZE(size) (1 << TOF_ICC_TOQ_SIZE_BITS(size))
#define TOF_ICC_TOQ_LEN(size) (TOF_ICC_TOQ_SIZE(size) * TOF_ICC_TOQ_DESC_SIZE)
/* TCQ length uses the TOQ entry count with the TCQ descriptor size. */
#define TOF_ICC_TCQ_LEN(size) (TOF_ICC_TOQ_SIZE(size) * TOF_ICC_TCQ_DESC_SIZE)
#define TOF_ICC_MRQ_SIZE_NTYPES 6
#define TOF_ICC_MRQ_SIZE_BITS(size) ((size) * 2 + 11)
#define TOF_ICC_MRQ_SIZE(size) (1 << TOF_ICC_MRQ_SIZE_BITS(size))
#define TOF_ICC_MRQ_LEN(size) (TOF_ICC_MRQ_SIZE(size) * TOF_ICC_MRQ_DESC_SIZE)
#define TOF_ICC_PBQ_SIZE_NTYPES 6
#define TOF_ICC_PBQ_SIZE_BITS(size) ((size) * 2 + 11)
#define TOF_ICC_PBQ_SIZE(size) (1 << TOF_ICC_PBQ_SIZE_BITS(size))
#define TOF_ICC_PBQ_LEN(size) (TOF_ICC_PBQ_SIZE(size) * TOF_ICC_PBQ_DESC_SIZE)
#define TOF_ICC_PRQ_SIZE_NTYPES 6
#define TOF_ICC_PRQ_SIZE_BITS(size) ((size) * 2 + 11)
#define TOF_ICC_PRQ_SIZE(size) (1 << TOF_ICC_PRQ_SIZE_BITS(size))
#define TOF_ICC_PRQ_LEN(size) (TOF_ICC_PRQ_SIZE(size) * TOF_ICC_PRQ_DESC_SIZE)
/* Table alignment and per-entry size for the steering, MB and MBPT tables (log2 and derived value). */
#define TOF_ICC_STEERING_TABLE_ALIGN_BITS 8
#define TOF_ICC_STEERING_TABLE_ALIGN (1 << TOF_ICC_STEERING_TABLE_ALIGN_BITS)
#define TOF_ICC_STEERING_SIZE_BITS 4
#define TOF_ICC_STEERING_SIZE (1 << TOF_ICC_STEERING_SIZE_BITS)
#define TOF_ICC_MB_TABLE_ALIGN_BITS 8
#define TOF_ICC_MB_TABLE_ALIGN (1 << TOF_ICC_MB_TABLE_ALIGN_BITS)
#define TOF_ICC_MB_SIZE_BITS 4
#define TOF_ICC_MB_SIZE (1 << TOF_ICC_MB_SIZE_BITS)
/*
 * Encode a bit count into a small code: when bits % 9 == 3 (e.g. 12, 21, 30)
 * the result is bits / 9 - 1 (0, 1, 2); otherwise it is bits / 13 + 3
 * (e.g. 16 -> 4, 29 -> 5). "PS" presumably stands for page size -- confirm.
 */
#define TOF_ICC_MB_PS_ENCODE(bits) ((bits) % 9 == 3 ? (bits) / 9 - 1 : (bits) / 13 + 3)
/* NOTE: the two MBPT_ALIGN macros below repeat identical definitions that appear earlier in this header. */
#define TOF_ICC_MBPT_ALIGN_BITS 8
#define TOF_ICC_MBPT_ALIGN (1 << TOF_ICC_MBPT_ALIGN_BITS)
#define TOF_ICC_MBPT_SIZE_BITS 3
#define TOF_ICC_MBPT_SIZE (1 << TOF_ICC_MBPT_SIZE_BITS)
/* Bit widths of the six coordinate components (X, Y, Z, A, B, C). */
#define TOF_ICC_X_BITS 5
#define TOF_ICC_Y_BITS 5
#define TOF_ICC_Z_BITS 5
#define TOF_ICC_A_BITS 1
#define TOF_ICC_B_BITS 2
#define TOF_ICC_C_BITS 1
/* Upper bounds for X/Y/Z follow from the bit widths (32 each). */
#define TOF_ICC_MAX_X_SIZE (1 << TOF_ICC_X_BITS)
#define TOF_ICC_MAX_Y_SIZE (1 << TOF_ICC_Y_BITS)
#define TOF_ICC_MAX_Z_SIZE (1 << TOF_ICC_Z_BITS)
/* A/B/C sizes are fixed; note B is 3 although it occupies 2 bits. */
#define TOF_ICC_A_SIZE 2
#define TOF_ICC_B_SIZE 3
#define TOF_ICC_C_SIZE 2
/* Masks covering the low *_BITS bits of each component. */
#define TOF_ICC_X_MASK ((1 << TOF_ICC_X_BITS) - 1)
#define TOF_ICC_Y_MASK ((1 << TOF_ICC_Y_BITS) - 1)
#define TOF_ICC_Z_MASK ((1 << TOF_ICC_Z_BITS) - 1)
#define TOF_ICC_A_MASK ((1 << TOF_ICC_A_BITS) - 1)
#define TOF_ICC_B_MASK ((1 << TOF_ICC_B_BITS) - 1)
#define TOF_ICC_C_MASK ((1 << TOF_ICC_C_BITS) - 1)
/* Product of the A, B and C sizes (2 * 3 * 2 = 12). */
#define TOF_ICC_ABC_SIZE (TOF_ICC_A_SIZE * TOF_ICC_B_SIZE * TOF_ICC_C_SIZE)
/*
 * Compute a frame length from a payload length.
 *
 * The payload plus TOF_ICC_ECRC_LEN is rounded up to TOF_ICC_FRAME_ALIGN,
 * TOF_ICC_RH_LEN is added, and the result is clamped from below to
 * TOF_ICC_FRAME_LEN_MIN.
 */
static inline int tof_icc_get_framelen(int len){
	int framelen = TOF_ICC_RH_LEN + round_up(len + TOF_ICC_ECRC_LEN, TOF_ICC_FRAME_ALIGN);

	if (framelen < TOF_ICC_FRAME_LEN_MIN)
		return TOF_ICC_FRAME_LEN_MIN;
	return framelen;
}
/** Descriptors **/
/** commands and rcodes **/
/* TOQ command codes; every value is spelled out explicitly. */
enum {
	TOF_ICC_TOQ_NOP                      = 0x00,
	TOF_ICC_TOQ_PUT                      = 0x01,
	TOF_ICC_TOQ_WRITE_PIGGYBACK_BUFFER   = 0x02,
	TOF_ICC_TOQ_PUT_PIGGYBACK            = 0x03,
	TOF_ICC_TOQ_GET                      = 0x04,
	TOF_ICC_TOQ_GETL                     = 0x05,
	/* 0x06 - 0x0d are not assigned here */
	TOF_ICC_TOQ_ATOMIC_READ_MODIFY_WRITE = 0x0e,
	/* 0x0f is not assigned here */
	TOF_ICC_TOQ_TRANSMIT_RAW_PACKET1     = 0x10,
	TOF_ICC_TOQ_TRANSMIT_RAW_PACKET2     = 0x11,
	TOF_ICC_TOQ_TRANSMIT_SYSTEM_PACKET1  = 0x12,
	TOF_ICC_TOQ_TRANSMIT_SYSTEM_PACKET2  = 0x13,
	/* one past the highest command code */
	TOF_ICC_TOQ_NCOMMANDS                = 0x14,
};
/* MRQ codes; numbering starts at 1 and every value is spelled out explicitly. */
enum {
	TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_HALFWAY_NOTICE = 0x01,
	TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_NOTICE         = 0x02,
	TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_REMOTE_ERROR   = 0x03,
	TOF_ICC_MRQ_PUT_HALFWAY_NOTICE                      = 0x04,
	TOF_ICC_MRQ_PUT_LAST_HALFWAY_NOTICE                 = 0x05,
	TOF_ICC_MRQ_GET_HALFWAY_NOTICE                      = 0x06,
	TOF_ICC_MRQ_GET_LAST_HALFWAY_NOTICE                 = 0x07,
	TOF_ICC_MRQ_PUT_NOTICE                              = 0x08,
	TOF_ICC_MRQ_PUT_LAST_NOTICE                         = 0x09,
	TOF_ICC_MRQ_GET_NOTICE                              = 0x0a,
	TOF_ICC_MRQ_GET_LAST_NOTICE                         = 0x0b,
	TOF_ICC_MRQ_PUT_REMOTE_ERROR                        = 0x0c,
	TOF_ICC_MRQ_PUT_LAST_REMOTE_ERROR                   = 0x0d,
	TOF_ICC_MRQ_GET_REMOTE_ERROR                        = 0x0e,
	TOF_ICC_MRQ_GET_LAST_REMOTE_ERROR                   = 0x0f,
	/* one past the highest code */
	TOF_ICC_MRQ_NCOMMANDS                               = 0x10,
};
/* PRQ codes; the numbering is sparse, so every value is spelled out explicitly. */
enum {
	TOF_ICC_PRQ_UNKNOWN_TLP             = 0x0,
	TOF_ICC_PRQ_SYSTEM_TLP              = 0x1,
	TOF_ICC_PRQ_ADDRESS_RANGE_EXCEPTION = 0x6,
	TOF_ICC_PRQ_CQ_EXCEPTION            = 0x8,
	TOF_ICC_PRQ_ILLEGAL_TLP_FLAGS       = 0x9,
	TOF_ICC_PRQ_ILLEGAL_TLP_LENGTH      = 0xa,
	TOF_ICC_PRQ_CQ_ERROR                = 0xc,
};
/** structures **/
/*
 * Steering table entry: one 64-bit word of bit-fields (6+1+1+32+8+16 bits)
 * followed by a separate 64-bit length word.
 */
struct tof_icc_steering_entry {
uint64_t res1:6;
uint64_t readonly:1;
uint64_t enable:1;
uint64_t mbva:32;
uint64_t res2:8;
uint64_t mbid:16;
uint64_t length; /* for optimization */
};
/*
 * MB table entry: one 64-bit word of bit-fields (3+4+1+32+24 bits)
 * followed by a separate 64-bit npage word.
 */
struct tof_icc_mb_entry {
uint64_t ps:3; /* presumably a TOF_ICC_MB_PS_ENCODE() code -- confirm */
uint64_t res1:4;
uint64_t enable:1;
uint64_t ipa:32;
uint64_t res2:24;
uint64_t npage; /* for optimization */
};
/* MBPT entry: a single 64-bit word of bit-fields (7+1+4+28+24 bits). */
struct tof_icc_mbpt_entry {
uint64_t res1:7;
uint64_t enable:1;
uint64_t res2:4;
uint64_t ipa:28;
uint64_t res3:24;
};
/* Packed (cqid, stag, offset) triple in one 64-bit word (40+18+6 bits). */
struct tof_icc_cq_stag_offset {
uint64_t offset:40;
uint64_t stag:18;
uint64_t cqid:6;
};
/*
 * First header word shared by the TOQ descriptor structs below.
 * The declared fields add up to 64 bits (eight bytes of uint8_t members).
 */
struct tof_icc_toq_common_header1 {
uint8_t interrupt:1;
uint8_t res1:4;
uint8_t source_type:2;
uint8_t flip:1;
uint8_t command; /* presumably one of the TOF_ICC_TOQ_* command codes -- confirm */
/* One byte viewed either as an MTU value or as an ARMW operation code. */
union {
uint8_t mtu;
struct {
uint8_t res:4;
uint8_t op:4;
} armw;
} mtuop;
uint8_t sps:4;
uint8_t pa:1;
uint8_t pb:2;
uint8_t pc:1;
uint8_t rx;
uint8_t ry;
uint8_t rz;
uint8_t ra:1;
uint8_t rb:2;
uint8_t rc:1;
uint8_t res3:1;
uint8_t ri:3;
};
/*
 * Second header word used by the put/get/ARMW TOQ descriptors.
 * The declared fields add up to 64 bits (8 + 8 + 16 + 32).
 */
struct tof_icc_toq_common_header2 {
uint8_t gap;
uint8_t s:1;
uint8_t r:1;
uint8_t q:1;
uint8_t p:1;
uint8_t res1:1;
uint8_t j:1;
uint8_t res2:2;
uint16_t edata;
/* 32-bit length word: 24-bit length in the normal view, 6-bit in the piggyback view. */
union{
struct {
uint32_t length:24;
uint32_t res:8;
} normal;
struct {
uint32_t length:6;
uint32_t res:26;
} piggyback;
} len;
};
/* Generic TOQ descriptor: common header 1 followed by three opaque 64-bit words. */
struct tof_icc_toq_descriptor {
struct tof_icc_toq_common_header1 head1;
uint64_t res[3];
};
/* NOP descriptor: only the common header 1 carries information; the rest is reserved. */
struct tof_icc_toq_nop {
struct tof_icc_toq_common_header1 head1;
uint64_t res[3];
};
/* PUT descriptor: both common headers plus remote and local (cqid, stag, offset) words. */
struct tof_icc_toq_put {
struct tof_icc_toq_common_header1 head1;
struct tof_icc_toq_common_header2 head2;
struct tof_icc_cq_stag_offset remote;
struct tof_icc_cq_stag_offset local;
};
/* Write-piggyback-buffer descriptor: common header 1 followed by three 64-bit data words. */
struct tof_icc_toq_write_piggyback_buffer {
struct tof_icc_toq_common_header1 head1;
uint64_t data[3];
};
/* PUT-piggyback descriptor: both common headers, the remote target word and one inline 64-bit data word. */
struct tof_icc_toq_put_piggyback {
struct tof_icc_toq_common_header1 head1;
struct tof_icc_toq_common_header2 head2;
struct tof_icc_cq_stag_offset remote;
uint64_t data;
};
/* GET descriptor: same member layout as struct tof_icc_toq_put. */
struct tof_icc_toq_get {
struct tof_icc_toq_common_header1 head1;
struct tof_icc_toq_common_header2 head2;
struct tof_icc_cq_stag_offset remote;
struct tof_icc_cq_stag_offset local;
};
/* Atomic read-modify-write descriptor: both common headers, the remote target word and one 64-bit data word. */
struct tof_icc_toq_atomic_read_modify_write {
struct tof_icc_toq_common_header1 head1;
struct tof_icc_toq_common_header2 head2;
struct tof_icc_cq_stag_offset remote;
uint64_t data;
};
/*
 * Transmit-raw-packet (type 1) descriptor: common header 1, then a gap byte,
 * a 12-bit length, a reserved 64-bit word and a 48-bit pa field.
 */
struct tof_icc_toq_transmit_raw_packet1 {
struct tof_icc_toq_common_header1 head1;
uint8_t gap;
uint8_t res4[3];
uint32_t length:12;
uint32_t res5:20;
uint64_t res6;
uint64_t pa:48; /* for optimization */
uint64_t res7:16;
};
/*
 * Transmit-raw-packet (type 2) descriptor. Unlike type 1 it does not embed
 * the common header 1: its first eight bytes are declared inline and carry
 * e, port and vc fields. The remaining members mirror the type 1 layout.
 */
struct tof_icc_toq_transmit_raw_packet2 {
uint8_t interrupt:1;
uint8_t res1:4;
uint8_t source_type:2;
uint8_t flip:1;
uint8_t command;
uint8_t res2:7;
uint8_t e:1;
uint8_t res3[4];
uint8_t port:5;
uint8_t res4:1;
uint8_t vc:2;
uint8_t gap;
uint8_t res5[3];
uint32_t length:12;
uint32_t res6:20;
uint64_t res7;
uint64_t pa:48; /* for optimization */
uint64_t res8:16;
};
/* Transmit-system-packet descriptor: same member layout as struct tof_icc_toq_transmit_raw_packet1. */
struct tof_icc_toq_transmit_system_packet {
struct tof_icc_toq_common_header1 head1; /* rx, ry, rz should be rdx, rdy, rdz */
uint8_t gap;
uint8_t res4[3];
uint32_t length:12;
uint32_t res5:20;
uint64_t res6;
uint64_t pa:48; /* for optimization */
uint64_t res7:16;
};
/*
 * TCQ descriptor. The declared fields add up to 64 bits
 * (8 flag bits, rcode, two reserved bytes and a 32-bit length word).
 */
struct tof_icc_tcq_descriptor {
uint8_t res1:5;
uint8_t counter_unmatch:1;
uint8_t res2:1;
uint8_t flip:1;
uint8_t rcode;
uint8_t res3[2];
/* Same two views of the length word as in struct tof_icc_toq_common_header2. */
union{
struct {
uint32_t length:24;
uint32_t res:8;
} normal;
struct {
uint32_t length:6;
uint32_t res:26;
} piggyback;
} len;
};
/*
 * First header word shared by the MRQ descriptor structs.
 * The declared fields add up to 64 bits (eight bytes of uint8_t members).
 */
struct tof_icc_mrq_common_header1 {
uint8_t res1:7;
uint8_t flip:1;
uint8_t id;
uint8_t rcode; /* presumably one of the TOF_ICC_MRQ_* codes -- confirm */
uint8_t res2:4;
uint8_t pa:1;
uint8_t pb:2;
uint8_t pc:1;
uint8_t x;
uint8_t y;
uint8_t z;
uint8_t a:1;
uint8_t b:2;
uint8_t c:1;
uint8_t res3:1;
uint8_t i:3;
};
/*
 * Second header word of the MRQ descriptors.
 * The declared fields add up to 64 bits (8 + 8 + 16 + 32).
 */
struct tof_icc_mrq_common_header2 {
uint8_t res1;
uint8_t res2:4;
uint8_t initial:1;
uint8_t res3:3;
uint16_t edata;
/* 32-bit word viewed either as an 11-bit length or as a 4-bit ARMW operation code. */
union {
struct {
uint32_t length:11;
uint32_t res:21;
} normal;
struct {
uint32_t op:4;
uint32_t res:28;
} armw;
} lenop;
};
/* MRQ ARMW halfway notice: both MRQ headers followed by local and remote (cqid, stag, offset) words. */
struct tof_icc_mrq_atomic_read_modify_write_halfway_notice {
struct tof_icc_mrq_common_header1 head1;
struct tof_icc_mrq_common_header2 head2;
struct tof_icc_cq_stag_offset local;
struct tof_icc_cq_stag_offset remote;
};
/* Generic MRQ descriptor: both MRQ headers followed by two (cqid, stag, offset) words with neutral names. */
struct tof_icc_mrq_descriptor {
struct tof_icc_mrq_common_header1 head1;
struct tof_icc_mrq_common_header2 head2;
struct tof_icc_cq_stag_offset cso1;
struct tof_icc_cq_stag_offset cso2;
};
/* PBQ descriptor: a single 64-bit word of bit-fields (7+1+3+29+24 bits). */
struct tof_icc_pbq_descriptor {
uint64_t res1:7;
uint64_t f:1;
uint64_t res2:3;
uint64_t pa:29;
uint64_t res3:24;
};
/*
 * PRQ descriptor: a single 64-bit word of bit-fields
 * (7+1+3+29+8+1+5+1+1+8 bits). f and pa sit at the same bit positions as
 * in struct tof_icc_pbq_descriptor.
 */
struct tof_icc_prq_descriptor {
uint64_t rcode:7; /* presumably one of the TOF_ICC_PRQ_* codes -- confirm */
uint64_t f:1;
uint64_t res1:3;
uint64_t pa:29;
uint64_t res2:8;
uint64_t w:1;
uint64_t res3:5;
uint64_t l:1;
uint64_t e:1;
uint64_t res4:8;
};
/** Registers **/
/* useful packed structures */
/*
 * Packed subnet value: nine 6-bit fields (l/s/n for each of z, y, x) plus
 * 10 reserved bits, 64 bits in total. The field order matches the
 * TOF_ICC_REG_CQS_MODE_SUBNET_* masks (LZ lowest, NX highest).
 */
struct tof_icc_reg_subnet {
uint64_t lz:6;
uint64_t sz:6;
uint64_t nz:6;
uint64_t ly:6;
uint64_t sy:6;
uint64_t ny:6;
uint64_t lx:6;
uint64_t sx:6;
uint64_t nx:6;
uint64_t res:10;
};
/*
 * Packed BG address in a 32-bit word: bgid, tni, the c/b/a and z/y/x
 * coordinate components, and pc/pb/pa (6+3+1+2+1+5+5+5+1+2+1 = 32 bits).
 */
struct tof_icc_reg_bg_address {
uint32_t bgid:6;
uint32_t tni:3;
uint32_t c:1;
uint32_t b:2;
uint32_t a:1;
uint32_t z:5;
uint32_t y:5;
uint32_t x:5;
uint32_t pc:1;
uint32_t pb:2;
uint32_t pa:1;
};
/*
 * Relative offsets of the interrupt controller registers. The five
 * registers are laid out 8 bytes apart in the order IRR, IMR, IRC, IMC, ICL;
 * the per-unit *_IRR..*_ICL register groups below use the same spacing.
 */
#define TOF_ICC_IRQREG_IRR 0x0
#define TOF_ICC_IRQREG_IMR 0x8
#define TOF_ICC_IRQREG_IRC 0x10
#define TOF_ICC_IRQREG_IMC 0x18
#define TOF_ICC_IRQREG_ICL 0x20
/* TOFU REGISTERS */
/* Base physical address from which every *_PA macro below is computed. */
#define tof_icc_reg_pa 0x40000000
/* CQ */
/*
 * TOF_ICC_REG_CQ_PA() strides 0x1000000 per TNI and 0x10000 per CQ.
 * The remaining values are register offsets, presumably relative to that
 * address -- confirm against the users of these macros.
 */
#define TOF_ICC_REG_CQ_PA(tni, cqid) (tof_icc_reg_pa + 0 + (tni) * 0x1000000 + (cqid) * 0x10000)
#define TOF_ICC_REG_CQ_TOQ_DIRECT_DESCRIPTOR 0x0
#define TOF_ICC_REG_CQ_TOQ_FETCH_START 0x40
#define TOF_ICC_REG_CQ_MRQ_FULL_POINTER 0x48
#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER0 0x50
#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER1 0x58
#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER2 0x60
#define TOF_ICC_REG_CQ_TCQ_NUM_NOTICE 0x68
#define TOF_ICC_REG_CQ_MRQ_NUM_NOTICE 0x70
#define TOF_ICC_REG_CQ_TX_PAYLOAD_BYTE 0x78
#define TOF_ICC_REG_CQ_RX_PAYLOAD_BYTE 0x80
/* Offset range for register dumps; DUMP_END is 8 past the last offset listed above. */
#define TOF_ICC_REG_CQ_DUMP_START 0x0
#define TOF_ICC_REG_CQ_DUMP_END 0x88
/* BCH */
/* Same per-TNI / per-BG strides as the CQ block, starting 0xe00000 above tof_icc_reg_pa. */
#define TOF_ICC_REG_BCH_PA(tni, bgid) (tof_icc_reg_pa + 0x0000e00000 + (tni) * 0x1000000 + (bgid) * 0x10000)
#define TOF_ICC_REG_BCH_IDATA 0x800
#define TOF_ICC_REG_BCH_READY 0x840
/* Bit mask, presumably within the READY register -- confirm. */
#define TOF_ICC_REG_BCH_READY_STATE BIT(63)
#define TOF_ICC_REG_BCH_IGNORED_SIGNAL_COUNT 0x848
#define TOF_ICC_REG_BCH_DUMP_START 0x800
#define TOF_ICC_REG_BCH_DUMP_END 0x850
/* CQS */
/*
 * Same per-TNI / per-CQ strides as the CQ block, starting 0x400000 above
 * tof_icc_reg_pa. Plain hex values are register offsets; BIT()/GENMASK()
 * values are field masks, presumably for the register whose name they
 * extend (e.g. *_STATUS_* for TOF_ICC_REG_CQS_STATUS) -- confirm.
 */
#define TOF_ICC_REG_CQS_PA(tni, cqid) (tof_icc_reg_pa + 0x0000400000 + (tni) * 0x1000000 + (cqid) * 0x10000)
#define TOF_ICC_REG_CQS_STATUS 0x0
#define TOF_ICC_REG_CQS_STATUS_DESCRIPTOR_PROCESS_STOP BIT(63)
#define TOF_ICC_REG_CQS_STATUS_DESCRIPTOR_FETCH_STOP BIT(62)
#define TOF_ICC_REG_CQS_STATUS_BLANK_ENTRY_FLIP_BIT BIT(61)
#define TOF_ICC_REG_CQS_STATUS_CACHE_FLUSH_BUSY BIT(60)
#define TOF_ICC_REG_CQS_STATUS_CQ_ENABLE BIT(59)
#define TOF_ICC_REG_CQS_STATUS_SESSION_DEAD BIT(58)
#define TOF_ICC_REG_CQS_STATUS_SESSION_OFFSET_OVERFLOW BIT(57)
#define TOF_ICC_REG_CQS_STATUS_SESSION_OFFSET GENMASK(56, 32)
#define TOF_ICC_REG_CQS_STATUS_NEXT_DESCRIPTOR_OFFSET GENMASK(29, 5)
#define TOF_ICC_REG_CQS_ENABLE 0x8
#define TOF_ICC_REG_CQS_CACHE_FLUSH 0x10
#define TOF_ICC_REG_CQS_FETCH_STOP 0x18
#define TOF_ICC_REG_CQS_MODE 0x20
#define TOF_ICC_REG_CQS_MODE_SYSTEM BIT(63)
#define TOF_ICC_REG_CQS_MODE_TRP2_ENABLE BIT(62)
#define TOF_ICC_REG_CQS_MODE_TRP1_ENABLE BIT(61)
#define TOF_ICC_REG_CQS_MODE_SESSION BIT(60)
/* Nine 6-bit subnet fields in bits 53..0, in the same order as struct tof_icc_reg_subnet. */
#define TOF_ICC_REG_CQS_MODE_SUBNET_NX GENMASK(53, 48)
#define TOF_ICC_REG_CQS_MODE_SUBNET_SX GENMASK(47, 42)
#define TOF_ICC_REG_CQS_MODE_SUBNET_LX GENMASK(41, 36)
#define TOF_ICC_REG_CQS_MODE_SUBNET_NY GENMASK(35, 30)
#define TOF_ICC_REG_CQS_MODE_SUBNET_SY GENMASK(29, 24)
#define TOF_ICC_REG_CQS_MODE_SUBNET_LY GENMASK(23, 18)
#define TOF_ICC_REG_CQS_MODE_SUBNET_NZ GENMASK(17, 12)
#define TOF_ICC_REG_CQS_MODE_SUBNET_SZ GENMASK(11, 6)
#define TOF_ICC_REG_CQS_MODE_SUBNET_LZ GENMASK(5, 0)
#define TOF_ICC_REG_CQS_GPID 0x28
#define TOF_ICC_REG_CQS_TOQ_IPA 0x30
#define TOF_ICC_REG_CQS_TOQ_SIZE 0x38
#define TOF_ICC_REG_CQS_TCQ_IPA 0x40
#define TOF_ICC_REG_CQS_TCQ_IPA_CACHE_INJECTION BIT(63)
#define TOF_ICC_REG_CQS_MRQ_IPA 0x48
#define TOF_ICC_REG_CQS_MRQ_IPA_CACHE_INJECTION BIT(63)
#define TOF_ICC_REG_CQS_MRQ_SIZE 0x50
#define TOF_ICC_REG_CQS_MRQ_MASK 0x58
#define TOF_ICC_REG_CQS_TCQ_DESCRIPTOR_COALESCING_TIMER 0x60
#define TOF_ICC_REG_CQS_MRQ_DESCRIPTOR_COALESCING_TIMER 0x68
#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_COALESCING_TIMER 0x70
#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_COALESCING_COUNT 0x78
#define TOF_ICC_REG_CQS_TOQ_DIRECT_SOURCE_COUNT 0x80
#define TOF_ICC_REG_CQS_TOQ_DIRECT_DESCRIPTOR_COUNT 0x88
#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_ENABLE 0x90
#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_IPA 0x98
#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_SIZE 0xa0
#define TOF_ICC_REG_CQS_STEERING_TABLE_ENABLE 0xa8
#define TOF_ICC_REG_CQS_STEERING_TABLE_IPA 0xb0
#define TOF_ICC_REG_CQS_STEERING_TABLE_SIZE 0xb8
#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_MASK 0xc0
/* Interrupt register group: IRR at 0xc8, then IMR/IRC/IMC/ICL at 8-byte steps. */
#define TOF_ICC_REG_CQS_IRR 0xc8
#define TOF_ICC_REG_CQS_IMR 0xd0
#define TOF_ICC_REG_CQS_IRC 0xd8
#define TOF_ICC_REG_CQS_IMC 0xe0
#define TOF_ICC_REG_CQS_ICL 0xe8
#define TOF_ICC_REG_CQS_DUMP_START 0x0
#define TOF_ICC_REG_CQS_DUMP_END 0xf0
/* BGS */
/*
 * Same per-TNI / per-BG strides as the CQ block, starting 0x800000 above
 * tof_icc_reg_pa. Plain hex values are register offsets; BIT()/GENMASK()
 * values are field masks, presumably for the register whose name they extend.
 */
#define TOF_ICC_REG_BGS_PA(tni, bgid) (tof_icc_reg_pa + 0x0000800000 + (tni) * 0x1000000 + (bgid) * 0x10000)
#define TOF_ICC_REG_BGS_ENABLE 0x0
/* Interrupt register group: IRR at 0x8, then IMR/IRC/IMC/ICL at 8-byte steps. */
#define TOF_ICC_REG_BGS_IRR 0x8
#define TOF_ICC_REG_BGS_IMR 0x10
#define TOF_ICC_REG_BGS_IRC 0x18
#define TOF_ICC_REG_BGS_IMC 0x20
#define TOF_ICC_REG_BGS_ICL 0x28
#define TOF_ICC_REG_BGS_STATE 0x30
#define TOF_ICC_REG_BGS_STATE_ENABLE BIT(0)
#define TOF_ICC_REG_BGS_EXCEPTION_INFO_GPID_UNMATCH 0x38
#define TOF_ICC_REG_BGS_EXCEPTION_INFO_GPID_UNMATCH_BG_ADDRESS GENMASK(27, 0)
#define TOF_ICC_REG_BGS_EXCEPTION_INFO_ADDRESS_UNMATCH 0x40
#define TOF_ICC_REG_BGS_EXCEPTION_INFO_ADDRESS_UNMATCH_BG_ADDRESS GENMASK(27, 0)
#define TOF_ICC_REG_BGS_SIGNAL_A 0x48
#define TOF_ICC_REG_BGS_SIGNAL_A_SIG_RECV BIT(63)
#define TOF_ICC_REG_BGS_SIGNAL_A_TLP_RECV BIT(62)
#define TOF_ICC_REG_BGS_SIGNAL_A_SIG_SEND BIT(61)
#define TOF_ICC_REG_BGS_SIGNAL_A_OP_TYPE GENMASK(3, 0)
#define TOF_ICC_REG_BGS_SIGNAL_B 0x50
#define TOF_ICC_REG_BGS_SIGNAL_B_SIG_RECV BIT(63)
#define TOF_ICC_REG_BGS_SIGNAL_B_TLP_RECV BIT(62)
#define TOF_ICC_REG_BGS_SIGNAL_B_SIG_SEND BIT(61)
#define TOF_ICC_REG_BGS_SIGNAL_B_OP_TYPE GENMASK(3, 0)
#define TOF_ICC_REG_BGS_SIGNAL_MASK 0x58
#define TOF_ICC_REG_BGS_SIGNAL_MASK_SIG_RECV BIT(63)
#define TOF_ICC_REG_BGS_SIGNAL_MASK_TLP_RECV BIT(62)
#define TOF_ICC_REG_BGS_SIGNAL_MASK_SIG_SEND BIT(61)
#define TOF_ICC_REG_BGS_SIGNAL_MASK_TLP_SEND BIT(60)
#define TOF_ICC_REG_BGS_LOCAL_LINK 0x60
#define TOF_ICC_REG_BGS_LOCAL_LINK_BGID_RECV GENMASK(37, 32)
#define TOF_ICC_REG_BGS_LOCAL_LINK_BGID_SEND GENMASK(5, 0)
#define TOF_ICC_REG_BGS_REMOTE_LINK 0x68
/* NOTE(review): RECV covers 28 bits (59..32) while SEND covers 32 bits (31..0) -- confirm SEND against the ICC spec. */
#define TOF_ICC_REG_BGS_REMOTE_LINK_BG_ADDRESS_RECV GENMASK(59, 32)
#define TOF_ICC_REG_BGS_REMOTE_LINK_BG_ADDRESS_SEND GENMASK(31, 0)
#define TOF_ICC_REG_BGS_SUBNET_SIZE 0x70
#define TOF_ICC_REG_BGS_GPID_BSEQ 0x78
#define TOF_ICC_REG_BGS_DATA_A0 0x108
#define TOF_ICC_REG_BGS_DATA_AE 0x178
#define TOF_ICC_REG_BGS_DATA_B0 0x188
#define TOF_ICC_REG_BGS_DATA_BE 0x1f8
#define TOF_ICC_REG_BGS_BCH_MASK 0x800
#define TOF_ICC_REG_BGS_BCH_MASK_MASK BIT(63)
#define TOF_ICC_REG_BGS_BCH_MASK_STATUS 0x808
#define TOF_ICC_REG_BGS_BCH_MASK_STATUS_RUN BIT(63)
#define TOF_ICC_REG_BGS_BCH_NOTICE_IPA 0x810
#define TOF_ICC_REG_BGS_DUMP_START 0x0
#define TOF_ICC_REG_BGS_DUMP_END 0x818
/* TNI */
/*
 * Per-TNI register block, 0xc00000 above tof_icc_reg_pa with a 0x1000000
 * stride per TNI. Plain hex values are register offsets; BIT()/GENMASK()
 * values are field masks, presumably for the register whose name they extend.
 */
#define TOF_ICC_REG_TNI_PA(tni) (tof_icc_reg_pa + 0x0000c00000 + (tni) * 0x1000000)
/* Interrupt register group: IRR at 0x8, then IMR/IRC/IMC/ICL at 8-byte steps. */
#define TOF_ICC_REG_TNI_IRR 0x8
#define TOF_ICC_REG_TNI_IMR 0x10
#define TOF_ICC_REG_TNI_IRC 0x18
#define TOF_ICC_REG_TNI_IMC 0x20
#define TOF_ICC_REG_TNI_ICL 0x28
#define TOF_ICC_REG_TNI_STATE 0x30
/* 2-bit state field and its named values; the value 1 has no name here. */
#define TOF_ICC_REG_TNI_STATE_MASK GENMASK(1, 0)
#define TOF_ICC_REG_TNI_STATE_DISABLE 0
#define TOF_ICC_REG_TNI_STATE_NORMAL 2
#define TOF_ICC_REG_TNI_STATE_ERROR 3
#define TOF_ICC_REG_TNI_ENABLE 0x38
#define TOF_ICC_REG_TNI_CQ_PRESENT 0x40
#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG 0x48
#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG_DEST_BG GENMASK(37, 32)
#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG_SOURCE_BG_ADDRESS GENMASK(27, 0)
#define TOF_ICC_REG_TNI_PRQ_FULL_POINTER 0x100
#define TOF_ICC_REG_TNI_PBQ_PA 0x108
#define TOF_ICC_REG_TNI_PBQ_SIZE 0x110
#define TOF_ICC_REG_TNI_PRQ_PA 0x118
#define TOF_ICC_REG_TNI_PRQ_PA_CACHE_INJECTION BIT(63)
#define TOF_ICC_REG_TNI_PRQ_SIZE 0x120
#define TOF_ICC_REG_TNI_PRQ_MASK 0x128
#define TOF_ICC_REG_TNI_PRQ_ENTRY_COALESCING_TIMER 0x130
#define TOF_ICC_REG_TNI_PRQ_INTERRUPT_COALESCING_TIMER 0x138
#define TOF_ICC_REG_TNI_PRQ_INTERRUPT_COALESCING_COUNT 0x140
#define TOF_ICC_REG_TNI_SEND_COUNT 0x148
#define TOF_ICC_REG_TNI_NO_SEND_COUNT 0x150
#define TOF_ICC_REG_TNI_BLOCK_SEND_COUNT 0x158
#define TOF_ICC_REG_TNI_RECEIVE_COUNT 0x160
#define TOF_ICC_REG_TNI_NO_RECEIVE_COUNT 0x168
#define TOF_ICC_REG_TNI_NUM_SEND_TLP 0x170
#define TOF_ICC_REG_TNI_BYTE_SEND_TLP 0x178
#define TOF_ICC_REG_TNI_NUM_SEND_SYSTEM_TLP 0x180
#define TOF_ICC_REG_TNI_NUM_RECEIVE_TLP 0x188
#define TOF_ICC_REG_TNI_BYTE_RECEIVE_TLP 0x190
#define TOF_ICC_REG_TNI_NUM_RECEIVE_NULLIFIED_TLP 0x198
#define TOF_ICC_REG_TNI_RX_NUM_UNKNOWN_TLP 0x1a0
#define TOF_ICC_REG_TNI_RX_NUM_SYSTEM_TLP 0x1a8
#define TOF_ICC_REG_TNI_RX_NUM_EXCEPTION_TLP 0x1b0
#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_UNKNOWN_TLP 0x1b8
#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_SYSTEM_TLP 0x1c0
#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_EXCEPTION_TLP 0x1c8
/* The dump range starts at 0x8 (the first offset defined for this block), not 0x0. */
#define TOF_ICC_REG_TNI_DUMP_START 0x8
#define TOF_ICC_REG_TNI_DUMP_END 0x1d0
/* Port */
/*
 * Per-port register block, 0x6000000 above tof_icc_reg_pa with a 0x1000
 * stride per port. The values below are register offsets; the offsets are
 * not contiguous (e.g. nothing is defined between 0x18 and 0x80).
 */
#define TOF_ICC_REG_PORT_PA(port) (tof_icc_reg_pa + 0x0006000000 + (port) * 0x1000)
#define TOF_ICC_REG_PORT_TX_VC0_ZERO_CREDIT_COUNT 0x0
#define TOF_ICC_REG_PORT_TX_VC1_ZERO_CREDIT_COUNT 0x8
#define TOF_ICC_REG_PORT_TX_VC2_ZERO_CREDIT_COUNT 0x10
#define TOF_ICC_REG_PORT_TX_VC3_ZERO_CREDIT_COUNT 0x18
#define TOF_ICC_REG_PORT_FREE_RUN_COUNT 0x80
#define TOF_ICC_REG_PORT_NUM_SEND_DLLP 0xc0
#define TOF_ICC_REG_PORT_NUM_SEND_TLP 0xc8
#define TOF_ICC_REG_PORT_BYTE_SEND_TLP 0xd0
#define TOF_ICC_REG_PORT_NUM_SEND_SYSTEM_TLP 0xd8
#define TOF_ICC_REG_PORT_NUM_SEND_NULLIFIED_TLP 0xe0
#define TOF_ICC_REG_PORT_NUM_TX_DISCARD_SYSTEM_TLP 0xe8
#define TOF_ICC_REG_PORT_NUM_TX_DISCARD_NORMAL_TLP 0xf0
#define TOF_ICC_REG_PORT_NUM_TX_FILTERED_NORMAL_TLP 0xf8
#define TOF_ICC_REG_PORT_NUM_VIRTUAL_CUT_THROUGH_TLP 0x100
#define TOF_ICC_REG_PORT_NUM_GENERATE_NULLIFIED_TLP 0x108
#define TOF_ICC_REG_PORT_NUM_RECEIVE_DLLP 0x110
#define TOF_ICC_REG_PORT_NUM_RECEIVE_TLP 0x118
#define TOF_ICC_REG_PORT_BYTE_RECEIVE_TLP 0x120
#define TOF_ICC_REG_PORT_NUM_RECEIVE_SYSTEM_TLP 0x128
#define TOF_ICC_REG_PORT_NUM_RECEIVE_NULLIFIED_TLP 0x130
#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_SYSTEM_TLP 0x138
#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_NORMAL_TLP 0x140
#define TOF_ICC_REG_PORT_NUM_RX_FILTERED_NORMAL_TLP 0x158
#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_NULLIFIED_TLP 0x160
#define TOF_ICC_REG_PORT_FRAME_LCRC_ERROR_COUNT 0x170
#define TOF_ICC_REG_PORT_TX_RETRY_BUFFER_CE_COUNT 0x180
#define TOF_ICC_REG_PORT_RX_VC_BUFFER_CE_COUNT 0x188
#define TOF_ICC_REG_PORT_XB_CE_COUNT 0x190
#define TOF_ICC_REG_PORT_ACK_NACK_TIME_OUT_COUNT 0x198
#define TOF_ICC_REG_PORT_SLICE0_FCS_ERROR_COUNT 0x1a0
#define TOF_ICC_REG_PORT_SLICE1_FCS_ERROR_COUNT 0x1a8
#define TOF_ICC_REG_PORT_DUMP_START 0x0
#define TOF_ICC_REG_PORT_DUMP_END 0x1b0
/* XB */
/* Single register block at a fixed address, 0x600f000 above tof_icc_reg_pa. */
#define TOF_ICC_REG_XB_PA (tof_icc_reg_pa + 0x000600f000)
#define TOF_ICC_REG_XB_STQ_ENABLE 0x0
#define TOF_ICC_REG_XB_STQ_UPDATE_INTERVAL 0x8
#define TOF_ICC_REG_XB_STQ_PA 0x10
#define TOF_ICC_REG_XB_STQ_SIZE 0x18
#define TOF_ICC_REG_XB_STQ_NEXT_OFFSET 0x20
#define TOF_ICC_REG_XB_DUMP_START 0x0
#define TOF_ICC_REG_XB_DUMP_END 0x28
/*
 * Offsets of the cycle counters: one data/wait pair per index, 0x10 apart.
 * The TC pairs start at 0x0 and the TD pairs at 0x60. These macros lack
 * the _REG_ infix, so they are presumably offsets into a different area
 * than the registers above -- confirm.
 */
#define TOF_ICC_XB_TC_DATA_CYCLE_COUNT(tni) ((tni) * 0x10 + 0x0)
#define TOF_ICC_XB_TC_WAIT_CYCLE_COUNT(tni) ((tni) * 0x10 + 0x8)
#define TOF_ICC_XB_TD_DATA_CYCLE_COUNT(tnr) ((tnr) * 0x10 + 0x60)
#define TOF_ICC_XB_TD_WAIT_CYCLE_COUNT(tnr) ((tnr) * 0x10 + 0x68)
/* Tofu */
/* Single register block at a fixed address, 0x7000000 above tof_icc_reg_pa. */
#define TOF_ICC_REG_TOFU_PA (tof_icc_reg_pa + 0x0007000000)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS 0x0
/* Coordinate field masks: X, Y, Z are 5 bits each, A is 1 bit, B is 2 bits, C is 1 bit. */
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_X GENMASK(22, 18)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_Y GENMASK(17, 13)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_Z GENMASK(12, 8)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_A BIT(7)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_B GENMASK(6, 5)
#define TOF_ICC_REG_TOFU_NODE_ADDRESS_C BIT(4)
#define TOF_ICC_REG_TOFU_PORT_SETTING 0x8
/* Per-TNR register pair, 0x10 apart, starting at 0x10 / 0x18. */
#define TOF_ICC_REG_TOFU_TD_TLP_FILTER(tnr) ((tnr) * 0x10 + 0x10)
#define TOF_ICC_REG_TOFU_TD_SETTINGS(tnr) ((tnr) * 0x10 + 0x18)
#define TOF_ICC_REG_TOFU_TNR_MSI_BASE 0xc0
/* Interrupt register group: IRR at 0xc8, then IMR/IRC/IMC/ICL at 8-byte steps. */
#define TOF_ICC_REG_TOFU_TNR_IRR 0xc8
#define TOF_ICC_REG_TOFU_TNR_IMR 0xd0
#define TOF_ICC_REG_TOFU_TNR_IRC 0xd8
#define TOF_ICC_REG_TOFU_TNR_IMC 0xe0
#define TOF_ICC_REG_TOFU_TNR_ICL 0xe8
/* Per-TNI register groups: 0x100 bytes per TNI, starting at offset 0x100. */
#define TOF_ICC_REG_TOFU_TNI_VMS(tni, vmsid) ((tni) * 0x100 + (vmsid) * 0x8 + 0x100)
#define TOF_ICC_REG_TOFU_TNI_VMS_CQ00(tni) ((tni) * 0x100 + 0x180)
#define TOF_ICC_REG_TOFU_TNI_VMS_BG00(tni) ((tni) * 0x100 + 0x1a0)
#define TOF_ICC_REG_TOFU_TNI_VMS_BG16(tni) ((tni) * 0x100 + 0x1a8)
#define TOF_ICC_REG_TOFU_TNI_VMS_BG32(tni) ((tni) * 0x100 + 0x1b0)
#define TOF_ICC_REG_TOFU_TNI_MSI_BASE(tni) ((tni) * 0x100 + 0x1c0)
#define TOF_ICC_REG_TOFU_DUMP_START 0x0
#define TOF_ICC_REG_TOFU_DUMP_END 0x6c8
/** Interrupts **/
/*
 * Interrupt cause bits, grouped by unit prefix (CQS, BGS, TNI). Each group
 * has its own bit numbering starting at BIT(0); presumably these apply to
 * the IRR/IMR/IRC/IMC/ICL registers of the matching unit -- confirm.
 */
#define TOF_ICC_IRQ_CQS_TOQ_READ_EXCEPTION BIT(0)
#define TOF_ICC_IRQ_CQS_TOQ_DIRECT_DESCRIPTOR_EXCEPTION BIT(1)
#define TOF_ICC_IRQ_CQS_TOQ_MARKED_UE BIT(2)
#define TOF_ICC_IRQ_CQS_TCQ_WRITE_EXCEPTION BIT(3)
#define TOF_ICC_IRQ_CQS_TOQ_SOURCE_TYPE_EXCEPTION BIT(4)
#define TOF_ICC_IRQ_CQS_TCQ_WRITE_ACKNOWLEDGE BIT(5)
#define TOF_ICC_IRQ_CQS_MRQ_WRITE_ACKNOWLEDGE BIT(7)
#define TOF_ICC_IRQ_CQS_MRQ_WRITE_EXCEPTION BIT(8)
#define TOF_ICC_IRQ_CQS_MRQ_OVERFLOW BIT(9)
#define TOF_ICC_IRQ_CQS_STEERING_READ_EXCEPTION BIT(36)
#define TOF_ICC_IRQ_CQS_MB_READ_EXCEPTION BIT(38)
#define TOF_ICC_IRQ_CQS_PAYLOAD_READ_EXCEPTION BIT(39)
#define TOF_ICC_IRQ_CQS_PAYLOAD_WRITE_EXCEPTION BIT(40)
/* Just for convenience when handling irr values; no CQS CACHEFLUSH_TIMEOUT interrupt actually exists */
#define TOF_ICC_DUMMY_IRQ_CQS_CACHEFLUSH_TIMEOUT BIT(63)
#define TOF_ICC_IRQ_BGS_NODE_ADDRESS_UNMATCH BIT(0)
#define TOF_ICC_IRQ_BGS_BG_RECV_ADDRESS_EXCEPTION BIT(1)
#define TOF_ICC_IRQ_BGS_BG_SEND_ADDRESS_EXCEPTION BIT(2)
#define TOF_ICC_IRQ_BGS_GPID_UNMATCH BIT(3)
#define TOF_ICC_IRQ_BGS_BSEQ_UNMATCH BIT(4)
#define TOF_ICC_IRQ_BGS_SIGNAL_STATE_ERROR BIT(5)
#define TOF_ICC_IRQ_BGS_SYNCHRONIZATION_ACKNOWLEDGE BIT(24)
#define TOF_ICC_IRQ_BGS_ERROR_SYNCHRONIZATION_ACKNOWLEDGE BIT(25)
#define TOF_ICC_IRQ_BGS_DMA_COMPLETION_EXCEPTION BIT(26)
#define TOF_ICC_IRQ_TNI_PBQ_READ_EXCEPTION BIT(0)
#define TOF_ICC_IRQ_TNI_PBQ_MARKED_UE BIT(1)
#define TOF_ICC_IRQ_TNI_PBQ_UNDERFLOW BIT(2)
#define TOF_ICC_IRQ_TNI_PRQ_PACKET_DISCARD BIT(3)
#define TOF_ICC_IRQ_TNI_PRQ_WRITE_ACKNOWLEDGE BIT(4)
#define TOF_ICC_IRQ_TNI_PRQ_WRITE_EXCEPTION BIT(5)
#define TOF_ICC_IRQ_TNI_PRQ_OVERFLOW BIT(6)
#define TOF_ICC_IRQ_TNI_INACTIVE_BG BIT(16)
#define TOF_ICC_IRQ_TNI_STAGE2_TRANSLATION_FAULT BIT(32)
/*
 * TNR interrupt cause bits. Each of TNR0..TNR9 owns five consecutive bits
 * starting at bit 6 * n, in the order RX_FILTER_OUT, TX_FILTER_OUT,
 * PORT_ERROR, DATELINE_ERROR, ROUTING_ERROR; the sixth bit of every group
 * (5, 11, 17, ...) has no name here.
 */
#define TOF_ICC_IRQ_TNR_TNR0_RX_FILTER_OUT BIT(0)
#define TOF_ICC_IRQ_TNR_TNR0_TX_FILTER_OUT BIT(1)
#define TOF_ICC_IRQ_TNR_TNR0_PORT_ERROR BIT(2)
#define TOF_ICC_IRQ_TNR_TNR0_DATELINE_ERROR BIT(3)
#define TOF_ICC_IRQ_TNR_TNR0_ROUTING_ERROR BIT(4)
#define TOF_ICC_IRQ_TNR_TNR1_RX_FILTER_OUT BIT(6)
#define TOF_ICC_IRQ_TNR_TNR1_TX_FILTER_OUT BIT(7)
#define TOF_ICC_IRQ_TNR_TNR1_PORT_ERROR BIT(8)
#define TOF_ICC_IRQ_TNR_TNR1_DATELINE_ERROR BIT(9)
#define TOF_ICC_IRQ_TNR_TNR1_ROUTING_ERROR BIT(10)
#define TOF_ICC_IRQ_TNR_TNR2_RX_FILTER_OUT BIT(12)
#define TOF_ICC_IRQ_TNR_TNR2_TX_FILTER_OUT BIT(13)
#define TOF_ICC_IRQ_TNR_TNR2_PORT_ERROR BIT(14)
#define TOF_ICC_IRQ_TNR_TNR2_DATELINE_ERROR BIT(15)
#define TOF_ICC_IRQ_TNR_TNR2_ROUTING_ERROR BIT(16)
#define TOF_ICC_IRQ_TNR_TNR3_RX_FILTER_OUT BIT(18)
#define TOF_ICC_IRQ_TNR_TNR3_TX_FILTER_OUT BIT(19)
#define TOF_ICC_IRQ_TNR_TNR3_PORT_ERROR BIT(20)
#define TOF_ICC_IRQ_TNR_TNR3_DATELINE_ERROR BIT(21)
#define TOF_ICC_IRQ_TNR_TNR3_ROUTING_ERROR BIT(22)
#define TOF_ICC_IRQ_TNR_TNR4_RX_FILTER_OUT BIT(24)
#define TOF_ICC_IRQ_TNR_TNR4_TX_FILTER_OUT BIT(25)
#define TOF_ICC_IRQ_TNR_TNR4_PORT_ERROR BIT(26)
#define TOF_ICC_IRQ_TNR_TNR4_DATELINE_ERROR BIT(27)
#define TOF_ICC_IRQ_TNR_TNR4_ROUTING_ERROR BIT(28)
#define TOF_ICC_IRQ_TNR_TNR5_RX_FILTER_OUT BIT(30)
#define TOF_ICC_IRQ_TNR_TNR5_TX_FILTER_OUT BIT(31)
#define TOF_ICC_IRQ_TNR_TNR5_PORT_ERROR BIT(32)
#define TOF_ICC_IRQ_TNR_TNR5_DATELINE_ERROR BIT(33)
#define TOF_ICC_IRQ_TNR_TNR5_ROUTING_ERROR BIT(34)
#define TOF_ICC_IRQ_TNR_TNR6_RX_FILTER_OUT BIT(36)
#define TOF_ICC_IRQ_TNR_TNR6_TX_FILTER_OUT BIT(37)
#define TOF_ICC_IRQ_TNR_TNR6_PORT_ERROR BIT(38)
#define TOF_ICC_IRQ_TNR_TNR6_DATELINE_ERROR BIT(39)
#define TOF_ICC_IRQ_TNR_TNR6_ROUTING_ERROR BIT(40)
#define TOF_ICC_IRQ_TNR_TNR7_RX_FILTER_OUT BIT(42)
#define TOF_ICC_IRQ_TNR_TNR7_TX_FILTER_OUT BIT(43)
#define TOF_ICC_IRQ_TNR_TNR7_PORT_ERROR BIT(44)
#define TOF_ICC_IRQ_TNR_TNR7_DATELINE_ERROR BIT(45)
#define TOF_ICC_IRQ_TNR_TNR7_ROUTING_ERROR BIT(46)
#define TOF_ICC_IRQ_TNR_TNR8_RX_FILTER_OUT BIT(48)
#define TOF_ICC_IRQ_TNR_TNR8_TX_FILTER_OUT BIT(49)
#define TOF_ICC_IRQ_TNR_TNR8_PORT_ERROR BIT(50)
#define TOF_ICC_IRQ_TNR_TNR8_DATELINE_ERROR BIT(51)
#define TOF_ICC_IRQ_TNR_TNR8_ROUTING_ERROR BIT(52)
#define TOF_ICC_IRQ_TNR_TNR9_RX_FILTER_OUT BIT(54)
#define TOF_ICC_IRQ_TNR_TNR9_TX_FILTER_OUT BIT(55)
#define TOF_ICC_IRQ_TNR_TNR9_PORT_ERROR BIT(56)
#define TOF_ICC_IRQ_TNR_TNR9_DATELINE_ERROR BIT(57)
#define TOF_ICC_IRQ_TNR_TNR9_ROUTING_ERROR BIT(58)
#endif
/* vim: set noet ts=8 sw=8 sts=0 tw=0 : */

View File

@ -0,0 +1,345 @@
#ifndef _TOF_UAPI_H_
#define _TOF_UAPI_H_
#ifdef __KERNEL__
#include <linux/types.h>
#else
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#endif
/* CQ-related error codes, numbered consecutively from 0. */
enum tof_sig_errno_cq {
	TOF_TOQ_DIRECT_DESCRIPTOR_EXCEPTION = 0,
	TOF_TOQ_SOURCE_TYPE_EXCEPTION       = 1,
	TOF_MRQ_OVERFLOW                    = 2,
	TOF_CQS_CACHEFLUSH_TIMEOUT          = 3,
};
/* BG-related error codes, numbered consecutively from 0. */
enum tof_sig_errno_bg {
	TOF_NODE_ADDRESS_UNMATCH              = 0,
	TOF_BSEQ_UNMATCH                      = 1,
	TOF_SIGNAL_STATE_ERROR                = 2,
	TOF_ERROR_SYNCHRONIZATION_ACKNOWLEDGE = 3,
};
/* UAPI version number; presumably the value expected in tof_init_cq.version -- confirm. */
#define TOF_UAPI_VERSION 0x2a00
/*
 * CQ initialisation parameters (presumably the TOF_IOCTL_INIT_CQ argument
 * -- confirm): queue size selectors, cache-injection flags and pointers to
 * the TOQ/TCQ/MRQ memory.
 */
struct tof_init_cq {
uint16_t version;
uint8_t session_mode;
uint8_t toq_size;
uint8_t mrq_size;
uint8_t num_stag;
uint8_t tcq_cinj;
uint8_t mrq_cinj;
void *toq_mem;
void *tcq_mem;
void *mrq_mem;
};
/*
 * STag allocation parameters (presumably the TOF_IOCTL_ALLOC_STAG argument
 * -- confirm). flags presumably takes the TOF_ST_* values.
 */
struct tof_alloc_stag {
uint32_t flags;
int stag;
uint64_t offset;
void *va;
uint64_t len;
};
/* A count plus a pointer to an int array of STags (presumably num entries -- confirm). */
struct tof_free_stags {
uint16_t num;
int *stags;
};
/* Unpacked address: pa/pb/pc plus the x, y, z, a, b, c components, one byte each. */
struct tof_addr {
uint8_t pa;
uint8_t pb;
uint8_t pc;
uint8_t x;
uint8_t y;
uint8_t z;
uint8_t a;
uint8_t b;
uint8_t c;
};
/*
 * Settings for one barrier gate: the gate itself (tni, gate) plus, for both
 * the source and the destination side, a local gate and a remote
 * address/TNI/gate triple.
 */
struct tof_set_bg {
int tni;
int gate;
int source_lgate;
struct tof_addr source_raddr;
int source_rtni;
int source_rgate;
int dest_lgate;
struct tof_addr dest_raddr;
int dest_rtni;
int dest_rgate;
};
/*
 * BCH enable parameters (presumably the TOF_IOCTL_ENABLE_BCH argument --
 * confirm). bgs points to tof_set_bg entries, presumably num of them.
 */
struct tof_enable_bch {
void *addr;
int bseq;
int num;
struct tof_set_bg *bgs;
};
/*
 * Subnet description: two reserved ints followed by the n/s/l values for
 * x, y and z, one byte each (the same nine values that
 * struct tof_icc_reg_subnet packs into 6-bit fields).
 */
struct tof_set_subnet {
int res0;
int res1;
uint8_t nx;
uint8_t sx;
uint8_t lx;
uint8_t ny;
uint8_t sy;
uint8_t ly;
uint8_t nz;
uint8_t sz;
uint8_t lz;
};
/*
 * User registration parameters (presumably the TOF_IOCTL_REG_USER argument
 * -- confirm): uid, gpid, a subnet and pointers to CQ and BG masks.
 */
struct tof_reg_user {
uid_t uid;
uint32_t gpid;
struct tof_set_subnet subnet;
uint64_t *cqmask;
uint64_t *bgmask;
};
/*
 * Link-down notification: a count plus a pointer to items, each holding
 * x, y, z, a, b, c components and a 16-bit ports value (presumably a port
 * bitmap, with num items in the array -- confirm).
 */
struct tof_notify_linkdown {
int num;
struct {
uint8_t x;
uint8_t y;
uint8_t z;
uint8_t a;
uint8_t b;
uint8_t c;
uint16_t ports;
} *items;
};
/*
 * Port statistics request: a port number, a 64-bit mask and a 31-entry
 * array of 64-bit values (presumably mask selects which counters are
 * returned in pa[] -- confirm).
 */
struct tof_get_port_stat {
int port_no;
uint64_t mask;
uint64_t pa[31];
};
/* CQ statistics: the (tni, cqid) pair identifying the CQ plus txbyte and rxbyte counters. */
struct tof_get_cq_stat {
int tni;
int cqid;
uint64_t txbyte;
uint64_t rxbyte;
};
/* Register load request: an address (pa), a length and a buffer pointer. */
struct tof_load_register {
uint64_t pa;
uint64_t len;
void *buf;
};
/*
 * Resource load request: a resource id (presumably one of the TOF_RSC_*
 * values -- confirm), an offset, a length and a buffer pointer.
 */
struct tof_load_resource {
uint64_t rsc_id;
uint64_t offset;
uint64_t len;
void *buf;
};
/*
 * One 64-bit translation-table word, accessible either as bit-fields
 * (36-bit start, 27-bit len, 1-bit ps_code) or as a whole uint64_t via
 * "atomic" (presumably so the word can be read or written in one access).
 */
union tof_trans_table_bitfield {
struct {
uint64_t start:36;
uint64_t len:27;
uint64_t ps_code:1; /* presumably a TOF_STAG_TRANS_PS_CODE_* value -- confirm */
} bits;
uint64_t atomic;
};
/* Translation table entry: one bit-field word for steering and one for mbpt. */
struct tof_trans_table {
union tof_trans_table_bitfield steering;
union tof_trans_table_bitfield mbpt;
};
/*
 * Register / unregister a link-down callback. The callback takes an int and
 * a const void pointer; their meaning is not visible in this header.
 */
void tof_utofu_set_linkdown_callback(void (*callback)(int, const void *));
void tof_utofu_unset_linkdown_callback(void);
/*
 * mmap offsets. The CQ translation table sits one page after the CQ
 * registers; the page size comes from PAGE_SIZE in the kernel and from
 * sysconf(_SC_PAGESIZE) in user space.
 */
#define TOF_MMAP_CQ_REGISTER 0
#ifdef __KERNEL__
#define TOF_MMAP_CQ_TRANSTABLE (PAGE_SIZE)
#else
#define TOF_MMAP_CQ_TRANSTABLE (sysconf(_SC_PAGESIZE))
#endif
#define TOF_MMAP_BCH_REGISTER 0
#define TOF_MMAP_XB_STQ 0
/* STag flag values: RDWR is 0, RDONLY and LPG are separate bits (presumably for tof_alloc_stag.flags -- confirm). */
#define TOF_ST_RDWR 0x0
#define TOF_ST_RDONLY 0x1
#define TOF_ST_LPG 0x2
/* Page-size codes (presumably for tof_trans_table_bitfield.bits.ps_code -- confirm). */
#define TOF_STAG_TRANS_PS_CODE_64KB 0
#define TOF_STAG_TRANS_PS_CODE_2MB 1
/*
 * ioctl request numbers. All use magic 'd', sequence numbers 0..12 and
 * _IOWR with "long" as the size-carrying type, regardless of the argument
 * structure actually passed.
 */
#define TOF_IOC_MAGIC 'd'
#define TOF_IOCTL_INIT_CQ _IOWR(TOF_IOC_MAGIC, 0, long)
#define TOF_IOCTL_ALLOC_STAG _IOWR(TOF_IOC_MAGIC, 1, long)
#define TOF_IOCTL_FREE_STAGS _IOWR(TOF_IOC_MAGIC, 2, long)
#define TOF_IOCTL_ENABLE_BCH _IOWR(TOF_IOC_MAGIC, 3, long)
#define TOF_IOCTL_DISABLE_BCH _IOWR(TOF_IOC_MAGIC, 4, long)
#define TOF_IOCTL_SET_RT_SIGNAL _IOWR(TOF_IOC_MAGIC, 5, long)
#define TOF_IOCTL_SET_SUBNET _IOWR(TOF_IOC_MAGIC, 6, long)
#define TOF_IOCTL_REG_USER _IOWR(TOF_IOC_MAGIC, 7, long)
#define TOF_IOCTL_NOTIFY_LINKDOWN _IOWR(TOF_IOC_MAGIC, 8, long)
#define TOF_IOCTL_GET_PORT_STAT _IOWR(TOF_IOC_MAGIC, 9, long)
#define TOF_IOCTL_GET_CQ_STAT _IOWR(TOF_IOC_MAGIC, 10, long)
#define TOF_IOCTL_LOAD_REGISTER _IOWR(TOF_IOC_MAGIC, 11, long)
#define TOF_IOCTL_LOAD_RESOURCE _IOWR(TOF_IOC_MAGIC, 12, long)
/* User-space only: short aliases for the TOF_IOCTL_* request numbers. */
#ifndef __KERNEL__
#define TOF_INIT_CQ TOF_IOCTL_INIT_CQ
#define TOF_ALLOC_STAG TOF_IOCTL_ALLOC_STAG
#define TOF_FREE_STAGS TOF_IOCTL_FREE_STAGS
#define TOF_ENABLE_BCH TOF_IOCTL_ENABLE_BCH
#define TOF_DISABLE_BCH TOF_IOCTL_DISABLE_BCH
#define TOF_SET_RT_SIGNAL TOF_IOCTL_SET_RT_SIGNAL
#define TOF_SET_SUBNET TOF_IOCTL_SET_SUBNET
#define TOF_REG_USER TOF_IOCTL_REG_USER
#define TOF_NOTIFY_LINKDOWN TOF_IOCTL_NOTIFY_LINKDOWN
#define TOF_GET_PORT_STAT TOF_IOCTL_GET_PORT_STAT
#define TOF_GET_CQ_STAT TOF_IOCTL_GET_CQ_STAT
#define TOF_LOAD_REGISTER TOF_IOCTL_LOAD_REGISTER
#define TOF_LOAD_RESOURCE TOF_IOCTL_LOAD_RESOURCE
#endif
/*
 * Resource identifiers. Every per-CQ resource class (TOQ, TCQ, MRQ,
 * STEERINGTABLE, MBTABLE) has 6 TNIs x 12 CQs = 72 consecutive entries,
 * laid out TNI-major, so an entry's value is <class base> + TNI * 12 + CQID.
 * PBQ and PRQ have one entry per TNI. The numeric ranges given in the
 * section comments must hold: index arithmetic on the class bases relies on
 * every class being complete.
 */
enum {
	/* TOQ (0 - 71) */
	TOF_RSC_TNI0_TOQ0 = 0, TOF_RSC_TNI0_TOQ1, TOF_RSC_TNI0_TOQ2, TOF_RSC_TNI0_TOQ3,
	TOF_RSC_TNI0_TOQ4, TOF_RSC_TNI0_TOQ5, TOF_RSC_TNI0_TOQ6, TOF_RSC_TNI0_TOQ7,
	TOF_RSC_TNI0_TOQ8, TOF_RSC_TNI0_TOQ9, TOF_RSC_TNI0_TOQ10, TOF_RSC_TNI0_TOQ11,
	TOF_RSC_TNI1_TOQ0, TOF_RSC_TNI1_TOQ1, TOF_RSC_TNI1_TOQ2, TOF_RSC_TNI1_TOQ3,
	TOF_RSC_TNI1_TOQ4, TOF_RSC_TNI1_TOQ5, TOF_RSC_TNI1_TOQ6, TOF_RSC_TNI1_TOQ7,
	TOF_RSC_TNI1_TOQ8, TOF_RSC_TNI1_TOQ9, TOF_RSC_TNI1_TOQ10, TOF_RSC_TNI1_TOQ11,
	TOF_RSC_TNI2_TOQ0, TOF_RSC_TNI2_TOQ1, TOF_RSC_TNI2_TOQ2, TOF_RSC_TNI2_TOQ3,
	TOF_RSC_TNI2_TOQ4, TOF_RSC_TNI2_TOQ5, TOF_RSC_TNI2_TOQ6, TOF_RSC_TNI2_TOQ7,
	TOF_RSC_TNI2_TOQ8, TOF_RSC_TNI2_TOQ9, TOF_RSC_TNI2_TOQ10, TOF_RSC_TNI2_TOQ11,
	TOF_RSC_TNI3_TOQ0, TOF_RSC_TNI3_TOQ1, TOF_RSC_TNI3_TOQ2, TOF_RSC_TNI3_TOQ3,
	TOF_RSC_TNI3_TOQ4, TOF_RSC_TNI3_TOQ5, TOF_RSC_TNI3_TOQ6, TOF_RSC_TNI3_TOQ7,
	TOF_RSC_TNI3_TOQ8, TOF_RSC_TNI3_TOQ9, TOF_RSC_TNI3_TOQ10, TOF_RSC_TNI3_TOQ11,
	TOF_RSC_TNI4_TOQ0, TOF_RSC_TNI4_TOQ1, TOF_RSC_TNI4_TOQ2, TOF_RSC_TNI4_TOQ3,
	TOF_RSC_TNI4_TOQ4, TOF_RSC_TNI4_TOQ5, TOF_RSC_TNI4_TOQ6, TOF_RSC_TNI4_TOQ7,
	TOF_RSC_TNI4_TOQ8, TOF_RSC_TNI4_TOQ9, TOF_RSC_TNI4_TOQ10, TOF_RSC_TNI4_TOQ11,
	TOF_RSC_TNI5_TOQ0, TOF_RSC_TNI5_TOQ1, TOF_RSC_TNI5_TOQ2, TOF_RSC_TNI5_TOQ3,
	TOF_RSC_TNI5_TOQ4, TOF_RSC_TNI5_TOQ5, TOF_RSC_TNI5_TOQ6, TOF_RSC_TNI5_TOQ7,
	TOF_RSC_TNI5_TOQ8, TOF_RSC_TNI5_TOQ9, TOF_RSC_TNI5_TOQ10, TOF_RSC_TNI5_TOQ11,
	/* TCQ (72 - 143) */
	TOF_RSC_TNI0_TCQ0, TOF_RSC_TNI0_TCQ1, TOF_RSC_TNI0_TCQ2, TOF_RSC_TNI0_TCQ3,
	TOF_RSC_TNI0_TCQ4, TOF_RSC_TNI0_TCQ5, TOF_RSC_TNI0_TCQ6, TOF_RSC_TNI0_TCQ7,
	TOF_RSC_TNI0_TCQ8, TOF_RSC_TNI0_TCQ9, TOF_RSC_TNI0_TCQ10, TOF_RSC_TNI0_TCQ11,
	TOF_RSC_TNI1_TCQ0, TOF_RSC_TNI1_TCQ1, TOF_RSC_TNI1_TCQ2, TOF_RSC_TNI1_TCQ3,
	TOF_RSC_TNI1_TCQ4, TOF_RSC_TNI1_TCQ5, TOF_RSC_TNI1_TCQ6, TOF_RSC_TNI1_TCQ7,
	TOF_RSC_TNI1_TCQ8, TOF_RSC_TNI1_TCQ9, TOF_RSC_TNI1_TCQ10, TOF_RSC_TNI1_TCQ11,
	TOF_RSC_TNI2_TCQ0, TOF_RSC_TNI2_TCQ1, TOF_RSC_TNI2_TCQ2, TOF_RSC_TNI2_TCQ3,
	TOF_RSC_TNI2_TCQ4, TOF_RSC_TNI2_TCQ5, TOF_RSC_TNI2_TCQ6, TOF_RSC_TNI2_TCQ7,
	TOF_RSC_TNI2_TCQ8, TOF_RSC_TNI2_TCQ9, TOF_RSC_TNI2_TCQ10, TOF_RSC_TNI2_TCQ11,
	TOF_RSC_TNI3_TCQ0, TOF_RSC_TNI3_TCQ1, TOF_RSC_TNI3_TCQ2, TOF_RSC_TNI3_TCQ3,
	TOF_RSC_TNI3_TCQ4, TOF_RSC_TNI3_TCQ5, TOF_RSC_TNI3_TCQ6, TOF_RSC_TNI3_TCQ7,
	TOF_RSC_TNI3_TCQ8, TOF_RSC_TNI3_TCQ9, TOF_RSC_TNI3_TCQ10, TOF_RSC_TNI3_TCQ11,
	TOF_RSC_TNI4_TCQ0, TOF_RSC_TNI4_TCQ1, TOF_RSC_TNI4_TCQ2, TOF_RSC_TNI4_TCQ3,
	TOF_RSC_TNI4_TCQ4, TOF_RSC_TNI4_TCQ5, TOF_RSC_TNI4_TCQ6, TOF_RSC_TNI4_TCQ7,
	TOF_RSC_TNI4_TCQ8, TOF_RSC_TNI4_TCQ9, TOF_RSC_TNI4_TCQ10, TOF_RSC_TNI4_TCQ11,
	TOF_RSC_TNI5_TCQ0, TOF_RSC_TNI5_TCQ1, TOF_RSC_TNI5_TCQ2, TOF_RSC_TNI5_TCQ3,
	TOF_RSC_TNI5_TCQ4, TOF_RSC_TNI5_TCQ5, TOF_RSC_TNI5_TCQ6, TOF_RSC_TNI5_TCQ7,
	TOF_RSC_TNI5_TCQ8, TOF_RSC_TNI5_TCQ9, TOF_RSC_TNI5_TCQ10, TOF_RSC_TNI5_TCQ11,
	/* MRQ (144 - 215) */
	TOF_RSC_TNI0_MRQ0, TOF_RSC_TNI0_MRQ1, TOF_RSC_TNI0_MRQ2, TOF_RSC_TNI0_MRQ3,
	TOF_RSC_TNI0_MRQ4, TOF_RSC_TNI0_MRQ5, TOF_RSC_TNI0_MRQ6, TOF_RSC_TNI0_MRQ7,
	TOF_RSC_TNI0_MRQ8, TOF_RSC_TNI0_MRQ9, TOF_RSC_TNI0_MRQ10, TOF_RSC_TNI0_MRQ11,
	TOF_RSC_TNI1_MRQ0, TOF_RSC_TNI1_MRQ1, TOF_RSC_TNI1_MRQ2, TOF_RSC_TNI1_MRQ3,
	TOF_RSC_TNI1_MRQ4, TOF_RSC_TNI1_MRQ5, TOF_RSC_TNI1_MRQ6, TOF_RSC_TNI1_MRQ7,
	TOF_RSC_TNI1_MRQ8, TOF_RSC_TNI1_MRQ9, TOF_RSC_TNI1_MRQ10, TOF_RSC_TNI1_MRQ11,
	TOF_RSC_TNI2_MRQ0, TOF_RSC_TNI2_MRQ1, TOF_RSC_TNI2_MRQ2, TOF_RSC_TNI2_MRQ3,
	TOF_RSC_TNI2_MRQ4, TOF_RSC_TNI2_MRQ5, TOF_RSC_TNI2_MRQ6, TOF_RSC_TNI2_MRQ7,
	TOF_RSC_TNI2_MRQ8, TOF_RSC_TNI2_MRQ9, TOF_RSC_TNI2_MRQ10, TOF_RSC_TNI2_MRQ11,
	TOF_RSC_TNI3_MRQ0, TOF_RSC_TNI3_MRQ1, TOF_RSC_TNI3_MRQ2, TOF_RSC_TNI3_MRQ3,
	TOF_RSC_TNI3_MRQ4, TOF_RSC_TNI3_MRQ5, TOF_RSC_TNI3_MRQ6, TOF_RSC_TNI3_MRQ7,
	TOF_RSC_TNI3_MRQ8, TOF_RSC_TNI3_MRQ9, TOF_RSC_TNI3_MRQ10, TOF_RSC_TNI3_MRQ11,
	TOF_RSC_TNI4_MRQ0, TOF_RSC_TNI4_MRQ1, TOF_RSC_TNI4_MRQ2, TOF_RSC_TNI4_MRQ3,
	TOF_RSC_TNI4_MRQ4, TOF_RSC_TNI4_MRQ5, TOF_RSC_TNI4_MRQ6, TOF_RSC_TNI4_MRQ7,
	TOF_RSC_TNI4_MRQ8, TOF_RSC_TNI4_MRQ9, TOF_RSC_TNI4_MRQ10, TOF_RSC_TNI4_MRQ11,
	TOF_RSC_TNI5_MRQ0, TOF_RSC_TNI5_MRQ1, TOF_RSC_TNI5_MRQ2, TOF_RSC_TNI5_MRQ3,
	TOF_RSC_TNI5_MRQ4, TOF_RSC_TNI5_MRQ5, TOF_RSC_TNI5_MRQ6, TOF_RSC_TNI5_MRQ7,
	TOF_RSC_TNI5_MRQ8, TOF_RSC_TNI5_MRQ9, TOF_RSC_TNI5_MRQ10, TOF_RSC_TNI5_MRQ11,
	/* PBQ (216 - 221) */
	TOF_RSC_TNI0_PBQ, TOF_RSC_TNI1_PBQ, TOF_RSC_TNI2_PBQ, TOF_RSC_TNI3_PBQ,
	TOF_RSC_TNI4_PBQ, TOF_RSC_TNI5_PBQ,
	/* PRQ (222 - 227) */
	TOF_RSC_TNI0_PRQ, TOF_RSC_TNI1_PRQ, TOF_RSC_TNI2_PRQ, TOF_RSC_TNI3_PRQ,
	TOF_RSC_TNI4_PRQ, TOF_RSC_TNI5_PRQ,
	/* STEERINGTABLE (228 - 299) */
	TOF_RSC_TNI0_STEERINGTABLE0, TOF_RSC_TNI0_STEERINGTABLE1, TOF_RSC_TNI0_STEERINGTABLE2,
	TOF_RSC_TNI0_STEERINGTABLE3, TOF_RSC_TNI0_STEERINGTABLE4, TOF_RSC_TNI0_STEERINGTABLE5,
	TOF_RSC_TNI0_STEERINGTABLE6, TOF_RSC_TNI0_STEERINGTABLE7, TOF_RSC_TNI0_STEERINGTABLE8,
	TOF_RSC_TNI0_STEERINGTABLE9, TOF_RSC_TNI0_STEERINGTABLE10, TOF_RSC_TNI0_STEERINGTABLE11,
	TOF_RSC_TNI1_STEERINGTABLE0, TOF_RSC_TNI1_STEERINGTABLE1, TOF_RSC_TNI1_STEERINGTABLE2,
	TOF_RSC_TNI1_STEERINGTABLE3, TOF_RSC_TNI1_STEERINGTABLE4, TOF_RSC_TNI1_STEERINGTABLE5,
	TOF_RSC_TNI1_STEERINGTABLE6, TOF_RSC_TNI1_STEERINGTABLE7, TOF_RSC_TNI1_STEERINGTABLE8,
	TOF_RSC_TNI1_STEERINGTABLE9, TOF_RSC_TNI1_STEERINGTABLE10, TOF_RSC_TNI1_STEERINGTABLE11,
	TOF_RSC_TNI2_STEERINGTABLE0, TOF_RSC_TNI2_STEERINGTABLE1, TOF_RSC_TNI2_STEERINGTABLE2,
	TOF_RSC_TNI2_STEERINGTABLE3, TOF_RSC_TNI2_STEERINGTABLE4, TOF_RSC_TNI2_STEERINGTABLE5,
	TOF_RSC_TNI2_STEERINGTABLE6, TOF_RSC_TNI2_STEERINGTABLE7, TOF_RSC_TNI2_STEERINGTABLE8,
	TOF_RSC_TNI2_STEERINGTABLE9, TOF_RSC_TNI2_STEERINGTABLE10, TOF_RSC_TNI2_STEERINGTABLE11,
	TOF_RSC_TNI3_STEERINGTABLE0, TOF_RSC_TNI3_STEERINGTABLE1, TOF_RSC_TNI3_STEERINGTABLE2,
	TOF_RSC_TNI3_STEERINGTABLE3, TOF_RSC_TNI3_STEERINGTABLE4, TOF_RSC_TNI3_STEERINGTABLE5,
	TOF_RSC_TNI3_STEERINGTABLE6, TOF_RSC_TNI3_STEERINGTABLE7, TOF_RSC_TNI3_STEERINGTABLE8,
	TOF_RSC_TNI3_STEERINGTABLE9, TOF_RSC_TNI3_STEERINGTABLE10, TOF_RSC_TNI3_STEERINGTABLE11,
	TOF_RSC_TNI4_STEERINGTABLE0, TOF_RSC_TNI4_STEERINGTABLE1, TOF_RSC_TNI4_STEERINGTABLE2,
	TOF_RSC_TNI4_STEERINGTABLE3, TOF_RSC_TNI4_STEERINGTABLE4, TOF_RSC_TNI4_STEERINGTABLE5,
	TOF_RSC_TNI4_STEERINGTABLE6, TOF_RSC_TNI4_STEERINGTABLE7, TOF_RSC_TNI4_STEERINGTABLE8,
	TOF_RSC_TNI4_STEERINGTABLE9, TOF_RSC_TNI4_STEERINGTABLE10, TOF_RSC_TNI4_STEERINGTABLE11,
	/* TNI5 entries 0-2 must be present, otherwise every later value shifts by 3 */
	TOF_RSC_TNI5_STEERINGTABLE0, TOF_RSC_TNI5_STEERINGTABLE1, TOF_RSC_TNI5_STEERINGTABLE2,
	TOF_RSC_TNI5_STEERINGTABLE3, TOF_RSC_TNI5_STEERINGTABLE4, TOF_RSC_TNI5_STEERINGTABLE5,
	TOF_RSC_TNI5_STEERINGTABLE6, TOF_RSC_TNI5_STEERINGTABLE7, TOF_RSC_TNI5_STEERINGTABLE8,
	TOF_RSC_TNI5_STEERINGTABLE9, TOF_RSC_TNI5_STEERINGTABLE10, TOF_RSC_TNI5_STEERINGTABLE11,
	/* MBTABLE (300 - 371) */
	TOF_RSC_TNI0_MBTABLE0, TOF_RSC_TNI0_MBTABLE1, TOF_RSC_TNI0_MBTABLE2,
	TOF_RSC_TNI0_MBTABLE3, TOF_RSC_TNI0_MBTABLE4, TOF_RSC_TNI0_MBTABLE5,
	TOF_RSC_TNI0_MBTABLE6, TOF_RSC_TNI0_MBTABLE7, TOF_RSC_TNI0_MBTABLE8,
	TOF_RSC_TNI0_MBTABLE9, TOF_RSC_TNI0_MBTABLE10, TOF_RSC_TNI0_MBTABLE11,
	TOF_RSC_TNI1_MBTABLE0, TOF_RSC_TNI1_MBTABLE1, TOF_RSC_TNI1_MBTABLE2,
	TOF_RSC_TNI1_MBTABLE3, TOF_RSC_TNI1_MBTABLE4, TOF_RSC_TNI1_MBTABLE5,
	TOF_RSC_TNI1_MBTABLE6, TOF_RSC_TNI1_MBTABLE7, TOF_RSC_TNI1_MBTABLE8,
	TOF_RSC_TNI1_MBTABLE9, TOF_RSC_TNI1_MBTABLE10, TOF_RSC_TNI1_MBTABLE11,
	TOF_RSC_TNI2_MBTABLE0, TOF_RSC_TNI2_MBTABLE1, TOF_RSC_TNI2_MBTABLE2,
	TOF_RSC_TNI2_MBTABLE3, TOF_RSC_TNI2_MBTABLE4, TOF_RSC_TNI2_MBTABLE5,
	TOF_RSC_TNI2_MBTABLE6, TOF_RSC_TNI2_MBTABLE7, TOF_RSC_TNI2_MBTABLE8,
	TOF_RSC_TNI2_MBTABLE9, TOF_RSC_TNI2_MBTABLE10, TOF_RSC_TNI2_MBTABLE11,
	TOF_RSC_TNI3_MBTABLE0, TOF_RSC_TNI3_MBTABLE1, TOF_RSC_TNI3_MBTABLE2,
	TOF_RSC_TNI3_MBTABLE3, TOF_RSC_TNI3_MBTABLE4, TOF_RSC_TNI3_MBTABLE5,
	TOF_RSC_TNI3_MBTABLE6, TOF_RSC_TNI3_MBTABLE7, TOF_RSC_TNI3_MBTABLE8,
	TOF_RSC_TNI3_MBTABLE9, TOF_RSC_TNI3_MBTABLE10, TOF_RSC_TNI3_MBTABLE11,
	TOF_RSC_TNI4_MBTABLE0, TOF_RSC_TNI4_MBTABLE1, TOF_RSC_TNI4_MBTABLE2,
	TOF_RSC_TNI4_MBTABLE3, TOF_RSC_TNI4_MBTABLE4, TOF_RSC_TNI4_MBTABLE5,
	TOF_RSC_TNI4_MBTABLE6, TOF_RSC_TNI4_MBTABLE7, TOF_RSC_TNI4_MBTABLE8,
	TOF_RSC_TNI4_MBTABLE9, TOF_RSC_TNI4_MBTABLE10, TOF_RSC_TNI4_MBTABLE11,
	TOF_RSC_TNI5_MBTABLE0, TOF_RSC_TNI5_MBTABLE1, TOF_RSC_TNI5_MBTABLE2,
	TOF_RSC_TNI5_MBTABLE3, TOF_RSC_TNI5_MBTABLE4, TOF_RSC_TNI5_MBTABLE5,
	TOF_RSC_TNI5_MBTABLE6, TOF_RSC_TNI5_MBTABLE7, TOF_RSC_TNI5_MBTABLE8,
	TOF_RSC_TNI5_MBTABLE9, TOF_RSC_TNI5_MBTABLE10, TOF_RSC_TNI5_MBTABLE11,
	TOF_RSC_NUM /* 372 */
};
/*
 * Map a (TNI, CQID) pair -- or a TNI alone for PBQ/PRQ -- to its resource
 * identifier: <first enumerator of the class> + TNI * 12 + CQID.
 * Arguments are fully parenthesized so that arbitrary expressions
 * (e.g. "a + b" or "c ? x : y") can be passed safely.
 */
#define TOF_RSC_TOQ(TNI, CQID) (TOF_RSC_TNI0_TOQ0 + ((TNI) * 12) + (CQID))
#define TOF_RSC_TCQ(TNI, CQID) (TOF_RSC_TNI0_TCQ0 + ((TNI) * 12) + (CQID))
#define TOF_RSC_MRQ(TNI, CQID) (TOF_RSC_TNI0_MRQ0 + ((TNI) * 12) + (CQID))
#define TOF_RSC_PBQ(TNI) (TOF_RSC_TNI0_PBQ + (TNI))
#define TOF_RSC_PRQ(TNI) (TOF_RSC_TNI0_PRQ + (TNI))
#define TOF_RSC_STT(TNI, CQID) (TOF_RSC_TNI0_STEERINGTABLE0 + ((TNI) * 12) + (CQID))
#define TOF_RSC_MBT(TNI, CQID) (TOF_RSC_TNI0_MBTABLE0 + ((TNI) * 12) + (CQID))
#endif
/* vim: set noet ts=8 sw=8 sts=0 tw=0 : */

View File

@ -0,0 +1,6 @@
/*
 * Anonymous-struct member "bch". This is a bare member declaration, not a
 * complete translation unit: presumably the fragment that struct
 * tof_utofu_bg pulls in through #include "tof_utofu_bg_bch.h" -- confirm.
 * NOTE(review): field meanings below are inferred from their names only.
 */
struct {
bool enabled;
/* one mask word per TNI (array is sized by TOF_ICC_NTNIS) */
uint64_t bgmask[TOF_ICC_NTNIS];
/* presumably the I/O virtual address paired with kaddr -- verify */
uintptr_t iova;
void *kaddr;
} bch;

View File

@ -0,0 +1,6 @@
/*
 * Anonymous-struct member "trans". This is a bare member declaration, not a
 * complete translation unit: presumably the fragment that struct
 * tof_utofu_cq pulls in through #include "tof_utofu_cq_trans.h" -- confirm.
 * NOTE(review): field meanings below are inferred from their names only.
 */
struct {
struct tof_utofu_trans_list *mru;
struct tof_trans_table *table;
int mruhead;
/* presumably serializes access to mru / mruhead -- verify against users */
ihk_spinlock_t mru_lock;
} trans;

View File

@ -0,0 +1,21 @@
/*
 * Offset-pinned view of struct tof_utofu_bg.
 *
 * The structure is a single anonymous union. whole_struct[] makes the union
 * at least 160 bytes large, and every other alternative is an anonymous
 * struct consisting of a char padding array followed by one named member,
 * which places that member at the byte offset given by the padding size.
 * Only the members listed here are accessible; all other bytes are opaque.
 * NOTE(review): presumably the offsets mirror a structure owned by the
 * Tofu driver and must be kept in sync with it -- confirm.
 */
struct tof_utofu_bg {
union {
char whole_struct[160];
struct {
/* zero-length array (GNU C extension): "common" sits at offset 0 */
char padding0[0];
struct tof_utofu_device common;
};
struct {
/* offset 80 */
char padding1[80];
uint8_t tni;
};
struct {
/* offset 81 */
char padding2[81];
uint8_t bgid;
};
struct {
/* offset 88; the included fragment declares the member placed here */
char padding3[88];
#include "tof_utofu_bg_bch.h"
};
};
};

View File

@ -0,0 +1,37 @@
/*
 * Offset-pinned view of struct tof_utofu_cq.
 *
 * The structure is a single anonymous union. whole_struct[] makes the union
 * at least 384 bytes large, and every other alternative is an anonymous
 * struct consisting of a char padding array followed by one named member,
 * which places that member at the byte offset given by the padding size.
 * Only the members listed here are accessible; all other bytes are opaque.
 * NOTE(review): presumably the offsets mirror a structure owned by the
 * Tofu driver and must be kept in sync with it -- confirm.
 */
struct tof_utofu_cq {
union {
char whole_struct[384];
struct {
/* zero-length array (GNU C extension): "common" sits at offset 0 */
char padding0[0];
struct tof_utofu_device common;
};
struct {
/* offset 80 */
char padding1[80];
uint8_t tni;
};
struct {
/* offset 81 */
char padding2[81];
uint8_t cqid;
};
struct {
/* offset 104; the included fragment declares the member placed here */
char padding3[104];
#include "tof_utofu_cq_trans.h"
};
struct {
/* offset 128 */
char padding4[128];
struct tof_icc_steering_entry *steering;
};
struct {
/* offset 136 */
char padding5[136];
struct tof_icc_mb_entry *mb;
};
struct {
/* offset 186 */
char padding6[186];
uint8_t num_stag;
};
struct {
/* offset 336 */
char padding7[336];
struct mmu_notifier mn;
};
};
};

View File

@ -0,0 +1,17 @@
/*
 * Offset-pinned view of struct tof_utofu_device.
 *
 * The structure is a single anonymous union. whole_struct[] makes the union
 * at least 80 bytes large, and every other alternative is an anonymous
 * struct consisting of a char padding array followed by one named member,
 * which places that member at the byte offset given by the padding size.
 * Only the members listed here are accessible; all other bytes are opaque.
 * NOTE(review): presumably the offsets mirror a structure owned by the
 * Tofu driver and must be kept in sync with it -- confirm.
 */
struct tof_utofu_device {
union {
char whole_struct[80];
struct {
/* zero-length array (GNU C extension): "enabled" sits at offset 0 */
char padding0[0];
bool enabled;
};
struct {
/* offset 12 */
char padding1[12];
uint32_t gpid;
};
struct {
/* offset 24 */
char padding2[24];
uint64_t subnet;
};
};
};

View File

@ -0,0 +1,33 @@
/*
 * Offset-pinned view of struct tof_utofu_mbpt.
 *
 * The structure is a single anonymous union. whole_struct[] makes the union
 * at least 56 bytes large, and every other alternative is an anonymous
 * struct consisting of a char padding array followed by one named member,
 * which places that member at the byte offset given by the padding size.
 * NOTE(review): presumably the offsets mirror a structure owned by the
 * Tofu driver and must be kept in sync with it -- confirm.
 */
struct tof_utofu_mbpt {
union {
char whole_struct[56];
struct {
/* zero-length array (GNU C extension): "kref" sits at offset 0 */
char padding0[0];
struct kref kref;
};
struct {
/* offset 8 */
char padding1[8];
struct tof_utofu_cq *ucq;
};
struct {
/* offset 16 */
char padding2[16];
uintptr_t iova;
};
struct {
/* offset 24 */
char padding3[24];
struct scatterlist *sg;
};
struct {
/* offset 32 */
char padding4[32];
size_t nsgents;
};
struct {
/* offset 40 */
char padding5[40];
uintptr_t mbptstart;
};
struct {
/* offset 48 */
char padding6[48];
size_t pgsz;
};
};
};

View File

@ -36,6 +36,7 @@
#include <linux/semaphore.h>
#include <linux/interrupt.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/io.h>
@ -229,6 +230,9 @@ static long mcexec_prepare_image(ihk_os_t os,
dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
__FUNCTION__, ppd->pid, ppd->rpgtable);
#ifdef ENABLE_TOFU
ppd->enable_tofu = pdesc->enable_tofu;
#endif
ret = 0;
@ -266,18 +270,24 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
return -EFAULT;
}
#ifdef CONFIG_MIC
if (pt.size > PAGE_SIZE) {
printk("mcexec_transfer_image(): ERROR: size exceeds PAGE_SIZE\n");
return -EFAULT;
}
phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, PAGE_SIZE);
#ifdef CONFIG_MIC
rpm = ioremap_wc(phys, PAGE_SIZE);
#else
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, pt.size);
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, pt.size, NULL, 0);
#endif
if (!rpm) {
pr_err("%s(): error: invalid remote address\n", __func__);
return -EFAULT;
}
if (pt.direction == MCEXEC_UP_TRANSFER_TO_REMOTE) {
if (copy_from_user(rpm, pt.userp, pt.size)) {
ret = -EFAULT;
@ -295,10 +305,11 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
#ifdef CONFIG_MIC
iounmap(rpm);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
#else
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, PAGE_SIZE);
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, pt.size);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, pt.size);
#endif
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
return ret;
@ -378,6 +389,7 @@ static void release_handler(ihk_os_t os, void *param)
int os_ind = ihk_host_os_get_index(os);
unsigned long flags;
struct host_thread *thread;
int ret;
/* Finalize FS switch for uti threads */
write_lock_irqsave(&host_thread_lock, flags);
@ -399,7 +411,13 @@ static void release_handler(ihk_os_t os, void *param)
dprintk("%s: SCD_MSG_CLEANUP_PROCESS, info: %p, cpu: %d\n",
__FUNCTION__, info, info->cpu);
mcctrl_ikc_send(os, info->cpu, &isp);
ret = mcctrl_ikc_send_wait(os, info->cpu,
&isp, -20, NULL, NULL, 0);
if (ret != 0) {
printk("%s: WARNING: failed to send IKC msg: %d\n",
__func__, ret);
}
if (os_ind >= 0) {
delete_pid_entry(os_ind, info->pid);
}
@ -587,13 +605,14 @@ extern int mckernel_cpu_2_linux_cpu(struct mcctrl_usrdata *udp, int cpu_id);
static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
{
struct mcctrl_usrdata *udp = ihk_host_os_get_usrdata(os);
struct mcctrl_part_exec *pe;
struct mcctrl_part_exec *pe = NULL, *pe_itr;
struct get_cpu_set_arg req;
struct mcctrl_cpu_topology *cpu_top, *cpu_top_i;
struct cache_topology *cache_top;
int cpu, cpus_assigned, cpus_to_assign, cpu_prev;
int ret = 0;
int mcexec_linux_numa;
int pe_list_len = 0;
cpumask_t *mcexec_cpu_set = NULL;
cpumask_t *cpus_used = NULL;
cpumask_t *cpus_to_use = NULL;
@ -613,24 +632,126 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
return -EINVAL;
}
pe = &udp->part_exec;
mutex_lock(&pe->lock);
if (copy_from_user(&req, (void *)arg, sizeof(req))) {
printk("%s: error copying user request\n", __FUNCTION__);
pr_err("%s: error copying user request\n", __func__);
ret = -EINVAL;
goto put_and_unlock_out;
goto put_out;
}
/* First process to enter CPU partitioning */
if (pe->nr_processes == -1) {
/* User requested CPU mask? */
if (req.req_cpu_list && req.req_cpu_list_len) {
char *cpu_list = NULL;
cpu_list = kmalloc(req.req_cpu_list_len, GFP_KERNEL);
if (!cpu_list) {
printk("%s: error: allocating CPU list\n", __FUNCTION__);
ret = -ENOMEM;
goto put_out;
}
if (copy_from_user(cpu_list,
req.req_cpu_list, req.req_cpu_list_len)) {
printk("%s: error copying CPU list request\n", __FUNCTION__);
kfree(cpu_list);
ret = -EINVAL;
goto put_out;
}
cpus_used = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
cpus_to_use = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!cpus_to_use || !cpus_used) {
printk("%s: error: allocating CPU mask\n", __FUNCTION__);
ret = -ENOMEM;
kfree(cpu_list);
goto put_out;
}
memset(cpus_used, 0, sizeof(cpumask_t));
memset(cpus_to_use, 0, sizeof(cpumask_t));
/* Parse CPU list */
if (cpulist_parse(cpu_list, cpus_to_use) < 0) {
printk("%s: invalid CPUs requested: %s\n",
__FUNCTION__, cpu_list);
ret = -EINVAL;
kfree(cpu_list);
goto put_out;
}
memcpy(cpus_used, cpus_to_use, sizeof(cpumask_t));
/* Copy mask to user-space */
if (copy_to_user(req.cpu_set, cpus_used,
(req.cpu_set_size < sizeof(cpumask_t) ?
req.cpu_set_size : sizeof(cpumask_t)))) {
printk("%s: error copying mask to user\n", __FUNCTION__);
ret = -EINVAL;
kfree(cpu_list);
goto put_out;
}
/* Copy IKC target core */
cpu = cpumask_next(-1, cpus_used);
if (copy_to_user(req.target_core, &cpu, sizeof(cpu))) {
printk("%s: error copying target core to user\n",
__FUNCTION__);
ret = -EINVAL;
kfree(cpu_list);
goto put_out;
}
/* Save in per-process structure */
memcpy(&ppd->cpu_set, cpus_used, sizeof(cpumask_t));
ppd->ikc_target_cpu = cpu;
printk("%s: %s -> target McKernel CPU: %d\n",
__func__, cpu_list, cpu);
ret = 0;
kfree(cpu_list);
goto put_out;
}
mutex_lock(&udp->part_exec_lock);
/* Find part_exec having same node_proxy */
list_for_each_entry_reverse(pe_itr, &udp->part_exec_list, chain) {
pe_list_len++;
if (pe_itr->node_proxy_pid == req.ppid) {
pe = pe_itr;
break;
}
}
if (!pe) {
/* First process to enter CPU partitioning */
pr_debug("%s: pe_list_len:%d\n", __func__, pe_list_len);
if (pe_list_len >= PE_LIST_MAXLEN) {
/* delete head entry of pe_list */
pe_itr = list_first_entry(&udp->part_exec_list,
struct mcctrl_part_exec, chain);
list_del(&pe_itr->chain);
kfree(pe_itr);
}
pe = kzalloc(sizeof(struct mcctrl_part_exec), GFP_KERNEL);
if (!pe) {
mutex_unlock(&udp->part_exec_lock);
ret = -ENOMEM;
goto put_out;
}
/* Init part_exec */
mutex_init(&pe->lock);
INIT_LIST_HEAD(&pe->pli_list);
pe->nr_processes = req.nr_processes;
pe->nr_processes_left = req.nr_processes;
pe->nr_processes_joined = 0;
pe->node_proxy_pid = req.ppid;
list_add_tail(&pe->chain, &udp->part_exec_list);
dprintk("%s: nr_processes: %d (partitioned exec starts)\n",
__FUNCTION__,
pe->nr_processes);
__func__, pe->nr_processes);
}
mutex_unlock(&udp->part_exec_lock);
mutex_lock(&pe->lock);
if (pe->nr_processes != req.nr_processes) {
printk("%s: error: requested number of processes"
@ -640,7 +761,15 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
goto put_and_unlock_out;
}
if (pe->nr_processes_joined >= pe->nr_processes) {
printk("%s: too many processes have joined to the group of %d\n",
__func__, req.ppid);
ret = -EINVAL;
goto put_and_unlock_out;
}
--pe->nr_processes_left;
++pe->nr_processes_joined;
dprintk("%s: nr_processes: %d, nr_processes_left: %d\n",
__FUNCTION__,
pe->nr_processes,
@ -726,8 +855,6 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
wake_up_interruptible(&pli_next->pli_wq);
}
/* Reset process counter to start state */
pe->nr_processes = -1;
ret = -ETIMEDOUT;
goto put_and_unlock_out;
}
@ -975,16 +1102,8 @@ next_cpu:
/* Commit used cores to OS structure */
memcpy(&pe->cpus_used, cpus_used, sizeof(*cpus_used));
/* Reset if last process */
if (pe->nr_processes_left == 0) {
dprintk("%s: nr_processes: %d (partitioned exec ends)\n",
__FUNCTION__,
pe->nr_processes);
pe->nr_processes = -1;
memset(&pe->cpus_used, 0, sizeof(pe->cpus_used));
}
/* Otherwise wake up next process in list */
else {
/* If not last process, wake up next process in list */
if (pe->nr_processes_left != 0) {
++pe->process_rank;
pli_next = list_first_entry(&pe->pli_list,
struct process_list_item, list);
@ -997,11 +1116,14 @@ next_cpu:
ret = 0;
put_and_unlock_out:
mutex_unlock(&pe->lock);
put_out:
mcctrl_put_per_proc_data(ppd);
kfree(cpus_to_use);
kfree(cpus_used);
kfree(mcexec_cpu_set);
mcctrl_put_per_proc_data(ppd);
mutex_unlock(&pe->lock);
return ret;
}
@ -1146,7 +1268,7 @@ void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd)
process is gone and the application should be terminated. */
packet = (struct ikc_scd_packet *)ptd->data;
dprintk("%s: calling __return_syscall (hash),target pid=%d,tid=%d\n", __FUNCTION__, ppd->pid, packet->req.rtid);
__return_syscall(ppd->ud->os, packet, -ERESTARTSYS,
__return_syscall(ppd->ud->os, ppd, packet, -ERESTARTSYS,
packet->req.rtid);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
@ -1170,7 +1292,7 @@ void mcctrl_put_per_proc_data(struct mcctrl_per_proc_data *ppd)
/* We use ERESTARTSYS to tell the LWK that the proxy
* process is gone and the application should be terminated */
__return_syscall(ppd->ud->os, packet, -ERESTARTSYS,
__return_syscall(ppd->ud->os, ppd, packet, -ERESTARTSYS,
packet->req.rtid);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
}
@ -1211,7 +1333,7 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
/* We use ERESTARTSYS to tell the LWK that the proxy
* process is gone and the application should be terminated */
__return_syscall(ud->os, packet, -ERESTARTSYS,
__return_syscall(ud->os, NULL, packet, -ERESTARTSYS,
packet->req.rtid);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
@ -1654,7 +1776,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
}
__return_syscall(os, packet, ret.ret, task_pid_vnr(current));
__return_syscall(os, ppd, packet, ret.ret, task_pid_vnr(current));
error = 0;
out:
@ -2097,7 +2219,13 @@ static DECLARE_WAIT_QUEUE_HEAD(perfctrlq);
long mcctrl_perf_num(ihk_os_t os, unsigned long arg)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata;
if (!os || ihk_host_validate_os(os)) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n", __func__);
@ -2122,22 +2250,34 @@ struct mcctrl_perf_ctrl_desc {
*/
long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata = NULL;
struct ikc_scd_packet isp;
struct perf_ctrl_desc *perf_desc;
struct ihk_perf_event_attr attr;
struct ihk_cpu_info *info = ihk_os_get_cpu_info(os);
struct ihk_cpu_info *info = NULL;
int ret = 0;
int i = 0, j = 0;
int need_free;
int num_registered = 0;
int err = 0;
if (!os || ihk_host_validate_os(os)) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n", __func__);
return -EINVAL;
}
info = ihk_os_get_cpu_info(os);
if (!info) {
pr_err("%s: error: cannot get cpu info\n", __func__);
return -EINVAL;
}
for (i = 0; i < usrdata->perf_event_num; i++) {
ret = copy_from_user(&attr, &arg[i],
sizeof(struct ihk_perf_event_attr));
@ -2197,20 +2337,30 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg)
long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata = NULL;
struct ikc_scd_packet isp;
struct perf_ctrl_desc *perf_desc;
struct ihk_cpu_info *info = ihk_os_get_cpu_info(os);
struct ihk_cpu_info *info = NULL;
unsigned long value_sum = 0;
int ret = 0;
int i = 0, j = 0;
int need_free;
if (!os || ihk_host_validate_os(os)) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n", __func__);
return -EINVAL;
}
info = ihk_os_get_cpu_info(os);
if (!info || info->n_cpus < 1) {
return -EINVAL;
}
for (i = 0; i < usrdata->perf_event_num; i++) {
perf_desc = kmalloc(sizeof(struct mcctrl_perf_ctrl_desc),
GFP_KERNEL);
@ -2258,15 +2408,20 @@ long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg)
long mcctrl_perf_enable(ihk_os_t os)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata = NULL;
struct ikc_scd_packet isp;
struct perf_ctrl_desc *perf_desc;
struct ihk_cpu_info *info = ihk_os_get_cpu_info(os);
struct ihk_cpu_info *info = NULL;
unsigned long cntr_mask = 0;
int ret = 0;
int i = 0, j = 0;
int need_free;
if (!os || ihk_host_validate_os(os)) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n", __func__);
return -EINVAL;
@ -2289,6 +2444,11 @@ long mcctrl_perf_enable(ihk_os_t os)
isp.msg = SCD_MSG_PERF_CTRL;
isp.arg = virt_to_phys(perf_desc);
info = ihk_os_get_cpu_info(os);
if (!info || info->n_cpus < 1) {
kfree(perf_desc);
return -EINVAL;
}
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp, 0,
wakeup_desc_of_perf_desc(perf_desc),
@ -2316,15 +2476,20 @@ long mcctrl_perf_enable(ihk_os_t os)
long mcctrl_perf_disable(ihk_os_t os)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata = NULL;
struct ikc_scd_packet isp;
struct perf_ctrl_desc *perf_desc;
struct ihk_cpu_info *info = ihk_os_get_cpu_info(os);
struct ihk_cpu_info *info = NULL;
unsigned long cntr_mask = 0;
int ret = 0;
int i = 0, j = 0;
int need_free;
if (!os || ihk_host_validate_os(os)) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n", __func__);
return -EINVAL;
@ -2347,6 +2512,11 @@ long mcctrl_perf_disable(ihk_os_t os)
isp.msg = SCD_MSG_PERF_CTRL;
isp.arg = virt_to_phys(perf_desc);
info = ihk_os_get_cpu_info(os);
if (!info || info->n_cpus < 1) {
kfree(perf_desc);
return -EINVAL;
}
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp, 0,
wakeup_desc_of_perf_desc(perf_desc),
@ -2388,6 +2558,10 @@ long mcctrl_getrusage(ihk_os_t ihk_os, struct mcctrl_ioctl_getrusage_desc *__use
unsigned long ut;
unsigned long st;
if (!ihk_os || ihk_host_validate_os(ihk_os)) {
return -EINVAL;
}
ret = copy_from_user(&desc, _desc, sizeof(struct mcctrl_ioctl_getrusage_desc));
if (ret != 0) {
printk("%s: copy_from_user failed\n", __FUNCTION__);
@ -2630,7 +2804,7 @@ static long mcexec_terminate_thread_unsafe(ihk_os_t os, int pid, int tid, long c
__FUNCTION__, tid);
goto no_ptd;
}
__return_syscall(usrdata->os, packet, code, tid);
__return_syscall(usrdata->os, ppd, packet, code, tid);
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
/* Drop reference for this function */
@ -3376,7 +3550,7 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
struct ihk_ikc_channel_desc *ch;
int ret = 0;
if (!os) {
if (!os || ihk_host_validate_os(os) || !ret_cpu) {
return -EINVAL;
}
@ -3418,7 +3592,11 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
*ret_cpu = ch->send.queue->read_cpu;
ret = 0;
#ifndef ENABLE_FUGAKU_HACKS
pr_info("%s: OS: %lx, CPU: %d\n",
#else
dprintk("%s: OS: %lx, CPU: %d\n",
#endif
__func__, (unsigned long)os, *ret_cpu);
out_put_ppd:
@ -3468,7 +3646,8 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
isp.op = op;
isp.pdesc = virt_to_phys(ldesc);
ret = mcctrl_ikc_send_wait(os, cpu, &isp, 0, NULL, &do_free, 1, ldesc);
/* 1 sec timeout for the case where McKernel can't respond */
ret = mcctrl_ikc_send_wait(os, cpu, &isp, 1000, NULL, &do_free, 1, ldesc);
if (ret != 0) {
printk("%s: ERROR sending IKC msg: %d\n", __FUNCTION__, ret);
goto out;
@ -3482,7 +3661,11 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
/* Notify caller (for future async implementation) */
atomic_set(&desc->sync, 1);
#ifndef ENABLE_FUGAKU_HACKS
dprintk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#else
printk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#endif
__FUNCTION__,
(op == MCCTRL_OS_CPU_READ_REGISTER ? "READ" : "WRITE"), cpu,
desc->addr_ext, desc->val);

View File

@ -50,6 +50,9 @@ extern void procfs_exit(int);
extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
#ifdef ENABLE_TOFU
extern void mcctrl_file_to_pidfd_hash_init(void);
#endif
extern int mcctrl_os_read_cpu_register(ihk_os_t os, int cpu,
struct ihk_os_cpu_register *desc);
@ -57,6 +60,11 @@ extern int mcctrl_os_write_cpu_register(ihk_os_t os, int cpu,
struct ihk_os_cpu_register *desc);
extern int mcctrl_get_request_os_cpu(ihk_os_t os, int *cpu);
#ifdef ENABLE_TOFU
extern void mcctrl_tofu_hijack_release_handlers(void);
extern void mcctrl_tofu_restore_release_handlers(void);
#endif
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
unsigned long arg, struct file *file)
{
@ -227,7 +235,6 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
@ -319,10 +326,17 @@ static int __init mcctrl_init(void)
}
binfmt_mcexec_init();
#ifdef ENABLE_TOFU
mcctrl_file_to_pidfd_hash_init();
#endif
if ((ret = symbols_init()))
goto error;
#ifdef ENABLE_TOFU
mcctrl_tofu_hijack_release_handlers();
#endif
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
printk("mcctrl: error: registering OS notifier\n");
goto error;
@ -345,6 +359,9 @@ static void __exit mcctrl_exit(void)
binfmt_mcexec_exit();
uti_attr_finalize();
#ifdef ENABLE_TOFU
mcctrl_tofu_restore_release_handlers();
#endif
printk("mcctrl: unregistered.\n");
}

View File

@ -142,13 +142,35 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
ret = mcctrl_ikc_send(os, cpu, pisp);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send failed: %d\n", __func__, ret);
kfree(desc);
if (alloc_desc)
kfree(desc);
return ret;
}
if (timeout) {
ret = wait_event_interruptible_timeout(desc->wq,
desc->status, timeout);
/*
* Negative timeout indicates busy waiting, which can be used
* in situations where wait_event_interruptible_XXX() would
* fail, e.g., in a signal handler, at the time the process
* is being killed, etc.
*/
if (timeout < 0) {
unsigned long timeout_jiffies =
jiffies + msecs_to_jiffies(timeout * -1);
ret = -ETIME;
while (time_before(jiffies, timeout_jiffies)) {
schedule();
if (READ_ONCE(desc->status)) {
ret = 0;
break;
}
}
}
else {
ret = wait_event_interruptible_timeout(desc->wq,
desc->status, msecs_to_jiffies(timeout));
}
} else {
ret = wait_event_interruptible(desc->wq, desc->status);
}
@ -210,6 +232,8 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PROCFS_ANSWER:
case SCD_MSG_REMOTE_PAGE_FAULT_ANSWER:
case SCD_MSG_CPU_RW_REG_RESP:
case SCD_MSG_CLEANUP_PROCESS_RESP:
case SCD_MSG_CLEANUP_FD_RESP:
mcctrl_wakeup_cb(__os, pisp);
break;
@ -280,7 +304,11 @@ int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp)
{
struct mcctrl_usrdata *usrdata;
if (!os || cpu < 0) {
if (!os || ihk_host_validate_os(os) || !pisp) {
return -EINVAL;
}
if (cpu < 0) {
return -EINVAL;
}
@ -508,11 +536,9 @@ int prepare_ikc_channels(ihk_os_t os)
usrdata->os = os;
ihk_host_os_set_usrdata(os, usrdata);
ihk_ikc_listen_port(os, &lp_ikc2linux);
ihk_ikc_listen_port(os, &lp_ikc2mckernel);
init_waitqueue_head(&usrdata->wq_procfs);
mutex_init(&usrdata->reserve_lock);
mutex_init(&usrdata->part_exec_lock);
for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]);
@ -521,13 +547,21 @@ int prepare_ikc_channels(ihk_os_t os)
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
INIT_LIST_HEAD(&usrdata->node_topology_list);
INIT_LIST_HEAD(&usrdata->part_exec_list);
mutex_init(&usrdata->part_exec.lock);
INIT_LIST_HEAD(&usrdata->part_exec.pli_list);
usrdata->part_exec.nr_processes = -1;
INIT_LIST_HEAD(&usrdata->wakeup_descs_list);
spin_lock_init(&usrdata->wakeup_descs_lock);
/* ihk_ikc_listen_port should be performed after
* usrdata->cpu_topology_list is initialized because the
* function enables syscall_packet_handler which accesses
* the list (the call path is sysfsm_packet_handler -->
* sysfsm_work_main --> sysfsm_setup --> setup_sysfs_files
* --> setup_cpus_sysfs_files).
*/
ihk_ikc_listen_port(os, &lp_ikc2linux);
ihk_ikc_listen_port(os, &lp_ikc2mckernel);
return 0;
error:
@ -580,6 +614,18 @@ void destroy_ikc_channels(ihk_os_t os)
kfree(usrdata->channels);
kfree(usrdata->ikc2linux);
mutex_lock(&usrdata->part_exec_lock);
while (!list_empty(&usrdata->part_exec_list)) {
struct mcctrl_part_exec *pe;
pe = list_first_entry(&usrdata->part_exec_list,
struct mcctrl_part_exec, chain);
list_del(&pe->chain);
kfree(pe);
}
mutex_unlock(&usrdata->part_exec_lock);
kfree(usrdata);
}

View File

@ -58,7 +58,8 @@
#define SCD_MSG_SEND_SIGNAL 0x7
#define SCD_MSG_SEND_SIGNAL_ACK 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_GET_VDSO_INFO 0xa
#define SCD_MSG_CLEANUP_PROCESS_RESP 0xa
#define SCD_MSG_GET_VDSO_INFO 0xb
//#define SCD_MSG_GET_CPU_MAPPING 0xc
//#define SCD_MSG_REPLY_GET_CPU_MAPPING 0xd
@ -104,6 +105,8 @@
#define SCD_MSG_CPU_RW_REG 0x52
#define SCD_MSG_CPU_RW_REG_RESP 0x53
#define SCD_MSG_CLEANUP_FD 0x54
#define SCD_MSG_CLEANUP_FD_RESP 0x55
#define SCD_MSG_FUTEX_WAKE 0x60
@ -260,6 +263,7 @@ struct mcctrl_per_proc_data {
struct list_head devobj_pager_list;
struct semaphore devobj_pager_lock;
int enable_tofu;
};
struct sysfsm_req {
@ -324,13 +328,20 @@ struct process_list_item {
wait_queue_head_t pli_wq;
};
/*
 * Upper bound on the number of entries kept on
 * mcctrl_usrdata::part_exec_list: when mcexec_get_cpuset() needs a new
 * entry and has already counted this many, it removes and frees the first
 * entry of the list before appending the new one.
 */
#define PE_LIST_MAXLEN 5
/*
 * State of one partitioned execution, i.e. one group of processes that
 * request CPU partitioning with the same parent pid. Entries are created
 * by mcexec_get_cpuset() and linked on mcctrl_usrdata::part_exec_list.
 */
struct mcctrl_part_exec {
/* taken by mcexec_get_cpuset() after the entry has been looked up */
struct mutex lock;
/* group size, taken from the request that created the entry */
int nr_processes;
/* number of processes to let in / out the synchronization point */
int nr_processes_left;
/* number of processes which have joined the partition */
int nr_processes_joined;
int process_rank;
/* key of the entry: compared against the requester's ppid on lookup */
pid_t node_proxy_pid;
cpumask_t cpus_used;
struct list_head pli_list;
/* link in mcctrl_usrdata::part_exec_list */
struct list_head chain;
};
#define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG))
@ -353,6 +364,7 @@ struct mcctrl_usrdata {
int job_pos;
int mcctrl_dma_abort;
struct mutex reserve_lock;
struct mutex part_exec_lock;
unsigned long last_thread_exec;
wait_queue_head_t wq_procfs;
struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
@ -368,7 +380,7 @@ struct mcctrl_usrdata {
nodemask_t numa_online;
struct list_head cpu_topology_list;
struct list_head node_topology_list;
struct mcctrl_part_exec part_exec;
struct list_head part_exec_list;
int perf_event_num;
};
@ -453,7 +465,8 @@ struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(struct mcctrl_per_proc
struct task_struct *task);
int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len);
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
void __return_syscall(ihk_os_t os, struct mcctrl_per_proc_data *ppd,
struct ikc_scd_packet *packet,
long ret, int stid);
int clear_pte_range(uintptr_t start, uintptr_t len);
@ -548,4 +561,34 @@ struct uti_futex_resp {
int done;
wait_queue_head_t wq;
};
#ifdef ENABLE_TOFU
/*
* Hash table to keep track of files and related processes
* and file descriptors.
* NOTE: Used for Tofu driver release handlers.
*/
#define MCCTRL_FILE_2_PIDFD_HASH_SHIFT 4
#define MCCTRL_FILE_2_PIDFD_HASH_SIZE (1 << MCCTRL_FILE_2_PIDFD_HASH_SHIFT)
#define MCCTRL_FILE_2_PIDFD_HASH_MASK (MCCTRL_FILE_2_PIDFD_HASH_SIZE - 1)
/*
 * One entry of the file -> (process, fd) hash table.
 * Entries are allocated by mcctrl_file_to_pidfd_hash_insert() and
 * freed by mcctrl_file_to_pidfd_hash_remove().
 */
struct mcctrl_file_to_pidfd {
/* Linux file object; its low address bits select the hash bucket */
struct file *filp;
/* OS instance the entry was registered for (compared on removal) */
ihk_os_t os;
/* thread group leader; identifies the process on lookup and removal */
struct task_struct *group_leader;
/* PID as supplied to insert (only printed by the hash routines) */
int pid;
/* file descriptor number as supplied to insert (compared on removal) */
int fd;
/* link in the mcctrl_file_to_pidfd_hash[] bucket list */
struct list_head hash;
/* device name, i.e. the path with the "/proc/tofu/dev/" prefix removed */
char tofu_dev_path[128];
/* opaque pointer supplied to insert (PDE_DATA() of the file's inode) */
void *pde_data;
};
int mcctrl_file_to_pidfd_hash_insert(struct file *filp,
ihk_os_t os, int pid, struct task_struct *group_leader, int fd,
char *path, void *pde_data);
struct mcctrl_file_to_pidfd *mcctrl_file_to_pidfd_hash_lookup(
struct file *filp, struct task_struct *group_leader);
int mcctrl_file_to_pidfd_hash_remove(struct file *filp,
ihk_os_t os, struct task_struct *group_leader, int fd);
#endif
#endif

View File

@ -126,7 +126,7 @@ find_procfs_entry(struct procfs_list_entry *parent, const char *name)
static void
delete_procfs_entries(struct procfs_list_entry *top)
{
struct procfs_list_entry *e;
struct procfs_list_entry *e = NULL;
struct procfs_list_entry *n;
list_del(&top->list);
@ -136,8 +136,10 @@ delete_procfs_entries(struct procfs_list_entry *top)
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
if (e) {
e->entry->read_proc = NULL;
e->entry->data = NULL;
}
#endif
remove_proc_entry(top->name, top->parent? top->parent->entry: NULL);
if(top->data)

View File

@ -45,6 +45,9 @@
#include <linux/mount.h>
#include <linux/kdev_t.h>
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/llist.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/io.h>
@ -52,6 +55,7 @@
#include "mcctrl.h"
#include <linux/version.h>
#include <archdeps.h>
#include <asm/pgtable.h>
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
@ -539,7 +543,11 @@ retry_alloc:
#define USE_VM_INSERT_PFN 1
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
#if defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 2)
static vm_fault_t rus_vm_fault(struct vm_fault *vmf)
#else
static int rus_vm_fault(struct vm_fault *vmf)
#endif
{
struct vm_area_struct *vma = vmf->vma;
#else
@ -651,6 +659,9 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
goto put_and_out;
}
// Force regular page size
pgsize = PAGE_SIZE;
rva = (unsigned long)addr & ~(pgsize - 1);
rpa = rpa & ~(pgsize - 1);
@ -662,7 +673,8 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
/* LWK may hold large page based mappings that align rva outside
* Linux' VMA, make sure we don't try to map to those pages */
if (rva + (pix * PAGE_SIZE) < vma->vm_start) {
if (rva + (pix * PAGE_SIZE) < vma->vm_start ||
rva + (pix * PAGE_SIZE) > vma->vm_end) {
continue;
}
@ -673,21 +685,27 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (error) {
pr_err("%s: error inserting mapping for 0x%#lx "
"(req: TID: %d, syscall: %lu) error: %d,"
" vm_start: 0x%lx, vm_end: 0x%lx\n",
" vm_start: 0x%lx, vm_end: 0x%lx, pgsize: %lu, ind: %lu\n",
__func__,
(unsigned long)addr, packet.fault_tid,
rsysnum, error,
vma->vm_start, vma->vm_end);
vma->vm_start, vma->vm_end, pgsize, pix);
}
}
else
else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
error = vmf_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
if (error == VM_FAULT_NOPAGE) {
dprintk("%s: vmf_insert_pfn returned %d\n",
__func__, error);
error = 0;
}
#else
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
#endif
}
if (error) {
pr_err("%s: vm_insert_pfn returned %d\n",
__func__, error);
@ -1831,20 +1849,165 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
return ret;
}
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
#ifdef ENABLE_TOFU
struct list_head mcctrl_file_to_pidfd_hash[MCCTRL_FILE_2_PIDFD_HASH_SIZE];
spinlock_t mcctrl_file_to_pidfd_hash_lock;
/*
 * Prepare the file -> (pid, fd) hash table for use: initialize the
 * global spinlock protecting it and make every bucket an empty list.
 */
void mcctrl_file_to_pidfd_hash_init(void)
{
	int bucket = 0;

	spin_lock_init(&mcctrl_file_to_pidfd_hash_lock);

	while (bucket < MCCTRL_FILE_2_PIDFD_HASH_SIZE) {
		INIT_LIST_HEAD(&mcctrl_file_to_pidfd_hash[bucket]);
		++bucket;
	}
}
/*
 * Register a Tofu device file in the file -> (pid, fd) hash table.
 *
 * @filp:         file object; its low address bits select the bucket
 * @os:           OS instance the file belongs to
 * @pid:          PID to record (only used for reporting)
 * @group_leader: thread group leader identifying the owning process
 * @fd:           file descriptor number of @filp in that process
 * @path:         full path of the file; it MUST start with the
 *                15-character prefix "/proc/tofu/dev/", only the part
 *                after the prefix is stored (truncated to fit
 *                tofu_dev_path, always NUL-terminated)
 * @pde_data:     opaque per-file driver pointer to record
 *
 * Returns 0 on success, -ENOMEM if the entry cannot be allocated and
 * -EBUSY if the bucket already holds an entry for @filp (the new entry
 * is discarded in that case).
 *
 * The allocation uses GFP_ATOMIC and the list is manipulated with
 * mcctrl_file_to_pidfd_hash_lock held and interrupts disabled.
 */
int mcctrl_file_to_pidfd_hash_insert(struct file *filp,
	ihk_os_t os, int pid, struct task_struct *group_leader, int fd,
	char *path, void *pde_data)
{
	unsigned long irqflags;
	struct mcctrl_file_to_pidfd *file2pidfd_iter;
	struct mcctrl_file_to_pidfd *file2pidfd;
	int hash = (int)((unsigned long)filp &
		(unsigned long)MCCTRL_FILE_2_PIDFD_HASH_MASK);
	int ret = 0;

	file2pidfd = kmalloc(sizeof(*file2pidfd), GFP_ATOMIC);
	if (!file2pidfd)
		return -ENOMEM;

	file2pidfd->filp = filp;
	file2pidfd->os = os;
	file2pidfd->pid = pid;
	file2pidfd->group_leader = group_leader;
	file2pidfd->fd = fd;
	/*
	 * Only copy the name under /proc/tofu/dev/.
	 * strncpy() does not terminate the destination when the source
	 * is at least as long as the limit, so copy one byte less than
	 * the buffer size and terminate explicitly.
	 */
	strncpy(file2pidfd->tofu_dev_path, path + 15,
		sizeof(file2pidfd->tofu_dev_path) - 1);
	file2pidfd->tofu_dev_path[sizeof(file2pidfd->tofu_dev_path) - 1] =
		'\0';
	file2pidfd->pde_data = pde_data;

	spin_lock_irqsave(&mcctrl_file_to_pidfd_hash_lock, irqflags);
	list_for_each_entry(file2pidfd_iter,
			&mcctrl_file_to_pidfd_hash[hash], hash) {
		if (file2pidfd_iter->filp == filp) {
			printk("%s: WARNING: filp: %p, pid: %d, fd: %d exists\n",
				__func__, filp, pid, fd);
			ret = -EBUSY;
			goto free_out;
		}
	}

	list_add_tail(&file2pidfd->hash,
		&mcctrl_file_to_pidfd_hash[hash]);
	dprintk("%s: filp: %p, pid: %d, fd: %d added\n",
		__func__, filp, pid, fd);
	spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
	return ret;

free_out:
	/* The new entry was never published, so drop the lock first. */
	spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
	kfree(file2pidfd);
	return ret;
}
/*
* XXX: lookup relies on group_leader to identify the process
* because PIDs might be different across name spaces (e.g.,
* when using Docker)
*/
struct mcctrl_file_to_pidfd *mcctrl_file_to_pidfd_hash_lookup(
struct file *filp, struct task_struct *group_leader)
{
unsigned long irqflags;
struct mcctrl_file_to_pidfd *file2pidfd_iter;
struct mcctrl_file_to_pidfd *file2pidfd = NULL;
int hash = (int)((unsigned long)filp &
(unsigned long)MCCTRL_FILE_2_PIDFD_HASH_MASK);
spin_lock_irqsave(&mcctrl_file_to_pidfd_hash_lock, irqflags);
list_for_each_entry(file2pidfd_iter,
&mcctrl_file_to_pidfd_hash[hash], hash) {
if (file2pidfd_iter->filp == filp &&
file2pidfd_iter->group_leader == group_leader) {
file2pidfd = file2pidfd_iter;
dprintk("%s: filp: %p, pid: %d, fd: %d found\n",
__func__, filp, file2pidfd->pid, file2pidfd->fd);
break;
}
}
spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
return file2pidfd;
}
/*
 * Remove and free the hash table entry that matches all of @filp, @os,
 * @group_leader and @fd.
 *
 * Returns 0 when an entry was removed, -ENOENT when no entry matched.
 *
 * The bucket is searched and modified with
 * mcctrl_file_to_pidfd_hash_lock held and interrupts disabled.
 */
int mcctrl_file_to_pidfd_hash_remove(struct file *filp,
	ihk_os_t os, struct task_struct *group_leader, int fd)
{
	unsigned long irqflags;
	struct mcctrl_file_to_pidfd *file2pidfd_iter;
	int hash = (int)((unsigned long)filp &
		(unsigned long)MCCTRL_FILE_2_PIDFD_HASH_MASK);
	int ret = 0;

	spin_lock_irqsave(&mcctrl_file_to_pidfd_hash_lock, irqflags);
	list_for_each_entry(file2pidfd_iter,
			&mcctrl_file_to_pidfd_hash[hash], hash) {
		if (file2pidfd_iter->filp != filp)
			continue;
		if (file2pidfd_iter->os != os)
			continue;
		if (file2pidfd_iter->group_leader != group_leader)
			continue;
		if (file2pidfd_iter->fd != fd)
			continue;

		/*
		 * Unlinking inside the non-_safe iterator is fine here
		 * because the walk is abandoned right after the removal.
		 */
		list_del(&file2pidfd_iter->hash);
		dprintk("%s: filp: %p, pid: %d, fd: %d removed\n",
			__func__, filp, file2pidfd_iter->pid, fd);
		kfree(file2pidfd_iter);
		goto unlock_out;
	}

	/*
	 * No PID is available on this path (the function has no pid
	 * parameter and no entry was found), so only report filp and fd.
	 */
	dprintk("%s: filp: %p, fd: %d couldn't be found\n",
		__func__, filp, fd);
	ret = -ENOENT;

unlock_out:
	spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
	return ret;
}
#endif
void __return_syscall(ihk_os_t os, struct mcctrl_per_proc_data *ppd,
struct ikc_scd_packet *packet,
long ret, int stid)
{
unsigned long phys;
struct syscall_response *res;
if (!os || ihk_host_validate_os(os) || !packet) {
return;
}
phys = ihk_device_map_memory(ihk_os_to_dev(os),
packet->resp_pa, sizeof(*res));
if (!phys) {
return;
}
res = ihk_device_map_virtual(ihk_os_to_dev(os),
phys, sizeof(*res), NULL, 0);
if (!res) {
printk("%s: ERROR: invalid response structure address\n",
__FUNCTION__);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, sizeof(*res));
return;
}
@ -1852,6 +2015,109 @@ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
res->ret = ret;
res->stid = stid;
#ifdef ENABLE_TOFU
/* Tofu enabled process? */
if (ppd && ppd->enable_tofu) {
char *pathbuf, *fullpath;
/* Record PDE_DATA after open() calls for Tofu driver */
if (packet->req.number == __NR_openat && ret > 1) {
struct fd f;
int fd;
fd = ret;
f = fdget(fd);
if (!f.file) {
goto out_notify;
}
pathbuf = (char *)__get_free_page(GFP_ATOMIC);
if (!pathbuf) {
goto out_fdput_open;
}
fullpath = d_path(&f.file->f_path, pathbuf, PAGE_SIZE);
if (IS_ERR(fullpath)) {
goto out_free_open;
}
if (!strncmp("/proc/tofu/dev/", fullpath, 15)) {
res->pde_data = PDE_DATA(file_inode(f.file));
dprintk("%s: fd: %d, path: %s, PDE_DATA: 0x%lx\n",
__func__,
fd,
fullpath,
(unsigned long)res->pde_data);
dprintk("%s: pgd_index: %ld, pmd_index: %ld, pte_index: %ld\n",
__func__,
pgd_index((unsigned long)res->pde_data),
pmd_index((unsigned long)res->pde_data),
pte_index((unsigned long)res->pde_data));
dprintk("MAX_USER_VA_BITS: %d, PGDIR_SHIFT: %d\n",
MAX_USER_VA_BITS, PGDIR_SHIFT);
mcctrl_file_to_pidfd_hash_insert(f.file, os,
task_tgid_vnr(current),
current->group_leader, fd,
fullpath, res->pde_data);
}
out_free_open:
free_page((unsigned long)pathbuf);
out_fdput_open:
fdput(f);
}
/* Ioctl on Tofu CQ? */
else if (packet->req.number == __NR_ioctl &&
packet->req.args[0] > 0 && ret == 0) {
struct fd f;
int fd;
int tni, cq;
long __ret;
fd = packet->req.args[0];
f = fdget(fd);
if (!f.file) {
goto out_notify;
}
pathbuf = (char *)__get_free_page(GFP_ATOMIC);
if (!pathbuf) {
goto out_fdput_ioctl;
}
fullpath = d_path(&f.file->f_path, pathbuf, PAGE_SIZE);
if (IS_ERR(fullpath)) {
goto out_free_ioctl;
}
/* Looking for /proc/tofu/dev/tniXcqY pattern */
__ret = sscanf(fullpath, "/proc/tofu/dev/tni%dcq%d", &tni, &cq);
if (__ret == 2) {
extern long __mcctrl_tof_utofu_unlocked_ioctl_cq(void *pde_data,
unsigned int cmd, unsigned long arg);
dprintk("%s: ioctl(): fd: %d, path: %s\n",
__func__,
fd,
fullpath);
__ret = __mcctrl_tof_utofu_unlocked_ioctl_cq(
PDE_DATA(file_inode(f.file)),
packet->req.args[1], packet->req.args[2]);
}
out_free_ioctl:
free_page((unsigned long)pathbuf);
out_fdput_ioctl:
fdput(f);
}
}
out_notify:
#endif
if (__notify_syscall_requester(os, packet, res) < 0) {
printk("%s: WARNING: failed to notify PID %d\n",
__FUNCTION__, packet->pid);
@ -2154,11 +2420,98 @@ int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
if (ret == -ENOSYS)
return -ENOSYS;
__return_syscall(os, packet, ret, 0);
__return_syscall(os, NULL, packet, ret, 0);
return 0;
}
/*
* Memory clearing helpers.
*/
struct node_distance;
#define IHK_RBTREE_ALLOCATOR
#ifdef IHK_RBTREE_ALLOCATOR
/*
 * Descriptor of one chunk of free memory.
 * mcctrl_zero_mckernel_pages() treats 'addr' as a physical address
 * (it is passed to phys_to_virt()) and 'size' as a byte count, and it
 * leaves the first sizeof(struct free_chunk) bytes at that address
 * untouched when clearing the chunk.
 * NOTE(review): presumably this must match the McKernel-side
 * definition of the same structure - confirm before changing it.
 */
struct free_chunk {
unsigned long addr, size;
/* NOTE(review): presumably the link in ihk_mc_numa_node.free_chunks - confirm */
struct rb_node node;
/* link in ihk_mc_numa_node.to_zero_list / zeroed_list */
struct llist_node list;
};
#endif
/*
 * Lock node type embedded in struct ihk_mc_numa_node.
 * The set of members depends on SPIN_LOCK_IN_MCS and the 64-byte
 * alignment is dropped when ENABLE_UBSAN is defined.
 * NOTE(review): nothing in this file section operates on the lock;
 * presumably it is only declared so that the structure layout agrees
 * with the McKernel side - confirm.
 */
typedef struct mcs_lock_node {
#ifndef SPIN_LOCK_IN_MCS
unsigned long locked;
struct mcs_lock_node *next;
#endif
unsigned long irqsave;
#ifdef SPIN_LOCK_IN_MCS
ihk_spinlock_t spinlock;
#endif
#ifndef ENABLE_UBSAN
} __aligned(64) mcs_lock_node_t;
#else
} mcs_lock_node_t;
#endif
/*
 * NUMA node descriptor as accessed by mcctrl_zero_mckernel_pages(),
 * which receives the address of an instance as an integer argument.
 * NOTE(review): presumably the layout has to stay identical to the
 * McKernel-side structure of the same name - confirm before editing.
 */
struct ihk_mc_numa_node {
/* node number, printed in the zeroing debug message */
int id;
int linux_numa_id;
int type;
struct list_head allocators;
struct node_distance *nodes_by_distance;
#ifdef IHK_RBTREE_ALLOCATOR
/* decremented once by mcctrl_zero_mckernel_pages() when it finishes */
atomic_t zeroing_workers;
/* reduced by the page count of every chunk that has been zeroed */
atomic_t nr_to_zero_pages;
/* chunks that have been zeroed are pushed here */
struct llist_head zeroed_list;
/* chunks waiting to be zeroed are popped from here */
struct llist_head to_zero_list;
struct rb_root free_chunks;
mcs_lock_node_t lock;
unsigned long nr_pages;
/*
* nr_free_pages: all freed pages, zeroed if zero_at_free
*/
unsigned long nr_free_pages;
unsigned long min_addr;
unsigned long max_addr;
#endif
};
/*
 * Zero all chunks queued on a NUMA node's to_zero_list.
 *
 * @arg: address of a struct ihk_mc_numa_node, passed as an integer.
 *
 * Each chunk is popped from to_zero_list, cleared except for its
 * leading struct free_chunk header, pushed onto zeroed_list, and only
 * then is nr_to_zero_pages reduced by the chunk's page count.  The
 * chunk's address and size are saved before it is published on
 * zeroed_list and the chunk is not dereferenced afterwards.
 * When the list is drained, zeroing_workers is decremented once.
 */
void mcctrl_zero_mckernel_pages(unsigned long arg)
{
	struct ihk_mc_numa_node *node = (struct ihk_mc_numa_node *)arg;
	struct llist_node *entry;

	/* Iterate free chunks */
	for (;;) {
		struct free_chunk *chunk;
		unsigned long addr;
		unsigned long size;

		entry = llist_del_first(&node->to_zero_list);
		if (!entry)
			break;

		chunk = container_of(entry, struct free_chunk, list);
		addr = chunk->addr;
		size = chunk->size;

		/* Skip the header that lives at the start of the chunk */
		memset(phys_to_virt(addr) + sizeof(*chunk), 0,
			size - sizeof(*chunk));

		llist_add(&chunk->list, &node->zeroed_list);

		dprintk("%s: zeroed %lu pages @ McKernel NUMA %d (chunk: 0x%lx:%lu)\n",
			__func__,
			size >> PAGE_SHIFT,
			node->id,
			addr, size);

		barrier();
		atomic_sub((int)(size >> PAGE_SHIFT), &node->nr_to_zero_pages);
	}

	atomic_dec(&node->zeroing_workers);
}
int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
{
struct syscall_request *sc = &packet->req;
@ -2167,6 +2520,28 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
dprintk("%s: system call: %lx\n", __FUNCTION__, sc->args[0]);
switch (sc->number) {
#ifdef ENABLE_TOFU
case __NR_close: {
struct fd f;
int fd;
fd = (int)sc->args[0];
if (fd > 2) {
f = fdget(fd);
if (f.file) {
mcctrl_file_to_pidfd_hash_remove(f.file, os,
current->group_leader, fd);
fdput(f);
}
}
error = -ENOSYS;
goto out;
break;
}
#endif
case __NR_mmap:
ret = pager_call(os, sc);
break;
@ -2179,6 +2554,14 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
ret = remap_user_space(sc->args[0], sc->args[1], sc->args[2]);
break;
case __NR_move_pages:
/*
* move pages is used for zeroing McKernel side memory,
* this call is NOT offloaded by applications.
*/
mcctrl_zero_mckernel_pages(sc->args[0]);
goto out_no_syscall_return;
case __NR_exit_group: {
/* Make sure the user space handler will be called as well */
@ -2262,7 +2645,9 @@ sched_setparam_out:
break;
}
__return_syscall(os, packet, ret, 0);
__return_syscall(os, NULL, packet, ret, 0);
out_no_syscall_return:
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
error = 0;

Some files were not shown because too many files have changed in this diff Show More