Compare commits

...

31 Commits

Author SHA1 Message Date
5594a4a4a9 The build is successful, testing is required 2025-08-23 12:12:22 +08:00
9ae3a3f374 todo: modpost undefined errors 2025-08-23 11:02:25 +08:00
64dbb93260 Still need to port some kernel modules 2025-08-23 00:35:29 +08:00
015a64039d Try to port mckernel to rhel9 2025-08-22 22:02:50 +08:00
7afd1c87f6 Update IHK submodule commit
Change-Id: I503233b393e5bfec003d407512d1028de3a60946
2022-05-31 13:39:07 +09:00
27b3f59031 Update IHK submodule
Change-Id: I01bb44d3cf40e431090785ea261926e89d835e66
2021-06-14 06:05:58 -04:00
a1b9721772 RHEL8.4: make mcinspect and eclair fPIE for RPM, clear build_ldflags in mckernel.spec
Change-Id: I06f09628629c2afb0d36ad6ab2e2ed2cc716a980
2021-06-14 06:04:30 -04:00
69187ea0fd Update IHK submodule commit
Change-Id: Ic952ff15e2269452ce0693a2a96653659431372b
2021-06-13 22:43:32 -04:00
0353fc1a0a RHEL8.4: support VDSO changes for aarch64 (by Fujitsu)
Change-Id: I1148d2e56eab52ee0264995dd32b9fd2f0d661f0
2021-06-13 22:33:29 -04:00
1a71203872 release: 1.8.0: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I171c87f0f49cf2f791693e397a1d94b1bc2d0440
2021-03-23 01:49:46 +00:00
03d99a0ed1 submodule: migrate to github.com/ihkmckernel
Change-Id: I64ee7c89e7316bb98b31833b5c15af9cf371b0ff
2021-03-23 01:12:25 +00:00
8fb42631f2 profile: fix infinite recursion for allocation miss event
Change-Id: I248c2abc7d02a9d9bffce20b3183724ddc8c2c1c
2021-03-21 15:26:39 +09:00
ba04c8a7b9 Fugaku: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I74a0d0e50af0b6c60a6f9a4389ef3ab0534deda2
2021-03-21 15:25:15 +09:00
1bb8dcef05 release: 1.7.10: detect hungup via device-ioctl
Change-Id: I6531a159a44683085004ad3e90d7b4e67f51422c
2021-03-18 15:42:24 +09:00
ceb55d53b1 mcreboot-smp.sh: sudo ihkmond for /dev/kmsg log
Change-Id: I47aa483e6f787b8392b4b33b0fb10e4728157253
2021-03-18 06:36:33 +00:00
002f36c7f5 docs: add limitation about Linux kernel dump
Change-Id: Ic007f2f1915e37981955ad2160ea6614b1c36ec1
2021-03-17 21:39:07 +09:00
90c1ceef45 release: 1.7.9: fix smp_ihk_os_shutdown()-related double free
Change-Id: I408dc69b41d9643548226c15c67fcbd8197acb92
2021-03-17 18:21:25 +09:00
4f1b505550 docs: migrate to github.com/ihkmckernel
Change-Id: Idd8fed88545231b4aca290e1b54cbc2d2dff2e9e
2021-03-17 08:43:02 +00:00
051c0dcdd8 overlay_path: Fix resolution of symbolic link under /sys/
Change-Id: I650e72fb335aa72256d3b129a65c09bbd7cf26d3
Refs: #1463
2021-03-17 08:18:46 +00:00
09173d353c mcctrl_wakeup_desc: refcount and fix timeouts
Change-Id: I14b34f031ffb10bfac6cef07d81f53a8dece767b
2021-03-17 03:36:35 +00:00
d5c5023bf8 epoll/epoll_wait/ppoll: special handling in syscall offload
Change-Id: I792eb91c349d0ce942179996328c6f89f186ba31
2021-03-17 03:36:35 +00:00
e3493bd0be docs: lift limitations and fix ppn example
Change-Id: Id78e7db09767d5dd8a3dc5b9f911b9026608b021
2021-03-17 03:31:12 +00:00
44261678f7 cmake: fix condition to turn on/off ENABLE_KRM_WORKAROUND
Change-Id: I1a8efe88ffb1283d0343571f340a3b5715318e7d
2021-03-17 02:57:19 +00:00
6e4a29a422 docs: spec: fix description of IHK_RESERVE_MEM_MAX_SIZE_RATIO_ALL
Change-Id: I7af95524d87721fa1ce34bc560eddc947117f5f8
2021-03-15 15:32:08 +09:00
2039139380 release: 1.7.8: fix ihklib/ihk_reserve_cpu when using krm
Change-Id: I57235d51f51ae7327cb08a9e3ae56be995157100
2021-03-12 12:54:56 +09:00
c80b112ce7 release: 1.7.7: fix fput and mckernel.spec
Change-Id: I74f7530b067d44790e3f014479f580867387584a
2021-03-11 08:09:07 +00:00
4a05024656 spec: cmake-config cmake paramters
Change-Id: Ic0e7f62d9172f31afe90297bdd22b8e50cc6fc9e
2021-03-11 07:19:04 +00:00
7a04c6eb5c ihkmond: redirect kmsg to /dev/kmsg line by line
Change-Id: Iafc9d0eb47696073434dcc869a29336a51b8c50e
2021-03-11 16:11:17 +09:00
3e00189de0 kprintf: fix checking if interrupt is disabled
Change-Id: I2ee1a1e2438ae761c4136593953ede2738bc6f74
2021-03-11 07:03:04 +00:00
c94cf8e6f0 mcexec: fput executable just after its contents is transferred
Change-Id: I3fae841bd7341bca030fd6b7eceffa068c9e0f4e
2021-03-11 07:03:04 +00:00
ee974b200d mcexec_open_exec: fix missing fput on error
Change-Id: I3ac94e336dc54ec313e69c0fa85c17086dc256fd
2021-03-11 07:03:04 +00:00
43 changed files with 1225 additions and 445 deletions

6
.gitmodules vendored
View File

@ -1,12 +1,12 @@
[submodule "ihk"]
path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git
url = https://github.com/ihkmckernel/ihk.git
[submodule "executer/user/lib/libdwarf/libdwarf"]
path = executer/user/lib/libdwarf/libdwarf
url = https://github.com/bgerofi/libdwarf.git
[submodule "executer/user/lib/syscall_intercept"]
path = executer/user/lib/syscall_intercept
url = https://github.com/RIKEN-SysSoft/syscall_intercept.git
url = https://github.com/ihkmckernel/syscall_intercept.git
[submodule "executer/user/lib/uti"]
path = executer/user/lib/uti
url = https://github.com/RIKEN-SysSoft/uti.git
url = https://github.com/ihkmckernel/uti.git

View File

@ -7,7 +7,7 @@ endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM)
project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.6")
set(MCKERNEL_VERSION "1.8.0")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "")
@ -84,7 +84,8 @@ if(ENABLE_FUGAKU_HACKS)
endif()
# Fujitsu MPI tries to xpmem-attach segment with size of range size + 1?
set(FJMPI_VERSION_COMMAND "a=\$(which mpifcc); b=\${a%/*/*}; c=\${b##*/}; d=\${c#*-}; echo \$d")
#set(FJMPI_VERSION_COMMAND "a=\$(which mpifcc); b=\${a%/*/*}; c=\${b##*/}; d=\${c#*-}; echo \$d")
set(FJMPI_VERSION_COMMAND "a=\$(which mpifort); b=\${a%/*/*}; c=\${b##*/}; d=\${c#*-}; echo \$d")
execute_process(COMMAND bash -c "${FJMPI_VERSION_COMMAND}"
OUTPUT_VARIABLE FJMPI_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
message("FJMPI_VERSION: ${FJMPI_VERSION}")
@ -105,10 +106,17 @@ execute_process(COMMAND bash -c "rpm -qi FJSVpxkrm-plugin-mckernel | awk '$1 ==
OUTPUT_VARIABLE KRM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
message("KRM_VERSION: ${KRM_VERSION}")
if(NOT "${KRM_VERSION}" STREQUAL "" AND "${KRM_VERSION}" VERSION_LESS_EQUAL 4.0.1)
option(ENABLE_KRM_WORKAROUND "krm workaround" ON)
else()
execute_process(COMMAND bash -c "rpm -qi FJSVpxkrm-plugin-mckernel | awk '$1 == \"Release\" && $2 == \":\" { print $3 }'"
OUTPUT_VARIABLE KRM_RELEASE OUTPUT_STRIP_TRAILING_WHITESPACE)
message("KRM_RELEASE: ${KRM_RELEASE}")
if("${KRM_VERSION}" STREQUAL "")
option(ENABLE_KRM_WORKAROUND "krm workaround" OFF)
elseif("${KRM_VERSION}" VERSION_GREATER_EQUAL 4.0.2 OR
("${KRM_VERSION}" VERSION_EQUAL 4.0.1 AND "${KRM_RELEASE}" VERSION_GREATER_EQUAL 25.13.1.0))
option(ENABLE_KRM_WORKAROUND "krm workaround" OFF)
else()
option(ENABLE_KRM_WORKAROUND "krm workaround" ON)
endif()
if(ENABLE_KRM_WORKAROUND)
@ -124,6 +132,14 @@ if(ENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
# redirect kernel messages to Linux's /dev/kmsg
option(ENABLE_KMSG_REDIRECT "Redirect kernel message to Linux's /dev/kmsg" OFF)
if(ENABLE_KMSG_REDIRECT)
add_definitions(-DENABLE_KMSG_REDIRECT)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_KMSG_REDIRECT")
endif()
option(PROFILE_ENABLE "System call profile" ON)
if(PROFILE_ENABLE)
add_definitions(-DPROFILE_ENABLE)
@ -176,6 +192,7 @@ file(REMOVE_RECURSE ${tmpdir})
file(MAKE_DIRECTORY ${tmpdir})
file(WRITE ${tmpdir}/driver.c "#include <linux/module.h>\n")
file(APPEND ${tmpdir}/driver.c "unsigned long MAP_KERNEL_START = MODULES_END - (1UL << 23);\n")
file(APPEND ${tmpdir}/driver.c "MODULE_LICENSE(\"GPL\");\n")
file(WRITE ${tmpdir}/Makefile "obj-m := driver.o\n")
file(APPEND ${tmpdir}/Makefile "all:\n")
file(APPEND ${tmpdir}/Makefile "\tmake ${KBUILD_MAKE_FLAGS_STR} -C ${KERNEL_DIR} M=${tmpdir} modules\n")
@ -261,11 +278,11 @@ set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
# ihk: ultimately should support external build, but add as subproject for now
if (EXISTS ${PROJECT_SOURCE_DIR}/ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
set(IHK_SOURCE_DIR "ihk" CACHE STRING "path to ihk source directory from mckernel sources")
elseif (EXISTS ${PROJECT_SOURCE_DIR}/../ihk/CMakeLists.txt)
set(IHK_SOURCE_DIR "../ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
set(IHK_SOURCE_DIR "../ihk" CACHE STRING "path to ihk source directory from mckernel sources")
else()
set(IHK_SOURCE_DIR "ihk" CACHE STRINGS "path to ihk source directory from mckernel sources")
set(IHK_SOURCE_DIR "ihk" CACHE STRING "path to ihk source directory from mckernel sources")
endif()
if (EXISTS ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR}/CMakeLists.txt)
set(IHK_FULL_SOURCE_DIR ${PROJECT_SOURCE_DIR}/${IHK_SOURCE_DIR})
@ -340,4 +357,5 @@ message("ENABLE_WERROR: ${ENABLE_WERROR}")
message("ENABLE_UBSAN: ${ENABLE_UBSAN}")
message("ENABLE_LINUX_WORK_IRQ_FOR_IKC: ${ENABLE_LINUX_WORK_IRQ_FOR_IKC}")
message("ENABLE_PER_CPU_ALLOC_CACHE: ${ENABLE_PER_CPU_ALLOC_CACHE}")
message("ENABLE_KMSG_REDIRECT: ${ENABLE_KMSG_REDIRECT}")
message("-------------------------------")

View File

@ -912,7 +912,6 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void)
{
unsigned long flags;
@ -925,7 +924,6 @@ int cpu_interrupt_disabled(void)
: "memory");
return (flags == masked);
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
@ -989,6 +987,18 @@ unsigned long cpu_enable_interrupt_save(void)
: "memory");
return flags;
}
/*
 * Report whether IRQs are currently masked on this CPU (build without
 * CONFIG_HAS_NMI).
 *
 * Reads the DAIF system register and tests its I (IRQ mask) bit.
 *
 * Returns 1 when IRQs are masked, 0 otherwise.
 */
int cpu_interrupt_disabled(void)
{
	unsigned long flags;

	asm volatile(
		"mrs %0, daif // arch_local_irq_save\n"
		: "=r" (flags)
		:
		: "memory");

	/*
	 * An MRS read of DAIF places D, A, I, F in bits 9..6, so the IRQ
	 * mask is bit 7 (0x80). The value 2 is only the position of I inside
	 * the 4-bit immediate of "msr daifset/daifclr"; bit 1 of the register
	 * value read here is reserved and reads as zero, so testing it would
	 * always report "enabled".
	 */
	return !!(flags & 0x80);
}
#endif /* defined(CONFIG_HAS_NMI) */
/* we do not have a "pause" instruction; use the "yield" instruction instead */

View File

@ -33,7 +33,6 @@ struct vdso {
long offset_sigtramp;
};
extern char vdso_start, vdso_end;
static struct vdso vdso;
struct tod_data_s tod_data

View File

@ -1273,6 +1273,15 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
/*
 * Tell whether maskable interrupts are currently disabled on this CPU.
 *
 * Takes a snapshot of the flags register by pushing it on the stack and
 * popping it into a local, then inspects the interrupt-enable flag
 * (mask 0x200).
 *
 * Returns 1 when that flag is clear (interrupts disabled), 0 when it is set.
 */
int cpu_interrupt_disabled(void)
{
	unsigned long rflags;

	asm volatile("pushf; pop %0" : "=r"(rflags) : : "memory", "cc");

	if (rflags & 0x200)
		return 0;

	return 1;
}
/*@
@ behavior valid_vector:
@ assumes 32 <= vector <= 255;

View File

@ -183,7 +183,7 @@ enum ihk_mc_pt_attribute {
PTATTR_WRITE_COMBINED = 0x40000,
};
enum ihk_mc_pt_attribute attr_mask;
extern enum ihk_mc_pt_attribute attr_mask;
static inline int pfn_is_write_combined(uintptr_t pfn)
{

View File

@ -1,3 +1,130 @@
=============================================
Version 1.8.0 (Mar 23, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
N/A
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
#. profile: fix infinite recursion for allocation miss event
#. Fugaku: MAP_LOCKED and pre-populate PMIx shared memory PFNs
=============================================
Version 1.7.10 (Mar 18, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
#. __ihk_device_detect_hungup: detect hungup via device-ioctl
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
N/A
=============================================
Version 1.7.9 (Mar 17, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
#. ihklib: ihk_reserve_mem_conf*: fix default values
#. smp_ihk_os_shutdown: fix memory leak
#. smp_ihk_os_shutdown: prevent double free
#. __ihk_os_shutdown: fix smp_ihk_os_shutdown()-related double free
#. smp_ihk_os_panic_notifier: exclude memory from Linux dump with default setting
#. smp_ihk_os_panic_notifier: exclude memory from Linux dump while booting, on timeout
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
#. mcctrl_wakeup_desc: refcount and fix timeouts
=============================================
Version 1.7.8 (Mar 12, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
#. ihklib: ihk_reserve_cpu: fix job cpu check when using krm
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
N/A
=============================================
Version 1.7.7 (Mar 11, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
N/A
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
#. mcexec: fput executable just after its contents is transferred
#. spec: cmake-config cmake parameters
=============================================
Version 1.7.6 (Mar 11, 2021)
=============================================

View File

@ -1,11 +1,5 @@
Contact
=======
Please give your feedback to us via one of the following mailing lists.
Subscription via
`www.pccluster.org <http://www.pccluster.org/mailman/listinfo/mckernel-users>`__
is needed.
- English: mckernel-users@pccluster.org
- Japanese: mckernel-users-jp@pccluster.org
Please give your feedback to us via the following mailing list: ihkmckernel@googlegroups.com

View File

@ -97,7 +97,7 @@ Clone the source code:
mkdir -p ~/src/ihk+mckernel/
cd ~/src/ihk+mckernel/
git clone --recursive -b development https://github.com/RIKEN-SysSoft/mckernel.git
git clone --recursive -b development https://github.com/ihkmckernel/mckernel.git
(Optional) Checkout to the specific branch or version:
@ -166,22 +166,6 @@ Create the tarball and the spec file:
make dist
cp mckernel-<version>.tar.gz <rpmbuild>/SOURCES
(optional) Edit the following line in ``scripts/mckernel.spec`` to change
cmake options. For example:
::
%cmake -DCMAKE_BUILD_TYPE=Release \
-DUNAME_R=%{kernel_version} \
-DKERNEL_DIR=%{kernel_dir} \
%{?cmake_libdir:-DCMAKE_INSTALL_LIBDIR=%{cmake_libdir}} \
%{?build_target:-DBUILD_TARGET=%{build_target}} \
%{?toolchain_file:-DCMAKE_TOOLCHAIN_FILE=%{toolchain_file}} \
-DENABLE_TOFU=ON -DENABLE_FUGAKU_HACKS=ON \
-DENABLE_KRM_WORKAROUND=OFF -DWITH_KRM=ON \
-DENABLE_FUGAKU_DEBUG=OFF -DENABLE_UTI=ON \
.
Create the rpm package:
When not cross-compiling:

View File

@ -666,7 +666,7 @@ IHKはLinuxに以下の機能を提供する。
このパラメタの目的は、Linuxによる空き領域の分断化が激しい状況においてメモリ予約処理時間を抑えることである。上記の状況で予約処理時間が長くなるのは、小さいサイズでの物理連続領域が大量に存在するので、小さいサイズでの要求回数が非常に大きくなるためである。
\subsubsection*{\texttt{IHK\_RESERVE\_MEM\_MAX\_SIZE\_RATIO\_ALL}}
\verb|ihk_reserve_mem()|でサイズに-1を指定した場合と\verb|IHK_RESERVE_MEM_BALANCED_ENABLE|に非ゼロを指定した場合に用いられる予約サイズを、予約時点で測定した空き容量に指定した値を乗じたものにする。なお、ゼロ以下の値または98より大きい値を設定しようとすると\verb:-EINVAL:を返す。また、デフォルト設定は98\%である。
\verb|ihk_reserve_mem()|でサイズに-1を指定した場合と\verb|IHK_RESERVE_MEM_BALANCED_ENABLE|に非ゼロを指定した場合に用いられる予約サイズを、予約時点で測定した空き容量に指定した値を乗じたものにする。なお、ゼロ以下の値を設定しようとしたり、また富岳では95、その他のシステムでは98より大きい値を設定しようとしたりすると\verb:-EINVAL:を返す。また、デフォルト設定は富岳では95\%、その他のシステムでは98\%である。
目的は、Linuxによる空き領域の分断化が激しい状況においてメモリ予約処理時間を抑えること、また予約時にLinuxのプロセスのメモリ要求が満たされない状況にならないようにすることである。

View File

@ -87,14 +87,14 @@ executable:
``<processes-per-node>`` is the number of the processes per node and
calculated by (number of MPI processes) / (number of nodes).
For example, ``<processes-per-node>`` equals 4 (=32/8) when
For example, ``<processes-per-node>`` equals 4 (=8/2) when
specifying the number of processes and nodes as follows with
Fujitsu Technical Computing Suite.
MPICH.
.. code-block:: none
#PJM --mpi "proc=32"
#PJM -L "node=8"
mpirun -n 8 -hosts host1,host2 ./cpi
(Advanced) When using Utility Thread offloading Interface (UTI)
---------------------------------------------------------------
@ -112,11 +112,11 @@ Add ``--enable-uti`` option to ``mcexec``:
Limitations
===========
1. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
#. Pseudo devices such as /dev/mem and /dev/zero are not mmap()ed
correctly even if the mmap() returns a success. An access of their
mapping receives the SIGSEGV signal.
2. clone() supports only the following flags. All the other flags cause
#. clone() supports only the following flags. All the other flags cause
clone() to return error or are simply ignored.
- CLONE_CHILD_CLEARTID
@ -126,32 +126,32 @@ Limitations
- CLONE_SIGHAND
- CLONE_VM
3. PAPI has the following restriction.
#. PAPI has the following restriction.
- Number of counters a user can use at the same time is up to the
number of the physical counters in the processor.
4. msync writes back only the modified pages mapped by the calling
#. msync writes back only the modified pages mapped by the calling
process.
5. The following syscalls always return the ENOSYS error.
#. The following syscalls always return the ENOSYS error.
- migrate_pages()
- move_pages()
- set_robust_list()
6. The following syscalls always return the EOPNOTSUPP error.
#. The following syscalls always return the EOPNOTSUPP error.
- arch_prctl(ARCH_SET_GS)
- signalfd()
7. signalfd4() returns a fd, but signal is not notified through the fd.
#. signalfd4() returns a fd, but signal is not notified through the fd.
8. set_rlimit sets the limit values but they are not enforced.
#. set_rlimit sets the limit values but they are not enforced.
9. Address randomization is not supported.
#. Address randomization is not supported.
10. brk() extends the heap more than requested when -h (extend-heap-by=)
#. brk() extends the heap more than requested when -h (extend-heap-by=)
option of mcexec is used with the value larger than 4 KiB.
syscall_pwrite02 of LTP would fail for this reason. This is because
the test expects that the end of the heap is set to the same address
@ -161,91 +161,86 @@ Limitations
than the requested. Therefore, the expected segmentation violation
doesn't occur.
11. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no fixed
correspondence between a McKernel thread which issues the system
call and a mcexec thread which handles the offload request.
#. setpriority()/getpriority() won't work. They might set/get the
priority of a random mcexec thread. This is because there's no fixed
correspondence between a McKernel thread which issues the system
call and a mcexec thread which handles the offload request.
12. mbind() can set the policy but it is not used when allocating
physical pages.
#. mbind() can set the policy but it is not used when allocating
physical pages.
13. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
#. MPOL_F_RELATIVE_NODES and MPOL_INTERLEAVE flags for
set_mempolicy()/mbind() are not supported.
14. The MPOL_BIND policy for set_mempolicy()/mbind() works as the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are running
out of pages but continues to search pages in the other nodes.
#. The MPOL_BIND policy for set_mempolicy()/mbind() works as the same
as the MPOL_PREFERRED policy. That is, the physical page allocator
doesn't give up the allocation when the specified nodes are running
out of pages but continues to search pages in the other nodes.
15. Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and
later. In addition, crash_kexec_post_notifiers kernel argument must
be given to Linux kernel.
#. Kernel dump on Linux panic requires Linux kernel CentOS-7.4 and
later. In addition, crash_kexec_post_notifiers kernel argument must
be given to Linux kernel.
16. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, it changes that of the mcexec worker thread which takes the
system-call offload request.
#. setfsuid()/setfsgid() cannot change the id of the calling thread.
Instead, it changes that of the mcexec worker thread which takes the
system-call offload request.
17. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
#. mmap (hugeTLBfs): The physical pages corresponding to a map are
released when no McKernel process exists. The next map gets fresh
physical pages.
18. Sticky bit on executable file has no effect.
#. Sticky bit on executable file has no effect.
19. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to the Linux bug, found in
Linux-3.10 and fixed in the later version. One way to circumvent
this is to always assign the same CPU set to McKernel.
#. Linux (RHEL-7 for x86_64) could hang when offlining CPUs in the
process of booting McKernel due to the Linux bug, found in
Linux-3.10 and fixed in the later version. One way to circumvent
this is to always assign the same CPU set to McKernel.
20. madvise:
#. madvise:
- MADV_HWPOISON and MADV_SOFT_OFFLINE always returns -EPERM.
- MADV_MERGEABLE and MADV_UNMERGEABLE always returns -EINVAL.
- MADV_HUGEPAGE and MADV_NOHUGEPAGE on file map returns -EINVAL
except on RHEL-8 for aarch64.
21. brk() and mmap() don't report out-of-memory through their return
values. Instead, page-fault reports the error.
#. brk() and mmap() don't report out-of-memory through their return
values. Instead, page-fault reports the error.
22. Anonymous mmap pre-maps requested number of pages when contiguous
pages are available. Demand paging is used when not available.
#. Anonymous mmap pre-maps requested number of pages when contiguous
pages are available. Demand paging is used when not available.
23. Mixing page sizes in anonymous shared mapping is not allowed. mmap
creates vm_range with one page size. And munmap or mremap that needs
the reduced page size changes the sizes of all the pages of the
vm_range.
#. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
24. ihk_os_getperfevent() could time-out when invoked from Fujitsu TCS
(job-scheduler).
#. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.
25. The behaviors of madvise and mbind are changed to do nothing and
report success as a workaround for Fugaku.
#. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.
26. mmap() allows unlimited overcommit. Note that it corresponds to
setting sysctl ``vm.overcommit_memory`` to 1.
#. mlockall() is not supported and returns -EPERM.
27. mlockall() is not supported and returns -EPERM.
#. munlockall() is not supported and returns zero.
28. munlockall() is not supported and returns zero.
#. (Fujitsu TCS-only) A job following the one in which __mcctrl_os_read_write_cpu_register() returns ``-ETIME`` fails because xos_hwb related CPU state isn't finalized. You can tell if the function returned ``-ETIME`` by checking if the following line appeared in the Linux kernel message:
29. scheduling behavior is not Linux compatible. For example, sometimes one of the two processes on the same CPU continues to run after yielding.
::
30. (Fujitsu TCS-only) A job following the one in which __mcctrl_os_read_write_cpu_register() returns ``-ETIME`` fails because xos_hwb related CPU state isn't finalized. You can tell if the function returned ``-ETIME`` by checking if the following line appeared in the Linux kernel message:
__mcctrl_os_read_write_cpu_register: ERROR sending IKC msg: -62
::
You can re-initialize xos_hwb related CPU state by the following command:
__mcctrl_os_read_write_cpu_register: ERROR sending IKC msg: -62
::
You can re-initialize xos_hwb related CPU state by the following command:
sudo systemctl restart xos_hwb
::
#. System calls can write the mcexec VMAs with PROT_WRITE flag not
set. This is because we never turn off PROT_WRITE of the mcexec
VMAs to circumvent the issue "set_host_vma(): do NOT read protect
Linux VMA".
sudo systemctl restart xos_hwb
#. procfs entry creation done by Linux work queue could starve when
Linux CPUs are flooded with system call offloads. LTP-2019
sendmsg02 causes this issue.
31. System calls can write the mcexec VMAs with PROT_WRITE flag not
set. This is because we never turn off PROT_WRITE of the mcexec
VMAs to circumvent the issue "set_host_vma(): do NOT read protect
Linux VMA".
32. procfs entry creation done by Linux work queue could starve when
Linux CPUs are flooded with system call offloads. LTP-2019
sendmsg02 causes this issue.
#. Linux kernel dump file doesn't include the memory allocated to McKernel. This is because of the issues in the implementation of the panic notifier handler.

View File

@ -27,9 +27,12 @@
#define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
void *vdso_start;
void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
void *mcctrl_vdso_start;
void *mcctrl_vdso_end;
static struct vm_special_mapping *mcctrl_vdso_spec;
#if defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 4)
static struct vdso_data **mcctrl_vdso_data;
#endif
#endif
#ifdef ENABLE_TOFU
@ -68,17 +71,36 @@ void __mcctrl_tof_utofu_mn_invalidate_range_end(
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
mcctrl_vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!mcctrl_vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
mcctrl_vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!mcctrl_vdso_end))
return -EFAULT;
vdso_spec = (void *) kallsyms_lookup_name("vdso_spec");
if (WARN_ON(!vdso_spec))
#if defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 4)
mcctrl_vdso_spec = (void *) kallsyms_lookup_name("aarch64_vdso_maps");
#else
mcctrl_vdso_spec = (void *) kallsyms_lookup_name("vdso_spec");
#endif
if (WARN_ON(!mcctrl_vdso_spec))
return -EFAULT;
if (WARN_ON(!mcctrl_vdso_spec[0].name ||
strcmp(mcctrl_vdso_spec[0].name, "[vvar]")))
return -EFAULT;
if (WARN_ON(!mcctrl_vdso_spec[1].name ||
strcmp(mcctrl_vdso_spec[1].name, "[vdso]")))
return -EFAULT;
#if defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 4)
mcctrl_vdso_data = (struct vdso_data **) kallsyms_lookup_name("vdso_data");
if (WARN_ON(!mcctrl_vdso_data || !*mcctrl_vdso_data))
return -EFAULT;
#endif
#endif
#ifdef ENABLE_TOFU
@ -212,7 +234,7 @@ static long elf_search_vdso_sigtramp(void)
Elf64_Sym *sym = NULL;
/* ELF header */
eh = (Elf64_Ehdr *)vdso_start;
eh = (Elf64_Ehdr *)mcctrl_vdso_start;
if (eh == NULL) {
D("vdso_start is NULL.\n");
goto out;
@ -233,8 +255,8 @@ static long elf_search_vdso_sigtramp(void)
/* Search dynsym-table and dynstr-table offset
* from section header table
*/
tmp_sh = (Elf64_Shdr *)(vdso_start + eh->e_shoff);
shstr = vdso_start + (tmp_sh + eh->e_shstrndx)->sh_offset;
tmp_sh = (Elf64_Shdr *)(mcctrl_vdso_start + eh->e_shoff);
shstr = mcctrl_vdso_start + (tmp_sh + eh->e_shstrndx)->sh_offset;
for (i = 0; i < eh->e_shnum; i++, tmp_sh++) {
if (tmp_sh->sh_type == SHT_DYNSYM) {
sym_sh = tmp_sh;
@ -242,7 +264,7 @@ static long elf_search_vdso_sigtramp(void)
if (tmp_sh->sh_type == SHT_STRTAB &&
!strcmp(&shstr[tmp_sh->sh_name], ".dynstr")) {
dynstr = vdso_start + tmp_sh->sh_offset;
dynstr = mcctrl_vdso_start + tmp_sh->sh_offset;
}
}
@ -257,7 +279,7 @@ static long elf_search_vdso_sigtramp(void)
}
/* Search __kernel_rt_sigreturn offset from dynsym-table */
sym = (Elf64_Sym *)(vdso_start + sym_sh->sh_offset);
sym = (Elf64_Sym *)(mcctrl_vdso_start + sym_sh->sh_offset);
for (i = 0; (i * sym_sh->sh_entsize) < sym_sh->sh_size; i++, sym++) {
if (!strcmp(dynstr + sym->st_name, "__kernel_rt_sigreturn")) {
ans = sym->st_value;
@ -282,9 +304,9 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0)
vvar_map = &(*vdso_spec)[0];
vdso_map = &(*vdso_spec)[1];
nr_vdso_page = ((vdso_end - vdso_start) + PAGE_SIZE - 1) >> PAGE_SHIFT;
vvar_map = &mcctrl_vdso_spec[0];
vdso_map = &mcctrl_vdso_spec[1];
nr_vdso_page = ((mcctrl_vdso_end - mcctrl_vdso_start) + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* VDSO pages */
//D("nr_vdso_page:%d\n", nr_vdso_page);
@ -298,7 +320,11 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
/* VVAR page */
//D("vdso->vvar_phys:0x#lx\n", vdso->vvar_phys);
#if defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 4)
vdso->vvar_phys = __pfn_to_phys(sym_to_pfn(*mcctrl_vdso_data));
#else
vdso->vvar_phys = page_to_phys(*vvar_map->pages);
#endif
/* offsets */
vdso->lbase = VDSO_LBASE;
@ -916,7 +942,7 @@ int __mcctrl_tof_utofu_release_handler(struct inode *inode, struct file *filp,
isp.arg = f2pfd->fd;
ret = mcctrl_ikc_send_wait(f2pfd->os, ppd->ikc_target_cpu,
&isp, -20, NULL, NULL, 0);
&isp, -1000, NULL, NULL, 0);
if (ret != 0) {
pr_err("%s: WARNING: IKC req for PID: %d, fd: %d failed\n",
__func__, f2pfd->pid, f2pfd->fd);

View File

@ -6,8 +6,13 @@
#include <asm/vgtod.h>
#include "config.h"
#include "../../mcctrl.h"
#include "../../kallsyms_compat.h"
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0) && defined(CONFIG_X86_VSYSCALL_EMULATION)
#define gtod (&VVAR(vsyscall_gtod_data))
#else
#define gtod NULL
#endif
//#define SC_DEBUG
@ -24,36 +29,36 @@ static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif
static void *__vvar_page;
static void *__vvar_page_ptr;
static long *hpet_address;
static void **hv_clock;
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
_vdso_image_64 = (void *) kallsyms_lookup_name("vdso_image_64");
_vdso_image_64 = (void *) mcctrl_lookup_name("vdso_image_64");
if (WARN_ON(!_vdso_image_64))
return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
vdso_start = (void *) mcctrl_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
vdso_end = (void *) mcctrl_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_pages = (void *) kallsyms_lookup_name("vdso_pages");
vdso_pages = (void *) mcctrl_lookup_name("vdso_pages");
if (WARN_ON(!vdso_pages))
return -EFAULT;
#endif
__vvar_page = (void *) kallsyms_lookup_name("__vvar_page");
if (WARN_ON(!__vvar_page))
__vvar_page_ptr = (void *) &__vvar_page;
if (WARN_ON(!__vvar_page_ptr))
return -EFAULT;
hpet_address = (void *) kallsyms_lookup_name("hpet_address");
hv_clock = (void *) kallsyms_lookup_name("hv_clock");
hpet_address = (void *) mcctrl_lookup_name("hpet_address");
hv_clock = (void *) mcctrl_lookup_name("hv_clock");
return 0;
}
@ -93,18 +98,18 @@ reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsign
#define DESIRED_USER_END 0x800000000000
#define GAP_FOR_MCEXEC 0x008000000000UL
end = DESIRED_USER_END;
down_write(&current->mm->mmap_sem);
mmap_write_lock(current->mm);
vma = find_vma(current->mm, 0);
if (vma) {
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
up_write(&current->mm->mmap_sem);
mmap_write_unlock(current->mm);
#endif
start = reserve_user_space_common(usrdata, start, end);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
up_write(&current->mm->mmap_sem);
mmap_write_unlock(current->mm);
#endif
mutex_unlock(&usrdata->reserve_lock);
@ -161,19 +166,19 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(-3 * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(-2 * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(vdso->vdso_npages * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
vdso->vvar_is_global = 1;
vdso->vvar_virt = (void *)fix_to_virt(VVAR_PAGE);
vdso->vvar_phys = virt_to_phys(__vvar_page);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#endif
/* HPET page */

View File

@ -126,8 +126,8 @@ static int load_elf(struct linux_binprm *bprm
if(st == 0){
off = p & ~PAGE_MASK;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)
rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, FOLL_FORCE, &page, NULL, NULL);
rc = get_user_pages_remote(bprm->mm,
bprm->p, 1, FOLL_FORCE, &page, NULL);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0)
rc = get_user_pages_remote(current, bprm->mm,
bprm->p, 1, FOLL_FORCE, &page, NULL);
@ -234,7 +234,7 @@ static int load_elf(struct linux_binprm *bprm
kfree(pbuf);
return rc;
}
rc = copy_strings_kernel(1, &bprm->interp, bprm);
rc = copy_string_kernel(bprm->interp, bprm);
if (rc < 0){
fput(file);
kfree(pbuf);
@ -242,7 +242,7 @@ static int load_elf(struct linux_binprm *bprm
}
bprm->argc++;
wp = MCEXEC_PATH;
rc = copy_strings_kernel(1, &wp, bprm);
rc = copy_string_kernel(wp, bprm);
if (rc){
fput(file);
kfree(pbuf);
@ -260,19 +260,25 @@ static int load_elf(struct linux_binprm *bprm
fput(bprm->file);
bprm->file = file;
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0)
rc = prepare_binprm(bprm);
if (rc < 0){
kfree(pbuf);
return rc;
}
#endif
kfree(pbuf);
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0)
return search_binary_handler(bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
, regs
#endif
);
#else
return -ENOEXEC;
#endif
}
static struct linux_binfmt mcexec_format = {

View File

@ -414,7 +414,7 @@ static void release_handler(ihk_os_t os, void *param)
dprintk("%s: SCD_MSG_CLEANUP_PROCESS, info: %p, cpu: %d\n",
__FUNCTION__, info, info->cpu);
ret = mcctrl_ikc_send_wait(os, info->cpu,
&isp, -20, NULL, NULL, 0);
&isp, -5000, NULL, NULL, 0);
if (ret != 0) {
printk("%s: WARNING: failed to send IKC msg: %d\n",
__func__, ret);
@ -513,8 +513,6 @@ static DECLARE_WAIT_QUEUE_HEAD(signalq);
struct mcctrl_signal_desc {
struct mcctrl_signal msig;
struct mcctrl_wakeup_desc wakeup;
void *addrs[1];
};
static long mcexec_send_signal(ihk_os_t os, struct signal_desc *sigparam)
@ -554,7 +552,7 @@ static long mcexec_send_signal(ihk_os_t os, struct signal_desc *sigparam)
isp.pid = sig.pid;
isp.arg = virt_to_phys(msigp);
rc = mcctrl_ikc_send_wait(os, sig.cpu, &isp, 0, &desc->wakeup,
rc = mcctrl_ikc_send_wait(os, sig.cpu, &isp, -1000, NULL,
&do_free, 1, desc);
if (rc < 0) {
printk("mcexec_send_signal: mcctrl_ikc_send ret=%d\n", rc);
@ -980,7 +978,9 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
node = linux_numa_2_mckernel_numa(udp,
cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu_prev)));
for_each_cpu_not(cpu, cpus_used) {
for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
if (cpumask_test_cpu(cpu, cpus_used))
continue;
/* Invalid CPU? */
if (cpu >= udp->cpu_info->n_cpus)
break;
@ -1799,7 +1799,7 @@ out:
}
LIST_HEAD(mckernel_exec_files);
DEFINE_SEMAPHORE(mckernel_exec_file_lock);
DEFINE_SEMAPHORE(mckernel_exec_file_lock, 1);
struct mckernel_exec_file {
@ -2010,7 +2010,7 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
fullpath = d_path(&file->f_path, pathbuf, PATH_MAX);
if (IS_ERR(fullpath)) {
retval = PTR_ERR(fullpath);
goto out_free;
goto out_put_file;
}
mcef = kmalloc(sizeof(*mcef), GFP_KERNEL);
@ -2243,8 +2243,6 @@ long mcctrl_perf_num(ihk_os_t os, unsigned long arg)
struct mcctrl_perf_ctrl_desc {
struct perf_ctrl_desc desc;
struct mcctrl_wakeup_desc wakeup;
void *addrs[1];
};
#define wakeup_desc_of_perf_desc(_desc) \
(&container_of((_desc), struct mcctrl_perf_ctrl_desc, desc)->wakeup)
@ -2310,9 +2308,7 @@ long mcctrl_perf_set(ihk_os_t os, struct ihk_perf_event_attr *__user arg)
isp.arg = virt_to_phys(perf_desc);
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp,
msecs_to_jiffies(10000),
wakeup_desc_of_perf_desc(perf_desc),
ret = mcctrl_ikc_send_wait(os, j, &isp, 10000, NULL,
&need_free, 1, perf_desc);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send_wait ret=%d\n",
@ -2382,9 +2378,7 @@ long mcctrl_perf_get(ihk_os_t os, unsigned long *__user arg)
isp.arg = virt_to_phys(perf_desc);
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp,
msecs_to_jiffies(10000),
wakeup_desc_of_perf_desc(perf_desc),
ret = mcctrl_ikc_send_wait(os, j, &isp, 10000, NULL,
&need_free, 1, perf_desc);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send_wait ret=%d\n",
@ -2454,9 +2448,8 @@ long mcctrl_perf_enable(ihk_os_t os)
return -EINVAL;
}
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp, 0,
wakeup_desc_of_perf_desc(perf_desc),
&need_free, 1, perf_desc);
ret = mcctrl_ikc_send_wait(os, j, &isp, 0, NULL,
&need_free, 1, perf_desc);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send_wait ret=%d\n",
@ -2522,8 +2515,7 @@ long mcctrl_perf_disable(ihk_os_t os)
return -EINVAL;
}
for (j = 0; j < info->n_cpus; j++) {
ret = mcctrl_ikc_send_wait(os, j, &isp, 0,
wakeup_desc_of_perf_desc(perf_desc),
ret = mcctrl_ikc_send_wait(os, j, &isp, 0, NULL,
&need_free, 1, perf_desc);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send_wait ret=%d\n",
@ -3272,8 +3264,9 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *_desc)
&lcache_topo->shared_cpu_map);
}
else {
cpumask_complement(wkmask,
&lcache_topo->shared_cpu_map);
bitmap_complement(cpumask_bits(wkmask),
cpumask_bits(&lcache_topo->shared_cpu_map),
nr_cpumask_bits);
cpumask_and(cpuset, cpuset, wkmask);
}
}
@ -3286,8 +3279,9 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *_desc)
&lcache_topo->shared_cpu_map);
}
else {
cpumask_complement(wkmask,
&lcache_topo->shared_cpu_map);
bitmap_complement(cpumask_bits(wkmask),
cpumask_bits(&lcache_topo->shared_cpu_map),
nr_cpumask_bits);
cpumask_and(cpuset, cpuset, wkmask);
}
}
@ -3300,8 +3294,9 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *_desc)
&lcache_topo->shared_cpu_map);
}
else {
cpumask_complement(wkmask,
&lcache_topo->shared_cpu_map);
bitmap_complement(cpumask_bits(wkmask),
cpumask_bits(&lcache_topo->shared_cpu_map),
nr_cpumask_bits);
cpumask_and(cpuset, cpuset, wkmask);
}
}

View File

@ -34,6 +34,7 @@
#include <linux/version.h>
#include "mcctrl.h"
#include <ihk/ihk_host_user.h>
#include "kallsyms_compat.h"
#define OS_MAX_MINOR 64
@ -237,75 +238,84 @@ struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
int ret;
/* Initialize kallsyms compatibility layer */
ret = init_kallsyms_lookup();
if (ret) {
pr_err("Failed to initialize kallsyms compatibility layer: %d\n", ret);
return ret;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
mcctrl_sys_mount = (void *) kallsyms_lookup_name("ksys_mount");
mcctrl_sys_mount = (void *) mcctrl_lookup_name("ksys_mount");
#else
mcctrl_sys_mount = (void *) kallsyms_lookup_name("sys_mount");
mcctrl_sys_mount = (void *) mcctrl_lookup_name("sys_mount");
#if defined(CONFIG_X86_64_SMP)
if (!mcctrl_sys_mount)
mcctrl_sys_mount =
(void *) kallsyms_lookup_name("__x64_sys_mount");
(void *) mcctrl_lookup_name("__x64_sys_mount");
#endif
#endif
if (WARN_ON(!mcctrl_sys_mount))
return -EFAULT;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
mcctrl_sys_umount = (void *) kallsyms_lookup_name("ksys_umount");
mcctrl_sys_umount = (void *) mcctrl_lookup_name("ksys_umount");
#else
mcctrl_sys_umount = (void *) kallsyms_lookup_name("sys_umount");
mcctrl_sys_umount = (void *) mcctrl_lookup_name("sys_umount");
#if defined(CONFIG_X86_64_SMP)
if (!mcctrl_sys_umount)
mcctrl_sys_umount =
(void *) kallsyms_lookup_name("__x64_sys_umount");
(void *) mcctrl_lookup_name("__x64_sys_umount");
#endif
#endif
if (WARN_ON(!mcctrl_sys_umount))
return -EFAULT;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
mcctrl_sys_unshare = (void *) kallsyms_lookup_name("ksys_unshare");
mcctrl_sys_unshare = (void *) mcctrl_lookup_name("ksys_unshare");
#else
mcctrl_sys_unshare = (void *) kallsyms_lookup_name("sys_unshare");
mcctrl_sys_unshare = (void *) mcctrl_lookup_name("sys_unshare");
#if defined(CONFIG_X86_64_SMP)
if (!mcctrl_sys_unshare)
mcctrl_sys_unshare =
(void *) kallsyms_lookup_name("__x64_sys_unshare");
(void *) mcctrl_lookup_name("__x64_sys_unshare");
#endif
#endif
if (WARN_ON(!mcctrl_sys_unshare))
return -EFAULT;
mcctrl_sched_setaffinity =
(void *) kallsyms_lookup_name("sched_setaffinity");
(void *) mcctrl_lookup_name("sched_setaffinity");
if (WARN_ON(!mcctrl_sched_setaffinity))
return -EFAULT;
mcctrl_sched_setscheduler_nocheck =
(void *) kallsyms_lookup_name("sched_setscheduler_nocheck");
(void *) mcctrl_lookup_name("sched_setscheduler_nocheck");
if (WARN_ON(!mcctrl_sched_setscheduler_nocheck))
return -EFAULT;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
mcctrl_sys_readlinkat = (void *)kallsyms_lookup_name("do_readlinkat");
mcctrl_sys_readlinkat = (void *)mcctrl_lookup_name("do_readlinkat");
#else
mcctrl_sys_readlinkat = (void *)kallsyms_lookup_name("sys_readlinkat");
mcctrl_sys_readlinkat = (void *)mcctrl_lookup_name("sys_readlinkat");
#if defined(CONFIG_X86_64_SMP)
if (!mcctrl_sys_readlinkat)
mcctrl_sys_readlinkat =
(void *) kallsyms_lookup_name("__x64_sys_readlinkat");
(void *) mcctrl_lookup_name("__x64_sys_readlinkat");
#endif
#endif
if (WARN_ON(!mcctrl_sys_readlinkat))
return -EFAULT;
mcctrl_zap_page_range =
(void *) kallsyms_lookup_name("zap_page_range");
(void *) mcctrl_lookup_name("zap_page_range");
if (WARN_ON(!mcctrl_zap_page_range))
return -EFAULT;
mcctrl_hugetlbfs_inode_operations =
(void *) kallsyms_lookup_name("hugetlbfs_inode_operations");
(void *) mcctrl_lookup_name("hugetlbfs_inode_operations");
if (WARN_ON(!mcctrl_hugetlbfs_inode_operations))
return -EFAULT;
@ -359,6 +369,7 @@ static void __exit mcctrl_exit(void)
binfmt_mcexec_exit();
uti_attr_finalize();
cleanup_kallsyms_lookup();
#ifdef ENABLE_TOFU
mcctrl_tofu_restore_release_handlers();
#endif

View File

@ -51,7 +51,7 @@ static long uti_wait_event(void *_resp, unsigned long nsec_timeout)
}
}
static int uti_clock_gettime(clockid_t clk_id, struct timespec *tp)
static int uti_clock_gettime(clockid_t clk_id, struct timespec64 *tp)
{
int ret = 0;
struct timespec64 ts64;
@ -60,7 +60,7 @@ static int uti_clock_gettime(clockid_t clk_id, struct timespec *tp)
clk_id, CLOCK_REALTIME, CLOCK_MONOTONIC);
switch (clk_id) {
case CLOCK_REALTIME:
getnstimeofday64(&ts64);
ktime_get_real_ts64(&ts64);
tp->tv_sec = ts64.tv_sec;
tp->tv_nsec = ts64.tv_nsec;
dprintk("%s: CLOCK_REALTIME,%ld.%09ld\n", __func__,
@ -182,8 +182,6 @@ static int uti_remote_page_fault(struct mcctrl_usrdata *usrdata,
struct mcctrl_per_proc_data *ppd, int tid, int cpu)
{
int error;
struct mcctrl_wakeup_desc *desc;
int do_frees = 1;
struct ikc_scd_packet packet;
/* Request page fault */
@ -192,20 +190,9 @@ static int uti_remote_page_fault(struct mcctrl_usrdata *usrdata,
packet.fault_reason = reason;
packet.fault_tid = tid;
/* we need to alloc desc ourselves because GFP_ATOMIC */
retry_alloc:
desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
if (!desc) {
pr_warn("WARNING: coudln't alloc remote page fault wait desc, retrying..\n");
goto retry_alloc;
}
/* packet->target_cpu was set in rus_vm_fault if a thread was found */
error = mcctrl_ikc_send_wait(usrdata->os, cpu, &packet,
0, desc, &do_frees, 0);
if (do_frees) {
kfree(desc);
}
0, NULL, NULL, 0);
if (error < 0) {
pr_warn("%s: WARNING: failed to request uti remote page fault :%d\n",
__func__, error);
@ -1074,12 +1061,14 @@ static int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
switch (cmd) {
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
/* fallthrough */
case FUTEX_WAIT_BITSET:
ret = futex_wait(uaddr, fshared, val, timeout,
val3, clockrt, uti_info);
break;
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
/* fallthrough */
case FUTEX_WAKE_BITSET:
ret = futex_wake(uaddr, fshared, val, val3, uti_info);
break;
@ -1140,7 +1129,7 @@ long do_futex(int n, unsigned long arg0, unsigned long arg1,
int op = (int)arg1;
uint32_t val = (uint32_t)arg2;
struct timespec *utime = (struct timespec *)arg3;
struct timespec ts;
struct timespec64 ts;
uint32_t *uaddr2 = (uint32_t *)arg4;
uint32_t val3 = (uint32_t)arg5;
int flags = op;
@ -1171,12 +1160,12 @@ long do_futex(int n, unsigned long arg0, unsigned long arg1,
}
dprintk("%s: utime=%ld.%09ld\n", __func__, ts.tv_sec, ts.tv_nsec);
if (!timespec_valid(&ts)) {
if (!timespec64_valid(&ts)) {
return -EINVAL;
}
if (op == FUTEX_WAIT_BITSET) { /* User passed absolute time */
struct timespec ats;
struct timespec64 ats;
ret = uti_clock_gettime((flags & FUTEX_CLOCK_REALTIME) ?
CLOCK_REALTIME : CLOCK_MONOTONIC, &ats);

View File

@ -58,23 +58,41 @@ void mcctrl_os_read_write_cpu_response(ihk_os_t os,
struct ikc_scd_packet *pisp);
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
/* Assumes usrdata->wakeup_descs_lock taken */
static void mcctrl_wakeup_desc_cleanup(ihk_os_t os,
struct mcctrl_wakeup_desc *desc)
static void mcctrl_wakeup_desc_put(struct mcctrl_wakeup_desc *desc,
struct mcctrl_usrdata *usrdata, int free_addrs)
{
unsigned long irqflags;
int i;
list_del(&desc->chain);
for (i = 0; i < desc->free_addrs_count; i++) {
kfree(desc->free_addrs[i]);
if (!refcount_dec_and_test(&desc->count)) {
return;
}
spin_lock_irqsave(&usrdata->wakeup_descs_lock, irqflags);
list_del(&desc->chain);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, irqflags);
if (free_addrs) {
for (i = 0; i < desc->free_addrs_count; i++) {
kfree(desc->free_addrs[i]);
}
}
if (desc->free_at_put)
kfree(desc);
}
static void mcctrl_wakeup_cb(ihk_os_t os, struct ikc_scd_packet *packet)
{
struct mcctrl_wakeup_desc *desc = packet->reply;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
/* destroy_ikc_channels must have cleaned up descs */
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n",
__func__);
return;
}
WRITE_ONCE(desc->err, packet->err);
@ -85,29 +103,25 @@ static void mcctrl_wakeup_cb(ihk_os_t os, struct ikc_scd_packet *packet)
* wake up opportunistically between this set and the wake_up call.
*
* If the other side is no longer waiting, free the memory that was
* left for us.
* left for us. The caller has been notified not to free.
*/
if (cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
/* destroy_ikc_channels must have cleaned up descs */
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n",
__func__);
return;
}
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
mcctrl_wakeup_desc_cleanup(os, desc);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
mcctrl_wakeup_desc_put(desc, usrdata, 1);
return;
}
/*
* Notify waiter before dropping reference to make sure
* wait queue is still valid.
*/
wake_up_interruptible(&desc->wq);
mcctrl_wakeup_desc_put(desc, usrdata, 0);
}
/* do_frees: 1 when caller should free free_addrs[], 0 otherwise */
/*
* do_frees: 1 when caller should free free_addrs[], 0 otherwise
* timeout: timeout in milliseconds
*/
int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
long int timeout, struct mcctrl_wakeup_desc *desc,
int *do_frees, int free_addrs_count, ...)
@ -115,35 +129,60 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
int ret, i;
int alloc_desc = (desc == NULL);
va_list ap;
unsigned long flags;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n",
__func__);
return -EINVAL;
}
if (free_addrs_count)
*do_frees = 1;
if (alloc_desc)
desc = kmalloc(sizeof(struct mcctrl_wakeup_desc) +
(free_addrs_count + 1) * sizeof(void *),
GFP_KERNEL);
GFP_ATOMIC);
if (!desc) {
pr_warn("%s: Could not allocate wakeup descriptor", __func__);
return -ENOMEM;
}
pisp->reply = desc;
va_start(ap, free_addrs_count);
for (i = 0; i < free_addrs_count; i++) {
desc->free_addrs[i] = va_arg(ap, void*);
}
va_end(ap);
if (alloc_desc)
desc->free_addrs[free_addrs_count++] = desc;
desc->free_addrs_count = free_addrs_count;
/* Only free at put time if allocated internally */
desc->free_at_put = 0;
if (alloc_desc)
desc->free_at_put = 1;
init_waitqueue_head(&desc->wq);
/* One for the caller and one for the call-back */
refcount_set(&desc->count, 2);
/* XXX: make this a hash-table? */
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
WRITE_ONCE(desc->err, 0);
WRITE_ONCE(desc->status, 0);
ret = mcctrl_ikc_send(os, cpu, pisp);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send failed: %d\n", __func__, ret);
if (alloc_desc)
kfree(desc);
/* Failed to send msg, put twice */
mcctrl_wakeup_desc_put(desc, usrdata, 0);
mcctrl_wakeup_desc_put(desc, usrdata, 0);
return ret;
}
@ -180,28 +219,16 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
* the callback it will need to free things for us
*/
if (!cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
mcctrl_wakeup_desc_put(desc, usrdata, 0);
if (!usrdata) {
pr_err("%s: error: mcctrl_usrdata not found\n",
__func__);
ret = ret < 0 ? ret : -EINVAL;
goto out;
}
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
if (do_frees)
*do_frees = 0;
return ret < 0 ? ret : -ETIME;
}
ret = READ_ONCE(desc->err);
out:
if (alloc_desc)
kfree(desc);
mcctrl_wakeup_desc_put(desc, usrdata, 0);
return ret;
}
@ -605,10 +632,15 @@ void destroy_ikc_channels(ihk_os_t os)
ihk_ikc_destroy_channel(usrdata->ikc2linux[i]);
}
}
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_for_each_entry_safe(mwd_entry, mwd_next,
&usrdata->wakeup_descs_list, chain) {
mcctrl_wakeup_desc_cleanup(os, mwd_entry);
&usrdata->wakeup_descs_list, chain) {
list_del(&mwd_entry->chain);
for (i = 0; i < mwd_entry->free_addrs_count; i++) {
kfree(mwd_entry->free_addrs[i]);
}
}
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);

View File

@ -0,0 +1,105 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_REFCOUNT_H
#define _LINUX_REFCOUNT_H
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/spinlock_types.h>
struct mutex;
/**
 * struct refcount_t - variant of atomic_t specialized for reference counts
 * @refs: atomic_t counter field
 *
 * The counter saturates at UINT_MAX and will not move once
 * there. This avoids wrapping the counter and causing 'spurious'
 * use-after-free bugs.
 *
 * NOTE(review): the static inline fallbacks defined further down in this
 * header forward straight to atomic_add()/atomic_inc()/atomic_dec() and
 * friends, with no saturation check of their own. The saturation described
 * above therefore presumably only holds where a saturating implementation
 * is supplied from elsewhere - confirm before relying on it.
 */
typedef struct refcount_struct {
atomic_t refs;
} refcount_t;
/* Static initializer: builds a refcount_t whose counter starts at n. */
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
/**
 * refcount_set - store a new value in a refcount
 * @r: the refcount to update
 * @n: value written to the counter
 *
 * Thin wrapper around atomic_set() on the embedded counter.
 */
static inline void refcount_set(refcount_t *r, unsigned int n)
{
	atomic_t *counter = &r->refs;

	atomic_set(counter, n);
}
/**
* refcount_read - get a refcount's value
* @r: the refcount
*
* Return: the refcount's value
*/
static inline unsigned int refcount_read(const refcount_t *r)
{
return atomic_read(&r->refs);
}
#ifdef CONFIG_REFCOUNT_FULL
extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
extern void refcount_add(unsigned int i, refcount_t *r);
extern __must_check bool refcount_inc_not_zero(refcount_t *r);
extern void refcount_inc(refcount_t *r);
extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
extern __must_check bool refcount_dec_and_test(refcount_t *r);
extern void refcount_dec(refcount_t *r);
#else
# ifdef CONFIG_ARCH_HAS_REFCOUNT
# include <asm/refcount.h>
# else
/*
 * Fallback refcount operations: each one forwards to the matching atomic_t
 * primitive on the counter embedded in the refcount_t.
 */

/* Add @i to @r via atomic_add_unless(..., 0); reports that call's result. */
static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
{
	atomic_t *counter = &r->refs;

	return atomic_add_unless(counter, i, 0);
}

/* Add @i to @r unconditionally. */
static inline void refcount_add(unsigned int i, refcount_t *r)
{
	atomic_t *counter = &r->refs;

	atomic_add(i, counter);
}

/* Increment @r via atomic_add_unless(..., 1, 0); reports that call's result. */
static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
{
	atomic_t *counter = &r->refs;

	return atomic_add_unless(counter, 1, 0);
}

/* Increment @r unconditionally. */
static inline void refcount_inc(refcount_t *r)
{
	atomic_t *counter = &r->refs;

	atomic_inc(counter);
}

/* Subtract @i from @r; reports the result of atomic_sub_and_test(). */
static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
	atomic_t *counter = &r->refs;

	return atomic_sub_and_test(i, counter);
}

/* Decrement @r; reports the result of atomic_dec_and_test(). */
static inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
	atomic_t *counter = &r->refs;

	return atomic_dec_and_test(counter);
}

/* Decrement @r unconditionally. */
static inline void refcount_dec(refcount_t *r)
{
	atomic_t *counter = &r->refs;

	atomic_dec(counter);
}
# endif /* !CONFIG_ARCH_HAS_REFCOUNT */
#endif /* CONFIG_REFCOUNT_FULL */
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
spinlock_t *lock,
unsigned long *flags);
#endif /* _LINUX_REFCOUNT_H */

View File

@ -0,0 +1,72 @@
/* kallsyms_compat.h - Compatibility layer for kallsyms_lookup_name */
#ifndef KALLSYMS_COMPAT_H
#define KALLSYMS_COMPAT_H
#include <linux/version.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
/* kallsyms_lookup_name is no longer exported since kernel 5.7 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0)
/* Function pointer for kallsyms_lookup_name */
static unsigned long (*mcctrl_kallsyms_lookup_name)(const char *name);
/* Kprobe-based approach to get kallsyms_lookup_name function pointer */
static struct kprobe kp_kallsyms = {
.symbol_name = "kallsyms_lookup_name"
};
static int init_kallsyms_lookup(void)
{
int ret;
ret = register_kprobe(&kp_kallsyms);
if (ret < 0) {
pr_err("register_kprobe failed, returned %d\n", ret);
return ret;
}
mcctrl_kallsyms_lookup_name = (unsigned long (*)(const char *))kp_kallsyms.addr;
unregister_kprobe(&kp_kallsyms);
if (!mcctrl_kallsyms_lookup_name) {
pr_err("Failed to get kallsyms_lookup_name address\n");
return -EINVAL;
}
return 0;
}
/*
 * cleanup_kallsyms_lookup - forget the resolved kallsyms_lookup_name()
 * address.
 *
 * Only clears the pointer; the kprobe used to obtain it was already
 * unregistered by init_kallsyms_lookup(). Declared inline, like the
 * pre-5.7 variant, so that files including this header without calling it
 * do not get an unused-function warning.
 */
static inline void cleanup_kallsyms_lookup(void)
{
	mcctrl_kallsyms_lookup_name = NULL;
}
/*
 * mcctrl_lookup_name - look up the address of a kernel symbol by name.
 * @name: symbol name passed through to kallsyms_lookup_name()
 *
 * mcctrl_kallsyms_lookup_name is static, so every source file including
 * this header has its own copy. A file that never ran
 * init_kallsyms_lookup() would otherwise get 0 for every symbol, so the
 * resolver is set up on first use here.
 *
 * NOTE(review): the first call in a file may therefore go through
 * register_kprobe(); presumably all callers run in module-init context
 * where that is acceptable - confirm.
 *
 * Return: the value returned by kallsyms_lookup_name(), or 0 if the
 * resolver could not be obtained.
 */
static inline unsigned long mcctrl_lookup_name(const char *name)
{
	if (!mcctrl_kallsyms_lookup_name && init_kallsyms_lookup() != 0)
		return 0;

	/* Defensive: never call through a NULL resolver. */
	if (!mcctrl_kallsyms_lookup_name)
		return 0;

	return mcctrl_kallsyms_lookup_name(name);
}
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) */
/*
 * Pre-5.7 variants: lookups call kallsyms_lookup_name() directly, so there
 * is no resolver state to set up or tear down.
 */

/* Nothing to initialize; always reports success. */
static inline int init_kallsyms_lookup(void)
{
	const int status = 0;

	return status;
}

/* Nothing to release. */
static inline void cleanup_kallsyms_lookup(void)
{
	return;
}

/* Forward @name to kallsyms_lookup_name() and hand back its result. */
static inline unsigned long mcctrl_lookup_name(const char *name)
{
	unsigned long addr = kallsyms_lookup_name(name);

	return addr;
}
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) */
#endif /* KALLSYMS_COMPAT_H */

View File

@ -44,6 +44,10 @@
#include <linux/semaphore.h>
#include <linux/rwlock.h>
#include <linux/threads.h>
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
#include <refcount.h>
#endif
#include "sysfs.h"
#define SCD_MSG_PREPARE_PROCESS 0x1
@ -401,6 +405,8 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
struct mcctrl_wakeup_desc {
int status;
int err;
refcount_t count;
int free_at_put;
wait_queue_head_t wq;
struct list_head chain;
int free_addrs_count;

View File

@ -40,7 +40,7 @@ typedef gid_t kgid_t;
struct procfs_entry {
char *name;
mode_t mode;
const struct file_operations *fops;
const struct proc_ops *fops;
};
#define NOD(NAME, MODE, FOP) { \
@ -58,8 +58,8 @@ struct procfs_entry {
static const struct procfs_entry tid_entry_stuff[];
static const struct procfs_entry pid_entry_stuff[];
static const struct procfs_entry base_entry_stuff[];
static const struct file_operations mckernel_forward_ro;
static const struct file_operations mckernel_forward;
static const struct proc_ops mckernel_forward_ro;
static const struct proc_ops mckernel_forward;
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos);
@ -84,7 +84,7 @@ struct procfs_list_entry {
* file.
*/
LIST_HEAD(procfs_file_list);
DEFINE_SEMAPHORE(procfs_file_list_lock);
DEFINE_SEMAPHORE(procfs_file_list_lock, 1);
static char *
getpath(struct procfs_list_entry *e, char *buf, int bufsize)
@ -183,10 +183,10 @@ add_procfs_entry(struct procfs_list_entry *parent, const char *name, int mode,
pde = proc_symlink(name, parent_pde, (char *)opaque);
}
else {
const struct file_operations *fop;
const struct proc_ops *fop;
if(opaque)
fop = (const struct file_operations *)opaque;
fop = (const struct proc_ops *)opaque;
else if(mode & S_IWUSR)
fop = &mckernel_forward;
else
@ -509,7 +509,7 @@ static ssize_t __mckernel_procfs_read_write(
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
#else
struct procfs_list_entry *e = PDE_DATA(inode);
struct procfs_list_entry *e = pde_data(inode);
#endif
loff_t offset = *ppos;
char pathbuf[PROCFS_NAME_MAX];
@ -611,7 +611,7 @@ static ssize_t __mckernel_procfs_read_write(
ret = mcctrl_ikc_send_wait(osnum_to_os(e->osnum),
(pid > 0) ? ppd->ikc_target_cpu : 0,
&isp, HZ, NULL, &do_free, 1, r);
&isp, 5000, NULL, &do_free, 1, r);
if (!do_free && ret >= 0) {
ret = -EIO;
@ -770,16 +770,16 @@ int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg,
return 0;
}
static const struct file_operations mckernel_forward_ro = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read,
.write = NULL,
static const struct proc_ops mckernel_forward_ro = {
.proc_lseek = mckernel_procfs_lseek,
.proc_read = mckernel_procfs_read,
.proc_write = NULL,
};
static const struct file_operations mckernel_forward = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read,
.write = mckernel_procfs_write,
static const struct proc_ops mckernel_forward = {
.proc_lseek = mckernel_procfs_lseek,
.proc_read = mckernel_procfs_read,
.proc_write = mckernel_procfs_write,
};
#define PA_NULL (-1L)
@ -812,7 +812,7 @@ static int mckernel_procfs_buff_open(struct inode *inode, struct file *file)
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
#else
struct procfs_list_entry *e = PDE_DATA(inode);
struct procfs_list_entry *e = pde_data(inode);
#endif
os = osnum_to_os(e->osnum);
@ -879,7 +879,7 @@ static int mckernel_procfs_buff_release(struct inode *inode, struct file *file)
rc = -EIO;
ret = mcctrl_ikc_send_wait(info->os, 0,
&isp, 5 * HZ, NULL, &do_free, 1, r);
&isp, 5000, NULL, &do_free, 1, r);
if (!do_free && ret >= 0) {
ret = -EIO;
@ -977,7 +977,7 @@ static ssize_t mckernel_procfs_buff_read(struct file *file, char __user *ubuf,
done = 1;
ret = mcctrl_ikc_send_wait(os,
(pid > 0) ? ppd->ikc_target_cpu : 0,
&isp, 5 * HZ, NULL, &do_free, 1, r);
&isp, 5000, NULL, &do_free, 1, r);
if (!do_free && ret >= 0) {
ret = -EIO;
@ -1071,12 +1071,12 @@ rep:
return l;
}
static const struct file_operations mckernel_buff_io = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_buff_read,
.write = NULL,
.open = mckernel_procfs_buff_open,
.release = mckernel_procfs_buff_release,
static const struct proc_ops mckernel_buff_io = {
.proc_lseek = mckernel_procfs_lseek,
.proc_read = mckernel_procfs_buff_read,
.proc_write = NULL,
.proc_open = mckernel_procfs_buff_open,
.proc_release = mckernel_procfs_buff_release,
};
static const struct procfs_entry tid_entry_stuff[] = {

View File

@ -57,6 +57,21 @@
#include <archdeps.h>
#include <asm/pgtable.h>
/*
 * mcctrl_vfs_fstat - local stand-in for vfs_fstat(), which per the original
 * note is not exported in newer kernels.
 * @fd:   file descriptor to query
 * @stat: kstat structure handed to vfs_getattr()
 *
 * Takes a reference on the file behind @fd with fget(), asks vfs_getattr()
 * for STATX_BASIC_STATS on its path with AT_STATX_SYNC_AS_STAT, and drops
 * the reference again.
 *
 * Return: -EBADF if fget() yields no file, otherwise the vfs_getattr()
 * result.
 */
static inline int mcctrl_vfs_fstat(int fd, struct kstat *stat)
{
	int rc = -EBADF;
	struct file *filp = fget(fd);

	if (filp) {
		rc = vfs_getattr(&filp->f_path, stat, STATX_BASIC_STATS,
				 AT_STATX_SYNC_AS_STAT);
		fput(filp);
	}

	return rc;
}
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
//#define SC_DEBUG
@ -495,8 +510,6 @@ int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr,
struct ikc_scd_packet *packet)
{
int error;
struct mcctrl_wakeup_desc *desc;
int do_frees = 1;
dprintk("%s: tid: %d, fault_addr: %p, reason: %lu\n",
__FUNCTION__, task_pid_vnr(current), fault_addr, (unsigned long)reason);
@ -506,19 +519,9 @@ int remote_page_fault(struct mcctrl_usrdata *usrdata, void *fault_addr,
packet->fault_address = (unsigned long)fault_addr;
packet->fault_reason = reason;
/* we need to alloc desc ourselves because GFP_ATOMIC */
retry_alloc:
desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
if (!desc) {
pr_warn("WARNING: coudln't alloc remote page fault wait desc, retrying..\n");
goto retry_alloc;
}
/* packet->target_cpu was set in rus_vm_fault if a thread was found */
error = mcctrl_ikc_send_wait(usrdata->os, packet->target_cpu, packet,
0, desc, &do_frees, 0);
if (do_frees)
kfree(desc);
0, NULL, NULL, 0);
if (error < 0) {
pr_warn("%s: WARNING: failed to request remote page fault PID %d: %d\n",
__func__, packet->pid, error);
@ -745,7 +748,7 @@ static struct vm_operations_struct rus_vmops = {
static int rus_mmap(struct file *file, struct vm_area_struct *vma)
{
vma->vm_flags |= arch_rus_vm_flags;
vm_flags_set(vma, arch_rus_vm_flags);
vma->vm_ops = &rus_vmops;
return 0;
}
@ -788,10 +791,10 @@ reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, u
#if 0
{ /* debug */
struct vm_area_struct *vma;
down_write(&current->mm->mmap_sem);
mmap_write_lock(current->mm);
vma = find_vma(current->mm, start);
vma->vm_flags |= VM_DONTCOPY;
up_write(&current->mm->mmap_sem);
vm_flags_set(vma, VM_DONTCOPY);
mmap_write_unlock(current->mm);
}
#endif
revert_creds(original);
@ -962,7 +965,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
dprintk("pager_req_create(%d,%lx)\n", fd, (long)result_pa);
error = vfs_fstat(fd, &st);
error = mcctrl_vfs_fstat(fd, &st);
if (error) {
printk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
goto out;
@ -1475,12 +1478,12 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off,
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
down_write(&current->mm->mmap_sem);
mmap_write_lock(current->mm);
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot,
prot_and_flags, pgoff);
up_write(&current->mm->mmap_sem);
mmap_write_unlock(current->mm);
#else
va = vm_mmap(file, ANY_WHERE, len, maxprot,
prot_and_flags, pgoff << PAGE_SHIFT);
@ -1583,7 +1586,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
#define PFN_VALID ((uintptr_t)1 << 63)
pfn = PFN_VALID; /* Use "not present" as the default setting */
down_read(&current->mm->mmap_sem);
mmap_read_lock(current->mm);
retry:
pgd = pgd_offset(current->mm, va);
if (!pgd_none(*pgd) && !pgd_bad(*pgd) && pgd_present(*pgd)) {
@ -1599,7 +1602,7 @@ retry:
pmd = pmd_offset(pud, va);
if (!pmd_none(*pmd) && !pmd_bad(*pmd) &&
pmd_present(*pmd)) {
pte = pte_offset_map(pmd, va);
pte = pte_offset_kernel(pmd, va);
if (!pte_none(*pte) && pte_present(*pte)) {
pfn = (uintptr_t)pte_pfn(*pte) << PAGE_SHIFT;
#define PFN_PRESENT ((uintptr_t)1 << 0)
@ -1635,7 +1638,9 @@ retry:
goto out_release;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || \
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)
fault = handle_mm_fault(vma, va, flags, NULL);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) || \
(defined(RHEL_RELEASE_CODE) && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))
fault = handle_mm_fault(vma, va, flags);
#else
@ -1668,7 +1673,7 @@ retry:
}
out_release:
up_read(&current->mm->mmap_sem);
mmap_read_unlock(current->mm);
phys = ihk_device_map_memory(dev, ppfn_rpa, sizeof(*ppfn));
ppfn = ihk_device_map_virtual(dev, phys, sizeof(*ppfn), NULL, 0);
@ -1694,9 +1699,9 @@ static int __pager_unmap(struct pager *pager)
int error;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
down_write(&current->mm->mmap_sem);
mmap_write_lock(current->mm);
error = do_munmap(current->mm, pager->map_uaddr, pager->map_len);
up_write(&current->mm->mmap_sem);
mmap_write_unlock(current->mm);
#else
error = vm_munmap(pager->map_uaddr, pager->map_len);
#endif
@ -1761,9 +1766,11 @@ static long pager_req_mlock_list(ihk_os_t os, unsigned long start,
struct vm_area_struct *vma;
kprintf("pager_req_mlock_list: addr(%p)\n", addr);
vma = find_vma(current->mm, 0x7010a0);
for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
if (vma->vm_start < start || vma->vm_start > end) continue;
/* Use find_vma to iterate through VMAs */
vma = find_vma(mm, start);
while (vma != NULL) {
if (vma->vm_start > end) break;
kprintf("\t%p: %p -- %p\t%lx\n", vma,
(void*)vma->vm_start, (void*)vma->vm_end,
vma->vm_flags & VM_LOCKED);
@ -1778,6 +1785,8 @@ static long pager_req_mlock_list(ihk_os_t os, unsigned long start,
addrpair->flag = vma->vm_flags;
addrpair++;
}
/* Use find_vma to get next VMA */
vma = find_vma(mm, vma->vm_end);
}
full:
return cnt;
@ -2141,14 +2150,14 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
uintptr_t map;
dprintk("remap_user_space(%lx,%lx,%x)\n", rva, len, prot);
down_write(&mm->mmap_sem);
mmap_write_lock(mm);
vma = find_vma(mm, rva);
if (!vma || (rva < vma->vm_start)) {
printk("remap_user_space(%lx,%lx,%x):find_vma failed. %p %lx %lx\n",
rva, len, prot, vma,
(vma)? vma->vm_start: -1,
(vma)? vma->vm_end: 0);
up_write(&mm->mmap_sem);
mmap_write_unlock(mm);
map = -ENOMEM;
goto out;
}
@ -2162,7 +2171,7 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
prot, MAP_FIXED|MAP_SHARED, pgoff);
#endif
up_write(&mm->mmap_sem);
mmap_write_unlock(mm);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
map = vm_mmap(file, start, len,
@ -2187,7 +2196,7 @@ int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len)
int ret;
ret = 0;
down_read(&mm->mmap_sem);
mmap_read_lock(mm);
addr = start;
while (addr < (start + len)) {
vma = find_vma(mm, addr);
@ -2205,7 +2214,7 @@ int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len)
if (addr < end) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
/* Revert permission */
vma->vm_flags |= VM_READ | VM_WRITE | VM_EXEC;
vm_flags_set(vma, VM_READ | VM_WRITE | VM_EXEC);
error = zap_vma_ptes(vma, addr, end-addr);
if (error) {
mcctrl_zap_page_range(vma, addr, end-addr,
@ -2224,14 +2233,14 @@ int mcctrl_clear_pte_range(uintptr_t start, uintptr_t len)
}
else {
/* Revert permission */
vma->vm_flags |= VM_READ | VM_WRITE | VM_EXEC;
vm_flags_set(vma, VM_READ | VM_WRITE | VM_EXEC);
zap_vma_ptes(vma, addr, end-addr);
}
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) */
}
addr = end;
}
up_read(&mm->mmap_sem);
mmap_read_unlock(mm);
return ret;
}

View File

@ -20,20 +20,26 @@ target_include_directories(mcexec PUBLIC "${KERNEL_DIR}")
set_property(TARGET mcexec PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET mcexec PROPERTY LINK_FLAGS "-fPIE -pie")
#unset(LIBDWARF CACHE)
add_executable(mcinspect mcinspect.c)
if (NOT LIBDWARF)
target_include_directories(mcinspect PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/lib/")
target_include_directories(mcinspect PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/lib/libdwarf/libdwarf/libdwarf/")
target_link_libraries(mcinspect dwarf z elf)
else()
target_include_directories(mcinspect PRIVATE ${DWARF_H})
target_link_libraries(mcinspect ${LIBDWARF})
endif()
target_link_libraries(mcinspect ${LIBBFD})
target_link_libraries(mcinspect PRIVATE bfd dwarf z elf)
#if (NOT LIBDWARF)
# target_include_directories(mcinspect PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/lib/")
# target_include_directories(mcinspect PRIVATE
# "${CMAKE_CURRENT_SOURCE_DIR}/lib/libdwarf/libdwarf/libdwarf/")
# target_link_libraries(mcinspect dwarf z elf)
#else()
# target_include_directories(mcinspect PRIVATE ${DWARF_H})
# target_link_libraries(mcinspect ${LIBDWARF})
#endif()
#target_link_libraries(mcinspect ${LIBBFD})
set_property(TARGET mcinspect PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET mcinspect PROPERTY LINK_FLAGS "-fPIE -pie")
add_executable(eclair eclair.c arch/${ARCH}/arch-eclair.c)
target_link_libraries(eclair ${LIBBFD})
set_property(TARGET eclair PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET eclair PROPERTY LINK_FLAGS "-fPIE -pie")
add_library(sched_yield SHARED libsched_yield.c)
target_link_libraries(sched_yield dl)

View File

@ -279,13 +279,13 @@ void cmd_ldump2mcdump(void)
return;
}
ok = bfd_set_section_size(abfd, scn, cpsize);
ok = bfd_set_section_size(scn, cpsize);
if (!ok) {
bfd_perror("bfd_set_section_size");
return;
}
ok = bfd_set_section_flags(abfd, scn, SEC_HAS_CONTENTS);
ok = bfd_set_section_flags(scn, SEC_HAS_CONTENTS);
if (!ok) {
bfd_perror("bfd_set_setction_flags");
return;
@ -300,13 +300,13 @@ void cmd_ldump2mcdump(void)
return;
}
ok = bfd_set_section_size(abfd, scn, cpsize);
ok = bfd_set_section_size(scn, cpsize);
if (!ok) {
bfd_perror("bfd_set_section_size");
return;
}
ok = bfd_set_section_flags(abfd, scn, SEC_HAS_CONTENTS);
ok = bfd_set_section_flags(scn, SEC_HAS_CONTENTS);
if (!ok) {
bfd_perror("bfd_set_setction_flags");
return;
@ -321,13 +321,13 @@ void cmd_ldump2mcdump(void)
return;
}
ok = bfd_set_section_size(abfd, scn, cpsize);
ok = bfd_set_section_size(scn, cpsize);
if (!ok) {
bfd_perror("bfd_set_section_size");
return;
}
ok = bfd_set_section_flags(abfd, scn, SEC_HAS_CONTENTS);
ok = bfd_set_section_flags(scn, SEC_HAS_CONTENTS);
if (!ok) {
bfd_perror("bfd_set_setction_flags");
return;
@ -341,13 +341,13 @@ void cmd_ldump2mcdump(void)
return;
}
ok = bfd_set_section_size(abfd, scn, mem_size);
ok = bfd_set_section_size(scn, mem_size);
if (!ok) {
bfd_perror("bfd_set_section_size");
return;
}
ok = bfd_set_section_flags(abfd, scn, SEC_ALLOC|SEC_HAS_CONTENTS);
ok = bfd_set_section_flags(scn, SEC_ALLOC|SEC_HAS_CONTENTS);
if (!ok) {
bfd_perror("bfd_set_setction_flags");
return;
@ -366,14 +366,14 @@ void cmd_ldump2mcdump(void)
return;
}
ok = bfd_set_section_size(abfd, scn, mem_chunks->chunks[i].size);
ok = bfd_set_section_size(scn, mem_chunks->chunks[i].size);
if (!ok) {
bfd_perror("bfd_set_section_size");
return;
}
ok = bfd_set_section_flags(abfd, scn, SEC_ALLOC|SEC_HAS_CONTENTS);
ok = bfd_set_section_flags(scn, SEC_ALLOC|SEC_HAS_CONTENTS);
if (!ok) {
bfd_perror("bfd_set_setction_flags");
return;

View File

@ -4,9 +4,9 @@ endif()
if (ENABLE_UTI)
if (${ARCH} STREQUAL "arm64")
set(SYSCALL_INTERCEPT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/syscall_intercept/arch/aarch64" CACHE STRINGS "relative path to syscalL_intercept source directory")
set(SYSCALL_INTERCEPT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/syscall_intercept/arch/aarch64" CACHE STRING "relative path to syscalL_intercept source directory")
elseif (${ARCH} STREQUAL "x86_64")
set(SYSCALL_INTERCEPT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/syscall_intercept" CACHE STRINGS "relative path to syscalL_intercept source directory")
set(SYSCALL_INTERCEPT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/syscall_intercept" CACHE STRING "relative path to syscalL_intercept source directory")
endif()

View File

@ -2419,6 +2419,7 @@ int main(int argc, char **argv)
}
#endif // MCEXEC_BIND_MOUNT
/* fget executable as well */
if ((ret = load_elf_desc_shebang(argv[optind], &desc,
&shebang_argv, 1 /* execvp */))) {
fprintf(stderr, "%s: could not load program: %s\n",
@ -2860,6 +2861,14 @@ int main(int argc, char **argv)
fprintf(stderr, "error: transferring image\n");
return -1;
}
/* fput executable */
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
fprintf(stderr, "error: MCEXEC_UP_CLOSE_EXEC failed with %d\n",
ret);
return 1;
}
fflush(stdout);
fflush(stderr);
@ -3489,7 +3498,7 @@ checkexist_resolvelinks:
if (buf[0] == '/') {
/* cannot snprintf from same source and dest */
n = snprintf(tmpbuf2, PATH_MAX, "%s/%s", buf,
linkpath);
linkpath + 1);
if (n >= PATH_MAX)
return in;
strcpy(tmpbuf, tmpbuf2);
@ -4111,11 +4120,6 @@ int main_loop(struct thread_data_s *my_thread)
It is done by not calling do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
here and making McKernel side wait until release_handler() is called. */
/* Drop executable file */
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
}
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
w.sr.args[0], cpu);
if(w.sr.number == __NR_exit_group){
@ -4308,15 +4312,6 @@ gettid_out:
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* Hold executable also in the child process */
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
!= 0) {
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
exec_path, ret, fd);
fs->status = -errno;
goto fork_child_sync_pipe;
}
/* Check if we need to limit number of threads in the pool */
if ((ret = ioctl(fd, MCEXEC_UP_GET_NUM_POOL_THREADS)) < 0) {
fprintf(stderr, "Error: obtaining thread pool count\n");
@ -4472,6 +4467,7 @@ fork_err:
}
filename = pathbuf;
/* fget executable as well */
if ((ret = load_elf_desc_shebang(filename, &desc,
&shebang_argv, 0)) != 0) {
goto return_execve1;
@ -4569,6 +4565,13 @@ return_execve1:
}
__dprintf("%s", "execve(): image transferred\n");
/* fput executable */
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
fprintf(stderr, "error: MCEXEC_UP_CLOSE_EXEC failed with %d\n",
ret);
return 1;
}
if (close_cloexec_fds(fd) < 0) {
ret = EINVAL;
goto return_execve2;

View File

@ -19,7 +19,7 @@
#include <string.h>
#include <errno.h>
#include <dwarf.h>
#include <libdwarf/libdwarf.h>
#include <libdwarf-0/libdwarf.h>
#include <getopt.h>
#include <libgen.h>
#include <bfd.h>
@ -130,14 +130,15 @@ int dwarf_walk_tree(Dwarf_Debug dbg,
Dwarf_Error err;
Dwarf_Die unit;
Dwarf_Die die;
Dwarf_Half header_cu_type;
int rc;
/* Iterate compile and type units */
for (is_info = 0; is_info < 2; ++is_info) {
rc = dwarf_next_cu_header_c(dbg, is_info, &cu_length,
rc = dwarf_next_cu_header_d(dbg, is_info, &cu_length,
&cu_version, &cu_abbrev_offset, &cu_pointer_size,
&cu_offset_size, &cu_extension_size, &type_signature,
&type_offset, &cu_next_offset, &err);
&type_offset, &cu_next_offset, &header_cu_type, &err);
while (rc != DW_DLV_NO_ENTRY) {
char *name = NULL;
@ -151,9 +152,9 @@ int dwarf_walk_tree(Dwarf_Debug dbg,
return -1;
}
rc = dwarf_siblingof(dbg, NULL, &unit, &err);
rc = dwarf_siblingof_b(dbg, NULL, is_info, &unit, &err);
if (rc != DW_DLV_OK) {
fprintf(stderr, "error: dwarf_siblingof failed: %d %s\n",
fprintf(stderr, "error: dwarf_siblingof_b failed: %d %s\n",
rc, dwarf_errmsg(err));
return -1;
}
@ -237,7 +238,7 @@ int dwarf_walk_tree(Dwarf_Debug dbg,
}
}
rc = dwarf_siblingof(dbg, die, &next, &err);
rc = dwarf_siblingof_b(dbg, die, is_info, &next, &err);
dwarf_dealloc(dbg, die, DW_DLA_DIE);
if (name)
dwarf_dealloc(dbg, name, DW_DLA_STRING);
@ -248,10 +249,10 @@ int dwarf_walk_tree(Dwarf_Debug dbg,
die = next;
}
rc = dwarf_next_cu_header_c(dbg, is_info, &cu_length,
rc = dwarf_next_cu_header_d(dbg, is_info, &cu_length,
&cu_version, &cu_abbrev_offset, &cu_pointer_size,
&cu_offset_size, &cu_extension_size, &type_signature,
&type_offset, &cu_next_offset, &err);
&type_offset, &cu_next_offset, &header_cu_type, &err);
}
}
@ -294,34 +295,64 @@ int dwarf_get_size(Dwarf_Debug dbg,
if (ssize < 0) {
fprintf(stderr, "%s: unsupported negative size\n",
__func__);
__func__);
return DW_DLV_ERROR;
}
size = (Dwarf_Unsigned) ssize;
}
else {
Dwarf_Locdesc **locdescs;
Dwarf_Signed len;
Dwarf_Loc_Head_c loclist_head = 0;
Dwarf_Unsigned lcount = 0;
Dwarf_Locdesc_c locdesc_entry = 0;
Dwarf_Small op;
Dwarf_Unsigned opd1, opd2, opd3;
Dwarf_Unsigned offsetforbranch;
int lres;
if (dwarf_loclist_n(attr, &locdescs, &len, perr)
== DW_DLV_ERROR) {
lres = dwarf_get_loclist_c(attr, &loclist_head, &lcount, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr, "%s: unsupported member size\n",
__func__);
return DW_DLV_ERROR;
}
if (len != 1 ||
locdescs[0]->ld_cents != 1 ||
(locdescs[0]->ld_s[0]).lr_atom
!= DW_OP_plus_uconst) {
if (lcount != 1) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
size = (locdescs[0]->ld_s[0]).lr_number;
lres = dwarf_get_locdesc_entry_d(loclist_head, 0, 0, 0, 0, 0, 0, 0, 0, &locdesc_entry, 0, 0, 0, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
lres = dwarf_get_location_op_value_d(locdesc_entry, 0, &op, &opd1, &opd2, &opd3, NULL, NULL, NULL, &offsetforbranch, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
if (op != DW_OP_plus_uconst) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
size = opd1;
dwarf_loc_head_c_dealloc(loclist_head);
}
dwarf_dealloc(dbg, attr, DW_DLA_ATTR);
@ -455,27 +486,57 @@ int dwarf_get_offset(Dwarf_Debug dbg,
offset = (Dwarf_Unsigned) soffset;
}
else {
Dwarf_Locdesc **locdescs;
Dwarf_Signed len;
Dwarf_Loc_Head_c loclist_head = 0;
Dwarf_Unsigned lcount = 0;
Dwarf_Locdesc_c locdesc_entry = 0;
Dwarf_Small op;
Dwarf_Unsigned opd1, opd2, opd3;
Dwarf_Unsigned offsetforbranch;
int lres;
if (dwarf_loclist_n(attr, &locdescs, &len, perr)
== DW_DLV_ERROR) {
lres = dwarf_get_loclist_c(attr, &loclist_head, &lcount, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr, "%s: unsupported member offset\n",
__func__);
return DW_DLV_ERROR;
}
if (len != 1 ||
locdescs[0]->ld_cents != 1 ||
(locdescs[0]->ld_s[0]).lr_atom
!= DW_OP_plus_uconst) {
if (lcount != 1) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
offset = (locdescs[0]->ld_s[0]).lr_number;
lres = dwarf_get_locdesc_entry_d(loclist_head, 0, 0, 0, 0, 0, 0, 0, 0, &locdesc_entry, 0, 0, 0, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
lres = dwarf_get_location_op_value_d(locdesc_entry, 0, &op, &opd1, &opd2, &opd3, NULL, NULL, NULL, &offsetforbranch, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
if (op != DW_OP_plus_uconst) {
fprintf(stderr,
"%s: unsupported location expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
return DW_DLV_ERROR;
}
offset = opd1;
dwarf_loc_head_c_dealloc(loclist_head);
}
dwarf_dealloc(dbg, attr, DW_DLA_ATTR);
@ -579,10 +640,10 @@ int dwarf_struct_field_offset(Dwarf_Debug dbg, Dwarf_Die die, void *arg)
break;
next_child:
rc = dwarf_siblingof(dbg, child, &next, &err);
rc = dwarf_siblingof_b(dbg, child, 1, &next, &err);
dwarf_dealloc(dbg, child, DW_DLA_DIE);
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: error: dwarf_siblingof: %d %s\n",
fprintf(stderr, "%s: error: dwarf_siblingof_b: %d %s\n",
__func__, rc, dwarf_errmsg(err));
rc = DW_DLV_NO_ENTRY;
goto out;
@ -617,7 +678,7 @@ out:
rc = dwarf_walk_tree(dbg, dwarf_struct_field_offset, &dsfo); \
if (rc != DW_DLV_OK) { \
fprintf(stderr, "%s: error: finding %s in struct %s\n", \
__func__, dsfo.field_name, dsfo.struct_name); \
__func__, dsfo.field_name, dsfo.struct_name); \
exit(1); \
} \
offset; \
@ -681,7 +742,7 @@ int dwarf_get_address(Dwarf_Debug dbg,
printf("%s: DW_AT_location\n", __func__);
rc = dwarf_whatform(attr, &form, perr);
if (rc != DW_DLV_OK) {
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: error: getting whatform: %s\n",
__func__, dwarf_errmsg(*perr));
goto dealloc_out;
@ -696,90 +757,148 @@ int dwarf_get_address(Dwarf_Debug dbg,
form == DW_FORM_data8 ||
form == DW_FORM_sec_offset) {
Dwarf_Locdesc **locdescs;
Dwarf_Signed len;
Dwarf_Loc_Head_c loclist_head = 0;
Dwarf_Unsigned lcount = 0;
Dwarf_Locdesc_c locdesc_entry = 0;
Dwarf_Small op;
Dwarf_Unsigned opd1, opd2, opd3;
Dwarf_Unsigned offsetforbranch;
int lres;
if (dwarf_loclist_n(attr, &locdescs, &len, perr)
== DW_DLV_ERROR) {
fprintf(stderr, "%s: dwarf_loclist_n: %s\n",
lres = dwarf_get_loclist_c(attr, &loclist_head, &lcount, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr, "%s: dwarf_get_loclist_c: %s\n",
__func__, dwarf_errmsg(*perr));
rc = DW_DLV_ERROR;
goto dealloc_out;
}
if (len != 1 ||
locdescs[0]->ld_cents != 1 ||
(locdescs[0]->ld_s[0]).lr_atom
!= DW_OP_addr) {
if (lcount != 1) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
addr = (locdescs[0]->ld_s[0]).lr_number;
lres = dwarf_get_locdesc_entry_d(loclist_head, 0, 0, 0, 0, 0, 0, 0, 0, &locdesc_entry, 0, 0, 0, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
lres = dwarf_get_location_op_value_d(locdesc_entry, 0, &op, &opd1, &opd2, &opd3, NULL, NULL, NULL, &offsetforbranch, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
if (op != DW_OP_addr) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
addr = opd1;
dwarf_loc_head_c_dealloc(loclist_head);
}
else if (form == DW_FORM_exprloc) {
Dwarf_Half address_size = 0;
Dwarf_Ptr x = 0;
Dwarf_Unsigned tempud = 0;
Dwarf_Locdesc *locdescs = 0;
Dwarf_Signed len = 0;
Dwarf_Loc_Head_c loclist_head = 0;
Dwarf_Unsigned lcount = 0;
Dwarf_Locdesc_c locdesc_entry = 0;
Dwarf_Small op;
Dwarf_Unsigned opd1, opd2, opd3;
Dwarf_Unsigned offsetforbranch;
int lres;
Dwarf_Half version;
Dwarf_Half offset_size;
rc = dwarf_formexprloc(attr, &tempud, &x, perr);
if (rc == DW_DLV_NO_ENTRY) {
fprintf(stderr, "%s: dwarf_formexprloc: no entry?\n",
__func__);
goto dealloc_out;
}
else if (rc == DW_DLV_ERROR) {
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: dwarf_formexprloc(): %s\n",
__func__, dwarf_errmsg(*perr));
goto dealloc_out;
}
rc = dwarf_get_die_address_size(die, &address_size, perr);
if (rc == DW_DLV_NO_ENTRY) {
fprintf(stderr, "%s: dwarf_get_die_address_size: no entry?\n",
__func__);
goto dealloc_out;
}
else if (rc == DW_DLV_ERROR) {
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: dwarf_get_die_address_size: %s\n",
__func__, dwarf_errmsg(*perr));
goto dealloc_out;
}
rc = dwarf_loclist_from_expr_a(dbg, x, tempud, address_size,
&locdescs, &len, perr);
if (rc == DW_DLV_ERROR) {
fprintf(stderr, "%s: dwarf_loclist_from_expr_a: %s\n",
rc = dwarf_get_version_of_die(die, &version, &offset_size);
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: dwarf_get_version_of_die: %s\n",
__func__, dwarf_errmsg(*perr));
goto dealloc_out;
}
else if (rc == DW_DLV_NO_ENTRY) {
fprintf(stderr, "%s: dwarf_loclist_from_expr_a: no entry?\n",
__func__);
rc = dwarf_loclist_from_expr_c(dbg, x, tempud, address_size, offset_size, version,
&loclist_head, &lcount, perr);
if (rc != DW_DLV_OK) {
fprintf(stderr, "%s: dwarf_loclist_from_expr_c: %s\n",
__func__, dwarf_errmsg(*perr));
goto dealloc_out;
}
/* len is always 1 */
if (len != 1 ||
locdescs[0].ld_cents != 1 ||
(locdescs[0].ld_s[0]).lr_atom
!= DW_OP_addr) {
if (lcount != 1) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
addr = (locdescs[0].ld_s[0]).lr_number;
lres = dwarf_get_locdesc_entry_d(loclist_head, 0, 0, 0, 0, 0, 0, 0, 0, &locdesc_entry, 0, 0, 0, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
lres = dwarf_get_location_op_value_d(locdesc_entry, 0, &op, &opd1, &opd2, &opd3, NULL, NULL, NULL, &offsetforbranch, perr);
if (lres != DW_DLV_OK) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
if (op != DW_OP_addr) {
fprintf(stderr,
"%s: unsupported addr expression\n",
__func__);
dwarf_loc_head_c_dealloc(loclist_head);
rc = DW_DLV_ERROR;
goto dealloc_out;
}
addr = opd1;
dwarf_loc_head_c_dealloc(loclist_head);
}
else {
fprintf(stderr, "%s: unsupported form type?\n",
__func__);
__func__);
goto dealloc_out;
}
@ -888,7 +1007,7 @@ out:
rc = dwarf_walk_tree(dbg, dwarf_global_var_addr, &gva); \
if (rc != DW_DLV_OK) { \
fprintf(stderr, "%s: error: finding addr of %s\n", \
__func__, gva.variable); \
__func__, gva.variable); \
exit(1); \
} \
} \
@ -1210,25 +1329,25 @@ struct option mcinspect_options[] = {
{
.name = "ps",
.has_arg = no_argument,
.flag = &ps,
.flag = &ps,
.val = 1,
},
{
.name = "help",
.has_arg = no_argument,
.flag = &help,
.flag = &help,
.val = 1,
},
{
.name = "debug",
.has_arg = no_argument,
.flag = &debug,
.flag = &debug,
.val = 1,
},
{
.name = "vtop",
.has_arg = no_argument,
.flag = &vtop,
.flag = &vtop,
.val = 1,
},
{
@ -1277,7 +1396,7 @@ int main(int argc, char **argv)
case 'v':
vtop_addr = strtoul(optarg, 0, 16);
if (vtop_addr == 0 ||
errno == EINVAL || errno == ERANGE) {
errno == EINVAL || errno == ERANGE) {
fprintf(stderr, "error: invalid VA? (expected format: 0xXXXX)\n\n");
usage(argv);
exit(1);
@ -1324,7 +1443,7 @@ int main(int argc, char **argv)
exit(1);
}
rc = dwarf_init(dwarffd, DW_DLC_READ, errhand, errarg, &dbg, &error);
rc = dwarf_init_b(dwarffd, DW_DLA_WEAK, errhand, errarg, &dbg, &error);
if (rc != DW_DLV_OK) {
fprintf(stderr, "error: accessing DWARF information\n");
exit(1);
@ -1339,7 +1458,7 @@ int main(int argc, char **argv)
mcvtop(dbg, pid, vtop_addr);
}
dwarf_finish(dbg, &error);
dwarf_finish(dbg);
close(dwarffd);
close(mcfd);
return 0;

2
ihk

Submodule ihk updated: 8e637b7873...3114d9e710

View File

@ -87,7 +87,7 @@ void kputs(char *buf)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer);
if (irqflags_can_interrupt(flags_outer) &&
if (!cpu_interrupt_disabled() &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
@ -128,7 +128,7 @@ int __kprintf(const char *format, ...)
}
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
if (irqflags_can_interrupt(flags_inner) &&
if (!cpu_interrupt_disabled() &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
@ -171,7 +171,7 @@ int kprintf(const char *format, ...)
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer);
if (irqflags_can_interrupt(flags_outer) &&
if (!cpu_interrupt_disabled() &&
DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);

View File

@ -136,6 +136,27 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
error = 0;
*objp = to_memobj(obj);
*maxprotp = result.maxprot;
#ifdef ENABLE_FUGAKU_HACKS
/* Pre-populate device file PFNs for PMIx shared mem */
if (!strncmp(obj->memobj.path,
"/var/opt/FJSVtcs/ple/daemonif", 29)) {
off_t offset;
uintptr_t phys;
unsigned long flag;
for (offset = 0; offset < obj->memobj.size; offset += PAGE_SIZE) {
if (devobj_get_page(&obj->memobj, offset, PAGE_P2ALIGN,
&phys, &flag, 0) < 0) {
kprintf("%s: WARNING: failed to populate offset %lu in %s\n",
__func__, offset, obj->memobj.path);
}
}
dkprintf("%s: pre-populated PFNs for %s, len: %lu\n",
__func__, obj->memobj.path, obj->memobj.size);
}
#endif
obj = NULL;
out:
@ -200,6 +221,10 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
uintptr_t attr;
ihk_mc_user_context_t ctx;
int ix;
unsigned long irqstate;
#ifdef ENABLE_FUGAKU_HACKS
int page_fault_attempts = 5;
#endif
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
@ -214,8 +239,15 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE);
#endif // PROFILE_ENABLE
irqstate = ihk_mc_spinlock_lock(&obj->pfn_table_lock);
pfn = obj->pfn_table[ix];
ihk_mc_spinlock_unlock(&obj->pfn_table_lock, irqstate);
if (!(pfn & PFN_VALID)) {
#ifdef ENABLE_FUGAKU_HACKS
pf_retry:
#endif
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN;
ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1);
@ -241,8 +273,24 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
pfn |= attr;
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
}
#ifdef ENABLE_FUGAKU_HACKS
else if (page_fault_attempts > 0) {
kprintf("%s(): va: 0x%lx !PFN_PRESENT for offset %lu in %s, "
"page_fault_attempts: %d\n",
__func__, virt_addr, off,
memobj->path ? memobj->path : "<unknown>",
page_fault_attempts);
--page_fault_attempts;
goto pf_retry;
}
#endif
obj->pfn_table[ix] = pfn;
/* Update atomically if unset */
irqstate = ihk_mc_spinlock_lock(&obj->pfn_table_lock);
if (obj->pfn_table[ix] == 0) {
obj->pfn_table[ix] = pfn;
}
ihk_mc_spinlock_unlock(&obj->pfn_table_lock, irqstate);
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
}

View File

@ -100,8 +100,13 @@ void profile_event_add(enum profile_event_type type, uint64_t tsc)
return;
if (!cpu_local_var(current)->profile_events) {
if (profile_alloc_events(cpu_local_var(current)) < 0)
if (type == PROFILE_mpol_alloc_missed) {
return;
}
if (profile_alloc_events(cpu_local_var(current)) < 0) {
return;
}
}
if (type < PROFILE_EVENT_MAX) {

View File

@ -246,6 +246,12 @@ long do_syscall(struct syscall_request *req, int cpu)
unsigned long flags;
DECLARE_WAITQ_ENTRY(scd_wq_entry, cpu_local_var(current));
#ifdef ENABLE_FUGAKU_HACKS
if (req->number == __NR_epoll_wait ||
req->number == __NR_epoll_pwait)
goto schedule;
#endif
if (thread->rpf_backlog) {
void (*func)(void *) = thread->rpf_backlog;
void *arg = thread->rpf_arg;
@ -287,6 +293,9 @@ long do_syscall(struct syscall_request *req, int cpu)
continue;
}
#ifdef ENABLE_FUGAKU_HACKS
schedule:
#endif
flags = cpu_disable_interrupt_save();
/* Try to sleep until notified */
@ -2210,6 +2219,8 @@ straight_out:
}
#endif // PROFILE_ENABLE
if (error == -ESRCH) {
int populate_flags = 0;
dkprintf("do_mmap:hit non VREG\n");
/*
* XXX: temporary:
@ -2221,8 +2232,21 @@ straight_out:
vrflags &= ~VR_MEMTYPE_MASK;
vrflags |= VR_MEMTYPE_UC;
}
#ifdef ENABLE_FUGAKU_HACKS
#ifdef ENABLE_TOFU
if (!strncmp("/var/opt/FJSVtcs/ple/daemonif/",
thread->proc->fd_path[fd], 30)) {
dkprintf("%s: MAP_POPULATE | MAP_LOCKED for %s\n",
__func__, thread->proc->fd_path[fd]);
populate_flags = (MAP_POPULATE | MAP_LOCKED);
}
#endif
#endif
error = devobj_create(fd, len, off, &memobj, &maxprot,
prot, (flags & (MAP_POPULATE | MAP_LOCKED)));
prot,
populate_flags | (flags & (MAP_POPULATE | MAP_LOCKED)));
if (!error) {
#ifdef PROFILE_ENABLE
@ -11149,7 +11173,16 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
}
#endif // PROFILE_ENABLE
if (smp_load_acquire(&v->flags) & CPU_FLAG_NEED_RESCHED) {
#ifdef ENABLE_FUGAKU_HACKS
/* Do not deschedule when returning from an event (e.g., MPI) */
if (!(num == __NR_epoll_wait ||
num == __NR_epoll_pwait ||
num == __NR_ppoll) &&
smp_load_acquire(&v->flags) & CPU_FLAG_NEED_RESCHED)
#else
if (smp_load_acquire(&v->flags) & CPU_FLAG_NEED_RESCHED)
#endif
{
check_need_resched();
}

View File

@ -23,9 +23,7 @@ extern int num_processors;
void cpu_enable_interrupt(void);
void cpu_disable_interrupt(void);
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void);
#endif
void cpu_halt(void);
#ifdef ENABLE_FUGAKU_HACKS
void cpu_halt_panic(void);

View File

@ -15,6 +15,11 @@
%{!?kernel_dir: %global kernel_dir /usr/src/kernels/%{kernel_version}}
%define krequires %(echo %{kernel_version} | sed "s/.%{_target_cpu}$//")
%define ktag %(echo %{krequires} | tr '-' '_' | sed -e 's/\.el[0-9_]*$//' | sed -e 's/\.\([a-zA-Z]\)/_\1/')
%if "@ENABLE_UTI@" == "ON"
%define enable_uti 1
%else
%define enable_uti 0
%endif
Name: mckernel
Version: @MCKERNEL_VERSION@
@ -66,6 +71,8 @@ This package contains headers and libraries required for build apps using IHK/Mc
# We need to remove ld flags like relro for the final mckernel.img link, as well as remove cflags for mckernel
# ideally mckernel should use different environment variables for the user tools and the kernel tools altogether...
%undefine _hardened_build
%define build_ldflags ""
%define __global_ldflags ""
%define optflags -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions --param=ssp-buffer-size=4 -grecord-gcc-switches -mtune=generic
@ -77,9 +84,10 @@ pushd build
%{?cmake_libdir:-DCMAKE_INSTALL_LIBDIR=%{cmake_libdir}} \
%{?build_target:-DBUILD_TARGET=%{build_target}} \
%{?toolchain_file:-DCMAKE_TOOLCHAIN_FILE=%{toolchain_file}} \
-DENABLE_TOFU=ON -DENABLE_FUGAKU_HACKS=ON \
-DENABLE_KRM_WORKAROUND=OFF -DWITH_KRM=ON \
-DENABLE_FUGAKU_DEBUG=OFF -DENABLE_UTI=ON \
-DENABLE_TOFU=@ENABLE_TOFU@ -DENABLE_FUGAKU_HACKS=@ENABLE_FUGAKU_HACKS@ \
-DENABLE_KRM_WORKAROUND=@ENABLE_KRM_WORKAROUND@ -DWITH_KRM=@WITH_KRM@ \
-DENABLE_FUGAKU_DEBUG=@ENABLE_FUGAKU_DEBUG@ -DENABLE_UTI=@ENABLE_UTI@ \
-DENABLE_FJMPI_WORKAROUND=@ENABLE_FJMPI_WORKAROUND@ \
..
%make_build
popd
@ -112,6 +120,7 @@ popd
%{_libdir}/libsched_yield.so.1.0.0
%{_libdir}/libsched_yield.so
%{_libdir}/libldump2mcdump.so
%if 0%{?enable_uti}
%{_libdir}/libmck_syscall_intercept.so
%{_libdir}/libsyscall_intercept.so.0.1.0
%{_libdir}/libsyscall_intercept.so.0
@ -119,6 +128,7 @@ popd
%{_libdir}/mck/libuti.so.1.0.0
%{_libdir}/mck/libuti.so.1
%{_libdir}/mck/libuti.so
%endif
%{_sysconfdir}/irqbalance_mck.in
%{_mandir}/man1/mcreboot.1.gz
%{_mandir}/man1/ihkconfig.1.gz

View File

@ -119,7 +119,7 @@ if [ "${pid}" != "" ]; then
${SUDO} kill -9 ${pid} > /dev/null 2> /dev/null
fi
if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then
${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
${SUDO} ${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
fi
disable_irqbalance_mck() {

58
test/issues/1463/C1463.sh Executable file
View File

@ -0,0 +1,58 @@
#!/bin/sh
# Regression test for issue #1463 (symlink resolution in mcexec's
# mcoverlay_path()).
#
# Three cases are exercised, mirroring the README in this directory:
#   T01  path containing an absolute symlink  -> readlink must print a.dir
#   T02  path containing a relative symlink   -> readlink must print a.dir
#   T03  /sys/devices/system/cpu/offline seen through mcexec must be blank
#
# The script is run by the Makefile as `sh ./C1463.sh`, so only POSIX shell
# constructs are used here (no `let`, no `$((var++))`).

USELTP=0
USEOSTEST=0

# Shared test environment setup (expected to sit two directories up).
. ../../common.sh

issue="1463"
tid=1

TEST_DIR="/tmp/test"
ABS_PATH="${TEST_DIR}"
REL_PATH="./test"
ABS_LN="${TEST_DIR}_1463_abs_ln"
REL_LN="${TEST_DIR}_1463_rel_ln"

mkdir -p "${TEST_DIR}"
touch "${TEST_DIR}/L.dir"

# Compute the name of the current test case into $tname and print its banner.
start_test() {
	tname=$(printf "C${issue}T%02d" "${tid}")
	echo "*** ${tname} start *******************************"
}

# Print the verdict for the current test case and advance the test counter.
#   $1: observed count
#   $2: expected count
finish_test() {
	if [ "$1" -eq "$2" ]; then
		echo "*** ${tname} PASSED ******************************"
	else
		echo "*** ${tname} FAILED ******************************"
	fi
	tid=$((tid + 1))
	echo ""
}

# T01: absolute symlink in the middle of the resolved path.
start_test
ln -fns "${ABS_PATH}" "${ABS_LN}"
mcexec readlink "${ABS_LN}/L.dir" | tee "./${tname}.txt"
cnt=$(grep "a.dir" "./${tname}.txt" | wc -l)
finish_test "${cnt}" 1

# T02: relative symlink in the middle of the resolved path.
start_test
ln -fns "${REL_PATH}" "${REL_LN}"
mcexec readlink "${REL_LN}/L.dir" | tee "./${tname}.txt"
cnt=$(grep "a.dir" "./${tname}.txt" | wc -l)
finish_test "${cnt}" 1

# T03: the offline CPU list seen through mcexec must contain no digits.
start_test
mcexec cat /sys/devices/system/cpu/offline | tee "./${tname}.txt"
echo "** (expected blank output)"
lines=$(grep -e "[0-9]" "./${tname}.txt" | wc -l)
finish_test "${lines}" 0

12
test/issues/1463/Makefile Normal file
View File

@ -0,0 +1,12 @@
# Build/run glue for the issue #1463 test.
# There is nothing to compile (TARGET is empty); `make test` just runs the
# shell script, and `make clean` removes the per-test output files it writes.
CFLAGS=
LDFLAGS=
TARGET=

# These names are commands, not files: keep them runnable even if a file
# called "all", "test" or "clean" ever appears in this directory.
.PHONY: all test clean

all: $(TARGET)

test: all
	sh ./C1463.sh

clean:
	rm -f $(TARGET) *.o *.txt

49
test/issues/1463/README Normal file
View File

@ -0,0 +1,49 @@
【Issue#1463 動作確認】
□ テスト内容
Issue#1463の修正は、mcexec.c: overlay_path() 内で行われる/sys/ 配下への
リンク解決処理に関する修正である。
上記のリンク解決処理を/tmp/ 配下にも行うようにするテストパッチを適用した上で
/tmp/ 配下へのoverlay_path()を実行して動作を確認する。
なお、リンク解決処理の動作確認には、McKernelによって作成される、
/sys/devices/virtual/mcos/mcos0/sys/test/L.dir を利用する。
このL.dirは、同ディレクトリのa.dir へのシンボリックリンクとなっている。
C1463T01:
以下の流れで、リンク解決処理対象パスの途中に絶対パスのシンボリックリンクが
存在している場合にも、/sys/devices/virtual/mcos/mcos0/sys/ 下に
誘導されることを確認する。
a. /tmp/test/L.dir に空のファイルを作成
b. /tmp/test への絶対パスのシンボリックリンクとして、/tmp/test_1463_abs_ln を作成
c. mcexec readlink /tmp/test_1463_abs_ln/L.dir を実行し、a.dir が出力されることを確認
C1463T02:
以下の流れで、リンク解決処理対象パスの途中に相対パスのシンボリックリンクが
存在している場合にも、/sys/devices/virtual/mcos/mcos0/sys/ 下に
誘導されることを確認する。
a. /tmp/test/L.dir に空のファイルを作成
b. /tmp/test への相対パスのシンボリックリンクとして、/tmp/test_1463_rel_ln を作成
c. mcexec readlink /tmp/test_1463_rel_ln/L.dir を実行し、a.dir が出力されることを確認
C1463T03:
以下の流れで、/sys/ 配下へのアクセスが/sys/devices/virtual/mcos/mcos0/sys/ 下に
誘導されることを確認する。
a. mcexecで確認した場合の /sys/devices/system/cpu/offline が空であることを確認
※通常、mckernelではofflineのCPUが存在しないため
□ 実行手順
・下記の手順でテストを実行する
$ cd <mckernel>
$ patch -p0 < test/issues/1463/tmp_overlay_path.patch
(build mckernel)
$ cd test/issues/1463
$ make test
McKernelのインストール先や、OSTEST, LTPの配置場所は、
$HOME/.mck_test_config を参照している
.mck_test_config は、McKernelをビルドした際に生成されるmck_test_config.sample ファイルを
$HOMEにコピーし、適宜編集する
□ 実行結果
x86_64_result.log aarch64_result.log 参照。
すべての項目をPASSしていることを確認。

View File

@ -0,0 +1,15 @@
sh ./C1463.sh
mcstop+release.sh ... done
mcreboot.sh -c 37-43,49-55 -m 2G@2,2G@3 -r 37-43:36+49-55:48 -O ... done
*** C1463T01 start *******************************
a.dir
*** C1463T01 PASSED ******************************
*** C1463T02 start *******************************
a.dir
*** C1463T02 PASSED ******************************
*** C1463T03 start *******************************
** (expected blank output)
*** C1463T03 PASSED ******************************

View File

@ -0,0 +1,17 @@
diff --git executer/user/mcexec.c executer/user/mcexec.c
index acae1f8..d220dd9 100644
--- executer/user/mcexec.c
+++ executer/user/mcexec.c
@@ -3458,6 +3458,12 @@ overlay_path(int dirfd, const char *in, char *buf, int *resolvelinks)
goto checkexist_resolvelinks;
}
+ /* for #1463's test */
+ if (!strncmp(path, "/tmp", 4) &&
+ (path[4] == '/' || path[4] == '\0')) {
+ goto checkexist_resolvelinks;
+ }
+
return in;
checkexist_resolvelinks:

View File

@ -0,0 +1,15 @@
sh ./C1463.sh
mcstop+release.sh ... done
mcreboot.sh -c 1-7,9-15,17-23,25-31 -m 10G@0,10G@1 -r 1-7:0+9-15:8+17-23:16+25-31:24 -O ... done
*** C1463T01 start *******************************
a.dir
*** C1463T01 PASSED ******************************
*** C1463T02 start *******************************
a.dir
*** C1463T02 PASSED ******************************
*** C1463T03 start *******************************
** (expected blank output)
*** C1463T03 PASSED ******************************