mcctrl: look up unexported symbols at runtime

Instead of parsing System.map, use kallsyms_lookup_name() to
resolve the addresses of unexported symbols at module load time.

This lets McKernel work with KASLR enabled (it is enabled by
default from el7.5 onwards), because the addresses are now taken from
the running kernel rather than from the static System.map.

Change-Id: Ie4349fc1145ebce44f37f1f40c16f9d75584074d
This commit is contained in:
Dominique Martinet
2018-08-03 13:12:08 +09:00
committed by Masamichi Takagi
parent 794684985f
commit e8f8660b73
10 changed files with 151 additions and 672 deletions

View File

@ -1,6 +1,7 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h>
#include <linux/mm_types.h>
#include <linux/kallsyms.h>
#include <asm/vdso.h>
#include "../../../config.h"
#include "../../mcctrl.h"
@ -17,29 +18,31 @@
#define D(fmt, ...) printk("%s(%d) " fmt, __func__, __LINE__, ##__VA_ARGS__)
#ifdef MCCTRL_KSYM_vdso_start
# if MCCTRL_KSYM_vdso_start
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
# endif
#else
# error missing address of vdso_start.
/*
 * Addresses of unexported vDSO symbols. They carry no initializer here;
 * arch_symbols_init() assigns them from kallsyms_lookup_name().
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
void *vdso_start;
void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef MCCTRL_KSYM_vdso_end
# if MCCTRL_KSYM_vdso_end
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
# endif
#else
# error missing address of vdso_end.
/*
 * Resolve the architecture-specific unexported symbols by name.
 *
 * On kernels >= 4.0.0, vdso_start, vdso_end and vdso_spec are looked up
 * with kallsyms_lookup_name(). A lookup that returns 0 triggers WARN_ON()
 * and makes the function fail immediately.
 *
 * Return: 0 on success, -EFAULT if a required symbol was not found.
 */
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_spec = (void *) kallsyms_lookup_name("vdso_spec");
if (WARN_ON(!vdso_spec))
return -EFAULT;
#endif
#ifdef MCCTRL_KSYM_vdso_spec
# if MCCTRL_KSYM_vdso_spec
static struct vm_special_mapping (*vdso_spec)[2] = (void*)MCCTRL_KSYM_vdso_spec;
# endif
#else
# error missing address of vdso_spec.
#endif
return 0;
}
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 1

View File

@ -1,5 +1,6 @@
/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016 */
#include <linux/version.h>
#include <linux/kallsyms.h>
#include "../../../config.h"
#include "../../mcctrl.h"
@ -13,57 +14,46 @@
#endif
#endif /* POSTK_DEBUG_ARCH_DEP_83 */
#ifdef MCCTRL_KSYM_vdso_image_64
#if MCCTRL_KSYM_vdso_image_64
struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64;
/*
 * Addresses of unexported kernel symbols, assigned by arch_symbols_init().
 * Which vDSO symbols exist depends on the kernel version being built for.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
static struct vdso_image *vdso_image_64;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif
static void *__vvar_page;
/* These two are looked up without a failure check and may stay NULL. */
static long *hpet_address;
static void **hv_clock;
/*
 * Resolve the architecture-specific unexported symbols by name.
 *
 * Required symbols (a lookup returning 0 triggers WARN_ON() and fails):
 *   - kernels >= 3.16.0: vdso_image_64
 *   - kernels >= 2.6.23 (and < 3.16.0): vdso_start, vdso_end, vdso_pages
 *   - always: __vvar_page
 *
 * hpet_address and hv_clock are looked up as well, but their result is
 * not checked here.
 *
 * Return: 0 on success, -EFAULT if a required symbol was not found.
 */
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
vdso_image_64 = (void *) kallsyms_lookup_name("vdso_image_64");
if (WARN_ON(!vdso_image_64))
return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
vdso_start = (void *) kallsyms_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) kallsyms_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_pages = (void *) kallsyms_lookup_name("vdso_pages");
if (WARN_ON(!vdso_pages))
return -EFAULT;
#endif
#ifdef MCCTRL_KSYM_vdso_start
#if MCCTRL_KSYM_vdso_start
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
#endif
#endif
__vvar_page = (void *) kallsyms_lookup_name("__vvar_page");
if (WARN_ON(!__vvar_page))
return -EFAULT;
#ifdef MCCTRL_KSYM_vdso_end
#if MCCTRL_KSYM_vdso_end
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
#endif
#endif
/* No WARN_ON()/error return for these two: a failed lookup leaves them NULL. */
hpet_address = (void *) kallsyms_lookup_name("hpet_address");
hv_clock = (void *) kallsyms_lookup_name("hv_clock");
return 0;
}
#ifdef MCCTRL_KSYM_vdso_pages
#if MCCTRL_KSYM_vdso_pages
struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages;
#endif
#endif
#ifdef MCCTRL_KSYM___vvar_page
#if MCCTRL_KSYM___vvar_page
void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page;
#endif
#endif
long *hpet_addressp
#ifdef MCCTRL_KSYM_hpet_address
#if MCCTRL_KSYM_hpet_address
= (void *)MCCTRL_KSYM_hpet_address;
#else
= &hpet_address;
#endif
#else
= NULL;
#endif
void **hv_clockp
#ifdef MCCTRL_KSYM_hv_clock
#if MCCTRL_KSYM_hv_clock
= (void *)MCCTRL_KSYM_hv_clock;
#else
= &hv_clock;
#endif
#else
= NULL;
#endif
#ifdef POSTK_DEBUG_ARCH_DEP_52
#define VDSO_MAXPAGES 2
@ -138,7 +128,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
/* VDSO pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
size = vdso_image->size;
size = vdso_image_64->size;
vdso->vdso_npages = size >> PAGE_SHIFT;
if (vdso->vdso_npages > VDSO_MAXPAGES) {
@ -148,7 +138,7 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
for (i = 0; i < vdso->vdso_npages; ++i) {
vdso->vdso_physlist[i] = virt_to_phys(
vdso_image->data + (i * PAGE_SIZE));
vdso_image_64->data + (i * PAGE_SIZE));
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
size = vdso_end - vdso_start;
@ -185,36 +175,36 @@ void get_vdso_info(ihk_os_t os, long vdso_rpa)
#endif
/* HPET page */
if (hpet_addressp && *hpet_addressp) {
if (hpet_address && *hpet_address) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
vdso->hpet_is_global = 1;
vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
vdso->hpet_phys = *hpet_addressp;
vdso->hpet_phys = *hpet_address;
#endif
}
/* struct pvlock_vcpu_time_info table */
if (hv_clockp && *hv_clockp) {
if (hv_clock && *hv_clock) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->pvti_is_global = 0;
vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
vdso->pvti_phys = virt_to_phys(*hv_clockp);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
vdso->pvti_is_global = 1;
vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
vdso->pvti_phys = virt_to_phys(*hv_clockp);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#endif
}

View File

@ -39,7 +39,6 @@
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/io.h>
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <trace/events/sched.h>
#include <config.h>
@ -56,39 +55,6 @@
#define dprintk(...)
#endif
#ifdef MCCTRL_KSYM_sys_unshare
#if MCCTRL_KSYM_sys_unshare
typedef int (*int_star_fn_ulong_t)(unsigned long);
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) =
(int_star_fn_ulong_t)
MCCTRL_KSYM_sys_unshare;
#else // exported
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = NULL;
#endif
#endif
#ifdef MCCTRL_KSYM_sys_mount
#if MCCTRL_KSYM_sys_mount
typedef int (*int_star_fn_char_char_char_ulong_void_t)(char *, char *, char *, unsigned long, void *);
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) =
(int_star_fn_char_char_char_ulong_void_t)
MCCTRL_KSYM_sys_mount;
#else // exported
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = sys_mount;
#endif
#endif
#ifdef MCCTRL_KSYM_sys_umount
#if MCCTRL_KSYM_sys_umount
typedef int (*int_fn_char_star_int_t)(char *, int);
int (*mcctrl_sys_umount)(char *dir_name, int flags) =
(int_fn_char_star_int_t)
MCCTRL_KSYM_sys_umount;
#else // exported
int (*mcctrl_sys_umount)(char *dir_name, int flags) = sys_umount;
#endif
#endif
//extern struct mcctrl_channel *channels;
int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu);
int syscall_backward(struct mcctrl_usrdata *, int, unsigned long, unsigned long,
@ -1925,12 +1891,8 @@ long mcexec_sys_mount(struct sys_mount_desc *__user arg)
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
original = override_creds(promoted);
#ifdef MCCTRL_KSYM_sys_mount
ret = mcctrl_sys_mount(desc.dev_name, desc.dir_name, desc.type,
desc.flags, desc.data);
#else
ret = -EFAULT;
#endif
revert_creds(original);
put_cred(promoted);
@ -1956,11 +1918,7 @@ long mcexec_sys_umount(struct sys_mount_desc *__user arg)
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
original = override_creds(promoted);
#ifdef MCCTRL_KSYM_sys_umount
ret = mcctrl_sys_umount(desc.dir_name, MNT_FORCE);
#else
ret = -EFAULT;
#endif
revert_creds(original);
put_cred(promoted);
@ -1986,11 +1944,7 @@ long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
cap_raise(promoted->cap_effective, CAP_SYS_ADMIN);
original = override_creds(promoted);
#if MCCTRL_KSYM_sys_unshare
ret = mcctrl_sys_unshare(desc.unshare_flags);
#else
ret = -EFAULT;
#endif
revert_creds(original);
put_cred(promoted);
@ -2510,9 +2464,6 @@ cache_topo_search(struct ihk_cpu_topology *cpu_topo, int level)
return NULL;
}
static long (*setaffinity)(pid_t pid, const struct cpumask *in_mask);
static int (*setscheduler_nocheck)(struct task_struct *p, int policy,
const struct sched_param *param);
static unsigned int *uti_rr;
static int max_cpu;
@ -2526,20 +2477,6 @@ uti_attr_init(void)
if (uti_rr)
return 0;
if (!setaffinity) {
setaffinity = (long (*)(pid_t, const struct cpumask *))
kallsyms_lookup_name("sched_setaffinity");
if (!setaffinity)
return -ENOSYS;
}
if (!setscheduler_nocheck) {
setscheduler_nocheck = (int (*)(struct task_struct *, int,
const struct sched_param *))
kallsyms_lookup_name("sched_setscheduler_nocheck");
if (!setscheduler_nocheck)
return -ENOSYS;
}
for_each_possible_cpu(i) {
max_cpu = i;
}
@ -2757,29 +2694,29 @@ mcexec_uti_attr(ihk_os_t os, struct uti_attr_desc __user *arg)
else if (kattr->attr.flags & UTI_FLAG_EXCLUSIVE_CPU) {
struct sched_param sp;
setaffinity(0, uti_cpu_select(cpuset));
mcctrl_sched_setaffinity(0, uti_cpu_select(cpuset));
sp.sched_priority = 1;
setscheduler_nocheck(current, SCHED_FIFO, &sp);
mcctrl_sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
rc = 1;
}
else if (kattr->attr.flags & UTI_FLAG_CPU_INTENSIVE) {
setaffinity(0, uti_cpu_select(cpuset));
mcctrl_sched_setaffinity(0, uti_cpu_select(cpuset));
rc = 1;
}
else if (kattr->attr.flags & UTI_FLAG_HIGH_PRIORITY) {
struct sched_param sp;
setaffinity(0, uti_cpu_select(cpuset));
mcctrl_sched_setaffinity(0, uti_cpu_select(cpuset));
sp.sched_priority = 1;
setscheduler_nocheck(current, SCHED_FIFO, &sp);
mcctrl_sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
rc = 1;
}
else if (kattr->attr.flags & UTI_FLAG_NON_COOPERATIVE) {
setaffinity(0, uti_cpu_select(cpuset));
mcctrl_sched_setaffinity(0, uti_cpu_select(cpuset));
rc = 1;
}
else {
setaffinity(0, cpuset);
mcctrl_sched_setaffinity(0, cpuset);
}
kfree(cpuset);

View File

@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/kallsyms.h>
#include "mcctrl.h"
#include <ihk/ihk_host_user.h>
@ -214,6 +215,60 @@ static struct ihk_os_notifier mcctrl_os_notifier = {
.ops = &mcctrl_os_notifier_ops,
};
/*
 * Pointers to unexported kernel functions used by mcctrl.
 *
 * They have no static initializer (so they start out NULL) and are filled
 * in by symbols_init() using kallsyms_lookup_name().
 */
int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
int (*mcctrl_sys_umount)(char *dir_name, int flags);
int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
long (*mcctrl_sched_setaffinity)(pid_t pid, const struct cpumask *in_mask);
int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p, int policy,
const struct sched_param *param);
ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
/*
 * Resolve every unexported kernel symbol that the generic part of mcctrl
 * depends on.
 *
 * Each address is obtained with kallsyms_lookup_name(). All symbols looked
 * up here are mandatory: the first lookup that returns 0 triggers a
 * WARN_ON() and aborts the initialisation with -EFAULT. Once the generic
 * symbols are in place, the architecture-specific ones are resolved by
 * arch_symbols_init(), whose result is returned unchanged.
 *
 * Return: 0 on success, -EFAULT if a symbol is missing, or the error
 * reported by arch_symbols_init().
 */
static int symbols_init(void)
{
	/* System call entry points */
	mcctrl_sys_mount = (void *)kallsyms_lookup_name("sys_mount");
	if (WARN_ON(!mcctrl_sys_mount))
		goto missing;

	mcctrl_sys_umount = (void *)kallsyms_lookup_name("sys_umount");
	if (WARN_ON(!mcctrl_sys_umount))
		goto missing;

	mcctrl_sys_unshare = (void *)kallsyms_lookup_name("sys_unshare");
	if (WARN_ON(!mcctrl_sys_unshare))
		goto missing;

	/* Scheduler helpers */
	mcctrl_sched_setaffinity =
		(void *)kallsyms_lookup_name("sched_setaffinity");
	if (WARN_ON(!mcctrl_sched_setaffinity))
		goto missing;

	mcctrl_sched_setscheduler_nocheck =
		(void *)kallsyms_lookup_name("sched_setscheduler_nocheck");
	if (WARN_ON(!mcctrl_sched_setscheduler_nocheck))
		goto missing;

	/* Remaining helpers */
	mcctrl_sys_readlink = (void *)kallsyms_lookup_name("sys_readlink");
	if (WARN_ON(!mcctrl_sys_readlink))
		goto missing;

	mcctrl_zap_page_range = (void *)kallsyms_lookup_name("zap_page_range");
	if (WARN_ON(!mcctrl_zap_page_range))
		goto missing;

	return arch_symbols_init();

missing:
	/* Single failure exit shared by all mandatory lookups. */
	return -EFAULT;
}
static int __init mcctrl_init(void)
{
int ret = 0;
@ -231,6 +286,9 @@ static int __init mcctrl_init(void)
binfmt_mcexec_init();
if ((ret = symbols_init()))
goto error;
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
printk("mcctrl: error: registering OS notifier\n");
goto error;

View File

@ -400,6 +400,23 @@ int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
ihk_os_t osnum_to_os(int n);
/*
 * Pointers to unexported kernel functions, resolved by name at runtime.
 * Architecture-specific symbols are handled by arch_symbols_init().
 */
extern int (*mcctrl_sys_mount)(char *dev_name, char *dir_name, char *type,
unsigned long flags, void *data);
extern int (*mcctrl_sys_umount)(char *dir_name, int flags);
extern int (*mcctrl_sys_unshare)(unsigned long unshare_flags);
extern long (*mcctrl_sched_setaffinity)(pid_t pid,
const struct cpumask *in_mask);
extern int (*mcctrl_sched_setscheduler_nocheck)(struct task_struct *p,
int policy,
const struct sched_param *param);
extern ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz);
extern void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
unsigned long start,
unsigned long size,
struct zap_details *details);
/* syscall.c */
void pager_add_process(void);
void pager_remove_process(struct mcctrl_per_proc_data *ppd);
@ -504,6 +521,7 @@ struct vdso {
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
unsigned long *endp);
void get_vdso_info(ihk_os_t os, long vdso_pa);
int arch_symbols_init(void);
struct get_cpu_mapping_req {
int busy; /* INOUT: */

View File

@ -63,16 +63,6 @@
#define dprintk(...)
#endif
#ifdef MCCTRL_KSYM_zap_page_range
static void
(*mcctrl_zap_page_range)(struct vm_area_struct *vma, unsigned long start,
unsigned long size, struct zap_details *details)
#if MCCTRL_KSYM_zap_page_range
= (void *)MCCTRL_KSYM_zap_page_range;
#else
= &zap_page_range;
#endif
#endif
static long pager_call(ihk_os_t os, struct syscall_request *req);

View File

@ -892,16 +892,6 @@ out:
return error;
} /* read_long() */
#ifdef MCCTRL_KSYM_sys_readlink
static ssize_t (*mcctrl_sys_readlink)(const char *path, char *buf,
size_t bufsiz)
#if MCCTRL_KSYM_sys_readlink
= (void *)MCCTRL_KSYM_sys_readlink;
#else
= &sys_readlink;
#endif
#endif
static int read_link(char *buf, size_t bufsize, char *fmt, ...)
{
int error;