Files
mckernel/executer/kernel/mcctrl/arch/x86_64/archdeps.c

392 lines
9.4 KiB
C

/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
#include <linux/version.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "config.h"
#include "../../mcctrl.h"
#include "../../kallsyms_compat.h"
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0) && defined(CONFIG_X86_VSYSCALL_EMULATION)
#define gtod (&VVAR(vsyscall_gtod_data))
#else
#define gtod NULL
#endif
//#define SC_DEBUG
#ifdef SC_DEBUG
#define dprintk(...) printk(__VA_ARGS__)
#else
#define dprintk(...)
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
static struct vdso_image *_vdso_image_64;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif
static void *__vvar_page_ptr;
static long *hpet_address;
static void **hv_clock;
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
_vdso_image_64 = (void *) mcctrl_lookup_name("vdso_image_64");
if (WARN_ON(!_vdso_image_64))
return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
vdso_start = (void *) mcctrl_lookup_name("vdso_start");
if (WARN_ON(!vdso_start))
return -EFAULT;
vdso_end = (void *) mcctrl_lookup_name("vdso_end");
if (WARN_ON(!vdso_end))
return -EFAULT;
vdso_pages = (void *) mcctrl_lookup_name("vdso_pages");
if (WARN_ON(!vdso_pages))
return -EFAULT;
#endif
__vvar_page_ptr = (void *) &__vvar_page;
if (WARN_ON(!__vvar_page_ptr))
return -EFAULT;
hpet_address = (void *) mcctrl_lookup_name("hpet_address");
hv_clock = (void *) mcctrl_lookup_name("hv_clock");
return 0;
}
#define VDSO_MAXPAGES 2
struct vdso {
long busy;
int vdso_npages;
char vvar_is_global;
char hpet_is_global;
char pvti_is_global;
char padding;
long vdso_physlist[VDSO_MAXPAGES];
void *vvar_virt;
long vvar_phys;
void *hpet_virt;
long hpet_phys;
void *pvti_virt;
long pvti_phys;
void *vgtod_virt;
};
unsigned long
reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end);
int
reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
{
struct vm_area_struct *vma;
unsigned long start = 0L;
unsigned long end;
if (mutex_lock_killable(&usrdata->reserve_lock) < 0) {
return -1;
}
#define DESIRED_USER_END 0x800000000000
#define GAP_FOR_MCEXEC 0x008000000000UL
end = DESIRED_USER_END;
mmap_write_lock(current->mm);
vma = find_vma(current->mm, 0);
if (vma) {
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
mmap_write_unlock(current->mm);
#endif
start = reserve_user_space_common(usrdata, start, end);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
mmap_write_unlock(current->mm);
#endif
mutex_unlock(&usrdata->reserve_lock);
if (IS_ERR_VALUE(start)) {
return start;
}
*startp = start;
*endp = end;
return 0;
}
void get_vdso_info(ihk_os_t os, long vdso_rpa)
{
ihk_device_t dev = ihk_os_to_dev(os);
long vdso_pa;
struct vdso *vdso;
size_t size;
int i;
vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso));
vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0);
/* VDSO pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
size = _vdso_image_64->size;
vdso->vdso_npages = size >> PAGE_SHIFT;
if (vdso->vdso_npages > VDSO_MAXPAGES) {
vdso->vdso_npages = 0;
goto out;
}
for (i = 0; i < vdso->vdso_npages; ++i) {
vdso->vdso_physlist[i] = virt_to_phys(
_vdso_image_64->data + (i * PAGE_SIZE));
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
size = vdso_end - vdso_start;
size = (size + PAGE_SIZE - 1) & PAGE_MASK;
vdso->vdso_npages = size >> PAGE_SHIFT;
if (vdso->vdso_npages > VDSO_MAXPAGES) {
vdso->vdso_npages = 0;
goto out;
}
for (i = 0; i < vdso->vdso_npages; ++i) {
vdso->vdso_physlist[i] = page_to_phys(vdso_pages[i]);
}
#endif
/* VVAR page */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(-3 * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(-2 * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->vvar_is_global = 0;
vdso->vvar_virt = (void *)(vdso->vdso_npages * PAGE_SIZE);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
vdso->vvar_is_global = 1;
vdso->vvar_virt = (void *)fix_to_virt(VVAR_PAGE);
vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#endif
/* HPET page */
if (hpet_address && *hpet_address) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
vdso->hpet_is_global = 0;
vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
vdso->hpet_is_global = 1;
vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
vdso->hpet_phys = *hpet_address;
#endif
}
/* struct pvlock_vcpu_time_info table */
if (hv_clock && *hv_clock) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
vdso->pvti_is_global = 0;
vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
vdso->pvti_is_global = 1;
vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
vdso->pvti_phys = virt_to_phys(*hv_clock);
#endif
}
vdso->vgtod_virt = (void *)gtod;
out:
wmb();
vdso->busy = 0;
ihk_device_unmap_virtual(dev, vdso, sizeof(*vdso));
ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso));
return;
} /* get_vdso_info() */
void *
get_user_sp(void)
{
unsigned long usp;
asm volatile("movq %%gs:0xaf80, %0" : "=r" (usp));
return (void *)usp;
}
void
set_user_sp(void *usp)
{
asm volatile("movq %0, %%gs:0xaf80" :: "r" (usp));
}
struct trans_uctx {
volatile int cond;
int fregsize;
unsigned long rax;
unsigned long rbx;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
unsigned long rbp;
unsigned long r8;
unsigned long r9;
unsigned long r10;
unsigned long r11;
unsigned long r12;
unsigned long r13;
unsigned long r14;
unsigned long r15;
unsigned long rflags;
unsigned long rip;
unsigned long rsp;
unsigned long fs;
};
void
restore_tls(unsigned long addr)
{
wrmsrl(MSR_FS_BASE, addr);
}
void
save_tls_ctx(void __user *ctx)
{
struct trans_uctx __user *tctx = ctx;
struct trans_uctx kctx;
if (copy_from_user(&kctx, tctx, sizeof(struct trans_uctx))) {
pr_err("%s: copy_from_user failed.\n", __func__);
return;
}
rdmsrl(MSR_FS_BASE, kctx.fs);
}
unsigned long
get_tls_ctx(void __user *ctx)
{
struct trans_uctx __user *tctx = ctx;
struct trans_uctx kctx;
if (copy_from_user(&kctx, tctx, sizeof(struct trans_uctx))) {
pr_err("%s: copy_from_user failed.\n", __func__);
return 0;
}
return kctx.fs;
}
unsigned long
get_rsp_ctx(void *ctx)
{
struct trans_uctx *tctx = ctx;
return tctx->rsp;
}
int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
unsigned long *rpap, unsigned long *pgsizep)
{
unsigned long rpa;
int offsh;
int i;
int ix;
unsigned long phys;
unsigned long *pt;
int error;
unsigned long pgsize;
rpa = rpt;
offsh = 39;
pgsize = 0;
/* i = 0: PML4, 1: PDPT, 2: PDT, 3: PT */
for (i = 0; i < 4; ++i) {
ix = (rva >> offsh) & 0x1FF;
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpa, PAGE_SIZE);
pt = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
dprintk("rpa %#lx offsh %d ix %#x phys %#lx pt %p pt[ix] %#lx\n",
rpa, offsh, ix, phys, pt, pt[ix]);
#define PTE_P 0x001
if (!(pt[ix] & PTE_P)) {
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
error = -EFAULT;
dprintk("Remote PTE is not present for 0x%lx (rpt: %lx) ?\n", rva, rpt);
goto out;
}
#define PTE_PS 0x080
if (pt[ix] & PTE_PS) {
pgsize = 1UL << offsh;
rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1);
rpa |= rva & (pgsize - 1);
/* For GB pages, just report regular 2MB page */
if (offsh == 30) {
pgsize = 1UL << 21;
dprintk("%s: GB page translated 0x%lx -> 0x%lx, pgsize: %lu\n",
__FUNCTION__, rva, rpa, pgsize);
}
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
error = 0;
goto found;
}
rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << 12) - 1);
offsh -= 9;
ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
}
pgsize = 1UL << 12;
rpa |= rva & (pgsize - 1);
found:
error = 0;
*rpap = rpa;
*pgsizep = pgsize;
out:
dprintk("translate_rva_to_rpa: %d rva %#lx --> rpa %#lx (%lx)\n",
error, rva, rpa, pgsize);
return error;
}
#define PFN_WRITE_COMBINED _PAGE_PWT
static inline bool pte_is_write_combined(pte_t pte)
{
return ((pte_flags(pte) & _PAGE_PWT) && !(pte_flags(pte) & _PAGE_PCD));
}
/*
* The assembler switch_ctx is save/load registers in the context.
* Do TLS save/load and register host_thread with ioctl.
*/
long arch_switch_ctx(struct uti_switch_ctx_desc *desc)
{
return 0;
}