/* archdeps.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
#include <linux/version.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "config.h"
#include "../../mcctrl.h"
#include "../../kallsyms_compat.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,8,0) && defined(CONFIG_X86_VSYSCALL_EMULATION)
#define gtod (&VVAR(vsyscall_gtod_data))
#else
#define gtod NULL
#endif

//#define SC_DEBUG

#ifdef SC_DEBUG
#define dprintk(...) printk(__VA_ARGS__)
#else
#define dprintk(...)
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
static struct vdso_image *_vdso_image_64;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
static void *vdso_start;
static void *vdso_end;
static struct page **vdso_pages;
#endif
static void *__vvar_page_ptr;
static long *hpet_address;
static void **hv_clock;

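/*
 * Resolve the Linux-internal symbols needed to export vDSO/vvar (and, when
 * present, HPET and kvmclock) pages to McKernel.  Lookups go through
 * mcctrl_lookup_name() (kallsyms); hpet_address and hv_clock are optional
 * and may legitimately resolve to NULL.
 */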
int arch_symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
	_vdso_image_64 = (void *) mcctrl_lookup_name("vdso_image_64");
	if (WARN_ON(!_vdso_image_64))
		return -EFAULT;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
	vdso_start = (void *) mcctrl_lookup_name("vdso_start");
	if (WARN_ON(!vdso_start))
		return -EFAULT;

	vdso_end = (void *) mcctrl_lookup_name("vdso_end");
	if (WARN_ON(!vdso_end))
		return -EFAULT;

	vdso_pages = (void *) mcctrl_lookup_name("vdso_pages");
	if (WARN_ON(!vdso_pages))
		return -EFAULT;
#endif

	__vvar_page_ptr = (void *) &__vvar_page;
	if (WARN_ON(!__vvar_page_ptr))
		return -EFAULT;

	hpet_address = (void *) mcctrl_lookup_name("hpet_address");
	hv_clock = (void *) mcctrl_lookup_name("hv_clock");
	return 0;
}


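/*
 * Descriptor filled in for McKernel by get_vdso_info().  'busy' is cleared
 * last (after the wmb()) and is presumably what the LWK side polls to know
 * that the remaining fields are valid.
 */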
#define VDSO_MAXPAGES 2
struct vdso {
	long busy;
	int vdso_npages;
	char vvar_is_global;
	char hpet_is_global;
	char pvti_is_global;
	char padding;
	long vdso_physlist[VDSO_MAXPAGES];
	void *vvar_virt;
	long vvar_phys;
	void *hpet_virt;
	long hpet_phys;
	void *pvti_virt;
	long pvti_phys;
	void *vgtod_virt;
};

unsigned long
reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end);

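/*
 * Reserve the user virtual address range handed over to McKernel.  The top
 * of the range is capped at DESIRED_USER_END and kept GAP_FOR_MCEXEC below
 * the lowest existing mapping of the calling mcexec process.
 */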
int
reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
{
	struct vm_area_struct *vma;
	unsigned long start = 0L;
	unsigned long end;

	if (mutex_lock_killable(&usrdata->reserve_lock) < 0) {
		return -1;
	}

#define DESIRED_USER_END 0x800000000000
#define GAP_FOR_MCEXEC 0x008000000000UL
	end = DESIRED_USER_END;
	mmap_write_lock(current->mm);
	vma = find_vma(current->mm, 0);
	if (vma) {
		end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
	}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
	mmap_write_unlock(current->mm);
#endif
	start = reserve_user_space_common(usrdata, start, end);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
	mmap_write_unlock(current->mm);
#endif

	mutex_unlock(&usrdata->reserve_lock);

	if (IS_ERR_VALUE(start)) {
		return start;
	}
	*startp = start;
	*endp = end;
	return 0;
}

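/*
 * Fill in the struct vdso at remote physical address vdso_rpa with the
 * host's vDSO page frames plus the vvar, HPET and pvclock page locations,
 * so McKernel can map the same pages into its user processes.
 */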
void get_vdso_info(ihk_os_t os, long vdso_rpa)
{
	ihk_device_t dev = ihk_os_to_dev(os);
	long vdso_pa;
	struct vdso *vdso;
	size_t size;
	int i;

	vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso));
	vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0);

	/* VDSO pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
	size = _vdso_image_64->size;
	vdso->vdso_npages = size >> PAGE_SHIFT;

	if (vdso->vdso_npages > VDSO_MAXPAGES) {
		vdso->vdso_npages = 0;
		goto out;
	}

	for (i = 0; i < vdso->vdso_npages; ++i) {
		vdso->vdso_physlist[i] = virt_to_phys(
				_vdso_image_64->data + (i * PAGE_SIZE));
	}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
	size = vdso_end - vdso_start;
	size = (size + PAGE_SIZE - 1) & PAGE_MASK;

	vdso->vdso_npages = size >> PAGE_SHIFT;
	if (vdso->vdso_npages > VDSO_MAXPAGES) {
		vdso->vdso_npages = 0;
		goto out;
	}

	for (i = 0; i < vdso->vdso_npages; ++i) {
		vdso->vdso_physlist[i] = page_to_phys(vdso_pages[i]);
	}
#endif

	/* VVAR page */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
	vdso->vvar_is_global = 0;
	vdso->vvar_virt = (void *)(-3 * PAGE_SIZE);
	vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
	vdso->vvar_is_global = 0;
	vdso->vvar_virt = (void *)(-2 * PAGE_SIZE);
	vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
	vdso->vvar_is_global = 0;
	vdso->vvar_virt = (void *)(vdso->vdso_npages * PAGE_SIZE);
	vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
	vdso->vvar_is_global = 1;
	vdso->vvar_virt = (void *)fix_to_virt(VVAR_PAGE);
	vdso->vvar_phys = virt_to_phys(__vvar_page_ptr);
#endif

	/* HPET page */
	if (hpet_address && *hpet_address) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
		vdso->hpet_is_global = 0;
		vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
		vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
		vdso->hpet_is_global = 0;
		vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
		vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
		vdso->hpet_is_global = 0;
		vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
		vdso->hpet_phys = *hpet_address;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
		vdso->hpet_is_global = 1;
		vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
		vdso->hpet_phys = *hpet_address;
#endif
	}

	/* struct pvclock_vcpu_time_info table */
	if (hv_clock && *hv_clock) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
		vdso->pvti_is_global = 0;
		vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
		vdso->pvti_phys = virt_to_phys(*hv_clock);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
		vdso->pvti_is_global = 1;
		vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
		vdso->pvti_phys = virt_to_phys(*hv_clock);
#endif
	}

	vdso->vgtod_virt = (void *)gtod;
out:
	wmb();
	vdso->busy = 0;

	ihk_device_unmap_virtual(dev, vdso, sizeof(*vdso));
	ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso));
	return;
} /* get_vdso_info() */

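/*
 * Read/write the saved user stack pointer through a hard-coded %gs-relative
 * per-CPU offset (0xaf80).  NOTE: this offset is assumed to match the
 * per-CPU layout of the specific kernel build this module targets; it is
 * not derived from kernel headers here.
 */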
void *
get_user_sp(void)
{
	unsigned long usp;

	asm volatile("movq %%gs:0xaf80, %0" : "=r" (usp));
	return (void *)usp;
}

void
set_user_sp(void *usp)
{
	asm volatile("movq %0, %%gs:0xaf80" :: "r" (usp));
}

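/*
 * Register context image passed in from user space (mcexec); the layout is
 * expected to match the corresponding structure on the McKernel/mcexec side.
 */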
struct trans_uctx {
	volatile int cond;
	int fregsize;

	unsigned long rax;
	unsigned long rbx;
	unsigned long rcx;
	unsigned long rdx;
	unsigned long rsi;
	unsigned long rdi;
	unsigned long rbp;
	unsigned long r8;
	unsigned long r9;
	unsigned long r10;
	unsigned long r11;
	unsigned long r12;
	unsigned long r13;
	unsigned long r14;
	unsigned long r15;
	unsigned long rflags;
	unsigned long rip;
	unsigned long rsp;
	unsigned long fs;
};

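/* Restore the thread's TLS base by writing MSR_FS_BASE directly. */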
void
restore_tls(unsigned long addr)
{
	wrmsrl(MSR_FS_BASE, addr);
}

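/*
 * Save the current FS base (TLS pointer) into the user-supplied context.
 */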
void
save_tls_ctx(void __user *ctx)
{
	struct trans_uctx __user *tctx = ctx;
	unsigned long fs;

	rdmsrl(MSR_FS_BASE, fs);
	if (copy_to_user(&tctx->fs, &fs, sizeof(fs))) {
		pr_err("%s: copy_to_user failed.\n", __func__);
		return;
	}
}

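/* Fetch the saved FS base (TLS pointer) from a user-supplied context. */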
unsigned long
get_tls_ctx(void __user *ctx)
{
	struct trans_uctx __user *tctx = ctx;
	struct trans_uctx kctx;

	if (copy_from_user(&kctx, tctx, sizeof(struct trans_uctx))) {
		pr_err("%s: copy_from_user failed.\n", __func__);
		return 0;
	}
	return kctx.fs;
}

unsigned long
get_rsp_ctx(void *ctx)
{
	struct trans_uctx *tctx = ctx;

	return tctx->rsp;
}

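/*
 * Walk the remote (McKernel) x86_64 page tables rooted at physical address
 * rpt and translate remote virtual address rva into a remote physical
 * address.  Large pages are handled via PTE_PS; 1GiB mappings are reported
 * back as 2MiB pages (see below).
 */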
int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
		unsigned long *rpap, unsigned long *pgsizep)
{
	unsigned long rpa;
	int offsh;
	int i;
	int ix;
	unsigned long phys;
	unsigned long *pt;
	int error;
	unsigned long pgsize;

	rpa = rpt;
	offsh = 39;
	pgsize = 0;
	/* i = 0: PML4, 1: PDPT, 2: PDT, 3: PT */
	for (i = 0; i < 4; ++i) {
		ix = (rva >> offsh) & 0x1FF;
		phys = ihk_device_map_memory(ihk_os_to_dev(os), rpa, PAGE_SIZE);
		pt = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
		dprintk("rpa %#lx offsh %d ix %#x phys %#lx pt %p pt[ix] %#lx\n",
				rpa, offsh, ix, phys, pt, pt[ix]);

#define PTE_P 0x001
		if (!(pt[ix] & PTE_P)) {
			ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
			ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
			error = -EFAULT;
			dprintk("Remote PTE is not present for 0x%lx (rpt: %lx) ?\n", rva, rpt);
			goto out;
		}

#define PTE_PS 0x080
		if (pt[ix] & PTE_PS) {
			pgsize = 1UL << offsh;
			rpa = pt[ix] & ((1UL << 52) - 1) & ~(pgsize - 1);
			rpa |= rva & (pgsize - 1);

			/* For GB pages, just report regular 2MB page */
			if (offsh == 30) {
				pgsize = 1UL << 21;
				dprintk("%s: GB page translated 0x%lx -> 0x%lx, pgsize: %lu\n",
						__FUNCTION__, rva, rpa, pgsize);
			}

			ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
			ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
			error = 0;
			goto found;
		}

		rpa = pt[ix] & ((1UL << 52) - 1) & ~((1UL << 12) - 1);
		offsh -= 9;
		ihk_device_unmap_virtual(ihk_os_to_dev(os), pt, PAGE_SIZE);
		ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
	}
	pgsize = 1UL << 12;
	rpa |= rva & (pgsize - 1);

found:
	error = 0;
	*rpap = rpa;
	*pgsizep = pgsize;

out:
	dprintk("translate_rva_to_rpa: %d rva %#lx --> rpa %#lx (%lx)\n",
			error, rva, rpa, pgsize);
	return error;
}

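/*
 * Treat a PTE as write-combined when PWT is set and PCD is clear, which is
 * how Linux encodes _PAGE_CACHE_MODE_WC in the PWT/PCD bits under its
 * default PAT layout (an assumption if the PAT MSR has been reprogrammed).
 */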
#define PFN_WRITE_COMBINED _PAGE_PWT
static inline bool pte_is_write_combined(pte_t pte)
{
	return ((pte_flags(pte) & _PAGE_PWT) && !(pte_flags(pte) & _PAGE_PCD));
}

/*
 * The assembly-level switch_ctx saves and loads the registers in the
 * context.  TLS save/load and host_thread registration are done via ioctl.
 */
long arch_switch_ctx(struct uti_switch_ctx_desc *desc)
{
	return 0;
}