diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index 4d329f1e..c9de0a18 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -29,6 +29,7 @@ #include #include #include +#include #define LAPIC_ID 0x020 #define LAPIC_TIMER 0x320 @@ -63,8 +64,10 @@ #ifdef DEBUG_PRINT_CPU #define dkprintf kprintf +#define ekprintf kprintf #else #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) +#define ekprintf kprintf #endif static void *lapic_vp; @@ -144,6 +147,12 @@ extern char debug_exception[], int3_exception[]; uint64_t boot_pat_state = 0; int no_turbo = 0; /* May be updated by early parsing of kargs */ +extern int num_processors; /* kernel/ap.c */ +struct pvclock_vcpu_time_info *pvti = NULL; +int pvti_npages; +static long pvti_msr = -1; + + static void init_idt(void) { int i; @@ -1581,3 +1590,88 @@ void sync_tick(void) dkprintf("sync_tick():\n"); return; } +/* Return 1 and record the MSR in pvti_msr when CPUID reports the KVM signature plus a clocksource feature bit; otherwise return 0. */ +static int is_pvclock_available(void) +{ + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + + dkprintf("is_pvclock_available()\n"); +#define KVM_CPUID_SIGNATURE 0x40000000 + asm ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a" (KVM_CPUID_SIGNATURE)); + if ((eax && (eax < 0x40000001)) + || (ebx != 0x4b4d564b) + || (ecx != 0x564b4d56) + || (edx != 0x0000004d)) { + dkprintf("is_pvclock_available(): false (not kvm)\n"); + return 0; + } + +#define KVM_CPUID_FEATURES 0x40000001 + asm ("cpuid" : "=a"(eax) + : "a"(KVM_CPUID_FEATURES) + : "%ebx", "%ecx", "%edx"); +#define KVM_FEATURE_CLOCKSOURCE2 3 + if (eax & (1 << KVM_FEATURE_CLOCKSOURCE2)) { +#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 + pvti_msr = MSR_KVM_SYSTEM_TIME_NEW; + dkprintf("is_pvclock_available(): true (new)\n"); + return 1; + } +#define KVM_FEATURE_CLOCKSOURCE 0 + else if (eax & (1 << KVM_FEATURE_CLOCKSOURCE)) { +#define MSR_KVM_SYSTEM_TIME 0x12 + pvti_msr = MSR_KVM_SYSTEM_TIME; + dkprintf("is_pvclock_available(): true (old)\n"); + return 1; + } + + dkprintf("is_pvclock_available(): false (not 
supported)\n"); + return 0; +} /* is_pvclock_available() */ +/* Allocate the per-processor pvclock_vcpu_time_info table. Returns 0 on success or when pvclock is unavailable, -ENOMEM if allocation fails. */ +int arch_setup_pvclock(void) +{ + size_t size; + int npages; + + dkprintf("arch_setup_pvclock()\n"); + if (!is_pvclock_available()) { + dkprintf("arch_setup_pvclock(): not supported\n"); + return 0; + } + + size = num_processors * sizeof(*pvti); + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + pvti_npages = npages; + + pvti = allocate_pages(npages, IHK_MC_AP_NOWAIT); + if (!pvti) { + ekprintf("arch_setup_pvclock: allocate_pages failed.\n"); + return -ENOMEM; + } + + dkprintf("arch_setup_pvclock(): ok\n"); + return 0; +} /* arch_setup_pvclock() */ +/* Register the calling processor's table slot through pvti_msr; does nothing if the table was never allocated. */ +void arch_start_pvclock(void) +{ + int cpu; + + dkprintf("arch_start_pvclock()\n"); + if (!pvti) { + dkprintf("arch_start_pvclock(): not supported\n"); + return; + } + /* The KVM system-time MSR takes the guest-physical address of the slot with bit 0 set to enable updates. */ + cpu = ihk_mc_get_processor_id(); + wrmsr(pvti_msr, virt_to_phys(&pvti[cpu]) | 1); + dkprintf("arch_start_pvclock(): ok\n"); + return; +} /* arch_start_pvclock() */ + +/*** end of file ***/ diff --git a/arch/x86/kernel/include/arch/auxvec.h b/arch/x86/kernel/include/arch/auxvec.h new file mode 100644 index 00000000..e85b8c9f --- /dev/null +++ b/arch/x86/kernel/include/arch/auxvec.h @@ -0,0 +1,18 @@ +/** + * \file auxvec.h + * License details are found in the file LICENSE. 
+ * \brief + * Declare architecture-dependent constants for auxiliary vector + * \author Gou Nakamura + * Copyright (C) 2016 RIKEN AICS + */ +/* + * HISTORY + */ + +#ifndef ARCH_AUXVEC_H +#define ARCH_AUXVEC_H + +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index 248fd9fb..fd6351ef 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include void terminate(int, int); extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); @@ -61,6 +63,10 @@ uintptr_t debug_constants[] = { -1, }; +static struct vdso vdso; +static size_t container_size = 0; +static ptrdiff_t vdso_offset; + /* See dkprintf("BSP HW ID = %d, ", bsp_hw_id); (in ./mcos/kernel/ap.c) @@ -1498,3 +1504,280 @@ SYSCALL_DECLARE(arch_prctl) ihk_mc_syscall_arg1(ctx)); } +static int vdso_get_vdso_info(void) +{ + int error; + struct ikc_scd_packet packet; + struct ihk_ikc_channel_desc *ch = cpu_local_var(syscall_channel); + + dkprintf("vdso_get_vdso_info()\n"); + vdso.busy = 1; + vdso.vdso_npages = 0; + + packet.msg = SCD_MSG_GET_VDSO_INFO; + packet.arg = virt_to_phys(&vdso); + + error = ihk_ikc_send(ch, &packet, 0); + if (error) { + ekprintf("vdso_get_vdso_info: ihk_ikc_send failed. %d\n", error); + goto out; + } + /* The host-side handler clears vdso.busy once the descriptor has been filled in. */ + while (vdso.busy) { + cpu_pause(); + } + + error = 0; +out: + if (error) { + vdso.vdso_npages = 0; + } + dkprintf("vdso_get_vdso_info(): %d\n", error); + return error; +} /* vdso_get_vdso_info() */ +/* Map the vvar, hpet and pvti pages flagged as global at the virtual addresses reported by the host. Returns 0 or the first mapping error. */ +static int vdso_map_global_pages(void) +{ + int error; + enum ihk_mc_pt_attribute attr; + int i; + void *virt; + intptr_t phys; + + dkprintf("vdso_map_global_pages()\n"); + if (vdso.vvar_virt && vdso.vvar_is_global) { + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; + error = ihk_mc_pt_set_page(NULL, vdso.vvar_virt, vdso.vvar_phys, attr); + if (error) { + ekprintf("vdso_map_global_pages: mapping vvar failed. 
%d\n", error); + goto out; + } + } + + if (vdso.hpet_virt && vdso.hpet_is_global) { + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE; + error = ihk_mc_pt_set_page(NULL, vdso.hpet_virt, vdso.hpet_phys, attr); + if (error) { + ekprintf("vdso_map_global_pages: mapping hpet failed. %d\n", error); + goto out; + } + } + + if (vdso.pvti_virt && vdso.pvti_is_global) { + error = arch_setup_pvclock(); + if (error) { + ekprintf("vdso_map_global_pages: arch_setup_pvclock failed. %d\n", error); + goto out; + } + /* pvti is a struct pointer, so step through the allocation in bytes: one page per slot, with slot addresses descending from pvti_virt. */ + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; + for (i = 0; i < pvti_npages; ++i) { + virt = vdso.pvti_virt - (i * PAGE_SIZE); + phys = virt_to_phys((char *)pvti + (i * PAGE_SIZE)); + error = ihk_mc_pt_set_page(NULL, virt, phys, attr); + if (error) { + ekprintf("vdso_map_global_pages: mapping pvti failed. %d\n", error); + goto out; + } + } + } + + error = 0; +out: + dkprintf("vdso_map_global_pages(): %d\n", error); + return error; +} /* vdso_map_global_pages() */ +/* Derive container_size and vdso_offset from the vdso image plus every non-global vvar/hpet/pvti page (pages at negative offsets raise vdso_offset). */ +static void vdso_calc_container_size(void) +{ + intptr_t start, end; + intptr_t s, e; + + dkprintf("vdso_calc_container_size()\n"); + start = 0; + end = vdso.vdso_npages * PAGE_SIZE; + + if (vdso.vvar_virt && !vdso.vvar_is_global) { + s = (intptr_t)vdso.vvar_virt; + e = s + PAGE_SIZE; + + if (s < start) { + start = s; + } + if (end < e) { + end = e; + } + } + if (vdso.hpet_virt && !vdso.hpet_is_global) { + s = (intptr_t)vdso.hpet_virt; + e = s + PAGE_SIZE; + + if (s < start) { + start = s; + } + if (end < e) { + end = e; + } + } + if (vdso.pvti_virt && !vdso.pvti_is_global) { + s = (intptr_t)vdso.pvti_virt; + e = s + PAGE_SIZE; + + if (s < start) { + start = s; + } + if (end < e) { + end = e; + } + } + + vdso_offset = 0; + if (start < 0) { + vdso_offset = -start; + } + + container_size = end - start; + dkprintf("vdso_calc_container_size(): %#lx %#lx\n", container_size, vdso_offset); + return; +} /* vdso_calc_container_size() */ +/* Fetch the vdso description from the host, map global pages and size the per-process container; container_size stays 0 when vdso is unusable. */ +int arch_setup_vdso(void) +{ + int error; + + 
dkprintf("arch_setup_vdso()\n"); + error = vdso_get_vdso_info(); + if (error) { + ekprintf("arch_setup_vdso: vdso_get_vdso_info failed. %d\n", error); + goto out; + } + + if (vdso.vdso_npages <= 0) { + error = 0; + goto out; + } + + error = vdso_map_global_pages(); + if (error) { + ekprintf("arch_setup_vdso: vdso_map_global_pages failed. %d\n", error); + goto out; + } + + vdso_calc_container_size(); + + error = 0; +out: + if (container_size > 0) { + kprintf("vdso is enabled\n"); + } + else { + kprintf("vdso is disabled\n"); + } + dkprintf("arch_setup_vdso(): %d\n", error); + return error; +} /* arch_setup_vdso() */ +/* Reserve container_size bytes at vm->region.map_end and map the vdso image and non-global vvar/hpet/pvti pages there; sets vm->vdso_addr and vm->vvar_addr. */ +int arch_map_vdso(struct process_vm *vm) +{ + struct address_space *as = vm->address_space; + page_table_t pt = as->page_table; + void *container; + void *s; + void *e; + unsigned long vrflags; + enum ihk_mc_pt_attribute attr; + int error; + int i; + + dkprintf("arch_map_vdso()\n"); + if (container_size <= 0) { + /* vdso pages are not available */ + dkprintf("arch_map_vdso(): not available\n"); + error = 0; + goto out; + } + + container = (void *)vm->region.map_end; + vm->region.map_end += container_size; + + s = container + vdso_offset; + e = s + (vdso.vdso_npages * PAGE_SIZE); + vrflags = VR_REMOTE; + vrflags |= VR_PROT_READ | VR_PROT_EXEC; + vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); + error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, NOPHYS, vrflags, NULL, 0, PAGE_SHIFT); + if (error) { + ekprintf("ERROR: adding memory range for vdso. %d\n", error); + goto out; + } + vm->vdso_addr = s; + + attr = PTATTR_ACTIVE | PTATTR_USER; + for (i = 0; i < vdso.vdso_npages; ++i) { + s = vm->vdso_addr + (i * PAGE_SIZE); + e = s + PAGE_SIZE; + error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vdso_physlist[i], attr); + if (error) { + ekprintf("ihk_mc_pt_set_range failed. 
%d\n", error); + goto out; + } + } + + if (container_size > (vdso.vdso_npages * PAGE_SIZE)) { + if (vdso_offset) { + s = container; + e = container + vdso_offset; + } + else { + s = container + (vdso.vdso_npages * PAGE_SIZE); + e = container + container_size; + } + vrflags = VR_REMOTE; + vrflags |= VR_PROT_READ; + vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags); + error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, NOPHYS, vrflags, NULL, 0, PAGE_SHIFT); + if (error) { + ekprintf("ERROR: adding memory range for vvar. %d\n", error); + goto out; + } + vm->vvar_addr = s; + + if (vdso.vvar_virt && !vdso.vvar_is_global) { + s = vm->vdso_addr + (intptr_t)vdso.vvar_virt; + e = s + PAGE_SIZE; + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; + error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vvar_phys, attr); + if (error) { + ekprintf("ihk_mc_pt_set_range failed. %d\n", error); + goto out; + } + } + if (vdso.hpet_virt && !vdso.hpet_is_global) { + s = vm->vdso_addr + (intptr_t)vdso.hpet_virt; + e = s + PAGE_SIZE; + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE; + error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.hpet_phys, attr); + if (error) { + ekprintf("ihk_mc_pt_set_range failed. %d\n", error); + goto out; + } + } + if (vdso.pvti_virt && !vdso.pvti_is_global) { + s = vm->vdso_addr + (intptr_t)vdso.pvti_virt; + e = s + PAGE_SIZE; + attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE; + error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.pvti_phys, attr); + if (error) { + ekprintf("ihk_mc_pt_set_range failed. 
%d\n", error); + goto out; + } + } + } + + error = 0; +out: + dkprintf("arch_map_vdso(): %d %p\n", error, vm->vdso_addr); + return error; +} /* arch_map_vdso() */ + +/*** End of File ***/ diff --git a/configure.ac b/configure.ac index 43647be6..78b011c6 100644 --- a/configure.ac +++ b/configure.ac @@ -214,6 +214,13 @@ AC_DEFUN([MCCTRL_FIND_KSYM],[ MCCTRL_FIND_KSYM([sys_mount]) MCCTRL_FIND_KSYM([sys_unshare]) MCCTRL_FIND_KSYM([zap_page_range]) +MCCTRL_FIND_KSYM([vdso_image_64]) +MCCTRL_FIND_KSYM([vdso_start]) +MCCTRL_FIND_KSYM([vdso_end]) +MCCTRL_FIND_KSYM([vdso_pages]) +MCCTRL_FIND_KSYM([__vvar_page]) +MCCTRL_FIND_KSYM([hpet_address]) +MCCTRL_FIND_KSYM([hv_clock]) case $ENABLE_MEMDUMP in yes|no|auto) diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 6247cc7b..6d297bdf 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -87,6 +87,8 @@ struct program_load_desc { int pgid; int cred[8]; int reloc; + char enable_vdso; + char padding[7]; unsigned long entry; unsigned long user_start; unsigned long user_end; diff --git a/executer/kernel/mcctrl/arch/x86_64/archdeps.c b/executer/kernel/mcctrl/arch/x86_64/archdeps.c index c3e90f81..68ee13b9 100644 --- a/executer/kernel/mcctrl/arch/x86_64/archdeps.c +++ b/executer/kernel/mcctrl/arch/x86_64/archdeps.c @@ -2,6 +2,58 @@ #include "../../config.h" #include "../../mcctrl.h" +#ifdef MCCTRL_KSYM_vdso_image_64 +#if MCCTRL_KSYM_vdso_image_64 +struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64; +#endif +#endif + +#ifdef MCCTRL_KSYM_vdso_start +#if MCCTRL_KSYM_vdso_start +void *vdso_start = (void *)MCCTRL_KSYM_vdso_start; +#endif +#endif + +#ifdef MCCTRL_KSYM_vdso_end +#if MCCTRL_KSYM_vdso_end +void *vdso_end = (void *)MCCTRL_KSYM_vdso_end; +#endif +#endif + +#ifdef MCCTRL_KSYM_vdso_pages +#if MCCTRL_KSYM_vdso_pages +struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages; +#endif +#endif + +#ifdef MCCTRL_KSYM___vvar_page +#if MCCTRL_KSYM___vvar_page +void 
*__vvar_page = (void *)MCCTRL_KSYM___vvar_page; +#endif +#endif + +long *hpet_addressp +#ifdef MCCTRL_KSYM_hpet_address +#if MCCTRL_KSYM_hpet_address + = (void *)MCCTRL_KSYM_hpet_address; +#else + = &hpet_address; +#endif +#else + = NULL; +#endif + +void **hv_clockp +#ifdef MCCTRL_KSYM_hv_clock +#if MCCTRL_KSYM_hv_clock + = (void *)MCCTRL_KSYM_hv_clock; +#else + = &hv_clock; +#endif +#else + = NULL; +#endif + unsigned long reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end); @@ -36,3 +88,107 @@ reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsign *endp = end; return 0; } +/* Fill the struct vdso at remote physical address vdso_rpa with this Linux kernel's vdso/vvar/hpet/pvti layout, then clear its busy flag. */ +void get_vdso_info(ihk_os_t os, long vdso_rpa) +{ + ihk_device_t dev = ihk_os_to_dev(os); + long vdso_pa; + struct vdso *vdso; + size_t size; + int i; + + vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso)); + vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0); + + memset(vdso, 0, sizeof(*vdso)); + + /* VDSO pages */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) + size = vdso_image->size; + vdso->vdso_npages = size >> PAGE_SHIFT; + + if (vdso->vdso_npages > VDSO_MAXPAGES) { + vdso->vdso_npages = 0; + goto out; + } + + for (i = 0; i < vdso->vdso_npages; ++i) { + vdso->vdso_physlist[i] = virt_to_phys( + vdso_image->data + (i * PAGE_SIZE)); + } +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) + size = vdso_end - vdso_start; + size = (size + PAGE_SIZE - 1) & PAGE_MASK; + + vdso->vdso_npages = size >> PAGE_SHIFT; + if (vdso->vdso_npages > VDSO_MAXPAGES) { + vdso->vdso_npages = 0; + goto out; + } + + for (i = 0; i < vdso->vdso_npages; ++i) { + vdso->vdso_physlist[i] = page_to_phys(vdso_pages[i]); + } +#endif + + /* VVAR page */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) + vdso->vvar_is_global = 0; + vdso->vvar_virt = (void *)(-3 * PAGE_SIZE); + vdso->vvar_phys = virt_to_phys(__vvar_page); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) + vdso->vvar_is_global = 0; + vdso->vvar_virt = (void *)(-2 * 
PAGE_SIZE); + vdso->vvar_phys = virt_to_phys(__vvar_page); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) + vdso->vvar_is_global = 0; + vdso->vvar_virt = (void *)(vdso->vdso_npages * PAGE_SIZE); + vdso->vvar_phys = virt_to_phys(__vvar_page); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0) + vdso->vvar_is_global = 1; + vdso->vvar_virt = (void *)fix_to_virt(VVAR_PAGE); + vdso->vvar_phys = virt_to_phys(__vvar_page); +#endif + + /* HPET page */ + if (hpet_addressp && *hpet_addressp) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) + vdso->hpet_is_global = 0; + vdso->hpet_virt = (void *)(-2 * PAGE_SIZE); + vdso->hpet_phys = *hpet_addressp; +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0) + vdso->hpet_is_global = 0; + vdso->hpet_virt = (void *)(-1 * PAGE_SIZE); + vdso->hpet_phys = *hpet_addressp; +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) + vdso->hpet_is_global = 0; + vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE); + vdso->hpet_phys = *hpet_addressp; +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) + vdso->hpet_is_global = 1; + vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET); + vdso->hpet_phys = *hpet_addressp; +#endif + } + + /* struct pvclock_vcpu_time_info table */ + if (hv_clockp && *hv_clockp) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0) + vdso->pvti_is_global = 0; + vdso->pvti_virt = (void *)(-1 * PAGE_SIZE); + vdso->pvti_phys = virt_to_phys(*hv_clockp); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0) + vdso->pvti_is_global = 1; + vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN); + vdso->pvti_phys = virt_to_phys(*hv_clockp); +#endif + } + +out: + wmb(); + vdso->busy = 0; + + ihk_device_unmap_virtual(dev, vdso, sizeof(*vdso)); + ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso)); + return; +} /* get_vdso_info() */ diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index fe0e0721..f53e920b 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -95,6 +95,10 @@ static int 
syscall_packet_handler(struct ihk_ikc_channel_desc *c, delete_tid_entry(ihk_host_os_get_index(__os), pisp->pid, pisp->arg); break; + case SCD_MSG_GET_VDSO_INFO: + get_vdso_info(__os, pisp->arg); + break; + default: printk(KERN_ERR "mcctrl:syscall_packet_handler:" "unknown message (%d.%d.%d.%d.%d.%#lx)\n", diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index ddb6c362..4a9774ae 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -55,6 +55,7 @@ #define SCD_MSG_SYSCALL_ONESIDE 0x4 #define SCD_MSG_SEND_SIGNAL 0x8 #define SCD_MSG_CLEANUP_PROCESS 0x9 +#define SCD_MSG_GET_VDSO_INFO 0xa #define SCD_MSG_PROCFS_CREATE 0x10 #define SCD_MSG_PROCFS_DELETE 0x11 @@ -230,8 +231,6 @@ struct mcctrl_signal { int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp); int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg); int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); -int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, - unsigned long *endp); ihk_os_t osnum_to_os(int n); @@ -275,4 +274,26 @@ void procfs_exit(int osnum); /* sysfs_files.c */ void setup_sysfs_files(ihk_os_t os); +/* archdeps.c */ +#define VDSO_MAXPAGES 2 +struct vdso { + long busy; + int vdso_npages; + char vvar_is_global; + char hpet_is_global; + char pvti_is_global; + char padding; + long vdso_physlist[VDSO_MAXPAGES]; + void *vvar_virt; + long vvar_phys; + void *hpet_virt; + long hpet_phys; + void *pvti_virt; + long pvti_phys; +}; + +int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, + unsigned long *endp); +void get_vdso_info(ihk_os_t os, long vdso_pa); + #endif diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 9ea72a26..a49ccf54 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -63,6 +63,7 @@ #include #include #include "../include/uprotocol.h" +#include //#define DEBUG @@ -132,6 +133,7 @@ static char *exec_path = NULL; 
static char *altroot; static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK"; static int ischild; +static int enable_vdso = 1; struct fork_sync { pid_t pid; @@ -1231,6 +1233,24 @@ static int rlimits[] = { char dev[64]; +static struct option mcexec_options[] = { + { + .name = "disable-vdso", + .has_arg = no_argument, + .flag = &enable_vdso, + .val = 0, + }, + { + .name = "enable-vdso", + .has_arg = no_argument, + .flag = &enable_vdso, + .val = 1, + }, + + /* end */ + { NULL, 0, NULL, 0, }, +}; + int main(int argc, char **argv) { // int fd; @@ -1282,12 +1302,15 @@ int main(int argc, char **argv) } /* Parse options ("+" denotes stop at the first non-option) */ - while ((opt = getopt(argc, argv, "+c:")) != -1) { + while ((opt = getopt_long(argc, argv, "+c:", mcexec_options, NULL)) != -1) { switch (opt) { case 'c': target_core = atoi(optarg); break; + case 0: /* long opt */ + break; + default: /* '?' */ print_usage(argv); exit(EXIT_FAILURE); @@ -1416,6 +1439,8 @@ int main(int argc, char **argv) //print_flat(args); desc->cpu = target_core; + desc->enable_vdso = enable_vdso; + p = getenv(rlimit_stack_envname); if (p) { errno = 0; diff --git a/kernel/ap.c b/kernel/ap.c index 7194873d..73a709b3 100644 --- a/kernel/ap.c +++ b/kernel/ap.c @@ -40,6 +40,7 @@ static void ap_wait(void) kmalloc_init(); sched_init(); + arch_start_pvclock(); if (find_command_line("hidos")) { init_host_syscall_channel(); diff --git a/kernel/host.c b/kernel/host.c index 0a3db796..faf63a81 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -86,6 +86,7 @@ int prepare_process_ranges_args_envs(struct thread *thread, struct process_vm *vm = proc->vm; struct address_space *as = vm->address_space; long aout_base; + int error; n = p->num_sections; @@ -324,6 +325,14 @@ int prepare_process_ranges_args_envs(struct thread *thread, dkprintf("env OK\n"); + if (pn->enable_vdso) { + error = arch_map_vdso(vm); + if (error) { + kprintf("ERROR: mapping vdso pages. 
%d\n", error); + goto err; + } + } + p->rprocess = (unsigned long)thread; p->rpgtable = virt_to_phys(as->page_table); diff --git a/kernel/include/auxvec.h b/kernel/include/auxvec.h index a6d60b87..ec44024a 100644 --- a/kernel/include/auxvec.h +++ b/kernel/include/auxvec.h @@ -1,6 +1,8 @@ #ifndef _LINUX_AUXVEC_H #define _LINUX_AUXVEC_H +#include + /* Symbolic values for the entries in the auxiliary table put on the initial stack */ #define AT_NULL 0 /* end of vector */ diff --git a/kernel/include/process.h b/kernel/include/process.h index 28e9dc58..7b775b83 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -292,7 +292,7 @@ struct user unsigned long int u_debugreg [8]; }; -#define AUXV_LEN 16 +#define AUXV_LEN 18 struct vm_range { struct list_head list; @@ -557,6 +557,8 @@ struct process_vm { struct process *proc; /* process that reside on the same page */ void *opt; void (*free_cb)(struct process_vm *, void *); + void *vdso_addr; + void *vvar_addr; ihk_spinlock_t page_table_lock; ihk_spinlock_t memory_range_lock; diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 4cd8f182..1756d4e5 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -38,6 +38,7 @@ #define SCD_MSG_SYSCALL_ONESIDE 0x4 #define SCD_MSG_SEND_SIGNAL 0x8 #define SCD_MSG_CLEANUP_PROCESS 0x9 +#define SCD_MSG_GET_VDSO_INFO 0xa #define SCD_MSG_PROCFS_CREATE 0x10 #define SCD_MSG_PROCFS_DELETE 0x11 @@ -176,6 +177,8 @@ struct program_load_desc { int pgid; int cred[8]; int reloc; + char enable_vdso; + char padding[7]; unsigned long entry; unsigned long user_start; unsigned long user_end; @@ -361,5 +364,25 @@ intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd, off_t off0); typedef int32_t key_t; int do_shmget(key_t key, size_t size, int shmflg); +struct process_vm; +int arch_map_vdso(struct process_vm *vm); /* arch dependent */ +int arch_setup_vdso(void); + +#define VDSO_MAXPAGES 2 +struct vdso { + long busy; + int vdso_npages; + 
char vvar_is_global; + char hpet_is_global; + char pvti_is_global; + char padding; + long vdso_physlist[VDSO_MAXPAGES]; + void *vvar_virt; + long vvar_phys; + void *hpet_virt; + long hpet_phys; + void *pvti_virt; + long pvti_phys; +}; #endif diff --git a/kernel/init.c b/kernel/init.c index d48f21fa..9f82ca01 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -343,6 +343,8 @@ static void post_init(void) ihk_mc_spinlock_init(&syscall_lock); } + arch_setup_vdso(); + arch_start_pvclock(); ap_start(); sysfs_init(); diff --git a/kernel/process.c b/kernel/process.c index 1bd6fa3f..fcf0ede0 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -498,23 +498,30 @@ static int copy_user_pte(void *arg0, page_table_t src_pt, pte_t *src_ptep, void goto out; } - dkprintf("copy_user_pte(): 0x%lx PTE found\n", pgaddr); - dkprintf("copy_user_pte(): page size: %d\n", pgsize); - - npages = pgsize / PAGE_SIZE; - virt = ihk_mc_alloc_aligned_pages(npages, pgalign, IHK_MC_AP_NOWAIT); - if (!virt) { - kprintf("ERROR: copy_user_pte() allocating new page\n"); - error = -ENOMEM; - goto out; + if (args->new_vrflag & VR_REMOTE) { + phys = src_phys; + attr = pte_get_attr(src_ptep, pgsize); } - phys = virt_to_phys(virt); - dkprintf("copy_user_pte(): phys page allocated\n"); + else { + dkprintf("copy_user_pte(): 0x%lx PTE found\n", pgaddr); + dkprintf("copy_user_pte(): page size: %d\n", pgsize); - memcpy(virt, src_kvirt, pgsize); - dkprintf("copy_user_pte(): memcpy OK\n"); + npages = pgsize / PAGE_SIZE; + virt = ihk_mc_alloc_aligned_pages(npages, pgalign, IHK_MC_AP_NOWAIT); + if (!virt) { + kprintf("ERROR: copy_user_pte() allocating new page\n"); + error = -ENOMEM; + goto out; + } + phys = virt_to_phys(virt); + dkprintf("copy_user_pte(): phys page allocated\n"); + + memcpy(virt, src_kvirt, pgsize); + dkprintf("copy_user_pte(): memcpy OK\n"); + + attr = arch_vrflag_to_ptattr(args->new_vrflag, PF_POPULATE, NULL); + } - attr = arch_vrflag_to_ptattr(args->new_vrflag, PF_POPULATE, NULL); error = 
ihk_mc_pt_set_range(args->new_vm->address_space->page_table, args->new_vm, pgaddr, pgaddr+pgsize, phys, attr); if (error) { args->fault_addr = (intptr_t)pgaddr; @@ -1815,6 +1822,11 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, p[s_ind--] = AT_CLKTCK; p[s_ind--] = at_rand; /* AT_RANDOM */ p[s_ind--] = AT_RANDOM; +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR AT_IGNORE +#endif + p[s_ind--] = (long)(thread->vm->vdso_addr); + p[s_ind--] = (thread->vm->vdso_addr)? AT_SYSINFO_EHDR: AT_IGNORE; /* Save auxiliary vector for later use. */ memcpy(proc->saved_auxv, &p[s_ind + 1], sizeof(proc->saved_auxv)); diff --git a/lib/include/ihk/cpu.h b/lib/include/ihk/cpu.h index 331f9fee..2bf02b18 100644 --- a/lib/include/ihk/cpu.h +++ b/lib/include/ihk/cpu.h @@ -111,4 +111,14 @@ void init_tick(void); void init_delay(void); void sync_tick(void); +struct pvclock_vcpu_time_info { + long contents[32/sizeof(long)]; +}; + +extern struct pvclock_vcpu_time_info *pvti; +extern int pvti_npages; + +int arch_setup_pvclock(void); +void arch_start_pvclock(void); + #endif