support vdso which borrows clocksource from linux

This commit is contained in:
NAKAMURA Gou
2016-03-28 14:42:49 +09:00
parent a587c8f5e5
commit 41bb2ab5e6
17 changed files with 689 additions and 18 deletions

View File

@ -29,6 +29,7 @@
#include <process.h>
#include <cls.h>
#include <prctl.h>
#include <page.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@ -63,8 +64,10 @@
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#define ekprintf kprintf
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf kprintf
#endif
static void *lapic_vp;
@ -144,6 +147,12 @@ extern char debug_exception[], int3_exception[];
uint64_t boot_pat_state = 0;
int no_turbo = 0; /* May be updated by early parsing of kargs */
extern int num_processors; /* kernel/ap.c */
struct pvclock_vcpu_time_info *pvti = NULL;
int pvti_npages;
static long pvti_msr = -1;
static void init_idt(void)
{
int i;
@ -1581,3 +1590,88 @@ void sync_tick(void)
dkprintf("sync_tick():\n");
return;
}
static int is_pvclock_available(void)
{
uint32_t eax;
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
dkprintf("is_pvclock_available()\n");
#define KVM_CPUID_SIGNATURE 0x40000000
asm ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a" (KVM_CPUID_SIGNATURE));
if ((eax && (eax < 0x40000001))
|| (ebx != 0x4b4d564b)
|| (ecx != 0x564b4d56)
|| (edx != 0x0000004d)) {
dkprintf("is_pvclock_available(): false (not kvm)\n");
return 0;
}
#define KVM_CPUID_FEATURES 0x40000001
asm ("cpuid" : "=a"(eax)
: "a"(KVM_CPUID_FEATURES)
: "%ebx", "%ecx", "%edx");
#define KVM_FEATURE_CLOCKSOURCE2 3
if (eax & (1 << KVM_FEATURE_CLOCKSOURCE2)) {
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
pvti_msr = MSR_KVM_SYSTEM_TIME_NEW;
dkprintf("is_pvclock_available(): true (new)\n");
return 1;
}
#define KVM_FEATURE_CLOCKSOURCE 0
else if (eax & (1 << KVM_FEATURE_CLOCKSOURCE)) {
#define MSR_KVM_SYSTEM_TIME 0x12
pvti_msr = MSR_KVM_SYSTEM_TIME;
dkprintf("is_pvclock_available(): true (old)\n");
return 1;
}
dkprintf("is_pvclock_available(): false (not supported)\n");
return 0;
} /* is_pvclock_available() */
int arch_setup_pvclock(void)
{
size_t size;
int npages;
dkprintf("arch_setup_pvclock()\n");
if (!is_pvclock_available()) {
dkprintf("arch_setup_pvclock(): not supported\n");
return 0;
}
size = num_processors * sizeof(*pvti);
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
pvti_npages = npages;
pvti = allocate_pages(npages, IHK_MC_AP_NOWAIT);
if (!pvti) {
ekprintf("arch_setup_pvclock: allocate_pages failed.\n");
return -ENOMEM;
}
dkprintf("arch_setup_pvclock(): ok\n");
return 0;
} /* arch_setup_pvclock() */
void arch_start_pvclock(void)
{
int cpu;
dkprintf("arch_start_pvclock()\n");
if (!pvti) {
dkprintf("arch_start_pvclock(): not supported\n");
return;
}
cpu = ihk_mc_get_processor_id();
wrmsr(pvti_msr,(intptr_t)&pvti[cpu]);
dkprintf("arch_start_pvclock(): ok\n");
return;
} /* arch_start_pvclock() */
/*** end of file ***/

View File

@ -0,0 +1,18 @@
/**
* \file auxvec.h
* License details are found in the file LICENSE.
* \brief
* Declare architecture-dependent constants for auxiliary vector
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* Copyright (C) 2016 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef ARCH_AUXVEC_H
#define ARCH_AUXVEC_H
#define AT_SYSINFO_EHDR 33
#endif

View File

@ -27,6 +27,8 @@
#include <mman.h>
#include <shm.h>
#include <prctl.h>
#include <ihk/ikc.h>
#include <page.h>
void terminate(int, int);
extern long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact);
@ -61,6 +63,10 @@ uintptr_t debug_constants[] = {
-1,
};
static struct vdso vdso;
static size_t container_size = 0;
static ptrdiff_t vdso_offset;
/*
See dkprintf("BSP HW ID = %d, ", bsp_hw_id); (in ./mcos/kernel/ap.c)
@ -1498,3 +1504,280 @@ SYSCALL_DECLARE(arch_prctl)
ihk_mc_syscall_arg1(ctx));
}
static int vdso_get_vdso_info(void)
{
int error;
struct ikc_scd_packet packet;
struct ihk_ikc_channel_desc *ch = cpu_local_var(syscall_channel);
dkprintf("vdso_get_vdso_info()\n");
vdso.busy = 1;
vdso.vdso_npages = 0;
packet.msg = SCD_MSG_GET_VDSO_INFO;
packet.arg = virt_to_phys(&vdso);
error = ihk_ikc_send(ch, &packet, 0);
if (error) {
ekprintf("vdso_get_vdso_info: ihk_ikc_send failed. %d\n", error);
goto out;
}
while (vdso.busy) {
cpu_pause();
}
error = 0;
out:
if (error) {
vdso.vdso_npages = 0;
}
dkprintf("vdso_get_vdso_info(): %d\n", error);
return error;
} /* vdso_get_vdso_info() */
static int vdso_map_global_pages(void)
{
int error;
enum ihk_mc_pt_attribute attr;
int i;
void *virt;
intptr_t phys;
dkprintf("vdso_map_global_pages()\n");
if (vdso.vvar_virt && vdso.vvar_is_global) {
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_page(NULL, vdso.vvar_virt, vdso.vvar_phys, attr);
if (error) {
ekprintf("vdso_map_global_pages: mapping vvar failed. %d\n", error);
goto out;
}
}
if (vdso.hpet_virt && vdso.hpet_is_global) {
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE;
error = ihk_mc_pt_set_page(NULL, vdso.hpet_virt, vdso.hpet_phys, attr);
if (error) {
ekprintf("vdso_map_global_pages: mapping hpet failed. %d\n", error);
goto out;
}
}
if (vdso.pvti_virt && vdso.pvti_is_global) {
error = arch_setup_pvclock();
if (error) {
ekprintf("vdso_map_global_pages: arch_setup_pvclock failed. %d\n", error);
goto out;
}
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
for (i = 0; i < pvti_npages; ++i) {
virt = vdso.pvti_virt - (i * PAGE_SIZE);
phys = virt_to_phys(pvti + (i * PAGE_SIZE));
error = ihk_mc_pt_set_page(NULL, virt, phys, attr);
if (error) {
ekprintf("vdso_map_global_pages: mapping pvti failed. %d\n", error);
goto out;
}
}
}
error = 0;
out:
dkprintf("vdso_map_global_pages(): %d\n", error);
return error;
} /* vdso_map_global_pages() */
static void vdso_calc_container_size(void)
{
intptr_t start, end;
intptr_t s, e;
dkprintf("vdso_calc_container_size()\n");
start = 0;
end = vdso.vdso_npages * PAGE_SIZE;
if (vdso.vvar_virt && !vdso.vvar_is_global) {
s = (intptr_t)vdso.vvar_virt;
e = s + PAGE_SIZE;
if (s < start) {
start = s;
}
if (end < e) {
end = e;
}
}
if (vdso.hpet_virt && !vdso.hpet_is_global) {
s = (intptr_t)vdso.hpet_virt;
e = s + PAGE_SIZE;
if (s < start) {
start = s;
}
if (end < e) {
end = e;
}
}
if (vdso.pvti_virt && !vdso.pvti_is_global) {
s = (intptr_t)vdso.pvti_virt;
e = s + PAGE_SIZE;
if (s < start) {
start = s;
}
if (end < e) {
end = e;
}
}
vdso_offset = 0;
if (start < 0) {
vdso_offset = -start;
}
container_size = end - start;
dkprintf("vdso_calc_container_size(): %#lx %#lx\n", container_size, vdso_offset);
return;
} /* vdso_calc_container_size() */
int arch_setup_vdso()
{
int error;
dkprintf("arch_setup_vdso()\n");
error = vdso_get_vdso_info();
if (error) {
ekprintf("arch_setup_vdso: vdso_get_vdso_info failed. %d\n", error);
goto out;
}
if (vdso.vdso_npages <= 0) {
error = 0;
goto out;
}
error = vdso_map_global_pages();
if (error) {
ekprintf("arch_setup_vdso: vdso_map_global_pages failed. %d\n", error);
goto out;
}
vdso_calc_container_size();
error = 0;
out:
if (container_size > 0) {
kprintf("vdso is enabled\n");
}
else {
kprintf("vdso is disabled\n");
}
dkprintf("arch_setup_vdso(): %d\n", error);
return error;
} /* arch_setup_vdso() */
int arch_map_vdso(struct process_vm *vm)
{
struct address_space *as = vm->address_space;
page_table_t pt = as->page_table;
void *container;
void *s;
void *e;
unsigned long vrflags;
enum ihk_mc_pt_attribute attr;
int error;
int i;
dkprintf("arch_map_vdso()\n");
if (container_size <= 0) {
/* vdso pages are not available */
dkprintf("arch_map_vdso(): not available\n");
error = 0;
goto out;
}
container = (void *)vm->region.map_end;
vm->region.map_end += container_size;
s = container + vdso_offset;
e = s + (vdso.vdso_npages * PAGE_SIZE);
vrflags = VR_REMOTE;
vrflags |= VR_PROT_READ | VR_PROT_EXEC;
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, NOPHYS, vrflags, NULL, 0, PAGE_SHIFT);
if (error) {
ekprintf("ERROR: adding memory range for vdso. %d\n", error);
goto out;
}
vm->vdso_addr = s;
attr = PTATTR_ACTIVE | PTATTR_USER;
for (i = 0; i < vdso.vdso_npages; ++i) {
s = vm->vdso_addr + (i * PAGE_SIZE);
e = s + PAGE_SIZE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vdso_physlist[i], attr);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
}
}
if (container_size > (vdso.vdso_npages * PAGE_SIZE)) {
if (vdso_offset) {
s = container;
e = container + vdso_offset;
}
else {
s = container + (vdso.vdso_npages * PAGE_SIZE);
e = container + container_size;
}
vrflags = VR_REMOTE;
vrflags |= VR_PROT_READ;
vrflags |= VRFLAG_PROT_TO_MAXPROT(vrflags);
error = add_process_memory_range(vm, (intptr_t)s, (intptr_t)e, NOPHYS, vrflags, NULL, 0, PAGE_SHIFT);
if (error) {
ekprintf("ERROR: adding memory range for vvar. %d\n", error);
goto out;
}
vm->vvar_addr = s;
if (vdso.vvar_virt && !vdso.vvar_is_global) {
s = vm->vdso_addr + (intptr_t)vdso.vvar_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.vvar_phys, attr);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
}
}
if (vdso.hpet_virt && !vdso.hpet_is_global) {
s = vm->vdso_addr + (intptr_t)vdso.hpet_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE | PTATTR_UNCACHABLE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.hpet_phys, attr);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
}
}
if (vdso.pvti_virt && !vdso.pvti_is_global) {
s = vm->vdso_addr + (intptr_t)vdso.pvti_virt;
e = s + PAGE_SIZE;
attr = PTATTR_ACTIVE | PTATTR_USER | PTATTR_NO_EXECUTE;
error = ihk_mc_pt_set_range(pt, vm, s, e, vdso.pvti_phys, attr);
if (error) {
ekprintf("ihk_mc_pt_set_range failed. %d\n", error);
goto out;
}
}
}
error = 0;
out:
dkprintf("arch_map_vdso(): %d %p\n", error, vm->vdso_addr);
return error;
} /* arch_map_vdso() */
/*** End of File ***/