gettimeofday(): an implementation based on CPU invariant TSC support

Balazs Gerofi
2015-08-24 23:41:31 +02:00
parent b8f166e608
commit 9ae5bcf46e
8 changed files with 193 additions and 33 deletions
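In outline, the new path works like this: IHK provides the wall-clock time at boot and a calibrated TSC rate, each core zeroes its TSC during startup, and gettimeofday() is then answered locally as boot time plus the TSC ticks elapsed since boot converted to nanoseconds; CPUs without an invariant TSC keep offloading the call to the host. A minimal, self-contained sketch of that conversion follows; the names (tsc_gettimeofday, boot_sec, boot_nsec, ns_per_1000_tsc) are placeholders for illustration, not symbols from this commit.

#include <stdio.h>
#include <sys/time.h>

/* Illustrative sketch only (placeholder names, not the kernel's symbols):
 * wall-clock time reconstructed as boot time plus the TSC ticks elapsed
 * since boot, scaled by "nanoseconds per 1000 TSC ticks". */
static void tsc_gettimeofday(unsigned long boot_sec, unsigned long boot_nsec,
			     unsigned long tsc_since_boot,
			     unsigned long ns_per_1000_tsc,
			     struct timeval *tv)
{
	unsigned long ns = boot_nsec + tsc_since_boot * ns_per_1000_tsc / 1000;

	tv->tv_sec = boot_sec + ns / 1000000000UL;
	tv->tv_usec = (ns % 1000000000UL) / 1000;
}

int main(void)
{
	struct timeval tv;

	/* 2,500,000,000 ticks at 400 ns per 1000 ticks (~2.5 GHz) is one second. */
	tsc_gettimeofday(1440452491UL, 0, 2500000000UL, 400, &tv);
	printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}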

View File

@ -67,6 +67,7 @@ void assign_processor_id(void);
void arch_delay(int);
void x86_set_warm_reset(unsigned long ip, char *first_page_va);
void x86_init_perfctr(void);
int gettime_local_support = 0;
extern int kprintf(const char *format, ...);
@ -569,6 +570,29 @@ static void check_no_execute(void)
return;
}
void init_gettime_support(void)
{
uint64_t op;
uint64_t eax;
uint64_t ebx;
uint64_t ecx;
uint64_t edx;
/* Check whether the Invariant TSC is supported.
 * Processor support for invariant TSC is indicated by
 * CPUID.80000007H:EDX[8].
 * See page 2498 of the Intel 64 and IA-32 Architectures Software
 * Developer's Manual (combined volumes). */
op = 0x80000007;
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a" (op));
if (edx & (1 << 8)) {
gettime_local_support = 1;
kprintf("Invariant TSC supported.\n");
}
}
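For reference, the same CPUID bit can be checked from user space on the host before relying on the local path. A small sketch using GCC's <cpuid.h> helper; this program is illustrative and not part of the commit:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x80000007 (Advanced Power Management Information):
	 * EDX bit 8 reports an invariant TSC, i.e. a TSC that ticks at a
	 * constant rate across P-, C- and T-state transitions. */
	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
		printf("CPUID leaf 0x80000007 not available\n");
		return 1;
	}
	printf("Invariant TSC: %s\n", (edx & (1u << 8)) ? "yes" : "no");
	return 0;
}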
void init_cpu(void)
{
enable_page_protection_fault();
@ -595,6 +619,8 @@ void setup_x86(void)
init_cpu();
init_gettime_support();
kprintf("setup_x86 done.\n");
}
@ -1316,3 +1342,9 @@ ihk_mc_user_context_t *lookup_user_context(struct process *proc)
return uctx;
} /* lookup_user_context() */
void zero_tsc(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
}

View File

@ -50,7 +50,7 @@ SYSCALL_HANDLED(29, shmget)
SYSCALL_HANDLED(30, shmat)
SYSCALL_HANDLED(31, shmctl)
SYSCALL_HANDLED(34, pause)
SYSCALL_DELEGATED(35, nanosleep)
SYSCALL_HANDLED(35, nanosleep)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
SYSCALL_DELEGATED(57, fork)
@ -67,7 +67,7 @@ SYSCALL_DELEGATED(70, msgrcv)
SYSCALL_DELEGATED(72, fcntl)
SYSCALL_DELEGATED(79, getcwd)
SYSCALL_DELEGATED(89, readlink)
SYSCALL_DELEGATED(96, gettimeofday)
SYSCALL_HANDLED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_HANDLED(102, getuid)

View File

@ -24,18 +24,21 @@
#include <process.h>
#include <init.h>
#include <march.h>
#include <cls.h>
int num_processors = 1;
static volatile int ap_stop = 1;
extern void zero_tsc(void);
static void ap_wait(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
while (ap_stop) {
barrier();
cpu_pause();
}
zero_tsc();
kmalloc_init();
sched_init();
@ -64,8 +67,6 @@ void ap_init(void)
ihk_mc_init_ap();
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
cpu_info = ihk_mc_get_cpu_info();
bsp_hw_id = ihk_mc_get_hardware_processor_id();

View File

@ -71,6 +71,11 @@ struct cpu_local_var {
int in_interrupt;
int no_preempt;
int timer_enabled;
unsigned long tv_sec;
unsigned long tv_nsec;
unsigned long last_tsc;
} __attribute__((aligned(64)));

View File

@ -19,6 +19,8 @@
#ifndef __TIME_H
#define __TIME_H
#define NS_PER_SEC 1000000000UL
typedef long int __time_t;
/* POSIX.1b structure for a time value. This is like a `struct timeval' but

View File

@ -200,6 +200,14 @@ static void pc_test(void)
ed[1] - st[1], ed[2] - st[2], ed[3] - st[3]);
}
extern void ihk_mc_get_boot_time(unsigned long *tv_sec, unsigned long *tv_nsec);
static void time_init(void)
{
ihk_mc_get_boot_time(&cpu_local_var(tv_sec),
&cpu_local_var(tv_nsec));
cpu_local_var(last_tsc) = 0;
}
static void rest_init(void)
{
handler_init();
@ -212,6 +220,7 @@ static void rest_init(void)
ap_init();
cpu_local_var_init();
time_init();
kmalloc_init();
ikc_master_init();
@ -220,9 +229,13 @@ static void rest_init(void)
}
int host_ikc_inited = 0;
extern int num_processors;
extern void zero_tsc(void);
extern void update_cpu_local_time(void);
static void post_init(void)
{
int i;
cpu_enable_interrupt();
while (!host_ikc_inited) {
@ -237,7 +250,20 @@ static void post_init(void)
init_host_syscall_channel2();
ihk_mc_spinlock_init(&syscall_lock);
}
/* Update the time elapsed so far during boot, distribute the current
 * date to all cores and zero the TSC.
 * All AP cores are spin-waiting for ap_start() and will zero
 * their TSCs immediately. */
update_cpu_local_time();
cpu_local_var(last_tsc) = 0;
for (i = 0; i < num_processors; ++i) {
get_cpu_local_var(i)->tv_sec = cpu_local_var(tv_sec);
get_cpu_local_var(i)->tv_nsec = cpu_local_var(tv_nsec);
}
zero_tsc();
ap_start();
create_os_procfs_files();
}
#ifdef DCFA_RUN

View File

@ -105,6 +105,7 @@ int patch_process_vm(struct process_vm *, void *, const void *, size_t);
void do_setpgid(int, int);
extern long alloc_debugreg(struct process *proc);
extern int num_processors;
extern unsigned long ihk_mc_get_ns_per_tsc(void);
static int ptrace_detach(int pid, int data);
int prepare_process_ranges_args_envs(struct process *proc,
@ -3505,38 +3506,47 @@ SYSCALL_DECLARE(futex)
(unsigned long)uaddr, op, val, utime, uaddr2, val3, *uaddr);
if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) {
	if (!gettime_local_support) {
		struct syscall_request request IHK_DMA_ALIGN;
		struct timeval tv_now;
		request.number = n;
		unsigned long __phys;

		dkprintf("futex,utime and FUTEX_WAIT_*, uaddr=%lx, []=%x\n", (unsigned long)uaddr, *uaddr);

		if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table,
				(void *)&tv_now, &__phys)) {
			return -EFAULT;
		}

		request.args[0] = __phys;
		int r = do_syscall(&request, ihk_mc_get_processor_id(), 0);
		if (r < 0) {
			return -EFAULT;
		}

		dkprintf("futex, FUTEX_WAIT_*, arg3 != NULL, pc=%lx\n", (unsigned long)ihk_mc_syscall_pc(ctx));
		dkprintf("now->tv_sec=%016ld,tv_nsec=%016ld\n", tv_now.tv_sec, tv_now.tv_usec * 1000);
		dkprintf("utime->tv_sec=%016ld,tv_nsec=%016ld\n", utime->tv_sec, utime->tv_nsec);

		unsigned long nsec_timeout = ((long)utime->tv_sec * 1000000000ULL)
			+ utime->tv_nsec;
		long nsec_now = ((long)tv_now.tv_sec * 1000000000ULL) +
			tv_now.tv_usec * 1000;
		long diff_nsec = nsec_timeout - nsec_now;
		timeout = (diff_nsec / 1000) * 1100; // (usec * 1.1GHz)
	}
	/* Compute timeout based on the TSC/nanosecond ratio */
	else {
		unsigned long nsec_timeout = ((long)utime->tv_sec * 1000000000ULL)
			+ utime->tv_nsec;

		timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc();
		dkprintf("futex timeout: %lu\n", timeout);
	}
}
/* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE.
@ -4913,6 +4923,88 @@ SYSCALL_DECLARE(get_cpu_id)
return ihk_mc_get_processor_id();
}
void __update_time_from_tsc_delta(unsigned long *tv_sec,
unsigned long *tv_nsec,
unsigned long tsc_delta)
{
unsigned long ns_delta = tsc_delta * ihk_mc_get_ns_per_tsc() / 1000;
*tv_sec += (ns_delta / NS_PER_SEC);
*tv_nsec += (ns_delta % NS_PER_SEC);
if (*tv_nsec >= NS_PER_SEC) {
*tv_nsec -= NS_PER_SEC;
++*tv_sec;
}
}
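To make the scaling concrete: assuming, as the /1000 above and the matching *1000 factors in the nanosleep and futex paths suggest, that ihk_mc_get_ns_per_tsc() returns nanoseconds per 1000 TSC ticks, a ~2.5 GHz invariant TSC reports 400, so a tsc_delta of 5,000,000,000 ticks gives ns_delta = 5,000,000,000 * 400 / 1000 = 2,000,000,000 ns, i.e. tv_sec advances by two and tv_nsec is unchanged.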
void update_cpu_local_time(void)
{
unsigned long tsc = rdtsc();
__update_time_from_tsc_delta(
&cpu_local_var(tv_sec),
&cpu_local_var(tv_nsec),
tsc - cpu_local_var(last_tsc));
cpu_local_var(last_tsc) = tsc;
}
SYSCALL_DECLARE(gettimeofday)
{
struct timeval *tv = (struct timeval *)ihk_mc_syscall_arg0(ctx);
struct syscall_request request IHK_DMA_ALIGN;
/* Do it locally if supported */
if (gettime_local_support) {
update_cpu_local_time();
tv->tv_sec = cpu_local_var(tv_sec);
tv->tv_usec = cpu_local_var(tv_nsec) / 1000;
dkprintf("gettimeofday(): \n");
return 0;
}
/* Otherwise offload */
request.number = __NR_gettimeofday;
request.args[0] = (unsigned long)tv;
return do_syscall(&request, ihk_mc_get_processor_id(), 0);
}
SYSCALL_DECLARE(nanosleep)
{
struct timespec *tv = (struct timespec *)ihk_mc_syscall_arg0(ctx);
struct timespec *rem = (struct timespec *)ihk_mc_syscall_arg1(ctx);
struct syscall_request request IHK_DMA_ALIGN;
/* Do it locally if supported */
if (gettime_local_support) {
unsigned long nanosecs = tv->tv_sec * NS_PER_SEC + tv->tv_nsec;
unsigned long tscs = nanosecs * 1000 / ihk_mc_get_ns_per_tsc();
unsigned long ts = rdtsc();
/* Spin wait */
while (rdtsc() - ts < tscs)
cpu_pause();
if (rem) {
	rem->tv_sec = 0;
	rem->tv_nsec = 0;
}
return 0;
}
/* Otherwise offload */
request.number = __NR_nanosleep;
request.args[0] = (unsigned long)tv;
request.args[1] = (unsigned long)rem;
return do_syscall(&request, ihk_mc_get_processor_id(), 0);
}
SYSCALL_DECLARE(sched_yield)
{
schedule();

View File

@ -102,4 +102,6 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
extern unsigned int ihk_ikc_irq;
extern unsigned int ihk_ikc_irq_apicid;
extern int gettime_local_support;
#endif