diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h index 6e2eef9f..04bc1e8f 100644 --- a/arch/x86/kernel/include/syscall_list.h +++ b/arch/x86/kernel/include/syscall_list.h @@ -116,6 +116,7 @@ SYSCALL_HANDLED(216, remap_file_pages) SYSCALL_DELEGATED(217, getdents64) SYSCALL_HANDLED(218, set_tid_address) SYSCALL_DELEGATED(220, semtimedop) +SYSCALL_HANDLED(228, clock_gettime) SYSCALL_DELEGATED(230, clock_nanosleep) SYSCALL_HANDLED(231, exit_group) SYSCALL_DELEGATED(232, epoll_wait) diff --git a/kernel/futex.c b/kernel/futex.c index baae867f..a822a79e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -153,7 +153,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) */ static void get_futex_key_refs(union futex_key *key) { - /* RIKEN: only !fshared futexes... */ + /* RIKEN: no swapping in McKernel */ return; } @@ -163,7 +163,7 @@ static void get_futex_key_refs(union futex_key *key) */ static void drop_futex_key_refs(union futex_key *key) { - /* RIKEN: only !fshared futexes... */ + /* RIKEN: no swapping in McKernel */ return; } /** @@ -185,6 +185,7 @@ static int get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key) { unsigned long address = (unsigned long)uaddr; + unsigned long phys; struct process_vm *mm = cpu_local_var(current)->vm; /* @@ -203,15 +204,31 @@ get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key) * but access_ok() should be faster than find_vma() */ if (!fshared) { - key->private.mm = mm; key->private.address = address; get_futex_key_refs(key); return 0; } - /* RIKEN: No shared futex support... */ - return -EFAULT; + key->both.offset |= FUT_OFF_MMSHARED; + +retry_v2p: + /* Just use physical address of page, McKernel does not do swapping */ + if (ihk_mc_pt_virt_to_phys(mm->address_space->page_table, + (void *)uaddr, &phys)) { + + /* Check if we can fault in page */ + if (page_fault_process_vm(mm, uaddr, PF_POPULATE | PF_WRITE | PF_USER)) { + kprintf("error: get_futex_key() virt to phys translation failed\n"); + return -EFAULT; + } + + goto retry_v2p; + } + key->shared.phys = (void *)phys; + key->shared.pgoff = 0; + + return 0; } @@ -265,6 +282,7 @@ static void wake_futex(struct futex_q *q) barrier(); q->lock_ptr = NULL; + dkprintf("wake_futex(): waking up tid %d\n", p->tid); sched_wakeup_thread(p, PS_NORMAL); } @@ -667,12 +685,16 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q /* RIKEN: use mcos timers */ if (timeout) { + dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid); time_remain = schedule_timeout(timeout); } else { + dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid); schedule(); time_remain = 0; } + + dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid); } /* This does not need to be serialized */ @@ -777,10 +799,10 @@ retry: if (timeout && !time_remain) goto out_put_key; - if(hassigpending(cpu_local_var(current))){ + if (hassigpending(cpu_local_var(current))) { ret = -EINTR; goto out_put_key; - } + } /* RIKEN: no signals */ put_futex_key(fshared, &q.key); @@ -793,17 +815,10 @@ out: } int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, - uint32_t *uaddr2, uint32_t val2, uint32_t val3) + uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared) { int clockrt, ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; - int fshared = 0; - - /* RIKEN: Assume address space private futexes. - if (!(op & FUTEX_PRIVATE_FLAG)) { - fshared = 1; - } - */ clockrt = op & FUTEX_CLOCK_REALTIME; if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) @@ -824,8 +839,7 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, - 0); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0); break; case FUTEX_WAKE_OP: ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); diff --git a/kernel/include/futex.h b/kernel/include/futex.h index 1c89848d..1fcab9e0 100644 --- a/kernel/include/futex.h +++ b/kernel/include/futex.h @@ -241,13 +241,11 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, struct process_vm; union futex_key { -#if 0 struct { unsigned long pgoff; - struct inode *inode; + void *phys; int offset; } shared; -#endif struct { unsigned long address; struct process_vm *mm; @@ -261,6 +259,7 @@ union futex_key { }; #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } +#define FUT_OFF_MMSHARED 2 extern int futex_init(void); @@ -272,7 +271,8 @@ futex( uint64_t timeout, uint32_t __user * uaddr2, uint32_t val2, - uint32_t val3 + uint32_t val3, + int fshared ); diff --git a/kernel/include/lwk/futex.h b/kernel/include/lwk/futex.h index 5f4f2f24..9c28208a 100644 --- a/kernel/include/lwk/futex.h +++ b/kernel/include/lwk/futex.h @@ -92,7 +92,8 @@ futex( uint64_t timeout, uint32_t __user * uaddr2, uint32_t val2, - uint32_t val3 + uint32_t val3, + int fshared ); extern long diff --git a/kernel/syscall.c b/kernel/syscall.c index 37f21b2d..fd2d326e 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -3420,7 +3420,10 @@ SYSCALL_DECLARE(futex) { uint64_t timeout = 0; // No timeout uint32_t val2 = 0; - int futex_clock_realtime = 0; + // Only one clock is used, ignore FUTEX_CLOCK_REALTIME + //int futex_clock_realtime = 0; + int fshared = 1; + int ret = 0; uint32_t *uaddr = (uint32_t *)ihk_mc_syscall_arg0(ctx); int op = (int)ihk_mc_syscall_arg1(ctx); @@ -3429,33 +3432,33 @@ SYSCALL_DECLARE(futex) uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx); uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx); - /* Mask off the FUTEX_PRIVATE_FLAG, - * assume all futexes are address space private */ - if (op & FUTEX_CLOCK_REALTIME) { - futex_clock_realtime = 1; + /* Cross-address space futex? */ + if (op & FUTEX_PRIVATE_FLAG) { + fshared = 0; } op = (op & FUTEX_CMD_MASK); - dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x\n", - op, - (op == FUTEX_WAIT) ? "FUTEX_WAIT" : - (op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" : - (op == FUTEX_WAKE) ? "FUTEX_WAKE" : - (op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" : - (op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" : - (op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" : - (op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown", - (unsigned long)uaddr, op, val, utime, uaddr2, val3, *uaddr); + dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n", + op, + (op == FUTEX_WAIT) ? "FUTEX_WAIT" : + (op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" : + (op == FUTEX_WAKE) ? "FUTEX_WAKE" : + (op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" : + (op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" : + (op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" : + (op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown", + (unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared); if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) { + unsigned long nsec_timeout; + struct timespec ats; + if (!gettime_local_support) { struct syscall_request request IHK_DMA_ALIGN; struct timeval tv_now; request.number = n; unsigned long __phys; - dkprintf("futex,utime and FUTEX_WAIT_*, uaddr=%lx, []=%x\n", (unsigned long)uaddr, *uaddr); - if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->address_space->page_table, (void *)&tv_now, &__phys)) { return -EFAULT; @@ -3469,37 +3472,26 @@ SYSCALL_DECLARE(futex) return -EFAULT; } - dkprintf("futex, FUTEX_WAIT_*, arg3 != NULL, pc=%lx\n", (unsigned long)ihk_mc_syscall_pc(ctx)); - dkprintf("now->tv_sec=%016ld,tv_nsec=%016ld\n", tv_now.tv_sec, tv_now.tv_usec * 1000); - dkprintf("utime->tv_sec=%016ld,tv_nsec=%016ld\n", utime->tv_sec, utime->tv_nsec); - unsigned long nsec_timeout = ((long)utime->tv_sec * 1000000000ULL) - + utime->tv_nsec; - - long nsec_now = ((long)tv_now.tv_sec * 1000000000ULL) + - tv_now.tv_usec * 1000; - long diff_nsec = nsec_timeout - nsec_now; - - timeout = (diff_nsec / 1000) * 1100; // (usec * 1.1GHz) + ats.tv_sec = tv_now.tv_sec; + ats.tv_nsec = tv_now.tv_usec * 1000; } /* Compute timeout based on TSC/nanosec ratio */ else { - unsigned long nsec_timeout; - - if (!(futex_clock_realtime)) { - nsec_timeout = ((long)utime->tv_sec * NS_PER_SEC) - + utime->tv_nsec; - } - else { /* FUTEX_CLOCK_REALTIME denotes absolute time */ - struct timespec ats; - calculate_time_from_tsc(&ats); - - nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) - - (ats.tv_sec * NS_PER_SEC + ats.tv_nsec); - } - - timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc(); - dkprintf("futex timeout: %lu\n", timeout); + calculate_time_from_tsc(&ats); } + + /* As per the Linux implementation FUTEX_WAIT specifies the duration of + * the timeout, while FUTEX_WAIT_BITSET specifies the absolute timestamp */ + if (op == FUTEX_WAIT_BITSET) { + nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) - + (ats.tv_sec * NS_PER_SEC + ats.tv_nsec); + } + else { + nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec); + } + + timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc(); + dkprintf("futex timeout: %lu\n", timeout); } /* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE. @@ -3507,7 +3499,20 @@ SYSCALL_DECLARE(futex) if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP) val2 = (uint32_t) (unsigned long) ihk_mc_syscall_arg3(ctx); - return futex(uaddr, op, val, timeout, uaddr2, val2, val3); + ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared); + + dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n", + op, + (op == FUTEX_WAIT) ? "FUTEX_WAIT" : + (op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" : + (op == FUTEX_WAKE) ? "FUTEX_WAKE" : + (op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" : + (op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" : + (op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" : + (op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown", + (unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared, ret); + + return ret; } SYSCALL_DECLARE(exit) @@ -3549,7 +3554,7 @@ SYSCALL_DECLARE(exit) *thread->clear_child_tid = 0; barrier(); futex((uint32_t *)thread->clear_child_tid, - FUTEX_WAKE, 1, 0, NULL, 0, 0); + FUTEX_WAKE, 1, 0, NULL, 0, 0, 1); } mcs_rwlock_reader_lock(&proc->threads_lock, &lock); @@ -4869,6 +4874,36 @@ static void calculate_time_from_tsc(struct timespec *ts) return; } +SYSCALL_DECLARE(clock_gettime) +{ + /* TODO: handle clock_id */ + struct timespec *ts = (struct timespec *)ihk_mc_syscall_arg1(ctx); + struct syscall_request request IHK_DMA_ALIGN; + int error; + struct timespec ats; + + if (!ts) { + /* nothing to do */ + return 0; + } + + /* Do it locally if supported */ + if (gettime_local_support) { + calculate_time_from_tsc(&ats); + + error = copy_to_user(ts, &ats, sizeof(ats)); + + dkprintf("clock_gettime(): %d\n", error); + return error; + } + + /* Otherwise offload */ + request.number = __NR_clock_gettime; + request.args[0] = ihk_mc_syscall_arg0(ctx); + request.args[1] = ihk_mc_syscall_arg1(ctx); + + return do_syscall(&request, ihk_mc_get_processor_id(), 0); +} SYSCALL_DECLARE(gettimeofday) {