futex(): support for cross address-space futexes

This commit is contained in:
Balazs Gerofi
2015-11-24 14:33:48 +09:00
parent eb6de9d1de
commit 828a3ea57a
5 changed files with 119 additions and 68 deletions

View File

@ -116,6 +116,7 @@ SYSCALL_HANDLED(216, remap_file_pages)
SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_DELEGATED(220, semtimedop)
SYSCALL_HANDLED(228, clock_gettime)
SYSCALL_DELEGATED(230, clock_nanosleep)
SYSCALL_HANDLED(231, exit_group)
SYSCALL_DELEGATED(232, epoll_wait)

View File

@ -153,7 +153,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
*/
static void get_futex_key_refs(union futex_key *key)
{
	/*
	 * RIKEN: intentionally empty, no reference is taken on whatever
	 * backs the key. There is no swapping in McKernel.
	 */
}
@ -163,7 +163,7 @@ static void get_futex_key_refs(union futex_key *key)
*/
static void drop_futex_key_refs(union futex_key *key)
{
	/*
	 * RIKEN: intentionally empty, there is no reference to release
	 * for the key. There is no swapping in McKernel.
	 */
}
/**
@ -185,6 +185,7 @@ static int
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
unsigned long phys;
struct process_vm *mm = cpu_local_var(current)->vm;
/*
@ -203,15 +204,31 @@ get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
* but access_ok() should be faster than find_vma()
*/
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
get_futex_key_refs(key);
return 0;
}
/* RIKEN: No shared futex support... */
return -EFAULT;
key->both.offset |= FUT_OFF_MMSHARED;
retry_v2p:
/* Just use physical address of page, McKernel does not do swapping */
if (ihk_mc_pt_virt_to_phys(mm->address_space->page_table,
(void *)uaddr, &phys)) {
/* Check if we can fault in page */
if (page_fault_process_vm(mm, uaddr, PF_POPULATE | PF_WRITE | PF_USER)) {
kprintf("error: get_futex_key() virt to phys translation failed\n");
return -EFAULT;
}
goto retry_v2p;
}
key->shared.phys = (void *)phys;
key->shared.pgoff = 0;
return 0;
}
@ -265,6 +282,7 @@ static void wake_futex(struct futex_q *q)
barrier();
q->lock_ptr = NULL;
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
}
@ -667,12 +685,16 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
/* RIKEN: use mcos timers */
if (timeout) {
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
time_remain = schedule_timeout(timeout);
}
else {
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
schedule();
time_remain = 0;
}
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid);
}
/* This does not need to be serialized */
@ -777,10 +799,10 @@ retry:
if (timeout && !time_remain)
goto out_put_key;
if(hassigpending(cpu_local_var(current))){
if (hassigpending(cpu_local_var(current))) {
ret = -EINTR;
goto out_put_key;
}
}
/* RIKEN: no signals */
put_futex_key(fshared, &q.key);
@ -793,17 +815,10 @@ out:
}
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
uint32_t *uaddr2, uint32_t val2, uint32_t val3)
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared)
{
int clockrt, ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK;
int fshared = 0;
/* RIKEN: Assume address space private futexes.
if (!(op & FUTEX_PRIVATE_FLAG)) {
fshared = 1;
}
*/
clockrt = op & FUTEX_CLOCK_REALTIME;
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
@ -824,8 +839,7 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
break;
case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
0);
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0);
break;
case FUTEX_WAKE_OP:
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);

View File

@ -241,13 +241,11 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
struct process_vm;
union futex_key {
#if 0
struct {
unsigned long pgoff;
struct inode *inode;
void *phys;
int offset;
} shared;
#endif
struct {
unsigned long address;
struct process_vm *mm;
@ -261,6 +259,7 @@ union futex_key {
};
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#define FUT_OFF_MMSHARED 2
extern int futex_init(void);
@ -272,7 +271,8 @@ futex(
uint64_t timeout,
uint32_t __user * uaddr2,
uint32_t val2,
uint32_t val3
uint32_t val3,
int fshared
);

View File

@ -92,7 +92,8 @@ futex(
uint64_t timeout,
uint32_t __user * uaddr2,
uint32_t val2,
uint32_t val3
uint32_t val3,
int fshared
);
extern long

View File

@ -3420,7 +3420,10 @@ SYSCALL_DECLARE(futex)
{
uint64_t timeout = 0; // No timeout
uint32_t val2 = 0;
int futex_clock_realtime = 0;
// Only one clock is used, ignore FUTEX_CLOCK_REALTIME
//int futex_clock_realtime = 0;
int fshared = 1;
int ret = 0;
uint32_t *uaddr = (uint32_t *)ihk_mc_syscall_arg0(ctx);
int op = (int)ihk_mc_syscall_arg1(ctx);
@ -3429,33 +3432,33 @@ SYSCALL_DECLARE(futex)
uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx);
uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx);
/* Mask off the FUTEX_PRIVATE_FLAG,
* assume all futexes are address space private */
if (op & FUTEX_CLOCK_REALTIME) {
futex_clock_realtime = 1;
/* Cross-address space futex? */
if (op & FUTEX_PRIVATE_FLAG) {
fshared = 0;
}
op = (op & FUTEX_CMD_MASK);
dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x\n",
op,
(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
(op == FUTEX_WAKE) ? "FUTEX_WAKE" :
(op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" :
(op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" :
(op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" :
(op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown",
(unsigned long)uaddr, op, val, utime, uaddr2, val3, *uaddr);
dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d\n",
op,
(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
(op == FUTEX_WAKE) ? "FUTEX_WAKE" :
(op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" :
(op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" :
(op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" :
(op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown",
(unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared);
if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) {
unsigned long nsec_timeout;
struct timespec ats;
if (!gettime_local_support) {
struct syscall_request request IHK_DMA_ALIGN;
struct timeval tv_now;
request.number = n;
unsigned long __phys;
dkprintf("futex,utime and FUTEX_WAIT_*, uaddr=%lx, []=%x\n", (unsigned long)uaddr, *uaddr);
if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->address_space->page_table,
(void *)&tv_now, &__phys)) {
return -EFAULT;
@ -3469,37 +3472,26 @@ SYSCALL_DECLARE(futex)
return -EFAULT;
}
dkprintf("futex, FUTEX_WAIT_*, arg3 != NULL, pc=%lx\n", (unsigned long)ihk_mc_syscall_pc(ctx));
dkprintf("now->tv_sec=%016ld,tv_nsec=%016ld\n", tv_now.tv_sec, tv_now.tv_usec * 1000);
dkprintf("utime->tv_sec=%016ld,tv_nsec=%016ld\n", utime->tv_sec, utime->tv_nsec);
unsigned long nsec_timeout = ((long)utime->tv_sec * 1000000000ULL)
+ utime->tv_nsec;
long nsec_now = ((long)tv_now.tv_sec * 1000000000ULL) +
tv_now.tv_usec * 1000;
long diff_nsec = nsec_timeout - nsec_now;
timeout = (diff_nsec / 1000) * 1100; // (usec * 1.1GHz)
ats.tv_sec = tv_now.tv_sec;
ats.tv_nsec = tv_now.tv_usec * 1000;
}
/* Compute timeout based on TSC/nanosec ratio */
else {
unsigned long nsec_timeout;
if (!(futex_clock_realtime)) {
nsec_timeout = ((long)utime->tv_sec * NS_PER_SEC)
+ utime->tv_nsec;
}
else { /* FUTEX_CLOCK_REALTIME denotes absolute time */
struct timespec ats;
calculate_time_from_tsc(&ats);
nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) -
(ats.tv_sec * NS_PER_SEC + ats.tv_nsec);
}
timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc();
dkprintf("futex timeout: %lu\n", timeout);
calculate_time_from_tsc(&ats);
}
/* As per the Linux implementation FUTEX_WAIT specifies the duration of
* the timeout, while FUTEX_WAIT_BITSET specifies the absolute timestamp */
if (op == FUTEX_WAIT_BITSET) {
nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec) -
(ats.tv_sec * NS_PER_SEC + ats.tv_nsec);
}
else {
nsec_timeout = (utime->tv_sec * NS_PER_SEC + utime->tv_nsec);
}
timeout = nsec_timeout * 1000 / ihk_mc_get_ns_per_tsc();
dkprintf("futex timeout: %lu\n", timeout);
}
/* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE.
@ -3507,7 +3499,20 @@ SYSCALL_DECLARE(futex)
if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP)
val2 = (uint32_t) (unsigned long) ihk_mc_syscall_arg3(ctx);
return futex(uaddr, op, val, timeout, uaddr2, val2, val3);
ret = futex(uaddr, op, val, timeout, uaddr2, val2, val3, fshared);
dkprintf("futex op=[%x, %s],uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x, shared: %d, ret: %d\n",
op,
(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
(op == FUTEX_WAKE) ? "FUTEX_WAKE" :
(op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" :
(op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" :
(op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" :
(op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown",
(unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr, fshared, ret);
return ret;
}
SYSCALL_DECLARE(exit)
@ -3549,7 +3554,7 @@ SYSCALL_DECLARE(exit)
*thread->clear_child_tid = 0;
barrier();
futex((uint32_t *)thread->clear_child_tid,
FUTEX_WAKE, 1, 0, NULL, 0, 0);
FUTEX_WAKE, 1, 0, NULL, 0, 0, 1);
}
mcs_rwlock_reader_lock(&proc->threads_lock, &lock);
@ -4869,6 +4874,36 @@ static void calculate_time_from_tsc(struct timespec *ts)
return;
}
SYSCALL_DECLARE(clock_gettime)
{
/* TODO: handle clock_id */
struct timespec *ts = (struct timespec *)ihk_mc_syscall_arg1(ctx);
struct syscall_request request IHK_DMA_ALIGN;
int error;
struct timespec ats;
if (!ts) {
/* nothing to do */
return 0;
}
/* Do it locally if supported */
if (gettime_local_support) {
calculate_time_from_tsc(&ats);
error = copy_to_user(ts, &ats, sizeof(ats));
dkprintf("clock_gettime(): %d\n", error);
return error;
}
/* Otherwise offload */
request.number = __NR_clock_gettime;
request.args[0] = ihk_mc_syscall_arg0(ctx);
request.args[1] = ihk_mc_syscall_arg1(ctx);
return do_syscall(&request, ihk_mc_get_processor_id(), 0);
}
SYSCALL_DECLARE(gettimeofday)
{