uti: Call into McKernel futex()

(1) Masquerade clv
(2) Fix timeout
(3) Let the mcexec thread with the same tid as the McKernel thread migrating
    to Linux handle the migration request
(4) Call create_tracer() before creating proxy related objects

Change-Id: I6b2689b70db49827f10aa7d5a4c581aa81319b55
This commit is contained in:
Masamichi Takagi
2018-09-02 17:36:28 +09:00
parent 460917c4a0
commit 52afbbbc98
16 changed files with 629 additions and 150 deletions

View File

@ -71,14 +71,21 @@
#include <kmsg.h>
#include <timer.h>
#include <debug.h>
#include <syscall.h>
/*
 * Logging helpers for the futex code paths that take a clv_override argument.
 *
 * Both macros expand to an expression that names `clv_override` directly, so
 * they can only be used where an identifier with that name is in scope.
 * When clv_override and linux_printk are both non-NULL the message is emitted
 * through (*linux_printk); otherwise it goes through kprintf.
 * NOTE(review): presumably a non-NULL clv_override means the caller is running
 * on a Linux CPU -- confirm against the callers of futex().
 *
 * Uncomment the following define to enable the debug variant.
 */
//#define DEBUG_PRINT_FUTEX
#ifdef DEBUG_PRINT_FUTEX
#undef DDEBUG_DEFAULT
#define DDEBUG_DEFAULT DDEBUG_PRINT
/* Debug-only message: active only when DEBUG_PRINT_FUTEX is defined. */
#define uti_dkprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
#else
/* Debug messages are compiled out; the arguments are not evaluated. */
#define uti_dkprintf(...) do { } while (0)
#endif
/* Unconditional message: same printer selection as above, never compiled out. */
#define uti_kprintf(...) do { ((clv_override && linux_printk) ? (*linux_printk) : kprintf)(__VA_ARGS__); } while (0)
unsigned long ihk_mc_get_ns_per_tsc(void);
int futex_cmpxchg_enabled;
/**
@ -108,6 +115,9 @@ struct futex_q {
union futex_key key;
union futex_key *requeue_pi_key;
uint32_t bitset;
/* Used to wake up a thread running on a Linux CPU */
void *uti_futex_resp;
};
/*
@ -180,11 +190,12 @@ static void drop_futex_key_refs(union futex_key *key)
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key, struct cpu_local_var *clv_override)
{
unsigned long address = (unsigned long)uaddr;
unsigned long phys;
struct process_vm *mm = cpu_local_var(current)->vm;
struct thread *thread = cpu_local_var_with_override(current, clv_override);
struct process_vm *mm = thread->vm;
/*
* The futex address must be "naturally" aligned.
@ -250,7 +261,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
*/
static void wake_futex(struct futex_q *q)
static void wake_futex(struct futex_q *q, struct cpu_local_var *clv_override)
{
struct thread *p = q->task;
@ -272,8 +283,31 @@ static void wake_futex(struct futex_q *q)
barrier();
q->lock_ptr = NULL;
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
if (q->uti_futex_resp) {
int rc;
uti_dkprintf("wake_futex(): waking up migrated-to-Linux thread (tid %d),uti_futex_resp=%p\n", p->tid, q->uti_futex_resp);
/* TODO: Handle the case where a Linux thread wakes up another Linux thread */
if (clv_override) {
uti_dkprintf("%s: ERROR: A Linux thread is waking up migrated-to-Linux thread\n", __FUNCTION__);
}
if (p->spin_sleep == 0) {
uti_dkprintf("%s: INFO: woken up by someone else\n", __FUNCTION__);
}
struct ikc_scd_packet pckt;
struct ihk_ikc_channel_desc *resp_channel = cpu_local_var_with_override(ikc2linux, clv_override);
pckt.msg = SCD_MSG_FUTEX_WAKE;
pckt.futex.resp = q->uti_futex_resp;
pckt.futex.spin_sleep = &p->spin_sleep;
rc = ihk_ikc_send(resp_channel, &pckt, 0);
if (rc) {
uti_dkprintf("%s: ERROR: ihk_ikc_send returned %d, resp_channel=%p\n", __FUNCTION__, rc, resp_channel);
}
} else {
uti_dkprintf("wake_futex(): waking up McKernel thread (tid %d)\n", p->tid);
sched_wakeup_thread(p, PS_NORMAL);
}
}
/*
@ -303,7 +337,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
/*
* Wake up waiters matching bitset queued on this futex (uaddr).
*/
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset)
static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset, struct cpu_local_var *clv_override)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
@ -314,7 +348,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!bitset)
return -EINVAL;
ret = get_futex_key(uaddr, fshared, &key);
ret = get_futex_key(uaddr, fshared, &key, clv_override);
if ((ret != 0))
goto out;
@ -330,7 +364,7 @@ static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset
if (!(this->bitset & bitset))
continue;
wake_futex(this);
wake_futex(this, clv_override);
if (++ret >= nr_wake)
break;
}
@ -348,7 +382,8 @@ out:
*/
static int
futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_wake2, int op)
int nr_wake, int nr_wake2, int op,
struct cpu_local_var *clv_override)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb1, *hb2;
@ -357,10 +392,10 @@ futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int ret, op_ret;
retry:
ret = get_futex_key(uaddr1, fshared, &key1);
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0))
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0))
goto out_put_key1;
@ -394,7 +429,7 @@ retry_private:
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
wake_futex(this);
wake_futex(this, clv_override);
if (++ret >= nr_wake)
break;
}
@ -406,7 +441,7 @@ retry_private:
op_ret = 0;
plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
wake_futex(this);
wake_futex(this, clv_override);
if (++op_ret >= nr_wake2)
break;
}
@ -469,7 +504,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
*/
static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
int nr_wake, int nr_requeue, uint32_t *cmpval,
int requeue_pi)
int requeue_pi, struct cpu_local_var *clv_override)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int drop_count = 0, task_count = 0, ret;
@ -477,10 +512,10 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
struct plist_head *head1;
struct futex_q *this, *next;
ret = get_futex_key(uaddr1, fshared, &key1);
ret = get_futex_key(uaddr1, fshared, &key1, clv_override);
if ((ret != 0))
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
ret = get_futex_key(uaddr2, fshared, &key2, clv_override);
if ((ret != 0))
goto out_put_key1;
@ -515,7 +550,7 @@ static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2,
*/
/* RIKEN: no requeue_pi at this moment */
if (++task_count <= nr_wake) {
wake_futex(this);
wake_futex(this, clv_override);
continue;
}
@ -574,7 +609,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
* state is implicit in the state of woken task (see futex_wait_requeue_pi() for
* an example).
*/
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb, struct cpu_local_var *clv_override)
{
int prio;
@ -595,7 +630,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
q->list.plist.spinlock = &hb->lock;
#endif
plist_add(&q->list, &hb->chain);
q->task = cpu_local_var(current);
q->task = cpu_local_var_with_override(current, clv_override);
ihk_mc_spinlock_unlock_noirq(&hb->lock);
}
@ -658,19 +693,19 @@ retry:
/* RIKEN: this function has been rewritten so that it returns the remaining
 * time in case we are woken.
 */
static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
uint64_t timeout)
static int64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
uint64_t timeout, struct cpu_local_var *clv_override)
{
uint64_t time_remain = 0;
int64_t time_remain = 0;
unsigned long irqstate;
struct thread *thread = cpu_local_var(current);
struct thread *thread = cpu_local_var_with_override(current, clv_override);
/*
* The task state is guaranteed to be set before another task can
* wake it.
* queue_me() calls spin_unlock() upon completion, serializing
* access to the hash list and forcing a memory barrier.
*/
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
xchg4(&(thread->status), PS_INTERRUPTIBLE);
/* Indicate spin sleep. Note that schedule_timeout() with
* idle_halt should use spin sleep because sleep with timeout
@ -682,25 +717,40 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
}
queue_me(q, hb);
queue_me(q, hb, clv_override);
if (!plist_node_empty(&q->list)) {
if (clv_override) {
uti_dkprintf("%s: tid: %d is trying to sleep\n", __FUNCTION__, thread->tid);
/* Note that the unit of timeout is nsec */
time_remain = (*linux_wait_event)(q->uti_futex_resp, timeout);
/* Note that time_remain == 0 indicates that the condition evaluated to false after the timeout elapsed */
if (time_remain < 0) {
if (time_remain == -ERESTARTSYS) { /* Interrupted by signal */
uti_dkprintf("%s: DEBUG: wait_event returned -ERESTARTSYS\n", __FUNCTION__);
} else {
uti_kprintf("%s: ERROR: wait_event returned %d\n", __FUNCTION__, time_remain);
}
}
uti_dkprintf("%s: tid: %d woken up\n", __FUNCTION__, thread->tid);
} else {
if (timeout) {
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", thread->tid);
time_remain = schedule_timeout(timeout);
}
else {
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", thread->tid);
spin_sleep_or_schedule();
time_remain = 0;
}
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid);
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", thread->tid);
}
}
/* This does not need to be serialized */
cpu_local_var(current)->status = PS_RUNNING;
thread->status = PS_RUNNING;
thread->spin_sleep = 0;
return time_remain;
@ -724,7 +774,8 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
* <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
*/
static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
struct futex_q *q, struct futex_hash_bucket **hb)
struct futex_q *q, struct futex_hash_bucket **hb,
struct cpu_local_var *clv_override)
{
uint32_t uval;
int ret;
@ -747,7 +798,7 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
* rare, but normal.
*/
q->key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q->key);
ret = get_futex_key(uaddr, fshared, &q->key, clv_override);
if (ret != 0)
return ret;
@ -771,46 +822,54 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
}
static int futex_wait(uint32_t __user *uaddr, int fshared,
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt)
uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt,
struct cpu_local_var *clv_override)
{
struct futex_hash_bucket *hb;
struct futex_q q;
uint64_t time_remain;
int64_t time_remain;
int ret;
if (!bitset)
return -EINVAL;
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile &&
cpu_local_var(current)->profile_start_ts) {
cpu_local_var(current)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var(current)->profile_start_ts);
cpu_local_var(current)->profile_start_ts = 0;
if (cpu_local_var_with_override(current, clv_override)->profile &&
cpu_local_var_with_override(current, clv_override)->profile_start_ts) {
cpu_local_var_with_override(current, clv_override)->profile_elapsed_ts +=
(rdtsc() - cpu_local_var_with_override(current, clv_override)->profile_start_ts);
cpu_local_var_with_override(current, clv_override)->profile_start_ts = 0;
}
#endif
q.bitset = bitset;
q.requeue_pi_key = NULL;
q.uti_futex_resp = cpu_local_var_with_override(uti_futex_resp, clv_override);
retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb, clv_override);
if (ret) {
uti_dkprintf("%s: tid=%d futex_wait_setup returns zero, no need to sleep\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out;
}
/* queue_me and wait for wakeup, timeout, or a signal. */
time_remain = futex_wait_queue_me(hb, &q, timeout);
time_remain = futex_wait_queue_me(hb, &q, timeout, clv_override);
/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
if (!unqueue_me(&q))
if (!unqueue_me(&q)) {
uti_dkprintf("%s: tid=%d unqueued\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key;
}
ret = -ETIMEDOUT;
/* RIKEN: timer expired case (indicated by !time_remain) */
if (timeout && !time_remain)
if (timeout && !time_remain) {
uti_dkprintf("%s: tid=%d timer expired\n", __FUNCTION__, cpu_local_var_with_override(current, clv_override)->tid);
goto out_put_key;
}
if (hassigpending(cpu_local_var(current))) {
ret = -EINTR;
@ -825,19 +884,22 @@ out_put_key:
put_futex_key(fshared, &q.key);
out:
#ifdef PROFILE_ENABLE
if (cpu_local_var(current)->profile) {
cpu_local_var(current)->profile_start_ts = rdtsc();
if (cpu_local_var_with_override(current, clv_override)->profile) {
cpu_local_var_with_override(current, clv_override)->profile_start_ts = rdtsc();
}
#endif
return ret;
}
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared)
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared,
struct cpu_local_var *clv_override)
{
int clockrt, ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK;
uti_dkprintf("%s: uaddr=%p, op=%x, val=%x, timeout=%ld, uaddr2=%p, val2=%x, val3=%x, fshared=%d, clv=%p\n", __FUNCTION__, uaddr, op, val, timeout, uaddr2, val2, val3, fshared, clv_override);
clockrt = op & FUTEX_CLOCK_REALTIME;
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
@ -846,21 +908,21 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAIT_BITSET:
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt, clv_override);
break;
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAKE_BITSET:
ret = futex_wake(uaddr, fshared, val, val3);
ret = futex_wake(uaddr, fshared, val, val3, clv_override);
break;
case FUTEX_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0, clv_override);
break;
case FUTEX_CMP_REQUEUE:
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0);
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0, clv_override);
break;
case FUTEX_WAKE_OP:
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3, clv_override);
break;
/* RIKEN: these calls are not supported for now.
case FUTEX_LOCK_PI: