futex(): spin wait when CPU not oversubscribed and fix lost wake-up bug

This commit is contained in:
Balazs Gerofi
2017-01-12 18:50:21 +09:00
parent d646c2a4b9
commit bd47b909bf
7 changed files with 163 additions and 142 deletions

View File

@ -2233,30 +2233,28 @@ int strcpy_from_user(char *dst, const char *src)
return err;
}
long getlong_user(const long *p)
long getlong_user(long *dest, const long *p)
{
int error;
long l;
error = copy_from_user(&l, p, sizeof(l));
error = copy_from_user(dest, p, sizeof(long));
if (error) {
return error;
}
return l;
return 0;
}
int getint_user(const int *p)
int getint_user(int *dest, const int *p)
{
int error;
int i;
error = copy_from_user(&i, p, sizeof(i));
error = copy_from_user(dest, p, sizeof(int));
if (error) {
return error;
}
return i;
return 0;
}
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)

View File

@ -248,9 +248,13 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
{
/* RIKEN: futexes always reside on non-swappable pages */
*dest = getint_user((int *)from);
/*
* Officially we should call:
* return getint_user((int *)dest, (int *)from);
*
* but McKernel on x86 can just access user-space.
*/
*dest = *(volatile uint32_t *)from;
return 0;
}
@ -670,26 +674,32 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
uint64_t timeout)
{
uint64_t time_remain = 0;
unsigned long irqstate;
struct thread *thread = cpu_local_var(current);
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using set_mb() and
* queue_me() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
* wake it.
* queue_me() calls spin_unlock() upon completion, serializing
* access to the hash list and forcing a memory barrier.
*/
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
barrier();
/* Indicate spin sleep */
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
thread->spin_sleep = 1;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
queue_me(q, hb);
if (!plist_node_empty(&q->list)) {
/* RIKEN: use mcos timers */
if (timeout) {
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
time_remain = schedule_timeout(timeout);
}
else {
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
schedule();
spin_sleep_or_schedule();
time_remain = 0;
}
@ -698,6 +708,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
/* This does not need to be serialized */
cpu_local_var(current)->status = PS_RUNNING;
thread->spin_sleep = 0;
return time_remain;
}
@ -744,14 +755,17 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
*/
q->key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q->key);
if ((ret != 0))
if (ret != 0)
return ret;
*hb = queue_lock(q);
ret = get_futex_value_locked(&uval, uaddr);
/* RIKEN: get_futex_value_locked() always returns 0 on mckernel */
if (ret) {
queue_unlock(q, *hb);
put_futex_key(fshared, &q->key);
return ret;
}
if (uval != val) {
queue_unlock(q, *hb);
@ -777,8 +791,6 @@ static int futex_wait(uint32_t __user *uaddr, int fshared,
q.bitset = bitset;
q.requeue_pi_key = NULL;
/* RIKEN: futex_wait_queue_me() calls schedule_timeout() if timer is set */
retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);

View File

@ -759,6 +759,7 @@ extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64
enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
void schedule(void);
void spin_sleep_or_schedule(void);
void runq_add_thread(struct thread *thread, int cpu_id);
void runq_del_thread(struct thread *thread, int cpu_id);
int sched_wakeup_thread(struct thread *thread, int valid_states);

View File

@ -2668,6 +2668,65 @@ set_timer()
}
}
/*
 * spin_sleep_or_schedule(): wait for a wake-up by busy-waiting on
 * thread->spin_sleep, and fall back to schedule() as soon as this CPU
 * has a pending reschedule request or more than one thread queued.
 *
 * NOTE: it is assumed that a wait-queue (or futex queue) is
 * set before calling this function.
 * NOTE: one must set thread->spin_sleep to 1 before evaluating
 * the wait condition to avoid lost wake-ups.
 *
 * The function returns without calling schedule() when it observes
 * thread->spin_sleep == 0 (presumably cleared by the waker, e.g.
 * sched_wakeup_thread() -- confirm against the caller side).
 * Otherwise it clears spin_sleep itself and calls schedule().
 */
void spin_sleep_or_schedule(void)
{
struct thread *thread = cpu_local_var(current);
struct cpu_local_var *v;
int do_schedule = 0;
int woken = 0;
/*
 * NOTE(review): declared as long here, while other users of
 * ihk_mc_spinlock_lock() in this change declare irqstate as
 * unsigned long -- confirm which type the lock API returns.
 */
long irqstate;
/*
 * Diagnostic only: if spin_sleep is already 0 on entry, the wake-up
 * arrived before we started spinning. Nothing is changed here; the
 * loop below detects the same condition and returns.
 */
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
if (thread->spin_sleep == 0) {
dkprintf("%s: caught a lost wake-up!\n", __FUNCTION__);
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
for (;;) {
/*
 * Check if we need to reschedule: either a reschedule has been
 * requested for this CPU or its run queue holds more than one
 * thread. The check is made under this CPU's runq_lock.
 */
irqstate =
ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
v = get_this_cpu_local_var();
if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
do_schedule = 1;
}
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
/*
 * Check if we were woken up. This test and the clear below happen
 * in the same spin_sleep_lock critical section, and the test comes
 * first, so a wake-up that already happened is never mistaken for
 * our own clear.
 */
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
if (thread->spin_sleep == 0) {
woken = 1;
}
/*
 * Indicate that we are not spinning any more, so that a later
 * waker does not treat this thread as a spinner (presumably it
 * then takes the regular run-queue wake-up path -- confirm).
 */
if (do_schedule) {
thread->spin_sleep = 0;
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
/* A wake-up takes priority: return even if a reschedule is due */
if (woken) {
return;
}
/* Not woken, but the CPU is needed: leave the loop and block */
if (do_schedule) {
break;
}
/* Nothing else to run on this CPU: keep spinning */
cpu_pause();
}
schedule();
}
void schedule(void)
{
struct cpu_local_var *v;
@ -2834,7 +2893,6 @@ int
sched_wakeup_thread(struct thread *thread, int valid_states)
{
int status;
int spin_slept = 0;
unsigned long irqstate;
struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id);
struct process *proc = thread->proc;
@ -2844,29 +2902,23 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
proc->pid, valid_states, thread->status, thread->cpu_id, ihk_mc_get_processor_id());
irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock));
if (thread->spin_sleep > 0) {
if (thread->spin_sleep == 1) {
dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n",
thread->cpu_id);
spin_slept = 1;
status = 0;
}
--thread->spin_sleep;
thread->spin_sleep = 0;
ihk_mc_spinlock_unlock(&(thread->spin_sleep_lock), irqstate);
if (spin_slept) {
return status;
}
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
if (thread->status & valid_states) {
mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock);
if(proc->status != PS_EXITED)
if (proc->status != PS_EXITED)
proc->status = PS_RUNNING;
mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock);
xchg4((int *)(&thread->status), PS_RUNNING);
barrier();
status = 0;
}
else {

View File

@ -54,136 +54,75 @@ void init_timers(void)
}
uint64_t schedule_timeout(uint64_t timeout)
{
struct waitq_entry my_wait;
struct timer my_timer;
{
struct thread *thread = cpu_local_var(current);
int irqstate;
int spin_sleep;
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
spin_sleep = ++thread->spin_sleep;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
long irqstate;
/* Spin sleep.. */
for (;;) {
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
uint64_t t_s = rdtsc();
uint64_t t_e;
int spin_over = 0;
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
/* Woken up by someone? */
if (thread->spin_sleep < 1) {
if (thread->spin_sleep == 0) {
t_e = rdtsc();
spin_over = 1;
if ((t_e - t_s) < timeout) {
timeout -= (t_e - t_s);
}
else {
timeout = 1;
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
break;
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
if (!spin_over) {
t_s = rdtsc();
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
/* Give a chance to another thread (if any) in case the core is
* oversubscribed, but make sure we will be re-scheduled */
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
schedule();
/* Give a chance to another thread (if any) in case the core is
* oversubscribed, but make sure we will be re-scheduled */
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
else {
/* Spin wait */
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
spin_over = 1;
}
else {
timeout -= LOOP_TIMEOUT;
}
}
/* Recheck if woken */
continue;
}
else {
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
}
if (spin_over) {
dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
timeout);
/* Give a chance to another thread (if any) in case we timed out,
* but make sure we will be re-scheduled */
if (timeout == 0) {
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate =
ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
/* Spin wait */
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
}
/* Time out? */
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
/* We are not sleeping any more */
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
if (spin_sleep == thread->spin_sleep) {
--thread->spin_sleep;
}
thread->spin_sleep = 0;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
return timeout;
break;
}
else {
timeout -= LOOP_TIMEOUT;
}
}
/* Init waitq and wait entry for this timer */
my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
my_timer.thread = cpu_local_var(current);
waitq_init(&my_timer.processes);
waitq_init_entry(&my_wait, cpu_local_var(current));
/* Add ourself to the timer queue */
ihk_mc_spinlock_lock_noirq(&timers_lock);
list_add_tail(&my_timer.list, &timers);
dkprintf("schedule_timeout() sleep timeout: %lu\n", my_timer.timeout);
/* Add ourself to the waitqueue and sleep */
waitq_prepare_to_wait(&my_timer.processes, &my_wait, PS_INTERRUPTIBLE);
ihk_mc_spinlock_unlock_noirq(&timers_lock);
schedule();
waitq_finish_wait(&my_timer.processes, &my_wait);
ihk_mc_spinlock_lock_noirq(&timers_lock);
/* Woken up by someone else rather than by the timeout? */
if (my_timer.timeout) {
list_del(&my_timer.list);
}
ihk_mc_spinlock_unlock_noirq(&timers_lock);
dkprintf("schedule_timeout() woken up, timeout: %lu\n",
my_timer.timeout);
return my_timer.timeout;
return timeout;
}

View File

@ -35,8 +35,8 @@ void *phys_to_virt(unsigned long p);
int copy_from_user(void *dst, const void *src, size_t siz);
int strlen_user(const char *s);
int strcpy_from_user(char *dst, const char *src);
long getlong_user(const long *p);
int getint_user(const int *p);
long getlong_user(long *dest, const long *p);
int getint_user(int *dest, const int *p);
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz);
int copy_to_user(void *dst, const void *src, size_t siz);
int setlong_user(long *dst, long data);

View File

@ -275,13 +275,21 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
long *_flat;
char *p;
long r;
int n;
int n, ret;
/* How many strings do we have? */
if (nr_strings == -1) {
for (nr_strings = 0; (r = getlong_user((void *)(strings + nr_strings))) > 0; ++nr_strings);
if(r < 0)
return r;
nr_strings = 0;
for (;;) {
ret = getlong_user(&r, (void *)(strings + nr_strings));
if (ret < 0)
return ret;
if (r == 0)
break;
++nr_strings;
}
}
/* Count full length */
@ -295,13 +303,19 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
}
for (string_i = 0; string_i < nr_strings; ++string_i) {
char *userp = (char *)getlong_user((void *)(strings + string_i));
int len = strlen_user(userp);
char *userp;
int len;
ret = getlong_user((long *)&userp, (void *)(strings + string_i));
if (ret < 0)
return ret;
len = strlen_user(userp);
if(len < 0)
return len;
// Pointer + actual value
full_len += sizeof(char *) + len + 1;
full_len += sizeof(char *) + len + 1;
}
full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1);
@ -326,8 +340,13 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
}
for (string_i = 0; string_i < nr_strings; ++string_i) {
char *userp = (char *)getlong_user((void *)(strings + string_i));
char *userp;
_flat[n++] = p - (char *)_flat;
ret = getlong_user((long *)&userp, (void *)(strings + string_i));
if (ret < 0)
return ret;
strcpy_from_user(p, userp);
p = strchr(p, '\0') + 1;
}