futex(): spin wait when CPU not oversubscribed and fix lost wake-up bug

Balazs Gerofi
2017-01-12 18:50:21 +09:00
parent d646c2a4b9
commit bd47b909bf
7 changed files with 163 additions and 142 deletions
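The lost wake-up addressed here is the race between a waiter that decides to spin instead of blocking and a waker that runs in between: if sched_wakeup_thread() fires after the waiter has checked the futex value but before it has advertised that it is spin-waiting, nobody consumes the wake-up and the waiter keeps waiting. The fix makes the waiter publish thread->spin_sleep = 1 under spin_sleep_lock before it queues itself and starts spinning, and makes the waker clear the flag under the same lock, so one side always observes the other. The sketch below is a stand-alone user-space analogue of that handshake using pthreads; the program and its names are illustrative only and are not part of this commit.

/*
 * Illustration only -- NOT McKernel code. A user-space analogue of the
 * spin_sleep handshake this commit introduces, built on pthreads with
 * simplified, hypothetical names. The waiter publishes spin_sleep = 1
 * under a lock before it starts spin-waiting; the waker clears the flag
 * under the same lock, so a wake-up racing with the spin phase is not lost.
 */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t spin_sleep_lock = PTHREAD_MUTEX_INITIALIZER;
static int spin_sleep;	/* 1 while the waiter is spin-waiting */

static void *waiter(void *arg)
{
	(void)arg;

	/* Indicate spin sleep before evaluating the wait condition */
	pthread_mutex_lock(&spin_sleep_lock);
	spin_sleep = 1;
	pthread_mutex_unlock(&spin_sleep_lock);

	for (;;) {
		int woken;

		pthread_mutex_lock(&spin_sleep_lock);
		woken = (spin_sleep == 0);	/* did the waker clear the flag? */
		pthread_mutex_unlock(&spin_sleep_lock);

		if (woken)
			break;

		sched_yield();	/* stands in for cpu_pause() */
	}

	printf("waiter: spin wake-up observed\n");
	return NULL;
}

static void *waker(void *arg)
{
	(void)arg;
	usleep(1000);

	/* Mirror of sched_wakeup_thread(): clear the flag under the lock */
	pthread_mutex_lock(&spin_sleep_lock);
	if (spin_sleep == 1)
		spin_sleep = 0;
	pthread_mutex_unlock(&spin_sleep_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waiter, NULL);
	pthread_create(&b, NULL, waker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}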

View File

@@ -2233,30 +2233,28 @@ int strcpy_from_user(char *dst, const char *src)
 	return err;
 }
 
-long getlong_user(const long *p)
+long getlong_user(long *dest, const long *p)
 {
 	int error;
-	long l;
 
-	error = copy_from_user(&l, p, sizeof(l));
+	error = copy_from_user(dest, p, sizeof(long));
 	if (error) {
 		return error;
 	}
 
-	return l;
+	return 0;
 }
 
-int getint_user(const int *p)
+int getint_user(int *dest, const int *p)
 {
 	int error;
-	int i;
 
-	error = copy_from_user(&i, p, sizeof(i));
+	error = copy_from_user(dest, p, sizeof(int));
 	if (error) {
 		return error;
 	}
 
-	return i;
+	return 0;
 }
 
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
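For context, the hunk above changes getlong_user() and getint_user() so that the fetched value is written through the new dest argument and the return value carries only the copy_from_user() status; a legitimate negative user value can therefore no longer be mistaken for an error. A hypothetical caller under the new convention (a sketch, not code from this commit) looks like:

/* Hypothetical caller sketch, not part of this commit. */
static int read_user_long_checked(const long *user_ptr, long *out)
{
	int ret;

	ret = getlong_user(out, user_ptr);
	if (ret < 0)
		return ret;	/* copy_from_user() reported a fault */

	/* *out is valid here, even when it is negative */
	return 0;
}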

View File

@@ -248,9 +248,13 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
 static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
 {
-	/* RIKEN: futexes are always on not swappable pages */
-	*dest = getint_user((int *)from);
-
+	/*
+	 * Officially we should call:
+	 * return getint_user((int *)dest, (int *)from);
+	 *
+	 * but McKernel on x86 can just access user-space.
+	 */
+	*dest = *(volatile uint32_t *)from;
 	return 0;
 }
@@ -670,26 +674,32 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 		uint64_t timeout)
 {
 	uint64_t time_remain = 0;
+	unsigned long irqstate;
+	struct thread *thread = cpu_local_var(current);
 
 	/*
 	 * The task state is guaranteed to be set before another task can
-	 * wake it. set_current_state() is implemented using set_mb() and
-	 * queue_me() calls spin_unlock() upon completion, both serializing
-	 * access to the hash list and forcing another memory barrier.
+	 * wake it.
+	 * queue_me() calls spin_unlock() upon completion, serializing
+	 * access to the hash list and forcing a memory barrier.
 	 */
 	xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
+	barrier();
+	/* Indicate spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	thread->spin_sleep = 1;
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
 	queue_me(q, hb);
 
 	if (!plist_node_empty(&q->list)) {
-		/* RIKEN: use mcos timers */
 		if (timeout) {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
 			time_remain = schedule_timeout(timeout);
 		}
 		else {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
-			schedule();
+			spin_sleep_or_schedule();
 			time_remain = 0;
 		}
@@ -698,6 +708,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 	/* This does not need to be serialized */
 	cpu_local_var(current)->status = PS_RUNNING;
+	thread->spin_sleep = 0;
 	return time_remain;
 }
@@ -744,14 +755,17 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
 	 */
 	q->key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q->key);
-	if ((ret != 0))
+	if (ret != 0)
 		return ret;
 
 	*hb = queue_lock(q);
 	ret = get_futex_value_locked(&uval, uaddr);
-	/* RIKEN: get_futex_value_locked() always returns 0 on mckernel */
-
+	if (ret) {
+		queue_unlock(q, *hb);
+		put_futex_key(fshared, &q->key);
+		return ret;
+	}
 
 	if (uval != val) {
 		queue_unlock(q, *hb);
@@ -777,8 +791,6 @@ static int futex_wait(uint32_t __user *uaddr, int fshared,
 	q.bitset = bitset;
 	q.requeue_pi_key = NULL;
 
-	/* RIKEN: futex_wait_queue_me() calls schedule_timeout() if timer is set */
-
 retry:
 	/* Prepare to wait on uaddr. */
 	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);

View File

@@ -759,6 +759,7 @@ extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64
 enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
 void schedule(void);
+void spin_sleep_or_schedule(void);
 void runq_add_thread(struct thread *thread, int cpu_id);
 void runq_del_thread(struct thread *thread, int cpu_id);
 int sched_wakeup_thread(struct thread *thread, int valid_states);

View File

@@ -2668,6 +2668,65 @@ set_timer()
 	}
 }
 
+/*
+ * NOTE: it is assumed that a wait-queue (or futex queue) is
+ * set before calling this function.
+ * NOTE: one must set thread->spin_sleep to 1 before evaluating
+ * the wait condition to avoid lost wake-ups.
+ */
+void spin_sleep_or_schedule(void)
+{
+	struct thread *thread = cpu_local_var(current);
+	struct cpu_local_var *v;
+	int do_schedule = 0;
+	int woken = 0;
+	long irqstate;
+
+	/* Try to spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	if (thread->spin_sleep == 0) {
+		dkprintf("%s: caught a lost wake-up!\n", __FUNCTION__);
+	}
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+	for (;;) {
+		/* Check if we need to reschedule */
+		irqstate =
+			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		v = get_this_cpu_local_var();
+
+		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
+			do_schedule = 1;
+		}
+
+		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+
+		/* Check if we were woken up */
+		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+		if (thread->spin_sleep == 0) {
+			woken = 1;
+		}
+
+		/* Indicate that we are not spinning any more */
+		if (do_schedule) {
+			thread->spin_sleep = 0;
+		}
+		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+		if (woken) {
+			return;
+		}
+
+		if (do_schedule) {
+			break;
+		}
+
+		cpu_pause();
+	}
+
+	schedule();
+}
+
 void schedule(void)
 {
 	struct cpu_local_var *v;
@@ -2834,7 +2893,6 @@ int
 sched_wakeup_thread(struct thread *thread, int valid_states)
 {
 	int status;
-	int spin_slept = 0;
 	unsigned long irqstate;
 	struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id);
 	struct process *proc = thread->proc;
@@ -2844,29 +2902,23 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
 		proc->pid, valid_states, thread->status, thread->cpu_id, ihk_mc_get_processor_id());
 
 	irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock));
-	if (thread->spin_sleep > 0) {
+	if (thread->spin_sleep == 1) {
 		dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n",
 				thread->cpu_id);
-		spin_slept = 1;
 		status = 0;
 	}
-	--thread->spin_sleep;
+	thread->spin_sleep = 0;
 	ihk_mc_spinlock_unlock(&(thread->spin_sleep_lock), irqstate);
 
-	if (spin_slept) {
-		return status;
-	}
-
 	irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
 
 	if (thread->status & valid_states) {
 		mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock);
-		if(proc->status != PS_EXITED)
+		if (proc->status != PS_EXITED)
 			proc->status = PS_RUNNING;
 		mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock);
 		xchg4((int *)(&thread->status), PS_RUNNING);
+		barrier();
 		status = 0;
 	}
 	else {

View File

@@ -54,136 +54,75 @@ void init_timers(void)
 }
 
 uint64_t schedule_timeout(uint64_t timeout)
 {
-	struct waitq_entry my_wait;
-	struct timer my_timer;
 	struct thread *thread = cpu_local_var(current);
-	int irqstate;
-	int spin_sleep;
-
-	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-	dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
-	spin_sleep = ++thread->spin_sleep;
-	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+	long irqstate;
 
 	/* Spin sleep.. */
 	for (;;) {
+		int need_schedule;
+		struct cpu_local_var *v = get_this_cpu_local_var();
 		uint64_t t_s = rdtsc();
 		uint64_t t_e;
-		int spin_over = 0;
 
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
 
 		/* Woken up by someone? */
-		if (thread->spin_sleep < 1) {
+		if (thread->spin_sleep == 0) {
 			t_e = rdtsc();
-			spin_over = 1;
 
 			if ((t_e - t_s) < timeout) {
 				timeout -= (t_e - t_s);
 			}
 			else {
 				timeout = 1;
 			}
+
+			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+			break;
 		}
 		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
 
-		if (!spin_over) {
-			t_s = rdtsc();
-			int need_schedule;
-			struct cpu_local_var *v = get_this_cpu_local_var();
-			int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
-			need_schedule = v->runq_len > 1 ? 1 : 0;
-			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
-
-			/* Give a chance to another thread (if any) in case the core is
-			 * oversubscribed, but make sure we will be re-scheduled */
-			if (need_schedule) {
-				xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-				schedule();
-				xchg4(&(cpu_local_var(current)->status),
-						PS_INTERRUPTIBLE);
-			}
-			else {
-				/* Spin wait */
-				while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
-					cpu_pause();
-				}
-				if (timeout < LOOP_TIMEOUT) {
-					timeout = 0;
-					spin_over = 1;
-				}
-				else {
-					timeout -= LOOP_TIMEOUT;
-				}
-			}
-		}
-
-		if (spin_over) {
-			dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
-					timeout);
-			/* Give a chance to another thread (if any) in case we timed out,
-			 * but make sure we will be re-scheduled */
-			if (timeout == 0) {
-				int need_schedule;
-				struct cpu_local_var *v = get_this_cpu_local_var();
-				int irqstate =
-					ihk_mc_spinlock_lock(&(v->runq_lock));
-				need_schedule = v->runq_len > 1 ? 1 : 0;
-				ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
-				if (need_schedule) {
-					xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-					schedule();
-					xchg4(&(cpu_local_var(current)->status),
-							PS_INTERRUPTIBLE);
-				}
-			}
-
-			irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-			if (spin_sleep == thread->spin_sleep) {
-				--thread->spin_sleep;
-			}
-			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
-
-			return timeout;
-		}
-		else {
-			timeout -= LOOP_TIMEOUT;
-		}
-	}
-
-	/* Init waitq and wait entry for this timer */
-	my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
-	my_timer.thread = cpu_local_var(current);
-	waitq_init(&my_timer.processes);
-	waitq_init_entry(&my_wait, cpu_local_var(current));
-
-	/* Add ourself to the timer queue */
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-	list_add_tail(&my_timer.list, &timers);
-	dkprintf("schedule_timeout() sleep timeout: %lu\n", my_timer.timeout);
-
-	/* Add ourself to the waitqueue and sleep */
-	waitq_prepare_to_wait(&my_timer.processes, &my_wait, PS_INTERRUPTIBLE);
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-	schedule();
-	waitq_finish_wait(&my_timer.processes, &my_wait);
-
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-	/* Waken up by someone else then timeout? */
-	if (my_timer.timeout) {
-		list_del(&my_timer.list);
-	}
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-
-	dkprintf("schedule_timeout() woken up, timeout: %lu\n",
-			my_timer.timeout);
-
-	return my_timer.timeout;
+		/* Give a chance to another thread (if any) in case the core is
+		 * oversubscribed, but make sure we will be re-scheduled */
+		irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
+		need_schedule = v->runq_len > 1 ? 1 : 0;
+
+		if (need_schedule) {
+			xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
+			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+			schedule();
+			/* Recheck if woken */
+			continue;
+		}
+		else {
+			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+		}
+
+		/* Spin wait */
+		while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
+			cpu_pause();
+		}
+
+		/* Time out? */
+		if (timeout < LOOP_TIMEOUT) {
+			timeout = 0;
+
+			/* We are not sleeping any more */
+			irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+			thread->spin_sleep = 0;
+			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+			break;
+		}
+	}
+
+	return timeout;
 }

View File

@@ -35,8 +35,8 @@ void *phys_to_virt(unsigned long p);
 int copy_from_user(void *dst, const void *src, size_t siz);
 int strlen_user(const char *s);
 int strcpy_from_user(char *dst, const char *src);
-long getlong_user(const long *p);
-int getint_user(const int *p);
+long getlong_user(long *dest, const long *p);
+int getint_user(int *dest, const int *p);
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz);
 int copy_to_user(void *dst, const void *src, size_t siz);
 int setlong_user(long *dst, long data);

View File

@@ -275,13 +275,21 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	long *_flat;
 	char *p;
 	long r;
-	int n;
+	int n, ret;
 
 	/* How many strings do we have? */
 	if (nr_strings == -1) {
-		for (nr_strings = 0; (r = getlong_user((void *)(strings + nr_strings))) > 0; ++nr_strings);
-		if(r < 0)
-			return r;
+		nr_strings = 0;
+		for (;;) {
+			ret = getlong_user(&r, (void *)(strings + nr_strings));
+			if (ret < 0)
+				return ret;
+
+			if (r == 0)
+				break;
+
+			++nr_strings;
+		}
 	}
 
 	/* Count full length */
@@ -295,13 +303,19 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	}
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
-		int len = strlen_user(userp);
+		char *userp;
+		int len;
+
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
+
+		len = strlen_user(userp);
 		if(len < 0)
 			return len;
 
 		// Pointer + actual value
 		full_len += sizeof(char *) + len + 1;
 	}
 
 	full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1);
@@ -326,8 +340,13 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	}
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
+		char *userp;
+
 		_flat[n++] = p - (char *)_flat;
+
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
 		strcpy_from_user(p, userp);
 		p = strchr(p, '\0') + 1;
 	}