futex(): spin wait when CPU not oversubscribed and fix lost wake-up bug
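The lost wake-up fixed here is the classic sleep/wake-up race: a waiter that evaluates its wake condition before marking itself as sleeping can miss a wake-up delivered in between. The commit closes the window by publishing the waiter's intent first (thread->spin_sleep = 1 under spin_sleep_lock, in futex_wait_queue_me()) and having the waker clear the flag under the same lock. A minimal sketch of that handshake, using a generic pthread spinlock rather than the kernel's own primitives (illustrative names, not McKernel code):

#include <pthread.h>

struct waiter {
	pthread_spinlock_t lock;	/* plays the role of spin_sleep_lock;
					 * initialize with pthread_spin_init() */
	int spin_sleep;			/* 1 = intends to sleep, 0 = woken */
};

/* Waiter: publish the intent to sleep BEFORE checking the wake condition. */
void waiter_sleep(struct waiter *w)
{
	pthread_spin_lock(&w->lock);
	w->spin_sleep = 1;
	pthread_spin_unlock(&w->lock);

	for (;;) {			/* spin until the waker clears the flag */
		int woken;

		pthread_spin_lock(&w->lock);
		woken = (w->spin_sleep == 0);
		pthread_spin_unlock(&w->lock);
		if (woken)
			return;
	}
}

/* Waker: clearing the flag under the same lock cannot be missed. */
void waiter_wake(struct waiter *w)
{
	pthread_spin_lock(&w->lock);
	w->spin_sleep = 0;
	pthread_spin_unlock(&w->lock);
}

Either the waker runs first and the flag is already clear when the waiter checks it, or the waiter is still spinning and observes the cleared flag on its next iteration; in neither case is the wake-up lost.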
@@ -2233,30 +2233,28 @@ int strcpy_from_user(char *dst, const char *src)
 	return err;
 }
 
-long getlong_user(const long *p)
+long getlong_user(long *dest, const long *p)
 {
 	int error;
-	long l;
 
-	error = copy_from_user(&l, p, sizeof(l));
+	error = copy_from_user(dest, p, sizeof(long));
 	if (error) {
 		return error;
 	}
 
-	return l;
+	return 0;
 }
 
-int getint_user(const int *p)
+int getint_user(int *dest, const int *p)
 {
 	int error;
-	int i;
 
-	error = copy_from_user(&i, p, sizeof(i));
+	error = copy_from_user(dest, p, sizeof(int));
 	if (error) {
 		return error;
 	}
 
-	return i;
+	return 0;
 }
 
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
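The signature change above is needed because the old helpers returned the fetched value itself, so an error code could not be distinguished from a legitimately negative user value (this is also why lib/string.c, further down, is updated to check a separate return code). A hypothetical caller migrates roughly as follows (fetch_flags and its parameters are made-up names for illustration):

/* Hypothetical caller showing the migration to the new interface. */
int fetch_flags(const long *uptr, long *flags_out)
{
	int ret;

	/* Old: long v = getlong_user(uptr); -- a negative v could be
	 * either an error code or a legitimate user value. */

	/* New: the error code and the fetched value travel separately. */
	ret = getlong_user(flags_out, uptr);
	if (ret < 0)
		return ret;

	return 0;
}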
@@ -248,9 +248,13 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
 
 static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
 {
 	/* RIKEN: futexes are always on not swappable pages */
-	*dest = getint_user((int *)from);
-
+	/*
+	 * Officially we should call:
+	 * return getint_user((int *)dest, (int *)from);
+	 *
+	 * but McKernel on x86 can just access user-space.
+	 */
+	*dest = *(volatile uint32_t *)from;
 	return 0;
 }
@@ -670,26 +674,32 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 				uint64_t timeout)
 {
 	uint64_t time_remain = 0;
+	unsigned long irqstate;
+	struct thread *thread = cpu_local_var(current);
 	/*
 	 * The task state is guaranteed to be set before another task can
-	 * wake it. set_current_state() is implemented using set_mb() and
-	 * queue_me() calls spin_unlock() upon completion, both serializing
-	 * access to the hash list and forcing another memory barrier.
+	 * wake it.
+	 * queue_me() calls spin_unlock() upon completion, serializing
+	 * access to the hash list and forcing a memory barrier.
 	 */
 	xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
 	barrier();
 
+	/* Indicate spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	thread->spin_sleep = 1;
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
 	queue_me(q, hb);
 
 	if (!plist_node_empty(&q->list)) {
 
 		/* RIKEN: use mcos timers */
 		if (timeout) {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
 			time_remain = schedule_timeout(timeout);
 		}
 		else {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
-			schedule();
+			spin_sleep_or_schedule();
 			time_remain = 0;
 		}
@@ -698,6 +708,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 
 	/* This does not need to be serialized */
 	cpu_local_var(current)->status = PS_RUNNING;
+	thread->spin_sleep = 0;
 
 	return time_remain;
 }
@@ -744,14 +755,17 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
 	 */
 	q->key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q->key);
-	if ((ret != 0))
+	if (ret != 0)
 		return ret;
 
 	*hb = queue_lock(q);
 
 	ret = get_futex_value_locked(&uval, uaddr);
 
+	/* RIKEN: get_futex_value_locked() always returns 0 on mckernel */
 	if (ret) {
 		queue_unlock(q, *hb);
 		put_futex_key(fshared, &q->key);
 		return ret;
 	}
 
 	if (uval != val) {
 		queue_unlock(q, *hb);
@@ -777,8 +791,6 @@ static int futex_wait(uint32_t __user *uaddr, int fshared,
 	q.bitset = bitset;
 	q.requeue_pi_key = NULL;
 
-	/* RIKEN: futex_wait_queue_me() calls schedule_timeout() if timer is set */
-
retry:
 	/* Prepare to wait on uaddr. */
 	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
@@ -759,6 +759,7 @@ extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64
 enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
 
 void schedule(void);
+void spin_sleep_or_schedule(void);
 void runq_add_thread(struct thread *thread, int cpu_id);
 void runq_del_thread(struct thread *thread, int cpu_id);
 int sched_wakeup_thread(struct thread *thread, int valid_states);
@@ -2668,6 +2668,65 @@ set_timer()
 	}
 }
 
+/*
+ * NOTE: it is assumed that a wait-queue (or futex queue) is
+ * set before calling this function.
+ * NOTE: one must set thread->spin_sleep to 1 before evaluating
+ * the wait condition to avoid lost wake-ups.
+ */
+void spin_sleep_or_schedule(void)
+{
+	struct thread *thread = cpu_local_var(current);
+	struct cpu_local_var *v;
+	int do_schedule = 0;
+	int woken = 0;
+	long irqstate;
+
+	/* Try to spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	if (thread->spin_sleep == 0) {
+		dkprintf("%s: caught a lost wake-up!\n", __FUNCTION__);
+	}
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+	for (;;) {
+		/* Check if we need to reschedule */
+		irqstate =
+			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		v = get_this_cpu_local_var();
+
+		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
+			do_schedule = 1;
+		}
+
+		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+
+		/* Check if we were woken up */
+		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+		if (thread->spin_sleep == 0) {
+			woken = 1;
+		}
+
+		/* Indicate that we are not spinning any more */
+		if (do_schedule) {
+			thread->spin_sleep = 0;
+		}
+		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+		if (woken) {
+			return;
+		}
+
+		if (do_schedule) {
+			break;
+		}
+
+		cpu_pause();
+	}
+
+	schedule();
+}
+
 void schedule(void)
 {
 	struct cpu_local_var *v;
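With futex_wait_queue_me() setting the flag (above) and sched_wakeup_thread() clearing it (below), the two sides of the protocol line up as follows; this is only a condensed restatement of the code in this commit, not additional functionality:

/*
 * waiter (futex_wait_queue_me)          waker (sched_wakeup_thread)
 * ----------------------------          ---------------------------
 * lock(spin_sleep_lock)                 lock(spin_sleep_lock)
 * thread->spin_sleep = 1                if (thread->spin_sleep == 1)
 * unlock(spin_sleep_lock)                       treat as spin wakeup
 * queue_me(q, hb)                       thread->spin_sleep = 0
 * spin_sleep_or_schedule()              unlock(spin_sleep_lock)
 *
 * Because the waiter publishes spin_sleep = 1 before it starts waiting,
 * a wake-up that arrives between queueing and sleeping simply clears the
 * flag, and spin_sleep_or_schedule() observes spin_sleep == 0 and returns
 * instead of sleeping forever.
 */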
@@ -2834,7 +2893,6 @@ int
 sched_wakeup_thread(struct thread *thread, int valid_states)
 {
 	int status;
 	int spin_slept = 0;
 	unsigned long irqstate;
 	struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id);
 	struct process *proc = thread->proc;
@@ -2844,29 +2902,23 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
 		proc->pid, valid_states, thread->status, thread->cpu_id, ihk_mc_get_processor_id());
 
 	irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock));
-	if (thread->spin_sleep > 0) {
+	if (thread->spin_sleep == 1) {
 		dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n",
 			 thread->cpu_id);
 
 		spin_slept = 1;
 		status = 0;
 	}
-	--thread->spin_sleep;
+	thread->spin_sleep = 0;
 	ihk_mc_spinlock_unlock(&(thread->spin_sleep_lock), irqstate);
 
 	if (spin_slept) {
 		return status;
 	}
 
 	irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
 
 	if (thread->status & valid_states) {
 		mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock);
-		if(proc->status != PS_EXITED)
+		if (proc->status != PS_EXITED)
 			proc->status = PS_RUNNING;
 		mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock);
 		xchg4((int *)(&thread->status), PS_RUNNING);
 		barrier();
 		status = 0;
 	}
 	else {
kernel/timer.c (139 lines changed)
@@ -54,136 +54,75 @@ void init_timers(void)
 }
 
 uint64_t schedule_timeout(uint64_t timeout)
 {
-	struct waitq_entry my_wait;
-	struct timer my_timer;
 	struct thread *thread = cpu_local_var(current);
-	int irqstate;
-	int spin_sleep;
-
-	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-	dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
-	spin_sleep = ++thread->spin_sleep;
-	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+	long irqstate;
 
 	/* Spin sleep.. */
 	for (;;) {
+		int need_schedule;
+		struct cpu_local_var *v = get_this_cpu_local_var();
 		uint64_t t_s = rdtsc();
 		uint64_t t_e;
-		int spin_over = 0;
 
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
 
 		/* Woken up by someone? */
-		if (thread->spin_sleep < 1) {
+		if (thread->spin_sleep == 0) {
 			t_e = rdtsc();
 
-			spin_over = 1;
 			if ((t_e - t_s) < timeout) {
 				timeout -= (t_e - t_s);
 			}
 			else {
 				timeout = 1;
 			}
+
+			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+			break;
 		}
 
 		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
 
-		if (!spin_over) {
-			t_s = rdtsc();
-			int need_schedule;
-			struct cpu_local_var *v = get_this_cpu_local_var();
-			int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
-			need_schedule = v->runq_len > 1 ? 1 : 0;
-
-			/* Give a chance to another thread (if any) in case the core is
-			 * oversubscribed, but make sure we will be re-scheduled */
-			if (need_schedule) {
-				xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-				schedule();
-				xchg4(&(cpu_local_var(current)->status),
-				      PS_INTERRUPTIBLE);
-			}
-			else {
-				/* Spin wait */
-				while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
-					cpu_pause();
-				}
-
-				if (timeout < LOOP_TIMEOUT) {
-					timeout = 0;
-					spin_over = 1;
-				}
-				else {
-					timeout -= LOOP_TIMEOUT;
-				}
-			}
-		}
+		/* Give a chance to another thread (if any) in case the core is
+		 * oversubscribed, but make sure we will be re-scheduled */
+		irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
+		need_schedule = v->runq_len > 1 ? 1 : 0;
+
+		if (need_schedule) {
+			xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
+			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+			schedule();
+			xchg4(&(cpu_local_var(current)->status),
+			      PS_INTERRUPTIBLE);
+			/* Recheck if woken */
+			continue;
+		}
+		else {
+			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+		}
 
-		if (spin_over) {
-			dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
-				 timeout);
-
-			/* Give a chance to another thread (if any) in case we timed out,
-			 * but make sure we will be re-scheduled */
-			if (timeout == 0) {
-				int need_schedule;
-				struct cpu_local_var *v = get_this_cpu_local_var();
-
-				int irqstate =
-					ihk_mc_spinlock_lock(&(v->runq_lock));
-				need_schedule = v->runq_len > 1 ? 1 : 0;
-				ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
-
-				if (need_schedule) {
-					xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-					schedule();
-					xchg4(&(cpu_local_var(current)->status),
-					      PS_INTERRUPTIBLE);
-				}
-			}
-
-			/* We are not sleeping any more */
-			irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-			if (spin_sleep == thread->spin_sleep) {
-				--thread->spin_sleep;
-			}
-			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
-
-			return timeout;
-		}
-	}
+		/* Spin wait */
+		while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
+			cpu_pause();
+		}
+
+		/* Time out? */
+		if (timeout < LOOP_TIMEOUT) {
+			timeout = 0;
+
+			/* We are not sleeping any more */
+			irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+			thread->spin_sleep = 0;
+			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+			break;
+		}
+		else {
+			timeout -= LOOP_TIMEOUT;
+		}
+	}
 
-	/* Init waitq and wait entry for this timer */
-	my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
-	my_timer.thread = cpu_local_var(current);
-	waitq_init(&my_timer.processes);
-	waitq_init_entry(&my_wait, cpu_local_var(current));
-
-	/* Add ourself to the timer queue */
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-	list_add_tail(&my_timer.list, &timers);
-
-	dkprintf("schedule_timeout() sleep timeout: %lu\n", my_timer.timeout);
-
-	/* Add ourself to the waitqueue and sleep */
-	waitq_prepare_to_wait(&my_timer.processes, &my_wait, PS_INTERRUPTIBLE);
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-	schedule();
-	waitq_finish_wait(&my_timer.processes, &my_wait);
-
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-
-	/* Waken up by someone else then timeout? */
-	if (my_timer.timeout) {
-		list_del(&my_timer.list);
-	}
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-
-	dkprintf("schedule_timeout() woken up, timeout: %lu\n",
-		 my_timer.timeout);
-
-	return my_timer.timeout;
+	return timeout;
 }
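The rewritten schedule_timeout() no longer arms a timer; it burns cycles in LOOP_TIMEOUT-sized slices and charges each slice against the remaining budget. The accounting, extracted into a standalone sketch (CHUNK and cycles() are stand-ins for LOOP_TIMEOUT and rdtsc(), not real McKernel symbols):

#include <stdint.h>

#define CHUNK 4096ULL			/* stands in for LOOP_TIMEOUT */

extern uint64_t cycles(void);		/* stands in for rdtsc() */

/* Spin for one chunk, then deduct it; 0 means the budget is exhausted. */
uint64_t spin_one_chunk(uint64_t budget)
{
	uint64_t t_s = cycles();

	while ((cycles() - t_s) < CHUNK)
		;			/* cpu_pause() in the real code */

	return (budget < CHUNK) ? 0 : budget - CHUNK;
}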
@@ -35,8 +35,8 @@ void *phys_to_virt(unsigned long p);
 int copy_from_user(void *dst, const void *src, size_t siz);
 int strlen_user(const char *s);
 int strcpy_from_user(char *dst, const char *src);
-long getlong_user(const long *p);
-int getint_user(const int *p);
+long getlong_user(long *dest, const long *p);
+int getint_user(int *dest, const int *p);
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz);
 int copy_to_user(void *dst, const void *src, size_t siz);
 int setlong_user(long *dst, long data);
lib/string.c (35 lines changed)
@@ -275,13 +275,21 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	long *_flat;
 	char *p;
 	long r;
-	int n;
+	int n, ret;
 
 	/* How many strings do we have? */
 	if (nr_strings == -1) {
-		for (nr_strings = 0; (r = getlong_user((void *)(strings + nr_strings))) > 0; ++nr_strings);
-		if(r < 0)
-			return r;
+		nr_strings = 0;
+		for (;;) {
+			ret = getlong_user(&r, (void *)(strings + nr_strings));
+			if (ret < 0)
+				return ret;
+
+			if (r == 0)
+				break;
+
+			++nr_strings;
+		}
 	}
 
 	/* Count full length */
@@ -295,13 +303,19 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	}
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
-		int len = strlen_user(userp);
+		char *userp;
+		int len;
+
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
+
+		len = strlen_user(userp);
 
 		if(len < 0)
 			return len;
 		// Pointer + actual value
 		full_len += sizeof(char *) + len + 1;
 	}
@@ -326,8 +340,13 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	}
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
+		char *userp;
+
 		_flat[n++] = p - (char *)_flat;
 
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
+
 		strcpy_from_user(p, userp);
 		p = strchr(p, '\0') + 1;
 	}