Files
mckernel/kernel/futex.c
2012-12-17 16:10:56 +09:00

472 lines
12 KiB
C

/*
* Kitten LWK futex code adaptation.
* Copyright (c) 2012 RIKEN AICS
*/
/*
* Copyright (c) 2008 Sandia National Laboratories
*
* Futex code adapted from Linux 2.6.27.9, original copyright below.
* Simplified to only support address-space (process-private) futexes.
* Removed demand-paging, cow, etc. complications since LWK doesn't
* require these.
*/
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
*
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
* Robust futex support started by Ingo Molnar
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
*
* PI-futex support started by Ingo Molnar and Thomas Gleixner
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* PRIVATE futexes by Eric Dumazet
* Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
*
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <process.h>
#include <futex.h>
#include <hash.h>
#include <ihk/lock.h>
#include <list.h>
#include <cls.h>
#include <kmsg.h>
#include <timer.h>
#if 0
#include <lwk/kernel.h>
#include <lwk/task.h>
#include <lwk/aspace.h>
#include <lwk/futex.h>
#include <lwk/hash.h>
#include <lwk/sched.h>
#ifdef __UACCESS__
#include <arch/uaccess.h>
#endif
#endif
/**
 * Prepares a futex queue for use.
 *
 * Empties the queue's list of waiting futexes and initializes the spinlock
 * that protects it.
 */
void futex_queue_init(struct futex_queue *queue)
{
	INIT_LIST_HEAD(&queue->futex_list);
	aal_mc_spinlock_init(&queue->lock);
}
/**
 * Tells whether uaddr may be used as a futex word.
 *
 * When __UACCESS__ is defined the decision is delegated to access_ok() for
 * a uint32_t-sized write; otherwise every address is accepted.
 *
 * Returns non-zero if the address is acceptable, zero otherwise.
 */
static int uaddr_is_valid(uint32_t __user *uaddr)
{
	int valid = 1;

#ifdef __UACCESS__
	valid = access_ok(VERIFY_WRITE, uaddr, sizeof(uint32_t));
#endif
	return valid;
}
/**
 * Fills in a futex waiter record for the given user address.
 *
 * Stores uaddr and bitset in the record and initializes its wait queue.
 * The record's link and lock_ptr members are not touched here.
 *
 * Returns 0 on success, or -EINVAL if uaddr_is_valid() rejects uaddr (in
 * which case the record is left unmodified).
 */
static int futex_init(struct futex *futex, uint32_t __user *uaddr,
		      uint32_t bitset)
{
	if (uaddr_is_valid(uaddr)) {
		futex->uaddr = uaddr;
		futex->bitset = bitset;
		waitq_init(&futex->waitq);
		return 0;
	}

	return -EINVAL;
}
/**
 * Maps a user address to its futex queue.
 *
 * The address is hashed down to FUTEX_HASHBITS bits and the result indexes
 * the futex_queues table of the vm belonging to cpu_local_var(current).
 */
static struct futex_queue *get_queue(uint32_t __user *uaddr)
{
	uint64_t bucket;

	bucket = hash_64((uint64_t)uaddr, FUTEX_HASHBITS);
	return cpu_local_var(current)->vm->futex_queues + bucket;
}
/**
 * Locks the futex queue that futex->uaddr hashes to.
 *
 * Records the queue's lock in futex->lock_ptr before acquiring it, and
 * stores the value returned by the lock operation in *irqflags so that the
 * caller can hand it back to queue_unlock().
 *
 * Returns the locked queue.
 */
static struct futex_queue *queue_lock(struct futex *futex, int *irqflags)
{
	struct futex_queue *fq;

	fq = get_queue(futex->uaddr);
	futex->lock_ptr = &fq->lock;
	*irqflags = aal_mc_spinlock_lock(&fq->lock);

	return fq;
}
static void queue_unlock(struct futex_queue *futex_queue, int irqflags)
{
aal_mc_spinlock_unlock(&futex_queue->lock, irqflags);
}
/**
 * Appends a futex waiter record to the tail of a futex queue's list.
 *
 * No locking is done here; futex_wait() calls this while it still holds the
 * queue lock obtained from queue_lock().
 */
static void queue_me(struct futex *futex, struct futex_queue *futex_queue)
{
	struct list_head *waiters = &futex_queue->futex_list;

	list_add_tail(&futex->link, waiters);
}
/*
 * Removes a futex waiter record from its queue if it is still queued.
 *
 * futex->lock_ptr serves as the "still queued" indicator: wake_futex()
 * sets it to NULL once it has unlinked the record, and futex_cmp_requeue()
 * repoints it when moving the record to another queue.
 *
 * Returns 1 if this call unlinked the record, or 0 if lock_ptr was already
 * NULL (i.e. a waker dequeued it first).
 */
static int unqueue_me(struct futex *futex)
{
aal_spinlock_t *lock_ptr;
int irqflags;
int status = 0;
/* In the common case we don't take the spinlock, which is nice. */
retry:
/* Snapshot lock_ptr once; barrier() keeps the compiler from re-reading it. */
lock_ptr = futex->lock_ptr;
barrier();
if (lock_ptr != NULL) {
irqflags = aal_mc_spinlock_lock(lock_ptr);
/*
 * futex->lock_ptr can change between reading it and
 * taking the spinlock, causing us to take the wrong lock.
 * This corrects the race condition.
 *
 * Reasoning goes like this: if we have the wrong lock,
 * futex->lock_ptr must have changed (maybe several times)
 * between reading it and taking the spinlock. It can
 * change again after the spinlock is taken but only if it
 * was already changed before that. It cannot, however,
 * change back to the original value. Therefore we can
 * detect whether we acquired the correct lock.
 */
if (lock_ptr != futex->lock_ptr) {
aal_mc_spinlock_unlock(lock_ptr, irqflags);
goto retry;
}
//WARN_ON(list_empty(&futex->link));
/* We hold the right lock and are still queued: unlink ourselves. */
list_del(&futex->link);
aal_mc_spinlock_unlock(lock_ptr, irqflags);
status = 1;
}
return status;
}
/**
 * Locks two futex queues, lower address first.
 *
 * If both pointers name the same queue it is locked only once; in that case
 * only *irqflags2 is written and *irqflags1 is left untouched. Otherwise
 * each out-parameter receives the value returned when its queue was locked.
 */
static void lock_two_queues(struct futex_queue *queue1, int *irqflags1,
			    struct futex_queue *queue2, int *irqflags2)
{
	if (queue1 == queue2) {
		*irqflags2 = aal_mc_spinlock_lock(&queue2->lock);
	} else if (queue1 < queue2) {
		*irqflags1 = aal_mc_spinlock_lock(&queue1->lock);
		*irqflags2 = aal_mc_spinlock_lock(&queue2->lock);
	} else {
		*irqflags2 = aal_mc_spinlock_lock(&queue2->lock);
		*irqflags1 = aal_mc_spinlock_lock(&queue1->lock);
	}
}
/**
 * Releases the locks taken by lock_two_queues().
 *
 * lock_two_queues() always acquires the lower-addressed queue first, so the
 * locks are released here in the reverse of that order and each one is
 * given back the flags value that was returned when it was acquired.
 *
 * Releasing in reverse acquisition order matters if the flags carry saved
 * interrupt state (as the irqflags naming suggests -- TODO confirm against
 * aal_mc_spinlock_lock()): the flags of the lock taken first hold the
 * caller's original state and must be the last ones restored. Releasing
 * queue2 before queue1 unconditionally would restore them out of order
 * whenever queue1 > queue2.
 *
 * When both pointers name the same queue only one lock is held, and
 * irqflags2 is the value that belongs to it; irqflags1 is ignored.
 */
static void unlock_two_queues(struct futex_queue *queue1, int irqflags1,
			      struct futex_queue *queue2, int irqflags2)
{
	if (queue1 == queue2) {
		aal_mc_spinlock_unlock(&queue2->lock, irqflags2);
	} else if (queue1 < queue2) {
		/* Acquired queue1 then queue2: release queue2 first. */
		aal_mc_spinlock_unlock(&queue2->lock, irqflags2);
		aal_mc_spinlock_unlock(&queue1->lock, irqflags1);
	} else {
		/* Acquired queue2 then queue1: release queue1 first. */
		aal_mc_spinlock_unlock(&queue1->lock, irqflags1);
		aal_mc_spinlock_unlock(&queue2->lock, irqflags2);
	}
}
/**
 * Puts the calling task to sleep waiting on a futex.
 *
 * @param uaddr    user-space address of the 32-bit futex word
 * @param val      value the futex word must still hold for the task to sleep
 * @param timeout  handed to schedule_timeout() when non-zero; zero means
 *                 sleep with no timeout
 * @param bitset   mask stored in the waiter record and matched by
 *                 futex_wake(); must be non-zero
 *
 * @return 0 if the task was dequeued by a waker;
 *         -EINVAL if bitset is zero or futex_init() rejects uaddr;
 *         -EWOULDBLOCK if the futex word does not equal val;
 *         -ETIMEDOUT if a timeout was requested and schedule_timeout()
 *         reported no time remaining;
 *         -EINTR if the sleep ended for any other reason while the task
 *         was still queued;
 *         or, with __UACCESS__, the non-zero status from get_user().
 */
static int futex_wait(uint32_t __user *uaddr, uint32_t val,
		      uint64_t timeout, uint32_t bitset)
{
	DECLARE_WAITQ_ENTRY(wait, cpu_local_var(current));
	struct futex futex;
	struct futex_queue *queue;
	uint64_t time_remain = 0;
	uint32_t uval;
	int irqflags;
	int status;

	if (!bitset)
		return -EINVAL;

	/* This verifies that uaddr is sane */
	status = futex_init(&futex, uaddr, bitset);
	if (status != 0)
		return status;

	/* Lock the futex queue corresponding to uaddr */
	queue = queue_lock(&futex, &irqflags);

	/* Get the value from user-space.  Since we don't have
	 * paging, the only options are for this to succeed (with no
	 * page faults) or fail, returning -EFAULT.  There is no way
	 * for us to be put to sleep, so holding the queue's spinlock
	 * is fine. */
#ifdef __UACCESS__
	status = get_user(uval, uaddr);
	if (status != 0)
		goto error;
#else
	uval = *uaddr;
#endif

	/* The user-space value must match the value passed in */
	if (uval != val) {
		status = -EWOULDBLOCK;
		goto error;
	}

	/* Add ourself to the futex's waitq and mark ourself as sleeping */
	cpu_local_var(current)->status = PS_INTERRUPTIBLE;
	waitq_add_entry(&futex.waitq, &wait);

	/* Add ourself to the futex queue and drop our lock on it */
	queue_me(&futex, queue);
	queue_unlock(queue, irqflags);

	/* Only sleep if a waker has not already unlinked us. */
	if (!list_empty(&futex.link)) {
		if (timeout)
			time_remain = schedule_timeout(timeout);
		else
			schedule();
	}
	cpu_local_var(current)->status = PS_RUNNING;

	/*
	 * NOTE: We don't remove ourself from the waitq because
	 * we are the only user of it.
	 */

	/* If we were woken (and unqueued), we succeeded, whatever. */
	if (!unqueue_me(&futex))
		return 0;

	/*
	 * We were still queued, so nobody woke us through the futex.
	 * Report a timeout only if one was actually requested: without
	 * this check an untimed wait would always end up here with
	 * time_remain == 0 and be misreported as -ETIMEDOUT.
	 */
	if (timeout && time_remain == 0)
		return -ETIMEDOUT;

	/* We expect that there is a signal pending, but another thread
	 * may have handled it for us already. */
	return -EINTR;

error:
	queue_unlock(queue, irqflags);
	return status;
}
/*
 * Wakes the task waiting on a futex waiter record and marks the record as
 * dequeued.
 *
 * The futex_queue's lock must be held when this is called.
 * Afterwards, the futex record must not be accessed: as the comment in the
 * body explains, the waiting task may free it as soon as lock_ptr has been
 * cleared.
 *
 * The three steps below (unlink, wake up, clear lock_ptr) must stay in this
 * order; unqueue_me() relies on lock_ptr being cleared last.
 */
static void wake_futex(struct futex *futex)
{
list_del_init(&futex->link);
/*
 * The lock in waitq_wakeup() is a crucial memory barrier after the
 * list_del_init() and also before assigning to futex->lock_ptr.
 */
waitq_wakeup(&futex->waitq);
/*
 * The waiting task can free the futex as soon as this is written,
 * without taking any locks. This must come last.
 *
 * A memory barrier is required here to prevent the following store
 * to lock_ptr from getting ahead of the wakeup. Clearing the lock
 * at the end of waitq_wakeup() does not prevent this store from
 * moving.
 *
 * NOTE(review): barrier() is presumably a compiler-only barrier;
 * confirm it is strong enough for the ordering described above.
 */
barrier();
futex->lock_ptr = NULL;
}
/**
 * Wakes tasks that are waiting on a futex.
 *
 * Walks the queue that uaddr hashes to and wakes every waiter whose address
 * equals uaddr and whose bitset shares at least one bit with the given
 * bitset, stopping once the number woken reaches nr_wake.
 *
 * @return the number of waiters woken, or -EINVAL if bitset is zero or
 *         uaddr_is_valid() rejects uaddr.
 */
static int futex_wake(uint32_t __user *uaddr, int nr_wake, uint32_t bitset)
{
	struct futex_queue *fq;
	struct futex *cur, *tmp;
	int irqflags;
	int woken = 0;

	if (!bitset || !uaddr_is_valid(uaddr))
		return -EINVAL;

	fq = get_queue(uaddr);
	irqflags = aal_mc_spinlock_lock(&fq->lock);

	list_for_each_entry_safe(cur, tmp, &fq->futex_list, link) {
		/* Skip waiters on other addresses sharing this queue. */
		if (cur->uaddr != uaddr)
			continue;
		/* Skip waiters whose bitset does not intersect ours. */
		if (!(cur->bitset & bitset))
			continue;

		wake_futex(cur);
		woken++;
		if (woken >= nr_wake)
			break;
	}

	aal_mc_spinlock_unlock(&fq->lock, irqflags);
	return woken;
}
/**
 * Performs an operation on the futex word at uaddr2, wakes waiters on
 * uaddr1, and conditionally wakes waiters on uaddr2.
 *
 * With both queues locked, futex_atomic_op_inuser(op, uaddr2) is run.
 * A negative result is returned as is without waking anyone. Otherwise
 * waiters on uaddr1 are woken until the count reaches nr_wake1, and, only
 * if the result was positive, waiters on uaddr2 are woken until the count
 * reaches nr_wake2.
 *
 * @return the total number of waiters woken, -EINVAL if either address is
 *         rejected by uaddr_is_valid(), or the negative result of
 *         futex_atomic_op_inuser().
 */
static int futex_wake_op(uint32_t __user *uaddr1, uint32_t __user *uaddr2,
			 int nr_wake1, int nr_wake2, int op)
{
	struct futex_queue *first_q, *second_q;
	struct futex *cur, *tmp;
	int flags1 = 0;
	int flags2 = 0;
	int woken1 = 0;
	int woken2 = 0;
	int rc;

	if (!(uaddr_is_valid(uaddr1) && uaddr_is_valid(uaddr2)))
		return -EINVAL;

	first_q = get_queue(uaddr1);
	second_q = get_queue(uaddr2);
	lock_two_queues(first_q, &flags1, second_q, &flags2);

	rc = futex_atomic_op_inuser(op, (int *)uaddr2);
	if (rc < 0)
		goto unlock;

	list_for_each_entry_safe(cur, tmp, &first_q->futex_list, link) {
		if (cur->uaddr != uaddr1)
			continue;

		wake_futex(cur);
		woken1++;
		if (woken1 >= nr_wake1)
			break;
	}

	/* The second address is only serviced when the operation said so. */
	if (rc > 0) {
		list_for_each_entry_safe(cur, tmp, &second_q->futex_list, link) {
			if (cur->uaddr != uaddr2)
				continue;

			wake_futex(cur);
			woken2++;
			if (woken2 >= nr_wake2)
				break;
		}
	}

	rc = woken1 + woken2;

unlock:
	unlock_two_queues(first_q, flags1, second_q, flags2);
	return rc;
}
/**
 * Conditionally wakes up or requeues tasks that are waiting on a futex.
 *
 * With both queues locked, the futex word at uaddr1 is compared with
 * cmpval. If they match, the waiters on uaddr1 are visited in queue order:
 * the first nr_wake are woken, and the following ones are retargeted to
 * uaddr2 (and moved to uaddr2's queue when it is a different one) until
 * the number requeued reaches nr_requeue.
 *
 * @return the number of waiters that were woken or requeued;
 *         -EINVAL if either address is rejected by uaddr_is_valid();
 *         -EAGAIN if the futex word at uaddr1 does not equal cmpval;
 *         or, with __UACCESS__, the non-zero status from get_user().
 */
static int futex_cmp_requeue(uint32_t __user *uaddr1, uint32_t __user *uaddr2,
			     int nr_wake, int nr_requeue, uint32_t cmpval)
{
	struct futex_queue *queue1, *queue2;
	/*
	 * lock_two_queues() leaves irqflags1 unwritten when both addresses
	 * hash to the same queue, yet the value is still passed on to
	 * unlock_two_queues(); start from a defined value, as
	 * futex_wake_op() does.
	 */
	int irqflags1 = 0;
	int irqflags2 = 0;
	struct list_head *head1, *head2;
	struct futex *this, *next;
	uint32_t curval;
	int status, nr_woke = 0;

	if (!uaddr_is_valid(uaddr1) || !uaddr_is_valid(uaddr2))
		return -EINVAL;

	queue1 = get_queue(uaddr1);
	queue2 = get_queue(uaddr2);
	lock_two_queues(queue1, &irqflags1, queue2, &irqflags2);

#ifdef __UACCESS__
	if ((status = get_user(curval, uaddr1)) != 0)
		goto out_unlock;
#else
	curval = *uaddr1;
#endif

	if (curval != cmpval) {
		status = -EAGAIN;
		goto out_unlock;
	}

	head1 = &queue1->futex_list;
	head2 = &queue2->futex_list;
	list_for_each_entry_safe(this, next, head1, link) {
		if (this->uaddr != uaddr1)
			continue;

		/* nr_woke counts every matching waiter, woken or requeued. */
		if (++nr_woke <= nr_wake) {
			wake_futex(this);
		} else {
			/* If uaddr1 and uaddr2 hash to the
			 * same futex queue, no need to requeue */
			if (head1 != head2) {
				list_move_tail(&this->link, head2);
				this->lock_ptr = &queue2->lock;
			}
			this->uaddr = uaddr2;

			if (nr_woke - nr_wake >= nr_requeue)
				break;
		}
	}
	status = nr_woke;

out_unlock:
	unlock_two_queues(queue1, irqflags1, queue2, irqflags2);
	return status;
}
/**
 * Entry point for futex operations; dispatches on op.
 *
 * @param uaddr    user-space address of the primary futex word
 * @param op       operation code (FUTEX_WAIT, FUTEX_WAIT_BITSET, FUTEX_WAKE,
 *                 FUTEX_WAKE_BITSET, FUTEX_WAKE_OP or FUTEX_CMP_REQUEUE)
 * @param val      expected futex value for the wait operations; number of
 *                 waiters to wake for the other operations
 * @param timeout  timeout passed to futex_wait(); unused otherwise
 * @param uaddr2   second futex word, used by FUTEX_WAKE_OP and
 *                 FUTEX_CMP_REQUEUE
 * @param val2     nr_wake2 for FUTEX_WAKE_OP, nr_requeue for
 *                 FUTEX_CMP_REQUEUE
 * @param val3     bitset for the *_BITSET operations (overridden with
 *                 FUTEX_BITSET_MATCH_ANY for plain FUTEX_WAIT/FUTEX_WAKE),
 *                 the operation word for FUTEX_WAKE_OP, the comparison value
 *                 for FUTEX_CMP_REQUEUE
 *
 * @return the result of the selected operation, or -ENOSYS for an
 *         unsupported op.
 */
int futex(uint32_t __user *uaddr, int op, uint32_t val, uint64_t timeout,
	  uint32_t __user *uaddr2, uint32_t val2, uint32_t val3)
{
	int status;

	switch (op) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough */
	case FUTEX_WAIT_BITSET:
		status = futex_wait(uaddr, val, timeout, val3);
		break;
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fallthrough */
	case FUTEX_WAKE_BITSET:
		status = futex_wake(uaddr, val, val3);
		break;
	case FUTEX_WAKE_OP:
		status = futex_wake_op(uaddr, uaddr2, val, val2, val3);
		break;
	case FUTEX_CMP_REQUEUE:
		status = futex_cmp_requeue(uaddr, uaddr2, val, val2, val3);
		break;
	default:
		/*
		 * The format previously had no conversion for the pid and was
		 * handed the address of the pid field instead of its value.
		 */
		kprintf("sys_futex() op=%d not supported (pid: %d)\n",
			op, (int)cpu_local_var(current)->pid);
		status = -ENOSYS;
		break;
	}

	return status;
}