From 83db56a040c7d66342f8b061c6bb4ecda37b59ac Mon Sep 17 00:00:00 2001
From: Balazs Gerofi <bgerofi@riken.jp>
Date: Fri, 10 May 2013 14:23:14 +0900
Subject: [PATCH] futex adaptation from Linux 2.6.34 (Intel MPSS Linux)

---
 kernel/Makefile.build    |    2 +-
 kernel/futex.c           | 1048 ++++++++++++++++++++++++++------------
 kernel/include/futex.h   |   80 ++-
 kernel/include/jhash.h   |  145 ++++++
 kernel/include/plist.h   |  273 ++++++++++
 kernel/include/process.h |    3 -
 kernel/init.c            |    2 +
 kernel/plist.c           |  123 +++++
 kernel/process.c         |    9 +-
 kernel/syscall.c         |   62 +--
 10 files changed, 1341 insertions(+), 406 deletions(-)
 create mode 100644 kernel/include/jhash.h
 create mode 100644 kernel/include/plist.h
 create mode 100644 kernel/plist.c

diff --git a/kernel/Makefile.build b/kernel/Makefile.build
index 10a6c84c..36bf10b2 100644
--- a/kernel/Makefile.build
+++ b/kernel/Makefile.build
@@ -1,6 +1,6 @@
 IHKDIR=$(IHKBASE)/$(TARGETDIR)
 OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
-OBJS += process.o copy.o waitq.o futex.o timer.o
+OBJS += process.o copy.o waitq.o futex.o timer.o plist.o
 DEPSRCS=$(wildcard $(SRC)/*.c)
 CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
diff --git a/kernel/futex.c b/kernel/futex.c
index 91e4dd50..19fc874e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1,15 +1,7 @@
 /*
- * Kitten LWK futex code adaptation.
- * Copyright (c) 2012 RIKEN AICS
- */
-
-/*
- * Copyright (c) 2008 Sandia National Laboratories
- *
- * Futex code adapted from Linux 2.6.27.9, original copyright below.
- * Simplified to only support address-space (process-private) futexes.
- * Removed demand-paging, cow, etc. complications since LWK doesn't
- * require these.
+ * Linux futex adaptation.
+ * (C) Copyright 2013 RIKEN AICS
+ * Balazs Gerofi <bgerofi@riken.jp>
  */
 
 /*
@@ -33,6 +25,10 @@
  * PRIVATE futexes by Eric Dumazet
  * Copyright (C) 2007 Eric Dumazet
  *
+ * Requeue-PI support by Darren Hart
+ * Copyright (C) IBM Corporation, 2009
+ * Thanks to Thomas Gleixner for conceptual design and careful reviews.
+ *
  * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  * enough at me, Linus for the original (flawed) idea, Matthew
  * Kirkwood for proof-of-concept implementation.
@@ -57,90 +53,555 @@
 #include
 #include
-#include
+#include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
-#if 0
-#include
-#include
-#include
-#include
-#include
-#include
+//#define DEBUG_PRINT_FUTEX
 
-#ifdef __UACCESS__
-#include
-#endif
-
-#endif
-
-void futex_queue_init(struct futex_queue *queue)
-{
-	ihk_mc_spinlock_init(&queue->lock);
-	INIT_LIST_HEAD(&queue->futex_list);
-}
-
-static int uaddr_is_valid(uint32_t __user *uaddr)
-{
-#ifdef __UACCESS__
-	return access_ok(VERIFY_WRITE, uaddr, sizeof(uint32_t));
+#ifdef DEBUG_PRINT_FUTEX
+#define dkprintf kprintf
 #else
-	return 1;
+#define dkprintf(...)
 #endif
+
+int futex_cmpxchg_enabled;
+
+/**
+ * struct futex_q - The hashed futex queue entry, one per waiting task
+ * @task:		the task waiting on the futex
+ * @lock_ptr:		the hash bucket lock
+ * @key:		the key the futex is hashed on
+ * @requeue_pi_key:	the requeue_pi target futex key
+ * @bitset:		bitset for the optional bitmasked wakeup
+ *
+ * We use this hashed waitqueue, instead of a normal wait_queue_t, so
+ * we can wake only the relevant ones (hashed queues may be shared).
+ *
+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
+ * The order of wakeup is always to make the first condition true, then
+ * the second.
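+ *
+ * A waiter can therefore check for its own wakeup without taking any
+ * lock, e.g. (illustrative sketch only; the real lock-free check lives
+ * in unqueue_me() below):
+ *
+ *	if (plist_node_empty(&q->list) || q->lock_ptr == NULL)
+ *		return;	/* already woken, the futex_q is ours again */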
+ *
+ * PI futexes are typically woken before they are removed from the hash list via
+ * the rt_mutex code. See unqueue_me_pi().
+ */
+struct futex_q {
+	struct plist_node list;
+
+	struct process *task;
+	ihk_spinlock_t *lock_ptr;
+	union futex_key key;
+	union futex_key *requeue_pi_key;
+	uint32_t bitset;
+};
+
+/*
+ * Hash buckets are shared by all the futex_keys that hash to the same
+ * location. Each key may have multiple futex_q structures, one for each task
+ * waiting on a futex.
+ */
+struct futex_hash_bucket {
+	ihk_spinlock_t lock;
+	struct plist_head chain;
+};
+
+static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
+
+/*
+ * We hash on the keys returned from get_futex_key (see below).
+ */
+static struct futex_hash_bucket *hash_futex(union futex_key *key)
+{
+	uint32_t hash = jhash2((uint32_t*)&key->both.word,
+			(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+			key->both.offset);
+	return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
+}
 
-static int futex_init(struct futex *futex, uint32_t __user *uaddr,
-		      uint32_t bitset)
+/*
+ * Return 1 if two futex_keys are equal, 0 otherwise.
+ */
+static inline int match_futex(union futex_key *key1, union futex_key *key2)
 {
-	if (!uaddr_is_valid(uaddr))
-		return -EINVAL;
+	return (key1 && key2
+		&& key1->both.word == key2->both.word
+		&& key1->both.ptr == key2->both.ptr
+		&& key1->both.offset == key2->both.offset);
+}
+
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+	/* RIKEN: only !fshared futexes... */
+	return;
+}
+
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+	/* RIKEN: only !fshared futexes... */
+	return;
+}
+
+/**
+ * get_futex_key() - Get parameters which are the keys for a futex
+ * @uaddr:	virtual address of the futex
+ * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @key:	address where result is stored.
+ *
+ * Returns a negative error code or 0.
+ * The key words are stored in *key on success.
+ *
+ * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
+ * offset_within_page). For private mappings, it's (uaddr, current->mm).
+ * We can usually work out the index without swapping in the page.
+ *
+ * lock_page() might sleep, the caller should not hold a spinlock.
+ */
+static int
+get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
+{
+	unsigned long address = (unsigned long)uaddr;
+	struct process_vm *mm = cpu_local_var(current)->vm;
+
+	/*
+	 * The futex address must be "naturally" aligned.
+	 */
+	key->both.offset = address % PAGE_SIZE;
+	if ((address % sizeof(uint32_t)) != 0)
+		return -EINVAL;
+	address -= key->both.offset;
+
+	/*
+	 * PROCESS_PRIVATE futexes are fast.
+	 * As the mm cannot disappear under us and the 'key' only needs
+	 * the virtual address, we don't even have to find the underlying vma.
+	 * Note: we do have to check 'uaddr' is a valid user address,
+	 * but access_ok() should be faster than find_vma().
+	 */
+	if (!fshared) {
+
+		key->private.mm = mm;
+		key->private.address = address;
+		get_futex_key_refs(key);
+		return 0;
+	}
+
+	/* RIKEN: No shared futex support...
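+	 * (a shared futex would need the (inode, pgoff, offset) form of
+	 * union futex_key; that variant is kept under #if 0 in
+	 * include/futex.h for reference)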
*/ + return -EFAULT; +} + + +static inline +void put_futex_key(int fshared, union futex_key *key) +{ + drop_futex_key_refs(key); +} + +static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uint32_t newval) +{ + int curval; + + /* RIKEN: futexes are on not swappable memory */ + curval = futex_atomic_cmpxchg_inatomic((int*)uaddr, (int)uval, (int)newval); + + return curval; +} + +static int get_futex_value_locked(uint32_t *dest, uint32_t *from) +{ + /* RIKEN: futexes are always on not swappable pages */ + *dest = *from; - futex->uaddr = uaddr; - futex->bitset = bitset; - waitq_init(&futex->waitq); return 0; } -static struct futex_queue *get_queue(uint32_t __user *uaddr) +/* + * The hash bucket lock must be held when this is called. + * Afterwards, the futex_q must not be accessed. + */ +static void wake_futex(struct futex_q *q) { - uint64_t hash = hash_64((uint64_t)uaddr, FUTEX_HASHBITS); - return &cpu_local_var(current)->vm->futex_queues[hash]; + struct process *p = q->task; + + /* + * We set q->lock_ptr = NULL _before_ we wake up the task. If + * a non futex wake up happens on another CPU then the task + * might exit and p would dereference a non existing task + * struct. Prevent this by holding a reference on p across the + * wake up. + */ + + plist_del(&q->list, &q->list.plist); + /* + * The waiting task can free the futex_q as soon as + * q->lock_ptr = NULL is written, without taking any locks. A + * memory barrier is required here to prevent the following + * store to lock_ptr from getting ahead of the plist_del. + */ + barrier(); + q->lock_ptr = NULL; + + sched_wakeup_process(p, PS_NORMAL); } -static struct futex_queue *queue_lock(struct futex *futex, int *irqflags) +/* + * Express the locking dependencies for lockdep: + */ +static inline void +double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) { - struct futex_queue *queue = get_queue(futex->uaddr); - futex->lock_ptr = &queue->lock; - *irqflags = ihk_mc_spinlock_lock(&queue->lock); - return queue; + if (hb1 <= hb2) { + ihk_mc_spinlock_lock_noirq(&hb1->lock); + if (hb1 < hb2) + ihk_mc_spinlock_lock_noirq(&hb2->lock); + } else { /* hb1 > hb2 */ + ihk_mc_spinlock_lock_noirq(&hb2->lock); + ihk_mc_spinlock_lock_noirq(&hb1->lock); + } } -static void queue_unlock(struct futex_queue *futex_queue, int irqflags) +static inline void +double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) { - ihk_mc_spinlock_unlock(&futex_queue->lock, irqflags); + ihk_mc_spinlock_unlock_noirq(&hb1->lock); + if (hb1 != hb2) + ihk_mc_spinlock_unlock_noirq(&hb2->lock); } -static void queue_me(struct futex *futex, struct futex_queue *futex_queue) +/* + * Wake up waiters matching bitset queued on this futex (uaddr). + */ +static int futex_wake(uint32_t *uaddr, int fshared, int nr_wake, uint32_t bitset) { - list_add_tail(&futex->link, &futex_queue->futex_list); + struct futex_hash_bucket *hb; + struct futex_q *this, *next; + struct plist_head *head; + union futex_key key = FUTEX_KEY_INIT; + int ret; + + if (!bitset) + return -EINVAL; + + ret = get_futex_key(uaddr, fshared, &key); + if ((ret != 0)) + goto out; + + hb = hash_futex(&key); + ihk_mc_spinlock_lock_noirq(&hb->lock); + head = &hb->chain; + + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key)) { + + /* RIKEN: no pi state... 
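+			 * (wake-side PI handling would also require the
+			 * rt_mutex machinery; the FUTEX_*_PI commands are
+			 * stubbed out in futex() below)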
*/ + /* Check if one of the bits is set in both bitsets */ + if (!(this->bitset & bitset)) + continue; + + wake_futex(this); + if (++ret >= nr_wake) + break; + } + } + + ihk_mc_spinlock_unlock_noirq(&hb->lock); + put_futex_key(fshared, &key); +out: + return ret; } -static int unqueue_me(struct futex *futex) +/* + * Wake up all waiters hashed on the physical page that is mapped + * to this virtual address: + */ +static int +futex_wake_op(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, + int nr_wake, int nr_wake2, int op) +{ + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + struct futex_hash_bucket *hb1, *hb2; + struct plist_head *head; + struct futex_q *this, *next; + int ret, op_ret; + +retry: + ret = get_futex_key(uaddr1, fshared, &key1); + if ((ret != 0)) + goto out; + ret = get_futex_key(uaddr2, fshared, &key2); + if ((ret != 0)) + goto out_put_key1; + + hb1 = hash_futex(&key1); + hb2 = hash_futex(&key2); + +retry_private: + double_lock_hb(hb1, hb2); + op_ret = futex_atomic_op_inuser(op, (int*)uaddr2); + if ((op_ret < 0)) { + + double_unlock_hb(hb1, hb2); + + if ((op_ret != -EFAULT)) { + ret = op_ret; + goto out_put_keys; + } + + /* RIKEN: set ret to 0 as if fault_in_user_writeable() returned it */ + ret = 0; + + if (!fshared) + goto retry_private; + + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + goto retry; + } + + head = &hb1->chain; + + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key1)) { + wake_futex(this); + if (++ret >= nr_wake) + break; + } + } + + if (op_ret > 0) { + head = &hb2->chain; + + op_ret = 0; + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key2)) { + wake_futex(this); + if (++op_ret >= nr_wake2) + break; + } + } + ret += op_ret; + } + + double_unlock_hb(hb1, hb2); +out_put_keys: + put_futex_key(fshared, &key2); +out_put_key1: + put_futex_key(fshared, &key1); +out: + return ret; +} + +/** + * requeue_futex() - Requeue a futex_q from one hb to another + * @q: the futex_q to requeue + * @hb1: the source hash_bucket + * @hb2: the target hash_bucket + * @key2: the new key for the requeued futex_q + */ +static inline +void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key2) +{ + + /* + * If key1 and key2 hash to the same bucket, no need to + * requeue. + */ + if ((&hb1->chain != &hb2->chain)) { + plist_del(&q->list, &hb1->chain); + plist_add(&q->list, &hb2->chain); + q->lock_ptr = &hb2->lock; +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.spinlock = &hb2->lock; +#endif + } + get_futex_key_refs(key2); + q->key = *key2; +} + +/** + * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 + * uaddr1: source futex user address + * uaddr2: target futex user address + * nr_wake: number of waiters to wake (must be 1 for requeue_pi) + * nr_requeue: number of waiters to requeue (0-INT_MAX) + * requeue_pi: if we are attempting to requeue from a non-pi futex to a + * pi futex (pi to pi requeue is not supported) + * + * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire + * uaddr2 atomically on behalf of the top waiter. 
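+ *
+ * A typical user of the non-PI requeue path is glibc's
+ * pthread_cond_broadcast(), which wakes one waiter and requeues the rest
+ * onto the mutex word, roughly (field names hypothetical):
+ *
+ *	futex(&cond->futex, FUTEX_CMP_REQUEUE, 1, INT_MAX,
+ *	      &mutex->lock, cond->futex);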
+ * + * Returns: + * >=0 - on success, the number of tasks requeued or woken + * <0 - on error + */ +static int futex_requeue(uint32_t *uaddr1, int fshared, uint32_t *uaddr2, + int nr_wake, int nr_requeue, uint32_t *cmpval, + int requeue_pi) +{ + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + int drop_count = 0, task_count = 0, ret; + struct futex_hash_bucket *hb1, *hb2; + struct plist_head *head1; + struct futex_q *this, *next; + + ret = get_futex_key(uaddr1, fshared, &key1); + if ((ret != 0)) + goto out; + ret = get_futex_key(uaddr2, fshared, &key2); + if ((ret != 0)) + goto out_put_key1; + + hb1 = hash_futex(&key1); + hb2 = hash_futex(&key2); + + double_lock_hb(hb1, hb2); + + if ((cmpval != NULL)) { + uint32_t curval; + + ret = get_futex_value_locked(&curval, uaddr1); + + if (curval != *cmpval) { + ret = -EAGAIN; + goto out_unlock; + } + } + + head1 = &hb1->chain; + plist_for_each_entry_safe(this, next, head1, list) { + if (task_count - nr_wake >= nr_requeue) + break; + + if (!match_futex(&this->key, &key1)) + continue; + + /* + * Wake nr_wake waiters. For requeue_pi, if we acquired the + * lock, we already woke the top_waiter. If not, it will be + * woken by futex_unlock_pi(). + */ + /* RIKEN: no requeue_pi at this moment */ + if (++task_count <= nr_wake) { + wake_futex(this); + continue; + } + + requeue_futex(this, hb1, hb2, &key2); + drop_count++; + } + +out_unlock: + double_unlock_hb(hb1, hb2); + + /* + * drop_futex_key_refs() must be called outside the spinlocks. During + * the requeue we moved futex_q's from the hash bucket at key1 to the + * one at key2 and updated their key pointer. We no longer need to + * hold the references to key1. + */ + while (--drop_count >= 0) + drop_futex_key_refs(&key1); + + put_futex_key(fshared, &key2); +out_put_key1: + put_futex_key(fshared, &key1); +out: + return ret ? ret : task_count; +} + +/* The key must be already stored in q->key. */ +static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) +{ + struct futex_hash_bucket *hb; + + get_futex_key_refs(&q->key); + hb = hash_futex(&q->key); + q->lock_ptr = &hb->lock; + + ihk_mc_spinlock_lock_noirq(&hb->lock); + return hb; +} + +static inline void +queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) +{ + ihk_mc_spinlock_unlock_noirq(&hb->lock); + drop_futex_key_refs(&q->key); +} + +/** + * queue_me() - Enqueue the futex_q on the futex_hash_bucket + * @q: The futex_q to enqueue + * @hb: The destination hash bucket + * + * The hb->lock must be held by the caller, and is released here. A call to + * queue_me() is typically paired with exactly one call to unqueue_me(). The + * exceptions involve the PI related operations, which may use unqueue_me_pi() + * or nothing if the unqueue is done as part of the wake process and the unqueue + * state is implicit in the state of woken task (see futex_wait_requeue_pi() for + * an example). + */ +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +{ + int prio; + + /* + * The priority used to register this element is + * - either the real thread-priority for the real-time threads + * (i.e. threads with a priority lower than MAX_RT_PRIO) + * - or MAX_RT_PRIO for non-RT threads. + * Thus, all RT-threads are woken first in priority order, and + * the others are woken last, in FIFO order. + * + * RIKEN: no priorities at the moment, everyone is 10. 
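+	 *
+	 * For reference, Linux 2.6.34 derives the value as
+	 *	prio = min(current->normal_prio, MAX_RT_PRIO);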
+ */ + prio = 10; + + plist_node_init(&q->list, prio); +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.spinlock = &hb->lock; +#endif + plist_add(&q->list, &hb->chain); + q->task = cpu_local_var(current); + ihk_mc_spinlock_unlock_noirq(&hb->lock); +} + +/** + * unqueue_me() - Remove the futex_q from its futex_hash_bucket + * @q: The futex_q to unqueue + * + * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must + * be paired with exactly one earlier call to queue_me(). + * + * Returns: + * 1 - if the futex_q was still queued (and we removed unqueued it) + * 0 - if the futex_q was already removed by the waking thread + */ +static int unqueue_me(struct futex_q *q) { ihk_spinlock_t *lock_ptr; - int irqflags; - int status = 0; + int ret = 0; /* In the common case we don't take the spinlock, which is nice. */ retry: - lock_ptr = futex->lock_ptr; + lock_ptr = q->lock_ptr; barrier(); if (lock_ptr != NULL) { - irqflags = ihk_mc_spinlock_lock(lock_ptr); + ihk_mc_spinlock_lock_noirq(lock_ptr); /* * q->lock_ptr can change between reading it and * spin_lock(), causing us to take the wrong lock. This @@ -154,95 +615,46 @@ retry: * however, change back to the original value. Therefore * we can detect whether we acquired the correct lock. */ - if (lock_ptr != futex->lock_ptr) { - ihk_mc_spinlock_unlock(lock_ptr, irqflags); + if (lock_ptr != q->lock_ptr) { + ihk_mc_spinlock_unlock_noirq(lock_ptr); goto retry; } + plist_del(&q->list, &q->list.plist); - //WARN_ON(list_empty(&futex->link)); - list_del(&futex->link); - ihk_mc_spinlock_unlock(lock_ptr, irqflags); - status = 1; + ihk_mc_spinlock_unlock_noirq(lock_ptr); + ret = 1; } - return status; + drop_futex_key_refs(&q->key); + return ret; } -static void lock_two_queues(struct futex_queue *queue1, int *irqflags1, - struct futex_queue *queue2, int *irqflags2) -{ - if (queue1 < queue2) - *irqflags1 = ihk_mc_spinlock_lock(&queue1->lock); - - *irqflags2 = ihk_mc_spinlock_lock(&queue2->lock); - - if (queue1 > queue2) - *irqflags1 = ihk_mc_spinlock_lock(&queue1->lock); -} +/** + * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal + * @hb: the futex hash bucket, must be locked by the caller + * @q: the futex_q to queue up on + * @timeout: the prepared hrtimer_sleeper, or null for no timeout + */ -static void unlock_two_queues(struct futex_queue *queue1, int irqflags1, - struct futex_queue *queue2, int irqflags2) +/* RIKEN: this function has been rewritten so that it returns the remaining + * time in case we are waken. + */ +static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + uint64_t timeout) { - if (queue1 == queue2) { - ihk_mc_spinlock_unlock(&queue2->lock, irqflags2); - } - else { - ihk_mc_spinlock_unlock(&queue2->lock, irqflags2); - ihk_mc_spinlock_unlock(&queue1->lock, irqflags1); - } -} - -/** Puts a task to sleep waiting on a futex. */ -static int futex_wait(uint32_t __user *uaddr, uint32_t val, - uint64_t timeout, uint32_t bitset) -{ - DECLARE_WAITQ_ENTRY(wait, cpu_local_var(current)); - int status; - uint32_t uval; - struct futex futex; - struct futex_queue *queue; - int irqflags; uint64_t time_remain = 0; - - if (!bitset) - return -EINVAL; - - /* This verifies that uaddr is sane */ - if ((status = futex_init(&futex, uaddr, bitset)) != 0) - return status; - - /* Lock the futex queue corresponding to uaddr */ - queue = queue_lock(&futex, &irqflags); - - /* Get the value from user-space. 
Since we don't have - * paging, the only options are for this to succeed (with no - * page faults) or fail, returning -EFAULT. There is no way - * for us to be put to sleep, so holding the queue's spinlock - * is fine. */ -#ifdef __UACCESS__ - if ((status = get_user(uval, uaddr)) != 0) - goto error; -#else - uval = *uaddr; - status = 0; -#endif - - /* The user-space value must match the value passed in */ - if (uval != val) { - status = -EWOULDBLOCK; - goto error; - } + /* + * The task state is guaranteed to be set before another task can + * wake it. set_current_state() is implemented using set_mb() and + * queue_me() calls spin_unlock() upon completion, both serializing + * access to the hash list and forcing another memory barrier. + */ + xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE); + queue_me(q, hb); - /* Add ourself to the futex's waitq and go to sleep */ - cpu_local_var(current)->status = PS_INTERRUPTIBLE; - waitq_add_entry(&futex.waitq, &wait); - - /* Add ourself to the futex queue and drop our lock on it */ - queue_me(&futex, queue); - queue_unlock(queue, irqflags); - - if (!list_empty(&futex.link)) { + if (!plist_node_empty(&q->list)) { + /* RIKEN: use mcos timers */ if (timeout) { time_remain = schedule_timeout(timeout); } @@ -251,221 +663,217 @@ static int futex_wait(uint32_t __user *uaddr, uint32_t val, time_remain = 0; } } - - cpu_local_var(current)->status = PS_RUNNING; - - /* - * NOTE: We don't remove ourself from the waitq because - * we are the only user of it. - */ - /* If we were woken (and unqueued), we succeeded, whatever. */ - if (!unqueue_me(&futex)) - return 0; - - if (time_remain == 0) - return -ETIMEDOUT; - - /* We expect that there is a signal pending, but another thread - * may have handled it for us already. */ - return -EINTR; - -error: - queue_unlock(queue, irqflags); - return status; + /* This does not need to be serialized */ + cpu_local_var(current)->status = PS_RUNNING; + + return time_remain; } -/* - * The futex_queue's lock must be held when this is called. - * Afterwards, the futex_queue must not be accessed. +/** + * futex_wait_setup() - Prepare to wait on a futex + * @uaddr: the futex userspace address + * @val: the expected value + * @fshared: whether the futex is shared (1) or not (0) + * @q: the associated futex_q + * @hb: storage for hash_bucket pointer to be returned to caller + * + * Setup the futex_q and locate the hash_bucket. Get the futex value and + * compare it with the expected value. Handle atomic faults internally. + * Return with the hb lock held and a q.key reference on success, and unlocked + * with no q.key reference on failure. + * + * Returns: + * 0 - uaddr contains val and hb has been locked + * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked */ -static void wake_futex(struct futex *futex) +static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared, + struct futex_q *q, struct futex_hash_bucket **hb) { - list_del_init(&futex->link); + uint32_t uval; + int ret; + /* - * The lock in waitq_wakeup() is a crucial memory barrier after the - * list_del_init() and also before assigning to futex->lock_ptr. - */ - waitq_wakeup(&futex->waitq); - /* - * The waiting task can free the futex as soon as this is written, - * without taking any locks. This must come last. + * Access the page AFTER the hash-bucket is locked. + * Order is important: * - * A memory barrier is required here to prevent the following store - * to lock_ptr from getting ahead of the wakeup. 
Clearing the lock - * at the end of waitq_wakeup() does not prevent this store from - * moving. + * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); + * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } + * + * The basic logical guarantee of a futex is that it blocks ONLY + * if cond(var) is known to be true at the time of blocking, for + * any cond. If we queued after testing *uaddr, that would open + * a race condition where we could block indefinitely with + * cond(var) false, which would violate the guarantee. + * + * A consequence is that futex_wait() can return zero and absorb + * a wakeup when *uaddr != val on entry to the syscall. This is + * rare, but normal. */ - barrier(); - futex->lock_ptr = NULL; + q->key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q->key); + if ((ret != 0)) + return ret; + + *hb = queue_lock(q); + + ret = get_futex_value_locked(&uval, uaddr); + + /* RIKEN: get_futex_value_locked() always returns 0 on mckernel */ + + if (uval != val) { + queue_unlock(q, *hb); + ret = -EWOULDBLOCK; + } + + if (ret) + put_futex_key(fshared, &q->key); + return ret; } -/** Wakes up nr_wake tasks waiting on a futex. */ -static int futex_wake(uint32_t __user *uaddr, int nr_wake, uint32_t bitset) +static int futex_wait(uint32_t __user *uaddr, int fshared, + uint32_t val, uint64_t timeout, uint32_t bitset, int clockrt) { - struct futex_queue *queue; - struct list_head *head; - struct futex *this, *next; - int nr_woke = 0; - int irqflags; + struct futex_hash_bucket *hb; + struct futex_q q; + uint64_t time_remain; + int ret; if (!bitset) return -EINVAL; - if (!uaddr_is_valid(uaddr)) - return -EINVAL; + q.bitset = bitset; + q.requeue_pi_key = NULL; - queue = get_queue(uaddr); - irqflags = ihk_mc_spinlock_lock(&queue->lock); - head = &queue->futex_list; + /* RIKEN: futex_wait_queue_me() calls schedule_timeout() if timer is set */ - list_for_each_entry_safe(this, next, head, link) { - if ((this->uaddr == uaddr) && (this->bitset & bitset)) { - wake_futex(this); - if (++nr_woke >= nr_wake) - break; - } - } +retry: + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out; - ihk_mc_spinlock_unlock(&queue->lock, irqflags); - return nr_woke; + /* queue_me and wait for wakeup, timeout, or a signal. */ + time_remain = futex_wait_queue_me(hb, &q, timeout); + + /* If we were woken (and unqueued), we succeeded, whatever. */ + ret = 0; + if (!unqueue_me(&q)) + goto out_put_key; + ret = -ETIMEDOUT; + + /* RIKEN: timer expired case (indicated by !time_remain) */ + if (timeout && !time_remain) + goto out_put_key; + + /* RIKEN: no signals */ + put_futex_key(fshared, &q.key); + goto retry; + +out_put_key: + put_futex_key(fshared, &q.key); +out: + return ret; } -/** Conditionally wakes up tasks that are waiting on futexes. 
*/ -static int futex_wake_op(uint32_t __user *uaddr1, uint32_t __user *uaddr2, - int nr_wake1, int nr_wake2, int op) +int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout, + uint32_t *uaddr2, uint32_t val2, uint32_t val3) { - struct futex_queue *queue1, *queue2; - int irqflags1 = 0; - int irqflags2 = 0; - struct list_head *head; - struct futex *this, *next; - int op_result, nr_woke1 = 0, nr_woke2 = 0; + int clockrt, ret = -ENOSYS; + int cmd = op & FUTEX_CMD_MASK; + int fshared = 0; - if (!uaddr_is_valid(uaddr1) || !uaddr_is_valid(uaddr2)) - return -EINVAL; - - queue1 = get_queue(uaddr1); - queue2 = get_queue(uaddr2); - lock_two_queues(queue1, &irqflags1, queue2, &irqflags2); - - op_result = futex_atomic_op_inuser(op, (int *)uaddr2); - if (op_result < 0) { - unlock_two_queues(queue1, irqflags1, queue2, irqflags2); - return op_result; + /* RIKEN: Assume address space private futexes. + if (!(op & FUTEX_PRIVATE_FLAG)) { + fshared = 1; } + */ - head = &queue1->futex_list; - list_for_each_entry_safe(this, next, head, link) { - if (this->uaddr == uaddr1) { - wake_futex(this); - if (++nr_woke1 >= nr_wake1) - break; - } + clockrt = op & FUTEX_CLOCK_REALTIME; + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) + return -ENOSYS; + + switch (cmd) { + case FUTEX_WAIT: + val3 = FUTEX_BITSET_MATCH_ANY; + case FUTEX_WAIT_BITSET: + ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); + break; + case FUTEX_WAKE: + val3 = FUTEX_BITSET_MATCH_ANY; + case FUTEX_WAKE_BITSET: + ret = futex_wake(uaddr, fshared, val, val3); + break; + case FUTEX_REQUEUE: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); + break; + case FUTEX_CMP_REQUEUE: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); + break; + case FUTEX_WAKE_OP: + ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); + break; + /* RIKEN: these calls are not supported for now. + case FUTEX_LOCK_PI: + if (futex_cmpxchg_enabled) + ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); + break; + case FUTEX_UNLOCK_PI: + if (futex_cmpxchg_enabled) + ret = futex_unlock_pi(uaddr, fshared); + break; + case FUTEX_TRYLOCK_PI: + if (futex_cmpxchg_enabled) + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); + break; + case FUTEX_WAIT_REQUEUE_PI: + val3 = FUTEX_BITSET_MATCH_ANY; + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); + break; + case FUTEX_CMP_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); + break; + */ + default: + kprintf("futex() invalid cmd: %d \n", cmd); + ret = -ENOSYS; } - - if (op_result > 0) { - head = &queue2->futex_list; - list_for_each_entry_safe(this, next, head, link) { - if (this->uaddr == uaddr2) { - wake_futex(this); - if (++nr_woke2 >= nr_wake2) - break; - } - } - } - - unlock_two_queues(queue1, irqflags1, queue2, irqflags2); - return nr_woke1 + nr_woke2; + return ret; } -/** Conditionally wakes up or requeues tasks that are waiting on futexes. 
*/ -static int futex_cmp_requeue(uint32_t __user *uaddr1, uint32_t __user *uaddr2, - int nr_wake, int nr_requeue, uint32_t cmpval) -{ - struct futex_queue *queue1, *queue2; - int irqflags1, irqflags2; - struct list_head *head1, *head2; - struct futex *this, *next; - uint32_t curval; - int status, nr_woke = 0; - - if (!uaddr_is_valid(uaddr1) || !uaddr_is_valid(uaddr2)) - return -EINVAL; - - queue1 = get_queue(uaddr1); - queue2 = get_queue(uaddr2); - lock_two_queues(queue1, &irqflags1, queue2, &irqflags2); - -#ifdef __UACCESS__ - if ((status = get_user(curval, uaddr1)) != 0) - goto out_unlock; -#else - curval = *uaddr1; - status = 0; +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif - if (curval != cmpval) { - status = -EAGAIN; - goto out_unlock; - } - - head1 = &queue1->futex_list; - head2 = &queue2->futex_list; - list_for_each_entry_safe(this, next, head1, link) { - if (this->uaddr != uaddr1) - continue; - if (++nr_woke <= nr_wake) { - wake_futex(this); - } else { - /* If uaddr1 and uaddr2 hash to the - * same futex queue, no need to requeue */ - if (head1 != head2) { - list_move_tail(&this->link, head2); - this->lock_ptr = &queue2->lock; - } - this->uaddr = uaddr2; - - if (nr_woke - nr_wake >= nr_requeue) - break; - } - } - status = nr_woke; - -out_unlock: - unlock_two_queues(queue1, irqflags1, queue2, irqflags2); - return status; -} - -int futex(uint32_t __user *uaddr, int op, uint32_t val, uint64_t timeout, - uint32_t __user *uaddr2, uint32_t val2, uint32_t val3) +int futex_init(void) { - int status; + int curval; + int i; - switch (op) { - case FUTEX_WAIT: - val3 = FUTEX_BITSET_MATCH_ANY; - case FUTEX_WAIT_BITSET: - status = futex_wait(uaddr, val, timeout, val3); - break; - case FUTEX_WAKE: - val3 = FUTEX_BITSET_MATCH_ANY; - case FUTEX_WAKE_BITSET: - status = futex_wake(uaddr, val, val3); - break; - case FUTEX_WAKE_OP: - status = futex_wake_op(uaddr, uaddr2, val, val2, val3); - break; - case FUTEX_CMP_REQUEUE: - status = futex_cmp_requeue(uaddr, uaddr2, val, val2, val3); - break; - default: - kprintf("sys_futex() op=%d not supported (pid: )\n", - op, &cpu_local_var(current)->pid); - - status = -ENOSYS; + /* + * This will fail and we want it. Some arch implementations do + * runtime detection of the futex_atomic_cmpxchg_inatomic() + * functionality. We want to know that before we call in any + * of the complex code paths. Also we want to prevent + * registration of robust lists in that case. NULL is + * guaranteed to fault and we get -EFAULT on functional + * implementation, the non functional ones will return + * -ENOSYS. + */ + curval = cmpxchg_futex_value_locked(NULL, 0, 0); + if (curval == -EFAULT) { + dkprintf("futex_cmpxchg_enabled = 1 ??\n"); + futex_cmpxchg_enabled = 1; } - return status; + for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { + plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); + ihk_mc_spinlock_init(&futex_queues[i].lock); + } + + return 0; } diff --git a/kernel/include/futex.h b/kernel/include/futex.h index 2f700cf7..6c9d46df 100644 --- a/kernel/include/futex.h +++ b/kernel/include/futex.h @@ -1,24 +1,50 @@ -/* Kitten LWK futex adaptation */ +/* + * Linux futex adaptation. 
+ * (C) Copyright 2013 RIKEN AICS + * Balazs Gerofi + */ - -#ifndef _LWK_FUTEX_H -#define _LWK_FUTEX_H +#ifndef _FUTEX_H +#define _FUTEX_H /** \name Futex Commands * @{ */ #define FUTEX_WAIT 0 #define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 #define FUTEX_CMP_REQUEUE 4 #define FUTEX_WAKE_OP 5 +#define FUTEX_LOCK_PI 6 +#define FUTEX_UNLOCK_PI 7 +#define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 // @} #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 #define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) +#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) +#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) + + /** \name Futex Operations, used for FUTEX_WAKE_OP * @{ */ @@ -201,30 +227,34 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, #define FUTEX_HASHBITS 8 /* 256 entries in each futex hash tbl */ -/** Futex tracking structure. - * - * A futex has a woken state, just like tasks have TASK_RUNNING. - * It is considered woken when list_empty(&futex->link) || futex->lock_ptr == 0. - * The order of wakup is always to make the first condition true, then - * wake up futex->waitq, then make the second condition true. - */ -struct futex { - struct list_head link; - struct waitq waitq; - ihk_spinlock_t * lock_ptr; - uint32_t __user * uaddr; - uint32_t bitset; +#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */ +#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */ + +struct process_vm; + +union futex_key { +#if 0 + struct { + unsigned long pgoff; + struct inode *inode; + int offset; + } shared; +#endif + struct { + unsigned long address; + struct process_vm *mm; + int offset; + } private; + struct { + unsigned long word; + void *ptr; + int offset; + } both; }; -struct futex_queue { - ihk_spinlock_t lock; - struct list_head futex_list; -}; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } -extern void -futex_queue_init( - struct futex_queue * queue -); +extern int futex_init(void); extern int futex( diff --git a/kernel/include/jhash.h b/kernel/include/jhash.h new file mode 100644 index 00000000..a026476d --- /dev/null +++ b/kernel/include/jhash.h @@ -0,0 +1,145 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* RIKEN: u32 replaced to uint32_t + * + * jhash.h: Jenkins hash support. + * + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup2.c, by Bob Jenkins, December 1996, Public Domain. + * hash(), hash2(), hash3, and mix() are externally useful functions. 
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/* NOTE: Arguments are modified. */
+#define __jhash_mix(a, b, c) \
+{ \
+	a -= b; a -= c; a ^= (c>>13); \
+	b -= c; b -= a; b ^= (a<<8); \
+	c -= a; c -= b; c ^= (b>>13); \
+	a -= b; a -= c; a ^= (c>>12); \
+	b -= c; b -= a; b ^= (a<<16); \
+	c -= a; c -= b; c ^= (b>>5); \
+	a -= b; a -= c; a ^= (c>>3); \
+	b -= c; b -= a; b ^= (a<<10); \
+	c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ratio: an arbitrary value */
+#define JHASH_GOLDEN_RATIO	0x9e3779b9
+
+/* The most generic version; it hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key.
+ */
+static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval)
+{
+	uint32_t a, b, c, len;
+	const uint8_t *k = key;
+
+	len = length;
+	a = b = JHASH_GOLDEN_RATIO;
+	c = initval;
+
+	while (len >= 12) {
+		a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
+		b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
+		c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
+
+		__jhash_mix(a,b,c);
+
+		k += 12;
+		len -= 12;
+	}
+
+	c += length;
+	switch (len) {
+	case 11: c += ((uint32_t)k[10]<<24);
+	case 10: c += ((uint32_t)k[9]<<16);
+	case 9 : c += ((uint32_t)k[8]<<8);
+	case 8 : b += ((uint32_t)k[7]<<24);
+	case 7 : b += ((uint32_t)k[6]<<16);
+	case 6 : b += ((uint32_t)k[5]<<8);
+	case 5 : b += k[4];
+	case 4 : a += ((uint32_t)k[3]<<24);
+	case 3 : a += ((uint32_t)k[2]<<16);
+	case 2 : a += ((uint32_t)k[1]<<8);
+	case 1 : a += k[0];
+	};
+
+	__jhash_mix(a,b,c);
+
+	return c;
+}
+
+/* A special optimized version that handles one or more uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ */
+static inline uint32_t jhash2(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+	uint32_t a, b, c, len;
+
+	a = b = JHASH_GOLDEN_RATIO;
+	c = initval;
+	len = length;
+
+	while (len >= 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		__jhash_mix(a, b, c);
+		k += 3; len -= 3;
+	}
+
+	c += length * 4;
+
+	switch (len) {
+	case 2 : b += k[1];
+	case 1 : a += k[0];
+	};
+
+	__jhash_mix(a,b,c);
+
+	return c;
+}
+
+/* A special ultra-optimized version that knows it is hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+static inline uint32_t jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += initval;
+
+	__jhash_mix(a, b, c);
+
+	return c;
+}
+
+static inline uint32_t jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
+{
+	return jhash_3words(a, b, 0, initval);
+}
+
+static inline uint32_t jhash_1word(uint32_t a, uint32_t initval)
+{
+	return jhash_3words(a, 0, 0, initval);
+}
+
+#endif /* _LINUX_JHASH_H */
diff --git a/kernel/include/plist.h b/kernel/include/plist.h
new file mode 100644
index 00000000..80231129
--- /dev/null
+++ b/kernel/include/plist.h
@@ -0,0 +1,273 @@
+/*
+ * Descending-priority-sorted double-linked list
+ *
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez .
+ *
+ * 2001-2005 (c) MontaVista Software, Inc.
+ * Daniel Walker
+ *
+ * (C) 2005 Thomas Gleixner
+ *
+ * Simplifications of the original code by
+ * Oleg Nesterov
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on simple lists (include/linux/list.h).
+ *
+ * This is a priority-sorted list of nodes; each node has a
+ * priority from INT_MIN (highest) to INT_MAX (lowest).
+ *
+ * Addition is O(K), removal is O(1), change of priority of a node is
+ * O(K) and K is the number of RT priority levels used in the system.
+ * (1 <= K <= 99)
+ *
+ * This list is really a list of lists:
+ *
+ *  - The tier 1 list is the prio_list, different priority nodes.
+ *
+ *  - The tier 2 list is the node_list, serialized nodes.
+ *
+ * Simple ASCII art explanation:
+ *
+ * |HEAD          |
+ * |              |
+ * |prio_list.prev|<------------------------------------|
+ * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-|
+ * |10            |   |10|   |21|   |21|   |21|   |40|   (prio)
+ * |              |   |  |   |  |   |  |   |  |   |  |
+ * |              |   |  |   |  |   |  |   |  |   |  |
+ * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
+ * |node_list.prev|<------------------------------------|
+ *
+ * The nodes on the prio_list list are sorted by priority to simplify
+ * the insertion of new nodes. There are no nodes with duplicate
+ * priorities on the list.
+ *
+ * The nodes on the node_list are ordered by priority and can contain
+ * entries which have the same priority. Those entries are ordered
+ * FIFO.
+ *
+ * Addition means: look for the prio_list node in the prio_list
+ * for the priority of the node and insert it before the node_list
+ * entry of the next prio_list node. If it is the first node of
+ * that priority, add it to the prio_list in the right position and
+ * insert it into the serialized node_list list.
+ *
+ * Removal means remove it from the node_list and remove it from
+ * the prio_list if the node_list list_head is non-empty. In case
+ * of removal from the prio_list it must be checked whether other
+ * entries of the same priority are on the list or not. If there
+ * is another entry of the same priority then this entry has to
+ * replace the removed entry on the prio_list. If the entry which
+ * is removed is the only entry of this priority then a simple
+ * remove from both lists is sufficient.
+ *
+ * INT_MIN is the highest priority, 0 is the medium highest, INT_MAX
+ * is lowest priority.
+ *
+ * No locking is done, up to the caller.
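+ *
+ * A minimal usage sketch with a caller-provided IHK lock (names are
+ * illustrative; the APIs are the ones declared in this header):
+ *
+ *	ihk_mc_spinlock_lock_noirq(&lock);
+ *	plist_node_init(&node, prio);
+ *	plist_add(&node, &head);
+ *	...
+ *	plist_del(&node, &head);
+ *	ihk_mc_spinlock_unlock_noirq(&lock);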
+ * + */ +#ifndef _LINUX_PLIST_H_ +#define _LINUX_PLIST_H_ + +#include +#include + +struct plist_head { + struct list_head prio_list; + struct list_head node_list; +#ifdef CONFIG_DEBUG_PI_LIST + raw_spinlock_t *rawlock; + spinlock_t *spinlock; +#endif +}; + +struct plist_node { + int prio; + struct plist_head plist; +}; + +#ifdef CONFIG_DEBUG_PI_LIST +# define PLIST_HEAD_LOCK_INIT(_lock) .spinlock = _lock +# define PLIST_HEAD_LOCK_INIT_RAW(_lock) .rawlock = _lock +#else +# define PLIST_HEAD_LOCK_INIT(_lock) +# define PLIST_HEAD_LOCK_INIT_RAW(_lock) +#endif + +#define _PLIST_HEAD_INIT(head) \ + .prio_list = LIST_HEAD_INIT((head).prio_list), \ + .node_list = LIST_HEAD_INIT((head).node_list) + +/** + * PLIST_HEAD_INIT - static struct plist_head initializer + * @head: struct plist_head variable name + * @_lock: lock to initialize for this list + */ +#define PLIST_HEAD_INIT(head, _lock) \ +{ \ + _PLIST_HEAD_INIT(head), \ + PLIST_HEAD_LOCK_INIT(&(_lock)) \ +} + +/** + * PLIST_HEAD_INIT_RAW - static struct plist_head initializer + * @head: struct plist_head variable name + * @_lock: lock to initialize for this list + */ +#define PLIST_HEAD_INIT_RAW(head, _lock) \ +{ \ + _PLIST_HEAD_INIT(head), \ + PLIST_HEAD_LOCK_INIT_RAW(&(_lock)) \ +} + +/** + * PLIST_NODE_INIT - static struct plist_node initializer + * @node: struct plist_node variable name + * @__prio: initial node priority + */ +#define PLIST_NODE_INIT(node, __prio) \ +{ \ + .prio = (__prio), \ + .plist = { _PLIST_HEAD_INIT((node).plist) }, \ +} + +/** + * plist_head_init - dynamic struct plist_head initializer + * @head: &struct plist_head pointer + * @lock: spinlock protecting the list (debugging) + */ +static inline void +plist_head_init(struct plist_head *head, ihk_spinlock_t *lock) +{ + INIT_LIST_HEAD(&head->prio_list); + INIT_LIST_HEAD(&head->node_list); +#ifdef CONFIG_DEBUG_PI_LIST + head->spinlock = lock; + head->rawlock = NULL; +#endif +} + +/** + * plist_head_init_raw - dynamic struct plist_head initializer + * @head: &struct plist_head pointer + * @lock: raw_spinlock protecting the list (debugging) + */ +static inline void +plist_head_init_raw(struct plist_head *head, ihk_spinlock_t *lock) +{ + INIT_LIST_HEAD(&head->prio_list); + INIT_LIST_HEAD(&head->node_list); +#ifdef CONFIG_DEBUG_PI_LIST + head->rawlock = lock; + head->spinlock = NULL; +#endif +} + +/** + * plist_node_init - Dynamic struct plist_node initializer + * @node: &struct plist_node pointer + * @prio: initial node priority + */ +static inline void plist_node_init(struct plist_node *node, int prio) +{ + node->prio = prio; + plist_head_init(&node->plist, NULL); +} + +extern void plist_add(struct plist_node *node, struct plist_head *head); +extern void plist_del(struct plist_node *node, struct plist_head *head); + +/** + * plist_for_each - iterate over the plist + * @pos: the type * to use as a loop counter + * @head: the head for your list + */ +#define plist_for_each(pos, head) \ + list_for_each_entry(pos, &(head)->node_list, plist.node_list) + +/** + * plist_for_each_safe - iterate safely over a plist of given type + * @pos: the type * to use as a loop counter + * @n: another type * to use as temporary storage + * @head: the head for your list + * + * Iterate over a plist of given type, safe against removal of list entry. 
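+ *
+ * Example, draining a list one node at a time (illustrative):
+ *	plist_for_each_safe(pos, n, &head)
+ *		plist_del(pos, &head);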
+ */ +#define plist_for_each_safe(pos, n, head) \ + list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list) + +/** + * plist_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter + * @head: the head for your list + * @mem: the name of the list_struct within the struct + */ +#define plist_for_each_entry(pos, head, mem) \ + list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list) + +/** + * plist_for_each_entry_safe - iterate safely over list of given type + * @pos: the type * to use as a loop counter + * @n: another type * to use as temporary storage + * @head: the head for your list + * @m: the name of the list_struct within the struct + * + * Iterate over list of given type, safe against removal of list entry. + */ +#define plist_for_each_entry_safe(pos, n, head, m) \ + list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list) + +/** + * plist_head_empty - return !0 if a plist_head is empty + * @head: &struct plist_head pointer + */ +static inline int plist_head_empty(const struct plist_head *head) +{ + return list_empty(&head->node_list); +} + +/** + * plist_node_empty - return !0 if plist_node is not on a list + * @node: &struct plist_node pointer + */ +static inline int plist_node_empty(const struct plist_node *node) +{ + return plist_head_empty(&node->plist); +} + +/* All functions below assume the plist_head is not empty. */ + +/** + * plist_first_entry - get the struct for the first entry + * @head: the &struct plist_head pointer + * @type: the type of the struct this is embedded in + * @member: the name of the list_struct within the struct + */ +#ifdef CONFIG_DEBUG_PI_LIST +# define plist_first_entry(head, type, member) \ +({ \ + WARN_ON(plist_head_empty(head)); \ + container_of(plist_first(head), type, member); \ +}) +#else +# define plist_first_entry(head, type, member) \ + container_of(plist_first(head), type, member) +#endif + +/** + * plist_first - return the first node (and thus, highest priority) + * @head: the &struct plist_head pointer + * + * Assumes the plist is _not_ empty. + */ +static inline struct plist_node *plist_first(const struct plist_head *head) +{ + return list_entry(head->node_list.next, + struct plist_node, plist.node_list); +} + +#endif diff --git a/kernel/include/process.h b/kernel/include/process.h index 489897c6..24e5ec17 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -73,9 +73,6 @@ struct process_vm { struct list_head vm_range_list; struct vm_regions region; - // Address space private futexes - struct futex_queue futex_queues[1 << FUTEX_HASHBITS]; - ihk_spinlock_t page_table_lock; ihk_spinlock_t memory_range_lock; // to protect the followings: diff --git a/kernel/init.c b/kernel/init.c index cea67803..6376b680 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -216,6 +216,8 @@ int main(void) post_init(); + futex_init(); + kputs("MCK/IHK booted.\n"); #ifdef DCFA_KMOD diff --git a/kernel/plist.c b/kernel/plist.c new file mode 100644 index 00000000..5c0d1f28 --- /dev/null +++ b/kernel/plist.c @@ -0,0 +1,123 @@ +/* + * lib/plist.c + * + * Descending-priority-sorted double-linked list + * + * (C) 2002-2003 Intel Corp + * Inaky Perez-Gonzalez . + * + * 2001-2005 (c) MontaVista Software, Inc. + * Daniel Walker + * + * (C) 2005 Thomas Gleixner + * + * Simplifications of the original code by + * Oleg Nesterov + * + * Licensed under the FSF's GNU Public License v2 or later. + * + * Based on simple lists (include/linux/list.h). 
+ * + * This file contains the add / del functions which are considered to + * be too large to inline. See include/linux/plist.h for further + * information. + */ + +#include +#include + +#ifdef CONFIG_DEBUG_PI_LIST + +static void plist_check_prev_next(struct list_head *t, struct list_head *p, + struct list_head *n) +{ + WARN(n->prev != p || p->next != n, + "top: %p, n: %p, p: %p\n" + "prev: %p, n: %p, p: %p\n" + "next: %p, n: %p, p: %p\n", + t, t->next, t->prev, + p, p->next, p->prev, + n, n->next, n->prev); +} + +static void plist_check_list(struct list_head *top) +{ + struct list_head *prev = top, *next = top->next; + + plist_check_prev_next(top, prev, next); + while (next != top) { + prev = next; + next = prev->next; + plist_check_prev_next(top, prev, next); + } +} + +static void plist_check_head(struct plist_head *head) +{ + WARN_ON(!head->rawlock && !head->spinlock); + if (head->rawlock) + WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); + if (head->spinlock) + WARN_ON_SMP(!spin_is_locked(head->spinlock)); + plist_check_list(&head->prio_list); + plist_check_list(&head->node_list); +} + +#else +# define plist_check_head(h) do { } while (0) +#endif + +/** + * plist_add - add @node to @head + * + * @node: &struct plist_node pointer + * @head: &struct plist_head pointer + */ +void plist_add(struct plist_node *node, struct plist_head *head) +{ + struct plist_node *iter; + + plist_check_head(head); +#if 0 + WARN_ON(!plist_node_empty(node)); +#endif + + list_for_each_entry(iter, &head->prio_list, plist.prio_list) { + if (node->prio < iter->prio) + goto lt_prio; + else if (node->prio == iter->prio) { + iter = list_entry(iter->plist.prio_list.next, + struct plist_node, plist.prio_list); + goto eq_prio; + } + } + +lt_prio: + list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); +eq_prio: + list_add_tail(&node->plist.node_list, &iter->plist.node_list); + + plist_check_head(head); +} + +/** + * plist_del - Remove a @node from plist. 
+ *
+ * @node:	&struct plist_node pointer - entry to be removed
+ * @head:	&struct plist_head pointer - list head
+ */
+void plist_del(struct plist_node *node, struct plist_head *head)
+{
+	plist_check_head(head);
+
+	if (!list_empty(&node->plist.prio_list)) {
+		struct plist_node *next = plist_first(&node->plist);
+
+		list_move_tail(&next->plist.prio_list, &node->plist.prio_list);
+		list_del_init(&node->plist.prio_list);
+	}
+
+	list_del_init(&node->plist.node_list);
+
+	plist_check_head(head);
+}
diff --git a/kernel/process.c b/kernel/process.c
index f7b00b20..885a2a64 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -19,25 +19,18 @@
 
 #define USER_STACK_NR_PAGES 8192
-#define KERNEL_STACK_NR_PAGES 16
+#define KERNEL_STACK_NR_PAGES 24
 
 extern long do_arch_prctl(unsigned long code, unsigned long address);
 
 void init_process_vm(struct process_vm *vm)
 {
-	int i;
-
 	ihk_mc_spinlock_init(&vm->memory_range_lock);
 	ihk_mc_spinlock_init(&vm->page_table_lock);
 
 	ihk_atomic_set(&vm->refcount, 1);
 	INIT_LIST_HEAD(&vm->vm_range_list);
 	vm->page_table = ihk_mc_pt_create();
-
-	/* Initialize futex queues */
-	for (i = 0; i < (1 << FUTEX_HASHBITS); ++i)
-		futex_queue_init(&vm->futex_queues[i]);
-
 }
 
 struct process *create_process(unsigned long user_pc)
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 4040381a..a3d16fc0 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -370,7 +370,7 @@ SYSCALL_DECLARE(exit_group)
 SYSCALL_DECLARE(mmap)
 {
 	struct vm_regions *region = &cpu_local_var(current)->vm->region;
-	unsigned long lockr; 
+	unsigned long lockr;
 
 	dkprintf("syscall.c,mmap,addr=%lx,len=%lx,prot=%lx,flags=%x,fd=%x,offset=%lx\n",
 		ihk_mc_syscall_arg0(ctx), ihk_mc_syscall_arg1(ctx),
@@ -865,14 +865,22 @@ SYSCALL_DECLARE(futex)
 	uint32_t *uaddr2 = (uint32_t *)ihk_mc_syscall_arg4(ctx);
 	uint32_t val3 = (uint32_t)ihk_mc_syscall_arg5(ctx);
 
-	dkprintf("futex,uaddr=%lx,op=%x, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x\n", (unsigned long)uaddr, op, val, utime, uaddr2, val3, *uaddr);
-
 	/* Mask off the FUTEX_PRIVATE_FLAG,
 	 * assume all futexes are address space private */
 	op = (op & FUTEX_CMD_MASK);
+
+	dkprintf("futex op=[%x, %s], uaddr=%lx, val=%x, utime=%lx, uaddr2=%lx, val3=%x, []=%x\n",
+		op,
+		(op == FUTEX_WAIT) ? "FUTEX_WAIT" :
+		(op == FUTEX_WAIT_BITSET) ? "FUTEX_WAIT_BITSET" :
+		(op == FUTEX_WAKE) ? "FUTEX_WAKE" :
+		(op == FUTEX_WAKE_OP) ? "FUTEX_WAKE_OP" :
+		(op == FUTEX_WAKE_BITSET) ? "FUTEX_WAKE_BITSET" :
+		(op == FUTEX_CMP_REQUEUE) ? "FUTEX_CMP_REQUEUE" :
+		(op == FUTEX_REQUEUE) ? "FUTEX_REQUEUE (NOT IMPL!)" : "unknown",
+		(unsigned long)uaddr, val, utime, uaddr2, val3, *uaddr);
 
 	if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) {
-		/* gettimeofday(&tv_now, NULL) from host */
 		struct syscall_request request IHK_DMA_ALIGN;
 		struct timeval tv_now;
 		request.number = 96;
@@ -904,6 +912,7 @@
 		long diff_nsec = nsec_timeout - nsec_now;
 
 		timeout = (diff_nsec / 1000) * 1100; // (usec * 1.1GHz)
+		dkprintf("futex timeout: %lu\n", timeout);
 	}
 
 	/* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE.
@@ -911,51 +920,6 @@ SYSCALL_DECLARE(futex) if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP) val2 = (uint32_t) (unsigned long) ihk_mc_syscall_arg3(ctx); - // we don't have timer interrupt and wakeup, so fake it by just pausing - if (utime && (op == FUTEX_WAIT_BITSET || op == FUTEX_WAIT)) { - // gettimeofday(&tv_now, NULL); - struct syscall_request request IHK_DMA_ALIGN; - struct timeval tv_now; - request.number = 96; - -#if 1 - unsigned long __phys; - if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table, - (void *)&tv_now, - &__phys)) { - return -EFAULT; - } - request.args[0] = __phys; - - int r = do_syscall(&request, ctx); - if(r < 0) { - return -EFAULT; - } - - dkprintf("futex,FUTEX_WAIT_BITSET,arg3!=NULL,pc=%lx\n", (unsigned long)ihk_mc_syscall_pc(ctx)); - - dkprintf(" now->tv_sec=%016ld,tv_nsec=%016ld\n", tv_now.tv_sec, tv_now.tv_usec * 1000); - dkprintf("utime->tv_sec=%016ld,tv_nsec=%016ld\n", utime->tv_sec, utime->tv_nsec); - - long nsec_now = ((long)tv_now.tv_sec * 1000000000ULL) + - tv_now.tv_usec * 1000; - long nsec_timeout = ((long)utime->tv_sec * 1000000000ULL) + - utime->tv_nsec * 1; - long diff_nsec = nsec_timeout - nsec_now; - - /* - if(diff_nsec > 0) { - dkprintf("pausing %016ldnsec\n", diff_nsec); - arch_delay(diff_nsec/1000); // unit is usec - } - */ - timeout = (diff_nsec / 1000) * 1100; // (usec * 1.1GHz) -#else - arch_delay(200000); // unit is usec - return -ETIMEDOUT; -#endif - } - return futex(uaddr, op, val, timeout, uaddr2, val2, val3); }
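
--
For reference, below is a minimal user-space exerciser of the FUTEX_WAIT /
FUTEX_WAKE paths this patch implements. It is a sketch against the
host-Linux futex(2) ABI (the McKernel side services the same command
values); it is an illustration only and is not part of the patch:

	#include <linux/futex.h>
	#include <pthread.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int futex_word;

	static long sys_futex(int *uaddr, int op, int val)
	{
		return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
	}

	static void *waiter(void *arg)
	{
		/* Block only while futex_word is still 0 (compare-and-block). */
		while (__atomic_load_n(&futex_word, __ATOMIC_ACQUIRE) == 0)
			sys_futex(&futex_word, FUTEX_WAIT_PRIVATE, 0);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, waiter, NULL);
		sleep(1);

		/* Publish the new value first, then wake one waiter. */
		__atomic_store_n(&futex_word, 1, __ATOMIC_RELEASE);
		sys_futex(&futex_word, FUTEX_WAKE_PRIVATE, 1);

		pthread_join(t, NULL);
		puts("woken");
		return 0;
	}

Build with: cc -pthread example.c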