diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index 2002d23a..dc5df9d3 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -2233,30 +2233,28 @@ int strcpy_from_user(char *dst, const char *src)
 	return err;
 }
 
-long getlong_user(const long *p)
+long getlong_user(long *dest, const long *p)
 {
 	int error;
-	long l;
 
-	error = copy_from_user(&l, p, sizeof(l));
+	error = copy_from_user(dest, p, sizeof(long));
 	if (error) {
 		return error;
 	}
 
-	return l;
+	return 0;
 }
 
-int getint_user(const int *p)
+int getint_user(int *dest, const int *p)
 {
 	int error;
-	int i;
 
-	error = copy_from_user(&i, p, sizeof(i));
+	error = copy_from_user(dest, p, sizeof(int));
 	if (error) {
 		return error;
 	}
 
-	return i;
+	return 0;
 }
 
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
diff --git a/kernel/futex.c b/kernel/futex.c
index 934174a3..b9999093 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -248,9 +248,13 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
 
 static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
 {
-	/* RIKEN: futexes are always on not swappable pages */
-	*dest = getint_user((int *)from);
-
+	/*
+	 * Officially we should call:
+	 * return getint_user((int *)dest, (int *)from);
+	 *
+	 * but McKernel on x86 can just access user-space.
+	 */
+	*dest = *(volatile uint32_t *)from;
 	return 0;
 }
 
@@ -670,26 +674,32 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 		uint64_t timeout)
 {
 	uint64_t time_remain = 0;
+	unsigned long irqstate;
+	struct thread *thread = cpu_local_var(current);
 
 	/*
 	 * The task state is guaranteed to be set before another task can
-	 * wake it. set_current_state() is implemented using set_mb() and
-	 * queue_me() calls spin_unlock() upon completion, both serializing
-	 * access to the hash list and forcing another memory barrier.
+	 * wake it.
+	 * queue_me() calls spin_unlock() upon completion, serializing
+	 * access to the hash list and forcing a memory barrier.
 	 */
 	xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
-	barrier();
+
+	/* Indicate spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	thread->spin_sleep = 1;
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
 	queue_me(q, hb);
 
 	if (!plist_node_empty(&q->list)) {
-		/* RIKEN: use mcos timers */
 		if (timeout) {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n",
 					cpu_local_var(current)->tid);
 			time_remain = schedule_timeout(timeout);
 		}
 		else {
 			dkprintf("futex_wait_queue_me(): tid: %d schedule()\n",
 					cpu_local_var(current)->tid);
-			schedule();
+			spin_sleep_or_schedule();
 			time_remain = 0;
 		}
@@ -698,6 +708,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
 
 	/* This does not need to be serialized */
 	cpu_local_var(current)->status = PS_RUNNING;
+	thread->spin_sleep = 0;
 
 	return time_remain;
 }
@@ -744,14 +755,17 @@ static int futex_wait_setup(uint32_t __user *uaddr, uint32_t val, int fshared,
 	 */
 	q->key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q->key);
-	if ((ret != 0))
+	if (ret != 0)
 		return ret;
 
 	*hb = queue_lock(q);
 
 	ret = get_futex_value_locked(&uval, uaddr);
-
-	/* RIKEN: get_futex_value_locked() always returns 0 on mckernel */
+	if (ret) {
+		queue_unlock(q, *hb);
+		put_futex_key(fshared, &q->key);
+		return ret;
+	}
 
 	if (uval != val) {
 		queue_unlock(q, *hb);
@@ -777,8 +791,6 @@ static int futex_wait(uint32_t __user *uaddr, int fshared,
 	q.bitset = bitset;
 	q.requeue_pi_key = NULL;
 
-	/* RIKEN: futex_wait_queue_me() calls schedule_timeout() if timer is set */
-
 retry:
 	/* Prepare to wait on uaddr. */
 	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
diff --git a/kernel/include/process.h b/kernel/include/process.h
index dad51722..a948ce86 100644
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@@ -759,6 +759,7 @@ extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64
 enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
 
 void schedule(void);
+void spin_sleep_or_schedule(void);
 void runq_add_thread(struct thread *thread, int cpu_id);
 void runq_del_thread(struct thread *thread, int cpu_id);
 int sched_wakeup_thread(struct thread *thread, int valid_states);
diff --git a/kernel/process.c b/kernel/process.c
index 65efd5c2..1dae876d 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -2668,6 +2668,65 @@ set_timer()
 	}
 }
 
+/*
+ * NOTE: it is assumed that a wait-queue (or futex queue) is
+ * set before calling this function.
+ * NOTE: one must set thread->spin_sleep to 1 before evaluating
+ * the wait condition to avoid lost wake-ups.
+ */
+void spin_sleep_or_schedule(void)
+{
+	struct thread *thread = cpu_local_var(current);
+	struct cpu_local_var *v;
+	int do_schedule = 0;
+	int woken = 0;
+	long irqstate;
+
+	/* Try to spin sleep */
+	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+	if (thread->spin_sleep == 0) {
+		dkprintf("%s: caught a lost wake-up!\n", __FUNCTION__);
+	}
+	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+	for (;;) {
+		/* Check if we need to reschedule */
+		irqstate =
+			ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock));
+		v = get_this_cpu_local_var();
+
+		if (v->flags & CPU_FLAG_NEED_RESCHED || v->runq_len > 1) {
+			do_schedule = 1;
+		}
+
+		ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
+
+		/* Check if we were woken up */
+		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
+		if (thread->spin_sleep == 0) {
+			woken = 1;
+		}
+
+		/* Indicate that we are not spinning any more */
+		if (do_schedule) {
+			thread->spin_sleep = 0;
+		}
+		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+
+		if (woken) {
+			return;
+		}
+
+		if (do_schedule) {
+			break;
+		}
+
+		cpu_pause();
+	}
+
+	schedule();
+}
+
 void schedule(void)
 {
 	struct cpu_local_var *v;
@@ -2834,7 +2893,6 @@ int
 sched_wakeup_thread(struct thread *thread, int valid_states)
 {
 	int status;
-	int spin_slept = 0;
 	unsigned long irqstate;
 	struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id);
 	struct process *proc = thread->proc;
@@ -2844,29 +2902,23 @@ sched_wakeup_thread(struct thread *thread, int valid_states)
 			proc->pid, valid_states, thread->status, thread->cpu_id,
 			ihk_mc_get_processor_id());
 
 	irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock));
-	if (thread->spin_sleep > 0) {
+	if (thread->spin_sleep == 1) {
 		dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n",
 				thread->cpu_id);
-		spin_slept = 1;
 		status = 0;
 	}
-	--thread->spin_sleep;
+	thread->spin_sleep = 0;
 	ihk_mc_spinlock_unlock(&(thread->spin_sleep_lock), irqstate);
 
-	if (spin_slept) {
-		return status;
-	}
-
 	irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
 
 	if (thread->status & valid_states) {
 		mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock);
-		if(proc->status != PS_EXITED)
+		if (proc->status != PS_EXITED)
 			proc->status = PS_RUNNING;
 		mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock);
 		xchg4((int *)(&thread->status), PS_RUNNING);
-		barrier();
 		status = 0;
 	}
 	else {
diff --git a/kernel/timer.c b/kernel/timer.c
index ea0ac831..031d15b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -54,136 +54,75 @@ void init_timers(void)
 }
 
 uint64_t schedule_timeout(uint64_t timeout)
-{
-	struct waitq_entry my_wait;
-	struct timer my_timer;
+{
 	struct thread *thread = cpu_local_var(current);
-	int irqstate;
-	int spin_sleep;
-
-	irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-	dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
-	spin_sleep = ++thread->spin_sleep;
-	ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+	long irqstate;
 
 	/* Spin sleep.. */
 	for (;;) {
+		int need_schedule;
+		struct cpu_local_var *v = get_this_cpu_local_var();
 		uint64_t t_s = rdtsc();
 		uint64_t t_e;
-		int spin_over = 0;
-
+
 		irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-
+
 		/* Woken up by someone? */
-		if (thread->spin_sleep < 1) {
+		if (thread->spin_sleep == 0) {
 			t_e = rdtsc();
-			spin_over = 1;
 
 			if ((t_e - t_s) < timeout) {
 				timeout -= (t_e - t_s);
 			}
 			else {
 				timeout = 1;
			}
+
+			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
+			break;
 		}
-
+
 		ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
 
-		if (!spin_over) {
-			t_s = rdtsc();
-			int need_schedule;
-			struct cpu_local_var *v = get_this_cpu_local_var();
-			int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
-			need_schedule = v->runq_len > 1 ? 1 : 0;
+		/* Give a chance to another thread (if any) in case the core is
+		 * oversubscribed, but make sure we will be re-scheduled */
+		irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
+		need_schedule = v->runq_len > 1 ? 1 : 0;
+
+		if (need_schedule) {
+			xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
 			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+			schedule();
 
-			/* Give a chance to another thread (if any) in case the core is
-			 * oversubscribed, but make sure we will be re-scheduled */
-			if (need_schedule) {
-				xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-				schedule();
-				xchg4(&(cpu_local_var(current)->status),
-					PS_INTERRUPTIBLE);
-			}
-			else {
-				/* Spin wait */
-				while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
-					cpu_pause();
-				}
-
-				if (timeout < LOOP_TIMEOUT) {
-					timeout = 0;
-					spin_over = 1;
-				}
-				else {
-					timeout -= LOOP_TIMEOUT;
-				}
-			}
+			/* Recheck if woken */
+			continue;
+		}
+		else {
+			ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
 		}
-
-		if (spin_over) {
-			dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
-				timeout);
-
-			/* Give a chance to another thread (if any) in case we timed out,
-			 * but make sure we will be re-scheduled */
-			if (timeout == 0) {
-				int need_schedule;
-				struct cpu_local_var *v = get_this_cpu_local_var();
-				int irqstate =
-					ihk_mc_spinlock_lock(&(v->runq_lock));
-				need_schedule = v->runq_len > 1 ? 1 : 0;
-				ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
+		/* Spin wait */
+		while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
+			cpu_pause();
+		}
 
-				if (need_schedule) {
-					xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
-					schedule();
-					xchg4(&(cpu_local_var(current)->status),
-						PS_INTERRUPTIBLE);
-				}
-			}
-
+		/* Time out? */
+		if (timeout < LOOP_TIMEOUT) {
+			timeout = 0;
+
+			/* We are not sleeping any more */
 			irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
-			if (spin_sleep == thread->spin_sleep) {
-				--thread->spin_sleep;
-			}
+			thread->spin_sleep = 0;
 			ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
 
-			return timeout;
+			break;
+		}
+		else {
+			timeout -= LOOP_TIMEOUT;
 		}
 	}
 
-	/* Init waitq and wait entry for this timer */
-	my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
-	my_timer.thread = cpu_local_var(current);
-	waitq_init(&my_timer.processes);
-	waitq_init_entry(&my_wait, cpu_local_var(current));
-
-	/* Add ourself to the timer queue */
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-	list_add_tail(&my_timer.list, &timers);
-
-	dkprintf("schedule_timeout() sleep timeout: %lu\n", my_timer.timeout);
-
-	/* Add ourself to the waitqueue and sleep */
-	waitq_prepare_to_wait(&my_timer.processes, &my_wait, PS_INTERRUPTIBLE);
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-	schedule();
-	waitq_finish_wait(&my_timer.processes, &my_wait);
-
-	ihk_mc_spinlock_lock_noirq(&timers_lock);
-
-	/* Waken up by someone else then timeout? */
-	if (my_timer.timeout) {
-		list_del(&my_timer.list);
-	}
-	ihk_mc_spinlock_unlock_noirq(&timers_lock);
-
-	dkprintf("schedule_timeout() woken up, timeout: %lu\n",
-		my_timer.timeout);
-
-	return my_timer.timeout;
+	return timeout;
 }
diff --git a/lib/include/memory.h b/lib/include/memory.h
index b4ad4f33..edeaa080 100644
--- a/lib/include/memory.h
+++ b/lib/include/memory.h
@@ -35,8 +35,8 @@ void *phys_to_virt(unsigned long p);
 int copy_from_user(void *dst, const void *src, size_t siz);
 int strlen_user(const char *s);
 int strcpy_from_user(char *dst, const char *src);
-long getlong_user(const long *p);
-int getint_user(const int *p);
+long getlong_user(long *dest, const long *p);
+int getint_user(int *dest, const int *p);
 int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz);
 int copy_to_user(void *dst, const void *src, size_t siz);
 int setlong_user(long *dst, long data);
diff --git a/lib/string.c b/lib/string.c
index 41d05013..03c1dfe7 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -275,13 +275,21 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	long *_flat;
 	char *p;
 	long r;
-	int n;
+	int n, ret;
 
 	/* How many strings do we have? */
 	if (nr_strings == -1) {
-		for (nr_strings = 0; (r = getlong_user((void *)(strings + nr_strings))) > 0; ++nr_strings);
-		if(r < 0)
-			return r;
+		nr_strings = 0;
+		for (;;) {
+			ret = getlong_user(&r, (void *)(strings + nr_strings));
+			if (ret < 0)
+				return ret;
+
+			if (r == 0)
+				break;
+
+			++nr_strings;
+		}
 	}
 
 	/* Count full length */
@@ -295,13 +303,19 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
-		int len = strlen_user(userp);
+		char *userp;
+		int len;
+
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
+
+		len = strlen_user(userp);
 		if(len < 0)
 			return len;
 
 		// Pointer + actual value
-		full_len += sizeof(char *) + len + 1;
+		full_len += sizeof(char *) + len + 1;
 	}
 
 	full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1);
@@ -326,8 +340,13 @@ int flatten_strings_from_user(int nr_strings, char *first, char **strings, char
 	}
 
 	for (string_i = 0; string_i < nr_strings; ++string_i) {
-		char *userp = (char *)getlong_user((void *)(strings + string_i));
+		char *userp;
 		_flat[n++] = p - (char *)_flat;
+
+		ret = getlong_user((long *)&userp, (void *)(strings + string_i));
+		if (ret < 0)
+			return ret;
+
 		strcpy_from_user(p, userp);
 		p = strchr(p, '\0') + 1;
 	}
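
Note (not part of the patch): the header and caller updates above all follow from one interface change. getlong_user()/getint_user() now report the value through an out-parameter and reserve the return value for the error code, so a legitimately negative user value can no longer be mistaken for a failure (the old flatten_strings_from_user() loop relied on exactly that ambiguity). A minimal user-space sketch of the new calling convention follows; the copy_from_user() stub is hypothetical and only stands in for the kernel helper so the example compiles on its own.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for the kernel's copy_from_user(): 0 on success,
 * non-zero on fault. The real helper validates the user pointer. */
static int copy_from_user(void *dst, const void *src, size_t siz)
{
	memcpy(dst, src, siz);
	return 0;
}

/* Mirrors the patched convention: value via *dest, error via return. */
static int getint_user(int *dest, const int *p)
{
	int error;

	error = copy_from_user(dest, p, sizeof(int));
	if (error) {
		return error;
	}

	return 0;
}

int main(void)
{
	int user_val = -1;	/* a legal value the old API could not return safely */
	int val, ret;

	/* Old style (ambiguous):  val = getint_user(&user_val);          */
	/* New style: error code and value travel on separate channels.   */
	ret = getint_user(&val, &user_val);
	if (ret < 0)
		return 1;

	printf("read %d\n", val);
	return 0;
}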
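A second note on the scheduling side: spin_sleep_or_schedule(), the reworked schedule_timeout(), and sched_wakeup_thread() all share one handshake around thread->spin_sleep, protected by spin_sleep_lock. The sleeper sets the flag to 1 before publishing its wait (queue_me()/the futex queue) and then polls it; the waker clears it under the same lock, so a wake-up racing with the check is observed instead of lost; when the run queue is busy the sleeper stops spinning and falls back to schedule(). The sketch below is a user-space model of that handshake only, not McKernel code: pthread mutexes stand in for ihk_mc_spinlock, sched_yield() stands in for cpu_pause()/schedule(), the fall-back-to-schedule() path is not modeled, and all names are illustrative.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t spin_sleep_lock = PTHREAD_MUTEX_INITIALIZER;
static int spin_sleep;	/* 1 = sleeper is spinning, 0 = woken (or not sleeping) */

static void *sleeper(void *arg)
{
	(void)arg;

	/* Announce the spin sleep *before* the wait condition would be
	 * evaluated, mirroring futex_wait_queue_me(): a wake-up arriving in
	 * between is then caught by the check below instead of being lost. */
	pthread_mutex_lock(&spin_sleep_lock);
	spin_sleep = 1;
	pthread_mutex_unlock(&spin_sleep_lock);

	for (;;) {
		int woken;

		pthread_mutex_lock(&spin_sleep_lock);
		woken = (spin_sleep == 0);	/* waker cleared the flag */
		pthread_mutex_unlock(&spin_sleep_lock);

		if (woken)
			break;

		sched_yield();	/* cpu_pause()/schedule() fallback in the kernel */
	}

	puts("sleeper: woken");
	return NULL;
}

static void *waker(void *arg)
{
	(void)arg;

	/* sched_wakeup_thread() equivalent: once the sleeper is visible
	 * (in the kernel: once it sits on the futex queue), clear the flag
	 * under the lock so the spinning side observes the wake-up. */
	for (;;) {
		pthread_mutex_lock(&spin_sleep_lock);
		if (spin_sleep == 1) {
			spin_sleep = 0;
			pthread_mutex_unlock(&spin_sleep_lock);
			break;
		}
		pthread_mutex_unlock(&spin_sleep_lock);
		sched_yield();
	}
	return NULL;
}

int main(void)
{
	pthread_t s, w;

	pthread_create(&s, NULL, sleeper, NULL);
	pthread_create(&w, NULL, waker, NULL);
	pthread_join(s, NULL);
	pthread_join(w, NULL);
	return 0;
}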