futex and rlimit

Author: Balazs Gerofi
Date:   2012-05-08 18:32:43 +09:00
parent c3463e7393
commit a7c0225423
17 changed files with 1534 additions and 65 deletions


@@ -1,6 +1,6 @@
AALDIR=$(AALBASE)/$(TARGET)
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
OBJS += process.o copy.o waitq.o
OBJS += process.o copy.o waitq.o futex.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__

kernel/futex.c (new file, 467 lines)

@@ -0,0 +1,467 @@
/*
* Kitten LWK futex code adaptation.
* Copyright (c) 2012 RIKEN AICS
*/
/*
* Copyright (c) 2008 Sandia National Laboratories
*
* Futex code adapted from Linux 2.6.27.9, original copyright below.
* Simplified to only support address-space (process-private) futexes.
* Removed demand-paging, cow, etc. complications since LWK doesn't
* require these.
*/
/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
*
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
* Robust futex support started by Ingo Molnar
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
*
* PI-futex support started by Ingo Molnar and Thomas Gleixner
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
* PRIVATE futexes by Eric Dumazet
* Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
*
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <process.h>
#include <futex.h>
#include <hash.h>
#include <aal/lock.h>
#include <list.h>
#include <cls.h>
#include <kmsg.h>
#if 0
#include <lwk/kernel.h>
#include <lwk/task.h>
#include <lwk/aspace.h>
#include <lwk/futex.h>
#include <lwk/hash.h>
#include <lwk/sched.h>
#ifdef __UACCESS__
#include <arch/uaccess.h>
#endif
#endif
void futex_queue_init(struct futex_queue *queue)
{
aal_mc_spinlock_init(&queue->lock);
INIT_LIST_HEAD(&queue->futex_list);
}
static int uaddr_is_valid(uint32_t __user *uaddr)
{
#ifdef __UACCESS__
return access_ok(VERIFY_WRITE, uaddr, sizeof(uint32_t));
#else
return 1;
#endif
}
static int futex_init(struct futex *futex, uint32_t __user *uaddr,
uint32_t bitset)
{
if (!uaddr_is_valid(uaddr))
return -EINVAL;
futex->uaddr = uaddr;
futex->bitset = bitset;
waitq_init(&futex->waitq);
return 0;
}
static struct futex_queue *get_queue(uint32_t __user *uaddr)
{
uint64_t hash = hash_64((uint64_t)uaddr, FUTEX_HASHBITS);
return &cpu_local_var(current)->vm->futex_queues[hash];
}
static struct futex_queue *queue_lock(struct futex *futex, int *irqflags)
{
struct futex_queue *queue = get_queue(futex->uaddr);
futex->lock_ptr = &queue->lock;
*irqflags = aal_mc_spinlock_lock(&queue->lock);
return queue;
}
static void queue_unlock(struct futex_queue *futex_queue, int irqflags)
{
aal_mc_spinlock_unlock(&futex_queue->lock, irqflags);
}
static void queue_me(struct futex *futex, struct futex_queue *futex_queue)
{
list_add_tail(&futex->link, &futex_queue->futex_list);
}
static int unqueue_me(struct futex *futex)
{
aal_spinlock_t *lock_ptr;
int irqflags;
int status = 0;
/* In the common case we don't take the spinlock, which is nice. */
retry:
lock_ptr = futex->lock_ptr;
barrier();
if (lock_ptr != NULL) {
irqflags = aal_mc_spinlock_lock(lock_ptr);
/*
* q->lock_ptr can change between reading it and
* spin_lock(), causing us to take the wrong lock. This
* corrects the race condition.
*
* Reasoning goes like this: if we have the wrong lock,
* q->lock_ptr must have changed (maybe several times)
* between reading it and the spin_lock(). It can
* change again after the spin_lock() but only if it was
* already changed before the spin_lock(). It cannot,
* however, change back to the original value. Therefore
* we can detect whether we acquired the correct lock.
*/
if (lock_ptr != futex->lock_ptr) {
aal_mc_spinlock_unlock(lock_ptr, irqflags);
goto retry;
}
//WARN_ON(list_empty(&futex->link));
list_del(&futex->link);
aal_mc_spinlock_unlock(lock_ptr, irqflags);
status = 1;
}
return status;
}
static void lock_two_queues(struct futex_queue *queue1, int *irqflags1,
struct futex_queue *queue2, int *irqflags2)
{
if (queue1 < queue2)
*irqflags1 = aal_mc_spinlock_lock(&queue1->lock);
*irqflags2 = aal_mc_spinlock_lock(&queue2->lock);
if (queue1 > queue2)
*irqflags1 = aal_mc_spinlock_lock(&queue1->lock);
}
static void unlock_two_queues(struct futex_queue *queue1, int irqflags1,
struct futex_queue *queue2, int irqflags2)
{
if (queue1 == queue2) {
aal_mc_spinlock_unlock(&queue2->lock, irqflags2);
}
else {
aal_mc_spinlock_unlock(&queue2->lock, irqflags2);
aal_mc_spinlock_unlock(&queue1->lock, irqflags1);
}
}
/** Puts a task to sleep waiting on a futex. */
static int futex_wait(uint32_t __user *uaddr, uint32_t val,
uint64_t timeout, uint32_t bitset)
{
DECLARE_WAITQ_ENTRY(wait, cpu_local_var(current));
int status;
uint32_t uval;
struct futex futex;
struct futex_queue *queue;
int irqflags;
uint64_t time_remain = 0;
if (!bitset)
return -EINVAL;
/* This verifies that uaddr is sane */
if ((status = futex_init(&futex, uaddr, bitset)) != 0)
return status;
/* Lock the futex queue corresponding to uaddr */
queue = queue_lock(&futex, &irqflags);
/* Get the value from user-space. Since we don't have
* paging, the only options are for this to succeed (with no
* page faults) or fail, returning -EFAULT. There is no way
* for us to be put to sleep, so holding the queue's spinlock
* is fine. */
#ifdef __UACCESS__
if ((status = get_user(uval, uaddr)) != 0)
goto error;
#else
uval = *uaddr;
status = 0;
#endif
/* The user-space value must match the value passed in */
if (uval != val) {
status = -EWOULDBLOCK;
goto error;
}
/* Add ourselves to the futex queue and drop our lock on it */
queue_me(&futex, queue);
queue_unlock(queue, irqflags);
/* Add ourselves to the futex's waitq and go to sleep */
cpu_local_var(current)->status = PS_INTERRUPTIBLE;
waitq_add_entry(&futex.waitq, &wait);
if (!list_empty(&futex.link)) {
// We don't have timers for now, let's sleep forever,
// and pretend we were woken up
//time_remain = schedule_timeout(timeout);
schedule();
time_remain = 10;
}
cpu_local_var(current)->status = PS_RUNNING;
/*
* NOTE: We don't remove ourselves from the waitq because
* we are the only user of it.
*/
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&futex))
return 0;
if (time_remain == 0)
return -ETIMEDOUT;
/* We expect that there is a signal pending, but another thread
* may have handled it for us already. */
return -EINTR;
error:
queue_unlock(queue, irqflags);
return status;
}
/*
* The futex_queue's lock must be held when this is called.
* Afterwards, the futex_queue must not be accessed.
*/
static void wake_futex(struct futex *futex)
{
list_del_init(&futex->link);
/*
* The lock in waitq_wakeup() is a crucial memory barrier after the
* list_del_init() and also before assigning to futex->lock_ptr.
*/
waitq_wakeup(&futex->waitq);
/*
* The waiting task can free the futex as soon as this is written,
* without taking any locks. This must come last.
*
* A memory barrier is required here to prevent the following store
* to lock_ptr from getting ahead of the wakeup. Clearing the lock
* at the end of waitq_wakeup() does not prevent this store from
* moving.
*/
barrier();
futex->lock_ptr = NULL;
}
/** Wakes up nr_wake tasks waiting on a futex. */
static int futex_wake(uint32_t __user *uaddr, int nr_wake, uint32_t bitset)
{
struct futex_queue *queue;
struct list_head *head;
struct futex *this, *next;
int nr_woke = 0;
int irqflags;
if (!bitset)
return -EINVAL;
if (!uaddr_is_valid(uaddr))
return -EINVAL;
queue = get_queue(uaddr);
irqflags = aal_mc_spinlock_lock(&queue->lock);
head = &queue->futex_list;
list_for_each_entry_safe(this, next, head, link) {
if ((this->uaddr == uaddr) && (this->bitset & bitset)) {
wake_futex(this);
if (++nr_woke >= nr_wake)
break;
}
}
aal_mc_spinlock_unlock(&queue->lock, irqflags);
return nr_woke;
}
/** Conditionally wakes up tasks that are waiting on futexes. */
static int futex_wake_op(uint32_t __user *uaddr1, uint32_t __user *uaddr2,
int nr_wake1, int nr_wake2, int op)
{
struct futex_queue *queue1, *queue2;
int irqflags1 = 0;
int irqflags2 = 0;
struct list_head *head;
struct futex *this, *next;
int op_result, nr_woke1 = 0, nr_woke2 = 0;
if (!uaddr_is_valid(uaddr1) || !uaddr_is_valid(uaddr2))
return -EINVAL;
queue1 = get_queue(uaddr1);
queue2 = get_queue(uaddr2);
lock_two_queues(queue1, &irqflags1, queue2, &irqflags2);
op_result = futex_atomic_op_inuser(op, (int *)uaddr2);
if (op_result < 0) {
unlock_two_queues(queue1, irqflags1, queue2, irqflags2);
return op_result;
}
head = &queue1->futex_list;
list_for_each_entry_safe(this, next, head, link) {
if (this->uaddr == uaddr1) {
wake_futex(this);
if (++nr_woke1 >= nr_wake1)
break;
}
}
if (op_result > 0) {
head = &queue2->futex_list;
list_for_each_entry_safe(this, next, head, link) {
if (this->uaddr == uaddr2) {
wake_futex(this);
if (++nr_woke2 >= nr_wake2)
break;
}
}
}
unlock_two_queues(queue1, irqflags1, queue2, irqflags2);
return nr_woke1 + nr_woke2;
}
/** Conditionally wakes up or requeues tasks that are waiting on futexes. */
static int futex_cmp_requeue(uint32_t __user *uaddr1, uint32_t __user *uaddr2,
int nr_wake, int nr_requeue, uint32_t cmpval)
{
struct futex_queue *queue1, *queue2;
int irqflags1, irqflags2;
struct list_head *head1, *head2;
struct futex *this, *next;
uint32_t curval;
int status, nr_woke = 0;
if (!uaddr_is_valid(uaddr1) || !uaddr_is_valid(uaddr2))
return -EINVAL;
queue1 = get_queue(uaddr1);
queue2 = get_queue(uaddr2);
lock_two_queues(queue1, &irqflags1, queue2, &irqflags2);
#ifdef __UACCESS__
if ((status = get_user(curval, uaddr1)) != 0)
goto out_unlock;
#else
curval = *uaddr1;
status = 0;
#endif
if (curval != cmpval) {
status = -EAGAIN;
goto out_unlock;
}
head1 = &queue1->futex_list;
head2 = &queue2->futex_list;
list_for_each_entry_safe(this, next, head1, link) {
if (this->uaddr != uaddr1)
continue;
if (++nr_woke <= nr_wake) {
wake_futex(this);
} else {
/* If uaddr1 and uaddr2 hash to the
* same futex queue, no need to requeue */
if (head1 != head2) {
list_move_tail(&this->link, head2);
this->lock_ptr = &queue2->lock;
}
this->uaddr = uaddr2;
if (nr_woke - nr_wake >= nr_requeue)
break;
}
}
status = nr_woke;
out_unlock:
unlock_two_queues(queue1, irqflags1, queue2, irqflags2);
return status;
}
int futex(uint32_t __user *uaddr, int op, uint32_t val, uint64_t timeout,
uint32_t __user *uaddr2, uint32_t val2, uint32_t val3)
{
int status;
switch (op) {
case FUTEX_WAIT:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAIT_BITSET:
status = futex_wait(uaddr, val, timeout, val3);
break;
case FUTEX_WAKE:
val3 = FUTEX_BITSET_MATCH_ANY;
case FUTEX_WAKE_BITSET:
status = futex_wake(uaddr, val, val3);
break;
case FUTEX_WAKE_OP:
status = futex_wake_op(uaddr, uaddr2, val, val2, val3);
break;
case FUTEX_CMP_REQUEUE:
status = futex_cmp_requeue(uaddr, uaddr2, val, val2, val3);
break;
default:
kprintf("sys_futex() op=%d not supported (pid: )\n",
op, &cpu_local_var(current)->pid);
status = -ENOSYS;
}
return status;
}
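
For reference, a minimal user-space sketch of how this entry point is typically reached, assuming the Linux-compatible syscall numbering registered in the syscall table below ([202] = sys_futex) and the standard syscall(2) wrapper; the flag word and helper names are illustrative only:

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>

#define FUTEX_WAIT 0
#define FUTEX_WAKE 1

static uint32_t flag;                      /* the futex word, initially 0 */

static void wait_for_flag(void)            /* waiter: ends up in futex_wait() */
{
        while (__atomic_load_n(&flag, __ATOMIC_ACQUIRE) == 0)
                /* sleeps only if the kernel still sees flag == 0 */
                syscall(SYS_futex, &flag, FUTEX_WAIT, 0, NULL, NULL, 0);
}

static void set_flag(void)                 /* waker: ends up in futex_wake() */
{
        __atomic_store_n(&flag, 1, __ATOMIC_RELEASE);
        syscall(SYS_futex, &flag, FUTEX_WAKE, 1, NULL, NULL, 0);
}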


@@ -45,7 +45,7 @@ static void process_msg_prepare_process(unsigned long rphys)
+ sizeof(struct program_image_section) * n);
proc = create_process(p->entry);
proc->pid = p->pid;
proc->pid = 1024;
/* TODO: Clear it at the proper timing */
cpu_local_var(scp).post_idx = 0;

kernel/include/asm.h (new file, 38 lines)

@@ -0,0 +1,38 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
#ifdef __ASSEMBLY__
# define __ASM_FORM(x) x
# define __ASM_EX_SEC .section __ex_table
#else
# define __ASM_FORM(x) " " #x " "
# define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
#endif
# define __ASM_SEL(a,b) __ASM_FORM(b)
#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q)
#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg)
#define _ASM_PTR __ASM_SEL(.long, .quad)
#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
#define _ASM_MOV_UL __ASM_SIZE(mov)
#define _ASM_INC __ASM_SIZE(inc)
#define _ASM_DEC __ASM_SIZE(dec)
#define _ASM_ADD __ASM_SIZE(add)
#define _ASM_SUB __ASM_SIZE(sub)
#define _ASM_XADD __ASM_SIZE(xadd)
#define _ASM_AX __ASM_REG(ax)
#define _ASM_BX __ASM_REG(bx)
#define _ASM_CX __ASM_REG(cx)
#define _ASM_DX __ASM_REG(dx)
/* Exception table entry */
# define _ASM_EXTABLE(from,to) \
__ASM_EX_SEC \
_ASM_ALIGN "\n" \
_ASM_PTR #from "," #to "\n" \
" .previous\n"
#endif /* _ASM_X86_ASM_H */

kernel/include/futex.h (new file, 242 lines)

@@ -0,0 +1,242 @@
/* Kitten LWK futex adaptation */
#ifndef _LWK_FUTEX_H
#define _LWK_FUTEX_H
/** \name Futex Commands
* @{
*/
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10
// @}
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
/** \name Futex Operations, used for FUTEX_WAKE_OP
* @{
*/
#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */
#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */
#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */
#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */
#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */
// @}
/* FUTEX_WAKE_OP will perform atomically
int oldval = *(int *)UADDR2;
*(int *)UADDR2 = oldval OP OPARG;
if (oldval CMP CMPARG)
wake UADDR2; */
#define FUTEX_OP(op, oparg, cmp, cmparg) \
(((op & 0xf) << 28) | ((cmp & 0xf) << 24) \
| ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
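A minimal worked example of this encoding, assuming a caller that wants FUTEX_WAKE_OP to set the word at UADDR2 to 1 and wake a waiter only if its old value was 0:
/*
 * FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0)
 *   == (0 << 28) | (0 << 24) | (1 << 12) | 0
 *   == 0x00001000
 * i.e. "*(int *)UADDR2 = 1; if the old value was 0, wake UADDR2".
 */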
/*
* bitset with all bits set for the FUTEX_xxx_BITSET OPs to request a
* match of any bit.
*/
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
#ifdef __KERNEL__
#include <aal/lock.h>
#include <list.h>
#include <process.h>
#include <waitq.h>
#ifndef _ASM_X86_FUTEX_H
#define _ASM_X86_FUTEX_H
#ifdef __KERNEL__
/* We don't deal with uaccess at the moment, because x86 can access
* userspace directly, we rely on glibc and the app developers.
*/
#ifdef __UACCESS__
#include <arch/uaccess.h>
#endif
#include <asm.h>
#include <errno.h>
#define __user
#if 0
#include <arch/processor.h>
#include <arch/system.h>
#endif
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
asm volatile("1:\t" insn "\n" \
"2:\t.section .fixup,\"ax\"\n" \
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
_ASM_EXTABLE(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "0" (oparg), "1" (0))
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
asm volatile("1:\tmovl %2, %0\n" \
"\tmovl\t%0, %3\n" \
"\t" insn "\n" \
"2:\tlock; cmpxchgl %3, %2\n" \
"\tjnz\t1b\n" \
"3:\t.section .fixup,\"ax\"\n" \
"4:\tmov\t%5, %1\n" \
"\tjmp\t3b\n" \
"\t.previous\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
#endif
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
__futex_atomic_op1("lock; xaddl %0, %2", ret, oldval,
uaddr, oparg);
break;
case FUTEX_OP_OR:
__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
break;
case FUTEX_OP_XOR:
__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
break;
default:
ret = -ENOSYS;
}
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
ret = (oldval == cmparg);
break;
case FUTEX_OP_CMP_NE:
ret = (oldval != cmparg);
break;
case FUTEX_OP_CMP_LT:
ret = (oldval < cmparg);
break;
case FUTEX_OP_CMP_GE:
ret = (oldval >= cmparg);
break;
case FUTEX_OP_CMP_LE:
ret = (oldval <= cmparg);
break;
case FUTEX_OP_CMP_GT:
ret = (oldval > cmparg);
break;
default:
ret = -ENOSYS;
}
}
return ret;
}
static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
int newval)
{
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
#endif
asm volatile("1:\tlock; cmpxchgl %3, %1\n"
"2:\t.section .fixup, \"ax\"\n"
"3:\tmov %2, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE(1b, 3b)
: "=a" (oldval), "+m" (*uaddr)
: "i" (-EFAULT), "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
#endif // __KERNEL__
#endif // _ASM_X86_FUTEX_H
#define FUTEX_HASHBITS 8 /* 256 entries in each futex hash tbl */
/** Futex tracking structure.
*
* A futex has a woken state, just like tasks have TASK_RUNNING.
* It is considered woken when list_empty(&futex->link) || futex->lock_ptr == 0.
* The order of wakeup is always to make the first condition true, then
* wake up futex->waitq, then make the second condition true.
*/
struct futex {
struct list_head link;
struct waitq waitq;
aal_spinlock_t * lock_ptr;
uint32_t __user * uaddr;
uint32_t bitset;
};
struct futex_queue {
aal_spinlock_t lock;
struct list_head futex_list;
};
extern void
futex_queue_init(
struct futex_queue * queue
);
extern int
futex(
uint32_t __user * uaddr,
int op,
uint32_t val,
uint64_t timeout,
uint32_t __user * uaddr2,
uint32_t val2,
uint32_t val3
);
#endif
#endif

kernel/include/hash.h (new file, 70 lines)

@@ -0,0 +1,70 @@
#ifndef _LINUX_HASH_H
#define _LINUX_HASH_H
/* Fast hashing routine for ints, longs and pointers.
(C) 2002 William Lee Irwin III, IBM */
/*
* Knuth recommends primes in approximately golden ratio to the maximum
* integer representable by a machine word for multiplicative hashing.
* Chuck Lever verified the effectiveness of this technique:
* http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
*
* These primes are chosen to be bit-sparse, that is operations on
* them can use shifts and additions instead of multiplications for
* machines where multiplications are slow.
*/
#define BITS_PER_LONG 64
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
#if BITS_PER_LONG == 32
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
#define hash_long(val, bits) hash_32(val, bits)
#elif BITS_PER_LONG == 64
#define hash_long(val, bits) hash_64(val, bits)
#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
#else
#error Wordsize not 32 or 64
#endif
static inline uint64_t hash_64(uint64_t val, unsigned int bits)
{
uint64_t hash = val;
/* Sigh, gcc can't optimise this alone like it does for 32 bits. */
uint64_t n = hash;
n <<= 18;
hash -= n;
n <<= 33;
hash -= n;
n <<= 3;
hash += n;
n <<= 3;
hash -= n;
n <<= 4;
hash += n;
n <<= 2;
hash += n;
/* High bits are more random, so use them. */
return hash >> (64 - bits);
}
static inline uint32_t hash_32(uint32_t val, unsigned int bits)
{
/* On some cpus multiply is faster, on others gcc will do shifts */
uint32_t hash = val * GOLDEN_RATIO_PRIME_32;
/* High bits are more random, so use them. */
return hash >> (32 - bits);
}
static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
{
return hash_long((unsigned long)ptr, bits);
}
#endif /* _LINUX_HASH_H */
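
get_queue() in kernel/futex.c uses hash_64() to map a futex uaddr onto one of the 1 << FUTEX_HASHBITS per-process queues. The shift/add sequence above is multiplication by GOLDEN_RATIO_PRIME_64 spelled out without a multiply instruction, so both forms must agree modulo 2^64; a standalone self-check sketch (a hypothetical user-space test, not part of this commit, using arbitrary example addresses):

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
#define FUTEX_HASHBITS 8

static uint64_t hash_64_shifts(uint64_t val, unsigned int bits)
{
        uint64_t hash = val, n = val;
        n <<= 18; hash -= n;    /* - 2^18 */
        n <<= 33; hash -= n;    /* - 2^51 */
        n <<= 3;  hash += n;    /* + 2^54 */
        n <<= 3;  hash -= n;    /* - 2^57 */
        n <<= 4;  hash += n;    /* + 2^61 */
        n <<= 2;  hash += n;    /* + 2^63 */
        return hash >> (64 - bits);
}

int main(void)
{
        uint64_t uaddrs[] = { 0x7fff12345678UL, 0x601040UL, 0xdeadbeefUL };
        for (int i = 0; i < 3; i++) {
                uint64_t a = hash_64_shifts(uaddrs[i], FUTEX_HASHBITS);
                uint64_t b = (uaddrs[i] * GOLDEN_RATIO_PRIME_64)
                             >> (64 - FUTEX_HASHBITS);
                printf("%#lx -> queue %lu (%s)\n",
                       (unsigned long)uaddrs[i], (unsigned long)a,
                       a == b ? "matches multiply" : "MISMATCH");
        }
        return 0;
}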


@@ -0,0 +1,36 @@
/* Never include this file directly. Include <lwk/compiler.h> instead. */
/*
* Common definitions for all gcc versions go here.
*/
/* Optimization barrier */
/* The "volatile" is due to gcc bugs
* NOTE: already defined in aal/manycore/generic/include/aal/cpu.h
* #define barrier() __asm__ __volatile__("": : :"memory")
*/
/* This macro obfuscates arithmetic on a variable address so that gcc
shouldn't recognize the original var, and make assumptions about it */
/*
* Versions of the ppc64 compiler before 4.1 had a bug where use of
* RELOC_HIDE could trash r30. The bug can be worked around by changing
* the inline assembly constraint from =g to =r, in this particular
* case either is valid.
*/
#define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); })
#define inline inline __attribute__((always_inline))
#define __inline__ __inline__ __attribute__((always_inline))
#define __inline __inline __attribute__((always_inline))
#define __deprecated __attribute__((deprecated))
#define noinline __attribute__((noinline))
#define __attribute_pure__ __attribute__((pure))
#define __attribute_const__ __attribute__((__const__))
#define __weak __attribute__((weak))
#define __noreturn __attribute__((noreturn))


@@ -0,0 +1,24 @@
/* Never include this file directly. Include <lwk/compiler.h> instead. */
/* These definitions are for GCC v4.x. */
#include <lwk/compiler-gcc.h>
#ifdef CONFIG_FORCED_INLINING
# undef inline
# undef __inline__
# undef __inline
# define inline inline __attribute__((always_inline))
# define __inline__ __inline__ __attribute__((always_inline))
# define __inline __inline __attribute__((always_inline))
#endif
#define __used __attribute__((__used__))
#define __must_check __attribute__((warn_unused_result))
#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
#define __always_inline inline __attribute__((always_inline))
/*
* A trick to suppress uninitialized variable warning without generating any
* code
*/
#define uninitialized_var(x) x = x


@@ -0,0 +1,146 @@
#ifndef _LWK_COMPILER_H
#define _LWK_COMPILER_H
#ifndef __ASSEMBLY__
#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
# define __kernel /* default address space */
# define __safe __attribute__((safe))
# define __force __attribute__((force))
# define __nocast __attribute__((nocast))
# define __iomem __attribute__((noderef, address_space(2)))
# define __acquires(x) __attribute__((context(0,1)))
# define __releases(x) __attribute__((context(1,0)))
# define __acquire(x) __context__(1)
# define __release(x) __context__(-1)
# define __cond_lock(x) ((x) ? ({ __context__(1); 1; }) : 0)
# define __unused(x) x __attribute__((unused))
extern void __chk_user_ptr(void __user *);
extern void __chk_io_ptr(void __iomem *);
#else
# define __user
# define __kernel
# define __safe
# define __force
# define __nocast
# define __iomem
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
# define __builtin_warning(x, y...) (1)
# define __acquires(x)
# define __releases(x)
# define __acquire(x) (void)0
# define __release(x) (void)0
# define __cond_lock(x) (x)
# define __unused(x) x
#endif
#ifdef __KERNEL__
#if __GNUC__ > 4
#error no compiler-gcc.h file for this gcc version
#elif __GNUC__ == 4
# include <lwk/compiler-gcc4.h>
#else
# error Sorry, your compiler is too old/not recognized.
#endif
/*
* Generic compiler-dependent macros required for kernel
* build go below this comment. Actual compiler/compiler version
* specific implementations come from the above header files
*/
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
__ptr = (unsigned long) (ptr); \
(typeof(ptr)) (__ptr + (off)); })
#endif
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#ifdef __KERNEL__
/*
* Allow us to mark functions as 'deprecated' and have gcc emit a nice
* warning for each use, in hopes of speeding the functions removal.
* Usage is:
* int __deprecated foo(void)
*/
#ifndef __deprecated
# define __deprecated /* unimplemented */
#endif
#ifndef __must_check
#define __must_check
#endif
/*
* Allow us to avoid 'defined but not used' warnings on functions and data,
* as well as force them to be emitted to the assembly file.
*
* As of gcc 3.4, static functions that are not marked with attribute((used))
* may be elided from the assembly file. As of gcc 3.4, static data not so
* marked will not be elided, but this may change in a future gcc version.
*
* In prior versions of gcc, such functions and data would be emitted, but
* would be warned about except with attribute((unused)).
*/
#ifndef __used
# define __used /* unimplemented */
#endif
/*
* From the GCC manual:
*
* Many functions have no effects except the return value and their
* return value depends only on the parameters and/or global
* variables. Such a function can be subject to common subexpression
* elimination and loop optimization just as an arithmetic operator
* would be.
* [...]
*/
#ifndef __attribute_pure__
# define __attribute_pure__ /* unimplemented */
#endif
#ifndef noinline
#define noinline
#endif
#ifndef __always_inline
#define __always_inline inline
#endif
#endif /* __KERNEL__ */
/*
* From the GCC manual:
*
* Many functions do not examine any values except their arguments,
* and have no effects except the return value. Basically this is
* just slightly more strict class than the `pure' attribute above,
* since function is not allowed to read global memory.
*
* Note that a function that has pointer arguments and examines the
* data pointed to must _not_ be declared `const'. Likewise, a
* function that calls a non-`const' function usually must not be
* `const'. It does not make sense for a `const' function to return
* `void'.
*/
#ifndef __attribute_const__
# define __attribute_const__ /* unimplemented */
#endif
#endif /* _LWK_COMPILER_H */

kernel/include/lwk/futex.h (new file, 109 lines)

@@ -0,0 +1,109 @@
#ifndef _LWK_FUTEX_H
#define _LWK_FUTEX_H
/** \name Futex Commands
* @{
*/
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10
// @}
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
/** \name Futex Operations, used for FUTEX_WAKE_OP
* @{
*/
#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */
#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */
#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */
#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */
#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */
#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */
// @}
/* FUTEX_WAKE_OP will perform atomically
int oldval = *(int *)UADDR2;
*(int *)UADDR2 = oldval OP OPARG;
if (oldval CMP CMPARG)
wake UADDR2; */
#define FUTEX_OP(op, oparg, cmp, cmparg) \
(((op & 0xf) << 28) | ((cmp & 0xf) << 24) \
| ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
/*
* bitset with all bits set for the FUTEX_xxx_BITSET OPs to request a
* match of any bit.
*/
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
#ifdef __KERNEL__
#include <lwk/spinlock.h>
#include <lwk/list.h>
#include <lwk/waitq.h>
#include <arch/futex.h>
#define FUTEX_HASHBITS 8 /* 256 entries in each futex hash tbl */
/** Futex tracking structure.
*
* A futex has a woken state, just like tasks have TASK_RUNNING.
* It is considered woken when list_empty(&futex->link) || futex->lock_ptr == 0.
* The order of wakeup is always to make the first condition true, then
* wake up futex->waitq, then make the second condition true.
*/
struct futex {
struct list_head link;
struct waitq waitq;
spinlock_t * lock_ptr;
uint32_t __user * uaddr;
uint32_t bitset;
};
struct futex_queue {
spinlock_t lock;
struct list_head futex_list;
};
extern void
futex_queue_init(
struct futex_queue * queue
);
extern int
futex(
uint32_t __user * uaddr,
int op,
uint32_t val,
uint64_t timeout,
uint32_t __user * uaddr2,
uint32_t val2,
uint32_t val3
);
extern long
sys_futex(
uint32_t __user * uaddr,
int op,
uint32_t val,
struct timespec __user * utime,
uint32_t __user * uaddr2,
uint32_t val3
);
#endif
#endif


@@ -0,0 +1,25 @@
#ifndef _LWK_STDDEF_H
#define _LWK_STDDEF_H
#include <lwk/compiler.h>
#undef NULL
#if defined(__cplusplus)
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#ifdef __KERNEL__
#define false 0
#define true 1
#endif
#undef offsetof
#ifdef __compiler_offsetof
#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
#else
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
#endif


@@ -20,6 +20,7 @@
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
struct vm_range {
struct list_head list;
unsigned long start, end;
@@ -33,16 +34,9 @@ struct vm_regions {
unsigned long brk_start, brk_end;
unsigned long map_start, map_end;
unsigned long stack_start, stack_end;
unsigned long tlsblock_base, tlsblock_limit;
};
struct process_vm {
aal_atomic_t refcount;
struct page_table *page_table;
struct list_head vm_range_list;
struct vm_regions region;
};
struct process_vm;
struct process {
int pid;
@@ -54,13 +48,30 @@ struct process {
aal_mc_kernel_context_t ctx;
aal_mc_user_context_t *uctx;
struct list_head sched_list; // Runqueue
// Runqueue list entry
struct list_head sched_list;
struct thread {
int *clear_child_tid;
unsigned long tlsblock_base, tlsblock_limit;
} thread;
};
#include <waitq.h>
#include <futex.h>
struct process_vm {
aal_atomic_t refcount;
struct page_table *page_table;
struct list_head vm_range_list;
struct vm_regions region;
// Address space private futexes
struct futex_queue futex_queues[1 << FUTEX_HASHBITS];
};
struct process *create_process(unsigned long user_pc);
struct process *clone_process(struct process *org,
unsigned long pc, unsigned long sp);

kernel/include/rlimit.h (new file, 88 lines)

@@ -0,0 +1,88 @@
#ifndef __RLIMIT_H
#define __RLIMIT_H
/* Kinds of resource limit. */
enum __rlimit_resource
{
/* Per-process CPU limit, in seconds. */
RLIMIT_CPU = 0,
#define RLIMIT_CPU RLIMIT_CPU
/* Largest file that can be created, in bytes. */
RLIMIT_FSIZE = 1,
#define RLIMIT_FSIZE RLIMIT_FSIZE
/* Maximum size of data segment, in bytes. */
RLIMIT_DATA = 2,
#define RLIMIT_DATA RLIMIT_DATA
/* Maximum size of stack segment, in bytes. */
RLIMIT_STACK = 3,
#define RLIMIT_STACK RLIMIT_STACK
/* Largest core file that can be created, in bytes. */
RLIMIT_CORE = 4,
#define RLIMIT_CORE RLIMIT_CORE
/* Largest resident set size, in bytes.
This affects swapping; processes that are exceeding their
resident set size will be more likely to have physical memory
taken from them. */
__RLIMIT_RSS = 5,
#define RLIMIT_RSS __RLIMIT_RSS
/* Number of open files. */
RLIMIT_NOFILE = 7,
__RLIMIT_OFILE = RLIMIT_NOFILE, /* BSD name for same. */
#define RLIMIT_NOFILE RLIMIT_NOFILE
#define RLIMIT_OFILE __RLIMIT_OFILE
/* Address space limit. */
RLIMIT_AS = 9,
#define RLIMIT_AS RLIMIT_AS
/* Number of processes. */
__RLIMIT_NPROC = 6,
#define RLIMIT_NPROC __RLIMIT_NPROC
/* Locked-in-memory address space. */
__RLIMIT_MEMLOCK = 8,
#define RLIMIT_MEMLOCK __RLIMIT_MEMLOCK
/* Maximum number of file locks. */
__RLIMIT_LOCKS = 10,
#define RLIMIT_LOCKS __RLIMIT_LOCKS
/* Maximum number of pending signals. */
__RLIMIT_SIGPENDING = 11,
#define RLIMIT_SIGPENDING __RLIMIT_SIGPENDING
/* Maximum bytes in POSIX message queues. */
__RLIMIT_MSGQUEUE = 12,
#define RLIMIT_MSGQUEUE __RLIMIT_MSGQUEUE
/* Maximum nice priority allowed to raise to.
Nice levels 19 .. -20 correspond to 0 .. 39
values of this resource limit. */
__RLIMIT_NICE = 13,
#define RLIMIT_NICE __RLIMIT_NICE
/* Maximum realtime priority allowed for non-privileged
processes. */
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
__RLIMIT_NLIMITS = 15,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS
};
struct rlimit {
uint64_t rlim_cur; /* Soft limit */
uint64_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
};
#endif


@@ -5,8 +5,8 @@
#include <aal/lock.h>
#include <list.h>
#include <process.h>
struct process;
struct waitq_entry;
typedef int (*waitq_func_t)(struct waitq_entry *wait, unsigned mode,


@@ -45,7 +45,27 @@ static struct aal_mc_pa_ops allocator = {
static void page_fault_handler(unsigned long address, void *regs)
{
kprintf("Page fault for %016lx\n", address);
struct vm_range *range, *next;
char found = 0;
kprintf("[%d] Page fault for 0x%lX\n",
aal_mc_get_processor_id(), address);
list_for_each_entry_safe(range, next,
&cpu_local_var(current)->vm->vm_range_list,
list) {
if (range->start <= address && range->end > address) {
kprintf("address is in range, flag: 0x%X! \n", range->flag);
found = 1;
break;
}
}
if (!found)
kprintf("address is out of range! \n");
/* TODO */
aal_mc_debug_show_interrupt_context(regs);
panic("page fault");


@@ -19,22 +19,30 @@ extern long do_arch_prctl(unsigned long code, unsigned long address);
void init_process_vm(struct process_vm *vm)
{
int i;
aal_atomic_set(&vm->refcount, 1);
INIT_LIST_HEAD(&vm->vm_range_list);
vm->page_table = aal_mc_pt_create();
vm->region.tlsblock_base = 0;
/* Initialize futex queues */
for (i = 0; i < (1 << FUTEX_HASHBITS); ++i)
futex_queue_init(&vm->futex_queues[i]);
}
struct process *create_process(unsigned long user_pc)
{
struct process *proc;
proc = aal_mc_alloc_pages(1, 0);
proc = aal_mc_alloc_pages(3, 0);
if (!proc)
return NULL;
memset(proc, 0, sizeof(struct process));
aal_mc_init_user_process(&proc->ctx, &proc->uctx,
((char *)proc) + PAGE_SIZE, user_pc, 0);
((char *)proc) + 3 * PAGE_SIZE, user_pc, 0);
proc->vm = (struct process_vm *)(proc + 1);
@@ -50,7 +58,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
proc = aal_mc_alloc_pages(1, 0);
memset(proc, 0, sizeof(struct process));
memset(proc, 0, sizeof(*proc));
aal_mc_init_user_process(&proc->ctx, &proc->uctx,
((char *)proc) + PAGE_SIZE, pc, sp);
@@ -101,9 +109,9 @@ int add_process_memory_range(struct process *process,
range->phys = phys;
range->flag = flag;
dkprintf("range: %lx - %lx => %lx - %lx\n",
dkprintf("range: 0x%lX - 0x%lX => 0x%lX - 0x%lX (%ld)\n",
range->start, range->end, range->phys, range->phys +
range->end - range->start);
range->end - range->start, range->end - range->start);
if (flag & VR_REMOTE) {
update_process_page_table(process, range, AAL_PTA_REMOTE);
@@ -122,14 +130,17 @@ int add_process_memory_range(struct process *process,
return 0;
}
#define NR_STACK_PAGES 2
void init_process_stack(struct process *process)
{
char *stack = aal_mc_alloc_pages(1, 0);
unsigned long *p = (unsigned long *)(stack + PAGE_SIZE);
char *stack = aal_mc_alloc_pages(NR_STACK_PAGES, 0);
unsigned long *p = (unsigned long *)(stack + (NR_STACK_PAGES * PAGE_SIZE));
memset(stack, 0, PAGE_SIZE);
memset(stack, 0, NR_STACK_PAGES * PAGE_SIZE);
add_process_memory_range(process, USER_END - PAGE_SIZE,
add_process_memory_range(process, USER_END - (NR_STACK_PAGES * PAGE_SIZE),
USER_END,
virt_to_phys(stack), VR_STACK);
@@ -147,7 +158,7 @@ void init_process_stack(struct process *process)
aal_mc_modify_user_context(process->uctx, AAL_UCR_STACK_POINTER,
USER_END - sizeof(unsigned long) * 9);
process->vm->region.stack_end = USER_END;
process->vm->region.stack_start = USER_END - PAGE_SIZE;
process->vm->region.stack_start = USER_END - (NR_STACK_PAGES * PAGE_SIZE);
}
@@ -231,11 +242,11 @@ static void idle(void)
{
//unsigned int flags;
//flags = aal_mc_spinlock_lock(&cpu_status_lock);
cpu_local_var(status) = CPU_STATUS_IDLE;
//aal_mc_spinlock_unlock(&cpu_status_lock, flags);
while (1) {
cpu_enable_interrupt();
schedule();
cpu_local_var(status) = CPU_STATUS_IDLE;
cpu_halt();
}
}
@@ -307,8 +318,13 @@ void schedule(void)
prev ? prev->pid : 0, next ? next->pid : 0);
aal_mc_load_page_table(next->vm->page_table);
do_arch_prctl(ARCH_SET_FS, next->vm->region.tlsblock_base);
cpu_local_var(status) = CPU_STATUS_RUNNING;
kprintf("[%d] schedule: tlsblock_base: 0x%lX\n",
aal_mc_get_processor_id(), next->thread.tlsblock_base);
do_arch_prctl(ARCH_SET_FS, next->thread.tlsblock_base);
if (next != &cpu_local_var(idle))
cpu_local_var(status) = CPU_STATUS_RUNNING;
if (prev) {
aal_mc_switch_context(&prev->ctx, &next->ctx);
@@ -355,6 +371,7 @@ void __runq_add_proc(struct process *proc, int cpu_id)
++v->runq_len;
proc->cpu_id = cpu_id;
proc->status = PS_RUNNING;
get_cpu_local_var(cpu_id)->status = CPU_STATUS_RUNNING;
dkprintf("runq_add_proc(): pid %d added to CPU[%d]'s runq\n",
proc->pid, cpu_id);


@@ -1,6 +1,7 @@
#include <types.h>
#include <kmsg.h>
#include <aal/cpu.h>
#include <cpulocal.h>
#include <aal/mm.h>
#include <aal/debug.h>
#include <aal/ikc.h>
@@ -12,9 +13,12 @@
#include <uio.h>
#include <aal/lock.h>
#include <ctype.h>
#include <waitq.h>
#include <rlimit.h>
/* Headers taken from kitten LWK */
#include <lwk/stddef.h>
#include <futex.h>
#define SYSCALL_BY_IKC
@@ -26,6 +30,10 @@
#define dkprintf(...)
#endif
static aal_spinlock_t sysc_lock = { 0 };
static aal_atomic_t pid_cnt = AAL_ATOMIC_INIT(1024);
int memcpy_async(unsigned long dest, unsigned long src,
unsigned long len, int wait, unsigned long *notify);
@@ -58,6 +66,7 @@ static void send_syscall(struct syscall_request *req)
packet.arg = cpu_local_var(scp).request_rpa;
aal_ikc_send(cpu_local_var(syscall_channel), &packet, 0);
//aal_ikc_send(get_cpu_local_var(0)->syscall_channel, &packet, 0);
#endif
}
@@ -151,8 +160,59 @@ SYSCALL_DECLARE(open)
SYSCALL_FOOTER;
}
static DECLARE_WAITQ(my_waitq);
SYSCALL_DECLARE(ioctl)
{
switch (aal_mc_syscall_arg0(ctx)) {
case 0: {
struct waitq_entry my_wait;
waitq_init_entry(&my_wait, cpu_local_var(current));
dkprintf("CPU[%d] pid[%d] going to sleep...\n",
cpu_local_var(current)->cpu_id,
cpu_local_var(current)->pid);
waitq_prepare_to_wait(&my_waitq, &my_wait, PS_INTERRUPTIBLE);
schedule();
waitq_finish_wait(&my_waitq, &my_wait);
dkprintf("CPU[%d] pid[%d] woke up!\n",
cpu_local_var(current)->cpu_id,
cpu_local_var(current)->pid);
break;
}
case 1:
dkprintf("CPU[%d] pid[%d] waking up everyone..\n",
cpu_local_var(current)->cpu_id,
cpu_local_var(current)->pid);
waitq_wakeup(&my_waitq);
break;
case 2:
dkprintf("[%d] pid %d made an ioctl\n",
cpu_local_var(current)->cpu_id,
cpu_local_var(current)->pid);
break;
default:
dkprintf("ioctl() unimplemented\n");
}
return 0;
#if 0
SYSCALL_HEADER;
/* Very ad-hoc for termios */
@@ -163,6 +223,7 @@ SYSCALL_DECLARE(ioctl)
}
return -EINVAL;
#endif
}
SYSCALL_DECLARE(read)
@@ -195,9 +256,14 @@ SYSCALL_DECLARE(pwrite)
SYSCALL_DECLARE(close)
{
kprintf("[%d] close() \n", aal_mc_get_processor_id());
return -EBADF;
/*
SYSCALL_HEADER;
SYSCALL_ARGS_1(D);
SYSCALL_FOOTER;
*/
}
SYSCALL_DECLARE(lseek)
@@ -322,6 +388,11 @@ long do_arch_prctl(unsigned long code, unsigned long address)
switch (code) {
case ARCH_SET_FS:
kprintf("[%d] arch_prctl: ARCH_SET_FS: 0x%lX\n",
aal_mc_get_processor_id(), address);
cpu_local_var(current)->thread.tlsblock_base = address;
err = aal_mc_arch_set_special_register(type, address);
break;
case ARCH_SET_GS:
err = aal_mc_arch_set_special_register(type, address);
break;
@@ -390,63 +461,73 @@ SYSCALL_DECLARE(clone)
SYSCALL_DECLARE(clone)
{
int i;
int cpuid = -1;
int clone_flags = aal_mc_syscall_arg0(ctx);
//unsigned long flags; /* spinlock */
struct aal_mc_cpu_info *cpu_info = aal_mc_get_cpu_info();
struct process *new;
int i;
int cpuid = -1;
int clone_flags = aal_mc_syscall_arg0(ctx);
//unsigned long flags; /* spinlock */
struct aal_mc_cpu_info *cpu_info = aal_mc_get_cpu_info();
struct process *new;
kputs(";sys_clone\n");
dkprintf("[%d] clone(): stack_pointr: 0x%lX\n",
aal_mc_get_processor_id(),
(unsigned long)aal_mc_syscall_arg1(ctx));
//flags = aal_mc_spinlock_lock(&cpu_status_lock);
for (i = 0; i < cpu_info->ncpus; i++) {
if(get_cpu_local_var(i)->status == CPU_STATUS_IDLE)
if (get_cpu_local_var(i)->status == CPU_STATUS_IDLE) {
cpuid = i;
break;
}
}
if(cpuid < 0) return -EAGAIN;
if (cpuid < 0)
return -EAGAIN;
new = clone_process(cpu_local_var(current), aal_mc_syscall_pc(ctx),
aal_mc_syscall_arg1(ctx));
if (!new) {
return -ENOMEM;
}
/* TODO: allocate new pid */
new->pid = 0xc107e;
if (clone_flags & CLONE_SETTLS) {
dkprintf("clone_flags & CLONE_SETTLS\n");
/* Allocate new pid */
new->pid = aal_atomic_inc_return(&pid_cnt);
if (clone_flags & CLONE_PARENT_SETTID) {
dkprintf("clone_flags & CLONE_PARENT_SETTID: 0x%lX\n",
(unsigned long)aal_mc_syscall_arg2(ctx));
new->vm->region.tlsblock_base
*(int*)aal_mc_syscall_arg2(ctx) = new->pid;
}
if (clone_flags & CLONE_CHILD_CLEARTID) {
dkprintf("clone_flags & CLONE_CHILD_CLEARTID: 0x%lX\n",
(unsigned long)aal_mc_syscall_arg3(ctx));
new->thread.clear_child_tid = (int*)aal_mc_syscall_arg3(ctx);
}
if (clone_flags & CLONE_SETTLS) {
dkprintf("clone_flags & CLONE_SETTLS: 0x%lX\n",
(unsigned long)aal_mc_syscall_arg4(ctx));
new->thread.tlsblock_base
= (unsigned long)aal_mc_syscall_arg4(ctx);
}
else
new->vm->region.tlsblock_base = 0;
if (clone_flags & CLONE_PARENT_SETTID) {
unsigned long pptid;
int *vptid;
if (aal_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table,
(int*)aal_mc_syscall_arg2(ctx), &pptid))
return -EFAULT;
vptid = (int *)phys_to_virt(pptid);
*vptid = 1;
else {
new->thread.tlsblock_base = 0;
}
new->thread.clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID)
? (int*)aal_mc_syscall_arg3(ctx)
: NULL;
aal_mc_syscall_ret(new->uctx) = 0;
runq_add_proc(new, cpuid);
//get_cpu_local_var(cpuid)->next = new;
//get_cpu_local_var(cpuid)->status = CPU_STATUS_RUNNING;
//aal_mc_spinlock_unlock(&cpu_status_lock, flags);
aal_mc_interrupt_cpu(aal_mc_get_cpu_info()->hw_ids[cpuid], 0xd1);
dkprintf("clone: kicking scheduler!\n");
while (1) { cpu_halt(); }
aal_mc_interrupt_cpu(get_x86_cpu_local_variable(cpuid)->apic_id, 0xd1);
//while (1) { cpu_halt(); }
return new->pid;
}
@@ -459,6 +540,7 @@ SYSCALL_DECLARE(set_tid_address)
return cpu_local_var(current)->pid;
}
SYSCALL_DECLARE(set_robust_list)
{
return -ENOSYS;
@@ -501,6 +583,94 @@ SYSCALL_DECLARE(writev)
return ret;
}
SYSCALL_DECLARE(futex)
{
// TODO: timespec support!
//struct timespec _utime;
uint64_t timeout = 1000; // MAX_SCHEDULE_TIMEOUT;
uint32_t val2 = 0;
uint32_t *uaddr = (uint32_t *)aal_mc_syscall_arg0(ctx);
int op = (int)aal_mc_syscall_arg1(ctx);
uint32_t val = (uint32_t)aal_mc_syscall_arg2(ctx);
//struct timespec __user *utime = aal_mc_syscall_arg3(ctx);
uint32_t *uaddr2 = (uint32_t *)aal_mc_syscall_arg4(ctx);
uint32_t val3 = (uint32_t)aal_mc_syscall_arg5(ctx);
/* Mask off the FUTEX_PRIVATE_FLAG,
* assume all futexes are address space private */
op = (op & FUTEX_CMD_MASK);
#if 0
if (utime && (op == FUTEX_WAIT)) {
if (copy_from_user(&_utime, utime, sizeof(_utime)) != 0)
return -EFAULT;
if (!timespec_valid(&_utime))
return -EINVAL;
timeout = timespec_to_ns(_utime);
}
#endif
/* Requeue parameter in 'utime' if op == FUTEX_CMP_REQUEUE.
* number of waiters to wake in 'utime' if op == FUTEX_WAKE_OP. */
if (op == FUTEX_CMP_REQUEUE || op == FUTEX_WAKE_OP)
val2 = (uint32_t) (unsigned long) aal_mc_syscall_arg3(ctx);
return futex(uaddr, op, val, timeout, uaddr2, val2, val3);
}
SYSCALL_DECLARE(exit)
{
/* If there is a clear_child_tid address set, clear it and wake it.
* This unblocks any pthread_join() waiters. */
if (cpu_local_var(current)->thread.clear_child_tid) {
kprintf("exit clear_child!\n");
*cpu_local_var(current)->thread.clear_child_tid = 0;
barrier();
futex((uint32_t *)cpu_local_var(current)->thread.clear_child_tid,
FUTEX_WAKE, 1, 0, NULL, 0, 0);
}
runq_del_proc(cpu_local_var(current), cpu_local_var(current)->cpu_id);
free_process_memory(cpu_local_var(current));
cpu_local_var(current) = NULL;
schedule();
return 0;
}
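
For context, a minimal user-space sketch of the join protocol this enables, assuming the thread's tid word was registered with CLONE_CHILD_CLEARTID (handled in sys_clone above) and that the waiter follows roughly the NPTL pthread_join() pattern; the helper name is illustrative:

#include <unistd.h>
#include <sys/syscall.h>

#define FUTEX_WAIT 0

static void join_thread(int *ctid)   /* ctid: word registered via clone() */
{
        int tid;
        /* sleep until sys_exit() above zeroes *ctid and issues FUTEX_WAKE */
        while ((tid = __atomic_load_n(ctid, __ATOMIC_ACQUIRE)) != 0)
                syscall(SYS_futex, ctid, FUTEX_WAIT, tid, NULL, NULL, 0);
}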
SYSCALL_DECLARE(getrlimit)
{
int ret;
int resource = aal_mc_syscall_arg0(ctx);
struct rlimit *rlm = (struct rlimit *)aal_mc_syscall_arg1(ctx);
switch (resource) {
case RLIMIT_STACK:
dkprintf("[%d] getrlimit() RLIMIT_STACK\n", aal_mc_get_processor_id());
rlm->rlim_cur = (1024*1024);
rlm->rlim_max = (16384*1024);
ret = 0;
break;
default:
return -ENOSYS;
}
return ret;
}
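
Assuming a standard getrlimit(2) wrapper in user space, a process on this kernel would observe the hard-coded 1 MiB soft / 16 MiB hard stack limit returned above; a small illustrative check:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;
        if (getrlimit(RLIMIT_STACK, &rl) == 0)   /* served by sys_getrlimit above */
                printf("stack soft=%lu hard=%lu\n",
                       (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
        return 0;
}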
SYSCALL_DECLARE(noop)
{
kprintf("noop() \n");
return -EFAULT;
}
static long (*syscall_table[])(int, aal_mc_user_context_t *) = {
[0] = sys_read,
@@ -513,13 +683,17 @@ static long (*syscall_table[])(int, aal_mc_user_context_t *) = {
[10] = sys_mprotect,
[11] = sys_munmap,
[12] = sys_brk,
[14] = sys_noop,
[16] = sys_ioctl,
[17] = sys_pread,
[18] = sys_pwrite,
[20] = sys_writev,
[28] = sys_noop,
[39] = sys_getpid,
[56] = sys_clone,
[60] = sys_exit,
[63] = sys_uname,
[97] = sys_getrlimit,
[102] = sys_getxid,
[104] = sys_getxid,
[107] = sys_getxid,
@@ -527,6 +701,7 @@ static long (*syscall_table[])(int, aal_mc_user_context_t *) = {
[110] = sys_getxid,
[111] = sys_getxid,
[158] = sys_arch_prctl,
[202] = sys_futex,
[218] = sys_set_tid_address,
[231] = sys_exit_group,
[273] = sys_set_robust_list,
@@ -563,7 +738,6 @@ long syscall(int num, aal_mc_user_context_t *ctx)
if (syscall_table[num]) {
l = syscall_table[num](num, ctx);
dkprintf(" %lx\n", l);
return l;
} else {
dkprintf("USC[%3d](%lx, %lx, %lx, %lx, %lx) @ %lx | %lx\n", num,
aal_mc_syscall_arg0(ctx), aal_mc_syscall_arg1(ctx),
@@ -571,8 +745,10 @@ long syscall(int num, aal_mc_user_context_t *ctx)
aal_mc_syscall_arg4(ctx), aal_mc_syscall_pc(ctx),
aal_mc_syscall_sp(ctx));
//while(1);
return -ENOSYS;
l = -ENOSYS;
}
return l;
}
void __host_update_process_range(struct process *process,