From 04e193de13777beafa0b07bcb82547aef82ec0b6 Mon Sep 17 00:00:00 2001 From: Tomoki Shirasawa Date: Tue, 13 Oct 2015 23:04:08 +0900 Subject: [PATCH] refactoring process structures --- arch/x86/kernel/cpu.c | 42 +- arch/x86/kernel/gencore.c | 48 +- arch/x86/kernel/include/arch-lock.h | 270 ++-- arch/x86/kernel/memory.c | 6 +- arch/x86/kernel/syscall.c | 469 ++++--- executer/user/mcexec.c | 5 +- kernel/debug.c | 12 +- kernel/fileobj.c | 2 +- kernel/futex.c | 10 +- kernel/host.c | 121 +- kernel/include/cls.h | 8 +- kernel/include/kmalloc.h | 12 +- kernel/include/process.h | 304 +++-- kernel/include/syscall.h | 1 + kernel/include/timer.h | 2 +- kernel/include/waitq.h | 4 +- kernel/init.c | 2 + kernel/mem.c | 38 +- kernel/process.c | 1495 ++++++++++++--------- kernel/procfs.c | 40 +- kernel/syscall.c | 1914 ++++++++++++--------------- kernel/timer.c | 38 +- kernel/waitq.c | 8 +- 23 files changed, 2586 insertions(+), 2265 deletions(-) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index c4a82a97..88176b27 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -1262,33 +1262,33 @@ int ihk_mc_interrupt_cpu(int cpu, int vector) @ ensures proc->fp_regs == NULL; @*/ void -release_fp_regs(struct process *proc) +release_fp_regs(struct thread *thread) { int pages; - if (proc && !proc->fp_regs) + if (thread && !thread->fp_regs) return; pages = (sizeof(fp_regs_struct) + 4095) >> 12; - ihk_mc_free_pages(proc->fp_regs, pages); - proc->fp_regs = NULL; + ihk_mc_free_pages(thread->fp_regs, pages); + thread->fp_regs = NULL; } void -save_fp_regs(struct process *proc) +save_fp_regs(struct thread *thread) { int pages; - if (!proc->fp_regs) { + if (!thread->fp_regs) { pages = (sizeof(fp_regs_struct) + 4095) >> 12; - proc->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT); + thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT); - if (!proc->fp_regs) { + if (!thread->fp_regs) { kprintf("error: allocating fp_regs pages\n"); return; } - memset(proc->fp_regs, 
0, sizeof(fp_regs_struct)); + memset(thread->fp_regs, 0, sizeof(fp_regs_struct)); } if (xsave_available) { @@ -1298,17 +1298,17 @@ save_fp_regs(struct process *proc) low = 0x7; high = 0; - asm volatile("xsave %0" : : "m" (*proc->fp_regs), "a" (low), "d" (high) + asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high) : "memory"); - dkprintf("fp_regs for TID %d saved\n", proc->ftn->tid); + dkprintf("fp_regs for TID %d saved\n", thread->tid); } } void -restore_fp_regs(struct process *proc) +restore_fp_regs(struct thread *thread) { - if (!proc->fp_regs) + if (!thread->fp_regs) return; if (xsave_available) { @@ -1318,29 +1318,29 @@ restore_fp_regs(struct process *proc) low = 0x7; high = 0; - asm volatile("xrstor %0" : : "m" (*proc->fp_regs), + asm volatile("xrstor %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)); - dkprintf("fp_regs for TID %d restored\n", proc->ftn->tid); + dkprintf("fp_regs for TID %d restored\n", thread->tid); } // XXX: why release?? - //release_fp_regs(proc); + //release_fp_regs(thread); } -ihk_mc_user_context_t *lookup_user_context(struct process *proc) +ihk_mc_user_context_t *lookup_user_context(struct thread *thread) { - ihk_mc_user_context_t *uctx = proc->uctx; + ihk_mc_user_context_t *uctx = thread->uctx; - if ((!(proc->ftn->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE + if ((!(thread->tstatus & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE | PS_STOPPED | PS_TRACED)) - && (proc != cpu_local_var(current))) + && (thread != cpu_local_var(current))) || !uctx->is_gpr_valid) { return NULL; } if (!uctx->is_sr_valid) { - uctx->sr.fs_base = proc->thread.tlsblock_base; + uctx->sr.fs_base = thread->thread.tlsblock_base; uctx->sr.gs_base = 0; uctx->sr.ds = 0; uctx->sr.es = 0; diff --git a/arch/x86/kernel/gencore.c b/arch/x86/kernel/gencore.c index 472b5844..cf9645a0 100644 --- a/arch/x86/kernel/gencore.c +++ b/arch/x86/kernel/gencore.c @@ -78,11 +78,11 @@ int get_prstatus_size(void) * \brief Fill a prstatus structure. 
* * \param head A pointer to a note structure. - * \param proc A pointer to the current process structure. + * \param thread A pointer to the current thread structure. * \param regs0 A pointer to a x86_regs structure. */ -void fill_prstatus(struct note *head, struct process *proc, void *regs0) +void fill_prstatus(struct note *head, struct thread *thread, void *regs0) { void *name; struct elf_prstatus64 *prstatus; @@ -160,11 +160,11 @@ int get_prpsinfo_size(void) * \brief Fill a prpsinfo structure. * * \param head A pointer to a note structure. - * \param proc A pointer to the current process structure. + * \param thread A pointer to the current thread structure. * \param regs A pointer to a x86_regs structure. */ -void fill_prpsinfo(struct note *head, struct process *proc, void *regs) +void fill_prpsinfo(struct note *head, struct thread *thread, void *regs) { void *name; struct elf_prpsinfo64 *prpsinfo; @@ -176,8 +176,8 @@ void fill_prpsinfo(struct note *head, struct process *proc, void *regs) memcpy(name, "CORE", sizeof("CORE")); prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE"))); - prpsinfo->pr_state = proc->ftn->status; - prpsinfo->pr_pid = proc->ftn->pid; + prpsinfo->pr_state = thread->tstatus; + prpsinfo->pr_pid = thread->proc->pid; /* We leave most of the fields unfilled. @@ -210,11 +210,11 @@ int get_auxv_size(void) * \brief Fill an AUXV structure. * * \param head A pointer to a note structure. - * \param proc A pointer to the current process structure. + * \param thread A pointer to the current thread structure. * \param regs A pointer to a x86_regs structure. 
*/ -void fill_auxv(struct note *head, struct process *proc, void *regs) +void fill_auxv(struct note *head, struct thread *thread, void *regs) { void *name; void *auxv; @@ -225,7 +225,7 @@ void fill_auxv(struct note *head, struct process *proc, void *regs) name = (void *) (head + 1); memcpy(name, "CORE", sizeof("CORE")); auxv = name + align32(sizeof("CORE")); - memcpy(auxv, proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN); + memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN); } /** @@ -243,23 +243,23 @@ int get_note_size(void) * \brief Fill the NOTE segment. * * \param head A pointer to a note structure. - * \param proc A pointer to the current process structure. + * \param thread A pointer to the current thread structure. * \param regs A pointer to a x86_regs structure. */ -void fill_note(void *note, struct process *proc, void *regs) +void fill_note(void *note, struct thread *thread, void *regs) { - fill_prstatus(note, proc, regs); + fill_prstatus(note, thread, regs); note += get_prstatus_size(); - fill_prpsinfo(note, proc, regs); + fill_prpsinfo(note, thread, regs); note += get_prpsinfo_size(); - fill_auxv(note, proc, regs); + fill_auxv(note, thread, regs); } /** * \brief Generate an image of the core file. * - * \param proc A pointer to the current process structure. + * \param thread A pointer to the current thread structure. * \param regs A pointer to a x86_regs structure. * \param coretable(out) An array of core chunks. * \param chunks(out) Number of the entires of coretable. @@ -271,7 +271,7 @@ void fill_note(void *note, struct process *proc, void *regs) * should be zero. 
*/ -int gencore(struct process *proc, void *regs, +int gencore(struct thread *thread, void *regs, struct coretable **coretable, int *chunks) { struct coretable *ct = NULL; @@ -279,7 +279,7 @@ int gencore(struct process *proc, void *regs, Elf64_Phdr *ph = NULL; void *note = NULL; struct vm_range *range; - struct process_vm *vm = proc->vm; + struct process_vm *vm = thread->vm; int segs = 1; /* the first one is for NOTE */ int notesize, phsize, alignednotesize; unsigned int offset = 0; @@ -306,7 +306,7 @@ int gencore(struct process *proc, void *regs, unsigned long p, phys; int prevzero = 0; for (p = range->start; p < range->end; p += PAGE_SIZE) { - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void *)p, &phys) != 0) { prevzero = 1; } else { @@ -326,7 +326,7 @@ int gencore(struct process *proc, void *regs, dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks); { - struct vm_regions region = proc->vm->region; + struct vm_regions region = thread->vm->region; dkprintf("text: %lx-%lx\n", region.text_start, region.text_end); dkprintf("data: %lx-%lx\n", region.data_start, region.data_end); @@ -364,7 +364,7 @@ int gencore(struct process *proc, void *regs, goto fail; } memset(note, 0, alignednotesize); - fill_note(note, proc, regs); + fill_note(note, thread, regs); /* prgram header for NOTE segment is exceptional */ ph[0].p_type = PT_NOTE; @@ -434,7 +434,7 @@ int gencore(struct process *proc, void *regs, for (start = p = range->start; p < range->end; p += PAGE_SIZE) { - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void *)p, &phys) != 0) { if (prevzero == 0) { /* We begin a new chunk */ @@ -472,9 +472,9 @@ int gencore(struct process *proc, void *regs, i++; } } else { - if ((proc->vm->region.user_start <= range->start) && - (range->end <= proc->vm->region.user_end)) { - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, + if 
((thread->vm->region.user_start <= range->start) && + (range->end <= thread->vm->region.user_end)) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void *)range->start, &phys) != 0) { dkprintf("could not convert user virtual address %lx" "to physical address", range->start); diff --git a/arch/x86/kernel/include/arch-lock.h b/arch/x86/kernel/include/arch-lock.h index 755f4c96..fa660d2e 100644 --- a/arch/x86/kernel/include/arch-lock.h +++ b/arch/x86/kernel/include/arch-lock.h @@ -8,8 +8,9 @@ #include //#define DEBUG_SPINLOCK +//#define DEBUG_MCS_RWLOCK -#ifdef DEBUG_SPINLOCK +#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK) int __kprintf(const char *format, ...); #endif @@ -26,7 +27,17 @@ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock) } #define SPIN_LOCK_UNLOCKED 0 -static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_lock_noirq(l) { \ +__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_lock_noirq(l); \ +__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq +#endif + +static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) { int inc = 0x00010000; int tmp; @@ -45,11 +56,6 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) : "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc"); #endif -#ifdef DEBUG_SPINLOCK - __kprintf("[%d] trying to grab lock: 0x%lX\n", - ihk_mc_get_processor_id(), lock); -#endif - preempt_disable(); asm volatile("lock; xaddl %0, %1\n" @@ -67,37 +73,58 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock) : : "memory", "cc"); -#ifdef DEBUG_SPINLOCK - __kprintf("[%d] holding lock: 0x%lX\n", ihk_mc_get_processor_id(), lock); -#endif } -static unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock) +#ifdef DEBUG_SPINLOCK +#define 
ihk_mc_spinlock_lock(l) ({ unsigned long rc;\ +__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +rc = __ihk_mc_spinlock_lock(l);\ +__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\ +}) +#else +#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock +#endif +static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock) { unsigned long flags; flags = cpu_disable_interrupt_save(); - ihk_mc_spinlock_lock_noirq(lock); + __ihk_mc_spinlock_lock_noirq(lock); return flags; } -static void ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_unlock_noirq(l) { \ +__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_unlock_noirq(l); \ +__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq +#endif +static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock) { asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc"); preempt_enable(); } -static void ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags) +#ifdef DEBUG_SPINLOCK +#define ihk_mc_spinlock_unlock(l, f) { \ +__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__ihk_mc_spinlock_unlock((l), (f)); \ +__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock +#endif +static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags) { - ihk_mc_spinlock_unlock_noirq(lock); + __ihk_mc_spinlock_unlock_noirq(lock); cpu_restore_interrupt(flags); -#ifdef DEBUG_SPINLOCK - __kprintf("[%d] released lock: 0x%lX\n", ihk_mc_get_processor_id(), lock); -#endif } /* An implementation of the Mellor-Crummey Scott (MCS) lock */ @@ -152,76 
+179,85 @@ static void mcs_lock_unlock(struct mcs_lock_node *lock, } // reader/writer lock -typedef struct rwlock_node { +typedef struct mcs_rwlock_node { ihk_atomic_t count; // num of readers (use only common reader) char type; // lock type -#define RWLOCK_TYPE_COMMON_READER 0 -#define RWLOCK_TYPE_READER 1 -#define RWLOCK_TYPE_WRITER 2 +#define MCS_RWLOCK_TYPE_COMMON_READER 0 +#define MCS_RWLOCK_TYPE_READER 1 +#define MCS_RWLOCK_TYPE_WRITER 2 char locked; // lock -#define RWLOCK_LOCKED 1 -#define RWLOCK_UNLOCKED 0 +#define MCS_RWLOCK_LOCKED 1 +#define MCS_RWLOCK_UNLOCKED 0 char dmy1; // unused char dmy2; // unused - struct rwlock_node *next; -} __attribute__((aligned(64))) rwlock_node_t; + struct mcs_rwlock_node *next; +} __attribute__((aligned(64))) mcs_rwlock_node_t; -typedef struct rwlock_node_irqsave { - struct rwlock_node node; +typedef struct mcs_rwlock_node_irqsave { + struct mcs_rwlock_node node; unsigned long irqsave; -} __attribute__((aligned(64))) rwlock_node_irqsave_t; +} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t; -typedef struct rwlock_lock { - struct rwlock_node reader; /* common reader lock */ - struct rwlock_node *node; /* base */ -} __attribute__((aligned(64))) rwlock_lock_t; +typedef struct mcs_rwlock_lock { + struct mcs_rwlock_node reader; /* common reader lock */ + struct mcs_rwlock_node *node; /* base */ +} __attribute__((aligned(64))) mcs_rwlock_lock_t; static void -rwlock_init(struct rwlock_lock *lock) +mcs_rwlock_init(struct mcs_rwlock_lock *lock) { ihk_atomic_set(&lock->reader.count, 0); - lock->reader.type = RWLOCK_TYPE_COMMON_READER; + lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER; lock->node = NULL; } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_lock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_lock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \ +} 
+#else +#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq +#endif static void -rwlock_writer_lock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) +__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) { - struct rwlock_node *pred; + struct mcs_rwlock_node *pred; preempt_disable(); - node->type = RWLOCK_TYPE_WRITER; + node->type = MCS_RWLOCK_TYPE_WRITER; node->next = NULL; - pred = (struct rwlock_node *)xchg8((unsigned long *)&lock->node, + pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node, (unsigned long)node); if (pred) { - node->locked = RWLOCK_LOCKED; + node->locked = MCS_RWLOCK_LOCKED; pred->next = node; - while (node->locked != RWLOCK_UNLOCKED) { + while (node->locked != MCS_RWLOCK_UNLOCKED) { cpu_pause(); } } } static void -rwlock_unlock_readers(struct rwlock_lock *lock) +mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock) { - struct rwlock_node *p; - struct rwlock_node *f = NULL; - struct rwlock_node *n; + struct mcs_rwlock_node *p; + struct mcs_rwlock_node *f = NULL; + struct mcs_rwlock_node *n; ihk_atomic_inc(&lock->reader.count); // protect to unlock reader for(p = &lock->reader; p->next; p = n){ n = p->next; - if(p->next->type == RWLOCK_TYPE_READER){ + if(p->next->type == MCS_RWLOCK_TYPE_READER){ p->next = n->next; if(lock->node == n){ - struct rwlock_node *old; + struct mcs_rwlock_node *old; - old = (struct rwlock_node *)atomic_cmpxchg8( + old = (struct mcs_rwlock_node *)atomic_cmpxchg8( (unsigned long *)&lock->node, (unsigned long)n, (unsigned long)p); @@ -233,29 +269,44 @@ rwlock_unlock_readers(struct rwlock_lock *lock) p->next = n->next; } } + else if(p->next == NULL){ + while (n->next == NULL) { + cpu_pause(); + } + p->next = n->next; + } if(f){ ihk_atomic_inc(&lock->reader.count); - n->locked = RWLOCK_UNLOCKED; + n->locked = MCS_RWLOCK_UNLOCKED; } else f = n; n = p; } if(n->next == NULL && lock->node != n){ - while (n->next == NULL) { + while (n->next == NULL && 
lock->node != n) { cpu_pause(); } } } - f->locked = RWLOCK_UNLOCKED; + f->locked = MCS_RWLOCK_UNLOCKED; } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_unlock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_unlock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq +#endif static void -rwlock_writer_unlock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) +__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) { if (node->next == NULL) { - struct rwlock_node *old = (struct rwlock_node *) + struct mcs_rwlock_node *old = (struct mcs_rwlock_node *) atomic_cmpxchg8((unsigned long *)&lock->node, (unsigned long)node, (unsigned long)0); @@ -268,42 +319,52 @@ rwlock_writer_unlock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) } } - if(node->next->type == RWLOCK_TYPE_READER){ + if(node->next->type == MCS_RWLOCK_TYPE_READER){ lock->reader.next = node->next; - rwlock_unlock_readers(lock); + mcs_rwlock_unlock_readers(lock); } else{ - node->next->locked = RWLOCK_UNLOCKED; + node->next->locked = MCS_RWLOCK_UNLOCKED; } out: preempt_enable(); } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_lock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_lock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq +#endif static void -rwlock_reader_lock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) +__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) { - struct rwlock_node *pred; + struct mcs_rwlock_node 
*pred; preempt_disable(); - node->type = RWLOCK_TYPE_READER; + node->type = MCS_RWLOCK_TYPE_READER; node->next = NULL; + node->dmy1 = ihk_mc_get_processor_id(); - pred = (struct rwlock_node *)xchg8((unsigned long *)&lock->node, + pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node, (unsigned long)node); if (pred) { if(pred == &lock->reader){ if(ihk_atomic_inc_return(&pred->count) != 1){ - struct rwlock_node *old; + struct mcs_rwlock_node *old; - old = (struct rwlock_node *)atomic_cmpxchg8( + old = (struct mcs_rwlock_node *)atomic_cmpxchg8( (unsigned long *)&lock->node, (unsigned long)node, (unsigned long)pred); - if (old == pred) { + if (old == node) { goto out; } @@ -312,36 +373,45 @@ rwlock_reader_lock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) } pred->next = node->next; - if(node->next->type == RWLOCK_TYPE_READER) - rwlock_unlock_readers(lock); + if(node->next->type == MCS_RWLOCK_TYPE_READER) + mcs_rwlock_unlock_readers(lock); goto out; } ihk_atomic_dec(&pred->count); } - node->locked = RWLOCK_LOCKED; + node->locked = MCS_RWLOCK_LOCKED; pred->next = node; - while (node->locked != RWLOCK_UNLOCKED) { + while (node->locked != MCS_RWLOCK_UNLOCKED) { cpu_pause(); } } else { lock->reader.next = node; - rwlock_unlock_readers(lock); + mcs_rwlock_unlock_readers(lock); } out: return; } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_unlock_noirq(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_unlock_noirq((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq +#endif static void -rwlock_reader_unlock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) +__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node) { if(ihk_atomic_dec_return(&lock->reader.count)) goto out; if 
(lock->reader.next == NULL) { - struct rwlock_node *old; + struct mcs_rwlock_node *old; - old = (struct rwlock_node *)atomic_cmpxchg8( + old = (struct mcs_rwlock_node *)atomic_cmpxchg8( (unsigned long *)&lock->node, (unsigned long)&lock->reader, (unsigned long)0); @@ -355,42 +425,78 @@ rwlock_reader_unlock_noirq(struct rwlock_lock *lock, struct rwlock_node *node) } } - if(lock->reader.next->type == RWLOCK_TYPE_READER){ - rwlock_unlock_readers(lock); + if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){ + mcs_rwlock_unlock_readers(lock); } else{ - lock->reader.next->locked = RWLOCK_UNLOCKED; + lock->reader.next->locked = MCS_RWLOCK_UNLOCKED; } out: preempt_enable(); } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_lock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_lock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock +#endif static void -rwlock_writer_lock(struct rwlock_lock *lock, struct rwlock_node_irqsave *node) +__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) { node->irqsave = cpu_disable_interrupt_save(); - rwlock_writer_lock_noirq(lock, &node->node); + __mcs_rwlock_writer_lock_noirq(lock, &node->node); } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_writer_unlock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_writer_unlock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock +#endif static void -rwlock_writer_unlock(struct rwlock_lock *lock, struct rwlock_node_irqsave *node) +__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) { - rwlock_writer_unlock_noirq(lock, 
&node->node); + __mcs_rwlock_writer_unlock_noirq(lock, &node->node); cpu_restore_interrupt(node->irqsave); } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_lock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_lock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock +#endif static void -rwlock_reader_lock(struct rwlock_lock *lock, struct rwlock_node_irqsave *node) +__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) { node->irqsave = cpu_disable_interrupt_save(); - rwlock_reader_lock_noirq(lock, &node->node); + __mcs_rwlock_reader_lock_noirq(lock, &node->node); } +#ifdef DEBUG_MCS_RWLOCK +#define mcs_rwlock_reader_unlock(l, n) { \ +__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \ +__mcs_rwlock_reader_unlock((l), (n)); \ +__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \ +} +#else +#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock +#endif static void -rwlock_reader_unlock(struct rwlock_lock *lock, struct rwlock_node_irqsave *node) +__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node) { - rwlock_reader_unlock_noirq(lock, &node->node); + __mcs_rwlock_reader_unlock_noirq(lock, &node->node); cpu_restore_interrupt(node->irqsave); } diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 19e374fa..44944e2b 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -2191,7 +2191,7 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t cpsize = remain; } - error = ihk_mc_pt_virt_to_phys(vm->page_table, from, &pa); + error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa); if (error) { return error; } @@ -2274,7 
+2274,7 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t cpsize = remain; } - error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa); + error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa); if (error) { return error; } @@ -2330,7 +2330,7 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t cpsize = remain; } - error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa); + error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa); if (error) { kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error); return error; diff --git a/arch/x86/kernel/syscall.c b/arch/x86/kernel/syscall.c index a1264368..a3a1d9cf 100644 --- a/arch/x86/kernel/syscall.c +++ b/arch/x86/kernel/syscall.c @@ -25,13 +25,13 @@ #include #include -void terminate(int, int, ihk_mc_user_context_t *); +void terminate(int, int); int copy_from_user(void *dst, const void *src, size_t siz); int copy_to_user(void *dst, const void *src, size_t siz); int write_process_vm(struct process_vm *vm, void *dst, const void *src, size_t siz); long do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact); long syscall(int num, ihk_mc_user_context_t *ctx); -extern void save_fp_regs(struct process *proc); +extern void save_fp_regs(struct thread *proc); //#define DEBUG_PRINT_SC @@ -46,12 +46,12 @@ uintptr_t debug_constants[] = { offsetof(struct cpu_local_var, current), offsetof(struct cpu_local_var, runq), offsetof(struct cpu_local_var, status), - offsetof(struct process, ctx), - offsetof(struct process, sched_list), - offsetof(struct process, ftn), - offsetof(struct fork_tree_node, status), - offsetof(struct fork_tree_node, pid), - offsetof(struct fork_tree_node, tid), + offsetof(struct thread, ctx), + offsetof(struct thread, sched_list), + offsetof(struct thread, proc), + offsetof(struct thread, tstatus), + offsetof(struct process, pid), + offsetof(struct thread, tid), -1, }; @@ -163,7 
+163,7 @@ struct sigsp { SYSCALL_DECLARE(rt_sigreturn) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct x86_user_context *regs; struct sigsp *sigsp; @@ -173,8 +173,8 @@ SYSCALL_DECLARE(rt_sigreturn) sigsp = (struct sigsp *)regs->gpr.rsp; if(copy_from_user(regs, &sigsp->regs, sizeof(struct x86_user_context))) return -EFAULT; - proc->sigmask.__val[0] = sigsp->sigmask; - proc->sigstack.ss_flags = sigsp->ssflags; + thread->sigmask.__val[0] = sigsp->sigmask; + thread->sigstack.ss_flags = sigsp->ssflags; if(sigsp->restart){ return syscall(sigsp->num, (ihk_mc_user_context_t *)regs); } @@ -182,38 +182,10 @@ SYSCALL_DECLARE(rt_sigreturn) } extern struct cpu_local_var *clv; -extern unsigned long do_kill(int pid, int tid, int sig, struct siginfo *info, int ptracecont); +extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); extern void interrupt_syscall(int all, int pid); extern int num_processors; -void -do_setpgid(int pid, int pgid) -{ - struct cpu_local_var *v; - struct process *p; - struct process *proc = cpu_local_var(current); - int i; - unsigned long irqstate; - - if(pid == 0) - pid = proc->ftn->pid; - if(pgid == 0) - pgid = pid; - - for(i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid <= 0) - continue; - if(p->ftn->pid == pid){ - p->ftn->pgid = pgid; - } - } - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - } -} - #define RFLAGS_MASK (RFLAGS_CF | RFLAGS_PF | RFLAGS_AF | RFLAGS_ZF | \ RFLAGS_SF | RFLAGS_TF | RFLAGS_DF | RFLAGS_OF | \ RFLAGS_NT | RFLAGS_RF | RFLAGS_AC) @@ -222,10 +194,10 @@ do_setpgid(int pid, int pgid) #define DB7_RESERVED_MASK (0xffffffff0000dc00UL) #define DB7_RESERVED_SET (0x400UL) -extern ihk_mc_user_context_t *lookup_user_context(struct process *proc); +extern ihk_mc_user_context_t 
*lookup_user_context(struct thread *thread); long -ptrace_read_user(struct process *proc, long addr, unsigned long *value) +ptrace_read_user(struct thread *thread, long addr, unsigned long *value) { unsigned long *p; struct x86_user_context *uctx; @@ -235,7 +207,7 @@ ptrace_read_user(struct process *proc, long addr, unsigned long *value) return -EIO; } else if (addr < sizeof(struct user_regs_struct)) { - uctx = lookup_user_context(proc); + uctx = lookup_user_context(thread); if (!uctx) { return -EIO; } @@ -253,11 +225,11 @@ ptrace_read_user(struct process *proc, long addr, unsigned long *value) if (offsetof(struct user, u_debugreg[0]) <= addr && addr < offsetof(struct user, u_debugreg[8])) { if (addr & (sizeof(*value) - 1)) return -EIO; - if (proc->ptrace_debugreg == NULL) { + if (thread->ptrace_debugreg == NULL) { kprintf("ptrace_read_user: missing ptrace_debugreg\n"); return -EFAULT; } - p = &proc->ptrace_debugreg[(addr - offsetof(struct user, u_debugreg[0])) / sizeof(*value)]; + p = &thread->ptrace_debugreg[(addr - offsetof(struct user, u_debugreg[0])) / sizeof(*value)]; *value = *p; return 0; } @@ -269,7 +241,7 @@ ptrace_read_user(struct process *proc, long addr, unsigned long *value) } long -ptrace_write_user(struct process *proc, long addr, unsigned long value) +ptrace_write_user(struct thread *thread, long addr, unsigned long value) { unsigned long *p; struct x86_user_context *uctx; @@ -279,7 +251,7 @@ ptrace_write_user(struct process *proc, long addr, unsigned long value) return -EIO; } else if (addr < sizeof(struct user_regs_struct)) { - uctx = lookup_user_context(proc); + uctx = lookup_user_context(thread); if (!uctx) { return -EIO; } @@ -302,11 +274,11 @@ ptrace_write_user(struct process *proc, long addr, unsigned long value) if (offsetof(struct user, u_debugreg[0]) <= addr && addr < offsetof(struct user, u_debugreg[8])) { if (addr & (sizeof(value) - 1)) return -EIO; - if (proc->ptrace_debugreg == NULL) { + if (thread->ptrace_debugreg == NULL) { 
kprintf("ptrace_write_user: missing ptrace_debugreg\n"); return -EFAULT; } - p = &proc->ptrace_debugreg[(addr - offsetof(struct user, u_debugreg[0])) / sizeof(value)]; + p = &thread->ptrace_debugreg[(addr - offsetof(struct user, u_debugreg[0])) / sizeof(value)]; if (addr == offsetof(struct user, u_debugreg[6])) { value &= ~DB6_RESERVED_MASK; value |= DB6_RESERVED_SET; @@ -325,16 +297,16 @@ ptrace_write_user(struct process *proc, long addr, unsigned long value) } long -alloc_debugreg(struct process *proc) +alloc_debugreg(struct thread *thread) { - proc->ptrace_debugreg = kmalloc(sizeof(*proc->ptrace_debugreg) * 8, IHK_MC_AP_NOWAIT); - if (proc->ptrace_debugreg == NULL) { + thread->ptrace_debugreg = kmalloc(sizeof(*thread->ptrace_debugreg) * 8, IHK_MC_AP_NOWAIT); + if (thread->ptrace_debugreg == NULL) { kprintf("alloc_debugreg: no memory.\n"); return -ENOMEM; } - memset(proc->ptrace_debugreg, '\0', sizeof(*proc->ptrace_debugreg) * 8); - proc->ptrace_debugreg[6] = DB6_RESERVED_SET; - proc->ptrace_debugreg[7] = DB7_RESERVED_SET; + memset(thread->ptrace_debugreg, '\0', sizeof(*thread->ptrace_debugreg) * 8); + thread->ptrace_debugreg[6] = DB6_RESERVED_SET; + thread->ptrace_debugreg[7] = DB7_RESERVED_SET; return 0; } @@ -381,50 +353,50 @@ clear_debugreg(void) asm("mov %0, %%db7" ::"r" (r)); } -void clear_single_step(struct process *proc) +void clear_single_step(struct thread *thread) { - proc->uctx->gpr.rflags &= ~RFLAGS_TF; + thread->uctx->gpr.rflags &= ~RFLAGS_TF; } -void set_single_step(struct process *proc) +void set_single_step(struct thread *thread) { - proc->uctx->gpr.rflags |= RFLAGS_TF; + thread->uctx->gpr.rflags |= RFLAGS_TF; } -long ptrace_read_fpregs(struct process *proc, void *fpregs) +long ptrace_read_fpregs(struct thread *thread, void *fpregs) { - save_fp_regs(proc); - if (proc->fp_regs == NULL) { + save_fp_regs(thread); + if (thread->fp_regs == NULL) { return -ENOMEM; } - return copy_to_user(fpregs, &proc->fp_regs->i387, + return copy_to_user(fpregs, 
&thread->fp_regs->i387, sizeof(struct i387_fxsave_struct)); } -long ptrace_write_fpregs(struct process *proc, void *fpregs) +long ptrace_write_fpregs(struct thread *thread, void *fpregs) { - save_fp_regs(proc); - if (proc->fp_regs == NULL) { + save_fp_regs(thread); + if (thread->fp_regs == NULL) { return -ENOMEM; } - return copy_from_user(&proc->fp_regs->i387, fpregs, + return copy_from_user(&thread->fp_regs->i387, fpregs, sizeof(struct i387_fxsave_struct)); } -long ptrace_read_regset(struct process *proc, long type, struct iovec *iov) +long ptrace_read_regset(struct thread *thread, long type, struct iovec *iov) { long rc = -EINVAL; switch (type) { case NT_X86_XSTATE: - save_fp_regs(proc); - if (proc->fp_regs == NULL) { + save_fp_regs(thread); + if (thread->fp_regs == NULL) { return -ENOMEM; } if (iov->iov_len > sizeof(fp_regs_struct)) { iov->iov_len = sizeof(fp_regs_struct); } - rc = copy_to_user(iov->iov_base, proc->fp_regs, iov->iov_len); + rc = copy_to_user(iov->iov_base, thread->fp_regs, iov->iov_len); break; default: kprintf("ptrace_read_regset: not supported type 0x%x\n", type); @@ -433,20 +405,20 @@ long ptrace_read_regset(struct process *proc, long type, struct iovec *iov) return rc; } -long ptrace_write_regset(struct process *proc, long type, struct iovec *iov) +long ptrace_write_regset(struct thread *thread, long type, struct iovec *iov) { long rc = -EINVAL; switch (type) { case NT_X86_XSTATE: - save_fp_regs(proc); - if (proc->fp_regs == NULL) { + save_fp_regs(thread); + if (thread->fp_regs == NULL) { return -ENOMEM; } if (iov->iov_len > sizeof(fp_regs_struct)) { iov->iov_len = sizeof(fp_regs_struct); } - rc = copy_from_user(proc->fp_regs, iov->iov_base, iov->iov_len); + rc = copy_from_user(thread->fp_regs, iov->iov_base, iov->iov_len); break; default: kprintf("ptrace_write_regset: not supported type 0x%x\n", type); @@ -455,47 +427,44 @@ long ptrace_write_regset(struct process *proc, long type, struct iovec *iov) return rc; } -extern void coredump(struct 
process *proc, void *regs); +extern void coredump(struct thread *thread, void *regs); -void ptrace_report_signal(struct process *proc, int sig) +void ptrace_report_signal(struct thread *thread, int sig) { - long rc; + struct mcs_rwlock_node_irqsave lock; + struct process *proc = thread->proc; + int parent_pid; + struct siginfo info; - dkprintf("ptrace_report_signal,pid=%d\n", proc->ftn->pid); + dkprintf("ptrace_report_signal,pid=%d\n", thread->proc->pid); - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - proc->ftn->exit_status = sig; - /* Transition process state */ - proc->ftn->status = PS_TRACED; - proc->ftn->ptrace &= ~PT_TRACE_SYSCALL_MASK; + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + if(!(proc->ptrace & PT_TRACED)){ + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); + return; + } + proc->exit_status = sig; + /* Transition thread state */ + proc->pstatus = PS_TRACED; + thread->tstatus = PS_TRACED; + proc->ptrace &= ~PT_TRACE_SYSCALL_MASK; if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU) { - proc->ftn->signal_flags |= SIGNAL_STOP_STOPPED; + proc->signal_flags |= SIGNAL_STOP_STOPPED; } else { - proc->ftn->signal_flags &= ~SIGNAL_STOP_STOPPED; + proc->signal_flags &= ~SIGNAL_STOP_STOPPED; } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - if (proc->ftn->parent) { - /* kill SIGCHLD */ - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - if (proc->ftn->parent->owner) { - struct siginfo info; + parent_pid = proc->parent->pid; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); - memset(&info, '\0', sizeof info); - info.si_signo = SIGCHLD; - info.si_code = CLD_TRAPPED; - info._sifields._sigchld.si_pid = proc->ftn->pid; - info._sifields._sigchld.si_status = proc->ftn->exit_status; - rc = do_kill(proc->ftn->parent->pid, -1, SIGCHLD, &info, 0); - if (rc < 0) { - kprintf("ptrace_report_signal,do_kill failed\n"); - } - } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - - /* Wake parent (if sleeping in wait4()) */ 
- waitq_wakeup(&proc->ftn->parent->waitpid_q); - } + memset(&info, '\0', sizeof info); + info.si_signo = SIGCHLD; + info.si_code = CLD_TRAPPED; + info._sifields._sigchld.si_pid = thread->proc->pid; + info._sifields._sigchld.si_status = thread->proc->exit_status; + do_kill(cpu_local_var(current), parent_pid, -1, SIGCHLD, &info, 0); + /* Wake parent (if sleeping in wait4()) */ + waitq_wakeup(&proc->parent->waitpid_q); dkprintf("ptrace_report_signal,sleeping\n"); /* Sleep */ @@ -505,6 +474,8 @@ void ptrace_report_signal(struct process *proc, int sig) static int isrestart(int num, unsigned long rc, int sig, int restart) { + if(sig == SIGKILL || sig == SIGSTOP) + return 0; if(num == 0 || rc != -EINTR) return 0; switch(num){ @@ -536,22 +507,23 @@ isrestart(int num, unsigned long rc, int sig, int restart) } void -do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pending *pending, int num) +do_signal(unsigned long rc, void *regs0, struct thread *thread, struct sig_pending *pending, int num) { struct x86_user_context *regs = regs0; struct k_sigaction *k; int sig; __sigset_t w; - int irqstate; - struct fork_tree_node *ftn = proc->ftn; + struct process *proc = thread->proc; int orgsig; int ptraceflag = 0; + struct mcs_rwlock_node_irqsave lock; + unsigned long irqstate; for(w = pending->sigmask.__val[0], sig = 0; w; sig++, w >>= 1); - dkprintf("do_signal,pid=%d,sig=%d\n", proc->ftn->pid, sig); + dkprintf("do_signal,pid=%d,sig=%d\n", proc->pid, sig); orgsig = sig; - if((ftn->ptrace & PT_TRACED) && + if((proc->ptrace & PT_TRACED) && pending->ptracecont == 0 && sig != SIGKILL) { ptraceflag = 1; @@ -566,39 +538,39 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin rc = regs->gpr.rax; } - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); - k = proc->sighandler->action + sig - 1; + irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); + k = thread->sigcommon->action + sig - 1; if(k->sa.sa_handler == SIG_IGN){ 
kfree(pending); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); return; } else if(k->sa.sa_handler){ unsigned long *usp; /* user stack */ struct sigsp *sigsp; - int ssflags = proc->sigstack.ss_flags; - unsigned long mask = (unsigned long)proc->sigmask.__val[0]; + int ssflags = thread->sigstack.ss_flags; + unsigned long mask = (unsigned long)thread->sigmask.__val[0]; if((k->sa.sa_flags & SA_ONSTACK) && - !(proc->sigstack.ss_flags & SS_DISABLE) && - !(proc->sigstack.ss_flags & SS_ONSTACK)){ + !(thread->sigstack.ss_flags & SS_DISABLE) && + !(thread->sigstack.ss_flags & SS_ONSTACK)){ unsigned long lsp; - lsp = ((unsigned long)(((char *)proc->sigstack.ss_sp) + proc->sigstack.ss_size)) & 0xfffffffffffffff8UL; + lsp = ((unsigned long)(((char *)thread->sigstack.ss_sp) + thread->sigstack.ss_size)) & 0xfffffffffffffff8UL; usp = (unsigned long *)lsp; - proc->sigstack.ss_flags |= SS_ONSTACK; + thread->sigstack.ss_flags |= SS_ONSTACK; } else{ usp = (unsigned long *)regs->gpr.rsp; } sigsp = ((struct sigsp *)usp) - 1; sigsp = (struct sigsp *)((unsigned long)sigsp & 0xfffffffffffffff0UL); - if(write_process_vm(proc->vm, &sigsp->regs, regs, sizeof(struct x86_user_context)) || - write_process_vm(proc->vm, &sigsp->sigrc, &rc, sizeof(long))){ + if(write_process_vm(thread->vm, &sigsp->regs, regs, sizeof(struct x86_user_context)) || + write_process_vm(thread->vm, &sigsp->sigrc, &rc, sizeof(long))){ kfree(pending); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); kprintf("do_signal,write_process_vm failed\n"); - terminate(0, sig, (ihk_mc_user_context_t *)regs->gpr.rsp); + terminate(0, sig); return; } sigsp->sigmask = mask; @@ -621,25 +593,25 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin regs->gpr.rip = (unsigned long)k->sa.sa_handler; regs->gpr.rsp = (unsigned long)usp; - proc->sigmask.__val[0] |= 
pending->sigmask.__val[0]; + thread->sigmask.__val[0] |= pending->sigmask.__val[0]; kfree(pending); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); } else { int coredumped = 0; siginfo_t info; if(ptraceflag){ - if(proc->ptrace_recvsig) - kfree(proc->ptrace_recvsig); - proc->ptrace_recvsig = pending; - if(proc->ptrace_sendsig) - kfree(proc->ptrace_sendsig); - proc->ptrace_sendsig = NULL; + if(thread->ptrace_recvsig) + kfree(thread->ptrace_recvsig); + thread->ptrace_recvsig = pending; + if(thread->ptrace_sendsig) + kfree(thread->ptrace_sendsig); + thread->ptrace_sendsig = NULL; } else kfree(pending); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); switch (sig) { case SIGSTOP: case SIGTSTP: @@ -648,49 +620,50 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin memset(&info, '\0', sizeof info); info.si_signo = SIGCHLD; info.si_code = CLD_STOPPED; - info._sifields._sigchld.si_pid = proc->ftn->pid; + info._sifields._sigchld.si_pid = thread->proc->pid; info._sifields._sigchld.si_status = (sig << 8) | 0x7f; - do_kill(proc->ftn->parent->pid, -1, SIGCHLD, &info, 0); + do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); if(ptraceflag){ - ptrace_report_signal(proc, orgsig); + ptrace_report_signal(thread, orgsig); } else{ dkprintf("do_signal,SIGSTOP,changing state\n"); - /* Update process state in fork tree */ - ihk_mc_spinlock_lock_noirq(&ftn->lock); - ftn->group_exit_status = SIGSTOP; + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + proc->group_exit_status = SIGSTOP; /* Reap and set new signal_flags */ - ftn->signal_flags = SIGNAL_STOP_STOPPED; + proc->signal_flags = SIGNAL_STOP_STOPPED; - ftn->status = PS_STOPPED; - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + proc->pstatus = PS_STOPPED; + thread->tstatus = 
PS_STOPPED; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&proc->ftn->parent->waitpid_q); + waitq_wakeup(&proc->parent->waitpid_q); dkprintf("do_signal,SIGSTOP,sleeping\n"); /* Sleep */ - proc->ftn->status = PS_STOPPED; schedule(); dkprintf("SIGSTOP(): woken up\n"); } break; case SIGTRAP: dkprintf("do_signal,SIGTRAP\n"); - if(!(ftn->ptrace & PT_TRACED)) { + if(!(proc->ptrace & PT_TRACED)) { goto core; } - /* Update process state in fork tree */ - ihk_mc_spinlock_lock_noirq(&ftn->lock); - ftn->exit_status = SIGTRAP; - ftn->status = PS_TRACED; - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + /* Update thread state in fork tree */ + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + proc->exit_status = SIGTRAP; + proc->pstatus = PS_TRACED; + thread->tstatus = PS_TRACED; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); /* Wake up the parent who tried wait4 and sleeping */ - waitq_wakeup(&proc->ftn->parent->waitpid_q); + waitq_wakeup(&thread->proc->parent->waitpid_q); /* Sleep */ dkprintf("do_signal,SIGTRAP,sleeping\n"); @@ -702,10 +675,10 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin memset(&info, '\0', sizeof info); info.si_signo = SIGCHLD; info.si_code = CLD_CONTINUED; - info._sifields._sigchld.si_pid = proc->ftn->pid; + info._sifields._sigchld.si_pid = proc->pid; info._sifields._sigchld.si_status = 0x0000ffff; - do_kill(proc->ftn->parent->pid, -1, SIGCHLD, &info, 0); - ftn->signal_flags = SIGNAL_STOP_CONTINUED; + do_kill(cpu_local_var(current), proc->parent->pid, -1, SIGCHLD, &info, 0); + proc->signal_flags = SIGNAL_STOP_CONTINUED; dkprintf("do_signal,SIGCONT,do nothing\n"); break; case SIGQUIT: @@ -717,23 +690,23 @@ do_signal(unsigned long rc, void *regs0, struct process *proc, struct sig_pendin case SIGSYS: core: dkprintf("do_signal,default,core,sig=%d\n", sig); - coredump(proc, regs); + coredump(thread, regs); coredumped = 0x80; - 
terminate(0, sig | coredumped, (ihk_mc_user_context_t *)regs->gpr.rsp); + terminate(0, sig | coredumped); break; case SIGCHLD: case SIGURG: break; default: dkprintf("do_signal,default,terminate,sig=%d\n", sig); - terminate(0, sig, (ihk_mc_user_context_t *)regs->gpr.rsp); + terminate(0, sig); break; } } } static struct sig_pending * -getsigpending(struct process *proc, int delflag){ +getsigpending(struct thread *thread, int delflag){ struct list_head *head; ihk_spinlock_t *lock; struct sig_pending *next; @@ -744,15 +717,15 @@ getsigpending(struct process *proc, int delflag){ int sig; struct k_sigaction *k; - w = proc->sigmask.__val[0]; + w = thread->sigmask.__val[0]; - lock = &proc->sigshared->lock; - head = &proc->sigshared->sigpending; + lock = &thread->sigcommon->lock; + head = &thread->sigcommon->sigpending; for(;;){ irqstate = ihk_mc_spinlock_lock(lock); list_for_each_entry_safe(pending, next, head, list){ for(x = pending->sigmask.__val[0], sig = 0; x; sig++, x >>= 1); - k = proc->sighandler->action + sig - 1; + k = thread->sigcommon->action + sig - 1; if(delflag || (sig != SIGCHLD && sig != SIGURG) || (k->sa.sa_handler != (void *)1 && @@ -767,45 +740,44 @@ getsigpending(struct process *proc, int delflag){ } ihk_mc_spinlock_unlock(lock, irqstate); - if(lock == &proc->sigpendinglock) + if(lock == &thread->sigpendinglock) return NULL; - lock = &proc->sigpendinglock; - head = &proc->sigpending; + lock = &thread->sigpendinglock; + head = &thread->sigpending; } return NULL; } struct sig_pending * -hassigpending(struct process *proc) +hassigpending(struct thread *thread) { - return getsigpending(proc, 0); + return getsigpending(thread, 0); } void check_signal(unsigned long rc, void *regs0, int num) { struct x86_user_context *regs = regs0; - struct process *proc; + struct thread *thread; struct sig_pending *pending; int irqstate; if(clv == NULL) return; - proc = cpu_local_var(current); - if(proc == NULL || proc->ftn->pid == 0){ - struct process *p; + thread = 
cpu_local_var(current); + + if(thread == NULL || thread == &cpu_local_var(idle)){ + struct thread *t; irqstate = ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock))); - list_for_each_entry(p, &(cpu_local_var(runq)), sched_list){ - if(p->ftn->pid <= 0) + list_for_each_entry(t, &(cpu_local_var(runq)), sched_list){ + if(t == &cpu_local_var(idle)) continue; - if(p->ftn->status == PS_INTERRUPTIBLE && - hassigpending(p)){ - p->ftn->status = PS_RUNNING; - ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate); - // schedule(); - return; + if(t->tstatus == PS_INTERRUPTIBLE && + hassigpending(t)){ + t->tstatus = PS_RUNNING; + break; } } ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), irqstate); @@ -817,24 +789,24 @@ check_signal(unsigned long rc, void *regs0, int num) } for(;;){ - pending = getsigpending(proc, 1); + pending = getsigpending(thread, 1); if(!pending) { dkprintf("check_signal,queue is empty\n"); return; } - do_signal(rc, regs, proc, pending, num); + do_signal(rc, regs, thread, pending, num); } } unsigned long -do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) +do_kill(struct thread *thread, int pid, int tid, int sig, siginfo_t *info, + int ptracecont) { dkprintf("do_kill,pid=%d,tid=%d,sig=%d\n", pid, tid, sig); struct cpu_local_var *v; - struct process *p; - struct process *proc = cpu_local_var(current); - struct process *tproc = NULL; + struct thread *t; + struct thread *tthread = NULL; int i; __sigset_t mask; struct list_head *head; @@ -865,9 +837,9 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) int sendme = 0; if(pid == 0){ - if(proc == NULL || proc->ftn->pid <= 0) + if(thread == NULL || thread->proc->pid <= 0) return -ESRCH; - pgid = proc->ftn->pgid; + pgid = thread->proc->pgid; } pids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT); if(!pids) @@ -875,32 +847,32 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) for(i = 0; i < num_processors; i++){ v = get_cpu_local_var(i); 
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ + list_for_each_entry(t, &(v->runq), sched_list){ int j; - if(p->ftn->pid <= 0) + if(t->proc->pid <= 0) continue; - if(pgid != 1 && p->ftn->pgid != pgid) + if(pgid != 1 && t->proc->pgid != pgid) continue; - if(proc && p->ftn->pid == proc->ftn->pid){ + if(thread && t->proc->pid == thread->proc->pid){ sendme = 1; continue; } for(j = 0; j < n; j++) - if(pids[j] == p->ftn->pid) + if(pids[j] == t->proc->pid) break; if(j == n){ - pids[n] = p->ftn->pid; + pids[n] = t->proc->pid; n++; } } ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); } for(i = 0; i < n; i++) - rc = do_kill(pids[i], -1, sig, info, ptracecont); + rc = do_kill(thread, pids[i], -1, sig, info, ptracecont); if(sendme) - rc = do_kill(proc->ftn->pid, -1, sig, info, ptracecont); + rc = do_kill(thread, thread->proc->pid, -1, sig, info, ptracecont); kfree(pids); return rc; @@ -908,18 +880,18 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) irqstate = cpu_disable_interrupt_save(); mask = __sigmask(sig); if(tid == -1){ - struct process *tproc0 = NULL; + struct thread *tthread0 = NULL; ihk_spinlock_t *savelock0 = NULL; for(i = 0; i < num_processors; i++){ v = get_cpu_local_var(i); found = 0; ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid){ - if(p->ftn->tid == pid || tproc == NULL){ - if(!(mask & p->sigmask.__val[0])){ - tproc = p; + list_for_each_entry(t, &(v->runq), sched_list){ + if(t->proc->pid == pid){ + if(t->tid == pid || tthread == NULL){ + if(!(mask & t->sigmask.__val[0])){ + tthread = t; if(!found && savelock) { ihk_mc_spinlock_unlock_noirq(savelock); } @@ -930,14 +902,14 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) savelock0 = NULL; } } - else if(tproc == NULL && tproc0 == NULL){ - tproc0 = p; + else if(tthread == NULL && tthread0 == NULL){ + tthread0 = t; found = 1; savelock0 = 
&(v->runq_lock); } } - if(!(mask & p->sigmask.__val[0])){ - if(p->ftn->tid == pid || tproc == NULL){ + if(!(mask & t->sigmask.__val[0])){ + if(t->tid == pid || tthread == NULL){ } } @@ -947,8 +919,8 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) ihk_mc_spinlock_unlock_noirq(&(v->runq_lock)); } } - if(tproc == NULL){ - tproc = tproc0; + if(tthread == NULL){ + tthread = tthread0; savelock = savelock0; } } @@ -957,12 +929,12 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) v = get_cpu_local_var(i); found = 0; ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid > 0 && - p->ftn->tid == tid){ + list_for_each_entry(t, &(v->runq), sched_list){ + if(t->proc->pid > 0 && + t->tid == tid){ savelock = &(v->runq_lock); found = 1; - tproc = p; + tthread = t; break; } } @@ -975,12 +947,12 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) v = get_cpu_local_var(i); found = 0; ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid && - p->ftn->tid == tid){ + list_for_each_entry(t, &(v->runq), sched_list){ + if(t->proc->pid == pid && + t->tid == tid){ savelock = &(v->runq_lock); found = 1; - tproc = p; + tthread = t; break; } } @@ -990,17 +962,18 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) } } - if(!tproc){ + if(!tthread){ cpu_restore_interrupt(irqstate); return -ESRCH; } if(sig != SIGCONT && - proc->ftn->euid != 0 && - proc->ftn->ruid != tproc->ftn->ruid && - proc->ftn->euid != tproc->ftn->ruid && - proc->ftn->ruid != tproc->ftn->suid && - proc->ftn->euid != tproc->ftn->suid){ + thread && + thread->proc->euid != 0 && + thread->proc->ruid != tthread->proc->ruid && + thread->proc->euid != tthread->proc->ruid && + thread->proc->ruid != tthread->proc->suid && + thread->proc->euid != tthread->proc->suid){ ihk_mc_spinlock_unlock_noirq(savelock); 
cpu_restore_interrupt(irqstate); return -EPERM; @@ -1014,20 +987,20 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) doint = 0; if(tid == -1){ - ihk_mc_spinlock_lock_noirq(&tproc->sigshared->lock); - head = &tproc->sigshared->sigpending; + ihk_mc_spinlock_lock_noirq(&tthread->sigcommon->lock); + head = &tthread->sigcommon->sigpending; } else{ - ihk_mc_spinlock_lock_noirq(&tproc->sigpendinglock); - head = &tproc->sigpending; + ihk_mc_spinlock_lock_noirq(&tthread->sigpendinglock); + head = &tthread->sigpending; } /* Put signal event even when handler is SIG_IGN or SIG_DFL - because target ptraced process must call ptrace_report_signal + because target ptraced thread must call ptrace_report_signal in check_signal */ rc = 0; - k = tproc->sighandler->action + sig - 1; - if((sig != SIGKILL && (tproc->ftn->ptrace & PT_TRACED)) || + k = tthread->sigcommon->action + sig - 1; + if((sig != SIGKILL && (tthread->proc->ptrace & PT_TRACED)) || (k->sa.sa_handler != (void *)1 && (k->sa.sa_handler != NULL || (sig != SIGCHLD && sig != SIGURG)))){ @@ -1055,42 +1028,42 @@ do_kill(int pid, int tid, int sig, siginfo_t *info, int ptracecont) list_add(&pending->list, head); else list_add_tail(&pending->list, head); - tproc->sigevent = 1; + tthread->sigevent = 1; } } } if(tid == -1){ - ihk_mc_spinlock_unlock_noirq(&tproc->sigshared->lock); + ihk_mc_spinlock_unlock_noirq(&tthread->sigcommon->lock); } else{ - ihk_mc_spinlock_unlock_noirq(&tproc->sigpendinglock); + ihk_mc_spinlock_unlock_noirq(&tthread->sigpendinglock); } - if (doint && !(mask & tproc->sigmask.__val[0])) { - int cpuid = tproc->cpu_id; - int pid = tproc->ftn->pid; - int status = tproc->ftn->status; + if (doint && !(mask & tthread->sigmask.__val[0])) { + int cpuid = tthread->cpu_id; + int pid = tthread->proc->pid; + int status = tthread->tstatus; - if (proc != tproc) { + if (thread != tthread) { dkprintf("do_kill,ipi,pid=%d,cpu_id=%d\n", - tproc->ftn->pid, tproc->cpu_id); - 
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(tproc->cpu_id)->apic_id, 0xd0); + tthread->proc->pid, tthread->cpu_id); + ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(tthread->cpu_id)->apic_id, 0xd0); } ihk_mc_spinlock_unlock_noirq(savelock); cpu_restore_interrupt(irqstate); - if(!tproc->nohost) + if(!tthread->proc->nohost) interrupt_syscall(pid, cpuid); if (status != PS_RUNNING) { if(sig == SIGKILL){ /* Wake up the target only when stopped by ptrace-reporting */ - sched_wakeup_process(tproc, PS_TRACED | PS_STOPPED); + sched_wakeup_thread(tthread, PS_TRACED | PS_STOPPED); } else if(sig == SIGCONT || ptracecont){ /* Wake up the target only when stopped by SIGSTOP */ - sched_wakeup_process(tproc, PS_STOPPED); + sched_wakeup_thread(tthread, PS_STOPPED); } } } @@ -1105,15 +1078,15 @@ void set_signal(int sig, void *regs0, siginfo_t *info) { struct x86_user_context *regs = regs0; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); - if(proc == NULL || proc->ftn->pid == 0) + if(thread == NULL || thread->proc->pid == 0) return; - if((__sigmask(sig) & proc->sigmask.__val[0]) || + if((__sigmask(sig) & thread->sigmask.__val[0]) || (regs->gpr.rsp & 0x8000000000000000)){ - coredump(proc, regs0); - terminate(0, sig | 0x80, (ihk_mc_user_context_t *)regs->gpr.rsp); + coredump(thread, regs0); + terminate(0, sig | 0x80); } - do_kill(proc->ftn->pid, proc->ftn->tid, sig, info, 0); + do_kill(thread, thread->proc->pid, thread->tid, sig, info, 0); } diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 5f603f91..6a0aeba4 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -183,6 +183,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp) desc = malloc(sizeof(struct program_load_desc) + sizeof(struct program_image_section) * nhdrs); + desc->shell_path[0] = '\0'; fseek(fp, hdr.e_phoff, SEEK_SET); j = 0; desc->num_sections = nhdrs; @@ -1822,6 +1823,7 @@ fork_child_sync_pipe: /* Parent */ default: 
+fprintf(stderr, "fork %d->%d\n", getpid(), pid); fs->pid = pid; while ((rc = sem_trywait(&fs->sem)) == -1 && (errno == EAGAIN || errno == EINTR)) { int st; @@ -1870,6 +1872,7 @@ fork_err: siginfo_t info; int opt; +fprintf(stderr, "wait4: pid=%d\n", pid); opt = WEXITED | (options & WNOWAIT); memset(&info, '\0', sizeof info); while((ret = waitid(P_PID, pid, &info, opt)) == -1 && @@ -1879,7 +1882,7 @@ fork_err: } if(ret != pid) { - fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]); + fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno); } do_syscall_return(fd, cpu, ret, 0, 0, 0, 0); diff --git a/kernel/debug.c b/kernel/debug.c index c2a1452e..a58aff0d 100644 --- a/kernel/debug.c +++ b/kernel/debug.c @@ -28,7 +28,7 @@ void kputs(char *buf) int len = strlen(buf); unsigned long flags; - flags = ihk_mc_spinlock_lock(&kmsg_lock); + flags = __ihk_mc_spinlock_lock(&kmsg_lock); if (len + kmsg_buf.tail > kmsg_buf.len) { kmsg_buf.tail = 0; @@ -40,19 +40,19 @@ void kputs(char *buf) memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len); kmsg_buf.tail += len; - ihk_mc_spinlock_unlock(&kmsg_lock, flags); + __ihk_mc_spinlock_unlock(&kmsg_lock, flags); } #define KPRINTF_LOCAL_BUF_LEN 1024 unsigned long kprintf_lock(void) { - return ihk_mc_spinlock_lock(&kmsg_lock); + return __ihk_mc_spinlock_lock(&kmsg_lock); } void kprintf_unlock(unsigned long irqflags) { - ihk_mc_spinlock_unlock(&kmsg_lock, irqflags); + __ihk_mc_spinlock_unlock(&kmsg_lock, irqflags); } /* Caller must hold kmsg_lock! */ @@ -85,7 +85,7 @@ int kprintf(const char *format, ...) unsigned long flags; char buf[KPRINTF_LOCAL_BUF_LEN]; - flags = ihk_mc_spinlock_lock(&kmsg_lock); + flags = __ihk_mc_spinlock_lock(&kmsg_lock); /* Copy into the local buf */ len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id()); @@ -101,7 +101,7 @@ int kprintf(const char *format, ...) 
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len); kmsg_buf.tail += len; - ihk_mc_spinlock_unlock(&kmsg_lock, flags); + __ihk_mc_spinlock_unlock(&kmsg_lock, flags); return len; } diff --git a/kernel/fileobj.c b/kernel/fileobj.c index 10a1fe92..64331179 100644 --- a/kernel/fileobj.c +++ b/kernel/fileobj.c @@ -387,7 +387,7 @@ out: static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag) { - struct process *proc = cpu_local_var(current); + struct thread *proc = cpu_local_var(current); struct fileobj *obj = to_fileobj(memobj); int error; void *virt = NULL; diff --git a/kernel/futex.c b/kernel/futex.c index 2174d6cc..dc801ab4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -103,7 +103,7 @@ int futex_cmpxchg_enabled; struct futex_q { struct plist_node list; - struct process *task; + struct thread *task; ihk_spinlock_t *lock_ptr; union futex_key key; union futex_key *requeue_pi_key; @@ -243,7 +243,7 @@ static int get_futex_value_locked(uint32_t *dest, uint32_t *from) */ static void wake_futex(struct futex_q *q) { - struct process *p = q->task; + struct thread *p = q->task; /* * We set q->lock_ptr = NULL _before_ we wake up the task. If @@ -263,7 +263,7 @@ static void wake_futex(struct futex_q *q) barrier(); q->lock_ptr = NULL; - sched_wakeup_process(p, PS_NORMAL); + sched_wakeup_thread(p, PS_NORMAL); } /* @@ -658,7 +658,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q * queue_me() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. 
*/ - xchg4(&(cpu_local_var(current)->ftn->status), PS_INTERRUPTIBLE); + xchg4(&(cpu_local_var(current)->tstatus), PS_INTERRUPTIBLE); queue_me(q, hb); if (!plist_node_empty(&q->list)) { @@ -674,7 +674,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q } /* This does not need to be serialized */ - cpu_local_var(current)->ftn->status = PS_RUNNING; + cpu_local_var(current)->tstatus = PS_RUNNING; return time_remain; } diff --git a/kernel/host.c b/kernel/host.c index f5351048..d75cd6f5 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -39,11 +39,11 @@ #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #endif -void check_mapping_for_proc(struct process *proc, unsigned long addr) +void check_mapping_for_proc(struct thread *thread, unsigned long addr) { unsigned long __phys; - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, (void*)addr, &__phys)) { + if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void*)addr, &__phys)) { kprintf("check_map: no mapping for 0x%lX\n", addr); } else { @@ -60,7 +60,7 @@ void check_mapping_for_proc(struct process *proc, unsigned long addr) * NOTE: if args, args_len, envs, envs_len are zero, * the function constructs them based on the descriptor */ -int prepare_process_ranges_args_envs(struct process *proc, +int prepare_process_ranges_args_envs(struct thread *thread, struct program_load_desc *pn, struct program_load_desc *p, enum ihk_mc_pt_attribute attr, @@ -81,6 +81,9 @@ int prepare_process_ranges_args_envs(struct process *proc, uintptr_t interp_obase = -1; uintptr_t interp_nbase = -1; size_t map_size; + struct process *proc = thread->proc; + struct process_vm *vm = proc->vm; + struct address_space *as = vm->address_space; n = p->num_sections; @@ -89,7 +92,7 @@ int prepare_process_ranges_args_envs(struct process *proc, if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) { interp_obase = pn->sections[i].vaddr; interp_obase -= (interp_obase % pn->interp_align); - 
interp_nbase = proc->vm->region.map_start; + interp_nbase = vm->region.map_start; interp_nbase = (interp_nbase + pn->interp_align - 1) & ~(pn->interp_align - 1); } @@ -114,7 +117,7 @@ int prepare_process_ranges_args_envs(struct process *proc, } up = virt_to_phys(up_v); - if (add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0) { + if (add_process_memory_range(vm, s, e, up, flags, NULL, 0) != 0) { ihk_mc_free_pages(up_v, range_npages); kprintf("ERROR: adding memory range for ELF section %i\n", i); goto err; @@ -123,14 +126,14 @@ int prepare_process_ranges_args_envs(struct process *proc, { void *_virt = (void *)s; unsigned long _phys; - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, + if (ihk_mc_pt_virt_to_phys(as->page_table, _virt, &_phys)) { kprintf("ERROR: no mapping for 0x%lX\n", _virt); } for (_virt = (void *)s + PAGE_SIZE; (unsigned long)_virt < e; _virt += PAGE_SIZE) { unsigned long __phys; - if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, + if (ihk_mc_pt_virt_to_phys(as->page_table, _virt, &__phys)) { kprintf("ERROR: no mapping for 0x%lX\n", _virt); panic("mapping"); @@ -149,23 +152,23 @@ int prepare_process_ranges_args_envs(struct process *proc, /* TODO: Maybe we need flag */ if (pn->sections[i].interp) { - proc->vm->region.map_end = e; + vm->region.map_end = e; } else if (i == 0) { - proc->vm->region.text_start = s; - proc->vm->region.text_end = e; + vm->region.text_start = s; + vm->region.text_end = e; } else if (i == 1) { - proc->vm->region.data_start = s; - proc->vm->region.data_end = e; + vm->region.data_start = s; + vm->region.data_end = e; } else { - proc->vm->region.data_start = - (s < proc->vm->region.data_start ? - s : proc->vm->region.data_start); - proc->vm->region.data_end = - (e > proc->vm->region.data_end ? - e : proc->vm->region.data_end); + vm->region.data_start = + (s < vm->region.data_start ? + s : vm->region.data_start); + vm->region.data_end = + (e > vm->region.data_end ? 
+ e : vm->region.data_end); } } @@ -173,17 +176,17 @@ int prepare_process_ranges_args_envs(struct process *proc, pn->entry -= interp_obase; pn->entry += interp_nbase; p->entry = pn->entry; - ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, - pn->entry); + ihk_mc_modify_user_context(thread->uctx, + IHK_UCR_PROGRAM_COUNTER, + pn->entry); } - proc->vm->region.brk_start = proc->vm->region.brk_end = - proc->vm->region.data_end; + vm->region.brk_start = vm->region.brk_end = vm->region.data_end; /* Map, copy and update args and envs */ flags = VR_PROT_READ | VR_PROT_WRITE; flags |= VRFLAG_PROT_TO_MAXPROT(flags); - addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT; + addr = vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT; e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT; if((args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, IHK_MC_AP_NOWAIT)) == NULL){ @@ -192,7 +195,7 @@ int prepare_process_ranges_args_envs(struct process *proc, } args_envs_p = virt_to_phys(args_envs); - if(add_process_memory_range(proc, addr, e, args_envs_p, + if(add_process_memory_range(vm, addr, e, args_envs_p, flags, NULL, 0) != 0){ ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT); kprintf("ERROR: adding memory range for args/envs\n"); @@ -305,10 +308,10 @@ int prepare_process_ranges_args_envs(struct process *proc, dkprintf("env OK\n"); - p->rprocess = (unsigned long)proc; - p->rpgtable = virt_to_phys(proc->vm->page_table); + p->rprocess = (unsigned long)thread; + p->rpgtable = virt_to_phys(as->page_table); - if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) { + if (init_process_stack(thread, pn, argc, argv, envc, env) != 0) { goto err; } @@ -327,7 +330,9 @@ static int process_msg_prepare_process(unsigned long rphys) unsigned long phys, sz; struct program_load_desc *p, *pn; int npages, n; + struct thread *thread; struct process *proc; + struct process_vm *vm; enum ihk_mc_pt_attribute attr; attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER; @@ -354,41 
+359,43 @@ static int process_msg_prepare_process(unsigned long rphys) memcpy_long(pn, p, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * n); - if((proc = create_process(p->entry)) == NULL){ + if((thread = create_thread(p->entry)) == NULL){ ihk_mc_free(pn); ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_memory(NULL, phys, sz); return -ENOMEM; } - proc->ftn->pid = pn->pid; - proc->ftn->pgid = pn->pgid; + proc = thread->proc; + vm = thread->vm; - proc->ftn->ruid = pn->cred[0]; - proc->ftn->euid = pn->cred[1]; - proc->ftn->suid = pn->cred[2]; - proc->ftn->fsuid = pn->cred[3]; - proc->ftn->rgid = pn->cred[4]; - proc->ftn->egid = pn->cred[5]; - proc->ftn->sgid = pn->cred[6]; - proc->ftn->fsgid = pn->cred[7]; + proc->pid = pn->pid; + proc->pgid = pn->pgid; + proc->ruid = pn->cred[0]; + proc->euid = pn->cred[1]; + proc->suid = pn->cred[2]; + proc->fsuid = pn->cred[3]; + proc->rgid = pn->cred[4]; + proc->egid = pn->cred[5]; + proc->sgid = pn->cred[6]; + proc->fsgid = pn->cred[7]; - proc->vm->region.user_start = pn->user_start; - proc->vm->region.user_end = pn->user_end; - proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK; - proc->vm->region.map_end = proc->vm->region.map_start; + vm->region.user_start = pn->user_start; + vm->region.user_end = pn->user_end; + vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK; + vm->region.map_end = proc->vm->region.map_start; memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); /* TODO: Clear it at the proper timing */ cpu_local_var(scp).post_idx = 0; - if (prepare_process_ranges_args_envs(proc, pn, p, attr, + if (prepare_process_ranges_args_envs(thread, pn, p, attr, NULL, 0, NULL, 0) != 0) { kprintf("error: preparing process ranges, args, envs, stack\n"); goto err; } - dkprintf("new process : %p [%d] / table : %p\n", proc, proc->ftn->pid, - proc->vm->page_table); + dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid, + vm->address_space->page_table); 
ihk_mc_free(pn); @@ -401,8 +408,7 @@ err: ihk_mc_free(pn); ihk_mc_unmap_virtual(p, npages, 1); ihk_mc_unmap_memory(NULL, phys, sz); - free_process_memory(proc); - destroy_process(proc); + destroy_thread(thread); return -ENOMEM; } @@ -476,8 +482,8 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c, ihk_ikc_send(c, packet, 0); } -extern unsigned long do_kill(int, int, int, struct siginfo *, int ptracecont); -extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid); +extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont); +extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid); extern void process_procfs_request(unsigned long rarg); extern int memcheckall(); @@ -492,6 +498,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, struct ikc_scd_packet *packet = __packet; struct ikc_scd_packet pckt; int rc; + struct thread *thread; struct process *proc; struct mcctrl_signal { int cond; @@ -539,13 +546,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, return -1; } dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg); - proc = (struct process *)packet->arg; + thread = (struct thread *)packet->arg; + proc = thread->proc; - settid(proc, 0, cpuid, -1); - proc->ftn->status = PS_RUNNING; - runq_add_proc(proc, cpuid); + settid(thread, 0, cpuid, -1); + proc->pstatus = PS_RUNNING; + thread->tstatus = PS_RUNNING; + chain_thread(thread); + chain_process(proc); + runq_add_thread(thread, cpuid); - //cpu_local_var(next) = (struct process *)packet->arg; + //cpu_local_var(next) = (struct thread *)packet->arg; return 0; case SCD_MSG_SEND_SIGNAL: pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal)); @@ -559,7 +570,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, pckt.arg = packet->arg; syscall_channel_send(c, &pckt); - rc = do_kill(info.pid, info.tid, info.sig, &info.info, 0); + rc = do_kill(NULL, info.pid, 
info.tid, info.sig, &info.info, 0); kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc); return 0; case SCD_MSG_PROCFS_REQUEST: diff --git a/kernel/include/cls.h b/kernel/include/cls.h index 2ccaf187..eb1070db 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -41,13 +41,14 @@ struct cpu_local_var { struct malloc_header free_list; ihk_spinlock_t free_list_lock; - struct process idle; - struct fork_tree_node idle_ftn; + struct thread idle; + struct process idle_proc; struct process_vm idle_vm; + struct address_space idle_asp; ihk_spinlock_t runq_lock; unsigned long runq_irqstate; - struct process *current; + struct thread *current; struct list_head runq; size_t runq_len; @@ -58,6 +59,7 @@ struct cpu_local_var { struct ihk_ikc_channel_desc *syscall_channel2; struct syscall_params scp2; struct ikc_scd_init_param iip2; + struct resource_set *resource_set; int status; int fs; diff --git a/kernel/include/kmalloc.h b/kernel/include/kmalloc.h index c7d4b800..6f523ec8 100644 --- a/kernel/include/kmalloc.h +++ b/kernel/include/kmalloc.h @@ -14,8 +14,18 @@ #define __HEADER_KMALLOC_H #include +#include -#define kmalloc(size, flag) _kmalloc(size, flag, __FILE__, __LINE__) +void panic(const char *); +int kprintf(const char *format, ...); + +#define kmalloc(size, flag) ({\ +void *r = _kmalloc(size, flag, __FILE__, __LINE__);\ +if(r == NULL){\ +kprintf("kmalloc: out of memory %s:%d no_preempt=%d\n", __FILE__, __LINE__, cpu_local_var(no_preempt)); \ +}\ +r;\ +}) #define kfree(ptr) _kfree(ptr, __FILE__, __LINE__) #define memcheck(ptr, msg) _memcheck(ptr, msg, __FILE__, __LINE__, 0) void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line); diff --git a/kernel/include/process.h b/kernel/include/process.h index 53dde058..82b59afd 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -51,6 +51,7 @@ #define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4) #define 
VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4) +// struct process.status, struct thread.status #define PS_RUNNING 0x1 #define PS_INTERRUPTIBLE 0x2 #define PS_UNINTERRUPTIBLE 0x4 @@ -58,15 +59,19 @@ #define PS_EXITED 0x10 #define PS_STOPPED 0x20 #define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */ +#define PS_STOPPING 0x80 +#define PS_TRACING 0x100 #define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE) +// struct process.ptrace #define PT_TRACED 0x80 /* The process is ptraced */ #define PT_TRACE_EXEC 0x100 /* Trace execve(2) */ #define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */ #define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */ #define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT) +// ptrace(2) request #define PTRACE_TRACEME 0 #define PTRACE_PEEKTEXT 1 #define PTRACE_PEEKDATA 2 @@ -95,6 +100,7 @@ #define PTRACE_GETREGSET 0x4204 #define PTRACE_SETREGSET 0x4205 +// ptrace(2) options #define PTRACE_O_TRACESYSGOOD 1 #define PTRACE_O_TRACEFORK 2 #define PTRACE_O_TRACEVFORK 4 @@ -104,6 +110,7 @@ #define PTRACE_O_TRACEEXIT 0x40 #define PTRACE_O_MASK 0x7f +// ptrace(2) events #define PTRACE_EVENT_FORK 1 #define PTRACE_EVENT_VFORK 2 #define PTRACE_EVENT_CLONE 3 @@ -158,6 +165,66 @@ #include #include +struct resource_set; +struct process_hash; +struct thread_hash; +struct address_space; +struct process; +struct thread; +struct process_vm; +struct vm_regions; +struct vm_range; + +#define HASH_SIZE 73 + +struct resource_set { + struct list_head list; + char *path; + struct process_hash *process_hash; + struct thread_hash *thread_hash; + struct list_head phys_mem_list; + mcs_rwlock_lock_t phys_mem_lock; + cpu_set_t cpu_set; + mcs_rwlock_lock_t cpu_set_lock; + struct process *pid1; +}; + +extern struct list_head resource_set_list; +extern mcs_rwlock_lock_t resource_set_lock; + +struct process_hash { + struct list_head list[HASH_SIZE]; + mcs_rwlock_lock_t lock[HASH_SIZE]; +}; + +static 
inline int +process_hash(int pid) +{ + return pid % HASH_SIZE; +} + +static inline int +thread_hash(int tid) +{ + return tid % HASH_SIZE; +} + +struct thread_hash { + struct list_head list[HASH_SIZE]; + mcs_rwlock_lock_t lock[HASH_SIZE]; +}; + +struct address_space { + struct page_table *page_table; + struct list_head siblings_list; + struct resource_set *res; + int type; +#define ADDRESS_SPACE_NORMAL 1 +#define ADDRESS_SPACE_PVAS 2 + int nslots; + int pids[]; +}; + struct user_fpregs_struct { unsigned short cwd; @@ -234,6 +301,7 @@ struct vm_range { }; struct vm_regions { + unsigned long vm_start, vm_end; unsigned long text_start, text_end; unsigned long data_start, data_end; unsigned long brk_start, brk_end; @@ -252,11 +320,12 @@ struct sigfd { #define SFD_CLOEXEC 02000000 #define SFD_NONBLOCK 04000 -struct sig_handler { +struct sig_common { ihk_spinlock_t lock; - ihk_atomic_t use; + ihk_atomic_t use; struct sigfd *sigfd; struct k_sigaction action[_NSIG]; + struct list_head sigpending; }; struct sig_pending { @@ -266,27 +335,60 @@ struct sig_pending { int ptracecont; }; -struct sig_shared { - ihk_spinlock_t lock; - ihk_atomic_t use; - struct list_head sigpending; -}; - typedef void pgio_func_t(void *arg); /* Represents a node in the process fork tree, it may exist even after the * corresponding process exited due to references from the parent and/or * children and is used for implementing wait/waitpid without having a * special "init" process */ -struct fork_tree_node { - ihk_spinlock_t lock; - ihk_atomic_t refcount; - int exit_status; - int status; +struct process { + struct list_head hash_list; + mcs_rwlock_lock_t update_lock; // lock for parent, status, ...? - struct process *owner; + // process vm + struct process_vm *vm; + + // threads and children + struct list_head threads_list; + mcs_rwlock_lock_t threads_lock; // lock for threads_list + + /* The ptracing process behave as the parent of the ptraced process + after using PTRACE_ATTACH except getppid. 
So we save it here. */ + struct process *parent; + struct process *ppid_parent; + struct list_head children_list; + struct list_head ptraced_children_list; + mcs_rwlock_lock_t children_lock; // lock for children_list and ptraced_children_list + struct list_head siblings_list; // lock parent + struct list_head ptraced_siblings_list; // lock ppid_parent + + ihk_atomic_t refcount; + + // process status and exit status + int pstatus; // PS_RUNNING -> PS_EXITED -> PS_ZOMBIE + // | ^ ^ + // | |---+ | + // V | | + // PS_STOPPING | | + // (PS_TRACING)| | + // | | | + // V +---- | + // PS_STOPPED -----+ + // (PS_TRACED) + int exit_status; + + /* Store exit_status for a group of threads when stopped by SIGSTOP. + exit_status can't be used because values of exit_status of threads + might divert while the threads are exiting by group_exit(). */ + int group_exit_status; + + /* Manage ptraced processes in the separate list to make it easy to + restore the orginal parent child relationship when + performing PTRACE_DETACH */ + struct waitq waitpid_q; + + // process info and credentials etc. int pid; - int tid; int pgid; int ruid; int euid; @@ -296,50 +398,36 @@ struct fork_tree_node { int egid; int sgid; int fsgid; - - struct fork_tree_node *parent; - struct list_head children; - struct list_head siblings_list; - - /* The ptracing process behave as the parent of the ptraced process - after using PTRACE_ATTACH except getppid. So we save it here. */ - struct fork_tree_node *ppid_parent; + int execed; + int nohost; + struct rlimit rlimit[MCK_RLIM_MAX]; + unsigned long saved_auxv[AUXV_LEN]; + char *saved_cmdline; + long saved_cmdline_len; - /* Manage ptraced processes in the separate list to make it easy to - restore the orginal parent child relationship when - performing PTRACE_DETACH */ - struct list_head ptrace_children; - struct list_head ptrace_siblings_list; + /* Store ptrace flags. + * The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request. 
+ * Other bits are for inner use of the McKernel. + */ + int ptrace; - struct waitq waitpid_q; + /* Store ptrace event message. + * PTRACE_O_xxx will store event message here. + * PTRACE_GETEVENTMSG will get from here. + */ + unsigned long ptrace_eventmsg; - /* Store exit_status for a group of threads when stopped by SIGSTOP. - exit_status can't be used because values of exit_status of threads - might divert while the threads are exiting by group_exit(). */ - int group_exit_status; + /* Store event related to signal. For example, + it represents that the proceess has been resumed by SIGCONT. */ + int signal_flags; - /* Store ptrace flags. - * The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request. - * Other bits are for inner use of the McKernel. - */ - int ptrace; + /* Store signal sent to parent when the process terminates. */ + int termsig; - /* Store ptrace event message. - PTRACE_O_xxx will store event message here. - PTRACE_GETEVENTMSG will get from here. - */ - unsigned long ptrace_eventmsg; - - /* Store event related to signal. For example, - it represents that the proceess has been resumed by SIGCONT. */ - int signal_flags; - - /* Store signal sent to parent when the process terminates. 
*/ - int termsig; }; -void hold_fork_tree_node(struct fork_tree_node *ftn); -void release_fork_tree_node(struct fork_tree_node *ftn); +void hold_thread(struct thread *ftn); +void release_thread(struct thread *ftn); /* * Scheduling policies @@ -364,101 +452,109 @@ struct sched_param { int sched_priority; }; -struct process { +struct thread { + struct list_head hash_list; + // thread info int cpu_id; + int tid; + int tstatus; - ihk_atomic_t refcount; + // process vm struct process_vm *vm; + // context ihk_mc_kernel_context_t ctx; ihk_mc_user_context_t *uctx; + // sibling + struct process *proc; + struct list_head siblings_list; // lock process + // Runqueue list entry - struct list_head sched_list; + struct list_head sched_list; // lock cls int sched_policy; struct sched_param sched_param; ihk_spinlock_t spin_sleep_lock; int spin_sleep; - struct thread { + ihk_atomic_t refcount; + + struct { int *clear_child_tid; unsigned long tlsblock_base, tlsblock_limit; } thread; - volatile int sigevent; - int nohost; - int execed; + // thread info + cpu_set_t cpu_set; + fp_regs_struct *fp_regs; + int in_syscall_offload; + + // signal + struct sig_common *sigcommon; sigset_t sigmask; stack_t sigstack; - ihk_spinlock_t sigpendinglock; struct list_head sigpending; - struct sig_shared *sigshared; - struct sig_handler *sighandler; + ihk_spinlock_t sigpendinglock; + volatile int sigevent; - struct rlimit rlimit[MCK_RLIM_MAX]; + // gpio pgio_func_t *pgio_fp; void *pgio_arg; - struct fork_tree_node *ftn; - - cpu_set_t cpu_set; - unsigned long saved_auxv[AUXV_LEN]; - + // for ptrace unsigned long *ptrace_debugreg; /* debug registers for ptrace */ struct sig_pending *ptrace_recvsig; struct sig_pending *ptrace_sendsig; - fp_regs_struct *fp_regs; - char *saved_cmdline; - long saved_cmdline_len; - int in_syscall_offload; }; struct process_vm { - ihk_atomic_t refcount; - - struct page_table *page_table; + struct address_space *address_space; struct list_head vm_range_list; struct vm_regions 
region; - struct process *owner_process; /* process that reside on the same page */ + struct process *proc; /* process that reside on the same page */ - ihk_spinlock_t page_table_lock; - ihk_spinlock_t memory_range_lock; + ihk_spinlock_t page_table_lock; + ihk_spinlock_t memory_range_lock; // to protect the followings: // 1. addition of process "memory range" (extend_process_region, add_process_memory_range) // 2. addition of process page table (allocate_pages, update_process_page_table) // note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc) // is protected by its own lock (see ihk/manycore/generic/page_alloc.c) + ihk_atomic_t refcount; cpu_set_t cpu_set; ihk_spinlock_t cpu_set_lock; int exiting; }; -struct process *create_process(unsigned long user_pc); -struct process *clone_process(struct process *org, unsigned long pc, +struct thread *create_thread(unsigned long user_pc); +struct thread *clone_thread(struct thread *org, unsigned long pc, unsigned long sp, int clone_flags); -void destroy_process(struct process *proc); -void hold_process(struct process *proc); -void release_process(struct process *proc); -void flush_process_memory(struct process *proc); -void free_process_memory(struct process *proc); -void free_process_memory_ranges(struct process *proc); -int populate_process_memory(struct process *proc, void *start, size_t len); +void destroy_thread(struct thread *thread); +void hold_thread(struct thread *thread); +void release_thread(struct thread *thread); +void flush_process_memory(struct process_vm *vm); +void hold_process_vm(struct process_vm *vm); +void release_process_vm(struct process_vm *vm); +void hold_process(struct process *); +void release_process(struct process *); +void free_process_memory_ranges(struct process_vm *vm); +int populate_process_memory(struct process_vm *vm, void *start, size_t len); -int add_process_memory_range(struct process *process, +int add_process_memory_range(struct process_vm *vm, unsigned long 
start, unsigned long end, unsigned long phys, unsigned long flag, struct memobj *memobj, off_t objoff); -int remove_process_memory_range(struct process *process, unsigned long start, +int remove_process_memory_range(struct process_vm *vm, unsigned long start, unsigned long end, int *ro_freedp); -int split_process_memory_range(struct process *process, +int split_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t addr, struct vm_range **splitp); -int join_process_memory_range(struct process *process, struct vm_range *surviving, +int join_process_memory_range(struct process_vm *vm, struct vm_range *surviving, struct vm_range *merging); int change_prot_process_memory_range( - struct process *process, struct vm_range *range, + struct process_vm *vm, struct vm_range *range, unsigned long newflag); int remap_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t start, uintptr_t end, off_t off); @@ -477,24 +573,24 @@ int extend_up_process_memory_range(struct process_vm *vm, int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr, uint64_t reason); -int remove_process_region(struct process *proc, +int remove_process_region(struct process_vm *vm, unsigned long start, unsigned long end); struct program_load_desc; -int init_process_stack(struct process *process, struct program_load_desc *pn, +int init_process_stack(struct thread *thread, struct program_load_desc *pn, int argc, char **argv, int envc, char **env); -unsigned long extend_process_region(struct process *proc, +unsigned long extend_process_region(struct process_vm *vm, unsigned long start, unsigned long end, unsigned long address, unsigned long flag); extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep); enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep); void schedule(void); -void runq_add_proc(struct process *proc, int cpu_id); -void runq_del_proc(struct 
process *proc, int cpu_id); -int sched_wakeup_process(struct process *proc, int valid_states); +void runq_add_thread(struct thread *thread, int cpu_id); +void runq_del_thread(struct thread *thread, int cpu_id); +int sched_wakeup_thread(struct thread *thread, int valid_states); -void sched_request_migrate(int cpu_id, struct process *proc); +void sched_request_migrate(int cpu_id, struct thread *thread); void check_need_resched(void); void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock); @@ -502,8 +598,14 @@ void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock); void cpu_clear_and_set(int c_cpu, int s_cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock); -struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate); -void process_unlock(void *savelock, unsigned long irqstate); void release_cpuid(int cpuid); +struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock); +void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock); +struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock); +void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock); +void chain_process(struct process *); +void chain_thread(struct thread *); +void proc_init(); + #endif diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index a2417a88..786d57c8 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -285,4 +285,5 @@ struct procfs_file { char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */ }; +extern void terminate(int, int); #endif diff --git a/kernel/include/timer.h b/kernel/include/timer.h index 86e9c719..cfe6362b 100644 --- a/kernel/include/timer.h +++ b/kernel/include/timer.h @@ -36,7 +36,7 @@ struct timer { uint64_t timeout; struct waitq processes; struct list_head list; - struct process *proc; + struct thread *thread; }; uint64_t schedule_timeout(uint64_t timeout); diff --git a/kernel/include/waitq.h 
b/kernel/include/waitq.h index d64f2b89..75c4b059 100644 --- a/kernel/include/waitq.h +++ b/kernel/include/waitq.h @@ -19,7 +19,7 @@ #include #include -struct process; +struct thread; struct waitq_entry; typedef int (*waitq_func_t)(struct waitq_entry *wait, unsigned mode, @@ -58,7 +58,7 @@ typedef struct waitq_entry { } extern void waitq_init(waitq_t *waitq); -extern void waitq_init_entry(waitq_entry_t *entry, struct process *proc); +extern void waitq_init_entry(waitq_entry_t *entry, struct thread *proc); extern int waitq_active(waitq_t *waitq); extern void waitq_add_entry(waitq_t *waitq, waitq_entry_t *entry); extern void waitq_add_entry_locked(waitq_t *waitq, waitq_entry_t *entry); diff --git a/kernel/init.c b/kernel/init.c index 8a520447..2f5ba98d 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -225,6 +225,8 @@ static void rest_init(void) ikc_master_init(); + proc_init(); + sched_init(); } diff --git a/kernel/mem.c b/kernel/mem.c index 9b33963e..b1d888cb 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -174,7 +174,7 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = { void set_signal(int sig, void *regs, struct siginfo *info); void check_signal(unsigned long, void *, int); -int gencore(struct process *, void *, struct coretable **, int *); +int gencore(struct thread *, void *, struct coretable **, int *); void freecore(struct coretable **); /** @@ -184,14 +184,14 @@ void freecore(struct coretable **); * \param regs A pointer to a x86_regs structure. 
*/ -void coredump(struct process *proc, void *regs) +void coredump(struct thread *thread, void *regs) { struct syscall_request request IHK_DMA_ALIGN; int ret; struct coretable *coretable; int chunks; - ret = gencore(proc, regs, &coretable, &chunks); + ret = gencore(thread, regs, &coretable, &chunks); if (ret != 0) { dkprintf("could not generate a core file image\n"); return; @@ -200,7 +200,7 @@ void coredump(struct process *proc, void *regs) request.args[0] = chunks; request.args[1] = virt_to_phys(coretable); /* no data for now */ - ret = do_syscall(&request, proc->cpu_id, proc->ftn->pid); + ret = do_syscall(&request, thread->cpu_id, thread->proc->pid); if (ret == 0) { kprintf("dumped core.\n"); } else { @@ -209,10 +209,10 @@ void coredump(struct process *proc, void *regs) freecore(&coretable); } -static void unhandled_page_fault(struct process *proc, void *fault_addr, void *regs) +static void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) { const uintptr_t address = (uintptr_t)fault_addr; - struct process_vm *vm = proc->vm; + struct process_vm *vm = thread->vm; struct vm_range *range; char found; unsigned long irqflags; @@ -235,7 +235,7 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r found = 1; dkprintf("address is in range, flag: 0x%X! 
\n", range->flag); - ihk_mc_pt_print_pte(vm->page_table, (void*)address); + ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); break; } } @@ -366,7 +366,7 @@ void tlb_flush_handler(int vector) static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); int error; dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n", @@ -376,29 +376,24 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) cpu_enable_interrupt(); - error = page_fault_process_vm(proc->vm, fault_addr, reason); + error = page_fault_process_vm(thread->vm, fault_addr, reason); if (error) { struct siginfo info; if (error == -ECANCELED) { dkprintf("process is exiting, terminate.\n"); - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - proc->ftn->status = PS_ZOMBIE; - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - release_fork_tree_node(proc->ftn->parent); - release_fork_tree_node(proc->ftn); - release_process(proc); - preempt_enable(); - schedule(); + terminate(0, SIGSEGV); + // no return } kprintf("[%d]page_fault_handler(%p,%lx,%p):" "fault vm failed. 
%d, TID: %d\n", ihk_mc_get_processor_id(), fault_addr, - reason, regs, error, proc->ftn->tid); - unhandled_page_fault(proc, fault_addr, regs); + reason, regs, error, thread->tid); + unhandled_page_fault(thread, fault_addr, regs); + preempt_enable(); memset(&info, '\0', sizeof info); if (error == -ERANGE) { info.si_signo = SIGBUS; @@ -407,7 +402,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) set_signal(SIGBUS, regs, &info); } else { - struct process_vm *vm = proc->vm; + struct process_vm *vm = thread->vm; struct vm_range *range; info.si_signo = SIGSEGV; @@ -421,7 +416,6 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) info._sifields._sigfault.si_addr = fault_addr; set_signal(SIGSEGV, regs, &info); } - preempt_enable(); check_signal(0, regs, 0); goto out; } @@ -880,12 +874,10 @@ int memcheckall() struct alloc *ap; int r = 0; -kprintf("memcheckall\n"); for(i = 0; i < HASHNUM; i++) for(ap = allochash[i]; ap; ap = ap->next) if(ap->p) r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2); -kprintf("done\n"); return r; } diff --git a/kernel/process.c b/kernel/process.c index 376a3480..641c5d35 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -43,107 +43,161 @@ #endif extern long do_arch_prctl(unsigned long code, unsigned long address); -extern long alloc_debugreg(struct process *proc); +extern long alloc_debugreg(struct thread *proc); extern void save_debugreg(unsigned long *debugreg); extern void restore_debugreg(unsigned long *debugreg); extern void clear_debugreg(void); -extern void clear_single_step(struct process *proc); -static void insert_vm_range_list(struct process_vm *vm, +extern void clear_single_step(struct thread *proc); +static void insert_vm_range_list(struct process_vm *vm, struct vm_range *newrange); -static int copy_user_ranges(struct process *proc, struct process *org); -extern void release_fp_regs(struct process *proc); -extern void save_fp_regs(struct process *proc); -extern void 
restore_fp_regs(struct process *proc); -void settid(struct process *proc, int mode, int newcpuid, int oldcpuid); -extern void __runq_add_proc(struct process *proc, int cpu_id); +static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm); +extern void release_fp_regs(struct thread *proc); +extern void save_fp_regs(struct thread *proc); +extern void restore_fp_regs(struct thread *proc); +void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid); +extern void __runq_add_proc(struct thread *proc, int cpu_id); extern void terminate_host(int pid); extern void lapic_timer_enable(unsigned int clocks); extern void lapic_timer_disable(); +extern int num_processors; +extern ihk_spinlock_t cpuid_head_lock; +int ptrace_detach(int pid, int data); +extern unsigned long do_kill(struct thread *, int pid, int tid, int sig, struct siginfo *info, int ptracecont); -int refcount_fork_tree_node(struct fork_tree_node *ftn) +struct list_head resource_set_list; +mcs_rwlock_lock_t resource_set_lock; + +void +init_process(struct process *proc, struct process *parent) { - return ihk_atomic_read(&ftn->refcount); -} - -void hold_fork_tree_node(struct fork_tree_node *ftn) -{ - ihk_atomic_inc(&ftn->refcount); - dkprintf("hold ftn(%d): %d\n", - ftn->pid, ihk_atomic_read(&ftn->refcount)); -} - -void release_fork_tree_node(struct fork_tree_node *ftn) -{ - dkprintf("release ftn(%d): %d\n", - ftn->pid, ihk_atomic_read(&ftn->refcount)); - - if (!ihk_atomic_dec_and_test(&ftn->refcount)) { - return; - } - - dkprintf("dealloc ftn(%d): %d\n", - ftn->pid, ihk_atomic_read(&ftn->refcount)); - - /* Dealloc */ - kfree(ftn); -} - - -void init_fork_tree_node(struct fork_tree_node *ftn, - struct fork_tree_node *parent, struct process *owner) -{ - ihk_mc_spinlock_init(&ftn->lock); - /* Only the process/thread holds a reference at this point */ - ihk_atomic_set(&ftn->refcount, 1); - - ftn->owner = owner; - /* These will be filled out when changing status */ - ftn->pid = -1; - 
ftn->exit_status = -1; - ftn->status = PS_RUNNING; + proc->pid = -1; + proc->exit_status = -1; + proc->pstatus = PS_RUNNING; - ftn->group_exit_status = 0; - ftn->ptrace = 0; - ftn->signal_flags = 0; - - ftn->parent = NULL; - if (parent) { - ftn->parent = parent; - ftn->pgid = parent->pgid; - ftn->ruid = parent->ruid; - ftn->euid = parent->euid; - ftn->suid = parent->suid; - ftn->fsuid = parent->fsuid; - ftn->rgid = parent->rgid; - ftn->egid = parent->egid; - ftn->sgid = parent->sgid; - ftn->fsgid = parent->fsgid; + if(parent){ + proc->parent = parent; + proc->ppid_parent = parent; + proc->pgid = parent->pgid; + proc->ruid = parent->ruid; + proc->euid = parent->euid; + proc->suid = parent->suid; + proc->fsuid = parent->fsuid; + proc->rgid = parent->rgid; + proc->egid = parent->egid; + proc->sgid = parent->sgid; + proc->fsgid = parent->fsgid; + memcpy(proc->rlimit, parent->rlimit, + sizeof(struct rlimit) * MCK_RLIM_MAX); } - INIT_LIST_HEAD(&ftn->children); - INIT_LIST_HEAD(&ftn->siblings_list); - INIT_LIST_HEAD(&ftn->ptrace_children); - INIT_LIST_HEAD(&ftn->ptrace_siblings_list); - - waitq_init(&ftn->waitpid_q); + INIT_LIST_HEAD(&proc->threads_list); + INIT_LIST_HEAD(&proc->children_list); + INIT_LIST_HEAD(&proc->ptraced_children_list); + mcs_rwlock_init(&proc->threads_lock); + mcs_rwlock_init(&proc->children_lock); + waitq_init(&proc->waitpid_q); + ihk_atomic_set(&proc->refcount, 2); } -static int init_process_vm(struct process *owner, struct process_vm *vm) +void +chain_process(struct process *proc) { - void *pt = ihk_mc_pt_create(IHK_MC_AP_NOWAIT); + struct mcs_rwlock_node_irqsave lock; + struct process *parent = proc->parent; + int hash; + struct process_hash *phash; - if(pt == NULL) - return -ENOMEM; + mcs_rwlock_writer_lock(&parent->children_lock, &lock); + list_add_tail(&proc->siblings_list, &parent->children_list); + mcs_rwlock_writer_unlock(&parent->children_lock, &lock); + hash = process_hash(proc->pid); + phash = cpu_local_var(resource_set)->process_hash; 
+ mcs_rwlock_writer_lock(&phash->lock[hash], &lock); + list_add_tail(&proc->hash_list, &phash->list[hash]); + mcs_rwlock_writer_unlock(&phash->lock[hash], &lock); +} + +void +chain_thread(struct thread *thread) +{ + struct mcs_rwlock_node_irqsave lock; + struct process *proc = thread->proc; + int hash; + struct thread_hash *thash; + + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_add_tail(&thread->siblings_list, &proc->threads_list); + mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + + hash = thread_hash(thread->tid); + thash = cpu_local_var(resource_set)->thread_hash; + mcs_rwlock_writer_lock(&thash->lock[hash], &lock); + list_add_tail(&thread->hash_list, &thash->list[hash]); + mcs_rwlock_writer_unlock(&thash->lock[hash], &lock); + + ihk_atomic_inc(&proc->refcount); +} + +struct address_space * +create_address_space(struct resource_set *res, int type, int n) +{ + struct address_space *asp; + void *pt; + + asp = kmalloc(sizeof(struct address_space) + sizeof(int) * n, IHK_MC_AP_NOWAIT); + if(!asp) + return NULL; + pt = ihk_mc_pt_create(IHK_MC_AP_NOWAIT); + if(!pt){ + kfree(asp); + return NULL; + } + + memset(asp, '\0', sizeof(struct address_space) + sizeof(int) * n); + asp->res = res; + asp->type = type; + asp->nslots = n; + asp->page_table = pt; + return asp; +} + +void +remove_address_space(struct address_space *asp) +{ + ihk_mc_pt_destroy(asp->page_table); + kfree(asp); +} + +void +detach_address_space(struct address_space *asp, int pid) +{ + if(asp->type == ADDRESS_SPACE_NORMAL){ + remove_address_space(asp); + } + else if(asp->type == ADDRESS_SPACE_PVAS){ + int i; + + for(i = 0; i < asp->nslots; i++){ + if(asp->pids[i] == pid){ + asp->pids[i] = 0; + break; + } + } + } +} + +static int +init_process_vm(struct process *owner, struct address_space *asp, struct process_vm *vm) +{ ihk_mc_spinlock_init(&vm->memory_range_lock); ihk_mc_spinlock_init(&vm->page_table_lock); ihk_atomic_set(&vm->refcount, 1); INIT_LIST_HEAD(&vm->vm_range_list); - 
vm->page_table = pt; - hold_process(owner); - vm->owner_process = owner; + vm->address_space = asp; + vm->proc = owner; memset(&vm->cpu_set, 0, sizeof(cpu_set_t)); ihk_mc_spinlock_init(&vm->cpu_set_lock); vm->exiting = 0; @@ -151,283 +205,284 @@ static int init_process_vm(struct process *owner, struct process_vm *vm) return 0; } -struct process *create_process(unsigned long user_pc) +struct thread * +create_thread(unsigned long user_pc) { + struct thread *thread; struct process *proc; + struct process_vm *vm = NULL; + struct address_space *asp = NULL; - proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, IHK_MC_AP_NOWAIT); - if (!proc) + thread = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, IHK_MC_AP_NOWAIT); + if (!thread) return NULL; - + memset(thread, 0, sizeof(struct thread)); + ihk_atomic_set(&thread->refcount, 2); + proc = kmalloc(sizeof(struct process), IHK_MC_AP_NOWAIT); + vm = kmalloc(sizeof(struct process_vm), IHK_MC_AP_NOWAIT); + asp = create_address_space(cpu_local_var(resource_set), + ADDRESS_SPACE_NORMAL, 1); + if (!proc || !vm || !asp) + goto err; memset(proc, 0, sizeof(struct process)); - ihk_atomic_set(&proc->refcount, 2); + memset(vm, 0, sizeof(struct process_vm)); + init_process(proc, cpu_local_var(resource_set)->pid1); + if (1) { struct ihk_mc_cpu_info *infop; int i; infop = ihk_mc_get_cpu_info(); for (i = 0; i < infop->ncpus; ++i) { - CPU_SET(i, &proc->cpu_set); + CPU_SET(i, &thread->cpu_set); } } - proc->sched_policy = SCHED_NORMAL; + thread->sched_policy = SCHED_NORMAL; - proc->sighandler = kmalloc(sizeof(struct sig_handler), IHK_MC_AP_NOWAIT); - if(!proc->sighandler){ - goto err_free_process; + thread->sigcommon = kmalloc(sizeof(struct sig_common), + IHK_MC_AP_NOWAIT); + if (!thread->sigcommon) { + goto err; } - proc->sigshared = kmalloc(sizeof(struct sig_shared), IHK_MC_AP_NOWAIT); - if(!proc->sigshared){ - goto err_free_sighandler; - } - memset(proc->sighandler, '\0', sizeof(struct sig_handler)); - ihk_atomic_set(&proc->sighandler->use, 1); - 
ihk_mc_spinlock_init(&proc->sighandler->lock); - ihk_atomic_set(&proc->sigshared->use, 1); - ihk_mc_spinlock_init(&proc->sigshared->lock); - INIT_LIST_HEAD(&proc->sigshared->sigpending); - ihk_mc_spinlock_init(&proc->sigpendinglock); - INIT_LIST_HEAD(&proc->sigpending); + memset(thread->sigcommon, '\0', sizeof(struct sig_common)); - proc->sigstack.ss_sp = NULL; - proc->sigstack.ss_flags = SS_DISABLE; - proc->sigstack.ss_size = 0; + dkprintf("fork(): sigshared\n"); - ihk_mc_init_user_process(&proc->ctx, &proc->uctx, - ((char *)proc) + - KERNEL_STACK_NR_PAGES * PAGE_SIZE, user_pc, 0); + ihk_atomic_set(&thread->sigcommon->use, 1); + ihk_mc_spinlock_init(&thread->sigcommon->lock); + INIT_LIST_HEAD(&thread->sigcommon->sigpending); - proc->vm = (struct process_vm *)(proc + 1); + ihk_mc_spinlock_init(&thread->sigpendinglock); + INIT_LIST_HEAD(&thread->sigpending); - proc->ftn = kmalloc(sizeof(struct fork_tree_node), IHK_MC_AP_NOWAIT); - if (!proc->ftn) { - goto err_free_sigshared; + thread->sigstack.ss_sp = NULL; + thread->sigstack.ss_flags = SS_DISABLE; + thread->sigstack.ss_size = 0; + + ihk_mc_init_user_process(&thread->ctx, &thread->uctx, ((char *)thread) + + KERNEL_STACK_NR_PAGES * PAGE_SIZE, user_pc, 0); + + thread->vm = vm; + thread->proc = proc; + proc->vm = vm; + + if(init_process_vm(proc, asp, vm) != 0){ + goto err; } - init_fork_tree_node(proc->ftn, NULL, proc); + cpu_set(ihk_mc_get_processor_id(), &thread->vm->cpu_set, + &thread->vm->cpu_set_lock); - if(init_process_vm(proc, proc->vm) != 0){ - goto err_free_sigshared; - } + ihk_mc_spinlock_init(&thread->spin_sleep_lock); + thread->spin_sleep = 0; - cpu_set(ihk_mc_get_processor_id(), &proc->vm->cpu_set, - &proc->vm->cpu_set_lock); + return thread; - ihk_mc_spinlock_init(&proc->spin_sleep_lock); - proc->spin_sleep = 0; +err: + if(proc) + kfree(proc); + if(vm) + kfree(vm); + if(asp) + remove_address_space(asp); + if(thread->sigcommon) + kfree(thread->sigcommon); + ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); 
- return proc; - -err_free_sigshared: - kfree(proc->sigshared); - -err_free_sighandler: - kfree(proc->sighandler); - -err_free_process: - ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); - return NULL; } -struct process *clone_process(struct process *org, unsigned long pc, - unsigned long sp, int clone_flags) +struct thread * +clone_thread(struct thread *org, unsigned long pc, unsigned long sp, + int clone_flags) { - struct process *proc; + struct thread *thread; int termsig = clone_flags & 0xff; + struct process *proc = NULL; + struct address_space *asp = NULL; + if (termsig < 0 || _NSIG < termsig) { return (void *)-EINVAL; } - if ((proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, + if((clone_flags & CLONE_SIGHAND) && + !(clone_flags & CLONE_VM)) + return (void *)-EINVAL; + if((clone_flags & CLONE_THREAD) && + !(clone_flags & CLONE_SIGHAND)) + return (void *)-EINVAL; + if((clone_flags & CLONE_FS) && + (clone_flags & CLONE_NEWNS)) + return (void *)-EINVAL; + if((clone_flags & CLONE_NEWIPC) && + (clone_flags & CLONE_SYSVSEM)) + return (void *)-EINVAL; + if((clone_flags & CLONE_NEWPID) && + (clone_flags & CLONE_THREAD)) + return (void *)-EINVAL; + + + if ((thread = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, IHK_MC_AP_NOWAIT)) == NULL) { return NULL; } - memset(proc, 0, sizeof(struct process)); - ihk_atomic_set(&proc->refcount, 2); - memcpy(&proc->cpu_set, &org->cpu_set, sizeof(proc->cpu_set)); + memset(thread, 0, sizeof(struct thread)); + ihk_atomic_set(&thread->refcount, 2); + memcpy(&thread->cpu_set, &org->cpu_set, sizeof(thread->cpu_set)); /* NOTE: sp is the user mode stack! 
*/ - ihk_mc_init_user_process(&proc->ctx, &proc->uctx, - ((char *)proc) + - KERNEL_STACK_NR_PAGES * PAGE_SIZE, pc, sp); + ihk_mc_init_user_process(&thread->ctx, &thread->uctx, ((char *)thread) + + KERNEL_STACK_NR_PAGES * PAGE_SIZE, pc, sp); - memcpy(proc->uctx, org->uctx, sizeof(*org->uctx)); - ihk_mc_modify_user_context(proc->uctx, IHK_UCR_STACK_POINTER, sp); - ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, pc); + memcpy(thread->uctx, org->uctx, sizeof(*org->uctx)); + ihk_mc_modify_user_context(thread->uctx, IHK_UCR_STACK_POINTER, sp); + ihk_mc_modify_user_context(thread->uctx, IHK_UCR_PROGRAM_COUNTER, pc); - memcpy(proc->rlimit, org->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX); - proc->sigmask = org->sigmask; - - proc->ftn = kmalloc(sizeof(struct fork_tree_node), IHK_MC_AP_NOWAIT); - if (!proc->ftn) { - goto err_free_sigshared; - } - - proc->ftn->termsig = termsig; - - init_fork_tree_node(proc->ftn, org->ftn, proc); - - proc->sched_policy = org->sched_policy; - proc->sched_param.sched_priority = org->sched_param.sched_priority; - - /* clone signal handlers */ - if (clone_flags & CLONE_SIGHAND) { - proc->sigstack.ss_sp = NULL; - proc->sigstack.ss_flags = SS_DISABLE; - proc->sigstack.ss_size = 0; - - proc->sighandler = org->sighandler; - ihk_atomic_inc(&org->sighandler->use); - - proc->sigshared = org->sigshared; - ihk_atomic_inc(&org->sigshared->use); - - ihk_mc_spinlock_init(&proc->sigpendinglock); - INIT_LIST_HEAD(&proc->sigpending); - } - /* copy signal handlers (i.e., fork()) */ - else { - dkprintf("fork(): sighandler\n"); - proc->sighandler = kmalloc(sizeof(struct sig_handler), - IHK_MC_AP_NOWAIT); - - if (!proc->sighandler) { - goto err_free_proc; - } - - dkprintf("fork(): sigshared\n"); - proc->sigshared = kmalloc(sizeof(struct sig_shared), IHK_MC_AP_NOWAIT); - - if (!proc->sigshared) { - goto err_free_sighandler; - } - - memcpy(proc->sighandler, org->sighandler, sizeof(struct sig_handler)); - ihk_atomic_set(&proc->sighandler->use, 1); - 
ihk_mc_spinlock_init(&proc->sighandler->lock); - ihk_atomic_set(&proc->sigshared->use, 1); - ihk_mc_spinlock_init(&proc->sigshared->lock); - INIT_LIST_HEAD(&proc->sigshared->sigpending); - ihk_mc_spinlock_init(&proc->sigpendinglock); - INIT_LIST_HEAD(&proc->sigpending); - } + thread->sched_policy = org->sched_policy; + thread->sched_param.sched_priority = org->sched_param.sched_priority; /* clone VM */ if (clone_flags & CLONE_VM) { - ihk_atomic_inc(&org->vm->refcount); - proc->vm = org->vm; + proc = org->proc; + thread->vm = org->vm; + thread->proc = proc; } /* fork() */ else { - proc->vm = (struct process_vm *)(proc + 1); - - dkprintf("fork(): init_process_vm()\n"); - if (init_process_vm(proc, proc->vm) != 0) { - goto err_free_sigshared; + proc = kmalloc(sizeof(struct process), IHK_MC_AP_NOWAIT); + if(!proc) + goto err_free_proc; + memset(proc, '\0', sizeof(struct process)); + init_process(proc, org->proc); + + proc->termsig = termsig; + asp = create_address_space(cpu_local_var(resource_set), + ADDRESS_SPACE_NORMAL, 1); + if(!asp){ + kfree(proc); + goto err_free_proc; } + proc->vm = kmalloc(sizeof(struct process_vm), IHK_MC_AP_NOWAIT); + if(!proc->vm){ + remove_address_space(asp); + kfree(proc); + goto err_free_proc; + } + memset(proc->vm, '\0', sizeof(struct process_vm)); + + dkprintf("fork(): init_process_vm()\n"); + if (init_process_vm(proc, asp, proc->vm) != 0) { + remove_address_space(asp); + kfree(proc->vm); + kfree(proc); + goto err_free_proc; + } + thread->proc = proc; + thread->vm = proc->vm; memcpy(&proc->vm->region, &org->vm->region, sizeof(struct vm_regions)); - + dkprintf("fork(): copy_user_ranges()\n"); /* Copy user-space mappings. - * TODO: do this with COW later? */ - if (copy_user_ranges(proc, org) != 0) { - goto err_free_sigshared; + * TODO: do this with COW later? 
*/ + if (copy_user_ranges(proc->vm, org->vm) != 0) { + remove_address_space(asp); + kfree(proc->vm); + kfree(proc); + goto err_free_proc; } - + dkprintf("fork(): copy_user_ranges() OK\n"); } - - /* Add thread/proc's fork_tree_node to parent's children list */ - ihk_mc_spinlock_lock_noirq(&org->ftn->lock); - list_add_tail(&proc->ftn->siblings_list, &org->ftn->children); - ihk_mc_spinlock_unlock_noirq(&org->ftn->lock); - /* We hold a reference to parent */ - hold_fork_tree_node(proc->ftn->parent); + /* clone signal handlers */ + if (clone_flags & CLONE_SIGHAND) { + thread->sigcommon = org->sigcommon; + ihk_atomic_inc(&org->sigcommon->use); + } + /* copy signal handlers (i.e., fork()) */ + else { + dkprintf("fork(): sigcommon\n"); + thread->sigcommon = kmalloc(sizeof(struct sig_common), + IHK_MC_AP_NOWAIT); + if (!thread->sigcommon) { + goto err_free_proc; + } + memset(thread->sigcommon, '\0', sizeof(struct sig_common)); - /* Parent holds a reference to us */ - hold_fork_tree_node(proc->ftn); + dkprintf("fork(): sigshared\n"); - ihk_mc_spinlock_init(&proc->spin_sleep_lock); - proc->spin_sleep = 0; + memcpy(thread->sigcommon->action, org->sigcommon->action, + sizeof(struct k_sigaction) * _NSIG); + ihk_atomic_set(&thread->sigcommon->use, 1); + ihk_mc_spinlock_init(&thread->sigcommon->lock); + INIT_LIST_HEAD(&thread->sigcommon->sigpending); + // TODO: copy signalfd + } + thread->sigstack.ss_sp = NULL; + thread->sigstack.ss_flags = SS_DISABLE; + thread->sigstack.ss_size = 0; + ihk_mc_spinlock_init(&thread->sigpendinglock); + INIT_LIST_HEAD(&thread->sigpending); + thread->sigmask = org->sigmask; - return proc; + ihk_mc_spinlock_init(&thread->spin_sleep_lock); + thread->spin_sleep = 0; -err_free_sigshared: - kfree(proc->sigshared); - -err_free_sighandler: - ihk_mc_free_pages(proc->sighandler, KERNEL_STACK_NR_PAGES); + return thread; err_free_proc: - ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); - release_process(org); + ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); 
return NULL; } -int ptrace_traceme(void){ +int +ptrace_traceme(void) +{ int error = 0; - struct process *proc = cpu_local_var(current); - struct fork_tree_node *child, *next; - dkprintf("ptrace_traceme,pid=%d,proc->ftn->parent=%p\n", proc->ftn->pid, proc->ftn->parent); + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct process *parent = proc->parent; + struct mcs_rwlock_node_irqsave lock; + struct mcs_rwlock_node child_lock; - if (proc->ftn->parent == NULL || proc->ftn->ptrace) { - error = -EPERM; - goto out; + dkprintf("ptrace_traceme,pid=%d,proc->parent=%p\n", proc->pid, proc->parent); + + if (proc->ptrace & PT_TRACED) { + return -EPERM; } - dkprintf("ptrace_traceme,parent->pid=%d\n", proc->ftn->parent->pid); + dkprintf("ptrace_traceme,parent->pid=%d\n", proc->parent->pid); - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); + mcs_rwlock_writer_lock(&proc->update_lock, &lock); + mcs_rwlock_writer_lock_noirq(&parent->children_lock, &child_lock); + list_add_tail(&proc->ptraced_siblings_list, &parent->ptraced_children_list); + mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &child_lock); + proc->ptrace = PT_TRACED | PT_TRACE_EXEC; + mcs_rwlock_writer_unlock(&proc->update_lock, &lock); - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - list_for_each_entry_safe(child, next, &proc->ftn->parent->children, siblings_list) { - if(child == proc->ftn) { - list_del(&child->siblings_list); - goto found; - } - } - kprintf("ptrace_traceme,not found\n"); - error = -EPERM; - goto out_notfound; - found: - proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC; - proc->ftn->ppid_parent = proc->ftn->parent; - - list_add_tail(&proc->ftn->ptrace_siblings_list, &proc->ftn->parent->ptrace_children); - - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - - if (proc->ptrace_debugreg == NULL) { - error = alloc_debugreg(proc); + if (thread->ptrace_debugreg == NULL) { + error = 
alloc_debugreg(thread); } - clear_single_step(proc); + clear_single_step(thread); - out: dkprintf("ptrace_traceme,returning,error=%d\n", error); return error; - out_notfound: - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - goto out; } -static int copy_user_ranges(struct process *proc, struct process *org) +static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) { struct vm_range *src_range; - struct vm_range *range; - - ihk_mc_spinlock_lock_noirq(&org->vm->memory_range_lock); + struct vm_range *range; + + ihk_mc_spinlock_lock_noirq(&orgvm->memory_range_lock); /* Iterate original process' vm_range list and take a copy one-by-one */ - list_for_each_entry(src_range, &org->vm->vm_range_list, list) { + list_for_each_entry(src_range, &orgvm->vm_range_list, list) { void *ptepgaddr; size_t ptepgsize; int ptep2align; @@ -454,11 +509,13 @@ static int copy_user_ranges(struct process *proc, struct process *org) } /* Copy actual mappings */ - vaddr = (void *)range->start; + vaddr = (void *)range->start; while ((unsigned long)vaddr < range->end) { /* Get source PTE */ - ptep = ihk_mc_pt_lookup_pte(org->vm->page_table, vaddr, - &ptepgaddr, &ptepgsize, &ptep2align); + ptep = ihk_mc_pt_lookup_pte(orgvm->address_space-> + page_table, vaddr, + &ptepgaddr, &ptepgsize, + &ptep2align); if (!ptep || pte_is_null(ptep) || !pte_is_present(ptep)) { vaddr += PAGE_SIZE; @@ -475,24 +532,24 @@ static int copy_user_ranges(struct process *proc, struct process *org) } dkprintf("copy_user_ranges(): 0x%lx PTE found\n", vaddr); - + /* Page size */ - if (arch_get_smaller_page_size(NULL, -1, &ptepgsize, + if (arch_get_smaller_page_size(NULL, -1, &ptepgsize, &ptep2align)) { kprintf("ERROR: copy_user_ranges() " "(%p,%lx-%lx %lx,%lx):" - "get pgsize failed\n", org->vm, + "get pgsize failed\n", orgvm, range->start, range->end, range->flag, vaddr); goto err_free_range_rollback; } - + pgsize = ptepgsize; p2align = ptep2align; 
dkprintf("copy_user_ranges(): page size: %d\n", pgsize); - + /* Get physical page */ pg_vaddr = ihk_mc_alloc_aligned_pages(1, p2align, IHK_MC_AP_NOWAIT); @@ -508,26 +565,28 @@ static int copy_user_ranges(struct process *proc, struct process *org) /* Set up new PTE */ attr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL); - if (ihk_mc_pt_set_range(proc->vm->page_table, proc->vm, vaddr, - vaddr + pgsize, virt_to_phys(pg_vaddr), attr)) { + + if (ihk_mc_pt_set_range(vm->address_space->page_table, + vm, vaddr, vaddr + pgsize, + virt_to_phys(pg_vaddr), attr)) { kprintf("ERROR: copy_user_ranges() " "(%p,%lx-%lx %lx,%lx):" "set range failed.\n", - org->vm, range->start, range->end, + orgvm, range->start, range->end, range->flag, vaddr); goto err_free_range_rollback; } dkprintf("copy_user_ranges(): new PTE set\n", pgsize); - + vaddr += pgsize; } - insert_vm_range_list(proc->vm, range); + insert_vm_range_list(vm, range); } - ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock); - + ihk_mc_spinlock_unlock_noirq(&orgvm->memory_range_lock); + return 0; err_free_range_rollback: @@ -538,20 +597,21 @@ err_rollback: /* TODO: implement rollback */ - ihk_mc_spinlock_unlock_noirq(&org->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&orgvm->memory_range_lock); return -1; } -int update_process_page_table(struct process *process, +int update_process_page_table(struct process_vm *vm, struct vm_range *range, uint64_t phys, enum ihk_mc_pt_attribute flag) { unsigned long p, pa = phys; unsigned long pp; - unsigned long flags = ihk_mc_spinlock_lock(&process->vm->page_table_lock); + unsigned long flags; enum ihk_mc_pt_attribute attr; + flags = ihk_mc_spinlock_lock(&vm->page_table_lock); attr = flag | PTATTR_USER | PTATTR_FOR_USER; attr |= (range->flag & VR_PROT_WRITE)? PTATTR_WRITABLE: 0; attr |= (range->flag & VR_PROT_EXEC)? 
0: PTATTR_NO_EXECUTE; @@ -559,15 +619,16 @@ int update_process_page_table(struct process *process, p = range->start; while (p < range->end) { #ifdef USE_LARGE_PAGES - /* Use large PTE if both virtual and physical addresses are large page + /* Use large PTE if both virtual and physical addresses are large page * aligned and more than LARGE_PAGE_SIZE is left from the range */ - if ((p & (LARGE_PAGE_SIZE - 1)) == 0 && + if ((p & (LARGE_PAGE_SIZE - 1)) == 0 && (pa & (LARGE_PAGE_SIZE - 1)) == 0 && (range->end - p) >= LARGE_PAGE_SIZE) { - if (ihk_mc_pt_set_large_page(process->vm->page_table, (void *)p, - pa, attr) != 0) { - kprintf("ERROR: setting large page for 0x%lX -> 0x%lX\n", + if (ihk_mc_pt_set_large_page(vm->address_space-> + page_table, (void *)p, + pa, attr) != 0) { + kprintf("ERROR: setting large page for 0x%lX -> 0x%lX\n", p, pa); goto err; } @@ -578,9 +639,9 @@ int update_process_page_table(struct process *process, p += LARGE_PAGE_SIZE; } else { -#endif - if(ihk_mc_pt_set_page(process->vm->page_table, (void *)p, - pa, attr) != 0){ +#endif + if(ihk_mc_pt_set_page(vm->address_space->page_table, + (void *)p, pa, attr) != 0){ kprintf("ERROR: setting page for 0x%lX -> 0x%lX\n", p, pa); goto err; } @@ -591,7 +652,7 @@ int update_process_page_table(struct process *process, } #endif } - ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags); + ihk_mc_spinlock_unlock(&vm->page_table_lock, flags); return 0; err: @@ -599,16 +660,18 @@ err: pa = phys; while(pp < p){ #ifdef USE_LARGE_PAGES - if ((p & (LARGE_PAGE_SIZE - 1)) == 0 && + if ((p & (LARGE_PAGE_SIZE - 1)) == 0 && (pa & (LARGE_PAGE_SIZE - 1)) == 0 && (range->end - p) >= LARGE_PAGE_SIZE) { - ihk_mc_pt_clear_large_page(process->vm->page_table, (void *)pp); + ihk_mc_pt_clear_large_page(vm->address_space-> + page_table, (void *)pp); pa += LARGE_PAGE_SIZE; pp += LARGE_PAGE_SIZE; } else{ #endif - ihk_mc_pt_clear_page(process->vm->page_table, (void *)pp); + ihk_mc_pt_clear_page(vm->address_space->page_table, + 
(void *)pp); pa += PAGE_SIZE; pp += PAGE_SIZE; #ifdef USE_LARGE_PAGES @@ -616,24 +679,24 @@ err: #endif } - ihk_mc_spinlock_unlock(&process->vm->page_table_lock, flags); + ihk_mc_spinlock_unlock(&vm->page_table_lock, flags); return -ENOMEM; } -int split_process_memory_range(struct process *proc, struct vm_range *range, +int split_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t addr, struct vm_range **splitp) { int error; struct vm_range *newrange = NULL; dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p)\n", - proc, range->start, range->end, addr, splitp); + vm, range->start, range->end, addr, splitp); newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT); if (!newrange) { ekprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p):" "kmalloc failed\n", - proc, range->start, range->end, addr, splitp); + vm, range->start, range->end, addr, splitp); error = -ENOMEM; goto out; } @@ -663,19 +726,19 @@ int split_process_memory_range(struct process *proc, struct vm_range *range, out: dkprintf("split_process_memory_range(%p,%lx-%lx,%lx,%p): %d %p %lx-%lx\n", - proc, range->start, range->end, addr, splitp, + vm, range->start, range->end, addr, splitp, error, newrange, newrange? newrange->start: 0, newrange? 
newrange->end: 0); return error; } -int join_process_memory_range(struct process *proc, +int join_process_memory_range(struct process_vm *vm, struct vm_range *surviving, struct vm_range *merging) { int error; dkprintf("join_process_memory_range(%p,%lx-%lx,%lx-%lx)\n", - proc, surviving->start, surviving->end, + vm, surviving->start, surviving->end, merging->start, merging->end); if ((surviving->end != merging->start) @@ -706,7 +769,7 @@ int join_process_memory_range(struct process *proc, error = 0; out: dkprintf("join_process_memory_range(%p,%lx-%lx,%p): %d\n", - proc, surviving->start, surviving->end, merging, error); + vm, surviving->start, surviving->end, merging, error); return error; } @@ -753,7 +816,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) if (range->memobj) { memobj_lock(range->memobj); } - error = ihk_mc_pt_free_range(vm->page_table, vm, + error = ihk_mc_pt_free_range(vm->address_space->page_table, vm, (void *)start, (void *)end, (range->flag & VR_PRIVATE)? 
NULL: range->memobj); if (range->memobj) { @@ -769,7 +832,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) } else { ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - error = ihk_mc_pt_clear_range(vm->page_table, vm, + error = ihk_mc_pt_clear_range(vm->address_space->page_table, vm, (void *)start, (void *)end); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error && (error != -ENOENT)) { @@ -791,10 +854,9 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) return 0; } -int remove_process_memory_range(struct process *process, +int remove_process_memory_range(struct process_vm *vm, unsigned long start, unsigned long end, int *ro_freedp) { - struct process_vm * const vm = process->vm; struct vm_range *range; struct vm_range *next; int error; @@ -802,7 +864,7 @@ int remove_process_memory_range(struct process *process, int ro_freed = 0; dkprintf("remove_process_memory_range(%p,%lx,%lx)\n", - process, start, end); + vm, start, end); list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { if ((range->end <= start) || (end <= range->start)) { @@ -812,23 +874,23 @@ int remove_process_memory_range(struct process *process, freerange = range; if (freerange->start < start) { - error = split_process_memory_range(process, + error = split_process_memory_range(vm, freerange, start, &freerange); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "split failed %d\n", - process, start, end, error); + vm, start, end, error); return error; } } if (end < freerange->end) { - error = split_process_memory_range(process, + error = split_process_memory_range(vm, freerange, end, NULL); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "split failed %d\n", - process, start, end, error); + vm, start, end, error); return error; } } @@ -837,11 +899,11 @@ int remove_process_memory_range(struct process *process, ro_freed = 1; } - error = free_process_memory_range(process->vm, freerange); + 
error = free_process_memory_range(vm, freerange); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "free failed %d\n", - process, start, end, error); + vm, start, end, error); return error; } @@ -851,7 +913,7 @@ int remove_process_memory_range(struct process *process, *ro_freedp = ro_freed; } dkprintf("remove_process_memory_range(%p,%lx,%lx): 0 %d\n", - process, start, end, ro_freed); + vm, start, end, ro_freed); return 0; } @@ -911,7 +973,7 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa return attr; } -int add_process_memory_range(struct process *process, +int add_process_memory_range(struct process_vm *vm, unsigned long start, unsigned long end, unsigned long phys, unsigned long flag, struct memobj *memobj, off_t offset) @@ -919,15 +981,15 @@ int add_process_memory_range(struct process *process, struct vm_range *range; int rc; #if 0 - extern void __host_update_process_range(struct process *process, + extern void __host_update_process_range(struct thread *process, struct vm_range *range); #endif - if ((start < process->vm->region.user_start) - || (process->vm->region.user_end < end)) { + if ((start < vm->region.user_start) + || (vm->region.user_end < end)) { kprintf("range(%#lx - %#lx) is not in user avail(%#lx - %#lx)\n", - start, end, process->vm->region.user_start, - process->vm->region.user_end); + start, end, vm->region.user_start, + vm->region.user_end); return -EINVAL; } @@ -954,9 +1016,9 @@ int add_process_memory_range(struct process *process, } if (flag & VR_REMOTE) { - rc = update_process_page_table(process, range, phys, IHK_PTA_REMOTE); + rc = update_process_page_table(vm, range, phys, IHK_PTA_REMOTE); } else if (flag & VR_IO_NOCACHE) { - rc = update_process_page_table(process, range, phys, PTATTR_UNCACHABLE); + rc = update_process_page_table(vm, range, phys, PTATTR_UNCACHABLE); } else if(flag & VR_DEMAND_PAGING){ //demand paging no need to update process table now dkprintf("demand paging do not 
update process page table\n"); @@ -964,7 +1026,7 @@ int add_process_memory_range(struct process *process, } else if ((range->flag & VR_PROT_MASK) == VR_PROT_NONE) { rc = 0; } else { - rc = update_process_page_table(process, range, phys, 0); + rc = update_process_page_table(vm, range, phys, 0); } if(rc != 0){ kprintf("ERROR: preparing page tables\n"); @@ -972,14 +1034,14 @@ int add_process_memory_range(struct process *process, return rc; } -#if 0 // disable __host_update_process_range() in add_process_memory_range(), because it has no effect on the actual mapping on the MICs side. +#if 0 // disable __host_update_process_range() in add_process_memory_range(), because it has no effect on the actual mapping on the MICs side. if (!(flag & VR_REMOTE)) { __host_update_process_range(process, range); } #endif - - insert_vm_range_list(process->vm, range); - + + insert_vm_range_list(vm, range); + /* Clear content! */ if (!(flag & (VR_REMOTE | VR_DEMAND_PAGING)) && ((flag & VR_PROT_MASK) != VR_PROT_NONE)) { @@ -1092,7 +1154,7 @@ out: return error; } -int change_prot_process_memory_range(struct process *proc, +int change_prot_process_memory_range(struct process_vm *vm, struct vm_range *range, unsigned long protflag) { unsigned long newflag; @@ -1103,7 +1165,7 @@ int change_prot_process_memory_range(struct process *proc, enum ihk_mc_pt_attribute setattr; dkprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx)\n", - proc, range->start, range->end, protflag); + vm, range->start, range->end, protflag); newflag = (range->flag & ~VR_PROT_MASK) | (protflag & VR_PROT_MASK); if (range->flag == newflag) { @@ -1118,29 +1180,29 @@ int change_prot_process_memory_range(struct process *proc, clrattr = oldattr & ~newattr; setattr = newattr & ~oldattr; - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); - error = ihk_mc_pt_change_attr_range(proc->vm->page_table, + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); + error = ihk_mc_pt_change_attr_range(vm->address_space->page_table, 
(void *)range->start, (void *)range->end, clrattr, setattr); - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error && (error != -ENOENT)) { ekprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx):" "ihk_mc_pt_change_attr_range failed: %d\n", - proc, range->start, range->end, protflag, error); + vm, range->start, range->end, protflag, error); goto out; } if (((range->flag & VR_PROT_MASK) == PROT_NONE) && !(range->flag & VR_DEMAND_PAGING)) { - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); - error = ihk_mc_pt_alloc_range(proc->vm->page_table, + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); + error = ihk_mc_pt_alloc_range(vm->address_space->page_table, (void *)range->start, (void *)range->end, newattr); - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { ekprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx):" "ihk_mc_pt_alloc_range failed: %d\n", - proc, range->start, range->end, protflag, error); + vm, range->start, range->end, protflag, error); goto out; } } @@ -1149,7 +1211,7 @@ int change_prot_process_memory_range(struct process *proc, error = 0; out: dkprintf("change_prot_process_memory_range(%p,%lx-%lx,%lx): %d\n", - proc, range->start, range->end, protflag, error); + vm, range->start, range->end, protflag, error); return error; } @@ -1223,7 +1285,7 @@ int remap_process_memory_range(struct process_vm *vm, struct vm_range *range, args.off = off; args.memobj = range->memobj; - error = visit_pte_range(vm->page_table, (void *)start, + error = visit_pte_range(vm->address_space->page_table, (void *)start, (void *)end, VPTEF_DEFAULT, &remap_one_page, &args); if (error) { ekprintf("remap_process_memory_range(%p,%p,%#lx,%#lx,%#lx):" @@ -1292,8 +1354,9 @@ int sync_process_memory_range(struct process_vm *vm, struct vm_range *range, ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); memobj_lock(range->memobj); - 
error = visit_pte_range(vm->page_table, (void *)start, (void *)end, - VPTEF_SKIP_NULL, &sync_one_page, &args); + error = visit_pte_range(vm->address_space->page_table, (void *)start, + (void *)end, VPTEF_SKIP_NULL, &sync_one_page, + &args); memobj_unlock(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { @@ -1373,8 +1436,9 @@ int invalidate_process_memory_range(struct process_vm *vm, ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); memobj_lock(range->memobj); - error = visit_pte_range(vm->page_table, (void *)start, (void *)end, - VPTEF_SKIP_NULL, &invalidate_one_page, &args); + error = visit_pte_range(vm->address_space->page_table, (void *)start, + (void *)end, VPTEF_SKIP_NULL, + &invalidate_one_page, &args); memobj_unlock(range->memobj); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); if (error) { @@ -1404,7 +1468,9 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n", vm, range->start, range->end, range->flag, fault_addr, reason); ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); /*****/ - ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align); + ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table, + (void *)fault_addr, &pgaddr, &pgsize, + &p2align); if (!(reason & (PF_PROT | PF_PATCH)) && ptep && !pte_is_null(ptep) && !pte_is_fileoff(ptep, pgsize)) { if (!pte_is_present(ptep)) { @@ -1437,7 +1503,7 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang else { off = pte_get_off(ptep, pgsize); } - error = memobj_get_page(range->memobj, off, p2align, + error = memobj_get_page(range->memobj, off, p2align, &phys, &memobj_flag); if (error) { if (error != -ERESTART) { @@ -1464,11 +1530,11 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang else { phys = pte_get_phys(ptep); } - + page = phys_to_page(phys); - + attr = 
arch_vrflag_to_ptattr(range->flag | memobj_flag, reason, ptep); - + /*****/ if (((range->flag & VR_PRIVATE) || ((reason & PF_PATCH) @@ -1499,15 +1565,17 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang } /*****/ if (ptep) { - error = ihk_mc_pt_set_pte(vm->page_table, ptep, pgsize, phys, attr); + error = ihk_mc_pt_set_pte(vm->address_space->page_table, ptep, + pgsize, phys, attr); if (error) { kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_pte failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; } } else { - error = ihk_mc_pt_set_range(vm->page_table, vm, pgaddr, pgaddr + pgsize, - phys, attr); + error = ihk_mc_pt_set_range(vm->address_space->page_table, vm, + pgaddr, pgaddr + pgsize, phys, + attr); if (error) { kprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx):set_range failed. %d\n", vm, range->start, range->end, range->flag, fault_addr, reason, error); goto out; @@ -1544,7 +1612,7 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui range = lookup_process_memory_range(vm, fault_addr, fault_addr+1); if (range == NULL) { error = -EFAULT; - kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" + dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" "out of range. %d\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason, error); @@ -1557,11 +1625,11 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui || ((reason & PF_INSTR) && !(range->flag & VR_PROT_EXEC))) { error = -EFAULT; - kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" + dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" "access denied. 
%d\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason, error); - if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)) + if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)) kprintf("if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE))\n"); if (((reason & PF_WRITE) && !(reason & PF_PATCH))) kprintf("if (((reason & PF_WRITE) && !(reason & PF_PATCH)))\n"); @@ -1604,7 +1672,7 @@ static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, ui goto out; } if (error) { - kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" + dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):" "fault range failed. %d\n", ihk_mc_get_processor_id(), vm, fault_addr0, reason, error); @@ -1623,7 +1691,7 @@ out: int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr, uint64_t reason) { int error; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); for (;;) { error = do_page_fault_process_vm(fault_vm, fault_addr, reason); @@ -1631,23 +1699,23 @@ int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr, uint64_ break; } - if (proc->pgio_fp) { - (*proc->pgio_fp)(proc->pgio_arg); - proc->pgio_fp = NULL; + if (thread->pgio_fp) { + (*thread->pgio_fp)(thread->pgio_arg); + thread->pgio_fp = NULL; } } return error; } -int init_process_stack(struct process *process, struct program_load_desc *pn, - int argc, char **argv, +int init_process_stack(struct thread *thread, struct program_load_desc *pn, + int argc, char **argv, int envc, char **env) { int s_ind = 0; int arg_ind; unsigned long size; - unsigned long end = process->vm->region.user_end; + unsigned long end = thread->vm->region.user_end; unsigned long start; int rc; unsigned long vrflag; @@ -1656,10 +1724,11 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, unsigned long *p; unsigned long minsz; unsigned long at_rand; + struct process *proc = thread->proc; /* create stack range */ minsz = PAGE_SIZE; - size = 
process->rlimit[MCK_RLIMIT_STACK].rlim_cur & PAGE_MASK; + size = proc->rlimit[MCK_RLIMIT_STACK].rlim_cur & PAGE_MASK; if (size > (USER_END / 2)) { size = USER_END / 2; } @@ -1672,7 +1741,7 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, vrflag |= PROT_TO_VR_FLAG(pn->stack_prot); vrflag |= VR_MAXPROT_READ | VR_MAXPROT_WRITE | VR_MAXPROT_EXEC; #define NOPHYS ((uintptr_t)-1) - if ((rc = add_process_memory_range(process, start, end, NOPHYS, + if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS, vrflag, NULL, 0)) != 0) { return rc; } @@ -1683,10 +1752,11 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, return -ENOMEM; } memset(stack, 0, minsz); - error = ihk_mc_pt_set_range(process->vm->page_table, process->vm, - (void *)(end-minsz), (void *)end, - virt_to_phys(stack), - arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL)); + error = ihk_mc_pt_set_range(thread->vm->address_space->page_table, + thread->vm, (void *)(end-minsz), + (void *)end, virt_to_phys(stack), + arch_vrflag_to_ptattr(vrflag, PF_POPULATE, + NULL)); if (error) { kprintf("init_process_stack:" "set range %lx-%lx %lx failed. %d\n", @@ -1698,12 +1768,12 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, /* set up initial stack frame */ p = (unsigned long *)(stack + minsz); s_ind = -1; - + /* "random" 16 bytes on the very top */ p[s_ind--] = 0x010101011; p[s_ind--] = 0x010101011; at_rand = end + sizeof(unsigned long) * s_ind; - + /* auxiliary vector */ /* If you add/delete entires, please increase/decrease AUXV_LEN in include/process.h. 
*/ @@ -1716,24 +1786,23 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, p[s_ind--] = pn->at_phent; /* AT_PHENT */ p[s_ind--] = AT_PHENT; p[s_ind--] = pn->at_phdr; /* AT_PHDR */ - p[s_ind--] = AT_PHDR; + p[s_ind--] = AT_PHDR; p[s_ind--] = 4096; /* AT_PAGESZ */ p[s_ind--] = AT_PAGESZ; p[s_ind--] = pn->at_clktck; /* AT_CLKTCK */ p[s_ind--] = AT_CLKTCK; p[s_ind--] = at_rand; /* AT_RANDOM */ p[s_ind--] = AT_RANDOM; - + /* Save auxiliary vector for later use. */ - memcpy(process->saved_auxv, &p[s_ind + 1], - sizeof(process->saved_auxv)); - - p[s_ind--] = 0; /* envp terminating NULL */ + memcpy(proc->saved_auxv, &p[s_ind + 1], sizeof(proc->saved_auxv)); + + p[s_ind--] = 0; /* envp terminating NULL */ /* envp */ for (arg_ind = envc - 1; arg_ind > -1; --arg_ind) { p[s_ind--] = (unsigned long)env[arg_ind]; } - + p[s_ind--] = 0; /* argv terminating NULL */ /* argv */ for (arg_ind = argc - 1; arg_ind > -1; --arg_ind) { @@ -1742,15 +1811,15 @@ int init_process_stack(struct process *process, struct program_load_desc *pn, /* argc */ p[s_ind] = argc; - ihk_mc_modify_user_context(process->uctx, IHK_UCR_STACK_POINTER, + ihk_mc_modify_user_context(thread->uctx, IHK_UCR_STACK_POINTER, end + sizeof(unsigned long) * s_ind); - process->vm->region.stack_end = end; - process->vm->region.stack_start = start; + thread->vm->region.stack_end = end; + thread->vm->region.stack_start = start; return 0; } -unsigned long extend_process_region(struct process *proc, +unsigned long extend_process_region(struct process_vm *vm, unsigned long start, unsigned long end, unsigned long address, unsigned long flag) { @@ -1785,16 +1854,16 @@ unsigned long extend_process_region(struct process *proc, IHK_MC_AP_NOWAIT)) == NULL){ return end; } - if((rc = add_process_memory_range(proc, old_aligned_end, + if((rc = add_process_memory_range(vm, old_aligned_end, aligned_end, virt_to_phys(p), flag)) != 0){ free_pages(p, (aligned_end - old_aligned_end) >> PAGE_SHIFT); return end; } - + 
dkprintf("filled in gap for LARGE_PAGE_SIZE aligned start: 0x%lX -> 0x%lX\n", old_aligned_end, aligned_end); } - + /* Add large region for the actual mapping */ aligned_new_end = (aligned_new_end + (aligned_end - old_aligned_end) + (LARGE_PAGE_SIZE - 1)) & LARGE_PAGE_MASK; @@ -1814,7 +1883,7 @@ unsigned long extend_process_region(struct process *proc, (void *)(p_aligned + aligned_new_end - aligned_end), (LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT); - if((rc = add_process_memory_range(proc, aligned_end, + if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end, virt_to_phys((void *)p_aligned), flag)) != 0){ free_pages(p, (aligned_new_end - aligned_end + LARGE_PAGE_SIZE) >> PAGE_SHIFT); @@ -1822,8 +1891,8 @@ unsigned long extend_process_region(struct process *proc, } dkprintf("largePTE area: 0x%lX - 0x%lX (s: %lu) -> 0x%lX - \n", - aligned_end, aligned_new_end, - (aligned_new_end - aligned_end), + aligned_end, aligned_new_end, + (aligned_new_end - aligned_end), virt_to_phys((void *)p_aligned)); return address; @@ -1840,42 +1909,41 @@ unsigned long extend_process_region(struct process *proc, if (!p) { return end; } - } - if((rc = add_process_memory_range(proc, aligned_end, aligned_new_end, + } + if((rc = add_process_memory_range(vm, aligned_end, aligned_new_end, (p==0?0:virt_to_phys(p)), flag, NULL, 0)) != 0){ free_pages(p, (aligned_new_end - aligned_end) >> PAGE_SHIFT); return end; } - + return address; } -// Original version retained because dcfa (src/mccmd/client/ibmic/main.c) calls this -int remove_process_region(struct process *proc, +// Original version retained because dcfa (src/mccmd/client/ibmic/main.c) calls this +int remove_process_region(struct process_vm *vm, unsigned long start, unsigned long end) { if ((start & (PAGE_SIZE - 1)) || (end & (PAGE_SIZE - 1))) { return -EINVAL; } - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); /* We defer freeing to the time of exit 
*/ // XXX: check error - ihk_mc_pt_clear_range(proc->vm->page_table, proc->vm, + ihk_mc_pt_clear_range(vm->address_space->page_table, vm, (void *)start, (void *)end); - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); return 0; } -void flush_process_memory(struct process *proc) +void flush_process_memory(struct process_vm *vm) { - struct process_vm *vm = proc->vm; struct vm_range *range; struct vm_range *next; int error; - dkprintf("flush_process_memory(%p)\n", proc); + dkprintf("flush_process_memory(%p)\n", vm); ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); /* Let concurrent page faults know the VM will be gone */ vm->exiting = 1; @@ -1886,21 +1954,20 @@ void flush_process_memory(struct process *proc) if (error) { ekprintf("flush_process_memory(%p):" "free range failed. %lx-%lx %d\n", - proc, range->start, range->end, error); + vm, range->start, range->end, error); /* through */ } } } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); - dkprintf("flush_process_memory(%p):\n", proc); + dkprintf("flush_process_memory(%p):\n", vm); return; } -void free_process_memory_ranges(struct process *proc) +void free_process_memory_ranges(struct process_vm *vm) { int error; struct vm_range *range, *next; - struct process_vm *vm = proc->vm; if (vm == NULL) { return; @@ -1912,24 +1979,67 @@ void free_process_memory_ranges(struct process *proc) if (error) { ekprintf("free_process_memory(%p):" "free range failed. 
%lx-%lx %d\n", - proc, range->start, range->end, error); + vm, range->start, range->end, error); /* through */ } } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); } -void free_process_memory(struct process *proc) +void +hold_process(struct process *proc) { - struct vm_range *range, *next; - struct process_vm *vm = proc->vm; - int error; + ihk_atomic_inc(&proc->refcount); +} - if (vm == NULL) { +void +release_process(struct process *proc) +{ + struct process *parent; + struct mcs_rwlock_node_irqsave lock; + struct process_hash *phash; + struct resource_set *rset; + int hash; + + if (!ihk_atomic_dec_and_test(&proc->refcount)) { return; } - proc->vm = NULL; + rset = cpu_local_var(resource_set); + phash = rset->process_hash; + hash = process_hash(proc->pid); + + mcs_rwlock_writer_lock(&phash->lock[hash], &lock); + list_del(&proc->hash_list); + mcs_rwlock_writer_unlock(&phash->lock[hash], &lock); + + parent = proc->parent; + mcs_rwlock_writer_lock(&parent->children_lock, &lock); + list_del(&proc->siblings_list); + mcs_rwlock_writer_unlock(&parent->children_lock, &lock); + + if(proc->ptrace & PT_TRACED){ + parent = proc->ppid_parent; + mcs_rwlock_writer_lock(&parent->children_lock, &lock); + list_del(&proc->ptraced_siblings_list); + mcs_rwlock_writer_unlock(&parent->children_lock, &lock); + } + + kfree(proc); +} + +void +hold_process_vm(struct process_vm *vm) +{ + ihk_atomic_inc(&vm->refcount); +} + +void +release_process_vm(struct process_vm *vm) +{ + struct vm_range *range, *next; + int error; + if (!ihk_atomic_dec_and_test(&vm->refcount)) { return; } @@ -1940,17 +2050,22 @@ void free_process_memory(struct process *proc) if (error) { ekprintf("free_process_memory(%p):" "free range failed. 
%lx-%lx %d\n", - proc, range->start, range->end, error); + vm, range->start, range->end, error); /* through */ } } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); - - ihk_mc_pt_destroy(vm->page_table); - release_process(vm->owner_process); } -int populate_process_memory(struct process *proc, void *start, size_t len) +static void +free_process_vm(struct process_vm *vm) +{ + detach_address_space(vm->address_space, vm->proc->pid); + kfree(vm); + release_process(vm->proc); +} + +int populate_process_memory(struct process_vm *vm, void *start, size_t len) { int error; const int reason = PF_USER | PF_POPULATE; @@ -1959,11 +2074,11 @@ int populate_process_memory(struct process *proc, void *start, size_t len) end = (uintptr_t)start + len; for (addr = (uintptr_t)start; addr < end; addr += PAGE_SIZE) { - error = page_fault_process_vm(proc->vm, (void *)addr, reason); + error = page_fault_process_vm(vm, (void *)addr, reason); if (error) { ekprintf("populate_process_range:page_fault_process_vm" "(%p,%lx,%lx) failed %d\n", - proc, addr, reason, error); + vm, addr, reason, error); goto out; } } @@ -1973,69 +2088,98 @@ out: return error; } -void hold_process(struct process *proc) +void hold_thread(struct thread *thread) { - if (proc->ftn->status & (PS_ZOMBIE | PS_EXITED)) { - panic("hold_process: already exited process"); + if (thread->proc->pstatus & (PS_ZOMBIE | PS_EXITED)) { + panic("hold_thread: already exited process"); } - ihk_atomic_inc(&proc->refcount); + ihk_atomic_inc(&thread->refcount); return; } -void destroy_process(struct process *proc) +void +hold_sigcommon(struct sig_common *sigcommon) +{ + ihk_atomic_inc(&sigcommon->use); +} + +void +release_sigcommon(struct sig_common *sigcommon) { struct sig_pending *pending; struct sig_pending *next; - delete_proc_procfs_files(proc->ftn->pid); - - if (proc->vm) { - cpu_clear(proc->cpu_id, &proc->vm->cpu_set, &proc->vm->cpu_set_lock); - } - - free_process_memory(proc); - - 
if(ihk_atomic_dec_and_test(&proc->sighandler->use)){ - kfree(proc->sighandler); - } - if(ihk_atomic_dec_and_test(&proc->sigshared->use)){ - list_for_each_entry_safe(pending, next, &proc->sigshared->sigpending, list){ - list_del(&pending->list); - kfree(pending); - } - list_del(&proc->sigshared->sigpending); - kfree(proc->sigshared); - } - list_for_each_entry_safe(pending, next, &proc->sigpending, list){ - list_del(&pending->list); - kfree(pending); - } - if (proc->ptrace_debugreg) { - kfree(proc->ptrace_debugreg); - } - if (proc->ptrace_recvsig) { - kfree(proc->ptrace_recvsig); - } - if (proc->ptrace_sendsig) { - kfree(proc->ptrace_sendsig); - } - if (proc->fp_regs) { - release_fp_regs(proc); - } - if (proc->saved_cmdline) { - kfree(proc->saved_cmdline); - } - ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); -} - -void release_process(struct process *proc) -{ - if (!ihk_atomic_dec_and_test(&proc->refcount)) { + if (!ihk_atomic_dec_and_test(&sigcommon->use)) { return; } - destroy_process(proc); + list_for_each_entry_safe(pending, next, &sigcommon->sigpending, list){ + list_del(&pending->list); + kfree(pending); + } + kfree(sigcommon); +} + +void destroy_thread(struct thread *thread) +{ + struct sig_pending *pending; + struct sig_pending *signext; + struct mcs_rwlock_node_irqsave lock; + struct process *proc = thread->proc; + struct resource_set *resource_set = cpu_local_var(resource_set); + int hash; + + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_del(&thread->siblings_list); + mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + + hash = thread_hash(thread->tid); + mcs_rwlock_writer_lock(&resource_set->thread_hash->lock[hash], &lock); + list_del(&thread->hash_list); + mcs_rwlock_writer_unlock(&resource_set->thread_hash->lock[hash], &lock); + + cpu_clear(thread->cpu_id, &thread->vm->cpu_set, &thread->vm->cpu_set_lock); + list_for_each_entry_safe(pending, signext, &thread->sigpending, list){ + list_del(&pending->list); + kfree(pending); + } + + if 
(thread->ptrace_debugreg) { + kfree(thread->ptrace_debugreg); + } + if (thread->ptrace_recvsig) { + kfree(thread->ptrace_recvsig); + } + if (thread->ptrace_sendsig) { + kfree(thread->ptrace_sendsig); + } + if (thread->fp_regs) { + release_fp_regs(thread); + } + + release_sigcommon(thread->sigcommon); + + ihk_mc_free_pages(thread, KERNEL_STACK_NR_PAGES); +} + +void release_thread(struct thread *thread) +{ + struct process_vm *vm; + struct process *proc; + + if (!ihk_atomic_dec_and_test(&thread->refcount)) { + return; + } + + vm = thread->vm; + proc = thread->proc; + + destroy_thread(thread); + + if(ihk_atomic_read(&vm->refcount) == 0) + free_process_vm(vm); + release_process(proc); } void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock) @@ -2054,7 +2198,7 @@ void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock) ihk_mc_spinlock_unlock(lock, flags); } -void cpu_clear_and_set(int c_cpu, int s_cpu, +void cpu_clear_and_set(int c_cpu, int s_cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock) { unsigned int flags; @@ -2070,11 +2214,11 @@ static void do_migrate(void); static void idle(void) { struct cpu_local_var *v = get_this_cpu_local_var(); - + /* Release runq_lock before starting the idle loop. * See comments at release_runq_lock(). 
*/ - ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), + ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), cpu_local_var(runq_irqstate)); if(v->status == CPU_STATUS_RUNNING) @@ -2084,7 +2228,7 @@ static void idle(void) while (1) { schedule(); cpu_disable_interrupt(); - + /* See if we need to migrate a process somewhere */ if (v->flags & CPU_FLAG_NEED_MIGRATE) { do_migrate(); @@ -2113,11 +2257,11 @@ static void idle(void) if (v->status == CPU_STATUS_IDLE || v->status == CPU_STATUS_RESERVED) { long s; - struct process *p; + struct thread *t; s = ihk_mc_spinlock_lock(&v->runq_lock); - list_for_each_entry(p, &v->runq, sched_list) { - if (p->ftn->status == PS_RUNNING) { + list_for_each_entry(t, &v->runq, sched_list) { + if (t->tstatus == PS_RUNNING) { v->status = CPU_STATUS_RUNNING; break; } @@ -2134,22 +2278,112 @@ static void idle(void) } } +struct resource_set * +new_resource_set() +{ + struct resource_set *res; + struct process_hash *phash; + struct thread_hash *thash; + struct process *pid1; + int i; + int hash; + + res = kmalloc(sizeof(struct resource_set), IHK_MC_AP_NOWAIT); + phash = kmalloc(sizeof(struct process_hash), IHK_MC_AP_NOWAIT); + thash = kmalloc(sizeof(struct thread_hash), IHK_MC_AP_NOWAIT); + pid1 = kmalloc(sizeof(struct process), IHK_MC_AP_NOWAIT); + + if(!res || !phash || !thash || !pid1){ + if(res) + kfree(res); + if(phash) + kfree(phash); + if(thash) + kfree(thash); + if(pid1) + kfree(pid1); + return NULL; + } + + memset(res, '\0', sizeof(struct resource_set)); + memset(phash, '\0', sizeof(struct process_hash)); + memset(thash, '\0', sizeof(struct thread_hash)); + memset(pid1, '\0', sizeof(struct process)); + + INIT_LIST_HEAD(&res->phys_mem_list); + mcs_rwlock_init(&res->phys_mem_lock); + mcs_rwlock_init(&res->cpu_set_lock); + + for(i = 0; i < HASH_SIZE; i++){ + INIT_LIST_HEAD(&phash->list[i]); + mcs_rwlock_init(&phash->lock[i]); + } + res->process_hash = phash; + + for(i = 0; i < HASH_SIZE; i++){ + INIT_LIST_HEAD(&thash->list[i]); + 
mcs_rwlock_init(&thash->lock[i]); + } + res->thread_hash = thash; + + init_process(pid1, pid1); + pid1->pid = 1; + hash = process_hash(1); + list_add_tail(&pid1->hash_list, &phash->list[hash]); + res->pid1 = pid1; + + return res; +} + +void +proc_init() +{ + struct resource_set *res = new_resource_set(); + int i; + + if(!res){ + panic("no mem for resource_set"); + } + INIT_LIST_HEAD(&resource_set_list); + mcs_rwlock_init(&resource_set_lock); + for(i = 0; i < num_processors; i++){ + CPU_SET(i, &res->cpu_set); + } + // TODO: setup for phys mem + res->path = kmalloc(2, IHK_MC_AP_NOWAIT); + if(!res->path){ + panic("no mem for resource_set"); + } + res->path[0] = '/'; + res->path[1] = '\0'; + list_add_tail(&res->list, &resource_set_list); +} + void sched_init(void) { - struct process *idle_process = &cpu_local_var(idle); + struct thread *idle_thread = &cpu_local_var(idle); + struct resource_set *res; - memset(idle_process, 0, sizeof(struct process)); + res = list_first_entry(&resource_set_list, struct resource_set, list); + cpu_local_var(resource_set) = res; + + memset(idle_thread, 0, sizeof(struct thread)); memset(&cpu_local_var(idle_vm), 0, sizeof(struct process_vm)); - memset(&cpu_local_var(idle_ftn), 0, sizeof(struct fork_tree_node)); + memset(&cpu_local_var(idle_proc), 0, sizeof(struct process)); - idle_process->vm = &cpu_local_var(idle_vm); - idle_process->ftn = &cpu_local_var(idle_ftn); + idle_thread->vm = &cpu_local_var(idle_vm); + idle_thread->vm->address_space = &cpu_local_var(idle_asp); + idle_thread->proc = &cpu_local_var(idle_proc); + init_process(idle_thread->proc, NULL); + idle_thread->proc->vm = &cpu_local_var(idle_vm); + list_add_tail(&idle_thread->siblings_list, + &idle_thread->proc->children_list); - ihk_mc_init_context(&idle_process->ctx, NULL, idle); - ihk_mc_spinlock_init(&idle_process->vm->memory_range_lock); - INIT_LIST_HEAD(&idle_process->vm->vm_range_list); - idle_process->ftn->pid = 0; - idle_process->ftn->tid = ihk_mc_get_processor_id(); + 
ihk_mc_init_context(&idle_thread->ctx, NULL, idle); + ihk_mc_spinlock_init(&idle_thread->vm->memory_range_lock); + INIT_LIST_HEAD(&idle_thread->vm->vm_range_list); + idle_thread->proc->pid = 0; + idle_thread->tid = ihk_mc_get_processor_id(); INIT_LIST_HEAD(&cpu_local_var(runq)); cpu_local_var(runq_len) = 0; @@ -2185,7 +2419,7 @@ static void double_rq_unlock(struct cpu_local_var *v1, struct cpu_local_var *v2, struct migrate_request { struct list_head list; - struct process *proc; + struct thread *thread; struct waitq wq; }; @@ -2204,14 +2438,14 @@ static void do_migrate(void) /* 0. check if migration is necessary */ list_del(&req->list); - if (req->proc->cpu_id != cur_cpu_id) /* already not here */ + if (req->thread->cpu_id != cur_cpu_id) /* already not here */ goto ack; - if (CPU_ISSET(cur_cpu_id, &req->proc->cpu_set)) /* good affinity */ + if (CPU_ISSET(cur_cpu_id, &req->thread->cpu_set)) /* good affinity */ goto ack; /* 1. select CPU */ for (cpu_id = 0; cpu_id < CPU_SETSIZE; cpu_id++) - if (CPU_ISSET(cpu_id, &req->proc->cpu_set)) + if (CPU_ISSET(cpu_id, &req->thread->cpu_set)) break; if (CPU_SETSIZE == cpu_id) /* empty affinity (bug?) */ goto ack; @@ -2219,20 +2453,20 @@ static void do_migrate(void) /* 2. 
migrate thread */ v = get_cpu_local_var(cpu_id); double_rq_lock(cur_v, v, &irqstate); - list_del(&req->proc->sched_list); + list_del(&req->thread->sched_list); cur_v->runq_len -= 1; - old_cpu_id = req->proc->cpu_id; - req->proc->cpu_id = cpu_id; - settid(req->proc, 2, cpu_id, old_cpu_id); - list_add_tail(&req->proc->sched_list, &v->runq); + old_cpu_id = req->thread->cpu_id; + req->thread->cpu_id = cpu_id; + settid(req->thread, 2, cpu_id, old_cpu_id); + list_add_tail(&req->thread->sched_list, &v->runq); v->runq_len += 1; /* update cpu_set of the VM for remote TLB invalidation */ - cpu_clear_and_set(old_cpu_id, cpu_id, &req->proc->vm->cpu_set, - &req->proc->vm->cpu_set_lock); + cpu_clear_and_set(old_cpu_id, cpu_id, &req->thread->vm->cpu_set, + &req->thread->vm->cpu_set_lock); dkprintf("do_migrate(): migrated TID %d from CPU %d to CPU %d\n", - req->proc->ftn->tid, old_cpu_id, cpu_id); + req->thread->tid, old_cpu_id, cpu_id); v->flags |= CPU_FLAG_NEED_RESCHED; ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu_id)->apic_id, 0xd1); @@ -2244,27 +2478,25 @@ ack: ihk_mc_spinlock_unlock(&cur_v->migq_lock, irqstate); } -extern int num_processors; -extern ihk_spinlock_t cpuid_head_lock; - void schedule(void) { struct cpu_local_var *v; - struct process *next, *prev, *proc, *tmp = NULL; + struct thread *next, *prev, *thread, *tmp = NULL; int switch_ctx = 0; - struct process *last; + struct thread *last; if (cpu_local_var(no_preempt)) { - dkprintf("no schedule() while no preemption! \n"); + kprintf("no schedule() while no preemption! 
\n"); +panic("panic schedule\n"); return; } - + if (cpu_local_var(current) && cpu_local_var(current)->in_syscall_offload) { dkprintf("no schedule() while syscall offload!\n"); return; } - + redo: cpu_local_var(runq_irqstate) = ihk_mc_spinlock_lock(&(get_this_cpu_local_var()->runq_lock)); @@ -2278,10 +2510,10 @@ redo: /* All runnable processes are on the runqueue */ if (prev && prev != &cpu_local_var(idle)) { list_del(&prev->sched_list); - --v->runq_len; - + --v->runq_len; + /* Round-robin if not exited yet */ - if (!(prev->ftn->status & (PS_ZOMBIE | PS_EXITED))) { + if (prev->tstatus != PS_EXITED) { list_add_tail(&prev->sched_list, &(v->runq)); ++v->runq_len; } @@ -2305,9 +2537,9 @@ redo: next = &cpu_local_var(idle); } else { /* Pick a new running process */ - list_for_each_entry_safe(proc, tmp, &(v->runq), sched_list) { - if (proc->ftn->status == PS_RUNNING) { - next = proc; + list_for_each_entry_safe(thread, tmp, &(v->runq), sched_list) { + if (thread->tstatus == PS_RUNNING) { + next = thread; break; } } @@ -2323,10 +2555,10 @@ redo: switch_ctx = 1; v->current = next; } - + if (switch_ctx) { dkprintf("schedule: %d => %d \n", - prev ? prev->ftn->tid : 0, next ? next->ftn->tid : 0); + prev ? prev->tid : 0, next ? next->tid : 0); if (prev && prev->ptrace_debugreg) { save_debugreg(prev->ptrace_debugreg); @@ -2347,29 +2579,29 @@ redo: restore_fp_regs(next); } - ihk_mc_load_page_table(next->vm->page_table); - - dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", - ihk_mc_get_processor_id(), next->thread.tlsblock_base); + ihk_mc_load_page_table(next->vm->address_space->page_table); + + dkprintf("[%d] schedule: tlsblock_base: 0x%lX\n", + ihk_mc_get_processor_id(), next->thread.tlsblock_base); /* Set up new TLS.. 
*/ do_arch_prctl(ARCH_SET_FS, next->thread.tlsblock_base); - + if (prev) { last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); - } + } else { last = ihk_mc_switch_context(NULL, &next->ctx, prev); } - - /* + + /* * We must hold the lock throughout the context switch, otherwise - * an IRQ could deschedule this process between page table loading and + * an IRQ could deschedule this process between page table loading and * context switching and leave the execution in an inconsistent state. - * Since we may be migrated to another core meanwhile, we refer - * directly to cpu_local_var. + * Since we may be migrated to another core meanwhile, we refer + * directly to cpu_local_var. */ - ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), + ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), cpu_local_var(runq_irqstate)); /* Have we migrated to another core meanwhile? */ @@ -2378,13 +2610,12 @@ redo: goto redo; } - if ((last != NULL) && (last->ftn) && (last->ftn->status & (PS_ZOMBIE | PS_EXITED))) { - free_process_memory(last); - release_process(last); + if ((last != NULL) && (last->tstatus == PS_EXITED)) { + release_thread(last); } } else { - ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), + ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)), cpu_local_var(runq_irqstate)); } } @@ -2415,48 +2646,48 @@ void check_need_resched(void) } } - -int sched_wakeup_process(struct process *proc, int valid_states) +int +sched_wakeup_thread(struct thread *thread, int valid_states) { int status; int spin_slept = 0; unsigned long irqstate; - struct cpu_local_var *v = get_cpu_local_var(proc->cpu_id); + struct cpu_local_var *v = get_cpu_local_var(thread->cpu_id); dkprintf("sched_wakeup_process,proc->pid=%d,valid_states=%08x,proc->status=%08x,proc->cpu_id=%d,my cpu_id=%d\n", - proc->ftn->pid, valid_states, proc->ftn->status, proc->cpu_id, ihk_mc_get_processor_id()); - - irqstate = ihk_mc_spinlock_lock(&(proc->spin_sleep_lock)); - if (proc->spin_sleep > 0) { - 
dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n", - proc->cpu_id); + thread->proc->pid, valid_states, thread->tstatus, thread->cpu_id, ihk_mc_get_processor_id()); + + irqstate = ihk_mc_spinlock_lock(&(thread->spin_sleep_lock)); + if (thread->spin_sleep > 0) { + dkprintf("sched_wakeup_process() spin wakeup: cpu_id: %d\n", + thread->cpu_id); spin_slept = 1; - status = 0; + status = 0; } - --proc->spin_sleep; - ihk_mc_spinlock_unlock(&(proc->spin_sleep_lock), irqstate); - + --thread->spin_sleep; + ihk_mc_spinlock_unlock(&(thread->spin_sleep_lock), irqstate); + if (spin_slept) { return status; } irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - - if (proc->ftn->status & valid_states) { - xchg4((int *)(&proc->ftn->status), PS_RUNNING); + + if (thread->tstatus & valid_states) { + xchg4((int *)(&thread->tstatus), PS_RUNNING); status = 0; - } + } else { status = -EINVAL; } ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - if (!status && (proc->cpu_id != ihk_mc_get_processor_id())) { - dkprintf("sched_wakeup_process,issuing IPI,proc->cpu_id=%d\n", - proc->cpu_id); - ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(proc->cpu_id)->apic_id, + if (!status && (thread->cpu_id != ihk_mc_get_processor_id())) { + dkprintf("sched_wakeup_process,issuing IPI,thread->cpu_id=%d\n", + thread->cpu_id); + ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(thread->cpu_id)->apic_id, 0xd1); } @@ -2481,10 +2712,10 @@ int sched_wakeup_process(struct process *proc, int valid_states) * 3. Do migration * 4. 
Wake up this thread */ -void sched_request_migrate(int cpu_id, struct process *proc) +void sched_request_migrate(int cpu_id, struct thread *thread) { struct cpu_local_var *v = get_cpu_local_var(cpu_id); - struct migrate_request req = { .proc = proc }; + struct migrate_request req = { .thread = thread }; unsigned long irqstate; DECLARE_WAITQ_ENTRY(entry, cpu_local_var(current)); @@ -2509,30 +2740,30 @@ void sched_request_migrate(int cpu_id, struct process *proc) } /* Runq lock must be held here */ -void __runq_add_proc(struct process *proc, int cpu_id) +void __runq_add_thread(struct thread *thread, int cpu_id) { struct cpu_local_var *v = get_cpu_local_var(cpu_id); - list_add_tail(&proc->sched_list, &v->runq); + list_add_tail(&thread->sched_list, &v->runq); ++v->runq_len; v->flags |= CPU_FLAG_NEED_RESCHED; - proc->cpu_id = cpu_id; - //proc->ftn->status = PS_RUNNING; /* not set here */ + thread->cpu_id = cpu_id; + //thread->proc->status = PS_RUNNING; /* not set here */ get_cpu_local_var(cpu_id)->status = CPU_STATUS_RUNNING; dkprintf("runq_add_proc(): tid %d added to CPU[%d]'s runq\n", - proc->ftn->tid, cpu_id); + thread->tid, cpu_id); } -void runq_add_proc(struct process *proc, int cpu_id) +void runq_add_thread(struct thread *thread, int cpu_id) { struct cpu_local_var *v = get_cpu_local_var(cpu_id); unsigned long irqstate; irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - __runq_add_proc(proc, cpu_id); + __runq_add_thread(thread, cpu_id); ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - create_proc_procfs_files(proc->ftn->pid, cpu_id); + create_proc_procfs_files(thread->proc->pid, cpu_id); /* Kick scheduler */ if (cpu_id != ihk_mc_get_processor_id()) @@ -2541,71 +2772,139 @@ void runq_add_proc(struct process *proc, int cpu_id) } /* NOTE: shouldn't remove a running process! 
*/ -void runq_del_proc(struct process *proc, int cpu_id) +void runq_del_thread(struct thread *thread, int cpu_id) { struct cpu_local_var *v = get_cpu_local_var(cpu_id); unsigned long irqstate; - + irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_del(&proc->sched_list); + list_del(&thread->sched_list); --v->runq_len; - + if (!v->runq_len) get_cpu_local_var(cpu_id)->status = CPU_STATUS_IDLE; ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); } -struct process * -findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate) +struct thread * +find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock) { - struct cpu_local_var *v; - struct process *p; - int i; - extern int num_processors; + struct thread *thread; + struct thread_hash *thash = cpu_local_var(resource_set)->thread_hash; + int hash = thread_hash(tid); - for(i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - *savelock = &(v->runq_lock); - *irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid && - (tid == -1 || p->ftn->tid == tid)){ - return p; - } + if(tid <= 0) + return NULL; + mcs_rwlock_reader_lock(&thash->lock[hash], lock); + list_for_each_entry(thread, &thash->list[hash], hash_list){ + if(thread->tid == tid){ + if(pid <= 0) + return thread; + if(pid == thread->proc->pid) + return thread; } - ihk_mc_spinlock_unlock(&(v->runq_lock), *irqstate); } + mcs_rwlock_reader_unlock(&thash->lock[hash], lock); return NULL; } void -process_unlock(void *savelock, unsigned long irqstate) +thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock) { - ihk_mc_spinlock_unlock((ihk_spinlock_t *)savelock, irqstate); + struct thread_hash *thash = cpu_local_var(resource_set)->thread_hash; + int hash; + + if(!thread) + return; + hash = thread_hash(thread->tid); + mcs_rwlock_reader_unlock(&thash->lock[hash], lock); +} + +struct process * +find_process(int pid, struct 
mcs_rwlock_node_irqsave *lock) +{ + struct process *proc; + struct process_hash *phash = cpu_local_var(resource_set)->process_hash; + int hash = process_hash(pid); + + if(pid <= 0) + return NULL; + mcs_rwlock_reader_lock(&phash->lock[hash], lock); + list_for_each_entry(proc, &phash->list[hash], hash_list){ + if(proc->pid == pid){ + if(pid == proc->pid) + return proc; + } + } + mcs_rwlock_reader_unlock(&phash->lock[hash], lock); + return NULL; +} + +void +process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock) +{ + struct process_hash *phash = cpu_local_var(resource_set)->process_hash; + int hash; + + if(!proc) + return; + hash = process_hash(proc->pid); + mcs_rwlock_reader_unlock(&phash->lock[hash], lock); } void debug_log(unsigned long arg) { - struct cpu_local_var *v; struct process *p; + struct thread *t; int i; - extern int num_processors; - unsigned long irqstate; + struct mcs_rwlock_node_irqsave lock; + struct resource_set *rset = cpu_local_var(resource_set); + struct process_hash *phash = rset->process_hash; + struct thread_hash *thash = rset->thread_hash; switch(arg){ case 1: - for(i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid <= 0) - continue; - kprintf("cpu=%d pid=%d tid=%d status=%d\n", - i, p->ftn->pid, p->ftn->tid, p->ftn->status); + for(i = 0; i < HASH_SIZE; i++){ + __mcs_rwlock_reader_lock(&phash->lock[i], &lock); + list_for_each_entry(p, &phash->list[i], hash_list){ + kprintf("pid=%d ppid=%d status=%d\n", + p->pid, p->ppid_parent->pid, p->pstatus); + } + __mcs_rwlock_reader_unlock(&phash->lock[i], &lock); + } + break; + case 2: + for(i = 0; i < HASH_SIZE; i++){ + __mcs_rwlock_reader_lock(&thash->lock[i], &lock); + list_for_each_entry(t, &thash->list[i], hash_list){ + kprintf("cpu=%d pid=%d tid=%d status=%d offload=%d\n", + t->cpu_id, t->proc->pid, t->tid, + t->tstatus, t->in_syscall_offload); + } + 
__mcs_rwlock_reader_unlock(&thash->lock[i], &lock); + } + break; + case 3: + for(i = 0; i < HASH_SIZE; i++){ + if(phash->lock[i].node) + kprintf("phash[i] is locked\n"); + list_for_each_entry(p, &phash->list[i], hash_list){ + kprintf("pid=%d ppid=%d status=%d\n", + p->pid, p->ppid_parent->pid, p->pstatus); + } + } + break; + case 4: + for(i = 0; i < HASH_SIZE; i++){ + if(thash->lock[i].node) + kprintf("thash[i] is locked\n"); + list_for_each_entry(t, &thash->list[i], hash_list){ + kprintf("cpu=%d pid=%d tid=%d status=%d\n", + t->cpu_id, t->proc->pid, t->tid, + t->tstatus); } - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); } break; } diff --git a/kernel/procfs.c b/kernel/procfs.c index b1742b8c..321da1f6 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -257,14 +257,14 @@ static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, in void process_procfs_request(unsigned long rarg) { unsigned long parg, pbuf; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; struct procfs_read *r; struct ikc_scd_packet packet; int rosnum, ret, pid, tid, ans = -EIO, eof = 0; char *buf, *p; struct ihk_ikc_channel_desc *syscall_channel; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct mcs_rwlock_node_irqsave lock; unsigned long offset; int count; int npages; @@ -336,30 +336,31 @@ void process_procfs_request(unsigned long rarg) */ ret = sscanf(p, "%d/", &pid); if (ret == 1) { - if (pid != cpu_local_var(current)->ftn->pid) { + if (pid != cpu_local_var(current)->proc->pid) { /* We are not located in the proper cpu for some reason. */ dprintf("mismatched pid. 
We are %d, but requested pid is %d.\n", pid, cpu_local_var(current)->pid); tid = pid; /* main thread */ - proc = findthread_and_lock(pid, tid, &savelock, &irqstate); - if (!proc) { + thread = find_thread(pid, tid, &lock); + if (!thread) { dprintf("We cannot find the proper cpu for requested pid.\n"); goto end; } - else if (proc->cpu_id != ihk_mc_get_processor_id()) { + else if (thread->cpu_id != ihk_mc_get_processor_id()) { /* The target process has gone by migration. */ - r->newcpu = proc->cpu_id; - dprintf("expected cpu id is %d.\n", proc->cpu_id); - process_unlock(savelock, irqstate); + r->newcpu = thread->cpu_id; + dprintf("expected cpu id is %d.\n", thread->cpu_id); + thread_unlock(thread, &lock); ans = 0; goto end; } else { - process_unlock(savelock, irqstate); + thread_unlock(thread, &lock); /* 'proc' is not 'current' */ is_current = 0; } + proc = thread->proc; } } else if (!strcmp(p, "stat")) { /* "/proc/stat" */ @@ -431,7 +432,7 @@ void process_procfs_request(unsigned long rarg) ans = -EIO; goto end; } - ret = ihk_mc_pt_virt_to_phys(vm->page_table, + ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, (void *)offset, &pa); if(ret){ if(ans == 0) @@ -562,8 +563,8 @@ void process_procfs_request(unsigned long rarg) ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); while (start < end) { - *_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->page_table, start); - dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid, + *_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->address_space->page_table, start); + dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->proc->pid, start, *_buf); start += PAGE_SIZE; ++_buf; @@ -586,7 +587,6 @@ void process_procfs_request(unsigned long rarg) unsigned long lockedsize = 0; char tmp[1024]; int len; - struct fork_tree_node *ftn = proc->ftn; ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); list_for_each_entry(range, &proc->vm->vm_range_list, list) { @@ -599,8 +599,8 @@ void process_procfs_request(unsigned long 
rarg) "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n" "VmLck:\t%9lu kB\n", - ftn->ruid, ftn->euid, ftn->suid, ftn->fsuid, - ftn->rgid, ftn->egid, ftn->sgid, ftn->fsgid, + proc->ruid, proc->euid, proc->suid, proc->fsuid, + proc->rgid, proc->egid, proc->sgid, proc->fsgid, (lockedsize + 1023) >> 10); len = strlen(tmp); if (r->offset < len) { @@ -712,7 +712,7 @@ void process_procfs_request(unsigned long rarg) char tmp[1024]; int len; - if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){ + if ((thread = find_thread(pid, tid, &lock))){ dprintf("thread found! pid=%d tid=%d\n", pid, tid); /* * pid (comm) state ppid @@ -748,10 +748,10 @@ void process_procfs_request(unsigned long rarg) 0L, 0L, 0L, 0L, // rsslim... 0L, 0L, 0L, 0L, // kstkesp... 0L, 0L, 0L, 0L, // sigignore... - 0L, 0, proc->cpu_id, 0, // cnswap... + 0L, 0, thread->cpu_id, 0, // cnswap... 0, 0LL, 0L, 0L // policy... ); - process_unlock(savelock, irqstate); + thread_unlock(thread, &lock); dprintf("tmp=%s\n", tmp); len = strlen(tmp); diff --git a/kernel/syscall.c b/kernel/syscall.c index 282fd96c..efcae6cf 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -95,20 +95,20 @@ static char *syscall_name[] MCKERNEL_UNUSED = { }; void check_signal(unsigned long, void *, int); -void do_signal(long rc, void *regs, struct process *proc, struct sig_pending *pending, int num); -extern unsigned long do_kill(int pid, int tid, int sig, struct siginfo *info, int ptracecont); -extern struct sigpending *hassigpending(struct process *proc); +void do_signal(long rc, void *regs, struct thread *thread, struct sig_pending *pending, int num); +extern unsigned long do_kill(struct thread *thread, int pid, int tid, int sig, struct siginfo *info, int ptracecont); +extern struct sigpending *hassigpending(struct thread *thread); int copy_from_user(void *, const void *, size_t); int read_process_vm(struct process_vm *, void *, const void *, size_t); int copy_to_user(void *, const void *, size_t); int 
patch_process_vm(struct process_vm *, void *, const void *, size_t); -void do_setpgid(int, int); -extern long alloc_debugreg(struct process *proc); +extern long alloc_debugreg(struct thread *thread); extern int num_processors; extern unsigned long ihk_mc_get_ns_per_tsc(void); -static int ptrace_detach(int pid, int data); +extern int ptrace_detach(int pid, int data); +extern void debug_log(unsigned long); -int prepare_process_ranges_args_envs(struct process *proc, +int prepare_process_ranges_args_envs(struct thread *thread, struct program_load_desc *pn, struct program_load_desc *p, enum ihk_mc_pt_attribute attr, @@ -170,10 +170,10 @@ static void send_syscall(struct syscall_request *req, int cpu, int pid) #ifdef SYSCALL_BY_IKC packet.msg = SCD_MSG_SYSCALL_ONESIDE; packet.ref = cpu; - packet.pid = pid ? pid : cpu_local_var(current)->ftn->pid; + packet.pid = pid ? pid : cpu_local_var(current)->proc->pid; packet.arg = scp->request_rpa; dkprintf("send syscall, nr: %d, pid: %d\n", req->number, packet.pid); - + ret = ihk_ikc_send(syscall_channel, &packet, 0); if (ret < 0) { kprintf("ERROR: sending IKC msg, ret: %d\n", ret); @@ -192,15 +192,18 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) long rc; int islock = 0; unsigned long irqstate; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); - ++proc->in_syscall_offload; dkprintf("SC(%d)[%3d] sending syscall\n", - ihk_mc_get_processor_id(), - req->number); + ihk_mc_get_processor_id(), + req->number); - if(proc->nohost) // host is down - return -EPIPE; + if(req->number != __NR_exit_group){ + + if(thread->proc->nohost) // host is down + return -EPIPE; + ++thread->in_syscall_offload; + } irqstate = 0; /* for avoidance of warning */ if(req->number == __NR_exit_group || @@ -231,8 +234,8 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) if (res->status == STATUS_PAGE_FAULT) { dkprintf("STATUS_PAGE_FAULT in syscall, pid: %d\n", - 
cpu_local_var(current)->ftn->pid); - error = page_fault_process_vm(proc->vm, + cpu_local_var(current)->proc->pid); + error = page_fault_process_vm(thread->vm, (void *)res->fault_address, res->fault_reason|PF_POPULATE); @@ -255,7 +258,9 @@ long do_syscall(struct syscall_request *req, int cpu, int pid) ihk_mc_spinlock_unlock(&syscall_lock, irqstate); } - --proc->in_syscall_offload; + if(req->number != __NR_exit_group){ + --thread->in_syscall_offload; + } return rc; } @@ -267,51 +272,7 @@ long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx) SYSCALL_FOOTER; } -#if 0 -void sigchld_parent(struct process *parent, int status) -{ - struct process *proc = cpu_local_var(current); - int irqstate; - struct sig_pending *pending; - struct list_head *head; - __sigset_t mask; - - mask = __sigmask(SIGCHLD); - - head = &parent->sigpending; - irqstate = ihk_mc_spinlock_lock(&parent->sigpendinglock); - - list_for_each_entry(pending, head, list) { - if (pending->sigmask.__val[0] == mask) - break; - } - - if (&pending->list == head) { - pending = kmalloc(sizeof(struct sig_pending), IHK_MC_AP_NOWAIT); - - if (!pending) { - /* TODO: what to do here?? 
*/ - panic("ERROR: not enough memory for signaling parent process!"); - } - - pending->sigmask.__val[0] = mask; - pending->info.si_signo = SIGCHLD; - pending->info._sifields._sigchld.si_pid = proc->pid; - pending->info._sifields._sigchld.si_status = status; - - list_add_tail(&pending->list, head); - proc->sigevent = 1; - } - /* TODO: There was a SIGCHLD pending */ - else { - - } - - ihk_mc_spinlock_unlock(&parent->sigpendinglock, irqstate); -} -#endif - -static int wait_zombie(struct process *proc, struct fork_tree_node *child, int *status, int options) { +static int wait_zombie(struct thread *thread, struct process *child, int *status, int options) { int ret; struct syscall_request request IHK_DMA_ALIGN; @@ -336,14 +297,14 @@ static int wait_zombie(struct process *proc, struct fork_tree_node *child, int * return ret; } -static int wait_stopped(struct process *proc, struct fork_tree_node *child, int *status, int options) +static int wait_stopped(struct thread *thread, struct process *child, int *status, int options) { dkprintf("wait_stopped,proc->pid=%d,child->pid=%d,options=%08x\n", - proc->ftn->pid, child->pid, options); + thread->proc->pid, child->pid, options); int ret; /* Copy exit_status created in do_signal */ - int *exit_status = child->status == PS_STOPPED ? + int *exit_status = child->pstatus == PS_STOPPED ? 
&child->group_exit_status : &child->exit_status; @@ -371,7 +332,7 @@ static int wait_stopped(struct process *proc, struct fork_tree_node *child, int return ret; } -static int wait_continued(struct process *proc, struct fork_tree_node *child, int *status, int options) { +static int wait_continued(struct thread *thread, struct process *child, int *status, int options) { int ret; if (status) { @@ -395,125 +356,84 @@ static int wait_continued(struct process *proc, struct fork_tree_node *child, in static int do_wait(int pid, int *status, int options, void *rusage) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *child_iter, *next; - int pgid = proc->ftn->pgid; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct process *child, *next; + int pgid = proc->pgid; int ret; struct waitq_entry waitpid_wqe; int empty = 1; int orgpid = pid; + struct mcs_rwlock_node lock; - dkprintf("wait4,proc->pid=%d,pid=%d\n", proc->ftn->pid, pid); + dkprintf("wait4,thread->pid=%d,pid=%d\n", thread->proc->pid, pid); rescan: pid = orgpid; - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - list_for_each_entry_safe(child_iter, next, &proc->ftn->children, siblings_list) { - - if (!(!!(options & __WCLONE) ^ (child_iter->termsig == SIGCHLD))) { + mcs_rwlock_writer_lock_noirq(&thread->proc->children_lock, &lock); + list_for_each_entry_safe(child, next, &proc->children_list, siblings_list) { + if (!(!!(options & __WCLONE) ^ (child->termsig == SIGCHLD))) { continue; } - ihk_mc_spinlock_lock_noirq(&child_iter->lock); - - if ((pid < 0 && -pid == child_iter->pgid) || + if ((pid < 0 && -pid == child->pgid) || pid == -1 || - (pid == 0 && pgid == child_iter->pgid) || - (pid > 0 && pid == child_iter->pid)) { + (pid == 0 && pgid == child->pgid) || + (pid > 0 && pid == child->pid)) { empty = 0; if((options & WEXITED) && - child_iter->status == PS_ZOMBIE) { - ret = wait_zombie(proc, child_iter, status, options); - if(ret == child_iter->pid) { 
+ child->pstatus == PS_ZOMBIE) { + ret = wait_zombie(thread, child, status, options); + if(ret == child->pid){ + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); if(!(options & WNOWAIT)){ - list_del(&child_iter->siblings_list); - release_fork_tree_node(child_iter); + release_process(child); } goto out_found; } } - if((child_iter->signal_flags & SIGNAL_STOP_STOPPED) && + if(!(child->ptrace & PT_TRACED) && + (child->signal_flags & SIGNAL_STOP_STOPPED) && (options & WUNTRACED)) { /* Not ptraced and in stopped state and WUNTRACED is specified */ - ret = wait_stopped(proc, child_iter, status, options); - if(ret == child_iter->pid) { + ret = wait_stopped(thread, child, status, options); + if(ret == child->pid){ if(!(options & WNOWAIT)){ - child_iter->signal_flags &= ~SIGNAL_STOP_STOPPED; + child->signal_flags &= ~SIGNAL_STOP_STOPPED; } + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); goto out_found; } } - if((child_iter->signal_flags & SIGNAL_STOP_CONTINUED) && - (options & WCONTINUED)) { - ret = wait_continued(proc, child_iter, status, options); - if(ret == child_iter->pid) { + if((child->ptrace & PT_TRACED) && + (child->pstatus & (PS_STOPPED | PS_TRACED))) { + ret = wait_stopped(thread, child, status, options); + if(ret == child->pid){ if(!(options & WNOWAIT)){ - child_iter->signal_flags &= ~SIGNAL_STOP_CONTINUED; + child->signal_flags &= ~SIGNAL_STOP_STOPPED; } + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); + goto out_found; + } + } + + if((child->signal_flags & SIGNAL_STOP_CONTINUED) && + (options & WCONTINUED)) { + ret = wait_continued(thread, child, status, options); + if(ret == child->pid){ + if(!(options & WNOWAIT)){ + child->signal_flags &= ~SIGNAL_STOP_CONTINUED; + } + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); goto out_found; } } } - ihk_mc_spinlock_unlock_noirq(&child_iter->lock); - } - list_for_each_entry_safe(child_iter, next, &proc->ftn->ptrace_children, 
ptrace_siblings_list) { - - if (!(!!(options & __WCLONE) ^ (child_iter->termsig == SIGCHLD))) { - continue; - } - - ihk_mc_spinlock_lock_noirq(&child_iter->lock); - - if ((pid < 0 && -pid == child_iter->pgid) || - pid == -1 || - (pid == 0 && pgid == child_iter->pgid) || - (pid > 0 && pid == child_iter->pid)) { - - empty = 0; - - if((options & WEXITED) && - child_iter->status == PS_ZOMBIE) { - ret = wait_zombie(proc, child_iter, status, options); -// if(ret == child_iter->pid) { - if(!(options & WNOWAIT)){ - list_del(&child_iter->ptrace_siblings_list); - release_fork_tree_node(child_iter); - } - goto out_found; -// } - } - - if(child_iter->status & (PS_STOPPED | PS_TRACED)) { - /* ptraced and in stopped or trace-stopped state */ - ret = wait_stopped(proc, child_iter, status, options); -// if(ret == child_iter->pid) { - if(!(options & WNOWAIT)){ - child_iter->signal_flags &= ~SIGNAL_STOP_STOPPED; - } - goto out_found; -// } - } else { - /* ptraced and in running or sleeping state */ - } - - if((child_iter->signal_flags & SIGNAL_STOP_CONTINUED) && - (options & WCONTINUED)) { - ret = wait_continued(proc, child_iter, status, options); -// if(ret == child_iter->pid) { - if(!(options & WNOWAIT)){ - child_iter->signal_flags &= ~SIGNAL_STOP_CONTINUED; - } - goto out_found; -// } - } - } - - ihk_mc_spinlock_unlock_noirq(&child_iter->lock); } if (empty) { @@ -530,20 +450,19 @@ do_wait(int pid, int *status, int options, void *rusage) /* Sleep */ dkprintf("wait4,sleeping\n"); - waitq_init_entry(&waitpid_wqe, proc); - waitq_prepare_to_wait(&proc->ftn->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE); + waitq_init_entry(&waitpid_wqe, thread); + waitq_prepare_to_wait(&thread->proc->waitpid_q, &waitpid_wqe, PS_INTERRUPTIBLE); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - if(hassigpending(proc)){ - waitq_finish_wait(&proc->ftn->waitpid_q, &waitpid_wqe); + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); + if(hassigpending(thread)){ + 
waitq_finish_wait(&thread->proc->waitpid_q, &waitpid_wqe); return -EINTR; } - schedule(); dkprintf("wait4(): woken up\n"); - waitq_finish_wait(&proc->ftn->waitpid_q, &waitpid_wqe); + waitq_finish_wait(&thread->proc->waitpid_q, &waitpid_wqe); goto rescan; @@ -551,10 +470,10 @@ do_wait(int pid, int *status, int options, void *rusage) return ret; out_found: dkprintf("wait4,out_found\n"); - ihk_mc_spinlock_unlock_noirq(&child_iter->lock); + goto exit; out_notfound: dkprintf("wait4,out_notfound\n"); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + mcs_rwlock_writer_unlock_noirq(&thread->proc->children_lock, &lock); goto exit; } @@ -625,159 +544,228 @@ SYSCALL_DECLARE(waitid) return 0; } -static int ptrace_terminate_tracer(struct process *proc, struct fork_tree_node *tracer); - void -terminate(int rc, int sig, ihk_mc_user_context_t *ctx) +terminate(int rc, int sig) { - struct syscall_request request IHK_DMA_ALIGN; - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; - struct fork_tree_node *child, *next; - struct process *parent_owner; - int ntracee; - int *tracee = NULL; + struct resource_set *resource_set = cpu_local_var(resource_set); + struct thread *mythread = cpu_local_var(current); + struct thread *thread; + struct process *proc = mythread->proc; + struct process *child; + struct process *next; + struct process *pid1 = resource_set->pid1; + struct mcs_rwlock_node_irqsave lock; + struct mcs_rwlock_node updatelock; + struct mcs_rwlock_node childlock; + struct mcs_rwlock_node childlock1; int i; - int error; + int n; + int *ids = NULL; + struct syscall_request request IHK_DMA_ALIGN; - // check tracee and ptrace detach - ntracee = 0; - ihk_mc_spinlock_lock_noirq(&ftn->lock); - list_for_each_entry(child, &ftn->ptrace_children, ptrace_siblings_list) { - ntracee++; + // clean up threads + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); // conflict clone + mcs_rwlock_writer_lock_noirq(&proc->update_lock, &updatelock); + 
if(proc->pstatus == PS_EXITED){ + mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + mythread->tstatus = PS_EXITED; + release_thread(mythread); + schedule(); + // no return + return; } - if(ntracee){ - tracee = kmalloc(sizeof(int) * ntracee, IHK_MC_AP_NOWAIT); + proc->exit_status = ((rc & 0x00ff) << 8) | (sig & 0xff); + proc->pstatus = PS_EXITED; + mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_del(&mythread->siblings_list); + mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + n = 0; + list_for_each_entry(thread, &proc->threads_list, siblings_list) { + n++; + } + if(n){ + ids = kmalloc(sizeof(int) * n, IHK_MC_AP_NOWAIT); i = 0; - if(tracee){ - list_for_each_entry(child, &ftn->ptrace_children, ptrace_siblings_list) { - tracee[i] = child->pid; - i++; + if(ids){ + list_for_each_entry(thread, &proc->threads_list, siblings_list) { + if(thread != mythread){ + ids[i] = thread->tid; + i++; + } } } } - ihk_mc_spinlock_unlock_noirq(&ftn->lock); - if(tracee){ - for(i = 0; i < ntracee; i++){ - ptrace_detach(tracee[i], 0); + if(ids){ + for(i = 0; i < n; i++){ + do_kill(mythread, proc->pid, ids[i], SIGKILL, NULL, 0); } - kfree(tracee); + kfree(ids); + } + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + + for(;;){ + __mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + if(list_empty(&proc->threads_list)){ + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + break; + } + __mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + cpu_pause(); } - dkprintf("terminate,pid=%d\n", proc->ftn->pid); - request.number = __NR_exit_group; - request.args[0] = ((rc & 0x00ff) << 8) | (sig & 0xff); + mcs_rwlock_writer_lock(&proc->threads_lock, &lock); + list_add_tail(&mythread->siblings_list, &proc->threads_list); + 
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); + + delete_proc_procfs_files(proc->pid); + + release_process_vm(proc->vm); + while (ihk_atomic_read(&proc->vm->refcount) != 0) { + cpu_pause(); + } + + if (proc->saved_cmdline) { + kfree(proc->saved_cmdline); + } + + // check tracee and ptrace_detach + n = 0; + mcs_rwlock_reader_lock(&proc->children_lock, &lock); + list_for_each_entry(child, &proc->children_list, siblings_list) { + if(child->ptrace & PT_TRACED) + n++; + } + if(n){ + ids = kmalloc(sizeof(int) * n, IHK_MC_AP_NOWAIT); + i = 0; + if(ids){ + list_for_each_entry(child, &proc->children_list, siblings_list) { + if(child->ptrace & PT_TRACED){ + ids[i] = child->pid; + i++; + } + } + } + } + mcs_rwlock_reader_unlock(&proc->children_lock, &lock); + if(ids){ + for(i = 0; i < n; i++){ + ptrace_detach(ids[i], 0); + } + kfree(ids); + } + + // clean up children + for(i = 0; i < HASH_SIZE; i++){ + mcs_rwlock_writer_lock(&resource_set->process_hash->lock[i], + &lock); + list_for_each_entry_safe(child, next, + &resource_set->process_hash->list[i], + hash_list){ + mcs_rwlock_writer_lock_noirq(&child->update_lock, + &updatelock); + if(child->ppid_parent == proc && + child->pstatus == PS_ZOMBIE){ + list_del(&child->hash_list); + list_del(&child->siblings_list); + kfree(child); + } + else if(child->ppid_parent == proc){ + mcs_rwlock_writer_lock_noirq(&proc->children_lock, + &childlock); + mcs_rwlock_writer_lock_noirq(&pid1->children_lock, + &childlock1); + child->ppid_parent = pid1; + if(child->parent == proc){ + child->parent = pid1; + list_del(&child->siblings_list); + list_add_tail(&child->siblings_list, + &pid1->children_list); + } + else{ + list_del(&child->ptraced_siblings_list); + list_add_tail(&child->ptraced_siblings_list, + &pid1->ptraced_children_list); + } + mcs_rwlock_writer_unlock_noirq(&pid1->children_lock, + &childlock1); + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, + &childlock); + } + mcs_rwlock_writer_unlock_noirq(&child->update_lock, + 
&updatelock); + } + mcs_rwlock_writer_unlock(&resource_set->process_hash->lock[i], + &lock); + } + + dkprintf("terminate,pid=%d\n", proc->pid); #ifdef DCFA_KMOD do_mod_exit(rc); #endif - /* XXX: send SIGKILL to all threads in this process */ - - flush_process_memory(proc); /* temporary hack */ - if(!proc->nohost) - do_syscall(&request, ihk_mc_get_processor_id(), 0); - -#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ - - /* Do a "wait" on all children and detach owner process */ - ihk_mc_spinlock_lock_noirq(&ftn->lock); - list_for_each_entry_safe(child, next, &ftn->children, siblings_list) { - list_del(&child->siblings_list); - release_fork_tree_node(child); + // clean up memory + if(!proc->nohost){ + request.number = __NR_exit_group; + request.args[0] = proc->exit_status; + do_syscall(&request, ihk_mc_get_processor_id(), proc->pid); } - list_for_each_entry_safe(child, next, &ftn->ptrace_children, ptrace_siblings_list) { - list_del(&child->ptrace_siblings_list); - if (ptrace_terminate_tracer(child->owner, ftn)) { - release_fork_tree_node(child); - } + + // Send signal to parent + if (proc->parent == pid1) { + proc->pstatus = PS_ZOMBIE; + release_process(proc); } - ftn->owner = NULL; - ihk_mc_spinlock_unlock_noirq(&ftn->lock); + else { + proc->pstatus = PS_ZOMBIE; - /* Send signal to parent */ - if (ftn->parent) { - int parent_owner_pid; - ihk_mc_spinlock_lock_noirq(&ftn->lock); - ftn->exit_status = ((rc & 0x00ff) << 8) | (sig & 0xff); - ftn->status = PS_ZOMBIE; - ihk_mc_spinlock_unlock_noirq(&ftn->lock); - - /* Wake parent (if sleeping in wait4()) */ dkprintf("terminate,wakeup\n"); - waitq_wakeup(&ftn->parent->waitpid_q); /* Signal parent if still attached */ - ihk_mc_spinlock_lock_noirq(&ftn->parent->lock); - parent_owner = ftn->parent->owner; - parent_owner_pid = parent_owner ? 
ftn->parent->pid : 0; - ihk_mc_spinlock_unlock_noirq(&ftn->parent->lock); - if (parent_owner && (ftn->termsig != 0)) { + if (proc->termsig != 0) { struct siginfo info; + int error; memset(&info, '\0', sizeof info); info.si_signo = SIGCHLD; - info.si_code = sig? ((sig & 0x80)? CLD_DUMPED: CLD_KILLED): CLD_EXITED; - info._sifields._sigchld.si_pid = proc->ftn->pid; - info._sifields._sigchld.si_status = ((rc & 0x00ff) << 8) | (sig & 0xff); - dkprintf("terminate,kill %d,target pid=%d\n", - ftn->termsig, parent_owner_pid); - error = do_kill(ftn->parent->pid, -1, SIGCHLD, &info, 0); -/* - sigchld_parent(ftn->parent->owner, 0); -*/ + info.si_code = (proc->exit_status & 0x7f)? + ((proc->exit_status & 0x80)? + CLD_DUMPED: CLD_KILLED): CLD_EXITED; + info._sifields._sigchld.si_pid = proc->pid; + info._sifields._sigchld.si_status = proc->exit_status; + error = do_kill(NULL, proc->parent->pid, -1, SIGCHLD, &info, 0); dkprintf("terminate,klll %d,error=%d\n", - ftn->termsig, error); + proc->termsig, error); } + /* Wake parent (if sleeping in wait4()) */ + waitq_wakeup(&proc->parent->waitpid_q); + } - release_fork_tree_node(ftn->parent); - } else { - ihk_mc_spinlock_lock_noirq(&ftn->lock); - ftn->status = PS_EXITED; - ihk_mc_spinlock_unlock_noirq(&ftn->lock); - } - release_fork_tree_node(ftn); - release_process(proc); - + mythread->tstatus = PS_EXITED; + release_thread(mythread); schedule(); + // no return } -void terminate_host(int pid) +void +terminate_host(int pid) { - struct cpu_local_var *v; - struct process *p; - int i; - unsigned long irqstate; - extern int num_processors; - int *tids; - int n; - siginfo_t info; + struct process *proc; + struct mcs_rwlock_node_irqsave lock; - memset(&info, '\0', sizeof info); - info.si_signo = SIGKILL; - info.si_code = SI_KERNEL; - - tids = kmalloc(sizeof(int) * num_processors, IHK_MC_AP_NOWAIT); - if(!tids) + proc = find_process(pid, &lock); + if(!proc) return; - - for(n = 0, i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - 
irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid){ - p->nohost = 1; - tids[n] = p->ftn->tid; - n++; - } - } - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - } - for(i = 0; i < n; i++){ - do_kill(pid, tids[i], SIGKILL, &info, 0); - } - - kfree(tids); + proc->nohost = 1; + process_unlock(proc, &lock); + do_kill(cpu_local_var(current), pid, -1, SIGKILL, NULL, 0); } void @@ -799,34 +787,8 @@ interrupt_syscall(int pid, int cpuid) SYSCALL_DECLARE(exit_group) { -#if 0 - SYSCALL_HEADER; -#endif - - dkprintf("sys_exit_group,pid=%d\n", cpu_local_var(current)->ftn->pid); - terminate((int)ihk_mc_syscall_arg0(ctx), 0, ctx); -#if 0 - struct process *proc = cpu_local_var(current); - -#ifdef DCFA_KMOD - do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); -#endif - - /* XXX: send SIGKILL to all threads in this process */ - - do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0); - -#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ - proc->status = PS_ZOMBIE; - if (IS_DETACHED_PROCESS(proc)) { - /* release a reference for wait(2) */ - proc->status = PS_EXITED; - free_process(proc); - } - - schedule(); - -#endif + dkprintf("sys_exit_group,pid=%d\n", cpu_local_var(current)->proc->pid); + terminate((int)ihk_mc_syscall_arg0(ctx), 0); return 0; } @@ -875,7 +837,7 @@ static int do_munmap(void *addr, size_t len) int ro_freed; begin_free_pages_pending(); - error = remove_process_memory_range(cpu_local_var(current), + error = remove_process_memory_range(cpu_local_var(current)->vm, (intptr_t)addr, (intptr_t)addr+len, &ro_freed); // XXX: TLB flush flush_tlb(); @@ -895,8 +857,8 @@ static int do_munmap(void *addr, size_t len) static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp) { - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = 
&thread->vm->region; intptr_t addr; int error; struct vm_range *range; @@ -921,7 +883,7 @@ static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp) goto out; } - range = lookup_process_memory_range(proc->vm, addr, addr+len); + range = lookup_process_memory_range(thread->vm, addr, addr+len); if (range == NULL) { break; } @@ -972,8 +934,8 @@ SYSCALL_DECLARE(mmap) const int fd = ihk_mc_syscall_arg4(ctx); const off_t off0 = ihk_mc_syscall_arg5(ctx); - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; intptr_t addr; size_t len; off_t off; @@ -1041,7 +1003,7 @@ SYSCALL_DECLARE(mmap) goto out2; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); if (flags & MAP_FIXED) { /* clear specified address range */ @@ -1188,11 +1150,11 @@ SYSCALL_DECLARE(mmap) } vrflags |= VRFLAG_PROT_TO_MAXPROT(PROT_TO_VR_FLAG(maxprot)); - error = add_process_memory_range(proc, addr, addr+len, phys, vrflags, memobj, off); + error = add_process_memory_range(thread->vm, addr, addr+len, phys, vrflags, memobj, off); if (error) { ekprintf("sys_mmap:add_process_memory_range" "(%p,%lx,%lx,%lx,%lx) failed %d\n", - proc, addr, addr+len, + thread->vm, addr, addr+len, virt_to_phys(p), vrflags, error); goto out; } @@ -1206,14 +1168,14 @@ out: if (ro_vma_mapped) { (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); } - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); if (!error && populated_mapping) { - error = populate_process_memory(proc, (void *)addr, len); + error = populate_process_memory(thread->vm, (void *)addr, len); if (error) { ekprintf("sys_mmap:populate_process_memory" "(%p,%p,%lx) failed %d\n", - proc, (void *)addr, len, error); + thread->vm, (void *)addr, len, error); /* * In this case, * 
the mapping established by this call should be unmapped @@ -1247,8 +1209,8 @@ SYSCALL_DECLARE(munmap) { const uintptr_t addr = ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; size_t len; int error; @@ -1266,9 +1228,9 @@ SYSCALL_DECLARE(munmap) goto out; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); error = do_munmap((void *)addr, len); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); out: dkprintf("[%d]sys_munmap(%lx,%lx): %d\n", @@ -1281,8 +1243,8 @@ SYSCALL_DECLARE(mprotect) const intptr_t start = ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); const int prot = ihk_mc_syscall_arg2(ctx); - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; size_t len; intptr_t end; struct vm_range *first; @@ -1316,38 +1278,9 @@ SYSCALL_DECLARE(mprotect) return 0; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); -#if 0 - /* check contiguous map */ - first = NULL; - for (addr = start; addr < end; addr = range->end) { - if (first == NULL) { - range = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); - first = range; - } - else { - range = next_process_memory_range(proc->vm, range); - } - - if ((range == NULL) || (addr < range->start)) { - /* not contiguous */ - ekprintf("sys_mprotect(%lx,%lx,%x):not contiguous\n", - start, len0, prot); - error = -ENOMEM; - goto out; - } - - if (range->flag & (VR_REMOTE | VR_RESERVED | VR_IO_NOCACHE)) { - 
ekprintf("sys_mprotect(%lx,%lx,%x):cannot change\n", - start, len0, prot); - error = -EINVAL; - goto out; - } - } -#else - first = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); -#endif + first = lookup_process_memory_range(thread->vm, start, start+PAGE_SIZE); /* do the mprotect */ changed = NULL; @@ -1356,7 +1289,7 @@ SYSCALL_DECLARE(mprotect) range = first; } else { - range = next_process_memory_range(proc->vm, changed); + range = next_process_memory_range(thread->vm, changed); } if ((range == NULL) || (addr < range->start)) { @@ -1383,7 +1316,7 @@ SYSCALL_DECLARE(mprotect) } if (range->start < addr) { - error = split_process_memory_range(proc, range, addr, &range); + error = split_process_memory_range(thread->vm, range, addr, &range); if (error) { ekprintf("sys_mprotect(%lx,%lx,%x):split failed. %d\n", start, len0, prot, error); @@ -1391,7 +1324,7 @@ SYSCALL_DECLARE(mprotect) } } if (end < range->end) { - error = split_process_memory_range(proc, range, end, NULL); + error = split_process_memory_range(thread->vm, range, end, NULL); if (error) { ekprintf("sys_mprotect(%lx,%lx,%x):split failed. %d\n", start, len0, prot, error); @@ -1403,7 +1336,7 @@ SYSCALL_DECLARE(mprotect) ro_changed = 1; } - error = change_prot_process_memory_range(proc, range, protflags); + error = change_prot_process_memory_range(thread->vm, range, protflags); if (error) { ekprintf("sys_mprotect(%lx,%lx,%x):change failed. %d\n", start, len0, prot, error); @@ -1414,7 +1347,7 @@ SYSCALL_DECLARE(mprotect) changed = range; } else { - error = join_process_memory_range(proc, changed, range); + error = join_process_memory_range(thread->vm, changed, range); if (error) { ekprintf("sys_mprotect(%lx,%lx,%x):join failed. 
%d\n", start, len0, prot, error); @@ -1435,7 +1368,7 @@ out: /* through */ } } - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); dkprintf("[%d]sys_mprotect(%lx,%lx,%x): %d\n", ihk_mc_get_processor_id(), start, len0, prot, error); return error; @@ -1467,7 +1400,7 @@ SYSCALL_DECLARE(brk) vrflag = VR_PROT_READ | VR_PROT_WRITE; vrflag |= VRFLAG_PROT_TO_MAXPROT(vrflag); ihk_mc_spinlock_lock_noirq(&cpu_local_var(current)->vm->memory_range_lock); - region->brk_end = extend_process_region(cpu_local_var(current), + region->brk_end = extend_process_region(cpu_local_var(current)->vm, region->brk_start, region->brk_end, address, vrflag); ihk_mc_spinlock_unlock_noirq(&cpu_local_var(current)->vm->memory_range_lock); dkprintf("SC(%d)[sys_brk] brk_end set to %lx\n", @@ -1481,46 +1414,35 @@ out: SYSCALL_DECLARE(getpid) { - return cpu_local_var(current)->ftn->pid; + return cpu_local_var(current)->proc->pid; } SYSCALL_DECLARE(getppid) { - struct process *proc = cpu_local_var(current); - int pid = 1; // fake init + struct thread *thread = cpu_local_var(current); - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - if (proc->ftn->ptrace & PT_TRACED) { - if (proc->ftn->ppid_parent) - pid = proc->ftn->ppid_parent->pid; - } else { - if (proc->ftn->parent) { - pid = proc->ftn->parent->pid; - } - } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - return pid; + return thread->proc->ppid_parent->pid; } void -settid(struct process *proc, int mode, int newcpuid, int oldcpuid) +settid(struct thread *thread, int mode, int newcpuid, int oldcpuid) { ihk_mc_user_context_t ctx; unsigned long rc; ihk_mc_syscall_arg0(&ctx) = mode; - ihk_mc_syscall_arg1(&ctx) = proc->ftn->pid; + ihk_mc_syscall_arg1(&ctx) = thread->proc->pid; ihk_mc_syscall_arg2(&ctx) = newcpuid; ihk_mc_syscall_arg3(&ctx) = oldcpuid; rc = syscall_generic_forwarding(__NR_gettid, &ctx); if (mode != 2) { - proc->ftn->tid = rc; + thread->tid = rc; } } 
SYSCALL_DECLARE(gettid) { - return cpu_local_var(current)->ftn->tid; + return cpu_local_var(current)->tid; } long do_arch_prctl(unsigned long code, unsigned long address) @@ -1571,68 +1493,69 @@ SYSCALL_DECLARE(arch_prctl) ihk_mc_syscall_arg1(ctx)); } -extern void ptrace_report_signal(struct process *proc, int sig); -static int ptrace_report_exec(struct process *proc) +extern void ptrace_report_signal(struct thread *thread, int sig); +static int ptrace_report_exec(struct thread *thread) { - int ptrace = proc->ftn->ptrace; + int ptrace = thread->proc->ptrace; if (ptrace & (PT_TRACE_EXEC|PTRACE_O_TRACEEXEC)) { ihk_mc_kernel_context_t ctx; int sig = (SIGTRAP | (PTRACE_EVENT_EXEC << 8)); - memcpy(&ctx, &proc->ctx, sizeof ctx); - ptrace_report_signal(proc, sig); - memcpy(&proc->ctx, &ctx, sizeof ctx); + memcpy(&ctx, &thread->ctx, sizeof ctx); + ptrace_report_signal(thread, sig); + memcpy(&thread->ctx, &ctx, sizeof ctx); } return 0; } -static void ptrace_syscall_enter(struct process *proc) +static void ptrace_syscall_enter(struct thread *thread) { - int ptrace = proc->ftn->ptrace; + int ptrace = thread->proc->ptrace; if (ptrace & PT_TRACE_SYSCALL_ENTER) { int sig = (SIGTRAP | ((ptrace & PTRACE_O_TRACESYSGOOD) ? 0x80 : 0)); - ptrace_report_signal(proc, sig); - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - if (proc->ftn->ptrace & PT_TRACE_SYSCALL_ENTER) { - proc->ftn->ptrace |= PT_TRACE_SYSCALL_EXIT; + ptrace_report_signal(thread, sig); + // TODO(sira): the ptrace flag update below must be done under mutual exclusion!
+ //?ihk_mc_spinlock_lock_noirq(&thread->proc->lock); + if (thread->proc->ptrace & PT_TRACE_SYSCALL_ENTER) { + thread->proc->ptrace |= PT_TRACE_SYSCALL_EXIT; } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + //?ihk_mc_spinlock_unlock_noirq(&thread->proc->lock); } } -static void ptrace_syscall_exit(struct process *proc) +static void ptrace_syscall_exit(struct thread *thread) { - int ptrace = proc->ftn->ptrace; + int ptrace = thread->proc->ptrace; if (ptrace & PT_TRACE_SYSCALL_EXIT) { int sig = (SIGTRAP | ((ptrace & PTRACE_O_TRACESYSGOOD) ? 0x80 : 0)); - ptrace_report_signal(proc, sig); + ptrace_report_signal(thread, sig); } } -static int ptrace_check_clone_event(struct process *proc, int clone_flags) +static int ptrace_check_clone_event(struct thread *thread, int clone_flags) { int event = 0; if (clone_flags & CLONE_VFORK) { /* vfork */ - if (proc->ftn->ptrace & PTRACE_O_TRACEVFORK) { + if (thread->proc->ptrace & PTRACE_O_TRACEVFORK) { event = PTRACE_EVENT_VFORK; } - if (proc->ftn->ptrace & PTRACE_O_TRACEVFORKDONE) { + if (thread->proc->ptrace & PTRACE_O_TRACEVFORKDONE) { event = PTRACE_EVENT_VFORK_DONE; } } else if ((clone_flags & CSIGNAL) == SIGCHLD) { /* fork */ - if (proc->ftn->ptrace & PTRACE_O_TRACEFORK) { + if (thread->proc->ptrace & PTRACE_O_TRACEFORK) { event = PTRACE_EVENT_FORK; } } else { /* clone */ - if (proc->ftn->ptrace & PTRACE_O_TRACECLONE) { + if (thread->proc->ptrace & PTRACE_O_TRACECLONE) { event = PTRACE_EVENT_CLONE; } } @@ -1640,80 +1563,90 @@ static int ptrace_check_clone_event(struct process *proc, int clone_flags) return event; } -static int ptrace_report_clone(struct process *proc, struct process *new, int event) +// TODO(sira): this whole function needs review +static int ptrace_report_clone(struct thread *thread, struct thread *new, int event) { dkprintf("ptrace_report_clone,enter\n"); int error = 0; long rc; struct siginfo info; + mcs_rwlock_node_t lock; /* Save reason why stopped and process state for wait4() to reap */ - 
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - proc->ftn->exit_status = (SIGTRAP | (event << 8)); + // TODO(sira): the flag updates below must be done under mutual exclusion! + //?ihk_mc_spinlock_lock_noirq(&thread->proc->lock); + thread->proc->exit_status = (SIGTRAP | (event << 8)); /* Transition process state */ - proc->ftn->status = PS_TRACED; - proc->ftn->ptrace_eventmsg = new->ftn->tid; - proc->ftn->ptrace &= ~PT_TRACE_SYSCALL_MASK; - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + thread->proc->pstatus = PS_TRACED; + thread->tstatus = PS_TRACED; + thread->proc->ptrace_eventmsg = new->tid; + thread->proc->ptrace &= ~PT_TRACE_SYSCALL_MASK; + //?ihk_mc_spinlock_unlock_noirq(&thread->proc->lock); dkprintf("ptrace_report_clone,kill SIGCHLD\n"); - if (proc->ftn->parent) { + if (thread->proc->parent) { /* kill SIGCHLD */ - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - if (proc->ftn->parent->owner) { + // this lock is believed to be unnecessary + //?ihk_mc_spinlock_lock_noirq(&thread->proc->parent->lock); + // the condition is also unnecessary: a parent always exists + if (thread->proc->parent) { memset(&info, '\0', sizeof info); info.si_signo = SIGCHLD; info.si_code = CLD_TRAPPED; - info._sifields._sigchld.si_pid = proc->ftn->pid; - info._sifields._sigchld.si_status = proc->ftn->exit_status; - rc = do_kill(proc->ftn->parent->pid, -1, SIGCHLD, &info, 0); + info._sifields._sigchld.si_pid = thread->proc->pid; + info._sifields._sigchld.si_status = thread->proc->exit_status; + rc = do_kill(cpu_local_var(current), thread->proc->parent->pid, -1, SIGCHLD, &info, 0); if(rc < 0) { dkprintf("ptrace_report_clone,do_kill failed\n"); } } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); + //?ihk_mc_spinlock_unlock_noirq(&thread->proc->parent->lock); /* Wake parent (if sleeping in wait4()) */ - waitq_wakeup(&proc->ftn->parent->waitpid_q); + waitq_wakeup(&thread->proc->parent->waitpid_q); } if (event != PTRACE_EVENT_VFORK_DONE) { /* PTRACE_EVENT_FORK or PTRACE_EVENT_VFORK or PTRACE_EVENT_CLONE */ - struct fork_tree_node *child, *next; + struct process *child, *next; /* 
set ptrace features to new process */ - ihk_mc_spinlock_lock_noirq(&new->ftn->lock); + // this lock is believed to be unnecessary + //?ihk_mc_spinlock_lock_noirq(&new->proc->lock); - new->ftn->ptrace = proc->ftn->ptrace; - new->ftn->ppid_parent = new->ftn->parent; /* maybe proc */ + new->proc->ptrace = thread->proc->ptrace; + new->proc->ppid_parent = new->proc->parent; /* maybe proc */ - if ((new->ftn->ptrace & PT_TRACED) && new->ptrace_debugreg == NULL) { + if ((new->proc->ptrace & PT_TRACED) && new->ptrace_debugreg == NULL) { alloc_debugreg(new); } - ihk_mc_spinlock_lock_noirq(&new->ftn->parent->lock); - list_for_each_entry_safe(child, next, &new->ftn->parent->children, siblings_list) { - if(child == new->ftn) { + mcs_rwlock_writer_lock_noirq(&new->proc->parent->children_lock, &lock); + list_for_each_entry_safe(child, next, &new->proc->parent->children_list, siblings_list) { + if(child == new->proc) { list_del(&child->siblings_list); goto found; } } panic("ptrace_report_clone: missing parent-child relationship."); found: - ihk_mc_spinlock_unlock_noirq(&new->ftn->parent->lock); + mcs_rwlock_writer_unlock_noirq(&new->proc->parent->children_lock, &lock); - new->ftn->parent = proc->ftn->parent; /* new ptracing parent */ - ihk_mc_spinlock_lock_noirq(&new->ftn->parent->lock); - list_add_tail(&new->ftn->ptrace_siblings_list, &new->ftn->parent->ptrace_children); - ihk_mc_spinlock_unlock_noirq(&new->ftn->parent->lock); +/* TODO(sira): rework this + ihk_mc_spinlock_lock_noirq(&new->proc->parent->children_lock); + list_add_tail(&new->proc->ptrace_siblings_list, &new->proc->parent->ptrace_children); + ihk_mc_spinlock_unlock_noirq(&new->proc->parent->children_lock); +*/ /* trace and SIGSTOP */ - new->ftn->exit_status = SIGSTOP; - new->ftn->status = PS_TRACED; + new->proc->exit_status = SIGSTOP; + new->proc->pstatus = PS_TRACED; + new->tstatus = PS_TRACED; - ihk_mc_spinlock_unlock_noirq(&new->ftn->lock); + 
//?ihk_mc_spinlock_unlock_noirq(&new->proc->lock); } return error; @@ -1721,8 +1654,8 @@ found: static void munmap_all(void) { - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; + struct thread *thread = cpu_local_var(current); + struct process_vm *vm = thread->vm; struct vm_range *range; struct vm_range *next; void *addr; @@ -1743,7 +1676,7 @@ static void munmap_all(void) ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); /* free vm_ranges which do_munmap() failed to remove. */ - free_process_memory_ranges(proc); + free_process_memory_ranges(thread->vm); return; } /* munmap_all() */ @@ -1764,9 +1697,10 @@ SYSCALL_DECLARE(execve) struct syscall_request request IHK_DMA_ALIGN; struct program_load_desc *desc; - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; + struct thread *thread = cpu_local_var(current); + struct process_vm *vm = thread->vm; struct vm_range *range; + struct process *proc = thread->proc; ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); @@ -1891,7 +1825,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, unsigned long cursp) { int cpuid; - struct process *new; + struct thread *new; struct syscall_request request1 IHK_DMA_ALIGN; int ptrace_event = 0; @@ -1913,9 +1847,8 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, return -EAGAIN; } - new = clone_process(cpu_local_var(current), curpc, - newsp ? newsp : cursp, - clone_flags); + new = clone_thread(cpu_local_var(current), curpc, + newsp ? 
newsp : cursp, clone_flags); if (!new) { release_cpuid(cpuid); @@ -1925,14 +1858,14 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, cpu_set(cpuid, &new->vm->cpu_set, &new->vm->cpu_set_lock); if (clone_flags & CLONE_VM) { - new->ftn->pid = cpu_local_var(current)->ftn->pid; + new->proc->pid = cpu_local_var(current)->proc->pid; settid(new, 1, cpuid, -1); } /* fork() a new process on the host */ else { request1.number = __NR_fork; - new->ftn->pid = do_syscall(&request1, ihk_mc_get_processor_id(), 0); - if (new->ftn->pid == -1) { + new->proc->pid = do_syscall(&request1, ihk_mc_get_processor_id(), 0); + if (new->proc->pid == -1) { kprintf("ERROR: forking host process\n"); /* TODO: clean-up new */ @@ -1943,7 +1876,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, /* In a single threaded process TID equals to PID */ settid(new, 0, cpuid, -1); - dkprintf("fork(): new pid: %d\n", new->ftn->pid); + dkprintf("fork(): new pid: %d\n", new->proc->pid); /* clear user space PTEs and set new rpgtable so that consequent * page faults will look up the right mappings */ request1.number = __NR_munmap; @@ -1951,13 +1884,13 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, request1.args[1] = new->vm->region.user_end - new->vm->region.user_start; /* 3rd parameter denotes new rpgtable of host process */ - request1.args[2] = virt_to_phys(new->vm->page_table); - request1.args[3] = new->ftn->pid; + request1.args[2] = virt_to_phys(new->vm->address_space->page_table); + request1.args[3] = new->proc->pid; dkprintf("fork(): requesting PTE clear and rpgtable (0x%lx) update\n", request1.args[2]); - if (do_syscall(&request1, ihk_mc_get_processor_id(), new->ftn->pid)) { + if (do_syscall(&request1, ihk_mc_get_processor_id(), new->proc->pid)) { kprintf("ERROR: clearing PTEs in host process\n"); } } @@ -1966,7 +1899,7 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, dkprintf("clone_flags & CLONE_PARENT_SETTID: 0x%lX\n", parent_tidptr); - 
*(int*)parent_tidptr = new->ftn->pid; + *(int*)parent_tidptr = new->proc->pid; } if (clone_flags & CLONE_CHILD_CLEARTID) { @@ -1981,14 +1914,14 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, dkprintf("clone_flags & CLONE_CHILD_SETTID: 0x%lX\n", child_tidptr); - if (ihk_mc_pt_virt_to_phys(new->vm->page_table, + if (ihk_mc_pt_virt_to_phys(new->vm->address_space->page_table, (void *)child_tidptr, &phys)) { kprintf("ERROR: looking up physical addr for child process\n"); release_cpuid(cpuid); return -EFAULT; } - *((int*)phys_to_virt(phys)) = new->ftn->tid; + *((int*)phys_to_virt(phys)) = new->tid; } if (clone_flags & CLONE_SETTLS) { @@ -2004,25 +1937,32 @@ unsigned long do_fork(int clone_flags, unsigned long newsp, ihk_mc_syscall_ret(new->uctx) = 0; - if (cpu_local_var(current)->ftn->ptrace) { + if (cpu_local_var(current)->proc->ptrace) { ptrace_event = ptrace_check_clone_event(cpu_local_var(current), clone_flags); if (ptrace_event) { ptrace_report_clone(cpu_local_var(current), new, ptrace_event); } } - dkprintf("clone: kicking scheduler!,cpuid=%d pid=%d tid %d -> tid=%d\n", - cpuid, new->ftn->pid, - cpu_local_var(current)->ftn->tid, - new->ftn->tid); + new->tstatus = PS_RUNNING; + chain_thread(new); + if (!(clone_flags & CLONE_VM)) { + new->proc->pstatus = PS_RUNNING; + chain_process(new->proc); + } - runq_add_proc(new, cpuid); + dkprintf("clone: kicking scheduler!,cpuid=%d pid=%d tid %d -> tid=%d\n", + cpuid, new->proc->pid, + cpu_local_var(current)->tid, + new->tid); + + runq_add_thread(new, cpuid); if (ptrace_event) { schedule(); } - return new->ftn->tid; + return new->tid; } SYSCALL_DECLARE(vfork) @@ -2043,24 +1983,24 @@ SYSCALL_DECLARE(set_tid_address) cpu_local_var(current)->thread.clear_child_tid = (int*)ihk_mc_syscall_arg0(ctx); - return cpu_local_var(current)->ftn->pid; + return cpu_local_var(current)->proc->pid; } SYSCALL_DECLARE(kill) { int pid = ihk_mc_syscall_arg0(ctx); int sig = ihk_mc_syscall_arg1(ctx); - struct process *proc = 
cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct siginfo info; int error; memset(&info, '\0', sizeof info); info.si_signo = sig; info.si_code = SI_USER; - info._sifields._kill.si_pid = proc->ftn->pid; + info._sifields._kill.si_pid = thread->proc->pid; dkprintf("sys_kill,enter,pid=%d,sig=%d\n", pid, sig); - error = do_kill(pid, -1, sig, &info, 0); + error = do_kill(thread, pid, -1, sig, &info, 0); dkprintf("sys_kill,returning,pid=%d,sig=%d,error=%d\n", pid, sig, error); return error; } @@ -2071,20 +2011,20 @@ SYSCALL_DECLARE(tgkill) int tgid = ihk_mc_syscall_arg0(ctx); int tid = ihk_mc_syscall_arg1(ctx); int sig = ihk_mc_syscall_arg2(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct siginfo info; memset(&info, '\0', sizeof info); info.si_signo = sig; info.si_code = SI_TKILL; - info._sifields._kill.si_pid = proc->ftn->pid; + info._sifields._kill.si_pid = thread->proc->pid; if(tid <= 0) return -EINVAL; if(tgid <= 0 && tgid != -1) return -EINVAL; - return do_kill(tgid, tid, sig, &info, 0); + return do_kill(thread, tgid, tid, sig, &info, 0); } int * @@ -2112,28 +2052,15 @@ do_setresuid() { int _buf[16]; int *buf; - struct process *proc = cpu_local_var(current); - int pid = proc->ftn->pid; - struct cpu_local_var *v; - struct process *p; - int i; - unsigned long irqstate; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; buf = getcred(_buf); - for(i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid){ - p->ftn->ruid = buf[0]; - p->ftn->euid = buf[1]; - p->ftn->suid = buf[2]; - p->ftn->fsuid = buf[3]; - } - } - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - } + proc->ruid = buf[0]; + proc->euid = buf[1]; + proc->suid = buf[2]; + proc->fsuid = buf[3]; } void @@ -2141,28 +2068,15 @@ do_setresgid() { int 
_buf[16]; int *buf; - struct process *proc = cpu_local_var(current); - int pid = proc->ftn->pid; - struct cpu_local_var *v; - struct process *p; - int i; - unsigned long irqstate; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; buf = getcred(_buf); - for(i = 0; i < num_processors; i++){ - v = get_cpu_local_var(i); - irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); - list_for_each_entry(p, &(v->runq), sched_list){ - if(p->ftn->pid == pid){ - p->ftn->rgid = buf[4]; - p->ftn->egid = buf[5]; - p->ftn->sgid = buf[6]; - p->ftn->fsgid = buf[7]; - } - } - ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - } + proc->rgid = buf[4]; + proc->egid = buf[5]; + proc->sgid = buf[6]; + proc->fsgid = buf[7]; } SYSCALL_DECLARE(setresuid) @@ -2261,33 +2175,33 @@ SYSCALL_DECLARE(setfsgid) SYSCALL_DECLARE(getuid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; - return ftn->ruid; + return proc->ruid; } SYSCALL_DECLARE(geteuid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; - return ftn->euid; + return proc->euid; } SYSCALL_DECLARE(getresuid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; int *ruid = (int *)ihk_mc_syscall_arg0(ctx); int *euid = (int *)ihk_mc_syscall_arg1(ctx); int *suid = (int *)ihk_mc_syscall_arg2(ctx); - if(copy_to_user(ruid, &ftn->ruid, sizeof(int))) + if(copy_to_user(ruid, &proc->ruid, sizeof(int))) return -EFAULT; - if(copy_to_user(euid, &ftn->euid, sizeof(int))) + if(copy_to_user(euid, &proc->euid, sizeof(int))) return -EFAULT; - if(copy_to_user(suid, &ftn->suid, sizeof(int))) + if(copy_to_user(suid, &proc->suid, 
sizeof(int))) return -EFAULT; return 0; @@ -2295,33 +2209,33 @@ SYSCALL_DECLARE(getresuid) SYSCALL_DECLARE(getgid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; - return ftn->rgid; + return proc->rgid; } SYSCALL_DECLARE(getegid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; - return ftn->egid; + return proc->egid; } SYSCALL_DECLARE(getresgid) { - struct process *proc = cpu_local_var(current); - struct fork_tree_node *ftn = proc->ftn; + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; int *rgid = (int *)ihk_mc_syscall_arg0(ctx); int *egid = (int *)ihk_mc_syscall_arg1(ctx); int *sgid = (int *)ihk_mc_syscall_arg2(ctx); - if(copy_to_user(rgid, &ftn->rgid, sizeof(int))) + if(copy_to_user(rgid, &proc->rgid, sizeof(int))) return -EFAULT; - if(copy_to_user(egid, &ftn->egid, sizeof(int))) + if(copy_to_user(egid, &proc->egid, sizeof(int))) return -EFAULT; - if(copy_to_user(sgid, &ftn->sgid, sizeof(int))) + if(copy_to_user(sgid, &proc->sgid, sizeof(int))) return -EFAULT; return 0; @@ -2331,25 +2245,24 @@ SYSCALL_DECLARE(setpgid) { int pid = ihk_mc_syscall_arg0(ctx); int pgid = ihk_mc_syscall_arg1(ctx); + struct thread *thread = cpu_local_var(current); + struct process *proc = thread->proc; + struct mcs_rwlock_node_irqsave lock; long rc; - struct process *proc = cpu_local_var(current); - ihk_spinlock_t *lock; - unsigned long irqstate = 0; - struct process *tproc; if(pid == 0) - pid = proc->ftn->pid; + pid = proc->pid; if(pgid == 0) pgid = pid; - if(proc->ftn->pid != pid){ - tproc = findthread_and_lock(pid, pid, &lock, &irqstate); - if(tproc){ - if(tproc->execed){ - process_unlock(lock, irqstate); + if(proc->pid != pid){ + proc = find_process(pid, &lock); + if(proc){ + 
if(proc->execed){ + process_unlock(proc, &lock); return -EACCES; } - process_unlock(lock, irqstate); + process_unlock(proc, &lock); } else return -ESRCH; @@ -2357,7 +2270,11 @@ SYSCALL_DECLARE(setpgid) rc = syscall_generic_forwarding(__NR_setpgid, ctx); if(rc == 0){ - do_setpgid(pid, pgid); + proc = find_process(pid, &lock); + if(proc){ + proc->pgid = pgid; + process_unlock(proc, &lock); + } } return rc; } @@ -2370,18 +2287,18 @@ SYSCALL_DECLARE(set_robust_list) int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct k_sigaction *k; long irqstate; ihk_mc_user_context_t ctx0; - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); - k = proc->sighandler->action + sig - 1; + irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); + k = thread->sigcommon->action + sig - 1; if(oact) memcpy(oact, k, sizeof(struct k_sigaction)); if(act) memcpy(k, act, sizeof(struct k_sigaction)); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); if(act){ ihk_mc_syscall_arg0(&ctx0) = sig; @@ -2396,13 +2313,13 @@ SYSCALL_DECLARE(close) { int fd = ihk_mc_syscall_arg0(ctx); int rc; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct sigfd *sfd; struct sigfd *sb; long irqstate; - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); - for(sfd = proc->sighandler->sigfd, sb = NULL; sfd; sb = sfd, sfd = sfd->next) + irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); + for(sfd = thread->sigcommon->sigfd, sb = NULL; sfd; sb = sfd, sfd = sfd->next) if(sfd->fd == fd) break; if(sfd){ @@ -2410,8 +2327,8 @@ SYSCALL_DECLARE(close) if(sb) sb->next = sfd->next; else - proc->sighandler->sigfd = sfd->next; - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + thread->sigcommon->sigfd = sfd->next; + 
ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); request.number = __NR_signalfd4; request.args[0] = 1; request.args[1] = sfd->fd; @@ -2419,7 +2336,7 @@ SYSCALL_DECLARE(close) rc = do_syscall(&request, ihk_mc_get_processor_id(), 0); } else{ - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); rc = syscall_generic_forwarding(__NR_close, ctx); } return rc; @@ -2431,8 +2348,7 @@ SYSCALL_DECLARE(rt_sigprocmask) const sigset_t *set = (const sigset_t *)ihk_mc_syscall_arg1(ctx); sigset_t *oldset = (sigset_t *)ihk_mc_syscall_arg2(ctx); size_t sigsetsize = (size_t)ihk_mc_syscall_arg3(ctx); - struct process *proc = cpu_local_var(current); - int flag; + struct thread *thread = cpu_local_var(current); __sigset_t wsig; ihk_mc_user_context_t ctx0; @@ -2445,9 +2361,8 @@ SYSCALL_DECLARE(rt_sigprocmask) how != SIG_SETMASK) return -EINVAL; - flag = ihk_mc_spinlock_lock(&proc->sighandler->lock); if(oldset){ - wsig = proc->sigmask.__val[0]; + wsig = thread->sigmask.__val[0]; if(copy_to_user(oldset->__val, &wsig, sizeof wsig)) goto fault; } @@ -2456,24 +2371,24 @@ SYSCALL_DECLARE(rt_sigprocmask) goto fault; switch(how){ case SIG_BLOCK: - proc->sigmask.__val[0] |= wsig; + thread->sigmask.__val[0] |= wsig; break; case SIG_UNBLOCK: - proc->sigmask.__val[0] &= ~wsig; + thread->sigmask.__val[0] &= ~wsig; break; case SIG_SETMASK: - proc->sigmask.__val[0] = wsig; + thread->sigmask.__val[0] = wsig; break; } } - wsig = proc->sigmask.__val[0]; - ihk_mc_spinlock_unlock(&proc->sighandler->lock, flag); + thread->sigmask.__val[0] &= ~__sigmask(SIGKILL); + thread->sigmask.__val[0] &= ~__sigmask(SIGSTOP); + wsig = thread->sigmask.__val[0]; ihk_mc_syscall_arg0(&ctx0) = wsig; syscall_generic_forwarding(__NR_rt_sigprocmask, &ctx0); return 0; fault: - ihk_mc_spinlock_unlock(&proc->sighandler->lock, flag); return -EFAULT; } @@ -2484,23 +2399,23 @@ SYSCALL_DECLARE(rt_sigpending) struct list_head *head; ihk_spinlock_t *lock; 
__sigset_t w = 0; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); sigset_t *set = (sigset_t *)ihk_mc_syscall_arg0(ctx); size_t sigsetsize = (size_t)ihk_mc_syscall_arg1(ctx); if (sigsetsize > sizeof(sigset_t)) return -EINVAL; - lock = &proc->sigshared->lock; - head = &proc->sigshared->sigpending; + lock = &thread->sigcommon->lock; + head = &thread->sigcommon->sigpending; flag = ihk_mc_spinlock_lock(lock); list_for_each_entry(pending, head, list){ w |= pending->sigmask.__val[0]; } ihk_mc_spinlock_unlock(lock, flag); - lock = &proc->sigpendinglock; - head = &proc->sigpending; + lock = &thread->sigpendinglock; + head = &thread->sigpending; flag = ihk_mc_spinlock_lock(lock); list_for_each_entry(pending, head, list){ w |= pending->sigmask.__val[0]; @@ -2521,7 +2436,7 @@ SYSCALL_DECLARE(signalfd) SYSCALL_DECLARE(signalfd4) { int fd = ihk_mc_syscall_arg0(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct sigfd *sfd; long irqstate; sigset_t *maskp = (sigset_t *)ihk_mc_syscall_arg1(ctx);; @@ -2536,10 +2451,10 @@ SYSCALL_DECLARE(signalfd4) if(flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)) return -EINVAL; - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); + irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); if(fd == -1){ struct syscall_request request IHK_DMA_ALIGN; - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); request.number = __NR_signalfd4; request.args[0] = 0; request.args[1] = flags; @@ -2551,21 +2466,21 @@ SYSCALL_DECLARE(signalfd4) if(!sfd) return -ENOMEM; sfd->fd = fd; - irqstate = ihk_mc_spinlock_lock(&proc->sighandler->lock); - sfd->next = proc->sighandler->sigfd; - proc->sighandler->sigfd = sfd; + irqstate = ihk_mc_spinlock_lock(&thread->sigcommon->lock); + sfd->next = thread->sigcommon->sigfd; + thread->sigcommon->sigfd = sfd; } else{ - for(sfd = proc->sighandler->sigfd; 
sfd; sfd = sfd->next) + for(sfd = thread->sigcommon->sigfd; sfd; sfd = sfd->next) if(sfd->fd == fd) break; if(!sfd){ - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); return -EINVAL; } } memcpy(&sfd->mask, &mask, sizeof mask); - ihk_mc_spinlock_unlock(&proc->sighandler->lock, irqstate); + ihk_mc_spinlock_unlock(&thread->sigcommon->lock, irqstate); return sfd->fd; } @@ -2612,11 +2527,11 @@ SYSCALL_DECLARE(rt_sigqueueinfo) if(copy_from_user(&info, winfo, sizeof info)) return -EFAULT; - return do_kill(pid, -1, sig, &info, 0); + return do_kill(cpu_local_var(current), pid, -1, sig, &info, 0); } static int -do_sigsuspend(struct process *proc, const sigset_t *set) +do_sigsuspend(struct thread *thread, const sigset_t *set) { __sigset_t wset; __sigset_t bset; @@ -2628,15 +2543,16 @@ do_sigsuspend(struct process *proc, const sigset_t *set) wset = set->__val[0]; wset &= ~__sigmask(SIGKILL); wset &= ~__sigmask(SIGSTOP); - bset = proc->sigmask.__val[0]; - proc->sigmask.__val[0] = wset; + bset = thread->sigmask.__val[0]; + thread->sigmask.__val[0] = wset; + thread->sigevent = 1; for(;;){ - while(proc->sigevent == 0); - proc->sigevent = 0; + while(thread->sigevent == 0) + cpu_pause(); - lock = &proc->sigshared->lock; - head = &proc->sigshared->sigpending; + lock = &thread->sigcommon->lock; + head = &thread->sigcommon->sigpending; flag = ihk_mc_spinlock_lock(lock); list_for_each_entry(pending, head, list){ if(!(pending->sigmask.__val[0] & wset)) @@ -2646,8 +2562,8 @@ do_sigsuspend(struct process *proc, const sigset_t *set) if(&pending->list == head){ ihk_mc_spinlock_unlock(lock, flag); - lock = &proc->sigpendinglock; - head = &proc->sigpending; + lock = &thread->sigpendinglock; + head = &thread->sigpending; flag = ihk_mc_spinlock_lock(lock); list_for_each_entry(pending, head, list){ if(!(pending->sigmask.__val[0] & wset)) @@ -2656,30 +2572,30 @@ do_sigsuspend(struct process *proc, const sigset_t *set) } 
if(&pending->list == head){ ihk_mc_spinlock_unlock(lock, flag); + thread->sigevent = 0; continue; } list_del(&pending->list); ihk_mc_spinlock_unlock(lock, flag); - proc->sigmask.__val[0] = bset; - do_signal(-EINTR, NULL, proc, pending, 0); + thread->sigmask.__val[0] = bset; + do_signal(-EINTR, NULL, thread, pending, 0); break; } -kprintf("return do_sigsuspend\n"); return -EINTR; } SYSCALL_DECLARE(pause) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); - return do_sigsuspend(proc, &proc->sigmask); + return do_sigsuspend(thread, &thread->sigmask); } SYSCALL_DECLARE(rt_sigsuspend) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); const sigset_t *set = (const sigset_t *)ihk_mc_syscall_arg0(ctx); size_t sigsetsize = (size_t)ihk_mc_syscall_arg1(ctx); sigset_t wset; @@ -2689,18 +2605,18 @@ SYSCALL_DECLARE(rt_sigsuspend) if(copy_from_user(&wset, set, sizeof wset)) return -EFAULT; - return do_sigsuspend(proc, &wset); + return do_sigsuspend(thread, &wset); } SYSCALL_DECLARE(sigaltstack) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); const stack_t *ss = (const stack_t *)ihk_mc_syscall_arg0(ctx); stack_t *oss = (stack_t *)ihk_mc_syscall_arg1(ctx); stack_t wss; if(oss) - if(copy_to_user(oss, &proc->sigstack, sizeof wss)) + if(copy_to_user(oss, &thread->sigstack, sizeof wss)) return -EFAULT; if(ss){ if(copy_from_user(&wss, ss, sizeof wss)) @@ -2708,15 +2624,15 @@ SYSCALL_DECLARE(sigaltstack) if(wss.ss_flags != 0 && wss.ss_flags != SS_DISABLE) return -EINVAL; if(wss.ss_flags == SS_DISABLE){ - proc->sigstack.ss_sp = NULL; - proc->sigstack.ss_flags = SS_DISABLE; - proc->sigstack.ss_size = 0; + thread->sigstack.ss_sp = NULL; + thread->sigstack.ss_flags = SS_DISABLE; + thread->sigstack.ss_size = 0; } else{ if(wss.ss_size < MINSIGSTKSZ) return -ENOMEM; - memcpy(&proc->sigstack, &wss, sizeof wss); + memcpy(&thread->sigstack, &wss, 
sizeof wss); } } @@ -2729,8 +2645,8 @@ SYSCALL_DECLARE(mincore) const size_t len = ihk_mc_syscall_arg1(ctx); uint8_t * const vec = (void *)ihk_mc_syscall_arg2(ctx); const uintptr_t end = start + len; - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; + struct thread *thread = cpu_local_var(current); + struct process_vm *vm = thread->vm; void *up; uintptr_t addr; struct vm_range *range; @@ -2755,7 +2671,7 @@ SYSCALL_DECLARE(mincore) } ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)addr, NULL, NULL, NULL); + ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table, (void *)addr, NULL, NULL, NULL); /* * XXX: It might be necessary to consider whether this page is COW page or not. */ @@ -2782,8 +2698,8 @@ SYSCALL_DECLARE(madvise) const int advice = (int)ihk_mc_syscall_arg2(ctx); size_t len; uintptr_t end; - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; struct vm_range *first; uintptr_t addr; struct vm_range *range; @@ -2850,17 +2766,17 @@ SYSCALL_DECLARE(madvise) goto out2; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); /* check contiguous map */ first = NULL; range = NULL; /* for avoidance of warning */ for (addr = start; addr < end; addr = range->end) { if (first == NULL) { - range = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); + range = lookup_process_memory_range(thread->vm, start, start+PAGE_SIZE); first = range; } else { - range = next_process_memory_range(proc->vm, range); + range = next_process_memory_range(thread->vm, range); } if ((range == NULL) || (addr < range->start)) { @@ -2898,7 +2814,7 @@ SYSCALL_DECLARE(madvise) error = 0; out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + 
ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); out2: dkprintf("[%d]sys_madvise(%lx,%lx,%x): %d\n", @@ -2927,33 +2843,33 @@ struct shm_info the_shm_info = { 0, }; static uid_t geteuid(void) { struct syscall_request sreq IHK_DMA_ALIGN; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); sreq.number = __NR_geteuid; - return (uid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); + return (uid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid); } static gid_t getegid(void) { struct syscall_request sreq IHK_DMA_ALIGN; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); sreq.number = __NR_getegid; - return (gid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); + return (gid_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid); } time_t time(void) { struct syscall_request sreq IHK_DMA_ALIGN; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); sreq.number = __NR_time; sreq.args[0] = (uintptr_t)NULL; - return (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), proc->ftn->pid); + return (time_t)do_syscall(&sreq, ihk_mc_get_processor_id(), thread->proc->pid); } pid_t getpid(void) { - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); - return proc->ftn->pid; + return thread->proc->pid; } static int make_shmid(struct shmobj *obj) @@ -3038,7 +2954,7 @@ SYSCALL_DECLARE(shmget) uid_t euid = geteuid(); gid_t egid = getegid(); time_t now = time(); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); int shmid; int error; struct shmid_ds ads; @@ -3126,7 +3042,7 @@ SYSCALL_DECLARE(shmget) ads.shm_perm.mode = shmflg & 0777; ads.shm_segsz = size; ads.shm_ctime = now; - ads.shm_cpid = proc->ftn->pid; + ads.shm_cpid = thread->proc->pid; error = shmobj_create_indexed(&ads, &obj); if (error) 
{ @@ -3153,10 +3069,10 @@ SYSCALL_DECLARE(shmat) const int shmid = ihk_mc_syscall_arg0(ctx); void * const shmaddr = (void *)ihk_mc_syscall_arg1(ctx); const int shmflg = ihk_mc_syscall_arg2(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); size_t len; int error; - struct vm_regions *region = &proc->vm->region; + struct vm_regions *region = &thread->vm->region; intptr_t addr; int prot; int vrflags; @@ -3208,11 +3124,11 @@ SYSCALL_DECLARE(shmat) return -EACCES; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); if (addr) { - if (lookup_process_memory_range(proc->vm, addr, addr+len)) { - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + if (lookup_process_memory_range(thread->vm, addr, addr+len)) { + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):lookup_process_memory_range succeeded. -ENOMEM\n", shmid, shmaddr, shmflg); return -ENOMEM; @@ -3221,7 +3137,7 @@ SYSCALL_DECLARE(shmat) else { error = search_free_space(len, region->map_end, &addr); if (error) { - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):search_free_space failed. %d\n", shmid, shmaddr, shmflg, error); return error; @@ -3237,7 +3153,7 @@ SYSCALL_DECLARE(shmat) if (!(prot & PROT_WRITE)) { error = set_host_vma(addr, len, PROT_READ); if (error) { - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):set_host_vma failed. 
%d\n", shmid, shmaddr, shmflg, error); return error; @@ -3246,19 +3162,19 @@ SYSCALL_DECLARE(shmat) memobj_ref(&obj->memobj); - error = add_process_memory_range(proc, addr, addr+len, -1, vrflags, &obj->memobj, 0); + error = add_process_memory_range(thread->vm, addr, addr+len, -1, vrflags, &obj->memobj, 0); if (error) { if (!(prot & PROT_WRITE)) { (void)set_host_vma(addr, len, PROT_READ|PROT_WRITE); } memobj_release(&obj->memobj); - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat(%#x,%p,%#x):add_process_memory_range failed. %d\n", shmid, shmaddr, shmflg, error); return error; } - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); shmobj_list_unlock(); dkprintf("shmat:bump shm_nattach %p %d\n", obj, obj->ds.shm_nattch); @@ -3464,28 +3380,28 @@ SYSCALL_DECLARE(shmctl) SYSCALL_DECLARE(shmdt) { void * const shmaddr = (void *)ihk_mc_syscall_arg0(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); struct vm_range *range; int error; dkprintf("shmdt(%p)\n", shmaddr); - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); - range = lookup_process_memory_range(proc->vm, (uintptr_t)shmaddr, (uintptr_t)shmaddr+1); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); + range = lookup_process_memory_range(thread->vm, (uintptr_t)shmaddr, (uintptr_t)shmaddr+1); if (!range || (range->start != (uintptr_t)shmaddr) || !range->memobj || !(range->memobj->flags & MF_SHMDT_OK)) { - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); dkprintf("shmdt(%p): -EINVAL\n", shmaddr); return -EINVAL; } error = do_munmap((void *)range->start, (range->end - range->start)); if (error) { - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + 
ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); dkprintf("shmdt(%p): %d\n", shmaddr, error); return error; } - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); dkprintf("shmdt(%p): 0\n", shmaddr); return 0; } /* sys_shmdt() */ @@ -3526,7 +3442,7 @@ SYSCALL_DECLARE(futex) dkprintf("futex,utime and FUTEX_WAIT_*, uaddr=%lx, []=%x\n", (unsigned long)uaddr, *uaddr); - if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->page_table, + if (ihk_mc_pt_virt_to_phys(cpu_local_var(current)->vm->address_space->page_table, (void *)&tv_now, &__phys)) { return -EFAULT; } @@ -3571,8 +3487,26 @@ SYSCALL_DECLARE(futex) SYSCALL_DECLARE(exit) { - struct process *proc = cpu_local_var(current); - dkprintf("sys_exit,pid=%d\n", proc->ftn->pid); + struct thread *thread = cpu_local_var(current); + struct thread *child; + struct process *proc = thread->proc; + struct mcs_rwlock_node_irqsave lock; + int nproc; + int exit_status = (int)ihk_mc_syscall_arg0(ctx); + + dkprintf("sys_exit,pid=%d\n", proc->pid); + + mcs_rwlock_reader_lock(&proc->threads_lock, &lock); + nproc = 0; + list_for_each_entry(child, &proc->threads_list, siblings_list){ + nproc++; + } + mcs_rwlock_reader_unlock(&proc->threads_lock, &lock); + + if(nproc == 1){ // process has only one thread + terminate(exit_status, 0); + return 0; + } #ifdef DCFA_KMOD do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); @@ -3583,24 +3517,21 @@ SYSCALL_DECLARE(exit) */ /* If there is a clear_child_tid address set, clear it and wake it. * This unblocks any pthread_join() waiters. 
*/ - if (proc->thread.clear_child_tid) { + if (thread->thread.clear_child_tid) { dkprintf("exit clear_child!\n"); - *proc->thread.clear_child_tid = 0; + *thread->thread.clear_child_tid = 0; barrier(); - futex((uint32_t *)proc->thread.clear_child_tid, + futex((uint32_t *)thread->thread.clear_child_tid, FUTEX_WAKE, 1, 0, NULL, 0, 0); } - - proc->ftn->status = PS_ZOMBIE; - - release_fork_tree_node(proc->ftn->parent); - release_fork_tree_node(proc->ftn); - //release_process(proc); + + thread->tstatus = PS_EXITED; + release_thread(thread); schedule(); - + return 0; } @@ -3660,7 +3591,7 @@ SYSCALL_DECLARE(setrlimit) int rc; int resource = ihk_mc_syscall_arg0(ctx); struct rlimit *rlm = (struct rlimit *)ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); int i; int mcresource; @@ -3683,7 +3614,7 @@ SYSCALL_DECLARE(setrlimit) if(i >= sizeof(rlimits) / sizeof(int)) return -EINVAL; - if(copy_from_user(proc->rlimit + mcresource, rlm, sizeof(struct rlimit))) + if(copy_from_user(thread->proc->rlimit + mcresource, rlm, sizeof(struct rlimit))) return -EFAULT; return 0; @@ -3693,7 +3624,7 @@ SYSCALL_DECLARE(getrlimit) { int resource = ihk_mc_syscall_arg0(ctx); struct rlimit *rlm = (struct rlimit *)ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); int i; int mcresource; @@ -3706,30 +3637,32 @@ SYSCALL_DECLARE(getrlimit) return -EINVAL; // TODO: check limit - if(copy_to_user(rlm, proc->rlimit + mcresource, sizeof(struct rlimit))) + if(copy_to_user(rlm, thread->proc->rlimit + mcresource, sizeof(struct rlimit))) return -EFAULT; return 0; } extern int ptrace_traceme(void); -extern void clear_single_step(struct process *proc); -extern void set_single_step(struct process *proc); +extern void clear_single_step(struct thread *thread); +extern void set_single_step(struct thread *thread); static int ptrace_wakeup_sig(int pid, long request, long 
data) { dkprintf("ptrace_wakeup_sig,pid=%d,data=%08x\n", pid, data); int error = 0; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; struct siginfo info; + struct mcs_rwlock_node_irqsave lock; + struct thread *thread = cpu_local_var(current); - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) { error = -ESRCH; goto out; } - ihk_mc_spinlock_unlock(savelock, irqstate); +// TODO: must not unlock at this point. +// Unlocking here means the thread can no longer be guaranteed to stay alive. + thread_unlock(child, &lock); if (data > 64 || data < 0) { error = -EINVAL; @@ -3740,7 +3673,7 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { case PTRACE_KILL: memset(&info, '\0', sizeof info); info.si_signo = SIGKILL; - error = do_kill(pid, -1, SIGKILL, &info, 0); + error = do_kill(thread, pid, -1, SIGKILL, &info, 0); if (error < 0) { goto out; } @@ -3751,14 +3684,13 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { if (request == PTRACE_SINGLESTEP) { set_single_step(child); } - ihk_mc_spinlock_lock_noirq(&child->ftn->lock); - child->ftn->ptrace &= ~PT_TRACE_SYSCALL_MASK; + //? ihk_mc_spinlock_lock_noirq(&child->proc->lock); + child->proc->ptrace &= ~PT_TRACE_SYSCALL_MASK; if (request == PTRACE_SYSCALL) { - child->ftn->ptrace |= PT_TRACE_SYSCALL_ENTER; + child->proc->ptrace |= PT_TRACE_SYSCALL_ENTER; } - ihk_mc_spinlock_unlock_noirq(&child->ftn->lock); + //?
ihk_mc_spinlock_unlock_noirq(&child->proc->lock); if(data != 0 && data != SIGSTOP) { - struct process *proc; /* TODO: Tracing process replace the original signal with "data" */ @@ -3768,13 +3700,12 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { child->ptrace_sendsig = NULL; } else { - proc = cpu_local_var(current); memset(&info, '\0', sizeof info); info.si_signo = data; info.si_code = SI_USER; - info._sifields._kill.si_pid = proc->ftn->pid; + info._sifields._kill.si_pid = thread->proc->pid; } - error = do_kill(pid, -1, data, &info, 1); + error = do_kill(thread, pid, -1, data, &info, 1); if (error < 0) { goto out; } @@ -3784,30 +3715,29 @@ static int ptrace_wakeup_sig(int pid, long request, long data) { break; } - sched_wakeup_process(child, PS_TRACED | PS_STOPPED); + sched_wakeup_thread(child, PS_TRACED | PS_STOPPED); out: return error; } -extern long ptrace_read_user(struct process *proc, long addr, unsigned long *value); -extern long ptrace_write_user(struct process *proc, long addr, unsigned long value); +extern long ptrace_read_user(struct thread *thread, long addr, unsigned long *value); +extern long ptrace_write_user(struct thread *thread, long addr, unsigned long value); static long ptrace_pokeuser(int pid, long addr, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; if(addr > sizeof(struct user) - 8 || addr < 0) return -EFAULT; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + if(child->proc->pstatus == PS_TRACED){ rc = ptrace_write_user(child, addr, (unsigned long)data); } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -3815,24 +3745,23 @@ static long ptrace_pokeuser(int pid, long addr, long data) static long ptrace_peekuser(int pid, long addr, long 
data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; unsigned long *p = (unsigned long *)data; if(addr > sizeof(struct user) - 8|| addr < 0) return -EFAULT; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + if(child->proc->pstatus == PS_TRACED){ unsigned long value; rc = ptrace_read_user(child, addr, &value); if (rc == 0) { rc = copy_to_user(p, (char *)&value, sizeof(value)); } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -3841,14 +3770,13 @@ static long ptrace_getregs(int pid, long data) { struct user_regs_struct *regs = (struct user_regs_struct *)data; long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + if(child->proc->pstatus == PS_TRACED){ struct user_regs_struct user_regs; long addr; unsigned long *p; @@ -3863,7 +3791,7 @@ static long ptrace_getregs(int pid, long data) rc = copy_to_user(regs, &user_regs, sizeof(struct user_regs_struct)); } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -3872,14 +3800,13 @@ static long ptrace_setregs(int pid, long data) { struct user_regs_struct *regs = (struct user_regs_struct *)data; long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + 
if(child->proc->pstatus == PS_TRACED){ struct user_regs_struct user_regs; rc = copy_from_user(&user_regs, regs, sizeof(struct user_regs_struct)); if (rc == 0) { @@ -3895,7 +3822,7 @@ static long ptrace_setregs(int pid, long data) } } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -3903,14 +3830,13 @@ static long ptrace_setregs(int pid, long data) static long ptrace_arch_prctl(int pid, long code, long addr) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if (child->ftn->status == PS_TRACED) { + if (child->proc->pstatus == PS_TRACED) { switch (code) { case ARCH_GET_FS: { unsigned long value; @@ -3949,28 +3875,27 @@ static long ptrace_arch_prctl(int pid, long code, long addr) break; } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } -extern long ptrace_read_fpregs(struct process *proc, void *fpregs); -extern long ptrace_write_fpregs(struct process *proc, void *fpregs); +extern long ptrace_read_fpregs(struct thread *thread, void *fpregs); +extern long ptrace_write_fpregs(struct thread *thread, void *fpregs); static long ptrace_getfpregs(int pid, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if (child->ftn->status == PS_TRACED) { + if (child->proc->pstatus == PS_TRACED) { rc = ptrace_read_fpregs(child, (void *)data); } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -3978,35 +3903,33 @@ static long ptrace_getfpregs(int pid, long data) static 
long ptrace_setfpregs(int pid, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if (child->ftn->status == PS_TRACED) { + if (child->proc->pstatus == PS_TRACED) { rc = ptrace_write_fpregs(child, (void *)data); } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } -extern long ptrace_read_regset(struct process *proc, long type, struct iovec *iov); -extern long ptrace_write_regset(struct process *proc, long type, struct iovec *iov); +extern long ptrace_read_regset(struct thread *thread, long type, struct iovec *iov); +extern long ptrace_write_regset(struct thread *thread, long type, struct iovec *iov); static long ptrace_getregset(int pid, long type, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if (child->ftn->status == PS_TRACED) { + if (child->proc->pstatus == PS_TRACED) { struct iovec iov; rc = copy_from_user(&iov, (struct iovec *)data, sizeof(iov)); @@ -4018,7 +3941,7 @@ static long ptrace_getregset(int pid, long type, long data) &iov.iov_len, sizeof(iov.iov_len)); } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4026,14 +3949,13 @@ static long ptrace_getregset(int pid, long type, long data) static long ptrace_setregset(int pid, long type, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child 
= find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if (child->ftn->status == PS_TRACED) { + if (child->proc->pstatus == PS_TRACED) { struct iovec iov; rc = copy_from_user(&iov, (struct iovec *)data, sizeof(iov)); @@ -4045,7 +3967,7 @@ static long ptrace_setregset(int pid, long type, long data) &iov.iov_len, sizeof(iov.iov_len)); } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4053,15 +3975,14 @@ static long ptrace_setregset(int pid, long type, long data) static long ptrace_peektext(int pid, long addr, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; unsigned long *p = (unsigned long *)data; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + if(child->proc->pstatus == PS_TRACED){ unsigned long value; rc = read_process_vm(child->vm, &value, (void *)addr, sizeof(value)); if (rc != 0) { @@ -4070,7 +3991,7 @@ static long ptrace_peektext(int pid, long addr, long data) rc = copy_to_user(p, &value, sizeof(value)); } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4078,20 +3999,19 @@ static long ptrace_peektext(int pid, long addr, long data) static long ptrace_poketext(int pid, long addr, long data) { long rc = -EIO; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) return -ESRCH; - if(child->ftn->status == PS_TRACED){ + if(child->proc->pstatus == PS_TRACED){ rc = patch_process_vm(child->vm, (void *)addr, &data, sizeof(data)); if (rc) { dkprintf("ptrace_poketext: bad address 0x%llx\n", addr); } } - 
ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4099,9 +4019,8 @@ static long ptrace_poketext(int pid, long addr, long data) static int ptrace_setoptions(int pid, int flags) { int ret; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; /* Only supported options are enabled. * Following options are pretended to be supported for the time being: @@ -4123,18 +4042,19 @@ static int ptrace_setoptions(int pid, int flags) goto out; } - child = findthread_and_lock(pid, pid, &savelock, &irqstate); - if (!child || !child->ftn || !(child->ftn->ptrace & PT_TRACED)) { + child = find_thread(pid, pid, &lock); + if (!child || !child->proc || !(child->proc->ptrace & PT_TRACED)) { ret = -ESRCH; goto unlockout; } - child->ftn->ptrace &= ~PTRACE_O_MASK; /* PT_TRACE_EXEC remains */ - child->ftn->ptrace |= flags; + child->proc->ptrace &= ~PTRACE_O_MASK; /* PT_TRACE_EXEC remains */ + child->proc->ptrace |= flags; ret = 0; unlockout: - ihk_mc_spinlock_unlock(savelock, irqstate); + if(child) + thread_unlock(child, &lock); out: return ret; } @@ -4142,234 +4062,167 @@ out: static int ptrace_attach(int pid) { int error = 0; - struct process *proc; - struct fork_tree_node *child, *next; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *thread; + struct thread *mythread = cpu_local_var(current); + struct process *proc = mythread->proc; + struct process *child; + struct process *parent; + struct mcs_rwlock_node_irqsave lock; + struct mcs_rwlock_node childlock; + struct mcs_rwlock_node updatelock; struct siginfo info; - proc = findthread_and_lock(pid, pid, &savelock, &irqstate); - if (!proc) { + thread = find_thread(pid, pid, &lock); + if (!thread) { error = -ESRCH; goto out; } - ihk_mc_spinlock_unlock(savelock, irqstate); - dkprintf("ptrace_attach,pid=%d,proc->ftn->parent=%p\n", proc->ftn->pid, proc->ftn->parent); + child = thread->proc; + 
dkprintf("ptrace_attach,pid=%d,thread->proc->parent=%p\n", thread->proc->pid, thread->proc->parent); - if (proc->ftn->ptrace & PT_TRACED) { + mcs_rwlock_writer_lock_noirq(&child->update_lock, &updatelock); + if (thread->proc->ptrace & PT_TRACED) { + mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); + thread_unlock(thread, &lock); error = -EPERM; goto out; } - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - if (proc->ftn->parent) { - dkprintf("ptrace_attach,parent->pid=%d\n", proc->ftn->parent->pid); + parent = child->parent; - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); + dkprintf("ptrace_attach,parent->pid=%d\n", parent->pid); - list_for_each_entry_safe(child, next, &proc->ftn->parent->children, siblings_list) { - if(child == proc->ftn) { - list_del(&child->siblings_list); - goto found; - } - } - kprintf("ptrace_attach,not found\n"); - error = -EPERM; - goto out_notfound; - found: - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - } else { - hold_fork_tree_node(proc->ftn); - } + mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); + list_del(&child->siblings_list); + list_add_tail(&child->ptraced_siblings_list, &parent->ptraced_children_list); + mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); - proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC; - proc->ftn->ppid_parent = proc->ftn->parent; - proc->ftn->parent = cpu_local_var(current)->ftn; + mcs_rwlock_writer_lock_noirq(&proc->children_lock, &childlock); + list_add_tail(&child->siblings_list, &proc->children_list); + thread->proc->parent = proc; + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - list_add_tail(&proc->ftn->ptrace_siblings_list, &proc->ftn->parent->ptrace_children); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); + child->ptrace = PT_TRACED | PT_TRACE_EXEC; - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + 
mcs_rwlock_writer_unlock_noirq(&thread->proc->update_lock, &updatelock); - if (proc->ptrace_debugreg == NULL) { - error = alloc_debugreg(proc); + if (thread->ptrace_debugreg == NULL) { + error = alloc_debugreg(thread); if (error < 0) { + thread_unlock(thread, &lock); goto out; } } - clear_single_step(proc); + clear_single_step(thread); + + thread_unlock(thread, &lock); memset(&info, '\0', sizeof info); info.si_signo = SIGSTOP; info.si_code = SI_USER; - info._sifields._kill.si_pid = cpu_local_var(current)->ftn->pid; - error = do_kill(pid, -1, SIGSTOP, &info, 0); + info._sifields._kill.si_pid = proc->pid; + error = do_kill(mythread, pid, -1, SIGSTOP, &info, 0); if (error < 0) { goto out; } - sched_wakeup_process(proc, PS_TRACED | PS_STOPPED); + sched_wakeup_thread(thread, PS_TRACED | PS_STOPPED); out: dkprintf("ptrace_attach,returning,error=%d\n", error); return error; - - out_notfound: - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - goto out; } -static int ptrace_detach(int pid, int data) +int ptrace_detach(int pid, int data) { int error = 0; - struct process *proc; - struct fork_tree_node *child, *next; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *thread; + struct thread *mythread = cpu_local_var(current); + struct process *proc = mythread->proc; + struct process *child; + struct process *parent; + struct mcs_rwlock_node_irqsave lock; + struct mcs_rwlock_node childlock; + struct mcs_rwlock_node updatelock; struct siginfo info; - proc = findthread_and_lock(pid, pid, &savelock, &irqstate); - if (!proc) { - error = -ESRCH; - goto out; - } - ihk_mc_spinlock_unlock(savelock, irqstate); - - if (!(proc->ftn->ptrace & PT_TRACED) || - proc->ftn->parent != cpu_local_var(current)->ftn) { - error = -ESRCH; - goto out; - } - if (data > 64 || data < 0) { - error = -EIO; + return -EIO; + } + + thread = find_thread(pid, pid, &lock); + if (!thread) { + error = -ESRCH; goto out; }
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - - list_for_each_entry_safe(child, next, &proc->ftn->parent->ptrace_children, ptrace_siblings_list) { - if (child == proc->ftn) { - list_del(&child->ptrace_siblings_list); - goto found; - } - } - kprintf("ptrace_detach,not found\n"); - error = -EPERM; - goto out_notfound; -found: - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - - proc->ftn->ptrace = 0; - proc->ftn->parent = proc->ftn->ppid_parent; - proc->ftn->ppid_parent = NULL; - - if (proc->ftn->parent) { - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - list_add_tail(&proc->ftn->siblings_list, &proc->ftn->parent->children); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - } else { - release_fork_tree_node(proc->ftn); + child = thread->proc; + parent = child->ppid_parent; + mcs_rwlock_writer_lock_noirq(&child->update_lock, &updatelock); + if (!(child->ptrace & PT_TRACED) || child->parent != proc) { + mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); + thread_unlock(thread, &lock); + error = -ESRCH; + goto out; } - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); + mcs_rwlock_writer_lock_noirq(&proc->children_lock, &childlock); + list_del(&child->siblings_list); + mcs_rwlock_writer_unlock_noirq(&proc->children_lock, &childlock); - if (proc->ptrace_debugreg) { - kfree(proc->ptrace_debugreg); - proc->ptrace_debugreg = NULL; + mcs_rwlock_writer_lock_noirq(&parent->children_lock, &childlock); + list_del(&child->ptraced_siblings_list); + list_add_tail(&child->siblings_list, &parent->children_list); + child->parent = parent; + mcs_rwlock_writer_unlock_noirq(&parent->children_lock, &childlock); + + child->ptrace = 0; + mcs_rwlock_writer_unlock_noirq(&child->update_lock, &updatelock); + + if (thread->ptrace_debugreg) { + kfree(thread->ptrace_debugreg); + thread->ptrace_debugreg = NULL; } - clear_single_step(proc); + clear_single_step(thread); + + thread_unlock(thread, &lock); if (data != 0) { memset(&info, '\0', sizeof info); info.si_signo = data;
info.si_code = SI_USER; - info._sifields._kill.si_pid = cpu_local_var(current)->ftn->pid; - error = do_kill(pid, -1, data, &info, 1); + info._sifields._kill.si_pid = proc->pid; + error = do_kill(mythread, pid, -1, data, &info, 1); if (error < 0) { goto out; } } - sched_wakeup_process(proc, PS_TRACED | PS_STOPPED); + sched_wakeup_thread(thread, PS_TRACED | PS_STOPPED); out: return error; -out_notfound: - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - goto out; -} - -static int ptrace_terminate_tracer(struct process *proc, struct fork_tree_node *tracer) -{ - int error = 0; - - dkprintf("ptrace_terminate_tracer,pid=%d\n", proc->ftn->pid); - if (!(proc->ftn->ptrace & PT_TRACED) || - proc->ftn->parent != tracer) { - error = -ESRCH; - goto out; - } - - ihk_mc_spinlock_lock_noirq(&proc->ftn->lock); - - proc->ftn->ptrace = 0; - proc->ftn->parent = proc->ftn->ppid_parent; - proc->ftn->ppid_parent = NULL; - - if (proc->ftn->parent && proc->ftn->parent != tracer) { - /* re-connect real parent */ - ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock); - list_add_tail(&proc->ftn->siblings_list, &proc->ftn->parent->children); - ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock); - } else { - error = 1; /* will call release_fork_tree_node() */ - } - - /* if signal stopped, change to PS_STOPPED */ - if (proc->ftn->signal_flags & SIGNAL_STOP_STOPPED) { - proc->ftn->status = PS_STOPPED; - } - - ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock); - - if (proc->ptrace_debugreg) { - kfree(proc->ptrace_debugreg); - proc->ptrace_debugreg = NULL; - } - - clear_single_step(proc); - -out: - dkprintf("ptrace_terminate_tracer,error=%d\n", error); - return error; } static long ptrace_geteventmsg(int pid, long data) { unsigned long *msg_p = (unsigned long *)data; long rc = -ESRCH; - struct process *child; - ihk_spinlock_t *savelock; - unsigned long irqstate; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; - child = 
findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) { return -ESRCH; } - if (child->ftn->status == PS_TRACED) { - if (copy_to_user(msg_p, &child->ftn->ptrace_eventmsg, sizeof(*msg_p))) { + if (child->proc->pstatus == PS_TRACED) { + if (copy_to_user(msg_p, &child->proc->ptrace_eventmsg, sizeof(*msg_p))) { rc = -EFAULT; } else { rc = 0; } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4377,17 +4230,16 @@ static long ptrace_geteventmsg(int pid, long data) static long ptrace_getsiginfo(int pid, siginfo_t *data) { - ihk_spinlock_t *savelock; - unsigned long irqstate; - struct process *child; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; int rc = 0; - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) { return -ESRCH; } - if (child->ftn->status != PS_TRACED) { + if (child->proc->pstatus != PS_TRACED) { rc = -ESRCH; } else if (child->ptrace_recvsig) { @@ -4398,25 +4250,23 @@ ptrace_getsiginfo(int pid, siginfo_t *data) else { rc = -ESRCH; } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } static long ptrace_setsiginfo(int pid, siginfo_t *data) { - ihk_spinlock_t *savelock; - unsigned long irqstate; - struct process *child; + struct thread *child; + struct mcs_rwlock_node_irqsave lock; int rc = 0; -kprintf("ptrace_setsiginfo: sig=%d errno=%d code=%d\n", data->si_signo, data->si_errno, data->si_code); - child = findthread_and_lock(pid, pid, &savelock, &irqstate); + child = find_thread(pid, pid, &lock); if (!child) { return -ESRCH; } - if (child->ftn->status != PS_TRACED) { + if (child->proc->pstatus != PS_TRACED) { rc = -ESRCH; } else { @@ -4432,7 +4282,7 @@ kprintf("ptrace_setsiginfo: sig=%d errno=%d code=%d\n", data->si_signo, data->si rc = -EFAULT; } } - ihk_mc_spinlock_unlock(savelock, irqstate); + thread_unlock(child, &lock); return rc; } @@ -4552,7 
+4402,7 @@ SYSCALL_DECLARE(ptrace) /* We do not have actual scheduling classes so we just make sure we store * policies and priorities in a POSIX/Linux complaint manner */ -static int setscheduler(struct process *proc, int policy, struct sched_param *param) +static int setscheduler(struct thread *thread, int policy, struct sched_param *param) { if ((policy == SCHED_FIFO || policy == SCHED_RR) && ((param->sched_priority < 1) || @@ -4565,8 +4415,8 @@ static int setscheduler(struct process *proc, int policy, struct sched_param *pa return -EINVAL; } - memcpy(&proc->sched_param, param, sizeof(*param)); - proc->sched_policy = policy; + memcpy(&thread->sched_param, param, sizeof(*param)); + thread->sched_policy = policy; return 0; } @@ -4580,9 +4430,8 @@ SYSCALL_DECLARE(sched_setparam) int pid = (int)ihk_mc_syscall_arg0(ctx); struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg1(ctx); struct sched_param param; - struct process *proc = cpu_local_var(current); - unsigned long irqstate = 0; - ihk_spinlock_t *lock; + struct thread *thread = cpu_local_var(current); + struct mcs_rwlock_node_irqsave lock; struct syscall_request request1 IHK_DMA_ALIGN; @@ -4593,14 +4442,16 @@ SYSCALL_DECLARE(sched_setparam) } if (pid == 0) - pid = proc->ftn->pid; + pid = thread->proc->pid; - if (proc->ftn->pid != pid) { - proc = findthread_and_lock(pid, pid, &lock, &irqstate); - if (!proc) { + if (thread->proc->pid != pid) { + thread = find_thread(pid, pid, &lock); + if (!thread) { return -ESRCH; } - process_unlock(lock, irqstate); + // TODO: unlock 場所のチェック + // 何をしようとしているのか理解 + thread_unlock(thread, &lock); /* Ask Linux about ownership.. 
*/ request1.number = __NR_sched_setparam; @@ -4618,7 +4469,7 @@ SYSCALL_DECLARE(sched_setparam) return -EFAULT; } - return setscheduler(proc, proc->sched_policy, ¶m); + return setscheduler(thread, thread->sched_policy, ¶m); } SYSCALL_DECLARE(sched_getparam) @@ -4626,26 +4477,25 @@ SYSCALL_DECLARE(sched_getparam) int retval = 0; int pid = (int)ihk_mc_syscall_arg0(ctx); struct sched_param *param = (struct sched_param *)ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); - unsigned long irqstate = 0; - ihk_spinlock_t *lock; + struct thread *thread = cpu_local_var(current); + struct mcs_rwlock_node_irqsave lock; if (!param || pid < 0) { return -EINVAL; } if (pid == 0) - pid = proc->ftn->pid; + pid = thread->proc->pid; - if (proc->ftn->pid != pid) { - proc = findthread_and_lock(pid, pid, &lock, &irqstate); - if (!proc) { + if (thread->proc->pid != pid) { + thread = find_thread(pid, pid, &lock); + if (!thread) { return -ESRCH; } - process_unlock(lock, irqstate); + thread_unlock(thread, &lock); } - retval = copy_to_user(param, &proc->sched_param, sizeof(*param)) ? -EFAULT : 0; + retval = copy_to_user(param, &thread->sched_param, sizeof(*param)) ? 
-EFAULT : 0; return retval; } @@ -4657,9 +4507,8 @@ SYSCALL_DECLARE(sched_setscheduler) int policy = ihk_mc_syscall_arg1(ctx); struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg2(ctx); struct sched_param param; - struct process *proc = cpu_local_var(current); - unsigned long irqstate = 0; - ihk_spinlock_t *lock; + struct thread *thread = cpu_local_var(current); + struct mcs_rwlock_node_irqsave lock; struct syscall_request request1 IHK_DMA_ALIGN; @@ -4692,14 +4541,14 @@ SYSCALL_DECLARE(sched_setscheduler) } if (pid == 0) - pid = proc->ftn->pid; + pid = thread->proc->pid; - if (proc->ftn->pid != pid) { - proc = findthread_and_lock(pid, pid, &lock, &irqstate); - if (!proc) { + if (thread->proc->pid != pid) { + thread = find_thread(pid, pid, &lock); + if (!thread) { return -ESRCH; } - process_unlock(lock, irqstate); + thread_unlock(thread, &lock); /* Ask Linux about ownership.. */ request1.number = __NR_sched_setparam; @@ -4712,32 +4561,31 @@ SYSCALL_DECLARE(sched_setscheduler) } } - return setscheduler(proc, policy, ¶m); + return setscheduler(thread, policy, ¶m); } SYSCALL_DECLARE(sched_getscheduler) { int pid = (int)ihk_mc_syscall_arg0(ctx); - struct process *proc = cpu_local_var(current); - unsigned long irqstate = 0; - ihk_spinlock_t *lock; + struct thread *thread = cpu_local_var(current); + struct mcs_rwlock_node_irqsave lock; if (pid < 0) { return -EINVAL; } if (pid == 0) - pid = proc->ftn->pid; + pid = thread->proc->pid; - if (proc->ftn->pid != pid) { - proc = findthread_and_lock(pid, pid, &lock, &irqstate); - if (!proc) { + if (thread->proc->pid != pid) { + thread = find_thread(pid, pid, &lock); + if (!thread) { return -ESRCH; } - process_unlock(lock, irqstate); + thread_unlock(thread, &lock); } - return proc->sched_policy; + return thread->sched_policy; } SYSCALL_DECLARE(sched_get_priority_max) @@ -4784,28 +4632,27 @@ SYSCALL_DECLARE(sched_rr_get_interval) int pid = ihk_mc_syscall_arg0(ctx); struct timespec *utime = (struct timespec 
*)ihk_mc_syscall_arg1(ctx); struct timespec t; - struct process *proc = cpu_local_var(current); - unsigned long irqstate = 0; - ihk_spinlock_t *lock; + struct thread *thread = cpu_local_var(current); + struct mcs_rwlock_node_irqsave lock; int retval = 0; if (pid < 0) return -EINVAL; if (pid == 0) - pid = proc->ftn->pid; + pid = thread->proc->pid; - if (proc->ftn->pid != pid) { - proc = findthread_and_lock(pid, pid, &lock, &irqstate); - if (!proc) { + if (thread->proc->pid != pid) { + thread = find_thread(pid, pid, &lock); + if (!thread) { return -ESRCH; } - process_unlock(lock, irqstate); + thread_unlock(thread, &lock); } t.tv_sec = 0; t.tv_nsec = 0; - if (proc->sched_policy == SCHED_RR) { + if (thread->sched_policy == SCHED_RR) { t.tv_nsec = 10000; } @@ -4822,7 +4669,7 @@ SYSCALL_DECLARE(sched_setaffinity) cpu_set_t *u_cpu_set = (cpu_set_t *)ihk_mc_syscall_arg2(ctx); cpu_set_t k_cpu_set, cpu_set; - struct process *thread; + struct thread *thread; int cpu_id; int empty_set = 1; unsigned long irqstate; @@ -4845,7 +4692,7 @@ SYSCALL_DECLARE(sched_setaffinity) if (CPU_ISSET(cpu_id, &k_cpu_set)) { CPU_SET(cpu_id, &cpu_set); dkprintf("sched_setaffinity(): tid %d: setting target core %d\n", - cpu_local_var(current)->ftn->tid, cpu_id); + cpu_local_var(current)->tid, cpu_id); empty_set = 0; } } @@ -4856,7 +4703,7 @@ SYSCALL_DECLARE(sched_setaffinity) } if (tid == 0) { - tid = cpu_local_var(current)->ftn->tid; + tid = cpu_local_var(current)->tid; thread = cpu_local_var(current); cpu_id = ihk_mc_get_processor_id(); irqstate = ihk_mc_spinlock_lock(&get_cpu_local_var(cpu_id)->runq_lock); @@ -4871,7 +4718,7 @@ SYSCALL_DECLARE(sched_setaffinity) list_for_each_entry(thread, &get_cpu_local_var(cpu_id)->runq, sched_list) { - if (thread->ftn->pid && thread->ftn->tid == tid) { + if (thread->proc->pid && thread->tid == tid) { goto found; /* without unlocking runq_lock */ } } @@ -4888,12 +4735,12 @@ found: memcpy(&thread->cpu_set, &cpu_set, sizeof(cpu_set)); if (!CPU_ISSET(cpu_id, 
&thread->cpu_set)) { - hold_process(thread); + hold_thread(thread); ihk_mc_spinlock_unlock(&get_cpu_local_var(cpu_id)->runq_lock, irqstate); dkprintf("sched_setaffinity(): tid %d sched_request_migrate\n", - cpu_local_var(current)->ftn->tid, cpu_id); + cpu_local_var(current)->tid, cpu_id); sched_request_migrate(cpu_id, thread); - release_process(thread); + release_thread(thread); return 0; } else { @@ -4921,13 +4768,13 @@ SYSCALL_DECLARE(sched_getaffinity) len = MIN2(len, sizeof(k_cpu_set)); if(tid == 0) - tid = cpu_local_var(current)->ftn->tid; + tid = cpu_local_var(current)->tid; for (i = 0; i < num_processors && !found; i++) { - struct process *thread; + struct thread *thread; irqstate = ihk_mc_spinlock_lock(&get_cpu_local_var(i)->runq_lock); list_for_each_entry(thread, &get_cpu_local_var(i)->runq, sched_list) { - if (thread->ftn->pid && thread->ftn->tid == tid) { + if (thread->proc->pid && thread->tid == tid) { found = 1; memcpy(&k_cpu_set, &thread->cpu_set, sizeof(k_cpu_set)); break; @@ -5087,8 +4934,8 @@ SYSCALL_DECLARE(mlock) { const uintptr_t start0 = ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; uintptr_t start; size_t len; uintptr_t end; @@ -5124,17 +4971,17 @@ SYSCALL_DECLARE(mlock) goto out2; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); /* check contiguous map */ first = NULL; for (addr = start; addr < end; addr = range->end) { if (first == NULL) { - range = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); + range = lookup_process_memory_range(thread->vm, start, start+PAGE_SIZE); first = range; } else { - range = next_process_memory_range(proc->vm, range); + range = next_process_memory_range(thread->vm, range); } if (!range || (addr < 
range->start)) { @@ -5165,7 +5012,7 @@ SYSCALL_DECLARE(mlock) range = first; } else { - range = next_process_memory_range(proc->vm, changed); + range = next_process_memory_range(thread->vm, changed); } if (!range || (addr < range->start)) { @@ -5180,7 +5027,7 @@ SYSCALL_DECLARE(mlock) } if (range->start < addr) { - error = split_process_memory_range(proc, range, addr, &range); + error = split_process_memory_range(thread->vm, range, addr, &range); if (error) { ekprintf("[%d]sys_mlock(%lx,%lx):split failed. " " [%lx-%lx) %lx %d\n", @@ -5191,7 +5038,7 @@ SYSCALL_DECLARE(mlock) } } if (end < range->end) { - error = split_process_memory_range(proc, range, end, NULL); + error = split_process_memory_range(thread->vm, range, end, NULL); if (error) { ekprintf("[%d]sys_mlock(%lx,%lx):split failed. " " [%lx-%lx) %lx %d\n", @@ -5208,7 +5055,7 @@ SYSCALL_DECLARE(mlock) changed = range; } else { - error = join_process_memory_range(proc, changed, range); + error = join_process_memory_range(thread->vm, changed, range); if (error) { dkprintf("[%d]sys_mlock(%lx,%lx):join failed. %d", ihk_mc_get_processor_id(), @@ -5229,10 +5076,10 @@ SYSCALL_DECLARE(mlock) error = 0; out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); if (!error) { - error = populate_process_memory(proc, (void *)start, len); + error = populate_process_memory(thread->vm, (void *)start, len); if (error) { ekprintf("sys_mlock(%lx,%lx):populate failed. 
%d\n", start0, len0, error); @@ -5262,8 +5109,8 @@ SYSCALL_DECLARE(munlock) { const uintptr_t start0 = ihk_mc_syscall_arg0(ctx); const size_t len0 = ihk_mc_syscall_arg1(ctx); - struct process *proc = cpu_local_var(current); - struct vm_regions *region = &proc->vm->region; + struct thread *thread = cpu_local_var(current); + struct vm_regions *region = &thread->vm->region; uintptr_t start; size_t len; uintptr_t end; @@ -5299,17 +5146,17 @@ SYSCALL_DECLARE(munlock) goto out2; } - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); /* check contiguous map */ first = NULL; for (addr = start; addr < end; addr = range->end) { if (first == NULL) { - range = lookup_process_memory_range(proc->vm, start, start+PAGE_SIZE); + range = lookup_process_memory_range(thread->vm, start, start+PAGE_SIZE); first = range; } else { - range = next_process_memory_range(proc->vm, range); + range = next_process_memory_range(thread->vm, range); } if (!range || (addr < range->start)) { @@ -5340,7 +5187,7 @@ SYSCALL_DECLARE(munlock) range = first; } else { - range = next_process_memory_range(proc->vm, changed); + range = next_process_memory_range(thread->vm, changed); } if (!range || (addr < range->start)) { @@ -5355,7 +5202,7 @@ SYSCALL_DECLARE(munlock) } if (range->start < addr) { - error = split_process_memory_range(proc, range, addr, &range); + error = split_process_memory_range(thread->vm, range, addr, &range); if (error) { ekprintf("[%d]sys_munlock(%lx,%lx):split failed. " " [%lx-%lx) %lx %d\n", @@ -5366,7 +5213,7 @@ SYSCALL_DECLARE(munlock) } } if (end < range->end) { - error = split_process_memory_range(proc, range, end, NULL); + error = split_process_memory_range(thread->vm, range, end, NULL); if (error) { ekprintf("[%d]sys_munlock(%lx,%lx):split failed. 
" " [%lx-%lx) %lx %d\n", @@ -5383,7 +5230,7 @@ SYSCALL_DECLARE(munlock) changed = range; } else { - error = join_process_memory_range(proc, changed, range); + error = join_process_memory_range(thread->vm, changed, range); if (error) { dkprintf("[%d]sys_munlock(%lx,%lx):join failed. %d", ihk_mc_get_processor_id(), @@ -5404,7 +5251,7 @@ SYSCALL_DECLARE(munlock) error = 0; out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); out2: dkprintf("[%d]sys_munlock(%lx,%lx): %d\n", ihk_mc_get_processor_id(), start0, len0, error); @@ -5414,7 +5261,7 @@ out2: SYSCALL_DECLARE(mlockall) { const int flags = ihk_mc_syscall_arg0(ctx); - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); uid_t euid = geteuid(); if (!flags || (flags & ~(MCL_CURRENT|MCL_FUTURE))) { @@ -5427,7 +5274,7 @@ SYSCALL_DECLARE(mlockall) return 0; } - if (proc->rlimit[MCK_RLIMIT_MEMLOCK].rlim_cur != 0) { + if (thread->proc->rlimit[MCK_RLIMIT_MEMLOCK].rlim_cur != 0) { kprintf("mlockall(0x%x):limits exists: ENOMEM\n", flags); return -ENOMEM; } @@ -5453,14 +5300,14 @@ SYSCALL_DECLARE(remap_file_pages) const uintptr_t start = start0 & PAGE_MASK; const uintptr_t end = start + size; const off_t off = (off_t)pgoff << PAGE_SHIFT; - struct process * const proc = cpu_local_var(current); + struct thread * const thread = cpu_local_var(current); struct vm_range *range; int er; int need_populate = 0; dkprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x)\n", start0, size, prot, pgoff, flags); - ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(&thread->vm->memory_range_lock); #define PGOFF_LIMIT ((off_t)1 << ((8*sizeof(off_t) - 1) - PAGE_SHIFT)) if ((size <= 0) || (size & (PAGE_SIZE - 1)) || (prot != 0) || (pgoff < 0) || (PGOFF_LIMIT <= pgoff) @@ -5473,7 +5320,7 @@ SYSCALL_DECLARE(remap_file_pages) goto out; } - range = lookup_process_memory_range(proc->vm, start, 
end); + range = lookup_process_memory_range(thread->vm, start, end); if (!range || (start < range->start) || (range->end < end) || (range->flag & VR_PRIVATE) || (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED)) @@ -5488,7 +5335,7 @@ SYSCALL_DECLARE(remap_file_pages) } range->flag |= VR_FILEOFF; - error = remap_process_memory_range(proc->vm, range, start, end, off); + error = remap_process_memory_range(thread->vm, range, start, end, off); if (error) { ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):" "remap failed %d\n", @@ -5502,11 +5349,11 @@ SYSCALL_DECLARE(remap_file_pages) } error = 0; out: - ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(&thread->vm->memory_range_lock); if (need_populate && (er = populate_process_memory( - proc, (void *)start, size))) { + thread->vm, (void *)start, size))) { ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):" "populate failed %d\n", start0, size, prot, pgoff, flags, er); @@ -5529,8 +5376,8 @@ SYSCALL_DECLARE(mremap) const ssize_t newsize = (newsize0 + PAGE_SIZE - 1) & PAGE_MASK; const uintptr_t oldstart = oldaddr; const uintptr_t oldend = oldstart + oldsize; - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; + struct thread *thread = cpu_local_var(current); + struct process_vm *vm = thread->vm; int error; struct vm_range *range; int need_relocate; @@ -5661,7 +5508,7 @@ SYSCALL_DECLARE(mremap) if (range->memobj) { memobj_ref(range->memobj); } - error = add_process_memory_range(proc, newstart, newend, -1, + error = add_process_memory_range(thread->vm, newstart, newend, -1, range->flag, range->memobj, range->objoff + (oldstart - range->start)); if (error) { @@ -5682,7 +5529,7 @@ SYSCALL_DECLARE(mremap) if (oldsize > 0) { size = (oldsize < newsize)? 
oldsize: newsize; ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - error = move_pte_range(vm->page_table, vm, + error = move_pte_range(vm->address_space->page_table, vm, (void *)oldstart, (void *)newstart, size); ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); @@ -5722,7 +5569,7 @@ SYSCALL_DECLARE(mremap) out: ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); if (!error && (lckstart < lckend)) { - error = populate_process_memory(proc, (void *)lckstart, (lckend - lckstart)); + error = populate_process_memory(thread->vm, (void *)lckstart, (lckend - lckstart)); if (error) { ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):" "populate failed. %d %#lx-%#lx\n", @@ -5746,8 +5593,8 @@ SYSCALL_DECLARE(msync) const size_t len = (len0 + PAGE_SIZE - 1) & PAGE_MASK; const uintptr_t start = start0; const uintptr_t end = start + len; - struct process *proc = cpu_local_var(current); - struct process_vm *vm = proc->vm; + struct thread *thread = cpu_local_var(current); + struct process_vm *vm = thread->vm; int error; uintptr_t addr; struct vm_range *range; @@ -5930,8 +5777,8 @@ static int do_process_vm_read_writev(int pid, size_t llen = 0, rlen = 0; size_t copied = 0; size_t to_copy; - struct process *lproc = cpu_local_var(current); - struct process *rproc, *p; + struct thread *lthread = cpu_local_var(current); + struct thread *rthread, *p; unsigned long rphys; unsigned long rpage_left; void *rva; @@ -5961,24 +5808,24 @@ static int do_process_vm_read_writev(int pid, /* Find remote process * XXX: are we going to have a hash table/function for this?? 
*/ - rproc = NULL; + rthread = NULL; for (i = 0; i < num_processors; i++) { struct cpu_local_var *v = get_cpu_local_var(i); ihk_mc_spinlock_lock_noirq(&(v->runq_lock)); list_for_each_entry(p, &(v->runq), sched_list) { - if (p->ftn->pid == pid) { - rproc = p; + if (p->proc->pid == pid) { + rthread = p; break; } } ihk_mc_spinlock_unlock_noirq(&(v->runq_lock)); - if (rproc) + if (rthread) break; } - if (!rproc) { + if (!rthread) { ret = -ESRCH; goto out; } @@ -5997,9 +5844,9 @@ static int do_process_vm_read_writev(int pid, if (pli != li) { struct vm_range *range; - ihk_mc_spinlock_lock_noirq(&lproc->vm->memory_range_lock); + ihk_mc_spinlock_lock_noirq(<hread->vm->memory_range_lock); - range = lookup_process_memory_range(lproc->vm, + range = lookup_process_memory_range(lthread->vm, (uintptr_t)local_iov[li].iov_base, (uintptr_t)(local_iov[li].iov_base + local_iov[li].iov_len)); @@ -6016,7 +5863,7 @@ static int do_process_vm_read_writev(int pid, ret = 0; pli_out: - ihk_mc_spinlock_unlock_noirq(&lproc->vm->memory_range_lock); + ihk_mc_spinlock_unlock_noirq(<hread->vm->memory_range_lock); if (ret != 0) { goto out; @@ -6035,7 +5882,7 @@ pli_out: addr < (remote_iov[ri].iov_base + remote_iov[ri].iov_len); addr += PAGE_SIZE) { - ret = page_fault_process_vm(rproc->vm, addr, reason); + ret = page_fault_process_vm(rthread->vm, addr, reason); if (ret) { ret = -EINVAL; goto out; @@ -6059,7 +5906,7 @@ pli_out: } /* TODO: remember page and do this only if necessary */ - ret = ihk_mc_pt_virt_to_phys(rproc->vm->page_table, + ret = ihk_mc_pt_virt_to_phys(rthread->vm->address_space->page_table, remote_iov[ri].iov_base + roff, &rphys); if (ret) { @@ -6219,9 +6066,15 @@ long syscall(int num, ihk_mc_user_context_t *ctx) { long l; + if(cpu_local_var(current)->proc->pstatus == PS_EXITED && + (num != __NR_exit && num != __NR_exit_group)){ + check_signal(-EINVAL, NULL, 0); + return -EINVAL; + } + cpu_enable_interrupt(); - if (cpu_local_var(current)->ftn->ptrace) { + if 
(cpu_local_var(current)->proc->ptrace) { ptrace_syscall_enter(cpu_local_var(current)); } @@ -6270,42 +6123,9 @@ long syscall(int num, ihk_mc_user_context_t *ctx) check_signal(l, NULL, num); check_need_resched(); - if (cpu_local_var(current)->ftn->ptrace) { + if (cpu_local_var(current)->proc->ptrace) { ptrace_syscall_exit(cpu_local_var(current)); } return l; } - -#if 0 -void __host_update_process_range(struct process *process, - struct vm_range *range) -{ - struct syscall_post *post; - int idx; - - memcpy_async_wait(&cpu_local_var(scp).post_fin); - - post = &cpu_local_var(scp).post_buf; - - post->v[0] = 1; - post->v[1] = range->start; - post->v[2] = range->end; - post->v[3] = range->phys; - - cpu_disable_interrupt(); - if (cpu_local_var(scp).post_idx >= - PAGE_SIZE / sizeof(struct syscall_post)) { - /* XXX: Wait until it is consumed */ - } else { - idx = ++(cpu_local_var(scp).post_idx); - - cpu_local_var(scp).post_fin = 0; - memcpy_async(cpu_local_var(scp).post_pa + - idx * sizeof(*post), - virt_to_phys(post), sizeof(*post), 0, - &cpu_local_var(scp).post_fin); - } - cpu_enable_interrupt(); -} -#endif diff --git a/kernel/timer.c b/kernel/timer.c index 5125baf4..b6d331a6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -57,14 +57,14 @@ uint64_t schedule_timeout(uint64_t timeout) { struct waitq_entry my_wait; struct timer my_timer; - struct process *proc = cpu_local_var(current); + struct thread *thread = cpu_local_var(current); int irqstate; int spin_sleep; - irqstate = ihk_mc_spinlock_lock(&proc->spin_sleep_lock); + irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock); dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout); - spin_sleep = ++proc->spin_sleep; - ihk_mc_spinlock_unlock(&proc->spin_sleep_lock, irqstate); + spin_sleep = ++thread->spin_sleep; + ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); /* Spin sleep.. 
*/ for (;;) { @@ -72,10 +72,10 @@ uint64_t schedule_timeout(uint64_t timeout) uint64_t t_e; int spin_over = 0; - irqstate = ihk_mc_spinlock_lock(&proc->spin_sleep_lock); + irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock); /* Woken up by someone? */ - if (proc->spin_sleep < 1) { + if (thread->spin_sleep < 1) { t_e = rdtsc(); spin_over = 1; @@ -87,7 +87,7 @@ uint64_t schedule_timeout(uint64_t timeout) } } - ihk_mc_spinlock_unlock(&proc->spin_sleep_lock, irqstate); + ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); if (!spin_over) { t_s = rdtsc(); @@ -97,12 +97,12 @@ uint64_t schedule_timeout(uint64_t timeout) need_schedule = v->runq_len > 1 ? 1 : 0; ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); - /* Give a chance to another process (if any) in case the core is + /* Give a chance to another thread (if any) in case the core is * oversubscribed, but make sure we will be re-scheduled */ if (need_schedule) { - xchg4(&(cpu_local_var(current)->ftn->status), PS_RUNNING); + xchg4(&(cpu_local_var(current)->tstatus), PS_RUNNING); schedule(); - xchg4(&(cpu_local_var(current)->ftn->status), + xchg4(&(cpu_local_var(current)->tstatus), PS_INTERRUPTIBLE); } else { @@ -125,7 +125,7 @@ uint64_t schedule_timeout(uint64_t timeout) dkprintf("schedule_timeout() spin woken up, timeout: %lu\n", timeout); - /* Give a chance to another process (if any) in case we timed out, + /* Give a chance to another thread (if any) in case we timed out, * but make sure we will be re-scheduled */ if (timeout == 0) { int need_schedule; @@ -137,18 +137,18 @@ uint64_t schedule_timeout(uint64_t timeout) ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); if (need_schedule) { - xchg4(&(cpu_local_var(current)->ftn->status), PS_RUNNING); + xchg4(&(cpu_local_var(current)->tstatus), PS_RUNNING); schedule(); - xchg4(&(cpu_local_var(current)->ftn->status), + xchg4(&(cpu_local_var(current)->tstatus), PS_INTERRUPTIBLE); } } - irqstate = ihk_mc_spinlock_lock(&proc->spin_sleep_lock); - if 
(spin_sleep == proc->spin_sleep) { - --proc->spin_sleep; + irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock); + if (spin_sleep == thread->spin_sleep) { + --thread->spin_sleep; } - ihk_mc_spinlock_unlock(&proc->spin_sleep_lock, irqstate); + ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate); return timeout; } @@ -156,7 +156,7 @@ uint64_t schedule_timeout(uint64_t timeout) /* Init waitq and wait entry for this timer */ my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout; - my_timer.proc = cpu_local_var(current); + my_timer.thread = cpu_local_var(current); waitq_init(&my_timer.processes); waitq_init_entry(&my_wait, cpu_local_var(current)); @@ -213,7 +213,7 @@ void wake_timers_loop(void) list_del(&timer->list); dkprintf("timers timeout occurred, waking up pid: %d\n", - timer->proc->ftn->pid); + timer->thread->proc->pid); waitq_wakeup(&timer->processes); } diff --git a/kernel/waitq.c b/kernel/waitq.c index cbe612b3..90dbeccb 100644 --- a/kernel/waitq.c +++ b/kernel/waitq.c @@ -19,7 +19,7 @@ int default_wake_function(waitq_entry_t *entry, unsigned mode, int flags, void *key) { - return sched_wakeup_process(entry->private, PS_NORMAL); + return sched_wakeup_thread(entry->private, PS_NORMAL); } void @@ -30,7 +30,7 @@ waitq_init(waitq_t *waitq) } void -waitq_init_entry(waitq_entry_t *entry, struct process *proc) +waitq_init_entry(waitq_entry_t *entry, struct thread *proc) { entry->private = proc; entry->func = default_wake_function; @@ -89,14 +89,14 @@ waitq_prepare_to_wait(waitq_t *waitq, waitq_entry_t *entry, int state) ihk_mc_spinlock_lock_noirq(&waitq->lock); if (list_empty(&entry->link)) list_add(&entry->link, &waitq->waitq); - cpu_local_var(current)->ftn->status = state; + cpu_local_var(current)->tstatus = state; ihk_mc_spinlock_unlock_noirq(&waitq->lock); } void waitq_finish_wait(waitq_t *waitq, waitq_entry_t *entry) { - cpu_local_var(current)->ftn->status = PS_RUNNING; + cpu_local_var(current)->tstatus = PS_RUNNING; 
waitq_remove_entry(waitq, entry); }