From fa1be382c7aeef8d352b7e2c02d4f8ab837674dd Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Tue, 28 May 2013 10:49:33 +0900 Subject: [PATCH 1/4] release the resources of the process at exit(2)/exit_group(2). --- arch/x86/kernel/context.S | 7 ++++ arch/x86/kernel/include/arch-memory.h | 6 +++ arch/x86/kernel/memory.c | 44 ++++++++++++++++++++ kernel/include/process.h | 5 ++- kernel/process.c | 58 +++++++++++++++++++++------ kernel/syscall.c | 38 ++++++++++++------ lib/include/ihk/cpu.h | 7 +++- lib/include/ihk/mm.h | 2 + 8 files changed, 140 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/context.S b/arch/x86/kernel/context.S index 8f0d709d..eb7cb728 100644 --- a/arch/x86/kernel/context.S +++ b/arch/x86/kernel/context.S @@ -4,8 +4,14 @@ (X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0) .text + .globl ihk_mc_switch_context ihk_mc_switch_context: + /* + * rdi - ihk_mc_kernel_context_t *old_ctx + * rsi - ihk_mc_kernel_context_t *new_ctx + * rdx - void *prev + */ pushfq popq %rax testq %rdi, %rdi @@ -35,4 +41,5 @@ ihk_mc_switch_context: popfq movq 8(%rsi), %rbp movq 24(%rsi), %rsi + movq %rdx,%rax retq diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index f926161e..9bbee81e 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -42,6 +42,12 @@ #define PT_ENTRIES 512 +/* mask of the physical address of the entry to the page table */ +#define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK) + +#define PF_PRESENT 0x01 /* entry is valid */ +#define PF_SIZE 0x80 /* entry points large page */ + #define PFL4_PRESENT 0x01 #define PFL4_WRITABLE 0x02 #define PFL4_USER 0x04 diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 03239e6b..041b56ab 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -557,6 +557,50 @@ struct page_table *ihk_mc_pt_create(void) return pt; } +static void destroy_page_table(int level, struct page_table *pt) +{ + int ix; + 
unsigned long entry; + struct page_table *lower; + + if ((level < 1) || (4 < level)) { + panic("destroy_page_table: level is out of range"); + } + if (pt == NULL) { + panic("destroy_page_table: pt is NULL"); + } + + if (level > 1) { + for (ix = 0; ix < PT_ENTRIES; ++ix) { + entry = pt->entry[ix]; + if (!(entry & PF_PRESENT)) { + /* entry is not present: nothing below it to free */ + continue; + } + if (entry & PF_SIZE) { + /* large page mapping, not a lower-level page table */ + continue; + } + lower = (struct page_table *)phys_to_virt(entry & PT_PHYSMASK); + destroy_page_table(level-1, lower); + } + } + + arch_free_page(pt); /* finally release this table itself */ + return; +} + +void ihk_mc_pt_destroy(struct page_table *pt) +{ + const int level = 4; /* PML4 */ + + /* clear the upper half of the PML4 (the shared entries) so that the walk below skips them and does not free what they point to */ + memset(pt->entry + PT_ENTRIES / 2, 0, sizeof(pt->entry[0]) * PT_ENTRIES / 2); + + destroy_page_table(level, pt); + return; +} + int ihk_mc_pt_clear_page(page_table_t pt, void *virt) { return __clear_pt_page(pt, virt, 0); diff --git a/kernel/include/process.h b/kernel/include/process.h index f57ff567..9fd90b10 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -50,6 +50,7 @@ struct process { int status; int cpu_id; + ihk_atomic_t refcount; /* reference count; see hold_process()/free_process() */ struct process_vm *vm; ihk_mc_kernel_context_t ctx; @@ -73,6 +74,7 @@ struct process_vm { struct page_table *page_table; struct list_head vm_range_list; struct vm_regions region; + struct process *owner_process; /* process that resides on the same page as this vm */ // Address space private futexes struct futex_queue futex_queues[1 << FUTEX_HASHBITS]; @@ -90,7 +92,8 @@ struct process_vm { struct process *create_process(unsigned long user_pc); struct process *clone_process(struct process *org, unsigned long pc, unsigned long sp); -void destroy_process(struct process *proc); +void hold_process(struct process *proc); +void free_process(struct process *proc); void free_process_memory(struct process *proc); int add_process_memory_range(struct process *process, diff --git a/kernel/process.c b/kernel/process.c index 2b334d6a..fb0de1e2
100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -23,7 +23,7 @@ extern long do_arch_prctl(unsigned long code, unsigned long address); -void init_process_vm(struct process_vm *vm) +static void init_process_vm(struct process *owner, struct process_vm *vm) { int i; @@ -33,6 +33,8 @@ void init_process_vm(struct process_vm *vm) ihk_atomic_set(&vm->refcount, 1); INIT_LIST_HEAD(&vm->vm_range_list); vm->page_table = ihk_mc_pt_create(); + hold_process(owner); + vm->owner_process = owner; /* Initialize futex queues */ for (i = 0; i < (1 << FUTEX_HASHBITS); ++i) @@ -49,6 +51,7 @@ struct process *create_process(unsigned long user_pc) return NULL; memset(proc, 0, sizeof(struct process)); + ihk_atomic_set(&proc->refcount, 2); /* one for exit, another for wait */ ihk_mc_init_user_process(&proc->ctx, &proc->uctx, ((char *)proc) + @@ -56,7 +59,7 @@ struct process *create_process(unsigned long user_pc) proc->vm = (struct process_vm *)(proc + 1); - init_process_vm(proc->vm); + init_process_vm(proc, proc->vm); ihk_mc_spinlock_init(&proc->spin_sleep_lock); proc->spin_sleep = 0; @@ -72,6 +75,7 @@ struct process *clone_process(struct process *org, unsigned long pc, proc = ihk_mc_alloc_pages(KERNEL_STACK_NR_PAGES, 0); memset(proc, 0, KERNEL_STACK_NR_PAGES); + ihk_atomic_set(&proc->refcount, 2); /* one for exit, another for wait */ /* NOTE: sp is the user mode stack! 
*/ ihk_mc_init_user_process(&proc->ctx, &proc->uctx, @@ -394,12 +398,18 @@ extern void print_free_list(void); void free_process_memory(struct process *proc) { struct vm_range *range, *next; + struct process_vm *vm = proc->vm; - if (!ihk_atomic_dec_and_test(&proc->vm->refcount)) { + if (vm == NULL) { return; } - list_for_each_entry_safe(range, next, &proc->vm->vm_range_list, + proc->vm = NULL; /* detach first so that a repeated call for this process returns early */ + if (!ihk_atomic_dec_and_test(&vm->refcount)) { /* not the last reference to the vm */ + return; + } + + list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { if (!(range->flag & VR_REMOTE) && !(range->flag & VR_IO_NOCACHE) && @@ -411,13 +421,28 @@ void free_process_memory(struct process *proc) list_del(&range->list); ihk_mc_free(range); } - /* TODO: Free page tables */ - proc->status = PS_ZOMBIE; + + ihk_mc_pt_destroy(vm->page_table); + free_process(vm->owner_process); /* drop the reference taken by init_process_vm() */ } -void destroy_process(struct process *proc) +void hold_process(struct process *proc) { - ihk_mc_free_pages(proc, 1); + if (proc->status & (PS_ZOMBIE | PS_EXITED)) { + panic("hold_process: already exited process"); + } + + ihk_atomic_inc(&proc->refcount); /* take one more reference on the process */ + return; +} + +void free_process(struct process *proc) +{ + if (!ihk_atomic_dec_and_test(&proc->refcount)) { /* still referenced elsewhere */ + return; + } + + ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES); /* last reference dropped: release the pages of the process */ } static void idle(void) @@ -465,6 +490,7 @@ void schedule(void) struct process *next, *prev, *proc, *tmp = NULL; int switch_ctx = 0; unsigned long irqstate; + struct process *last; irqstate = ihk_mc_spinlock_lock(&(v->runq_lock)); @@ -477,10 +503,14 @@ void schedule(void) --v->runq_len; /* Round-robin if not exited yet */ - if (prev->status != PS_EXITED) { + if (!(prev->status & (PS_ZOMBIE | PS_EXITED))) { list_add_tail(&prev->sched_list, &(v->runq)); ++v->runq_len; } + + if (!v->runq_len) { + v->status = CPU_STATUS_IDLE; + } } /* Pick a new running process */ @@ -501,7 +531,6 @@ void schedule(void) v->current = next; } - if (switch_ctx) { dkprintf("[%d] schedule: %d => %d \n", ihk_mc_get_processor_id(), @@
-518,10 +547,15 @@ void schedule(void) ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate); if (prev) { - ihk_mc_switch_context(&prev->ctx, &next->ctx); + last = ihk_mc_switch_context(&prev->ctx, &next->ctx, prev); } else { - ihk_mc_switch_context(NULL, &next->ctx); + last = ihk_mc_switch_context(NULL, &next->ctx, prev); + } + + if ((last != NULL) && (last->status & (PS_ZOMBIE | PS_EXITED))) { /* the process we switched away from has exited: release it from the new context */ + free_process_memory(last); + free_process(last); } } else { diff --git a/kernel/syscall.c b/kernel/syscall.c index 693ceac2..19c5249c 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -349,18 +349,24 @@ SYSCALL_DECLARE(lseek) SYSCALL_DECLARE(exit_group) { SYSCALL_HEADER; + struct process *proc = cpu_local_var(current); #ifdef DCFA_KMOD do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); #endif - do_syscall(&request, ctx); - runq_del_proc(cpu_local_var(current), ihk_mc_get_processor_id()); - free_process_memory(cpu_local_var(current)); + /* XXX: send SIGKILL to all threads in this process */ + + do_syscall(&request, ctx); + +#define IS_DETACHED_PROCESS(proc) (1) /* should be implemented in the future */ + proc->status = PS_ZOMBIE; + if (IS_DETACHED_PROCESS(proc)) { + /* release a reference for wait(2) */ + proc->status = PS_EXITED; + free_process(proc); + } - //cpu_local_var(next) = &cpu_local_var(idle); - - cpu_local_var(current) = NULL; schedule(); return 0; @@ -962,26 +968,34 @@ SYSCALL_DECLARE(futex) SYSCALL_DECLARE(exit) { + struct process *proc = cpu_local_var(current); + #ifdef DCFA_KMOD do_mod_exit((int)ihk_mc_syscall_arg0(ctx)); #endif + /* XXX: if every thread calls exit(2) rather than exit_group(2), + * exit(2) should also delegate. + */ /* If there is a clear_child_tid address set, clear it and wake it. * This unblocks any pthread_join() waiters.
*/ - if (cpu_local_var(current)->thread.clear_child_tid) { + if (proc->thread.clear_child_tid) { kprintf("exit clear_child!\n"); - *cpu_local_var(current)->thread.clear_child_tid = 0; + *proc->thread.clear_child_tid = 0; barrier(); - futex((uint32_t *)cpu_local_var(current)->thread.clear_child_tid, + futex((uint32_t *)proc->thread.clear_child_tid, FUTEX_WAKE, 1, 0, NULL, 0, 0); } - runq_del_proc(cpu_local_var(current), cpu_local_var(current)->cpu_id); - free_process_memory(cpu_local_var(current)); + proc->status = PS_ZOMBIE; + if (IS_DETACHED_PROCESS(proc)) { + /* release a reference for wait(2) */ + proc->status = PS_EXITED; + free_process(proc); + } - cpu_local_var(current) = NULL; schedule(); return 0; diff --git a/lib/include/ihk/cpu.h b/lib/include/ihk/cpu.h index f0ee05ad..87e4f467 100644 --- a/lib/include/ihk/cpu.h +++ b/lib/include/ihk/cpu.h @@ -50,8 +50,11 @@ void ihk_mc_init_ap(void); void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx, void *stack_pointer, void (*next_function)(void)); -void ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx, - ihk_mc_kernel_context_t *new_ctx); + +/* returns the 'prev' argument of the call that caused the switch to the context returned. 
*/ +void *ihk_mc_switch_context(ihk_mc_kernel_context_t *old_ctx, + ihk_mc_kernel_context_t *new_ctx, + void *prev); int ihk_mc_interrupt_cpu(int cpu, int vector); void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx, diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 354641d4..64b7148b 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -91,7 +91,9 @@ int ihk_mc_pt_clear_page(page_table_t pt, void *virt); int ihk_mc_pt_prepare_map(page_table_t pt, void *virt, unsigned long size, enum ihk_mc_pt_prepare_flag); +/* XXX: proper use of struct page_table and page_table_t is unknown */ struct page_table *ihk_mc_pt_create(void); +void ihk_mc_pt_destroy(struct page_table *pt); void ihk_mc_load_page_table(struct page_table *pt); int ihk_mc_pt_virt_to_phys(struct page_table *pt, void *virt, unsigned long *phys); From 0b97235b9f28c02060ef88c6b7bc5f92189ffa3a Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Tue, 28 May 2013 11:01:52 +0900 Subject: [PATCH 2/4] fix release leakage of extra pages --- kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/process.c b/kernel/process.c index fb0de1e2..b42e5daa 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -349,6 +349,9 @@ unsigned long extend_process_region(struct process *proc, if (p_aligned > (unsigned long)p) { free_pages(p, (p_aligned - (unsigned long)p) >> PAGE_SHIFT); } + free_pages( + (void *)(p_aligned + aligned_new_end - aligned_end), + (LARGE_PAGE_SIZE - (p_aligned - (unsigned long)p)) >> PAGE_SHIFT); add_process_memory_range(proc, aligned_end, aligned_new_end, virt_to_phys((void *)p_aligned), flag); From db17c4a7b05001aeb2868957855d1be222a275a5 Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Tue, 28 May 2013 11:09:19 +0900 Subject: [PATCH 3/4] use the PID of the mcexec as PID of the McKernel process. 
--- kernel/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/host.c b/kernel/host.c index 4e080bb0..4f7a4831 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -63,7 +63,7 @@ static void process_msg_prepare_process(unsigned long rphys) + sizeof(struct program_image_section) * n); proc = create_process(p->entry); - proc->pid = 1024; + proc->pid = pn->pid; proc->vm->region.user_start = pn->user_start; proc->vm->region.user_end = pn->user_end; From f8f91488b3b613b27f5ae231614f27be7f10aeeb Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Tue, 28 May 2013 10:57:20 +0900 Subject: [PATCH 4/4] fix release leakage of args_envs --- kernel/host.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/host.c b/kernel/host.c index 4f7a4831..f7872bff 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -186,9 +186,7 @@ static void process_msg_prepare_process(unsigned long rphys) args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, 0); args_envs_p = virt_to_phys(args_envs); - add_process_memory_range(proc, addr, e, - args_envs_p, - VR_RESERVED); + add_process_memory_range(proc, addr, e, args_envs_p, 0); dkprintf("args_envs mapping\n");