From 40f8091fab428d18fe30fb36d597b57c9084c8f9 Mon Sep 17 00:00:00 2001
From: Masamichi Takagi
Date: Thu, 16 Jul 2020 16:58:23 +0900
Subject: [PATCH] stack: grow on page fault

The technique to replace the stack with a hugetlbfs map takes the
following steps:
(1) Prepare a hugetlbfs map with the size of rlim_cur.
(2) Copy the active region of the stack to the hugetlbfs map. The
    range to copy is determined by reading /proc/[pid]/maps.
(3) Replace the stack map with the hugetlbfs map.

Step (2) would copy a huge region if McKernel didn't grow the stack at
run-time, because the whole rlim_cur-sized stack was mapped up front.
This patch therefore maps only the minimum stack size at process start
and grows the range on page fault.
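For illustration, the range discovery in step (2) could look roughly
like the minimal userspace sketch below. It is not part of this patch;
the "[stack]" marker and the use of getpid() are assumptions made for
the example, and error handling is reduced to the bare minimum.

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Scan /proc/[pid]/maps for the "[stack]" entry and return its
     * bounds; this only illustrates how the copy range in step (2)
     * can be determined. */
    static int find_stack_range(pid_t pid, unsigned long *start,
                                unsigned long *end)
    {
        char path[64], line[256];
        FILE *fp;
        int found = 0;

        snprintf(path, sizeof(path), "/proc/%d/maps", (int)pid);
        fp = fopen(path, "r");
        if (!fp)
            return -1;
        while (fgets(line, sizeof(line), fp)) {
            if (strstr(line, "[stack]") &&
                sscanf(line, "%lx-%lx", start, end) == 2) {
                found = 1;
                break;
            }
        }
        fclose(fp);
        return found ? 0 : -1;
    }

    int main(void)
    {
        unsigned long start, end;

        if (find_stack_range(getpid(), &start, &end) == 0)
            printf("stack: %lx-%lx (%lu bytes active)\n",
                   start, end, end - start);
        return 0;
    }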
"out of range: %d\n", + __func__, vm, fault_addr0, reason, error); + goto out; + } } if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE) @@ -2402,16 +2454,15 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, size = minsz; } size = (size + USER_STACK_PREPAGE_SIZE - 1) & USER_STACK_PAGE_MASK; - dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu\n", - __FUNCTION__, - pn->stack_premap, - proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, - minsz, size); - start = (end - size) & USER_STACK_PAGE_MASK; + dkprintf("%s: stack_premap: %lu, rlim_cur: %lu, minsz: %lu, size: %lu, maxsz: %lx\n", + __func__, pn->stack_premap, + proc->rlimit[MCK_RLIMIT_STACK].rlim_cur, + minsz, size, maxsz); + start = (end - minsz) & USER_STACK_PAGE_MASK; /* Apply user allocation policy to stacks */ /* TODO: make threshold kernel or mcexec argument */ - ap_flag = (size >= proc->mpol_threshold && + ap_flag = (minsz >= proc->mpol_threshold && !(proc->mpol_flags & MPOL_NO_STACK)) ? IHK_MC_AP_USER : 0; dkprintf("%s: max size: %lu, mapped size: %lu %s\n", __FUNCTION__, size, minsz, @@ -2543,7 +2594,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, ihk_mc_modify_user_context(thread->uctx, IHK_UCR_STACK_POINTER, end + sizeof(unsigned long) * s_ind); thread->vm->region.stack_end = end; - thread->vm->region.stack_start = start; + thread->vm->region.stack_start = (end - size) & USER_STACK_PAGE_MASK; return 0; }