From 11bb334bd4661478647db58799baa1e3ffb05048 Mon Sep 17 00:00:00 2001
From: NAKAMURA Gou
Date: Tue, 1 Jul 2014 18:52:31 +0900
Subject: [PATCH] support mremap(2)

fixes #22
---
 arch/x86/kernel/include/syscall_list.h |   1 +
 arch/x86/kernel/memory.c               |  90 +++++++++
 kernel/include/mman.h                  |   6 +
 kernel/include/process.h               |   3 +
 kernel/process.c                       |  41 ++++
 kernel/syscall.c                       | 251 +++++++++++++++++++++++++
 lib/include/ihk/mm.h                   |   1 +
 7 files changed, 393 insertions(+)

diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h
index cb246049..23209802 100644
--- a/arch/x86/kernel/include/syscall_list.h
+++ b/arch/x86/kernel/include/syscall_list.h
@@ -40,6 +40,7 @@ SYSCALL_DELEGATED(18, pwrite64)
 SYSCALL_DELEGATED(20, writev)
 SYSCALL_DELEGATED(21, access)
 SYSCALL_HANDLED(24, sched_yield)
+SYSCALL_HANDLED(25, mremap)
 SYSCALL_HANDLED(28, madvise)
 SYSCALL_HANDLED(34, pause)
 SYSCALL_HANDLED(39, getpid)
diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index d1118c00..ac5cb12a 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -1802,6 +1802,96 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
 	return attr;
 }
 
+/* Argument passed to move_one_page() by move_pte_range(). */
+struct move_args {
+	uintptr_t src;		/* start of the source range */
+	uintptr_t dest;		/* start of the destination range */
+};
+
+/*
+ * PTE visitor for move_pte_range(): move the entry that maps pgaddr to
+ * the same offset within the destination range.
+ *
+ * The entry is swapped with PTE_NULL, and its physical address and
+ * attribute bits are then installed at the destination address with
+ * ihk_mc_pt_set_range().  Entries for which pte_is_fileoff() is true
+ * are rejected with -ENOTSUPP.  Returns 0 or a negative error code.
+ *
+ * NOTE(review): if ihk_mc_pt_set_range() fails, the source entry has
+ * already been cleared and is not restored -- confirm this is intended.
+ */
+static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, void *pgaddr, size_t pgsize)
+{
+	int error;
+	struct move_args *args = arg0;
+	uintptr_t dest;
+	pte_t apte;
+	uintptr_t phys;
+	enum ihk_mc_pt_attribute attr;
+
+	dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
+			arg0, pt, ptep, *ptep, pgaddr, pgsize);
+	if (pte_is_fileoff(ptep, pgsize)) {
+		error = -ENOTSUPP;
+		kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):fileoff. %d\n",
+				arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
+		goto out;
+	}
+
+	dest = args->dest + ((uintptr_t)pgaddr - args->src);
+
+	apte = PTE_NULL;
+	pte_xchg(ptep, &apte);
+
+	phys = apte & PT_PHYSMASK;
+	attr = apte & ~PT_PHYSMASK;
+
+	error = ihk_mc_pt_set_range(pt, (void *)dest,
+			(void *)(dest + pgsize), phys, attr);
+	if (error) {
+		kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):"
+				"set failed. %d\n",
+				arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
+		goto out;
+	}
+
+	error = 0;
+out:
+	dkprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
+			arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
+	return error;
+}
+
+/*
+ * Move the page table entries of [src, src + size) to the range that
+ * starts at dest, by running move_one_page() over the source range with
+ * VPTEF_SKIP_NULL.  The TLB is flushed whether or not the walk
+ * succeeded.  Returns the result of visit_pte_range().
+ * sys_mremap() calls this with vm->page_table_lock held.
+ */
+int move_pte_range(page_table_t pt, void *src, void *dest, size_t size)
+{
+	int error;
+	struct move_args args;
+
+	dkprintf("move_pte_range(%p,%p,%p,%#lx)\n", pt, src, dest, size);
+	args.src = (uintptr_t)src;
+	args.dest = (uintptr_t)dest;
+
+	error = visit_pte_range(pt, src, src+size, VPTEF_SKIP_NULL,
+			&move_one_page, &args);
+	flush_tlb();	/* XXX: TLB flush */
+	if (error) {
+		goto out;
+	}
+
+	error = 0;
+out:
+	dkprintf("move_pte_range(%p,%p,%p,%#lx):%d\n",
+			pt, src, dest, size, error);
+	return error;
+}
+
 void load_page_table(struct page_table *pt)
 {
 	unsigned long pt_addr;
diff --git a/kernel/include/mman.h b/kernel/include/mman.h
index c29ef8da..b7555eef 100644
--- a/kernel/include/mman.h
+++ b/kernel/include/mman.h
@@ -63,4 +63,10 @@
 #define MADV_HWPOISON 100
 #define MADV_SOFT_OFFLINE 101
 
+/*
+ * for mremap()
+ */
+#define MREMAP_MAYMOVE 0x01
+#define MREMAP_FIXED 0x02
+
 #endif /* HEADER_MMAN_H */
diff --git a/kernel/include/process.h b/kernel/include/process.h
index 2688c887..0011a68e 100644
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@@ -29,6 +29,7 @@
 #define VR_DEMAND_PAGING 0x1000
 #define VR_PRIVATE 0x2000
 #define VR_LOCKED 0x4000
+#define VR_FILEOFF 0x8000 /* remap_file_pages()ed range */
 #define VR_PROT_NONE 0x00000000
 #define VR_PROT_READ 0x00010000
 #define VR_PROT_WRITE 0x00020000
@@ -236,6 +237,8 @@ struct vm_range *next_process_memory_range(
 		struct process_vm *vm, struct vm_range *range);
 struct vm_range *previous_process_memory_range(
 		struct process_vm *vm, struct vm_range *range);
+int extend_up_process_memory_range(struct process_vm *vm,
+		struct vm_range *range, uintptr_t newend);
 
 int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
 int remove_process_region(struct process *proc,
diff --git a/kernel/process.c b/kernel/process.c
index 174a1181..18ce28d9 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -936,6 +936,47 @@ struct vm_range *previous_process_memory_range(
 	return prev;
 }
 
+/*
+ * Grow a memory range upward in place by setting range->end to newend.
+ *
+ * Returns -EINVAL if newend is not beyond the current end, -EPERM if
+ * newend exceeds vm->region.user_end and -ENOMEM if the next range
+ * starts below newend.  On success only range->end is updated; no page
+ * table entries are touched here.  Returns 0 on success.
+ */
+int extend_up_process_memory_range(struct process_vm *vm,
+		struct vm_range *range, uintptr_t newend)
+{
+	int error;
+	struct vm_range *next;
+
+	dkprintf("extend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx)\n",
+			vm, range, range->start, range->end, newend);
+	if (newend <= range->end) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	if (vm->region.user_end < newend) {
+		error = -EPERM;
+		goto out;
+	}
+
+	next = next_process_memory_range(vm ,range);
+	if (next && (next->start < newend)) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	error = 0;
+	range->end = newend;
+
+out:
+	dkprintf("extend_up_process_memory_range(%p,%p %#lx-%#lx,%#lx):%d\n",
+			vm, range, range->start, range->end, newend, error);
+	return error;
+}
+
 int change_prot_process_memory_range(struct process *proc,
 		struct vm_range *range, unsigned long protflag)
 {
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 6d86ff3d..5fbf73ba 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -2315,6 +2315,7 @@ SYSCALL_DECLARE(remap_file_pages)
 		goto out;
 	}
 
+	range->flag |= VR_FILEOFF;
 	error = remap_process_memory_range(proc->vm, range, start, end, off);
 	if (error) {
 		ekprintf("sys_remap_file_pages(%#lx,%#lx,%#x,%#lx,%#x):"
@@ -2345,6 +2346,256 @@ out:
 	return error;
 }
 
+/*
+ * mremap(2): resize and/or relocate an existing mapping.
+ *
+ * Arguments: old address (must be page aligned), old size, new size,
+ * flags (MREMAP_MAYMOVE, MREMAP_FIXED) and new address (checked only
+ * with MREMAP_FIXED).  Both sizes are rounded up to a page multiple.
+ *
+ * Growing is first tried in place when the old range ends where its
+ * vm_range ends.  Otherwise, and always with MREMAP_FIXED, a new range
+ * is added, the page table entries are moved to it and the old range is
+ * unmapped.  Shrinking without relocation unmaps the tail.  For a
+ * VR_LOCKED range the extension, or the relocated range, is populated
+ * after memory_range_lock has been released.
+ *
+ * Returns the start address of the resulting mapping, or a negative
+ * error code.
+ *
+ * NOTE(review): if moving the entries or unmapping the old range fails,
+ * the newly added range is left in place -- confirm a rollback is not
+ * needed.
+ */
+SYSCALL_DECLARE(mremap)
+{
+	const uintptr_t oldaddr = ihk_mc_syscall_arg0(ctx);
+	const size_t oldsize0 = ihk_mc_syscall_arg1(ctx);
+	const size_t newsize0 = ihk_mc_syscall_arg2(ctx);
+	const int flags = ihk_mc_syscall_arg3(ctx);
+	const uintptr_t newaddr = ihk_mc_syscall_arg4(ctx);
+	const ssize_t oldsize = (oldsize0 + PAGE_SIZE - 1) & PAGE_MASK;
+	const ssize_t newsize = (newsize0 + PAGE_SIZE - 1) & PAGE_MASK;
+	const uintptr_t oldstart = oldaddr;
+	const uintptr_t oldend = oldstart + oldsize;
+	struct process *proc = cpu_local_var(current);
+	struct process_vm *vm = proc->vm;
+	int error;
+	struct vm_range *range;
+	int need_relocate;
+	uintptr_t newstart;
+	uintptr_t newend;
+	size_t size;
+	uintptr_t ret;
+	uintptr_t lckstart = -1;
+	uintptr_t lckend = -1;
+
+	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx)\n",
+			oldaddr, oldsize0, newsize0, flags, newaddr);
+	ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
+
+	if ((oldaddr & ~PAGE_MASK)
+			|| (oldsize < 0)
+			|| (newsize <= 0)
+			|| (flags & ~(MREMAP_MAYMOVE | MREMAP_FIXED))
+			|| ((flags & MREMAP_FIXED)
+				&& !(flags & MREMAP_MAYMOVE))
+			|| ((flags & MREMAP_FIXED)
+				&& (newaddr & ~PAGE_MASK))) {
+		error = -EINVAL;
+		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):invalid. %d\n",
+				oldaddr, oldsize0, newsize0, flags, newaddr,
+				error);
+		goto out;
+	}
+
+	/* check original mapping */
+	range = lookup_process_memory_range(vm, oldstart, oldstart+PAGE_SIZE);
+	if (!range || (oldstart < range->start) || (range->end < oldend)
+			|| (range->flag & (VR_FILEOFF))
+			|| (range->flag & (VR_REMOTE|VR_IO_NOCACHE|VR_RESERVED))) {
+		error = -EFAULT;
+		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+				"lookup failed. %d %p %#lx-%#lx %#lx\n",
+				oldaddr, oldsize0, newsize0, flags, newaddr,
+				error, range, range?range->start:0,
+				range?range->end:0, range?range->flag:0);
+		goto out;
+	}
+
+	if (oldend < oldstart) {
+		error = -EINVAL;
+		ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+				"old range overflow. %d\n",
+				oldaddr, oldsize0, newsize0, flags, newaddr,
+				error);
+		goto out;
+	}
+
+	/* determine new mapping range */
+	need_relocate = 0;
+	if (flags & MREMAP_FIXED) {
+		need_relocate = 1;
+		newstart = newaddr;
+		newend = newstart + newsize;
+		if (newstart < vm->region.user_start) {
+			error = -EPERM;
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"mmap_min_addr %#lx. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, vm->region.user_start,
+					error);
+			goto out;
+		}
+		/* reject a destination that wraps or leaves user space */
+		if ((newend < newstart)
+				|| (vm->region.user_end < newend)) {
+			error = -EINVAL;
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"fixed:out of range. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			goto out;
+		}
+		if ((newstart < oldend) && (oldstart < newend)) {
+			error = -EINVAL;
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"fixed:overlapped. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			goto out;
+		}
+	}
+	else if (!(flags & MREMAP_FIXED) && (oldsize < newsize)) {
+		if (oldend == range->end) {
+			newstart = oldstart;
+			newend = newstart + newsize;
+			error = extend_up_process_memory_range(vm, range,
+					newend);
+			if (!error) {
+				if (range->flag & VR_LOCKED) {
+					lckstart = oldend;
+					lckend = newend;
+				}
+				goto out;
+			}
+		}
+		if (!(flags & MREMAP_MAYMOVE)) {
+			error = -ENOMEM;
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"cannot relocate. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			goto out;
+		}
+		need_relocate = 1;
+		error = search_free_space(newsize, vm->region.map_end,
+				(intptr_t *)&newstart);
+		if (error) {
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"search failed. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			goto out;
+		}
+		newend = newstart + newsize;
+	}
+	else {
+		newstart = oldstart;
+		newend = newstart + newsize;
+	}
+
+	/* do the remap */
+	if (need_relocate) {
+		if (flags & MREMAP_FIXED) {
+			error = do_munmap((void *)newstart, newsize);
+			if (error) {
+				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+						"fixed:munmap failed. %d\n",
+						oldaddr, oldsize0, newsize0,
+						flags, newaddr, error);
+				goto out;
+			}
+		}
+		if (range->memobj) {
+			memobj_ref(range->memobj);
+		}
+		error = add_process_memory_range(proc, newstart, newend, -1,
+				range->flag, range->memobj,
+				range->objoff + (oldstart - range->start));
+		if (error) {
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"add failed. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			if (range->memobj) {
+				memobj_release(range->memobj);
+			}
+			goto out;
+		}
+		if (range->flag & VR_LOCKED) {
+			lckstart = newstart;
+			lckend = newend;
+		}
+
+		if (oldsize > 0) {
+			size = (oldsize < newsize)? oldsize: newsize;
+			ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
+			error = move_pte_range(vm->page_table,
+					(void *)oldstart, (void *)newstart,
+					size);
+			ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
+			if (error) {
+				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+						"move failed. %d\n",
+						oldaddr, oldsize0, newsize0,
+						flags, newaddr, error);
+				goto out;
+			}
+
+			error = do_munmap((void *)oldstart, oldsize);
+			if (error) {
+				ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+						"relocate:munmap failed. %d\n",
+						oldaddr, oldsize0, newsize0,
+						flags, newaddr, error);
+				goto out;
+			}
+		}
+	}
+	else if (newsize < oldsize) {
+		error = do_munmap((void *)newend, (oldend - newend));
+		if (error) {
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"shrink:munmap failed. %d\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error);
+			goto out;
+		}
+	}
+	else {
+		/* nothing to do */
+	}
+
+	error = 0;
+out:
+	ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
+	if (!error && (lckstart < lckend)) {
+		error = populate_process_memory(proc, (void *)lckstart, (lckend - lckstart));
+		if (error) {
+			ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
+					"populate failed. %d %#lx-%#lx\n",
+					oldaddr, oldsize0, newsize0, flags,
+					newaddr, error, lckstart, lckend);
+			error = 0;	/* ignore error */
+		}
+	}
+	ret = (error)? error: newstart;
+	dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):%d %#lx\n",
+			oldaddr, oldsize0, newsize0, flags, newaddr, error,
+			ret);
+	return ret;
+}
+
 #ifdef DCFA_KMOD
 
 #ifdef CMD_DCFA
diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h
index c8970f5a..8f3e57a0 100644
--- a/lib/include/ihk/mm.h
+++ b/lib/include/ihk/mm.h
@@ -133,6 +133,7 @@ typedef int pte_visitor_t(void *arg, page_table_t pt, pte_t *ptep,
 		void *pgaddr, size_t pgsize);
 int visit_pte_range(page_table_t pt, void *start, void *end,
 		enum visit_pte_flag flags, pte_visitor_t *funcp, void *arg);
+int move_pte_range(page_table_t pt, void *src, void *dest, size_t size);
 
 struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
 /* XXX: proper use of struct page_table and page_table_t is unknown */