From deb9cd4e75aeb07b3ac6285877f8af97377b8b4e Mon Sep 17 00:00:00 2001 From: NAKAMURA Gou Date: Tue, 16 Jul 2013 19:52:34 +0900 Subject: [PATCH] implement sys_munmap() --- kernel/include/cls.h | 2 + kernel/include/page.h | 14 +++ kernel/include/process.h | 2 + kernel/mem.c | 96 +++++++++++++++++- kernel/process.c | 214 ++++++++++++++++++++++++--------------- kernel/syscall.c | 13 ++- 6 files changed, 254 insertions(+), 87 deletions(-) diff --git a/kernel/include/cls.h b/kernel/include/cls.h index 1c6e7e13..02cdde11 100644 --- a/kernel/include/cls.h +++ b/kernel/include/cls.h @@ -37,6 +37,8 @@ struct cpu_local_var { int status; int fs; + + struct list_head pending_free_pages; } __attribute__((aligned(64))); diff --git a/kernel/include/page.h b/kernel/include/page.h index 54cc56ba..6d5aaaaa 100644 --- a/kernel/include/page.h +++ b/kernel/include/page.h @@ -1,7 +1,21 @@ #ifndef __HEADER_PAGE_H #define __HEADER_PAGE_H +struct page { + struct list_head list; + uint64_t flags; + int64_t count; +}; + +/* flags */ +#define PAGE_IN_LIST 0x0001UL + +struct page *phys_to_page(uintptr_t phys); +uintptr_t page_to_phys(struct page *page); + void *allocate_pages(int npages, enum ihk_mc_ap_flag flag); void free_pages(void *va, int npages); +void begin_free_pages_pending(void); +void finish_free_pages_pending(void); #endif diff --git a/kernel/include/process.h b/kernel/include/process.h index d4e298c4..5788f840 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -131,6 +131,8 @@ struct vm_range *lookup_process_memory_range( struct process_vm *vm, uintptr_t start, uintptr_t end); struct vm_range *next_process_memory_range( struct process_vm *vm, struct vm_range *range); +struct vm_range *previous_process_memory_range( + struct process_vm *vm, struct vm_range *range); int remove_process_region(struct process *proc, unsigned long start, unsigned long end); struct program_load_desc; diff --git a/kernel/mem.c b/kernel/mem.c index e408ac76..b36dfcca 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -12,17 +12,21 @@ #include #endif #include +#include //#define DEBUG_PRINT_MEM #ifdef DEBUG_PRINT_MEM -#define dkprintf kprintf +#define dkprintf(...) kprintf(__VA_ARGS__) +#define ekprintf(...) kprintf(__VA_ARGS__) #else #define dkprintf(...) +#define ekprintf(...) kprintf(__VA_ARGS__) #endif static struct ihk_page_allocator_desc *pa_allocator; static unsigned long pa_start, pa_end; +static struct page *pa_pages; extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt); @@ -61,9 +65,56 @@ void *allocate_pages(int npages, enum ihk_mc_ap_flag flag) void free_pages(void *va, int npages) { + struct list_head *pendings = &cpu_local_var(pending_free_pages); + struct page *page; + + if (pendings->next != NULL) { + page = phys_to_page(virt_to_phys(va)); + if (page->flags & PAGE_IN_LIST) { + panic("free_pages"); + } + page->flags |= PAGE_IN_LIST; + page->count = npages; + list_add_tail(&page->list, pendings); + return; + } + ihk_pagealloc_free(pa_allocator, virt_to_phys(va), npages); } +void begin_free_pages_pending(void) { + struct list_head *pendings = &cpu_local_var(pending_free_pages); + + if (pendings->next != NULL) { + panic("begin_free_pages_pending"); + } + INIT_LIST_HEAD(pendings); + return; +} + +void finish_free_pages_pending(void) +{ + struct list_head *pendings = &cpu_local_var(pending_free_pages); + struct page *page; + struct page *next; + + if (pendings->next == NULL) { + return; + } + + list_for_each_entry_safe(page, next, pendings, list) { + if (!(page->flags & PAGE_IN_LIST)) { + panic("free_pending_pages"); + } + page->flags &= ~PAGE_IN_LIST; + list_del(&page->list); + ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->count); + } + + pendings->next = pendings->prev = NULL; + return; +} + static struct ihk_mc_pa_ops allocator = { .alloc_page = allocate_aligned_pages, .free_page = free_pages, @@ -236,6 +287,48 @@ static void page_allocator_init(void) &query_free_mem_handler); } +struct page *phys_to_page(uintptr_t phys) +{ + int64_t ix; + + if ((phys < pa_start) || (pa_end <= phys)) { + return NULL; + } + + ix = (phys - pa_start) >> PAGE_SHIFT; + return &pa_pages[ix]; +} + +uintptr_t page_to_phys(struct page *page) +{ + int64_t ix; + uintptr_t phys; + + ix = page - pa_pages; + phys = pa_start + (ix << PAGE_SHIFT); + if ((phys < pa_start) || (pa_end <= phys)) { + ekprintf("page_to_phys(%p):not a pa_pages[]:%p %lx-%lx\n", + page, pa_pages, pa_start, pa_end); + panic("page_to_phys"); + } + return phys; +} + +static void page_init(void) +{ + size_t npages; + size_t allocsize; + size_t allocpages; + + npages = (pa_end - pa_start) >> PAGE_SHIFT; + allocsize = sizeof(struct page) * npages; + allocpages = (allocsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + + pa_pages = allocate_pages(allocpages, IHK_MC_AP_CRITICAL); + memset(pa_pages, 0, allocsize); + return; +} + void register_kmalloc(void) { allocator.alloc = kmalloc; @@ -348,6 +441,7 @@ void ihk_mc_clean_micpa(void){ void mem_init(void) { page_allocator_init(); + page_init(); /* Prepare the kernel virtual map space */ virtual_allocator_init(); diff --git a/kernel/process.c b/kernel/process.c index 25318870..e5ca9486 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -255,100 +255,130 @@ out: return error; } -int remove_process_memory_range(struct process *process, unsigned long start, unsigned long end) +int free_process_memory_range(struct process_vm *vm, struct vm_range *range) +{ + const intptr_t start0 = range->start; + const intptr_t end0 = range->end; + int error; + intptr_t start; + intptr_t end; +#ifdef USE_LARGE_PAGES + struct vm_range *neighbor; + intptr_t lpstart; + intptr_t lpend; +#endif /* USE_LARGE_PAGES */ + + dkprintf("free_process_memory_range(%p,%lx-%lx)\n", + vm, start0, end0); + + start = range->start; + end = range->end; + if (!(range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED))) { +#ifdef USE_LARGE_PAGES + lpstart = start & LARGE_PAGE_MASK; + lpend = (end + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK; + + + if (lpstart < start) { + neighbor = previous_process_memory_range(vm, range); + if ((neighbor == NULL) || (neighbor->end <= lpstart)) { + start = lpstart; + } + } + + if (end < lpend) { + neighbor = next_process_memory_range(vm, range); + if ((neighbor == NULL) || (lpend <= neighbor->start)) { + end = lpend; + } + } +#endif /* USE_LARGE_PAGES */ + + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); + error = ihk_mc_pt_free_range(vm->page_table, + (void *)start, (void *)end); + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); + if (error && (error != -ENOENT)) { + ekprintf("free_process_memory_range(%p,%lx-%lx):" + "ihk_mc_pt_free_range(%lx-%lx) failed. %d\n", + vm, start0, end0, start, end, error); + /* through */ + } + } + else { + ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); + error = ihk_mc_pt_clear_range(vm->page_table, + (void *)start, (void *)end); + ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); + if (error && (error != -ENOENT)) { + ekprintf("free_process_memory_range(%p,%lx-%lx):" + "ihk_mc_pt_clear_range(%lx-%lx) failed. %d\n", + vm, start0, end0, start, end, error); + /* through */ + } + } + + list_del(&range->list); + ihk_mc_free(range); + + dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n", + vm, start0, end0); + return 0; +} + +int remove_process_memory_range(struct process *process, + unsigned long start, unsigned long end) { struct process_vm * const vm = process->vm; struct vm_range *range; struct vm_range *next; int error; - unsigned long freestart; - unsigned long freesize; struct vm_range *freerange; - struct vm_range *newrange; + + dkprintf("remove_process_memory_range(%p,%lx,%lx)\n", + process, start, end); list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { if ((range->end <= start) || (end <= range->start)) { /* no overlap */ continue; } + freerange = range; - error = 0; - freerange = NULL; - freesize = 0; - - if (start <= range->start) { - /* partial or whole delete from range->start */ - freestart = range->start; - freesize = end - range->start; - - if (freesize >= (range->end - range->start)) { - freesize = range->end - range->start; - list_del(&range->list); - freerange = range; - } - else { - range->start += freesize; + if (freerange->start < start) { + error = split_process_memory_range(process, + freerange, start, &freerange); + if (error) { + ekprintf("remove_process_memory_range(%p,%lx,%lx):" + "split failed %d\n", + process, start, end, error); + return error; } } - else if (range->end <= end) { - /* partial delete up to range->end */ - freestart = start; - freesize = range->end - start; - range->end = start; - } - else { - /* delete the middle part of the 'range' */ - freestart = start; - freesize = end - start; - - newrange = kmalloc(sizeof(struct vm_range), IHK_MC_AP_NOWAIT); - if (!newrange) { - kprintf("remove_process_memory_range:kmalloc failed\n"); - return -ENOMEM; - } - newrange->start = end; - newrange->end = range->end; - newrange->flag = range->flag; - list_add_tail(&newrange->list, &vm->vm_range_list); - - range->end = start; - } - - if (freesize > 0) { - if (!(range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED))) { - /* clear page table and free physical pages */ - ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); - error = ihk_mc_pt_free_range(vm->page_table, - (void *)start, (void *)end); - ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock); - if (error && (error != -ENOENT)) { - kprintf("remove_process_memory_range:" - "ihk_mc_pt_free_range failed: %d\n", - error); - /* through */ - } - } - else { - /* clear page table */ - error = remove_process_region(process, freestart, - (freestart + freesize)); - if (error) { - kprintf("remove_process_memory_range:" - "remove_process_region failed: %d\n", - error); - /* through */ - } + if (end < freerange->end) { + error = split_process_memory_range(process, + freerange, end, NULL); + if (error) { + ekprintf("remove_process_memory_range(%p,%lx,%lx):" + "split failed %d\n", + process, start, end, error); + return error; } } - if (freerange != NULL) { - ihk_mc_free(freerange); - } + + error = free_process_memory_range(process->vm, freerange); if (error) { + ekprintf("remove_process_memory_range(%p,%lx,%lx):" + "free failed %d\n", + process, start, end, error); return error; } + } + dkprintf("remove_process_memory_range(%p,%lx,%lx): 0\n", + process, start, end); return 0; } @@ -524,6 +554,27 @@ struct vm_range *next_process_memory_range( return next; } +struct vm_range *previous_process_memory_range( + struct process_vm *vm, struct vm_range *range) +{ + struct vm_range *prev; + + dkprintf("previous_process_memory_range(%p,%lx-%lx)\n", + vm, range->start, range->end); + + if (list_first_entry(&vm->vm_range_list, struct vm_range, list) == range) { + prev = NULL; + } + else { + prev = list_entry(range->list.prev, struct vm_range, list); + } + + dkprintf("previous_process_memory_range(%p,%lx-%lx): %p %lx-%lx\n", + vm, range->start, range->end, prev, + prev? prev->start: 0, prev? prev->end: 0); + return prev; +} + int change_prot_process_memory_range(struct process *proc, struct vm_range *range, unsigned long protflag) { @@ -772,22 +823,17 @@ void free_process_memory(struct process *proc) return; } - ihk_mc_spinlock_lock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { - if (!(range->flag & (VR_REMOTE | VR_IO_NOCACHE | VR_RESERVED))) { - error = ihk_mc_pt_free_range(vm->page_table, - (void *)range->start, (void *)range->end); - if (error && (error != -ENOENT)) { - kprintf("free_process_memory:" - "ihk_mc_pt_free_range(%lx,%lx) failed. %d\n", - range->start, range->end, error); - /* through */ - } + error = free_process_memory_range(vm, range); + if (error) { + ekprintf("free_process_memory(%p):" + "free range failed. %lx-%lx %d\n", + proc, range->start, range->end, error); + /* through */ } - list_del(&range->list); - ihk_mc_free(range); } - ihk_mc_spinlock_unlock_noirq(&proc->vm->page_table_lock); + ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); ihk_mc_pt_destroy(vm->page_table); free_process(vm->owner_process); diff --git a/kernel/syscall.c b/kernel/syscall.c index 3a5654bb..997bb967 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -173,8 +173,14 @@ SYSCALL_DECLARE(exit_group) static int do_munmap(void *addr, size_t len) { - return remove_process_memory_range( - cpu_local_var(current), (intptr_t)addr, (intptr_t)addr+len); + int error; + + begin_free_pages_pending(); + error = remove_process_memory_range(cpu_local_var(current), + (intptr_t)addr, (intptr_t)addr+len); + // XXX: TLB flush + finish_free_pages_pending(); + return error; } static int search_free_space(size_t len, intptr_t hint, intptr_t *addrp) @@ -488,6 +494,7 @@ SYSCALL_DECLARE(mprotect) } ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); + begin_free_pages_pending(); /* check contiguous map */ first = NULL; @@ -572,6 +579,8 @@ SYSCALL_DECLARE(mprotect) error = 0; out: + // XXX: TLB flush + finish_free_pages_pending(); ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); dkprintf("[%d]sys_mprotect(%lx,%lx,%x): %d\n", ihk_mc_get_processor_id(), start, len0, prot, error);