From 4d4279121bea226e37b0d5f5ab13953963986673 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 12 Sep 2017 16:43:46 +0900 Subject: [PATCH] process/vm: replace vm_range list by an rbtree This replaces the chained list used to keep track of all memory ranges of a process by a standard rbtree (no need of interval tree here because there is no overlap) Accesses that were done directly through vm_range_list before were replaced by lookup_process_memory_range, even full list scan (e.g. coredump). The full scans will thus be less efficient because calls to rb_next() will not be inlined, but these are rarer calls that can probably afford this compared to code simplicity. The only reference to the actual backing structure left outside of process.c is a call to rb_erase in xpmem_free_process_memory_range. v2: fix lookup_process_memory_range with small start address v3: make vm_range_insert error out properly Panic does not lead to easy debug, all error paths are handled to just return something on error v4: fix lookup_process_memory_range (again) That optimistically going left was a more serious bug than just last iteration, we could just pass by a match and continue down the tree if the match was not a leaf. 
v5: some users actually needed leftmost match, so restore behavior without the breakage (hopefully) --- arch/arm64/kernel/cpu.c | 18 ++-- arch/arm64/kernel/gencore.c | 17 +++- arch/x86/kernel/cpu.c | 18 ++-- arch/x86/kernel/gencore.c | 17 +++- kernel/include/process.h | 5 +- kernel/mem.c | 9 +- kernel/pager.c | 8 +- kernel/process.c | 190 ++++++++++++++++++++++-------------- kernel/procfs.c | 8 +- kernel/syscall.c | 5 +- kernel/xpmem.c | 28 +++--- 11 files changed, 191 insertions(+), 132 deletions(-) diff --git a/arch/arm64/kernel/cpu.c b/arch/arm64/kernel/cpu.c index 3fe31507..882b251b 100644 --- a/arch/arm64/kernel/cpu.c +++ b/arch/arm64/kernel/cpu.c @@ -1499,7 +1499,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) const uintptr_t address = (uintptr_t)fault_addr; struct process_vm *vm = thread->vm; struct vm_range *range; - char found; unsigned long irqflags; unsigned long error = 0; @@ -1513,17 +1512,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) (error & PF_RSVD ? "was" : "wasn't"), (error & PF_INSTR ? "was" : "wasn't")); - found = 0; - list_for_each_entry(range, &vm->vm_range_list, list) { - if (range->start <= address && range->end > address) { - found = 1; - __kprintf("address is in range, flag: 0x%lx\n", - range->flag); - ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); - break; - } - } - if (!found) { + range = lookup_process_memory_range(vm, address, address+1); + if (range) { + __kprintf("address is in range, flag: 0x%lx\n", + range->flag); + ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); + } else { __kprintf("address is out of range! 
\n"); } diff --git a/arch/arm64/kernel/gencore.c b/arch/arm64/kernel/gencore.c index e193fc4d..77e33a53 100644 --- a/arch/arm64/kernel/gencore.c +++ b/arch/arm64/kernel/gencore.c @@ -221,7 +221,7 @@ int gencore(struct thread *thread, void *regs, Elf64_Ehdr eh; Elf64_Phdr *ph = NULL; void *note = NULL; - struct vm_range *range; + struct vm_range *range, *next; struct process_vm *vm = thread->vm; int segs = 1; /* the first one is for NOTE */ int notesize, phsize, alignednotesize; @@ -235,7 +235,10 @@ int gencore(struct thread *thread, void *regs, return -1; } - list_for_each_entry(range, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", range->start, range->end, range->flag, range->objoff); /* We omit reserved areas because they are only for @@ -323,7 +326,10 @@ int gencore(struct thread *thread, void *regs, /* program header for each memory chunk */ i = 1; - list_for_each_entry(range, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + unsigned long flag = range->flag; unsigned long size = range->end - range->start; @@ -364,7 +370,10 @@ int gencore(struct thread *thread, void *regs, dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note); i = 3; /* memory segments */ - list_for_each_entry(range, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + unsigned long phys; if (range->flag & VR_RESERVED) diff --git a/arch/x86/kernel/cpu.c b/arch/x86/kernel/cpu.c index b881f34a..339e246c 100644 --- a/arch/x86/kernel/cpu.c +++ b/arch/x86/kernel/cpu.c @@ -1081,7 +1081,6 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) const uintptr_t address = (uintptr_t)fault_addr; struct process_vm 
*vm = thread->vm; struct vm_range *range; - char found; unsigned long irqflags; unsigned long error = ((struct x86_user_context *)regs)->gpr.error; @@ -1095,17 +1094,12 @@ unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs) (error & PF_RSVD ? "was" : "wasn't"), (error & PF_INSTR ? "was" : "wasn't")); - found = 0; - list_for_each_entry(range, &vm->vm_range_list, list) { - if (range->start <= address && range->end > address) { - found = 1; - __kprintf("address is in range, flag: 0x%lx\n", - range->flag); - ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); - break; - } - } - if (!found) { + range = lookup_process_memory_range(vm, address, address+1); + if (range) { + __kprintf("address is in range, flag: 0x%lx\n", + range->flag); + ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address); + } else { __kprintf("address is out of range! \n"); } diff --git a/arch/x86/kernel/gencore.c b/arch/x86/kernel/gencore.c index ee9d5ca6..7ad03f06 100644 --- a/arch/x86/kernel/gencore.c +++ b/arch/x86/kernel/gencore.c @@ -289,7 +289,7 @@ int gencore(struct thread *thread, void *regs, Elf64_Ehdr eh; Elf64_Phdr *ph = NULL; void *note = NULL; - struct vm_range *range; + struct vm_range *range, *next; struct process_vm *vm = thread->vm; int segs = 1; /* the first one is for NOTE */ int notesize, phsize, alignednotesize; @@ -303,7 +303,10 @@ int gencore(struct thread *thread, void *regs, return -1; } - list_for_each_entry(range, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n", range->start, range->end, range->flag, range->objoff); /* We omit reserved areas because they are only for @@ -393,7 +396,10 @@ int gencore(struct thread *thread, void *regs, /* program header for each memory chunk */ i = 1; - list_for_each_entry(range, &vm->vm_range_list, list) { + next = 
lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + unsigned long flag = range->flag; unsigned long size = range->end - range->start; @@ -434,7 +440,10 @@ int gencore(struct thread *thread, void *regs, dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note); i = 3; /* memory segments */ - list_for_each_entry(range, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); + unsigned long phys; if (range->flag & VR_RESERVED) diff --git a/kernel/include/process.h b/kernel/include/process.h index 445031ce..87a7e059 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -377,7 +378,7 @@ struct user #define AUXV_LEN 18 struct vm_range { - struct list_head list; + struct rb_node vm_rb_node; unsigned long start, end; unsigned long flag; struct memobj *memobj; @@ -695,7 +696,7 @@ struct thread { struct process_vm { struct address_space *address_space; - struct list_head vm_range_list; + struct rb_root vm_range_tree; struct vm_regions region; struct process *proc; /* process that reside on the same page */ void *opt; diff --git a/kernel/mem.c b/kernel/mem.c index 1b9568e3..6e5941fd 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -1154,12 +1154,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) info.si_signo = SIGSEGV; info.si_code = SEGV_MAPERR; - list_for_each_entry(range, &vm->vm_range_list, list) { - if (range->start <= (unsigned long)fault_addr && range->end > (unsigned long)fault_addr) { - info.si_code = SEGV_ACCERR; - break; - } - } + range = lookup_process_memory_range(vm, (uintptr_t)fault_addr, ((uintptr_t)fault_addr) + 1); + if (range) + info.si_code = SEGV_ACCERR; info._sifields._sigfault.si_addr = fault_addr; set_signal(SIGSEGV, regs, &info); } diff --git 
a/kernel/pager.c b/kernel/pager.c index 67b007c2..4fddd650 100644 --- a/kernel/pager.c +++ b/kernel/pager.c @@ -561,7 +561,9 @@ print_region(char *msg, struct process_vm *vm) struct vm_range *range, *next; kprintf("%s:\n", msg); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); if (range->memobj != NULL) continue; kprintf("\t%016lx:%016lx (%lx)\n", range->start, range->end, range->flag); @@ -686,7 +688,9 @@ do_pageout(char *fname, void *buf, size_t size, int flag) area_print(region); /* looking at ranges except for non anoymous, text, and data */ - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { + next = next_process_memory_range(vm, range); if (range->memobj != NULL) continue; if (IS_TEXT(range->start, region) || IS_STACK(range->start, region) diff --git a/kernel/process.c b/kernel/process.c index fdfabcd3..453439f3 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -44,9 +44,25 @@ #ifdef DEBUG_PRINT_PROCESS #define dkprintf(...) kprintf(__VA_ARGS__) #define ekprintf(...) kprintf(__VA_ARGS__) +static void dtree(struct rb_node *node, int l) { + struct vm_range *range; + if (!node) + return; + + range = rb_entry(node, struct vm_range, vm_rb_node); + + dtree(node->rb_left, l+1); + kprintf("dtree: %0*d, %p: %lx-%lx\n", l, 0, range, range->start, range->end); + dtree(node->rb_right, l+1); +} +static void dump_tree(struct process_vm *vm) { + kprintf("dump_tree %p\n", vm); + dtree(vm->vm_range_tree.rb_node, 1); +} #else #define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0) #define ekprintf(...) 
kprintf(__VA_ARGS__) +static void dump_tree(struct process_vm *vm) {} #endif #ifdef POSTK_DEBUG_ARCH_DEP_22 @@ -57,8 +73,10 @@ extern void save_debugreg(unsigned long *debugreg); extern void restore_debugreg(unsigned long *debugreg); extern void clear_debugreg(void); extern void clear_single_step(struct thread *proc); -static void insert_vm_range_list(struct process_vm *vm, +static int vm_range_insert(struct process_vm *vm, struct vm_range *newrange); +static struct vm_range *vm_range_find(struct process_vm *vm, + unsigned long addr); static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm); extern void release_fp_regs(struct thread *proc); extern void save_fp_regs(struct thread *proc); @@ -237,7 +255,7 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process ihk_mc_spinlock_init(&vm->page_table_lock); ihk_atomic_set(&vm->refcount, 1); - INIT_LIST_HEAD(&vm->vm_range_list); + vm->vm_range_tree = RB_ROOT; INIT_LIST_HEAD(&vm->vm_range_numa_policy_list); vm->address_space = asp; vm->proc = owner; @@ -693,7 +711,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) goto err_rollback; } - INIT_LIST_HEAD(&range->list); + RB_CLEAR_NODE(&range->vm_rb_node); range->start = src_range->start; range->end = src_range->end; range->flag = src_range->flag; @@ -727,7 +745,7 @@ static int copy_user_ranges(struct process_vm *vm, struct process_vm *orgvm) } // memory_stat_rss_add() is called in child-node, i.e. 
copy_user_pte() - insert_vm_range_list(vm, range); + vm_range_insert(vm, range); } ihk_mc_spinlock_unlock_noirq(&orgvm->memory_range_lock); @@ -815,9 +833,13 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range, range->end = addr; - list_add(&newrange->list, &range->list); + error = vm_range_insert(vm, newrange); + if (error) { + kprintf("%s: ERROR: could not insert range: %d\n", + __FUNCTION__, error); + return error; + } - error = 0; if (splitp != NULL) { *splitp = newrange; } @@ -862,7 +884,7 @@ int join_process_memory_range(struct process_vm *vm, if (merging->memobj) { memobj_release(merging->memobj); } - list_del(&merging->list); + rb_erase(&merging->vm_rb_node, &vm->vm_range_tree); for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) { if (vm->range_cache[i] == merging) vm->range_cache[i] = surviving; @@ -969,7 +991,7 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) memobj_release(range->memobj); } - list_del(&range->list); + rb_erase(&range->vm_rb_node, &vm->vm_range_tree); for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) { if (vm->range_cache[i] == range) vm->range_cache[i] = NULL; @@ -984,25 +1006,20 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range) int remove_process_memory_range(struct process_vm *vm, unsigned long start, unsigned long end, int *ro_freedp) { - struct vm_range *range; - struct vm_range *next; + struct vm_range *range, *next; int error; - struct vm_range *freerange; int ro_freed = 0; dkprintf("remove_process_memory_range(%p,%lx,%lx)\n", vm, start, end); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { - if ((range->end <= start) || (end <= range->start)) { - /* no overlap */ - continue; - } - freerange = range; + next = lookup_process_memory_range(vm, start, end); + while ((range = next) && range->start < end) { + next = next_process_memory_range(vm, range); - if (freerange->start < start) { + if (range->start < start) { error = 
split_process_memory_range(vm, - freerange, start, &freerange); + range, start, &range); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "split failed %d\n", @@ -1011,9 +1028,9 @@ int remove_process_memory_range(struct process_vm *vm, } } - if (end < freerange->end) { + if (end < range->end) { error = split_process_memory_range(vm, - freerange, end, NULL); + range, end, NULL); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "split failed %d\n", @@ -1022,22 +1039,21 @@ int remove_process_memory_range(struct process_vm *vm, } } - if (!(freerange->flag & VR_PROT_WRITE)) { + if (!(range->flag & VR_PROT_WRITE)) { ro_freed = 1; } - if (freerange->private_data) { - xpmem_remove_process_memory_range(vm, freerange); + if (range->private_data) { + xpmem_remove_process_memory_range(vm, range); } - error = free_process_memory_range(vm, freerange); + error = free_process_memory_range(vm, range); if (error) { ekprintf("remove_process_memory_range(%p,%lx,%lx):" "free failed %d\n", vm, start, end, error); return error; } - } if (ro_freedp) { @@ -1048,28 +1064,33 @@ int remove_process_memory_range(struct process_vm *vm, return 0; } -static void insert_vm_range_list(struct process_vm *vm, struct vm_range *newrange) +static int vm_range_insert(struct process_vm *vm, struct vm_range *newrange) { - struct list_head *next; + struct rb_root *root = &vm->vm_range_tree; + struct rb_node **new = &(root->rb_node), *parent = NULL; struct vm_range *range; - next = &vm->vm_range_list; - list_for_each_entry(range, &vm->vm_range_list, list) { - if ((newrange->start < range->end) && (range->start < newrange->end)) { - ekprintf("insert_vm_range_list(%p,%lx-%lx %lx):overlap %lx-%lx %lx\n", + while (*new) { + range = rb_entry(*new, struct vm_range, vm_rb_node); + parent = *new; + if (newrange->end <= range->start) { + new = &((*new)->rb_left); + } else if (newrange->start >= range->end) { + new = &((*new)->rb_right); + } else { + ekprintf("vm_range_insert(%p,%lx-%lx 
%x): overlap %lx-%lx %lx\n", vm, newrange->start, newrange->end, newrange->flag, range->start, range->end, range->flag); - panic("insert_vm_range_list\n"); - } - - if (newrange->end <= range->start) { - next = &range->list; - break; + return -EFAULT; } } - list_add_tail(&newrange->list, next); - return; + dkprintf("vm_range_insert: %p,%p: %lx-%lx %x\n", vm, newrange, newrange->start, newrange->end, newrange->flag); + dump_tree(vm); + rb_link_node(&newrange->vm_rb_node, parent, new); + rb_insert_color(&newrange->vm_rb_node, root); + + return 0; } enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep) @@ -1181,7 +1202,7 @@ int add_process_memory_range(struct process_vm *vm, return -ENOMEM; } - INIT_LIST_HEAD(&range->list); + RB_CLEAR_NODE(&range->vm_rb_node); range->start = start; range->end = end; range->flag = flag; @@ -1219,7 +1240,12 @@ int add_process_memory_range(struct process_vm *vm, return rc; } - insert_vm_range_list(vm, range); + rc = vm_range_insert(vm, range); + if (rc) { + kprintf("%s: ERROR: could not insert range: %d\n", + __FUNCTION__, rc); + return rc; + } /* Clear content! 
*/ if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING)) @@ -1249,7 +1275,9 @@ struct vm_range *lookup_process_memory_range( struct process_vm *vm, uintptr_t start, uintptr_t end) { int i; - struct vm_range *range = NULL; + struct vm_range *range = NULL, *match = NULL; + struct rb_root *root = &vm->vm_range_tree; + struct rb_node *node = root->rb_node; dkprintf("lookup_process_memory_range(%p,%lx,%lx)\n", vm, start, end); @@ -1267,43 +1295,50 @@ struct vm_range *lookup_process_memory_range( return vm->range_cache[c_i]; } - list_for_each_entry(range, &vm->vm_range_list, list) { + while (node) { + range = rb_entry(node, struct vm_range, vm_rb_node); if (end <= range->start) { + node = node->rb_left; + } else if (start >= range->end) { + node = node->rb_right; + } else if (start < range->start) { + /* We have a match, but we need to try left to + * return the first possible match */ + match = range; + node = node->rb_left; + } else { + match = range; break; } - if ((start < range->end) && (range->start < end)) { - goto out; - } } - range = NULL; -out: - if (range) { + if (match && end > match->start) { vm->range_cache_ind = (vm->range_cache_ind - 1 + VM_RANGE_CACHE_SIZE) % VM_RANGE_CACHE_SIZE; - vm->range_cache[vm->range_cache_ind] = range; + vm->range_cache[vm->range_cache_ind] = match; } +out: dkprintf("lookup_process_memory_range(%p,%lx,%lx): %p %lx-%lx\n", - vm, start, end, range, - range? range->start: 0, range? range->end: 0); - return range; + vm, start, end, match, + match? match->start: 0, match? 
match->end: 0); + return match; } struct vm_range *next_process_memory_range( struct process_vm *vm, struct vm_range *range) { struct vm_range *next; + struct rb_node *node; dkprintf("next_process_memory_range(%p,%lx-%lx)\n", vm, range->start, range->end); - if (list_is_last(&range->list, &vm->vm_range_list)) { + node = rb_next(&range->vm_rb_node); + if (node) + next = rb_entry(node, struct vm_range, vm_rb_node); + else next = NULL; - } - else { - next = list_entry(range->list.next, struct vm_range, list); - } dkprintf("next_process_memory_range(%p,%lx-%lx): %p %lx-%lx\n", vm, range->start, range->end, next, @@ -1315,16 +1350,16 @@ struct vm_range *previous_process_memory_range( struct process_vm *vm, struct vm_range *range) { struct vm_range *prev; + struct rb_node *node; dkprintf("previous_process_memory_range(%p,%lx-%lx)\n", vm, range->start, range->end); - if (list_first_entry(&vm->vm_range_list, struct vm_range, list) == range) { + node = rb_prev(&range->vm_rb_node); + if (node) + prev = rb_entry(node, struct vm_range, vm_rb_node); + else prev = NULL; - } - else { - prev = list_entry(range->list.prev, struct vm_range, list); - } dkprintf("previous_process_memory_range(%p,%lx-%lx): %p %lx-%lx\n", vm, range->start, range->end, prev, @@ -2139,6 +2174,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn, if ((rc = add_process_memory_range(thread->vm, start, end, NOPHYS, vrflag, NULL, 0, LARGE_PAGE_SHIFT, &range)) != 0) { ihk_mc_free_pages_user(stack, minsz >> PAGE_SHIFT); + kprintf("%s: error addding process memory range: %d\n", rc); return rc; } @@ -2300,6 +2336,7 @@ unsigned long extend_process_region(struct process_vm *vm, (p == 0 ? 
0 : virt_to_phys(p)), flag, NULL, 0, align_p2align, NULL)) != 0) { ihk_mc_free_pages_user(p, (new_end_allocated - end_allocated) >> PAGE_SHIFT); + kprintf("%s: error addding process memory range: %d\n", rc); return end_allocated; } #endif /* POSTK_DEBUG_TEMP_FIX_68 */ @@ -2335,14 +2372,17 @@ int remove_process_region(struct process_vm *vm, void flush_process_memory(struct process_vm *vm) { struct vm_range *range; - struct vm_range *next; + struct rb_node *node, *next = rb_first(&vm->vm_range_tree); int error; dkprintf("flush_process_memory(%p)\n", vm); ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); /* Let concurrent page faults know the VM will be gone */ vm->exiting = 1; - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + while ((node = next)) { + range = rb_entry(node, struct vm_range, vm_rb_node); + next = rb_next(node); + if (range->memobj) { // XXX: temporary of temporary error = free_process_memory_range(vm, range); @@ -2362,14 +2402,18 @@ void flush_process_memory(struct process_vm *vm) void free_process_memory_ranges(struct process_vm *vm) { int error; - struct vm_range *range, *next; + struct vm_range *range; + struct rb_node *node, *next = rb_first(&vm->vm_range_tree); if (vm == NULL) { return; } ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + while ((node = next)) { + range = rb_entry(node, struct vm_range, vm_rb_node); + next = rb_next(node); + error = free_process_memory_range(vm, range); if (error) { ekprintf("free_process_memory(%p):" @@ -2444,11 +2488,15 @@ hold_process_vm(struct process_vm *vm) void free_all_process_memory_range(struct process_vm *vm) { - struct vm_range *range, *next; + struct vm_range *range; + struct rb_node *node, *next = rb_first(&vm->vm_range_tree); int error; ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + while ((node = next)) { + range = rb_entry(node, 
struct vm_range, vm_rb_node); + next = rb_next(node); + if (range->memobj) { range->memobj->flags |= MF_HOST_RELEASED; } @@ -2869,7 +2917,7 @@ void sched_init(void) ihk_mc_init_context(&idle_thread->ctx, NULL, idle); ihk_mc_spinlock_init(&idle_thread->vm->memory_range_lock); - INIT_LIST_HEAD(&idle_thread->vm->vm_range_list); + idle_thread->vm->vm_range_tree = RB_ROOT; INIT_LIST_HEAD(&idle_thread->vm->vm_range_numa_policy_list); idle_thread->proc->pid = 0; idle_thread->tid = ihk_mc_get_processor_id(); diff --git a/kernel/procfs.c b/kernel/procfs.c index b5de66f9..e61e95f3 100644 --- a/kernel/procfs.c +++ b/kernel/procfs.c @@ -349,7 +349,8 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - list_for_each_entry(range, &vm->vm_range_list, list) { + range = lookup_process_memory_range(vm, 0, -1); + while (range) { int written_now; /* format is (from man proc): @@ -388,6 +389,7 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) break; } #endif /* POSTK_DEBUG_TEMP_FIX_47 */ + range = next_process_memory_range(vm, range); } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); @@ -490,9 +492,11 @@ void process_procfs_request(struct ikc_scd_packet *rpacket) } ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); - list_for_each_entry(range, &proc->vm->vm_range_list, list) { + range = lookup_process_memory_range(vm, 0, -1); + while (range) { if(range->flag & VR_LOCKED) lockedsize += range->end - range->start; + range = next_process_memory_range(vm, range); } ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); diff --git a/kernel/syscall.c b/kernel/syscall.c index 4a7dcd9a..162f0e3e 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -2123,7 +2123,10 @@ static void munmap_all(void) int error; ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { + next = lookup_process_memory_range(vm, 0, -1); + while ((range = next)) { 
+ next = next_process_memory_range(vm, range); + addr = (void *)range->start; size = range->end - range->start; error = do_munmap(addr, size); diff --git a/kernel/xpmem.c b/kernel/xpmem.c index 7ae8c4bb..2be01545 100644 --- a/kernel/xpmem.c +++ b/kernel/xpmem.c @@ -1299,21 +1299,17 @@ static int xpmem_remove_process_range( int error = 0; struct vm_range *range; struct vm_range *next; - struct vm_range *freerange; int ro_freed = 0; XPMEM_DEBUG("call: vm=0x%p, start=0x%lx, end=0x%lx", vm, start, end); - list_for_each_entry_safe(range, next, &vm->vm_range_list, list) { - if ((range->end <= start) || (end <= range->start)) { - /* no overlap */ - continue; - } - freerange = range; + next = lookup_process_memory_range(vm, start, end); + while ((range = next) && range->start < end) { + next = next_process_memory_range(vm, range); - if (freerange->start < start) { + if (range->start < start) { error = split_process_memory_range(vm, - freerange, start, &freerange); + range, start, &range); if (error) { ekprintf("%s(%p,%lx,%lx): ERROR: " "split failed %d\n", @@ -1322,8 +1318,8 @@ static int xpmem_remove_process_range( } } - if (end < freerange->end) { - error = split_process_memory_range(vm, freerange, end, + if (end < range->end) { + error = split_process_memory_range(vm, range, end, NULL); if (error) { ekprintf("%s(%p,%lx,%lx): ERROR: " @@ -1333,15 +1329,15 @@ static int xpmem_remove_process_range( } } - if (!(freerange->flag & VR_PROT_WRITE)) { + if (!(range->flag & VR_PROT_WRITE)) { ro_freed = 1; } - if (freerange->private_data) { - xpmem_remove_process_memory_range(vm, freerange); + if (range->private_data) { + xpmem_remove_process_memory_range(vm, range); } - error = xpmem_free_process_memory_range(vm, freerange); + error = xpmem_free_process_memory_range(vm, range); if (error) { ekprintf("%s(%p,%lx,%lx): ERROR: free failed %d\n", __FUNCTION__, vm, start, end, error); @@ -1389,7 +1385,7 @@ static int xpmem_free_process_memory_range( memobj_release(range->memobj); } - 
list_del(&range->list); + rb_erase(&range->vm_rb_node, &vm->vm_range_tree); for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) { if (vm->range_cache[i] == range) vm->range_cache[i] = NULL;