From bf5ac7afc8c9377881757f54f94424d9ebce081c Mon Sep 17 00:00:00 2001
From: Balazs Gerofi
Date: Fri, 21 Jul 2017 15:34:48 +0900
Subject: [PATCH] remote_flush_tlb_array_cpumask(): bundle remote TLB invalidations

Add remote_flush_tlb_array_cpumask(), which takes an array of addresses
and its length instead of a single address.  The array pointer and the
count are stored in the tlb_flush_entry, and both the initiating CPU and
the interrupted CPUs call flush_tlb_single() for each element.  As
before, a zero first address denotes a full TLB flush.

remote_flush_tlb_cpumask() becomes a wrapper that passes a one-element
array.

clear_range() now allocates a TLB_INVALID_ARRAY_PAGES-page buffer and
the clear_range_l1/l2/l3 callbacks queue addresses through
remote_flush_tlb_add_addr() instead of interrupting remote CPUs once per
cleared entry.  The helper sends the queued batch when the buffer is
full; clear_range() sends whatever is left after the page table walk and
then frees the buffer.

tlb_flush_handler() now decrements the pending counter after it has
performed the flush rather than before.  The initiator spins on that
counter before releasing the entry lock, so this presumably keeps the
caller-owned address array alive while remote CPUs still read it.
---
Review notes (not part of the commit message):

 - This copy was reconstructed from a version whose newlines had been
   lost.  Hunk line counts were cross-checked against the hunk headers,
   but indentation and the trailing whitespace on removed lines are
   best-effort and may need to be regenerated against the real tree.
 - Changes relative to the patch as posted: no "return <expr>" in the
   void wrapper; the debug print in the CPU loop passes addr[0] rather
   than the array pointer to "0x%lX"; max_nr_addr is computed with
   sizeof(*args.addr); comment typo fixed.  The diffstat was adjusted
   for the one extra changed line.
 - NOTE(review): clear_range_l2()/clear_range_l3() call
   ihk_mc_free_pages(pt, 1) right after queueing the address, i.e.
   before the batched remote invalidation has been sent.  Please
   confirm that this ordering is safe.

 arch/x86/kernel/memory.c | 57 ++++++++++++++++++++++++------
 kernel/mem.c             | 78 ++++++++++++++++++++++++----------------
 lib/include/ihk/mm.h     |  7 +++-
 3 files changed, 98 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c
index c4e0869d..fa67bd1f 100644
--- a/arch/x86/kernel/memory.c
+++ b/arch/x86/kernel/memory.c
@@ -1077,8 +1077,27 @@ struct clear_range_args {
 	int free_physical;
 	struct memobj *memobj;
 	struct process_vm *vm;
+	unsigned long *addr;
+	int nr_addr;
+	int max_nr_addr;
 };
 
+static void remote_flush_tlb_add_addr(struct clear_range_args *args,
+		unsigned long addr)
+{
+	if (args->nr_addr < args->max_nr_addr) {
+		args->addr[args->nr_addr] = addr;
+		++args->nr_addr;
+		return;
+	}
+
+	remote_flush_tlb_array_cpumask(args->vm, args->addr, args->nr_addr,
+			ihk_mc_get_processor_id());
+
+	args->addr[0] = addr;
+	args->nr_addr = 1;
+}
+
 static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
 		uint64_t start, uint64_t end)
 {
@@ -1092,7 +1111,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
 	}
 
 	old = xchg(ptep, PTE_NULL);
-	remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
+	remote_flush_tlb_add_addr(args, base);
 
 	page = NULL;
 	if (!pte_is_fileoff(&old, PTL1_SIZE)) {
@@ -1141,8 +1160,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
 
 	if (*ptep & PFL2_SIZE) {
 		old = xchg(ptep, PTE_NULL);
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 
 		page = NULL;
 		if (!pte_is_fileoff(&old, PTL2_SIZE)) {
@@ -1174,8 +1192,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
 
 	if ((start <= base) && ((base + PTL2_SIZE) <= end)) {
 		*ptep = PTE_NULL;
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 		ihk_mc_free_pages(pt, 1);
 	}
 
@@ -1207,8 +1224,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
 
 	if (*ptep & PFL3_SIZE) {
 		old = xchg(ptep, PTE_NULL);
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 
 		page = NULL;
 		if (!pte_is_fileoff(&old, PTL3_SIZE)) {
@@ -1239,8 +1255,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
 
 	if (use_1gb_page && (start <= base) && ((base + PTL3_SIZE) <= end)) {
 		*ptep = PTE_NULL;
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 		ihk_mc_free_pages(pt, 1);
 	}
 
@@ -1260,8 +1275,10 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
 	return walk_pte_l3(pt, base, start, end, &clear_range_l3, args0);
 }
 
-static int clear_range(struct page_table *pt, struct process_vm *vm, 
-		uintptr_t start, uintptr_t end, int free_physical, 
+#define TLB_INVALID_ARRAY_PAGES (4)
+
+static int clear_range(struct page_table *pt, struct process_vm *vm,
+		uintptr_t start, uintptr_t end, int free_physical,
 		struct memobj *memobj)
 {
 	int error;
@@ -1276,6 +1293,17 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
 		return -EINVAL;
 	}
 
+	/* TODO: embed this in tlb_flush_entry? */
+	args.addr = (unsigned long *)ihk_mc_alloc_pages(
+			TLB_INVALID_ARRAY_PAGES, IHK_MC_AP_CRITICAL);
+	if (!args.addr) {
+		ekprintf("%s: error: allocating address array\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	args.nr_addr = 0;
+	args.max_nr_addr = (TLB_INVALID_ARRAY_PAGES * PAGE_SIZE /
+			sizeof(*args.addr));
+
 	args.free_physical = free_physical;
 	if (memobj && (memobj->flags & MF_DEV_FILE)) {
 		args.free_physical = 0;
@@ -1287,6 +1315,13 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
 	args.vm = vm;
 
 	error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args);
+	if (args.nr_addr) {
+		remote_flush_tlb_array_cpumask(vm, args.addr, args.nr_addr,
+				ihk_mc_get_processor_id());
+	}
+
+	ihk_mc_free_pages(args.addr, TLB_INVALID_ARRAY_PAGES);
+
 	return error;
 }
 
diff --git a/kernel/mem.c b/kernel/mem.c
index 867f4225..c501ddc2 100644
--- a/kernel/mem.c
+++ b/kernel/mem.c
@@ -885,86 +885,98 @@ void coredump(struct thread *thread, void *regs)
 	freecore(&coretable);
 }
 
-void remote_flush_tlb_cpumask(struct process_vm *vm, 
+void remote_flush_tlb_cpumask(struct process_vm *vm,
 		unsigned long addr, int cpu_id)
+{
+	unsigned long __addr = addr;
+	remote_flush_tlb_array_cpumask(vm, &__addr, 1, cpu_id);
+}
+
+void remote_flush_tlb_array_cpumask(struct process_vm *vm,
+		unsigned long *addr,
+		int nr_addr,
+		int cpu_id)
 {
 	unsigned long cpu;
 	int flush_ind;
 	struct tlb_flush_entry *flush_entry;
 	cpu_set_t _cpu_set;
 
-	if (addr) {
-		flush_ind = (addr >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
+	if (addr[0]) {
+		flush_ind = (addr[0] >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
 	}
 	/* Zero address denotes full TLB flush */
-	else {	
+	else {
 		/* Random.. */
 		flush_ind = (rdtsc()) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
 	}
-	
-	flush_entry = &tlb_flush_vector[flush_ind]; 
+
+	flush_entry = &tlb_flush_vector[flush_ind];
 
 	/* Take a copy of the cpu set so that we don't hold the lock
 	 * all the way while interrupting other cores */
 	ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock);
 	memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t));
 	ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock);
-	
+
 	dkprintf("trying to aquire flush_entry->lock flush_ind: %d\n", flush_ind);
-	
+
 	ihk_mc_spinlock_lock_noirq(&flush_entry->lock);
 
 	flush_entry->vm = vm;
 	flush_entry->addr = addr;
+	flush_entry->nr_addr = nr_addr;
 	ihk_atomic_set(&flush_entry->pending, 0);
 
 	dkprintf("lock aquired, iterating cpu mask.. flush_ind: %d\n", flush_ind);
-	
+
 	/* Loop through CPUs in this address space and interrupt them for
 	 * TLB flush on the specified address */
 	for_each_set_bit(cpu, (const unsigned long*)&_cpu_set.__bits, CPU_SETSIZE) {
-		
-		if (ihk_mc_get_processor_id() == cpu) 
+
+		if (ihk_mc_get_processor_id() == cpu)
 			continue;
 
 		ihk_atomic_inc(&flush_entry->pending);
 		dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n",
-		        flush_ind, addr, cpu);
+		        flush_ind, addr[0], cpu);
 
-		ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id, 
+		ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id,
 		        flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START);
 	}
-	
+
 #ifdef DEBUG_IC_TLB
 	{
 		unsigned long tsc;
 		tsc = rdtsc() + 12884901888;  /* 1.2GHz =>10 sec */
 #endif
-		if (flush_entry->addr) {
-			flush_tlb_single(flush_entry->addr & PAGE_MASK);
+		if (flush_entry->addr[0]) {
+			int i;
+
+			for (i = 0; i < flush_entry->nr_addr; ++i) {
+				flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
+			}
 		}
 		/* Zero address denotes full TLB flush */
 		else {
 			flush_tlb();
 		}
 
-		/* Flush on this core */
-		flush_tlb_single(addr & PAGE_MASK);
 		/* Wait for all cores */
 		while (ihk_atomic_read(&flush_entry->pending) != 0) {
 			cpu_pause();
 
 #ifdef DEBUG_IC_TLB
 			if (rdtsc() > tsc) {
-				kprintf("waited 10 secs for remote TLB!! -> panic_all()\n"); 
-				panic_all_cores("waited 10 secs for remote TLB!!\n"); 
+				kprintf("waited 10 secs for remote TLB!! -> panic_all()\n");
+				panic_all_cores("waited 10 secs for remote TLB!!\n");
 			}
 #endif
 		}
 #ifdef DEBUG_IC_TLB
 	}
 #endif
-	
+
 	ihk_mc_spinlock_unlock_noirq(&flush_entry->lock);
 }
 
@@ -975,25 +987,27 @@ void tlb_flush_handler(int vector)
 #endif // PROFILE_ENABLE
 	int flags = cpu_disable_interrupt_save();
 
-	struct tlb_flush_entry *flush_entry = &tlb_flush_vector[vector - 
+	struct tlb_flush_entry *flush_entry = &tlb_flush_vector[vector -
 		IHK_TLB_FLUSH_IRQ_VECTOR_START];
-	
-	dkprintf("decreasing pending cnt for %d\n", 
-			vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
 
-	/* Decrease counter */
-	ihk_atomic_dec(&flush_entry->pending);
+	if (flush_entry->addr[0]) {
+		int i;
 
-	dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr);
-	
-	if (flush_entry->addr) {
-		flush_tlb_single(flush_entry->addr & PAGE_MASK);
+		for (i = 0; i < flush_entry->nr_addr; ++i) {
+			flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
+			dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr[i]);
+		}
 	}
 	/* Zero address denotes full TLB flush */
 	else {
 		flush_tlb();
 	}
-	
+
+	/* Decrease counter */
+	dkprintf("decreasing pending cnt for %d\n",
+			vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
+	ihk_atomic_dec(&flush_entry->pending);
+
 	cpu_restore_interrupt(flags);
 #ifdef PROFILE_ENABLE
 	{
diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h
index dee805a0..66db1fe8 100644
--- a/lib/include/ihk/mm.h
+++ b/lib/include/ihk/mm.h
@@ -215,6 +215,10 @@ int ihk_mc_get_memory_chunk(int id,
 
 void remote_flush_tlb_cpumask(struct process_vm *vm,
 		unsigned long addr, int cpu_id);
+void remote_flush_tlb_array_cpumask(struct process_vm *vm,
+		unsigned long *addr,
+		int nr_addr,
+		int cpu_id);
 
 int ihk_set_kmsg(unsigned long addr, unsigned long size);
 char *ihk_get_kargs();
@@ -226,7 +230,8 @@ extern void (*__tlb_flush_handler)(int vector);
 
 struct tlb_flush_entry {
 	struct process_vm *vm;
-	unsigned long addr;
+	unsigned long *addr;
+	int nr_addr;
 	ihk_atomic_t pending;
 	ihk_spinlock_t lock;
 } __attribute__((aligned(64)));