remote_flush_tlb_array_cpumask(): bundle remote TLB invalidations

Author: Balazs Gerofi
Date: 2017-07-21 15:34:48 +09:00
Parent: bc423255d9
Commit: bf5ac7afc8
3 changed files with 97 additions and 43 deletions
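
Note: the change batches page-invalidation addresses into a per-walk array and sends them to the other cores in one IPI round, instead of interrupting every core once per page. The stand-alone sketch below illustrates just that batching pattern; the struct, the function names and the printf standing in for the IPI are illustrative, not McKernel code.

#include <stdio.h>

#define BATCH_CAP 4	/* tiny on purpose so the demo overflows; the real buffer holds thousands of entries */

struct flush_batch {
	unsigned long addr[BATCH_CAP];
	int nr;
};

/* Stand-in for the expensive cross-CPU operation: one call per batch, not per page. */
static void ipi_flush(const unsigned long *addr, int nr)
{
	printf("flushing %d address(es) with one IPI round\n", nr);
}

/* Same add-or-drain logic as remote_flush_tlb_add_addr(): append while there
 * is room, otherwise send what has accumulated and restart with the new address. */
static void batch_add(struct flush_batch *b, unsigned long addr)
{
	if (b->nr < BATCH_CAP) {
		b->addr[b->nr++] = addr;
		return;
	}
	ipi_flush(b->addr, b->nr);
	b->addr[0] = addr;
	b->nr = 1;
}

int main(void)
{
	struct flush_batch b = { .nr = 0 };
	unsigned long a;

	for (a = 0x1000; a <= 0x9000; a += 0x1000)
		batch_add(&b, a);

	if (b.nr)	/* drain the leftover batch, as clear_range() does after the page-table walk */
		ipi_flush(b.addr, b.nr);
	return 0;
}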


@@ -1077,8 +1077,27 @@ struct clear_range_args {
 	int free_physical;
 	struct memobj *memobj;
 	struct process_vm *vm;
+	unsigned long *addr;
+	int nr_addr;
+	int max_nr_addr;
 };
 
+static void remote_flush_tlb_add_addr(struct clear_range_args *args,
+		unsigned long addr)
+{
+	if (args->nr_addr < args->max_nr_addr) {
+		args->addr[args->nr_addr] = addr;
+		++args->nr_addr;
+		return;
+	}
+
+	remote_flush_tlb_array_cpumask(args->vm, args->addr, args->nr_addr,
+			ihk_mc_get_processor_id());
+
+	args->addr[0] = addr;
+	args->nr_addr = 1;
+}
+
 static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
 		uint64_t start, uint64_t end)
 {
@@ -1092,7 +1111,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
 	}
 
 	old = xchg(ptep, PTE_NULL);
-	remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
+	remote_flush_tlb_add_addr(args, base);
 
 	page = NULL;
 	if (!pte_is_fileoff(&old, PTL1_SIZE)) {
@@ -1141,8 +1160,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
 	if (*ptep & PFL2_SIZE) {
 		old = xchg(ptep, PTE_NULL);
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 
 		page = NULL;
 		if (!pte_is_fileoff(&old, PTL2_SIZE)) {
@@ -1174,8 +1192,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
 	if ((start <= base) && ((base + PTL2_SIZE) <= end)) {
 		*ptep = PTE_NULL;
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 		ihk_mc_free_pages(pt, 1);
 	}
@@ -1207,8 +1224,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
 	if (*ptep & PFL3_SIZE) {
 		old = xchg(ptep, PTE_NULL);
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 
 		page = NULL;
 		if (!pte_is_fileoff(&old, PTL3_SIZE)) {
@@ -1239,8 +1255,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
 	if (use_1gb_page && (start <= base) && ((base + PTL3_SIZE) <= end)) {
 		*ptep = PTE_NULL;
-		remote_flush_tlb_cpumask(args->vm, base,
-				ihk_mc_get_processor_id());
+		remote_flush_tlb_add_addr(args, base);
 		ihk_mc_free_pages(pt, 1);
 	}
@@ -1260,8 +1275,10 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
 	return walk_pte_l3(pt, base, start, end, &clear_range_l3, args0);
 }
 
+#define TLB_INVALID_ARRAY_PAGES (4)
+
 static int clear_range(struct page_table *pt, struct process_vm *vm,
 		uintptr_t start, uintptr_t end, int free_physical,
 		struct memobj *memobj)
 {
 	int error;
@@ -1276,6 +1293,17 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
 		return -EINVAL;
 	}
 
+	/* TODO: embedd this in tlb_flush_entry? */
+	args.addr = (unsigned long *)ihk_mc_alloc_pages(
+			TLB_INVALID_ARRAY_PAGES, IHK_MC_AP_CRITICAL);
+	if (!args.addr) {
+		ekprintf("%s: error: allocating address array\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+	args.nr_addr = 0;
+	args.max_nr_addr = (TLB_INVALID_ARRAY_PAGES * PAGE_SIZE /
+			sizeof(uint64_t));
+
 	args.free_physical = free_physical;
 	if (memobj && (memobj->flags & MF_DEV_FILE)) {
 		args.free_physical = 0;
@@ -1287,6 +1315,13 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
 	args.vm = vm;
 
 	error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args);
 
+	if (args.nr_addr) {
+		remote_flush_tlb_array_cpumask(vm, args.addr, args.nr_addr,
+				ihk_mc_get_processor_id());
+	}
+
+	ihk_mc_free_pages(args.addr, TLB_INVALID_ARRAY_PAGES);
+
 	return error;
 }
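
The batch buffer in clear_range() above is sized in whole pages: TLB_INVALID_ARRAY_PAGES pages of unsigned long slots. Assuming the usual x86 4 KiB base page, that is 4 * 4096 / 8 = 2048 addresses per batch, i.e. one mid-walk drain per 8 MiB of 4 KiB mappings. A small compile-time check of that arithmetic (PAGE_SIZE hard-coded to 4096 here as an assumption):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL	/* assumption: x86 4 KiB base pages */
#define TLB_INVALID_ARRAY_PAGES 4

/* 4 pages * 4096 bytes / 8 bytes per entry = 2048 addresses per batch */
static_assert(TLB_INVALID_ARRAY_PAGES * PAGE_SIZE / sizeof(uint64_t) == 2048,
	      "unexpected batch capacity");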


@@ -885,86 +885,98 @@ void coredump(struct thread *thread, void *regs)
 	freecore(&coretable);
 }
 
 void remote_flush_tlb_cpumask(struct process_vm *vm,
 		unsigned long addr, int cpu_id)
+{
+	unsigned long __addr = addr;
+	return remote_flush_tlb_array_cpumask(vm, &__addr, 1, cpu_id);
+}
+
+void remote_flush_tlb_array_cpumask(struct process_vm *vm,
+		unsigned long *addr,
+		int nr_addr,
+		int cpu_id)
 {
 	unsigned long cpu;
 	int flush_ind;
 	struct tlb_flush_entry *flush_entry;
 	cpu_set_t _cpu_set;
 
-	if (addr) {
-		flush_ind = (addr >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
+	if (addr[0]) {
+		flush_ind = (addr[0] >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
 	}
 	/* Zero address denotes full TLB flush */
 	else {
 		/* Random.. */
 		flush_ind = (rdtsc()) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
 	}
 
 	flush_entry = &tlb_flush_vector[flush_ind];
 
 	/* Take a copy of the cpu set so that we don't hold the lock
 	 * all the way while interrupting other cores */
 	ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock);
 	memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t));
 	ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock);
 
 	dkprintf("trying to aquire flush_entry->lock flush_ind: %d\n", flush_ind);
 	ihk_mc_spinlock_lock_noirq(&flush_entry->lock);
 
 	flush_entry->vm = vm;
 	flush_entry->addr = addr;
+	flush_entry->nr_addr = nr_addr;
 	ihk_atomic_set(&flush_entry->pending, 0);
 
 	dkprintf("lock aquired, iterating cpu mask.. flush_ind: %d\n", flush_ind);
 
 	/* Loop through CPUs in this address space and interrupt them for
 	 * TLB flush on the specified address */
 	for_each_set_bit(cpu, (const unsigned long*)&_cpu_set.__bits, CPU_SETSIZE) {
 
 		if (ihk_mc_get_processor_id() == cpu)
 			continue;
 
 		ihk_atomic_inc(&flush_entry->pending);
 		dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n",
 				flush_ind, addr, cpu);
 
 		ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id,
 				flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START);
 	}
 
 #ifdef DEBUG_IC_TLB
 	{
 		unsigned long tsc;
 		tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */
 #endif
-		if (flush_entry->addr) {
-			flush_tlb_single(flush_entry->addr & PAGE_MASK);
+		if (flush_entry->addr[0]) {
+			int i;
+
+			for (i = 0; i < flush_entry->nr_addr; ++i) {
+				flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
+			}
 		}
 		/* Zero address denotes full TLB flush */
 		else {
 			flush_tlb();
 		}
 
-		/* Flush on this core */
-		flush_tlb_single(addr & PAGE_MASK);
 
 		/* Wait for all cores */
 		while (ihk_atomic_read(&flush_entry->pending) != 0) {
 			cpu_pause();
 
 #ifdef DEBUG_IC_TLB
 			if (rdtsc() > tsc) {
 				kprintf("waited 10 secs for remote TLB!! -> panic_all()\n");
 				panic_all_cores("waited 10 secs for remote TLB!!\n");
 			}
 #endif
 		}
 
 #ifdef DEBUG_IC_TLB
 	}
 #endif
 
 	ihk_mc_spinlock_unlock_noirq(&flush_entry->lock);
 }
@@ -975,25 +987,27 @@ void tlb_flush_handler(int vector)
 #endif // PROFILE_ENABLE
 	int flags = cpu_disable_interrupt_save();
 	struct tlb_flush_entry *flush_entry = &tlb_flush_vector[vector -
 		IHK_TLB_FLUSH_IRQ_VECTOR_START];
 
-	dkprintf("decreasing pending cnt for %d\n",
-		vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
-
-	/* Decrease counter */
-	ihk_atomic_dec(&flush_entry->pending);
-
-	dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr);
-
-	if (flush_entry->addr) {
-		flush_tlb_single(flush_entry->addr & PAGE_MASK);
+	if (flush_entry->addr[0]) {
+		int i;
+
+		for (i = 0; i < flush_entry->nr_addr; ++i) {
+			flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
+			dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr[i]);
+		}
 	}
 	/* Zero address denotes full TLB flush */
 	else {
 		flush_tlb();
 	}
 
+	/* Decrease counter */
+	dkprintf("decreasing pending cnt for %d\n",
+		vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
+	ihk_atomic_dec(&flush_entry->pending);
+
 	cpu_restore_interrupt(flags);
 
 #ifdef PROFILE_ENABLE
 	{
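
One detail in the handler hunk above: the pending decrement used to come before the flush and now comes after it. Since flush_entry->addr is now a pointer into the initiator's batch array, and the initiator spins on pending and then reuses (or, in clear_range(), frees) that array, decrementing only after the loop means the initiator's wait also guarantees that no remote core is still reading the array. A minimal model of that handshake using C11 threads and atomics; the names and the printf standing in for the actual TLB flush are illustrative, not kernel code.

#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

static unsigned long addrs[] = { 0x1000, 0x2000, 0x3000 };
static int nr_addrs = 3;
static atomic_int pending;

/* "Remote CPU": walk the shared address array first, signal completion last. */
static int handler(void *arg)
{
	int i;

	(void)arg;
	for (i = 0; i < nr_addrs; i++)
		printf("remote flush of 0x%lx\n", addrs[i]);
	atomic_fetch_sub(&pending, 1);	/* decrement only once addrs[] is no longer needed */
	return 0;
}

int main(void)
{
	thrd_t t;

	atomic_store(&pending, 1);	/* one "remote CPU" in this demo */
	thrd_create(&t, handler, NULL);

	/* Initiator: wait for all remote cores, like the spin loop on flush_entry->pending. */
	while (atomic_load(&pending) != 0)
		thrd_yield();

	/* Only past this point is it safe to reuse or free addrs[]. */
	thrd_join(t, NULL);
	return 0;
}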


@@ -215,6 +215,10 @@ int ihk_mc_get_memory_chunk(int id,
 void remote_flush_tlb_cpumask(struct process_vm *vm,
 		unsigned long addr, int cpu_id);
+void remote_flush_tlb_array_cpumask(struct process_vm *vm,
+		unsigned long *addr,
+		int nr_addr,
+		int cpu_id);
 
 int ihk_set_kmsg(unsigned long addr, unsigned long size);
 char *ihk_get_kargs();
@@ -226,7 +230,8 @@ extern void (*__tlb_flush_handler)(int vector);
 
 struct tlb_flush_entry {
 	struct process_vm *vm;
-	unsigned long addr;
+	unsigned long *addr;
+	int nr_addr;
 	ihk_atomic_t pending;
 	ihk_spinlock_t lock;
 } __attribute__((aligned(64)));