mem: per-CPU allocator cache (ThunderX2 workaround)

Change-Id: I7694524c5e9674a6f7bfcd911f8b0dbbead7df5a
Author: Balazs Gerofi
Date: 2019-05-10 08:29:52 +09:00
Committed-by: Masamichi Takagi
Parent: 239c95449b
Commit: 99fba2df1c
3 changed files with 41 additions and 0 deletions


@@ -42,9 +42,11 @@ void cpu_local_var_init(void)
 		clv[i].monitor = monitor->cpu + i;
 		clv[i].rusage = rusage.cpu + i;
 		INIT_LIST_HEAD(&clv[i].smp_func_req_list);
+		clv[i].free_chunks.rb_node = NULL;
 	}
 
 	cpu_local_var_initialized = 1;
+	smp_mb();
 }
 
 struct cpu_local_var *get_cpu_local_var(int id)
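
For illustration, a minimal user-space sketch of the publication pattern in this hunk: the per-CPU caches are initialized first, the flag is set, and a full barrier then forces the flag store out, mirroring the smp_mb() placement above. The names and the __sync_synchronize() barrier below are stand-ins for illustration, not McKernel code.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define NCPUS 4

struct percpu_state {
	void *free_chunks_root;		/* stands in for struct rb_root */
};

static struct percpu_state state[NCPUS];
static volatile int state_initialized;

static void state_init(void)
{
	int i;

	for (i = 0; i < NCPUS; i++)
		state[i].free_chunks_root = NULL;	/* empty cache, as in the loop above */

	state_initialized = 1;
	/* Full barrier after the store, mirroring smp_mb() above: the flag
	 * is pushed out before this CPU moves on to use the allocator. */
	__sync_synchronize();
}

static void *observer(void *arg)
{
	(void)arg;
	if (state_initialized)		/* the same check the allocator fast path makes */
		printf("cache[0] root: %p\n", state[0].free_chunks_root);
	return NULL;
}

int main(void)
{
	pthread_t t;

	state_init();
	pthread_create(&t, NULL, observer, NULL);
	pthread_join(t, NULL);
	return 0;
}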


@@ -103,8 +103,11 @@ struct cpu_local_var {
 	/* UTI */
 	void *uti_futex_resp;
 
+	/* Per-CPU memory allocator cache */
+	struct rb_root free_chunks;
 } __attribute__((aligned(64)));
 
+extern int cpu_local_var_initialized;
 struct cpu_local_var *get_cpu_local_var(int id);
 
 static struct cpu_local_var *get_this_cpu_local_var(void)
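
The commit only adds the tree root to struct cpu_local_var; the node layout belongs to the __page_alloc_rbtree_* helpers, which this diff does not show. Purely as a hypothetical sketch of what such a cache might store (the rbtree stand-ins and the free_chunk layout are assumptions, not McKernel code):

/* Stand-ins for the kernel's rbtree types, just enough to compile. */
struct rb_node {
	struct rb_node *rb_left;
	struct rb_node *rb_right;
};
struct rb_root {
	struct rb_node *rb_node;	/* NULL means the cache is empty */
};

/* Hypothetical cached-chunk node: a free physical range keyed by its
 * start address, so that neighbouring frees could be coalesced. */
struct free_chunk {
	struct rb_node node;		/* links the chunk into free_chunks */
	unsigned long addr;		/* start of the free range */
	unsigned long size;		/* length in bytes */
};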


@@ -19,6 +19,7 @@
 #include <memory.h>
 #include <bitops.h>
 #include <errno.h>
+#include <cls.h>
 
 //#define DEBUG_PRINT_PAGE_ALLOC
@@ -608,6 +609,22 @@ unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
 	unsigned long addr = 0;
 	mcs_lock_node_t mcs_node;
 
+	/* Check CPU local cache first */
+	if (cpu_local_var_initialized) {
+		unsigned long irqflags;
+
+		irqflags = cpu_disable_interrupt_save();
+		addr = __page_alloc_rbtree_alloc_pages(&cpu_local_var(free_chunks),
+				npages, p2align);
+		cpu_restore_interrupt(irqflags);
+
+		if (addr) {
+			dkprintf("%s: 0x%lx:%d allocated from cache\n",
+					__func__, addr, npages);
+			return addr;
+		}
+	}
+
 	mcs_lock_lock(&node->lock, &mcs_node);
 
 	if (node->nr_free_pages < npages) {
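
The shape of this fast path: the per-CPU tree is consulted with only interrupts disabled, and the MCS-locked NUMA pool is taken only on a cache miss. A condensed user-space analogue of that control flow, with a mutex and a bump cursor standing in for the MCS lock and the node's free list (all names here are illustrative, none are McKernel's):

#include <pthread.h>
#include <stdint.h>

#define CACHE_SLOTS 8
#define PAGE_SIZE 4096

/* One CPU's cache: a tiny stack of page addresses. Only its owner CPU
 * touches it, so no lock is needed; the kernel just disables IRQs to
 * keep interrupt handlers on the same CPU from racing with it. */
static struct {
	uintptr_t slot[CACHE_SLOTS];
	int top;
} cache;

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static uintptr_t pool_cursor = 0x100000;	/* fake global pool */

static uintptr_t alloc_page(void)
{
	uintptr_t addr = 0;

	/* Fast path: per-CPU cache, no global lock taken. */
	if (cache.top > 0)
		addr = cache.slot[--cache.top];
	if (addr)
		return addr;

	/* Slow path: shared pool under the node lock, as the code above
	 * falls back to mcs_lock_lock(&node->lock, ...) on a miss. */
	pthread_mutex_lock(&node_lock);
	addr = pool_cursor;
	pool_cursor += PAGE_SIZE;
	pthread_mutex_unlock(&node_lock);
	return addr;
}

int main(void)
{
	return alloc_page() ? 0 : 1;
}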
@@ -635,6 +652,25 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
 {
 	mcs_lock_node_t mcs_node;
 
+	/* CPU local cache */
+	if (cpu_local_var_initialized) {
+		unsigned long irqflags;
+
+		irqflags = cpu_disable_interrupt_save();
+		if (__page_alloc_rbtree_free_range(&cpu_local_var(free_chunks), addr,
+				npages << PAGE_SHIFT)) {
+			kprintf("%s: ERROR: freeing 0x%lx:%lu to CPU local cache\n",
+					__FUNCTION__, addr, npages << PAGE_SHIFT);
+			cpu_restore_interrupt(irqflags);
+		}
+		else {
+			dkprintf("%s: 0x%lx:%d freed to cache\n",
+					__func__, addr, npages);
+			cpu_restore_interrupt(irqflags);
+			return;
+		}
+	}
+
 	if (addr < node->min_addr ||
 	    (addr + (npages << PAGE_SHIFT)) > node->max_addr) {
 		return;
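
The free path mirrors the allocation path, with one asymmetry worth noting: if __page_alloc_rbtree_free_range() fails (nonzero return), the error is logged, interrupts are restored, and control falls through to the regular node-wide free below, so the range is never leaked; on success the function returns early and the pages stay cached for the next allocation on this CPU. As on the allocation side, disabling interrupts is presumably sufficient protection because free_chunks is only ever touched by its owning CPU.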