NUMA: support multiple physical allocators

This commit is contained in:
Balazs Gerofi
2016-09-15 19:15:19 -04:00
parent f4db8b96de
commit 2929fbb803
6 changed files with 160 additions and 62 deletions

View File

@@ -306,7 +306,7 @@ struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
void *early_alloc_page(void);
void *early_alloc_pages(int nr_pages);
void *get_last_early_heap(void);
void flush_tlb(void);
void flush_tlb_single(unsigned long addr);

View File

@@ -35,7 +35,7 @@ static struct ihk_mc_pa_ops *pa_ops;
extern unsigned long x86_kernel_phys_base;
void *early_alloc_page(void)
void *early_alloc_pages(int nr_pages)
{
void *p;
@@ -48,7 +48,7 @@ void *early_alloc_page(void)
panic("Early allocator is already finalized. Do not use it.\n");
}
p = last_page;
last_page += PAGE_SIZE;
last_page += (nr_pages * PAGE_SIZE);
return p;
}
@@ -58,7 +58,7 @@ void *arch_alloc_page(enum ihk_mc_ap_flag flag)
if (pa_ops)
return pa_ops->alloc_page(1, PAGE_P2ALIGN, flag);
else
return early_alloc_page();
return early_alloc_pages(1);
}
void arch_free_page(void *ptr)
{
@@ -2111,18 +2111,24 @@ void init_page_table(void)
}
extern void __reserve_arch_pages(unsigned long, unsigned long,
void (*)(unsigned long, unsigned long, int));
void (*)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int));
void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
void (*cb)(unsigned long, unsigned long, int))
void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
unsigned long start, unsigned long end,
void (*cb)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int))
{
/* Reserve Text + temporal heap */
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
cb(pa_allocator, virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
/* Reserve trampoline area to boot the second ap */
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
cb(pa_allocator, ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
/* Reserve the null page */
cb(0, PAGE_SIZE, 0);
/* Micro-arch specific */
cb(pa_allocator, 0, PAGE_SIZE, 0);
/*
* Micro-arch specific
* TODO: this does nothing in SMP mode, update it for KNC if necessary
*/
__reserve_arch_pages(start, end, cb);
}

View File

@@ -48,8 +48,8 @@
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
static struct ihk_page_allocator_desc *pa_allocator;
static unsigned long pa_start, pa_end;
static struct ihk_mc_numa_node *memory_nodes = NULL;
extern void unhandled_page_fault(struct thread *, void *, void *);
extern int interrupt_from_user(void *);
@@ -58,13 +58,14 @@ struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
static void reserve_pages(unsigned long start, unsigned long end, int type)
static void reserve_pages(struct ihk_page_allocator_desc *pa_allocator,
unsigned long start, unsigned long end, int type)
{
if (start < pa_start) {
if (start < pa_allocator->start) {
start = pa_allocator->start;
}
if (end > pa_end) {
end = pa_allocator->last;
if (end > pa_allocator->end) {
end = pa_allocator->end;
}
if (start >= end) {
return;
@@ -77,7 +78,22 @@ static void reserve_pages(unsigned long start, unsigned long end, int type)
static void *allocate_aligned_pages(int npages, int p2align,
enum ihk_mc_ap_flag flag)
{
unsigned long pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
unsigned long pa;
int i;
/* TODO: match NUMA id and distance matrix with allocating core */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
if (pa) break;
}
if (pa) break;
}
/* all_pagealloc_alloc returns zero when error occured,
and callee (in mcos/kernel/process.c) so propagate it */
if(pa)
@@ -92,6 +108,29 @@ static void *allocate_pages(int npages, enum ihk_mc_ap_flag flag)
return allocate_aligned_pages(npages, PAGE_P2ALIGN, flag);
}
static void __free_pages_in_allocator(void *va, int npages)
{
int i;
unsigned long pa_start = virt_to_phys(va);
unsigned long pa_end = pa_start + (npages * PAGE_SIZE);
/* Find corresponding memory allocator */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
if (pa_start >= pa_allocator->start &&
pa_end <= pa_allocator->end) {
ihk_pagealloc_free(pa_allocator, pa_start, npages);
return;
}
}
}
}
static void free_pages(void *va, int npages)
{
struct list_head *pendings = &cpu_local_var(pending_free_pages);
@@ -110,7 +149,7 @@ static void free_pages(void *va, int npages)
}
}
ihk_pagealloc_free(pa_allocator, virt_to_phys(va), npages);
__free_pages_in_allocator(va, npages);
}
void begin_free_pages_pending(void) {
@@ -139,7 +178,8 @@ void finish_free_pages_pending(void)
}
page->mode = PM_NONE;
list_del(&page->list);
ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->offset);
__free_pages_in_allocator(phys_to_virt(page_to_phys(page)),
page->offset);
}
pendings->next = pendings->prev = NULL;
@@ -155,9 +195,22 @@ void sbox_write(int offset, unsigned int value);
static void query_free_mem_interrupt_handler(void *priv)
{
int pages = ihk_pagealloc_query_free(pa_allocator);
kprintf("McKernel free pages: %d\n", pages);
int i, pages = 0;
/* Iterate memory allocators */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
int __pages = ihk_pagealloc_query_free(pa_allocator);
kprintf("McKernel free pages in (0x%lx - 0x%lx): %d\n",
pa_allocator->start, pa_allocator->end, __pages);
pages += __pages;
}
}
kprintf("McKernel free pages in total: %d\n", pages);
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
@@ -390,81 +443,97 @@ out:
return;
}
static void page_allocator_init(uint64_t start, uint64_t end, int initial)
static struct ihk_page_allocator_desc *page_allocator_init(uint64_t start,
uint64_t end, int initial)
{
struct ihk_page_allocator_desc *pa_allocator;
unsigned long page_map_pa, pages;
void *page_map;
unsigned int i;
start &= PAGE_MASK;
pa_start = start & LARGE_PAGE_MASK;
pa_end = (end + PAGE_SIZE - 1) & PAGE_MASK;
pa_start = (start + PAGE_SIZE - 1) & PAGE_MASK;
pa_end = end & PAGE_MASK;
#ifndef ATTACHED_MIC
page_map_pa = ihk_mc_get_memory_address(IHK_MC_GMA_HEAP_START, 0);
#else
#ifdef ATTACHED_MIC
/*
* Can't allocate in reserved area
* TODO: figure this out automatically!
*/
page_map_pa = 0x100000;
#else
page_map_pa = initial ? virt_to_phys(get_last_early_heap()) : pa_start;
#endif
page_map = phys_to_virt(page_map_pa);
pa_allocator = __ihk_pagealloc_init(pa_start, pa_end - pa_start,
PAGE_SIZE, page_map, &pages);
reserve_pages(page_map_pa, page_map_pa + pages * PAGE_SIZE, 0);
reserve_pages(pa_allocator, page_map_pa,
page_map_pa + pages * PAGE_SIZE, 0);
if (pa_start < start) {
reserve_pages(pa_start, start, 0);
reserve_pages(pa_allocator, pa_start, start, 0);
}
/* BIOS reserved ranges */
for (i = 1; i <= ihk_mc_get_memory_address(IHK_MC_NR_RESERVED_AREAS, 0);
++i) {
reserve_pages(ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i),
ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0);
reserve_pages(pa_allocator,
ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i),
ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0);
}
ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages);
ihk_mc_reserve_arch_pages(pa_allocator, pa_start, pa_end, reserve_pages);
kprintf("Available memory: %ld bytes in %ld pages\n",
(ihk_pagealloc_count(pa_allocator) * PAGE_SIZE),
ihk_pagealloc_count(pa_allocator));
/* Notify the ihk to use my page allocator */
ihk_mc_set_page_allocator(&allocator);
/* And prepare some exception handlers */
ihk_mc_set_page_fault_handler(page_fault_handler);
/* Register query free mem handler */
ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM),
&query_free_mem_handler);
return pa_allocator;
}
static void numa_init(void)
{
int i;
int i, j;
struct ihk_mc_numa_node *node;
memory_nodes = early_alloc_pages((sizeof(*memory_nodes) *
ihk_mc_get_nr_numa_nodes() + PAGE_SIZE - 1)
>> PAGE_SHIFT);
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
int linux_numa_id, type;
ihk_mc_get_numa_node(i, &linux_numa_id, &type);
memory_nodes[i].id = i;
memory_nodes[i].linux_numa_id = linux_numa_id;
memory_nodes[i].type = type;
INIT_LIST_HEAD(&memory_nodes[i].allocators);
kprintf("NUMA %d, Linux NUMA id: %d, type: %d\n",
kprintf("NUMA: %d, Linux NUMA: %d, type: %d\n",
i, linux_numa_id, type);
}
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
node = memory_nodes;
for (j = 0; j < ihk_mc_get_nr_memory_chunks(); ++j) {
unsigned long start, end;
int linux_numa_id;
struct ihk_page_allocator_desc *allocator;
ihk_mc_get_memory_chunk(i, &start, &end, &linux_numa_id);
ihk_mc_get_memory_chunk(j, &start, &end, &linux_numa_id);
kprintf("Physical memory region: %p - %p @ Linux NUMA id: %d\n",
start, end, linux_numa_id);
allocator = page_allocator_init(start, end, (j == 0));
while (node->linux_numa_id != linux_numa_id &&
node < (memory_nodes + ihk_mc_get_nr_numa_nodes())) ++node;
if (node == (memory_nodes + ihk_mc_get_nr_numa_nodes())) {
panic("invalid memory chunk: no corresponding NUMA node found\n");
}
list_add_tail(&allocator->list, &node->allocators);
kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
start, end,
ihk_pagealloc_count(allocator) * PAGE_SIZE,
ihk_pagealloc_count(allocator),
node->id);
}
}
@@ -705,11 +774,20 @@ void ihk_mc_clean_micpa(void){
void mem_init(void)
{
/* Initialize NUMA information and memory allocator bitmaps */
numa_init();
page_allocator_init(
ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_START, 0),
ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_END, 0), 1);
/* Notify the ihk to use my page allocator */
ihk_mc_set_page_allocator(&allocator);
/* And prepare some exception handlers */
ihk_mc_set_page_fault_handler(page_fault_handler);
/* Register query free mem handler */
ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM),
&query_free_mem_handler);
/* Init page frame hash */
page_init();
/* Prepare the kernel virtual map space */

View File

@@ -72,8 +72,11 @@ struct ihk_mc_memory_node {
unsigned long ihk_mc_get_memory_address(enum ihk_mc_gma_type, int);
void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
void (*cb)(unsigned long, unsigned long, int));
struct ihk_page_allocator_desc *pa_allocator;
void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
unsigned long start, unsigned long end,
void (*cb)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int));
struct ihk_mc_pa_ops {
void *(*alloc_page)(int, int, enum ihk_mc_ap_flag);

View File

@@ -14,14 +14,24 @@
#ifndef __HEADER_GENERIC_IHK_PAGE_ALLOC
#define __HEADER_GENERIC_IHK_PAGE_ALLOC
#include <list.h>
/* XXX: Physical memory management shouldn't be part of IHK */
struct ihk_mc_numa_node {
int id;
int linux_numa_id;
int type;
struct list_head allocators;
};
struct ihk_page_allocator_desc {
unsigned long start;
unsigned long start, end;
unsigned int last;
unsigned int count;
unsigned int flag;
unsigned int shift;
ihk_spinlock_t lock;
unsigned int pad;
struct list_head list;
unsigned long map[0];
};

View File

@@ -64,13 +64,14 @@ void *__ihk_pagealloc_init(unsigned long start, unsigned long size,
memset(desc, 0, descsize * PAGE_SIZE);
desc->start = start;
desc->end = start + size;
desc->last = 0;
desc->count = mapaligned >> 3;
desc->shift = page_shift;
desc->flag = flag;
kprintf("Page allocator: %lx - %lx (%d)\n", start, start + size,
page_shift);
//kprintf("page allocator @ %lx - %lx (%d)\n", start, start + size,
// page_shift);
ihk_mc_spinlock_init(&desc->lock);