diff --git a/arch/x86/kernel/include/arch-memory.h b/arch/x86/kernel/include/arch-memory.h index 47ebbb40..2997ca9d 100644 --- a/arch/x86/kernel/include/arch-memory.h +++ b/arch/x86/kernel/include/arch-memory.h @@ -306,7 +306,7 @@ struct page_table; void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr); pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr); -void *early_alloc_page(void); +void *early_alloc_pages(int nr_pages); void *get_last_early_heap(void); void flush_tlb(void); void flush_tlb_single(unsigned long addr); diff --git a/arch/x86/kernel/memory.c b/arch/x86/kernel/memory.c index 08c23cee..6b8f9380 100644 --- a/arch/x86/kernel/memory.c +++ b/arch/x86/kernel/memory.c @@ -35,7 +35,7 @@ static struct ihk_mc_pa_ops *pa_ops; extern unsigned long x86_kernel_phys_base; -void *early_alloc_page(void) +void *early_alloc_pages(int nr_pages) { void *p; @@ -48,7 +48,7 @@ void *early_alloc_page(void) panic("Early allocator is already finalized. Do not use it.\n"); } p = last_page; - last_page += PAGE_SIZE; + last_page += (nr_pages * PAGE_SIZE); return p; } @@ -58,7 +58,7 @@ void *arch_alloc_page(enum ihk_mc_ap_flag flag) if (pa_ops) return pa_ops->alloc_page(1, PAGE_P2ALIGN, flag); else - return early_alloc_page(); + return early_alloc_pages(1); } void arch_free_page(void *ptr) { @@ -2111,18 +2111,24 @@ void init_page_table(void) } extern void __reserve_arch_pages(unsigned long, unsigned long, - void (*)(unsigned long, unsigned long, int)); + void (*)(struct ihk_page_allocator_desc *, + unsigned long, unsigned long, int)); -void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end, - void (*cb)(unsigned long, unsigned long, int)) +void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator, + unsigned long start, unsigned long end, + void (*cb)(struct ihk_page_allocator_desc *, + unsigned long, unsigned long, int)) { /* Reserve Text + temporal heap */ - cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0); + cb(pa_allocator, virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0); /* Reserve trampoline area to boot the second ap */ - cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0); + cb(pa_allocator, ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0); /* Reserve the null page */ - cb(0, PAGE_SIZE, 0); - /* Micro-arch specific */ + cb(pa_allocator, 0, PAGE_SIZE, 0); + /* + * Micro-arch specific + * TODO: this does nothing in SMP mode, update it for KNC if necessary + */ __reserve_arch_pages(start, end, cb); } diff --git a/kernel/mem.c b/kernel/mem.c index 5f4ef85c..968548fe 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -48,8 +48,8 @@ #define ekprintf(...) kprintf(__VA_ARGS__) #endif -static struct ihk_page_allocator_desc *pa_allocator; static unsigned long pa_start, pa_end; +static struct ihk_mc_numa_node *memory_nodes = NULL; extern void unhandled_page_fault(struct thread *, void *, void *); extern int interrupt_from_user(void *); @@ -58,13 +58,14 @@ struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE]; int anon_on_demand = 0; -static void reserve_pages(unsigned long start, unsigned long end, int type) +static void reserve_pages(struct ihk_page_allocator_desc *pa_allocator, + unsigned long start, unsigned long end, int type) { - if (start < pa_start) { + if (start < pa_allocator->start) { start = pa_allocator->start; } - if (end > pa_end) { - end = pa_allocator->last; + if (end > pa_allocator->end) { + end = pa_allocator->end; } if (start >= end) { return; @@ -77,7 +78,22 @@ static void reserve_pages(unsigned long start, unsigned long end, int type) static void *allocate_aligned_pages(int npages, int p2align, enum ihk_mc_ap_flag flag) { - unsigned long pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align); + unsigned long pa; + int i; + + /* TODO: match NUMA id and distance matrix with allocating core */ + for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { + struct ihk_page_allocator_desc *pa_allocator; + + list_for_each_entry(pa_allocator, + &memory_nodes[i].allocators, list) { + pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align); + + if (pa) break; + } + + if (pa) break; + } /* all_pagealloc_alloc returns zero when error occured, and callee (in mcos/kernel/process.c) so propagate it */ if(pa) @@ -92,6 +108,29 @@ static void *allocate_pages(int npages, enum ihk_mc_ap_flag flag) return allocate_aligned_pages(npages, PAGE_P2ALIGN, flag); } +static void __free_pages_in_allocator(void *va, int npages) +{ + int i; + unsigned long pa_start = virt_to_phys(va); + unsigned long pa_end = pa_start + (npages * PAGE_SIZE); + + /* Find corresponding memory allocator */ + for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { + struct ihk_page_allocator_desc *pa_allocator; + + list_for_each_entry(pa_allocator, + &memory_nodes[i].allocators, list) { + + if (pa_start >= pa_allocator->start && + pa_end <= pa_allocator->end) { + ihk_pagealloc_free(pa_allocator, pa_start, npages); + return; + } + } + } +} + + static void free_pages(void *va, int npages) { struct list_head *pendings = &cpu_local_var(pending_free_pages); @@ -110,7 +149,7 @@ static void free_pages(void *va, int npages) } } - ihk_pagealloc_free(pa_allocator, virt_to_phys(va), npages); + __free_pages_in_allocator(va, npages); } void begin_free_pages_pending(void) { @@ -139,7 +178,8 @@ void finish_free_pages_pending(void) } page->mode = PM_NONE; list_del(&page->list); - ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->offset); + __free_pages_in_allocator(phys_to_virt(page_to_phys(page)), + page->offset); } pendings->next = pendings->prev = NULL; @@ -155,9 +195,22 @@ void sbox_write(int offset, unsigned int value); static void query_free_mem_interrupt_handler(void *priv) { - int pages = ihk_pagealloc_query_free(pa_allocator); - - kprintf("McKernel free pages: %d\n", pages); + int i, pages = 0; + + /* Iterate memory allocators */ + for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { + struct ihk_page_allocator_desc *pa_allocator; + + list_for_each_entry(pa_allocator, + &memory_nodes[i].allocators, list) { + int __pages = ihk_pagealloc_query_free(pa_allocator); + kprintf("McKernel free pages in (0x%lx - 0x%lx): %d\n", + pa_allocator->start, pa_allocator->end, __pages); + pages += __pages; + } + } + + kprintf("McKernel free pages in total: %d\n", pages); if (find_command_line("memdebug")) { extern void kmalloc_memcheck(void); @@ -390,81 +443,97 @@ out: return; } -static void page_allocator_init(uint64_t start, uint64_t end, int initial) +static struct ihk_page_allocator_desc *page_allocator_init(uint64_t start, + uint64_t end, int initial) { + struct ihk_page_allocator_desc *pa_allocator; unsigned long page_map_pa, pages; void *page_map; unsigned int i; start &= PAGE_MASK; - pa_start = start & LARGE_PAGE_MASK; - pa_end = (end + PAGE_SIZE - 1) & PAGE_MASK; + pa_start = (start + PAGE_SIZE - 1) & PAGE_MASK; + pa_end = end & PAGE_MASK; -#ifndef ATTACHED_MIC - page_map_pa = ihk_mc_get_memory_address(IHK_MC_GMA_HEAP_START, 0); -#else +#ifdef ATTACHED_MIC /* * Can't allocate in reserved area * TODO: figure this out automatically! */ page_map_pa = 0x100000; +#else + page_map_pa = initial ? virt_to_phys(get_last_early_heap()) : pa_start; #endif + page_map = phys_to_virt(page_map_pa); pa_allocator = __ihk_pagealloc_init(pa_start, pa_end - pa_start, PAGE_SIZE, page_map, &pages); - reserve_pages(page_map_pa, page_map_pa + pages * PAGE_SIZE, 0); + reserve_pages(pa_allocator, page_map_pa, + page_map_pa + pages * PAGE_SIZE, 0); + if (pa_start < start) { - reserve_pages(pa_start, start, 0); + reserve_pages(pa_allocator, pa_start, start, 0); } /* BIOS reserved ranges */ for (i = 1; i <= ihk_mc_get_memory_address(IHK_MC_NR_RESERVED_AREAS, 0); ++i) { - - reserve_pages(ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i), - ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0); + reserve_pages(pa_allocator, + ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i), + ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0); } - ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages); + ihk_mc_reserve_arch_pages(pa_allocator, pa_start, pa_end, reserve_pages); - kprintf("Available memory: %ld bytes in %ld pages\n", - (ihk_pagealloc_count(pa_allocator) * PAGE_SIZE), - ihk_pagealloc_count(pa_allocator)); - - /* Notify the ihk to use my page allocator */ - ihk_mc_set_page_allocator(&allocator); - - /* And prepare some exception handlers */ - ihk_mc_set_page_fault_handler(page_fault_handler); - - /* Register query free mem handler */ - ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM), - &query_free_mem_handler); + return pa_allocator; } static void numa_init(void) { - int i; + int i, j; + struct ihk_mc_numa_node *node; + memory_nodes = early_alloc_pages((sizeof(*memory_nodes) * + ihk_mc_get_nr_numa_nodes() + PAGE_SIZE - 1) + >> PAGE_SHIFT); for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { int linux_numa_id, type; ihk_mc_get_numa_node(i, &linux_numa_id, &type); + memory_nodes[i].id = i; + memory_nodes[i].linux_numa_id = linux_numa_id; + memory_nodes[i].type = type; + INIT_LIST_HEAD(&memory_nodes[i].allocators); - kprintf("NUMA %d, Linux NUMA id: %d, type: %d\n", + kprintf("NUMA: %d, Linux NUMA: %d, type: %d\n", i, linux_numa_id, type); } - for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { + node = memory_nodes; + for (j = 0; j < ihk_mc_get_nr_memory_chunks(); ++j) { unsigned long start, end; int linux_numa_id; + struct ihk_page_allocator_desc *allocator; - ihk_mc_get_memory_chunk(i, &start, &end, &linux_numa_id); + ihk_mc_get_memory_chunk(j, &start, &end, &linux_numa_id); - kprintf("Physical memory region: %p - %p @ Linux NUMA id: %d\n", - start, end, linux_numa_id); + allocator = page_allocator_init(start, end, (j == 0)); + while (node->linux_numa_id != linux_numa_id && + node < (memory_nodes + ihk_mc_get_nr_numa_nodes())) ++node; + + if (node == (memory_nodes + ihk_mc_get_nr_numa_nodes())) { + panic("invalid memory chunk: no corresponding NUMA node found\n"); + } + + list_add_tail(&allocator->list, &node->allocators); + + kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n", + start, end, + ihk_pagealloc_count(allocator) * PAGE_SIZE, + ihk_pagealloc_count(allocator), + node->id); } } @@ -705,11 +774,20 @@ void ihk_mc_clean_micpa(void){ void mem_init(void) { + /* Initialize NUMA information and memory allocator bitmaps */ numa_init(); - page_allocator_init( - ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_START, 0), - ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_END, 0), 1); + /* Notify the ihk to use my page allocator */ + ihk_mc_set_page_allocator(&allocator); + + /* And prepare some exception handlers */ + ihk_mc_set_page_fault_handler(page_fault_handler); + + /* Register query free mem handler */ + ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM), + &query_free_mem_handler); + + /* Init page frame hash */ page_init(); /* Prepare the kernel virtual map space */ diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 998b33a4..88265eab 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -72,8 +72,11 @@ struct ihk_mc_memory_node { unsigned long ihk_mc_get_memory_address(enum ihk_mc_gma_type, int); -void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end, - void (*cb)(unsigned long, unsigned long, int)); +struct ihk_page_allocator_desc *pa_allocator; +void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator, + unsigned long start, unsigned long end, + void (*cb)(struct ihk_page_allocator_desc *, + unsigned long, unsigned long, int)); struct ihk_mc_pa_ops { void *(*alloc_page)(int, int, enum ihk_mc_ap_flag); diff --git a/lib/include/ihk/page_alloc.h b/lib/include/ihk/page_alloc.h index fe7ee473..26c83d48 100644 --- a/lib/include/ihk/page_alloc.h +++ b/lib/include/ihk/page_alloc.h @@ -14,14 +14,24 @@ #ifndef __HEADER_GENERIC_IHK_PAGE_ALLOC #define __HEADER_GENERIC_IHK_PAGE_ALLOC +#include + +/* XXX: Physical memory management shouldn't be part of IHK */ +struct ihk_mc_numa_node { + int id; + int linux_numa_id; + int type; + struct list_head allocators; +}; + struct ihk_page_allocator_desc { - unsigned long start; + unsigned long start, end; unsigned int last; unsigned int count; unsigned int flag; unsigned int shift; ihk_spinlock_t lock; - unsigned int pad; + struct list_head list; unsigned long map[0]; }; diff --git a/lib/page_alloc.c b/lib/page_alloc.c index ae6eebdd..06802a05 100644 --- a/lib/page_alloc.c +++ b/lib/page_alloc.c @@ -64,13 +64,14 @@ void *__ihk_pagealloc_init(unsigned long start, unsigned long size, memset(desc, 0, descsize * PAGE_SIZE); desc->start = start; + desc->end = start + size; desc->last = 0; desc->count = mapaligned >> 3; desc->shift = page_shift; desc->flag = flag; - kprintf("Page allocator: %lx - %lx (%d)\n", start, start + size, - page_shift); + //kprintf("page allocator @ %lx - %lx (%d)\n", start, start + size, + // page_shift); ihk_mc_spinlock_init(&desc->lock);