NUMA: support multiple physical allocators

This commit is contained in:
Balazs Gerofi
2016-09-15 19:15:19 -04:00
parent f4db8b96de
commit 2929fbb803
6 changed files with 160 additions and 62 deletions

View File

@ -306,7 +306,7 @@ struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr); void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr); pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
void *early_alloc_page(void); void *early_alloc_pages(int nr_pages);
void *get_last_early_heap(void); void *get_last_early_heap(void);
void flush_tlb(void); void flush_tlb(void);
void flush_tlb_single(unsigned long addr); void flush_tlb_single(unsigned long addr);

View File

@ -35,7 +35,7 @@ static struct ihk_mc_pa_ops *pa_ops;
extern unsigned long x86_kernel_phys_base; extern unsigned long x86_kernel_phys_base;
void *early_alloc_page(void) void *early_alloc_pages(int nr_pages)
{ {
void *p; void *p;
@ -48,7 +48,7 @@ void *early_alloc_page(void)
panic("Early allocator is already finalized. Do not use it.\n"); panic("Early allocator is already finalized. Do not use it.\n");
} }
p = last_page; p = last_page;
last_page += PAGE_SIZE; last_page += (nr_pages * PAGE_SIZE);
return p; return p;
} }
@ -58,7 +58,7 @@ void *arch_alloc_page(enum ihk_mc_ap_flag flag)
if (pa_ops) if (pa_ops)
return pa_ops->alloc_page(1, PAGE_P2ALIGN, flag); return pa_ops->alloc_page(1, PAGE_P2ALIGN, flag);
else else
return early_alloc_page(); return early_alloc_pages(1);
} }
void arch_free_page(void *ptr) void arch_free_page(void *ptr)
{ {
@ -2111,18 +2111,24 @@ void init_page_table(void)
} }
extern void __reserve_arch_pages(unsigned long, unsigned long, extern void __reserve_arch_pages(unsigned long, unsigned long,
void (*)(unsigned long, unsigned long, int)); void (*)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int));
void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end, void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
void (*cb)(unsigned long, unsigned long, int)) unsigned long start, unsigned long end,
void (*cb)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int))
{ {
/* Reserve Text + temporal heap */ /* Reserve Text + temporal heap */
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0); cb(pa_allocator, virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
/* Reserve trampoline area to boot the second ap */ /* Reserve trampoline area to boot the second ap */
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0); cb(pa_allocator, ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
/* Reserve the null page */ /* Reserve the null page */
cb(0, PAGE_SIZE, 0); cb(pa_allocator, 0, PAGE_SIZE, 0);
/* Micro-arch specific */ /*
* Micro-arch specific
* TODO: this does nothing in SMP mode, update it for KNC if necessary
*/
__reserve_arch_pages(start, end, cb); __reserve_arch_pages(start, end, cb);
} }

View File

@ -48,8 +48,8 @@
#define ekprintf(...) kprintf(__VA_ARGS__) #define ekprintf(...) kprintf(__VA_ARGS__)
#endif #endif
static struct ihk_page_allocator_desc *pa_allocator;
static unsigned long pa_start, pa_end; static unsigned long pa_start, pa_end;
static struct ihk_mc_numa_node *memory_nodes = NULL;
extern void unhandled_page_fault(struct thread *, void *, void *); extern void unhandled_page_fault(struct thread *, void *, void *);
extern int interrupt_from_user(void *); extern int interrupt_from_user(void *);
@ -58,13 +58,14 @@ struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0; int anon_on_demand = 0;
static void reserve_pages(unsigned long start, unsigned long end, int type) static void reserve_pages(struct ihk_page_allocator_desc *pa_allocator,
unsigned long start, unsigned long end, int type)
{ {
if (start < pa_start) { if (start < pa_allocator->start) {
start = pa_allocator->start; start = pa_allocator->start;
} }
if (end > pa_end) { if (end > pa_allocator->end) {
end = pa_allocator->last; end = pa_allocator->end;
} }
if (start >= end) { if (start >= end) {
return; return;
@ -77,7 +78,22 @@ static void reserve_pages(unsigned long start, unsigned long end, int type)
static void *allocate_aligned_pages(int npages, int p2align, static void *allocate_aligned_pages(int npages, int p2align,
enum ihk_mc_ap_flag flag) enum ihk_mc_ap_flag flag)
{ {
unsigned long pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align); unsigned long pa;
int i;
/* TODO: match NUMA id and distance matrix with allocating core */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
if (pa) break;
}
if (pa) break;
}
/* all_pagealloc_alloc returns zero when error occured, /* all_pagealloc_alloc returns zero when error occured,
and callee (in mcos/kernel/process.c) so propagate it */ and callee (in mcos/kernel/process.c) so propagate it */
if(pa) if(pa)
@ -92,6 +108,29 @@ static void *allocate_pages(int npages, enum ihk_mc_ap_flag flag)
return allocate_aligned_pages(npages, PAGE_P2ALIGN, flag); return allocate_aligned_pages(npages, PAGE_P2ALIGN, flag);
} }
static void __free_pages_in_allocator(void *va, int npages)
{
int i;
unsigned long pa_start = virt_to_phys(va);
unsigned long pa_end = pa_start + (npages * PAGE_SIZE);
/* Find corresponding memory allocator */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
if (pa_start >= pa_allocator->start &&
pa_end <= pa_allocator->end) {
ihk_pagealloc_free(pa_allocator, pa_start, npages);
return;
}
}
}
}
static void free_pages(void *va, int npages) static void free_pages(void *va, int npages)
{ {
struct list_head *pendings = &cpu_local_var(pending_free_pages); struct list_head *pendings = &cpu_local_var(pending_free_pages);
@ -110,7 +149,7 @@ static void free_pages(void *va, int npages)
} }
} }
ihk_pagealloc_free(pa_allocator, virt_to_phys(va), npages); __free_pages_in_allocator(va, npages);
} }
void begin_free_pages_pending(void) { void begin_free_pages_pending(void) {
@ -139,7 +178,8 @@ void finish_free_pages_pending(void)
} }
page->mode = PM_NONE; page->mode = PM_NONE;
list_del(&page->list); list_del(&page->list);
ihk_pagealloc_free(pa_allocator, page_to_phys(page), page->offset); __free_pages_in_allocator(phys_to_virt(page_to_phys(page)),
page->offset);
} }
pendings->next = pendings->prev = NULL; pendings->next = pendings->prev = NULL;
@ -155,9 +195,22 @@ void sbox_write(int offset, unsigned int value);
static void query_free_mem_interrupt_handler(void *priv) static void query_free_mem_interrupt_handler(void *priv)
{ {
int pages = ihk_pagealloc_query_free(pa_allocator); int i, pages = 0;
kprintf("McKernel free pages: %d\n", pages); /* Iterate memory allocators */
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
struct ihk_page_allocator_desc *pa_allocator;
list_for_each_entry(pa_allocator,
&memory_nodes[i].allocators, list) {
int __pages = ihk_pagealloc_query_free(pa_allocator);
kprintf("McKernel free pages in (0x%lx - 0x%lx): %d\n",
pa_allocator->start, pa_allocator->end, __pages);
pages += __pages;
}
}
kprintf("McKernel free pages in total: %d\n", pages);
if (find_command_line("memdebug")) { if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void); extern void kmalloc_memcheck(void);
@ -390,81 +443,97 @@ out:
return; return;
} }
static void page_allocator_init(uint64_t start, uint64_t end, int initial) static struct ihk_page_allocator_desc *page_allocator_init(uint64_t start,
uint64_t end, int initial)
{ {
struct ihk_page_allocator_desc *pa_allocator;
unsigned long page_map_pa, pages; unsigned long page_map_pa, pages;
void *page_map; void *page_map;
unsigned int i; unsigned int i;
start &= PAGE_MASK; start &= PAGE_MASK;
pa_start = start & LARGE_PAGE_MASK; pa_start = (start + PAGE_SIZE - 1) & PAGE_MASK;
pa_end = (end + PAGE_SIZE - 1) & PAGE_MASK; pa_end = end & PAGE_MASK;
#ifndef ATTACHED_MIC #ifdef ATTACHED_MIC
page_map_pa = ihk_mc_get_memory_address(IHK_MC_GMA_HEAP_START, 0);
#else
/* /*
* Can't allocate in reserved area * Can't allocate in reserved area
* TODO: figure this out automatically! * TODO: figure this out automatically!
*/ */
page_map_pa = 0x100000; page_map_pa = 0x100000;
#else
page_map_pa = initial ? virt_to_phys(get_last_early_heap()) : pa_start;
#endif #endif
page_map = phys_to_virt(page_map_pa); page_map = phys_to_virt(page_map_pa);
pa_allocator = __ihk_pagealloc_init(pa_start, pa_end - pa_start, pa_allocator = __ihk_pagealloc_init(pa_start, pa_end - pa_start,
PAGE_SIZE, page_map, &pages); PAGE_SIZE, page_map, &pages);
reserve_pages(page_map_pa, page_map_pa + pages * PAGE_SIZE, 0); reserve_pages(pa_allocator, page_map_pa,
page_map_pa + pages * PAGE_SIZE, 0);
if (pa_start < start) { if (pa_start < start) {
reserve_pages(pa_start, start, 0); reserve_pages(pa_allocator, pa_start, start, 0);
} }
/* BIOS reserved ranges */ /* BIOS reserved ranges */
for (i = 1; i <= ihk_mc_get_memory_address(IHK_MC_NR_RESERVED_AREAS, 0); for (i = 1; i <= ihk_mc_get_memory_address(IHK_MC_NR_RESERVED_AREAS, 0);
++i) { ++i) {
reserve_pages(pa_allocator,
reserve_pages(ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i), ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_START, i),
ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0); ihk_mc_get_memory_address(IHK_MC_RESERVED_AREA_END, i), 0);
} }
ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages); ihk_mc_reserve_arch_pages(pa_allocator, pa_start, pa_end, reserve_pages);
kprintf("Available memory: %ld bytes in %ld pages\n", return pa_allocator;
(ihk_pagealloc_count(pa_allocator) * PAGE_SIZE),
ihk_pagealloc_count(pa_allocator));
/* Notify the ihk to use my page allocator */
ihk_mc_set_page_allocator(&allocator);
/* And prepare some exception handlers */
ihk_mc_set_page_fault_handler(page_fault_handler);
/* Register query free mem handler */
ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM),
&query_free_mem_handler);
} }
static void numa_init(void) static void numa_init(void)
{ {
int i; int i, j;
struct ihk_mc_numa_node *node;
memory_nodes = early_alloc_pages((sizeof(*memory_nodes) *
ihk_mc_get_nr_numa_nodes() + PAGE_SIZE - 1)
>> PAGE_SHIFT);
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) { for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
int linux_numa_id, type; int linux_numa_id, type;
ihk_mc_get_numa_node(i, &linux_numa_id, &type); ihk_mc_get_numa_node(i, &linux_numa_id, &type);
memory_nodes[i].id = i;
memory_nodes[i].linux_numa_id = linux_numa_id;
memory_nodes[i].type = type;
INIT_LIST_HEAD(&memory_nodes[i].allocators);
kprintf("NUMA %d, Linux NUMA id: %d, type: %d\n", kprintf("NUMA: %d, Linux NUMA: %d, type: %d\n",
i, linux_numa_id, type); i, linux_numa_id, type);
} }
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { node = memory_nodes;
for (j = 0; j < ihk_mc_get_nr_memory_chunks(); ++j) {
unsigned long start, end; unsigned long start, end;
int linux_numa_id; int linux_numa_id;
struct ihk_page_allocator_desc *allocator;
ihk_mc_get_memory_chunk(i, &start, &end, &linux_numa_id); ihk_mc_get_memory_chunk(j, &start, &end, &linux_numa_id);
kprintf("Physical memory region: %p - %p @ Linux NUMA id: %d\n", allocator = page_allocator_init(start, end, (j == 0));
start, end, linux_numa_id); while (node->linux_numa_id != linux_numa_id &&
node < (memory_nodes + ihk_mc_get_nr_numa_nodes())) ++node;
if (node == (memory_nodes + ihk_mc_get_nr_numa_nodes())) {
panic("invalid memory chunk: no corresponding NUMA node found\n");
}
list_add_tail(&allocator->list, &node->allocators);
kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
start, end,
ihk_pagealloc_count(allocator) * PAGE_SIZE,
ihk_pagealloc_count(allocator),
node->id);
} }
} }
@ -705,11 +774,20 @@ void ihk_mc_clean_micpa(void){
void mem_init(void) void mem_init(void)
{ {
/* Initialize NUMA information and memory allocator bitmaps */
numa_init(); numa_init();
page_allocator_init(
ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_START, 0),
ihk_mc_get_memory_address(IHK_MC_GMA_AVAIL_END, 0), 1);
/* Notify the ihk to use my page allocator */
ihk_mc_set_page_allocator(&allocator);
/* And prepare some exception handlers */
ihk_mc_set_page_fault_handler(page_fault_handler);
/* Register query free mem handler */
ihk_mc_register_interrupt_handler(ihk_mc_get_vector(IHK_GV_QUERY_FREE_MEM),
&query_free_mem_handler);
/* Init page frame hash */
page_init(); page_init();
/* Prepare the kernel virtual map space */ /* Prepare the kernel virtual map space */

View File

@ -72,8 +72,11 @@ struct ihk_mc_memory_node {
unsigned long ihk_mc_get_memory_address(enum ihk_mc_gma_type, int); unsigned long ihk_mc_get_memory_address(enum ihk_mc_gma_type, int);
void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end, struct ihk_page_allocator_desc *pa_allocator;
void (*cb)(unsigned long, unsigned long, int)); void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
unsigned long start, unsigned long end,
void (*cb)(struct ihk_page_allocator_desc *,
unsigned long, unsigned long, int));
struct ihk_mc_pa_ops { struct ihk_mc_pa_ops {
void *(*alloc_page)(int, int, enum ihk_mc_ap_flag); void *(*alloc_page)(int, int, enum ihk_mc_ap_flag);

View File

@ -14,14 +14,24 @@
#ifndef __HEADER_GENERIC_IHK_PAGE_ALLOC #ifndef __HEADER_GENERIC_IHK_PAGE_ALLOC
#define __HEADER_GENERIC_IHK_PAGE_ALLOC #define __HEADER_GENERIC_IHK_PAGE_ALLOC
#include <list.h>
/* XXX: Physical memory management shouldn't be part of IHK */
struct ihk_mc_numa_node {
int id;
int linux_numa_id;
int type;
struct list_head allocators;
};
struct ihk_page_allocator_desc { struct ihk_page_allocator_desc {
unsigned long start; unsigned long start, end;
unsigned int last; unsigned int last;
unsigned int count; unsigned int count;
unsigned int flag; unsigned int flag;
unsigned int shift; unsigned int shift;
ihk_spinlock_t lock; ihk_spinlock_t lock;
unsigned int pad; struct list_head list;
unsigned long map[0]; unsigned long map[0];
}; };

View File

@ -64,13 +64,14 @@ void *__ihk_pagealloc_init(unsigned long start, unsigned long size,
memset(desc, 0, descsize * PAGE_SIZE); memset(desc, 0, descsize * PAGE_SIZE);
desc->start = start; desc->start = start;
desc->end = start + size;
desc->last = 0; desc->last = 0;
desc->count = mapaligned >> 3; desc->count = mapaligned >> 3;
desc->shift = page_shift; desc->shift = page_shift;
desc->flag = flag; desc->flag = flag;
kprintf("Page allocator: %lx - %lx (%d)\n", start, start + size, //kprintf("page allocator @ %lx - %lx (%d)\n", start, start + size,
page_shift); // page_shift);
ihk_mc_spinlock_init(&desc->lock); ihk_mc_spinlock_init(&desc->lock);