diff --git a/kernel/mem.c b/kernel/mem.c
index db93c7fb..804860fc 100644
--- a/kernel/mem.c
+++ b/kernel/mem.c
@@ -498,7 +498,9 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
 {
 	unsigned long pa = 0;
 	int i, node;
+#ifndef IHK_RBTREE_ALLOCATOR
 	struct ihk_page_allocator_desc *pa_allocator;
+#endif
 	int numa_id;
 
 	/* Not yet initialized or idle process */
@@ -519,9 +521,14 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
 
 	/* Explicit valid node? */
 	if (pref_node > -1 && pref_node < ihk_mc_get_nr_numa_nodes()) {
+#ifdef IHK_RBTREE_ALLOCATOR
+		{
+			pa = ihk_numa_alloc_pages(&memory_nodes[pref_node], npages, p2align);
+#else
 		list_for_each_entry(pa_allocator, &memory_nodes[pref_node].allocators,
 				list) {
 			pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
+#endif
 
 			if (pa) {
 				dkprintf("%s: explicit (node: %d) CPU @ node %d allocated "
@@ -559,12 +566,16 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
 					continue;
 				}
 
-				numa_id = memory_nodes[node].
-					nodes_by_distance[i].id;
+				numa_id = memory_nodes[node].nodes_by_distance[i].id;
+#ifdef IHK_RBTREE_ALLOCATOR
+				{
+					pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
+							nodes_by_distance[i].id], npages, p2align);
+#else
 				list_for_each_entry(pa_allocator,
-						&memory_nodes[numa_id].
-						allocators, list) {
+						&memory_nodes[numa_id].allocators, list) {
 					pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
+#endif
 
 					if (pa) {
 						dkprintf("%s: policy: CPU @ node %d allocated "
@@ -612,9 +623,16 @@ distance_based:
 
 	for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
 		numa_id = memory_nodes[node].nodes_by_distance[i].id;
+
+#ifdef IHK_RBTREE_ALLOCATOR
+		{
+			pa = ihk_numa_alloc_pages(&memory_nodes[memory_nodes[node].
+					nodes_by_distance[i].id], npages, p2align);
+#else
 		list_for_each_entry(pa_allocator,
 				&memory_nodes[numa_id].allocators, list) {
 			pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
+#endif
 
 			if (pa) {
 				dkprintf("%s: distance: CPU @ node %d allocated "
@@ -640,11 +658,19 @@ order_based:
 	/* Fall back to regular order */
 	for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
 		numa_id = (node + i) % ihk_mc_get_nr_numa_nodes();
+#ifdef IHK_RBTREE_ALLOCATOR
+		{
+			pa = ihk_numa_alloc_pages(&memory_nodes[(node + i) %
+					ihk_mc_get_nr_numa_nodes()], npages, p2align);
+#else
 		list_for_each_entry(pa_allocator,
 				&memory_nodes[numa_id].allocators, list) {
 			pa = ihk_pagealloc_alloc(pa_allocator, npages, p2align);
+#endif
 			if (pa) {
+#ifdef ENABLE_RUSAGE
 				rusage_numa_add(numa_id, npages * PAGE_SIZE);
+#endif
 				break;
 			}
 		}
@@ -669,15 +695,25 @@ static void __mckernel_free_pages_in_allocator(void *va, int npages)
 
 	/* Find corresponding memory allocator */
 	for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
-		struct ihk_page_allocator_desc *pa_allocator;
+#ifdef IHK_RBTREE_ALLOCATOR
+		{
+			if (pa_start >= memory_nodes[i].min_addr &&
+					pa_end <= memory_nodes[i].max_addr) {
+
+				ihk_numa_free_pages(&memory_nodes[i], pa_start, npages);
+#else
+		struct ihk_page_allocator_desc *pa_allocator;
 		list_for_each_entry(pa_allocator,
 				&memory_nodes[i].allocators, list) {
 			if (pa_start >= pa_allocator->start &&
 					pa_end <= pa_allocator->end) {
 				ihk_pagealloc_free(pa_allocator, pa_start, npages);
+#endif
+#ifdef ENABLE_RUSAGE
 				rusage_numa_sub(i, npages * PAGE_SIZE);
+#endif
 				return;
 			}
 		}
@@ -755,6 +791,9 @@ static void query_free_mem_interrupt_handler(void *priv)
 
 	/* Iterate memory allocators */
 	for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
+#ifdef IHK_RBTREE_ALLOCATOR
+		pages += memory_nodes[i].nr_free_pages;
+#else
 		struct ihk_page_allocator_desc *pa_allocator;
 		list_for_each_entry(pa_allocator,
@@ -764,6 +803,7 @@ static void query_free_mem_interrupt_handler(void *priv)
 					pa_allocator->start, pa_allocator->end, __pages);
 			pages += __pages;
 		}
+#endif
 	}
 
 	kprintf("McKernel free pages in total: %d\n", pages);
@@ -1082,6 +1122,13 @@ static void numa_init(void)
 		memory_nodes[i].type = type;
 		INIT_LIST_HEAD(&memory_nodes[i].allocators);
 		memory_nodes[i].nodes_by_distance = 0;
+#ifdef IHK_RBTREE_ALLOCATOR
+		memory_nodes[i].free_chunks.rb_node = 0;
+		mcs_lock_init(&memory_nodes[i].lock);
+		memory_nodes[i].min_addr = 0xFFFFFFFFFFFFFFFF;
+		memory_nodes[i].max_addr = 0;
+		memory_nodes[i].nr_free_pages = 0;
+#endif
 
 		kprintf("NUMA: %d, Linux NUMA: %d, type: %d\n",
 				i, linux_numa_id, type);
@@ -1090,21 +1137,48 @@ static void numa_init(void)
 	for (j = 0; j < ihk_mc_get_nr_memory_chunks(); ++j) {
 		unsigned long start, end;
 		int numa_id;
+#ifndef IHK_RBTREE_ALLOCATOR
 		struct ihk_page_allocator_desc *allocator;
+#endif
 
 		ihk_mc_get_memory_chunk(j, &start, &end, &numa_id);
 
+		if (virt_to_phys(get_last_early_heap()) >= start &&
+				virt_to_phys(get_last_early_heap()) < end) {
+			dkprintf("%s: start from 0x%lx\n",
+				__FUNCTION__, virt_to_phys(get_last_early_heap()));
+			start = virt_to_phys(get_last_early_heap());
+		}
+
+#ifdef IHK_RBTREE_ALLOCATOR
+		ihk_numa_add_free_pages(&memory_nodes[numa_id], start, end - start);
+#else
 		allocator = page_allocator_init(start, end);
 		list_add_tail(&allocator->list,
 				&memory_nodes[numa_id].allocators);
+#endif
 
+#ifdef IHK_RBTREE_ALLOCATOR
+		kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
+				start, end,
+				end - start,
+				(end - start) >> PAGE_SHIFT,
+				numa_id);
+#else
 		kprintf("Physical memory: 0x%lx - 0x%lx, %lu bytes, %d pages available @ NUMA: %d\n",
 				start, end,
 				ihk_pagealloc_count(allocator) * PAGE_SIZE,
 				ihk_pagealloc_count(allocator),
 				numa_id);
-
+#endif
+#ifdef ENABLE_RUSAGE
+#ifdef IHK_RBTREE_ALLOCATOR
+		rusage_max_memory_add(memory_nodes[numa_id].nr_free_pages *
+				PAGE_SIZE);
+#else
 		rusage_max_memory_add(ihk_pagealloc_count(allocator) *
-			PAGE_SIZE);
+				PAGE_SIZE);
+#endif
+#endif
 	}
 }
diff --git a/lib/include/ihk/page_alloc.h b/lib/include/ihk/page_alloc.h
index 3d610c8a..d844b46e 100644
--- a/lib/include/ihk/page_alloc.h
+++ b/lib/include/ihk/page_alloc.h
@@ -5,16 +5,19 @@
  * Declare functions acquire physical pages and assign virtual addresses
  * to them.
  * \author Taku Shimosawa  \par
- * Copyright (C) 2011 - 2012 Taku Shimosawa
+ * \author Balazs Gerofi  \par
  */
 /*
  * HISTORY
+ * 2016/12 - bgerofi - NUMA support
+ * 2017/06 - bgerofi - rewrite physical memory management for red-black trees
  */
 
 #ifndef __HEADER_GENERIC_IHK_PAGE_ALLOC
 #define __HEADER_GENERIC_IHK_PAGE_ALLOC
 
 #include
+#include
 
 /* XXX: Physical memory management shouldn't be part of IHK */
 struct node_distance {
@@ -22,14 +25,40 @@ struct node_distance {
 	int distance;
 };
 
+#define IHK_RBTREE_ALLOCATOR
+
+#ifdef IHK_RBTREE_ALLOCATOR
+struct free_chunk {
+	unsigned long addr, size;
+	struct rb_node node;
+};
+#endif
+
 struct ihk_mc_numa_node {
 	int id;
 	int linux_numa_id;
 	int type;
 	struct list_head allocators;
 	struct node_distance *nodes_by_distance;
+#ifdef IHK_RBTREE_ALLOCATOR
+	struct rb_root free_chunks;
+	mcs_lock_node_t lock;
+
+	unsigned long nr_free_pages;
+	unsigned long min_addr;
+	unsigned long max_addr;
+#endif
 };
 
+#ifdef IHK_RBTREE_ALLOCATOR
+unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
+		int npages, int p2align);
+void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
+		unsigned long addr, int npages);
+int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
+		unsigned long addr, unsigned long size);
+#endif
+
 struct ihk_page_allocator_desc {
 	unsigned long start, end;
 	unsigned int last;
diff --git a/lib/page_alloc.c b/lib/page_alloc.c
index 268f4d10..5ccda66b 100644
--- a/lib/page_alloc.c
+++ b/lib/page_alloc.c
@@ -18,6 +18,15 @@
 #include
 #include
 #include
+#include
+
+//#define DEBUG_PRINT_PAGE_ALLOC
+
+#ifdef DEBUG_PRINT_PAGE_ALLOC
+#define dkprintf kprintf
+#else
+#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
+#endif
 
 void free_pages(void *, int npages);
 
@@ -301,3 +310,345 @@ kprintf("\nzeroing done\n");
 }
 
+#ifdef IHK_RBTREE_ALLOCATOR
+
+/*
+ * Simple red-black tree based physical memory management routines.
+ *
+ * Allocation grabs the first suitable chunk (and splits the chunk if alignment requires it).
+ * Deallocation merges with immediate neighbours.
+ *
+ * NOTE: invariant property: free_chunk structures are placed at the very front
+ * of their corresponding memory (i.e., they are on the free memory chunk itself).
+ */
+
+/*
+ * Free pages.
+ * NOTE: locking must be managed by the caller.
+ */
+static int __page_alloc_rbtree_free_range(struct rb_root *root,
+		unsigned long addr, unsigned long size)
+{
+	struct rb_node **iter = &(root->rb_node), *parent = NULL;
+	struct free_chunk *new_chunk;
+
+	/* Figure out where to put new node */
+	while (*iter) {
+		struct free_chunk *ichunk = container_of(*iter, struct free_chunk, node);
+		parent = *iter;
+
+		if ((addr >= ichunk->addr) && (addr < ichunk->addr + ichunk->size)) {
+			kprintf("%s: ERROR: free memory chunk: 0x%lx:%lu"
+					" and requested range to be freed: 0x%lx:%lu are "
+					"overlapping (double-free?)\n",
+					__FUNCTION__,
+					ichunk->addr, ichunk->size, addr, size);
+			return EINVAL;
+		}
+
+		/* Is ichunk contiguous from the left? */
+		if (ichunk->addr + ichunk->size == addr) {
+			struct rb_node *right;
+			/* Extend it to the right */
+			ichunk->size += size;
+			dkprintf("%s: chunk extended to right: 0x%lx:%lu\n",
+					__FUNCTION__, ichunk->addr, ichunk->size);
+
+			/* Have ichunk and its right neighbour become contiguous?
+			 */
+			right = rb_next(*iter);
+			if (right) {
+				struct free_chunk *right_chunk =
+					container_of(right, struct free_chunk, node);
+
+				if (ichunk->addr + ichunk->size == right_chunk->addr) {
+					ichunk->size += right_chunk->size;
+					rb_erase(right, root);
+					dkprintf("%s: chunk merged to right: 0x%lx:%lu\n",
+							__FUNCTION__, ichunk->addr, ichunk->size);
+				}
+			}
+
+			return 0;
+		}
+
+		/* Is ichunk contiguous from the right? */
+		if (addr + size == ichunk->addr) {
+			struct rb_node *left;
+			/* Extend it to the left */
+			ichunk->addr -= size;
+			ichunk->size += size;
+			dkprintf("%s: chunk extended to left: 0x%lx:%lu\n",
+					__FUNCTION__, ichunk->addr, ichunk->size);
+
+			/* Have ichunk and its left neighbour become contiguous? */
+			left = rb_prev(*iter);
+			if (left) {
+				struct free_chunk *left_chunk =
+					container_of(left, struct free_chunk, node);
+
+				if (left_chunk->addr + left_chunk->size == ichunk->addr) {
+					ichunk->addr -= left_chunk->size;
+					ichunk->size += left_chunk->size;
+					rb_erase(left, root);
+					dkprintf("%s: chunk merged to left: 0x%lx:%lu\n",
+							__FUNCTION__, ichunk->addr, ichunk->size);
+				}
+			}
+
+			/* Move chunk structure to the front */
+			new_chunk = (struct free_chunk *)phys_to_virt(ichunk->addr);
+			*new_chunk = *ichunk;
+			rb_replace_node(&ichunk->node, &new_chunk->node, root);
+			dkprintf("%s: chunk moved to front: 0x%lx:%lu\n",
+					__FUNCTION__, new_chunk->addr, new_chunk->size);
+
+			return 0;
+		}
+
+		if (addr < ichunk->addr)
+			iter = &((*iter)->rb_left);
+		else
+			iter = &((*iter)->rb_right);
+	}
+
+	new_chunk = (struct free_chunk *)phys_to_virt(addr);
+	new_chunk->addr = addr;
+	new_chunk->size = size;
+	dkprintf("%s: new chunk: 0x%lx:%lu\n",
+			__FUNCTION__, new_chunk->addr, new_chunk->size);
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&new_chunk->node, parent, iter);
+	rb_insert_color(&new_chunk->node, root);
+
+	return 0;
+}
+
+/*
+ * Mark address range as used (i.e., allocated).
+ *
+ * chunk is the free memory chunk in which
+ * [aligned_addr, aligned_addr + size] resides.
+ *
+ * NOTE: locking must be managed by the caller.
+ */
+static int __page_alloc_rbtree_mark_range_allocated(struct rb_root *root,
+		struct free_chunk *chunk,
+		unsigned long aligned_addr, unsigned long size)
+{
+	struct free_chunk *left_chunk = NULL, *right_chunk = NULL;
+
+	/* Is there leftover on the right? */
+	if ((aligned_addr + size) < (chunk->addr + chunk->size)) {
+		right_chunk = (struct free_chunk *)phys_to_virt(aligned_addr + size);
+		right_chunk->addr = aligned_addr + size;
+		right_chunk->size = (chunk->addr + chunk->size) - (aligned_addr + size);
+	}
+
+	/* Is there leftover on the left?
+	 */
+	if (aligned_addr != chunk->addr) {
+		left_chunk = chunk;
+	}
+
+	/* Update chunk's size; it may become zero */
+	chunk->size = (aligned_addr - chunk->addr);
+
+	if (left_chunk) {
+		/* Left chunk reuses chunk, add right chunk */
+		if (right_chunk) {
+			dkprintf("%s: adding right chunk: 0x%lx:%lu\n",
+					__FUNCTION__, right_chunk->addr, right_chunk->size);
+			if (__page_alloc_rbtree_free_range(root,
+					right_chunk->addr, right_chunk->size)) {
+				kprintf("%s: ERROR: adding right chunk: 0x%lx:%lu\n",
+						__FUNCTION__, right_chunk->addr, right_chunk->size);
+				return EINVAL;
+			}
+		}
+	}
+	else {
+		/* Replace left with right */
+		if (right_chunk) {
+			rb_replace_node(&chunk->node, &right_chunk->node, root);
+			dkprintf("%s: chunk replaced with right: 0x%lx:%lu\n",
+					__FUNCTION__, right_chunk->addr, right_chunk->size);
+		}
+		/* No left chunk and no right chunk => chunk was an exact match, delete it */
+		else {
+			rb_erase(&chunk->node, root);
+			dkprintf("%s: chunk deleted: 0x%lx:%lu\n",
+					__FUNCTION__, chunk->addr, chunk->size);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate pages.
+ *
+ * NOTE: locking must be managed by the caller.
+ */
+static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
+		int npages, int p2align)
+{
+	struct free_chunk *chunk;
+	struct rb_node *node;
+	unsigned long size = PAGE_SIZE * npages;
+	unsigned long align_size = (PAGE_SIZE << p2align);
+	unsigned long align_mask = ~(align_size - 1);
+	unsigned long aligned_addr = 0;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		chunk = container_of(node, struct free_chunk, node);
+		aligned_addr = (chunk->addr + (align_size - 1)) & align_mask;
+
+		/* Is this a suitable chunk? */
+		if ((aligned_addr + size) <= (chunk->addr + chunk->size)) {
+			break;
+		}
+	}
+
+	/* No matching chunk at all? */
+	if (!node) {
+		return 0;
+	}
+
+	dkprintf("%s: allocating: 0x%lx:%lu\n",
+			__FUNCTION__, aligned_addr, size);
+	if (__page_alloc_rbtree_mark_range_allocated(root, chunk,
+			aligned_addr, size)) {
+		kprintf("%s: ERROR: allocating 0x%lx:%lu\n",
+				__FUNCTION__, aligned_addr, size);
+		return 0;
+	}
+
+	return aligned_addr;
+}
+
+/*
+ * Reserve pages.
+ *
+ * NOTE: locking must be managed by the caller.
+ */
+static unsigned long __page_alloc_rbtree_reserve_pages(struct rb_root *root,
+		unsigned long aligned_addr, int npages)
+{
+	struct free_chunk *chunk;
+	struct rb_node *node;
+	unsigned long size = PAGE_SIZE * npages;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		chunk = container_of(node, struct free_chunk, node);
+
+		/* Is this the containing chunk? */
+		if (aligned_addr >= chunk->addr &&
+				(aligned_addr + size) <= (chunk->addr + chunk->size)) {
+			break;
+		}
+	}
+
+	/* No matching chunk at all? */
+	if (!node) {
+		kprintf("%s: WARNING: attempted to reserve non-free"
+				" physical range: 0x%lx:%lu\n",
+				__FUNCTION__,
+				aligned_addr, size);
+		return 0;
+	}
+
+	dkprintf("%s: reserving: 0x%lx:%lu\n",
+			__FUNCTION__, aligned_addr, size);
+	if (__page_alloc_rbtree_mark_range_allocated(root, chunk,
+			aligned_addr, size)) {
+		kprintf("%s: ERROR: reserving 0x%lx:%lu\n",
+				__FUNCTION__, aligned_addr, size);
+		return 0;
+	}
+
+	return aligned_addr;
+}
+
+
+/*
+ * External routines.
+ */
+int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
+		unsigned long addr, unsigned long size)
+{
+	if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
+		kprintf("%s: ERROR: adding 0x%lx:%lu\n",
+				__FUNCTION__, addr, size);
+		return EINVAL;
+	}
+
+	if (addr < node->min_addr)
+		node->min_addr = addr;
+
+	if (addr + size > node->max_addr)
+		node->max_addr = addr + size;
+
+	node->nr_free_pages += (size >> PAGE_SHIFT);
+	dkprintf("%s: added free pages 0x%lx:%lu\n",
+			__FUNCTION__, addr, size);
+	return 0;
+}
+
+
+unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
+		int npages, int p2align)
+{
+	unsigned long addr = 0;
+	mcs_lock_node_t mcs_node;
+
+	mcs_lock_lock(&node->lock, &mcs_node);
+
+	if (node->nr_free_pages < npages) {
+		goto unlock_out;
+	}
+
+	addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
+			npages, p2align);
+
+	/* Does not necessarily succeed due to alignment */
+	if (addr) {
+		node->nr_free_pages -= npages;
+		dkprintf("%s: allocated pages 0x%lx:%lu\n",
+				__FUNCTION__, addr, npages << PAGE_SHIFT);
+	}
+
+unlock_out:
+	mcs_lock_unlock(&node->lock, &mcs_node);
+
+	return addr;
+}
+
+void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
+		unsigned long addr, int npages)
+{
+	mcs_lock_node_t mcs_node;
+
+	if (addr < node->min_addr ||
+			(addr + (npages << PAGE_SHIFT)) > node->max_addr) {
+		return;
+	}
+
+	if (npages <= 0) {
+		return;
+	}
+
+	mcs_lock_lock(&node->lock, &mcs_node);
+	if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
+			npages << PAGE_SHIFT)) {
+		kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
+				__FUNCTION__, addr, npages << PAGE_SHIFT);
+	}
+	else {
+		node->nr_free_pages += npages;
+		dkprintf("%s: freed pages 0x%lx:%lu\n",
+				__FUNCTION__, addr, npages << PAGE_SHIFT);
+	}
+	mcs_lock_unlock(&node->lock, &mcs_node);
+}
+
+#endif // IHK_RBTREE_ALLOCATOR
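
For reference, the call sequence the new API expects is the one numa_init() and mckernel_allocate_aligned_pages_node() follow in the kernel/mem.c hunks above: initialize the per-node fields, register each physical range with ihk_numa_add_free_pages(), then satisfy requests with ihk_numa_alloc_pages() and return them with ihk_numa_free_pages(). The sketch below is only an illustration of that sequence and is not part of the patch; it assumes McKernel kernel context (kprintf(), mcs_lock_init(), PAGE_SIZE and a memory_nodes[] array like the one in kernel/mem.c), and both the function name and the physical range it registers are made up.

/* Illustrative sketch only; memory_nodes[] and the range values are assumptions. */
static void rbtree_allocator_usage_sketch(void)
{
	struct ihk_mc_numa_node *node = &memory_nodes[0];
	unsigned long pa;

	/* Per-node allocator state, initialized the same way numa_init() does it */
	node->free_chunks.rb_node = 0;
	mcs_lock_init(&node->lock);
	node->min_addr = 0xFFFFFFFFFFFFFFFF;
	node->max_addr = 0;
	node->nr_free_pages = 0;

	/* Register a hypothetical free physical range: 256 MiB at 0x100000000 */
	ihk_numa_add_free_pages(node, 0x100000000UL, 256UL << 20);

	/* Request 16 pages aligned to PAGE_SIZE << 2 (p2align == 2) */
	pa = ihk_numa_alloc_pages(node, 16, 2);
	if (!pa) {
		kprintf("sketch: allocation failed\n");
		return;
	}

	/* ... use the pages, then hand them back ... */
	ihk_numa_free_pages(node, pa, 16);
}

Note that ihk_numa_alloc_pages() can return 0 even when nr_free_pages would cover the request, because the first-fit scan needs a single chunk that still fits after alignment; the callers in mem.c therefore treat 0 as "try the next NUMA node". As a worked example of the split done by __page_alloc_rbtree_mark_range_allocated(), assuming 4 KiB pages: given a free chunk 0x13000:0x8000 and a request for 4 pages with p2align = 1, align_size is 0x2000 and aligned_addr becomes 0x14000; the allocation occupies 0x14000-0x18000, the original chunk shrinks to the left leftover 0x13000:0x1000, and the right leftover 0x18000:0x3000 is re-inserted via __page_alloc_rbtree_free_range().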