MM: zero memory at free and deferred zero

Change-Id: Ib0055d6f2bdd10d05d749dcd1f3d5c3d318f22f3
Author: Masamichi Takagi
Date: 2020-12-08 11:59:08 +09:00
parent fbd121d28c
commit 100bbe6231
7 changed files with 263 additions and 18 deletions
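This change adds a second per-node rb-tree (zeroed_chunks) next to free_chunks and two policy flags, zero_at_free and deferred_zero_at_free. Freed memory is either zeroed immediately, or queued and zeroed later by idle or spin-waiting CPUs via ihk_numa_zero_free_pages(). The standalone sketch below is illustrative only and is not part of the diff; it condenses the free-path decision that ihk_numa_free_pages() implements further down.

/*
 * Illustrative sketch only (not part of this commit): models the
 * free-path policy selected by the two flags introduced below.
 */
#include <stdio.h>

static int zero_at_free = 1;          /* pages must be zero before reuse */
static int deferred_zero_at_free = 1; /* defer the memset to idle CPUs */

enum free_path {
	LEGACY_FREE,    /* into free_chunks, never zeroed */
	DEFERRED_ZERO,  /* into free_chunks, zeroed later into zeroed_chunks */
	IMMEDIATE_ZERO, /* memset at free time, into zeroed_chunks */
};

static enum free_path pick_free_path(void)
{
	if (!zero_at_free)
		return LEGACY_FREE;
	return deferred_zero_at_free ? DEFERRED_ZERO : IMMEDIATE_ZERO;
}

int main(void)
{
	printf("free path: %d\n", pick_free_path());
	return 0;
}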

View File

@@ -792,6 +792,27 @@ order_based:
return NULL;
}
/*
* Get the NUMA node structure at the given index in the order of distance
*/
struct ihk_mc_numa_node *ihk_mc_get_numa_node_by_distance(int i)
{
int numa_id;
if (!cpu_local_var_initialized)
return NULL;
if (i < 0 || i >= ihk_mc_get_nr_numa_nodes()) {
return NULL;
}
numa_id = ihk_mc_get_numa_id();
if (!memory_nodes[numa_id].nodes_by_distance)
return NULL;
return &memory_nodes[memory_nodes[numa_id].nodes_by_distance[i].id];
}
static void __mckernel_free_pages_in_allocator(void *va, int npages,
int is_user)
{
@@ -1465,11 +1486,13 @@ static void numa_init(void)
INIT_LIST_HEAD(&memory_nodes[i].allocators);
memory_nodes[i].nodes_by_distance = 0;
#ifdef IHK_RBTREE_ALLOCATOR
memory_nodes[i].zeroed_chunks.rb_node = 0;
memory_nodes[i].free_chunks.rb_node = 0;
mcs_lock_init(&memory_nodes[i].lock);
memory_nodes[i].min_addr = 0xFFFFFFFFFFFFFFFF;
memory_nodes[i].max_addr = 0;
memory_nodes[i].nr_pages = 0;
memory_nodes[i].nr_zeroed_pages = 0;
memory_nodes[i].nr_free_pages = 0;
#endif
}

View File

@@ -3122,6 +3122,7 @@ static void idle(void)
v->status == CPU_STATUS_RESERVED) {
/* No work to do? Consolidate the kmalloc free list */
kmalloc_consolidate_free_list();
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
monitor->status = IHK_OS_MONITOR_IDLE;
cpu_local_var(current)->status = PS_INTERRUPTIBLE;
cpu_safe_halt();
@@ -3477,6 +3478,7 @@ void spin_sleep_or_schedule(void)
break;
}
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
cpu_pause();
}

View File

@@ -266,6 +266,7 @@ long do_syscall(struct syscall_request *req, int cpu)
cpu_restore_interrupt(runq_irqstate);
if (!do_schedule) {
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
continue;
}

View File

@@ -102,6 +102,7 @@ uint64_t schedule_timeout(uint64_t timeout)
/* Spin wait */
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
cpu_pause();
}

View File

@@ -208,6 +208,10 @@ int ihk_mc_pt_virt_to_phys(struct page_table *pt,
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
int ihk_mc_get_nr_numa_nodes(void);
struct ihk_mc_numa_node *ihk_mc_get_numa_node_by_distance(int i);
void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node);
extern int zero_at_free;
struct smp_coreset;
int ihk_mc_get_numa_node(int id, int *linux_numa_id, int *type);
int ihk_mc_get_numa_distance(int i, int j);

View File

@@ -41,10 +41,17 @@ struct ihk_mc_numa_node {
struct list_head allocators;
struct node_distance *nodes_by_distance;
#ifdef IHK_RBTREE_ALLOCATOR
struct rb_root zeroed_chunks;
struct rb_root free_chunks;
mcs_lock_node_t lock;
unsigned long nr_pages;
/*
* nr_free_pages: all free pages, zeroed or not
* nr_zeroed_pages: free pages that have already been zeroed
* Invariant: nr_zeroed_pages <= nr_free_pages
*/
unsigned long nr_zeroed_pages;
unsigned long nr_free_pages;
unsigned long min_addr;
unsigned long max_addr;
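The comment above documents the counter invariant nr_zeroed_pages <= nr_free_pages. A hypothetical debug helper along the following lines (check_node_counters is an invented name, not part of the diff) could assert it while node->lock is held:

/*
 * Hypothetical debug helper, not part of this commit: verifies the
 * counter invariant documented above. Call with node->lock held.
 */
static void check_node_counters(struct ihk_mc_numa_node *node)
{
	if (node->nr_zeroed_pages > node->nr_free_pages) {
		kprintf("%s: invariant violated: zeroed=%lu > free=%lu\n",
			__func__, node->nr_zeroed_pages, node->nr_free_pages);
	}
}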

View File

@@ -319,6 +319,9 @@ kprintf("\nzeroing done\n");
#ifdef IHK_RBTREE_ALLOCATOR
int zero_at_free = 1;
int deferred_zero_at_free = 1;
/*
* Simple red-black tree based physical memory management routines.
*
@@ -356,6 +359,7 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
/* Is ichunk contiguous from the left? */
if (ichunk->addr + ichunk->size == addr) {
struct rb_node *right;
/* Extend it to the right */
ichunk->size += size;
dkprintf("%s: chunk extended to right: 0x%lx:%lu\n",
@@ -370,6 +374,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
if (ichunk->addr + ichunk->size == right_chunk->addr) {
ichunk->size += right_chunk->size;
rb_erase(right, root);
/* Clear old structure */
memset(right_chunk, 0, sizeof(*right_chunk));
dkprintf("%s: chunk merged to right: 0x%lx:%lu\n",
__FUNCTION__, ichunk->addr, ichunk->size);
}
@@ -381,6 +389,7 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
/* Is ichunk contiguous from the right? */
if (addr + size == ichunk->addr) {
struct rb_node *left;
/* Extend it to the left */
ichunk->addr -= size;
ichunk->size += size;
@@ -397,6 +406,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
ichunk->addr -= left_chunk->size;
ichunk->size += left_chunk->size;
rb_erase(left, root);
/* Clear old structure */
memset(left_chunk, 0, sizeof(*left_chunk));
dkprintf("%s: chunk merged to left: 0x%lx:%lu\n",
__FUNCTION__, ichunk->addr, ichunk->size);
}
@@ -406,6 +419,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
new_chunk = (struct free_chunk *)phys_to_virt(ichunk->addr);
*new_chunk = *ichunk;
rb_replace_node(&ichunk->node, &new_chunk->node, root);
/* Clear old structure */
memset(ichunk, 0, sizeof(*ichunk));
dkprintf("%s: chunk moved to front: 0x%lx:%lu\n",
__FUNCTION__, new_chunk->addr, new_chunk->size);
@@ -530,6 +547,11 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
return 0;
}
if (zero_at_free) {
/* The in-band free_chunk structure may sit at the start of this
 * otherwise zeroed region; clear it so the allocation is fully zeroed */
memset(phys_to_virt(aligned_addr),
0, sizeof(struct free_chunk));
}
return aligned_addr;
}
@@ -576,6 +598,17 @@ static unsigned long __page_alloc_rbtree_reserve_pages(struct rb_root *root,
return aligned_addr;
}
/* Detach and return the root chunk of the tree, or NULL if it is empty */
static struct free_chunk *__page_alloc_rbtree_get_root_chunk(
struct rb_root *root)
{
struct rb_node *node = root->rb_node;
if (!node) {
return NULL;
}
rb_erase(node, root);
return container_of(node, struct free_chunk, node);
}
/*
* External routines.
@@ -583,10 +616,23 @@ static unsigned long __page_alloc_rbtree_reserve_pages(struct rb_root *root,
int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
unsigned long addr, unsigned long size)
{
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
__FUNCTION__, addr, size);
return EINVAL;
if (zero_at_free) {
/* Zero chunk */
memset(phys_to_virt(addr), 0, size);
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
__FUNCTION__, addr, size);
return EINVAL;
}
}
/* Default behavior */
else {
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
__FUNCTION__, addr, size);
return EINVAL;
}
}
if (addr < node->min_addr)
@@ -596,12 +642,81 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
node->max_addr = addr + size;
node->nr_pages += (size >> PAGE_SHIFT);
if (zero_at_free) {
node->nr_zeroed_pages += (size >> PAGE_SHIFT);
}
node->nr_free_pages += (size >> PAGE_SHIFT);
dkprintf("%s: added free pages 0x%lx:%lu\n",
__FUNCTION__, addr, size);
return 0;
}
void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node)
{
mcs_lock_node_t mcs_node;
unsigned long irqflags;
int i, max_i;
if (!zero_at_free)
return;
/* If a node is explicitly specified, zero only that node */
max_i = __node ? 1 : ihk_mc_get_nr_numa_nodes();
irqflags = cpu_disable_interrupt_save();
/* Look at NUMA nodes in the order of distance */
for (i = 0; i < max_i; ++i) {
struct ihk_mc_numa_node *node;
node = __node ? __node : ihk_mc_get_numa_node_by_distance(i);
if (!node) {
break;
}
/* Iterate free chunks */
for (;;) {
struct free_chunk *chunk;
unsigned long addr, size;
mcs_lock_lock_noirq(&node->lock, &mcs_node);
chunk = __page_alloc_rbtree_get_root_chunk(&node->free_chunks);
/*
 * Release the lock so that other CPUs can work on
 * other chunks in parallel
 */
mcs_lock_unlock_noirq(&node->lock, &mcs_node);
if (!chunk) {
break;
}
/*
 * Zero the chunk
 * NOTE: the chunk structure must not be referenced after zeroing,
 * because it lives inside the memory being cleared
 */
addr = chunk->addr;
size = chunk->size;
memset(phys_to_virt(addr), 0, size);
mcs_lock_lock_noirq(&node->lock, &mcs_node);
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
__FUNCTION__, addr, size);
goto unlock;
}
node->nr_zeroed_pages += (size >> PAGE_SHIFT);
if (cpu_local_var(current)->profile)
kprintf("%s: zeroed %lu pages @ NUMA %d\n",
__func__, size >> PAGE_SHIFT, node->id);
unlock:
mcs_lock_unlock_noirq(&node->lock, &mcs_node);
}
}
cpu_restore_interrupt(irqflags);
}
unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
int npages, int p2align)
@@ -633,14 +748,61 @@ unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
goto unlock_out;
}
addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
npages, p2align);
if (zero_at_free) {
/* Do we need to zero pages? */
if (node->nr_zeroed_pages < npages) {
mcs_lock_unlock(&node->lock, &mcs_node);
ihk_numa_zero_free_pages(node);
mcs_lock_lock(&node->lock, &mcs_node);
}
/* Does not necessarily succeed due to alignment */
if (addr) {
node->nr_free_pages -= npages;
dkprintf("%s: allocated pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
/* Still not enough? Give up. */
if (node->nr_zeroed_pages < npages) {
goto unlock_out;
}
addr = __page_alloc_rbtree_alloc_pages(&node->zeroed_chunks,
npages, p2align);
/* Does not necessarily succeed due to alignment */
if (addr) {
node->nr_free_pages -= npages;
node->nr_zeroed_pages -= npages;
#if 0
{
size_t free_bytes = __count_free_bytes(&node->free_chunks);
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
panic("");
}
}
#endif
dkprintf("%s: allocated pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
}
/* Default behavior */
else {
addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
npages, p2align);
/* Does not necessarily succeed due to alignment */
if (addr) {
node->nr_free_pages -= npages;
#if 0
{
size_t free_bytes = __count_free_bytes(&node->free_chunks);
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
panic("");
}
}
#endif
dkprintf("%s: allocated pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
}
unlock_out:
@@ -685,15 +847,60 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
}
mcs_lock_lock(&node->lock, &mcs_node);
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
npages << PAGE_SHIFT)) {
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
if (!zero_at_free ||
(zero_at_free && deferred_zero_at_free)) {
/*
 * Free into free_chunks first; if zero-at-free is enabled, the
 * pages are zeroed asynchronously and moved to zeroed_chunks later
 */
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
npages << PAGE_SHIFT)) {
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
else {
node->nr_free_pages += npages;
#if 0
{
size_t free_bytes = __count_free_bytes(&node->free_chunks);
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
panic("");
}
}
#endif
dkprintf("%s: freed pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
}
else {
node->nr_free_pages += npages;
dkprintf("%s: freed pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
/*
 * Zero the chunk right here and free it into zeroed_chunks
 */
memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT);
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr,
npages << PAGE_SHIFT)) {
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
else {
node->nr_free_pages += npages;
node->nr_zeroed_pages += npages;
#if 0
{
size_t free_bytes = __count_free_bytes(&node->free_chunks);
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
panic("");
}
}
#endif
dkprintf("%s: freed+zeroed pages 0x%lx:%lu\n",
__FUNCTION__, addr, npages << PAGE_SHIFT);
}
}
mcs_lock_unlock(&node->lock, &mcs_node);
}
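With the deferred path, freed pages accumulate in free_chunks until an idle or spin-waiting CPU drains them through ihk_numa_zero_free_pages(), and ihk_numa_alloc_pages() also drains on demand when nr_zeroed_pages runs low. A hedged usage sketch (prezero_local_node is an invented wrapper, not part of the diff): a caller can pre-zero the backlog on the closest node before a latency-sensitive phase so later allocations are served directly from zeroed_chunks.

/*
 * Illustrative sketch, not part of this commit: drain the deferred
 * zeroing backlog on the closest NUMA node so that subsequent
 * ihk_numa_alloc_pages() calls do not have to zero on demand.
 */
static void prezero_local_node(void)
{
	/* Distance index 0 is the closest (local) node */
	struct ihk_mc_numa_node *node = ihk_mc_get_numa_node_by_distance(0);

	if (node)
		ihk_numa_zero_free_pages(node);
}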