MM: zero memory at free and deferred zero
Change-Id: Ib0055d6f2bdd10d05d749dcd1f3d5c3d318f22f3
This commit is contained in:
23
kernel/mem.c
23
kernel/mem.c
@ -792,6 +792,27 @@ order_based:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get NUMA node structure offsetted by index in the order of distance
|
||||
*/
|
||||
struct ihk_mc_numa_node *ihk_mc_get_numa_node_by_distance(int i)
|
||||
{
|
||||
int numa_id;
|
||||
|
||||
if (!cpu_local_var_initialized)
|
||||
return NULL;
|
||||
|
||||
if (i < 0 || i > ihk_mc_get_nr_numa_nodes()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
numa_id = ihk_mc_get_numa_id();
|
||||
if (!memory_nodes[numa_id].nodes_by_distance)
|
||||
return NULL;
|
||||
|
||||
return &memory_nodes[memory_nodes[numa_id].nodes_by_distance[i].id];
|
||||
}
|
||||
|
||||
static void __mckernel_free_pages_in_allocator(void *va, int npages,
|
||||
int is_user)
|
||||
{
|
||||
@ -1465,11 +1486,13 @@ static void numa_init(void)
|
||||
INIT_LIST_HEAD(&memory_nodes[i].allocators);
|
||||
memory_nodes[i].nodes_by_distance = 0;
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
memory_nodes[i].zeroed_chunks.rb_node = 0;
|
||||
memory_nodes[i].free_chunks.rb_node = 0;
|
||||
mcs_lock_init(&memory_nodes[i].lock);
|
||||
memory_nodes[i].min_addr = 0xFFFFFFFFFFFFFFFF;
|
||||
memory_nodes[i].max_addr = 0;
|
||||
memory_nodes[i].nr_pages = 0;
|
||||
memory_nodes[i].nr_zeroed_pages = 0;
|
||||
memory_nodes[i].nr_free_pages = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3122,6 +3122,7 @@ static void idle(void)
|
||||
v->status == CPU_STATUS_RESERVED) {
|
||||
/* No work to do? Consolidate the kmalloc free list */
|
||||
kmalloc_consolidate_free_list();
|
||||
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
|
||||
monitor->status = IHK_OS_MONITOR_IDLE;
|
||||
cpu_local_var(current)->status = PS_INTERRUPTIBLE;
|
||||
cpu_safe_halt();
|
||||
@ -3477,6 +3478,7 @@ void spin_sleep_or_schedule(void)
|
||||
break;
|
||||
}
|
||||
|
||||
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
|
||||
@ -266,6 +266,7 @@ long do_syscall(struct syscall_request *req, int cpu)
|
||||
cpu_restore_interrupt(runq_irqstate);
|
||||
|
||||
if (!do_schedule) {
|
||||
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@ -102,6 +102,7 @@ uint64_t schedule_timeout(uint64_t timeout)
|
||||
|
||||
/* Spin wait */
|
||||
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
|
||||
ihk_numa_zero_free_pages(ihk_mc_get_numa_node_by_distance(0));
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
|
||||
@ -208,6 +208,10 @@ int ihk_mc_pt_virt_to_phys(struct page_table *pt,
|
||||
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
|
||||
|
||||
int ihk_mc_get_nr_numa_nodes(void);
|
||||
struct ihk_mc_numa_node *ihk_mc_get_numa_node_by_distance(int i);
|
||||
void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node);
|
||||
extern int zero_at_free;
|
||||
|
||||
struct smp_coreset;
|
||||
int ihk_mc_get_numa_node(int id, int *linux_numa_id, int *type);
|
||||
int ihk_mc_get_numa_distance(int i, int j);
|
||||
|
||||
@ -41,10 +41,17 @@ struct ihk_mc_numa_node {
|
||||
struct list_head allocators;
|
||||
struct node_distance *nodes_by_distance;
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
struct rb_root zeroed_chunks;
|
||||
struct rb_root free_chunks;
|
||||
mcs_lock_node_t lock;
|
||||
|
||||
unsigned long nr_pages;
|
||||
/*
|
||||
* nr_free_pages: all freed pages
|
||||
* nr_zeroed_pages: zeroed free pages
|
||||
* Invariant: nr_zeroed_pages <= nr_free_pages
|
||||
*/
|
||||
unsigned long nr_zeroed_pages;
|
||||
unsigned long nr_free_pages;
|
||||
unsigned long min_addr;
|
||||
unsigned long max_addr;
|
||||
|
||||
243
lib/page_alloc.c
243
lib/page_alloc.c
@ -319,6 +319,9 @@ kprintf("\nzeroing done\n");
|
||||
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
|
||||
int zero_at_free = 1;
|
||||
int deferred_zero_at_free = 1;
|
||||
|
||||
/*
|
||||
* Simple red-black tree based physical memory management routines.
|
||||
*
|
||||
@ -356,6 +359,7 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
|
||||
/* Is ichunk contiguous from the left? */
|
||||
if (ichunk->addr + ichunk->size == addr) {
|
||||
struct rb_node *right;
|
||||
|
||||
/* Extend it to the right */
|
||||
ichunk->size += size;
|
||||
dkprintf("%s: chunk extended to right: 0x%lx:%lu\n",
|
||||
@ -370,6 +374,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
|
||||
if (ichunk->addr + ichunk->size == right_chunk->addr) {
|
||||
ichunk->size += right_chunk->size;
|
||||
rb_erase(right, root);
|
||||
|
||||
/* Clear old structure */
|
||||
memset(right_chunk, 0, sizeof(*right_chunk));
|
||||
|
||||
dkprintf("%s: chunk merged to right: 0x%lx:%lu\n",
|
||||
__FUNCTION__, ichunk->addr, ichunk->size);
|
||||
}
|
||||
@ -381,6 +389,7 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
|
||||
/* Is ichunk contiguous from the right? */
|
||||
if (addr + size == ichunk->addr) {
|
||||
struct rb_node *left;
|
||||
|
||||
/* Extend it to the left */
|
||||
ichunk->addr -= size;
|
||||
ichunk->size += size;
|
||||
@ -397,6 +406,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
|
||||
ichunk->addr -= left_chunk->size;
|
||||
ichunk->size += left_chunk->size;
|
||||
rb_erase(left, root);
|
||||
|
||||
/* Clear old structure */
|
||||
memset(left_chunk, 0, sizeof(*left_chunk));
|
||||
|
||||
dkprintf("%s: chunk merged to left: 0x%lx:%lu\n",
|
||||
__FUNCTION__, ichunk->addr, ichunk->size);
|
||||
}
|
||||
@ -406,6 +419,10 @@ static int __page_alloc_rbtree_free_range(struct rb_root *root,
|
||||
new_chunk = (struct free_chunk *)phys_to_virt(ichunk->addr);
|
||||
*new_chunk = *ichunk;
|
||||
rb_replace_node(&ichunk->node, &new_chunk->node, root);
|
||||
|
||||
/* Clear old structure */
|
||||
memset(ichunk, 0, sizeof(*ichunk));
|
||||
|
||||
dkprintf("%s: chunk moved to front: 0x%lx:%lu\n",
|
||||
__FUNCTION__, new_chunk->addr, new_chunk->size);
|
||||
|
||||
@ -530,6 +547,11 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (zero_at_free) {
|
||||
memset(phys_to_virt(aligned_addr),
|
||||
0, sizeof(struct free_chunk));
|
||||
}
|
||||
|
||||
return aligned_addr;
|
||||
}
|
||||
|
||||
@ -576,6 +598,17 @@ static unsigned long __page_alloc_rbtree_reserve_pages(struct rb_root *root,
|
||||
return aligned_addr;
|
||||
}
|
||||
|
||||
static struct free_chunk *__page_alloc_rbtree_get_root_chunk(
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
if (!node) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rb_erase(node, root);
|
||||
return container_of(node, struct free_chunk, node);
|
||||
}
|
||||
|
||||
/*
|
||||
* External routines.
|
||||
@ -583,10 +616,23 @@ static unsigned long __page_alloc_rbtree_reserve_pages(struct rb_root *root,
|
||||
int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
|
||||
unsigned long addr, unsigned long size)
|
||||
{
|
||||
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
|
||||
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, size);
|
||||
return EINVAL;
|
||||
if (zero_at_free) {
|
||||
/* Zero chunk */
|
||||
memset(phys_to_virt(addr), 0, size);
|
||||
|
||||
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
|
||||
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, size);
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
/* Default behavior */
|
||||
else {
|
||||
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
|
||||
kprintf("%s: ERROR: adding 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, size);
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (addr < node->min_addr)
|
||||
@ -596,12 +642,81 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
|
||||
node->max_addr = addr + size;
|
||||
|
||||
node->nr_pages += (size >> PAGE_SHIFT);
|
||||
if (zero_at_free) {
|
||||
node->nr_zeroed_pages += (size >> PAGE_SHIFT);
|
||||
}
|
||||
node->nr_free_pages += (size >> PAGE_SHIFT);
|
||||
dkprintf("%s: added free pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node)
|
||||
{
|
||||
mcs_lock_node_t mcs_node;
|
||||
unsigned long irqflags;
|
||||
int i, max_i;
|
||||
|
||||
if (!zero_at_free)
|
||||
return;
|
||||
|
||||
/* If explicitly specified, zero only in __node */
|
||||
max_i = __node ? 1 : ihk_mc_get_nr_numa_nodes();
|
||||
|
||||
irqflags = cpu_disable_interrupt_save();
|
||||
|
||||
/* Look at NUMA nodes in the order of distance */
|
||||
for (i = 0; i < max_i; ++i) {
|
||||
struct ihk_mc_numa_node *node;
|
||||
|
||||
node = __node ? __node : ihk_mc_get_numa_node_by_distance(i);
|
||||
if (!node) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Iterate free chunks */
|
||||
for (;;) {
|
||||
struct free_chunk *chunk;
|
||||
unsigned long addr, size;
|
||||
|
||||
mcs_lock_lock_noirq(&node->lock, &mcs_node);
|
||||
chunk = __page_alloc_rbtree_get_root_chunk(&node->free_chunks);
|
||||
/*
|
||||
* Release the lock to let other CPUs potentially proceed
|
||||
* in parallel with other chunks
|
||||
*/
|
||||
mcs_lock_unlock_noirq(&node->lock, &mcs_node);
|
||||
|
||||
if (!chunk) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero chunk
|
||||
* NOTE: we cannot refer to chunk structure any more after zeroing
|
||||
*/
|
||||
addr = chunk->addr;
|
||||
size = chunk->size;
|
||||
memset(phys_to_virt(addr), 0, chunk->size);
|
||||
|
||||
mcs_lock_lock_noirq(&node->lock, &mcs_node);
|
||||
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
|
||||
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, size);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
node->nr_zeroed_pages += (size >> PAGE_SHIFT);
|
||||
if (cpu_local_var(current)->profile)
|
||||
kprintf("%s: zeroed %lu pages @ NUMA %d\n",
|
||||
__func__, size >> PAGE_SHIFT, node->id);
|
||||
unlock:
|
||||
mcs_lock_unlock_noirq(&node->lock, &mcs_node);
|
||||
}
|
||||
}
|
||||
|
||||
cpu_restore_interrupt(irqflags);
|
||||
}
|
||||
|
||||
unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
|
||||
int npages, int p2align)
|
||||
@ -633,14 +748,61 @@ unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
|
||||
goto unlock_out;
|
||||
}
|
||||
|
||||
addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
|
||||
npages, p2align);
|
||||
if (zero_at_free) {
|
||||
/* Do we need to zero pages? */
|
||||
if (node->nr_zeroed_pages < npages) {
|
||||
mcs_lock_unlock(&node->lock, &mcs_node);
|
||||
ihk_numa_zero_free_pages(node);
|
||||
mcs_lock_lock(&node->lock, &mcs_node);
|
||||
}
|
||||
|
||||
/* Does not necessarily succeed due to alignment */
|
||||
if (addr) {
|
||||
node->nr_free_pages -= npages;
|
||||
dkprintf("%s: allocated pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
/* Still not enough? Give up.. */
|
||||
if (node->nr_zeroed_pages < npages) {
|
||||
goto unlock_out;
|
||||
}
|
||||
|
||||
addr = __page_alloc_rbtree_alloc_pages(&node->zeroed_chunks,
|
||||
npages, p2align);
|
||||
|
||||
/* Does not necessarily succeed due to alignment */
|
||||
if (addr) {
|
||||
node->nr_free_pages -= npages;
|
||||
node->nr_zeroed_pages -= npages;
|
||||
#if 0
|
||||
{
|
||||
size_t free_bytes = __count_free_bytes(&node->free_chunks);
|
||||
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
|
||||
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
|
||||
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
|
||||
panic("");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dkprintf("%s: allocated pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
/* Default behavior */
|
||||
else {
|
||||
addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
|
||||
npages, p2align);
|
||||
|
||||
/* Does not necessarily succeed due to alignment */
|
||||
if (addr) {
|
||||
node->nr_free_pages -= npages;
|
||||
#if 0
|
||||
{
|
||||
size_t free_bytes = __count_free_bytes(&node->free_chunks);
|
||||
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
|
||||
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
|
||||
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
|
||||
panic("");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dkprintf("%s: allocated pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
|
||||
unlock_out:
|
||||
@ -685,15 +847,60 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
|
||||
}
|
||||
|
||||
mcs_lock_lock(&node->lock, &mcs_node);
|
||||
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
|
||||
npages << PAGE_SHIFT)) {
|
||||
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
if (!zero_at_free ||
|
||||
(zero_at_free && deferred_zero_at_free)) {
|
||||
/*
|
||||
* Free to free_chunks first, will be moved to zeroed_chunks later
|
||||
* if zero at free or asynchronously
|
||||
*/
|
||||
if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
|
||||
npages << PAGE_SHIFT)) {
|
||||
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
else {
|
||||
node->nr_free_pages += npages;
|
||||
#if 0
|
||||
{
|
||||
size_t free_bytes = __count_free_bytes(&node->free_chunks);
|
||||
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
|
||||
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
|
||||
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
|
||||
panic("");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dkprintf("%s: freed pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
else {
|
||||
node->nr_free_pages += npages;
|
||||
dkprintf("%s: freed pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
/*
|
||||
* Free and zero chunk right here
|
||||
*/
|
||||
memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT);
|
||||
|
||||
if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr,
|
||||
npages << PAGE_SHIFT)) {
|
||||
kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
else {
|
||||
node->nr_free_pages += npages;
|
||||
node->nr_zeroed_pages += npages;
|
||||
#if 0
|
||||
{
|
||||
size_t free_bytes = __count_free_bytes(&node->free_chunks);
|
||||
if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
|
||||
kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
|
||||
__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
|
||||
panic("");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dkprintf("%s: freed+zeroed pages 0x%lx:%lu\n",
|
||||
__FUNCTION__, addr, npages << PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
mcs_lock_unlock(&node->lock, &mcs_node);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user