From 41f5c0bdde61f58e5b38fa282d25b677d90ece3e Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Sun, 6 Sep 2020 11:47:56 +0900 Subject: [PATCH] MM: deferred zero cleaning on Linux CPUs Change-Id: Icdb8ac807688533be7a95b7101edfd904250cd02 --- executer/kernel/mcctrl/syscall.c | 99 +++++++++ kernel/include/rbtree.h | 2 + kernel/mem.c | 8 +- kernel/rbtree.c | 37 ++++ lib/include/ihk/mm.h | 2 + lib/include/ihk/page_alloc.h | 13 +- lib/page_alloc.c | 369 +++++++++++++++++++------------ 7 files changed, 386 insertions(+), 144 deletions(-) diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 443d717c..4ab9509a 100644 --- a/executer/kernel/mcctrl/syscall.c +++ b/executer/kernel/mcctrl/syscall.c @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include #include @@ -2224,6 +2226,93 @@ int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet) return 0; } +/* + * Memory clearing helpers. + */ +struct node_distance; + +#define IHK_RBTREE_ALLOCATOR + +#ifdef IHK_RBTREE_ALLOCATOR +struct free_chunk { + unsigned long addr, size; + struct rb_node node; + struct llist_node list; +}; +#endif + +typedef struct mcs_lock_node { +#ifndef SPIN_LOCK_IN_MCS + unsigned long locked; + struct mcs_lock_node *next; +#endif + unsigned long irqsave; +#ifdef SPIN_LOCK_IN_MCS + ihk_spinlock_t spinlock; +#endif +#ifndef ENABLE_UBSAN +} __aligned(64) mcs_lock_node_t; +#else +} mcs_lock_node_t; +#endif + +struct ihk_mc_numa_node { + int id; + int linux_numa_id; + int type; + struct list_head allocators; + struct node_distance *nodes_by_distance; +#ifdef IHK_RBTREE_ALLOCATOR + atomic_t zeroing_workers; + atomic_t nr_to_zero_pages; + struct llist_head zeroed_list; + struct llist_head to_zero_list; + struct rb_root free_chunks; + mcs_lock_node_t lock; + + unsigned long nr_pages; + /* + * nr_free_pages: all freed pages, zeroed if zero_at_free + */ + unsigned long nr_free_pages; + unsigned long min_addr; + unsigned long max_addr; +#endif +}; + +void mcctrl_zero_mckernel_pages(unsigned long arg) +{ + struct llist_node *llnode; + struct ihk_mc_numa_node *node = + (struct ihk_mc_numa_node *)arg; + + /* Iterate free chunks */ + while ((llnode = llist_del_first(&node->to_zero_list))) { + unsigned long addr; + unsigned long size; + struct free_chunk *chunk = + container_of(llnode, struct free_chunk, list); + + addr = chunk->addr; + size = chunk->size; + + memset(phys_to_virt(addr) + sizeof(*chunk), 0, + chunk->size - sizeof(*chunk)); + llist_add(&chunk->list, &node->zeroed_list); + + dprintk("%s: zeroed %lu pages @ McKernel NUMA %d (chunk: 0x%lx:%lu)\n", + __func__, + size >> PAGE_SHIFT, + node->id, + addr, size); + barrier(); + atomic_sub((int)(size >> PAGE_SHIFT), &node->nr_to_zero_pages); + } + + atomic_dec(&node->zeroing_workers); +} + + int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) { struct syscall_request *sc = &packet->req; @@ -2244,6 +2333,14 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet) ret = remap_user_space(sc->args[0], sc->args[1], sc->args[2]); break; + case __NR_move_pages: + /* + * move pages is used for zeroing McKernel side memory, + * this call is NOT offloaded by applications. 
+ */ + mcctrl_zero_mckernel_pages(sc->args[0]); + goto out_no_syscall_return; + case __NR_exit_group: { /* Make sure the user space handler will be called as well */ @@ -2328,6 +2425,8 @@ sched_setparam_out: } __return_syscall(os, packet, ret, 0); + +out_no_syscall_return: ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet); error = 0; diff --git a/kernel/include/rbtree.h b/kernel/include/rbtree.h index 990f0d74..4d0e9395 100644 --- a/kernel/include/rbtree.h +++ b/kernel/include/rbtree.h @@ -108,4 +108,6 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, typeof(*pos), field); 1; }); \ pos = n) +struct rb_node *rb_preorder_dfs_search(const struct rb_root *root, + bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg); #endif /* _LINUX_RBTREE_H */ diff --git a/kernel/mem.c b/kernel/mem.c index 9483e5eb..c337bac2 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -44,6 +44,7 @@ #include #include #include +#include #include //#define DEBUG_PRINT_MEM @@ -749,7 +750,6 @@ distance_based: ihk_mc_get_numa_id(), npages, memory_nodes[node].nodes_by_distance[i].id); - } } @@ -1546,13 +1546,15 @@ static void numa_init(void) INIT_LIST_HEAD(&memory_nodes[i].allocators); memory_nodes[i].nodes_by_distance = 0; #ifdef IHK_RBTREE_ALLOCATOR - memory_nodes[i].zeroed_chunks.rb_node = 0; + ihk_atomic_set(&memory_nodes[i].zeroing_workers, 0); + ihk_atomic_set(&memory_nodes[i].nr_to_zero_pages, 0); memory_nodes[i].free_chunks.rb_node = 0; + init_llist_head(&memory_nodes[i].zeroed_list); + init_llist_head(&memory_nodes[i].to_zero_list); mcs_lock_init(&memory_nodes[i].lock); memory_nodes[i].min_addr = 0xFFFFFFFFFFFFFFFF; memory_nodes[i].max_addr = 0; memory_nodes[i].nr_pages = 0; - memory_nodes[i].nr_zeroed_pages = 0; memory_nodes[i].nr_free_pages = 0; #endif } diff --git a/kernel/rbtree.c b/kernel/rbtree.c index 5134a0d6..b2bed275 100644 --- a/kernel/rbtree.c +++ b/kernel/rbtree.c @@ -432,6 +432,43 @@ struct rb_node *rb_first(const struct rb_root *root) } EXPORT_SYMBOL(rb_first); +/* + * Pre-order depth first search. + * Return a node where __cond is true. 
+ */ +static struct rb_node *__rb_preorder_dfs(struct rb_node *n, + bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg) +{ + struct rb_node *left_res = NULL; + + if (__cond(n, __cond_arg)) + return n; + + if (n->rb_left) { + left_res = __rb_preorder_dfs(n->rb_left, __cond, __cond_arg); + if (left_res) { + return left_res; + } + } + if (n->rb_right) + return __rb_preorder_dfs(n->rb_right, __cond, __cond_arg); + + return NULL; +} + +struct rb_node *rb_preorder_dfs_search(const struct rb_root *root, + bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + + return __rb_preorder_dfs(n, __cond, __cond_arg); +} + + struct rb_node *rb_first_safe(const struct rb_root *root) { struct rb_node *n; diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 2569c860..aadb4997 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -55,6 +55,8 @@ typedef unsigned long ihk_mc_ap_flag; #define IHK_MC_AP_BANDWIDTH 0x010000 #define IHK_MC_AP_LATENCY 0x020000 +/* Only allocate from the closest NUMA node */ +#define IHK_MC_AP_NUMA_STRICT 0x040000 #define IHK_MC_PG_KERNEL 0 #define IHK_MC_PG_USER 1 diff --git a/lib/include/ihk/page_alloc.h b/lib/include/ihk/page_alloc.h index 71a1f185..a7a292d6 100644 --- a/lib/include/ihk/page_alloc.h +++ b/lib/include/ihk/page_alloc.h @@ -17,6 +17,7 @@ #define __HEADER_GENERIC_IHK_PAGE_ALLOC #include +#include #include /* XXX: Physical memory management shouldn't be part of IHK */ @@ -31,9 +32,11 @@ struct node_distance { struct free_chunk { unsigned long addr, size; struct rb_node node; + struct llist_node list; }; #endif + struct ihk_mc_numa_node { int id; int linux_numa_id; @@ -41,17 +44,17 @@ struct ihk_mc_numa_node { struct list_head allocators; struct node_distance *nodes_by_distance; #ifdef IHK_RBTREE_ALLOCATOR - struct rb_root zeroed_chunks; + ihk_atomic_t zeroing_workers; + ihk_atomic_t nr_to_zero_pages; + struct llist_head zeroed_list; + struct llist_head to_zero_list; struct rb_root free_chunks; mcs_lock_node_t lock; unsigned long nr_pages; /* - * nr_free_pages: all freed pages - * nr_zeroed_pages: zeroed free pages - * Invariant: nr_zeroed_pages <= nr_free_pages + * nr_free_pages: all freed pages, zeroed if zero_at_free */ - unsigned long nr_zeroed_pages; unsigned long nr_free_pages; unsigned long min_addr; unsigned long max_addr; diff --git a/lib/page_alloc.c b/lib/page_alloc.c index bff2bf3f..8825e38c 100644 --- a/lib/page_alloc.c +++ b/lib/page_alloc.c @@ -513,6 +513,30 @@ static int __page_alloc_rbtree_mark_range_allocated(struct rb_root *root, * * NOTE: locking must be managed by the caller. */ +struct chunk_fits_arg { + unsigned long size; + unsigned long align_size; + unsigned long align_mask; +}; + +bool chunk_fits(struct rb_node *node, void *arg) +{ + struct free_chunk *chunk; + unsigned long aligned_addr = 0; + struct chunk_fits_arg *cfa = (struct chunk_fits_arg *)arg; + + chunk = container_of(node, struct free_chunk, node); + aligned_addr = (chunk->addr + (cfa->align_size - 1)) & cfa->align_mask; + + /* Is this a suitable chunk? 
*/ + if ((aligned_addr + cfa->size) <= (chunk->addr + chunk->size)) { + return true; + } + + return false; +} + + static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root, int npages, int p2align) { @@ -523,6 +547,19 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root, unsigned long align_mask = ~(align_size - 1); unsigned long aligned_addr = 0; +#if 0 + struct chunk_fits_arg cfa = { + .size = size, + .align_size = align_size, + .align_mask = align_mask + }; + + /* Find first maching chunk */ + node = rb_preorder_dfs_search(root, chunk_fits, &cfa); + + chunk = container_of(node, struct free_chunk, node); + aligned_addr = (chunk->addr + (align_size - 1)) & align_mask; +#else for (node = rb_first(root); node; node = rb_next(node)) { chunk = container_of(node, struct free_chunk, node); aligned_addr = (chunk->addr + (align_size - 1)) & align_mask; @@ -537,6 +574,7 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root, if (!node) { return 0; } +#endif dkprintf("%s: allocating: 0x%lx:%lu\n", __FUNCTION__, aligned_addr, size); @@ -619,20 +657,12 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node, if (zero_at_free) { /* Zero chunk */ memset(phys_to_virt(addr), 0, size); - - if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) { - kprintf("%s: ERROR: adding 0x%lx:%lu\n", - __FUNCTION__, addr, size); - return EINVAL; - } } - /* Default behavior */ - else { - if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) { - kprintf("%s: ERROR: adding 0x%lx:%lu\n", - __FUNCTION__, addr, size); - return EINVAL; - } + + if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) { + kprintf("%s: ERROR: adding 0x%lx:%lu\n", + __FUNCTION__, addr, size); + return EINVAL; } if (addr < node->min_addr) @@ -642,80 +672,104 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node, node->max_addr = addr + size; node->nr_pages += (size >> PAGE_SHIFT); - if (zero_at_free) { - node->nr_zeroed_pages += (size >> PAGE_SHIFT); - } node->nr_free_pages += (size >> PAGE_SHIFT); dkprintf("%s: added free pages 0x%lx:%lu\n", __FUNCTION__, addr, size); return 0; } -void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node) +#define IHK_NUMA_ALL_PAGES (0) + +int __ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node, int nr_pages) { - mcs_lock_node_t mcs_node; - unsigned long irqflags; int i, max_i; + int nr_zeroed_pages = 0; if (!zero_at_free) - return; + return 0; /* If explicitly specified, zero only in __node */ max_i = __node ? 1 : ihk_mc_get_nr_numa_nodes(); - irqflags = cpu_disable_interrupt_save(); - /* Look at NUMA nodes in the order of distance */ for (i = 0; i < max_i; ++i) { struct ihk_mc_numa_node *node; + struct llist_node *llnode; + /* Unless explicitly specified.. */ node = __node ? 
__node : ihk_mc_get_numa_node_by_distance(i); if (!node) { break; } - /* Iterate free chunks */ - for (;;) { - struct free_chunk *chunk; - unsigned long addr, size; + /* + * If number of pages specified, look for a big enough chunk + */ + if (nr_pages) { + struct llist_head tmp; - mcs_lock_lock_noirq(&node->lock, &mcs_node); - chunk = __page_alloc_rbtree_get_root_chunk(&node->free_chunks); - /* - * Release the lock to let other CPUs potentially proceed - * in parallel with other chunks - */ - mcs_lock_unlock_noirq(&node->lock, &mcs_node); + init_llist_head(&tmp); - if (!chunk) { + /* Look for a suitable chunk */ + while ((llnode = llist_del_first(&node->to_zero_list))) { + unsigned long addr; + unsigned long size; + struct free_chunk *chunk = + container_of(llnode, struct free_chunk, list); + + addr = chunk->addr; + size = chunk->size; + + if (size < (nr_pages << PAGE_SHIFT)) { + llist_add(llnode, &tmp); + continue; + } + + memset(phys_to_virt(addr) + sizeof(*chunk), 0, + size - sizeof(*chunk)); + llist_add(&chunk->list, &node->zeroed_list); + barrier(); + ihk_atomic_sub((int)(size >> PAGE_SHIFT), + &node->nr_to_zero_pages); + nr_zeroed_pages += (chunk->size >> PAGE_SHIFT); + kprintf("%s: zeroed chunk 0x%lx:%lu in allocate path\n", + __func__, addr, size); break; } - /* - * Zero chunk - * NOTE: we cannot refer to chunk structure any more after zeroing - */ - addr = chunk->addr; - size = chunk->size; - memset(phys_to_virt(addr), 0, chunk->size); - - mcs_lock_lock_noirq(&node->lock, &mcs_node); - if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) { - kprintf("%s: ERROR: freeing 0x%lx:%lu\n", - __FUNCTION__, addr, size); - goto unlock; + /* Add back the ones that didn't match */ + while ((llnode = llist_del_first(&tmp))) { + llist_add(llnode, &node->to_zero_list); } + } + /* Otherwise iterate all to_zero chunks */ + else { + while ((llnode = llist_del_first(&node->to_zero_list))) { + unsigned long addr; + unsigned long size; + struct free_chunk *chunk = + container_of(llnode, struct free_chunk, list); - node->nr_zeroed_pages += (size >> PAGE_SHIFT); -if (cpu_local_var(current)->profile) - kprintf("%s: zeroed %lu pages @ NUMA %d\n", - __func__, size >> PAGE_SHIFT, node->id); -unlock: - mcs_lock_unlock_noirq(&node->lock, &mcs_node); + addr = chunk->addr; + size = chunk->size; + + memset(phys_to_virt(addr) + sizeof(*chunk), 0, + size - sizeof(*chunk)); + llist_add(&chunk->list, &node->zeroed_list); + barrier(); + ihk_atomic_sub((int)(size >> PAGE_SHIFT), + &node->nr_to_zero_pages); + nr_zeroed_pages += (chunk->size >> PAGE_SHIFT); + } } } - cpu_restore_interrupt(irqflags); + return nr_zeroed_pages; +} + +void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node) +{ + __ihk_numa_zero_free_pages(__node, IHK_NUMA_ALL_PAGES); } unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node, @@ -743,66 +797,66 @@ unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node, #endif mcs_lock_lock(&node->lock, &mcs_node); +retry: + if (zero_at_free) { + struct llist_node *llnode; + /* + * Process zeroed chunks that are not + * on the free tree yet. 
+ */ + while ((llnode = llist_del_first(&node->zeroed_list))) { + unsigned long addr; + unsigned long size; + struct free_chunk *chunk = + container_of(llnode, struct free_chunk, list); + + addr = chunk->addr; + size = chunk->size; + + if (__page_alloc_rbtree_free_range(&node->free_chunks, + addr, size)) { + kprintf("%s: ERROR: freeing zeroed chunk 0x%lx:%lu\n", + __FUNCTION__, addr, npages << PAGE_SHIFT); + } + else { + node->nr_free_pages += (size >> PAGE_SHIFT); + dkprintf("%s: freed zeroed chunk 0x%lx:%lu\n", + __FUNCTION__, addr, size); + } + } + + /* Not enough? Check if we can zero pages now */ + if (node->nr_free_pages < npages) { + if (__ihk_numa_zero_free_pages(node, npages) >= npages) { + goto retry; + } + } + } + + /* Not enough pages? Give up.. */ if (node->nr_free_pages < npages) { goto unlock_out; } - if (zero_at_free) { - /* Do we need to zero pages? */ - if (node->nr_zeroed_pages < npages) { - mcs_lock_unlock(&node->lock, &mcs_node); - ihk_numa_zero_free_pages(node); - mcs_lock_lock(&node->lock, &mcs_node); - } + addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks, + npages, p2align); - /* Still not enough? Give up.. */ - if (node->nr_zeroed_pages < npages) { - goto unlock_out; - } - - addr = __page_alloc_rbtree_alloc_pages(&node->zeroed_chunks, - npages, p2align); - - /* Does not necessarily succeed due to alignment */ - if (addr) { - node->nr_free_pages -= npages; - node->nr_zeroed_pages -= npages; + /* Does not necessarily succeed due to alignment */ + if (addr) { + node->nr_free_pages -= npages; #if 0 - { - size_t free_bytes = __count_free_bytes(&node->free_chunks); - if (free_bytes != node->nr_free_pages * PAGE_SIZE) { - kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n", - __func__, node->nr_free_pages * PAGE_SIZE, free_bytes); - panic(""); - } + { + size_t free_bytes = __count_free_bytes(&node->free_chunks); + if (free_bytes != node->nr_free_pages * PAGE_SIZE) { + kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n", + __func__, node->nr_free_pages * PAGE_SIZE, free_bytes); + panic(""); } -#endif - dkprintf("%s: allocated pages 0x%lx:%lu\n", - __FUNCTION__, addr, npages << PAGE_SHIFT); } - } - /* Default behavior */ - else { - addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks, - npages, p2align); - - /* Does not necessarily succeed due to alignment */ - if (addr) { - node->nr_free_pages -= npages; -#if 0 - { - size_t free_bytes = __count_free_bytes(&node->free_chunks); - if (free_bytes != node->nr_free_pages * PAGE_SIZE) { - kprintf("%s: inconsistent free count? node: %lu vs. 
cnt: %lu\n", - __func__, node->nr_free_pages * PAGE_SIZE, free_bytes); - panic(""); - } - } #endif - dkprintf("%s: allocated pages 0x%lx:%lu\n", - __FUNCTION__, addr, npages << PAGE_SHIFT); - } + dkprintf("%s: allocated pages 0x%lx:%lu\n", + __FUNCTION__, addr, npages << PAGE_SHIFT); } unlock_out: @@ -815,6 +869,7 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node, unsigned long addr, int npages) { mcs_lock_node_t mcs_node; + int defer_zero_at_free = deferred_zero_at_free; #ifdef ENABLE_PER_CPU_ALLOC_CACHE /* CPU local cache */ @@ -846,13 +901,30 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node, return; } - mcs_lock_lock(&node->lock, &mcs_node); +#if 0 + /* Do not defer zeroing when the number of free pages is low */ + if (zero_at_free && defer_zero_at_free) { + mcs_lock_lock(&node->lock, &mcs_node); + if (node->nr_free_pages < (node->nr_pages * 3 / 100)) + defer_zero_at_free = 0; + mcs_lock_unlock(&node->lock, &mcs_node); + } +#endif + + + /* Zero chunk right here if needed */ + if (zero_at_free && !defer_zero_at_free) { + memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT); + } + + /* + * If we don't zero at free() or we zeroed the chunk + * already, simply add it to the free tree. + */ if (!zero_at_free || - (zero_at_free && deferred_zero_at_free)) { - /* - * Free to free_chunks first, will be moved to zeroed_chunks later - * if zero at free or asynchronously - */ + (zero_at_free && !defer_zero_at_free)) { + mcs_lock_lock(&node->lock, &mcs_node); + if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, npages << PAGE_SHIFT)) { kprintf("%s: ERROR: freeing 0x%lx:%lu\n", @@ -870,39 +942,64 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node, } } #endif - dkprintf("%s: freed pages 0x%lx:%lu\n", - __FUNCTION__, addr, npages << PAGE_SHIFT); + dkprintf("%s: freed%s chunk 0x%lx:%lu\n", + __FUNCTION__, + zero_at_free ? " and zeroed" : "", + addr, npages << PAGE_SHIFT); } + mcs_lock_unlock(&node->lock, &mcs_node); } + /* + * Deferred zeroing. + * Put the chunk to the to_zero list. + */ else { - /* - * Free and zero chunk right here - */ - memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT); + struct free_chunk *chunk = + (struct free_chunk *)phys_to_virt(addr); + chunk->addr = addr; + chunk->size = npages << PAGE_SHIFT; + ihk_atomic_add(npages, &node->nr_to_zero_pages); + barrier(); + llist_add(&chunk->list, &node->to_zero_list); - if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, - npages << PAGE_SHIFT)) { - kprintf("%s: ERROR: freeing 0x%lx:%lu\n", - __FUNCTION__, addr, npages << PAGE_SHIFT); - } - else { - node->nr_free_pages += npages; - node->nr_zeroed_pages += npages; -#if 0 - { - size_t free_bytes = __count_free_bytes(&node->free_chunks); - if (free_bytes != node->nr_free_pages * PAGE_SIZE) { - kprintf("%s: inconsistent free count? node: %lu vs. 
cnt: %lu\n", - __func__, node->nr_free_pages * PAGE_SIZE, free_bytes); - panic(""); - } + /* Ask Linux to clear memory */ + if (cpu_local_var_initialized && + cpu_local_var(current) && + cpu_local_var(current) != &cpu_local_var(idle) && + !cpu_local_var(current)->proc->nohost) { + struct ihk_ikc_channel_desc *syscall_channel = + cpu_local_var(ikc2linux); + struct ikc_scd_packet packet IHK_DMA_ALIGN; + + if (ihk_atomic_read(&node->zeroing_workers) > 0) { + dkprintf("%s: skipping Linux zero request..\n", __func__); + return; + } + + ihk_atomic_inc(&node->zeroing_workers); + + memset(&packet, 0, sizeof(packet)); + packet.req.number = __NR_move_pages; + packet.req.args[0] = (unsigned long)node; + + barrier(); + smp_store_release(&packet.req.valid, 1); + packet.msg = SCD_MSG_SYSCALL_ONESIDE; + packet.ref = ihk_mc_get_processor_id(); + packet.pid = cpu_local_var(current)->proc->pid; + packet.resp_pa = 0; + + if (ihk_ikc_send(syscall_channel, &packet, 0) < 0) { + kprintf("%s: WARNING: failed to send memory clear" + " send IKC req..\n", __func__); + } + else { + dkprintf("%s: clear mem req for NUMA %d sent in req" + " for addr: 0x%lx\n", + __func__, node->id, addr); } -#endif - dkprintf("%s: freed+zeroed pages 0x%lx:%lu\n", - __FUNCTION__, addr, npages << PAGE_SHIFT); } } - mcs_lock_unlock(&node->lock, &mcs_node); } #endif // IHK_RBTREE_ALLOCATOR