MM: deferred zero cleaning on Linux CPUs

Change-Id: Icdb8ac807688533be7a95b7101edfd904250cd02
Committed by: Masamichi Takagi
Parent: e7b8aeb4f7
Commit: 41f5c0bdde
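Summary of the mechanism, as far as it can be read from the hunks below: with deferred zeroing enabled, McKernel's free path no longer memsets pages itself. It places a free_chunk record at the head of the freed range, pushes it onto the per-NUMA-node to_zero_list, and sends a one-sided IKC request (reusing the __NR_move_pages request number) so that a Linux CPU runs mcctrl_zero_mckernel_pages(). The Linux side clears the chunks and moves them to zeroed_list, and ihk_numa_alloc_pages() later merges zeroed chunks back into the free rbtree. The following is a minimal userspace model of that hand-off, not the IHK/McKernel code: plain singly linked lists and a direct function call stand in for the llist heads, the rbtree and the IKC message, and all names in it are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct chunk {
	struct chunk *next;
	size_t size;
	unsigned char *mem;
};

/* Pipeline stages: freed but dirty -> zeroed -> available for allocation. */
static struct chunk *to_zero_list, *zeroed_list, *free_list;

static void push(struct chunk **list, struct chunk *c)
{
	c->next = *list;
	*list = c;
}

static struct chunk *pop(struct chunk **list)
{
	struct chunk *c = *list;

	if (c)
		*list = c->next;
	return c;
}

/* Free path: queue the chunk, do not clear it here. */
static void free_pages(unsigned char *mem, size_t size)
{
	struct chunk *c = malloc(sizeof(*c));

	c->mem = mem;
	c->size = size;
	push(&to_zero_list, c);
}

/* Stand-in for the Linux-side worker (mcctrl_zero_mckernel_pages). */
static void zero_worker(void)
{
	struct chunk *c;

	while ((c = pop(&to_zero_list))) {
		memset(c->mem, 0, c->size);
		push(&zeroed_list, c);
	}
}

/* Allocation path: merge already-zeroed chunks first, then hand one out. */
static unsigned char *alloc_pages(size_t size)
{
	struct chunk *c, **pp;
	unsigned char *mem;

	while ((c = pop(&zeroed_list)))
		push(&free_list, c);

	for (pp = &free_list; *pp; pp = &(*pp)->next) {
		if ((*pp)->size >= size) {
			c = *pp;
			*pp = c->next;
			mem = c->mem;
			free(c);
			return mem;
		}
	}
	return NULL;
}

int main(void)
{
	unsigned char *buf = malloc(4096);

	memset(buf, 0xaa, 4096);
	free_pages(buf, 4096);	/* queued, still dirty */
	zero_worker();		/* the "Linux CPU" clears it asynchronously */
	buf = alloc_pages(4096);
	printf("got %p, first byte: %u\n", (void *)buf, buf ? buf[0] : 0);
	free(buf);
	return 0;
}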
@@ -46,6 +46,8 @@
 #include <linux/kdev_t.h>
 #include <linux/hugetlb.h>
 #include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/llist.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/io.h>
@@ -2224,6 +2226,93 @@ int __do_in_kernel_irq_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
 	return 0;
 }
 
+/*
+ * Memory clearing helpers.
+ */
+struct node_distance;
+
+#define IHK_RBTREE_ALLOCATOR
+
+#ifdef IHK_RBTREE_ALLOCATOR
+struct free_chunk {
+	unsigned long addr, size;
+	struct rb_node node;
+	struct llist_node list;
+};
+#endif
+
+typedef struct mcs_lock_node {
+#ifndef SPIN_LOCK_IN_MCS
+	unsigned long locked;
+	struct mcs_lock_node *next;
+#endif
+	unsigned long irqsave;
+#ifdef SPIN_LOCK_IN_MCS
+	ihk_spinlock_t spinlock;
+#endif
+#ifndef ENABLE_UBSAN
+} __aligned(64) mcs_lock_node_t;
+#else
+} mcs_lock_node_t;
+#endif
+
+struct ihk_mc_numa_node {
+	int id;
+	int linux_numa_id;
+	int type;
+	struct list_head allocators;
+	struct node_distance *nodes_by_distance;
+#ifdef IHK_RBTREE_ALLOCATOR
+	atomic_t zeroing_workers;
+	atomic_t nr_to_zero_pages;
+	struct llist_head zeroed_list;
+	struct llist_head to_zero_list;
+	struct rb_root free_chunks;
+	mcs_lock_node_t lock;
+
+	unsigned long nr_pages;
+	/*
+	 * nr_free_pages: all freed pages, zeroed if zero_at_free
+	 */
+	unsigned long nr_free_pages;
+	unsigned long min_addr;
+	unsigned long max_addr;
+#endif
+};
+
+void mcctrl_zero_mckernel_pages(unsigned long arg)
+{
+	struct llist_node *llnode;
+	struct ihk_mc_numa_node *node =
+		(struct ihk_mc_numa_node *)arg;
+
+	/* Iterate free chunks */
+	while ((llnode = llist_del_first(&node->to_zero_list))) {
+		unsigned long addr;
+		unsigned long size;
+		struct free_chunk *chunk =
+			container_of(llnode, struct free_chunk, list);
+
+		addr = chunk->addr;
+		size = chunk->size;
+
+		memset(phys_to_virt(addr) + sizeof(*chunk), 0,
+			chunk->size - sizeof(*chunk));
+		llist_add(&chunk->list, &node->zeroed_list);
+
+		dprintk("%s: zeroed %lu pages @ McKernel NUMA %d (chunk: 0x%lx:%lu)\n",
+			__func__,
+			size >> PAGE_SHIFT,
+			node->id,
+			addr, size);
+		barrier();
+		atomic_sub((int)(size >> PAGE_SHIFT), &node->nr_to_zero_pages);
+	}
+
+	atomic_dec(&node->zeroing_workers);
+}
+
+
 int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
 {
 	struct syscall_request *sc = &packet->req;
@@ -2244,6 +2333,14 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
 		ret = remap_user_space(sc->args[0], sc->args[1], sc->args[2]);
 		break;
 
+	case __NR_move_pages:
+		/*
+		 * move pages is used for zeroing McKernel side memory,
+		 * this call is NOT offloaded by applications.
+		 */
+		mcctrl_zero_mckernel_pages(sc->args[0]);
+		goto out_no_syscall_return;
+
 	case __NR_exit_group: {
 
 		/* Make sure the user space handler will be called as well */
@@ -2328,6 +2425,8 @@ sched_setparam_out:
 	}
 
 	__return_syscall(os, packet, ret, 0);
+
+out_no_syscall_return:
 	ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet);
 
 	error = 0;
@@ -108,4 +108,6 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 			typeof(*pos), field); 1; }); \
 		pos = n)
 
+struct rb_node *rb_preorder_dfs_search(const struct rb_root *root,
+	bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg);
 #endif /* _LINUX_RBTREE_H */
@@ -44,6 +44,7 @@
 #include <limits.h>
 #include <sysfs.h>
 #include <ihk/debug.h>
+#include <llist.h>
 #include <bootparam.h>
 
 //#define DEBUG_PRINT_MEM
@@ -749,7 +750,6 @@ distance_based:
 				ihk_mc_get_numa_id(),
 				npages,
 				memory_nodes[node].nodes_by_distance[i].id);
-
 		}
 	}
 
@@ -1546,13 +1546,15 @@ static void numa_init(void)
 		INIT_LIST_HEAD(&memory_nodes[i].allocators);
 		memory_nodes[i].nodes_by_distance = 0;
 #ifdef IHK_RBTREE_ALLOCATOR
-		memory_nodes[i].zeroed_chunks.rb_node = 0;
+		ihk_atomic_set(&memory_nodes[i].zeroing_workers, 0);
+		ihk_atomic_set(&memory_nodes[i].nr_to_zero_pages, 0);
 		memory_nodes[i].free_chunks.rb_node = 0;
+		init_llist_head(&memory_nodes[i].zeroed_list);
+		init_llist_head(&memory_nodes[i].to_zero_list);
 		mcs_lock_init(&memory_nodes[i].lock);
 		memory_nodes[i].min_addr = 0xFFFFFFFFFFFFFFFF;
 		memory_nodes[i].max_addr = 0;
 		memory_nodes[i].nr_pages = 0;
-		memory_nodes[i].nr_zeroed_pages = 0;
 		memory_nodes[i].nr_free_pages = 0;
 #endif
 	}
@@ -432,6 +432,43 @@ struct rb_node *rb_first(const struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_first);
 
+/*
+ * Pre-order depth first search.
+ * Return a node where __cond is true.
+ */
+static struct rb_node *__rb_preorder_dfs(struct rb_node *n,
+	bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg)
+{
+	struct rb_node *left_res = NULL;
+
+	if (__cond(n, __cond_arg))
+		return n;
+
+	if (n->rb_left) {
+		left_res = __rb_preorder_dfs(n->rb_left, __cond, __cond_arg);
+		if (left_res) {
+			return left_res;
+		}
+	}
+	if (n->rb_right)
+		return __rb_preorder_dfs(n->rb_right, __cond, __cond_arg);
+
+	return NULL;
+}
+
+struct rb_node *rb_preorder_dfs_search(const struct rb_root *root,
+	bool (*__cond)(struct rb_node *, void *arg), void *__cond_arg)
+{
+	struct rb_node *n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+
+	return __rb_preorder_dfs(n, __cond, __cond_arg);
+}
+
+
 struct rb_node *rb_first_safe(const struct rb_root *root)
 {
 	struct rb_node *n;
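The rb_preorder_dfs_search() helper added above returns the first node, in pre-order, for which the caller-supplied predicate returns true; the currently disabled (#if 0) path in __page_alloc_rbtree_alloc_pages() further down in this commit shows the intended use together with chunk_fits(). Below is a standalone sketch of the same traversal pattern on a plain binary tree rather than the kernel rbtree; the node type and the at_least() predicate are illustrative only.

#include <stdbool.h>
#include <stdio.h>

struct node {
	int value;
	struct node *left, *right;
};

/* Pre-order DFS: return the first node for which cond() is true. */
static struct node *preorder_dfs(struct node *n,
	bool (*cond)(struct node *, void *), void *arg)
{
	struct node *res;

	if (!n)
		return NULL;
	if (cond(n, arg))
		return n;
	res = preorder_dfs(n->left, cond, arg);
	if (res)
		return res;
	return preorder_dfs(n->right, cond, arg);
}

/* Predicate in the spirit of chunk_fits(): "is this node big enough?" */
static bool at_least(struct node *n, void *arg)
{
	return n->value >= *(int *)arg;
}

int main(void)
{
	struct node leaves[] = { { 3 }, { 8 } };
	struct node root = { 5, &leaves[0], &leaves[1] };
	int want = 6;
	struct node *hit = preorder_dfs(&root, at_least, &want);

	printf("first node >= %d: %d\n", want, hit ? hit->value : -1);
	return 0;
}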
@@ -55,6 +55,8 @@ typedef unsigned long ihk_mc_ap_flag;
 
 #define IHK_MC_AP_BANDWIDTH	0x010000
 #define IHK_MC_AP_LATENCY	0x020000
+/* Only allocate from the closest NUMA node */
+#define IHK_MC_AP_NUMA_STRICT	0x040000
 
 #define IHK_MC_PG_KERNEL	0
 #define IHK_MC_PG_USER		1
@@ -17,6 +17,7 @@
 #define __HEADER_GENERIC_IHK_PAGE_ALLOC
 
 #include <list.h>
+#include <llist.h>
 #include <rbtree.h>
 
 /* XXX: Physical memory management shouldn't be part of IHK */
@@ -31,9 +32,11 @@ struct node_distance {
 struct free_chunk {
 	unsigned long addr, size;
 	struct rb_node node;
+	struct llist_node list;
 };
 #endif
 
+
 struct ihk_mc_numa_node {
 	int id;
 	int linux_numa_id;
@@ -41,17 +44,17 @@ struct ihk_mc_numa_node {
 	struct list_head allocators;
 	struct node_distance *nodes_by_distance;
 #ifdef IHK_RBTREE_ALLOCATOR
-	struct rb_root zeroed_chunks;
+	ihk_atomic_t zeroing_workers;
+	ihk_atomic_t nr_to_zero_pages;
+	struct llist_head zeroed_list;
+	struct llist_head to_zero_list;
 	struct rb_root free_chunks;
 	mcs_lock_node_t lock;
 
 	unsigned long nr_pages;
 	/*
-	 * nr_free_pages: all freed pages
-	 * nr_zeroed_pages: zeroed free pages
-	 * Invariant: nr_zeroed_pages <= nr_free_pages
+	 * nr_free_pages: all freed pages, zeroed if zero_at_free
 	 */
-	unsigned long nr_zeroed_pages;
 	unsigned long nr_free_pages;
 	unsigned long min_addr;
 	unsigned long max_addr;

lib/page_alloc.c: 369 lines changed
@@ -513,6 +513,30 @@ static int __page_alloc_rbtree_mark_range_allocated(struct rb_root *root,
  *
  * NOTE: locking must be managed by the caller.
  */
+struct chunk_fits_arg {
+	unsigned long size;
+	unsigned long align_size;
+	unsigned long align_mask;
+};
+
+bool chunk_fits(struct rb_node *node, void *arg)
+{
+	struct free_chunk *chunk;
+	unsigned long aligned_addr = 0;
+	struct chunk_fits_arg *cfa = (struct chunk_fits_arg *)arg;
+
+	chunk = container_of(node, struct free_chunk, node);
+	aligned_addr = (chunk->addr + (cfa->align_size - 1)) & cfa->align_mask;
+
+	/* Is this a suitable chunk? */
+	if ((aligned_addr + cfa->size) <= (chunk->addr + chunk->size)) {
+		return true;
+	}
+
+	return false;
+}
+
+
 static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
 	int npages, int p2align)
 {
@@ -523,6 +547,19 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
 	unsigned long align_mask = ~(align_size - 1);
 	unsigned long aligned_addr = 0;
 
+#if 0
+	struct chunk_fits_arg cfa = {
+		.size = size,
+		.align_size = align_size,
+		.align_mask = align_mask
+	};
+
+	/* Find first maching chunk */
+	node = rb_preorder_dfs_search(root, chunk_fits, &cfa);
+
+	chunk = container_of(node, struct free_chunk, node);
+	aligned_addr = (chunk->addr + (align_size - 1)) & align_mask;
+#else
 	for (node = rb_first(root); node; node = rb_next(node)) {
 		chunk = container_of(node, struct free_chunk, node);
 		aligned_addr = (chunk->addr + (align_size - 1)) & align_mask;
@@ -537,6 +574,7 @@ static unsigned long __page_alloc_rbtree_alloc_pages(struct rb_root *root,
 	if (!node) {
 		return 0;
 	}
+#endif
 
 	dkprintf("%s: allocating: 0x%lx:%lu\n",
 		__FUNCTION__, aligned_addr, size);
@@ -619,20 +657,12 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
 	if (zero_at_free) {
 		/* Zero chunk */
 		memset(phys_to_virt(addr), 0, size);
 
-		if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
-			kprintf("%s: ERROR: adding 0x%lx:%lu\n",
-				__FUNCTION__, addr, size);
-			return EINVAL;
-		}
 	}
-	/* Default behavior */
-	else {
-		if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
-			kprintf("%s: ERROR: adding 0x%lx:%lu\n",
-				__FUNCTION__, addr, size);
-			return EINVAL;
-		}
+	if (__page_alloc_rbtree_free_range(&node->free_chunks, addr, size)) {
+		kprintf("%s: ERROR: adding 0x%lx:%lu\n",
+			__FUNCTION__, addr, size);
+		return EINVAL;
 	}
 
 	if (addr < node->min_addr)
@@ -642,80 +672,104 @@ int ihk_numa_add_free_pages(struct ihk_mc_numa_node *node,
 		node->max_addr = addr + size;
 
 	node->nr_pages += (size >> PAGE_SHIFT);
-	if (zero_at_free) {
-		node->nr_zeroed_pages += (size >> PAGE_SHIFT);
-	}
 	node->nr_free_pages += (size >> PAGE_SHIFT);
 	dkprintf("%s: added free pages 0x%lx:%lu\n",
 		__FUNCTION__, addr, size);
 	return 0;
 }
 
-void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node)
+#define IHK_NUMA_ALL_PAGES (0)
+
+int __ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node, int nr_pages)
 {
-	mcs_lock_node_t mcs_node;
-	unsigned long irqflags;
 	int i, max_i;
+	int nr_zeroed_pages = 0;
 
 	if (!zero_at_free)
-		return;
+		return 0;
 
 	/* If explicitly specified, zero only in __node */
 	max_i = __node ? 1 : ihk_mc_get_nr_numa_nodes();
 
-	irqflags = cpu_disable_interrupt_save();
-
 	/* Look at NUMA nodes in the order of distance */
 	for (i = 0; i < max_i; ++i) {
 		struct ihk_mc_numa_node *node;
+		struct llist_node *llnode;
 
+		/* Unless explicitly specified.. */
 		node = __node ? __node : ihk_mc_get_numa_node_by_distance(i);
 		if (!node) {
 			break;
 		}
 
-		/* Iterate free chunks */
-		for (;;) {
-			struct free_chunk *chunk;
-			unsigned long addr, size;
-
-			mcs_lock_lock_noirq(&node->lock, &mcs_node);
-			chunk = __page_alloc_rbtree_get_root_chunk(&node->free_chunks);
-			/*
-			 * Release the lock to let other CPUs potentially proceed
-			 * in parallel with other chunks
-			 */
-			mcs_lock_unlock_noirq(&node->lock, &mcs_node);
-
-			if (!chunk) {
+		/*
+		 * If number of pages specified, look for a big enough chunk
+		 */
+		if (nr_pages) {
+			struct llist_head tmp;
+
+			init_llist_head(&tmp);
+
+			/* Look for a suitable chunk */
+			while ((llnode = llist_del_first(&node->to_zero_list))) {
+				unsigned long addr;
+				unsigned long size;
+				struct free_chunk *chunk =
+					container_of(llnode, struct free_chunk, list);
+
+				addr = chunk->addr;
+				size = chunk->size;
+
+				if (size < (nr_pages << PAGE_SHIFT)) {
+					llist_add(llnode, &tmp);
+					continue;
+				}
+
+				memset(phys_to_virt(addr) + sizeof(*chunk), 0,
+					size - sizeof(*chunk));
+				llist_add(&chunk->list, &node->zeroed_list);
+				barrier();
+				ihk_atomic_sub((int)(size >> PAGE_SHIFT),
+					&node->nr_to_zero_pages);
+				nr_zeroed_pages += (chunk->size >> PAGE_SHIFT);
+				kprintf("%s: zeroed chunk 0x%lx:%lu in allocate path\n",
+					__func__, addr, size);
 				break;
 			}
 
-			/*
-			 * Zero chunk
-			 * NOTE: we cannot refer to chunk structure any more after zeroing
-			 */
-			addr = chunk->addr;
-			size = chunk->size;
-			memset(phys_to_virt(addr), 0, chunk->size);
-
-			mcs_lock_lock_noirq(&node->lock, &mcs_node);
-			if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr, size)) {
-				kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
-					__FUNCTION__, addr, size);
-				goto unlock;
-			}
+			/* Add back the ones that didn't match */
+			while ((llnode = llist_del_first(&tmp))) {
+				llist_add(llnode, &node->to_zero_list);
+			}
+		}
+		/* Otherwise iterate all to_zero chunks */
+		else {
+			while ((llnode = llist_del_first(&node->to_zero_list))) {
+				unsigned long addr;
+				unsigned long size;
+				struct free_chunk *chunk =
+					container_of(llnode, struct free_chunk, list);
 
-			node->nr_zeroed_pages += (size >> PAGE_SHIFT);
-			if (cpu_local_var(current)->profile)
-				kprintf("%s: zeroed %lu pages @ NUMA %d\n",
-					__func__, size >> PAGE_SHIFT, node->id);
-unlock:
-			mcs_lock_unlock_noirq(&node->lock, &mcs_node);
+				addr = chunk->addr;
+				size = chunk->size;
+				memset(phys_to_virt(addr) + sizeof(*chunk), 0,
+					size - sizeof(*chunk));
+				llist_add(&chunk->list, &node->zeroed_list);
+				barrier();
+				ihk_atomic_sub((int)(size >> PAGE_SHIFT),
+					&node->nr_to_zero_pages);
+				nr_zeroed_pages += (chunk->size >> PAGE_SHIFT);
+			}
 		}
 	}
 
-	cpu_restore_interrupt(irqflags);
+	return nr_zeroed_pages;
+}
+
+void ihk_numa_zero_free_pages(struct ihk_mc_numa_node *__node)
+{
+	__ihk_numa_zero_free_pages(__node, IHK_NUMA_ALL_PAGES);
 }
 
 unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
@@ -743,66 +797,66 @@ unsigned long ihk_numa_alloc_pages(struct ihk_mc_numa_node *node,
 #endif
 
 	mcs_lock_lock(&node->lock, &mcs_node);
+retry:
+	if (zero_at_free) {
+		struct llist_node *llnode;
+
+		/*
+		 * Process zeroed chunks that are not
+		 * on the free tree yet.
+		 */
+		while ((llnode = llist_del_first(&node->zeroed_list))) {
+			unsigned long addr;
+			unsigned long size;
+			struct free_chunk *chunk =
+				container_of(llnode, struct free_chunk, list);
+
+			addr = chunk->addr;
+			size = chunk->size;
+
+			if (__page_alloc_rbtree_free_range(&node->free_chunks,
+						addr, size)) {
+				kprintf("%s: ERROR: freeing zeroed chunk 0x%lx:%lu\n",
+					__FUNCTION__, addr, npages << PAGE_SHIFT);
+			}
+			else {
+				node->nr_free_pages += (size >> PAGE_SHIFT);
+				dkprintf("%s: freed zeroed chunk 0x%lx:%lu\n",
+					__FUNCTION__, addr, size);
+			}
+		}
+
+		/* Not enough? Check if we can zero pages now */
+		if (node->nr_free_pages < npages) {
+			if (__ihk_numa_zero_free_pages(node, npages) >= npages) {
+				goto retry;
+			}
+		}
+	}
+
+	/* Not enough pages? Give up.. */
 	if (node->nr_free_pages < npages) {
 		goto unlock_out;
 	}
 
-	if (zero_at_free) {
-		/* Do we need to zero pages? */
-		if (node->nr_zeroed_pages < npages) {
-			mcs_lock_unlock(&node->lock, &mcs_node);
-			ihk_numa_zero_free_pages(node);
-			mcs_lock_lock(&node->lock, &mcs_node);
-		}
-
-		/* Still not enough? Give up.. */
-		if (node->nr_zeroed_pages < npages) {
-			goto unlock_out;
-		}
-
-		addr = __page_alloc_rbtree_alloc_pages(&node->zeroed_chunks,
-				npages, p2align);
-
-		/* Does not necessarily succeed due to alignment */
-		if (addr) {
-			node->nr_free_pages -= npages;
-			node->nr_zeroed_pages -= npages;
-#if 0
-			{
-				size_t free_bytes = __count_free_bytes(&node->free_chunks);
-				if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
-					kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
-						__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
-					panic("");
-				}
-			}
-#endif
-			dkprintf("%s: allocated pages 0x%lx:%lu\n",
-				__FUNCTION__, addr, npages << PAGE_SHIFT);
-		}
-	}
-	/* Default behavior */
-	else {
-		addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
-				npages, p2align);
-
-		/* Does not necessarily succeed due to alignment */
-		if (addr) {
-			node->nr_free_pages -= npages;
-#if 0
-			{
-				size_t free_bytes = __count_free_bytes(&node->free_chunks);
-				if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
-					kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
-						__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
-					panic("");
-				}
-			}
-#endif
-			dkprintf("%s: allocated pages 0x%lx:%lu\n",
-				__FUNCTION__, addr, npages << PAGE_SHIFT);
-		}
-	}
+	addr = __page_alloc_rbtree_alloc_pages(&node->free_chunks,
+			npages, p2align);
+
+	/* Does not necessarily succeed due to alignment */
+	if (addr) {
+		node->nr_free_pages -= npages;
+#if 0
+		{
+			size_t free_bytes = __count_free_bytes(&node->free_chunks);
+			if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
+				kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
+					__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
+				panic("");
+			}
+		}
+#endif
+		dkprintf("%s: allocated pages 0x%lx:%lu\n",
+			__FUNCTION__, addr, npages << PAGE_SHIFT);
+	}
 
 unlock_out:
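The allocation hunk above introduces a retry loop: chunks that Linux has already cleared (zeroed_list) are merged into the free rbtree first, and only if the request still cannot be satisfied does the allocating CPU zero a pending chunk itself through __ihk_numa_zero_free_pages() and retry, before finally giving up. A minimal model of that control flow follows; plain counters stand in for the rbtree and the llists, and the helper names are made up for the illustration, this is not the IHK code.

#include <stdio.h>

static unsigned long free_pages;	/* pages already on the free tree   */
static unsigned long zeroed_pages;	/* cleared by Linux, not merged yet */
static unsigned long to_zero_pages;	/* freed but still dirty            */

static void drain_zeroed(void)		/* zeroed_list -> free tree */
{
	free_pages += zeroed_pages;
	zeroed_pages = 0;
}

static unsigned long zero_now(unsigned long npages)	/* __ihk_numa_zero_free_pages() */
{
	unsigned long n = to_zero_pages < npages ? to_zero_pages : npages;

	to_zero_pages -= n;
	zeroed_pages += n;
	return n;
}

static int alloc_pages(unsigned long npages)
{
retry:
	drain_zeroed();				/* 1. merge what Linux already cleared */
	if (free_pages < npages) {
		if (zero_now(npages) >= npages)
			goto retry;		/* 2. zero synchronously, try again    */
		return -1;			/* 3. give up                          */
	}
	free_pages -= npages;			/* 4. carve the range out              */
	return 0;
}

int main(void)
{
	to_zero_pages = 512;			/* everything is waiting to be zeroed */
	printf("alloc 256: %s\n", alloc_pages(256) ? "failed" : "ok");
	printf("alloc 512: %s\n", alloc_pages(512) ? "failed" : "ok");
	return 0;
}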
@@ -815,6 +869,7 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
 		unsigned long addr, int npages)
 {
 	mcs_lock_node_t mcs_node;
+	int defer_zero_at_free = deferred_zero_at_free;
 
 #ifdef ENABLE_PER_CPU_ALLOC_CACHE
 	/* CPU local cache */
@@ -846,13 +901,30 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
 		return;
 	}
 
-	mcs_lock_lock(&node->lock, &mcs_node);
+#if 0
+	/* Do not defer zeroing when the number of free pages is low */
+	if (zero_at_free && defer_zero_at_free) {
+		mcs_lock_lock(&node->lock, &mcs_node);
+		if (node->nr_free_pages < (node->nr_pages * 3 / 100))
+			defer_zero_at_free = 0;
+		mcs_lock_unlock(&node->lock, &mcs_node);
+	}
+#endif
+
+	/* Zero chunk right here if needed */
+	if (zero_at_free && !defer_zero_at_free) {
+		memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT);
+	}
+
+	/*
+	 * If we don't zero at free() or we zeroed the chunk
+	 * already, simply add it to the free tree.
+	 */
 	if (!zero_at_free ||
-			(zero_at_free && deferred_zero_at_free)) {
-		/*
-		 * Free to free_chunks first, will be moved to zeroed_chunks later
-		 * if zero at free or asynchronously
-		 */
+			(zero_at_free && !defer_zero_at_free)) {
+		mcs_lock_lock(&node->lock, &mcs_node);
 		if (__page_alloc_rbtree_free_range(&node->free_chunks, addr,
 					npages << PAGE_SHIFT)) {
 			kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
@@ -870,39 +942,64 @@ void ihk_numa_free_pages(struct ihk_mc_numa_node *node,
 			}
 		}
 #endif
-		dkprintf("%s: freed pages 0x%lx:%lu\n",
-			__FUNCTION__, addr, npages << PAGE_SHIFT);
+		dkprintf("%s: freed%s chunk 0x%lx:%lu\n",
+			__FUNCTION__,
+			zero_at_free ? " and zeroed" : "",
+			addr, npages << PAGE_SHIFT);
 		}
+		mcs_lock_unlock(&node->lock, &mcs_node);
 	}
+	/*
+	 * Deferred zeroing.
+	 * Put the chunk to the to_zero list.
+	 */
 	else {
-		/*
-		 * Free and zero chunk right here
-		 */
-		memset(phys_to_virt(addr), 0, npages << PAGE_SHIFT);
-
-		if (__page_alloc_rbtree_free_range(&node->zeroed_chunks, addr,
-					npages << PAGE_SHIFT)) {
-			kprintf("%s: ERROR: freeing 0x%lx:%lu\n",
-				__FUNCTION__, addr, npages << PAGE_SHIFT);
-		}
-		else {
-			node->nr_free_pages += npages;
-			node->nr_zeroed_pages += npages;
-#if 0
-			{
-				size_t free_bytes = __count_free_bytes(&node->free_chunks);
-				if (free_bytes != node->nr_free_pages * PAGE_SIZE) {
-					kprintf("%s: inconsistent free count? node: %lu vs. cnt: %lu\n",
-						__func__, node->nr_free_pages * PAGE_SIZE, free_bytes);
-					panic("");
-				}
-			}
-#endif
-			dkprintf("%s: freed+zeroed pages 0x%lx:%lu\n",
-				__FUNCTION__, addr, npages << PAGE_SHIFT);
+		struct free_chunk *chunk =
+			(struct free_chunk *)phys_to_virt(addr);
+		chunk->addr = addr;
+		chunk->size = npages << PAGE_SHIFT;
+		ihk_atomic_add(npages, &node->nr_to_zero_pages);
+		barrier();
+		llist_add(&chunk->list, &node->to_zero_list);
+
+		/* Ask Linux to clear memory */
+		if (cpu_local_var_initialized &&
+				cpu_local_var(current) &&
+				cpu_local_var(current) != &cpu_local_var(idle) &&
+				!cpu_local_var(current)->proc->nohost) {
+			struct ihk_ikc_channel_desc *syscall_channel =
+				cpu_local_var(ikc2linux);
+			struct ikc_scd_packet packet IHK_DMA_ALIGN;
+
+			if (ihk_atomic_read(&node->zeroing_workers) > 0) {
+				dkprintf("%s: skipping Linux zero request..\n", __func__);
+				return;
+			}
+
+			ihk_atomic_inc(&node->zeroing_workers);
+
+			memset(&packet, 0, sizeof(packet));
+			packet.req.number = __NR_move_pages;
+			packet.req.args[0] = (unsigned long)node;
+
+			barrier();
+			smp_store_release(&packet.req.valid, 1);
+			packet.msg = SCD_MSG_SYSCALL_ONESIDE;
+			packet.ref = ihk_mc_get_processor_id();
+			packet.pid = cpu_local_var(current)->proc->pid;
+			packet.resp_pa = 0;
+
+			if (ihk_ikc_send(syscall_channel, &packet, 0) < 0) {
+				kprintf("%s: WARNING: failed to send memory clear"
+					" send IKC req..\n", __func__);
+			}
+			else {
+				dkprintf("%s: clear mem req for NUMA %d sent in req"
+					" for addr: 0x%lx\n",
+					__func__, node->id, addr);
+			}
 		}
 	}
-	mcs_lock_unlock(&node->lock, &mcs_node);
 }
 
 #endif // IHK_RBTREE_ALLOCATOR
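One detail of the deferred free path worth spelling out: the free_chunk bookkeeping record is stored at the start of the freed range itself (chunk = phys_to_virt(addr)), which is why both zeroing loops memset from phys_to_virt(addr) + sizeof(*chunk) and shorten the length by sizeof(*chunk), so the list linkage and size survive until the chunk is taken off zeroed_list and merged into the free tree. A simplified userspace illustration of that layout (not the kernel code) is below.

#include <assert.h>
#include <stdlib.h>
#include <string.h>

struct hdr {
	struct hdr *next;
	size_t size;
};

int main(void)
{
	size_t size = 4096;
	unsigned char *range = malloc(size);

	memset(range, 0xff, size);

	/* The header lives in the freed range itself, no extra allocation. */
	struct hdr *h = (struct hdr *)range;
	h->next = NULL;
	h->size = size;

	/* Deferred zeroing clears everything but the header... */
	memset(range + sizeof(*h), 0, size - sizeof(*h));

	/* ...so the bookkeeping survives until the chunk is reused. */
	assert(h->size == size);
	assert(range[sizeof(*h)] == 0 && range[size - 1] == 0);

	free(range);
	return 0;
}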