HFI1: make kmalloc caches per-CPU and pre-allocate at boot time

Balazs Gerofi
2017-10-25 13:19:08 +09:00
parent 33ad55e72b
commit 5bea237581
6 changed files with 37 additions and 14 deletions
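The change follows a single pattern: the HFI1 path drops its global struct kmalloc_cache_header free lists (tidlist_cache, tid_node_cache, tids_cache) and instead keeps one cache per CPU inside struct cpu_local_var, filled once at boot via hfi1_kmalloc_cache_prealloc() and consumed with kmalloc_cache_alloc(&cpu_local_var(...), size). Below is a minimal, self-contained sketch of that pattern; it is a user-space toy under stated assumptions, not McKernel's implementation, and NR_CPUS plus the cpu_local_var_on() macro are stand-ins invented for the sketch (the real cpu_local_var() resolves the current CPU implicitly).

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4	/* stand-in: fixed CPU count for the sketch */

/* each cache is the head of an intrusive singly linked free list */
struct kmalloc_cache_header {
	struct kmalloc_cache_header *next;
};

/* stand-in for McKernel's per-CPU variable block */
struct cpu_local_var {
	struct kmalloc_cache_header txreq_cache;
	struct kmalloc_cache_header tids_cache;
	struct kmalloc_cache_header tidlist_cache;
	struct kmalloc_cache_header tid_node_cache;
};

static struct cpu_local_var clv[NR_CPUS];

/* stand-in for cpu_local_var(); takes the CPU id explicitly here */
#define cpu_local_var_on(cpu, field)	(clv[(cpu)].field)

/* fill a cache with nr_elems objects of 'size' bytes ahead of time */
static void kmalloc_cache_prealloc(struct kmalloc_cache_header *cache,
				   size_t size, int nr_elems)
{
	int i;

	for (i = 0; i < nr_elems; ++i) {
		struct kmalloc_cache_header *elem = malloc(size);

		if (!elem)
			break;
		elem->next = cache->next;
		cache->next = elem;
	}
}

/* fast path: pop from the CPU-local list, fall back to malloc() if empty */
static void *kmalloc_cache_alloc(struct kmalloc_cache_header *cache,
				 size_t size)
{
	struct kmalloc_cache_header *elem = cache->next;

	if (elem) {
		cache->next = elem->next;
		return elem;
	}
	return malloc(size);
}

int main(void)
{
	int cpu;
	void *p;

	/* boot time: every CPU fills its own cache,
	 * cf. hfi1_kmalloc_cache_prealloc() called from ap_wait()/post_init() */
	for (cpu = 0; cpu < NR_CPUS; ++cpu)
		kmalloc_cache_prealloc(&cpu_local_var_on(cpu, txreq_cache),
				       128, 16);

	/* runtime: a request on CPU 0 is served from CPU 0's cache only */
	p = kmalloc_cache_alloc(&cpu_local_var_on(0, txreq_cache), 128);
	printf("got %p from the CPU-local cache\n", p);
	free(p);
	return 0;
}

The point of the per-CPU split is that the allocation fast path never touches a shared free-list head, so no cross-CPU synchronization is needed; the element counts passed to kmalloc_cache_prealloc() further down (2048, 256, 128, 512) are the profiling-derived sizes the in-code TODO refers to.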

View File

@@ -68,6 +68,11 @@ static void ap_wait(void)
init_host_ikc2mckernel();
init_host_ikc2linux(ikc_cpu);
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
{
extern void hfi1_kmalloc_cache_prealloc(void);
hfi1_kmalloc_cache_prealloc();
}
}
/* one of them listens */

View File

@@ -106,7 +106,12 @@ struct cpu_local_var {
struct list_head smp_func_req_list;
struct process_vm *on_fork_vm;
/* HFI1 related per-core kmalloc caches */
struct kmalloc_cache_header txreq_cache;
struct kmalloc_cache_header tids_cache;
struct kmalloc_cache_header tidlist_cache;
struct kmalloc_cache_header tid_node_cache;
} __attribute__((aligned(64)));

View File

@@ -360,6 +360,11 @@ static void post_init(void)
}
init_host_ikc2mckernel();
init_host_ikc2linux(ikc_cpu);
{
extern void hfi1_kmalloc_cache_prealloc(void);
hfi1_kmalloc_cache_prealloc();
}
}
arch_setup_vdso();

View File

@@ -484,14 +484,12 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
if ((cpu_local_var(current)->proc->mcexec_flags & MCEXEC_HFI1) &&
res.private_data &&
!strncmp((const char *)req->args[0], "/dev/hfi", 8)) {
extern void hfi1_txreq_prealloc(void);
thread->proc->fd_priv_table[rc] = res.private_data;
dkprintf("%s: PID: %d, open fd: %d, filename: "
"%s, private_data: 0x%lx\n",
__FUNCTION__, thread->proc->pid,
rc, req->args[0], res.private_data);
hfi1_txreq_prealloc();
}
}

View File

@@ -67,8 +67,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *, uintptr_t,
static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **);
static void clear_tid_node(struct hfi1_filedata *, struct tid_rb_node *);
struct kmalloc_cache_header tidlist_cache = { NULL };
/*
* RcvArray entry allocation for Expected Receives is done by the
* following algorithm:
@@ -93,9 +91,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinf
return -EINVAL;
}
tidlist = kmalloc_cache_alloc(&tidlist_cache,
//sizeof(*tidlist) * uctxt->expected_count);
sizeof(*tidlist) * 1024);
/* TODO: sizeof(*tidlist) * uctxt->expected_count); */
tidlist = kmalloc_cache_alloc(&cpu_local_var(tidlist_cache),
sizeof(*tidlist) * 2048);
if (!tidlist)
return -ENOMEM;
@@ -328,7 +327,6 @@ static int program_rcvarray(struct hfi1_filedata *fd, uintptr_t phys,
return count;
}
struct kmalloc_cache_header tid_node_cache = { NULL };
static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
u32 rcventry, struct tid_group *grp,
@@ -342,7 +340,8 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
* Allocate the node first so we can handle a potential
* failure before we've programmed anything.
*/
node = kmalloc_cache_alloc(&tid_node_cache, sizeof(*node));
node = kmalloc_cache_alloc(&cpu_local_var(tid_node_cache),
sizeof(*node));
if (!node)
return -ENOMEM;

View File

@@ -873,8 +873,6 @@ int hfi1_unmap_device_addresses(struct process *proc)
return ret;
}
struct kmalloc_cache_header tids_cache = {NULL};
#undef PROFILE_ENABLE
#ifdef __HFI1_ORIG__
@@ -1143,7 +1141,8 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
ret = -EINVAL;
goto free_req;
}
req->tids = kmalloc_cache_alloc(&tids_cache,
req->tids = kmalloc_cache_alloc(
&cpu_local_var(tids_cache),
sizeof(*req->tids) * MAX_TID_PAIR_ENTRIES);
if (!req->tids) {
ret = -ENOMEM;
@@ -1336,10 +1335,22 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
void hfi1_txreq_prealloc(void)
void hfi1_kmalloc_cache_prealloc(void)
{
/*
* TODO: nr_elems have been determined based on profiling
* HACC and UMT2013, would be interesting to do some clever
* dynamic releasing/expanding.
*/
kmalloc_cache_prealloc(&cpu_local_var(txreq_cache),
sizeof(struct user_sdma_txreq));
sizeof(struct user_sdma_txreq), 2048);
kmalloc_cache_prealloc(&cpu_local_var(tids_cache),
sizeof(*(((struct user_sdma_request *)0)->tids)) *
MAX_TID_PAIR_ENTRIES, 256);
kmalloc_cache_prealloc(&cpu_local_var(tidlist_cache),
sizeof(u32) * 2048, 128);
kmalloc_cache_prealloc(&cpu_local_var(tid_node_cache),
sizeof(struct tid_rb_node), 512);
}
static int user_sdma_send_pkts(struct user_sdma_request *req,
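
The TODO comment in the hunk above leaves dynamic releasing/expanding of the caches as future work. Purely as an illustration of that idea (not part of this commit), a release step could trim a cache back to a watermark; the sketch below reuses the toy free-list layout from the example near the top of this page, and kmalloc_cache_shrink is a hypothetical name.

#include <stdlib.h>

/* hypothetical: trim a cache back to 'watermark' cached elements
 * (struct kmalloc_cache_header as in the toy sketch above) */
static void kmalloc_cache_shrink(struct kmalloc_cache_header *cache,
				 int watermark)
{
	struct kmalloc_cache_header **link = &cache->next;
	int kept = 0;

	/* walk past the elements that stay cached */
	while (*link && kept < watermark) {
		link = &(*link)->next;
		kept++;
	}

	/* hand everything beyond the watermark back to the allocator */
	while (*link) {
		struct kmalloc_cache_header *victim = *link;

		*link = victim->next;
		free(victim);
	}
}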