HFI1: make kmalloc caches per-CPU and pre-allocate at boot time
This commit is contained in:
@ -68,6 +68,11 @@ static void ap_wait(void)
|
||||
init_host_ikc2mckernel();
|
||||
init_host_ikc2linux(ikc_cpu);
|
||||
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
|
||||
|
||||
{
|
||||
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||
hfi1_kmalloc_cache_prealloc();
|
||||
}
|
||||
}
|
||||
|
||||
/* one of them listens */
|
||||
|
||||
@ -106,7 +106,12 @@ struct cpu_local_var {
|
||||
struct list_head smp_func_req_list;
|
||||
|
||||
struct process_vm *on_fork_vm;
|
||||
|
||||
/* HFI1-related per-core kmalloc caches */
|
||||
struct kmalloc_cache_header txreq_cache;
|
||||
struct kmalloc_cache_header tids_cache;
|
||||
struct kmalloc_cache_header tidlist_cache;
|
||||
struct kmalloc_cache_header tid_node_cache;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
|
||||
@ -360,6 +360,11 @@ static void post_init(void)
|
||||
}
|
||||
init_host_ikc2mckernel();
|
||||
init_host_ikc2linux(ikc_cpu);
|
||||
|
||||
{
|
||||
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||
hfi1_kmalloc_cache_prealloc();
|
||||
}
|
||||
}
|
||||
|
||||
arch_setup_vdso();
|
||||
|
||||
@ -484,14 +484,12 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
if ((cpu_local_var(current)->proc->mcexec_flags & MCEXEC_HFI1) &&
|
||||
res.private_data &&
|
||||
!strncmp((const char *)req->args[0], "/dev/hfi", 8)) {
|
||||
extern void hfi1_txreq_prealloc(void);
|
||||
|
||||
thread->proc->fd_priv_table[rc] = res.private_data;
|
||||
dkprintf("%s: PID: %d, open fd: %d, filename: "
|
||||
"%s, private_data: 0x%lx\n",
|
||||
__FUNCTION__, thread->proc->pid,
|
||||
rc, req->args[0], res.private_data);
|
||||
hfi1_txreq_prealloc();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -67,8 +67,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *, uintptr_t,
|
||||
static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **);
|
||||
static void clear_tid_node(struct hfi1_filedata *, struct tid_rb_node *);
|
||||
|
||||
struct kmalloc_cache_header tidlist_cache = { NULL };
|
||||
|
||||
/*
|
||||
* RcvArray entry allocation for Expected Receives is done by the
|
||||
* following algorithm:
|
||||
@ -93,9 +91,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinf
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tidlist = kmalloc_cache_alloc(&tidlist_cache,
|
||||
//sizeof(*tidlist) * uctxt->expected_count);
|
||||
sizeof(*tidlist) * 1024);
|
||||
/* TODO: size the allocation as sizeof(*tidlist) * uctxt->expected_count instead of a fixed element count */
|
||||
tidlist = kmalloc_cache_alloc(&cpu_local_var(tidlist_cache),
|
||||
sizeof(*tidlist) * 2048);
|
||||
|
||||
if (!tidlist)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -328,7 +327,6 @@ static int program_rcvarray(struct hfi1_filedata *fd, uintptr_t phys,
|
||||
return count;
|
||||
}
|
||||
|
||||
struct kmalloc_cache_header tid_node_cache = { NULL };
|
||||
|
||||
static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
|
||||
u32 rcventry, struct tid_group *grp,
|
||||
@ -342,7 +340,8 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
|
||||
* Allocate the node first so we can handle a potential
|
||||
* failure before we've programmed anything.
|
||||
*/
|
||||
node = kmalloc_cache_alloc(&tid_node_cache, sizeof(*node));
|
||||
node = kmalloc_cache_alloc(&cpu_local_var(tid_node_cache),
|
||||
sizeof(*node));
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
@ -873,8 +873,6 @@ int hfi1_unmap_device_addresses(struct process *proc)
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct kmalloc_cache_header tids_cache = {NULL};
|
||||
|
||||
#undef PROFILE_ENABLE
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
@ -1143,7 +1141,8 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
||||
ret = -EINVAL;
|
||||
goto free_req;
|
||||
}
|
||||
req->tids = kmalloc_cache_alloc(&tids_cache,
|
||||
req->tids = kmalloc_cache_alloc(
|
||||
&cpu_local_var(tids_cache),
|
||||
sizeof(*req->tids) * MAX_TID_PAIR_ENTRIES);
|
||||
if (!req->tids) {
|
||||
ret = -ENOMEM;
|
||||
@ -1336,10 +1335,22 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
|
||||
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
|
||||
}
|
||||
|
||||
void hfi1_txreq_prealloc(void)
|
||||
void hfi1_kmalloc_cache_prealloc(void)
|
||||
{
|
||||
/*
|
||||
* TODO: nr_elems have been determined based on profiling
|
||||
* HACC and UMT2013; it would be interesting to do some clever
|
||||
* dynamic releasing/expanding.
|
||||
*/
|
||||
kmalloc_cache_prealloc(&cpu_local_var(txreq_cache),
|
||||
sizeof(struct user_sdma_txreq));
|
||||
sizeof(struct user_sdma_txreq), 2048);
|
||||
kmalloc_cache_prealloc(&cpu_local_var(tids_cache),
|
||||
sizeof(*(((struct user_sdma_request *)0)->tids)) *
|
||||
MAX_TID_PAIR_ENTRIES, 256);
|
||||
kmalloc_cache_prealloc(&cpu_local_var(tidlist_cache),
|
||||
sizeof(u32) * 2048, 128);
|
||||
kmalloc_cache_prealloc(&cpu_local_var(tid_node_cache),
|
||||
sizeof(struct tid_rb_node), 512);
|
||||
}
|
||||
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *req,
|
||||
|
||||
Reference in New Issue
Block a user