HFI1: make kmalloc caches per-CPU and pre-allocate at boot time
This commit is contained in:
@ -68,6 +68,11 @@ static void ap_wait(void)
|
|||||||
init_host_ikc2mckernel();
|
init_host_ikc2mckernel();
|
||||||
init_host_ikc2linux(ikc_cpu);
|
init_host_ikc2linux(ikc_cpu);
|
||||||
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
|
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
|
||||||
|
|
||||||
|
{
|
||||||
|
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||||
|
hfi1_kmalloc_cache_prealloc();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* one of them listens */
|
/* one of them listens */
|
||||||
|
|||||||
@ -106,7 +106,12 @@ struct cpu_local_var {
|
|||||||
struct list_head smp_func_req_list;
|
struct list_head smp_func_req_list;
|
||||||
|
|
||||||
struct process_vm *on_fork_vm;
|
struct process_vm *on_fork_vm;
|
||||||
|
|
||||||
|
/* HFI1 related per-core kmalloc caches */
|
||||||
struct kmalloc_cache_header txreq_cache;
|
struct kmalloc_cache_header txreq_cache;
|
||||||
|
struct kmalloc_cache_header tids_cache;
|
||||||
|
struct kmalloc_cache_header tidlist_cache;
|
||||||
|
struct kmalloc_cache_header tid_node_cache;
|
||||||
} __attribute__((aligned(64)));
|
} __attribute__((aligned(64)));
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -360,6 +360,11 @@ static void post_init(void)
|
|||||||
}
|
}
|
||||||
init_host_ikc2mckernel();
|
init_host_ikc2mckernel();
|
||||||
init_host_ikc2linux(ikc_cpu);
|
init_host_ikc2linux(ikc_cpu);
|
||||||
|
|
||||||
|
{
|
||||||
|
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||||
|
hfi1_kmalloc_cache_prealloc();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
arch_setup_vdso();
|
arch_setup_vdso();
|
||||||
|
|||||||
@ -484,14 +484,12 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
|||||||
if ((cpu_local_var(current)->proc->mcexec_flags & MCEXEC_HFI1) &&
|
if ((cpu_local_var(current)->proc->mcexec_flags & MCEXEC_HFI1) &&
|
||||||
res.private_data &&
|
res.private_data &&
|
||||||
!strncmp((const char *)req->args[0], "/dev/hfi", 8)) {
|
!strncmp((const char *)req->args[0], "/dev/hfi", 8)) {
|
||||||
extern void hfi1_txreq_prealloc(void);
|
|
||||||
|
|
||||||
thread->proc->fd_priv_table[rc] = res.private_data;
|
thread->proc->fd_priv_table[rc] = res.private_data;
|
||||||
dkprintf("%s: PID: %d, open fd: %d, filename: "
|
dkprintf("%s: PID: %d, open fd: %d, filename: "
|
||||||
"%s, private_data: 0x%lx\n",
|
"%s, private_data: 0x%lx\n",
|
||||||
__FUNCTION__, thread->proc->pid,
|
__FUNCTION__, thread->proc->pid,
|
||||||
rc, req->args[0], res.private_data);
|
rc, req->args[0], res.private_data);
|
||||||
hfi1_txreq_prealloc();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -67,8 +67,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *, uintptr_t,
|
|||||||
static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **);
|
static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **);
|
||||||
static void clear_tid_node(struct hfi1_filedata *, struct tid_rb_node *);
|
static void clear_tid_node(struct hfi1_filedata *, struct tid_rb_node *);
|
||||||
|
|
||||||
struct kmalloc_cache_header tidlist_cache = { NULL };
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RcvArray entry allocation for Expected Receives is done by the
|
* RcvArray entry allocation for Expected Receives is done by the
|
||||||
* following algorithm:
|
* following algorithm:
|
||||||
@ -93,9 +91,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinf
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
tidlist = kmalloc_cache_alloc(&tidlist_cache,
|
/* TODO: sizeof(*tidlist) * uctxt->expected_count); */
|
||||||
//sizeof(*tidlist) * uctxt->expected_count);
|
tidlist = kmalloc_cache_alloc(&cpu_local_var(tidlist_cache),
|
||||||
sizeof(*tidlist) * 1024);
|
sizeof(*tidlist) * 2048);
|
||||||
|
|
||||||
if (!tidlist)
|
if (!tidlist)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
@ -328,7 +327,6 @@ static int program_rcvarray(struct hfi1_filedata *fd, uintptr_t phys,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct kmalloc_cache_header tid_node_cache = { NULL };
|
|
||||||
|
|
||||||
static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
|
static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
|
||||||
u32 rcventry, struct tid_group *grp,
|
u32 rcventry, struct tid_group *grp,
|
||||||
@ -342,7 +340,8 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, uintptr_t phys,
|
|||||||
* Allocate the node first so we can handle a potential
|
* Allocate the node first so we can handle a potential
|
||||||
* failure before we've programmed anything.
|
* failure before we've programmed anything.
|
||||||
*/
|
*/
|
||||||
node = kmalloc_cache_alloc(&tid_node_cache, sizeof(*node));
|
node = kmalloc_cache_alloc(&cpu_local_var(tid_node_cache),
|
||||||
|
sizeof(*node));
|
||||||
if (!node)
|
if (!node)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
|||||||
@ -873,8 +873,6 @@ int hfi1_unmap_device_addresses(struct process *proc)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct kmalloc_cache_header tids_cache = {NULL};
|
|
||||||
|
|
||||||
#undef PROFILE_ENABLE
|
#undef PROFILE_ENABLE
|
||||||
|
|
||||||
#ifdef __HFI1_ORIG__
|
#ifdef __HFI1_ORIG__
|
||||||
@ -1143,7 +1141,8 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
|||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto free_req;
|
goto free_req;
|
||||||
}
|
}
|
||||||
req->tids = kmalloc_cache_alloc(&tids_cache,
|
req->tids = kmalloc_cache_alloc(
|
||||||
|
&cpu_local_var(tids_cache),
|
||||||
sizeof(*req->tids) * MAX_TID_PAIR_ENTRIES);
|
sizeof(*req->tids) * MAX_TID_PAIR_ENTRIES);
|
||||||
if (!req->tids) {
|
if (!req->tids) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
@ -1336,10 +1335,22 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
|
|||||||
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
|
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
|
||||||
}
|
}
|
||||||
|
|
||||||
void hfi1_txreq_prealloc(void)
|
void hfi1_kmalloc_cache_prealloc(void)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* TODO: nr_elems have been determined based on profiling
|
||||||
|
* HACC and UMT2013, would be interesting to do some clever
|
||||||
|
* dynamic releasing/expanding.
|
||||||
|
*/
|
||||||
kmalloc_cache_prealloc(&cpu_local_var(txreq_cache),
|
kmalloc_cache_prealloc(&cpu_local_var(txreq_cache),
|
||||||
sizeof(struct user_sdma_txreq));
|
sizeof(struct user_sdma_txreq), 2048);
|
||||||
|
kmalloc_cache_prealloc(&cpu_local_var(tids_cache),
|
||||||
|
sizeof(*(((struct user_sdma_request *)0)->tids)) *
|
||||||
|
MAX_TID_PAIR_ENTRIES, 256);
|
||||||
|
kmalloc_cache_prealloc(&cpu_local_var(tidlist_cache),
|
||||||
|
sizeof(u32) * 2048, 128);
|
||||||
|
kmalloc_cache_prealloc(&cpu_local_var(tid_node_cache),
|
||||||
|
sizeof(struct tid_rb_node), 512);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int user_sdma_send_pkts(struct user_sdma_request *req,
|
static int user_sdma_send_pkts(struct user_sdma_request *req,
|
||||||
|
|||||||
Reference in New Issue
Block a user