HFI1: txreq cache and profiling

Balazs Gerofi
2017-08-17 16:38:27 +09:00
parent 3b5363c533
commit 6e289e8d9f
4 changed files with 111 additions and 18 deletions

View File

@@ -759,7 +759,7 @@ static inline int sdma_txadd_page(
struct page *page,
unsigned long offset,
#else
void *virt,
dma_addr_t paddr,
#endif
u16 len)
{
@@ -789,15 +789,7 @@ static inline int sdma_txadd_page(
hfi1_cdbg(AIOWRITE, "-");
#else
if (ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
virt, &addr) < 0) {
/* TODO: shall we make this function fail? *
* Handle this error. */
kprintf("%s: ERROR: virt_to_phys failed - virt = 0x%lx\n",
__FUNCTION__, virt);
return -EFAULT;
}
addr = paddr;
#endif
/*
* XXX: It seems that this is the place where the reference to

View File

@@ -28,7 +28,7 @@ struct profile_event {
* [PROFILE_SYSCALL_MAX,PROFILE_OFFLOAD_MAX) - syscall offloads
* [PROFILE_OFFLOAD_MAX,PROFILE_EVENT_MAX) - general events
*
* XXX: Make sure to fill in prof_event_names in profile.c
* XXX: Make sure to fill in profile_event_names in profile.c
* for each added profiled event.
*/
enum profile_event_type {
@@ -44,6 +44,11 @@ enum profile_event_type {
PROFILE_mmap_anon_no_contig_phys,
PROFILE_mmap_regular_file,
PROFILE_mmap_device_file,
PROFILE_sdma_1,
PROFILE_sdma_2,
PROFILE_sdma_3,
PROFILE_sdma_4,
PROFILE_sdma_5,
PROFILE_EVENT_MAX /* Should be the last event type */
};

View File

@@ -69,6 +69,11 @@ char *profile_event_names[] =
"mmap_anon_no_contig_phys",
"mmap_regular_file",
"mmap_device_file",
"sdma_1",
"sdma_2",
"sdma_3",
"sdma_4",
"sdma_5",
""
};
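Each PROFILE_sdma_N entry added to the enum above needs a matching string here, per the XXX note in profile.h. The counters are driven from user_sdma.c (next file) by bracketing each stage of user_sdma_send_pkts() with rdtsc() reads and accumulating the cycle delta through profile_event_add(). Below is a minimal, self-contained user-space sketch of that same bracketing pattern; event_add(), ev_names and the stage names are illustrative stand-ins, not McKernel's profile_event_add()/profile_event_names.

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>		/* __rdtsc() */

/* Per-event cycle accumulators, one per profiled stage. */
enum { EV_SDMA_ALLOC, EV_SDMA_BUILD, EV_MAX };
static const char *ev_names[] = { "sdma_alloc", "sdma_build" };
static uint64_t ev_cycles[EV_MAX];

static void event_add(int ev, uint64_t cycles)
{
	ev_cycles[ev] += cycles;
}

int main(void)
{
	uint64_t ts = __rdtsc();
	/* ... stage 1 of the work being measured ... */
	event_add(EV_SDMA_ALLOC, __rdtsc() - ts);

	ts = __rdtsc();
	/* ... stage 2 ... */
	event_add(EV_SDMA_BUILD, __rdtsc() - ts);

	for (int i = 0; i < EV_MAX; i++)
		printf("%s: %llu cycles\n", ev_names[i],
		       (unsigned long long)ev_cycles[i]);
	return 0;
}

Accumulating per-event totals keeps the hot path to two rdtsc() reads and one addition per stage, which is the same cost the PROFILE_sdma_* probes add to user_sdma_send_pkts().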

View File

@@ -79,6 +79,7 @@
#include "mmu_rb.h"
#include <ihk/mm.h>
#include <profile.h>
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
@@ -564,7 +565,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
u16 dlid;
u32 selector;
#ifndef __HFI1_ORIG__
if (!hfi1_kregbase) {
struct process_vm *vm = cpu_local_var(current)->vm;
@@ -959,8 +959,9 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
}
}
// set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
// atomic_inc(&pq->n_reqs);
/* TODO: set these! */
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs);
req_queued = 1;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
@@ -1009,7 +1010,12 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
hfi1_cdbg(AIOWRITE, "-wait_event_interruptible_timeout");
#else
TP("+ polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
{
unsigned long ts = rdtsc();
while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
kprintf("%s: waited %lu cycles for SDMA_PKT_Q_ACTIVE\n",
__FUNCTION__, rdtsc() - ts);
}
TP("- polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
#endif /* __HFI1_ORIG__ */
}
@@ -1093,6 +1099,49 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
static ihk_spinlock_t txreq_cache_lock = 0;
static LIST_HEAD(txreq_cache_list);
struct user_sdma_txreq *txreq_cache_alloc(void)
{
struct user_sdma_txreq *req = NULL;
ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
retry:
if (!list_empty(&txreq_cache_list)) {
req = list_first_entry(&txreq_cache_list,
struct user_sdma_txreq, list);
list_del(&req->list);
}
else {
int i;
kprintf("%s: cache empty, allocating ...\n", __FUNCTION__);
for (i = 0; i < 100; ++i) {
req = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
if (!req) {
kprintf("%s: ERROR: allocating txreq\n", __FUNCTION__);
continue;
}
list_add_tail(&req->list, &txreq_cache_list);
}
goto retry;
}
ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);
return req;
}
void txreq_cache_free(struct user_sdma_txreq *req)
{
ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
list_add_tail(&req->list, &txreq_cache_list);
ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);
}
//#undef PROFILE_ENABLE
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
int ret = 0, count;
@@ -1133,6 +1182,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
unsigned long base_phys;
u64 iov_offset = 0;
#ifdef PROFILE_ENABLE
unsigned long prof_ts = rdtsc();
#endif
//TODO: enable test_bit
#ifdef __HFI1_ORIG__
/*
@@ -1146,8 +1199,14 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
#else
tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
//tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
tx = txreq_cache_alloc();
#endif /* __HFI1_ORIG__ */
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_sdma_1,
(rdtsc() - prof_ts));
prof_ts = rdtsc();
#endif // PROFILE_ENABLE
if (!tx)
return -ENOMEM;
TP("- kmalloc");
@@ -1156,6 +1215,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
/*
* For the last packet set the ACK request
* and disable header suppression.
@@ -1273,12 +1333,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* If the request contains any data vectors, add up to
* fragsize bytes to the descriptor.
*/
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_sdma_2,
(rdtsc() - prof_ts));
prof_ts = rdtsc();
#endif // PROFILE_ENABLE
TP("+ If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
while (queued < datalen &&
(req->sent + data_sent) < req->data_len) {
unsigned pageidx, len;
unsigned long base, offset;
const void *virt;
unsigned long paddr_base;
base = (unsigned long)iovec->iov.iov_base;
offset = offset_in_page(base + iovec->offset +
@@ -1291,11 +1357,24 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
len = min((datalen - queued), len);
SDMA_DBG("%s: dl: %d, qd: %d, len: %d\n",
__FUNCTION__, datalen, queued, len);
//if (iov_offset == 0 || iovec->offset == 0) {
if (ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
virt, &paddr_base) < 0) {
/* TODO: shall we make this function fail? *
* Handle this error. */
kprintf("%s: ERROR: virt_to_phys failed - virt = 0x%lx\n",
__FUNCTION__, virt);
return -EFAULT;
}
//}
ret = sdma_txadd_page(pq->dd, &tx->txreq,
#ifdef __HFI1_ORIG__
iovec->pages[pageidx], offset,
#else
virt,
paddr_base + iov_offset + iovec->offset,
#endif
len);
if (ret) {
@@ -1318,6 +1397,12 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
iov_offset = 0;
}
}
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_sdma_3,
(rdtsc() - prof_ts));
prof_ts = rdtsc();
#endif // PROFILE_ENABLE
TP("- If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
/*
* The txreq was submitted successfully so we can update
@@ -1337,6 +1422,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
*/
tx->seqnum = req->seqnum++;
npkts++;
#ifdef PROFILE_ENABLE
profile_event_add(PROFILE_sdma_4,
(rdtsc() - prof_ts));
prof_ts = rdtsc();
#endif // PROFILE_ENABLE
}
dosend:
@@ -1793,7 +1883,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
#ifdef __HFI1_ORIG__
kmem_cache_free(pq->txreq_cache, tx);
#else
kfree(tx);
//kfree(tx);
txreq_cache_free(tx);
#endif /* __HFI1_ORIG__ */
tx = NULL;
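For reference, the txreq_cache_alloc()/txreq_cache_free() pair added in this commit replaces per-packet kmalloc()/kfree() of struct user_sdma_txreq with a spinlock-protected free list: pop a recycled request when one is available, refill the list in a batch of 100 when it runs dry, and push completed requests back instead of freeing them. The following is a self-contained sketch of the same free-list pattern, with a pthread mutex standing in for the IHK spinlock and illustrative names (struct txreq, txreq_alloc(), txreq_free()) in place of the driver types.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Illustrative stand-in for struct user_sdma_txreq. */
struct txreq {
	struct txreq *next;
	/* ... payload fields ... */
};

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct txreq *cache_head;

/* Pop a recycled request; refill the list in a batch when it runs dry. */
static struct txreq *txreq_alloc(void)
{
	struct txreq *req;

	pthread_mutex_lock(&cache_lock);
	if (!cache_head) {
		for (int i = 0; i < 100; i++) {
			struct txreq *n = malloc(sizeof(*n));
			if (!n)
				break;
			n->next = cache_head;
			cache_head = n;
		}
	}
	req = cache_head;
	if (req)
		cache_head = req->next;
	pthread_mutex_unlock(&cache_lock);
	return req;	/* NULL only if every allocation failed */
}

/* Return the request to the free list instead of freeing it. */
static void txreq_free(struct txreq *req)
{
	pthread_mutex_lock(&cache_lock);
	req->next = cache_head;
	cache_head = req;
	pthread_mutex_unlock(&cache_lock);
}

int main(void)
{
	struct txreq *t = txreq_alloc();
	printf("allocated %p from the cache\n", (void *)t);
	if (t)
		txreq_free(t);
	return 0;
}

Recycling the objects removes the allocator round trip from both user_sdma_send_pkts() and the user_sdma_txreq_cb() completion path, the two places the diff switches over to the cache.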