HFI1: txreq cache and profiling
@@ -79,6 +79,7 @@
#include "mmu_rb.h"

#include <ihk/mm.h>
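/*
 * profile.h is presumed to provide profile_event_add() and the
 * PROFILE_sdma_* event IDs used by the instrumentation in
 * user_sdma_send_pkts() below.
 */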
#include <profile.h>

module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
@@ -564,7 +565,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
	u16 dlid;
	u32 selector;


#ifndef __HFI1_ORIG__
	if (!hfi1_kregbase) {
		struct process_vm *vm = cpu_local_var(current)->vm;
@@ -959,8 +959,9 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
		}
	}

	// set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
	// atomic_inc(&pq->n_reqs);
	/* TODO: set these! */
	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
	atomic_inc(&pq->n_reqs);
	req_queued = 1;
	/* Send the first N packets in the request to buy us some time */
	ret = user_sdma_send_pkts(req, pcount);
@@ -1009,7 +1010,12 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
		hfi1_cdbg(AIOWRITE, "-wait_event_interruptible_timeout");
#else
		TP("+ polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
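		/*
		 * Busy-wait until the driver marks the packet queue active again;
		 * the braced block below repeats the spin with an rdtsc() timestamp
		 * so the cycles spent waiting are reported via kprintf().
		 */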
		while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
		{
			unsigned long ts = rdtsc();
			while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
			kprintf("%s: waited %lu cycles for SDMA_PKT_Q_ACTIVE\n",
				__FUNCTION__, rdtsc() - ts);
		}
		TP("- polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
#endif /* __HFI1_ORIG__ */
	}
@@ -1093,6 +1099,49 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}

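/*
 * Free-list cache for struct user_sdma_txreq, presumably to avoid a
 * kmalloc()/kfree() pair per packet: txreq_cache_alloc() pops an entry under
 * txreq_cache_lock and refills the list with a batch of fresh allocations
 * when it runs empty, while txreq_cache_free() pushes completed entries back.
 */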
static ihk_spinlock_t txreq_cache_lock = 0;
static LIST_HEAD(txreq_cache_list);

struct user_sdma_txreq *txreq_cache_alloc(void)
{
	struct user_sdma_txreq *req = NULL;

	ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
retry:
	if (!list_empty(&txreq_cache_list)) {
		req = list_first_entry(&txreq_cache_list,
			struct user_sdma_txreq, list);
		list_del(&req->list);
	}
	else {
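		/*
		 * Cache is empty: allocate a batch of entries, queue them on the
		 * free list, then retry the pop above; the lock is held throughout.
		 */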
		int i;
		kprintf("%s: cache empty, allocating ...\n", __FUNCTION__);
		for (i = 0; i < 100; ++i) {
			req = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
			if (!req) {
				kprintf("%s: ERROR: allocating txreq\n", __FUNCTION__);
				continue;
			}

			list_add_tail(&req->list, &txreq_cache_list);
		}

		goto retry;
	}
	ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);

	return req;
}

void txreq_cache_free(struct user_sdma_txreq *req)
{
	ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
	list_add_tail(&req->list, &txreq_cache_list);
	ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);
}

//#undef PROFILE_ENABLE

static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
	int ret = 0, count;
@@ -1133,6 +1182,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
		unsigned long base_phys;
		u64 iov_offset = 0;

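		/*
		 * Per-packet profiling: prof_ts is re-read after every
		 * profile_event_add() call below, so PROFILE_sdma_1..4 each
		 * accumulate the cycles spent between two consecutive
		 * instrumentation points (roughly: txreq allocation, header
		 * setup, payload mapping and per-packet bookkeeping).
		 */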
#ifdef PROFILE_ENABLE
		unsigned long prof_ts = rdtsc();
#endif

		//TODO: enable test_bit
#ifdef __HFI1_ORIG__
		/*
@@ -1146,8 +1199,14 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
		}
		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
#else
		tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
		//tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
		tx = txreq_cache_alloc();
#endif /* __HFI1_ORIG__ */
#ifdef PROFILE_ENABLE
		profile_event_add(PROFILE_sdma_1,
			(rdtsc() - prof_ts));
		prof_ts = rdtsc();
#endif // PROFILE_ENABLE
		if (!tx)
			return -ENOMEM;
		TP("- kmalloc");
@@ -1156,6 +1215,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
		tx->busycount = 0;
		INIT_LIST_HEAD(&tx->list);


		/*
		 * For the last packet set the ACK request
		 * and disable header suppression.
@@ -1273,12 +1333,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
		 * If the request contains any data vectors, add up to
		 * fragsize bytes to the descriptor.
		 */
#ifdef PROFILE_ENABLE
		profile_event_add(PROFILE_sdma_2,
			(rdtsc() - prof_ts));
		prof_ts = rdtsc();
#endif // PROFILE_ENABLE
		TP("+ If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
		while (queued < datalen &&
		       (req->sent + data_sent) < req->data_len) {
			unsigned pageidx, len;
			unsigned long base, offset;
			const void *virt;
			unsigned long paddr_base;

			base = (unsigned long)iovec->iov.iov_base;
			offset = offset_in_page(base + iovec->offset +
@@ -1291,11 +1357,24 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
			len = min((datalen - queued), len);
			SDMA_DBG("%s: dl: %d, qd: %d, len: %d\n",
				 __FUNCTION__, datalen, queued, len);

			//if (iov_offset == 0 || iovec->offset == 0) {
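			/*
			 * Non-__HFI1_ORIG__ path: resolve the user buffer to a
			 * physical address through the current process's page
			 * table instead of the pinned page array used by the
			 * original Linux driver path below.
			 */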
			if (ihk_mc_pt_virt_to_phys(
				cpu_local_var(current)->vm->address_space->page_table,
				virt, &paddr_base) < 0) {
				/* TODO: shall we make this function fail? *
				 * Handle this error. */
				kprintf("%s: ERROR: virt_to_phys failed - virt = 0x%lx\n",
					__FUNCTION__, virt);
				return -EFAULT;
			}
			//}

			ret = sdma_txadd_page(pq->dd, &tx->txreq,
#ifdef __HFI1_ORIG__
				iovec->pages[pageidx], offset,
#else
				virt,
				paddr_base + iov_offset + iovec->offset,
#endif
				len);
			if (ret) {
@@ -1318,6 +1397,12 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
				iov_offset = 0;
			}
		}

#ifdef PROFILE_ENABLE
		profile_event_add(PROFILE_sdma_3,
			(rdtsc() - prof_ts));
		prof_ts = rdtsc();
#endif // PROFILE_ENABLE
		TP("- If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
		/*
		 * The txreq was submitted successfully so we can update
@@ -1337,6 +1422,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
		 */
		tx->seqnum = req->seqnum++;
		npkts++;
#ifdef PROFILE_ENABLE
		profile_event_add(PROFILE_sdma_4,
			(rdtsc() - prof_ts));
		prof_ts = rdtsc();
#endif // PROFILE_ENABLE
	}
dosend:

@@ -1793,7 +1883,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
#ifdef __HFI1_ORIG__
	kmem_cache_free(pq->txreq_cache, tx);
#else
	kfree(tx);
	//kfree(tx);
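	/*
	 * Completed txreqs are recycled into the free-list cache rather than
	 * being kfree()d.
	 */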
	txreq_cache_free(tx);
#endif /* __HFI1_ORIG__ */
	tx = NULL;
