HFI1: txreq cache and profiling
This commit is contained in:
@@ -759,7 +759,7 @@ static inline int sdma_txadd_page(
|
|||||||
struct page *page,
|
struct page *page,
|
||||||
unsigned long offset,
|
unsigned long offset,
|
||||||
#else
|
#else
|
||||||
void *virt,
|
dma_addr_t paddr,
|
||||||
#endif
|
#endif
|
||||||
u16 len)
|
u16 len)
|
||||||
{
|
{
|
||||||
@@ -789,15 +789,7 @@ static inline int sdma_txadd_page(
|
|||||||
|
|
||||||
hfi1_cdbg(AIOWRITE, "-");
|
hfi1_cdbg(AIOWRITE, "-");
|
||||||
#else
|
#else
|
||||||
if (ihk_mc_pt_virt_to_phys(
|
addr = paddr;
|
||||||
cpu_local_var(current)->vm->address_space->page_table,
|
|
||||||
virt, &addr) < 0) {
|
|
||||||
/* TODO: shall we make this function fail? *
|
|
||||||
* Handle this error. */
|
|
||||||
kprintf("%s: ERROR: virt_to_phys failed - virt = 0x%lx\n",
|
|
||||||
__FUNCTION__, virt);
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* XXX: It seems that this is the place where the reference to
|
* XXX: It seems that this is the place where the reference to
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ struct profile_event {
|
|||||||
* [PROFILE_SYSCALL_MAX,PROFILE_OFFLOAD_MAX) - syscall offloads
|
* [PROFILE_SYSCALL_MAX,PROFILE_OFFLOAD_MAX) - syscall offloads
|
||||||
* [PROFILE_OFFLOAD_MAX,PROFILE_EVENT_MAX) - general events
|
* [PROFILE_OFFLOAD_MAX,PROFILE_EVENT_MAX) - general events
|
||||||
*
|
*
|
||||||
* XXX: Make sure to fill in prof_event_names in profile.c
|
* XXX: Make sure to fill in profile_event_names in profile.c
|
||||||
* for each added profiled event.
|
* for each added profiled event.
|
||||||
*/
|
*/
|
||||||
enum profile_event_type {
|
enum profile_event_type {
|
||||||
@@ -44,6 +44,11 @@ enum profile_event_type {
|
|||||||
PROFILE_mmap_anon_no_contig_phys,
|
PROFILE_mmap_anon_no_contig_phys,
|
||||||
PROFILE_mmap_regular_file,
|
PROFILE_mmap_regular_file,
|
||||||
PROFILE_mmap_device_file,
|
PROFILE_mmap_device_file,
|
||||||
|
PROFILE_sdma_1,
|
||||||
|
PROFILE_sdma_2,
|
||||||
|
PROFILE_sdma_3,
|
||||||
|
PROFILE_sdma_4,
|
||||||
|
PROFILE_sdma_5,
|
||||||
PROFILE_EVENT_MAX /* Should be the last event type */
|
PROFILE_EVENT_MAX /* Should be the last event type */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -69,6 +69,11 @@ char *profile_event_names[] =
|
|||||||
"mmap_anon_no_contig_phys",
|
"mmap_anon_no_contig_phys",
|
||||||
"mmap_regular_file",
|
"mmap_regular_file",
|
||||||
"mmap_device_file",
|
"mmap_device_file",
|
||||||
|
"sdma_1",
|
||||||
|
"sdma_2",
|
||||||
|
"sdma_3",
|
||||||
|
"sdma_4",
|
||||||
|
"sdma_5",
|
||||||
""
|
""
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,7 @@
|
|||||||
#include "mmu_rb.h"
|
#include "mmu_rb.h"
|
||||||
|
|
||||||
#include <ihk/mm.h>
|
#include <ihk/mm.h>
|
||||||
|
#include <profile.h>
|
||||||
|
|
||||||
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
|
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
|
||||||
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
|
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
|
||||||
@@ -564,7 +565,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
|||||||
u16 dlid;
|
u16 dlid;
|
||||||
u32 selector;
|
u32 selector;
|
||||||
|
|
||||||
|
|
||||||
#ifndef __HFI1_ORIG__
|
#ifndef __HFI1_ORIG__
|
||||||
if (!hfi1_kregbase) {
|
if (!hfi1_kregbase) {
|
||||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||||
@@ -959,8 +959,9 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
|
/* TODO: set these! */
|
||||||
// atomic_inc(&pq->n_reqs);
|
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
|
||||||
|
atomic_inc(&pq->n_reqs);
|
||||||
req_queued = 1;
|
req_queued = 1;
|
||||||
/* Send the first N packets in the request to buy us some time */
|
/* Send the first N packets in the request to buy us some time */
|
||||||
ret = user_sdma_send_pkts(req, pcount);
|
ret = user_sdma_send_pkts(req, pcount);
|
||||||
@@ -1009,7 +1010,12 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
|||||||
hfi1_cdbg(AIOWRITE, "-wait_event_interruptible_timeout");
|
hfi1_cdbg(AIOWRITE, "-wait_event_interruptible_timeout");
|
||||||
#else
|
#else
|
||||||
TP("+ polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
|
TP("+ polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
|
||||||
while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
|
{
|
||||||
|
unsigned long ts = rdtsc();
|
||||||
|
while (pq->state != SDMA_PKT_Q_ACTIVE) cpu_pause();
|
||||||
|
kprintf("%s: waited %lu cycles for SDMA_PKT_Q_ACTIVE\n",
|
||||||
|
__FUNCTION__, rdtsc() - ts);
|
||||||
|
}
|
||||||
TP("- polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
|
TP("- polling while(pq->state != SDMA_PKT_Q_ACTIVE)");
|
||||||
#endif /* __HFI1_ORIG__ */
|
#endif /* __HFI1_ORIG__ */
|
||||||
}
|
}
|
||||||
@@ -1093,6 +1099,49 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
|
|||||||
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
|
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ihk_spinlock_t txreq_cache_lock = 0;
|
||||||
|
static LIST_HEAD(txreq_cache_list);
|
||||||
|
|
||||||
|
struct user_sdma_txreq *txreq_cache_alloc(void)
|
||||||
|
{
|
||||||
|
struct user_sdma_txreq *req = NULL;
|
||||||
|
|
||||||
|
ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
|
||||||
|
retry:
|
||||||
|
if (!list_empty(&txreq_cache_list)) {
|
||||||
|
req = list_first_entry(&txreq_cache_list,
|
||||||
|
struct user_sdma_txreq, list);
|
||||||
|
list_del(&req->list);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int i;
|
||||||
|
kprintf("%s: cache empty, allocating ...\n", __FUNCTION__);
|
||||||
|
for (i = 0; i < 100; ++i) {
|
||||||
|
req = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
|
||||||
|
if (!req) {
|
||||||
|
kprintf("%s: ERROR: allocating txreq\n", __FUNCTION__);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_add_tail(&req->list, &txreq_cache_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);
|
||||||
|
|
||||||
|
return req;
|
||||||
|
}
|
||||||
|
|
||||||
|
void txreq_cache_free(struct user_sdma_txreq *req)
|
||||||
|
{
|
||||||
|
ihk_mc_spinlock_lock_noirq(&txreq_cache_lock);
|
||||||
|
list_add_tail(&req->list, &txreq_cache_list);
|
||||||
|
ihk_mc_spinlock_unlock_noirq(&txreq_cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
//#undef PROFILE_ENABLE
|
||||||
|
|
||||||
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
||||||
{
|
{
|
||||||
int ret = 0, count;
|
int ret = 0, count;
|
||||||
@@ -1133,6 +1182,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
unsigned long base_phys;
|
unsigned long base_phys;
|
||||||
u64 iov_offset = 0;
|
u64 iov_offset = 0;
|
||||||
|
|
||||||
|
#ifdef PROFILE_ENABLE
|
||||||
|
unsigned long prof_ts = rdtsc();
|
||||||
|
#endif
|
||||||
|
|
||||||
//TODO: enable test_bit
|
//TODO: enable test_bit
|
||||||
#ifdef __HFI1_ORIG__
|
#ifdef __HFI1_ORIG__
|
||||||
/*
|
/*
|
||||||
@@ -1146,8 +1199,14 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
}
|
}
|
||||||
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
|
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
|
||||||
#else
|
#else
|
||||||
tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
|
//tx = kmalloc(sizeof(struct user_sdma_txreq), GFP_KERNEL);
|
||||||
|
tx = txreq_cache_alloc();
|
||||||
#endif /* __HFI1_ORIG__ */
|
#endif /* __HFI1_ORIG__ */
|
||||||
|
#ifdef PROFILE_ENABLE
|
||||||
|
profile_event_add(PROFILE_sdma_1,
|
||||||
|
(rdtsc() - prof_ts));
|
||||||
|
prof_ts = rdtsc();
|
||||||
|
#endif // PROFILE_ENABLE
|
||||||
if (!tx)
|
if (!tx)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
TP("- kmalloc");
|
TP("- kmalloc");
|
||||||
@@ -1156,6 +1215,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
tx->busycount = 0;
|
tx->busycount = 0;
|
||||||
INIT_LIST_HEAD(&tx->list);
|
INIT_LIST_HEAD(&tx->list);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For the last packet set the ACK request
|
* For the last packet set the ACK request
|
||||||
* and disable header suppression.
|
* and disable header suppression.
|
||||||
@@ -1273,12 +1333,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
* If the request contains any data vectors, add up to
|
* If the request contains any data vectors, add up to
|
||||||
* fragsize bytes to the descriptor.
|
* fragsize bytes to the descriptor.
|
||||||
*/
|
*/
|
||||||
|
#ifdef PROFILE_ENABLE
|
||||||
|
profile_event_add(PROFILE_sdma_2,
|
||||||
|
(rdtsc() - prof_ts));
|
||||||
|
prof_ts = rdtsc();
|
||||||
|
#endif // PROFILE_ENABLE
|
||||||
TP("+ If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
|
TP("+ If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
|
||||||
while (queued < datalen &&
|
while (queued < datalen &&
|
||||||
(req->sent + data_sent) < req->data_len) {
|
(req->sent + data_sent) < req->data_len) {
|
||||||
unsigned pageidx, len;
|
unsigned pageidx, len;
|
||||||
unsigned long base, offset;
|
unsigned long base, offset;
|
||||||
const void *virt;
|
const void *virt;
|
||||||
|
unsigned long paddr_base;
|
||||||
|
|
||||||
base = (unsigned long)iovec->iov.iov_base;
|
base = (unsigned long)iovec->iov.iov_base;
|
||||||
offset = offset_in_page(base + iovec->offset +
|
offset = offset_in_page(base + iovec->offset +
|
||||||
@@ -1291,11 +1357,24 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
len = min((datalen - queued), len);
|
len = min((datalen - queued), len);
|
||||||
SDMA_DBG("%s: dl: %d, qd: %d, len: %d\n",
|
SDMA_DBG("%s: dl: %d, qd: %d, len: %d\n",
|
||||||
__FUNCTION__, datalen, queued, len);
|
__FUNCTION__, datalen, queued, len);
|
||||||
|
|
||||||
|
//if (iov_offset == 0 || iovec->offset == 0) {
|
||||||
|
if (ihk_mc_pt_virt_to_phys(
|
||||||
|
cpu_local_var(current)->vm->address_space->page_table,
|
||||||
|
virt, &paddr_base) < 0) {
|
||||||
|
/* TODO: shall we make this function fail? *
|
||||||
|
* Handle this error. */
|
||||||
|
kprintf("%s: ERROR: virt_to_phys failed - virt = 0x%lx\n",
|
||||||
|
__FUNCTION__, virt);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
//}
|
||||||
|
|
||||||
ret = sdma_txadd_page(pq->dd, &tx->txreq,
|
ret = sdma_txadd_page(pq->dd, &tx->txreq,
|
||||||
#ifdef __HFI1_ORIG__
|
#ifdef __HFI1_ORIG__
|
||||||
iovec->pages[pageidx], offset,
|
iovec->pages[pageidx], offset,
|
||||||
#else
|
#else
|
||||||
virt,
|
paddr_base + iov_offset + iovec->offset,
|
||||||
#endif
|
#endif
|
||||||
len);
|
len);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
@@ -1318,6 +1397,12 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
iov_offset = 0;
|
iov_offset = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PROFILE_ENABLE
|
||||||
|
profile_event_add(PROFILE_sdma_3,
|
||||||
|
(rdtsc() - prof_ts));
|
||||||
|
prof_ts = rdtsc();
|
||||||
|
#endif // PROFILE_ENABLE
|
||||||
TP("- If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
|
TP("- If the request contains any data vectors, add up to fragsize bytes to the descriptor.");
|
||||||
/*
|
/*
|
||||||
* The txreq was submitted successfully so we can update
|
* The txreq was submitted successfully so we can update
|
||||||
@@ -1337,6 +1422,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
|
|||||||
*/
|
*/
|
||||||
tx->seqnum = req->seqnum++;
|
tx->seqnum = req->seqnum++;
|
||||||
npkts++;
|
npkts++;
|
||||||
|
#ifdef PROFILE_ENABLE
|
||||||
|
profile_event_add(PROFILE_sdma_4,
|
||||||
|
(rdtsc() - prof_ts));
|
||||||
|
prof_ts = rdtsc();
|
||||||
|
#endif // PROFILE_ENABLE
|
||||||
}
|
}
|
||||||
dosend:
|
dosend:
|
||||||
|
|
||||||
@@ -1793,7 +1883,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
|
|||||||
#ifdef __HFI1_ORIG__
|
#ifdef __HFI1_ORIG__
|
||||||
kmem_cache_free(pq->txreq_cache, tx);
|
kmem_cache_free(pq->txreq_cache, tx);
|
||||||
#else
|
#else
|
||||||
kfree(tx);
|
//kfree(tx);
|
||||||
|
txreq_cache_free(tx);
|
||||||
#endif /* __HFI1_ORIG__ */
|
#endif /* __HFI1_ORIG__ */
|
||||||
tx = NULL;
|
tx = NULL;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user