From 0b9a657a01922f70bf9d2a0070619bf42e45fe52 Mon Sep 17 00:00:00 2001 From: Balazs Gerofi Date: Mon, 15 Apr 2019 11:25:53 +0900 Subject: [PATCH] HFI: support IFS 10.8-0 Change-Id: Iebc0e2b50faf464efcc5134cc40dc52e0bd6eea7 --- kernel/include/hfi1/hfi1_generated_ctxtdata.h | 2 +- kernel/include/hfi1/hfi1_generated_devdata.h | 10 +-- .../hfi1_generated_hfi1_user_sdma_pkt_q.h | 2 +- .../include/hfi1/hfi1_generated_pportdata.h | 2 +- .../hfi1/hfi1_generated_user_sdma_request.h | 26 ++---- kernel/include/hfi1/user_sdma.h | 5 ++ kernel/script/regenerate_hfi1_header.sh | 4 +- kernel/user_sdma.c | 85 ++++++++----------- 8 files changed, 59 insertions(+), 77 deletions(-) diff --git a/kernel/include/hfi1/hfi1_generated_ctxtdata.h b/kernel/include/hfi1/hfi1_generated_ctxtdata.h index 44131135..200a1b04 100644 --- a/kernel/include/hfi1/hfi1_generated_ctxtdata.h +++ b/kernel/include/hfi1/hfi1_generated_ctxtdata.h @@ -1,6 +1,6 @@ struct hfi1_ctxtdata { union { - char whole_struct[1408]; + char whole_struct[1160]; struct { char padding0[144]; u16 ctxt; diff --git a/kernel/include/hfi1/hfi1_generated_devdata.h b/kernel/include/hfi1/hfi1_generated_devdata.h index 69ccfb7e..d80d2d9f 100644 --- a/kernel/include/hfi1/hfi1_generated_devdata.h +++ b/kernel/include/hfi1/hfi1_generated_devdata.h @@ -1,6 +1,6 @@ struct hfi1_devdata { union { - char whole_struct[7872]; + char whole_struct[7808]; struct { char padding0[3368]; u8 *kregbase1; @@ -46,19 +46,19 @@ struct hfi1_devdata { u32 chip_rcv_array_count; }; struct { - char padding11[7392]; + char padding11[7264]; struct hfi1_pportdata *pport; }; struct { - char padding12[7416]; + char padding12[7296]; u16 flags; }; struct { - char padding13[7419]; + char padding13[7299]; u8 first_dyn_alloc_ctxt; }; struct { - char padding14[7432]; + char padding14[7368]; u64 sc2vl[4]; }; }; diff --git a/kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h b/kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h index 01d7fee0..3c171294 100644 --- 
a/kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h +++ b/kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h @@ -23,7 +23,7 @@ struct hfi1_user_sdma_pkt_q { }; struct { char padding5[288]; - unsigned int state; + enum pkt_q_sdma_state state; }; }; }; diff --git a/kernel/include/hfi1/hfi1_generated_pportdata.h b/kernel/include/hfi1/hfi1_generated_pportdata.h index 12a6519c..e3ac3522 100644 --- a/kernel/include/hfi1/hfi1_generated_pportdata.h +++ b/kernel/include/hfi1/hfi1_generated_pportdata.h @@ -1,6 +1,6 @@ struct hfi1_pportdata { union { - char whole_struct[12928]; + char whole_struct[12992]; struct { char padding0[2113]; u8 vls_operational; diff --git a/kernel/include/hfi1/hfi1_generated_user_sdma_request.h b/kernel/include/hfi1/hfi1_generated_user_sdma_request.h index 4b15066f..db17a3f3 100644 --- a/kernel/include/hfi1/hfi1_generated_user_sdma_request.h +++ b/kernel/include/hfi1/hfi1_generated_user_sdma_request.h @@ -50,47 +50,39 @@ struct user_sdma_request { u64 seqsubmitted; }; struct { - char padding12[144]; - int status; - }; - struct { - char padding13[192]; + char padding12[192]; struct list_head txps; }; struct { - char padding14[208]; + char padding13[208]; u64 seqnum; }; struct { - char padding15[216]; + char padding14[216]; u32 tidoffset; }; struct { - char padding16[220]; + char padding15[220]; u32 koffset; }; struct { - char padding17[224]; + char padding16[224]; u32 sent; }; struct { - char padding18[228]; + char padding17[228]; u16 tididx; }; struct { - char padding19[230]; + char padding18[230]; u8 iov_idx; }; struct { - char padding20[231]; - u8 done; - }; - struct { - char padding21[232]; + char padding19[231]; u8 has_error; }; struct { - char padding22[240]; + char padding20[232]; struct user_sdma_iovec iovs[8]; }; }; diff --git a/kernel/include/hfi1/user_sdma.h b/kernel/include/hfi1/user_sdma.h index 44d462b9..917a07db 100644 --- a/kernel/include/hfi1/user_sdma.h +++ b/kernel/include/hfi1/user_sdma.h @@ -114,6 +114,11 @@ extern 
uint extended_psn; #define KDETH_OM_LARGE_SHIFT 6 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) +enum pkt_q_sdma_state { + SDMA_PKT_Q_ACTIVE, + SDMA_PKT_Q_DEFERRED, +}; + #include struct hfi1_user_sdma_comp_q { diff --git a/kernel/script/regenerate_hfi1_header.sh b/kernel/script/regenerate_hfi1_header.sh index 4a2c6310..4c5cc296 100755 --- a/kernel/script/regenerate_hfi1_header.sh +++ b/kernel/script/regenerate_hfi1_header.sh @@ -57,8 +57,8 @@ HFI1_KO="${1-$(modinfo -n hfi1)}" || \ > "${HDR_PREFIX}sdma_engine.h" "$DES_BIN" "$HFI1_KO" user_sdma_request \ - data_iovs pq cq status txps info hdr tidoffset data_len \ - iov_idx sent seqnum done has_error koffset tididx \ + data_iovs pq cq txps info hdr tidoffset data_len \ + iov_idx sent seqnum has_error koffset tididx \ tids n_tids sde ahg_idx iovs seqcomp seqsubmitted \ > "${HDR_PREFIX}user_sdma_request.h" diff --git a/kernel/user_sdma.c b/kernel/user_sdma.c index 836a1c81..b1d84194 100644 --- a/kernel/user_sdma.c +++ b/kernel/user_sdma.c @@ -60,9 +60,6 @@ static uint hfi1_sdma_comp_ring_size = 128; #define SDMA_REQ_HAS_ERROR 4 #define SDMA_REQ_DONE_ERROR 5 -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) /* * Maximum retry attempts to submit a TX request @@ -549,7 +546,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; - int req_queued = 0; u16 dlid; u32 selector; unsigned long size_info = sizeof(info); @@ -620,7 +616,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, req->data_len = 0; req->pq = pq; req->cq = cq; - req->status = -1; req->ahg_idx = -1; req->iov_idx = 0; req->sent = 0; @@ -628,13 +623,15 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, req->seqcomp = 0; req->seqsubmitted = 0; req->tids = NULL; - req->done = 0; req->has_error = 0; 
INIT_LIST_HEAD(&req->txps); fast_memcpy(&req->info, &info, size_info); + /* The request is initialized, count it */ + ihk_atomic_inc(&pq->n_reqs); + if (req_opcode(info.ctrl) == EXPECTED) { /* expected must have a TID info and at least one data vector */ if (req->data_iovs < 2) { @@ -841,25 +838,14 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, } set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); - atomic_inc(&pq->n_reqs); - req_queued = 1; + pq->state = SDMA_PKT_Q_ACTIVE; /* Send the first N packets in the request to buy us some time */ ret = user_sdma_send_pkts(req, pcount, txreq_cache); if (unlikely(ret < 0 && ret != -EBUSY)) { - req->status = ret; goto free_req; } - /* - * It is possible that the SDMA engine would have processed all the - * submitted packets by the time we get here. Therefore, only set - * packet queue state to ACTIVE if there are still uncompleted - * requests. - */ - if (atomic_read(&pq->n_reqs)) - xchg(&pq->state, SDMA_PKT_Q_ACTIVE); - /* * This is a somewhat blocking send implementation. * The driver will block the caller until all packets of the @@ -870,16 +856,12 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, ret = user_sdma_send_pkts(req, pcount, txreq_cache); if (ret < 0) { if (ret != -EBUSY) { - req->status = ret; - WRITE_ONCE(req->has_error, 1); - if (ACCESS_ONCE(req->seqcomp) == - req->seqsubmitted - 1) - goto free_req; - return ret; + goto free_req; } { unsigned long ts = rdtsc(); - while (pq->state != SDMA_PKT_Q_ACTIVE) { + while (ihk_atomic_read(&pq->n_reqs) > 0 && + pq->state != SDMA_PKT_Q_ACTIVE) { cpu_pause(); } kprintf("%s: waited %lu cycles for SDMA_PKT_Q_ACTIVE\n", @@ -891,9 +873,26 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec, return 0; free_req: user_sdma_free_request(req, true); - if (req_queued) + /* + * If the submitted seqsubmitted == npkts, the completion routine + * controls the final state. 
If seqsubmitted < npkts, wait for any + * outstanding packets to finish before cleaning up. + */ + if (req->seqsubmitted < req->info.npkts) { + if (req->seqsubmitted) { + { + unsigned long ts = rdtsc(); + while (req->seqcomp != req->seqsubmitted - 1) { + cpu_pause(); + } + kprintf("%s: waited %lu cycles for req->seqcomp\n", + __FUNCTION__, rdtsc() - ts); + } + } + user_sdma_free_request(req, true); pq_update(pq); - set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); + set_comp_state(pq, cq, info.comp_idx, ERROR, ret); + } return ret; } @@ -1263,7 +1262,6 @@ dosend: &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1572,7 +1570,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) struct user_sdma_request *req; struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; - u16 idx; + enum hfi1_sdma_comp_state state = COMPLETE; if (!tx->req) return; @@ -1585,37 +1583,24 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) SDMA_DBG(req, "SDMA completion with error %d", status); WRITE_ONCE(req->has_error, 1); + state = ERROR; } req->seqcomp = tx->seqnum; kmalloc_cache_free(tx); - tx = NULL; - idx = req->info.comp_idx; - if (req->status == -1 && status == SDMA_TXREQ_S_OK) { - if (req->seqcomp == req->info.npkts - 1) { - req->status = 0; - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, COMPLETE, 0); - } - } else { - if (status != SDMA_TXREQ_S_OK) - req->status = status; - if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (READ_ONCE(req->done) || - READ_ONCE(req->has_error))) { - user_sdma_free_request(req, false); - pq_update(pq); - set_comp_state(pq, cq, idx, ERROR, req->status); - } - } + /* sequence isn't complete?
We are done */ + if (req->seqcomp != req->info.npkts - 1) + return; + + user_sdma_free_request(req, false); + set_comp_state(pq, cq, req->info.comp_idx, state, status); + pq_update(pq); } static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) { if (atomic_dec_and_test(&pq->n_reqs)) { - xchg(&pq->state, SDMA_PKT_Q_INACTIVE); //TODO: pq_update wake_up //wake_up(&pq->wait); }