From 238e346586bee518397fac5ecf084ac26b1031bf Mon Sep 17 00:00:00 2001
From: Balazs Gerofi
Date: Fri, 9 Feb 2018 10:48:31 +0900
Subject: [PATCH] HFI1: use DWARF generated headers for user_sdma_request and
 user_sdma_txreq

Instead of maintaining hand-written copies of struct user_sdma_request
and struct user_sdma_txreq in kernel/user_sdma.c, generate them from the
DWARF debug info of hfi1.ko, the same way hfi1_generated_sdma_engine.h
is generated, so the layouts stay in sync with the Linux driver.
Related changes:

- import READ_ONCE()/WRITE_ONCE() into lwk/compiler.h and drop the
  __memory_barrier() fallback definition of barrier();
- replace the SDMA_REQ_* flag bits with the generated done/has_error
  fields accessed through READ_ONCE()/WRITE_ONCE();
- test ahg_idx >= 0 instead of SDMA_REQ_HAVE_AHG and build the AHG
  update descriptor on the stack in set_txreq_header_ahg(), which now
  calls sdma_txinit_ahg() itself;
- store the KDETH OM factor as a shift (KDETH_OM_*_SHIFT) in a local
  variable instead of the omfactor request field;
- initialize the request fields explicitly instead of memset()-ing the
  whole (now generated) structure;
- make __s64/__u64 long long;
- in set_comp_state(), write the completion status after the error
  code, separated by a barrier();
- fix the flush_tlb_single() argument cast in mem.c.
---
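Notes:
    The generated headers encode member offsets the same way
    hfi1_generated_sdma_engine.h does: each member McKernel touches sits
    in an anonymous struct whose leading padding array equals the
    member's byte offset in the Linux structure, and the anonymous
    structs are overlaid in one union (which is why the padding sizes
    may overlap, e.g. padding8[181] places this_idx at offset 181).
    A minimal sketch of the idiom, with invented offsets; the real ones
    come from the hfi1.ko DWARF info:

	#include <stddef.h>

	struct shadow {
		union {
			struct {
				char padding0[4];  /* offset of a in hfi1.ko */
				unsigned int a;
			};
			struct {
				char padding1[16]; /* offset of b in hfi1.ko */
				unsigned short b;
			};
		};
	};

	/* each member lands at exactly the offset its padding encodes */
	_Static_assert(offsetof(struct shadow, a) == 4, "layout");
	_Static_assert(offsetof(struct shadow, b) == 16, "layout");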
 .../include/hfi1/hfi1_generated_sdma_engine.h |  24 ++-
 kernel/include/hfi1/ihk_hfi1_common.h         |   4 +-
 kernel/include/hfi1/user_sdma.h               |   3 +
 kernel/include/lwk/compiler.h                 |  69 +++++-
 kernel/mem.c                                  |   2 +-
 kernel/script/regenerate_hfi1_header.sh       |  11 +
 kernel/user_sdma.c                            | 200 ++++++------------
 7 files changed, 164 insertions(+), 149 deletions(-)

diff --git a/kernel/include/hfi1/hfi1_generated_sdma_engine.h b/kernel/include/hfi1/hfi1_generated_sdma_engine.h
index c3e21697..ed6d3f7b 100644
--- a/kernel/include/hfi1/hfi1_generated_sdma_engine.h
+++ b/kernel/include/hfi1/hfi1_generated_sdma_engine.h
@@ -34,43 +34,47 @@ struct sdma_engine {
         u8 sdma_shift;
     };
     struct {
-        char padding8[256];
+        char padding8[181];
+        u8 this_idx;
+    };
+    struct {
+        char padding9[256];
         spinlock_t tail_lock;
     };
     struct {
-        char padding9[260];
+        char padding10[260];
         u32 descq_tail;
     };
     struct {
-        char padding10[264];
+        char padding11[264];
         long unsigned int ahg_bits;
     };
     struct {
-        char padding11[272];
+        char padding12[272];
         u16 desc_avail;
     };
     struct {
-        char padding12[274];
+        char padding13[274];
         u16 tx_tail;
     };
     struct {
-        char padding13[276];
+        char padding14[276];
         u16 descq_cnt;
     };
     struct {
-        char padding14[320];
+        char padding15[320];
         seqlock_t head_lock;
     };
     struct {
-        char padding15[328];
+        char padding16[328];
         u32 descq_head;
     };
     struct {
-        char padding16[704];
+        char padding17[704];
         spinlock_t flushlist_lock;
     };
     struct {
-        char padding17[712];
+        char padding18[712];
         struct list_head flushlist;
     };
 };
diff --git a/kernel/include/hfi1/ihk_hfi1_common.h b/kernel/include/hfi1/ihk_hfi1_common.h
index d2f09051..afbee64a 100644
--- a/kernel/include/hfi1/ihk_hfi1_common.h
+++ b/kernel/include/hfi1/ihk_hfi1_common.h
@@ -191,8 +191,8 @@ typedef unsigned short __u16;
 typedef __signed__ int __s32;
 typedef unsigned int __u32;
 
-typedef __signed__ long __s64;
-typedef unsigned long __u64;
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
 
 typedef __u64 u64;
 typedef __s64 s64;
diff --git a/kernel/include/hfi1/user_sdma.h b/kernel/include/hfi1/user_sdma.h
index 5e3903fc..99e09f9e 100644
--- a/kernel/include/hfi1/user_sdma.h
+++ b/kernel/include/hfi1/user_sdma.h
@@ -94,6 +94,7 @@ extern uint extended_psn;
 
 #define AHG_KDETH_INTR_SHIFT 12
 #define AHG_KDETH_SH_SHIFT   13
+#define AHG_KDETH_ARRAY_SIZE 9
 
 #define KDETH_GET(val, field) \
     (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
@@ -108,7 +109,9 @@ extern uint extended_psn;
 
 /* KDETH OM multipliers and switch over point */
 #define KDETH_OM_SMALL       4
+#define KDETH_OM_SMALL_SHIFT 2
 #define KDETH_OM_LARGE       64
+#define KDETH_OM_LARGE_SHIFT 6
 #define KDETH_OM_MAX_SIZE    (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 
 /* The original size on Linux is 376 B */
diff --git a/kernel/include/lwk/compiler.h b/kernel/include/lwk/compiler.h
index df62035b..6dfd702d 100644
--- a/kernel/include/lwk/compiler.h
+++ b/kernel/include/lwk/compiler.h
@@ -1,6 +1,8 @@
 #ifndef __LWK_COMPILER_H
 #define __LWK_COMPILER_H
 
+#include <lwk/types.h>
+
 #ifndef __ASSEMBLY__
 
 #ifdef __CHECKER__
@@ -175,11 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define unlikely(x) __builtin_expect(!!(x), 0)
 #endif
 
-/* Optimization barrier */
-#ifndef barrier
-# define barrier() __memory_barrier()
-#endif
-
 #ifndef barrier_data
 # define barrier_data(ptr) barrier()
 #endif
@@ -490,4 +487,66 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
     (_________p1); \
 })
 
+extern void *memcpy(void *dest, const void *src, size_t n);
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+    switch (size) {
+    case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
+    case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
+    case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
+    case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
+    default:
+        barrier();
+        memcpy((void *)res, (const void *)p, size);
+        barrier();
+    }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+    switch (size) {
+    case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+    case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+    case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+    case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+    default:
+        barrier();
+        memcpy((void *)p, (const void *)res, size);
+        barrier();
+    }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering. One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+    ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+    ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
+
+
+
+
 #endif /* __LWK_COMPILER_H */
diff --git a/kernel/mem.c b/kernel/mem.c
index 351f4405..395cb032 100644
--- a/kernel/mem.c
+++ b/kernel/mem.c
@@ -1214,7 +1214,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
                 "mapped on demand\n",
                 __FUNCTION__, virt, phys);
 
-            flush_tlb_single(virt);
+            flush_tlb_single((unsigned long)virt);
             error = 0;
             goto out;
         }
diff --git a/kernel/script/regenerate_hfi1_header.sh b/kernel/script/regenerate_hfi1_header.sh
index 3057ce5b..8e223fb2 100755
--- a/kernel/script/regenerate_hfi1_header.sh
+++ b/kernel/script/regenerate_hfi1_header.sh
@@ -53,4 +53,15 @@ HFI1_KO="${1-$(modinfo -n hfi1)}" || \
     dd tail_lock desc_avail tail_csr flushlist flushlist_lock \
     descq_head descq_tail descq_cnt state sdma_shift sdma_mask\
     descq tx_ring tx_tail head_lock descq_full_count ahg_bits\
+    this_idx \
     > "${HDR_PREFIX}sdma_engine.h"
+
+"$DES_BIN" "$HFI1_KO" user_sdma_request \
+    data_iovs pq cq status txps info hdr tidoffset data_len \
+    iov_idx sent seqnum done has_error koffset tididx \
+    tids n_tids sde ahg_idx iovs seqcomp seqsubmitted \
+    > "${HDR_PREFIX}user_sdma_request.h"
+
+"$DES_BIN" "$HFI1_KO" user_sdma_txreq \
+    hdr txreq list req flags busycount seqnum \
+    > "${HDR_PREFIX}user_sdma_txreq.h"
diff --git a/kernel/user_sdma.c b/kernel/user_sdma.c
index f9e41d44..d6a891ec 100644
--- a/kernel/user_sdma.c
+++ b/kernel/user_sdma.c
@@ -153,62 +153,8 @@ struct user_sdma_iovec {
 #endif
 };
 
-struct user_sdma_request {
-    struct sdma_req_info info;
-    struct hfi1_user_sdma_pkt_q *pq;
-    struct hfi1_user_sdma_comp_q *cq;
-    /* This is the original header from user space */
-    struct hfi1_pkt_header hdr;
-    /*
-     * Pointer to the SDMA engine for this request.
-     * Since different request could be on different VLs,
-     * each request will need it's own engine pointer.
-     */
-    struct sdma_engine *sde;
-    u8 ahg_idx;
-    u32 ahg[9];
-    /*
-     * KDETH.Offset (Eager) field
-     * We need to remember the initial value so the headers
-     * can be updated properly.
-     */
-    u32 koffset;
-    /*
-     * KDETH.OFFSET (TID) field
-     * The offset can cover multiple packets, depending on the
-     * size of the TID entry.
-     */
-    u32 tidoffset;
-    /*
-     * KDETH.OM
-     * Remember this because the header template always sets it
-     * to 0.
-     */
-    u8 omfactor;
-    /*
-     * We copy the iovs for this request (based on
-     * info.iovcnt). These are only the data vectors
-     */
-    unsigned data_iovs;
-    /* total length of the data in the request */
-    u32 data_len;
-    /* progress index moving along the iovs array */
-    unsigned iov_idx;
-    struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
-    /* number of elements copied to the tids array */
-    u16 n_tids;
-    /* TID array values copied from the tid_iov vector */
-    u32 *tids;
-    u16 tididx;
-    u32 sent;
-    u64 seqnum;
-    u64 seqcomp;
-    u64 seqsubmitted;
-    struct list_head txps;
-    unsigned long flags;
-    /* status of the last txreq completed */
-    int status;
-};
+
+#include <hfi1/hfi1_generated_user_sdma_request.h>
 
 /*
  * A single txreq could span up to 3 physical pages when the MTU
@@ -216,16 +162,9 @@ struct user_sdma_request {
  * needs it's own set of flags so the vector has been handled
  * independently of each other.
  */
-struct user_sdma_txreq {
-    /* Packet header for the txreq */
-    struct hfi1_pkt_header hdr;
-    struct sdma_txreq txreq;
-    struct list_head list;
-    struct user_sdma_request *req;
-    u16 flags;
-    unsigned busycount;
-    u64 seqnum;
-};
+
+#include <hfi1/hfi1_generated_user_sdma_txreq.h>
+
 
 static int user_sdma_send_pkts(struct user_sdma_request *req,
                unsigned maxpkts,
@@ -728,13 +667,23 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
     hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n",
           dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
     req = pq->reqs + info.comp_idx;
-    memset(req, 0, sizeof(*req));
     req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+    req->data_len = 0;
     req->pq = pq;
     req->cq = cq;
     req->status = -1;
+    req->ahg_idx = -1;
+    req->iov_idx = 0;
+    req->sent = 0;
+    req->seqnum = 0;
+    req->seqcomp = 0;
+    req->seqsubmitted = 0;
+    req->tids = NULL;
+    req->done = 0;
+    req->has_error = 0;
     INIT_LIST_HEAD(&req->txps);
+
     fast_memcpy(&req->info, &info, sizeof(info));
 
     if (req_opcode(info.ctrl) == EXPECTED) {
@@ -830,6 +779,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
     TP("+ Save all the IO vector structures");
     /* Save all the IO vector structures */
     for (i = 0; i < req->data_iovs; i++) {
+        req->iovs[i].offset = 0;
         INIT_LIST_HEAD(&req->iovs[i].list);
         /*
          * req->iovs[] contain only the data.
@@ -925,6 +875,7 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
             goto free_req;
         }
         req->n_tids = ntids;
+        req->tididx = 0;
         idx++;
     }
     TP("- Copy any TID info");
@@ -954,7 +905,6 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
 
         if (likely(ahg >= 0)) {
             req->ahg_idx = (u8)ahg;
-            set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
         }
     }
 
@@ -984,13 +934,13 @@ int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
      * will not wait for send completions.
      */
     TP("+ while user_sdma_send_pkts()");
-    while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+    while (req->seqsubmitted != req->info.npkts) {
         ret = user_sdma_send_pkts(req, pcount, txreq_cache);
         if (ret < 0) {
             TP("user_sdma_send_pkts() early return");
             if (ret != -EBUSY) {
                 req->status = ret;
-                set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+                WRITE_ONCE(req->has_error, 1);
                 if (ACCESS_ONCE(req->seqcomp) ==
                     req->seqsubmitted - 1)
                     goto free_req;
@@ -1126,19 +1076,15 @@ static int user_sdma_send_pkts(struct user_sdma_request *req,
     struct hfi1_user_sdma_pkt_q *pq = NULL;
     struct user_sdma_iovec *iovec = NULL;
 
-    TP("+");
-    hfi1_cdbg(AIOWRITE, "+");
     if (!req->pq)
         return -EINVAL;
-    TP("- !req->pq");
+
     pq = req->pq;
 
     /* If tx completion has reported an error, we are done. */
-    if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-        set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
-        TP("test_bit(SDMA_REQ_HAS_ERROR, &req->flags)");
+    if (READ_ONCE(req->has_error))
         return -EFAULT;
-    }
+
     /*
      * Check if we might have sent the entire request already
      */
@@ -1168,10 +1114,9 @@ static int user_sdma_send_pkts(struct user_sdma_request *req,
         * with errors. If so, we are not going to process any
         * more packets from this request.
         */
-        if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-            set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+        if (READ_ONCE(req->has_error))
             return -EFAULT;
-        }
+
         tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
 #else
         tx = kmalloc_cache_alloc(txreq_cache, sizeof(*tx));
@@ -1239,8 +1184,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req,
                 (rdtsc() - prof_ts));
             prof_ts = rdtsc();
 #endif // PROFILE_ENABLE
-        if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
-            TP("+ if test_bit(SDMA_REQ_HAVE_AHG, &req->flags)");
+        if (req->ahg_idx >= 0) {
             if (!req->seqnum) {
                 TP("+ if !req->seqnum");
                 u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
@@ -1277,21 +1221,14 @@ static int user_sdma_send_pkts(struct user_sdma_request *req,
                 if (ret)
                     goto free_txreq;
             } else {
-                TP("+ else !req->seqnum");
                 int changes;
 
                 changes = set_txreq_header_ahg(req, tx,
                                datalen);
                 if (changes < 0)
                     goto free_tx;
-                sdma_txinit_ahg(&tx->txreq,
-                        SDMA_TXREQ_F_USE_AHG,
-                        datalen, req->ahg_idx, changes,
-                        req->ahg, sizeof(req->hdr),
-                        user_sdma_txreq_cb);
             }
         } else {
-            TP("+ else test_bit(SDMA_REQ_HAVE_AHG, &req->flags)");
             ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
                       datalen, user_sdma_txreq_cb);
             if (ret)
@@ -1306,7 +1243,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req,
             if (ret)
                 goto free_txreq;
         }
-        TP("- test_bit(SDMA_REQ_HAVE_AHG, &req->flags)");
 
 #ifdef PROFILE_ENABLE
         profile_event_add(PROFILE_sdma_2,
@@ -1443,24 +1379,21 @@ dosend:
                   &req->txps, &count);
     req->seqsubmitted += count;
     if (req->seqsubmitted == req->info.npkts) {
-        set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+        WRITE_ONCE(req->done, 1);
         /*
         * The txreq has already been submitted to the HW queue
         * so we can free the AHG entry now. Corruption will not
         * happen due to the sequential manner in which
         * descriptors are processed.
         */
-        if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+        if (req->ahg_idx >= 0)
             sdma_ahg_free(req->sde, req->ahg_idx);
     }
     hfi1_cdbg(AIOWRITE, "-");
-    TP("-");
     return ret;
 
 free_txreq:
-    TP("free_txreq");
     sdma_txclean(pq->dd, &tx->txreq);
 free_tx:
-    TP("free_tx");
 #ifdef __HFI1_ORIG__
     kmem_cache_free(pq->txreq_cache, tx);
     hfi1_cdbg(AIOWRITE, "-");
@@ -1545,6 +1478,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 {
     struct hfi1_user_sdma_pkt_q *pq = req->pq;
     struct hfi1_pkt_header *hdr = &tx->hdr;
+    u8 omfactor; /* KDETH.OM */
     u16 pbclen;
     int ret;
     u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1622,8 +1556,9 @@ static int set_txreq_header(struct user_sdma_request *req,
             }
             tidval = req->tids[req->tididx];
         }
-        req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
-            KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+        omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+             KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+             KDETH_OM_SMALL_SHIFT;
         /* Set KDETH.TIDCtrl based on value for this TID. */
         KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
               EXP_TID_GET(tidval, CTRL));
@@ -1638,12 +1573,12 @@ static int set_txreq_header(struct user_sdma_request *req,
         * transfer.
         */
         SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
-             req->tidoffset, req->tidoffset / req->omfactor,
-             req->omfactor != KDETH_OM_SMALL);
+             req->tidoffset, req->tidoffset >> omfactor,
+             omfactor != KDETH_OM_SMALL_SHIFT);
         KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
-              req->tidoffset / req->omfactor);
+              req->tidoffset >> omfactor);
         KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-              req->omfactor != KDETH_OM_SMALL);
+              omfactor != KDETH_OM_SMALL_SHIFT);
     }
 done:
 //  trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1652,20 +1587,22 @@
 }
 
 static int set_txreq_header_ahg(struct user_sdma_request *req,
-                struct user_sdma_txreq *tx, u32 len)
+                struct user_sdma_txreq *tx, u32 datalen)
 {
+    u32 ahg[AHG_KDETH_ARRAY_SIZE];
     int diff = 0;
-    // struct hfi1_user_sdma_pkt_q *pq = req->pq;
+    u8 omfactor; /* KDETH.OM */
+    struct hfi1_user_sdma_pkt_q *pq = req->pq;
     struct hfi1_pkt_header *hdr = &req->hdr;
     u16 pbclen = le16_to_cpu(hdr->pbc[0]);
-    u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+    u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
 
     if (PBC2LRH(pbclen) != lrhlen) {
         /* PBC.PbcLengthDWs */
-        AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
+        AHG_HEADER_SET(ahg, diff, 0, 0, 12,
                cpu_to_le16(LRH2PBC(lrhlen)));
         /* LRH.PktLen (we need the full 16 bits due to byte swap) */
-        AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
+        AHG_HEADER_SET(ahg, diff, 3, 0, 16,
                cpu_to_be16(lrhlen >> 2));
     }
@@ -1677,13 +1614,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
         (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
     if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
         val32 |= 1UL << 31;
-    AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
-    AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+    AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
+    AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
     /* KDETH.Offset */
-    AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
+    AHG_HEADER_SET(ahg, diff, 15, 0, 16,
            cpu_to_le16(req->koffset & 0xffff));
-    AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
-           cpu_to_le16(req->koffset >> 16));
+    AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
     if (req_opcode(req->info.ctrl) == EXPECTED) {
         __le16 val;
@@ -1701,19 +1637,19 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
         * we have to check again.
         */
         if (++req->tididx > req->n_tids - 1 ||
-            !req->tids[req->tididx]) {
+            !req->tids[req->tididx])
             return -EINVAL;
-        }
         tidval = req->tids[req->tididx];
     }
-    req->omfactor = ((EXP_TID_GET(tidval, LEN) *
-              PAGE_SIZE) >=
-             KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
-             KDETH_OM_SMALL;
+    omfactor = ((EXP_TID_GET(tidval, LEN) *
              PAGE_SIZE) >=
+             KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+             KDETH_OM_SMALL_SHIFT;
     /* KDETH.OM and KDETH.OFFSET (TID) */
-    AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
-           ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
-           ((req->tidoffset / req->omfactor) & 0x7fff)));
+    AHG_HEADER_SET(ahg, diff, 7, 0, 16,
+           ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+            ((req->tidoffset >> omfactor)
+             & 0x7fff)));
     /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
     val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
                (EXP_TID_GET(tidval, IDX) & 0x3ff));
@@ -1730,12 +1666,17 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                     AHG_KDETH_INTR_SHIFT));
     }
 
-    AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+    AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
     }
 
+    if (diff < 0)
+        return diff;
+
+    sdma_txinit_ahg(&tx->txreq,
+            SDMA_TXREQ_F_USE_AHG,
+            datalen, req->ahg_idx, diff,
+            ahg, sizeof(req->hdr),
+            user_sdma_txreq_cb);
 
-    // trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
-    //          req->info.comp_idx, req->sde->this_idx,
-    //          req->ahg_idx, req->ahg, diff, tidval);
     return diff;
 }
@@ -1764,7 +1705,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 
     if (status != SDMA_TXREQ_S_OK) {
         SDMA_DBG(req, "SDMA completion with error %d", status);
-        set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+        WRITE_ONCE(req->has_error, 1);
     }
 
     req->seqcomp = tx->seqnum;
@@ -1787,8 +1728,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
     if (status != SDMA_TXREQ_S_OK)
         req->status = status;
     if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
-        (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
-         test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+        (READ_ONCE(req->done) ||
+         READ_ONCE(req->has_error))) {
         user_sdma_free_request(req, false);
         pq_update(pq);
         set_comp_state(pq, cq, idx, ERROR, req->status);
@@ -1850,11 +1791,8 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
                   u16 idx, enum hfi1_sdma_comp_state state,
                   int ret)
 {
-    hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
-          pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
-    cq->comps[idx].status = state;
     if (state == ERROR)
         cq->comps[idx].errcode = -ret;
-    // trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
-    //          idx, state, ret);
+    barrier();
+    cq->comps[idx].status = state;
 }
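
--
A note on the synchronization change, with a minimal sketch; the helper
names below are illustrative, not the driver's.

The generated done/has_error fields replace the SDMA_REQ_SEND_DONE /
SDMA_REQ_HAS_ERROR / SDMA_REQ_DONE_ERROR bits that the submit path and
the txreq completion callback used to share. With the READ_ONCE() and
WRITE_ONCE() imported into lwk/compiler.h above, the pattern reduces to:

	/* completion-callback side: post the error with a plain store */
	static void complete_cb(struct user_sdma_request *req, int status)
	{
		if (status != SDMA_TXREQ_S_OK)
			WRITE_ONCE(req->has_error, 1);
	}

	/* submit side: re-read the flag on every pass, never cache it */
	static int submit_more(struct user_sdma_request *req)
	{
		if (READ_ONCE(req->has_error))
			return -EFAULT;
		if (req->seqsubmitted == req->info.npkts)
			WRITE_ONCE(req->done, 1);
		return 0;
	}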
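
The KDETH OM conversion is exact because both OM factors are powers of
two, so dividing by the factor equals shifting by the stored shift
(e.g. 0x3000 / 64 == 0x3000 >> 6 == 0xc0). A sketch of how this can be
asserted against the user_sdma.h definitions:

	_Static_assert(KDETH_OM_SMALL == 1 << KDETH_OM_SMALL_SHIFT, "OM small");
	_Static_assert(KDETH_OM_LARGE == 1 << KDETH_OM_LARGE_SHIFT, "OM large");

In the same spirit, set_comp_state() now stores cq->comps[idx].status
last, after barrier(), so the compiler cannot sink the errcode store
past the status update that user space polls.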