From 2152c80ffd06f725103973dfd03381c6dc607b0b Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 10 Sep 2024 18:05:01 -0700 Subject: [PATCH] sgemm_impl: Add missing reconvergence barrier after mmio --- tests/regression/sgemm_tcore/sgemm_impl.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index d52f9b0a..f0998873 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -1022,6 +1022,10 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, /*acc=*/0, /*act=*/NO_ACTIVATION, /*skips=*/skips) #endif } + + // reconverge after mmio divergence + threadblock_barrier(threadblock_id_in_cluster, + warps_per_threadblock_per_core); #else // move A if constexpr (!TRANSPOSE_AT_PRODUCE) { @@ -1038,9 +1042,6 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, load_tile_to_smem(dim_n, block_n, block_k, B, local_b, tid_in_threadblock); - - threadblock_barrier(threadblock_id_in_cluster, - warps_per_threadblock_per_core); #endif // consumer code: SMEM->RF and compute