From 4aba01873393a5027d7d308c5eccb7f4d6b5bfa7 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Mon, 19 Aug 2024 15:33:23 -0700 Subject: [PATCH] sgemm_impl: Fix wrong barrier count; add barrier for write_to_smem --- tests/regression/sgemm_tcore/sgemm_impl.hpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index f500280e..db6800fc 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -568,7 +568,9 @@ template (sharedmem_per_threadblock + smem_a_offset); T *local_a_buf = @@ -858,7 +867,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, /*write_to_smem=*/false>( local_a_consume, local_b_consume, static_cast(nullptr) /*ignore*/, tid_in_threadblock, - threads_per_threadblock); + threads_per_threadblock, threadblocks_per_cluster, + threadblock_id_in_cluster); if constexpr (GEMMINI_DMA) { // Call gemmini fence at the end of the loop to overlap dma & wmma.