From c001618fb91adf6debcab52dd1cee984a723541b Mon Sep 17 00:00:00 2001
From: Hansung Kim
Date: Tue, 29 Oct 2024 22:35:56 -0700
Subject: [PATCH] sgemm_impl: Fix wrong next block_m logic for DMA

---
 tests/regression/sgemm_tcore/sgemm_impl.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp
index 5bd694dd..aaf66492 100644
--- a/tests/regression/sgemm_tcore/sgemm_impl.hpp
+++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp
@@ -1237,7 +1237,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
                                          : block_n;
   const uint32_t next_block_m =
       (next_block_n == 0)
-          ? ((block_m == block_m_end) ? 0 : block_n + 1)
+          ? (((block_m + 1) == block_m_end) ? block_m_start /*unused*/
+                                            : block_m + 1)
           : block_m;
 
   asm volatile("next_index_end_%=:" ::);