diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index f26697e1..d744a8c1 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -606,6 +606,9 @@ load_tile_to_smem(const uint32_t dim_major, const uint32_t mn_index, template RF @@ -682,7 +685,7 @@ __attribute__((always_inline)) inline void thread_block_gemm_single_tile( for (int wm_iter = 0; wm_iter < WMITER; wm_iter++) { #pragma GCC unroll for (int wn_iter = 0; wn_iter < WNITER; wn_iter++) { - wmma_store(tid_in_warp, warp_col, warp_row, wn_iter, wm_iter, BN, + wmma_store(tid_in_warp, warp_col, warp_row, wn_iter, wm_iter, tile_dim_n, result_addr); } } @@ -918,6 +921,7 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, constexpr MemLayout layout_a = TRANSPOSE_AT_CONSUME ? MemLayout::K_major : MemLayout::MN_major; thread_block_gemm_single_tile( local_a_consume, local_b_consume,