diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index f0998873..d1b9d76e 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -1042,6 +1042,9 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, load_tile_to_smem(dim_n, block_n, block_k, B, local_b, tid_in_threadblock); + + threadblock_barrier(threadblock_id_in_cluster, + warps_per_threadblock_per_core); #endif // consumer code: SMEM->RF and compute