sgemm_impl: Fix wrong next block_m logic for DMA

This commit is contained in:
Hansung Kim
2024-10-29 22:35:56 -07:00
parent 21b6655c10
commit c001618fb9

View File

@@ -1237,7 +1237,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
: block_n;
const uint32_t next_block_m =
(next_block_n == 0)
? ((block_m == block_m_end) ? 0 : block_n + 1)
? (((block_m + 1) == block_m_end) ? block_m_start /*unused*/
: block_m + 1)
: block_m;
asm volatile("next_index_end_%=:" ::);