sgemm_impl: Add DMA_FAST option; fix dbuf offset for DMA

This commit is contained in:
Hansung Kim
2024-09-08 14:56:48 -07:00
parent 42913c00c4
commit 443a37be6c
2 changed files with 31 additions and 16 deletions

View File

@@ -90,13 +90,22 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
thread_block_gemm<float_type, threads_per_threadblock,
/*write_to_gmem=*/true,
/*smem_a_offset=*/0,
/*smem_a_dbuf_offset=*/0,
#ifdef GEMMINI_DMA
/*smem_a_dbuf_offset=*/1 * 128 * 128 * sizeof(float_type),
/*smem_b_offset=*/2 * 128 * 128 * sizeof(float_type),
/*smem_b_dbuf_offset=*/3 * 128 * 128 * sizeof(float_type)
// FIXME: above offsets are hardcoded to agree with CISC
// spadQuartile
#else
/*smem_a_dbuf_offset=*/1 * BM * BK * sizeof(float_type),
/*smem_b_offset=*/2 * BM * BK * sizeof(float_type),
/*smem_b_dbuf_offset=*/2 * BM * BK * sizeof(float_type)>(
(const float_type *)arg->addr_a, (const float_type *)arg->addr_b,
(float *)arg->addr_c, arg->dim_m, arg->dim_n, arg->dim_k,
tid_in_threadblock, threadblocks_per_cluster, threadblock_id_in_cluster,
sharedmem_per_threadblock);
/*smem_b_dbuf_offset=*/(2 * BM * BK + BK * BN) * sizeof(float_type)
#endif
>((const float_type *)arg->addr_a,
(const float_type *)arg->addr_b, (float *)arg->addr_c,
arg->dim_m, arg->dim_n, arg->dim_k, tid_in_threadblock,
threadblocks_per_cluster, threadblock_id_in_cluster,
sharedmem_per_threadblock);
float *gmem_tmp_d0 = reinterpret_cast<float *>(0xd0000000UL);
float *gmem_tmp_d1 = reinterpret_cast<float *>(0xd1000000UL);