Switch header configs to the FlashAttention setup

2024-11-08 21:56:26 -08:00
parent 4e087a8aab
commit 1e3d476e70
3 changed files with 9 additions and 7 deletions
--- a/kernel/include/gemmini_mmio.h
+++ b/kernel/include/gemmini_mmio.h
@@ -9,9 +9,9 @@
 // #define SMEM_SIZE 0x4000
 // 64KB
 // #define SMEM_SIZE 0x10000
-// 128KB
+// 128KB (FP16 GEMM)
 // #define SMEM_SIZE 0x20000
-// 256KB
+// 256KB (FlashAttention)
 #define SMEM_SIZE 0x40000

 #define SMEM_MASK (SMEM_SIZE - 1)
--- a/tests/regression/flash_attention/flash_impl.hpp
+++ b/tests/regression/flash_attention/flash_impl.hpp
@@ -11,8 +11,10 @@
 #define ROW_REMAINDER_LOGIC

 constexpr uint32_t ROWMAX_SETS = 3;
-constexpr bool WARP_SPECIALIZED = true;
-constexpr bool TENSOR_CORE = true;
+// constexpr bool WARP_SPECIALIZED = true;
+// constexpr bool TENSOR_CORE = true;
+constexpr bool WARP_SPECIALIZED = false;
+constexpr bool TENSOR_CORE = false;

 // temporary safety stop for wrong configs
 static_assert(NUM_CORES == 4);
--- a/tests/regression/sgemm_tcore/sgemm_impl.hpp
+++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp
@@ -6,7 +6,7 @@
 #include "include/gemmini.h"
 #include "gemmini_mmio.h"

-#define FP_SIZE 16
+#define FP_SIZE 32

 // "fake" fp16 type that only has the correct data width.
 using float16_t = uint16_t;
@@ -19,7 +19,7 @@ using float_type = float16_t;

 // Generate kernel for the Hopper-style SMEM-decoupled tensor core.  This uses
 // asynchronous HGMMA and HGMMA_WAIT instructions.
-#define TENSOR_HOPPER 1
+#define TENSOR_HOPPER 0

 // Constraints on parameters:
 // * Memory:
@@ -110,7 +110,7 @@ static_assert(WMITER * WNITER * TCM * TCN * NUM_WARPS * CORES_PER_CLUSTER ==
 // result matrix will be stored in a swizzled form in the global memory.
 #define WMMA_STORE_FAST 1

-#define GEMMINI_DMA 0
+#define GEMMINI_DMA 1
 #define GEMMINI_DMA_FAST 1
 #if SMEM_SIZE == 0x4000
 #define SMEM_ADDR_Q0 ((float * const) 0xff000000)