From 7d7cb5f60aa6d7d5e214b1ab6b6775b1bd8ab5bc Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 10 Nov 2024 22:44:02 -0800 Subject: [PATCH] flash: Disable perf loop multiplier --- tests/regression/flash_attention/kernel.cpp | 2 +- tests/regression/flash_attention/kernel.gemmini.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/regression/flash_attention/kernel.cpp b/tests/regression/flash_attention/kernel.cpp index c5efbf3b..9120d683 100644 --- a/tests/regression/flash_attention/kernel.cpp +++ b/tests/regression/flash_attention/kernel.cpp @@ -355,7 +355,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { // "inner loop" along the columns of K^T const uint32_t k_tiles = (dim_seqlen / B_COL); - for (uint32_t tile_k = 0; tile_k < (4 /* for perf measurement */ * k_tiles); + for (uint32_t tile_k = 0; tile_k < (1 /* for perf measurement */ * k_tiles); tile_k++) { // float *smem_P_produce = (tile_k % 2) ? smem_P0 : smem_P1; // float *smem_P_consume = (tile_k % 2) ? smem_P1 : smem_P0; diff --git a/tests/regression/flash_attention/kernel.gemmini.cpp b/tests/regression/flash_attention/kernel.gemmini.cpp index b1ec5b29..9208e863 100644 --- a/tests/regression/flash_attention/kernel.gemmini.cpp +++ b/tests/regression/flash_attention/kernel.gemmini.cpp @@ -347,7 +347,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) { // "inner loop" along the columns of K^T const uint32_t k_tiles = (dim_seqlen / B_COL); for (uint32_t tile_k = 0; - tile_k < (4 /*for perf measurement*/ * + tile_k < (1 /*for perf measurement*/ * // virgo kernel is fully pipelined around (2 GEMMs | softmax); // requires two loop iterations to finish one tile compute (2 * k_tiles)) +