flash: Reduce smem_scratchpad alloc size

This commit is contained in:
Hansung Kim
2024-09-01 16:02:06 -07:00
parent 817cc9a5a5
commit 6cc1b5ca37

View File

@@ -545,7 +545,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
// NOTE: out-of-bounds is not checked
// TODO: reduce this from B_ROW to NUM_WARPS
constexpr uint32_t smem_scratchpad_size =
- B_ROW * NUM_THREADS * 2 /*arbitrary slack*/;
+ threads_per_warpgroup * 2 /*arbitrary slack*/;
float *smem_scratchpad = smem_O_row_scale_1 - smem_scratchpad_size;
// initialize rowmax/rowsum values in sharedmem