flash: Reduce smem_scratchpad alloc size
This commit is contained in:
@@ -545,7 +545,7 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
|
|||||||
// NOTE: out-of-bounds access is not checked
|
// NOTE: out-of-bounds access is not checked
|
||||||
// TODO: reduce this from B_ROW to NUM_WARPS
|
// TODO: reduce this from B_ROW to NUM_WARPS
|
||||||
constexpr uint32_t smem_scratchpad_size =
|
constexpr uint32_t smem_scratchpad_size =
|
||||||
B_ROW * NUM_THREADS * 2 /*arbitrary slack*/;
|
threads_per_warpgroup * 2 /*arbitrary slack*/;
|
||||||
float *smem_scratchpad = smem_O_row_scale_1 - smem_scratchpad_size;
|
float *smem_scratchpad = smem_O_row_scale_1 - smem_scratchpad_size;
|
||||||
|
|
||||||
// initialize rowmax/rowsum values in sharedmem
|
// initialize rowmax/rowsum values in sharedmem
|
||||||
|
|||||||
Reference in New Issue
Block a user