Power-specific code for kernel

This commit is contained in:
Richard Yan
2024-06-09 14:34:58 -07:00
parent 7cf59c9480
commit c327474e3b
2 changed files with 21 additions and 7 deletions

View File

@@ -32,6 +32,7 @@
#define REMATERIALIZE #define REMATERIALIZE
#define DBUF #define DBUF
//#define CISC //#define CISC
#define POWER
//#define DEBUG_PRINT //#define DEBUG_PRINT
//#define DETAILED_PERF //#define DETAILED_PERF
@@ -504,6 +505,11 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
if (threadblock_id == NUM_CLUSTERS - 1) { if (threadblock_id == NUM_CLUSTERS - 1) {
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS); threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
rd_cycles_force(marker9); rd_cycles_force(marker9);
#ifdef POWER
if (HW_TID() == 0) {
PRINTF("\nstart %d end %d\n", marker0, marker9);
}
#else
if (HW_TID() == 0) { if (HW_TID() == 0) {
PRINTF("\ncomplete\n"); PRINTF("\ncomplete\n");
PRINTF("total cycles: %d\n", marker9 - marker0); PRINTF("total cycles: %d\n", marker9 - marker0);
@@ -541,7 +547,9 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
PRINTF("\n"); PRINTF("\n");
} }
} }
#endif
} }
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
vx_tmc(0); vx_tmc(0);
} }

View File

@@ -41,6 +41,7 @@
#define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__) #define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__)
// #define PRINTF(...) vx_printf(__VA_ARGS__) // #define PRINTF(...) vx_printf(__VA_ARGS__)
#define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x)))) #define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x))))
#define POWER
inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) { inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) {
vx_fence(); vx_fence();
@@ -139,16 +140,21 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS); threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
rd_cycles_force(marker1); rd_cycles_force(marker1);
if (HW_TID() == 0) { if (HW_TID() == 0) {
PRINTF("\ncomplete\n"); #ifdef POWER
PRINTF("total cycles: %d\n", marker1 - marker0); PRINTF("\nstart %d end %d\n", marker0, marker1);
for (int i = 0; i < dim_m; i += 8) { #else
for (int j = 0; j < dim_n; j += 8) { PRINTF("\ncomplete\n");
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4])); PRINTF("total cycles: %d\n", marker1 - marker0);
for (int i = 0; i < dim_m; i += 8) {
for (int j = 0; j < dim_n; j += 8) {
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4]));
}
PRINTF("\n");
} }
PRINTF("\n"); #endif
}
} }
} }
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
vx_tmc(0); vx_tmc(0);
} }