Power-specific code for kernel
This commit is contained in:
@@ -32,6 +32,7 @@
|
|||||||
#define REMATERIALIZE
|
#define REMATERIALIZE
|
||||||
#define DBUF
|
#define DBUF
|
||||||
//#define CISC
|
//#define CISC
|
||||||
|
#define POWER
|
||||||
|
|
||||||
//#define DEBUG_PRINT
|
//#define DEBUG_PRINT
|
||||||
//#define DETAILED_PERF
|
//#define DETAILED_PERF
|
||||||
@@ -504,6 +505,11 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
|
|||||||
if (threadblock_id == NUM_CLUSTERS - 1) {
|
if (threadblock_id == NUM_CLUSTERS - 1) {
|
||||||
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
||||||
rd_cycles_force(marker9);
|
rd_cycles_force(marker9);
|
||||||
|
#ifdef POWER
|
||||||
|
if (HW_TID() == 0) {
|
||||||
|
PRINTF("\nstart %d end %d\n", marker0, marker9);
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (HW_TID() == 0) {
|
if (HW_TID() == 0) {
|
||||||
PRINTF("\ncomplete\n");
|
PRINTF("\ncomplete\n");
|
||||||
PRINTF("total cycles: %d\n", marker9 - marker0);
|
PRINTF("total cycles: %d\n", marker9 - marker0);
|
||||||
@@ -541,7 +547,9 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
|
|||||||
PRINTF("\n");
|
PRINTF("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
||||||
vx_tmc(0);
|
vx_tmc(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,7 @@
|
|||||||
#define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__)
|
#define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__)
|
||||||
// #define PRINTF(...) vx_printf(__VA_ARGS__)
|
// #define PRINTF(...) vx_printf(__VA_ARGS__)
|
||||||
#define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x))))
|
#define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x))))
|
||||||
|
#define POWER
|
||||||
|
|
||||||
inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) {
|
inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) {
|
||||||
vx_fence();
|
vx_fence();
|
||||||
@@ -139,16 +140,21 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
|
|||||||
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
||||||
rd_cycles_force(marker1);
|
rd_cycles_force(marker1);
|
||||||
if (HW_TID() == 0) {
|
if (HW_TID() == 0) {
|
||||||
PRINTF("\ncomplete\n");
|
#ifdef POWER
|
||||||
PRINTF("total cycles: %d\n", marker1 - marker0);
|
PRINTF("\nstart %d end %d\n", marker0, marker1);
|
||||||
for (int i = 0; i < dim_m; i += 8) {
|
#else
|
||||||
for (int j = 0; j < dim_n; j += 8) {
|
PRINTF("\ncomplete\n");
|
||||||
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4]));
|
PRINTF("total cycles: %d\n", marker1 - marker0);
|
||||||
|
for (int i = 0; i < dim_m; i += 8) {
|
||||||
|
for (int j = 0; j < dim_n; j += 8) {
|
||||||
|
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4]));
|
||||||
|
}
|
||||||
|
PRINTF("\n");
|
||||||
}
|
}
|
||||||
PRINTF("\n");
|
#endif
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
|
||||||
vx_tmc(0);
|
vx_tmc(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user