Merge remote-tracking branch 'origin/kernels' into kernels

This commit is contained in:
Hansung Kim
2024-06-10 16:41:36 -07:00
2 changed files with 21 additions and 7 deletions

View File

@@ -32,6 +32,7 @@
// Compile-time switches for this kernel; a switch is disabled by commenting
// its #define out.
// NOTE(review): only POWER's effect is visible in this excerpt — the meaning
// of the other switches should be confirmed against their #ifdef sites.
#define REMATERIALIZE
#define DBUF
//#define CISC
// POWER: when defined, thread 0 of the last threadblock reports only the raw
// start/end cycle markers ("start %d end %d") instead of the "complete" /
// "total cycles" report in the #else branch.
#define POWER
//#define DEBUG_PRINT
//#define DETAILED_PERF
@@ -504,6 +505,11 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
if (threadblock_id == NUM_CLUSTERS - 1) {
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
rd_cycles_force(marker9);
#ifdef POWER
if (HW_TID() == 0) {
PRINTF("\nstart %d end %d\n", marker0, marker9);
}
#else
if (HW_TID() == 0) {
PRINTF("\ncomplete\n");
PRINTF("total cycles: %d\n", marker9 - marker0);
@@ -541,7 +547,9 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
PRINTF("\n");
}
}
#endif
}
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
vx_tmc(0);
}

View File

@@ -41,6 +41,7 @@
// PRINTF formats its arguments into PRINT_BUF via sprintf instead of printing
// directly; the vx_printf variant below is the commented-out alternative.
// NOTE(review): sprintf does no bounds checking, and presumably every call
// writes from the start of PRINT_BUF again — confirm the buffer is large
// enough for the longest message and that overwriting is intended.
#define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__)
// #define PRINTF(...) vx_printf(__VA_ARGS__)
// SWISH(beta, x) expands to x / (1 + exp(-beta * x)). Both arguments are
// parenthesized, but x is expanded twice, so do not pass an expression with
// side effects.
#define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x))))
// POWER: when defined, the final report prints only the raw start/end cycle
// markers ("start %d end %d") rather than "complete" plus the total cycle
// count and sampled C values.
#define POWER
inline void threadblock_barrier(unsigned int barrier_id, unsigned int count) {
vx_fence();
@@ -139,16 +140,21 @@ void thread_block_matmul_gemmini(kernel_arg_t *__UNIFORM__ arg,
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
rd_cycles_force(marker1);
if (HW_TID() == 0) {
PRINTF("\ncomplete\n");
PRINTF("total cycles: %d\n", marker1 - marker0);
for (int i = 0; i < dim_m; i += 8) {
for (int j = 0; j < dim_n; j += 8) {
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4]));
#ifdef POWER
PRINTF("\nstart %d end %d\n", marker0, marker1);
#else
PRINTF("\ncomplete\n");
PRINTF("total cycles: %d\n", marker1 - marker0);
for (int i = 0; i < dim_m; i += 8) {
for (int j = 0; j < dim_n; j += 8) {
PRINTF("%d %d ", (int) (C[i * dim_n + j]), (int) (C[i * dim_n + j + 4]));
}
PRINTF("\n");
}
PRINTF("\n");
}
#endif
}
}
threadblock_barrier(/*barrier_id=*/0, /*count=*/NUM_WARPS);
vx_tmc(0);
}