From bca53a9c76e1edbbbdff74ec4e299951303a2a7c Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Fri, 19 Jul 2024 16:37:51 -0700 Subject: [PATCH] sgemm_tcore: Skip load at last k-iter; do DMA by default --- tests/regression/sgemm_tcore/Makefile | 2 +- tests/regression/sgemm_tcore/kernel.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/regression/sgemm_tcore/Makefile b/tests/regression/sgemm_tcore/Makefile index c9e2e02c..deb2c1ca 100644 --- a/tests/regression/sgemm_tcore/Makefile +++ b/tests/regression/sgemm_tcore/Makefile @@ -2,7 +2,7 @@ PROJECT = sgemm_tcore SRCS = main.cpp common.h -VX_SRCS = kernel.activation.cpp +VX_SRCS = kernel.cpp OPTS ?= -n16 diff --git a/tests/regression/sgemm_tcore/kernel.cpp b/tests/regression/sgemm_tcore/kernel.cpp index 6859b7c9..6e7a275a 100644 --- a/tests/regression/sgemm_tcore/kernel.cpp +++ b/tests/regression/sgemm_tcore/kernel.cpp @@ -7,7 +7,7 @@ #include "include/gemmini.h" #include "gemmini_mmio.h" -#define GEMMINI_DMA 0 +#define GEMMINI_DMA 1 #if SMEM_SIZE == 0x4000 #define SMEM_ADDR_Q0 ((float * const) 0xff000000) #define SMEM_ADDR_Q1 ((float * const) 0xff001000) @@ -368,7 +368,7 @@ inline void thread_block_gemm(kernel_arg_t *__UNIFORM__ arg, // this is either done using DMA or SIMT cores depending on GEMMINI_DMA #if (GEMMINI_DMA == 1) - if (tid_in_threadblock == 0) { + if ((tid_in_threadblock == 0) && ((block_k * BK) != (dim_k - BK))) { // configure dma gmem address to load from // FIXME: block_k is wrong ROCC_INSTRUCTION_RS1_RS2(