diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index f500280e..db6800fc 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -568,7 +568,9 @@ template (sharedmem_per_threadblock + smem_a_offset); T *local_a_buf = @@ -858,7 +867,8 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, /*write_to_smem=*/false>( local_a_consume, local_b_consume, static_cast(nullptr) /*ignore*/, tid_in_threadblock, - threads_per_threadblock); + threads_per_threadblock, threadblocks_per_cluster, + threadblock_id_in_cluster); if constexpr (GEMMINI_DMA) { // Call gemmini fence at the end of the loop to overlap dma & wmma.