From 1b133e7b5ccf435e1e5063d0619065a537508fc3 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 18 Aug 2024 22:25:01 -0700 Subject: [PATCH] sgemm_impl: Rename dmem load function --- tests/regression/sgemm_tcore/sgemm_impl.hpp | 25 +++++++++------------ 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tests/regression/sgemm_tcore/sgemm_impl.hpp b/tests/regression/sgemm_tcore/sgemm_impl.hpp index 9daaae2a..ab2a9233 100644 --- a/tests/regression/sgemm_tcore/sgemm_impl.hpp +++ b/tests/regression/sgemm_tcore/sgemm_impl.hpp @@ -395,13 +395,12 @@ template __attribute__((always_inline)) inline void -global_dmem_load_new(const uint32_t dim_col, const uint32_t mn_index, - const uint32_t k, const T *global_addr, - volatile T *local_addr, - const uint32_t tid_in_threadblock) { +load_tile_to_smem(const uint32_t dim_col, const uint32_t mn_index, + const uint32_t k, const T *global_addr, + volatile T *local_addr, const uint32_t tid_in_threadblock) { asm volatile("global_dmem_load_start_new_%=:" ::); // In fp16 mode, bit-pack two fp16 elements into each fp32 element, and do @@ -805,19 +804,17 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, #else // move A if constexpr (!TRANSPOSE_AT_PRODUCE) { - global_dmem_load_new(dim_m, block_m, block_k * BK, A, local_a, - tid_in_threadblock); + load_tile_to_smem(dim_m, block_m, block_k * BK, A, local_a, + tid_in_threadblock); } else { - global_dmem_load_new(dim_k, block_m, block_k * BK, A, local_a, - tid_in_threadblock); + load_tile_to_smem( + dim_k, block_m, block_k * BK, A, local_a, tid_in_threadblock); } // move B - global_dmem_load_new(dim_n, block_n, block_k * BK, B, local_b, - tid_in_threadblock); + load_tile_to_smem( + dim_n, block_n, block_k * BK, B, local_b, tid_in_threadblock); threadblock_barrier(threadblock_id_in_cluster, warps_per_threadblock_per_core);