From 17a9d31be58c6f2d9792bb81c83f8605de207204 Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Thu, 30 Jan 2025 15:33:58 -0800 Subject: [PATCH 1/2] fix dma invocation --- kernels/sgemm_tcore/sgemm_impl.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernels/sgemm_tcore/sgemm_impl.hpp b/kernels/sgemm_tcore/sgemm_impl.hpp index 10829db2..376a88c9 100644 --- a/kernels/sgemm_tcore/sgemm_impl.hpp +++ b/kernels/sgemm_tcore/sgemm_impl.hpp @@ -1190,10 +1190,10 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, (uint64_t)(B + /*block_k:*/ 0 * BK * dim_n + block_n * BN), k_LOOP_WS_CONFIG_ADDRS_AB) // GEMMINI_CISC(8) does k_LOOP_WS_CONFIG_STRIDES_AB - GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | 8); + GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | GEMMINI_CISC_SET_AB_STRIDE); gemmini_fence(); - GEMMINI_CISC_CMD_I(10); + GEMMINI_CISC_CMD_R((11 << 16) | (0 << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES); gemmini_fence(); #if 0 @@ -1266,8 +1266,9 @@ inline void thread_block_gemm(const T *A, const T *B, float *C, // the last iteration of the k-loop is prefetching for the first // iteration of the n-loop. The ping-poing indexing has to match for // the two loop end to connect. 
- const uint32_t opcode = 11 - (block_k & 1); - GEMMINI_CISC_CMD_I(opcode); + const uint32_t a_hexadecile = (block_k & 1) * 4; + const uint32_t b_hexadecile = (block_k & 1) * 4 + 11; + GEMMINI_CISC_CMD_R((b_hexadecile << 16) | (a_hexadecile << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES); // // TODO: branch is probably slow // if (block_k & 1) { // GEMMINI_CISC_CMD_I(12); From f8c51669c1afe14b06a00bb2d58c46968b2cfadf Mon Sep 17 00:00:00 2001 From: Richard Yan Date: Thu, 30 Jan 2025 21:17:12 -0800 Subject: [PATCH 2/2] fix toolchain env sh --- scripts/toolchain_env.sh | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/toolchain_env.sh b/scripts/toolchain_env.sh index 98b44f04..b742d1f6 100644 --- a/scripts/toolchain_env.sh +++ b/scripts/toolchain_env.sh @@ -1,21 +1,23 @@ -#!/bin/sh +#!/bin/bash # Copyright 2023 blaise -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-TOOLDIR=${TOOLDIR:=$HOME/build/vortex-toolchain-prebuilt} -export TOOLDIR +ENV_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +AE_TOOLCHAIN_DIR="$(realpath "${ENV_SCRIPT_DIR}/../../toolchain")" + +export TOOLDIR=${AE_TOOLCHAIN_DIR}/vortex-toolchain-prebuilt export VERILATOR_ROOT=$TOOLDIR/verilator export PATH=$VERILATOR_ROOT/bin:$PATH @@ -27,7 +29,7 @@ export YOSYS_PATH=$TOOLDIR/yosys export PATH=$YOSYS_PATH/bin:$PATH # LLVM_POCL seems to be only used in tests/opencl -export LLVM_POCL=/home/virgo-ae/build/llvm-vortex2 -export LLVM_VORTEX=/home/virgo-ae/build/llvm-vortex2 -export POCL_CC_PATH=/home/virgo-ae/build/pocl-vortex2/compiler -export POCL_RT_PATH=/home/virgo-ae/build/pocl-vortex2/runtime +export LLVM_POCL=${AE_TOOLCHAIN_DIR}/llvm-vortex2 +export LLVM_VORTEX=${AE_TOOLCHAIN_DIR}/llvm-vortex2 +export POCL_CC_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/compiler +export POCL_RT_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/runtime