Merge branch 'ae' into ae-flash-virgo

This commit is contained in:
Richard Yan
2025-01-30 23:42:05 -08:00
2 changed files with 17 additions and 14 deletions

View File

@@ -1190,10 +1190,10 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
(uint64_t)(B + /*block_k:*/ 0 * BK * dim_n + block_n * BN), (uint64_t)(B + /*block_k:*/ 0 * BK * dim_n + block_n * BN),
k_LOOP_WS_CONFIG_ADDRS_AB) k_LOOP_WS_CONFIG_ADDRS_AB)
// GEMMINI_CISC(8) does k_LOOP_WS_CONFIG_STRIDES_AB // GEMMINI_CISC(8) does k_LOOP_WS_CONFIG_STRIDES_AB
GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | 8); GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | GEMMINI_CISC_SET_AB_STRIDE);
gemmini_fence(); gemmini_fence();
GEMMINI_CISC_CMD_I(10); GEMMINI_CISC_CMD_R((11 << 16) | (0 << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES);
gemmini_fence(); gemmini_fence();
#if 0 #if 0
@@ -1266,8 +1266,9 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
// the last iteration of the k-loop is prefetching for the first // the last iteration of the k-loop is prefetching for the first
// iteration of the n-loop. The ping-pong indexing has to match for // iteration of the n-loop. The ping-pong indexing has to match for
// the two loop ends to connect. // the two loop ends to connect.
const uint32_t opcode = 11 - (block_k & 1); const uint32_t a_hexadecile = (block_k & 1) * 4;
GEMMINI_CISC_CMD_I(opcode); const uint32_t b_hexadecile = (block_k & 1) * 4 + 11;
GEMMINI_CISC_CMD_R((b_hexadecile << 16) | (a_hexadecile << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES);
// // TODO: branch is probably slow // // TODO: branch is probably slow
// if (block_k & 1) { // if (block_k & 1) {
// GEMMINI_CISC_CMD_I(12); // GEMMINI_CISC_CMD_I(12);

View File

@@ -1,21 +1,23 @@
#!/bin/sh #!/bin/bash
# Copyright 2023 blaise # Copyright 2023 blaise
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
TOOLDIR=${TOOLDIR:=$HOME/build/vortex-toolchain-prebuilt} ENV_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export TOOLDIR AE_TOOLCHAIN_DIR="$(realpath ${ENV_SCRIPT_DIR}/../../toolchain)"
export TOOLDIR=${AE_TOOLCHAIN_DIR}/vortex-toolchain-prebuilt
export VERILATOR_ROOT=$TOOLDIR/verilator export VERILATOR_ROOT=$TOOLDIR/verilator
export PATH=$VERILATOR_ROOT/bin:$PATH export PATH=$VERILATOR_ROOT/bin:$PATH
@@ -27,7 +29,7 @@ export YOSYS_PATH=$TOOLDIR/yosys
export PATH=$YOSYS_PATH/bin:$PATH export PATH=$YOSYS_PATH/bin:$PATH
# LLVM_POCL seems to be only used in tests/opencl # LLVM_POCL seems to be only used in tests/opencl
export LLVM_POCL=/home/virgo-ae/build/llvm-vortex2 export LLVM_POCL=${AE_TOOLCHAIN_DIR}/llvm-vortex2
export LLVM_VORTEX=/home/virgo-ae/build/llvm-vortex2 export LLVM_VORTEX=${AE_TOOLCHAIN_DIR}/llvm-vortex2
export POCL_CC_PATH=/home/virgo-ae/build/pocl-vortex2/compiler export POCL_CC_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/compiler
export POCL_RT_PATH=/home/virgo-ae/build/pocl-vortex2/runtime export POCL_RT_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/runtime