Merge branch 'ae' into ae-flash-ampere
This commit is contained in:
@@ -1190,10 +1190,10 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
|
|||||||
(uint64_t)(B + /*block_k:*/ 0 * BK * dim_n + block_n * BN),
|
(uint64_t)(B + /*block_k:*/ 0 * BK * dim_n + block_n * BN),
|
||||||
k_LOOP_WS_CONFIG_ADDRS_AB)
|
k_LOOP_WS_CONFIG_ADDRS_AB)
|
||||||
// GEMMINI_CISC(8) does k_LOOP_WS_CONFIG_STRIDES_AB
|
// GEMMINI_CISC(8) does k_LOOP_WS_CONFIG_STRIDES_AB
|
||||||
GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | 8);
|
GEMMINI_CISC_CMD_R((dim_n << 20) | (dim_k << 8) | GEMMINI_CISC_SET_AB_STRIDE);
|
||||||
gemmini_fence();
|
gemmini_fence();
|
||||||
|
|
||||||
GEMMINI_CISC_CMD_I(10);
|
GEMMINI_CISC_CMD_R((11 << 16) | (0 << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES);
|
||||||
gemmini_fence();
|
gemmini_fence();
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@@ -1266,8 +1266,9 @@ inline void thread_block_gemm(const T *A, const T *B, float *C,
|
|||||||
// the last iteration of the k-loop is prefetching for the first
|
// the last iteration of the k-loop is prefetching for the first
|
||||||
// iteration of the n-loop. The ping-pong indexing has to match for
|
// iteration of the n-loop. The ping-pong indexing has to match for
|
||||||
// the two loop ends to connect.
|
// the two loop ends to connect.
|
||||||
const uint32_t opcode = 11 - (block_k & 1);
|
const uint32_t a_hexadecile = (block_k & 1) * 4;
|
||||||
GEMMINI_CISC_CMD_I(opcode);
|
const uint32_t b_hexadecile = (block_k & 1) * 4 + 11;
|
||||||
|
GEMMINI_CISC_CMD_R((b_hexadecile << 16) | (a_hexadecile << 8) | GEMMINI_CISC_LOAD_TO_HEXADECILES);
|
||||||
// // TODO: branch is probably slow
|
// // TODO: branch is probably slow
|
||||||
// if (block_k & 1) {
|
// if (block_k & 1) {
|
||||||
// GEMMINI_CISC_CMD_I(12);
|
// GEMMINI_CISC_CMD_I(12);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#!/bin/sh
|
#!/bin/bash
|
||||||
|
|
||||||
# Copyright 2023 blaise
|
# Copyright 2023 blaise
|
||||||
#
|
#
|
||||||
@@ -14,8 +14,10 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
TOOLDIR=${TOOLDIR:=$HOME/build/vortex-toolchain-prebuilt}
|
ENV_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
export TOOLDIR
|
AE_TOOLCHAIN_DIR="$(realpath ${ENV_SCRIPT_DIR}/../../toolchain)"
|
||||||
|
|
||||||
|
export TOOLDIR=${AE_TOOLCHAIN_DIR}/vortex-toolchain-prebuilt
|
||||||
|
|
||||||
export VERILATOR_ROOT=$TOOLDIR/verilator
|
export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||||
export PATH=$VERILATOR_ROOT/bin:$PATH
|
export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||||
@@ -27,7 +29,7 @@ export YOSYS_PATH=$TOOLDIR/yosys
|
|||||||
export PATH=$YOSYS_PATH/bin:$PATH
|
export PATH=$YOSYS_PATH/bin:$PATH
|
||||||
|
|
||||||
# LLVM_POCL seems to be only used in tests/opencl
|
# LLVM_POCL seems to be only used in tests/opencl
|
||||||
export LLVM_POCL=/home/virgo-ae/build/llvm-vortex2
|
export LLVM_POCL=${AE_TOOLCHAIN_DIR}/llvm-vortex2
|
||||||
export LLVM_VORTEX=/home/virgo-ae/build/llvm-vortex2
|
export LLVM_VORTEX=${AE_TOOLCHAIN_DIR}/llvm-vortex2
|
||||||
export POCL_CC_PATH=/home/virgo-ae/build/pocl-vortex2/compiler
|
export POCL_CC_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/compiler
|
||||||
export POCL_RT_PATH=/home/virgo-ae/build/pocl-vortex2/runtime
|
export POCL_RT_PATH=${AE_TOOLCHAIN_DIR}/pocl-vortex2/runtime
|
||||||
|
|||||||
Reference in New Issue
Block a user