diff --git a/kernels/flash_attention/compile_flash.sh b/kernels/flash_attention/compile_flash.sh index 5808508c..a2209047 100755 --- a/kernels/flash_attention/compile_flash.sh +++ b/kernels/flash_attention/compile_flash.sh @@ -27,6 +27,7 @@ ln -sf input.c.rand.fp32.seqlen1024headdim64.row.bin input.c.bin for arch in "${archs[@]}"; do git checkout ae-flash-$arch + git pull # re-compile libvortexrt.a # FIXME after restructure diff --git a/kernels/sgemm_gemmini_dma/compile_virgo.sh b/kernels/sgemm_gemmini_dma/compile_virgo.sh index f2deb04c..de9b4b38 100755 --- a/kernels/sgemm_gemmini_dma/compile_virgo.sh +++ b/kernels/sgemm_gemmini_dma/compile_virgo.sh @@ -1,5 +1,9 @@ #!/bin/sh +# hopper and virgo have the same SIMT configurations +git checkout ae-hopper +git pull + if [ ! -f input.a.rand01.fp16.m256n256k256.row.bin ]; then echo "input binaries not found, generating operands" python3 generate_operands.py diff --git a/kernels/sgemm_gemmini_dma/kernel.cpp b/kernels/sgemm_gemmini_dma/kernel.cpp index 8ae36bd8..5b6c3f71 100644 --- a/kernels/sgemm_gemmini_dma/kernel.cpp +++ b/kernels/sgemm_gemmini_dma/kernel.cpp @@ -53,7 +53,7 @@ #define PRINTF(...) sprintf(PRINT_BUF, __VA_ARGS__) // #define PRINTF(...) vx_printf(__VA_ARGS__) #define SWISH(beta, x) ((x) / (1 + exp(-(beta) * (x)))) -// #define POWER +#define POWER typedef uint16_t smem_elem_t; // typedef float smem_elem_t; diff --git a/kernels/sgemm_tcore/compile_tcore.sh b/kernels/sgemm_tcore/compile_tcore.sh index 128eb175..d338521b 100755 --- a/kernels/sgemm_tcore/compile_tcore.sh +++ b/kernels/sgemm_tcore/compile_tcore.sh @@ -53,6 +53,7 @@ done for arch in "${archs[@]}"; do git checkout ae-$arch + git pull # re-compile libvortexrt.a # FIXME after restructure