Merge branch 'ae' into ae-flash-virgo

2025-02-07 14:53:04 -08:00 · 2025-01-31 03:53:39 -08:00 · 2025-01-30 23:42:05 -08:00 · 2025-01-30 13:25:00 -08:00 · 2025-01-30 01:49:05 -08:00 · 2025-01-30 01:23:01 -08:00
6 changed files with 11 additions and 7 deletions
--- a/kernels/sgemm_tcore/kernel.cpp
+++ b/kernels/sgemm_tcore/kernel.cpp
@@ -95,6 +95,8 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
  constexpr uint32_t quartile = (128 << 10) >> 2; // 128KB / 4
  static_assert((quartile * 4) == SMEM_SIZE, "wrong quartile constant");

+  MARK_BEG();
+
  constexpr uint32_t smem_a_offset = 0;
  constexpr uint32_t smem_a_dbuf_offset = 1 * quartile;
  constexpr uint32_t smem_b_offset =
@@ -119,6 +121,8 @@ void kernel_body(int task_id, kernel_arg_t *__UNIFORM__ arg) {
                      threadblocks_per_cluster, threadblock_id_in_cluster,
                      sharedmem_per_threadblock);

+  MARK_END();
+
  float *gmem_tmp_d0 = reinterpret_cast<float *>(0xd0000000UL);
  float *gmem_tmp_d1 = reinterpret_cast<float *>(0xd1000000UL);
  float *gmem_tmp_d2 = reinterpret_cast<float *>(0xd2000000UL);
--- a/kernels/sgemm_tcore/sgemm_impl.hpp
+++ b/kernels/sgemm_tcore/sgemm_impl.hpp
@@ -6,7 +6,7 @@
 #include "include/gemmini.h"
 #include "gemmini_mmio.h"

-#define FP_SIZE 16
+#define FP_SIZE 32

 // "fake" fp16 type that only has the correct data width.
 using float16_t = uint16_t;
@@ -110,7 +110,7 @@ static_assert(WMITER * WNITER * TCM * TCN * NUM_WARPS * CORES_PER_CLUSTER ==
 // result matrix will be stored in a swizzled form in the global memory.
 #define WMMA_STORE_FAST 0

-#define GEMMINI_DMA 0
+#define GEMMINI_DMA 1
 #define GEMMINI_DMA_FAST 1
 #if SMEM_SIZE == 0x4000
 #define SMEM_ADDR_Q0 ((float * const) 0xff000000)
--- a/lib/gemmini/include/gemmini_params.h
+++ b/lib/gemmini/include/gemmini_params.h
@@ -1 +1 @@
-gemmini_params.dim16fp16.h
+gemmini_params.dim8fp32.h
--- a/lib/include/VX_config.h
+++ b/lib/include/VX_config.h
@@ -84,7 +84,7 @@
 #endif

 #ifndef NUM_CORES
-#define NUM_CORES 8
+#define NUM_CORES 4
 #endif

 #ifndef NUM_WARPS
--- a/lib/include/gemmini_mmio.h
+++ b/lib/include/gemmini_mmio.h
@@ -12,9 +12,9 @@
 // 64KB
 // #define SMEM_SIZE 0x10000
 // 128KB (FP16 GEMM)
-#define SMEM_SIZE 0x20000
+// #define SMEM_SIZE 0x20000
 // 256KB (FlashAttention)
-// #define SMEM_SIZE 0x40000
+#define SMEM_SIZE 0x40000

 #define SMEM_MASK (SMEM_SIZE - 1)
 #define SMEM_ADDR_END (SMEM_BASE + SMEM_SIZE)
--- a/lib/include/vx_spawn.h
+++ b/lib/include/vx_spawn.h
@@ -18,7 +18,7 @@
 #include <stdio.h>

 #ifndef CORES_PER_CLUSTER
-#define CORES_PER_CLUSTER 8
+#define CORES_PER_CLUSTER 4
 #endif

 #ifdef __cplusplus
Author	SHA1	Message	Date
Virgo-AE Eval	c24585570d	Merge branch 'ae' into ae-flash-virgo	2025-02-07 14:53:04 -08:00
Richard Yan	8071faf7c2	Merge branch 'ae' into ae-flash-virgo	2025-01-31 03:53:39 -08:00
Richard Yan	a4bd41392c	Merge branch 'ae' into ae-flash-virgo	2025-01-30 23:42:05 -08:00
Hansung Kim	692f3dddff	Merge branch 'ae' into ae-flash-virgo	2025-01-30 13:25:00 -08:00
Hansung Kim	c75ed0d531	Merge branch 'ae' into ae-flash-virgo	2025-01-30 01:49:05 -08:00
Hansung Kim	96500e0abc	Turn off TENSOR_HOPPER for Virgo flash	2025-01-30 01:23:01 -08:00
Hansung Kim	4f12227327	Increase SMEM size for flash	2025-01-30 01:17:38 -08:00
Hansung Kim	efd2d232fe	Merge branch 'ae' into ae-flash-virgo	2025-01-30 01:16:23 -08:00
Hansung Kim	b97df2ce6a	Switch to fp32 for flash	2025-01-30 01:12:32 -08:00
Hansung Kim	e4f8f3481c	Merge branch 'ae' into ae-hopper	2025-01-30 01:05:31 -08:00
Hansung Kim	c7f713c71e	Merge branch 'ae' into ae-hopper	2025-01-30 00:49:23 -08:00
Hansung Kim	b06e345706	Merge branch 'ae' into ae-hopper	2025-01-30 00:35:10 -08:00
Hansung Kim	8a635b5fcb	Set TENSOR_HOPPER to 1, add missing markers	2025-01-30 00:34:13 -08:00
Richard Yan	f23b2a3fcc	Merge branch 'ae' into ae-hopper	2025-01-29 23:31:21 -08:00
Richard Yan	ac34a8f5f5	hopper changes	2025-01-29 22:22:34 -08:00