From 5821bfd10d88566e578a13dc58ccf3a04f0b4d9a Mon Sep 17 00:00:00 2001
From: Hansung Kim <hansung_kim@berkeley.edu>
Date: Wed, 8 May 2024 13:22:26 -0700
Subject: [PATCH] Repeat vx_wmma instruction issue (100x) & hardcode dst address

---
 tests/kernel/reductions/main.cpp | 3 ++-
 tests/kernel/tensor/main.cpp     | 8 ++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/tests/kernel/reductions/main.cpp b/tests/kernel/reductions/main.cpp
index edde1da4..fcadddb6 100644
--- a/tests/kernel/reductions/main.cpp
+++ b/tests/kernel/reductions/main.cpp
@@ -138,6 +138,7 @@ void test_maxu_reduce() {
 	y = reduced;
 }
 
+// NOTE: bit_vectors holds exactly 4 entries, so this test assumes NUM_THREADS == 4.
 unsigned bit_vectors[4] = {0b11010110000111001100010100100110, 0b10010100011010001010000000001110, 0b10001001010111110001110000000010, 0b00010011010100101101110111001111};
 
 void test_and_reduce() {
@@ -213,4 +214,4 @@ int main()
 	
 
 	return 0;
-}
\ No newline at end of file
+}
diff --git a/tests/kernel/tensor/main.cpp b/tests/kernel/tensor/main.cpp
index 5fc222b2..0fc4274d 100644
--- a/tests/kernel/tensor/main.cpp
+++ b/tests/kernel/tensor/main.cpp
@@ -65,6 +65,7 @@ float results[32*8];
 void store_wmma_result() {
 	int tid = vx_thread_id();
 	
+	float *results = reinterpret_cast<float *>(0xc0000000UL);
 	asm volatile ("fsw f16, %0" :: "m"(results[tid*8+0])); 
 	asm volatile ("fsw f17, %0" :: "m"(results[tid*8+1])); 
 	asm volatile ("fsw f18, %0" :: "m"(results[tid*8+2])); 
@@ -87,10 +88,13 @@ int main()
 {
 	vx_tmc(-1);
 	vx_wmma_load();
-	vx_wmma();
+#pragma GCC unroll 100
+	for (int i = 0; i < 100; i++) {
+		vx_wmma();
+	}
 	store_wmma_result();
 	vx_tmc(1);
 	// print_wmma_result();
 	
 	return 0;
-}
\ No newline at end of file
+}