From 2743d32bd2658b362656088f45736942a6e699bc Mon Sep 17 00:00:00 2001
From: Hansung Kim <hansung_kim@berkeley.edu>
Date: Thu, 30 May 2024 15:25:00 -0700
Subject: [PATCH] tensor: Handle wid queue backpressure in dpu

---
 hw/rtl/core/VX_tensor_core.sv | 4 ++--
 hw/rtl/fpu/VX_tensor_dpu.sv   | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/hw/rtl/core/VX_tensor_core.sv b/hw/rtl/core/VX_tensor_core.sv
index 71b17e08..2ddd6a70 100644
--- a/hw/rtl/core/VX_tensor_core.sv
+++ b/hw/rtl/core/VX_tensor_core.sv
@@ -77,7 +77,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
     // octet. E.g. two tgs map lane 0-3 and lane 16-19 -> 16
     // FIXME: not sure this is the right logic.  just filling in what works
     localparam LANE_OFFSET_THREADGROUP = (4 * NUM_OCTETS);
-    localparam REQ_QUEUE_DEPTH = 4;
+    localparam METADATA_QUEUE_DEPTH = 4;
 
     wire [1:0] step = 2'(execute_if.data.op_type);
     wire last_in_pair = (execute_if.data.op_mod == `INST_MOD_BITS'(1));
@@ -220,7 +220,7 @@ module VX_tensor_core_warp import VX_gpu_pkg::*; #(
 
         VX_fifo_queue #(
             .DATAW(DATAW),
-            .DEPTH(REQ_QUEUE_DEPTH)
+            .DEPTH(METADATA_QUEUE_DEPTH)
         ) pending_uops (
             .clk(clk),
             .reset(reset),
diff --git a/hw/rtl/fpu/VX_tensor_dpu.sv b/hw/rtl/fpu/VX_tensor_dpu.sv
index 49d2418d..870f6870 100644
--- a/hw/rtl/fpu/VX_tensor_dpu.sv
+++ b/hw/rtl/fpu/VX_tensor_dpu.sv
@@ -93,7 +93,7 @@ module VX_tensor_dpu #(
     wire enq = valid_in && ready_in;
     wire deq = valid_out && ready_out;
 
-    assign ready_in  = &(threadgroup_readys);
+    assign ready_in  = &(threadgroup_readys) && !full; // stall input while the wid queue is full
     assign valid_out = &(threadgroup_valids);
 
     // need to pass along warp id's to do multithreading
@@ -109,13 +109,11 @@ module VX_tensor_dpu #(
         .data_out  (D_wid),
         .empty     (empty),
         `UNUSED_PIN(alm_empty),
-        .full      (full), // should be impossible to overflow
+        .full      (full),
         `UNUSED_PIN(alm_full),
         `UNUSED_PIN(size)
     );
 
-    `RUNTIME_ASSERT(reset || !full, ("dpu wid queue is full!"))
-
     // `RUNTIME_ASSERT(reset || (&(threadgroup_valids) == valid_out),
     //                 ("FEDP and metadata queue went out of sync!"))
 endmodule