diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 2a39c058..67c077ef 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -209,13 +209,15 @@ module VX_scoreboard import VX_gpu_pkg::*; #( assign perf_issue_fires_per_cycle[i] = ibuffer_if[i].valid && ibuffer_if[i].ready; `endif - // NOTE(hansung): why is inuse_rd checked? to prevent WAW? wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3}; `ifdef EXT_T_HOPPER wire hgmma_wait = ibuffer_if[i].valid && (ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) && (ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT); - wire hgmma_ready = ~(hgmma_wait && inuse_tensor[ibuffer_if[i].data.wis]); + // Block both HGMMA and HGMMA_WAIT until inuse_tensor clears; passing HGMMA through + // would make the tensor core's busy state inaccurate for blocking WAITs. + // NOTE(review): hgmma_ready no longer tests hgmma_wait/op_type -- confirm intended. + wire hgmma_ready = !inuse_tensor[ibuffer_if[i].data.wis]; wire operands_ready = (~(| operands_busy)) && hgmma_ready; `else wire operands_ready = ~(| operands_busy); diff --git a/hw/rtl/core/VX_tensor_hopper_core.sv b/hw/rtl/core/VX_tensor_hopper_core.sv index 32fa6f5b..b03b0b0e 100644 --- a/hw/rtl/core/VX_tensor_hopper_core.sv +++ b/hw/rtl/core/VX_tensor_hopper_core.sv @@ -80,8 +80,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( ); end - // this shouldn't really happen unless there's a big contention over - // the commit stage + // NOTE: a full queue is not an error in itself; this check flags that the backend + // is not keeping up with HGMMA calls from the kernel `RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!")) wire initiate_ready; @@ -222,7 +222,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( commit_if.data.PC = execute_if_data_PC[0]; commit_if.data.wb = execute_if_data_wb[0]; commit_if.data.rd = execute_if_data_rd[0]; - commit_if.data.data = '0; // FIXME ? 
+ commit_if.data.data = '0; // value is arbitrary, assuming rd is zero here (TODO confirm) commit_if.data.tensor = 1'b0; commit_if.data.pid = 1'b0; commit_if.data.sop = 1'b1;