# Patch summary (unified diff, SystemVerilog RTL, two files).
#
# hw/rtl/core/VX_scoreboard.sv
#   * Adds two `RUNTIME_ASSERT checks on the inflight_tensor counters:
#       - underflow: when writeback_fire is set and writeback_if[i].data.tensor
#         is set, inflight_tensor[writeback_if[i].data.wis] must not be
#         INFLT_WIDTH'(0);
#       - overflow: when ibuffer_if[i].valid && ibuffer_if[i].ready &&
#         hgmma_start, inflight_tensor[ibuffer_if[i].data.wis] must not be
#         INFLT_MAX.
#   * The decrement of inflight_tensor on a tensor writeback now indexes the
#     counter with writeback_if[i].data.wis instead of ibuffer_if[i].data.wis.
#   * The "invalid writeback register" assert no longer checks
#     inuse_regs[...][rd] when writeback_if[i].data.tensor is set.
#
# hw/rtl/core/VX_tensor_hopper_core.sv
#   * initiate_valid is now derived from metadata_deq instead of
#     metadata_valid (still masked with !hmma_wait).
#
# NOTE(review): the new inflight_tensor asserts are added next to the
#   always block, while the visible inflight_tensor updates sit under
#   `ifdef EXT_T_HOPPER. Confirm the asserts are also guarded (or that
#   inflight_tensor / hgmma_start / INFLT_MAX exist in every configuration).
# NOTE(review): the writeback and ibuffer updates are two separate
#   non-blocking assignments to inflight_tensor; presumably they can fire in
#   the same cycle for the same wis, in which case only the later assignment
#   would take effect. Verify against the full always block.
# NOTE(review): the hunk headers describe multi-line hunks, but the text
#   below has its line breaks collapsed, so it will not apply as-is.
diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index a8de44d8..7888788f 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -248,6 +248,20 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .ready_out (scoreboard_if[i].ready) ); + // inflight_tensor overflow/underflow check + `RUNTIME_ASSERT( + !(writeback_fire && writeback_if[i].data.tensor) || + inflight_tensor[writeback_if[i].data.wis] != INFLT_WIDTH'(0), + ("%t: *** core%0d: wid=%0d, underflow at inflight_tensor!", + $time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i)) + ) + `RUNTIME_ASSERT( + !(ibuffer_if[i].valid && ibuffer_if[i].ready && hgmma_start) || + inflight_tensor[ibuffer_if[i].data.wis] != INFLT_MAX, + ("%t: *** core%0d: wid=%0d, overflow at inflight_tensor!", + $time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i)) + ) + always @(posedge clk) begin if (reset) begin inuse_regs <= '0; @@ -261,8 +275,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `ifdef EXT_T_HOPPER if (writeback_fire && writeback_if[i].data.tensor) begin - inflight_tensor[ibuffer_if[i].data.wis] <= - inflight_tensor[ibuffer_if[i].data.wis] - INFLT_WIDTH'(1); + inflight_tensor[writeback_if[i].data.wis] <= + inflight_tensor[writeback_if[i].data.wis] - INFLT_WIDTH'(1); end if (ibuffer_if[i].valid && ibuffer_if[i].ready && hgmma_start) begin inflight_tensor[ibuffer_if[i].data.wis] <= @@ -305,7 +319,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #( $time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr, operands_busy, ibuffer_if[i].data.uuid)); - `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0, + `RUNTIME_ASSERT((~writeback_fire || + writeback_if[i].data.tensor /* dont check rd for tensor ghost writes */ || + inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0), ("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d 
(#%0d)", $time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i), writeback_if[i].data.PC, writeback_if[i].data.tmask, writeback_if[i].data.rd, writeback_if[i].data.uuid)); `endif diff --git a/hw/rtl/core/VX_tensor_hopper_core.sv b/hw/rtl/core/VX_tensor_hopper_core.sv index ad79ac1a..ad661301 100644 --- a/hw/rtl/core/VX_tensor_hopper_core.sv +++ b/hw/rtl/core/VX_tensor_hopper_core.sv @@ -89,7 +89,9 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( wire hmma_wait = metadata_valid && (execute_if_data_op_type == `INST_TENSOR_HGMMA_WAIT); // skip HGMMA_WAIT for kickoff - wire initiate_valid = metadata_valid && !hmma_wait; + // should be metadata_deq not metadata_valid, since initiate should be + // synced with metadata->commit path + wire initiate_valid = metadata_deq && !hmma_wait; wire [`NW_WIDTH-1:0] initiate_wid = execute_if_data_wid; // we're recycling execute_if.op_type as operands_if.op_type which might