tensor: Block both HGMMA/HGMMA_WAIT at scoreboard

If we let back-to-back HGMMAs pass through the scoreboard, we can't
accurately keep track of the busy state of the tensor core, and therefore
can't block HGMMA_WAITs accordingly.

TODO: Distinguish "ready-to-fire" from "ready-to-use-writeback".
This commit is contained in:
Hansung Kim
2024-10-22 21:10:55 -07:00
parent 83979c3341
commit 98eb7cb594
2 changed files with 7 additions and 5 deletions

View File

@@ -209,13 +209,15 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
assign perf_issue_fires_per_cycle[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
`endif
// NOTE(hansung): why is inuse_rd checked? to prevent WAW?
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
`ifdef EXT_T_HOPPER
wire hgmma_wait = ibuffer_if[i].valid &&
(ibuffer_if[i].data.ex_type == `EX_BITS'(`EX_TENSOR)) &&
(ibuffer_if[i].data.op_type == `INST_TENSOR_HGMMA_WAIT);
wire hgmma_ready = ~(hgmma_wait && inuse_tensor[ibuffer_if[i].data.wis]);
// block both HGMMA and HGMMA_WAIT until inuse goes down. If we pass
// HGMMA through, we can't accurately keep track of the busy state of
// the tensor core and block WAITs accordingly.
wire hgmma_ready = !inuse_tensor[ibuffer_if[i].data.wis];
wire operands_ready = (~(| operands_busy)) && hgmma_ready;
`else
wire operands_ready = ~(| operands_busy);

View File

@@ -80,8 +80,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
);
end
// this shouldn't really happen unless there's a big contention over
// the commit stage
// NOTE: this is not an error, but it tells us when the backend isn't keeping
// up with HGMMA calls from the kernel
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
wire initiate_ready;
@@ -222,7 +222,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
commit_if.data.PC = execute_if_data_PC[0];
commit_if.data.wb = execute_if_data_wb[0];
commit_if.data.rd = execute_if_data_rd[0];
commit_if.data.data = '0; // FIXME ?
commit_if.data.data = '0; // can be arbitrary as rd is zero
commit_if.data.tensor = 1'b0;
commit_if.data.pid = 1'b0;
commit_if.data.sop = 1'b1;