tensor: Bore tensor regfile IO to execute units
This commit is contained in:
@@ -63,6 +63,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
||||
VX_decode_if decode_if();
|
||||
VX_sched_csr_if sched_csr_if();
|
||||
VX_decode_sched_if decode_sched_if();
|
||||
VX_tc_rf_if tensor_regfile_if();
|
||||
VX_commit_sched_if commit_sched_if();
|
||||
VX_commit_csr_if commit_csr_if();
|
||||
VX_branch_ctl_if branch_ctl_if[`NUM_ALU_BLOCKS]();
|
||||
@@ -190,6 +191,9 @@ module VX_core import VX_gpu_pkg::*; #(
|
||||
`endif
|
||||
`ifdef EXT_T_ENABLE
|
||||
.tensor_dispatch_if(tensor_dispatch_if),
|
||||
`ifdef EXT_T_HOPPER
|
||||
.tensor_regfile_if (tensor_regfile_if),
|
||||
`endif
|
||||
`endif
|
||||
.sfu_dispatch_if(sfu_dispatch_if)
|
||||
);
|
||||
@@ -221,6 +225,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
||||
.tensor_dispatch_if (tensor_dispatch_if),
|
||||
.tensor_commit_if (tensor_commit_if),
|
||||
`ifdef EXT_T_HOPPER
|
||||
.tensor_regfile_if (tensor_regfile_if),
|
||||
.tensor_smem_A_if (tensor_smem_A_if),
|
||||
.tensor_smem_B_if (tensor_smem_B_if),
|
||||
`endif
|
||||
|
||||
@@ -59,6 +59,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
||||
VX_dispatch_if.slave tensor_dispatch_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master tensor_commit_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_T_HOPPER
|
||||
VX_tc_rf_if.master tensor_regfile_if,
|
||||
VX_tc_bus_if.master tensor_smem_A_if,
|
||||
VX_tc_bus_if.master tensor_smem_B_if,
|
||||
`endif
|
||||
@@ -156,6 +157,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
||||
|
||||
.dispatch_if(tensor_dispatch_if),
|
||||
`ifdef EXT_T_HOPPER
|
||||
.regfile_if(tensor_regfile_if),
|
||||
.smem_A_if(tensor_smem_A_if),
|
||||
.smem_B_if(tensor_smem_B_if),
|
||||
`endif
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_trace.vh"
|
||||
|
||||
module VX_issue #(
|
||||
module VX_issue import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
@@ -36,6 +36,9 @@ module VX_issue #(
|
||||
`endif
|
||||
`ifdef EXT_T_ENABLE
|
||||
VX_dispatch_if.master tensor_dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_T_HOPPER
|
||||
VX_tc_rf_if.slave tensor_regfile_if,
|
||||
`endif
|
||||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
@@ -75,22 +78,6 @@ module VX_issue #(
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
||||
|
||||
// /*
|
||||
// fake fsm driving tc output
|
||||
reg [11:0] counter;
|
||||
wire tc_rf_valid;
|
||||
wire [4:0] tc_rf_addr;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
counter <= 12'd1;
|
||||
end else begin
|
||||
counter <= counter + 12'd1;
|
||||
end
|
||||
end
|
||||
assign tc_rf_valid = (counter[6:0] == 7'd0);
|
||||
assign tc_rf_addr = counter[11:7];
|
||||
// */
|
||||
`ifdef GPR_DUPLICATED
|
||||
VX_operands_dup #(
|
||||
`else
|
||||
@@ -104,11 +91,7 @@ module VX_issue #(
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if),
|
||||
`ifdef GPR_DUPLICATED
|
||||
.tc_rf_valid ('{`ISSUE_WIDTH{tc_rf_valid}}),
|
||||
.tc_rf_addr ('{`ISSUE_WIDTH{tc_rf_addr}}),
|
||||
.tc_rf_data ()
|
||||
`endif
|
||||
.tensor_regfile_if (tensor_regfile_if)
|
||||
);
|
||||
|
||||
VX_dispatch #(
|
||||
|
||||
@@ -24,11 +24,8 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH],
|
||||
|
||||
input wire tc_rf_valid [`ISSUE_WIDTH],
|
||||
input wire [`LOG2UP(`NUM_REGS * ISSUE_RATIO)-1:0] tc_rf_addr [`ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] tc_rf_data [`ISSUE_WIDTH]
|
||||
VX_tc_rf_if.slave tensor_regfile_if,
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||
@@ -47,6 +44,18 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
||||
logic [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] empty2;
|
||||
logic [`ISSUE_WIDTH-1:0][2:0] size1;
|
||||
|
||||
// Adapter between the single tensor-core regfile port (tensor_regfile_if)
// and the per-issue-slot tc_rf_* signals used inside this module.
// The one incoming request is replicated to every issue slot; only slot 0's
// read data is handed back on the response.
// tc_rf_valid[i] is also used further down to hold scoreboard_if[i].ready low
// while a tensor-core read is being requested.
// NOTE(review): the logic that drives tc_rf_data is outside this excerpt.
wire tc_rf_valid [`ISSUE_WIDTH];
|
||||
wire [`LOG2UP(`NUM_REGS * ISSUE_RATIO)-1:0] tc_rf_addr [`ISSUE_WIDTH];
|
||||
// FIXME: don't need full ISSUE_WIDTH; only one warp is read at a time
|
||||
// because NUM_BLOCKS == 1
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] tc_rf_data [`ISSUE_WIDTH];
|
||||
|
||||
// req_data.wis is not referenced by the assignments below; the assertion
// restricts the build to ISSUE_RATIO == 1.
// NOTE(review): presumably wis carries no information at ISSUE_RATIO == 1,
// which is why it can be dropped — confirm.
`STATIC_ASSERT((ISSUE_RATIO == 1),
|
||||
("static assertion failed: tensor core only supports ISSUE_RATIO == 1"))
|
||||
// Broadcast the request valid to all issue slots.
assign tc_rf_valid = '{`ISSUE_WIDTH{tensor_regfile_if.req_valid}};
|
||||
// Broadcast the requested register index as the read address for all slots.
assign tc_rf_addr = '{`ISSUE_WIDTH{tensor_regfile_if.req_data.rs}};
|
||||
// Only slot 0's read data is returned; slots 1..ISSUE_WIDTH-1 are not used here.
assign tensor_regfile_if.rsp_data.data = tc_rf_data[0];
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -104,7 +113,7 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
||||
.size (size1[i])
|
||||
);
|
||||
assign operands_if[i].valid = ~empty1[i];
|
||||
assign scoreboard_if[i].ready = (size1[i] < 2'd2) && ~tc_rf_valid[i];
|
||||
assign scoreboard_if[i].ready = (size1[i] < 3'd2) && ~tc_rf_valid[i];
|
||||
|
||||
// assert (full1[i] == full2[i]);
|
||||
// assert (empty1[i] == empty2[i]);
|
||||
@@ -207,10 +216,10 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
reg wr_enabled = 1'b0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
wr_enabled <= 1'b1;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -9,6 +9,7 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
||||
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
`ifdef EXT_T_HOPPER
|
||||
VX_tc_rf_if.master regfile_if,
|
||||
VX_tc_bus_if.master smem_A_if,
|
||||
VX_tc_bus_if.master smem_B_if,
|
||||
`endif
|
||||
@@ -63,6 +64,7 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.execute_if (execute_if[block_idx]),
|
||||
.regfile_if (regfile_if),
|
||||
.smem_A_if (smem_A_if),
|
||||
.smem_B_if (smem_B_if),
|
||||
.commit_if (commit_block_if[block_idx])
|
||||
|
||||
@@ -9,6 +9,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
input reset,
|
||||
|
||||
VX_execute_if.slave execute_if,
|
||||
VX_tc_rf_if.master regfile_if,
|
||||
VX_tc_bus_if.master smem_A_if,
|
||||
VX_tc_bus_if.master smem_B_if,
|
||||
VX_commit_if.master commit_if
|
||||
@@ -104,6 +105,21 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||
`STATIC_ASSERT((`XLEN == 32),
|
||||
("static assertion failed: tensor_hopper_core only supports XLEN == 32"))
|
||||
|
||||
// /*
|
||||
// fake fsm driving tc rf port
// Placeholder stimulus for the regfile_if request side, built from a
// free-running counter; it does not depend on any tensor-core state.
// The "// /*" and "// */" lines bracketing this block look like toggle
// markers for commenting the whole block out — NOTE(review): confirm intent.
|
||||
// 12-bit counter: loaded with 1 on reset, then incremented every clock.
// No explicit wrap handling; the 12-bit addition rolls over on its own.
reg [11:0] counter;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
counter <= 12'd1;
|
||||
end else begin
|
||||
counter <= counter + 12'd1;
|
||||
end
|
||||
end
|
||||
// Request is valid whenever the low 4 counter bits are non-zero,
// i.e. 15 out of every 16 clock cycles.
assign regfile_if.req_valid = (counter[3:0] != 4'd0);
|
||||
// wis is tied to zero.
assign regfile_if.req_data.wis = '0;
|
||||
// Requested register index is the top 5 counter bits, so it advances once
// every 128 cycles.
// NOTE(review): this 5-bit slice drives an `NR_BITS-wide field — confirm the
// widths match for the configurations this is built with.
assign regfile_if.req_data.rs = counter[11:7];
|
||||
// */
|
||||
|
||||
TensorCoreDecoupled tensor_hopper_core (
|
||||
.clock(clk),
|
||||
.reset(reset),
|
||||
|
||||
46
hw/rtl/mem/VX_tc_rf_if.sv
Normal file
46
hw/rtl/mem/VX_tc_rf_if.sv
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Register-file read port for the tensor core.
// The master side drives a request (req_valid + req_data) and receives
// rsp_data; the slave side sees the same signals with directions reversed.
// The interface has no ready/ack and no response-valid signal, so request
// acceptance and response timing are not expressed here.
// NOTE(review): response latency relative to req_valid must be agreed between
// the two endpoints — confirm against the slave implementation.
interface VX_tc_rf_if import VX_gpu_pkg::*; ();
|
||||
|
||||
// Request payload (packed; wis occupies the upper bits, rs the lower bits).
typedef struct packed {
|
||||
// ISSUE_WIS_W-bit selector.
// NOTE(review): presumably the warp index within an issue slot — confirm.
logic [ISSUE_WIS_W-1:0] wis;
|
||||
// `NR_BITS-wide register index to read.
logic [`NR_BITS-1:0] rs;
|
||||
} req_data_t;
|
||||
|
||||
// Response payload: one `XLEN-bit word for each of `NUM_THREADS threads.
typedef struct packed {
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||
} rsp_data_t;
|
||||
|
||||
// Request qualifier and payload.
logic req_valid;
|
||||
req_data_t req_data;
|
||||
|
||||
// Read data returned by the slave.
rsp_data_t rsp_data;
|
||||
|
||||
// Requester view: drives the request, samples the response.
modport master (
|
||||
output req_valid,
|
||||
output req_data,
|
||||
|
||||
input rsp_data
|
||||
);
|
||||
|
||||
// Responder view: samples the request, drives the response.
modport slave (
|
||||
input req_valid,
|
||||
input req_data,
|
||||
|
||||
output rsp_data
|
||||
);
|
||||
|
||||
endinterface
|
||||
Reference in New Issue
Block a user