tensor: Bore tensor regfile IO to execute units
This commit is contained in:
@@ -63,6 +63,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
VX_decode_if decode_if();
|
VX_decode_if decode_if();
|
||||||
VX_sched_csr_if sched_csr_if();
|
VX_sched_csr_if sched_csr_if();
|
||||||
VX_decode_sched_if decode_sched_if();
|
VX_decode_sched_if decode_sched_if();
|
||||||
|
VX_tc_rf_if tensor_regfile_if();
|
||||||
VX_commit_sched_if commit_sched_if();
|
VX_commit_sched_if commit_sched_if();
|
||||||
VX_commit_csr_if commit_csr_if();
|
VX_commit_csr_if commit_csr_if();
|
||||||
VX_branch_ctl_if branch_ctl_if[`NUM_ALU_BLOCKS]();
|
VX_branch_ctl_if branch_ctl_if[`NUM_ALU_BLOCKS]();
|
||||||
@@ -190,6 +191,9 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
`endif
|
`endif
|
||||||
`ifdef EXT_T_ENABLE
|
`ifdef EXT_T_ENABLE
|
||||||
.tensor_dispatch_if(tensor_dispatch_if),
|
.tensor_dispatch_if(tensor_dispatch_if),
|
||||||
|
`ifdef EXT_T_HOPPER
|
||||||
|
.tensor_regfile_if (tensor_regfile_if),
|
||||||
|
`endif
|
||||||
`endif
|
`endif
|
||||||
.sfu_dispatch_if(sfu_dispatch_if)
|
.sfu_dispatch_if(sfu_dispatch_if)
|
||||||
);
|
);
|
||||||
@@ -221,8 +225,9 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||||||
.tensor_dispatch_if (tensor_dispatch_if),
|
.tensor_dispatch_if (tensor_dispatch_if),
|
||||||
.tensor_commit_if (tensor_commit_if),
|
.tensor_commit_if (tensor_commit_if),
|
||||||
`ifdef EXT_T_HOPPER
|
`ifdef EXT_T_HOPPER
|
||||||
.tensor_smem_A_if (tensor_smem_A_if),
|
.tensor_regfile_if (tensor_regfile_if),
|
||||||
.tensor_smem_B_if (tensor_smem_B_if),
|
.tensor_smem_A_if (tensor_smem_A_if),
|
||||||
|
.tensor_smem_B_if (tensor_smem_B_if),
|
||||||
`endif
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||||||
VX_dispatch_if.slave tensor_dispatch_if [`ISSUE_WIDTH],
|
VX_dispatch_if.slave tensor_dispatch_if [`ISSUE_WIDTH],
|
||||||
VX_commit_if.master tensor_commit_if [`ISSUE_WIDTH],
|
VX_commit_if.master tensor_commit_if [`ISSUE_WIDTH],
|
||||||
`ifdef EXT_T_HOPPER
|
`ifdef EXT_T_HOPPER
|
||||||
|
VX_tc_rf_if.master tensor_regfile_if,
|
||||||
VX_tc_bus_if.master tensor_smem_A_if,
|
VX_tc_bus_if.master tensor_smem_A_if,
|
||||||
VX_tc_bus_if.master tensor_smem_B_if,
|
VX_tc_bus_if.master tensor_smem_B_if,
|
||||||
`endif
|
`endif
|
||||||
@@ -156,6 +157,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
.dispatch_if(tensor_dispatch_if),
|
.dispatch_if(tensor_dispatch_if),
|
||||||
`ifdef EXT_T_HOPPER
|
`ifdef EXT_T_HOPPER
|
||||||
|
.regfile_if(tensor_regfile_if),
|
||||||
.smem_A_if(tensor_smem_A_if),
|
.smem_A_if(tensor_smem_A_if),
|
||||||
.smem_B_if(tensor_smem_B_if),
|
.smem_B_if(tensor_smem_B_if),
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
`include "VX_trace.vh"
|
`include "VX_trace.vh"
|
||||||
|
|
||||||
module VX_issue #(
|
module VX_issue import VX_gpu_pkg::*; #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
`SCOPE_IO_DECL
|
`SCOPE_IO_DECL
|
||||||
@@ -36,6 +36,9 @@ module VX_issue #(
|
|||||||
`endif
|
`endif
|
||||||
`ifdef EXT_T_ENABLE
|
`ifdef EXT_T_ENABLE
|
||||||
VX_dispatch_if.master tensor_dispatch_if [`ISSUE_WIDTH],
|
VX_dispatch_if.master tensor_dispatch_if [`ISSUE_WIDTH],
|
||||||
|
`ifdef EXT_T_HOPPER
|
||||||
|
VX_tc_rf_if.slave tensor_regfile_if,
|
||||||
|
`endif
|
||||||
`endif
|
`endif
|
||||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||||
);
|
);
|
||||||
@@ -75,22 +78,6 @@ module VX_issue #(
|
|||||||
.scoreboard_if (scoreboard_if)
|
.scoreboard_if (scoreboard_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
// /*
|
|
||||||
// fake fsm driving tc output
|
|
||||||
reg [11:0] counter;
|
|
||||||
wire tc_rf_valid;
|
|
||||||
wire [4:0] tc_rf_addr;
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
counter <= 12'd1;
|
|
||||||
end else begin
|
|
||||||
counter <= counter + 12'd1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
assign tc_rf_valid = (counter[6:0] == 7'd0);
|
|
||||||
assign tc_rf_addr = counter[11:7];
|
|
||||||
// */
|
|
||||||
`ifdef GPR_DUPLICATED
|
`ifdef GPR_DUPLICATED
|
||||||
VX_operands_dup #(
|
VX_operands_dup #(
|
||||||
`else
|
`else
|
||||||
@@ -104,11 +91,7 @@ module VX_issue #(
|
|||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.scoreboard_if (scoreboard_if),
|
.scoreboard_if (scoreboard_if),
|
||||||
.operands_if (operands_if),
|
.operands_if (operands_if),
|
||||||
`ifdef GPR_DUPLICATED
|
.tensor_regfile_if (tensor_regfile_if)
|
||||||
.tc_rf_valid ('{`ISSUE_WIDTH{tc_rf_valid}}),
|
|
||||||
.tc_rf_addr ('{`ISSUE_WIDTH{tc_rf_addr}}),
|
|
||||||
.tc_rf_data ()
|
|
||||||
`endif
|
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_dispatch #(
|
VX_dispatch #(
|
||||||
|
|||||||
@@ -24,11 +24,8 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||||
VX_operands_if.master operands_if [`ISSUE_WIDTH],
|
VX_tc_rf_if.slave tensor_regfile_if,
|
||||||
|
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||||
input wire tc_rf_valid [`ISSUE_WIDTH],
|
|
||||||
input wire [`LOG2UP(`NUM_REGS * ISSUE_RATIO)-1:0] tc_rf_addr [`ISSUE_WIDTH],
|
|
||||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] tc_rf_data [`ISSUE_WIDTH]
|
|
||||||
);
|
);
|
||||||
`UNUSED_PARAM (CORE_ID)
|
`UNUSED_PARAM (CORE_ID)
|
||||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||||
@@ -47,6 +44,18 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
|||||||
logic [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] empty2;
|
logic [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] empty2;
|
||||||
logic [`ISSUE_WIDTH-1:0][2:0] size1;
|
logic [`ISSUE_WIDTH-1:0][2:0] size1;
|
||||||
|
|
||||||
|
wire tc_rf_valid [`ISSUE_WIDTH];
|
||||||
|
wire [`LOG2UP(`NUM_REGS * ISSUE_RATIO)-1:0] tc_rf_addr [`ISSUE_WIDTH];
|
||||||
|
// FIXME: don't need full ISSUE_WIDTH; only one warp is read at a time
|
||||||
|
// because NUM_BLOCKS == 1
|
||||||
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] tc_rf_data [`ISSUE_WIDTH];
|
||||||
|
|
||||||
|
`STATIC_ASSERT((ISSUE_RATIO == 1),
|
||||||
|
("static assertion failed: tensor core only supports ISSUE_RATIO == 1"))
|
||||||
|
assign tc_rf_valid = '{`ISSUE_WIDTH{tensor_regfile_if.req_valid}};
|
||||||
|
assign tc_rf_addr = '{`ISSUE_WIDTH{tensor_regfile_if.req_data.rs}};
|
||||||
|
assign tensor_regfile_if.rsp_data.data = tc_rf_data[0];
|
||||||
|
|
||||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
@@ -104,7 +113,7 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
|||||||
.size (size1[i])
|
.size (size1[i])
|
||||||
);
|
);
|
||||||
assign operands_if[i].valid = ~empty1[i];
|
assign operands_if[i].valid = ~empty1[i];
|
||||||
assign scoreboard_if[i].ready = (size1[i] < 2'd2) && ~tc_rf_valid[i];
|
assign scoreboard_if[i].ready = (size1[i] < 3'd2) && ~tc_rf_valid[i];
|
||||||
|
|
||||||
// assert (full1[i] == full2[i]);
|
// assert (full1[i] == full2[i]);
|
||||||
// assert (empty1[i] == empty2[i]);
|
// assert (empty1[i] == empty2[i]);
|
||||||
@@ -207,10 +216,10 @@ module VX_operands_dup import VX_gpu_pkg::*; #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
`ifdef GPR_RESET
|
`ifdef GPR_RESET
|
||||||
reg wr_enabled = 0;
|
reg wr_enabled = 1'b0;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
wr_enabled <= 1;
|
wr_enabled <= 1'b1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||||
`ifdef EXT_T_HOPPER
|
`ifdef EXT_T_HOPPER
|
||||||
|
VX_tc_rf_if.master regfile_if,
|
||||||
VX_tc_bus_if.master smem_A_if,
|
VX_tc_bus_if.master smem_A_if,
|
||||||
VX_tc_bus_if.master smem_B_if,
|
VX_tc_bus_if.master smem_B_if,
|
||||||
`endif
|
`endif
|
||||||
@@ -63,6 +64,7 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.execute_if (execute_if[block_idx]),
|
.execute_if (execute_if[block_idx]),
|
||||||
|
.regfile_if (regfile_if),
|
||||||
.smem_A_if (smem_A_if),
|
.smem_A_if (smem_A_if),
|
||||||
.smem_B_if (smem_B_if),
|
.smem_B_if (smem_B_if),
|
||||||
.commit_if (commit_block_if[block_idx])
|
.commit_if (commit_block_if[block_idx])
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
input reset,
|
input reset,
|
||||||
|
|
||||||
VX_execute_if.slave execute_if,
|
VX_execute_if.slave execute_if,
|
||||||
|
VX_tc_rf_if.master regfile_if,
|
||||||
VX_tc_bus_if.master smem_A_if,
|
VX_tc_bus_if.master smem_A_if,
|
||||||
VX_tc_bus_if.master smem_B_if,
|
VX_tc_bus_if.master smem_B_if,
|
||||||
VX_commit_if.master commit_if
|
VX_commit_if.master commit_if
|
||||||
@@ -104,6 +105,21 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
`STATIC_ASSERT((`XLEN == 32),
|
`STATIC_ASSERT((`XLEN == 32),
|
||||||
("static assertion failed: tensor_hopper_core only supports XLEN == 32"))
|
("static assertion failed: tensor_hopper_core only supports XLEN == 32"))
|
||||||
|
|
||||||
|
// /*
|
||||||
|
// fake fsm driving tc rf port
|
||||||
|
reg [11:0] counter;
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
counter <= 12'd1;
|
||||||
|
end else begin
|
||||||
|
counter <= counter + 12'd1;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assign regfile_if.req_valid = (counter[3:0] != 4'd0);
|
||||||
|
assign regfile_if.req_data.wis = '0;
|
||||||
|
assign regfile_if.req_data.rs = counter[11:7];
|
||||||
|
// */
|
||||||
|
|
||||||
TensorCoreDecoupled tensor_hopper_core (
|
TensorCoreDecoupled tensor_hopper_core (
|
||||||
.clock(clk),
|
.clock(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
|
|||||||
46
hw/rtl/mem/VX_tc_rf_if.sv
Normal file
46
hw/rtl/mem/VX_tc_rf_if.sv
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
// Copyright © 2019-2023
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
// VX_tc_rf_if: signal bundle with a request side (req_valid, req_data) and a
// response side (rsp_data). The interface declares no ready/ack signal and no
// response-valid signal.
// NOTE(review): the latency between req_valid and a meaningful rsp_data is not
// expressed here -- confirm against the module connected to the slave modport.
interface VX_tc_rf_if import VX_gpu_pkg::*; ();
|
||||||
|

||||||
|
// Request payload: wis is ISSUE_WIS_W bits wide, rs is NR_BITS bits wide.
// NOTE(review): presumably wis selects a warp within an issue slot and rs is a
// register number -- confirm against VX_gpu_pkg / VX_define.vh.
typedef struct packed {
|
||||||
|
logic [ISSUE_WIS_W-1:0] wis;
|
||||||
|
logic [`NR_BITS-1:0] rs;
|
||||||
|
} req_data_t;
|
||||||
|

||||||
|
// Response payload: NUM_THREADS words of XLEN bits each, packed together.
typedef struct packed {
|
||||||
|
logic [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||||
|
} rsp_data_t;
|
||||||
|

||||||
|
// Request qualifier and request payload (outputs of the master modport).
logic req_valid;
|
||||||
|
req_data_t req_data;
|
||||||
|

||||||
|
// Response payload (output of the slave modport).
rsp_data_t rsp_data;
|
||||||
|

||||||
|
// Master view: drives the request signals and reads the response.
modport master (
|
||||||
|
output req_valid,
|
||||||
|
output req_data,
|
||||||
|

||||||
|
input rsp_data
|
||||||
|
);
|
||||||
|

||||||
|
// Slave view: reads the request signals and drives the response.
modport slave (
|
||||||
|
input req_valid,
|
||||||
|
input req_data,
|
||||||
|

||||||
|
output rsp_data
|
||||||
|
);
|
||||||
|

||||||
|
endinterface
|
||||||
Reference in New Issue
Block a user