// Commit note: Add Blackwell tensor core support in VX_tensor_blackwell_core.sv;
// update decode, execute, and dispatch logic for new instructions;
// extend VX_define.vh and VX_types.vh with Blackwell ISA definitions.
// (Repository viewer metadata: 230 lines, 8.3 KiB, SystemVerilog.)
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Project headers pulled in ahead of the module definition.
// NOTE(review): the backtick macros used by VX_issue are presumably supplied
// by these headers; neither file is visible from here.
`include "VX_define.vh"
`include "VX_trace.vh"

// VX_issue
//
// Structural wrapper for the issue stage. It instantiates four submodules and
// chains them through per-issue-slot interface arrays:
//
//   decode_if -> ibuffer -> ibuffer_if -> scoreboard -> scoreboard_if
//             -> operands -> operands_if -> dispatch -> {alu,lsu,fpu,tensor,sfu}_dispatch_if
//
// writeback_if is fanned out to both the scoreboard and the operands stage.
// Optional sections add a simulation trace probe, a scope tap / ILA probe, and
// an instruction-buffer stall performance counter.
//
// Parameters:
//   CORE_ID - forwarded to every submodule; also gates the debug probes, which
//             are only active/instantiated when CORE_ID == 0.
module VX_issue import VX_gpu_pkg::*; #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_DECL

    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    VX_pipeline_perf_if.issue perf_issue_if,
`endif

    // Upstream instruction stream and register write-back channels.
    VX_decode_if.slave      decode_if,
    VX_writeback_if.slave   writeback_if [`ISSUE_WIDTH],

    // Per-unit dispatch outputs, one interface per issue slot.
    VX_dispatch_if.master   alu_dispatch_if [`ISSUE_WIDTH],
    VX_dispatch_if.master   lsu_dispatch_if [`ISSUE_WIDTH],
`ifdef EXT_F_ENABLE
    VX_dispatch_if.master   fpu_dispatch_if [`ISSUE_WIDTH],
`endif
`ifdef EXT_T_ENABLE
    VX_dispatch_if.master   tensor_dispatch_if [`ISSUE_WIDTH],
`ifdef EXT_T_ASYNC
    VX_tc_rf_if.slave       tensor_regfile_if,
`endif
`endif
    VX_dispatch_if.master   sfu_dispatch_if [`ISSUE_WIDTH]
);
    // Stage-to-stage links. Note that scoreboard_if reuses the VX_ibuffer_if
    // interface type.
    VX_ibuffer_if  ibuffer_if [`ISSUE_WIDTH]();
    VX_ibuffer_if  scoreboard_if [`ISSUE_WIDTH]();
    VX_operands_if operands_if [`ISSUE_WIDTH]();

    // One reset net per submodule, each produced from `reset` by `RESET_RELAY.
    `RESET_RELAY (ibuf_reset, reset);
    `RESET_RELAY (scoreboard_reset, reset);
    `RESET_RELAY (operands_reset, reset);
    `RESET_RELAY (dispatch_reset, reset);

    VX_ibuffer #(
        .CORE_ID (CORE_ID)
    ) ibuffer (
        .clk            (clk),
        .reset          (ibuf_reset),
        .decode_if      (decode_if),
        .ibuffer_if     (ibuffer_if)
    );

    VX_scoreboard #(
        .CORE_ID (CORE_ID)
    ) scoreboard (
        .clk            (clk),
        .reset          (scoreboard_reset),
    `ifdef PERF_ENABLE
        .perf_scb_stalls(perf_issue_if.scb_stalls),
        .perf_scb_any_unit_uses(perf_issue_if.scb_any_unit_uses),
        .perf_scb_fires (perf_issue_if.scb_fires),
        .perf_scb_any_fire_cycles (perf_issue_if.scb_any_fire_cycles),
        .perf_units_uses(perf_issue_if.units_uses),
        .perf_sfu_uses  (perf_issue_if.sfu_uses),
    `endif
        .writeback_if   (writeback_if),
        .ibuffer_if     (ibuffer_if),
        .scoreboard_if  (scoreboard_if)
    );

    // The operands implementation is chosen at compile time; both variants are
    // instantiated under the same instance name and with the same connections.
`ifdef GPR_DUPLICATED
    VX_operands_dup #(
`else
    VX_operands #(
`endif
        .CORE_ID        (CORE_ID),
        .CACHE_ENABLE   (0)
    ) operands (
        .clk            (clk),
        .reset          (operands_reset),
        .writeback_if   (writeback_if),
        .scoreboard_if  (scoreboard_if),
        // tensor_regfile_if only exists as a port when both EXT_T_ENABLE and
        // EXT_T_ASYNC are defined, so the connection uses the same nested
        // guard as the port declaration.
    `ifdef EXT_T_ENABLE
    `ifdef EXT_T_ASYNC
        .tensor_regfile_if (tensor_regfile_if),
    `endif
    `endif
        .operands_if    (operands_if)
    );

    VX_dispatch #(
        .CORE_ID (CORE_ID)
    ) dispatch (
        .clk            (clk),
        .reset          (dispatch_reset),
    `ifdef PERF_ENABLE
        .perf_stalls    (perf_issue_if.dispatch_stalls),
        .perf_valids    (perf_issue_if.dispatch_valids),
        .perf_fires     (perf_issue_if.dispatch_fires),
        .perf_any_fire_cycles (perf_issue_if.dispatch_any_fire_cycles),
    `endif
        .operands_if    (operands_if),
        .alu_dispatch_if(alu_dispatch_if),
        .lsu_dispatch_if(lsu_dispatch_if),
    `ifdef EXT_F_ENABLE
        .fpu_dispatch_if(fpu_dispatch_if),
    `endif
    `ifdef EXT_T_ENABLE
        .tensor_dispatch_if(tensor_dispatch_if),
    `endif
        .sfu_dispatch_if(sfu_dispatch_if)
    );

`ifdef SIMULATION
`ifdef DBG_TRACE_CORE_PIPELINE_VCS
    // Simulation-only probe: for each issue slot, print the handshake state of
    // all three internal stages whenever any of them holds a valid entry whose
    // PC lies in the hard-coded window [0x80000240, 0x80000260].
    for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
        wire ibuf_probe_pc = ibuffer_if[i].valid
                          && (ibuffer_if[i].data.PC >= 32'h80000240)
                          && (ibuffer_if[i].data.PC <= 32'h80000260);
        wire scb_probe_pc = scoreboard_if[i].valid
                         && (scoreboard_if[i].data.PC >= 32'h80000240)
                         && (scoreboard_if[i].data.PC <= 32'h80000260);
        wire ops_probe_pc = operands_if[i].valid
                         && (operands_if[i].data.PC >= 32'h80000240)
                         && (operands_if[i].data.PC <= 32'h80000260);

        always @(posedge clk) begin
            // Only core 0, out of reset, and after the trace start time.
            if (!reset && (CORE_ID == 0) && ($time > `TRACE_STARTTIME) && (ibuf_probe_pc || scb_probe_pc || ops_probe_pc)) begin
                `TRACE(2, ("%d: core%0d-issue-probe: isw=%0d, ibuf=%b/%b PC=0x%0h ex=0x%0h op=0x%0h, scb=%b/%b PC=0x%0h ex=0x%0h op=0x%0h, ops=%b/%b PC=0x%0h ex=0x%0h op=0x%0h, alu=%b/%b lsu=%b/%b sfu=%b/%b (#ibuf=%0d #scb=%0d #ops=%0d)\n",
                    $time, CORE_ID, i,
                    ibuffer_if[i].valid, ibuffer_if[i].ready, ibuffer_if[i].data.PC, ibuffer_if[i].data.ex_type, ibuffer_if[i].data.op_type,
                    scoreboard_if[i].valid, scoreboard_if[i].ready, scoreboard_if[i].data.PC, scoreboard_if[i].data.ex_type, scoreboard_if[i].data.op_type,
                    operands_if[i].valid, operands_if[i].ready, operands_if[i].data.PC, operands_if[i].data.ex_type, operands_if[i].data.op_type,
                    alu_dispatch_if[i].valid, alu_dispatch_if[i].ready,
                    lsu_dispatch_if[i].valid, lsu_dispatch_if[i].ready,
                    sfu_dispatch_if[i].valid, sfu_dispatch_if[i].ready,
                    ibuffer_if[i].data.uuid, scoreboard_if[i].data.uuid, operands_if[i].data.uuid));
            end
        end
    end
`endif
`endif

`ifdef DBG_SCOPE_ISSUE
    // Hardware debug taps; instantiated for core 0 only and observing issue
    // slot 0 only.
    if (CORE_ID == 0) begin
    `ifdef SCOPE
        wire operands_if_fire = operands_if[0].valid && operands_if[0].ready;
        wire operands_if_not_ready = ~operands_if[0].ready;
        wire writeback_if_valid = writeback_if[0].valid;
        VX_scope_tap #(
            .SCOPE_ID (2),
            .TRIGGERW (4),
            // PROBEW terms are listed in the same order as the signals in the
            // .probes concatenation below; keep the two in sync.
            .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
                     1 + `NR_BITS + `XLEN + 1 + 1 + (`NUM_THREADS * 3 * `XLEN) +
                     `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
        ) scope_tap (
            .clk(clk),
            .reset(scope_reset),
            .start(1'b0),
            .stop(1'b0),
            .triggers({
                reset,
                operands_if_fire,
                operands_if_not_ready,
                writeback_if_valid
            }),
            .probes({
                operands_if[0].data.uuid,
                operands_if[0].data.tmask,
                operands_if[0].data.ex_type,
                operands_if[0].data.op_type,
                operands_if[0].data.op_mod,
                operands_if[0].data.wb,
                operands_if[0].data.rd,
                operands_if[0].data.imm,
                operands_if[0].data.use_PC,
                operands_if[0].data.use_imm,
                operands_if[0].data.rs1_data,
                operands_if[0].data.rs2_data,
                operands_if[0].data.rs3_data,
                writeback_if[0].data.uuid,
                writeback_if[0].data.tmask,
                writeback_if[0].data.rd,
                writeback_if[0].data.data,
                writeback_if[0].data.eop
            }),
            .bus_in(scope_bus_in),
            .bus_out(scope_bus_out)
        );
    `endif
    `ifdef CHIPSCOPE
        // NOTE(review): the probe expressions below do not match how these
        // interfaces are used elsewhere in this module. operands_if and
        // writeback_if are arrays of `ISSUE_WIDTH interfaces accessed as
        // <if>[i].data.<field>, and `ibuffer` is the VX_ibuffer instance name,
        // so ibuffer.rs1/rs2/rs3 are hierarchical references. Left unchanged
        // because the intended fields and ILA probe widths cannot be confirmed
        // from this file; verify before building with CHIPSCOPE defined.
        ila_issue ila_issue_inst (
            .clk    (clk),
            .probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, operands_if.valid}),
            .probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid})
        );
    `endif
    end
`else
    `SCOPE_IO_UNUSED()
`endif

`ifdef PERF_ENABLE
    // Counts clock cycles in which decode presents a valid instruction that
    // the issue stage does not accept (valid high, ready low).
    reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;

    wire decode_stall = decode_if.valid && ~decode_if.ready;

    always @(posedge clk) begin
        if (reset) begin
            perf_ibf_stalls <= '0;
        end else begin
            perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall);
        end
    end

    assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
`endif

endmodule