Add dummy code for decoupled Hopper tensor core
Define EXT_T_HOPPER, which (when EXT_T_ENABLE is defined) selects whether the core-coupled Volta-style or the decoupled Hopper-style Tensor Core is instantiated. EXT_T_HOPPER is defined by default whenever the tensor extension is enabled.
This commit is contained in:
@@ -40,8 +40,13 @@
|
|||||||
`define EXT_F_ENABLE
|
`define EXT_F_ENABLE
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
// core-coupled tensor core
|
||||||
`ifndef EXT_T_DISABLE
|
`ifndef EXT_T_DISABLE
|
||||||
`define EXT_T_ENABLE
|
`define EXT_T_ENABLE
|
||||||
|
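// decoupled Hopper-style tensor core; defined by default (NOTE(review): no opt-out macro is visible here, so the Volta-style path appears unselectable -- confirm)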
|
||||||
|
`ifndef EXT_T_HOPPER
|
||||||
|
`define EXT_T_HOPPER
|
||||||
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef XLEN_32
|
`ifndef XLEN_32
|
||||||
@@ -83,7 +88,7 @@
|
|||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef NUM_CORES
|
`ifndef NUM_CORES
|
||||||
`define NUM_CORES 8
|
`define NUM_CORES 4
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef NUM_WARPS
|
`ifndef NUM_WARPS
|
||||||
|
|||||||
@@ -542,16 +542,30 @@ module VX_decode #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
`ifdef EXT_T_ENABLE
|
`ifdef EXT_T_ENABLE
|
||||||
`INST_EXT4: begin
|
`ifdef EXT_T_HOPPER
|
||||||
ex_type = `EX_TENSOR;
|
// TODO
|
||||||
op_type = `INST_TENSOR_HMMA;
|
`INST_EXT4: begin
|
||||||
// tensor core macroop is encoded as r-type
|
ex_type = `EX_TENSOR;
|
||||||
use_rd = 1;
|
op_type = `INST_TENSOR_HMMA;
|
||||||
`USED_IREG (rd);
|
// tensor core macroop is encoded as r-type
|
||||||
`USED_IREG (rs1);
|
use_rd = 1;
|
||||||
`USED_IREG (rs2);
|
`USED_IREG (rd);
|
||||||
`USED_IREG (rs3);
|
`USED_IREG (rs1);
|
||||||
end
|
`USED_IREG (rs2);
|
||||||
|
`USED_IREG (rs3);
|
||||||
|
end
|
||||||
|
`else
|
||||||
|
`INST_EXT4: begin
|
||||||
|
ex_type = `EX_TENSOR;
|
||||||
|
op_type = `INST_TENSOR_HMMA;
|
||||||
|
// tensor core macroop is encoded as r-type
|
||||||
|
use_rd = 1;
|
||||||
|
`USED_IREG (rd);
|
||||||
|
`USED_IREG (rs1);
|
||||||
|
`USED_IREG (rs2);
|
||||||
|
`USED_IREG (rs3);
|
||||||
|
end
|
||||||
|
`endif
|
||||||
`endif
|
`endif
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
|||||||
);
|
);
|
||||||
localparam BLOCK_SIZE = 1;
|
localparam BLOCK_SIZE = 1;
|
||||||
localparam NUM_LANES = `NUM_THREADS;
|
localparam NUM_LANES = `NUM_THREADS;
|
||||||
// FIXME: @perf: PARTIAL_BW==1 increases power instantiating
|
// @perf: PARTIAL_BW==1 increases power instantiating
|
||||||
// stream_buffers for ISSUE_WIDTH times
|
// stream_buffers for ISSUE_WIDTH times
|
||||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||||
|
|
||||||
@@ -51,16 +51,27 @@ module VX_tensor_core import VX_gpu_pkg::*; #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||||
VX_tensor_core_block #(
|
`ifdef EXT_T_HOPPER
|
||||||
|
VX_tensor_hopper_core_block #(
|
||||||
.ISW(1), // FIXME: not block_idx
|
.ISW(1), // FIXME: not block_idx
|
||||||
.FP16(FP16)
|
.FP16(FP16)
|
||||||
|
) tensor_hopper_core (
|
||||||
|
.clk(clk),
|
||||||
|
.reset(reset),
|
||||||
|
.execute_if(execute_if[block_idx]),
|
||||||
|
.commit_if(commit_block_if[block_idx])
|
||||||
|
);
|
||||||
|
`else
|
||||||
|
VX_tensor_core_block #(
|
||||||
|
.ISW(1), // FIXME: use block_idx
|
||||||
|
.FP16(FP16)
|
||||||
) tensor_core (
|
) tensor_core (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
|
|
||||||
.execute_if(execute_if[block_idx]),
|
.execute_if(execute_if[block_idx]),
|
||||||
.commit_if(commit_block_if[block_idx])
|
.commit_if(commit_block_if[block_idx])
|
||||||
);
|
);
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -275,7 +286,6 @@ module VX_tensor_core_block import VX_gpu_pkg::*; #(
|
|||||||
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
||||||
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||||
execute_if_data_deq[wb_wid], /* uuid ~ rd */
|
execute_if_data_deq[wb_wid], /* uuid ~ rd */
|
||||||
// execute_if_data_deq, /* uuid ~ rd */
|
|
||||||
subcommit == 1'b0 ? wb_data_0 : wb_data_1, /* data */
|
subcommit == 1'b0 ? wb_data_0 : wb_data_1, /* data */
|
||||||
1'b0, /* pid */
|
1'b0, /* pid */
|
||||||
1'b1, /* sop */
|
1'b1, /* sop */
|
||||||
|
|||||||
102
hw/rtl/core/VX_tensor_hopper_core.sv
Normal file
102
hw/rtl/core/VX_tensor_hopper_core.sv
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
`ifdef EXT_T_ENABLE
|
||||||
|
`include "VX_fpu_define.vh"
|
||||||
|
|
||||||
|
module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
||||||
|
parameter ISW,
|
||||||
|
parameter FP16
|
||||||
|
) (
|
||||||
|
input clk,
|
||||||
|
input reset,
|
||||||
|
|
||||||
|
VX_execute_if.slave execute_if,
|
||||||
|
VX_commit_if.master commit_if
|
||||||
|
);
|
||||||
|
localparam METADATA_QUEUE_DEPTH = 2; // FIXME: arbitrary
|
||||||
|
|
||||||
|
/* commit_if.data_t parts that we need to keep around:
|
||||||
|
- uuid
|
||||||
|
- wid
|
||||||
|
- tmask
|
||||||
|
- PC
|
||||||
|
- wb
|
||||||
|
- rd
|
||||||
|
*/
|
||||||
|
|
||||||
|
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS;
|
||||||
|
|
||||||
|
wire operand_enq_fire = execute_if.valid && execute_if.ready;
|
||||||
|
wire commit_if_fire = commit_if.valid && commit_if.ready;
|
||||||
|
wire [DATAW-1:0] execute_if_data_enq = {
|
||||||
|
execute_if.data.uuid,
|
||||||
|
execute_if.data.wid,
|
||||||
|
execute_if.data.tmask,
|
||||||
|
execute_if.data.PC,
|
||||||
|
execute_if.data.wb,
|
||||||
|
execute_if.data.rd
|
||||||
|
// pid/sop/eop set later
|
||||||
|
};
|
||||||
|
|
||||||
|
wire [`NUM_WARPS-1:0][DATAW-1:0] execute_if_data_deq;
|
||||||
|
|
||||||
|
wire [`NUM_WARPS-1:0] metadata_queue_fulls;
|
||||||
|
wire [`NUM_WARPS-1:0] metadata_queue_emptys;
|
||||||
|
// OR-reduce rather than AND-reduce: back-pressure execute_if as soon as any one warp's queue is full
|
||||||
|
wire metadata_queue_full = |(metadata_queue_fulls);
|
||||||
|
assign execute_if.ready = !metadata_queue_full;
|
||||||
|
|
||||||
|
`RUNTIME_ASSERT((!execute_if.valid || execute_if.data.wid == `NW_WIDTH'(0)),
|
||||||
|
("runtime error: WGMMA execute not supported for warps other than 0!"))
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||||
|
// Metadata queue for commit_if. This simply copies execute_if's
|
||||||
|
// metadata and pops it when the commit fires.
|
||||||
|
//
|
||||||
|
// This has to be separated per-warp, as otherwise requests from
|
||||||
|
// multiple warps can be enqueued interleaved, which makes it hard to
|
||||||
|
// ensure two consecutive dequeues are associated with the same warp for
|
||||||
|
// commit. (FIXME: this is not strictly necessary though.)
|
||||||
|
|
||||||
|
wire enq = operand_enq_fire && (execute_if.data.wid == `NW_WIDTH'(i));
|
||||||
|
// FIXME: only warp 0's queue is popped on commit; queues of other warps are never dequeued
|
||||||
|
wire deq = commit_if_fire && (`NW_WIDTH'(i) == `NW_WIDTH'(0));
|
||||||
|
|
||||||
|
VX_fifo_queue #(
|
||||||
|
.DATAW(DATAW),
|
||||||
|
.DEPTH(METADATA_QUEUE_DEPTH)
|
||||||
|
) pending_uops (
|
||||||
|
.clk(clk),
|
||||||
|
.reset(reset),
|
||||||
|
.push(enq),
|
||||||
|
.pop(deq),
|
||||||
|
.data_in(execute_if_data_enq),
|
||||||
|
.data_out(execute_if_data_deq[i]),
|
||||||
|
.empty(metadata_queue_emptys[i]),
|
||||||
|
`UNUSED_PIN(alm_empty),
|
||||||
|
.full(metadata_queue_fulls[i]),
|
||||||
|
`UNUSED_PIN(alm_full),
|
||||||
|
`UNUSED_PIN(size)
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
// this shouldn't really happen unless there's a big contention over
|
||||||
|
// the commit stage
|
||||||
|
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
|
||||||
|
|
||||||
|
// FIXME: only checks warp 0 for commit!
|
||||||
|
assign commit_if.valid = ~metadata_queue_emptys[0/*FIXME*/];
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
||||||
|
|
||||||
|
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
||||||
|
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||||
|
execute_if_data_deq[0/*FIXME*/], /* uuid ~ rd */
|
||||||
|
wb_data, /* data */
|
||||||
|
1'b0, /* pid */
|
||||||
|
1'b1, /* sop */
|
||||||
|
1'b1 /* eop */
|
||||||
|
};
|
||||||
|
|
||||||
|
assign commit_if.data = commit_if_data;
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
`endif
|
||||||
Reference in New Issue
Block a user