tensor: more doc

This commit is contained in:
Hansung Kim
2024-05-07 13:54:10 -07:00
parent 9c1d797250
commit 868bbdb15e
5 changed files with 9 additions and 3 deletions

View File

@@ -74,6 +74,11 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
assign decode_if.ibuf_pop[i] = uop_sequencer_if[i].valid && uop_sequencer_if[i].ready;
`endif
// tensor-core operation is controlled by a single macro-instruction at
// the ISA level; internally, the uop_sequencer blitzes micro-ops
// (counterpart to Volta SASS set/step instructions) into the ibuffer upon
// encountering this macro-instruction. the uop_sequencer acts as a
// pass-through for non-tensor-core instructions.
VX_uop_sequencer uop_sequencer (
.clk(clk),
.reset(reset),

View File

@@ -27,6 +27,7 @@ module VX_reduce_ext #(
input wire [`INST_RED_BITS-1:0] op_type,
output wire [DATAW_OUT-1:0] data_out
);
// recursive binary reduction
if (N == 1) begin
`UNUSED_VAR(op_type)
`UNUSED_VAR(mask)

View File

@@ -211,12 +211,11 @@ module VX_tensor_octet #(
// half the inputs are buffered, half are not (instead coming straight
// from operand bus) unlike the real tensor core.
// the banks are only 32 bit rather than 64 bit (a pair of fp32 regs).
// since A and B are supplied by 4 lanes each, we get 4 fp32's at a time
// (8 for C).
logic [3:0][31:0] A_half;
logic [3:0][31:0] B_half;
logic [7:0][31:0] C_half;
always @(*) begin
// note that not all lanes participate at every step
case (step)
2'b00: begin
A_half = { A_in[5:4], A_in[1:0] };
@@ -268,7 +267,6 @@ module VX_tensor_octet #(
end
end
wire stall = result_valid && ~result_ready;
assign operands_ready = ~stall;

View File

@@ -1,3 +1,4 @@
// uop table entry fields, listed MSB first: uop metadata (sequencing, next state), execution metadata (EX_TYPE, OP_TYPE, OP_MOD), wb, use pc, use imm, pc, imm, rd, rs1, rs2, rs3
HMMA_SET0_STEP0_0: begin
uop = {NEXT, HMMA_SET0_STEP0_1, `EX_BITS'(`EX_TENSOR), `INST_OP_BITS'(0), `INST_MOD_BITS'(0), 1'b1, 1'b0, 1'b0, 32'b0, 32'b0, `FREG(16), `FREG(0), `FREG(8), `FREG(16)};
end

View File

@@ -119,6 +119,7 @@ module VX_uop_sequencer import VX_gpu_pkg::*; (
uop[UOP_TABLE_WIDTH-UBR_BITS-UPC_BITS-1:0]
};
// passthrough when !use_uop
assign ibuffer_if.valid = use_uop ? 1'b1 : uop_sequencer_if.valid;
assign uop_sequencer_if.ready = use_uop ? (uop_fire && ubr == FINISH) : ibuffer_if.ready;
assign ibuffer_if.data = use_uop ? ibuffer_output : uop_sequencer_if.data;