diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 071cc08d..1c089509 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -32,7 +32,7 @@ module VX_alu_unit #( localparam NUM_LANES = `NUM_ALU_LANES; localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1; localparam RSP_ARB_SIZE = 2 + `EXT_M_ENABLED; localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS); diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index a584cace..9b930818 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -41,7 +41,7 @@ module VX_commit import VX_gpu_pkg::*; #( output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value ); `UNUSED_PARAM (CORE_ID) - localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1; + localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1 + 1; localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1); localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1; @@ -210,6 +210,7 @@ module VX_commit import VX_gpu_pkg::*; #( assign writeback_if[i].data.tmask= commit_if[i].data.tmask; assign writeback_if[i].data.rd = commit_if[i].data.rd; assign writeback_if[i].data.data = commit_if[i].data.data; + assign writeback_if[i].data.tensor = commit_if[i].data.tensor; assign writeback_if[i].data.sop = commit_if[i].data.sop; assign writeback_if[i].data.eop = commit_if[i].data.eop; assign commit_if[i].ready = 1'b1; // writeback has no backpressure diff --git a/hw/rtl/core/VX_csr_unit.sv b/hw/rtl/core/VX_csr_unit.sv index 9fa373b6..bf229789 100644 --- 
a/hw/rtl/core/VX_csr_unit.sv +++ b/hw/rtl/core/VX_csr_unit.sv @@ -43,7 +43,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #( `UNUSED_PARAM (CORE_ID) localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); - localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + PID_WIDTH + 1 + 1; + localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + 1 + PID_WIDTH + 1 + 1; `UNUSED_VAR (execute_if.data.rs3_data) @@ -174,8 +174,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #( .reset (reset), .valid_in (csr_req_valid), .ready_in (csr_req_ready), - .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}), - .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}), + .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, 1'b0/*tensor*/, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}), + .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}), .valid_out (commit_if.valid), .ready_out (commit_if.ready) ); diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 21ae4485..fc8270d4 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -31,7 +31,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = 
`UP(PID_BITS); - localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1; localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH); wire [BLOCK_SIZE-1:0] commit_in_valid; @@ -119,6 +119,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_tmp_if.data.wb, commit_tmp_if.data.rd, commit_data_r, + commit_tmp_if.data.tensor, 1'b0, // PID commit_tmp_if.data.sop, commit_tmp_if.data.eop diff --git a/hw/rtl/core/VX_int_unit.sv b/hw/rtl/core/VX_int_unit.sv index a5e4f394..b8cb78dd 100644 --- a/hw/rtl/core/VX_int_unit.sv +++ b/hw/rtl/core/VX_int_unit.sv @@ -136,14 +136,14 @@ module VX_int_unit #( end VX_elastic_buffer #( - .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH) + .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH) ) rsp_buf ( .clk (clk), .reset (reset), .valid_in (execute_if.valid), .ready_in (execute_if.ready), - .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}), - .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}), + .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, 1'b0/*tensor*/, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}), + 
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.tensor, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}), .valid_out (commit_if.valid), .ready_out (commit_if.ready) ); diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 20fac1d1..e8748e39 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -36,7 +36,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( localparam NUM_LANES = `NUM_LSU_LANES; localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1; localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE); localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE); localparam MEM_ADDRW = `XLEN - MEM_ASHIFT; @@ -527,15 +527,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( // load commit VX_elastic_buffer #( - .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1), + .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + 1 + PID_WIDTH + 1 + 1), .SIZE (2) ) ld_rsp_buf ( .clk (clk), .reset (reset), .valid_in (mem_rsp_valid), .ready_in (mem_rsp_ready), - .data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}), - .data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}), + .data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, 1'b0/*tensor*/, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}), + .data_out 
({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.tensor, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}), .valid_out (commit_ld_if.valid), .ready_out (commit_ld_if.ready) ); @@ -545,15 +545,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( // store commit VX_elastic_buffer #( - .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1), + .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + PID_WIDTH + 1 + 1), .SIZE (2) ) st_rsp_buf ( .clk (clk), .reset (reset), .valid_in (mem_req_fire && mem_req_rw), .ready_in (st_rsp_ready), - .data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}), - .data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}), + .data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, 1'b0/*tensor*/, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}), + .data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.tensor, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}), .valid_out (commit_st_if.valid), .ready_out (commit_st_if.ready) ); diff --git a/hw/rtl/core/VX_muldiv_unit.sv b/hw/rtl/core/VX_muldiv_unit.sv index 6daa3c3d..80168c73 100644 --- a/hw/rtl/core/VX_muldiv_unit.sv +++ b/hw/rtl/core/VX_muldiv_unit.sv @@ -323,16 +323,16 @@ module VX_muldiv_unit #( VX_stream_arb #( .NUM_INPUTS (2), - .DATAW (TAGW + (NUM_LANES * `XLEN)), + .DATAW (1/*tensor field only in commit*/ + TAGW + (NUM_LANES * `XLEN)), .OUT_REG (1) ) rsp_buf ( .clk (clk), .reset (reset), .valid_in ({div_valid_out, mul_valid_out}), 
.ready_in ({div_ready_out, mul_ready_out}), - .data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out}, - {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}), - .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}), + .data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, 1'b0/*tensor*/, div_pid_out, div_sop_out, div_eop_out, div_result_out}, + {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, 1'b0/*tensor*/, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}), + .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}), .valid_out (commit_if.valid), .ready_out (commit_if.ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/core/VX_reduce_unit.sv b/hw/rtl/core/VX_reduce_unit.sv index 8522f8d1..b63e57ae 100644 --- a/hw/rtl/core/VX_reduce_unit.sv +++ b/hw/rtl/core/VX_reduce_unit.sv @@ -269,7 +269,7 @@ module VX_reduce_unit #( ); VX_elastic_buffer #( - .DATAW(`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + PID_WIDTH + 1 + 1) + .DATAW(`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + 1 + PID_WIDTH + 1 + 1) ) output_buffer ( .clk(clk), .reset(reset), @@ -277,7 +277,7 @@ module VX_reduce_unit #( .ready_in(commit_if_ready), - .data_in({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, stored_pid, stored_sop, stored_eop}), + .data_in({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, 1'b0/*tensor*/, stored_pid, stored_sop, stored_eop}), - 
.data_out({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}), + .data_out({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}), .ready_out(commit_if.ready), .valid_out(commit_if.valid) ); diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index c63a5dcb..42a876f5 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -142,6 +142,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #( for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs; + // busy bit for the asynchronous Tensor unit. Since the ISA does not + // have an explicit destination register, use a separate status bit. + reg [`UP(ISSUE_RATIO)-1:0] inuse_tensor; wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop; @@ -227,6 +230,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(posedge clk) begin if (reset) begin inuse_regs <= '0; + inuse_tensor <= '0; end else begin if (writeback_fire) begin inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0; diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index ed2023b7..48f1cb8f 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -49,7 +49,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1; + localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + 1 + PID_WIDTH + 1 + 1; localparam RSP_ARB_SIZE = 1 + 1; 
localparam RSP_ARB_IDX_WCTL = 0; localparam RSP_ARB_IDX_CSRS = 1; diff --git a/hw/rtl/core/VX_tensor_core.sv b/hw/rtl/core/VX_tensor_core.sv index 802af43d..1f7a95db 100644 --- a/hw/rtl/core/VX_tensor_core.sv +++ b/hw/rtl/core/VX_tensor_core.sv @@ -283,10 +283,11 @@ module VX_tensor_core_block import VX_gpu_pkg::*; #( assign commit_if_ready_override = commit_if.ready && (counter == 2'b0); `endif - localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1; + localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1 + 1; wire [COMMIT_DATAW-1:0] commit_if_data = { execute_if_data_deq[wb_wid], /* uuid ~ rd */ subcommit == 1'b0 ? wb_data_0 : wb_data_1, /* data */ + 1'b0, /* tensor */ 1'b0, /* pid */ 1'b1, /* sop */ 1'b1 /* eop */ diff --git a/hw/rtl/core/VX_tensor_hopper_core.sv b/hw/rtl/core/VX_tensor_hopper_core.sv index a58f4027..2ecbea70 100644 --- a/hw/rtl/core/VX_tensor_hopper_core.sv +++ b/hw/rtl/core/VX_tensor_hopper_core.sv @@ -128,11 +128,12 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0; - localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1; + localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1 + 1; wire [COMMIT_DATAW-1:0] commit_if_data = { // write-back to the correct rd only when eop ((state == 2'b11) ? 
execute_if_data_deq[0/*FIXME*/] : execute_if_data_new_rd), /* uuid ~ rd */ wb_data, /* data */ + 1'b0, /* tensor */ 1'b0, /* pid */ 1'b1, /* sop */ (state == 2'b11) /* eop */ diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index 5b1ad834..36144018 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -32,7 +32,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t); - localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1; + localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + 1 + PID_WIDTH + 1 + 1; `UNUSED_VAR (execute_if.data.rs3_data) @@ -141,8 +141,8 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( .reset (reset), .valid_in (execute_if.valid), .ready_in (execute_if.ready), - .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}), - .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}), + .data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, 1'b0/*tensor*/, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}), + .data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, 
wspawn_r, split_r, sjoin_r, barrier_r}}), .valid_out (commit_if.valid), .ready_out (commit_if.ready) ); diff --git a/hw/rtl/interfaces/VX_commit_if.sv b/hw/rtl/interfaces/VX_commit_if.sv index e5bfa13a..2eaf5d0e 100644 --- a/hw/rtl/interfaces/VX_commit_if.sv +++ b/hw/rtl/interfaces/VX_commit_if.sv @@ -26,6 +26,7 @@ interface VX_commit_if #( logic wb; logic [`NR_BITS-1:0] rd; logic [NUM_LANES-1:0][`XLEN-1:0] data; + logic tensor; logic [PID_WIDTH-1:0] pid; logic sop; logic eop; diff --git a/hw/rtl/interfaces/VX_writeback_if.sv b/hw/rtl/interfaces/VX_writeback_if.sv index ce6241ef..72abdb45 100644 --- a/hw/rtl/interfaces/VX_writeback_if.sv +++ b/hw/rtl/interfaces/VX_writeback_if.sv @@ -22,6 +22,7 @@ interface VX_writeback_if import VX_gpu_pkg::*; (); logic [`XLEN-1:0] PC; logic [`NR_BITS-1:0] rd; logic [`NUM_THREADS-1:0][`XLEN-1:0] data; + logic tensor; logic sop; logic eop; } data_t;