diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 30f1aece..c4db29d4 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -39,8 +39,8 @@ module VX_core import VX_gpu_pkg::*; #( VX_mem_bus_if.master icache_bus_if, - VX_tc_bus_if.master tc_p0_bus_if, - VX_tc_bus_if.master tc_p1_bus_if, + VX_tc_bus_if.master tensor_smem_A_if, + VX_tc_bus_if.master tensor_smem_B_if, `ifdef GBAR_ENABLE VX_gbar_bus_if.master gbar_bus_if, @@ -220,6 +220,10 @@ module VX_core import VX_gpu_pkg::*; #( `ifdef EXT_T_ENABLE .tensor_dispatch_if (tensor_dispatch_if), .tensor_commit_if (tensor_commit_if), + `ifdef EXT_T_HOPPER + .tensor_smem_A_if (tensor_smem_A_if), + .tensor_smem_B_if (tensor_smem_B_if), + `endif `endif .commit_csr_if (commit_csr_if), diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 723d7c60..e062e648 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -58,6 +58,8 @@ module VX_execute import VX_gpu_pkg::*; #( `ifdef EXT_T_ENABLE VX_dispatch_if.slave tensor_dispatch_if [`ISSUE_WIDTH], VX_commit_if.master tensor_commit_if [`ISSUE_WIDTH], + VX_tc_bus_if.master tensor_smem_A_if, + VX_tc_bus_if.master tensor_smem_B_if, `endif // simulation helper signals @@ -151,6 +153,10 @@ module VX_execute import VX_gpu_pkg::*; #( .reset(reset), .dispatch_if(tensor_dispatch_if), +`ifdef EXT_T_HOPPER + .smem_A_if(tensor_smem_A_if), + .smem_B_if(tensor_smem_B_if), +`endif .commit_if(tensor_commit_if) ); `endif diff --git a/hw/rtl/core/VX_tensor_core.sv b/hw/rtl/core/VX_tensor_core.sv index cad70b97..6d3a8a4a 100644 --- a/hw/rtl/core/VX_tensor_core.sv +++ b/hw/rtl/core/VX_tensor_core.sv @@ -8,7 +8,11 @@ module VX_tensor_core import VX_gpu_pkg::*; #( input reset, VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH], - VX_commit_if.master commit_if [`ISSUE_WIDTH] +`ifdef EXT_T_HOPPER + VX_tc_bus_if.master smem_A_if, + VX_tc_bus_if.master smem_B_if, +`endif + VX_commit_if.master commit_if [`ISSUE_WIDTH] ); localparam BLOCK_SIZE = 1; localparam NUM_LANES = `NUM_THREADS; @@ -56,11 +60,21 @@ module VX_tensor_core import VX_gpu_pkg::*; #( .ISW(1), // FIXME: not block_idx .FP16(FP16) ) tensor_hopper_core_block ( - .clk(clk), - .reset(reset), - .execute_if(execute_if[block_idx]), - .commit_if(commit_block_if[block_idx]) + .clk (clk), + .reset (reset), + .execute_if (execute_if[block_idx]), + .smem_A_if (smem_A_if), + .smem_B_if (smem_B_if), + .commit_if (commit_block_if[block_idx]) ); + // ) tensor_hopper_core_block ( + // .clk (clk), + // .reset (reset), + // .execute_if (execute_if[block_idx]), + // .smem_A_if (smem_A_if), + // .smem_B_if (smem_B_if), + // .commit_if_haha (commit_block_if[block_idx]) + // ); `else VX_tensor_core_block #( .ISW(1), // FIXME: use block_idx diff --git a/hw/rtl/core/VX_tensor_hopper_core.sv b/hw/rtl/core/VX_tensor_hopper_core.sv index 7e8989b1..2f48b7e4 100644 --- a/hw/rtl/core/VX_tensor_hopper_core.sv +++ b/hw/rtl/core/VX_tensor_hopper_core.sv @@ -9,6 +9,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( input reset, VX_execute_if.slave execute_if, + VX_tc_bus_if.master smem_A_if, + VX_tc_bus_if.master smem_B_if, VX_commit_if.master commit_if ); localparam NUM_LANES = `NUM_THREADS; @@ -107,7 +109,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( .io_writeback_ready(writeback_ready), .io_writeback_valid(writeback_valid), - .io_writeback_bits_last(/*unused*/), + .io_writeback_bits_last(writeback_last), .io_writeback_bits_wid(/*unused*/), .io_writeback_bits_rd(/*unused*/), .io_writeback_bits_data_0(/*unused*/), @@ -119,23 +121,23 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #( .io_writeback_bits_data_6(/*unused*/), .io_writeback_bits_data_7(/*unused*/), - .io_respA_ready(/*unused*/), - .io_respA_valid(1'b0/*FIXME*/), - .io_respA_bits_source(2'b0/*FIXME*/), - .io_respA_bits_data(256'b0/*FIXME*/), - .io_respB_ready(/*unused*/), - .io_respB_valid(1'b0/*FIXME*/), - .io_respB_bits_source(2'b0/*FIXME*/), - .io_respB_bits_data(256'b0/*FIXME*/), + .io_respA_ready(smem_A_if.rsp_ready), + .io_respA_valid(smem_A_if.rsp_valid), + .io_respA_bits_source(smem_A_if.rsp_data.tag), + .io_respA_bits_data(smem_A_if.rsp_data.data), + .io_respB_ready(smem_B_if.rsp_ready), + .io_respB_valid(smem_B_if.rsp_valid), + .io_respB_bits_source(smem_B_if.rsp_data.tag), + .io_respB_bits_data(smem_B_if.rsp_data.data), - .io_reqA_ready(1'b0/*FIXME*/), - .io_reqA_valid(/*unused*/), - .io_reqA_bits_source(/*unused*/), - .io_reqA_bits_address(/*unused*/), - .io_reqB_ready(1'b0/*FIXME*/), - .io_reqB_valid(/*unused*/), - .io_reqB_bits_source(/*unused*/), - .io_reqB_bits_address(/*unused*/) + .io_reqA_ready(smem_A_if.req_ready), + .io_reqA_valid(smem_A_if.req_valid), + .io_reqA_bits_source(smem_A_if.req_data.tag), + .io_reqA_bits_address(smem_A_if.req_data.addr), + .io_reqB_ready(smem_B_if.req_ready), + .io_reqB_valid(smem_B_if.req_valid), + .io_reqB_bits_source(smem_B_if.req_data.tag), + .io_reqB_bits_address(smem_B_if.req_data.addr) ); // VX_tensor_hopper_core #(