From 144521e19ce72cd5efafdbbca1399d270b9f64c9 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Sun, 31 Dec 2023 23:57:31 -0800 Subject: [PATCH] Expose smem ports at VX_core top smem_unit stays inside the core, and the two separate buses to dcache and smem are exposed at VX_core. Currently core_wrapper ties smem rsp_valid to 1'b0 (with req_ready tied to 1'b1), stalling kernels that read from sharedmem. --- hw/rtl/VX_core_wrapper.sv | 21 +++++++++++++++++++-- hw/rtl/core/VX_core.sv | 5 ++++- hw/rtl/core/VX_smem_unit.sv | 18 +++++++++++++----- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/hw/rtl/VX_core_wrapper.sv b/hw/rtl/VX_core_wrapper.sv index f0310562..18477c76 100644 --- a/hw/rtl/VX_core_wrapper.sv +++ b/hw/rtl/VX_core_wrapper.sv @@ -200,13 +200,17 @@ module Vortex import VX_gpu_pkg::*; #( // NOTE(hansung): need to use DCACHE_NOSM_TAG_WIDTH here instead of // DCACHE_TAG_WIDTH; the latter is only used inside the core to - // differentiate between requests going to the outside cache vs. going to - // the shared memory. + // differentiate between requests going to the cache vs. sharedmem. 
VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH) ) dcache_bus_if[DCACHE_NUM_REQS](); + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH) + ) smem_bus_if[DCACHE_NUM_REQS](); + // always @(posedge clock) begin // `ASSERT(DCACHE_NUM_REQS == NUM_THREADS, "DCACHE_NUM_REQS doesn't match NUM_THREADS"); // end @@ -345,6 +349,17 @@ module Vortex import VX_gpu_pkg::*; #( assign dcache_bus_if[2].req_ready = dmem_2_a_ready; assign dcache_bus_if[3].req_ready = dmem_3_a_ready; + /* smem */ + + assign smem_bus_if[0].req_ready = 1'd1; + assign smem_bus_if[1].req_ready = 1'd1; + assign smem_bus_if[2].req_ready = 1'd1; + assign smem_bus_if[3].req_ready = 1'd1; + assign smem_bus_if[0].rsp_valid = 1'd0; + assign smem_bus_if[1].rsp_valid = 1'd0; + assign smem_bus_if[2].rsp_valid = 1'd0; + assign smem_bus_if[3].rsp_valid = 1'd0; + /* fpu */ // assign {fpu_hartid, fpu_time, fpu_inst, fpu_fromint_data, fpu_fcsr_rm, fpu_dmem_resp_val, fpu_dmem_resp_type, @@ -469,6 +484,8 @@ module Vortex import VX_gpu_pkg::*; #( .dcr_bus_if (dcr_bus_if), + .smem_bus_if (smem_bus_if), + .dcache_bus_if (dcache_bus_if), .icache_bus_if (icache_bus_if), diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index d50a3d32..c16ddff2 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -32,6 +32,8 @@ module VX_core import VX_gpu_pkg::*; #( VX_dcr_bus_if.slave dcr_bus_if, + VX_mem_bus_if.master smem_bus_if [DCACHE_NUM_REQS], + VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS], VX_mem_bus_if.master icache_bus_if, @@ -249,7 +251,8 @@ module VX_core import VX_gpu_pkg::*; #( .cache_perf (smem_perf), `endif .dcache_bus_in_if (dcache_bus_tmp_if), - .dcache_bus_out_if (dcache_bus_if) + .dcache_bus_out_if (dcache_bus_if), + .smem_bus_out_if (smem_bus_if) ); `else diff --git a/hw/rtl/core/VX_smem_unit.sv b/hw/rtl/core/VX_smem_unit.sv index 7ff7c2d8..a84f6dec 100644 --- a/hw/rtl/core/VX_smem_unit.sv +++ 
b/hw/rtl/core/VX_smem_unit.sv @@ -24,7 +24,8 @@ module VX_smem_unit import VX_gpu_pkg::*; #( `endif VX_mem_bus_if.slave dcache_bus_in_if [DCACHE_NUM_REQS], - VX_mem_bus_if.master dcache_bus_out_if [DCACHE_NUM_REQS] + VX_mem_bus_if.master dcache_bus_out_if [DCACHE_NUM_REQS], + VX_mem_bus_if.master smem_bus_out_if [DCACHE_NUM_REQS] ); `UNUSED_PARAM (CORE_ID) @@ -85,17 +86,24 @@ module VX_smem_unit import VX_gpu_pkg::*; #( `RESET_RELAY (switch_reset, reset); for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign smem_bus_out_if[i].req_valid = switch_out_bus_if[i * 2 + 1].req_valid; + assign smem_bus_out_if[i].req_data = switch_out_bus_if[i * 2 + 1].req_data; + assign switch_out_bus_if[i * 2 + 1].req_ready = smem_bus_out_if[i].req_ready; + + assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_bus_out_if[i].rsp_valid; + assign switch_out_bus_if[i * 2 + 1].rsp_data = smem_bus_out_if[i].rsp_data; + assign smem_bus_out_if[i].rsp_ready = switch_out_bus_if[i * 2 + 1].rsp_ready; assign smem_req_valid[i] = switch_out_bus_if[i * 2 + 1].req_valid; assign smem_req_rw[i] = switch_out_bus_if[i * 2 + 1].req_data.rw; assign smem_req_byteen[i] = switch_out_bus_if[i * 2 + 1].req_data.byteen; assign smem_req_data[i] = switch_out_bus_if[i * 2 + 1].req_data.data; assign smem_req_tag[i] = switch_out_bus_if[i * 2 + 1].req_data.tag; - assign switch_out_bus_if[i * 2 + 1].req_ready = smem_req_ready[i]; + // assign switch_out_bus_if[i * 2 + 1].req_ready = smem_req_ready[i]; - assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_rsp_valid[i]; - assign switch_out_bus_if[i * 2 + 1].rsp_data.data = smem_rsp_data[i]; - assign switch_out_bus_if[i * 2 + 1].rsp_data.tag = smem_rsp_tag[i]; + // assign switch_out_bus_if[i * 2 + 1].rsp_valid = smem_rsp_valid[i]; + // assign switch_out_bus_if[i * 2 + 1].rsp_data.data = smem_rsp_data[i]; + // assign switch_out_bus_if[i * 2 + 1].rsp_data.tag = smem_rsp_tag[i]; assign smem_rsp_ready[i] = switch_out_bus_if[i * 2 + 1].rsp_ready; assign smem_req_addr[i] = 
switch_out_bus_if[i * 2 + 1].req_data.addr[SMEM_ADDR_WIDTH-1:0];