diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 6df04d39..c5c27899 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -242,8 +242,11 @@ `define DBG_CACHE_REQ_MDATAW 0 `endif -// non-cacheable address bit -`define NC_FLAG_BITS 1 +// non-cacheable tag bits +`define NC_TAG_BIT 1 + +// texture tag bits +`define TEX_TAG_BIT 1 ////////////////////////// Icache Configurable Knobs ////////////////////////// @@ -285,15 +288,15 @@ // Core request tag bits `define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) `ifdef EXT_TEX_ENABLE -`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE) +`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_TAG_BIT + `SM_ENABLE) `define TEX_TAG_ID_BITS (2) `define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS) -`define DCACHE_CORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS) +`define DCACHE_CORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `TEX_TAG_BIT) `define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS) `define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS) `define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS) `else -`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE) +`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_TAG_BIT + `SM_ENABLE) `endif `define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS) @@ -312,7 +315,7 @@ // Memory request tag bits `define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE) `define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH) -`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH) +`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_TAG_BIT), `_DNC_MEM_TAG_WIDTH) // Merged D-cache/I-cache memory tag `define L1_MEM_TAG_WIDTH (`MAX(`ICACHE_MEM_TAG_WIDTH, `DCACHE_MEM_TAG_WIDTH) + `CLOG2(2)) @@ -360,7 +363,7 @@ // Memory request tag bits `define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE) `define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `L1_MEM_TAG_WIDTH) -`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH) +`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_TAG_BIT), `_L2_NC_MEM_TAG_WIDTH) `define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`L1_MEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS))) ////////////////////////// L3cache Configurable Knobs ///////////////////////// @@ -392,7 +395,7 @@ // Memory request tag bits `define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE) `define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH) -`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_FLAG_BITS), `_L3_NC_MEM_TAG_WIDTH) +`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_TAG_BIT), `_L3_NC_MEM_TAG_WIDTH) `define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS))) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_execute.sv b/hw/rtl/VX_execute.sv index 41d3ba30..f0cdd37e 100644 --- a/hw/rtl/VX_execute.sv +++ b/hw/rtl/VX_execute.sv @@ -102,7 +102,7 @@ module VX_execute #( .LANES (`NUM_THREADS), .DATA_SIZE (4), .TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS), - .TAG_SEL_IDX (`NC_FLAG_BITS + `SM_ENABLE) + .TAG_SEL_IDX (`NC_TAG_BIT + `SM_ENABLE) ) tex_lsu_arb ( .clk (clk), .reset (reset), @@ -142,7 +142,7 @@ module VX_execute #( `endif -`ifdef EXT_TEX_ENABLE +`ifdef EXT_F_ENABLE wire [`NUM_WARPS-1:0] csr_pending; wire [`NUM_WARPS-1:0] fpu_pending; VX_fpu_to_csr_if fpu_to_csr_if(); diff --git a/hw/rtl/VX_gpu_unit.sv b/hw/rtl/VX_gpu_unit.sv index 9a02b835..06d5fbc7 100644 --- a/hw/rtl/VX_gpu_unit.sv +++ b/hw/rtl/VX_gpu_unit.sv @@ -12,10 +12,9 @@ module VX_gpu_unit #( VX_gpu_req_if.slave gpu_req_if, `ifdef EXT_TEX_ENABLE - VX_tex_csr_if tex_csr_if, - - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.master dcache_req_if, + VX_dcache_rsp_if.slave dcache_rsp_if, + VX_tex_csr_if.slave tex_csr_if, `endif // Outputs diff --git a/hw/rtl/VX_lsu_unit.sv b/hw/rtl/VX_lsu_unit.sv index 6788a76e..cf4d58cf 100644 --- a/hw/rtl/VX_lsu_unit.sv +++ b/hw/rtl/VX_lsu_unit.sv @@ -24,7 +24,7 @@ module VX_lsu_unit #( localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE); - localparam ADDR_TYPEW = `NC_FLAG_BITS + `SM_ENABLE; + localparam ADDR_TYPEW = `NC_TAG_BIT + `SM_ENABLE; `STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) `STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) diff --git a/hw/rtl/VX_mem_unit.sv b/hw/rtl/VX_mem_unit.sv index 7c53ccc8..56de47ef 100644 --- a/hw/rtl/VX_mem_unit.sv +++ b/hw/rtl/VX_mem_unit.sv @@ -206,6 +206,7 @@ module VX_mem_unit # ( .LANES (`NUM_THREADS), .DATA_SIZE (4), .TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH), + .TAG_SEL_IDX (0), // SM flag .TYPE ("P"), .BUFFERED_REQ (2), .BUFFERED_RSP (1) diff --git a/hw/rtl/cache/VX_shared_mem.sv b/hw/rtl/cache/VX_shared_mem.sv index f3af3139..46ea0cfc 100644 --- a/hw/rtl/cache/VX_shared_mem.sv +++ b/hw/rtl/cache/VX_shared_mem.sv @@ -229,7 +229,7 @@ module VX_shared_mem #( core_rsp_data_in = 'x; bank_rsp_sel_n = bank_rsp_sel_r; for (integer i = 0; i < NUM_BANKS; i++) begin - if (per_bank_core_req_valid[i] + if (core_req_read_mask[i] && (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin core_rsp_valids_in[per_bank_core_req_tid[i]] = 1; core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i]; diff --git a/hw/rtl/tex_unit/VX_tex_mem.sv b/hw/rtl/tex_unit/VX_tex_mem.sv index 059f01e6..91aa0438 100644 --- a/hw/rtl/tex_unit/VX_tex_mem.sv +++ b/hw/rtl/tex_unit/VX_tex_mem.sv @@ -8,8 +8,8 @@ module VX_tex_mem #( input wire reset, // memory interface - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.master dcache_req_if, + VX_dcache_rsp_if.slave dcache_rsp_if, // inputs input wire req_valid,