From 2d69ca5d676f945ca16742a30da4218bc561d7f1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 2 Jan 2021 16:00:00 -0500 Subject: [PATCH] scratchpad optimization for stack access using custom bank offset aligned to stack size --- hw/rtl/VX_cluster.v | 2 +- hw/rtl/VX_config.vh | 9 +- hw/rtl/VX_define.vh | 42 ++-- hw/rtl/VX_mem_unit.v | 9 +- hw/rtl/VX_platform.vh | 6 + hw/rtl/VX_print_instr.vh | 260 +++++++++++----------- hw/rtl/Vortex.v | 2 +- hw/rtl/cache/VX_bank.v | 84 ++++--- hw/rtl/cache/VX_cache.v | 48 ++-- hw/rtl/cache/VX_cache_config.vh | 55 ++--- hw/rtl/cache/VX_cache_core_req_bank_sel.v | 35 +-- hw/rtl/cache/VX_data_access.v | 38 ++-- hw/rtl/cache/VX_data_store.v | 18 +- hw/rtl/cache/VX_miss_resrv.v | 2 +- hw/rtl/cache/VX_tag_access.v | 20 +- hw/rtl/cache/VX_tag_store.v | 18 +- hw/unit_tests/cache/Makefile | 2 +- 17 files changed, 339 insertions(+), 311 deletions(-) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index f06fa702..68af5f73 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -152,7 +152,7 @@ module VX_cluster #( VX_cache #( .CACHE_ID (`L2CACHE_ID), .CACHE_SIZE (`L2CACHE_SIZE), - .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), + .CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE), .NUM_BANKS (`L2NUM_BANKS), .WORD_SIZE (`L2WORD_SIZE), .NUM_REQS (`NUM_CORES), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 15568c7b..e1c6241b 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -248,7 +248,7 @@ // Size of LSU Request Queue `ifndef LSUQ_SIZE -`define LSUQ_SIZE 8 +`define LSUQ_SIZE (`NUM_WARPS * `NUM_THREADS) `endif // Size of FPU Request Queue @@ -327,9 +327,14 @@ // SM Configurable Knobs ////////////////////////////////////////////////////// +// per thread stack size +`ifndef STACK_SIZE +`define STACK_SIZE 1024 +`endif + // Size of cache in bytes `ifndef SMEM_SIZE -`define SMEM_SIZE (1024 * `NUM_WARPS * `NUM_THREADS) +`define SMEM_SIZE (`STACK_SIZE * `NUM_WARPS * `NUM_THREADS) `endif // Number of banks diff --git 
a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 998edb75..e0b728b1 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -246,7 +246,7 @@ `define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0) // Block size in bytes -`define IBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE) +`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE) // Word size in bytes `define IWORD_SIZE 4 @@ -267,13 +267,13 @@ `define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS) // DRAM request data bits -`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8) +`define IDRAM_LINE_WIDTH (`ICACHE_LINE_SIZE * 8) // DRAM request address bits -`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE)) +`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE)) // DRAM byte enable bits -`define IDRAM_BYTEEN_WIDTH `IBANK_LINE_SIZE +`define IDRAM_BYTEEN_WIDTH `ICACHE_LINE_SIZE // DRAM request tag bits `define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH @@ -287,7 +287,7 @@ `define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1) // Block size in bytes -`define DBANK_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE) +`define DCACHE_LINE_SIZE (`L2_ENABLE ? 
`L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE) // Word size in bytes `define DWORD_SIZE 4 @@ -299,13 +299,13 @@ `define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) // DRAM request data bits -`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8) +`define DDRAM_LINE_WIDTH (`DCACHE_LINE_SIZE * 8) // DRAM request address bits -`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE)) +`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE)) // DRAM byte enable bits -`define DDRAM_BYTEEN_WIDTH `DBANK_LINE_SIZE +`define DDRAM_BYTEEN_WIDTH `DCACHE_LINE_SIZE // DRAM request tag bits `define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH @@ -319,16 +319,18 @@ `define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2) // Block size in bytes -`define SBANK_LINE_SIZE 4 +`define SCACHE_LINE_SIZE 4 // Word size in bytes `define SWORD_SIZE 4 +`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SCACHE_LINE_SIZE) + // Core request size `define SNUM_REQUESTS `NUM_THREADS // DRAM request address bits -`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE)) +`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SCACHE_LINE_SIZE)) // DRAM request tag bits `define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH @@ -342,22 +344,22 @@ `define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID) // Block size in bytes -`define L2BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE +`define L2CACHE_LINE_SIZE `GLOBAL_BLOCK_SIZE // Word size in bytes -`define L2WORD_SIZE `DBANK_LINE_SIZE +`define L2WORD_SIZE `DCACHE_LINE_SIZE // Core request tag bits `define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) // DRAM request data bits -`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8) +`define L2DRAM_LINE_WIDTH (`L2CACHE_LINE_SIZE * 8) // DRAM request address bits -`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE)) +`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE)) // DRAM byte enable bits -`define L2DRAM_BYTEEN_WIDTH `L2BANK_LINE_SIZE +`define L2DRAM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE // DRAM request tag bits 
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES))) @@ -368,22 +370,22 @@ `define L3CACHE_ID 0 // Block size in bytes -`define L3BANK_LINE_SIZE `GLOBAL_BLOCK_SIZE +`define L3CACHE_LINE_SIZE `GLOBAL_BLOCK_SIZE // Word size in bytes -`define L3WORD_SIZE `L2BANK_LINE_SIZE +`define L3WORD_SIZE `L2CACHE_LINE_SIZE // Core request tag bits `define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS)) // DRAM request data bits -`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8) +`define L3DRAM_LINE_WIDTH (`L3CACHE_LINE_SIZE * 8) // DRAM request address bits -`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE)) +`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE)) // DRAM byte enable bits -`define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE +`define L3DRAM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE // DRAM request tag bits `define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS))) diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 5b11981c..b6d94d7c 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -84,7 +84,7 @@ module VX_mem_unit # ( VX_cache #( .CACHE_ID (`ICACHE_ID), .CACHE_SIZE (`ICACHE_SIZE), - .BANK_LINE_SIZE (`IBANK_LINE_SIZE), + .CACHE_LINE_SIZE (`ICACHE_LINE_SIZE), .NUM_BANKS (`INUM_BANKS), .WORD_SIZE (`IWORD_SIZE), .NUM_REQS (`INUM_REQUESTS), @@ -142,7 +142,7 @@ module VX_mem_unit # ( VX_cache #( .CACHE_ID (`DCACHE_ID), .CACHE_SIZE (`DCACHE_SIZE), - .BANK_LINE_SIZE (`DBANK_LINE_SIZE), + .CACHE_LINE_SIZE (`DCACHE_LINE_SIZE), .NUM_BANKS (`DNUM_BANKS), .WORD_SIZE (`DWORD_SIZE), .NUM_REQS (`DNUM_REQUESTS), @@ -202,7 +202,7 @@ module VX_mem_unit # ( VX_cache #( .CACHE_ID (`SCACHE_ID), .CACHE_SIZE (`SMEM_SIZE), - .BANK_LINE_SIZE (`SBANK_LINE_SIZE), + .CACHE_LINE_SIZE (`SCACHE_LINE_SIZE), .NUM_BANKS (`SNUM_BANKS), .WORD_SIZE (`SWORD_SIZE), .NUM_REQS (`SNUM_REQUESTS), @@ -215,7 +215,8 @@ module VX_mem_unit # ( .WRITE_ENABLE (1), .CORE_TAG_WIDTH 
(`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), - .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) + .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH), + .BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET) ) smem ( `SCOPE_BIND_VX_mem_unit_smem diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 1b6b4717..953aed86 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -66,4 +66,10 @@ `define UP(x) (((x) > 0) ? x : 1) +`define SAFE_RNG(h,l) `MAX(h,l) : l + +`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-(s))] // top s bits of x; (s) guards expression arguments + +`define LTRIM(x,s) x[(s)-1:0] // low s bits of x; (s) guards expression arguments + `endif \ No newline at end of file diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 00c373d0..b76c6368 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -3,142 +3,140 @@ `include "VX_define.vh" -task print_ex_type; - input [`EX_BITS-1:0] ex; - begin - case (ex) - `EX_ALU: $write("ALU"); - `EX_LSU: $write("LSU"); - `EX_CSR: $write("CSR"); - `EX_MUL: $write("MUL"); - `EX_FPU: $write("FPU"); - `EX_GPU: $write("GPU"); - default: $write("NOP"); - endcase - end +task print_ex_type ( + input [`EX_BITS-1:0] ex +); + case (ex) + `EX_ALU: $write("ALU"); + `EX_LSU: $write("LSU"); + `EX_CSR: $write("CSR"); + `EX_MUL: $write("MUL"); + `EX_FPU: $write("FPU"); + `EX_GPU: $write("GPU"); + default: $write("NOP"); + endcase endtask -task print_ex_op; - input [`EX_BITS-1:0] ex_type; - input [`OP_BITS-1:0] op_type; - input [`MOD_BITS-1:0] op_mod; - begin - case (ex_type) - `EX_ALU: begin - if (`IS_BR_MOD(op_mod)) begin - case (`BR_BITS'(op_type)) - `BR_EQ: $write("BEQ"); - `BR_NE: $write("BNE"); - `BR_LT: $write("BLT"); - `BR_GE: $write("BGE"); - `BR_LTU: $write("BLTU"); - `BR_GEU: $write("BGEU"); - `BR_JAL: $write("JAL"); - `BR_JALR: $write("JALR"); - `BR_ECALL: $write("ECALL"); - `BR_EBREAK:$write("EBREAK"); - `BR_MRET: $write("MRET"); - `BR_SRET: $write("SRET"); - `BR_DRET: $write("DRET"); - default: $write("?"); - endcase - end else begin - case (`ALU_BITS'(op_type)) - `ALU_ADD: $write("ADD"); - `ALU_SUB: 
$write("SUB"); - `ALU_SLL: $write("SLL"); - `ALU_SRL: $write("SRL"); - `ALU_SRA: $write("SRA"); - `ALU_SLT: $write("SLT"); - `ALU_SLTU: $write("SLTU"); - `ALU_XOR: $write("XOR"); - `ALU_OR: $write("OR"); - `ALU_AND: $write("AND"); - `ALU_LUI: $write("LUI"); - `ALU_AUIPC: $write("AUIPC"); - default: $write("?"); - endcase - end - end - `EX_LSU: begin - case (`LSU_BITS'(op_type)) - `LSU_LB: $write("LB"); - `LSU_LH: $write("LH"); - `LSU_LW: $write("LW"); - `LSU_LBU: $write("LBU"); - `LSU_LHU: $write("LHU"); - `LSU_SB: $write("SB"); - `LSU_SH: $write("SH"); - `LSU_SW: $write("SW"); - `LSU_SBU: $write("SBU"); - `LSU_SHU: $write("SHU"); - default: $write("?"); - endcase - end - `EX_CSR: begin - case (`CSR_BITS'(op_type)) - `CSR_RW: $write("CSRW"); - `CSR_RS: $write("CSRS"); - `CSR_RC: $write("CSRC"); - default: $write("?"); - endcase - end - `EX_MUL: begin - case (`MUL_BITS'(op_type)) - `MUL_MUL: $write("MUL"); - `MUL_MULH: $write("MULH"); - `MUL_MULHSU:$write("MULHSU"); - `MUL_MULHU: $write("MULHU"); - `MUL_DIV: $write("DIV"); - `MUL_DIVU: $write("DIVU"); - `MUL_REM: $write("REM"); - `MUL_REMU: $write("REMU"); +task print_ex_op ( + input [`EX_BITS-1:0] ex_type, + input [`OP_BITS-1:0] op_type, + input [`MOD_BITS-1:0] op_mod +); + case (ex_type) + `EX_ALU: begin + if (`IS_BR_MOD(op_mod)) begin + case (`BR_BITS'(op_type)) + `BR_EQ: $write("BEQ"); + `BR_NE: $write("BNE"); + `BR_LT: $write("BLT"); + `BR_GE: $write("BGE"); + `BR_LTU: $write("BLTU"); + `BR_GEU: $write("BGEU"); + `BR_JAL: $write("JAL"); + `BR_JALR: $write("JALR"); + `BR_ECALL: $write("ECALL"); + `BR_EBREAK:$write("EBREAK"); + `BR_MRET: $write("MRET"); + `BR_SRET: $write("SRET"); + `BR_DRET: $write("DRET"); default: $write("?"); - endcase - end - `EX_FPU: begin - case (`FPU_BITS'(op_type)) - `FPU_ADD: $write("ADD"); - `FPU_SUB: $write("SUB"); - `FPU_MUL: $write("MUL"); - `FPU_DIV: $write("DIV"); - `FPU_SQRT: $write("SQRT"); - `FPU_MADD: $write("MADD"); - `FPU_NMSUB: $write("NMSUB"); - `FPU_NMADD: 
$write("NMADD"); - `FPU_CVTWS: $write("CVTWS"); - `FPU_CVTWUS:$write("CVTWUS"); - `FPU_CVTSW: $write("CVTSW"); - `FPU_CVTSWU:$write("CVTSWU"); - `FPU_CLASS: $write("CLASS"); - `FPU_CMP: $write("CMP"); - `FPU_MISC: begin - case (op_mod) - 0: $write("SGNJ"); - 1: $write("SGNJN"); - 2: $write("SGNJX"); - 3: $write("MIN"); - 4: $write("MAX"); - 5: $write("MVXW"); - 6: $write("MVWX"); - endcase - end + endcase + end else begin + case (`ALU_BITS'(op_type)) + `ALU_ADD: $write("ADD"); + `ALU_SUB: $write("SUB"); + `ALU_SLL: $write("SLL"); + `ALU_SRL: $write("SRL"); + `ALU_SRA: $write("SRA"); + `ALU_SLT: $write("SLT"); + `ALU_SLTU: $write("SLTU"); + `ALU_XOR: $write("XOR"); + `ALU_OR: $write("OR"); + `ALU_AND: $write("AND"); + `ALU_LUI: $write("LUI"); + `ALU_AUIPC: $write("AUIPC"); default: $write("?"); - endcase + endcase end - `EX_GPU: begin - case (`GPU_BITS'(op_type)) - `GPU_TMC: $write("TMC"); - `GPU_WSPAWN:$write("WSPAWN"); - `GPU_SPLIT: $write("SPLIT"); - `GPU_JOIN: $write("JOIN"); - `GPU_BAR: $write("BAR"); - default: $write("?"); - endcase - end - default: $write("?"); - endcase - end + end + `EX_LSU: begin + case (`LSU_BITS'(op_type)) + `LSU_LB: $write("LB"); + `LSU_LH: $write("LH"); + `LSU_LW: $write("LW"); + `LSU_LBU: $write("LBU"); + `LSU_LHU: $write("LHU"); + `LSU_SB: $write("SB"); + `LSU_SH: $write("SH"); + `LSU_SW: $write("SW"); + `LSU_SBU: $write("SBU"); + `LSU_SHU: $write("SHU"); + default: $write("?"); + endcase + end + `EX_CSR: begin + case (`CSR_BITS'(op_type)) + `CSR_RW: $write("CSRW"); + `CSR_RS: $write("CSRS"); + `CSR_RC: $write("CSRC"); + default: $write("?"); + endcase + end + `EX_MUL: begin + case (`MUL_BITS'(op_type)) + `MUL_MUL: $write("MUL"); + `MUL_MULH: $write("MULH"); + `MUL_MULHSU:$write("MULHSU"); + `MUL_MULHU: $write("MULHU"); + `MUL_DIV: $write("DIV"); + `MUL_DIVU: $write("DIVU"); + `MUL_REM: $write("REM"); + `MUL_REMU: $write("REMU"); + default: $write("?"); + endcase + end + `EX_FPU: begin + case (`FPU_BITS'(op_type)) + `FPU_ADD: 
$write("ADD"); + `FPU_SUB: $write("SUB"); + `FPU_MUL: $write("MUL"); + `FPU_DIV: $write("DIV"); + `FPU_SQRT: $write("SQRT"); + `FPU_MADD: $write("MADD"); + `FPU_NMSUB: $write("NMSUB"); + `FPU_NMADD: $write("NMADD"); + `FPU_CVTWS: $write("CVTWS"); + `FPU_CVTWUS:$write("CVTWUS"); + `FPU_CVTSW: $write("CVTSW"); + `FPU_CVTSWU:$write("CVTSWU"); + `FPU_CLASS: $write("CLASS"); + `FPU_CMP: $write("CMP"); + `FPU_MISC: begin + case (op_mod) + 0: $write("SGNJ"); + 1: $write("SGNJN"); + 2: $write("SGNJX"); + 3: $write("MIN"); + 4: $write("MAX"); + 5: $write("MVXW"); + 6: $write("MVWX"); + endcase + end + default: $write("?"); + endcase + end + `EX_GPU: begin + case (`GPU_BITS'(op_type)) + `GPU_TMC: $write("TMC"); + `GPU_WSPAWN:$write("WSPAWN"); + `GPU_SPLIT: $write("SPLIT"); + `GPU_JOIN: $write("JOIN"); + `GPU_BAR: $write("BAR"); + default: $write("?"); + endcase + end + default: $write("?"); + endcase endtask `endif diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 7da97c5b..e74d62ae 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -154,7 +154,7 @@ module Vortex ( VX_cache #( .CACHE_ID (`L3CACHE_ID), .CACHE_SIZE (`L3CACHE_SIZE), - .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), + .CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE), .NUM_BANKS (`L3NUM_BANKS), .WORD_SIZE (`L3WORD_SIZE), .NUM_REQS (`NUM_CLUSTERS), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 4660ef3d..776ab52a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -7,7 +7,7 @@ module VX_bank #( // Size of cache in bytes parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of bankS parameter NUM_BANKS = 1, // Size of a word in bytes @@ -40,7 +40,10 @@ module VX_bank #( parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0, + + // bank offset from beginning of index range + parameter BANK_ADDR_OFFSET = 0 ) ( `SCOPE_IO_VX_bank @@ 
-74,17 +77,18 @@ module VX_bank #( // DRAM request output wire dram_req_valid, output wire dram_req_rw, - output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, + output wire [CACHE_LINE_SIZE-1:0] dram_req_byteen, output wire [`LINE_ADDR_WIDTH-1:0] dram_req_addr, - output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, + output wire [`CACHE_LINE_WIDTH-1:0] dram_req_data, input wire dram_req_ready, // DRAM response input wire dram_rsp_valid, input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr, - input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, + input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data, output wire dram_rsp_ready ); + `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; @@ -105,7 +109,7 @@ module VX_bank #( wire drsq_empty; wire [`LINE_ADDR_WIDTH-1:0] drsq_addr_st0; - wire [`BANK_LINE_WIDTH-1:0] drsq_filldata_st0; + wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_st0; wire drsq_push = dram_rsp_valid && dram_rsp_ready; @@ -146,14 +150,24 @@ module VX_bank #( wire creq_rw_st0; wire [WORD_SIZE-1:0] creq_byteen_st0; `IGNORE_WARNINGS_BEGIN - wire [`WORD_ADDR_WIDTH-1:0] creq_addr_st0; -`IGNORE_WARNINGS_END + wire [`WORD_ADDR_WIDTH-1:0] creq_addr_unqual; +`IGNORE_WARNINGS_END + wire [`LINE_ADDR_WIDTH-1:0] creq_addr_st0; + wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel_st0; wire [`WORD_WIDTH-1:0] creq_writeword_st0; wire [CORE_TAG_WIDTH-1:0] creq_tag_st0; wire creq_push = (| core_req_valid) && core_req_ready; assign core_req_ready = !creq_full; + if (BANK_ADDR_OFFSET == 0) begin + assign creq_addr_st0 = `LINE_SELECT_ADDR0(creq_addr_unqual); + end else begin + assign creq_addr_st0 = `LINE_SELECT_ADDRX(creq_addr_unqual); + end + + assign creq_wsel_st0 = creq_addr_unqual[`UP(`WORD_SELECT_BITS)-1:0]; + VX_fifo_queue #( .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), .SIZE (CREQ_SIZE), @@ -165,7 +179,7 @@ module VX_bank #( .push (creq_push), .pop (creq_pop), .data_in ({core_req_tag, core_req_tid, core_req_rw, 
core_req_byteen, core_req_addr, core_req_data}), - .data_out({creq_tag_st0, creq_tid_st0, creq_rw_st0, creq_byteen_st0, creq_addr_st0, creq_writeword_st0}), + .data_out({creq_tag_st0, creq_tid_st0, creq_rw_st0, creq_byteen_st0, creq_addr_unqual, creq_writeword_st0}), .empty (creq_empty), .full (creq_full), `UNUSED_PIN (size) @@ -179,7 +193,7 @@ module VX_bank #( wire mshr_valid_st0; wire [`REQS_BITS-1:0] mshr_tid_st0; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] mshr_wsel_st0; + wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel_st0; wire [`WORD_WIDTH-1:0] mshr_writeword_st0; wire [`REQ_TAG_WIDTH-1:0] mshr_tag_st0; wire mshr_rw_st0; @@ -190,9 +204,9 @@ module VX_bank #( wire is_mshr_st0; wire valid_st0; wire [`LINE_ADDR_WIDTH-1:0] addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0; + wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0; wire [`WORD_WIDTH-1:0] writeword_st0; - wire [`BANK_LINE_WIDTH-1:0] writedata_st0; + wire [`CACHE_LINE_WIDTH-1:0] writedata_st0; wire [`REQ_TAG_WIDTH-1:0] tag_st0; wire mem_rw_st0; wire [WORD_SIZE-1:0] byteen_st0; @@ -202,12 +216,12 @@ module VX_bank #( wire is_mshr_st1; wire valid_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st1; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; + wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st1; wire [`WORD_WIDTH-1:0] readword_st1; - wire [`BANK_LINE_WIDTH-1:0] readdata_st1; - wire [BANK_LINE_SIZE-1:0] dirtyb_st1; + wire [`CACHE_LINE_WIDTH-1:0] readdata_st1; + wire [CACHE_LINE_SIZE-1:0] dirtyb_st1; wire [`WORD_WIDTH-1:0] writeword_st1; - wire [`BANK_LINE_WIDTH-1:0] writedata_st1; + wire [`CACHE_LINE_WIDTH-1:0] writedata_st1; wire [`TAG_SELECT_BITS-1:0] readtag_st1; wire miss_st1; wire force_miss_st1; @@ -227,18 +241,18 @@ module VX_bank #( wire valid_st12; wire writeen_st12; wire [`LINE_ADDR_WIDTH-1:0] addr_st12; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st12; + wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st12; wire [WORD_SIZE-1:0] byteen_st12; wire [`WORD_WIDTH-1:0] writeword_st12; wire 
[`REQ_TAG_WIDTH-1:0] tag_st12; wire valid_st2; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; + wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st2; wire [`WORD_WIDTH-1:0] readword_st2; wire [`WORD_WIDTH-1:0] writeword_st2; - wire [`BANK_LINE_WIDTH-1:0] readdata_st2; - wire [`BANK_LINE_WIDTH-1:0] writedata_st2; - wire [BANK_LINE_SIZE-1:0] dirtyb_st2; + wire [`CACHE_LINE_WIDTH-1:0] readdata_st2; + wire [`CACHE_LINE_WIDTH-1:0] writedata_st2; + wire [CACHE_LINE_SIZE-1:0] dirtyb_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire is_fill_st2; wire is_mshr_st2; @@ -298,14 +312,15 @@ module VX_bank #( assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : drsq_pop_unqual ? drsq_addr_st0 : - creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : + creq_pop_unqual ? creq_addr_st0 : 0; - if (`WORD_SELECT_WIDTH != 0) begin - assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : + if (`WORD_SELECT_BITS != 0) begin + assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_pop_unqual ? mshr_wsel_st0 : 0; end else begin + `UNUSED_VAR (creq_wsel_st0) `UNUSED_VAR (mshr_wsel_st0) assign wsel_st0 = 0; end @@ -349,7 +364,7 @@ if (DRAM_ENABLE) begin || (valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st0)); VX_pipe_register #( - .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), @@ -379,10 +394,11 @@ if (DRAM_ENABLE) begin .CACHE_ID (CACHE_ID), .CORE_TAG_ID_BITS(CORE_TAG_ID_BITS), .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .WRITE_ENABLE (WRITE_ENABLE) + .WRITE_ENABLE (WRITE_ENABLE), + .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) ) tag_access ( .clk (clk), .reset (reset), @@ -454,7 
+470,7 @@ end else begin assign wsel_st1 = wsel_st0; assign writeword_st1 = writeword_st0; assign writedata_st1 = writedata_st0; - assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG]; + assign addr_st1 = creq_addr_st0; assign tag_st1 = tag_st0; assign mem_rw_st1 = mem_rw_st0; assign byteen_st1 = byteen_st0; @@ -482,7 +498,7 @@ end else begin end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg2 ( .clk (clk), @@ -500,7 +516,7 @@ end end else begin VX_pipe_register #( - .DATAW (BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .DATAW (CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), .RESETW (0) ) pipe_reg2b ( .clk (clk), @@ -526,7 +542,7 @@ end .CACHE_ID (CACHE_ID), .CORE_TAG_ID_BITS(CORE_TAG_ID_BITS), .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .DRAM_ENABLE (DRAM_ENABLE), .WORD_SIZE (WORD_SIZE), @@ -595,7 +611,7 @@ end .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), @@ -710,11 +726,11 @@ end wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; - wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st2 : {BANK_LINE_SIZE{1'b1}}; + wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? 
dirtyb_st2 : {CACHE_LINE_SIZE{1'b1}}; if (DRAM_ENABLE) begin VX_fifo_queue #( - .DATAW (1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH), + .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE), .BUFFERED (1), .FASTRAM (1) diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 29bad532..03cf8071 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -6,7 +6,7 @@ module VX_cache #( // Size of cache in bytes parameter CACHE_SIZE = 8092, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 16, + parameter CACHE_LINE_SIZE = 16, // Number of banks parameter NUM_BANKS = 4, // Size of a word in bytes @@ -42,7 +42,10 @@ module VX_cache #( parameter CORE_TAG_ID_BITS = 0, // dram request tag size - parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)) + parameter DRAM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)), + + // bank offset from beginning of index range + parameter BANK_ADDR_OFFSET = 0 ) ( `SCOPE_IO_VX_cache @@ -72,20 +75,21 @@ module VX_cache #( // DRAM request output wire dram_req_valid, output wire dram_req_rw, - output wire [BANK_LINE_SIZE-1:0] dram_req_byteen, + output wire [CACHE_LINE_SIZE-1:0] dram_req_byteen, output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, - output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, + output wire [`CACHE_LINE_WIDTH-1:0] dram_req_data, output wire [DRAM_TAG_WIDTH-1:0] dram_req_tag, input wire dram_req_ready, // DRAM response input wire dram_rsp_valid, - input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, + input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data, input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready ); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) + `UNUSED_VAR (dram_rsp_tag) wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; @@ -104,9 +108,9 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_dram_req_valid; wire [NUM_BANKS-1:0] per_bank_dram_req_rw; - wire 
[NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen; + wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_dram_req_byteen; wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr; - wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data; + wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_dram_req_data; wire [NUM_BANKS-1:0] per_bank_dram_req_ready; wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready; @@ -119,11 +123,12 @@ module VX_cache #( `endif VX_cache_core_req_bank_sel #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH) + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQS (NUM_REQS), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) ) cache_core_req_bank_sel ( .clk (clk), .reset (reset), @@ -174,13 +179,13 @@ module VX_cache #( wire curr_bank_dram_req_valid; wire curr_bank_dram_req_rw; - wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen; + wire [CACHE_LINE_SIZE-1:0] curr_bank_dram_req_byteen; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr; - wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data; + wire[`CACHE_LINE_WIDTH-1:0] curr_bank_dram_req_data; wire curr_bank_dram_req_ready; wire curr_bank_dram_rsp_valid; - wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; + wire [`CACHE_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire curr_bank_dram_rsp_ready; @@ -208,7 +213,7 @@ module VX_cache #( if (NUM_BANKS == 1) begin assign per_bank_dram_req_addr[i] = curr_bank_dram_req_addr; end else begin - assign per_bank_dram_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_req_addr, i); + assign per_bank_dram_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_req_addr, i); end assign per_bank_dram_req_data[i] = curr_bank_dram_req_data; assign curr_bank_dram_req_ready = per_bank_dram_req_ready[i]; @@ -219,7 +224,7 @@ module 
VX_cache #( assign curr_bank_dram_rsp_addr = dram_rsp_tag; end else begin assign curr_bank_dram_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); - assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag); + assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag); end assign curr_bank_dram_rsp_data = dram_rsp_data; assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; @@ -228,7 +233,7 @@ module VX_cache #( .BANK_ID (i), .CACHE_ID (CACHE_ID), .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), @@ -241,7 +246,8 @@ module VX_cache #( .WRITE_ENABLE (WRITE_ENABLE), .WRITE_THROUGH (WRITE_THROUGH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) ) bank ( `SCOPE_BIND_VX_cache_bank(i) @@ -309,14 +315,14 @@ module VX_cache #( ); if (DRAM_ENABLE) begin - wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH)-1:0] data_in; + wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; i++) begin assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]}; end VX_stream_arbiter #( .NUM_REQS (NUM_BANKS), - .DATAW (`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), .BUFFERED (1) ) dram_req_arb ( .clk (clk), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 469d36d7..36be1033 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -15,66 +15,55 @@ `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) // data metadata word_sel -`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + 
`UP(`WORD_SELECT_WIDTH)) - -`define BANK_BITS `LOG2UP(NUM_BANKS) +`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_BITS)) `define WORD_WIDTH (8 * WORD_SIZE) -`define BANK_LINE_WIDTH (8 * BANK_LINE_SIZE) +`define CACHE_LINE_WIDTH (8 * CACHE_LINE_SIZE) -`define BANK_SIZE (CACHE_SIZE / NUM_BANKS) -`define BANK_LINE_COUNT (`BANK_SIZE / BANK_LINE_SIZE) -`define BANK_LINE_WORDS (BANK_LINE_SIZE / WORD_SIZE) +`define BANK_SIZE (CACHE_SIZE / NUM_BANKS) +`define LINES_PER_BANK (`BANK_SIZE / CACHE_LINE_SIZE) +`define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE) -// Offset select -`define OFFSET_ADDR_BITS `CLOG2(WORD_SIZE) -`define OFFSET_ADDR_START 0 -`define OFFSET_ADDR_END (`OFFSET_ADDR_START+`OFFSET_ADDR_BITS-1) +`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE) +`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE)) +`define DRAM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE)) +`define LINE_ADDR_WIDTH (`DRAM_ADDR_WIDTH-`BANK_SELECT_BITS) // Word select -`define WORD_SELECT_BITS `CLOG2(`BANK_LINE_WORDS) -`define WORD_SELECT_ADDR_START (1+`OFFSET_ADDR_END) +`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE) +`define WORD_SELECT_ADDR_START 0 `define WORD_SELECT_ADDR_END (`WORD_SELECT_ADDR_START+`WORD_SELECT_BITS-1) // Bank select `define BANK_SELECT_BITS `CLOG2(NUM_BANKS) -`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END) +`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END+BANK_ADDR_OFFSET) `define BANK_SELECT_ADDR_END (`BANK_SELECT_ADDR_START+`BANK_SELECT_BITS-1) // Line select -`define LINE_SELECT_BITS `CLOG2(`BANK_LINE_COUNT) +`define LINE_SELECT_BITS `CLOG2(`LINES_PER_BANK) `define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END) -`define LINE_SELECT_ADDR_END (`LINE_SELECT_ADDR_START+`LINE_SELECT_BITS-1) +`define LINE_SELECT_ADDR_END (`LINE_SELECT_ADDR_START-BANK_ADDR_OFFSET+`LINE_SELECT_BITS-1) // Tag select -`define TAG_SELECT_BITS (31-`LINE_SELECT_ADDR_END) +`define TAG_SELECT_BITS (`WORD_ADDR_WIDTH-1-`LINE_SELECT_ADDR_END) `define 
TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END) -`define TAG_SELECT_ADDR_END 31 +`define TAG_SELECT_ADDR_END (`WORD_ADDR_WIDTH-1) -`define WORD_SELECT_WIDTH `CLOG2(`BANK_LINE_WORDS) +`define BANK_SELECT_ADDR(x) x[`BANK_SELECT_ADDR_END : `BANK_SELECT_ADDR_START] -`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE)) +`define LINE_SELECT_ADDR0(x) x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START] +`define LINE_SELECT_ADDRX(x) {x[`WORD_ADDR_WIDTH-1 : `LINE_SELECT_ADDR_START], x[`BANK_SELECT_ADDR_START-1 : 1+`WORD_SELECT_ADDR_END]} -`define DRAM_ADDR_WIDTH (32-`CLOG2(BANK_LINE_SIZE)) - -`define LINE_ADDR_WIDTH (`DRAM_ADDR_WIDTH-`BANK_SELECT_BITS) - -`define BANK_SELECT_ADDR_RNG (`BANK_SELECT_BITS+`WORD_SELECT_BITS-1):`WORD_SELECT_BITS - -`define LINE_SELECT_ADDR_RNG `WORD_ADDR_WIDTH-1:(`BANK_SELECT_BITS + `WORD_SELECT_BITS) - -`define TAG_LINE_ADDR_RNG `LINE_ADDR_WIDTH-1:`LINE_SELECT_BITS - -`define BASE_ADDR_BITS (`WORD_SELECT_BITS+`OFFSET_ADDR_BITS) +`define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS] /////////////////////////////////////////////////////////////////////////////// `define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 
1 : NUM_REQS) -`define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS-1:0] +`define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET] -`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS] +`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1 : `BANK_SELECT_BITS] `define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 12f0d997..5fc4c91b 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -1,8 +1,8 @@ `include "VX_cache_config.vh" -module VX_cache_core_req_bank_sel #( +module VX_cache_core_req_bank_sel #( // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE= 1, // Size of a word in bytes parameter WORD_SIZE = 1, // Number of banks @@ -10,7 +10,10 @@ module VX_cache_core_req_bank_sel #( // Number of Word requests per cycle parameter NUM_REQS = 1, // core request tag size - parameter CORE_TAG_WIDTH = 1 + parameter CORE_TAG_WIDTH = 1, + + // bank offset from beginning of index range + parameter BANK_ADDR_OFFSET = 0 ) ( input wire clk, input wire reset, @@ -33,22 +36,22 @@ module VX_cache_core_req_bank_sel #( output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag, output wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data, input wire [NUM_BANKS-1:0] per_bank_core_req_ready -); +); if (NUM_BANKS > 1) begin - reg [NUM_BANKS-1:0] per_bank_core_req_valid_r; - reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; - reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; - reg [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; - reg [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; - reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; - reg [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; - reg [NUM_REQS-1:0] core_req_ready_r; - reg [NUM_BANKS-1:0] core_req_sel_r; - wire 
[NUM_REQS-1:0][`BANK_BITS-1:0] core_req_bid; + reg [NUM_BANKS-1:0] per_bank_core_req_valid_r; + reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; + reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; + reg [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; + reg [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; + reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; + reg [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; + reg [NUM_REQS-1:0] core_req_ready_r; + reg [NUM_BANKS-1:0] core_req_sel_r; + wire [NUM_REQS-1:0][`BANK_SELECT_BITS-1:0] core_req_bid; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_bid[i] = core_req_addr[i][`BANK_SELECT_ADDR_RNG]; + assign core_req_bid[i] = `BANK_SELECT_ADDR(core_req_addr[i]); end always @(*) begin @@ -79,7 +82,7 @@ module VX_cache_core_req_bank_sel #( for (integer j = 0; j < NUM_BANKS; ++j) begin for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid[i] && (core_req_bid[i] == `BANK_BITS'(j))) begin + if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin core_req_ready_r[i] = per_bank_core_req_ready[j]; core_req_sel_r[i] = 1; break; diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 7bc70fb9..4428679f 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -7,7 +7,7 @@ module VX_data_access #( // Size of cache in bytes parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks parameter NUM_BANKS = 1, // Size of a word in bytes @@ -44,29 +44,29 @@ module VX_data_access #( `IGNORE_WARNINGS_BEGIN input wire[`LINE_ADDR_WIDTH-1:0] raddr_in, `IGNORE_WARNINGS_END - input wire [`UP(`WORD_SELECT_WIDTH)-1:0] rwsel_in, + input wire [`UP(`WORD_SELECT_BITS)-1:0] rwsel_in, input wire [WORD_SIZE-1:0] rbyteen_in, output wire[`WORD_WIDTH-1:0] readword_out, - output wire [`BANK_LINE_WIDTH-1:0] readdata_out, - 
output wire [BANK_LINE_SIZE-1:0] dirtyb_out, + output wire [`CACHE_LINE_WIDTH-1:0] readdata_out, + output wire [CACHE_LINE_SIZE-1:0] dirtyb_out, // writing input wire writeen_in, `IGNORE_WARNINGS_BEGIN input wire[`LINE_ADDR_WIDTH-1:0] waddr_in, `IGNORE_WARNINGS_END - input wire [`UP(`WORD_SELECT_WIDTH)-1:0] wwsel_in, + input wire [`UP(`WORD_SELECT_BITS)-1:0] wwsel_in, input wire [WORD_SIZE-1:0] wbyteen_in, input wire wfill_in, input wire [`WORD_WIDTH-1:0] writeword_in, - input wire [`BANK_LINE_WIDTH-1:0] writedata_in + input wire [`CACHE_LINE_WIDTH-1:0] writedata_in ); - wire [BANK_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual; - wire [`BANK_LINE_WIDTH-1:0] read_data, readdata_qual; + wire [CACHE_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual; + wire [`CACHE_LINE_WIDTH-1:0] read_data, readdata_qual; - wire [BANK_LINE_SIZE-1:0] byte_enable; - wire [`BANK_LINE_WIDTH-1:0] write_data; + wire [CACHE_LINE_SIZE-1:0] byte_enable; + wire [`CACHE_LINE_WIDTH-1:0] write_data; wire write_enable; wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in[`LINE_SELECT_BITS-1:0]; @@ -76,7 +76,7 @@ module VX_data_access #( VX_data_store #( .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE) @@ -95,12 +95,12 @@ module VX_data_access #( .write_data (write_data) ); - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] wbyteen_qual; - wire [`BANK_LINE_WIDTH-1:0] writeword_qual; + wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wbyteen_qual; + wire [`CACHE_LINE_WIDTH-1:0] writeword_qual; - if (`WORD_SELECT_WIDTH != 0) begin - for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_WIDTH'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}}; + if (`WORD_SELECT_BITS != 0) begin + for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin + assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? 
wbyteen_in : {WORD_SIZE{1'b0}}; assign writeword_qual[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in; end end else begin @@ -109,13 +109,13 @@ module VX_data_access #( assign writeword_qual = writeword_in; end - assign byte_enable = wfill_in ? {BANK_LINE_SIZE{1'b1}} : wbyteen_qual; + assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual; assign write_data = wfill_in ? writedata_in : writeword_qual; assign write_enable = writeen_in && !stall; wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in; - for (genvar i = 0; i < BANK_LINE_SIZE; i++) begin + for (genvar i = 0; i < CACHE_LINE_SIZE; i++) begin assign dirtyb_qual[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i]; assign readdata_qual[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8]; end @@ -129,7 +129,7 @@ module VX_data_access #( assign readdata_out = readdata_qual; end - if (`WORD_SELECT_WIDTH != 0) begin + if (`WORD_SELECT_BITS != 0) begin wire [`WORD_WIDTH-1:0] readword = readdata_qual[rwsel_in * `WORD_WIDTH +: `WORD_WIDTH]; for (genvar i = 0; i < WORD_SIZE; i++) begin assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{rbyteen_in[i]}}; diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index c65e8dbd..55d2f6be 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -4,7 +4,7 @@ module VX_data_store #( // Size of cache in bytes parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks parameter NUM_BANKS = 1, // Size of a word in bytes @@ -18,18 +18,18 @@ module VX_data_store #( input wire write_enable, input wire write_fill, - input wire[BANK_LINE_SIZE-1:0] byte_enable, + input wire[CACHE_LINE_SIZE-1:0] byte_enable, input wire[`LINE_SELECT_BITS-1:0] write_addr, - input wire[`BANK_LINE_WIDTH-1:0] write_data, + input wire[`CACHE_LINE_WIDTH-1:0] write_data, input wire[`LINE_SELECT_BITS-1:0] 
read_addr, - output wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] read_dirtyb, - output wire[`BANK_LINE_WIDTH-1:0] read_data + output wire[`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] read_dirtyb, + output wire[`CACHE_LINE_WIDTH-1:0] read_data ); `UNUSED_VAR (reset) if (WRITE_ENABLE) begin - reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; + reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] dirtyb[`LINES_PER_BANK-1:0]; always @(posedge clk) begin if (write_enable) begin dirtyb[write_addr] <= write_fill ? 0 : (dirtyb[write_addr] | byte_enable); @@ -43,9 +43,9 @@ module VX_data_store #( end VX_dp_ram #( - .DATAW(BANK_LINE_SIZE * 8), - .SIZE(`BANK_LINE_COUNT), - .BYTEENW(BANK_LINE_SIZE), + .DATAW(CACHE_LINE_SIZE * 8), + .SIZE(`LINES_PER_BANK), + .BYTEENW(CACHE_LINE_SIZE), .RWCHECK(1) ) data ( .clk(clk), diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 1c07d14d..aea35057 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -5,7 +5,7 @@ module VX_miss_resrv #( parameter BANK_ID = 0, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks parameter NUM_BANKS = 1, // Size of a word in bytes diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 5004562d..64e016b6 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -2,22 +2,21 @@ module VX_tag_access #( parameter CACHE_ID = 0, - parameter BANK_ID = 0, - + parameter BANK_ID = 0, // Size of cache in bytes parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, - + parameter WORD_SIZE = 1, // Enable cache writeable parameter WRITE_ENABLE = 0, - // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0, + // bank offset from beginning of 
index range + parameter BANK_ADDR_OFFSET = 0 ) ( input wire clk, input wire reset, @@ -53,14 +52,15 @@ module VX_tag_access #( wire do_write; wire do_invalidate; - wire [`TAG_SELECT_BITS-1:0] addrtag = addr_in [`TAG_LINE_ADDR_RNG]; + wire [`TAG_SELECT_BITS-1:0] addrtag = `LINE_TAG_ADDR(addr_in); wire [`LINE_SELECT_BITS-1:0] addrline = addr_in [`LINE_SELECT_BITS-1:0]; VX_tag_store #( .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE) + .WORD_SIZE (WORD_SIZE), + .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_tag_store.v b/hw/rtl/cache/VX_tag_store.v index 57107f5c..7d9898cc 100644 --- a/hw/rtl/cache/VX_tag_store.v +++ b/hw/rtl/cache/VX_tag_store.v @@ -2,13 +2,15 @@ module VX_tag_store #( // Size of cache in bytes - parameter CACHE_SIZE = 1, + parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1 + parameter WORD_SIZE = 1, + // bank offset from beginning of index range + parameter BANK_ADDR_OFFSET = 0 ) ( input wire clk, input wire reset, @@ -24,12 +26,12 @@ module VX_tag_store #( output wire read_valid, output wire read_dirty ); - reg [`BANK_LINE_COUNT-1:0] dirty; - reg [`BANK_LINE_COUNT-1:0] valid; + reg [`LINES_PER_BANK-1:0] dirty; + reg [`LINES_PER_BANK-1:0] valid; always @(posedge clk) begin if (reset) begin - for (integer i = 0; i < `BANK_LINE_COUNT; i++) begin + for (integer i = 0; i < `LINES_PER_BANK; i++) begin valid[i] <= 0; dirty[i] <= 0; end @@ -47,7 +49,7 @@ module VX_tag_store #( VX_dp_ram #( .DATAW(`TAG_SELECT_BITS), - .SIZE(`BANK_LINE_COUNT), + .SIZE(`LINES_PER_BANK), .FASTRAM(1), .RWCHECK(1) ) tags ( diff --git a/hw/unit_tests/cache/Makefile b/hw/unit_tests/cache/Makefile index d909e56e..bf22bcbc 
100644 --- a/hw/unit_tests/cache/Makefile +++ b/hw/unit_tests/cache/Makefile @@ -1,4 +1,4 @@ -PARAM += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DBANK_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4 +PARAM += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4