From 90b50277d07ed6aabd1725d774a2d80cb9e6e9e9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Aug 2021 18:33:49 -0700 Subject: [PATCH] cache multi-porting fixes + optimization --- ci/regression.sh | 16 +++++--- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_define.vh | 4 +- hw/rtl/cache/VX_bank.v | 66 ++++++++++++++++++-------------- hw/rtl/cache/VX_cache.v | 66 +++++++++++++++++++------------- hw/rtl/cache/VX_core_rsp_merge.v | 2 +- hw/rtl/cache/VX_data_access.v | 4 +- hw/rtl/cache/VX_nc_bypass.v | 9 ++++- 8 files changed, 100 insertions(+), 69 deletions(-) diff --git a/ci/regression.sh b/ci/regression.sh index 11f5d21a..7821d626 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -46,8 +46,8 @@ debug() { echo "begin debugging tests..." -./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1" -./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1" +./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1" +./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1" ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1" echo "debugging tests done!" @@ -72,14 +72,18 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood # using FPNEW FPU core FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood +# adjust l1 block size to match l2 +CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1" + # test cache banking -CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo -CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo +CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr +CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr # test cache multi-porting CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr -CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr -CONFIGS="-DL2NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr +CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1" +CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr +CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr # test 128-bit MEM block CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 5124116e..1d16c7f7 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -38,7 +38,7 @@ `endif `ifndef L1_BLOCK_SIZE -`define L1_BLOCK_SIZE (`NUM_THREADS * 4) +`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? (`NUM_THREADS * 4) : `MEM_BLOCK_SIZE) `endif `ifndef STARTUP_ADDR diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 94287911..87f0001d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -251,7 +251,7 @@ `define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0) // Block size in bytes -`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE) +`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE // Word size in bytes `define IWORD_SIZE 4 @@ -289,7 +289,7 @@ `define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1) // Block size in bytes -`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE) +`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE // Word size in bytes `define DWORD_SIZE 4 diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 848a4278..f5497e0c 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -77,6 +77,7 @@ module VX_bank #( // Memory request output wire mem_req_valid, output wire mem_req_rw, + output wire [NUM_PORTS-1:0] mem_req_pmask, output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen, output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel, output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr, @@ -161,6 +162,7 @@ module VX_bank #( wire crsq_valid, crsq_ready, crsq_stall; wire mreq_alm_full; + // prevent read-during-write hazard when accessing tags/data block RAMs wire rdw_fill_hazard = valid_st0 && is_fill_st0; wire rdw_write_hazard = valid_st0 && write_st0 && ~creq_rw; @@ -174,14 +176,14 @@ module VX_bank #( wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable; wire mshr_ready = mshr_grant - && !rdw_fill_hazard // prevent read-during-write + && !rdw_fill_hazard // prevent read-during-write hazard && !crsq_stall; // ensure core response ready assign mem_rsp_ready = mrsq_grant && !crsq_stall; // ensure core response ready assign creq_ready = creq_grant - && !rdw_write_hazard // prevent read-during-write + && !rdw_write_hazard // prevent read-during-write hazard && !mreq_alm_full // ensure memory request ready && !mshr_alm_full // ensure mshr enqueue ready && !crsq_stall; // ensure core response ready @@ -198,6 +200,12 @@ module VX_bank #( end `endif + wire [`CACHE_LINE_WIDTH-1:0] wdata_sel; + assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data; + for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin + assign wdata_sel[i] = mem_rsp_data[i]; + end + VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH), .RESETW (1) @@ -212,7 +220,7 @@ module VX_bank #( mshr_enable, creq_fire && creq_rw, mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)), - (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : `CACHE_LINE_WIDTH'(creq_data), + wdata_sel, mshr_enable ? mshr_wsel : creq_wsel, creq_byteen, mshr_enable ? mshr_tid : creq_tid, @@ -265,6 +273,8 @@ module VX_bank #( // we have a core request hit assign miss_st0 = !is_fill_st0 && !tag_match_st0; + wire read_st0 = !is_fill_st0 && !write_st0; + VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1), .RESETW (1) @@ -302,19 +312,22 @@ module VX_bank #( if (`WORDS_PER_LINE > 1) begin reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r; reg [CACHE_LINE_SIZE-1:0] line_byteen_r; - always @(*) begin - line_wdata_r = 'x; - line_byteen_r = 0; - if (NUM_PORTS > 1) begin - for (integer p = 0; p < NUM_PORTS; p++) begin - if (creq_pmask[p]) begin - line_wdata_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[p]; - line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p]; + if (NUM_PORTS > 1) begin + always @(*) begin + line_wdata_r = 'x; + line_byteen_r = 0; + for (integer i = 0; i < NUM_PORTS; ++i) begin + if (pmask_st1[i]) begin + line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i]; + line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i]; end end - end else begin + end + end else begin + always @(*) begin line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}}; - line_byteen_r[wsel_st1[0] * WORD_SIZE +: WORD_SIZE] = byteen_st1[0]; + line_byteen_r = 0; + line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; end end assign line_wdata_st1 = line_wdata_r; @@ -360,8 +373,8 @@ module VX_bank #( wire mshr_allocate = creq_fire && ~creq_rw; wire mshr_replay = do_fill_st0 && ~crsq_stall; - wire mshr_lookup = valid_st0 && ~write_st0 && ~is_mshr_st0 && ~crsq_stall; - wire mshr_release = valid_st1 && read_st1 && ~is_mshr_st1 && ~miss_st1 && ~crsq_stall; + wire mshr_lookup = valid_st0 && read_st0 && !is_mshr_st0 && !crsq_stall; + wire mshr_release = valid_st1 && read_st1 && !is_mshr_st1 && !miss_st1 && !crsq_stall; wire mshr_not_full; @@ -435,15 +448,15 @@ module VX_bank #( assign crsq_tag = tag_st1; if (`WORDS_PER_LINE > 1) begin - for (genvar p = 0; p < NUM_PORTS; ++p) begin - assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH]; + for (genvar i = 0; i < NUM_PORTS; ++i) begin + assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH]; end end else begin assign crsq_data = rdata_st1; end VX_elastic_buffer #( - .DATAW ((CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), + .DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)), .SIZE (CRSQ_SIZE), .OUTPUT_REG (1 == NUM_BANKS) ) core_rsp_req ( @@ -462,6 +475,7 @@ module VX_bank #( wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data; wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen; wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel; + wire [NUM_PORTS-1:0] mreq_pmask; wire [`LINE_ADDR_WIDTH-1:0] mreq_addr; wire [MSHR_ADDR_WIDTH-1:0] mreq_id; @@ -474,19 +488,13 @@ module VX_bank #( assign mreq_rw = WRITE_ENABLE && write_st1; assign mreq_addr = addr_st1; assign mreq_id = mshr_id_st1; + assign mreq_pmask= pmask_st1; assign mreq_wsel = wsel_st1; + assign mreq_byteen = byteen_st1; assign mreq_data = creq_data_st1; - if (NUM_PORTS > 1) begin - for (genvar p = 0; p < NUM_PORTS; ++p) begin - assign mreq_byteen[p] = pmask_st1[p] ? byteen_st1[p] : WORD_SIZE'(0); - end - end else begin - assign mreq_byteen[0] = byteen_st1[0]; - end - VX_fifo_queue #( - .DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)), + .DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)), .SIZE (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-2) ) mem_req_queue ( @@ -494,8 +502,8 @@ module VX_bank #( .reset (reset), .push (mreq_push), .pop (mreq_pop), - .data_in ({mreq_rw, mreq_addr, mreq_id, mreq_byteen, mreq_wsel, mreq_data}), - .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}), + .data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}), .empty (mreq_empty), .alm_full (mreq_alm_full), `UNUSED_PIN (full), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index eca019bc..e36155eb 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -107,34 +107,41 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// - wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p; + wire [NUM_PORTS-1:0] mem_req_pmask_p; wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p; wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p; - wire mem_req_rw_p; + wire mem_req_rw_p; if (WRITE_ENABLE) begin + if (`WORDS_PER_LINE > 1) begin + reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r; + reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r; - reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r; - reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r; - - always @(*) begin - mem_req_byteen_r = 0; - mem_req_data_r = 'x; - for (integer p = 0; p < NUM_PORTS; ++p) begin - if (mem_req_byteen_p[p] != 0) begin - mem_req_byteen_r[mem_req_wsel_p[p] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[p]; - mem_req_data_r[mem_req_wsel_p[p] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[p]; + always @(*) begin + mem_req_byteen_r = 0; + mem_req_data_r = 'x; + for (integer i = 0; i < NUM_PORTS; ++i) begin + if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin + mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i]; + mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i]; + end end end + + assign mem_req_rw = mem_req_rw_p; + assign mem_req_byteen = mem_req_byteen_r; + assign mem_req_data = mem_req_data_r; + end else begin + `UNUSED_VAR (mem_req_pmask_p) + `UNUSED_VAR (mem_req_wsel_p) + assign mem_req_rw = mem_req_rw_p; + assign mem_req_byteen = mem_req_byteen_p; + assign mem_req_data = mem_req_data_p; end - - assign mem_req_rw = mem_req_rw_p; - assign mem_req_byteen = mem_req_byteen_r; - assign mem_req_data = mem_req_data_r; - end else begin - `UNUSED_VAR (mem_req_byteen_p) + `UNUSED_VAR (mem_req_pmask_p) `UNUSED_VAR (mem_req_wsel_p) `UNUSED_VAR (mem_req_data_p) `UNUSED_VAR (mem_req_rw_p) @@ -142,7 +149,6 @@ module VX_cache #( assign mem_req_rw = 0; assign mem_req_byteen = 'x; assign mem_req_data = 'x; - end @@ -169,7 +175,8 @@ module VX_cache #( wire mem_req_valid_nc; wire mem_req_rw_nc; wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc; - wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc; + wire [NUM_PORTS-1:0] mem_req_pmask_nc; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc; wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc; wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc; wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc; @@ -236,6 +243,7 @@ module VX_cache #( .mem_req_valid_in (mem_req_valid_nc), .mem_req_rw_in (mem_req_rw_nc), .mem_req_addr_in (mem_req_addr_nc), + .mem_req_pmask_in (mem_req_pmask_nc), .mem_req_byteen_in (mem_req_byteen_nc), .mem_req_wsel_in (mem_req_wsel_nc), .mem_req_data_in (mem_req_data_nc), @@ -246,6 +254,7 @@ module VX_cache #( .mem_req_valid_out (mem_req_valid), .mem_req_addr_out (mem_req_addr), .mem_req_rw_out (mem_req_rw_p), + .mem_req_pmask_out (mem_req_pmask_p), .mem_req_byteen_out (mem_req_byteen_p), .mem_req_wsel_out (mem_req_wsel_p), .mem_req_data_out (mem_req_data_p), @@ -282,6 +291,7 @@ module VX_cache #( assign mem_req_valid = mem_req_valid_nc; assign mem_req_addr = mem_req_addr_nc; assign mem_req_rw_p = mem_req_rw_nc; + assign mem_req_pmask_p = mem_req_pmask_nc; assign mem_req_byteen_p = mem_req_byteen_nc; assign mem_req_wsel_p = mem_req_wsel_nc; assign mem_req_data_p = mem_req_data_nc; @@ -360,7 +370,8 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_mem_req_valid; wire [NUM_BANKS-1:0] per_bank_mem_req_rw; - wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel; wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; @@ -433,6 +444,7 @@ module VX_cache #( wire curr_bank_mem_req_valid; wire curr_bank_mem_req_rw; + wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask; wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen; wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; @@ -469,6 +481,7 @@ module VX_cache #( // Memory request assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid; assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw; + assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask; assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen; assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel; if (NUM_BANKS == 1) begin @@ -547,6 +560,7 @@ module VX_cache #( // Memory request .mem_req_valid (curr_bank_mem_req_valid), .mem_req_rw (curr_bank_mem_req_rw), + .mem_req_pmask (curr_bank_mem_req_pmask), .mem_req_byteen (curr_bank_mem_req_byteen), .mem_req_wsel (curr_bank_mem_req_wsel), .mem_req_addr (curr_bank_mem_req_addr), @@ -591,9 +605,9 @@ module VX_cache #( .core_rsp_ready (core_rsp_ready_nc) ); - wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in; - for (genvar i = 0; i < NUM_BANKS; i++) begin - assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]}; + wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in; + for (genvar i = 0; i < NUM_BANKS; ++i) begin + assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]}; end wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; @@ -602,7 +616,7 @@ module VX_cache #( VX_stream_arbiter #( .NUM_REQS (NUM_BANKS), - .DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)), + .DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)), .BUFFERED (1) ) mem_req_arb ( .clk (clk), @@ -611,7 +625,7 @@ module VX_cache #( .data_in (data_in), .ready_in (per_bank_mem_req_ready), .valid_out (mem_req_valid_nc), - .data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}), + .data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}), .ready_out (mem_req_ready_nc) ); diff --git a/hw/rtl/cache/VX_core_rsp_merge.v b/hw/rtl/cache/VX_core_rsp_merge.v index 6fe84690..826cbb86 100644 --- a/hw/rtl/cache/VX_core_rsp_merge.v +++ b/hw/rtl/cache/VX_core_rsp_merge.v @@ -237,7 +237,7 @@ module VX_core_rsp_merge #( core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i]; core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; - bank_select_table[per_bank_core_rsp_tid[i][i]] = (1 << i); + bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i); end end end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 249d2eaf..36f33938 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -73,7 +73,7 @@ module VX_data_access #( .BYTEENW (BYTEENW), .NO_RWCHECK (1) ) data_store ( - .clk (clk), + .clk (clk), .addr (line_addr), .wren (wren), .wdata (wdata), @@ -89,7 +89,7 @@ module VX_data_access #( if (is_fill) begin dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data); end else begin - dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, write_data); + dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, wren, line_addr, write_data); end end if (readen && ~stall) begin diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index f1e19df7..9ab575e2 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -59,7 +59,8 @@ module VX_nc_bypass #( input wire mem_req_valid_in, input wire mem_req_rw_in, input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in, - input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in, + input wire [NUM_PORTS-1:0] mem_req_pmask_in, + input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in, input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in, input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in, input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in, @@ -69,6 +70,7 @@ module VX_nc_bypass #( output wire mem_req_valid_out, output wire mem_req_rw_out, output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out, + output wire [NUM_PORTS-1:0] mem_req_pmask_out, output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out, output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out, output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out, @@ -188,7 +190,7 @@ module VX_nc_bypass #( assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel; assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH]; - if (D != 0) begin + if (D != 0) begin reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r; reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r; reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; @@ -206,12 +208,15 @@ module VX_nc_bypass #( mem_req_data_in_r[0] = core_req_data_in_sel; end + assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1); assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r; assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r; assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r; assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel}); end else begin `UNUSED_VAR (mem_req_wsel_in) + `UNUSED_VAR (mem_req_pmask_in) + assign mem_req_pmask_out = 0; assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel; assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel; assign mem_req_wsel_out = 0;