From 6525dff158081230ba4916e5ade26534b4c5b7e6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Aug 2021 15:59:33 -0700 Subject: [PATCH] fixed no shared memory bug, fixed cache debug log --- hw/rtl/VX_icache_stage.v | 2 +- hw/rtl/VX_lsu_unit.v | 30 ++++----- hw/rtl/VX_mem_unit.v | 5 +- hw/rtl/cache/VX_bank.v | 12 ++-- hw/rtl/cache/VX_cache.v | 62 +++++++++--------- hw/rtl/cache/VX_cache_define.vh | 14 +++-- hw/rtl/cache/VX_nc_bypass.v | 107 +++++++++++++++++++++++--------- hw/rtl/cache/VX_shared_mem.v | 8 +-- 8 files changed, 144 insertions(+), 96 deletions(-) diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 4e099d51..537d759a 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -55,7 +55,7 @@ module VX_icache_stage #( assign ifetch_req_if.ready = icache_req_if.ready; `ifdef DBG_CACHE_REQ_INFO - assign icache_req_if.tag = {ifetch_req_if.PC, ifetch_req_if.wid, req_tag}; + assign icache_req_if.tag = {ifetch_req_if.wid, ifetch_req_if.PC, req_tag}; `else assign icache_req_if.tag = req_tag; `endif diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 540cb03f..a0d986dd 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -120,9 +120,11 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; + wire dcache_req_fire_any = (| dcache_req_fire); + wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready; - wire mbuf_push = (| dcache_req_fire) + wire mbuf_push = dcache_req_fire_any && is_req_start // first submission only && req_wb; // loads only @@ -228,7 +230,7 @@ module VX_lsu_unit #( assign dcache_req_if.data[i] = mem_req_data; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]}; + assign dcache_req_if.tag[i] = {req_wid, req_pc, req_tag, req_addr_type[i]}; `else assign dcache_req_if.tag[i] = {req_tag, req_addr_type[i]}; `endif @@ -333,26 +335,26 @@ module VX_lsu_unit #( if (lsu_req_if.valid && fence_wait) begin $display("%t: *** D$%0d fence wait", $time, CORE_ID); end - if ((| dcache_req_fire)) begin + if (dcache_req_fire_any) begin if (dcache_req_if.rw[0]) begin $write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); `PRINT_ARRAY1D(req_addr, `NUM_THREADS); - $write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); - `PRINT_ARRAY1D(req_addr_type, `NUM_THREADS); - $write(", data="); - `PRINT_ARRAY1D(dcache_req_if.data, `NUM_THREADS); - $write("\n"); + $write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); + `PRINT_ARRAY1D(req_addr_type, `NUM_THREADS); + $write(", data="); + `PRINT_ARRAY1D(dcache_req_if.data, `NUM_THREADS); + $write("\n"); end else begin - $write("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); + $write("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); `PRINT_ARRAY1D(req_addr, `NUM_THREADS); - $write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); - `PRINT_ARRAY1D(req_addr_type, `NUM_THREADS); - $write(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup); + $write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen); + `PRINT_ARRAY1D(req_addr_type, `NUM_THREADS); + $write(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup); end end if (dcache_rsp_fire) begin - $write("%t: D$%0d Rsp: tmask=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=", - $time, CORE_ID, dcache_rsp_if.tmask, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd); + $write("%t: D$%0d Rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=", + $time, CORE_ID, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd); `PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS); $write(", is_dup=%b\n", rsp_is_dup); end diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 3c26f6c7..7cb071db 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -287,13 +287,14 @@ module VX_mem_unit # ( assign dcache_req_tmp_if.byteen = dcache_req_if.byteen; assign dcache_req_tmp_if.data = dcache_req_if.data; assign dcache_req_tmp_if.tag = dcache_req_if.tag; - assign dcache_req_tmp_if.ready = dcache_req_if.ready; + assign dcache_req_if.ready = dcache_req_tmp_if.ready; // D-cache to core reponse assign dcache_rsp_if.valid = dcache_rsp_tmp_if.valid; + assign dcache_rsp_if.tmask = dcache_rsp_tmp_if.tmask; assign dcache_rsp_if.tag = dcache_rsp_tmp_if.tag; assign dcache_rsp_if.data = dcache_rsp_tmp_if.data; - assign dcache_rsp_if.ready = dcache_rsp_tmp_if.ready; + assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready; end wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag); diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 6e8a89fb..e149340c 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -200,9 +200,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_sel, debug_wid_sel} = mshr_enable ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_sel, debug_wid_sel} = 0; + assign {debug_wid_sel, debug_pc_sel} = 0; end `endif @@ -253,9 +253,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0, debug_wid_st0} = tag_st0[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st0, debug_wid_st0} = 0; + assign {debug_wid_st0, debug_pc_st0} = 0; end `endif @@ -322,9 +322,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st1, debug_wid_st1} = 0; + assign {debug_wid_st1, debug_pc_st1} = 0; end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 5a4c609c..9637e0b6 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -91,6 +91,9 @@ module VX_cache #( `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) + localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE; + localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS; + `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank; @@ -106,14 +109,14 @@ module VX_cache #( wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_nc; wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_nc; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_nc; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_nc; + wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_nc; wire [NUM_REQS-1:0] core_req_ready_nc; // Core response wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_nc; wire [NUM_REQS-1:0] core_rsp_tmask_nc; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_nc; - wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_nc; + wire [`CORE_RSP_TAGS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_nc; wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_nc; // Memory request @@ -122,28 +125,29 @@ module VX_cache #( wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc; wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc; wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_nc; + wire [`MEM_ADDR_WIDTH-1:0] mem_req_tag_nc; wire mem_req_ready_nc; // Memory response wire mem_rsp_valid_nc; wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc; - wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_nc; + wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc; wire mem_rsp_ready_nc; if (NC_ENABLE) begin VX_nc_bypass #( - .NUM_REQS (NUM_REQS), - .NUM_RSP_TAGS (`CORE_RSP_TAGS), - .NC_TAG_BIT (0), + .NUM_REQS (NUM_REQS), + .NUM_RSP_TAGS (`CORE_RSP_TAGS), + .NC_TAG_BIT (0), - .CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH), - .CORE_DATA_SIZE (WORD_SIZE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_ADDR_WIDTH (`WORD_ADDR_WIDTH), + .CORE_DATA_SIZE (WORD_SIZE), + .CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH), - .MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH), - .MEM_DATA_SIZE (CACHE_LINE_SIZE), - .MEM_TAG_WIDTH (MEM_TAG_WIDTH) + .MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH), + .MEM_DATA_SIZE (CACHE_LINE_SIZE), + .MEM_TAG_IN_WIDTH (`MEM_ADDR_WIDTH), + .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH) ) nc_bypass ( .clk (clk), .reset (reset), @@ -242,12 +246,9 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual; - wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc_a, mem_rsp_tag_qual; + wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_qual; wire mrsq_out_valid, mrsq_out_ready; - - // trim out shared memory and non-cacheable flags - assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH]; VX_elastic_buffer #( .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), @@ -258,7 +259,7 @@ module VX_cache #( .reset (reset), .ready_in (mem_rsp_ready_nc), .valid_in (mem_rsp_valid_nc), - .data_in ({mem_rsp_tag_nc_a, mem_rsp_data_nc}), + .data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}), .data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}), .ready_out (mrsq_out_ready), .valid_out (mrsq_out_valid) @@ -292,14 +293,14 @@ module VX_cache #( wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_rw; wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; - wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; + wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; wire [NUM_BANKS-1:0] per_bank_mem_req_valid; @@ -325,7 +326,7 @@ module VX_cache #( .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) ) core_req_bank_sel ( .clk (clk), @@ -363,14 +364,14 @@ module VX_cache #( wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_rw; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr; - wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; + wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag; wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask; wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid; - wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; + wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag; wire curr_bank_core_rsp_ready; wire curr_bank_mem_req_valid; @@ -442,8 +443,8 @@ module VX_cache #( .MSHR_SIZE (MSHR_SIZE), .MREQ_SIZE (MREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS), .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) ) bank ( `SCOPE_BIND_VX_cache_bank(i) @@ -504,8 +505,8 @@ module VX_cache #( .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS) ) core_rsp_merge ( .clk (clk), .reset (reset), @@ -542,12 +543,7 @@ module VX_cache #( .ready_out (mem_req_ready_nc) ); - // build memory tag adding non-cacheable flag - if (NC_ENABLE) begin - assign mem_req_tag_nc = MEM_TAG_WIDTH'({mem_req_addr_nc, 1'b0}); - end else begin - assign mem_req_tag_nc = MEM_TAG_WIDTH'(mem_req_addr_nc); - end + assign mem_req_tag_nc = mem_req_addr_nc; `ifdef PERF_ENABLE // per cycle: core_reads, core_writes diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 17b37a12..5ca9e80d 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -1,5 +1,5 @@ -`ifndef VX_CACHE_CONFIG -`define VX_CACHE_CONFIG +`ifndef VX_CACHE_DEFINE +`define VX_CACHE_DEFINE `include "VX_platform.vh" @@ -51,20 +51,22 @@ `define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS] +`define CACHE_REQ_INFO_RNG CORE_TAG_WIDTH-1:(CORE_TAG_WIDTH-`NW_BITS-32) + /////////////////////////////////////////////////////////////////////////////// `define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS) `define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS) -`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET] +`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET] -`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS] +`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS] -`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} +`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} `define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))} `define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} -`endif +`endif \ No newline at end of file diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index aad20de7..a1cf3156 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -7,14 +7,16 @@ module VX_nc_bypass #( parameter CORE_ADDR_WIDTH = 1, parameter CORE_DATA_SIZE = 1, - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_IN_WIDTH = 1, parameter MEM_ADDR_WIDTH = 1, parameter MEM_DATA_SIZE = 1, - parameter MEM_TAG_WIDTH = 1, + parameter MEM_TAG_IN_WIDTH = 1, + parameter MEM_TAG_OUT_WIDTH = 1, - parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, - parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8 + localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, + localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8, + localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1 ) ( input wire clk, input wire reset, @@ -25,7 +27,7 @@ module VX_nc_bypass #( input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in, input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in, - input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_in, + input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in, output wire [NUM_REQS-1:0] core_req_ready_in, // Core request out @@ -34,21 +36,21 @@ module VX_nc_bypass #( output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out, output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out, - output wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_out, + output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out, input wire [NUM_REQS-1:0] core_req_ready_out, // Core response in input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in, input wire [NUM_REQS-1:0] core_rsp_tmask_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in, - input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in, + input wire [NUM_RSP_TAGS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in, output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in, // Core response out output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out, output wire [NUM_REQS-1:0] core_rsp_tmask_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out, - output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out, + output wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out, input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out, // Memory request in @@ -57,7 +59,7 @@ module VX_nc_bypass #( input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in, input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in, input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in, - input wire [MEM_TAG_WIDTH-1:0] mem_req_tag_in, + input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in, output wire mem_req_ready_in, // Memory request out @@ -66,19 +68,19 @@ module VX_nc_bypass #( output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out, output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out, output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out, - output wire [MEM_TAG_WIDTH-1:0] mem_req_tag_out, + output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out, input wire mem_req_ready_out, // Memory response in input wire mem_rsp_valid_in, input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in, - input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_in, + input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in, output wire mem_rsp_ready_in, // Memory response out output wire mem_rsp_valid_out, output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out, - output wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_out, + output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out, input wire mem_rsp_ready_out ); `STATIC_ASSERT((NUM_RSP_TAGS == 1 || NUM_RSP_TAGS == NUM_REQS), ("invalid paramter")) @@ -87,6 +89,7 @@ module VX_nc_bypass #( `UNUSED_VAR (reset) localparam CORE_REQ_TIDW = $clog2(NUM_REQS); + localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1; localparam CORE_LDATAW = $clog2(CORE_DATA_WIDTH); localparam MEM_LDATAW = $clog2(MEM_DATA_WIDTH); @@ -121,7 +124,17 @@ module VX_nc_bypass #( assign core_req_addr_out = core_req_addr_in; assign core_req_byteen_out = core_req_byteen_in; assign core_req_data_out = core_req_data_in; - assign core_req_tag_out = core_req_tag_in; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + VX_bits_remove #( + .N (CORE_TAG_IN_WIDTH), + .S (1), + .POS (NC_TAG_BIT) + ) core_req_tag_remove ( + .data_in (core_req_tag_in[i]), + .data_out (core_req_tag_out[i]) + ); + end if (NUM_REQS > 1) begin for (genvar i = 0; i < NUM_REQS; ++i) begin @@ -138,21 +151,33 @@ module VX_nc_bypass #( assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid; assign mem_req_ready_in = mem_req_ready_out; + wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_nc; + + VX_bits_insert #( + .N (MEM_TAG_IN_WIDTH), + .S (1), + .POS (NC_TAG_BIT) + ) mem_req_tag_insert ( + .data_in (mem_req_tag_in), + .sel_in ('0), + .data_out (mem_req_tag_in_nc) + ); + if (NUM_REQS > 1) begin - wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel; + wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel; wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel; wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel; wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel; wire core_req_rw_in_sel; - wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in; + wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]}; end VX_onehot_mux #( - .DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1), + .DATAW (MUX_DATAW), .N (NUM_REQS) ) core_req_nc_mux ( .data_in (core_req_nc_mux_in), @@ -176,10 +201,10 @@ module VX_nc_bypass #( mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel; end assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r; - assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel}); + assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel}); end else begin assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel; - assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in_sel}); + assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel}); end end else begin `UNUSED_VAR (core_req_nc_tid) @@ -200,19 +225,33 @@ module VX_nc_bypass #( mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in; end assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r; - assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in}); + assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({req_addr_idx, core_req_tag_in}); end else begin assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in; - assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'(core_req_tag_in); + assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'(core_req_tag_in); end end // core response handling + wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual; + wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; + for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin + VX_bits_insert #( + .N (CORE_TAG_OUT_WIDTH), + .S (1), + .POS (NC_TAG_BIT) + ) core_rsp_tag_insert ( + .data_in (core_rsp_tag_in[i]), + .sel_in ('0), + .data_out (core_rsp_tag_out_unqual[i]) + ); + end + if (NUM_RSP_TAGS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; reg [NUM_REQS-1:0] rsp_nc_valid_r; always @(*) begin rsp_nc_valid_r = 0; @@ -224,7 +263,7 @@ module VX_nc_bypass #( assign core_rsp_ready_in = core_rsp_ready_out; if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D]; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; @@ -236,15 +275,15 @@ module VX_nc_bypass #( end for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in[i] : mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; - end + assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0]; + end end else begin assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc; - assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_in : mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; + assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0]; assign core_rsp_ready_in = core_rsp_ready_out; if (NUM_REQS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; reg [NUM_REQS-1:0] core_rsp_tmask_in_r; always @(*) begin core_rsp_tmask_in_r = 0; @@ -256,7 +295,7 @@ module VX_nc_bypass #( end if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D]; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_data_out[i] = core_rsp_valid_in ? core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; @@ -272,13 +311,21 @@ module VX_nc_bypass #( assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT]; assign mem_rsp_data_out = mem_rsp_data_in; - assign mem_rsp_tag_out = mem_rsp_tag_in; + + VX_bits_remove #( + .N (MEM_TAG_IN_WIDTH+1), + .S (1), + .POS (NC_TAG_BIT) + ) mem_rsp_tag_remove ( + .data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH+1)-1:0]), + .data_out (mem_rsp_tag_out) + ); if (NUM_RSP_TAGS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_tid] && core_rsp_ready_out[rsp_tid]) : mem_rsp_ready_out; end else begin assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in && core_rsp_ready_out) : mem_rsp_ready_out; end -endmodule +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 9156e98c..61ebea1e 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -264,11 +264,11 @@ module VX_shared_mem #( for (genvar i = 0; i < NUM_BANKS; ++i) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; - assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_st0[i], debug_pc_st0[i]} = per_bank_core_req_tag_unqual[i][`CACHE_REQ_INFO_RNG]; + assign {debug_wid_st1[i], debug_pc_st1[i]} = per_bank_core_req_tag[i][`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st0[i], debug_wid_st0[i]} = 0; - assign {debug_pc_st1[i], debug_wid_st1[i]} = 0; + assign {debug_wid_st0[i], debug_pc_st0[i]} = 0; + assign {debug_wid_st1[i], debug_pc_st1[i]} = 0; end end `endif