From f8d54c6994298087d4a29a7a8d9e4f1dd5f965cf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 9 Nov 2020 02:10:35 -0800 Subject: [PATCH] fixed cache_core_rsp_merge unit --- driver/opae/vlsim/Makefile | 6 +++--- hw/opae/vortex_afu.sv | 7 ------- hw/rtl/VX_lsu_unit.v | 11 ++++------- hw/rtl/cache/VX_bank.v | 25 +++++++++++++++++------- hw/rtl/cache/VX_cache.v | 27 +++++++------------------- hw/rtl/cache/VX_cache_core_rsp_merge.v | 20 ++++++++++--------- 6 files changed, 43 insertions(+), 53 deletions(-) diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index ee9da2e9..7528e1ec 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -20,11 +20,11 @@ DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DEBUG=1 +#DEBUG=1 #SCOPE=1 CFLAGS += -fPIC diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index f04301d6..663bbe30 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -1058,13 +1058,6 @@ Vortex #() vortex ( `UNUSED_PIN (ebreak) ); -always @(posedge clk) begin - if (!reset) begin - // DRAM reads should only happen during vortex execution - assert(vx_busy || !vx_dram_rd_req_enable); - end -end - // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef SCOPE diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 507f0189..6f442a0e 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -67,6 +67,7 @@ module VX_lsu_unit #( `IGNORE_WARNINGS_BEGIN wire [`NUM_THREADS-1:0][31:0] req_address; + reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; `IGNORE_WARNINGS_END wire valid_in; @@ -74,7 +75,7 @@ module VX_lsu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))) - ) lsu_req_reg ( + ) req_reg ( .clk (clk), .reset (reset), .stall (stall_in), @@ -91,10 +92,6 @@ module VX_lsu_unit #( wire [1:0] rsp_sext; reg [`NUM_THREADS-1:0][31:0] rsp_data; -`DEBUG_BLOCK( - reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; -) - reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask; wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag; @@ -114,7 +111,7 @@ module VX_lsu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .SIZE (`LSUQ_SIZE) - ) lsu_cam ( + ) cam_buffer ( .clk (clk), .reset (reset), .write_addr (req_tag), @@ -184,7 +181,7 @@ module VX_lsu_unit #( VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)) - ) lsu_rsp_reg ( + ) rsp_reg ( .clk (clk), .reset (reset), .stall (stall_out), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 0b460f85..2b44d95a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -352,6 +352,8 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0; + end else begin + assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0; end `endif @@ -369,6 +371,8 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; + end else begin + assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0; end `endif @@ -468,6 +472,8 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2; + end else begin + assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0; end `endif @@ -540,6 +546,8 @@ module VX_bank #( `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3; + end else begin + assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0; end `endif @@ -719,9 +727,11 @@ module VX_bank #( wire dwbq_pop = dram_req_valid && dram_req_ready; - wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr = dwbq_is_dwb_in ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : + wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = dwbq_is_dwb_in ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : addr_st3; + wire [BANK_LINE_SIZE-1:0] dwbq_byteen = dwbq_is_dwb_in ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; + if (DRAM_ENABLE) begin VX_generic_queue #( .DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH), @@ -731,7 +741,7 @@ module VX_bank #( .reset (reset), .push (dwbq_push), .pop (dwbq_pop), - .data_in ({dwbq_is_dwb_in, dirtyb_st3, dwbq_req_addr, readdata_st3}), + .data_in ({dwbq_is_dwb_in, dwbq_byteen, dwbq_addr, readdata_st3}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dwbq_empty), .full (dwbq_full), @@ -740,10 +750,11 @@ module VX_bank #( end else begin `UNUSED_VAR (dwbq_push) `UNUSED_VAR (dwbq_pop) + `UNUSED_VAR (dwbq_addr) + `UNUSED_VAR (dwbq_byteen) `UNUSED_VAR (readtag_st3) `UNUSED_VAR (dirtyb_st3) - `UNUSED_VAR (readdata_st3) - `UNUSED_VAR (dwbq_req_addr) + `UNUSED_VAR (readdata_st3) `UNUSED_VAR (dram_req_ready) assign dwbq_empty = 1; assign dwbq_full = 0; @@ -836,7 +847,7 @@ module VX_bank #( $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); end if (reqq_pop) begin - $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0); @@ -846,9 +857,9 @@ module VX_bank #( end if (dwbq_push) begin if (dwbq_is_dwb_in) - $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); else - $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), debug_wid_st3, debug_pc_st3); end if (snpq_push) begin $display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 608863bf..13109130 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -4,7 +4,7 @@ module VX_cache #( parameter CACHE_ID = 0, // Size of cache in bytes - parameter CACHE_SIZE = 2048, + parameter CACHE_SIZE = 4096, // Size of line inside a bank in bytes parameter BANK_LINE_SIZE = 16, // Number of banks @@ -15,7 +15,7 @@ module VX_cache #( parameter NUM_REQUESTS = 4, // Core Request Queue Size - parameter CREQ_SIZE = 4, + parameter CREQ_SIZE = 8, // Miss Reserv Queue Knob parameter MRVQ_SIZE = 8, // DRAM Response Queue Size @@ -24,7 +24,7 @@ module VX_cache #( parameter SNRQ_SIZE = 8, // Core Writeback Queue Size - parameter CWBQ_SIZE = 4, + parameter CWBQ_SIZE = 8, // DRAM Request Queue Size parameter DREQ_SIZE = 8, // Snoop Response Size @@ -40,7 +40,7 @@ module VX_cache #( parameter FLUSH_ENABLE = 1, // Enable snoop forwarding - parameter SNOOP_FORWARDING = 0, + parameter SNOOP_FORWARDING = 1, // core request tag size parameter CORE_TAG_WIDTH = 4, @@ -52,13 +52,13 @@ module VX_cache #( parameter DRAM_TAG_WIDTH = 28, // Number of snoop forwarding requests - parameter NUM_SNP_REQUESTS = 1, + parameter NUM_SNP_REQUESTS = (SNOOP_FORWARDING ? 4 : 1), // Snooping request tag width - parameter SNP_REQ_TAG_WIDTH = 1, + parameter SNP_REQ_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1), // Snooping forward tag width - parameter SNP_FWD_TAG_WIDTH = 1 + parameter SNP_FWD_TAG_WIDTH = (SNOOP_FORWARDING ? 4 : 1) ) ( `SCOPE_IO_VX_cache @@ -122,19 +122,6 @@ module VX_cache #( output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready ); -`ifdef DBG_CORE_REQ_INFO - /* verilator lint_off UNUSED */ - wire[31:0] debug_core_req_use_pc; - wire[`NR_BITS-1:0] debug_core_req_rd; - wire[`NW_BITS-1:0] debug_core_req_wid; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx; - /* verilator lint_on UNUSED */ - - if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_core_req_use_pc, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0]; - end -`endif - wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; wire [NUM_BANKS-1:0] per_bank_core_req_ready; diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index c6059838..a7c6e201 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -50,9 +50,10 @@ module VX_cache_core_rsp_merge #( if (CORE_TAG_ID_BITS != 0) begin always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_bank_select = 0; + core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx]; core_rsp_data_unqual = 'x; - core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx]; + core_rsp_bank_select = 0; + for (integer i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[sel_idx][CORE_TAG_ID_BITS-1:0])) begin @@ -65,22 +66,23 @@ module VX_cache_core_rsp_merge #( end else begin always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = 1; + core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_valid[sel_idx]; - core_rsp_bank_select = 0; - core_rsp_bank_select[sel_idx] = 1; + core_rsp_tag_unqual = 'x; + core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx]; core_rsp_data_unqual = 'x; core_rsp_data_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_data[sel_idx]; - core_rsp_tag_unqual = 'x; - core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx]; + core_rsp_bank_select = 0; + core_rsp_bank_select[sel_idx] = 1; for (integer i = 0; i < NUM_BANKS; i++) begin - if (per_bank_core_rsp_valid[i] && !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]) begin + if (per_bank_core_rsp_valid[i] + && !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]) begin core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; - core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i]; + core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_bank_select[i] = 1; end end