diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 724ff8dd..62dcea69 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -638,8 +638,8 @@ assign avs_rtq_push = vx_dram_rd_req_fire; assign avs_rtq_pop = vx_dram_rd_rsp_fire; VX_generic_queue #( - .DATAW(`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW), - .SIZE(AVS_RD_QUEUE_SIZE) + .DATAW (`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW), + .SIZE (AVS_RD_QUEUE_SIZE) ) avs_rd_req_queue ( .clk (clk), .reset (reset), @@ -660,8 +660,8 @@ assign avs_rdq_push = avs_readdatavalid; assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; VX_generic_queue #( - .DATAW(DRAM_LINE_WIDTH), - .SIZE(AVS_RD_QUEUE_SIZE) + .DATAW (DRAM_LINE_WIDTH), + .SIZE (AVS_RD_QUEUE_SIZE) ) avs_rd_rsp_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index d2e7e5c0..32a64e25 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -357,11 +357,13 @@ module VX_cluster #( .NUM_REQUESTS (`L2NUM_REQUESTS), .CREQ_SIZE (`L2CREQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE), - .DRPQ_SIZE (`L2DRPQ_SIZE), + .DRFQ_SIZE (`L2DRFQ_SIZE), .SNRQ_SIZE (`L2SNRQ_SIZE), .CWBQ_SIZE (`L2CWBQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE), + .SNPQ_SIZE (`L2SNPQ_SIZE), .DRAM_ENABLE (1), + .FLUSH_ENABLE (1), .WRITE_ENABLE (1), .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 498ee39c..c40469e8 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -125,7 +125,7 @@ module VX_commit #( end end `else - `UNUSED_VAR(fpu_commit_if.PC) + `UNUSED_VAR (fpu_commit_if.PC) `endif endmodule diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index aafb86d1..71762790 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -223,9 +223,14 @@ `define DDREQ_SIZE 8 `endif +// Snoop Response Queue Size +`ifndef DSNPQ_SIZE +`define DSNPQ_SIZE 8 +`endif + // DRAM Response Queue Size -`ifndef DDRPQ_SIZE -`define DDRPQ_SIZE 8 +`ifndef DDRFQ_SIZE +`define DDRFQ_SIZE 8 `endif // Snoop Req 
Queue Size @@ -276,8 +281,8 @@ `endif // DRAM Response Queue Size -`ifndef IDRPQ_SIZE -`define IDRPQ_SIZE 8 +`ifndef IDRFQ_SIZE +`define IDRFQ_SIZE 8 `endif // SM Configurable Knobs ====================================================== @@ -355,8 +360,8 @@ `endif // DRAM Response Queue Size -`ifndef L2DRPQ_SIZE -`define L2DRPQ_SIZE 8 +`ifndef L2DRFQ_SIZE +`define L2DRFQ_SIZE 8 `endif // Snoop Req Queue Size @@ -364,6 +369,11 @@ `define L2SNRQ_SIZE 8 `endif +// Snoop Response Queue Size +`ifndef L2SNPQ_SIZE +`define L2SNPQ_SIZE 8 +`endif + // L3cache Configurable Knobs ================================================= // Size of cache in bytes @@ -407,8 +417,8 @@ `endif // DRAM Response Queue Size -`ifndef L3DRPQ_SIZE -`define L3DRPQ_SIZE 8 +`ifndef L3DRFQ_SIZE +`define L3DRFQ_SIZE 8 `endif // Snoop Req Queue Size @@ -416,4 +426,9 @@ `define L3SNRQ_SIZE 8 `endif +// Snoop Response Queue Size +`ifndef L3SNPQ_SIZE +`define L3SNPQ_SIZE 8 +`endif + `endif diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 40fec965..f8d70590 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -65,13 +65,15 @@ module VX_mem_unit # ( .NUM_REQUESTS (`SNUM_REQUESTS), .CREQ_SIZE (`SCREQ_SIZE), .MRVQ_SIZE (8), - .DRPQ_SIZE (1), + .DRFQ_SIZE (1), .SNRQ_SIZE (1), .CWBQ_SIZE (`SCWBQ_SIZE), .DREQ_SIZE (1), - .SNOOP_FORWARDING (0), + .SNPQ_SIZE (1), .DRAM_ENABLE (0), + .FLUSH_ENABLE (0), .WRITE_ENABLE (1), + .SNOOP_FORWARDING (0), .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH) @@ -145,13 +147,15 @@ module VX_mem_unit # ( .NUM_REQUESTS (`DNUM_REQUESTS), .CREQ_SIZE (`DCREQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE), - .DRPQ_SIZE (`DDRPQ_SIZE), + .DRFQ_SIZE (`DDRFQ_SIZE), .SNRQ_SIZE (`DSNRQ_SIZE), .CWBQ_SIZE (`DCWBQ_SIZE), - .DREQ_SIZE (`DDREQ_SIZE), - .SNOOP_FORWARDING (0), + .DREQ_SIZE (`DDREQ_SIZE), + .SNPQ_SIZE (`DSNPQ_SIZE), .DRAM_ENABLE (1), + .FLUSH_ENABLE (1), .WRITE_ENABLE (1), + .SNOOP_FORWARDING (0), 
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), @@ -226,13 +230,15 @@ module VX_mem_unit # ( .NUM_REQUESTS (`INUM_REQUESTS), .CREQ_SIZE (`ICREQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE), - .DRPQ_SIZE (`IDRPQ_SIZE), + .DRFQ_SIZE (`IDRFQ_SIZE), .SNRQ_SIZE (1), .CWBQ_SIZE (`ICWBQ_SIZE), .DREQ_SIZE (`IDREQ_SIZE), - .SNOOP_FORWARDING (0), + .SNPQ_SIZE (1), .DRAM_ENABLE (1), + .FLUSH_ENABLE (0), .WRITE_ENABLE (0), + .SNOOP_FORWARDING (0), .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), .DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH) diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 8dacb37a..05c2b8db 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -380,11 +380,13 @@ module Vortex ( .NUM_REQUESTS (`L3NUM_REQUESTS), .CREQ_SIZE (`L3CREQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE), - .DRPQ_SIZE (`L3DRPQ_SIZE), + .DRFQ_SIZE (`L3DRFQ_SIZE), .SNRQ_SIZE (`L3SNRQ_SIZE), .CWBQ_SIZE (`L3CWBQ_SIZE), .DREQ_SIZE (`L3DREQ_SIZE), + .SNPQ_SIZE (`L3SNPQ_SIZE), .DRAM_ENABLE (1), + .FLUSH_ENABLE (1), .WRITE_ENABLE (1), .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 23ea49cd..ba171d11 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -2,51 +2,52 @@ module VX_bank #( parameter CACHE_ID = 0, - parameter BANK_ID = 0, + parameter BANK_ID = 0, + // Size of cache in bytes - parameter CACHE_SIZE = 0, + parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + parameter BANK_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 0, + parameter WORD_SIZE = 1, + // Number of Word requests per cycle + parameter NUM_REQUESTS = 1, - // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core
Request Queue Size - parameter CREQ_SIZE = 0, + parameter CREQ_SIZE = 1, // Miss Reserv Queue Knob - parameter MRVQ_SIZE = 0, + parameter MRVQ_SIZE = 1, // DRAM Response Queue Size - parameter DRPQ_SIZE = 0, + parameter DRFQ_SIZE = 1, // Snoop Req Queue Size - parameter SNRQ_SIZE = 0, + parameter SNRQ_SIZE = 1, - // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size - parameter CWBQ_SIZE = 0, + parameter CWBQ_SIZE = 1, // DRAM Request Queue Size - parameter DREQ_SIZE = 0, + parameter DREQ_SIZE = 1, + // Snoop Response Size + parameter SNPQ_SIZE = 1, // Enable cache writeable parameter WRITE_ENABLE = 0, - // Enable dram update - parameter DRAM_ENABLE = 0, - - // Enable snoop forwarding - parameter SNOOP_FORWARDING = 0, + // Enable dram update + parameter DRAM_ENABLE = 0, + + // Enable cache flush + parameter FLUSH_ENABLE = 0, // core request tag size - parameter CORE_TAG_WIDTH = 0, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0, // Snooping request tag width - parameter SNP_REQ_TAG_WIDTH = 0 + parameter SNP_REQ_TAG_WIDTH = 1 ) ( `SCOPE_IO_VX_bank @@ -156,23 +157,34 @@ module VX_bank #( wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; - wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready; assign dram_rsp_ready = !dfpq_full; - VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), - .SIZE(DRPQ_SIZE) - ) dfp_queue ( - .clk (clk), - .reset (reset), - .push (dram_rsp_fire), - .pop (dfpq_pop), - .data_in ({dram_rsp_addr, dram_rsp_data}), - .data_out({dfpq_addr_st0, dfpq_filldata_st0}), - .empty (dfpq_empty), - .full (dfpq_full), - `UNUSED_PIN (size) - ); + if (DRAM_ENABLE) begin + wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready; + + VX_generic_queue #( + .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), + .SIZE(DRFQ_SIZE) + ) dfp_queue ( + .clk (clk), + .reset (reset), + .push (dram_rsp_fire), + .pop (dfpq_pop), + .data_in 
({dram_rsp_addr, dram_rsp_data}), + .data_out({dfpq_addr_st0, dfpq_filldata_st0}), + .empty (dfpq_empty), + .full (dfpq_full), + `UNUSED_PIN (size) + ); + end else begin + `UNUSED_VAR (dram_rsp_valid) + `UNUSED_VAR (dram_rsp_addr) + `UNUSED_VAR (dram_rsp_data) + assign dfpq_empty = 1; + assign dfpq_full = 0; + assign dfpq_addr_st0 = 0; + assign dfpq_filldata_st0 = 0; + end wire reqq_pop; wire reqq_empty; @@ -232,7 +244,6 @@ module VX_bank #( wire [WORD_SIZE-1:0] msrq_byteen_st0; wire msrq_is_snp_st0; wire msrq_snp_invalidate_st0; - wire msrq_pending_hazard_st0; wire msrq_pending_hazard_st1; wire[`REQS_BITS-1:0] miss_add_tid; @@ -247,6 +258,7 @@ module VX_bank #( wire msrq_push_stall; wire cwbq_push_stall; wire dwbq_push_stall; + wire snpq_push_stall; wire stall_bank_pipe; wire is_fill_st1; @@ -304,7 +316,7 @@ module VX_bank #( msrq_pop_unqual ? msrq_wsel_st0 : 0; end else begin - `UNUSED_VAR(msrq_wsel_st0) + `UNUSED_VAR (msrq_wsel_st0) assign wsel_st0 = 0; end @@ -377,10 +389,7 @@ module VX_bank #( // also force a miss for msrq requests when previous request in st2 got a miss wire force_miss_st1 = (valid_st1 && !is_msrq_st1 && ~is_fill_st1 && (msrq_pending_hazard_st1 || st2_pending_hazard_st1)) || (valid_st1 && is_msrq_st1 && is_msrq_miss_st2); - - // access the tag data store - wire tag_data_fire = valid_st1 && !stall_bank_pipe; - + VX_tag_data_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -395,6 +404,8 @@ module VX_bank #( .clk (clk), .reset (reset), + .stall (stall_bank_pipe), + `ifdef DBG_CORE_REQ_INFO .debug_pc_st1 (debug_pc_st1), .debug_rd_st1 (debug_rd_st1), @@ -403,7 +414,7 @@ module VX_bank #( `endif // Actual Read/Write - .valid_req_st1 (tag_data_fire), + .valid_req_st1 (valid_st1), .writefill_st1 (is_fill_st1), .addr_st1 (addr_st1), .wordsel_st1 (wsel_st1), @@ -412,7 +423,7 @@ module VX_bank #( .mem_rw_st1 (mem_rw_st1), .mem_byteen_st1 (mem_byteen_st1), - .is_snp_st1 (is_snp_st1), + .is_snp_st1 (is_snp_st1 && !stall_bank_pipe), 
.snp_invalidate_st1(snp_invalidate_st1), .force_miss_st1 (force_miss_st1), @@ -466,121 +477,140 @@ module VX_bank #( wire miss_add = miss_add_unqual && !msrq_full && !cwbq_push_stall - && !dwbq_push_stall; + && !dwbq_push_stall + && !snpq_push_stall; + + assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2; // we have a recurrent msrq miss assign is_msrq_miss_st2 = miss_add_unqual && is_msrq_st2; - wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; - wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; - assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2; - wire miss_add_is_snp = is_snp_st2; - wire miss_add_snp_invalidate = snp_invalidate_st2; + // a matching incoming fill request to the block is in stage 0 + wire incoming_st0_fill_st2 = is_fill_st0 && (addr_st2 == addr_st0); - wire msrq_real_pop_st2 = valid_st2 && is_msrq_st2 && !miss_add_unqual && !stall_bank_pipe; + // a matching incoming fill request to the block is in stage 1 + wire incoming_st1_fill_st2 = is_fill_st1 && (addr_st2 == addr_st1); - // mark msrq entry that match DRAM fill as 'ready' - wire update_ready_st0 = dfpq_pop; + if (DRAM_ENABLE) begin + wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; + wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; + wire miss_add_is_snp = is_snp_st2; + wire miss_add_snp_invalidate = snp_invalidate_st2; - // push missed requests as 'ready' - // if it didn't actually missed but had to abort because of pending requets in msrq - // if matching fill request to the block is in stage 0 - // if matching fill request to the block is in stage 1 - wire match_st0_fill_st2 = is_fill_st0 && (miss_add_addr == addr_st0); - wire match_st1_fill_st2 = is_fill_st1 && (miss_add_addr == addr_st1); - wire msrq_init_ready_state_st2 = !miss_st2 - || match_st0_fill_st2 - || match_st1_fill_st2; + 
wire msrq_real_pop_st2 = valid_st2 && is_msrq_st2 && !miss_add_unqual && !stall_bank_pipe; - always @(*) begin - if (miss_st2 && (match_st0_fill_st2 || match_st1_fill_st2)) begin - $display("%t: incoming fill - addr=%0h, st0=%b, st1=%b", $time, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), match_st0_fill_st2, match_st1_fill_st2); - end + // mark msrq entries that match the DRAM fill as 'ready' + wire update_ready_st0 = dfpq_pop; + + // push missed requests as 'ready' + // if it didn't actually miss but had to abort because of pending requests in msrq + wire msrq_init_ready_state_st2 = !miss_st2 + || incoming_st0_fill_st2 + || incoming_st1_fill_st2; + + VX_cache_miss_resrv #( + .BANK_ID (BANK_ID), + .CACHE_ID (CACHE_ID), + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQUESTS (NUM_REQUESTS), + .MRVQ_SIZE (MRVQ_SIZE), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) + ) cache_miss_resrv ( + .clk (clk), + .reset (reset), + + `ifdef DBG_CORE_REQ_INFO + .debug_pc_st0 (debug_pc_st0), + .debug_rd_st0 (debug_rd_st0), + .debug_wid_st0 (debug_wid_st0), + .debug_tagid_st0(debug_tagid_st0), + .debug_pc_st2 (debug_pc_st2), + .debug_rd_st2 (debug_rd_st2), + .debug_wid_st2 (debug_wid_st2), + .debug_tagid_st2(debug_tagid_st2), + `endif + + // enqueue + .miss_add (miss_add), + .miss_add_addr (miss_add_addr), + .miss_add_wsel (miss_add_wsel), + .miss_add_data (miss_add_data), + .miss_add_tid (miss_add_tid), + .miss_add_tag (miss_add_tag), + .miss_add_rw (miss_add_rw), + .miss_add_byteen (miss_add_byteen), + .miss_add_is_snp (miss_add_is_snp), + .miss_add_snp_invalidate (miss_add_snp_invalidate), + .is_msrq_st2 (is_msrq_st2), + .init_ready_state_st2 (msrq_init_ready_state_st2), + + .miss_resrv_full (msrq_full), + .miss_resrv_almfull (msrq_almfull), + + // fill + .update_ready_st0 (update_ready_st0), + .fill_addr_st0 (addr_st0), + .pending_hazard_st0
(msrq_pending_hazard_unqual_st0), + + // dequeue + .miss_resrv_schedule_st0 (msrq_pop), + .miss_resrv_valid_st0 (msrq_valid_st0), + .miss_resrv_addr_st0 (msrq_addr_st0), + .miss_resrv_wsel_st0 (msrq_wsel_st0), + .miss_resrv_data_st0 (msrq_writeword_st0), + .miss_resrv_tid_st0 (msrq_tid_st0), + .miss_resrv_tag_st0 (msrq_tag_st0), + .miss_resrv_rw_st0 (msrq_rw_st0), + .miss_resrv_byteen_st0 (msrq_byteen_st0), + .miss_resrv_is_snp_st0 (msrq_is_snp_st0), + .miss_resrv_snp_invalidate_st0 (msrq_snp_invalidate_st0), + .miss_resrv_pop_st2 (msrq_real_pop_st2) + ); + end else begin + `UNUSED_VAR (miss_add) + `UNUSED_VAR (wsel_st2) + `UNUSED_VAR (writeword_st2) + `UNUSED_VAR (snp_invalidate_st2) + `UNUSED_VAR (miss_add_byteen) + assign msrq_pending_hazard_unqual_st0 = 0; + assign msrq_full = 0; + assign msrq_almfull = 0; + assign msrq_valid_st0 = 0; + assign msrq_addr_st0 = 0; + assign msrq_wsel_st0 = 0; + assign msrq_writeword_st0 = 0; + assign msrq_tid_st0 = 0; + assign msrq_tag_st0 = 0; + assign msrq_rw_st0 = 0; + assign msrq_byteen_st0 = 0; + assign msrq_is_snp_st0 = 0; + assign msrq_snp_invalidate_st0 = 0; end - VX_cache_miss_resrv #( - .BANK_ID (BANK_ID), - .CACHE_ID (CACHE_ID), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), - .MRVQ_SIZE (MRVQ_SIZE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) - ) cache_miss_resrv ( - .clk (clk), - .reset (reset), - - `ifdef DBG_CORE_REQ_INFO - .debug_pc_st0 (debug_pc_st0), - .debug_rd_st0 (debug_rd_st0), - .debug_wid_st0 (debug_wid_st0), - .debug_tagid_st0(debug_tagid_st0), - .debug_pc_st2 (debug_pc_st2), - .debug_rd_st2 (debug_rd_st2), - .debug_wid_st2 (debug_wid_st2), - .debug_tagid_st2(debug_tagid_st2), - `endif - - // enqueue - .miss_add (miss_add), - .miss_add_addr (miss_add_addr), - .miss_add_wsel (miss_add_wsel), - .miss_add_data (miss_add_data), - .miss_add_tid (miss_add_tid), - 
.miss_add_tag (miss_add_tag), - .miss_add_rw (miss_add_rw), - .miss_add_byteen (miss_add_byteen), - .miss_add_is_snp (miss_add_is_snp), - .miss_add_snp_invalidate (miss_add_snp_invalidate), - .is_msrq_st2 (is_msrq_st2), - .init_ready_state_st2 (msrq_init_ready_state_st2), - - .miss_resrv_full (msrq_full), - .miss_resrv_almfull (msrq_almfull), - - // fill - .update_ready_st0 (update_ready_st0), - .fill_addr_st0 (addr_st0), - .pending_hazard_st0 (msrq_pending_hazard_unqual_st0), - - // dequeue - .miss_resrv_schedule_st0 (msrq_pop), - .miss_resrv_valid_st0 (msrq_valid_st0), - .miss_resrv_addr_st0 (msrq_addr_st0), - .miss_resrv_wsel_st0 (msrq_wsel_st0), - .miss_resrv_data_st0 (msrq_writeword_st0), - .miss_resrv_tid_st0 (msrq_tid_st0), - .miss_resrv_tag_st0 (msrq_tag_st0), - .miss_resrv_rw_st0 (msrq_rw_st0), - .miss_resrv_byteen_st0 (msrq_byteen_st0), - .miss_resrv_is_snp_st0 (msrq_is_snp_st0), - .miss_resrv_snp_invalidate_st0 (msrq_snp_invalidate_st0), - .miss_resrv_pop_st2 (msrq_real_pop_st2) - ); - // Enqueue core response - wire cwbq_push, cwbq_pop; wire cwbq_empty, cwbq_full; wire cwbq_push_unqual = valid_st2 && !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2 && !miss_add_rw; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; - assign cwbq_push = cwbq_push_unqual - && !cwbq_full - && !dwbq_push_stall - && !msrq_push_stall; + wire cwbq_push = cwbq_push_unqual + && !cwbq_full + && !msrq_push_stall + && !dwbq_push_stall + && !snpq_push_stall; - assign cwbq_pop = core_rsp_valid && core_rsp_ready; + wire cwbq_pop = core_rsp_valid && core_rsp_ready; - wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid; wire [CORE_TAG_WIDTH-1:0] cwbq_tag = CORE_TAG_WIDTH'(miss_add_tag); - + wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; + VX_generic_queue #( .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), .SIZE(CWBQ_SIZE) @@ -598,78 +628,114 @@ module VX_bank #( assign core_rsp_valid = !cwbq_empty; - // Enqueue DRAM / Snoop 
request + // Enqueue DRAM request - wire dwbq_push, dwbq_pop; wire dwbq_empty, dwbq_full; - wire dwbq_is_dram_out, dwbq_is_snp_out; - - wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr; - wire [SNP_REQ_TAG_WIDTH-1:0] dwbq_snp_tag; + wire incoming_fill = incoming_st0_fill_st2 || incoming_st1_fill_st2; - wire dwbq_is_dfl_in = miss_st2 && !msrq_init_ready_state_st2 && (!force_miss_st2 || is_msrq_st2); - wire dwbq_is_dwb_in = dirty_st2 && !force_miss_st2 && (is_fill_st2 || is_snp_st2); - wire dwbq_is_snp_in = valid_st2 && !force_miss_st2 && is_snp_st2; - - wire dwbq_is_dram_in = dwbq_is_dfl_in || dwbq_is_dwb_in; - - always @(posedge clk) begin - assert(!is_msrq_st2 || !is_fill_st2); - assert(!dwbq_is_dfl_in || !dwbq_is_dwb_in); - end - - wire dwbq_push_unqual = dwbq_is_dram_in || dwbq_is_snp_in; + wire dwbq_is_dfl_in = valid_st2 && miss_st2 && !incoming_fill && (!force_miss_st2 || is_msrq_st2); + wire dwbq_is_dwb_in = valid_st2 && dirty_st2 && !force_miss_st2 && (is_fill_st2 || is_snp_st2); + wire dwbq_push_unqual = dwbq_is_dfl_in || dwbq_is_dwb_in; assign dwbq_push_stall = dwbq_push_unqual && dwbq_full; - - assign dwbq_push = dwbq_push_unqual - && !dwbq_full - && !cwbq_push_stall - && !msrq_push_stall; - - assign dwbq_req_addr = dwbq_is_dwb_in ? 
{readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]} : addr_st2; - assign dwbq_snp_tag = SNP_REQ_TAG_WIDTH'(miss_add_tag); - VX_generic_queue #( - .DATAW(1 + 1 + 1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH), - .SIZE(DREQ_SIZE) - ) dwb_queue ( - .clk (clk), - .reset (reset), - .push (dwbq_push), - .pop (dwbq_pop), - .data_in ({dwbq_is_dram_in, dwbq_is_snp_in, dwbq_is_dwb_in, dirtyb_st2, dwbq_req_addr, readdata_st2, dwbq_snp_tag}), - .data_out({dwbq_is_dram_out, dwbq_is_snp_out, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data, snp_rsp_tag}), - .empty (dwbq_empty), - .full (dwbq_full), - `UNUSED_PIN (size) - ); + wire dwbq_push = dwbq_push_unqual + && !dwbq_full + && !msrq_push_stall + && !cwbq_push_stall + && !snpq_push_stall; - wire dram_req_fire = dram_req_valid && dram_req_ready; - wire snp_rsp_fire = snp_rsp_valid && snp_rsp_ready; + wire dwbq_pop = dram_req_valid && dram_req_ready; - reg dwbq_out_sel_snp; + if (DRAM_ENABLE) begin - always @(posedge clk) begin - if (reset) begin - dwbq_out_sel_snp <= 0; - end else if (dwbq_is_dram_out - && dwbq_is_snp_out - && (dram_req_fire || snp_rsp_fire)) begin - dwbq_out_sel_snp <= ~dwbq_out_sel_snp; - end - end + wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr = dwbq_is_dwb_in ? {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]} : + addr_st2; - // when both dwb and snp are asserted, first release the cwb, then release the snp. 
- assign dram_req_valid = !dwbq_empty && dwbq_is_dram_out && (~dwbq_is_snp_out || !dwbq_out_sel_snp); - assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dram_out || dwbq_out_sel_snp); + VX_generic_queue #( + .DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH), + .SIZE(DREQ_SIZE) + ) dwb_queue ( + .clk (clk), + .reset (reset), + .push (dwbq_push), + .pop (dwbq_pop), + .data_in ({dwbq_is_dwb_in, dirtyb_st2, dwbq_req_addr, readdata_st2}), + .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), + .empty (dwbq_empty), + .full (dwbq_full), + `UNUSED_PIN (size) + ); + end else begin + `UNUSED_VAR (dwbq_push) + `UNUSED_VAR (dwbq_pop) + `UNUSED_VAR (readtag_st2) + `UNUSED_VAR (dirtyb_st2) + `UNUSED_VAR (readdata_st2) + assign dwbq_empty = 1; + assign dwbq_full = 0; + assign dram_req_rw = 0; + assign dram_req_byteen = 0; + assign dram_req_addr = 0; + assign dram_req_data = 0; + `UNUSED_VAR (dram_req_ready) + end + + assign dram_req_valid = !dwbq_empty; + + // Enqueue snoop response + + wire snpq_empty, snpq_full; - assign dwbq_pop = (dwbq_is_dram_out && !dwbq_is_snp_out && dram_req_fire) - || (dwbq_is_snp_out && snp_rsp_fire); + wire snpq_push_unqual = valid_st2 && is_snp_st2 && !force_miss_st2; + + assign snpq_push_stall = snpq_push_unqual && snpq_full; + + wire snpq_push = snpq_push_unqual + && !snpq_full + && !msrq_push_stall + && !cwbq_push_stall + && !dwbq_push_stall; + + wire snpq_pop = snp_rsp_valid && snp_rsp_ready; + + wire [SNP_REQ_TAG_WIDTH-1:0] snpq_tag_st2 = SNP_REQ_TAG_WIDTH'(miss_add_tag); + + if (FLUSH_ENABLE) begin + + VX_generic_queue #( + .DATAW(SNP_REQ_TAG_WIDTH), + .SIZE(SNPQ_SIZE) + ) snp_queue ( + .clk (clk), + .reset (reset), + .push (snpq_push), + .pop (snpq_pop), + .data_in (snpq_tag_st2), + .data_out(snp_rsp_tag), + .empty (snpq_empty), + .full (snpq_full), + `UNUSED_PIN (size) + ); + end else begin + `UNUSED_VAR (snpq_push) + `UNUSED_VAR (snpq_pop) + `UNUSED_VAR (snpq_tag_st2) + assign snpq_empty = 
1; + assign snpq_full = 0; + assign snp_rsp_tag = 0; + `UNUSED_VAR (snp_rsp_ready) + end + + assign snp_rsp_valid = !snpq_empty + && dwbq_empty; // ensure all writebacks are sent // bank pipeline stall - assign stall_bank_pipe = (cwbq_push_stall || dwbq_push_stall || msrq_push_stall); + assign stall_bank_pipe = msrq_push_stall + || cwbq_push_stall + || dwbq_push_stall + || snpq_push_stall; `SCOPE_ASSIGN (valid_st0, valid_st0); `SCOPE_ASSIGN (valid_st1, valid_st1); @@ -687,6 +753,10 @@ module VX_bank #( `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin + if (miss_st2 && (incoming_st0_fill_st2 || incoming_st1_fill_st2)) begin + $display("%t: incoming fill - addr=%0h, st0=%b, st1=%b", $time, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), incoming_st0_fill_st2, incoming_st1_fill_st2); + assert(!is_msrq_st2); + end if ((|core_req_valid) && core_req_ready) begin $display("%t: cache%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag); end @@ -700,7 +770,7 @@ module VX_bank #( $display("%t: cache%0d:%0d dram rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data); end if (snp_req_valid && snp_req_ready) begin - $display("%t: cache%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag); + $display("%t: cache%0d:%0d snp req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag, snp_req_invalidate); end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: cache%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); @@ -715,13 +785,16 @@ module VX_bank #( $display("%t: cache%0d:%0d reqq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin - $display("%t: cache%0d:%0d snrq_pop: 
addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d snrq_pop: addr=%0h tag=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0); end if (cwbq_push) begin $display("%t: cache%0d:%0d cwbq_push: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2); end if (dwbq_push) begin - $display("%t: cache%0d:%0d dwbq_push: addr=%0h wid=%0d, PC=%0h, fill=%b, wb=%b, snp=%b", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dwbq_is_dfl_in, dwbq_is_dwb_in, dwbq_is_snp_in); + $display("%t: cache%0d:%0d dwbq_push: addr=%0h wid=%0d, PC=%0h, rw=%b", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dwbq_is_dwb_in); + end + if (snpq_push) begin + $display("%t: cache%0d:%0d snpq_push: addr=%0h tag=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), snpq_tag_st2); end end `endif diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index 311fb9ff..3510b01a 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -2,13 +2,13 @@ module VX_bank_core_req_arb #( // Size of a word in bytes - parameter WORD_SIZE = 0, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 0, + parameter WORD_SIZE = 1, + // Number of Word requests per cycle + parameter NUM_REQUESTS = 1, // Core Request Queue Size - parameter CREQ_SIZE = 0, + parameter CREQ_SIZE = 1, // core request tag size - parameter CORE_TAG_WIDTH = 0, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 ) ( diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index b8ffaafc..39010bc6 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -2,33 +2,33 @@ module VX_cache #( 
parameter CACHE_ID = 0, + // Size of cache in bytes parameter CACHE_SIZE = 2048, // Size of line inside a bank in bytes parameter BANK_LINE_SIZE = 16, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 8, + // Number of banks + parameter NUM_BANKS = 4, // Size of a word in bytes parameter WORD_SIZE = 4, - // Number of Word requests per cycle {1, 2, 4, 8, ...} + // Number of Word requests per cycle parameter NUM_REQUESTS = 4, - // Queues feeding into banks Knobs {1, 2, 4, 8, ...} - // Core Request Queue Size - parameter CREQ_SIZE = 8, + parameter CREQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MRVQ_SIZE = 16, + parameter MRVQ_SIZE = 8, // DRAM Response Queue Size - parameter DRPQ_SIZE = 16, + parameter DRFQ_SIZE = 8, // Snoop Req Queue Size - parameter SNRQ_SIZE = 16, + parameter SNRQ_SIZE = 8, - // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size - parameter CWBQ_SIZE = 8, + parameter CWBQ_SIZE = 4, // DRAM Request Queue Size - parameter DREQ_SIZE = 4, + parameter DREQ_SIZE = 8, + // Snoop Response Size + parameter SNPQ_SIZE = 8, // Enable cache writeable parameter WRITE_ENABLE = 1, @@ -36,14 +36,17 @@ module VX_cache #( // Enable dram update parameter DRAM_ENABLE = 1, + // Enable cache flush + parameter FLUSH_ENABLE = 1, + // Enable snoop forwarding parameter SNOOP_FORWARDING = 0, // core request tag size - parameter CORE_TAG_WIDTH = 42, + parameter CORE_TAG_WIDTH = 4, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 8, + parameter CORE_TAG_ID_BITS = 4, // dram request tag size parameter DRAM_TAG_WIDTH = 28, @@ -336,13 +339,14 @@ module VX_cache #( .NUM_REQUESTS (NUM_REQUESTS), .CREQ_SIZE (CREQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE), - .DRPQ_SIZE (DRPQ_SIZE), + .DRFQ_SIZE (DRFQ_SIZE), .SNRQ_SIZE (SNRQ_SIZE), .CWBQ_SIZE (CWBQ_SIZE), .DREQ_SIZE (DREQ_SIZE), + .SNPQ_SIZE (SNPQ_SIZE), .DRAM_ENABLE (DRAM_ENABLE), + .FLUSH_ENABLE (FLUSH_ENABLE), .WRITE_ENABLE (WRITE_ENABLE), - .SNOOP_FORWARDING (SNOOP_FORWARDING), 
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) @@ -413,42 +417,66 @@ module VX_cache #( .core_rsp_data (core_rsp_data), .core_rsp_tag (core_rsp_tag), .core_rsp_ready (core_rsp_ready) - ); + ); - VX_cache_dram_req_arb #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE) - ) cache_dram_req_arb ( - .clk (clk), - .reset (reset), - .per_bank_dram_req_valid (per_bank_dram_req_valid), - .per_bank_dram_req_rw (per_bank_dram_req_rw), - .per_bank_dram_req_byteen (per_bank_dram_req_byteen), - .per_bank_dram_req_addr (per_bank_dram_req_addr), - .per_bank_dram_req_data (per_bank_dram_req_data), - .per_bank_dram_req_ready (per_bank_dram_req_ready), - .dram_req_valid (dram_req_valid), - .dram_req_rw (dram_req_rw), - .dram_req_byteen (dram_req_byteen), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_ready (dram_req_ready) - ); + if (DRAM_ENABLE) begin + VX_cache_dram_req_arb #( + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE) + ) cache_dram_req_arb ( + .clk (clk), + .reset (reset), + .per_bank_dram_req_valid (per_bank_dram_req_valid), + .per_bank_dram_req_rw (per_bank_dram_req_rw), + .per_bank_dram_req_byteen (per_bank_dram_req_byteen), + .per_bank_dram_req_addr (per_bank_dram_req_addr), + .per_bank_dram_req_data (per_bank_dram_req_data), + .per_bank_dram_req_ready (per_bank_dram_req_ready), + .dram_req_valid (dram_req_valid), + .dram_req_rw (dram_req_rw), + .dram_req_byteen (dram_req_byteen), + .dram_req_addr (dram_req_addr), + .dram_req_data (dram_req_data), + .dram_req_ready (dram_req_ready) + ); + end else begin + `UNUSED_VAR (per_bank_dram_req_valid) + `UNUSED_VAR (per_bank_dram_req_rw) + `UNUSED_VAR (per_bank_dram_req_byteen) + `UNUSED_VAR (per_bank_dram_req_addr) + `UNUSED_VAR (per_bank_dram_req_data) + assign per_bank_dram_req_ready = 0; + assign dram_req_valid = 0; + assign dram_req_rw = 0; + assign 
dram_req_byteen = 0; + assign dram_req_addr = 0; + assign dram_req_data = 0; + `UNUSED_VAR (dram_req_ready) + end - VX_snp_rsp_arb #( - .NUM_BANKS (NUM_BANKS), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) - ) snp_rsp_arb ( - .clk (clk), - .reset (reset), - .per_bank_snp_rsp_valid (per_bank_snp_rsp_valid), - .per_bank_snp_rsp_tag (per_bank_snp_rsp_tag), - .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), - .snp_rsp_valid (snp_rsp_valid), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready) - ); + if (FLUSH_ENABLE) begin + VX_snp_rsp_arb #( + .NUM_BANKS (NUM_BANKS), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) + ) snp_rsp_arb ( + .clk (clk), + .reset (reset), + .per_bank_snp_rsp_valid (per_bank_snp_rsp_valid), + .per_bank_snp_rsp_tag (per_bank_snp_rsp_tag), + .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + .snp_rsp_ready (snp_rsp_ready) + ); + end else begin + `UNUSED_VAR (per_bank_snp_rsp_valid) + `UNUSED_VAR (per_bank_snp_rsp_tag) + assign per_bank_snp_rsp_ready = 0; + assign snp_rsp_valid = 0; + assign snp_rsp_tag = 0; + `UNUSED_VAR (snp_rsp_ready) + end endmodule diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 1a479b5c..b2dffbb6 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -2,13 +2,13 @@ module VX_cache_core_req_bank_sel #( // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, + parameter BANK_LINE_SIZE = 1, // Size of a word in bytes - parameter WORD_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 0 + parameter WORD_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, + // Number of Word requests per cycle + parameter NUM_REQUESTS = 1 ) ( input wire [NUM_REQUESTS-1:0] 
core_req_valid, `IGNORE_WARNINGS_BEGIN diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 7cd20e43..240359b2 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -1,14 +1,14 @@ `include "VX_cache_config.vh" module VX_cache_core_rsp_merge #( - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 0, + parameter WORD_SIZE = 1, + // Number of Word requests per cycle + parameter NUM_REQUESTS = 1, // core request tag size - parameter CORE_TAG_WIDTH = 0, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 ) ( diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index 3251b4d3..7eac5862 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -2,11 +2,11 @@ module VX_cache_dram_req_arb #( // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + parameter BANK_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0 + parameter WORD_SIZE = 1 ) ( input wire clk, input wire reset, @@ -28,28 +28,36 @@ module VX_cache_dram_req_arb #( input wire dram_req_ready ); - wire [`BANK_BITS-1:0] sel_bank; wire sel_valid; + wire [`BANK_BITS-1:0] sel_idx; + wire [NUM_BANKS-1:0] sel_1hot; VX_fixed_arbiter #( .N(NUM_BANKS) ) sel_arb ( .clk (clk), .reset (reset), - .requests (per_bank_dram_req_valid), - .grant_index (sel_bank), + .requests (per_bank_dram_req_valid), .grant_valid (sel_valid), - `UNUSED_PIN (grant_onehot) + .grant_index (sel_idx), + .grant_onehot(sel_1hot) ); - assign dram_req_valid = sel_valid; - assign dram_req_rw = 
per_bank_dram_req_rw[sel_bank]; - assign dram_req_byteen = per_bank_dram_req_byteen[sel_bank]; - assign dram_req_addr = per_bank_dram_req_addr[sel_bank]; - assign dram_req_data = per_bank_dram_req_data[sel_bank]; - + wire stall = ~dram_req_ready && dram_req_valid; + + VX_generic_register #( + .N(1 + 1 + BANK_LINE_SIZE + `DRAM_ADDR_WIDTH + `BANK_LINE_WIDTH) + ) core_wb_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_addr[sel_idx], per_bank_dram_req_data[sel_idx]}), + .out ({dram_req_valid, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}) + ); + for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_dram_req_ready[i] = dram_req_ready && (sel_bank == `BANK_BITS'(i)); + assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall; end endmodule diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 67fb728b..e377e2c8 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -3,21 +3,23 @@ module VX_cache_miss_resrv #( parameter CACHE_ID = 0, parameter BANK_ID = 0, - parameter CORE_TAG_ID_BITS = 0, + // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + parameter BANK_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0, - // Number of Word requests per cycle {1, 2, 4, 8, ...} - parameter NUM_REQUESTS = 0, + parameter WORD_SIZE = 1, + // Number of Word requests per cycle + parameter NUM_REQUESTS = 1, // Miss Reserv Queue Knob - parameter MRVQ_SIZE = 0, + parameter MRVQ_SIZE = 1, // core request tag size - parameter CORE_TAG_WIDTH = 0, + parameter CORE_TAG_WIDTH = 1, // Snooping request tag width - parameter SNP_REQ_TAG_WIDTH = 0 + parameter SNP_REQ_TAG_WIDTH = 1, + // size of tag id in core request tag + parameter 
CORE_TAG_ID_BITS = 0 ) ( input wire clk, input wire reset, @@ -177,15 +179,17 @@ module VX_cache_miss_resrv #( `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin if (miss_add || miss_resrv_schedule_st0 || miss_resrv_pop_st2) begin - if (miss_add) + if (miss_add) begin if (is_msrq_st2) - $write("%t: cache%0d:%0d msrq-restore addr%0d=%0h ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2); + $display("%t: cache%0d:%0d msrq-restore addr%0d=%0h ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2); else - $write("%t: cache%0d:%0d msrq-push addr%0d=%0h ready=%b wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2, debug_wid_st2, debug_pc_st2); - else if (miss_resrv_schedule_st0) - $write("%t: cache%0d:%0d msrq-schedule wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st0, debug_pc_st0); - else if (miss_resrv_pop_st2) - $write("%t: cache%0d:%0d msrq-pop addr%0d wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st2, debug_pc_st2); + $display("%t: cache%0d:%0d msrq-push addr%0d=%0h ready=%b wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2, debug_wid_st2, debug_pc_st2); + end + if (miss_resrv_schedule_st0) + $display("%t: cache%0d:%0d msrq-schedule addr%0d=%0h wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(miss_resrv_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); + if (miss_resrv_pop_st2) + $display("%t: cache%0d:%0d msrq-pop addr%0d wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st2, debug_pc_st2); + $write("%t: cache%0d:%0d msrq-table", $time, CACHE_ID, BANK_ID); for (integer j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin $write(" "); diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index bffc4679..3b9d762c 
100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -2,11 +2,11 @@ module VX_snp_forwarder #( parameter CACHE_ID = 0, - parameter BANK_LINE_SIZE = 0, - parameter NUM_REQUESTS = 0, - parameter SNRQ_SIZE = 0, - parameter SNP_REQ_TAG_WIDTH = 0, - parameter SNP_FWD_TAG_WIDTH = 0 + parameter BANK_LINE_SIZE = 1, + parameter NUM_REQUESTS = 1, + parameter SNRQ_SIZE = 1, + parameter SNP_REQ_TAG_WIDTH = 1, + parameter SNP_FWD_TAG_WIDTH = 1 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v index 50c98d0d..35449c4a 100644 --- a/hw/rtl/cache/VX_snp_rsp_arb.v +++ b/hw/rtl/cache/VX_snp_rsp_arb.v @@ -1,9 +1,9 @@ `include "VX_cache_config.vh" module VX_snp_rsp_arb #( - parameter NUM_BANKS = 0, - parameter BANK_LINE_SIZE = 0, - parameter SNP_REQ_TAG_WIDTH = 0 + parameter NUM_BANKS = 1, + parameter BANK_LINE_SIZE = 1, + parameter SNP_REQ_TAG_WIDTH = 1 ) ( input wire clk, input wire reset, @@ -17,25 +17,36 @@ module VX_snp_rsp_arb #( input wire snp_rsp_ready ); - wire [`BANK_BITS-1:0] sel_bank; - wire sel_valid; + wire sel_valid; + wire [`BANK_BITS-1:0] sel_idx; + wire [NUM_BANKS-1:0] sel_1hot; VX_fixed_arbiter #( .N(NUM_BANKS) ) sel_arb ( .clk (clk), .reset (reset), - .requests (per_bank_snp_rsp_valid), - .grant_index (sel_bank), + .requests (per_bank_snp_rsp_valid), .grant_valid (sel_valid), - `UNUSED_PIN (grant_onehot) + .grant_index (sel_idx), + .grant_onehot(sel_1hot) ); - assign snp_rsp_valid = sel_valid; - assign snp_rsp_tag = per_bank_snp_rsp_tag[sel_bank]; + wire stall = ~snp_rsp_ready && snp_rsp_valid; + + VX_generic_register #( + .N(1 + SNP_REQ_TAG_WIDTH) + ) core_wb_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}), + .out ({snp_rsp_valid, snp_rsp_tag}) + ); for (genvar i = 0; i < NUM_BANKS; i++) begin - assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (sel_bank == `BANK_BITS'(i)); + assign 
per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall; end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index c9bdc81e..e0f8cdcc 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -3,25 +3,30 @@ module VX_tag_data_access #( parameter CACHE_ID = 0, parameter BANK_ID = 0, - parameter CORE_TAG_ID_BITS = 0, + // Size of cache in bytes - parameter CACHE_SIZE = 0, + parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, + parameter BANK_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0, + parameter WORD_SIZE = 1, - // Enable cache writeable - parameter WRITE_ENABLE = 0, + // Enable cache writeable + parameter WRITE_ENABLE = 0, - // Enable dram update - parameter DRAM_ENABLE = 0 + // Enable dram update + parameter DRAM_ENABLE = 0, + + // size of tag id in core request tag + parameter CORE_TAG_ID_BITS = 0 ) ( input wire clk, input wire reset, + input wire stall, + `ifdef DBG_CORE_REQ_INFO `IGNORE_WARNINGS_BEGIN input wire[31:0] debug_pc_st1, @@ -70,9 +75,7 @@ module VX_tag_data_access #( wire[`BANK_LINE_WIDTH-1:0] use_read_data_st1; wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_write_enable; wire[`BANK_LINE_WIDTH-1:0] use_write_data; - - wire use_invalidate; - wire tags_match; + wire use_invalidate; wire[`TAG_SELECT_BITS-1:0] addrtag_st1 = addr_st1[`TAG_LINE_ADDR_RNG]; wire[`LINE_SELECT_BITS-1:0] addrline_st1 = addr_st1[`LINE_SELECT_BITS-1:0]; @@ -118,6 +121,9 @@ module VX_tag_data_access #( end end + // use "case equality" to handle uninitialized tag when block entry is not valid + wire tags_match = use_read_valid_st1 && (addrtag_st1 === use_read_tag_st1); + wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] write_enable; wire [`BANK_LINE_WIDTH-1:0] data_write; @@ -129,7 +135,8 @@ module 
VX_tag_data_access #( && mem_rw_st1 && use_read_valid_st1; - wire fill_write = valid_req_st1 && writefill_st1 && !force_miss_st1; + wire fill_write = valid_req_st1 && writefill_st1 + && !tags_match; // disable redundant fills because the block could be dirty for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin wire normal_write_w = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i))) @@ -142,14 +149,12 @@ module VX_tag_data_access #( assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = writefill_st1 ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1; end - // use "case equality" to handle uninitialized tag when block entry is not valid - assign tags_match = (addrtag_st1 === use_read_tag_st1); - assign use_write_enable = write_enable; assign use_write_data = data_write; - assign use_invalidate = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match + assign use_invalidate = valid_req_st1 && is_snp_st1 && tags_match && (use_read_dirty_st1 || snp_invalidate_st1) // block is dirty or need to force invalidation - && !force_miss_st1; + && !force_miss_st1 + && !stall; // do not invalidate the cache on stalls wire core_req_miss = valid_req_st1 && !is_snp_st1 && !writefill_st1 // is core request && (!use_read_valid_st1 || !tags_match); // block missing or has wrong tag @@ -158,29 +163,24 @@ module VX_tag_data_access #( assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1; assign dirtyb_st1 = use_read_dirtyb_st1; assign readdata_st1 = use_read_data_st1; - assign readtag_st1 = use_read_tag_st1; - - always @(*) begin - if (valid_req_st1 && writefill_st1) begin - if (!(!use_read_valid_st1 || !tags_match)) begin - $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); - end - end - end + assign readtag_st1 = use_read_tag_st1; `ifdef DBG_PRINT_CACHE_DATA - always @(posedge clk) begin + always @(posedge clk) begin if (valid_req_st1) begin + if (writefill_st1 && 
use_read_valid_st1 && tags_match) begin + $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + end if (miss_st1) begin $display("%t: cache%0d:%0d data-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, tagmatch=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, use_read_dirty_st1, tags_match, addrline_st1, addrtag_st1); end else if ((| use_write_enable)) begin if (writefill_st1) begin - $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), dirty_st1, addrline_st1, addrtag_st1, use_write_data); + $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), dirtyb_st1, addrline_st1, addrtag_st1, use_write_data); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, addrtag_st1, wordsel_st1, writeword_st1); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirtyb_st1, addrline_st1, addrtag_st1, wordsel_st1, writeword_st1); end end else begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, 
`LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirtyb_st1, addrline_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index e351bf20..07f5e423 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -2,13 +2,13 @@ module VX_tag_data_store #( // Size of cache in bytes - parameter CACHE_SIZE = 0, + parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter BANK_LINE_SIZE = 0, - // Number of banks {1, 2, 4, 8,...} - parameter NUM_BANKS = 0, //unused parameter? + parameter BANK_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 0 + parameter WORD_SIZE = 1 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index 01a0a167..6c39b0af 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -73,7 +73,7 @@ module VX_dp_ram #( end else begin - `UNUSED_VAR(rden) + `UNUSED_VAR (rden) if (RWCHECK) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 3c5c9a78..ce6c735e 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -48,13 +48,45 @@ module VX_generic_queue #( assign size = size_r; end else begin + + reg empty_r; + reg full_r; + reg [ADDRW-1:0] used_r; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + used_r <= 0; + end else begin + if (push) begin + assert(!full); + if (!pop) begin + empty_r <= 0; + if (used_r == ADDRW'(SIZE-1)) begin + full_r <= 1; + end + used_r <= used_r + ADDRW'(1); + end + end + if (pop) begin + assert(!empty); + if (!push) begin + full_r <= 0; + if (used_r == ADDRW'(1)) begin + empty_r <= 1; + end; + used_r <= used_r - ADDRW'(1); + end + end + end + end if (0 == BUFFERED) begin reg [ADDRW:0] rd_ptr_r; reg [ADDRW:0] wr_ptr_r; - reg [ADDRW-1:0] used_r; - + wire 
[ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0]; wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0]; @@ -62,21 +94,12 @@ module VX_generic_queue #( if (reset) begin rd_ptr_r <= 0; wr_ptr_r <= 0; - used_r <= 0; end else begin - if (push) begin - assert(!full); + if (push) begin wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1); - if (!pop) begin - used_r <= used_r + ADDRW'(1); - end end if (pop) begin - assert(!empty); rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1); - if (!push) begin - used_r <= used_r - ADDRW'(1); - end end end end @@ -95,22 +118,14 @@ module VX_generic_queue #( .din(data_in), .dout(data_out) ); - - assign empty = (wr_ptr_r == rd_ptr_r); - assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]); - assign size = {full, used_r}; end else begin wire [DATAW-1:0] dout; - reg [DATAW-1:0] din_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] rd_ptr_n_r; - reg [ADDRW-1:0] used_r; - reg empty_r; - reg full_r; reg bypass_r; always @(posedge clk) begin @@ -118,39 +133,17 @@ module VX_generic_queue #( wr_ptr_r <= 0; rd_ptr_r <= 0; rd_ptr_n_r <= 1; - empty_r <= 1; - full_r <= 0; - used_r <= 0; end else begin if (push) begin - wr_ptr_r <= wr_ptr_r + ADDRW'(1); - - if (!pop) begin - empty_r <= 0; - if (used_r == ADDRW'(SIZE-1)) begin - full_r <= 1; - end - used_r <= used_r + ADDRW'(1); - end + wr_ptr_r <= wr_ptr_r + ADDRW'(1); end - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - + rd_ptr_r <= rd_ptr_n_r; if (SIZE > 2) begin rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); end else begin // (SIZE == 2); rd_ptr_n_r <= ~rd_ptr_n_r; end - - if (!push) begin - full_r <= 0; - if (used_r == ADDRW'(1)) begin - assert(rd_ptr_n_r == wr_ptr_r); - empty_r <= 1; - end; - used_r <= used_r - ADDRW'(1); - end end end end @@ -179,10 +172,11 @@ module VX_generic_queue #( ); assign data_out = bypass_r ? 
din_r : dout; - assign empty = empty_r; - assign full = full_r; - assign size = {full_r, used_r}; end + + assign empty = empty_r; + assign full = full_r; + assign size = {full_r, used_r}; end endmodule