From 06945533cfc04cb90090debada44c1661f799037 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 9 Jan 2021 16:32:55 -0800 Subject: [PATCH] fixed l2/l3 caches related bugs --- hw/rtl/VX_define.vh | 6 +- hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/VX_mem_unit.v | 6 +- hw/rtl/VX_scoreboard.v | 2 +- hw/rtl/afu/VX_avs_wrapper.v | 39 ++- hw/rtl/cache/VX_bank.v | 231 ++++++++---------- hw/rtl/cache/VX_cache.v | 31 ++- hw/rtl/cache/VX_data_access.v | 50 +++- hw/rtl/cache/VX_data_store.v | 21 +- .../{VX_input_queue.v => VX_fifo_queue_xt.v} | 93 ++++--- hw/rtl/cache/VX_miss_resrv.v | 87 ++++--- hw/rtl/libs/VX_fifo_queue.v | 25 +- hw/rtl/libs/VX_pending_size.v | 35 +++ 13 files changed, 354 insertions(+), 274 deletions(-) rename hw/rtl/cache/{VX_input_queue.v => VX_fifo_queue_xt.v} (74%) create mode 100644 hw/rtl/libs/VX_pending_size.v diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 31285edc..f033083b 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -302,7 +302,7 @@ `define DDRAM_BYTEEN_WIDTH `DCACHE_LINE_SIZE // DRAM request tag bits -`define DDRAM_TAG_WIDTH `LOG2UP(`DNUM_BANKS) +`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH // Core request size `define DNUM_REQUESTS `NUM_THREADS @@ -351,7 +351,7 @@ `define L2DRAM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE // DRAM request tag bits -`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `LOG2UP(`L2NUM_BANKS) : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES))) +`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES))) ////////////////////////// L3cache Configurable Knobs ///////////////////////// @@ -377,7 +377,7 @@ `define L3DRAM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE // DRAM request tag bits -`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3NUM_BANKS) : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS))) +`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? 
`L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS))) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 9aa3fb80..28febc06 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -239,7 +239,7 @@ module VX_lsu_unit #( `ifdef DBG_PRINT_CORE_DCACHE always @(posedge clk) begin - if ((| dcache_req_if.valid) && (|dcache_req_if.ready)) begin + if (| (dcache_req_if.valid & dcache_req_if.ready)) begin if (dcache_req_if.rw[0]) $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index a04e4fcf..438a0aff 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -269,9 +269,9 @@ module VX_mem_unit # ( .dram_req_ready (1'b0), // DRAM response - .dram_rsp_valid (1'b0), - .dram_rsp_data ((`SCACHE_LINE_SIZE*8)'(0)), - .dram_rsp_tag (`LOG2UP(`SNUM_BANKS)'(0)), + .dram_rsp_valid (0), + .dram_rsp_data (0), + .dram_rsp_tag (0), `UNUSED_PIN (dram_rsp_ready) ); diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index fd4efee0..1f6b5754 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -61,7 +61,7 @@ module VX_scoreboard #( stall_ctr <= 0; end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin stall_ctr <= stall_ctr + 1; - assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", + assert(stall_ctr < 10000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, deq_real_inuse_regs[ibuf_deq_if.rd], deq_real_inuse_regs[ibuf_deq_if.rs1], deq_real_inuse_regs[ibuf_deq_if.rs2], deq_real_inuse_regs[ibuf_deq_if.rs3]); end else if 
(ibuf_deq_if.valid && ibuf_deq_if.ready) begin diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index e853c3af..e4167ad7 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -52,25 +52,20 @@ module VX_avs_wrapper #( wire avs_rspq_pop = avs_reqq_pop; wire avs_rspq_empty; - reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads; - wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n; - - assign avs_pending_reads_n = avs_pending_reads - + RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 : - (avs_rspq_pop && !avs_reqq_push) ? -1 : 0); - - reg rsp_queue_ready; + wire rsp_queue_going_full; + VX_pending_size #( + .SIZE (RD_QUEUE_SIZE) + ) pending_size ( + .clk (clk), + .reset (reset), + .push (avs_reqq_push), + .pop (avs_rspq_pop), + .full (rsp_queue_going_full) + ); always @(posedge clk) begin - if (reset) begin - avs_burstcount_r <= 1; - avs_bankselect_r <= 0; - avs_pending_reads <= 0; - rsp_queue_ready <= 1; - end else begin - avs_pending_reads <= avs_pending_reads_n; - rsp_queue_ready <= (avs_pending_reads_n != RD_QUEUE_SIZE); - end + avs_burstcount_r <= 1; + avs_bankselect_r <= 0; end VX_fifo_queue #( @@ -107,12 +102,12 @@ module VX_avs_wrapper #( `UNUSED_PIN (size) ); - assign avs_read = dram_req_valid && !dram_req_rw && rsp_queue_ready; - assign avs_write = dram_req_valid && dram_req_rw && rsp_queue_ready; + assign avs_read = dram_req_valid && !dram_req_rw && !rsp_queue_going_full; + assign avs_write = dram_req_valid && dram_req_rw && !rsp_queue_going_full; assign avs_address = dram_req_addr; assign avs_byteenable = dram_req_byteen; assign avs_writedata = dram_req_data; - assign dram_req_ready = !avs_waitrequest && rsp_queue_ready; + assign dram_req_ready = !avs_waitrequest && !rsp_queue_going_full; assign avs_burstcount = avs_burstcount_r; assign avs_bankselect = avs_bankselect_r; @@ -124,10 +119,10 @@ module VX_avs_wrapper #( if (dram_req_rw) $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, 
`TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data); else - $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, avs_pending_reads_n); + $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag); end if (dram_rsp_valid && dram_rsp_ready) begin - $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, avs_pending_reads_n); + $display("%t: AVS Rd Rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data); end end `endif diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 2f8cac57..9cc8f36c 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -84,12 +84,11 @@ module VX_bank #( // DRAM response input wire dram_rsp_valid, + input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr, input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data, output wire dram_rsp_ready ); - localparam MSHR_SIZE_BITS = $clog2(MSHR_SIZE+1); - `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0, debug_pc_st1, debug_pc_st01; @@ -98,9 +97,9 @@ module VX_bank #( `endif wire drsq_pop; - wire drsq_empty, drsp_empty_next; - - wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata; + wire drsq_empty, drsq_empty_next; + wire [`LINE_ADDR_WIDTH-1:0] drsq_addr_next; + wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_next; wire drsq_push = dram_rsp_valid && dram_rsp_ready; @@ -108,8 +107,8 @@ module VX_bank #( wire drsq_full; assign dram_rsp_ready = !drsq_full; - VX_input_queue #( - .DATAW ($bits(dram_rsp_data)), + VX_fifo_queue_xt #( + .DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), .SIZE (DRSQ_SIZE), .FASTRAM (1) ) dram_rsp_queue ( @@ -117,29 +116,31 @@ module VX_bank #( .reset (reset), .push (drsq_push), .pop (drsq_pop), - .data_in (dram_rsp_data), - .data_out(drsq_filldata), + .data_in ({dram_rsp_addr, dram_rsp_data}), + `UNUSED_PIN (data_out), .empty 
(drsq_empty), - `UNUSED_PIN (data_out_next), - .empty_next(drsp_empty_next), + .data_out_next ({drsq_addr_next, drsq_filldata_next}), + .empty_next (drsq_empty_next), .full (drsq_full), + `UNUSED_PIN (almost_full), `UNUSED_PIN (size) ); end else begin `UNUSED_VAR (dram_rsp_valid) + `UNUSED_VAR (dram_rsp_addr) `UNUSED_VAR (dram_rsp_data) - assign drsq_empty = 1; - assign drsp_empty_next = 1; - assign drsq_filldata = 0; - assign dram_rsp_ready = 0; + assign drsq_empty = 1; + assign drsq_empty_next = 1; + assign drsq_addr_next = 0; + assign drsq_filldata_next = 0; + assign dram_rsp_ready = 0; end wire creq_pop; - wire creq_full; - wire creq_empty; - wire [`REQS_BITS-1:0] creq_tid_next; + wire creq_full, creq_empty; wire creq_rw_next; wire [WORD_SIZE-1:0] creq_byteen_next; + wire [`REQS_BITS-1:0] creq_tid_next; `IGNORE_WARNINGS_BEGIN wire [`WORD_ADDR_WIDTH-1:0] creq_addr_next_unqual; `IGNORE_WARNINGS_END @@ -163,7 +164,7 @@ module VX_bank #( assign creq_wsel_next = 0; end - VX_input_queue #( + VX_fifo_queue_xt #( .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), .SIZE (CREQ_SIZE), .FASTRAM (1) @@ -173,14 +174,18 @@ module VX_bank #( .push (creq_push), .pop (creq_pop), .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}), - .data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}), - `UNUSED_PIN (empty_next), `UNUSED_PIN (data_out), .empty (creq_empty), + .data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}), + `UNUSED_PIN (empty_next), .full (creq_full), + `UNUSED_PIN (almost_full), `UNUSED_PIN (size) ); + wire mshr_pop; + wire mshr_almost_full; + wire mshr_pending_unqual_st0; wire mshr_valid; wire mshr_valid_next; wire [`REQS_BITS-1:0] mshr_tid_next; @@ -190,48 +195,26 @@ module VX_bank #( wire [`REQ_TAG_WIDTH-1:0] mshr_tag_next; wire mshr_rw_next; wire 
[WORD_SIZE-1:0] mshr_byteen_next; - - reg [`LINE_ADDR_WIDTH-1:0] creq_addr; - reg [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; - reg [`REQ_TAG_WIDTH-1:0] creq_tag; - reg creq_mem_rw; - reg [WORD_SIZE-1:0] creq_byteen; - reg [`WORD_WIDTH-1:0] creq_writeword; - reg [`REQS_BITS-1:0] creq_tid; - always @(posedge clk) begin - creq_addr <= (mshr_valid_next || !drsp_empty_next) ? mshr_addr_next : creq_addr_next; - creq_wsel <= mshr_valid_next ? mshr_wsel_next : creq_wsel_next; - creq_mem_rw <= mshr_valid_next ? mshr_rw_next : creq_rw_next; - creq_byteen <= mshr_valid_next ? mshr_byteen_next : creq_byteen_next; - creq_writeword <= mshr_valid_next ? mshr_writeword_next : creq_writeword_next; - creq_tid <= mshr_valid_next ? mshr_tid_next : creq_tid_next; - creq_tag <= mshr_valid_next ? `REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next); - end + wire dreq_almost_full; - wire mshr_pop; - reg [MSHR_SIZE_BITS-1:0] mshr_pending_size; - wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n; - reg mshr_going_full; - wire mshr_pending_hazard_unqual_st0; - - wire valid_st0, valid_st1; - wire is_fill_st0, is_fill_st1; - wire is_mshr_st0, is_mshr_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; - wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1; + wire mem_rw_st0, mem_rw_st1; + wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1; wire [`CACHE_LINE_WIDTH-1:0] filldata_st0, filldata_st1; + wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1; + wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1; + wire valid_st0, valid_st1; + wire is_fill_st0, is_fill_st1; + wire is_mshr_st0, is_mshr_st1; + wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1; wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1; wire miss_st0, miss_st1; wire force_miss_st0, force_miss_st1; wire dirty_st0; wire [CACHE_LINE_SIZE-1:0] dirtyb_st0, dirtyb_st1; - wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1; - wire mem_rw_st0, 
mem_rw_st1; - wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; - wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1; wire do_writeback_st0, do_writeback_st1; wire writeen_unqual_st0, writeen_unqual_st1; wire mshr_push_unqual_st0, mshr_push_unqual_st1; @@ -254,44 +237,39 @@ module VX_bank #( wire is_mshr_miss_st1 = valid_st1 && is_mshr_st1 && (miss_st1 || force_miss_st1); - wire creq_commit = valid_st1 && !is_fill_st1 - && (core_req_hit_st1 || (WRITE_THROUGH && mem_rw_st1)) - && !pipeline_stall; - // determine which queue to pop next in piority order wire mshr_pop_unqual = mshr_valid; wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty; - wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full; + wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_almost_full && !dreq_almost_full; assign mshr_pop = mshr_pop_unqual && !pipeline_stall && !is_mshr_miss_st1; // stop if previous request was a miss assign drsq_pop = drsq_pop_unqual && !pipeline_stall; assign creq_pop = creq_pop_unqual && !pipeline_stall; - // MSHR pending size - assign mshr_pending_size_n = mshr_pending_size + - ((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? 
-1 : 0)); - always @(posedge clk) begin - if (reset) begin - mshr_pending_size <= 0; - mshr_going_full <= 0; - end else begin - mshr_pending_size <= mshr_pending_size_n; - mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE_BITS'(MSHR_SIZE)); - end - end + assign valid_st0 = mshr_pop || drsq_pop || creq_pop; + assign is_mshr_st0 = mshr_pop_unqual; + assign is_fill_st0 = drsq_pop_unqual; - assign valid_st0 = mshr_pop || drsq_pop || creq_pop; - assign is_mshr_st0 = mshr_pop_unqual; - assign is_fill_st0 = drsq_pop_unqual; - assign addr_st0 = creq_addr; - assign wsel_st0 = creq_wsel; - assign mem_rw_st0 = creq_mem_rw; - assign byteen_st0 = creq_byteen; - assign writeword_st0 = creq_writeword; - assign req_tid_st0 = creq_tid; - assign tag_st0 = creq_tag; - assign filldata_st0 = drsq_filldata; + VX_pipe_register #( + .DATAW (`LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH + `CACHE_LINE_WIDTH), + .RESETW (0) + ) pipe_reg0 ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({ + mshr_valid_next ? mshr_addr_next : (!drsq_empty_next ? drsq_addr_next : creq_addr_next), + mshr_valid_next ? mshr_wsel_next : creq_wsel_next, + mshr_valid_next ? mshr_rw_next : creq_rw_next, + mshr_valid_next ? mshr_byteen_next : creq_byteen_next, + mshr_valid_next ? mshr_writeword_next : creq_writeword_next, + mshr_valid_next ? mshr_tid_next : creq_tid_next, + mshr_valid_next ? 
`REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next), + drsq_filldata_next + }), + .data_out ({addr_st0, wsel_st0, mem_rw_st0, byteen_st0, writeword_st0, req_tid_st0, tag_st0, filldata_st0}) + ); `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -347,12 +325,12 @@ if (DRAM_ENABLE) begin // redundant fills wire is_redundant_fill = is_fill_st0 && !miss_st0; - // we have a miss in mshr for the current address - wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1)); + // we have a miss in mshr or going to it for the current address + wire mshr_pending_st0 = mshr_pending_unqual_st0 + || (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1)); // force miss to ensure commit order when a new request has pending previous requests to same block - assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0; + assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0; assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0) || (is_fill_st0 && !is_redundant_fill); @@ -369,7 +347,7 @@ if (DRAM_ENABLE) begin end else begin - `UNUSED_VAR (mshr_pending_hazard_unqual_st0) + `UNUSED_VAR (mshr_pending_unqual_st0) `UNUSED_VAR (drsq_push) `UNUSED_VAR (dirty_st0) `UNUSED_VAR (writeen_st1) @@ -400,7 +378,7 @@ end VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) - ) pipe_reg ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (!pipeline_stall), @@ -482,7 +460,7 @@ end && !crsq_push_stall && !dreq_push_stall; - wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == addr_st0)); + wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0); if (DRAM_ENABLE) begin @@ 
-501,6 +479,7 @@ end .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .MSHR_SIZE (MSHR_SIZE), + .ALM_FULL (MSHR_SIZE-1), .CORE_TAG_WIDTH (CORE_TAG_WIDTH) ) miss_resrv ( .clk (clk), @@ -518,21 +497,22 @@ end .enqueue_addr (addr_st1), .enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), .enqueue_is_mshr (is_mshr_st1), - .enqueue_ready (mshr_init_ready_state_st1), + .enqueue_as_ready (mshr_init_ready_state_st1), + .enqueue_almfull (mshr_almost_full), // lookup .lookup_ready (drsq_pop), .lookup_addr (addr_st0), - .lookup_match (mshr_pending_hazard_unqual_st0), + .lookup_match (mshr_pending_unqual_st0), // schedule .schedule (mshr_pop), .schedule_valid (mshr_valid), - .schedule_valid_next(mshr_valid_next), - .schedule_addr_next (mshr_addr_next), - .schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}), `UNUSED_PIN (schedule_addr), `UNUSED_PIN (schedule_data), + .schedule_valid_next(mshr_valid_next), + .schedule_addr_next (mshr_addr_next), + .schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}), // dequeue .dequeue (mshr_dequeue_st1) @@ -545,15 +525,16 @@ end `UNUSED_VAR (mem_rw_st1) `UNUSED_VAR (byteen_st1) `UNUSED_VAR (incoming_fill_st1) - assign mshr_pending_hazard_unqual_st0 = 0; - assign mshr_valid = 0; - assign mshr_valid_next = 0; - assign mshr_addr_next = 0; - assign mshr_wsel_next = 0; + assign mshr_almost_full = 0; + assign mshr_pending_unqual_st0 = 0; + assign mshr_valid = 0; + assign mshr_valid_next = 0; + assign mshr_addr_next = 0; + assign mshr_wsel_next = 0; assign mshr_writeword_next = 0; - assign mshr_tid_next = 0; - assign mshr_tag_next = 0; - assign mshr_rw_next = 0; + assign mshr_tid_next = 0; + assign mshr_tag_next = 0; + assign mshr_rw_next = 0; assign mshr_byteen_next = 0; end @@ -607,13 +588,13 @@ end // Enqueue DRAM request - wire dreq_empty, dreq_full; + wire dreq_empty; wire 
dreq_push_unqual = valid_st1 && dreq_push_st1; - assign dreq_push_stall = dreq_push_unqual && dreq_full; + assign dreq_push_stall = 0; - wire dreq_push = dreq_push_unqual - && !dreq_full + wire dreq_push = dreq_push_unqual + && (do_writeback_st1 || !incoming_fill_st1) && !mshr_push_stall && !crsq_push_stall; @@ -645,16 +626,11 @@ end assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}}; - if (DRAM_ENABLE) begin - always @(posedge clk) begin - assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1)) - else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); - end - - VX_fifo_queue #( + if (DRAM_ENABLE) begin + VX_fifo_queue_xt #( .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE), - .BUFFERED (1), + .ALM_FULL (DREQ_SIZE-1), .FASTRAM (1) ) dram_req_queue ( .clk (clk), @@ -664,7 +640,10 @@ end .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), - .full (dreq_full), + .almost_full (dreq_almost_full), + `UNUSED_PIN (full), + `UNUSED_PIN (data_out_next), + `UNUSED_PIN (empty_next), `UNUSED_PIN (size) ); end else begin @@ -678,9 +657,9 @@ end `UNUSED_VAR (readdata_st1) `UNUSED_VAR (writeback) `UNUSED_VAR (dram_req_ready) - assign dreq_empty = 1; - assign dreq_full = 0; - assign dram_req_rw = 0; + assign dreq_empty = 1; + assign dreq_almost_full = 0; + assign dram_req_rw = 0; assign dram_req_byteen = 0; assign dram_req_addr = 0; assign dram_req_data = 0; @@ -689,9 +668,7 @@ end assign dram_req_valid = !dreq_empty; // bank pipeline stall - assign pipeline_stall = mshr_push_stall - || crsq_push_stall - || dreq_push_stall; + assign pipeline_stall = crsq_push_stall; `SCOPE_ASSIGN (valid_st0, valid_st0); `SCOPE_ASSIGN (valid_st1, valid_st1); @@ -708,23 +685,27 @@ end `ifdef PERF_ENABLE assign perf_read_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && !mem_rw_st1; assign 
perf_write_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && mem_rw_st1; - assign perf_mshr_stalls = mshr_going_full; - assign perf_pipe_stalls = pipeline_stall || mshr_going_full; + assign perf_pipe_stalls = pipeline_stall || mshr_almost_full || dreq_almost_full; + assign perf_mshr_stalls = mshr_almost_full; `endif `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin + if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin + $display("%t: miss with incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + assert(!is_mshr_st1); + end if (pipeline_stall) begin $display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); end if (drsq_pop) begin - $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata); + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), filldata_st0); end if (creq_pop || mshr_pop) begin - if (creq_mem_rw) - $display("%t: cache%0d:%0d core-wr-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, creq_tag, creq_tid, creq_byteen, creq_writeword, debug_wid_st0, debug_pc_st0); + if (mem_rw_st0) + $display("%t: cache%0d:%0d core-wr-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, writeword_st0, debug_wid_st0, debug_pc_st0); else - $display("%t: cache%0d:%0d core-rd-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, creq_tag, creq_tid, creq_byteen, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, 
is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, debug_wid_st0, debug_pc_st0); end if (crsq_push) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag_st1, crsq_tid_st1, crsq_data_st1, debug_wid_st1, debug_pc_st1); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index c207186b..c67e3abd 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -42,7 +42,7 @@ module VX_cache #( parameter CORE_TAG_ID_BITS = CORE_TAG_WIDTH, // dram request tag size - parameter DRAM_TAG_WIDTH = `LOG2UP(NUM_BANKS), + parameter DRAM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)), // bank offset from beginning of index range parameter BANK_ADDR_OFFSET = 0 @@ -89,6 +89,7 @@ module VX_cache #( ); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) + `UNUSED_VAR (dram_rsp_tag) wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; @@ -130,10 +131,10 @@ module VX_cache #( .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET), .BUFFERED ((NUM_BANKS > 1) && DRAM_ENABLE) ) cache_core_req_bank_sel ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), `ifdef PERF_ENABLE - .bank_stalls (perf_cache_if.bank_stalls), + .bank_stalls(perf_cache_if.bank_stalls), `else `UNUSED_PIN (bank_stalls), `endif @@ -154,11 +155,12 @@ module VX_cache #( .per_bank_core_req_ready (per_bank_core_req_ready) ); + assign dram_req_tag = dram_req_addr; if (NUM_BANKS == 1) begin `UNUSED_VAR (dram_rsp_tag) assign dram_rsp_ready = per_bank_dram_rsp_ready; end else begin - assign dram_rsp_ready = per_bank_dram_rsp_ready[dram_rsp_tag]; + assign dram_rsp_ready = per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag)]; end for (genvar i = 0; i < NUM_BANKS; i++) begin @@ -184,7 +186,8 @@ module VX_cache #( 
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_dram_req_data; wire curr_bank_dram_req_ready; - wire curr_bank_dram_rsp_valid; + wire curr_bank_dram_rsp_valid; + wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire [`CACHE_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; wire curr_bank_dram_rsp_ready; @@ -220,8 +223,10 @@ module VX_cache #( // DRAM response if (NUM_BANKS == 1) begin assign curr_bank_dram_rsp_valid = dram_rsp_valid; + assign curr_bank_dram_rsp_addr = dram_rsp_tag; end else begin - assign curr_bank_dram_rsp_valid = dram_rsp_valid && (dram_rsp_tag == i); + assign curr_bank_dram_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); + assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag); end assign curr_bank_dram_rsp_data = dram_rsp_data; assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; @@ -284,7 +289,8 @@ module VX_cache #( .dram_req_ready (curr_bank_dram_req_ready), // DRAM response - .dram_rsp_valid (curr_bank_dram_rsp_valid), + .dram_rsp_valid (curr_bank_dram_rsp_valid), + .dram_rsp_addr (curr_bank_dram_rsp_addr), .dram_rsp_data (curr_bank_dram_rsp_data), .dram_rsp_ready (curr_bank_dram_rsp_ready) ); @@ -311,14 +317,14 @@ module VX_cache #( ); if (DRAM_ENABLE) begin - wire [NUM_BANKS-1:0][(DRAM_TAG_WIDTH + `DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; + wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; i++) begin - assign data_in[i] = {DRAM_TAG_WIDTH'(i), per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]}; + assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]}; end VX_stream_arbiter #( .NUM_REQS (NUM_BANKS), - .DATAW (DRAM_TAG_WIDTH + `DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), + .DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH), .BUFFERED (1) ) 
dram_req_arb ( .clk (clk), @@ -327,7 +333,7 @@ module VX_cache #( .data_in (data_in), .ready_in (per_bank_dram_req_ready), .valid_out (dram_req_valid), - .data_out ({dram_req_tag, dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}), + .data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}), .ready_out (dram_req_ready) ); end else begin @@ -342,7 +348,6 @@ module VX_cache #( assign dram_req_byteen = 0; assign dram_req_addr = 0; assign dram_req_data = 0; - assign dram_req_tag = 0; `UNUSED_VAR (dram_req_ready) end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 1135495f..e7d6332f 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -77,7 +77,8 @@ module VX_data_access #( .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .WRITE_ENABLE (WRITE_ENABLE) + .WRITE_ENABLE (WRITE_ENABLE), + .WRITE_THROUGH (WRITE_THROUGH) ) data_store ( .clk (clk), .reset (reset), @@ -97,26 +98,51 @@ module VX_data_access #( wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] writedata_qual; if (`WORD_SELECT_BITS != 0) begin - for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin - assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}}; - assign writedata_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? writeword_in : readdata_in[i * `WORD_WIDTH +: `WORD_WIDTH]; + for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin + wire [`WORD_WIDTH-1:0] readdata_sel = readdata_in[i * `WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] writeword_qual; + for (genvar j = 0; j < WORD_SIZE; j++) begin + assign writeword_qual[j * 8 +: 8] = wbyteen_in[j] ? writeword_in[j * 8 +: 8] : readdata_sel[j * 8 +: 8]; + end + wire wenable = (wwsel_in == `WORD_SELECT_BITS'(i)); + assign wbyteen_qual[i] = wenable ? wbyteen_in : {WORD_SIZE{1'b0}}; + assign writedata_qual[i] = wenable ? 
writeword_qual : readdata_sel; end end else begin `UNUSED_VAR (wwsel_in) - `UNUSED_VAR (readdata_in) + wire [`WORD_WIDTH-1:0] writeword_qual; + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign writeword_qual[i * 8 +: 8] = wbyteen_in[i] ? writeword_in[i * 8 +: 8] : readdata_in[i * 8 +: 8]; + end assign wbyteen_qual = wbyteen_in; - assign writedata_qual = writeword_in; + assign writedata_qual = writeword_qual; end - assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual; - assign write_data = wfill_in ? filldata_in : writedata_qual; - assign write_enable = writeen_in && !stall; + assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual; + assign write_data = wfill_in ? filldata_in : writedata_qual; wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in; - for (genvar i = 0; i < CACHE_LINE_SIZE; i++) begin - assign dirtyb_out[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i]; - assign readdata_out[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8]; + + if (`WORD_SELECT_BITS != 0) begin + for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin + wire [`WORD_WIDTH-1:0] readdata_sel = read_data[i * `WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] writeword_qual; + for (genvar j = 0; j < WORD_SIZE; j++) begin + assign writeword_qual[j * 8 +: 8] = wbyteen_in[j] ? writeword_in[j * 8 +: 8] : readdata_sel[j * 8 +: 8]; + end + wire wenable = (wwsel_in == `WORD_SELECT_BITS'(i)); + assign dirtyb_out[i * WORD_SIZE +: WORD_SIZE] = read_dirtyb[i * WORD_SIZE +: WORD_SIZE] | ({WORD_SIZE{rw_hazard && wenable}} & wbyteen_in); + assign readdata_out[i * `WORD_WIDTH +: `WORD_WIDTH] = (rw_hazard && wfill_in) ? filldata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : + (rw_hazard && wenable) ? writeword_qual : readdata_sel; + end + end else begin + wire [`WORD_WIDTH-1:0] writeword_qual; + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign writeword_qual[i * 8 +: 8] = wbyteen_in[i] ? 
writeword_in[i * 8 +: 8] : read_data[i * 8 +: 8]; + end + assign dirtyb_out = read_dirtyb | ({WORD_SIZE{rw_hazard}} & wbyteen_in); + assign readdata_out = rw_hazard ? (wfill_in ? filldata_in : writeword_qual) : read_data; end `ifdef DBG_PRINT_CACHE_DATA diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index 4ab10bb0..8afd6e02 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -2,33 +2,36 @@ module VX_data_store #( // Size of cache in bytes - parameter CACHE_SIZE = 1, + parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes - parameter CACHE_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Enable cache writeable - parameter WRITE_ENABLE = 0 + parameter WRITE_ENABLE = 1, + + // Enable write-through + parameter WRITE_THROUGH = 1 ) ( input wire clk, input wire reset, input wire write_enable, input wire write_fill, - input wire[CACHE_LINE_SIZE-1:0] byte_enable, + input wire[CACHE_LINE_SIZE-1:0] byte_enable, input wire[`LINE_SELECT_BITS-1:0] write_addr, - input wire[`CACHE_LINE_WIDTH-1:0] write_data, + input wire[`CACHE_LINE_WIDTH-1:0] write_data, input wire[`LINE_SELECT_BITS-1:0] read_addr, output wire[`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] read_dirtyb, - output wire[`CACHE_LINE_WIDTH-1:0] read_data + output wire[`CACHE_LINE_WIDTH-1:0] read_data ); `UNUSED_VAR (reset) - if (WRITE_ENABLE) begin + if (WRITE_ENABLE && !WRITE_THROUGH) begin reg [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] dirtyb[`LINES_PER_BANK-1:0]; always @(posedge clk) begin if (write_enable) begin diff --git a/hw/rtl/cache/VX_input_queue.v b/hw/rtl/cache/VX_fifo_queue_xt.v similarity index 74% rename from hw/rtl/cache/VX_input_queue.v rename to hw/rtl/cache/VX_fifo_queue_xt.v index 6685e725..371ab00d 100644 --- a/hw/rtl/cache/VX_input_queue.v +++ b/hw/rtl/cache/VX_fifo_queue_xt.v @@ -1,8 +1,9 @@ 
`include "VX_platform.vh" -module VX_input_queue #( +module VX_fifo_queue_xt #( parameter DATAW = 1, parameter SIZE = 2, + parameter ALM_FULL = (SIZE - 1), parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1), parameter FASTRAM = 0 @@ -17,61 +18,41 @@ module VX_input_queue #( output wire [DATAW-1:0] data_out_next, output wire empty_next, output wire full, + output wire almost_full, output wire [SIZEW-1:0] size ); wire [DATAW-1:0] dout; reg [DATAW-1:0] dout_r, dout_n_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n_r; - reg full_r; + reg full_r, almost_full_r; reg empty_r, empty_n_r; - reg [ADDRW-1:0] used_r; - - always @(*) begin - empty_n_r = empty_r; - if (reset) begin - empty_n_r = 1; - end else begin - if (push && !pop) begin - empty_n_r = 0; - end - if (pop && !push) begin - if (used_r == ADDRW'(1)) begin - empty_n_r = 1; - end; - end - end - end - - always @(*) begin - dout_n_r = dout_r; - if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin - dout_n_r = data_in; - end else if (pop) begin - dout_n_r = dout; - end - end + reg [ADDRW-1:0] used_r; always @(posedge clk) begin - if (reset) begin - full_r <= 0; - used_r <= 0; + if (reset) begin + used_r <= 0; + full_r <= 0; + almost_full_r <= 0; end else begin assert(!push || !full); assert(!pop || !empty_r); - if (push && !pop) begin - if (used_r == ADDRW'(SIZE-1)) begin - full_r <= 1; + if (push) begin + if (!pop) begin + if (used_r == ADDRW'(SIZE-1)) + full_r <= 1; + if (used_r == ADDRW'(ALM_FULL-1)) + almost_full_r <= 1; end - end - if (pop && !push) begin + end else if (pop) begin + if (used_r == ADDRW'(ALM_FULL)) + almost_full_r <= 0; full_r <= 0; end + used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop))); end - empty_r <= empty_n_r; - dout_r <= dout_n_r; - end + end always @(posedge clk) begin if (reset) begin @@ -108,13 +89,45 @@ module VX_input_queue #( .rden(1'b1), .din(data_in), .dout(dout) - ); + ); + + always @(*) begin + empty_n_r = empty_r; + if (reset) begin + 
empty_n_r = 1; + end else begin + if (push) begin + if (!pop) begin + empty_n_r = 0; + end + end else if (pop) begin + if (used_r == ADDRW'(1)) begin + empty_n_r = 1; + end + end + end + end + + always @(*) begin + dout_n_r = dout_r; + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + dout_n_r = data_in; + end else if (pop) begin + dout_n_r = dout; + end + end + + always @(posedge clk) begin + empty_r <= empty_n_r; + dout_r <= dout_n_r; + end assign data_out = dout_r; assign data_out_next = dout_n_r; assign empty = empty_r; assign empty_next = empty_n_r; assign full = full_r; + assign almost_full = almost_full_r; assign size = {full_r, used_r}; endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index fc97ef50..be5f0c0d 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -14,6 +14,7 @@ module VX_miss_resrv #( parameter NUM_REQS = 1, // Miss Reserv Queue Knob parameter MSHR_SIZE = 1, + parameter ALM_FULL = (MSHR_SIZE-1), // core request tag size parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag @@ -36,7 +37,8 @@ module VX_miss_resrv #( input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr, input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data, input wire enqueue_is_mshr, - input wire enqueue_ready, + input wire enqueue_as_ready, + output wire enqueue_almfull, // lookup input wire lookup_ready, @@ -55,19 +57,22 @@ module VX_miss_resrv #( // dequeue input wire dequeue ); + localparam ADDRW = $clog2(MSHR_SIZE); + reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; - reg [MSHR_SIZE-1:0] valid_table; - reg [MSHR_SIZE-1:0] ready_table; - reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, schedule_n_ptr; - reg [`LOG2UP(MSHR_SIZE)-1:0] restore_ptr; - reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr; - reg [`LOG2UP(MSHR_SIZE)-1:0] used_r; - reg full_r; - + reg [MSHR_SIZE-1:0] valid_table; + reg [MSHR_SIZE-1:0] ready_table; + reg [ADDRW-1:0] schedule_ptr, schedule_n_ptr; + reg 
[ADDRW-1:0] restore_ptr; + reg [ADDRW-1:0] head_ptr, tail_ptr; + reg [ADDRW-1:0] used_r; + reg full_r, almost_full_r; + reg schedule_valid_r, schedule_valid_n_r; reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r, schedule_addr_n_r; reg [`MSHR_DATA_WIDTH-1:0] dout_r, dout_n_r; + wire [`MSHR_DATA_WIDTH-1:0] dout; wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin @@ -79,17 +84,20 @@ module VX_miss_resrv #( wire push_new = enqueue && !enqueue_is_mshr; wire restore = enqueue && enqueue_is_mshr; - wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); + wire [ADDRW-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); always @(posedge clk) begin if (reset) begin - valid_table <= 0; - ready_table <= 0; - schedule_ptr <= 0; - schedule_n_ptr <= 1; - restore_ptr <= 0; - head_ptr <= 0; - tail_ptr <= 0; + valid_table <= 0; + ready_table <= 0; + schedule_ptr <= 0; + schedule_n_ptr <= 1; + restore_ptr <= 0; + head_ptr <= 0; + tail_ptr <= 0; + used_r <= 0; + full_r <= 0; + almost_full_r <= 0; end else begin // WARNING: lookup should happen enqueue for ready_table's correct update @@ -102,7 +110,7 @@ module VX_miss_resrv #( if (enqueue_is_mshr) begin // restore schedule, returning missed msrq entry valid_table[restore_ptr] <= 1; - ready_table[restore_ptr] <= enqueue_ready; + ready_table[restore_ptr] <= enqueue_as_ready; restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); schedule_ptr <= head_ptr; schedule_n_ptr <= head_ptr_n; @@ -110,7 +118,7 @@ module VX_miss_resrv #( // push new entry assert(!full_r); valid_table[tail_ptr] <= 1; - ready_table[tail_ptr] <= enqueue_ready; + ready_table[tail_ptr] <= enqueue_as_ready; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end end else if (dequeue) begin @@ -129,10 +137,25 @@ module VX_miss_resrv #( schedule_ptr <= schedule_n_ptr; if (MSHR_SIZE > 2) begin schedule_n_ptr <= schedule_ptr + $bits(schedule_ptr)'(2); - end else begin // (SIZE == 2); + end else begin // (MSHR_SIZE == 2); schedule_n_ptr 
<= ~schedule_n_ptr; end end + + if (push_new) begin + if (!dequeue) begin + if (used_r == ADDRW'(MSHR_SIZE-1)) + full_r <= 1; + if (used_r == ADDRW'(ALM_FULL-1)) + almost_full_r <= 1; + end + end else if (dequeue) begin + if (used_r == ADDRW'(ALM_FULL)) + almost_full_r <= 0; + full_r <= 0; + end + + used_r <= used_r + ADDRW'($signed(2'(push_new) - 2'(dequeue))); end end @@ -142,8 +165,6 @@ module VX_miss_resrv #( end end - wire [`MSHR_DATA_WIDTH-1:0] dout; - VX_dp_ram #( .DATAW(`MSHR_DATA_WIDTH), .SIZE(MSHR_SIZE), @@ -165,8 +186,10 @@ module VX_miss_resrv #( if (reset) begin schedule_valid_n_r = 0; end else begin - if (lookup_ready) begin - schedule_valid_n_r = 1; + if (restore) begin + schedule_valid_n_r = enqueue_as_ready; + end else if (lookup_ready) begin + schedule_valid_n_r = schedule_valid_r || (schedule_addr_r == lookup_addr); end else if (schedule) begin schedule_valid_n_r = ready_table[schedule_n_ptr]; end @@ -176,7 +199,8 @@ module VX_miss_resrv #( always @(*) begin schedule_addr_n_r = schedule_addr_r; dout_n_r = dout_r; - if ((push_new && (used_r == 0 || (used_r == 1 && schedule))) || restore) begin + if (restore + || (push_new && (used_r == 0 || (used_r == 1 && schedule)))) begin schedule_addr_n_r = enqueue_addr; dout_n_r = enqueue_data; end else if (schedule) begin @@ -186,13 +210,6 @@ module VX_miss_resrv #( end always @(posedge clk) begin - if (reset) begin - used_r <= 0; - full_r <= 0; - end else begin - used_r <= used_r + $bits(used_r)'($signed(2'(enqueue) - 2'(schedule))); - full_r <= (used_r == $bits(used_r)'(MSHR_SIZE-1)) && enqueue; - end schedule_valid_r <= schedule_valid_n_r; schedule_addr_r <= schedule_addr_n_r; dout_r <= dout_n_r; @@ -206,6 +223,8 @@ module VX_miss_resrv #( assign schedule_addr_next = schedule_addr_n_r; assign schedule_data_next = dout_n_r; + assign enqueue_almfull = almost_full_r; + `ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin if (lookup_ready || schedule || enqueue || dequeue) begin @@ -213,9 +232,9 @@ module 
VX_miss_resrv #( $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); if (enqueue) begin if (enqueue_is_mshr) - $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_ready); + $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready); else - $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_ready, enq_debug_wid, enq_debug_pc); + $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc); end if (dequeue) $display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc); diff --git a/hw/rtl/libs/VX_fifo_queue.v b/hw/rtl/libs/VX_fifo_queue.v index 305379fb..ea039d84 100644 --- a/hw/rtl/libs/VX_fifo_queue.v +++ b/hw/rtl/libs/VX_fifo_queue.v @@ -30,11 +30,13 @@ module VX_fifo_queue #( head_r <= 0; size_r <= 0; end else begin - if (push && !pop) begin - assert(!full); - size_r <= 1; - end else if (pop && !push) begin - assert(!empty); + assert(!push || !full); + assert(!pop || !empty); + if (push) begin + if (!pop) begin + size_r <= 1; + end + end else if (pop) begin size_r <= 0; end if (push) begin @@ -62,13 +64,14 @@ module VX_fifo_queue #( end else begin assert(!push || !full); assert(!pop || !empty); - if (push && !pop) begin - empty_r <= 0; - if (used_r == ADDRW'(SIZE-1)) begin - full_r <= 1; + if (push) begin + if (!pop) begin + empty_r <= 0; + if (used_r == ADDRW'(SIZE-1)) begin + full_r <= 1; + end end - end - 
if (pop && !push) begin
+            end else if (pop) begin
                 full_r <= 0;
                 if (used_r == ADDRW'(1)) begin
                     empty_r <= 1;
diff --git a/hw/rtl/libs/VX_pending_size.v b/hw/rtl/libs/VX_pending_size.v
new file mode 100644
index 00000000..595b10d0
--- /dev/null
+++ b/hw/rtl/libs/VX_pending_size.v
@@ -0,0 +1,35 @@
+`include "VX_platform.vh"
+
+module VX_pending_size #( // counts pending entries (pushes minus pops) and flags when SIZE are outstanding
+    parameter SIZE = 1    // number of pending entries at which `full` is raised
+) (
+    input wire clk,
+    input wire reset,     // synchronous: clears the counter and the full flag
+    input wire push,      // one entry becomes pending this cycle
+    input wire pop,       // one pending entry retires this cycle
+    output wire full      // registered; set once SIZE entries are pending
+);
+    localparam ADDRW = $clog2(SIZE); // NOTE(review): evaluates to 0 for the default SIZE=1 - confirm instances use SIZE >= 2
+
+    reg [ADDRW-1:0] size_r; // pending-entry counter
+    reg full_r;             // registered full flag driving `full`
+
+    always @(posedge clk) begin
+        if (reset) begin
+            size_r <= 0;
+            full_r <= 0;
+        end else begin
+            assert(!push || !full); // pushing while full is a protocol violation
+            if (push) begin
+                if (!pop && (size_r == ADDRW'(SIZE-1))) // fix: compare the declared counter size_r (used_r is not declared in this module)
+                    full_r <= 1;
+            end else if (pop) begin
+                full_r <= 0; // a pop without a push always frees one slot
+            end
+            size_r <= size_r + ADDRW'($signed(2'(push && !pop) - 2'(pop && !push))); // +1 on push only, -1 on pop only, unchanged otherwise
+        end
+    end
+
+    assign full = full_r;
+
+endmodule
\ No newline at end of file