From 3c37db877a93d5df52857ad8df102142f65992da Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 13 Feb 2021 20:23:29 -0800 Subject: [PATCH] cache specialization for in-order DRAM responses --- hw/rtl/VX_config.vh | 4 +- hw/rtl/VX_platform.vh | 2 +- hw/rtl/cache/VX_bank.v | 72 +++++++++++++++++++++++------------- hw/rtl/cache/VX_cache.v | 36 ++++++++++-------- hw/rtl/cache/VX_flush_ctrl.v | 9 ++--- hw/rtl/cache/VX_miss_resrv.v | 2 +- hw/rtl/cache/VX_tag_access.v | 23 +++++------- 7 files changed, 84 insertions(+), 64 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 17e73064..5b98ab49 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -244,7 +244,7 @@ // Size of LSU Request Queue `ifndef LSUQ_SIZE -`define LSUQ_SIZE (`NUM_WARPS * `NUM_THREADS) +`define LSUQ_SIZE 8 `endif // Size of FPU Request Queue @@ -313,7 +313,7 @@ // Miss Handling Register Size `ifndef DMSHR_SIZE -`define DMSHR_SIZE (`LSUQ_SIZE / 2) +`define DMSHR_SIZE `LSUQ_SIZE `endif // DRAM Request Queue Size diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index ff5afe69..39c605c7 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -46,7 +46,7 @@ if (!(cond)) $error msg; \ endgenerate -`define SASSERT(cond, msg) \ +`define RUNTIME_ASSERT(cond, msg) \ always @(posedge clk) \ assert(cond) else $error msg; \ diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 815c6aa1..8b0b8ac2 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -40,7 +40,10 @@ module VX_bank #( parameter CORE_TAG_ID_BITS = 0, // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = 0, + + // in-order DRAM + parameter IN_ORDER_DRAM = 0 ) ( `SCOPE_IO_VX_bank @@ -85,8 +88,11 @@ module VX_bank #( input wire dram_rsp_valid, input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr, input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data, - input wire dram_rsp_flush, - output wire dram_rsp_ready + output wire
dram_rsp_ready, + + // flush + input wire flush_enable, + input wire [`LINE_SELECT_BITS-1:0] flush_addr ); `ifdef DBG_CACHE_REQ_INFO @@ -97,7 +103,8 @@ module VX_bank #( `endif wire creq_pop; - wire creq_full, creq_empty; + wire creq_full; + wire creq_empty; wire [NUM_PORTS-1:0] creq_pmask; wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen; @@ -181,14 +188,13 @@ module VX_bank #( wire mshr_pop_unqual = mshr_valid && !dreq_alm_full; // ensure DRAM request queue not full (deadlock prevention) wire drsq_pop_unqual = !mshr_pop_unqual && dram_rsp_valid; - wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty; + wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !flush_enable; - wire is_miss_st1 = valid_st1 && !is_fill_st1 && (miss_st1 || force_miss_st1); + wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1); assign mshr_pop = mshr_pop_unqual - && !crsq_alm_full // ensure core response ready - && !(is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed + && !crsq_alm_full // ensure core response ready + && !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed - assign drsq_pop = drsq_pop_unqual; assign creq_pop = creq_pop_unqual @@ -231,6 +237,14 @@ module VX_bank #( assign creq_line_data = creq_data; end + wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr_qual; + if (IN_ORDER_DRAM) begin + `UNUSED_VAR (dram_rsp_addr) + assign dram_rsp_addr_qual = mshr_addr; + end else begin + assign dram_rsp_addr_qual = dram_rsp_addr; + end + VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1 + 1), .RESETW (1) @@ -239,21 +253,21 @@ module VX_bank #( .reset (reset), .enable (1'b1), .data_in ({ - mshr_pop || drsq_pop || creq_pop, + flush_enable || mshr_pop || 
drsq_pop || creq_pop, + flush_enable, mshr_pop_unqual, - drsq_pop_unqual, + drsq_pop_unqual || flush_enable, mshr_pop_unqual ? 1'b0 : creq_rw, - mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_addr), + flush_enable ? (`LINE_ADDR_WIDTH'(flush_addr)) : (mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr_qual : creq_addr)), dram_rsp_valid ? dram_rsp_data : creq_line_data, mshr_pop_unqual ? mshr_wsel : creq_wsel, mshr_pop_unqual ? mshr_byteen : creq_byteen, mshr_pop_unqual ? mshr_tid : creq_tid, mshr_pop_unqual ? mshr_pmask : creq_pmask, mshr_pop_unqual ? mshr_tag : creq_tag, - mshr_pending_sel, - dram_rsp_flush + mshr_pending_sel }), - .data_out ({valid_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, data_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0, is_flush_st0}) + .data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, data_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0}) ); `ifdef DBG_CACHE_REQ_INFO @@ -263,6 +277,8 @@ module VX_bank #( assign {debug_pc_st0, debug_wid_st0} = 0; end `endif + + wire tag_match; VX_tag_access #( .BANK_ID (BANK_ID), @@ -286,24 +302,26 @@ module VX_bank #( .addr (addr_st0), .fill (valid_st0 && is_fill_st0), .is_flush (is_flush_st0), - .missed (miss_st0) + .tag_match (tag_match) ); // redundant fills - wire is_redundant_fill = is_fill_st0 && !miss_st0; + wire is_redundant_fill = !IN_ORDER_DRAM && is_fill_st0 && tag_match; // we had a miss with prior request for the current address assign prev_miss_dep_st0 = is_miss_st1 && (addr_st0 == addr_st1); + assign miss_st0 = !is_fill_st0 && !tag_match; + // force miss to ensure commit order when a new request has pending previous requests to same block // also force a miss for mshr requests when previous requests got a miss assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_dep_st0)) || (is_mshr_st0 && is_miss_st1 && is_mshr_st1); - 
assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0) + assign writeen_unqual_st0 = (!is_fill_st0 && tag_match && mem_rw_st0) || (is_fill_st0 && !is_redundant_fill); - assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr); + assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr_qual); VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), @@ -330,7 +348,7 @@ module VX_bank #( wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1); - wire incoming_fill_qual_st1 = (dram_rsp_valid && (addr_st1 == dram_rsp_addr)) + wire incoming_fill_qual_st1 = (dram_rsp_valid && (addr_st1 == dram_rsp_addr_qual)) || incoming_fill_st1; wire send_fill_req_st1 = !is_fill_st1 && !mem_rw_st1 && miss_st1 @@ -394,13 +412,15 @@ module VX_bank #( assign mshr_push = valid_st1 && mshr_push_st1; wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1; + wire mshr_restore = !IN_ORDER_DRAM && is_mshr_st1; + `RUNTIME_ASSERT(!IN_ORDER_DRAM || !(mshr_push && mshr_restore), ("Oops!")) // push a missed request as 'ready' if it was a forced miss that actually had a hit // or the fill request for this block is comming wire mshr_init_ready_state = !miss_st1 || incoming_fill_qual_st1; // use dram rsp or core req address to lookup the mshr - wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? dram_rsp_addr : creq_addr; + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? 
dram_rsp_addr_qual : creq_addr; VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -429,7 +449,7 @@ module VX_bank #( .enqueue (mshr_push), .enqueue_addr (addr_st1), .enqueue_data ({wsel_st1, byteen_st1, tag_st1, req_tid_st1, pmask_st1}), - .enqueue_is_mshr (is_mshr_st1), + .enqueue_is_mshr (mshr_restore), .enqueue_as_ready (mshr_init_ready_state), `UNUSED_PIN (enqueue_almfull), `UNUSED_PIN (enqueue_full), @@ -562,11 +582,11 @@ module VX_bank #( if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin $display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_alm_full, dreq_alm_full, mshr_alm_full); end + if (flush_enable) begin + $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID)); + end if (drsq_pop) begin - if (dram_rsp_flush) - $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID)); - else - $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data); + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr_qual, BANK_ID), dram_rsp_data); end if (mshr_pop) begin $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 99a7313e..f3f3bf11 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -42,7 +42,10 @@ module VX_cache #( parameter DRAM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)), // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = 0, + + // in-order DRAM + parameter IN_ORDER_DRAM = 0 ) ( `SCOPE_IO_VX_cache @@ -117,7 +120,7 @@ module
VX_cache #( wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data_qual; wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag_qual; - wire [`LINE_ADDR_WIDTH-1:0] flush_addr; + wire [`LINE_SELECT_BITS-1:0] flush_addr; wire flush_enable; `ifdef PERF_ENABLE @@ -151,13 +154,13 @@ module VX_cache #( `UNUSED_PIN (alm_full), `UNUSED_PIN (alm_empty), `UNUSED_PIN (size) - ); + ); if (NUM_BANKS == 1) begin `UNUSED_VAR (dram_rsp_tag_qual) - assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready && !flush_enable; + assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready; end else begin - assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag_qual)] && !flush_enable; + assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag_qual)]; end /////////////////////////////////////////////////////////////////////////// @@ -171,8 +174,7 @@ module VX_cache #( .clk (clk), .reset (reset), .flush (flush), - .addr (flush_addr), - .ready_out ((& per_bank_dram_rsp_ready)), + .addr_out (flush_addr), .valid_out (flush_enable) ); @@ -240,7 +242,6 @@ module VX_cache #( wire curr_bank_dram_rsp_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire [`CACHE_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; - wire curr_bank_dram_rsp_flush; wire curr_bank_dram_rsp_ready; // Core Req @@ -276,14 +277,13 @@ module VX_cache #( // DRAM response if (NUM_BANKS == 1) begin - assign curr_bank_dram_rsp_valid = !drsq_empty || flush_enable; - assign curr_bank_dram_rsp_addr = flush_enable ? flush_addr : dram_rsp_tag_qual; + assign curr_bank_dram_rsp_valid = !drsq_empty; + assign curr_bank_dram_rsp_addr = dram_rsp_tag_qual; end else begin - assign curr_bank_dram_rsp_valid = (!drsq_empty && (`DRAM_ADDR_BANK(dram_rsp_tag_qual) == i)) || flush_enable; - assign curr_bank_dram_rsp_addr = flush_enable ? 
flush_addr : `DRAM_TO_LINE_ADDR(dram_rsp_tag_qual); + assign curr_bank_dram_rsp_valid = !drsq_empty && (`DRAM_ADDR_BANK(dram_rsp_tag_qual) == i); + assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag_qual); end assign curr_bank_dram_rsp_data = dram_rsp_data_qual; - assign curr_bank_dram_rsp_flush = flush_enable; assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; VX_bank #( @@ -303,7 +303,8 @@ module VX_cache #( .WRITE_ENABLE (WRITE_ENABLE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) + .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET), + .IN_ORDER_DRAM (IN_ORDER_DRAM) ) bank ( `SCOPE_BIND_VX_cache_bank(i) @@ -348,8 +349,11 @@ module VX_cache #( .dram_rsp_valid (curr_bank_dram_rsp_valid), .dram_rsp_addr (curr_bank_dram_rsp_addr), .dram_rsp_data (curr_bank_dram_rsp_data), - .dram_rsp_flush (curr_bank_dram_rsp_flush), - .dram_rsp_ready (curr_bank_dram_rsp_ready) + .dram_rsp_ready (curr_bank_dram_rsp_ready), + + // flush + .flush_enable (flush_enable), + .flush_addr (flush_addr) ); end diff --git a/hw/rtl/cache/VX_flush_ctrl.v b/hw/rtl/cache/VX_flush_ctrl.v index a67ce512..41926bcc 100644 --- a/hw/rtl/cache/VX_flush_ctrl.v +++ b/hw/rtl/cache/VX_flush_ctrl.v @@ -2,7 +2,7 @@ module VX_flush_ctrl #( // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 16384, // Size of line inside a bank in bytes parameter CACHE_LINE_SIZE = 1, // Number of banks @@ -13,8 +13,7 @@ module VX_flush_ctrl #( input wire clk, input wire reset, input wire flush, - output wire [`LINE_ADDR_WIDTH-1:0] addr, - input wire ready_out, + output wire [`LINE_SELECT_BITS-1:0] addr_out, output wire valid_out ); reg flush_enable; @@ -25,7 +24,7 @@ module VX_flush_ctrl #( flush_enable <= 1; flush_ctr <= 0; end else begin - if (flush_enable && ready_out) begin + if (flush_enable) begin if (flush_ctr == ((2 ** `LINE_SELECT_BITS)-1)) begin flush_enable <= 0; end @@ -34,7 +33,7 @@ module VX_flush_ctrl #( end 
end - assign addr = `LINE_ADDR_WIDTH'(flush_ctr); + assign addr_out = flush_ctr; assign valid_out = flush_enable; endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 3cad5978..fce9e38f 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -21,7 +21,7 @@ module VX_miss_resrv #( // core request tag size parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 22d177c6..a92f3d12 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -29,9 +29,10 @@ module VX_tag_access #( input wire[`LINE_ADDR_WIDTH-1:0] addr, input wire fill, input wire is_flush, - output wire missed + output wire tag_match ); `UNUSED_VAR (reset) + `UNUSED_VAR (lookup) wire read_valid; wire [`TAG_SELECT_BITS-1:0] read_tag; @@ -54,25 +55,21 @@ module VX_tag_access #( .dout({read_valid, read_tag}) ); - wire tags_match = read_valid && (line_tag == read_tag); - - assign missed = !tags_match; - - wire do_lookup = lookup; - `UNUSED_VAR (do_lookup) + assign tag_match = read_valid && (line_tag == read_tag); `ifdef DBG_PRINT_CACHE_TAG always @(posedge clk) begin if (fill) begin - if (is_flush) + if (is_flush) begin $display("%t: cache%0d:%0d tag-flush: addr=%0h, blk_addr=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr); - else + end else begin $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag); - if (tags_match) begin - $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID)); + if (tag_match) begin + $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr, BANK_ID)); + end end - end 
else if (do_lookup) begin - if (tags_match) begin + end else if (lookup) begin + if (tag_match) begin $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag); end else begin $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag, read_tag);