From b307c40ae741e88940718359096a9efab09f0ee1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 26 Jul 2021 21:11:17 -0700 Subject: [PATCH] mshr critical path optimization --- hw/rtl/cache/VX_bank.v | 99 ++++++++++++------------ hw/rtl/cache/VX_miss_resrv.v | 144 +++++++++++++++++++++-------------- 2 files changed, 135 insertions(+), 108 deletions(-) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index cc41828f..6e8a89fb 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -127,7 +127,6 @@ module VX_bank #( wire mshr_alm_full; wire mshr_pop; - wire mshr_pending; wire mshr_valid; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr; wire [CORE_TAG_WIDTH-1:0] mshr_tag; @@ -149,10 +148,10 @@ module VX_bank #( wire is_mshr_st0, is_mshr_st1; wire miss_st0, miss_st1; wire prev_miss_dep_st0; - wire fill_req_unqual_st0, fill_req_unqual_st1; wire force_miss_st0, force_miss_st1; + wire not_same_prev_mshr_st0, not_same_prev_mshr_st1; wire writeen_unqual_st0, writeen_unqual_st1; - wire incoming_fill_st0, incoming_fill_st1; + wire incoming_fill_unqual_st0, incoming_fill_unqual_st1; wire mshr_pending_st0; wire is_flush_st0; @@ -183,26 +182,22 @@ module VX_bank #( wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable; - wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1); + wire is_miss_st1 = (miss_st1 || force_miss_st1); assign mshr_pop = mshr_enable - && !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed + && !(valid_st1 && is_mshr_st1 && is_miss_st1) // do not schedule another mshr request if the previous one missed && !crsq_in_stall; // ensure core response ready assign creq_out_ready = creq_grant - && !mreq_alm_full // ensure memory request ready - && !mshr_alm_full // ensure mshr enqueue ready - && !crsq_in_stall; // ensure core response ready + && !mreq_alm_full // ensure memory request ready + && !mshr_alm_full // ensure mshr enqueue ready + && !crsq_in_stall; // ensure core response ready assign mem_rsp_ready = mrsq_grant && !crsq_in_stall; // ensure core response ready wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - // we have a miss in mshr or entering it for the current address - wire mshr_pending_sel = mshr_pending - || (is_miss_st1 && (creq_addr == addr_st1)); - `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin assign {debug_pc_sel, debug_wid_sel} = mshr_enable ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; @@ -233,7 +228,7 @@ module VX_bank #( end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -251,10 +246,9 @@ module VX_bank #( creq_byteen, mshr_enable ? mshr_tid : creq_tid, mshr_enable ? mshr_pmask : creq_pmask, - mshr_enable ? mshr_tag : creq_tag, - mshr_pending_sel + mshr_enable ? mshr_tag : creq_tag }), - .data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0}) + .data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}) ); `ifdef DBG_CACHE_REQ_INFO @@ -295,25 +289,25 @@ module VX_bank #( .tag_match (tag_match_st0) ); - // redundant fills - wire is_redundant_fill_st0 = is_fill_st0 && tag_match_st0; - // we had a miss with prior request for the current address - assign prev_miss_dep_st0 = is_miss_st1 && (addr_st0 == addr_st1); + assign prev_miss_dep_st0 = valid_st1 && is_miss_st1 && (addr_st0 == addr_st1); + // we have a core request hit assign miss_st0 = !is_fill_st0 && !tag_match_st0; - // force miss to ensure commit order when a new request has pending previous requests to same block - // also force a miss for mshr requests when previous requests got a miss + // force a miss to ensure commit order when a new request has pending previous requests to same block + // also force a miss for mshr requests when previous request was a missed assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_dep_st0)) - || (is_mshr_st0 && is_mshr_st1 && is_miss_st1); + || (is_mshr_st0 && valid_st1 && is_mshr_st1 && is_miss_st1); - assign writeen_unqual_st0 = (WRITE_ENABLE && !is_fill_st0 && tag_match_st0 && mem_rw_st0) - || (is_fill_st0 && !is_redundant_fill_st0); + // previous mshr request doesn't have same address + assign not_same_prev_mshr_st0 = valid_st1 && is_mshr_st1 && (addr_st1 != addr_st0); - assign incoming_fill_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr); + // enable write when we have a fill request that is not redundant + assign writeen_unqual_st0 = is_fill_st0 && !tag_match_st0; - assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (is_mshr_st0 && !prev_miss_dep_st0)); + // check if incoming memory response match current address + assign incoming_fill_unqual_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr); VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), @@ -322,8 +316,8 @@ module VX_bank #( .clk (clk), .reset (reset), .enable (!crsq_in_stall), - .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) + .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, incoming_fill_unqual_st0, miss_st0, force_miss_st0, mem_rw_st0, not_same_prev_mshr_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, incoming_fill_unqual_st1, miss_st1, force_miss_st1, mem_rw_st1, not_same_prev_mshr_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -334,18 +328,21 @@ module VX_bank #( end `endif - wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); + wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && ~is_miss_st1) + || writeen_unqual_st1; - wire crsq_push_st1 = !is_fill_st1 && !mem_rw_st1 && !miss_st1 && !force_miss_st1; + wire readen_st1 = !is_fill_st1 && !mem_rw_st1; - wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1); + wire crsq_push_st1 = readen_st1 && ~is_miss_st1; - wire incoming_fill_qual_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr)) - || incoming_fill_st1; + wire mshr_push_st1 = readen_st1 && is_miss_st1; - wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1; + wire incoming_fill_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr)) + || incoming_fill_unqual_st1; - wire mreq_push_st1 = (miss_st1 && fill_req_unqual_st1 && !incoming_fill_qual_st1) + wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1; + + wire mreq_push_st1 = (readen_st1 && miss_st1 && (~force_miss_st1 || not_same_prev_mshr_st1) && !incoming_fill_st1) || do_writeback_st1; wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1; @@ -385,7 +382,7 @@ module VX_bank #( .addr (addr_st1), // reading - .readen (valid_st1 && !is_fill_st1 && !mem_rw_st1), + .readen (valid_st1 && readen_st1), .rdata (rdata_st1), // writing @@ -401,10 +398,7 @@ module VX_bank #( // push a missed request as 'ready' if it was a forced miss that actually had a hit // or the fill request for this block is comming - wire mshr_init_ready_state = !miss_st1 || incoming_fill_qual_st1; - - // use memory rsp or core req address to lookup the mshr - wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = mem_rsp_valid ? mem_rsp_addr : creq_addr; + wire mshr_init_ready_state = !miss_st1 || incoming_fill_unqual_st1; VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -437,13 +431,15 @@ module VX_bank #( `UNUSED_PIN (enqueue_almfull), `UNUSED_PIN (enqueue_full), - // lookup - .lookup_addr (lookup_addr), - .lookup_match (mshr_pending), + // fill + .fill_start (mem_rsp_fire), + .fill_addr (mem_rsp_addr), + + // lookup + .lookup_addr (addr_st0), + .lookup_match (mshr_pending_st0), + .lookup_fill (do_fill_st0), - // fill update - .fill_update (mem_rsp_fire), - // schedule .schedule (mshr_pop), .schedule_valid (mshr_valid), @@ -477,8 +473,9 @@ module VX_bank #( end VX_elastic_buffer #( - .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), - .SIZE (CRSQ_SIZE) + .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), + .SIZE (CRSQ_SIZE), + .OUTPUT_REG (1 == NUM_BANKS) ) core_rsp_req ( .clk (clk), .reset (reset), @@ -551,7 +548,7 @@ module VX_bank #( /*if (crsq_in_fire && (NUM_PORTS > 1) && $countones(crsq_pmask) > 1) begin $display("%t: *** cache%0d:%0d multi-port-out: pmask=%b, addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, crsq_pmask, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag); end*/ - if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_qual_st1) begin + if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin $display("%t: *** cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); assert(!is_mshr_st1); end @@ -565,7 +562,7 @@ module VX_bank #( $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data); end if (mshr_pop) begin - $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel); + $display("%t: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel); end if (creq_out_fire) begin if (creq_rw) diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 8644ba28..74799e9d 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -42,12 +42,14 @@ module VX_miss_resrv #( output wire enqueue_full, output wire enqueue_almfull, - // lookup + // fill + input wire fill_start, + input wire [`LINE_ADDR_WIDTH-1:0] fill_addr, + + // lookup input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr, output wire lookup_match, - - // fill update - input wire fill_update, + input wire lookup_fill, // schedule input wire schedule, @@ -64,13 +66,16 @@ module VX_miss_resrv #( reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; - reg [MSHR_SIZE-1:0] valid_table; - reg [MSHR_SIZE-1:0] ready_table; - reg [ADDRW-1:0] head_ptr, tail_ptr; - reg [ADDRW-1:0] schedule_ptr, restore_ptr; + reg [MSHR_SIZE-1:0] valid_table, valid_table_n; + reg [MSHR_SIZE-1:0] ready_table, ready_table_n; + reg [ADDRW-1:0] head_ptr, head_ptr_n; + reg [ADDRW-1:0] tail_ptr, tail_ptr_n; + reg [ADDRW-1:0] restore_ptr, restore_ptr_n; + reg [ADDRW-1:0] schedule_ptr, schedule_ptr_n; reg [ADDRW-1:0] used_r; reg alm_full_r, full_r; - + reg valid_out_r; + wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr); @@ -80,7 +85,47 @@ module VX_miss_resrv #( wire restore = enqueue && enqueue_is_mshr; - wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); + always @(*) begin + valid_table_n = valid_table; + ready_table_n = ready_table; + head_ptr_n = head_ptr; + tail_ptr_n = tail_ptr; + schedule_ptr_n = schedule_ptr; + restore_ptr_n = restore_ptr; + + if (lookup_fill) begin + // unlock pending requests for scheduling + ready_table_n |= valid_address_match; + end + + if (schedule) begin + // schedule next entry + schedule_ptr_n = schedule_ptr + 1; + valid_table_n[schedule_ptr] = 0; + ready_table_n[schedule_ptr] = 0; + end + + if (fill_start && (fill_addr == addr_table[schedule_ptr])) begin + ready_table_n[schedule_ptr] = valid_table[schedule_ptr]; + end + + if (push_new) begin + // push new entry + valid_table_n[tail_ptr] = 1; + ready_table_n[tail_ptr] = enqueue_as_ready; + tail_ptr_n = tail_ptr + 1; + end else if (restore) begin + // restore schedule, returning missed mshr entry + valid_table_n[restore_ptr] = 1; + ready_table_n[restore_ptr] = enqueue_as_ready; + restore_ptr_n = restore_ptr + 1; + schedule_ptr_n = head_ptr; + end else if (dequeue) begin + // clear scheduled entry + head_ptr_n = head_ptr + 1; + restore_ptr_n = head_ptr_n; + end + end always @(posedge clk) begin if (reset) begin @@ -92,42 +137,21 @@ module VX_miss_resrv #( restore_ptr <= 0; used_r <= 0; alm_full_r <= 0; - full_r <= 0; + full_r <= 0; + valid_out_r <= 0; end else begin - - if (fill_update) begin - // unlock pending requests for scheduling - ready_table <= ready_table | valid_address_match; + if (schedule) begin + assert(schedule_valid); + assert(!fill_start); + assert(!restore); end - - if (push_new) begin - // push new entry + + if (push_new) begin assert(!full_r); - valid_table[tail_ptr] <= 1; - ready_table[tail_ptr] <= enqueue_as_ready; - tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end else if (restore) begin assert(!schedule); - // restore schedule, returning missed mshr entry - valid_table[restore_ptr] <= 1; - ready_table[restore_ptr] <= enqueue_as_ready; - restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); - schedule_ptr <= head_ptr; end else if (dequeue) begin - // clear scheduled entry - assert(((head_ptr+$bits(head_ptr)'(1)) == schedule_ptr) - || ((head_ptr+$bits(head_ptr)'(2)) == schedule_ptr)) else $error("schedule_ptr=%0d, head_ptr=%0d", schedule_ptr, head_ptr); - valid_table[head_ptr] <= 0; - head_ptr <= head_ptr_n; - restore_ptr <= head_ptr_n; - end - - if (schedule) begin - // schedule next entry - assert(schedule_valid); - valid_table[schedule_ptr] <= 0; - ready_table[schedule_ptr] <= 0; - schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); + assert(head_ptr != schedule_ptr); end if (push_new) begin @@ -144,40 +168,46 @@ module VX_miss_resrv #( end used_r <= used_r + ADDRW'($signed(2'(push_new) - 2'(dequeue))); - end - end - always @(posedge clk) begin + valid_table <= valid_table_n; + ready_table <= ready_table_n; + head_ptr <= head_ptr_n; + tail_ptr <= tail_ptr_n; + schedule_ptr <= schedule_ptr_n; + restore_ptr <= restore_ptr_n; + valid_out_r <= ready_table_n[schedule_ptr_n]; + end + if (push_new) begin addr_table[tail_ptr] <= enqueue_addr; end end VX_dp_ram #( - .DATAW(`MSHR_DATA_WIDTH), - .SIZE(MSHR_SIZE), - .RWCHECK(1), - .FASTRAM(1) + .DATAW (`MSHR_DATA_WIDTH), + .SIZE (MSHR_SIZE), + .RWCHECK (1), + .FASTRAM (1) ) entries ( - .clk(clk), - .waddr(tail_ptr), - .raddr(schedule_ptr), - .wren(push_new), - .byteen(1'b1), - .rden(1'b1), - .din(enqueue_data), - .dout(schedule_data) + .clk (clk), + .waddr (tail_ptr), + .raddr (schedule_ptr), + .wren (push_new), + .byteen (1'b1), + .rden (1'b1), + .din (enqueue_data), + .dout (schedule_data) ); assign lookup_match = (| valid_address_match); - assign schedule_valid = ready_table[schedule_ptr]; + assign schedule_valid = valid_out_r; assign schedule_addr = addr_table[schedule_ptr]; assign enqueue_almfull = alm_full_r; assign enqueue_full = full_r; `ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin - if (fill_update || schedule || enqueue || dequeue) begin + if (lookup_fill || schedule || enqueue || dequeue) begin if (schedule) $display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); if (enqueue) begin