diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index b3874b22..082f7f66 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -92,17 +92,8 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ - wire [31:0] debug_pc_st0; - wire [`NW_BITS-1:0] debug_wid_st0; - - wire [31:0] debug_pc_st1; - wire [`NW_BITS-1:0] debug_wid_st1; - - wire [31:0] debug_pc_st12; - wire [`NW_BITS-1:0] debug_wid_st12; - - wire [31:0] debug_pc_st2; - wire [`NW_BITS-1:0] debug_wid_st2; + wire [31:0] debug_pc_st0, debug_pc_st1, debug_pc_st01; + wire [`NW_BITS-1:0] debug_wid_st0, debug_wid_st1, debug_wid_st01; /* verilator lint_on UNUSED */ `endif @@ -198,86 +189,48 @@ module VX_bank #( wire [WORD_SIZE-1:0] mshr_byteen_st0; wire mshr_pending_hazard_unqual_st0; - wire is_fill_st0; - wire is_mshr_st0; - wire valid_st0; - wire [`LINE_ADDR_WIDTH-1:0] addr_st0; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0; - wire [`WORD_WIDTH-1:0] writeword_st0; - wire [`CACHE_LINE_WIDTH-1:0] writedata_st0; - wire [`REQ_TAG_WIDTH-1:0] tag_st0; - wire mem_rw_st0; - wire [WORD_SIZE-1:0] byteen_st0; - wire [`REQS_BITS-1:0] req_tid_st0; - - wire is_fill_st1; - wire is_mshr_st1; - wire valid_st1; - wire [`LINE_ADDR_WIDTH-1:0] addr_st1; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st1; - wire [`WORD_WIDTH-1:0] readword_st1; - wire [`CACHE_LINE_WIDTH-1:0] readdata_st1; - wire [CACHE_LINE_SIZE-1:0] dirtyb_st1; - wire [`WORD_WIDTH-1:0] writeword_st1; - wire [`CACHE_LINE_WIDTH-1:0] writedata_st1; - wire [`TAG_SELECT_BITS-1:0] readtag_st1; - wire miss_st1; - wire force_miss_st1; - wire dirty_st1; - wire writeen_st1; - wire [`REQ_TAG_WIDTH-1:0] tag_st1; - wire mem_rw_st1; - wire [WORD_SIZE-1:0] byteen_st1; - wire [`REQS_BITS-1:0] req_tid_st1; - wire core_req_hit_st1; - wire incoming_fill_st1; - wire do_writeback_st1; - wire mshr_push_st1; - wire crsq_push_st1; - wire dreq_push_st1; + wire valid_st0, valid_st1; + wire is_fill_st0, is_fill_st1; + wire is_mshr_st0, is_mshr_st1; + wire 
[`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; + wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; + wire [`WORD_WIDTH-1:0] readword_st0, readword_st1; + wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1; + wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1; + wire [`CACHE_LINE_WIDTH-1:0] writedata_st0, writedata_st1; + wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1; + wire miss_st0, miss_st1; + wire force_miss_st0, force_miss_st1; + wire dirty_st0; + wire [CACHE_LINE_SIZE-1:0] dirtyb_st0, dirtyb_st1; + wire writeen_st0, writeen_st1; + wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1; + wire mem_rw_st0, mem_rw_st1; + wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; + wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1; + wire core_req_hit_st0, core_req_hit_st1; + wire do_writeback_st0, do_writeback_st1; + wire mshr_push_st0, mshr_push_st1; + wire crsq_push_st0, crsq_push_st1; + wire dreq_push_st0, dreq_push_st1; - wire valid_st12; - wire writeen_st12; - wire [`LINE_ADDR_WIDTH-1:0] addr_st12; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st12; - wire [WORD_SIZE-1:0] byteen_st12; - wire [`WORD_WIDTH-1:0] writeword_st12; - wire [`REQ_TAG_WIDTH-1:0] tag_st12; - - wire valid_st2; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st2; - wire [`WORD_WIDTH-1:0] readword_st2; - wire [`WORD_WIDTH-1:0] writeword_st2; - wire [`CACHE_LINE_WIDTH-1:0] readdata_st2; - wire [`CACHE_LINE_WIDTH-1:0] writedata_st2; - wire [CACHE_LINE_SIZE-1:0] dirtyb_st2; - wire [`TAG_SELECT_BITS-1:0] readtag_st2; - wire is_fill_st2; - wire is_mshr_st2; - wire miss_st2; - wire force_miss_st2; - wire[`LINE_ADDR_WIDTH-1:0] addr_st2; - wire writeen_st2; - wire [`REQ_TAG_WIDTH-1:0] tag_st2; - wire mem_rw_st2; - wire [WORD_SIZE-1:0] byteen_st2; - wire [`REQS_BITS-1:0] req_tid_st2; - wire core_req_hit_st2; - wire incoming_fill_st2; - wire do_writeback_st2; - wire mshr_push_st2; - wire crsq_push_st2; - wire dreq_push_st2; + wire valid_st01; + wire writeen_st01; + wire [`LINE_ADDR_WIDTH-1:0] addr_st01; + wire 
[`UP(`WORD_SELECT_BITS)-1:0] wsel_st01; + wire [WORD_SIZE-1:0] byteen_st01; + wire [`WORD_WIDTH-1:0] writeword_st01; + wire [`REQ_TAG_WIDTH-1:0] tag_st01; wire mshr_push_stall; wire crsq_push_stall; wire dreq_push_stall; wire pipeline_stall; - wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); + wire is_mshr_miss_st1 = valid_st1 && is_mshr_st1 && (miss_st1 || force_miss_st1); - wire creq_commit = valid_st2 && !is_fill_st2 - && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) + wire creq_commit = valid_st1 && !is_fill_st1 + && (core_req_hit_st1 || (WRITE_THROUGH && mem_rw_st1)) && !pipeline_stall; // determine which queue to pop next in piority order @@ -286,7 +239,7 @@ module VX_bank #( wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full; assign mshr_pop = mshr_pop_unqual && !pipeline_stall - && !is_mshr_miss_st2; // stop if previous request was a miss + && !is_mshr_miss_st1; // stop if previous request was a miss assign drsq_pop = drsq_pop_unqual && !pipeline_stall; assign creq_pop = creq_pop_unqual && !pipeline_stall; @@ -306,10 +259,15 @@ module VX_bank #( assign is_mshr_st0 = mshr_pop_unqual; assign is_fill_st0 = drsq_pop_unqual; - assign valid_st0 = drsq_pop || mshr_pop || creq_pop; + assign valid_st0 = drsq_pop || mshr_pop || creq_pop; + assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0; + assign tag_st0 = creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : `REQ_TAG_WIDTH'(mshr_tag_st0); + assign mem_rw_st0 = creq_pop_unqual ? creq_rw_st0 : mshr_rw_st0; + assign byteen_st0 = creq_pop_unqual ? creq_byteen_st0 : mshr_byteen_st0; + assign req_tid_st0 = creq_pop_unqual ? creq_tid_st0 : mshr_tid_st0; + assign writeword_st0 = creq_pop_unqual ? creq_writeword_st0 : mshr_writeword_st0; + assign writedata_st0 = drsq_filldata_st0; - assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0; - if (`WORD_SELECT_BITS != 0) begin assign wsel_st0 = creq_pop_unqual ? 
creq_wsel_st0 : mshr_wsel_st0; end else begin @@ -318,18 +276,6 @@ module VX_bank #( assign wsel_st0 = 0; end - assign writedata_st0 = drsq_filldata_st0; - - assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : `REQ_TAG_WIDTH'(creq_tag_st0); - - assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_rw_st0; - - assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_byteen_st0; - - assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_tid_st0; - - assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_writeword_st0; - `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin assign {debug_pc_st0, debug_wid_st0} = tag_st0[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; @@ -338,40 +284,7 @@ module VX_bank #( end `endif -if (DRAM_ENABLE) begin - - wire mshr_pending_hazard_st1; - - // we have a miss in mshr or in stage 3 for the current address - wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || (valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st0)); - - VX_pipe_register #( - .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), - .RESETW (1) - ) pipe_reg1 ( - .clk (clk), - .reset (reset), - .enable (!pipeline_stall), - .data_in ({valid_st0, is_mshr_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, is_fill_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, is_fill_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) - ); - -`ifdef DBG_CACHE_REQ_INFO - if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; - end else begin - assign {debug_pc_st1, debug_wid_st1} = 0; - end -`endif - - // force miss to ensure commit order when a new request has 
pending previous requests to same block - // also force a miss for mshr requests when previous requests got a miss - wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); - assign force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1 - && (mshr_pending_hazard_st1 || st2_pending_hazard_st1)) - || (valid_st1 && is_mshr_st1 && is_mshr_miss_st2); - +if (DRAM_ENABLE) begin VX_tag_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -387,114 +300,108 @@ if (DRAM_ENABLE) begin .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .debug_pc (debug_pc_st1), - .debug_wid (debug_wid_st1), + .debug_pc (debug_pc_st0), + .debug_wid (debug_wid_st0), `endif .stall (pipeline_stall), - // Inputs - .valid_in (valid_st1), - .addr_in (addr_st1), - .is_write_in (mem_rw_st1), - .is_fill_in (is_fill_st1), - .force_miss_in (force_miss_st1), + // read/Fill + .lookup_in (valid_st0 && !is_fill_st0), + .raddr_in (addr_st0), + .do_fill_in (valid_st0 && is_fill_st0), + .miss_out (miss_st0), + .readtag_out (readtag_st0), + .dirty_out (dirty_st0), - // Outputs - .readtag_out (readtag_st1), - .miss_out (miss_st1), - .dirty_out (dirty_st1), - .writeen_out (writeen_st1) + // write + .waddr_in (addr_st1), + .writeen_in (valid_st1 && writeen_st1) ); - assign valid_st12 = valid_st2; - assign writeen_st12 = writeen_st2; - assign addr_st12 = addr_st2; - assign wsel_st12 = wsel_st2; - assign byteen_st12 = byteen_st2; - assign writeword_st12 = writeword_st2; - assign tag_st12 = tag_st2; + assign valid_st01 = valid_st1; + assign writeen_st01 = writeen_st1; + assign addr_st01 = addr_st1; + assign wsel_st01 = wsel_st1; + assign byteen_st01 = byteen_st1; + assign writeword_st01 = writeword_st1; + assign tag_st01 = tag_st1; - assign core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; + // redundant fills + wire is_redundant_fill = is_fill_st0 && !miss_st0; - assign incoming_fill_st1 = !drsq_empty && (addr_st1 == mshr_addr_st0); + // we have a miss in mshr 
for the current address + wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 + || (valid_st1 && (miss_st1 || force_miss_st1) && (creq_addr_st0 == addr_st1)); - wire do_fill_req_st1 = miss_st1 - && !(WRITE_THROUGH && mem_rw_st1) - && (!force_miss_st1 - || (is_mshr_st1 && addr_st1 != addr_st2)) - && !incoming_fill_st1; + // force miss to ensure commit order when a new request has pending previous requests to same block + assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0; - assign do_writeback_st1 = (WRITE_THROUGH && !is_fill_st1 && mem_rw_st1) - || (!WRITE_THROUGH && is_fill_st1 && dirty_st1); + assign core_req_hit_st0 = !is_fill_st0 && !miss_st0 && !force_miss_st0; + + assign writeen_st0 = (core_req_hit_st0 && mem_rw_st0) + || (is_fill_st0 && !is_redundant_fill); - assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1; + wire send_fill_req_st0 = !is_fill_st0 && miss_st0 && !force_miss_st0 + && !(WRITE_THROUGH && mem_rw_st0); - assign mshr_push_st1 = (miss_st1 || force_miss_st1) - && !(WRITE_THROUGH && !is_fill_st1 && mem_rw_st1); + assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0) + || (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill); - assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; + assign dreq_push_st0 = send_fill_req_st0 || do_writeback_st0; + + assign mshr_push_st0 = !is_fill_st0 && (miss_st0 || force_miss_st0) + && !(WRITE_THROUGH && mem_rw_st0); + + assign crsq_push_st0 = core_req_hit_st0 && !mem_rw_st0; end else begin `UNUSED_VAR (mshr_pending_hazard_unqual_st0) `UNUSED_VAR (drsq_push) - `UNUSED_VAR (dirty_st1) - `UNUSED_VAR (writeen_st2) + `UNUSED_VAR (dirty_st0) + `UNUSED_VAR (writeen_st1) `ifdef DBG_CACHE_REQ_INFO assign debug_pc_st1 = debug_pc_st0; assign debug_wid_st1 = debug_wid_st0; `endif - - assign is_fill_st1 = is_fill_st0; - assign is_mshr_st1 = is_mshr_st0; - assign valid_st1 = valid_st0; - assign wsel_st1 = wsel_st0; - assign writeword_st1 = 
writeword_st0; - assign writedata_st1 = writedata_st0; - assign addr_st1 = creq_addr_st0; - assign tag_st1 = tag_st0; - assign mem_rw_st1 = mem_rw_st0; - assign byteen_st1 = byteen_st0; - assign req_tid_st1 = req_tid_st0; - assign dirty_st1 = 0; - assign readtag_st1 = 0; - assign miss_st1 = 0; - assign writeen_st1 = mem_rw_st0; - assign force_miss_st1 = 0; - assign valid_st12 = valid_st0; - assign writeen_st12 = mem_rw_st0; - assign addr_st12 = addr_st0; - assign wsel_st12 = wsel_st0; - assign byteen_st12 = byteen_st0; - assign writeword_st12 = writeword_st0; - assign tag_st12 = tag_st0; + assign valid_st01 = valid_st0; + assign writeen_st01 = mem_rw_st0; + assign addr_st01 = addr_st0; + assign wsel_st01 = wsel_st0; + assign byteen_st01 = byteen_st0; + assign writeword_st01 = writeword_st0; + assign tag_st01 = tag_st0; - assign incoming_fill_st1= 0; - assign core_req_hit_st1 = 1; - assign do_writeback_st1 = 0; - assign mshr_push_st1 = 0; - assign crsq_push_st1 = !mem_rw_st0; - assign dreq_push_st1 = 0; + assign writeen_st0 = mem_rw_st0; + assign miss_st0 = 0; + assign dirty_st0 = 0; + assign force_miss_st0 = 0; + assign readtag_st0 = 0; + assign core_req_hit_st0 = 1; + assign do_writeback_st0 = 0; + assign dreq_push_st0 = 0; + assign mshr_push_st0 = 0; + assign crsq_push_st0 = !mem_rw_st0; end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (!pipeline_stall), - .data_in ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, 
force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}), - .data_out ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, do_writeback_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_fill_st2, addr_st2, wsel_st2, readword_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2}) + .data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, readword_st0, writeword_st0, readtag_st0, miss_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), + .data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) ); if (WRITE_THROUGH) begin - assign dirtyb_st2 = dirtyb_st1; - assign readdata_st2 = readdata_st1; + assign dirtyb_st1 = dirtyb_st0; + assign readdata_st1 = readdata_st0; end else begin @@ -505,20 +412,20 @@ end .clk (clk), .reset (reset), .enable (!pipeline_stall), - .data_in ({dirtyb_st1, readdata_st1}), - .data_out ({dirtyb_st2, readdata_st2}) + .data_in ({dirtyb_st0, readdata_st0}), + .data_out ({dirtyb_st1, readdata_st1}) ); end `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st12, debug_wid_st12} = tag_st12[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_pc_st01, debug_wid_st01} = tag_st01[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin - assign {debug_pc_st12, debug_wid_st12} = 0; + assign {debug_pc_st01, debug_wid_st01} = 0; end `endif - `UNUSED_VAR (tag_st12) + `UNUSED_VAR (tag_st01) VX_data_access #( .BANK_ID (BANK_ID), @@ -536,65 
+443,62 @@ end .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .rdebug_pc (debug_pc_st1), - .rdebug_wid (debug_wid_st1), - .wdebug_pc (debug_pc_st12), - .wdebug_wid (debug_wid_st12), + .rdebug_pc (debug_pc_st0), + .rdebug_wid (debug_wid_st0), + .wdebug_pc (debug_pc_st01), + .wdebug_wid (debug_wid_st01), `endif .stall (pipeline_stall), // reading - .readen_in (~writeen_st1 && valid_st1), - .raddr_in (addr_st1), - .rwsel_in (wsel_st1), - .rbyteen_in (byteen_st1), - .readword_out (readword_st1), - .readdata_out (readdata_st1), - .dirtyb_out (dirtyb_st1), + .readen_in (valid_st0 && !mem_rw_st0 && !is_fill_st0), + .raddr_in (addr_st0), + .rwsel_in (wsel_st0), + .rbyteen_in (byteen_st0), + .readword_out (readword_st0), + .readdata_out (readdata_st0), + .dirtyb_out (dirtyb_st0), // writing - .writeen_in (writeen_st12 && valid_st12), - .waddr_in (addr_st12), - .wfill_in (is_fill_st2), - .wwsel_in (wsel_st12), - .wbyteen_in (byteen_st12), - .writeword_in (writeword_st12), - .writedata_in (writedata_st2) + .writeen_in (valid_st01 && writeen_st01), + .waddr_in (addr_st01), + .wfill_in (is_fill_st1), + .wwsel_in (wsel_st01), + .wbyteen_in (byteen_st01), + .writeword_in (writeword_st01), + .writedata_in (writedata_st1) ); `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st2, debug_wid_st2} = tag_st2[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin - assign {debug_pc_st2, debug_wid_st2} = 0; + assign {debug_pc_st1, debug_wid_st1} = 0; end `endif - wire mshr_push_unqual = valid_st2 && mshr_push_st2; + wire mshr_push_unqual = valid_st1 && mshr_push_st1; assign mshr_push_stall = 0; wire mshr_push = mshr_push_unqual && !crsq_push_stall && !dreq_push_stall; - wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == mshr_addr_st0)) || incoming_fill_st2; + wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == mshr_addr_st0)); if 
(DRAM_ENABLE) begin - wire mshr_dequeue_st2 = valid_st2 && is_mshr_st2 && !mshr_push_unqual && !pipeline_stall; + wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall; - // mark mshr entry that match DRAM fill as 'ready' - wire update_ready_st0 = drsq_pop; - - // push missed requests as 'ready' if it was a forced miss but actually had a hit - // or the fill request is comming for this block - wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2); + // push missed requests as 'ready' if it was a forced miss that actually had a hit + // or the fill request for this block is coming + wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1; VX_miss_resrv #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), - .CACHE_LINE_SIZE (CACHE_LINE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), @@ -607,20 +511,20 @@ end `ifdef DBG_CACHE_REQ_INFO .deq_debug_pc (debug_pc_st0), .deq_debug_wid (debug_wid_st0), - .enq_debug_pc (debug_pc_st2), - .enq_debug_wid (debug_wid_st2), + .enq_debug_pc (debug_pc_st1), + .enq_debug_wid (debug_wid_st1), `endif // enqueue .enqueue (mshr_push), - .enqueue_addr (addr_st2), - .enqueue_data ({writeword_st2, req_tid_st2, tag_st2, mem_rw_st2, byteen_st2, wsel_st2}), - .enqueue_is_mshr (is_mshr_st2), - .enqueue_ready (mshr_init_ready_state_st2), + .enqueue_addr (addr_st1), + .enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), + .enqueue_is_mshr (is_mshr_st1), + .enqueue_ready (mshr_init_ready_state_st1), `UNUSED_PIN (enqueue_full), // lookup - .lookup_ready (update_ready_st0), + .lookup_ready (drsq_pop), .lookup_addr (addr_st0), .lookup_match (mshr_pending_hazard_unqual_st0), @@ -631,16 +535,16 @@ end .schedule_data ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}), // dequeue - .dequeue
(mshr_dequeue_st2) + .dequeue (mshr_dequeue_st1) ); end else begin - `UNUSED_VAR (valid_st2) + `UNUSED_VAR (valid_st1) `UNUSED_VAR (mshr_push) - `UNUSED_VAR (wsel_st2) - `UNUSED_VAR (writeword_st2) - `UNUSED_VAR (mem_rw_st2) - `UNUSED_VAR (byteen_st2) - `UNUSED_VAR (incoming_fill_st2) + `UNUSED_VAR (wsel_st1) + `UNUSED_VAR (writeword_st1) + `UNUSED_VAR (mem_rw_st1) + `UNUSED_VAR (byteen_st1) + `UNUSED_VAR (incoming_fill_st1) assign mshr_pending_hazard_unqual_st0 = 0; assign mshr_valid_st0 = 0; assign mshr_addr_st0 = 0; @@ -656,7 +560,7 @@ end wire crsq_empty, crsq_full; - wire crsq_push_unqual = valid_st2 && crsq_push_st2; + wire crsq_push_unqual = valid_st1 && crsq_push_st1; assign crsq_push_stall = crsq_push_unqual && crsq_full; wire crsq_push = crsq_push_unqual @@ -666,9 +570,9 @@ end wire crsq_pop = core_rsp_valid && core_rsp_ready; - wire [`REQS_BITS-1:0] crsq_tid_st2 = req_tid_st2; - wire [CORE_TAG_WIDTH-1:0] crsq_tag_st2 = CORE_TAG_WIDTH'(tag_st2); - wire [`WORD_WIDTH-1:0] crsq_data_st2 = readword_st2; + wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1; + wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1); + wire [`WORD_WIDTH-1:0] crsq_data_st1 = readword_st1; VX_fifo_queue #( .DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), @@ -680,7 +584,7 @@ end .reset (reset), .push (crsq_push), .pop (crsq_pop), - .data_in ({crsq_tid_st2, crsq_tag_st2, crsq_data_st2}), + .data_in ({crsq_tid_st1, crsq_tag_st1, crsq_data_st1}), .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), .empty (crsq_empty), .full (crsq_full), @@ -693,25 +597,28 @@ end wire dreq_empty, dreq_full; - wire dreq_push_unqual = valid_st2 && dreq_push_st2; + wire dreq_push_unqual = valid_st1 && dreq_push_st1; assign dreq_push_stall = dreq_push_unqual && dreq_full; - wire dreq_push = dreq_push_unqual - && (do_writeback_st2 || !incoming_fill_qual_st2) + wire dreq_push = dreq_push_unqual && !dreq_full && !mshr_push_stall && !crsq_push_stall; wire dreq_pop = dram_req_valid && 
dram_req_ready; - wire writeback = WRITE_ENABLE && do_writeback_st2; + wire writeback = WRITE_ENABLE && do_writeback_st1; - wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st2 : - {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; + wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st1 : + {readtag_st1, addr_st1[`LINE_SELECT_BITS-1:0]}; - wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st2 : {CACHE_LINE_SIZE{1'b1}}; + wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st1 : {CACHE_LINE_SIZE{1'b1}}; - if (DRAM_ENABLE) begin + if (DRAM_ENABLE) begin + always @(posedge clk) begin + assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1)) + else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + end VX_fifo_queue #( .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE), @@ -722,7 +629,7 @@ end .reset (reset), .push (dreq_push), .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st2}), + .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st1}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), .full (dreq_full), @@ -733,9 +640,9 @@ end `UNUSED_VAR (dreq_pop) `UNUSED_VAR (dreq_addr) `UNUSED_VAR (dreq_byteen) - `UNUSED_VAR (readtag_st2) - `UNUSED_VAR (dirtyb_st2) - `UNUSED_VAR (readdata_st2) + `UNUSED_VAR (readtag_st1) + `UNUSED_VAR (dirtyb_st1) + `UNUSED_VAR (readdata_st1) `UNUSED_VAR (writeback) `UNUSED_VAR (dram_req_ready) assign dreq_empty = 1; @@ -755,32 +662,25 @@ end `SCOPE_ASSIGN (valid_st0, valid_st0); `SCOPE_ASSIGN (valid_st1, valid_st1); - `SCOPE_ASSIGN (valid_st2, valid_st2); `SCOPE_ASSIGN (is_fill_st0, is_fill_st0); `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); - `SCOPE_ASSIGN (miss_st1, miss_st1); - `SCOPE_ASSIGN (dirty_st1, dirty_st1); - `SCOPE_ASSIGN (force_miss_st1, force_miss_st1); + `SCOPE_ASSIGN (miss_st0, miss_st0); + `SCOPE_ASSIGN 
(dirty_st0, dirty_st0); + `SCOPE_ASSIGN (force_miss_st0, force_miss_st0); `SCOPE_ASSIGN (mshr_push, mshr_push); `SCOPE_ASSIGN (pipeline_stall, pipeline_stall); `SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); `SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); - `SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); `ifdef PERF_ENABLE - assign perf_read_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && !mem_rw_st2; - assign perf_write_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && mem_rw_st2; + assign perf_read_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && !mem_rw_st1; + assign perf_write_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && mem_rw_st1; assign perf_mshr_stalls = mshr_going_full; assign perf_pipe_stalls = pipeline_stall || mshr_going_full; `endif `ifdef DBG_PRINT_CACHE_BANK - wire incoming_fill_dfp_st2 = drsq_push && (addr_st2 == dram_rsp_addr); always @(posedge clk) begin - if (valid_st2 && miss_st2 && (incoming_fill_st2 || incoming_fill_dfp_st2)) begin - $display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), incoming_fill_st2, incoming_fill_dfp_st2); - assert(!is_mshr_st2); - end if (pipeline_stall) begin $display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); end @@ -794,13 +694,13 @@ end $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, debug_wid_st0, debug_pc_st0); end if (crsq_push) begin - $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), crsq_tag_st2, crsq_tid_st2, crsq_data_st2, debug_wid_st2, debug_pc_st2); + $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, 
data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag_st1, crsq_tid_st1, crsq_data_st1, debug_wid_st1, debug_pc_st1); end if (dreq_push) begin - if (do_writeback_st2) - $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st2, dreq_byteen, debug_wid_st2, debug_pc_st2); + if (do_writeback_st1) + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st1, dreq_byteen, debug_wid_st1, debug_pc_st1); else - $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st2, debug_pc_st2); + $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1); end end `endif diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 4428679f..389a66a6 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -47,8 +47,8 @@ module VX_data_access #( input wire [`UP(`WORD_SELECT_BITS)-1:0] rwsel_in, input wire [WORD_SIZE-1:0] rbyteen_in, output wire[`WORD_WIDTH-1:0] readword_out, - output wire [`CACHE_LINE_WIDTH-1:0] readdata_out, - output wire [CACHE_LINE_SIZE-1:0] dirtyb_out, + output wire [`CACHE_LINE_WIDTH-1:0] readdata_out, + output wire [CACHE_LINE_SIZE-1:0] dirtyb_out, // writing input wire writeen_in, @@ -59,7 +59,7 @@ module VX_data_access #( input wire [WORD_SIZE-1:0] wbyteen_in, input wire wfill_in, input wire [`WORD_WIDTH-1:0] writeword_in, - input wire [`CACHE_LINE_WIDTH-1:0] writedata_in + input wire [`CACHE_LINE_WIDTH-1:0] writedata_in ); wire [CACHE_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual; @@ -67,7 +67,7 @@ module VX_data_access #( wire [CACHE_LINE_SIZE-1:0] byte_enable; wire 
[`CACHE_LINE_WIDTH-1:0] write_data; - wire write_enable; + wire write_enable; wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in[`LINE_SELECT_BITS-1:0]; wire [`LINE_SELECT_BITS-1:0] waddr = waddr_in[`LINE_SELECT_BITS-1:0]; diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 34ecdf50..9c25ae09 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -30,18 +30,19 @@ module VX_tag_access #( input wire stall, - // Inputs - input wire valid_in, - input wire[`LINE_ADDR_WIDTH-1:0] addr_in, - input wire is_write_in, - input wire is_fill_in, - input wire force_miss_in, - - // Outputs - output wire[`TAG_SELECT_BITS-1:0] readtag_out, + // read/fill + input wire lookup_in, + input wire[`LINE_ADDR_WIDTH-1:0] raddr_in, + input wire do_fill_in, output wire miss_out, + output wire[`TAG_SELECT_BITS-1:0] readtag_out, output wire dirty_out, - output wire writeen_out + + // write +`IGNORE_WARNINGS_BEGIN + input wire[`LINE_ADDR_WIDTH-1:0] waddr_in, +`IGNORE_WARNINGS_END + input wire writeen_in ); wire read_valid; @@ -51,8 +52,9 @@ module VX_tag_access #( wire do_fill; wire do_write; - wire [`TAG_SELECT_BITS-1:0] addrtag = `LINE_TAG_ADDR(addr_in); - wire [`LINE_SELECT_BITS-1:0] addrline = addr_in [`LINE_SELECT_BITS-1:0]; + wire [`TAG_SELECT_BITS-1:0] raddr_tag = `LINE_TAG_ADDR(raddr_in); + wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in [`LINE_SELECT_BITS-1:0]; + wire [`LINE_SELECT_BITS-1:0] waddr = waddr_in [`LINE_SELECT_BITS-1:0]; VX_tag_store #( .CACHE_SIZE (CACHE_SIZE), @@ -64,61 +66,50 @@ module VX_tag_access #( .clk (clk), .reset (reset), - .addr (addrline), - + .raddr (raddr), .read_valid (read_valid), .read_dirty (read_dirty), .read_tag (read_tag), - .do_fill (do_fill), - .do_write (do_write), - .write_tag (addrtag) + .fill_tag (raddr_tag), + + .waddr (waddr), + .do_write (do_write) ); - // use "case equality" to handle uninitialized tag when block entry is not valid - wire tags_match = read_valid && (addrtag == read_tag); + // 
read/fill stage + + wire tags_match = read_valid && (raddr_tag == read_tag); + + assign do_fill = do_fill_in && !stall; + + assign readtag_out = read_tag; + + assign miss_out = !tags_match; + + assign dirty_out = read_dirty || ((raddr == waddr) && writeen_in); + + // write stage - assign do_write = WRITE_ENABLE - && valid_in - && tags_match - && !is_fill_in - && is_write_in - && !force_miss_in - && !stall; + assign do_write = WRITE_ENABLE && writeen_in && !stall; - assign do_fill = valid_in - && is_fill_in - && !stall; - - assign miss_out = valid_in - && !tags_match - && !is_fill_in; - - assign dirty_out = WRITE_ENABLE - && valid_in - && read_valid - && read_dirty - && !(is_fill_in && tags_match); // discard writeback for redundant fills - - assign readtag_out = read_tag; - - assign writeen_out = do_write || (do_fill - && !tags_match); // discard data update for redundant fills + wire do_lookup = lookup_in && !stall; + `UNUSED_VAR (do_lookup) `ifdef DBG_PRINT_CACHE_TAG - always @(posedge clk) begin - if (valid_in && !stall) begin - if (do_fill) begin - $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag, read_tag); - if (tags_match) begin - $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID)); - end - end else if (tags_match) begin - $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag); + always @(posedge clk) begin + if (do_fill) begin + $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), raddr, raddr_tag, read_tag); + if (tags_match) begin + $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID)); + 
end + end else if (do_lookup) begin + if (tags_match) begin + $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), debug_wid, debug_pc, read_dirty, raddr, raddr_tag); end else begin - $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag, read_tag); - end - end + $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), debug_wid, debug_pc, read_dirty, raddr, raddr_tag, read_tag); + end + end end `endif diff --git a/hw/rtl/cache/VX_tag_store.v b/hw/rtl/cache/VX_tag_store.v index b0078a75..8d4bcc39 100644 --- a/hw/rtl/cache/VX_tag_store.v +++ b/hw/rtl/cache/VX_tag_store.v @@ -15,11 +15,12 @@ module VX_tag_store #( input wire clk, input wire reset, - input wire[`LINE_SELECT_BITS-1:0] addr, - - input wire do_fill, + input wire[`LINE_SELECT_BITS-1:0] raddr, + input wire do_fill, + input wire[`TAG_SELECT_BITS-1:0] fill_tag, + + input wire[`LINE_SELECT_BITS-1:0] waddr, input wire do_write, - input wire[`TAG_SELECT_BITS-1:0] write_tag, output wire[`TAG_SELECT_BITS-1:0] read_tag, output wire read_valid, @@ -36,10 +37,10 @@ module VX_tag_store #( end end else begin if (do_fill) begin - valid[addr] <= 1; - dirty[addr] <= 0; + valid[raddr] <= 1; + dirty[raddr] <= 0; end else if (do_write) begin - dirty[addr] <= 1; + dirty[waddr] <= 1; end end end @@ -50,15 +51,15 @@ module VX_tag_store #( .RWCHECK(1) ) tags ( .clk(clk), - .addr(addr), + .addr(raddr), .wren(do_fill), .byteen(1'b1), .rden(1'b1), - .din(write_tag), + .din(fill_tag), .dout(read_tag) ); - assign read_valid = valid[addr]; - assign read_dirty = dirty[addr]; + assign read_valid = valid[raddr]; + assign 
read_dirty = dirty[raddr]; endmodule diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 97ea030b..4dc38645 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -192,15 +192,13 @@ "afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": { "?valid_st0": 1, "?valid_st1": 1, - "?valid_st2": 1, "addr_st0": 32, "addr_st1": 32, - "addr_st2": 32, "is_fill_st0": 1, "is_mshr_st0": 1, - "miss_st1": 1, - "force_miss_st1": 1, - "dirty_st1": 1, + "miss_st0": 1, + "force_miss_st0": 1, + "dirty_st0": 1, "mshr_push": 1, "?pipeline_stall": 1 }