cache pipeline optimization - moved tag access to stage0

This commit is contained in:
Blaise Tine
2021-01-03 23:10:41 -05:00
parent 9cef1aae04
commit 4d55118545
5 changed files with 251 additions and 361 deletions

470
hw/rtl/cache/VX_bank.v vendored
View File

@@ -92,17 +92,8 @@ module VX_bank #(
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
wire [31:0] debug_pc_st0;
wire [`NW_BITS-1:0] debug_wid_st0;
wire [31:0] debug_pc_st1;
wire [`NW_BITS-1:0] debug_wid_st1;
wire [31:0] debug_pc_st12;
wire [`NW_BITS-1:0] debug_wid_st12;
wire [31:0] debug_pc_st2;
wire [`NW_BITS-1:0] debug_wid_st2;
wire [31:0] debug_pc_st0, debug_pc_st1, debug_pc_st01;
wire [`NW_BITS-1:0] debug_wid_st0, debug_wid_st1, debug_wid_st01;
/* verilator lint_on UNUSED */
`endif
@@ -198,86 +189,48 @@ module VX_bank #(
wire [WORD_SIZE-1:0] mshr_byteen_st0;
wire mshr_pending_hazard_unqual_st0;
wire is_fill_st0;
wire is_mshr_st0;
wire valid_st0;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0;
wire [`WORD_WIDTH-1:0] writeword_st0;
wire [`CACHE_LINE_WIDTH-1:0] writedata_st0;
wire [`REQ_TAG_WIDTH-1:0] tag_st0;
wire mem_rw_st0;
wire [WORD_SIZE-1:0] byteen_st0;
wire [`REQS_BITS-1:0] req_tid_st0;
wire is_fill_st1;
wire is_mshr_st1;
wire valid_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st1;
wire [`WORD_WIDTH-1:0] readword_st1;
wire [`CACHE_LINE_WIDTH-1:0] readdata_st1;
wire [CACHE_LINE_SIZE-1:0] dirtyb_st1;
wire [`WORD_WIDTH-1:0] writeword_st1;
wire [`CACHE_LINE_WIDTH-1:0] writedata_st1;
wire [`TAG_SELECT_BITS-1:0] readtag_st1;
wire miss_st1;
wire force_miss_st1;
wire dirty_st1;
wire writeen_st1;
wire [`REQ_TAG_WIDTH-1:0] tag_st1;
wire mem_rw_st1;
wire [WORD_SIZE-1:0] byteen_st1;
wire [`REQS_BITS-1:0] req_tid_st1;
wire core_req_hit_st1;
wire incoming_fill_st1;
wire do_writeback_st1;
wire mshr_push_st1;
wire crsq_push_st1;
wire dreq_push_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
wire [`WORD_WIDTH-1:0] readword_st0, readword_st1;
wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1;
wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1;
wire [`CACHE_LINE_WIDTH-1:0] writedata_st0, writedata_st1;
wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1;
wire miss_st0, miss_st1;
wire force_miss_st0, force_miss_st1;
wire dirty_st0;
wire [CACHE_LINE_SIZE-1:0] dirtyb_st0, dirtyb_st1;
wire writeen_st0, writeen_st1;
wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire mem_rw_st0, mem_rw_st1;
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire core_req_hit_st0, core_req_hit_st1;
wire do_writeback_st0, do_writeback_st1;
wire mshr_push_st0, mshr_push_st1;
wire crsq_push_st0, crsq_push_st1;
wire dreq_push_st0, dreq_push_st1;
wire valid_st12;
wire writeen_st12;
wire [`LINE_ADDR_WIDTH-1:0] addr_st12;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st12;
wire [WORD_SIZE-1:0] byteen_st12;
wire [`WORD_WIDTH-1:0] writeword_st12;
wire [`REQ_TAG_WIDTH-1:0] tag_st12;
wire valid_st2;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st2;
wire [`WORD_WIDTH-1:0] readword_st2;
wire [`WORD_WIDTH-1:0] writeword_st2;
wire [`CACHE_LINE_WIDTH-1:0] readdata_st2;
wire [`CACHE_LINE_WIDTH-1:0] writedata_st2;
wire [CACHE_LINE_SIZE-1:0] dirtyb_st2;
wire [`TAG_SELECT_BITS-1:0] readtag_st2;
wire is_fill_st2;
wire is_mshr_st2;
wire miss_st2;
wire force_miss_st2;
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
wire writeen_st2;
wire [`REQ_TAG_WIDTH-1:0] tag_st2;
wire mem_rw_st2;
wire [WORD_SIZE-1:0] byteen_st2;
wire [`REQS_BITS-1:0] req_tid_st2;
wire core_req_hit_st2;
wire incoming_fill_st2;
wire do_writeback_st2;
wire mshr_push_st2;
wire crsq_push_st2;
wire dreq_push_st2;
wire valid_st01;
wire writeen_st01;
wire [`LINE_ADDR_WIDTH-1:0] addr_st01;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st01;
wire [WORD_SIZE-1:0] byteen_st01;
wire [`WORD_WIDTH-1:0] writeword_st01;
wire [`REQ_TAG_WIDTH-1:0] tag_st01;
wire mshr_push_stall;
wire crsq_push_stall;
wire dreq_push_stall;
wire pipeline_stall;
wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
wire is_mshr_miss_st1 = valid_st1 && is_mshr_st1 && (miss_st1 || force_miss_st1);
wire creq_commit = valid_st2 && !is_fill_st2
&& (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2))
wire creq_commit = valid_st1 && !is_fill_st1
&& (core_req_hit_st1 || (WRITE_THROUGH && mem_rw_st1))
&& !pipeline_stall;
// determine which queue to pop next in priority order
@@ -286,7 +239,7 @@ module VX_bank #(
wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full;
assign mshr_pop = mshr_pop_unqual && !pipeline_stall
&& !is_mshr_miss_st2; // stop if previous request was a miss
&& !is_mshr_miss_st1; // stop if previous request was a miss
assign drsq_pop = drsq_pop_unqual && !pipeline_stall;
assign creq_pop = creq_pop_unqual && !pipeline_stall;
@@ -306,10 +259,15 @@ module VX_bank #(
assign is_mshr_st0 = mshr_pop_unqual;
assign is_fill_st0 = drsq_pop_unqual;
assign valid_st0 = drsq_pop || mshr_pop || creq_pop;
assign valid_st0 = drsq_pop || mshr_pop || creq_pop;
assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0;
assign tag_st0 = creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : `REQ_TAG_WIDTH'(mshr_tag_st0);
assign mem_rw_st0 = creq_pop_unqual ? creq_rw_st0 : mshr_rw_st0;
assign byteen_st0 = creq_pop_unqual ? creq_byteen_st0 : mshr_byteen_st0;
assign req_tid_st0 = creq_pop_unqual ? creq_tid_st0 : mshr_tid_st0;
assign writeword_st0 = creq_pop_unqual ? creq_writeword_st0 : mshr_writeword_st0;
assign writedata_st0 = drsq_filldata_st0;
assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0;
if (`WORD_SELECT_BITS != 0) begin
assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_wsel_st0;
end else begin
@@ -318,18 +276,6 @@ module VX_bank #(
assign wsel_st0 = 0;
end
assign writedata_st0 = drsq_filldata_st0;
assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : `REQ_TAG_WIDTH'(creq_tag_st0);
assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_rw_st0;
assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_byteen_st0;
assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_tid_st0;
assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_writeword_st0;
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_wid_st0} = tag_st0[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
@@ -338,40 +284,7 @@ module VX_bank #(
end
`endif
if (DRAM_ENABLE) begin
wire mshr_pending_hazard_st1;
// we have a miss in mshr or in stage 3 for the current address
wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0
|| (valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st0));
VX_pipe_register #(
.DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!pipeline_stall),
.data_in ({valid_st0, is_mshr_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, is_fill_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}),
.data_out ({valid_st1, is_mshr_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, is_fill_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st1, debug_wid_st1} = 0;
end
`endif
// force miss to ensure commit order when a new request has pending previous requests to same block
// also force a miss for mshr requests when previous requests got a miss
wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1);
assign force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1
&& (mshr_pending_hazard_st1 || st2_pending_hazard_st1))
|| (valid_st1 && is_mshr_st1 && is_mshr_miss_st2);
if (DRAM_ENABLE) begin
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
@@ -387,114 +300,108 @@ if (DRAM_ENABLE) begin
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc (debug_pc_st1),
.debug_wid (debug_wid_st1),
.debug_pc (debug_pc_st0),
.debug_wid (debug_wid_st0),
`endif
.stall (pipeline_stall),
// Inputs
.valid_in (valid_st1),
.addr_in (addr_st1),
.is_write_in (mem_rw_st1),
.is_fill_in (is_fill_st1),
.force_miss_in (force_miss_st1),
// read/Fill
.lookup_in (valid_st0 && !is_fill_st0),
.raddr_in (addr_st0),
.do_fill_in (valid_st0 && is_fill_st0),
.miss_out (miss_st0),
.readtag_out (readtag_st0),
.dirty_out (dirty_st0),
// Outputs
.readtag_out (readtag_st1),
.miss_out (miss_st1),
.dirty_out (dirty_st1),
.writeen_out (writeen_st1)
// write
.waddr_in (addr_st1),
.writeen_in (valid_st1 && writeen_st1)
);
assign valid_st12 = valid_st2;
assign writeen_st12 = writeen_st2;
assign addr_st12 = addr_st2;
assign wsel_st12 = wsel_st2;
assign byteen_st12 = byteen_st2;
assign writeword_st12 = writeword_st2;
assign tag_st12 = tag_st2;
assign valid_st01 = valid_st1;
assign writeen_st01 = writeen_st1;
assign addr_st01 = addr_st1;
assign wsel_st01 = wsel_st1;
assign byteen_st01 = byteen_st1;
assign writeword_st01 = writeword_st1;
assign tag_st01 = tag_st1;
assign core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1;
// redundant fills
wire is_redundant_fill = is_fill_st0 && !miss_st0;
assign incoming_fill_st1 = !drsq_empty && (addr_st1 == mshr_addr_st0);
// we have a miss in mshr for the current address
wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0
|| (valid_st1 && (miss_st1 || force_miss_st1) && (creq_addr_st0 == addr_st1));
wire do_fill_req_st1 = miss_st1
&& !(WRITE_THROUGH && mem_rw_st1)
&& (!force_miss_st1
|| (is_mshr_st1 && addr_st1 != addr_st2))
&& !incoming_fill_st1;
// force miss to ensure commit order when a new request has pending previous requests to same block
assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0;
assign do_writeback_st1 = (WRITE_THROUGH && !is_fill_st1 && mem_rw_st1)
|| (!WRITE_THROUGH && is_fill_st1 && dirty_st1);
assign core_req_hit_st0 = !is_fill_st0 && !miss_st0 && !force_miss_st0;
assign writeen_st0 = (core_req_hit_st0 && mem_rw_st0)
|| (is_fill_st0 && !is_redundant_fill);
assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1;
wire send_fill_req_st0 = !is_fill_st0 && miss_st0 && !force_miss_st0
&& !(WRITE_THROUGH && mem_rw_st0);
assign mshr_push_st1 = (miss_st1 || force_miss_st1)
&& !(WRITE_THROUGH && !is_fill_st1 && mem_rw_st1);
assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0)
|| (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill);
assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1;
assign dreq_push_st0 = send_fill_req_st0 || do_writeback_st0;
assign mshr_push_st0 = !is_fill_st0 && (miss_st0 || force_miss_st0)
&& !(WRITE_THROUGH && mem_rw_st0);
assign crsq_push_st0 = core_req_hit_st0 && !mem_rw_st0;
end else begin
`UNUSED_VAR (mshr_pending_hazard_unqual_st0)
`UNUSED_VAR (drsq_push)
`UNUSED_VAR (dirty_st1)
`UNUSED_VAR (writeen_st2)
`UNUSED_VAR (dirty_st0)
`UNUSED_VAR (writeen_st1)
`ifdef DBG_CACHE_REQ_INFO
assign debug_pc_st1 = debug_pc_st0;
assign debug_wid_st1 = debug_wid_st0;
`endif
assign is_fill_st1 = is_fill_st0;
assign is_mshr_st1 = is_mshr_st0;
assign valid_st1 = valid_st0;
assign wsel_st1 = wsel_st0;
assign writeword_st1 = writeword_st0;
assign writedata_st1 = writedata_st0;
assign addr_st1 = creq_addr_st0;
assign tag_st1 = tag_st0;
assign mem_rw_st1 = mem_rw_st0;
assign byteen_st1 = byteen_st0;
assign req_tid_st1 = req_tid_st0;
assign dirty_st1 = 0;
assign readtag_st1 = 0;
assign miss_st1 = 0;
assign writeen_st1 = mem_rw_st0;
assign force_miss_st1 = 0;
assign valid_st12 = valid_st0;
assign writeen_st12 = mem_rw_st0;
assign addr_st12 = addr_st0;
assign wsel_st12 = wsel_st0;
assign byteen_st12 = byteen_st0;
assign writeword_st12 = writeword_st0;
assign tag_st12 = tag_st0;
assign valid_st01 = valid_st0;
assign writeen_st01 = mem_rw_st0;
assign addr_st01 = addr_st0;
assign wsel_st01 = wsel_st0;
assign byteen_st01 = byteen_st0;
assign writeword_st01 = writeword_st0;
assign tag_st01 = tag_st0;
assign incoming_fill_st1= 0;
assign core_req_hit_st1 = 1;
assign do_writeback_st1 = 0;
assign mshr_push_st1 = 0;
assign crsq_push_st1 = !mem_rw_st0;
assign dreq_push_st1 = 0;
assign writeen_st0 = mem_rw_st0;
assign miss_st0 = 0;
assign dirty_st0 = 0;
assign force_miss_st0 = 0;
assign readtag_st0 = 0;
assign core_req_hit_st0 = 1;
assign do_writeback_st0 = 0;
assign dreq_push_st0 = 0;
assign mshr_push_st0 = 0;
assign crsq_push_st0 = !mem_rw_st0;
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.enable (!pipeline_stall),
.data_in ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}),
.data_out ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, do_writeback_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_fill_st2, addr_st2, wsel_st2, readword_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2})
.data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, readword_st0, writeword_st0, readtag_st0, miss_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}),
.data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1})
);
if (WRITE_THROUGH) begin
assign dirtyb_st2 = dirtyb_st1;
assign readdata_st2 = readdata_st1;
assign dirtyb_st1 = dirtyb_st0;
assign readdata_st1 = readdata_st0;
end else begin
@@ -505,20 +412,20 @@ end
.clk (clk),
.reset (reset),
.enable (!pipeline_stall),
.data_in ({dirtyb_st1, readdata_st1}),
.data_out ({dirtyb_st2, readdata_st2})
.data_in ({dirtyb_st0, readdata_st0}),
.data_out ({dirtyb_st1, readdata_st1})
);
end
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st12, debug_wid_st12} = tag_st12[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_st01, debug_wid_st01} = tag_st01[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st12, debug_wid_st12} = 0;
assign {debug_pc_st01, debug_wid_st01} = 0;
end
`endif
`UNUSED_VAR (tag_st12)
`UNUSED_VAR (tag_st01)
VX_data_access #(
.BANK_ID (BANK_ID),
@@ -536,65 +443,62 @@ end
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.rdebug_pc (debug_pc_st1),
.rdebug_wid (debug_wid_st1),
.wdebug_pc (debug_pc_st12),
.wdebug_wid (debug_wid_st12),
.rdebug_pc (debug_pc_st0),
.rdebug_wid (debug_wid_st0),
.wdebug_pc (debug_pc_st01),
.wdebug_wid (debug_wid_st01),
`endif
.stall (pipeline_stall),
// reading
.readen_in (~writeen_st1 && valid_st1),
.raddr_in (addr_st1),
.rwsel_in (wsel_st1),
.rbyteen_in (byteen_st1),
.readword_out (readword_st1),
.readdata_out (readdata_st1),
.dirtyb_out (dirtyb_st1),
.readen_in (valid_st0 && !mem_rw_st0 && !is_fill_st0),
.raddr_in (addr_st0),
.rwsel_in (wsel_st0),
.rbyteen_in (byteen_st0),
.readword_out (readword_st0),
.readdata_out (readdata_st0),
.dirtyb_out (dirtyb_st0),
// writing
.writeen_in (writeen_st12 && valid_st12),
.waddr_in (addr_st12),
.wfill_in (is_fill_st2),
.wwsel_in (wsel_st12),
.wbyteen_in (byteen_st12),
.writeword_in (writeword_st12),
.writedata_in (writedata_st2)
.writeen_in (valid_st01 && writeen_st01),
.waddr_in (addr_st01),
.wfill_in (is_fill_st1),
.wwsel_in (wsel_st01),
.wbyteen_in (byteen_st01),
.writeword_in (writeword_st01),
.writedata_in (writedata_st1)
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st2, debug_wid_st2} = tag_st2[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st2, debug_wid_st2} = 0;
assign {debug_pc_st1, debug_wid_st1} = 0;
end
`endif
wire mshr_push_unqual = valid_st2 && mshr_push_st2;
wire mshr_push_unqual = valid_st1 && mshr_push_st1;
assign mshr_push_stall = 0;
wire mshr_push = mshr_push_unqual
&& !crsq_push_stall
&& !dreq_push_stall;
wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == mshr_addr_st0)) || incoming_fill_st2;
wire incoming_fill_st1 = (!drsq_empty && (addr_st1 == mshr_addr_st0));
if (DRAM_ENABLE) begin
wire mshr_dequeue_st2 = valid_st2 && is_mshr_st2 && !mshr_push_unqual && !pipeline_stall;
wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall;
// mark mshr entry that match DRAM fill as 'ready'
wire update_ready_st0 = drsq_pop;
// push missed requests as 'ready' if it was a forced miss but actually had a hit
// or the fill request is coming for this block
wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2);
// push missed requests as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is coming
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1;
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
@@ -607,20 +511,20 @@ end
`ifdef DBG_CACHE_REQ_INFO
.deq_debug_pc (debug_pc_st0),
.deq_debug_wid (debug_wid_st0),
.enq_debug_pc (debug_pc_st2),
.enq_debug_wid (debug_wid_st2),
.enq_debug_pc (debug_pc_st1),
.enq_debug_wid (debug_wid_st1),
`endif
// enqueue
.enqueue (mshr_push),
.enqueue_addr (addr_st2),
.enqueue_data ({writeword_st2, req_tid_st2, tag_st2, mem_rw_st2, byteen_st2, wsel_st2}),
.enqueue_is_mshr (is_mshr_st2),
.enqueue_ready (mshr_init_ready_state_st2),
.enqueue_addr (addr_st1),
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
.enqueue_is_mshr (is_mshr_st1),
.enqueue_ready (mshr_init_ready_state_st1),
`UNUSED_PIN (enqueue_full),
// lookup
.lookup_ready (update_ready_st0),
.lookup_ready (drsq_pop),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_hazard_unqual_st0),
@@ -631,16 +535,16 @@ end
.schedule_data ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}),
// dequeue
.dequeue (mshr_dequeue_st2)
.dequeue (mshr_dequeue_st1)
);
end else begin
`UNUSED_VAR (valid_st2)
`UNUSED_VAR (valid_st1)
`UNUSED_VAR (mshr_push)
`UNUSED_VAR (wsel_st2)
`UNUSED_VAR (writeword_st2)
`UNUSED_VAR (mem_rw_st2)
`UNUSED_VAR (byteen_st2)
`UNUSED_VAR (incoming_fill_st2)
`UNUSED_VAR (wsel_st1)
`UNUSED_VAR (writeword_st1)
`UNUSED_VAR (mem_rw_st1)
`UNUSED_VAR (byteen_st1)
`UNUSED_VAR (incoming_fill_st1)
assign mshr_pending_hazard_unqual_st0 = 0;
assign mshr_valid_st0 = 0;
assign mshr_addr_st0 = 0;
@@ -656,7 +560,7 @@ end
wire crsq_empty, crsq_full;
wire crsq_push_unqual = valid_st2 && crsq_push_st2;
wire crsq_push_unqual = valid_st1 && crsq_push_st1;
assign crsq_push_stall = crsq_push_unqual && crsq_full;
wire crsq_push = crsq_push_unqual
@@ -666,9 +570,9 @@ end
wire crsq_pop = core_rsp_valid && core_rsp_ready;
wire [`REQS_BITS-1:0] crsq_tid_st2 = req_tid_st2;
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st2 = CORE_TAG_WIDTH'(tag_st2);
wire [`WORD_WIDTH-1:0] crsq_data_st2 = readword_st2;
wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1;
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1);
wire [`WORD_WIDTH-1:0] crsq_data_st1 = readword_st1;
VX_fifo_queue #(
.DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
@@ -680,7 +584,7 @@ end
.reset (reset),
.push (crsq_push),
.pop (crsq_pop),
.data_in ({crsq_tid_st2, crsq_tag_st2, crsq_data_st2}),
.data_in ({crsq_tid_st1, crsq_tag_st1, crsq_data_st1}),
.data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}),
.empty (crsq_empty),
.full (crsq_full),
@@ -693,25 +597,28 @@ end
wire dreq_empty, dreq_full;
wire dreq_push_unqual = valid_st2 && dreq_push_st2;
wire dreq_push_unqual = valid_st1 && dreq_push_st1;
assign dreq_push_stall = dreq_push_unqual && dreq_full;
wire dreq_push = dreq_push_unqual
&& (do_writeback_st2 || !incoming_fill_qual_st2)
wire dreq_push = dreq_push_unqual
&& !dreq_full
&& !mshr_push_stall
&& !crsq_push_stall;
wire dreq_pop = dram_req_valid && dram_req_ready;
wire writeback = WRITE_ENABLE && do_writeback_st2;
wire writeback = WRITE_ENABLE && do_writeback_st1;
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st2 :
{readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st1 :
{readtag_st1, addr_st1[`LINE_SELECT_BITS-1:0]};
wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st2 : {CACHE_LINE_SIZE{1'b1}};
wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st1 : {CACHE_LINE_SIZE{1'b1}};
if (DRAM_ENABLE) begin
if (DRAM_ENABLE) begin
always @(posedge clk) begin
assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1))
else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
end
VX_fifo_queue #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DREQ_SIZE),
@@ -722,7 +629,7 @@ end
.reset (reset),
.push (dreq_push),
.pop (dreq_pop),
.data_in ({writeback, dreq_byteen, dreq_addr, readdata_st2}),
.data_in ({writeback, dreq_byteen, dreq_addr, readdata_st1}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.full (dreq_full),
@@ -733,9 +640,9 @@ end
`UNUSED_VAR (dreq_pop)
`UNUSED_VAR (dreq_addr)
`UNUSED_VAR (dreq_byteen)
`UNUSED_VAR (readtag_st2)
`UNUSED_VAR (dirtyb_st2)
`UNUSED_VAR (readdata_st2)
`UNUSED_VAR (readtag_st1)
`UNUSED_VAR (dirtyb_st1)
`UNUSED_VAR (readdata_st1)
`UNUSED_VAR (writeback)
`UNUSED_VAR (dram_req_ready)
assign dreq_empty = 1;
@@ -755,32 +662,25 @@ end
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
`SCOPE_ASSIGN (valid_st2, valid_st2);
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
`SCOPE_ASSIGN (miss_st1, miss_st1);
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
`SCOPE_ASSIGN (force_miss_st1, force_miss_st1);
`SCOPE_ASSIGN (miss_st0, miss_st0);
`SCOPE_ASSIGN (dirty_st0, dirty_st0);
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
`SCOPE_ASSIGN (mshr_push, mshr_push);
`SCOPE_ASSIGN (pipeline_stall, pipeline_stall);
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
`ifdef PERF_ENABLE
assign perf_read_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && !mem_rw_st2;
assign perf_write_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && mem_rw_st2;
assign perf_read_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && !mem_rw_st1;
assign perf_write_misses = !pipeline_stall && miss_st1 && !is_mshr_st1 && mem_rw_st1;
assign perf_mshr_stalls = mshr_going_full;
assign perf_pipe_stalls = pipeline_stall || mshr_going_full;
`endif
`ifdef DBG_PRINT_CACHE_BANK
wire incoming_fill_dfp_st2 = drsq_push && (addr_st2 == dram_rsp_addr);
always @(posedge clk) begin
if (valid_st2 && miss_st2 && (incoming_fill_st2 || incoming_fill_dfp_st2)) begin
$display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), incoming_fill_st2, incoming_fill_dfp_st2);
assert(!is_mshr_st2);
end
if (pipeline_stall) begin
$display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall);
end
@@ -794,13 +694,13 @@ end
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, debug_wid_st0, debug_pc_st0);
end
if (crsq_push) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), crsq_tag_st2, crsq_tid_st2, crsq_data_st2, debug_wid_st2, debug_pc_st2);
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag_st1, crsq_tid_st1, crsq_data_st1, debug_wid_st1, debug_pc_st1);
end
if (dreq_push) begin
if (do_writeback_st2)
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st2, dreq_byteen, debug_wid_st2, debug_pc_st2);
if (do_writeback_st1)
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st1, dreq_byteen, debug_wid_st1, debug_pc_st1);
else
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st2, debug_pc_st2);
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
end
end
`endif

View File

@@ -47,8 +47,8 @@ module VX_data_access #(
input wire [`UP(`WORD_SELECT_BITS)-1:0] rwsel_in,
input wire [WORD_SIZE-1:0] rbyteen_in,
output wire[`WORD_WIDTH-1:0] readword_out,
output wire [`CACHE_LINE_WIDTH-1:0] readdata_out,
output wire [CACHE_LINE_SIZE-1:0] dirtyb_out,
output wire [`CACHE_LINE_WIDTH-1:0] readdata_out,
output wire [CACHE_LINE_SIZE-1:0] dirtyb_out,
// writing
input wire writeen_in,
@@ -59,7 +59,7 @@ module VX_data_access #(
input wire [WORD_SIZE-1:0] wbyteen_in,
input wire wfill_in,
input wire [`WORD_WIDTH-1:0] writeword_in,
input wire [`CACHE_LINE_WIDTH-1:0] writedata_in
input wire [`CACHE_LINE_WIDTH-1:0] writedata_in
);
wire [CACHE_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual;
@@ -67,7 +67,7 @@ module VX_data_access #(
wire [CACHE_LINE_SIZE-1:0] byte_enable;
wire [`CACHE_LINE_WIDTH-1:0] write_data;
wire write_enable;
wire write_enable;
wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in[`LINE_SELECT_BITS-1:0];
wire [`LINE_SELECT_BITS-1:0] waddr = waddr_in[`LINE_SELECT_BITS-1:0];

View File

@@ -30,18 +30,19 @@ module VX_tag_access #(
input wire stall,
// Inputs
input wire valid_in,
input wire[`LINE_ADDR_WIDTH-1:0] addr_in,
input wire is_write_in,
input wire is_fill_in,
input wire force_miss_in,
// Outputs
output wire[`TAG_SELECT_BITS-1:0] readtag_out,
// read/fill
input wire lookup_in,
input wire[`LINE_ADDR_WIDTH-1:0] raddr_in,
input wire do_fill_in,
output wire miss_out,
output wire[`TAG_SELECT_BITS-1:0] readtag_out,
output wire dirty_out,
output wire writeen_out
// write
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] waddr_in,
`IGNORE_WARNINGS_END
input wire writeen_in
);
wire read_valid;
@@ -51,8 +52,9 @@ module VX_tag_access #(
wire do_fill;
wire do_write;
wire [`TAG_SELECT_BITS-1:0] addrtag = `LINE_TAG_ADDR(addr_in);
wire [`LINE_SELECT_BITS-1:0] addrline = addr_in [`LINE_SELECT_BITS-1:0];
wire [`TAG_SELECT_BITS-1:0] raddr_tag = `LINE_TAG_ADDR(raddr_in);
wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in [`LINE_SELECT_BITS-1:0];
wire [`LINE_SELECT_BITS-1:0] waddr = waddr_in [`LINE_SELECT_BITS-1:0];
VX_tag_store #(
.CACHE_SIZE (CACHE_SIZE),
@@ -64,61 +66,50 @@ module VX_tag_access #(
.clk (clk),
.reset (reset),
.addr (addrline),
.raddr (raddr),
.read_valid (read_valid),
.read_dirty (read_dirty),
.read_tag (read_tag),
.do_fill (do_fill),
.do_write (do_write),
.write_tag (addrtag)
.fill_tag (raddr_tag),
.waddr (waddr),
.do_write (do_write)
);
// use "case equality" to handle uninitialized tag when block entry is not valid
wire tags_match = read_valid && (addrtag == read_tag);
// read/fill stage
wire tags_match = read_valid && (raddr_tag == read_tag);
assign do_fill = do_fill_in && !stall;
assign readtag_out = read_tag;
assign miss_out = !tags_match;
assign dirty_out = read_dirty || ((raddr == waddr) && writeen_in);
// write stage
assign do_write = WRITE_ENABLE
&& valid_in
&& tags_match
&& !is_fill_in
&& is_write_in
&& !force_miss_in
&& !stall;
assign do_write = WRITE_ENABLE && writeen_in && !stall;
assign do_fill = valid_in
&& is_fill_in
&& !stall;
assign miss_out = valid_in
&& !tags_match
&& !is_fill_in;
assign dirty_out = WRITE_ENABLE
&& valid_in
&& read_valid
&& read_dirty
&& !(is_fill_in && tags_match); // discard writeback for redundant fills
assign readtag_out = read_tag;
assign writeen_out = do_write || (do_fill
&& !tags_match); // discard data update for redundant fills
wire do_lookup = lookup_in && !stall;
`UNUSED_VAR (do_lookup)
`ifdef DBG_PRINT_CACHE_TAG
always @(posedge clk) begin
if (valid_in && !stall) begin
if (do_fill) begin
$display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag, read_tag);
if (tags_match) begin
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID));
end
end else if (tags_match) begin
$display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag);
always @(posedge clk) begin
if (do_fill) begin
$display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), raddr, raddr_tag, read_tag);
if (tags_match) begin
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID));
end
end else if (do_lookup) begin
if (tags_match) begin
$display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), debug_wid, debug_pc, read_dirty, raddr, raddr_tag);
end else begin
$display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag, read_tag);
end
end
$display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), debug_wid, debug_pc, read_dirty, raddr, raddr_tag, read_tag);
end
end
end
`endif

View File

@@ -15,11 +15,12 @@ module VX_tag_store #(
input wire clk,
input wire reset,
input wire[`LINE_SELECT_BITS-1:0] addr,
input wire do_fill,
input wire[`LINE_SELECT_BITS-1:0] raddr,
input wire do_fill,
input wire[`TAG_SELECT_BITS-1:0] fill_tag,
input wire[`LINE_SELECT_BITS-1:0] waddr,
input wire do_write,
input wire[`TAG_SELECT_BITS-1:0] write_tag,
output wire[`TAG_SELECT_BITS-1:0] read_tag,
output wire read_valid,
@@ -36,10 +37,10 @@ module VX_tag_store #(
end
end else begin
if (do_fill) begin
valid[addr] <= 1;
dirty[addr] <= 0;
valid[raddr] <= 1;
dirty[raddr] <= 0;
end else if (do_write) begin
dirty[addr] <= 1;
dirty[waddr] <= 1;
end
end
end
@@ -50,15 +51,15 @@ module VX_tag_store #(
.RWCHECK(1)
) tags (
.clk(clk),
.addr(addr),
.addr(raddr),
.wren(do_fill),
.byteen(1'b1),
.rden(1'b1),
.din(write_tag),
.din(fill_tag),
.dout(read_tag)
);
assign read_valid = valid[addr];
assign read_dirty = dirty[addr];
assign read_valid = valid[raddr];
assign read_dirty = dirty[raddr];
endmodule

View File

@@ -192,15 +192,13 @@
"afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank, afu/vortex/cluster/core/mem_unit/smem/bank": {
"?valid_st0": 1,
"?valid_st1": 1,
"?valid_st2": 1,
"addr_st0": 32,
"addr_st1": 32,
"addr_st2": 32,
"is_fill_st0": 1,
"is_mshr_st0": 1,
"miss_st1": 1,
"force_miss_st1": 1,
"dirty_st1": 1,
"miss_st0": 1,
"force_miss_st0": 1,
"dirty_st0": 1,
"mshr_push": 1,
"?pipeline_stall": 1
}