# Patch summary (text before the first "diff --git" header is commentary, not patch content).
#
# Adds a snoop-request path to the cache and a fill invalidator:
#   rtl/VX_cache/VX_bank.v               - snoop request queue (snr_queue), an is_snp bit carried through
#                                          stages st0 -> st1 -> st2, a VX_fill_invalidator instance, and the
#                                          new outputs dram_because_of_snp / dram_snp_full.
#   rtl/VX_cache/VX_cache.v              - snoop ports, per-bank steering of snp_req by bank-select bits.
#   rtl/VX_cache/VX_cache_config.v       - SNRQ_SIZE and the fill-invalidator knobs.
#   rtl/VX_cache/VX_cache_dram_req_arb.v - forwards the per-bank "because of snoop" flag.
#   rtl/VX_cache/VX_fill_invalidator.v   - new module.
#
# Corrections made in this revision of the patch:
#   1. VX_bank.v: snrq_pop now also requires !dfpq_pop and !mrvq_pop. It was gated by !reqq_pop only, so a
#      snoop could be popped in the same cycle as a fill response or a miss-reserve entry; qual_addr_st0
#      forwards a single source, so the popped snoop address was dropped.
#   2. VX_cache.v: the new output dram_snp_full had no driver. The per-bank flags are now collected in
#      per_bank_dram_snp_full and OR-reduced.
#   3. VX_fill_invalidator.v: procedural "assign" statements replaced by blocking assignments; the inactive
#      branch no longer uses a continuous assign on a reg port; the free-slot encoder now searches
#      ~fills_active; enqueue requires possible_fill; address matches only count for active entries; the
#      unused new_valids register is removed.
#   4. VX_cache_config.v: macro spelled FILL_INVALIDATOR_SIZE (old spelling kept as an alias).
#
# NOTE(review), left unchanged because the intent cannot be confirmed from this patch:
#   - VX_cache_dram_req_arb.v ORs per_bank_dram_because_of_snp into the writeback encoder while the request
#     address/data still come from the per-bank writeback signals - confirm this is intended.
#   - The output named dram_req_because_of_wb carries per_bank_dram_because_of_snp.
#   - Indentation and blank context lines were reconstructed from a whitespace-collapsed copy, and the blob
#     hashes on the "index" lines were not recomputed.
#
diff --git a/rtl/VX_cache/VX_bank.v b/rtl/VX_cache/VX_bank.v
index a7778af9..143a5ab6 100644
--- a/rtl/VX_cache/VX_bank.v
+++ b/rtl/VX_cache/VX_bank.v
@@ -28,6 +28,8 @@ module VX_bank (
     // Dram Fill Requests
     output wire       dram_fill_req,
     output wire[31:0] dram_fill_req_addr,
+    output wire       dram_because_of_snp,
+    output wire       dram_snp_full,
     input  wire       dram_fill_req_queue_full,
 
     // Dram Fill Response
@@ -40,10 +42,37 @@ module VX_bank (
     input  wire                            dram_wb_queue_pop,
     output wire                            dram_wb_req,
     output wire[31:0]                      dram_wb_req_addr,
-    output wire[`BANK_LINE_SIZE_RNG][31:0] dram_wb_req_data
+    output wire[`BANK_LINE_SIZE_RNG][31:0] dram_wb_req_data,
+
+    // Snp Request
+    input wire       snp_req,
+    input wire[31:0] snp_req_addr
 );
 
 
+
+
+    wire snrq_pop;
+    wire snrq_empty;
+    wire snrq_full;
+
+    wire       snrq_valid_st0;
+    wire[31:0] snrq_addr_st0;
+
+    reg snrq_hazard_st0;
+
+    assign snrq_valid_st0 = !snrq_empty;
+    VX_generic_queue #(.DATAW(32), .SIZE(`SNRQ_SIZE)) snr_queue(
+        .clk     (clk),
+        .reset   (reset),
+        .push    (snp_req),
+        .in_data (snp_req_addr),
+        .pop     (snrq_pop),
+        .out_data(snrq_addr_st0),
+        .empty   (snrq_empty),
+        .full    (snrq_full)
+    );
+
     wire dfpq_pop;
     wire dfpq_empty;
     wire dfpq_full;
@@ -171,6 +200,7 @@ module VX_bank (
     assign dfpq_pop = !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
     assign mrvq_pop = !dfpq_pop && mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
     assign reqq_pop = !mrvq_pop && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !reqq_hazard_st0;
+    assign snrq_pop = !dfpq_pop && !mrvq_pop && !reqq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0; // lowest priority: pop only when no other queue pops, since st0 forwards a single address
 
     integer st1_cycle;
 
@@ -179,11 +209,13 @@ module VX_bank (
         dfpq_hazard_st0 = 0;
         mrvq_hazard_st0 = 0;
         reqq_hazard_st0 = 0;
+        snrq_hazard_st0 = 0;
         for (st1_cycle = 0; st1_cycle < `STAGE_1_CYCLES; st1_cycle = st1_cycle + 1) begin
             if (valid_st1[st1_cycle] && going_to_write_st1[st1_cycle]) begin
                 if (dfpq_addr_st0    [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) dfpq_hazard_st0 = 1;
                 if (mrvq_addr_st0    [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) mrvq_hazard_st0 = 1;
                 if (reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) reqq_hazard_st0 = 1;
+                if (snrq_addr_st0    [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) snrq_hazard_st0 = 1;
             end
         end
     end
@@ -198,6 +230,7 @@ module VX_bank (
     wire [`BANK_LINE_SIZE_RNG][31:0] qual_writedata_st0;
     wire [`REQ_INST_META_SIZE-1:0]   qual_inst_meta_st0;
     wire                             qual_going_to_write_st0;
+    wire                             qual_is_snp;
 
     wire valid_st1         [`STAGE_1_CYCLES-1:0];
     wire going_to_write_st1[`STAGE_1_CYCLES-1:0];
@@ -206,13 +239,15 @@ module VX_bank (
     wire [`REQ_INST_META_SIZE-1:0]   inst_meta_st1 [`STAGE_1_CYCLES-1:0];
     wire                             is_fill_st1   [`STAGE_1_CYCLES-1:0];
     wire [`BANK_LINE_SIZE_RNG][31:0] writedata_st1 [`STAGE_1_CYCLES-1:0];
+    wire                             is_snp_st1    [`STAGE_1_CYCLES-1:0];
 
     assign qual_is_fill_st0 = dfpq_pop;
-    assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop;
+    assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
 
     assign qual_addr_st0 = dfpq_pop ? dfpq_addr_st0 :
                            mrvq_pop ? mrvq_addr_st0 :
                            reqq_pop ? reqq_req_addr_st0 :
+                           snrq_pop ? snrq_addr_st0 :
                            0;
 
     assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 :
@@ -228,27 +263,30 @@ module VX_bank (
     assign qual_going_to_write_st0 = dfpq_pop ? 1 :
                                      (mrvq_pop && (mrvq_mem_write_st0 != `NO_MEM_WRITE)) ? 1 :
                                      (reqq_pop && (reqq_req_mem_write_st0 != `NO_MEM_WRITE)) ? 1 :
-                                     0;
+                                     (snrq_pop) ? 1 :
+                                     0;
 
-    VX_generic_register #(.N( 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_c0 (
+    assign qual_is_snp = snrq_pop ? 1 : 0;
+
+    VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_c0 (
         .clk  (clk),
         .reset(reset),
         .stall(stall_bank_pipe),
         .flush(0),
-        .in   ({qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
-        .out  ({going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0]})
+        .in   ({qual_is_snp , qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
+        .out  ({is_snp_st1[0], going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0]})
     );
 
     genvar curr_stage;
     generate
         for (curr_stage = 1; curr_stage < `STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
-            VX_generic_register #(.N( 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_cc (
+            VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_cc (
                 .clk  (clk),
                 .reset(reset),
                 .stall(stall_bank_pipe),
                 .flush(0),
-                .in   ({going_to_write_st1[curr_stage-1], valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1]}),
-                .out  ({going_to_write_st1[curr_stage] , valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] })
+                .in   ({is_snp_st1[curr_stage-1], going_to_write_st1[curr_stage-1], valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1]}),
+                .out  ({is_snp_st1[curr_stage] , going_to_write_st1[curr_stage] , valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] })
             );
         end
     endgenerate
@@ -268,6 +306,9 @@ module VX_bank (
     wire [2:0]                            mem_write_st1e;
     wire [`vx_clog2(`NUMBER_REQUESTS)-1:0] tid_st1e;
     wire                                  fill_saw_dirty_st1e;
+    wire                                  is_snp_st1e;
+
+    assign is_snp_st1e = is_snp_st1[`STAGE_1_CYCLES-1];
 
     assign {rd_st1e, wb_st1e, warp_num_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[`STAGE_1_CYCLES-1];
 
@@ -290,6 +331,8 @@ module VX_bank (
         .mem_write_st1e(mem_write_st1e),
         .mem_read_st1e (mem_read_st1e),
 
+        .is_snp_st1e   (is_snp_st1e),
+
         // Read Data
         .readword_st1e (readword_st1e),
         .readdata_st1e (readdata_st1e),
@@ -312,14 +355,15 @@ module VX_bank (
     wire[`TAG_SELECT_SIZE_RNG] readtag_st2;
     wire                       is_fill_st2;
     wire                       fill_saw_dirty_st2;
+    wire                       is_snp_st2;
 
-    VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS)) st_1e_2 (
+    VX_generic_register #(.N( 1 + 1 + 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS)) st_1e_2 (
         .clk  (clk),
         .reset(reset),
         .stall(stall_bank_pipe),
         .flush(0),
-        .in   ({fill_saw_dirty_st1e, is_fill_st1[`STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[`STAGE_1_CYCLES-1], writeword_st1[`STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[`STAGE_1_CYCLES-1]}),
-        .out  ({fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
+        .in   ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[`STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[`STAGE_1_CYCLES-1], writeword_st1[`STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[`STAGE_1_CYCLES-1]}),
+        .out  ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
     );
 
 
@@ -331,7 +375,7 @@ module VX_bank (
 
 
     // Enqueue to CWB Queue
-    wire cwbq_push = valid_st2 && !miss_st2;
+    wire cwbq_push = (valid_st2 && !miss_st2);
     wire [31:0] cwbq_data = readword_st2;
     wire [`vx_clog2(`NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
     wire [4:0] cwbq_rd = miss_add_rd;
@@ -361,8 +405,23 @@ module VX_bank (
     wire dwbq_empty;
     wire dwbq_full;
 
+
+    wire invalidate_fill;
+    wire possible_fill = valid_st2 && miss_st2;
+    VX_fill_invalidator VX_fill_invalidator(
+        .clk             (clk),
+        .reset           (reset),
+        .possible_fill   (possible_fill),
+        .success_fill    (is_fill_st2),
+        .fill_addr       (addr_st2),
+
+        .invalidate_fill (invalidate_fill)
+    );
+
     // Enqueu in dram_fill_req
-    assign dram_fill_req = valid_st2 && miss_st2;
+    assign dram_fill_req = valid_st2 && miss_st2 && !invalidate_fill;
+    assign dram_because_of_snp = is_snp_st2 && valid_st2 && miss_st2;
+    assign dram_snp_full = snrq_full && snp_req;
     assign dram_fill_req_addr = addr_st2;
 
     assign dram_wb_req = !dwbq_empty;
diff --git a/rtl/VX_cache/VX_cache.v b/rtl/VX_cache/VX_cache.v
index 4eb25663..62210c02 100644
--- a/rtl/VX_cache/VX_cache.v
+++ b/rtl/VX_cache/VX_cache.v
@@ -38,7 +38,15 @@ module VX_cache (
     output wire                             dram_req_read,
     output wire [31:0]                      dram_req_addr,
     output wire [31:0]                      dram_req_size,
-    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data
+    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data,
+    output wire                             dram_req_because_of_wb,
+    output wire                             dram_snp_full,
+
+
+    // Snoop Req
+    input wire       snp_req,
+    input wire[31:0] snp_req_addr
+
 );
 
 
@@ -59,6 +67,9 @@ module VX_cache (
 
     wire[`NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop;
     wire[`NUMBER_BANKS-1:0] per_bank_dram_wb_req;
+    wire[`NUMBER_BANKS-1:0] per_bank_dram_because_of_snp;
+    wire[`NUMBER_BANKS-1:0] per_bank_dram_snp_full; // each bank's "snoop presented while its snoop queue is full" flag
+    assign dram_snp_full = |per_bank_dram_snp_full; // drive the cache-level output: asserted when any bank raises its flag
     wire[`NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
     wire[`NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_dram_wb_req_data;
 
@@ -77,6 +88,7 @@ module VX_cache (
         .per_bank_dram_fill_req_addr(per_bank_dram_fill_req_addr),
         .per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
         .per_bank_dram_wb_req       (per_bank_dram_wb_req),
+        .per_bank_dram_because_of_snp(per_bank_dram_because_of_snp),
         .per_bank_dram_wb_req_addr  (per_bank_dram_wb_req_addr),
         .per_bank_dram_wb_req_data  (per_bank_dram_wb_req_data),
         .dram_req                   (dram_req),
@@ -84,7 +96,8 @@ module VX_cache (
         .dram_req_read              (dram_req_read),
         .dram_req_addr              (dram_req_addr),
         .dram_req_size              (dram_req_size),
-        .dram_req_data              (dram_req_data)
+        .dram_req_data              (dram_req_data),
+        .dram_req_because_of_wb     (dram_req_because_of_wb)
     );
 
 
@@ -139,6 +152,8 @@ module VX_cache (
             wire curr_bank_dfqq_full;
 
             wire       curr_bank_dram_fill_req;
+            wire       curr_bank_dram_because_of_snp;
+            wire       curr_bank_dram_snp_full;
             wire[31:0] curr_bank_dram_fill_req_addr;
 
             wire curr_bank_dram_wb_queue_pop;
@@ -146,6 +161,9 @@ module VX_cache (
             wire[31:0] curr_bank_dram_wb_req_addr;
             wire[`BANK_LINE_SIZE_RNG][31:0] curr_bank_dram_wb_req_data;
 
+            wire       curr_bank_snp_req;
+            wire[31:0] curr_bank_snp_req_addr;
+
             wire curr_bank_reqq_full;
 
             // Core Req
@@ -180,10 +198,16 @@ module VX_cache (
             assign per_bank_dram_fill_accept[curr_bank] = curr_bank_dram_fill_accept;
 
             // Dram writeback request
-            assign curr_bank_dram_wb_queue_pop = per_bank_dram_wb_queue_pop[curr_bank];
-            assign per_bank_dram_wb_req[curr_bank] = curr_bank_dram_wb_req;
-            assign per_bank_dram_wb_req_addr[curr_bank] = curr_bank_dram_wb_req_addr;
-            assign per_bank_dram_wb_req_data[curr_bank] = curr_bank_dram_wb_req_data;
+            assign curr_bank_dram_wb_queue_pop = per_bank_dram_wb_queue_pop[curr_bank];
+            assign per_bank_dram_wb_req[curr_bank] = curr_bank_dram_wb_req;
+            assign per_bank_dram_because_of_snp[curr_bank] = curr_bank_dram_because_of_snp;
+            assign per_bank_dram_snp_full[curr_bank] = curr_bank_dram_snp_full;
+            assign per_bank_dram_wb_req_addr[curr_bank] = curr_bank_dram_wb_req_addr;
+            assign per_bank_dram_wb_req_data[curr_bank] = curr_bank_dram_wb_req_data;
+
+            // Snoop Request
+            assign curr_bank_snp_req = snp_req && (snp_req_addr[`BANK_SELECT_ADDR_RNG] == curr_bank);
+            assign curr_bank_snp_req_addr = snp_req_addr;
 
 
             VX_bank bank (
@@ -225,7 +249,13 @@ module VX_cache (
                 .dram_wb_queue_pop (curr_bank_dram_wb_queue_pop),
                 .dram_wb_req       (curr_bank_dram_wb_req),
                 .dram_wb_req_addr  (curr_bank_dram_wb_req_addr),
-                .dram_wb_req_data  (curr_bank_dram_wb_req_data)
+                .dram_wb_req_data  (curr_bank_dram_wb_req_data),
+                .dram_because_of_snp (curr_bank_dram_because_of_snp),
+                .dram_snp_full     (curr_bank_dram_snp_full),
+
+                // Snoop Request
+                .snp_req           (curr_bank_snp_req),
+                .snp_req_addr      (curr_bank_snp_req_addr)
             );
 
         end
diff --git a/rtl/VX_cache/VX_cache_config.v b/rtl/VX_cache/VX_cache_config.v
index 18ca6616..07a49242 100644
--- a/rtl/VX_cache/VX_cache_config.v
+++ b/rtl/VX_cache/VX_cache_config.v
@@ -27,6 +27,8 @@
     `define MRVQ_SIZE `REQQ_SIZE
     // Dram Fill Rsp Queue Size
     `define DFPQ_SIZE 2
+    // Snoop Req Queue
+    `define SNRQ_SIZE 8
 
     // Queues for writebacks Knobs {1, 2, 4, 8, ...}
     // Core Writeback Queue Size
@@ -36,6 +38,13 @@
     // Dram Fill Req Queue Size
     `define DFQQ_SIZE `REQQ_SIZE
 
+    // Fill Invalidator Active {Comment out define statement to disable the fill invalidator}
+    `define FILL_INVALIDATOR_ACTIVE 1
+    // Fill Invalidator Size {Fill invalidator must be active}
+    `define FILL_INVALIDATOR_SIZE 16
+    // Deprecated misspelling of FILL_INVALIDATOR_SIZE, kept so older references still resolve
+    `define FILL_INVALIDAOR_SIZE `FILL_INVALIDATOR_SIZE
+
     // Dram knobs
     `define SIMULATED_DRAM_LATENCY_CYCLES 10
 
diff --git a/rtl/VX_cache/VX_cache_dram_req_arb.v b/rtl/VX_cache/VX_cache_dram_req_arb.v
index 747a2ac6..af5f9ba6 100644
--- a/rtl/VX_cache/VX_cache_dram_req_arb.v
+++ b/rtl/VX_cache/VX_cache_dram_req_arb.v
@@ -15,6 +15,7 @@ module VX_cache_dram_req_arb (
     input wire[`NUMBER_BANKS-1:0]                             per_bank_dram_wb_req,
     input wire[`NUMBER_BANKS-1:0][31:0]                       per_bank_dram_wb_req_addr,
     input wire[`NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0]  per_bank_dram_wb_req_data,
+    input wire[`NUMBER_BANKS-1:0]                             per_bank_dram_because_of_snp,
 
     // real Dram request
     output wire                             dram_req,
@@ -22,7 +23,8 @@ module VX_cache_dram_req_arb (
     output wire                             dram_req_read,
     output wire [31:0]                      dram_req_addr,
     output wire [31:0]                      dram_req_size,
-    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data
+    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data,
+    output wire                             dram_req_because_of_wb
 );
 
 
@@ -47,8 +49,9 @@ module VX_cache_dram_req_arb (
     );
 
     wire[`vx_clog2(`NUMBER_BANKS)-1:0] dwb_bank;
+    wire[`NUMBER_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req | per_bank_dram_because_of_snp;
     VX_generic_priority_encoder #(.N(`NUMBER_BANKS)) VX_sel_dwb(
-        .valids(per_bank_dram_wb_req),
+        .valids(use_wb_valid),
         .index (dwb_bank),
         .found (dwb_valid)
     );
@@ -57,11 +60,12 @@ module VX_cache_dram_req_arb (
 
     assign per_bank_dram_wb_queue_pop = per_bank_dram_wb_req & (~(1 << dwb_bank));
 
-    assign dram_req = dwb_valid || dfqq_req;
-    assign dram_req_write = dwb_valid;
-    assign dram_req_read = dfqq_req && !dwb_valid;
-    assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr) & `BASE_ADDR_MASK;
-    assign dram_req_size = `BANK_LINE_SIZE_BYTES;
-    assign dram_req_data = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
+    assign dram_req = dwb_valid || dfqq_req;
+    assign dram_req_write = dwb_valid;
+    assign dram_req_read = dfqq_req && !dwb_valid;
+    assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr) & `BASE_ADDR_MASK;
+    assign dram_req_size = `BANK_LINE_SIZE_BYTES;
+    assign dram_req_data = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
+    assign dram_req_because_of_wb = dwb_valid ? per_bank_dram_because_of_snp[dwb_bank] : 0;
 
 endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_fill_invalidator.v b/rtl/VX_cache/VX_fill_invalidator.v
new file mode 100644
index 00000000..c9546610
--- /dev/null
+++ b/rtl/VX_cache/VX_fill_invalidator.v
@@ -0,0 +1,101 @@
+`include "VX_cache_config.v"
+
+// VX_fill_invalidator: suppresses duplicate DRAM fill requests.
+//
+// A small table remembers the line address of each fill request that has
+// been let through. While an entry is active, a further `possible_fill` for
+// the same line raises `invalidate_fill`; the entry is released when a
+// `success_fill` for that line is observed.
+//
+//   possible_fill   - a fill request for `fill_addr` is being considered
+//   success_fill    - a fill for `fill_addr` is completing
+//   fill_addr       - address compared on bits [31:`LINE_SELECT_ADDR_START]
+//   invalidate_fill - combinational; 1 when the request duplicates an
+//                     active entry
+module VX_fill_invalidator (
+    input  wire       clk,
+    input  wire       reset,
+
+    input  wire       possible_fill,
+    input  wire       success_fill,
+
+    input  wire[31:0] fill_addr,
+
+    output reg        invalidate_fill
+);
+
+`ifndef FILL_INVALIDATOR_ACTIVE
+
+    // Invalidator compiled out: never suppress a fill request. The port is a
+    // reg, so it is driven procedurally rather than with a continuous assign.
+    always @(*) begin
+        invalidate_fill = 0;
+    end
+
+`else
+
+    reg[`FILL_INVALIDATOR_SIZE-1:0]       fills_active;   // entry in use
+    reg[`FILL_INVALIDATOR_SIZE-1:0][31:0] fills_address;  // tracked address
+
+    reg success_found;
+    reg[(`vx_clog2(`FILL_INVALIDATOR_SIZE))-1:0] success_index;
+
+    // Associative lookup of fill_addr among the active entries.
+    integer curr_fill;
+    always @(*) begin
+        invalidate_fill = 0;
+        success_found   = 0;
+        success_index   = 0;
+        for (curr_fill = 0; curr_fill < `FILL_INVALIDATOR_SIZE; curr_fill = curr_fill + 1) begin
+            // Only active entries take part; a stale address left in a freed
+            // slot must neither block a request nor be "released" again.
+            if (fills_active[curr_fill] &&
+                (fill_addr[31:`LINE_SELECT_ADDR_START] == fills_address[curr_fill][31:`LINE_SELECT_ADDR_START])) begin
+                if (possible_fill) begin
+                    invalidate_fill = 1;
+                end
+
+                if (success_fill) begin
+                    success_found = 1;
+                    success_index = curr_fill;
+                end
+            end
+        end
+    end
+
+    // Pick a free slot for a new entry: the encoder searches the inverted
+    // occupancy vector, so `found` presumably means "at least one slot is
+    // free" (confirm against VX_generic_priority_encoder).
+    wire [(`vx_clog2(`FILL_INVALIDATOR_SIZE))-1:0] enqueue_index;
+    wire                                           enqueue_found;
+
+    VX_generic_priority_encoder #(.N(`FILL_INVALIDATOR_SIZE)) VX_sel_free(
+        .valids(~fills_active),
+        .index (enqueue_index),
+        .found (enqueue_found)
+    );
+
+    always @(posedge clk) begin
+        if (reset) begin
+            fills_active  <= 0;
+            fills_address <= 0;
+        end else begin
+            // Record a request only when one is actually presented and it was
+            // not suppressed as a duplicate.
+            // NOTE(review): the entry is recorded in the cycle possible_fill is
+            // seen; if the requester can hold the same request for several
+            // cycles, later cycles will be flagged as duplicates - confirm.
+            if (possible_fill && enqueue_found && !invalidate_fill) begin
+                fills_active[enqueue_index]  <= 1;
+                fills_address[enqueue_index] <= fill_addr;
+            end
+
+            if (success_found) begin
+                fills_active[success_index] <= 0;
+            end
+        end
+    end
+
+`endif
+
+endmodule
\ No newline at end of file
# Commentary (not patch content): the file sections below wire a snoop indication into the tag/data
# stage and expose the snoop signals at the dmem controller.
#
#   rtl/VX_cache/VX_tag_data_access.v    - new input is_snp_st1e; miss_st1e additionally flags a snoop whose
#                                          indexed line is not valid; invalidate_line = is_snp_st1e && !miss_st1e
#                                          is passed to VX_tag_data_structure.
#   rtl/VX_cache/VX_tag_data_structure.v - new input invalidate; when set, the valid bit of the line indexed
#                                          by write_addr is cleared.
#   rtl/VX_dmem_controller.v             - connects the cache's dram_req_because_of_wb / dram_snp_full outputs
#                                          to the dcache DRAM request interface and ties snp_req and
#                                          snp_req_addr to 0.
#   rtl/interfaces/*                     - adds dram_because_of_snp / dram_snp_full to the DRAM request
#                                          interface and introduces VX_gpu_dcache_snp_req_inter.
#
# NOTE(review): whether a snoop's tag is compared before invalidation depends on valid_req_st1e, which is
# not visible in this patch - confirm. The invalidate path clears only the valid bit; no writeback of a
# dirty line is visible here - confirm it is handled elsewhere.
# Indentation and blank context lines were reconstructed from a whitespace-collapsed copy.
#
diff --git a/rtl/VX_cache/VX_tag_data_access.v b/rtl/VX_cache/VX_tag_data_access.v
index 2f342ab4..3ce6622c 100644
--- a/rtl/VX_cache/VX_tag_data_access.v
+++ b/rtl/VX_cache/VX_tag_data_access.v
@@ -4,7 +4,7 @@ module VX_tag_data_access (
     input wire clk,
     input wire reset,
     input wire stall,
-
+    input wire is_snp_st1e,
     // Initial Reading
     input wire[31:0] readaddr_st10,
 
@@ -49,6 +49,7 @@ module VX_tag_data_access (
 
 
     wire fill_sent;
+    wire invalidate_line;
     VX_tag_data_structure VX_tag_data_structure(
         .clk         (clk),
         .reset       (reset),
@@ -59,6 +60,7 @@ module VX_tag_data_access (
         .read_tag    (qual_read_tag_st1),
         .read_data   (qual_read_data_st1),
 
+        .invalidate  (invalidate_line),
         .write_enable(use_write_enable),
         .write_fill  (writefill_st1e),
         .write_addr  (writeaddr_st1e),
@@ -191,12 +193,13 @@ module VX_tag_data_access (
     ///////////////////////
 
     assign readword_st1e = data_Qual;
-    assign miss_st1e = (valid_req_st1e && !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && !writefill_st1e && (writeaddr_st1e[`TAG_SELECT_ADDR_RNG] != use_read_tag_st1e));
+    assign miss_st1e = ((valid_req_st1e || is_snp_st1e) && !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && !writefill_st1e && (writeaddr_st1e[`TAG_SELECT_ADDR_RNG] != use_read_tag_st1e));
     assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
     assign readdata_st1e = use_read_data_st1e;
     assign readtag_st1e = use_read_tag_st1e;
     assign fill_sent = miss_st1e;
     assign fill_saw_dirty_st1e = force_write && dirty_st1e;
+    assign invalidate_line = is_snp_st1e && !miss_st1e;
 
 endmodule
 
diff --git a/rtl/VX_cache/VX_tag_data_structure.v b/rtl/VX_cache/VX_tag_data_structure.v
index bad6f0ea..42c5d086 100644
--- a/rtl/VX_cache/VX_tag_data_structure.v
+++ b/rtl/VX_cache/VX_tag_data_structure.v
@@ -8,6 +8,7 @@ module VX_tag_data_structure (
     output wire[`TAG_SELECT_SIZE_RNG]       read_tag,
     output wire[`BANK_LINE_SIZE_RNG][31:0]  read_data,
 
+    input wire                              invalidate,
     input wire[`BANK_LINE_SIZE_RNG][3:0]    write_enable,
     input wire                              write_fill,
     input wire[31:0]                        write_addr,
@@ -43,6 +44,10 @@ module VX_tag_data_structure (
                 dirty[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
             end
 
+            if (invalidate) begin
+                valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
+            end
+
             for (f = 0; f < `BANK_LINE_SIZE_WORDS; f = f + 1) begin
                 if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];
                 if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ];
diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v
index d029a123..18226a3b 100644
--- a/rtl/VX_dmem_controller.v
+++ b/rtl/VX_dmem_controller.v
@@ -112,7 +112,15 @@ module VX_dmem_controller (
         .dram_req_read (VX_gpu_dcache_dram_req.dram_req_read),
         .dram_req_addr (VX_gpu_dcache_dram_req.dram_req_addr),
         .dram_req_size (VX_gpu_dcache_dram_req.dram_req_size),
-        .dram_req_data (VX_gpu_dcache_dram_req.dram_req_data)
+        .dram_req_data (VX_gpu_dcache_dram_req.dram_req_data),
+
+        // Snoop Response
+        .dram_req_because_of_wb(VX_gpu_dcache_dram_req.dram_because_of_snp),
+        .dram_snp_full (VX_gpu_dcache_dram_req.dram_snp_full),
+
+        // Snoop Request
+        .snp_req (0),
+        .snp_req_addr (0)
     );
 
 
diff --git a/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v b/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
index 58170aa3..ccc58dc6 100644
--- a/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
+++ b/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v
@@ -16,6 +16,10 @@ interface VX_gpu_dcache_dram_req_inter ();
     wire [31:0] dram_req_size;
     wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data;
 
+    // Snoop
+    wire dram_because_of_snp;
+    wire dram_snp_full;
+
     // DRAM Cache can't accept response
     wire dram_fill_accept;
 
diff --git a/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v b/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
new file mode 100644
index 00000000..bc7695d4
--- /dev/null
+++ b/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v
@@ -0,0 +1,18 @@
+
+
+
+`include "../VX_cache/VX_cache_config.v"
+
+`ifndef VX_GPU_SNP_REQ
+
+`define VX_GPU_SNP_REQ
+
+interface VX_gpu_dcache_snp_req_inter ();
+    // Snoop Req
+    wire snp_req;
+    wire [31:0] snp_req_addr;
+
+endinterface
+
+
+`endif
\ No newline at end of file