diff --git a/rtl/VX_cache/VX_bank.v b/rtl/VX_cache/VX_bank.v
index eb98adce..239d1778 100644
--- a/rtl/VX_cache/VX_bank.v
+++ b/rtl/VX_cache/VX_bank.v
@@ -106,6 +106,18 @@ module VX_bank
 
 );
 
+	// Sticky snoop flag: once snp_req has been seen it stays set until reset.
+	// The FUNC_ID comparison keeps it at 0 in every cache other than the LL and
+	// L3 ones.
+	reg snoop_state = 0;
+
+	always @(posedge clk) begin
+		if (reset) begin
+			snoop_state <= 0;
+		end else begin
+			snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID));
+		end
+	end
 
 	wire snrq_pop;
 
@@ -504,7 +516,11 @@ module VX_bank
 	wire invalidate_fill;
+	// should_flush and dwbq_push are read by miss_add below but driven next to
+	// the DWB queue logic, so declare them here, ahead of their first use.
+	wire should_flush;
+	wire dwbq_push;
 
 	// Enqueue to miss reserv if it's a valid miss
-	assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
+	assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
 	assign miss_add_pc = pc_st2;
 	assign miss_add_addr = addr_st2;
 	assign miss_add_data = writeword_st2;
@@ -535,12 +551,28 @@ module VX_bank
 		.full (cwbq_full)
 	);
 
+	// Extra DWB push condition: snoop_state is set and the valid st2 request has
+	// a miss_add_mem_write other than `NO_MEM_WRITE and is neither a snoop nor a
+	// fill. When that push goes through, miss_add is held off.
+	assign should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `NO_MEM_WRITE) && !is_snp_st2 && !is_fill_st2;
 	// Enqueue to DWB Queue
-	wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
-	wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
-	wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data = readdata_st2;
+	assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
+	wire[31:0] dwbq_req_addr;
 	wire dwbq_empty;
-
+
+	wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data;
+	// LL/L3 caches: a should_flush push sends writeword_st2 and addr_st2 rather
+	// than readdata_st2 and the address rebuilt from readtag_st2.
+	if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
+		assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
+		assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
+	end else begin
+		assign dwbq_req_data = readdata_st2;
+		assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
+	end
+
+
+
 	wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_queue_full && !is_snp_st2;
 	wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
 	VX_fill_invalidator #(
diff --git a/rtl/VX_cache/VX_cache_wb_sel_merge.v b/rtl/VX_cache/VX_cache_wb_sel_merge.v
index 148b443b..12cd04c7 100644
--- a/rtl/VX_cache/VX_cache_wb_sel_merge.v
+++ b/rtl/VX_cache/VX_cache_wb_sel_merge.v
@@ -105,15 +105,36 @@ module VX_cache_wb_sel_merge
 			core_wb_pc = 0;
 			core_wb_address = 0;
 			for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
-				if (((FUNC_ID == `LLFUNC_ID) && found_bank && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) || ((FUNC_ID != `LLFUNC_ID) && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index]))) begin
-					core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
-					core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
-					core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
-					core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
-					per_bank_wb_pop_unqual[this_bank] = 1;
+				// A bank is skipped when an earlier bank in this loop has already set
+				// core_wb_valid for the same thread id, so a later bank cannot overwrite it.
+				if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
+
+					if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
+						core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
+						core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
+						core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
+						core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
+						per_bank_wb_pop_unqual[this_bank] = 1;
+					end else begin
+						per_bank_wb_pop_unqual[this_bank] = 0;
+					end
+
 				end else begin
-					per_bank_wb_pop_unqual[this_bank] = 0;
+
+
+					if (((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
+						core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
+						core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
+						core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
+						core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
+						per_bank_wb_pop_unqual[this_bank] = 1;
+					end else begin
+						per_bank_wb_pop_unqual[this_bank] = 0;
+
+					end
+
 				end
+
 			end
 		end
 	endgenerate
diff --git a/rtl/VX_cache/VX_tag_data_access.v b/rtl/VX_cache/VX_tag_data_access.v
index 39c54ba1..f66cfb85 100644
--- a/rtl/VX_cache/VX_tag_data_access.v
+++ b/rtl/VX_cache/VX_tag_data_access.v
@@ -290,6 +290,6 @@ module VX_tag_data_access
 	assign readtag_st1e = use_read_tag_st1e;
 	assign fill_sent = miss_st1e;
 	assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
-	assign invalidate_line = is_snp_st1e && miss_st1e;
+	assign invalidate_line = snoop_hit;
 
 endmodule
\ No newline at end of file
diff --git a/rtl/VX_define.v b/rtl/VX_define.v
index 870a5414..df54f8b0 100644
--- a/rtl/VX_define.v
+++ b/rtl/VX_define.v
@@ -253,6 +253,15 @@
 `define DFFSQ_SIZE 32
 `endif
 
+// Prefetcher
+`ifndef DPRFQ_SIZE
+`define DPRFQ_SIZE 32
+`endif
+
+`ifndef DPRFQ_STRIDE
+`define DPRFQ_STRIDE 0
+`endif
+
 // Fill Invalidator Size {Fill invalidator must be active}
 `ifndef DFILL_INVALIDAOR_SIZE
 `define DFILL_INVALIDAOR_SIZE 32
@@ -361,6 +370,15 @@
 `define IFFSQ_SIZE 8
 `endif
 
+// Prefetcher
+`ifndef IPRFQ_SIZE
+`define IPRFQ_SIZE 32
+`endif
+
+`ifndef IPRFQ_STRIDE
+`define IPRFQ_STRIDE 0
+`endif
+
 // Fill Invalidator Size {Fill invalidator must be active}
 `ifndef IFILL_INVALIDAOR_SIZE
 `define IFILL_INVALIDAOR_SIZE 32
@@ -467,6 +485,15 @@
 `define SFFSQ_SIZE 16
 `endif
 
+// Prefetcher
+`ifndef SPRFQ_SIZE
+`define SPRFQ_SIZE 4
+`endif
+
+`ifndef SPRFQ_STRIDE
+`define SPRFQ_STRIDE 0
+`endif
+
 // Fill Invalidator Size {Fill invalidator must be active}
 `ifndef SFILL_INVALIDAOR_SIZE
 `define SFILL_INVALIDAOR_SIZE 32
@@ -572,6 +599,15 @@
 `define LLFFSQ_SIZE 32
 `endif
 
+// Prefetcher
+`ifndef LLPRFQ_SIZE
+`define LLPRFQ_SIZE 32
+`endif
+
+`ifndef LLPRFQ_STRIDE
+`define LLPRFQ_STRIDE 0
+`endif
+
 // Fill Invalidator Size {Fill invalidator must be active}
 `ifndef LLFILL_INVALIDAOR_SIZE
 `define LLFILL_INVALIDAOR_SIZE 32
@@ -677,6 +713,15 @@
 `define L3FFSQ_SIZE 8
 `endif
 
+// Prefetcher
+`ifndef L3PRFQ_SIZE
+`define L3PRFQ_SIZE 32
+`endif
+
+`ifndef L3PRFQ_STRIDE
+`define L3PRFQ_STRIDE 0
+`endif
+
 // Fill Invalidator Size {Fill invalidator must be active}
 `ifndef L3FILL_INVALIDAOR_SIZE
 `define L3FILL_INVALIDAOR_SIZE 32
diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v
index c8f7b761..b033896e 100644
--- a/rtl/VX_dmem_controller.v
+++ b/rtl/VX_dmem_controller.v
@@ -95,6 +95,8 @@ module VX_dmem_controller (
 		.DFQQ_SIZE (`SDFQQ_SIZE),
 		.LLVQ_SIZE (`SLLVQ_SIZE),
 		.FFSQ_SIZE (`SFFSQ_SIZE),
+		.PRFQ_SIZE (`SPRFQ_SIZE),
+		.PRFQ_STRIDE (`SPRFQ_STRIDE),
 		.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
 		.SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES)
 		)
@@ -177,6 +179,8 @@ module VX_dmem_controller (
 		.DFQQ_SIZE (`DDFQQ_SIZE),
 		.LLVQ_SIZE (`DLLVQ_SIZE),
 		.FFSQ_SIZE (`DFFSQ_SIZE),
+		.PRFQ_SIZE (`DPRFQ_SIZE),
+		.PRFQ_STRIDE (`DPRFQ_STRIDE),
 		.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
 		.SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES)
 		)
@@ -263,6 +267,8 @@ module VX_dmem_controller (
 		.DFQQ_SIZE (`IDFQQ_SIZE),
 		.LLVQ_SIZE (`ILLVQ_SIZE),
 		.FFSQ_SIZE (`IFFSQ_SIZE),
+		.PRFQ_SIZE (`IPRFQ_SIZE),
+		.PRFQ_STRIDE (`IPRFQ_STRIDE),
 		.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
 		.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
 		)
diff --git a/rtl/Vortex_Cluster.v b/rtl/Vortex_Cluster.v
index 08aeb7ea..5b78b8cd 100644
--- a/rtl/Vortex_Cluster.v
+++ b/rtl/Vortex_Cluster.v
@@ -230,6 +230,8 @@ module Vortex_Cluster
 		.DFQQ_SIZE (`LLDFQQ_SIZE),
 		.LLVQ_SIZE (`LLLLVQ_SIZE),
 		.FFSQ_SIZE (`LLFFSQ_SIZE),
+		.PRFQ_SIZE (`LLPRFQ_SIZE),
+		.PRFQ_STRIDE (`LLPRFQ_STRIDE),
 		.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE),
 		.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES)
 		)
diff --git a/rtl/Vortex_SOC.v b/rtl/Vortex_SOC.v
index 718828a0..7dc5e6de 100644
--- a/rtl/Vortex_SOC.v
+++ b/rtl/Vortex_SOC.v
@@ -234,6 +234,8 @@ module Vortex_SOC (
 		.DFQQ_SIZE (`L3DFQQ_SIZE),
 		.LLVQ_SIZE (`L3LLVQ_SIZE),
 		.FFSQ_SIZE (`L3FFSQ_SIZE),
+		.PRFQ_SIZE (`L3PRFQ_SIZE),
+		.PRFQ_STRIDE (`L3PRFQ_STRIDE),
 		.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
 		.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
 		)