diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index c98bf2b5..83dbad0a 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -5,9 +5,9 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#DEBUG = 1 +DEBUG = 1 CFLAGS += -fPIC diff --git a/driver/tests/basic/common.h b/driver/tests/basic/common.h index 97310d5a..dfb4b0a2 100644 --- a/driver/tests/basic/common.h +++ b/driver/tests/basic/common.h @@ -3,6 +3,6 @@ #define DEV_MEM_SRC_ADDR 0x10000000 #define DEV_MEM_DST_ADDR 0x20000000 -#define NUM_BLOCKS 16 +#define NUM_BLOCKS 1 #endif \ No newline at end of file diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 05143e85..824960c1 100755 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 3de18fd0..123e0a01 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,7 +111,7 @@ `define DDFPQ_SIZE 32 `endif -// Snoop Req Queue +// Snoop Req Queue Size `ifndef DSNRQ_SIZE `define DSNRQ_SIZE 32 `endif @@ -136,9 +136,9 @@ `define DLLVQ_SIZE 0 `endif -// Fill Forward SNP Queue -`ifndef DFFSQ_SIZE -`define DFFSQ_SIZE 32 +// Snoop Rsp Queue Size +`ifndef DSRPQ_SIZE +`define DSRPQ_SIZE 32 `endif // Prefetcher @@ -197,7 +197,7 @@ `define IDFPQ_SIZE 32 `endif -// Snoop Req Queue +// Snoop Req Queue Size `ifndef ISNRQ_SIZE `define ISNRQ_SIZE 32 `endif @@ -222,9 +222,9 @@ `define ILLVQ_SIZE 16 `endif -// Fill Forward SNP Queue -`ifndef IFFSQ_SIZE -`define IFFSQ_SIZE 8 +// Snoop Rsp Queue Size +`ifndef ISRPQ_SIZE +`define ISRPQ_SIZE 8 `endif // Prefetcher @@ -283,7 +283,7 @@ `define SDFPQ_SIZE 0 `endif -// Snoop Req Queue +// Snoop Req Queue Size `ifndef SSNRQ_SIZE `define SSNRQ_SIZE 16 `endif @@ 
-308,9 +308,9 @@ `define SLLVQ_SIZE 16 `endif -// Fill Forward SNP Queue -`ifndef SFFSQ_SIZE -`define SFFSQ_SIZE 16 +// Snoop Rsp Queue Size +`ifndef SSRPQ_SIZE +`define SSRPQ_SIZE 16 `endif // Prefetcher @@ -369,7 +369,7 @@ `define L2DFPQ_SIZE 32 `endif -// Snoop Req Queue +// Snoop Req Queue Size `ifndef L2SNRQ_SIZE `define L2SNRQ_SIZE 32 `endif @@ -394,9 +394,9 @@ `define L2LLVQ_SIZE 32 `endif -// Fill Forward SNP Queue -`ifndef L2FFSQ_SIZE -`define L2FFSQ_SIZE 32 +// Snoop Rsp Queue Size +`ifndef L2SRPQ_SIZE +`define L2SRPQ_SIZE 32 `endif // Prefetcher @@ -455,7 +455,7 @@ `define L3DFPQ_SIZE 32 `endif -// Snoop Req Queue +// Snoop Req Queue Size `ifndef L3SNRQ_SIZE `define L3SNRQ_SIZE 32 `endif @@ -480,9 +480,9 @@ `define L3LLVQ_SIZE 0 `endif -// Fill Forward SNP Queue -`ifndef L3FFSQ_SIZE -`define L3FFSQ_SIZE 8 +// Snoop Rsp Queue Size +`ifndef L3SRPQ_SIZE +`define L3SRPQ_SIZE 8 `endif // Prefetcher diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index a9c857bc..397aa0f0 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -68,7 +68,7 @@ module VX_csr_pipe #( assign csr_wb_if.wb = wb_s2; genvar i; - for (i = 0; i < `NUM_THREADS; i = i + 1) begin + for (i = 0; i < `NUM_THREADS; i++) begin assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i : (csr_address_s2 == `CSR_GTID) ? 
(csr_read_data_s2 * `NUM_THREADS + i) : csr_read_data_s2; diff --git a/hw/rtl/VX_csr_wrapper.v b/hw/rtl/VX_csr_wrapper.v index 5c34b8d4..87e24a23 100644 --- a/hw/rtl/VX_csr_wrapper.v +++ b/hw/rtl/VX_csr_wrapper.v @@ -11,11 +11,11 @@ module VX_csr_wrapper ( genvar i; generate - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init + for (i = 0; i < `NUM_THREADS; i++) begin : thread_ids_init assign thread_ids[i] = i; end - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init + for (i = 0; i < `NUM_THREADS; i++) begin : warp_ids_init assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; end endgenerate diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 266e75a4..ef8a09ff 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -29,6 +29,11 @@ if (!(cond)) $error(msg); \ endgenerate +`define UNUSED(x) \ + `IGNORE_WARNINGS_BEGIN \ + if (x != 0) begin end \ + `IGNORE_WARNINGS_END + `define CLOG2(x) $clog2(x) `define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > x) ? 1 : 0)) `define LOG2UP(x) ((x > 1) ? $clog2(x) : 1) @@ -138,6 +143,9 @@ // Number of Word requests per cycle {1, 2, 4, 8, ...} `define DNUM_REQUESTS `NUM_THREADS +// Snoop request tag bits +`define DSNP_TAG_WIDTH `LOG2UP(`L2SNRQ_SIZE) + ////////////////////////// Icache Configurable Knobs ////////////////////////// // DRAM request data bits @@ -177,6 +185,9 @@ // DRAM request tag bits `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2))) +// Snoop request tag bits +`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : 1) + // Number of Word requests per cycle {1, 2, 4, 8, ...} `define L2NUM_REQUESTS (2*`NUM_CORES) @@ -191,6 +202,9 @@ // DRAM request tag bits `define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? 
`L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) +// Snoop request tag bits +`define L3SNP_TAG_WIDTH 1 + // Number of Word requests per cycle {1, 2, 4, 8, ...} `define L3NUM_REQUESTS `NUM_CLUSTERS diff --git a/hw/rtl/VX_dmem_ctrl.v b/hw/rtl/VX_dmem_ctrl.v index 8a8a9f4a..6531cdc7 100644 --- a/hw/rtl/VX_dmem_ctrl.v +++ b/hw/rtl/VX_dmem_ctrl.v @@ -12,6 +12,7 @@ module VX_dmem_ctrl ( VX_cache_dram_req_if dcache_dram_req_if, VX_cache_dram_rsp_if dcache_dram_rsp_if, VX_cache_snp_req_if dcache_snp_req_if, + VX_cache_snp_rsp_if dcache_snp_rsp_if, // Core <-> Icache VX_cache_core_req_if icache_core_req_if, @@ -63,7 +64,7 @@ module VX_dmem_ctrl ( .DWBQ_SIZE (`SDWBQ_SIZE), .DFQQ_SIZE (`SDFQQ_SIZE), .LLVQ_SIZE (`SLLVQ_SIZE), - .FFSQ_SIZE (`SFFSQ_SIZE), + .SRPQ_SIZE (`SSRPQ_SIZE), .PRFQ_SIZE (`SPRFQ_SIZE), .PRFQ_STRIDE (`SPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), @@ -110,12 +111,24 @@ module VX_dmem_ctrl ( // Snoop request .snp_req_valid (0), .snp_req_addr (0), + .snp_req_tag (0), .snp_req_ready (), - // Snoop forwarding - .snp_fwd_valid (), - .snp_fwd_addr (), - .snp_fwd_ready (0) + // Snoop response + .snp_rsp_valid (), + .snp_rsp_tag (), + .snp_rsp_ready (0), + + // Snoop forward out + .snp_fwdout_valid (), + .snp_fwdout_addr (), + .snp_fwdout_tag (), + .snp_fwdout_ready (0), + + // Snoop forward in + .snp_fwdin_valid (0), + .snp_fwdin_tag (0), + .snp_fwdin_ready () `IGNORE_WARNINGS_END ); @@ -134,7 +147,7 @@ module VX_dmem_ctrl ( .DWBQ_SIZE (`DDWBQ_SIZE), .DFQQ_SIZE (`DDFQQ_SIZE), .LLVQ_SIZE (`DLLVQ_SIZE), - .FFSQ_SIZE (`DFFSQ_SIZE), + .SRPQ_SIZE (`DSRPQ_SIZE), .PRFQ_SIZE (`DPRFQ_SIZE), .PRFQ_STRIDE (`DPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), @@ -143,7 +156,8 @@ module VX_dmem_ctrl ( .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), .CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS), - .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH) + .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH), + .SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH) ) gpu_dcache ( .clk (clk), .reset (reset), @@ 
-177,16 +191,28 @@ module VX_dmem_ctrl ( .dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag), .dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready), - // Snoop Request + // Snoop request .snp_req_valid (dcache_snp_req_if.snp_req_valid), .snp_req_addr (dcache_snp_req_if.snp_req_addr), + .snp_req_tag (dcache_snp_req_if.snp_req_tag), .snp_req_ready (dcache_snp_req_if.snp_req_ready), + + // Snoop response + .snp_rsp_valid (dcache_snp_rsp_if.snp_rsp_valid), + .snp_rsp_tag (dcache_snp_rsp_if.snp_rsp_tag), + .snp_rsp_ready (dcache_snp_rsp_if.snp_rsp_ready), `IGNORE_WARNINGS_BEGIN - // Snoop Forward - .snp_fwd_valid (), - .snp_fwd_addr (), - .snp_fwd_ready (0) + // Snoop forward out + .snp_fwdout_valid (), + .snp_fwdout_addr (), + .snp_fwdout_tag (), + .snp_fwdout_ready (0), + + // Snoop forward in + .snp_fwdin_valid (0), + .snp_fwdin_tag (0), + .snp_fwdin_ready () `IGNORE_WARNINGS_END ); @@ -205,7 +231,7 @@ module VX_dmem_ctrl ( .DWBQ_SIZE (`IDWBQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE), .LLVQ_SIZE (`ILLVQ_SIZE), - .FFSQ_SIZE (`IFFSQ_SIZE), + .SRPQ_SIZE (`ISRPQ_SIZE), .PRFQ_SIZE (`IPRFQ_SIZE), .PRFQ_STRIDE (`IPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), @@ -249,15 +275,27 @@ module VX_dmem_ctrl ( .dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready), `IGNORE_WARNINGS_BEGIN - // Snoop Request + // Snoop request .snp_req_valid (0), .snp_req_addr (0), + .snp_req_tag (0), .snp_req_ready (), - // Snoop Forward - .snp_fwd_valid (), - .snp_fwd_addr (), - .snp_fwd_ready (0) + // Snoop response + .snp_rsp_valid (), + .snp_rsp_tag (), + .snp_rsp_ready (0), + + // Snoop forward out + .snp_fwdout_valid (), + .snp_fwdout_addr (), + .snp_fwdout_tag (), + .snp_fwdout_ready (0), + + // Snoop forward in + .snp_fwdin_valid (0), + .snp_fwdin_tag (0), + .snp_fwdin_ready () `IGNORE_WARNINGS_END ); diff --git a/hw/rtl/VX_dram_arb.v b/hw/rtl/VX_dram_arb.v index 5ebbbd6e..b8f1262a 100644 --- a/hw/rtl/VX_dram_arb.v +++ b/hw/rtl/VX_dram_arb.v @@ -15,7 +15,7 @@ module VX_dram_arb #( input wire 
[NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_req_data, input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, - output reg [NUM_REQUESTS-1:0] core_req_ready, + output wire [NUM_REQUESTS-1:0] core_req_ready, // Core response output wire [NUM_REQUESTS-1:0] core_rsp_valid, @@ -24,11 +24,11 @@ module VX_dram_arb #( input wire [NUM_REQUESTS-1:0] core_rsp_ready, // DRAM request - output reg dram_req_read, - output reg dram_req_write, - output reg [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, - output reg [`BANK_LINE_WIDTH-1:0] dram_req_data, - output reg [DRAM_TAG_WIDTH-1:0] dram_req_tag, + output wire dram_req_read, + output wire dram_req_write, + output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, + output wire [`BANK_LINE_WIDTH-1:0] dram_req_data, + output wire [DRAM_TAG_WIDTH-1:0] dram_req_tag, input wire dram_req_ready, // DRAM response @@ -37,47 +37,34 @@ module VX_dram_arb #( input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready ); - reg [`REQS_BITS-1:0] bus_req_idx; + reg [`REQS_BITS-1:0] bus_req_sel; always @(posedge clk) begin if (reset) begin - bus_req_idx <= 0; + bus_req_sel <= 0; end else begin - bus_req_idx <= bus_req_idx + 1; + bus_req_sel <= bus_req_sel + 1; end end - integer i; - generate - always @(*) begin - dram_req_read = 0; - dram_req_write = 0; - dram_req_addr = 'z; - dram_req_data = 'z; - dram_req_tag = 'z; + assign dram_req_read = core_req_read [bus_req_sel]; + assign dram_req_write = core_req_write [bus_req_sel]; + assign dram_req_addr = core_req_addr [bus_req_sel]; + assign dram_req_data = core_req_data [bus_req_sel]; + assign dram_req_tag = {core_req_tag [bus_req_sel], (`REQS_BITS)'(bus_req_sel)}; - for (i = 0; i < NUM_REQUESTS; i++) begin - if (bus_req_idx == (`REQS_BITS)'(i)) begin - dram_req_read = core_req_read[i]; - dram_req_write = core_req_write[i]; - dram_req_addr = core_req_addr[i]; - dram_req_data = core_req_data[i]; - dram_req_tag = 
{core_req_tag[i], (`REQS_BITS)'(i)}; - core_req_ready[i] = dram_req_ready; - end else begin - core_req_ready[i] = 0; - end - end - end - endgenerate - - genvar j; - wire [`REQS_BITS-1:0] bus_rsp_idx = dram_rsp_tag[`REQS_BITS-1:0]; - for (j = 0; j < NUM_REQUESTS; j++) begin - assign core_rsp_valid[j] = dram_rsp_valid && (bus_rsp_idx == (`REQS_BITS)'(j)); - assign core_rsp_data[j] = dram_rsp_data; - assign core_rsp_tag[j] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH]; + genvar i; + for (i = 0; i < NUM_REQUESTS; i++) begin + assign core_req_ready[i] = dram_req_ready && (bus_req_sel == `REQS_BITS'(i)); end - assign dram_rsp_ready = core_rsp_ready[bus_rsp_idx]; + + wire [`REQS_BITS-1:0] bus_rsp_sel = dram_rsp_tag[`REQS_BITS-1:0]; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign core_rsp_valid[i] = dram_rsp_valid && (bus_rsp_sel == `REQS_BITS'(i)); + assign core_rsp_data[i] = dram_rsp_data; + assign core_rsp_tag[i] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH]; + end + assign dram_rsp_ready = core_rsp_ready[bus_rsp_sel]; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_exec_unit.v b/hw/rtl/VX_exec_unit.v index de3fb5e3..a43de1b7 100644 --- a/hw/rtl/VX_exec_unit.v +++ b/hw/rtl/VX_exec_unit.v @@ -47,7 +47,7 @@ module VX_exec_unit ( genvar i; generate - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs + for (i = 0; i < `NUM_THREADS; i++) begin : alu_defs VX_alu_unit alu_unit ( .clk (clk), .reset (reset), @@ -102,7 +102,7 @@ module VX_exec_unit ( wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; generate - for (i = 0; i < `NUM_THREADS; i=i+1) begin + for (i = 0; i < `NUM_THREADS; i++) begin assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; end endgenerate diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index 542a17bb..4a83569c 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -39,7 +39,7 @@ module VX_gpr ( wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; genvar i; - for (i = 0; i < 
`NUM_THREADS; i++) begin wire local_write = write_enable & writeback_if.wb_valid[i]; assign write_bit_mask[i] = {`NUM_GPRS{~local_write}}; end @@ -57,8 +57,8 @@ module VX_gpr ( `ifndef SYN genvar j; - for (i = 0; i < `NUM_THREADS; i = i + 1) begin - for (j = 0; j < `NUM_GPRS; j = j + 1) begin + for (i = 0; i < `NUM_THREADS; i++) begin + for (j = 0; j < `NUM_GPRS; j++) begin assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j]; assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j]; end diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index b34bd8f0..d89843d3 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -21,7 +21,7 @@ module VX_gpr_ram ( end else begin if (we) begin integer i; - for (i = 0; i < `NUM_THREADS; i = i + 1) begin + for (i = 0; i < `NUM_THREADS; i++) begin if (be[i]) begin ram[waddr][i][0] <= wdata[i][7:0]; ram[waddr][i][1] <= wdata[i][15:8]; diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index 2f22deb7..f2dddb2d 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -17,7 +17,7 @@ module VX_gpr_wrapper ( wire[`NUM_THREADS-1:0][31:0] jal_data; genvar i; generate - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign + for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign assign jal_data[i] = gpr_jal_if.curr_PC; end endgenerate @@ -47,7 +47,7 @@ module VX_gpr_wrapper ( `endif generate - for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs + for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs wire valid_write_request = i == writeback_if.warp_num; VX_gpr gpr( .clk (clk), diff --git a/hw/rtl/VX_gpu_inst.v b/hw/rtl/VX_gpu_inst.v index 4029c17d..bfde3b60 100644 --- a/hw/rtl/VX_gpu_inst.v +++ b/hw/rtl/VX_gpu_inst.v @@ -15,7 +15,7 @@ module VX_gpu_inst ( genvar i; generate - for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init + for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init assign 
tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0]; end endgenerate @@ -34,7 +34,7 @@ module VX_gpu_inst ( wire[`NUM_WARPS-1:0] wspawn_new_active; generate - for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init + for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0]; end endgenerate @@ -56,7 +56,7 @@ module VX_gpu_inst ( wire[`NUM_THREADS-1:0] split_new_later_mask; generate - for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init + for (i = 0; i < `NUM_THREADS; i++) begin : masks_init wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1); assign split_new_use_mask[i] = curr_valids[i] & (curr_bool); assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool); diff --git a/hw/rtl/VX_inst_multiplex.v b/hw/rtl/VX_inst_multiplex.v index 199fd83c..f1ef35db 100644 --- a/hw/rtl/VX_inst_multiplex.v +++ b/hw/rtl/VX_inst_multiplex.v @@ -23,7 +23,7 @@ module VX_inst_multiplex ( genvar i; generate - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init + for (i = 0; i < `NUM_THREADS; i++) begin : mask_init assign is_mem_mask[i] = is_mem; assign is_gpu_mask[i] = is_gpu; assign is_csr_mask[i] = is_csr; diff --git a/hw/rtl/VX_lsu_addr_gen.v b/hw/rtl/VX_lsu_addr_gen.v index 6785b0fe..a59d0dd4 100644 --- a/hw/rtl/VX_lsu_addr_gen.v +++ b/hw/rtl/VX_lsu_addr_gen.v @@ -8,7 +8,7 @@ module VX_lsu_addr_gen ( ); genvar i; generate - for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses + for (i = 0; i < `NUM_THREADS; i++) begin : addresses assign address[i] = base_address[i] + offset; end endgenerate diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index bbff0c83..4bb39e7f 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -52,7 +52,7 @@ module VX_scheduler ( always @(posedge clk) begin if (reset) begin for (w = 0; w < `NUM_WARPS; w=w+1) begin - for (i = 0; i < 32; i = i + 1) begin + for (i = 0; i < 32; i++) begin 
rename_table[w][i] <= 0; end end diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 40f96071..f163f9c6 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -260,7 +260,7 @@ module VX_warp_sched ( genvar i; generate - for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks + for (i = 0; i < `NUM_WARPS; i++) begin : stacks wire correct_warp_s = (i == split_warp_num); wire correct_warp_j = (i == join_warp_num); diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 9cf751c7..0ebe6ba8 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -8,7 +8,7 @@ module Vortex #( input wire clk, input wire reset, - // DRAM Dcache Req + // DRAM Dcache request output wire D_dram_req_read, output wire D_dram_req_write, output wire [`DDRAM_ADDR_WIDTH-1:0] D_dram_req_addr, @@ -16,13 +16,13 @@ module Vortex #( output wire [`DDRAM_TAG_WIDTH-1:0] D_dram_req_tag, input wire D_dram_req_ready, - // DRAM Dcache Rsp + // DRAM Dcache response input wire D_dram_rsp_valid, input wire [`DDRAM_LINE_WIDTH-1:0] D_dram_rsp_data, input wire [`DDRAM_TAG_WIDTH-1:0] D_dram_rsp_tag, output wire D_dram_rsp_ready, - // DRAM Icache Req + // DRAM Icache request output wire I_dram_req_read, output wire I_dram_req_write, output wire [`IDRAM_ADDR_WIDTH-1:0] I_dram_req_addr, @@ -30,17 +30,22 @@ module Vortex #( output wire [`IDRAM_TAG_WIDTH-1:0] I_dram_req_tag, input wire I_dram_req_ready, - // DRAM Icache Rsp + // DRAM Icache response input wire I_dram_rsp_valid, input wire [`IDRAM_LINE_WIDTH-1:0] I_dram_rsp_data, input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag, output wire I_dram_rsp_ready, - // Cache Snooping + // Snoop request input wire snp_req_valid, input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, + output wire snp_rsp_valid, + output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, + // I/O request output wire io_req_read, output wire io_req_write, @@ -172,12 +177,24 @@ module Vortex #( 
VX_warp_ctl_if warp_ctl_if(); // Cache snooping - VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); + VX_cache_snp_req_if #( + .DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH), + .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) + ) dcache_snp_req_if(); + + VX_cache_snp_rsp_if #( + .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) + ) dcache_snp_rsp_if(); assign dcache_snp_req_if.snp_req_valid = snp_req_valid; assign dcache_snp_req_if.snp_req_addr = snp_req_addr; + assign dcache_snp_req_if.snp_req_tag = snp_req_tag; assign snp_req_ready = dcache_snp_req_if.snp_req_ready; + assign snp_rsp_valid = dcache_snp_rsp_if.snp_rsp_valid; + assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag; + assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready; + VX_front_end #( .CORE_ID(CORE_ID) ) front_end ( @@ -236,6 +253,7 @@ module Vortex #( .dcache_dram_req_if (dcache_dram_req_if), .dcache_dram_rsp_if (dcache_dram_rsp_if), .dcache_snp_req_if (dcache_snp_req_if), + .dcache_snp_rsp_if (dcache_snp_rsp_if), // Core <-> Icache .icache_core_req_if (icache_core_req_if), diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index bc234902..7b48c01d 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -8,7 +8,7 @@ module Vortex_Cluster #( input wire clk, input wire reset, - // DRAM Req + // DRAM request output wire dram_req_read, output wire dram_req_write, output wire[`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr, @@ -16,16 +16,22 @@ module Vortex_Cluster #( output wire[`L2DRAM_TAG_WIDTH-1:0] dram_req_tag, input wire dram_req_ready, - // DRAM Rsp + // DRAM response input wire dram_rsp_valid, input wire[`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data, input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Cache Snooping - input wire snp_req_valid, - input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, - output wire snp_req_ready, + // Snoop request + input wire snp_req_valid, + input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag, + 
output wire snp_req_ready, + + // Snoop response + output wire snp_rsp_valid, + output wire[`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, // I/O request output wire io_req_read, @@ -69,9 +75,14 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; - wire[`NUM_CORES-1:0] per_core_snp_fwd_valid; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_fwd_addr; - wire[`NUM_CORES-1:0] per_core_snp_fwd_ready; + wire[`NUM_CORES-1:0] per_core_snp_req_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; + wire[`NUM_CORES-1:0] per_core_snp_req_ready; + + wire[`NUM_CORES-1:0] per_core_snp_rsp_valid; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag; + wire[`NUM_CORES-1:0] per_core_snp_rsp_ready; `IGNORE_WARNINGS_BEGIN wire[`NUM_CORES-1:0] per_core_io_req_read; @@ -88,7 +99,7 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0] per_core_ebreak; genvar i; - for (i = 0; i < `NUM_CORES; i = i + 1) begin + for (i = 0; i < `NUM_CORES; i++) begin Vortex #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) vortex_core ( @@ -118,9 +129,14 @@ module Vortex_Cluster #( .I_dram_rsp_data (per_core_I_dram_rsp_data [i]), .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), - .snp_req_valid (per_core_snp_fwd_valid [i]), - .snp_req_addr (per_core_snp_fwd_addr [i]), - .snp_req_ready (per_core_snp_fwd_ready [i]), + .snp_req_valid (per_core_snp_req_valid [i]), + .snp_req_addr (per_core_snp_req_addr [i]), + .snp_req_tag (per_core_snp_req_tag [i]), + .snp_req_ready (per_core_snp_req_ready [i]), + + .snp_rsp_valid (per_core_snp_rsp_valid [i]), + .snp_rsp_tag (per_core_snp_rsp_tag [i]), + .snp_rsp_ready (per_core_snp_rsp_ready [i]), .io_req_read (per_core_io_req_read [i]), .io_req_write (per_core_io_req_write [i]), @@ -169,9 +185,14 @@ module Vortex_Cluster #( 
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag; wire l2_core_rsp_ready; - wire l2_snp_fwd_valid; - wire[`L3DRAM_ADDR_WIDTH-1:0] l2_snp_fwd_addr; - wire l2_snp_fwd_ready; + wire[`NUM_CORES-1:0] l2_snp_fwdout_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag; + wire[`NUM_CORES-1:0] l2_snp_fwdout_ready; + + wire[`NUM_CORES-1:0] l2_snp_fwdin_valid; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag; + wire[`NUM_CORES-1:0] l2_snp_fwdin_ready; for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]); @@ -204,12 +225,17 @@ module Vortex_Cluster #( assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i]; assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; - assign per_core_snp_fwd_valid [(i/2)] = l2_snp_fwd_valid && l2_snp_fwd_ready; - assign per_core_snp_fwd_addr [(i/2)] = l2_snp_fwd_addr; + assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)]; + assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; + + assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; + assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; + assign per_core_snp_rsp_ready [(i/2)] = l2_snp_fwdin_ready [(i/2)]; end assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); - assign l2_snp_fwd_ready = (& per_core_snp_fwd_ready); VX_cache #( .CACHE_SIZE (`L2CACHE_SIZE), @@ -226,7 +252,7 @@ module Vortex_Cluster #( .DWBQ_SIZE (`L2DWBQ_SIZE), .DFQQ_SIZE (`L2DFQQ_SIZE), .LLVQ_SIZE (`L2LLVQ_SIZE), - .FFSQ_SIZE (`L2FFSQ_SIZE), + .SRPQ_SIZE (`L2SRPQ_SIZE), .PRFQ_SIZE (`L2PRFQ_SIZE), .PRFQ_STRIDE (`L2PRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE), @@ -235,7 +261,10 
@@ module Vortex_Cluster #( .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) + .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH), + .NUM_SNP_REQUESTS (`NUM_CORES), + .SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH), + .SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH) ) gpu_l2cache ( .clk (clk), .reset (reset), @@ -267,17 +296,29 @@ module Vortex_Cluster #( .dram_rsp_valid (dram_rsp_valid), .dram_rsp_tag (dram_rsp_tag), .dram_rsp_data (dram_rsp_data), - .dram_rsp_ready (dram_rsp_ready), + .dram_rsp_ready (dram_rsp_ready), // Snoop request .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), - // Snoop forwarding - .snp_fwd_valid (l2_snp_fwd_valid), - .snp_fwd_addr (l2_snp_fwd_addr), - .snp_fwd_ready (l2_snp_fwd_ready) + // Snoop response + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + .snp_rsp_ready (snp_rsp_ready), + + // Snoop forwarding out + .snp_fwdout_valid (l2_snp_fwdout_valid), + .snp_fwdout_addr (l2_snp_fwdout_addr), + .snp_fwdout_tag (l2_snp_fwdout_tag), + .snp_fwdout_ready (l2_snp_fwdout_ready), + + // Snoop forwarding in + .snp_fwdin_valid (l2_snp_fwdin_valid), + .snp_fwdin_tag (l2_snp_fwdin_tag), + .snp_fwdin_ready (l2_snp_fwdin_ready) ); end else begin @@ -294,9 +335,14 @@ module Vortex_Cluster #( wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag; wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready; - wire arb_snp_fwd_valid; - wire[`L3DRAM_ADDR_WIDTH-1:0] arb_snp_fwd_addr; - wire arb_snp_fwd_ready; + wire[`NUM_CORES-1:0] arb_snp_fwdout_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag; + wire[`NUM_CORES-1:0] arb_snp_fwdout_ready; + + wire[`NUM_CORES-1:0] arb_snp_fwdin_valid; + wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag; + wire[`NUM_CORES-1:0] arb_snp_fwdin_ready; for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin assign 
arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)]; @@ -329,15 +375,47 @@ module Vortex_Cluster #( assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; - assign per_core_snp_fwd_valid [(i/2)] = arb_snp_fwd_valid && arb_snp_fwd_ready; - assign per_core_snp_fwd_addr [(i/2)] = arb_snp_fwd_addr; - end - - assign arb_snp_fwd_valid = snp_req_valid; - assign arb_snp_fwd_addr = snp_req_addr; - assign arb_snp_fwd_ready = (& per_core_snp_fwd_ready); + assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)]; + assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; - assign snp_req_ready = arb_snp_fwd_ready; + assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; + assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; + assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)]; + end + + VX_snp_forwarder #( + .BANK_LINE_SIZE(`L2BANK_LINE_SIZE), + .NUM_REQUESTS(`NUM_CORES), + .SNRQ_SIZE(`L2SNRQ_SIZE), + .SNP_REQ_TAG_WIDTH(`L2SNP_TAG_WIDTH), + .SNP_FWD_TAG_WIDTH(`DSNP_TAG_WIDTH) + ) snp_forwarder ( + .clk (clk), + .reset (reset), + + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_tag (snp_req_tag), + .snp_req_ready (snp_req_ready), + + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + `IGNORE_WARNINGS_BEGIN + .snp_rsp_addr (), + `IGNORE_WARNINGS_END + .snp_rsp_ready (snp_rsp_ready), + + .snp_fwdout_valid (arb_snp_fwdout_valid), + .snp_fwdout_addr (arb_snp_fwdout_addr), + .snp_fwdout_tag (arb_snp_fwdout_tag), + .snp_fwdout_ready (arb_snp_fwdout_ready), + + .snp_fwdin_valid (arb_snp_fwdin_valid), + .snp_fwdin_tag (arb_snp_fwdin_tag), + .snp_fwdin_ready (arb_snp_fwdin_ready) + ); VX_dram_arb #( .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), diff --git 
a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 791e0162..2fafaeb6 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -15,16 +15,22 @@ module Vortex_Socket ( input wire dram_req_ready, // DRAM response - input wire dram_rsp_valid, - input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data, - input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag, + input wire dram_rsp_valid, + input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data, + input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Cache snooping - input wire snp_req_valid, - input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr, + // Snoop request + input wire snp_req_valid, + input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire[`L3SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, + // Snoop response + output wire snp_rsp_valid, + output wire[`L3SNP_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, + // I/O request output wire io_req_read, output wire io_req_write, @@ -66,8 +72,13 @@ module Vortex_Socket ( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + .snp_rsp_ready (snp_rsp_ready), + .io_req_read (io_req_read), .io_req_write (io_req_write), .io_req_addr (io_req_addr), @@ -99,9 +110,14 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_valid; - wire[`NUM_CLUSTERS-1:0][`L3DRAM_ADDR_WIDTH-1:0] per_cluster_snp_fwd_addr; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready; + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; + wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; + wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; + + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; + 
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; `IGNORE_WARNINGS_BEGIN wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read; @@ -118,7 +134,7 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak; genvar i; - for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin + for (i = 0; i < `NUM_CLUSTERS; i++) begin Vortex_Cluster #( .CLUSTER_ID(i) ) Vortex_Cluster ( @@ -137,9 +153,14 @@ module Vortex_Socket ( .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), - .snp_req_valid (per_cluster_snp_fwd_valid [i]), - .snp_req_addr (per_cluster_snp_fwd_addr [i]), - .snp_req_ready (per_cluster_snp_fwd_ready [i]), + .snp_req_valid (per_cluster_snp_req_valid [i]), + .snp_req_addr (per_cluster_snp_req_addr [i]), + .snp_req_tag (per_cluster_snp_req_tag [i]), + .snp_req_ready (per_cluster_snp_req_ready [i]), + + .snp_rsp_valid (per_cluster_snp_rsp_valid [i]), + .snp_rsp_tag (per_cluster_snp_rsp_tag [i]), + .snp_rsp_ready (per_cluster_snp_rsp_ready [i]), .io_req_read (per_cluster_io_req_read [i]), .io_req_write (per_cluster_io_req_write [i]), @@ -185,11 +206,16 @@ module Vortex_Socket ( wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; wire l3_core_rsp_ready; - wire l3_snp_fwd_valid; - wire[`L3DRAM_ADDR_WIDTH-1:0] l3_snp_fwd_addr; - wire l3_snp_fwd_ready; + wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; + wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; + wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; + wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; - for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin + wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid; + wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag; + wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready; + + for (i = 0; i < `L3NUM_REQUESTS; i++) begin // Core Request assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]); assign 
l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO; @@ -203,13 +229,19 @@ module Vortex_Socket ( assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i]; assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i]; - // Snoop Forwarding - assign per_cluster_snp_fwd_valid [i] = l3_snp_fwd_valid && l3_snp_fwd_ready; - assign per_cluster_snp_fwd_addr [i] = l3_snp_fwd_addr; + // Snoop Forwarding out + assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i]; + assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i]; + assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i]; + assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i]; + + // Snoop Forwarding in + assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i]; + assign l3_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i]; + assign per_cluster_snp_rsp_ready [i] = l3_snp_fwdin_ready [i]; end assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); - assign l3_snp_fwd_ready = (& per_cluster_snp_fwd_ready); VX_cache #( .CACHE_SIZE (`L3CACHE_SIZE), @@ -226,7 +258,7 @@ module Vortex_Socket ( .DWBQ_SIZE (`L3DWBQ_SIZE), .DFQQ_SIZE (`L3DFQQ_SIZE), .LLVQ_SIZE (`L3LLVQ_SIZE), - .FFSQ_SIZE (`L3FFSQ_SIZE), + .SRPQ_SIZE (`L3SRPQ_SIZE), .PRFQ_SIZE (`L3PRFQ_SIZE), .PRFQ_STRIDE (`L3PRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), @@ -235,7 +267,10 @@ module Vortex_Socket ( .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH) + .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH), + .NUM_SNP_REQUESTS (`NUM_CLUSTERS), + .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), + .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) gpu_l3cache ( .clk (clk), .reset (reset), @@ -272,12 +307,24 @@ module Vortex_Socket ( // Snoop request .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), - // Snoop forwarding - .snp_fwd_valid (l3_snp_fwd_valid), - .snp_fwd_addr 
(l3_snp_fwd_addr), - .snp_fwd_ready (l3_snp_fwd_ready) + // Snoop response + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + .snp_rsp_ready (snp_rsp_ready), + + // Snoop forwarding out + .snp_fwdout_valid (l3_snp_fwdout_valid), + .snp_fwdout_addr (l3_snp_fwdout_addr), + .snp_fwdout_tag (l3_snp_fwdout_tag), + .snp_fwdout_ready (l3_snp_fwdout_ready), + + // Snoop forwarding in + .snp_fwdin_valid (l3_snp_fwdin_valid), + .snp_fwdin_tag (l3_snp_fwdin_tag), + .snp_fwdin_ready (l3_snp_fwdin_ready) ); end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index da525ac1..29386a1c 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -21,7 +21,7 @@ module VX_bank #( parameter MRVQ_SIZE = 0, // Dram Fill Rsp Queue Size parameter DFPQ_SIZE = 0, - // Snoop Req Queue + // Snoop Req Queue Size parameter SNRQ_SIZE = 0, // Queues for writebacks Knobs {1, 2, 4, 8, ...} @@ -33,8 +33,8 @@ module VX_bank #( parameter DFQQ_SIZE = 0, // Lower Level Cache Hit Queue Size parameter LLVQ_SIZE = 0, - // Fill Forward SNP Queue - parameter FFSQ_SIZE = 0, + // Snoop Rsp Queue Size + parameter SRPQ_SIZE = 0, // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 0, @@ -52,33 +52,34 @@ module VX_bank #( parameter CORE_TAG_WIDTH = 0, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0, + + // Snooping request tag width + parameter SNP_REQ_TAG_WIDTH = 0 ) ( input wire clk, input wire reset, // Core Request - input wire core_req_ready, input wire [NUM_REQUESTS-1:0] core_req_valids, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, - output wire core_req_full, - + output wire core_req_ready, + // Core Response 
output wire core_rsp_valid, output wire [`REQS_BITS-1:0] core_rsp_tid, output wire [`WORD_WIDTH-1:0] core_rsp_data, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, - input wire core_rsp_pop, + input wire core_rsp_ready, // Dram Fill Requests output wire dram_fill_req_valid, output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr, - output wire dram_fill_req_is_snp, - input wire dram_fill_req_full, + input wire dram_fill_req_ready, // Dram Fill Response input wire dram_fill_rsp_valid, @@ -90,57 +91,47 @@ module VX_bank #( output wire dram_wb_req_valid, output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr, output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data, - input wire dram_wb_req_pop, + input wire dram_wb_req_ready, // Snp Request input wire snp_req_valid, input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, - output wire snp_req_full, + input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, + output wire snp_req_ready, - output wire snp_fwd_valid, - output wire [`LINE_ADDR_WIDTH-1:0] snp_fwd_addr, - input wire snp_fwd_pop + output wire snp_rsp_valid, + output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready ); - reg snoop_state = 0; - - always @(posedge clk) begin - if (reset) begin - snoop_state <= 0; - end else begin - snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING; - end - end - - wire snrq_pop; - wire snrq_empty; - - wire snrq_valid_st0; - wire[`LINE_ADDR_WIDTH-1:0] snrq_addr_st0; - - assign snrq_valid_st0 = !snrq_empty; + wire snrq_pop; + wire snrq_empty; + wire snrq_full; + wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0; + wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0; + VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH), + .DATAW(`LINE_ADDR_WIDTH + SNP_REQ_TAG_WIDTH), .SIZE(SNRQ_SIZE) - ) snr_queue ( + ) snp_req_queue ( .clk (clk), .reset (reset), .push (snp_req_valid), - .data_in (snp_req_addr), + .data_in ({snp_req_addr, snp_req_tag}), .pop (snrq_pop), - .data_out(snrq_addr_st0), + .data_out({snrq_addr_st0, snrq_tag_st0}), .empty (snrq_empty), - 
.full (snp_req_full) + .full (snrq_full) ); + assign snp_req_ready = ~snrq_full; + wire dfpq_pop; wire dfpq_empty; wire dfpq_full; wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; - wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; - - assign dram_fill_rsp_ready = !dfpq_full; + wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)), @@ -156,9 +147,12 @@ module VX_bank #( .full (dfpq_full) ); + assign dram_fill_rsp_ready = !dfpq_full; + wire reqq_pop; wire reqq_push; wire reqq_empty; + wire reqq_full; wire reqq_req_st0; wire[`REQS_BITS-1:0] reqq_req_tid_st0; `IGNORE_WARNINGS_BEGIN @@ -169,14 +163,12 @@ module VX_bank #( wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0; - assign reqq_push = core_req_ready && (| core_req_valids); - VX_cache_req_queue #( - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), - .REQQ_SIZE (REQQ_SIZE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .WORD_SIZE (WORD_SIZE), + .NUM_REQUESTS (NUM_REQUESTS), + .REQQ_SIZE (REQQ_SIZE), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_ID_BITS(CORE_TAG_ID_BITS) ) req_queue ( .clk (clk), .reset (reset), @@ -199,8 +191,11 @@ module VX_bank #( .reqq_req_mem_read_st0 (reqq_req_mem_read_st0), .reqq_req_mem_write_st0(reqq_req_mem_write_st0), .reqq_empty (reqq_empty), - .reqq_full (core_req_full) - ); + .reqq_full (reqq_full) + ); + + assign core_req_ready = ~reqq_full; + assign reqq_push = (| core_req_valids) && core_req_ready; wire mrvq_pop; wire mrvq_full; @@ -237,7 +232,7 @@ module VX_bank #( integer j; always @(*) begin is_fill_in_pipe = 0; - for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin + for (j = 0; j < STAGE_1_CYCLES; j++) begin if (is_fill_st1[j]) begin is_fill_in_pipe = 1; end @@ -251,7 +246,7 @@ module VX_bank #( assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe; assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe; assign reqq_pop = !mrvq_stop && 
!mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe; - assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe; + assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && !snrq_empty && !stall_bank_pipe; wire qual_is_fill_st0; wire qual_valid_st0; @@ -262,7 +257,7 @@ module VX_bank #( wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0; wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0; wire qual_going_to_write_st0; - wire qual_is_snp; + wire qual_is_snp_st0; wire valid_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; @@ -270,6 +265,7 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0]; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; + wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0]; assign qual_is_fill_st0 = dfpq_pop; @@ -298,34 +294,34 @@ module VX_bank #( (snrq_pop) ? 1 : 0; - assign qual_is_snp = snrq_pop ? 1 : 0; + assign qual_is_snp_st0 = snrq_pop ? 1 : 0; assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 : reqq_pop ? 
reqq_req_writeword_st0 : 0; VX_generic_register #( - .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH) ) s0_1_c0 ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), .flush (0), - .in ({qual_is_snp, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) + .in ({qual_is_snp_st0, snrq_tag_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), + .out ({is_snp_st1[0], snrq_tag_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) ); genvar i; - for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin + for (i = 1; i < STAGE_1_CYCLES; i++) begin VX_generic_register #( - .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH) ) s0_1_cc ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), - .out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) + .in ({is_snp_st1[i-1], snrq_tag_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], 
inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), + .out ({is_snp_st1[i], snrq_tag_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) ); end @@ -355,10 +351,10 @@ module VX_bank #( .DRAM_ENABLE (DRAM_ENABLE), .WRITE_ENABLE (WRITE_ENABLE) ) tag_data_access ( - .clk (clk), - .reset (reset), - .stall (stall_bank_pipe), - .stall_bank_pipe(stall_bank_pipe), + .clk (clk), + .reset (reset), + .stall (stall_bank_pipe), + .stall_bank_pipe (stall_bank_pipe), // Initial Read .readaddr_st10 (addr_st1[0][`LINE_SELECT_BITS-1:0]), @@ -397,17 +393,18 @@ module VX_bank #( wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire fill_saw_dirty_st2; + wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st2; wire is_snp_st2; VX_generic_register #( - .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH) + .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH + SNP_REQ_TAG_WIDTH) ) st_1e_2 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), - .out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) + .in ({is_snp_st1e, snrq_tag_st1[STAGE_1_CYCLES-1], fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, 
miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .out ({is_snp_st2 , snrq_tag_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) ); wire should_flush; @@ -415,7 +412,7 @@ module VX_bank #( wire cwbq_full; wire dwbq_full; - wire ffsq_full; + wire srpq_full; wire invalidate_fill; // Enqueue to miss reserv if it's a valid miss @@ -424,11 +421,11 @@ module VX_bank #( && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) - && !((is_snp_st2 && valid_st2 && ffsq_full) + && !((is_snp_st2 && valid_st2 && srpq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) - || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); + || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready)); assign miss_add_addr = addr_st2; assign miss_add_wsel = wsel_st2; @@ -474,21 +471,23 @@ module VX_bank #( ); // Enqueue to CWB Queue - // TODO: should investigae the need for "SNOOP_FORWARDING" here wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && (miss_add_mem_write == `BYTE_EN_NO) - && !((is_snp_st2 && valid_st2 && ffsq_full) + && !((is_snp_st2 && valid_st2 && srpq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) - || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); + || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready)); wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid; wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag; wire cwbq_empty; + wire cwbq_pop; + assign core_rsp_valid = !cwbq_empty; + assign cwbq_pop = core_rsp_valid && core_rsp_ready; VX_generic_queue #( .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), @@ -500,29 +499,28 @@ module VX_bank #( 
.push (cwbq_push), .data_in ({cwbq_tid, cwbq_tag, cwbq_data}), - .pop (core_rsp_pop), + .pop (cwbq_pop), .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), .empty (cwbq_empty), .full (cwbq_full) ); - assign should_flush = snoop_state - && valid_st2 + assign should_flush = valid_st2 && (miss_add_mem_write != `BYTE_EN_NO) - && !is_snp_st2 && !is_fill_st2; + && !is_snp_st2 + && !is_fill_st2; // Enqueue to DWB Queue assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full - && !((is_snp_st2 && valid_st2 && ffsq_full) + && !((is_snp_st2 && valid_st2 && srpq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) - || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); + || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready)); wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr; - wire dwbq_empty; - wire[`BANK_LINE_WIDTH-1:0] dwbq_req_data; + wire dwbq_empty; if (SNOOP_FORWARDING) begin assign dwbq_req_data = (should_flush && dwbq_push) ? 
writeword_st2 : readdata_st2; @@ -532,7 +530,7 @@ module VX_bank #( assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; end - wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_full && !is_snp_st2; + wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2; wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2; VX_fill_invalidator #( @@ -549,9 +547,8 @@ module VX_bank #( ); // Enqueue in dram_fill_req - assign dram_fill_req_valid = possible_fill && !invalidate_fill; - assign dram_fill_req_is_snp = is_snp_st2 && valid_st2 && miss_st2; - assign dram_fill_req_addr = addr_st2; + assign dram_fill_req_valid = possible_fill && !invalidate_fill; + assign dram_fill_req_addr = addr_st2; assign dram_wb_req_valid = !dwbq_empty; @@ -565,43 +562,43 @@ module VX_bank #( .push (dwbq_push), .data_in ({dwbq_req_addr, dwbq_req_data}), - .pop (dram_wb_req_pop), + .pop (dram_wb_req_ready), .data_out({dram_wb_req_addr, dram_wb_req_data}), .empty (dwbq_empty), .full (dwbq_full) ); - wire snp_fwd_push; - wire ffsq_empty; + wire snp_rsp_push; + wire srpq_empty; - assign snp_fwd_push = is_snp_st2 + assign snp_rsp_push = is_snp_st2 && valid_st2 - && !ffsq_full + && !srpq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) - || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); + || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready)); - assign snp_fwd_valid = !ffsq_empty; + assign snp_rsp_valid = !srpq_empty; VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH), - .SIZE(FFSQ_SIZE) - ) ffs_queue ( + .DATAW(SNP_REQ_TAG_WIDTH), + .SIZE(SRPQ_SIZE) + ) snp_rsp_queue ( .clk (clk), .reset (reset), - .push (snp_fwd_push), - .data_in (addr_st2), - .pop (snp_fwd_pop), - .data_out(snp_fwd_addr), - .empty (ffsq_empty), - .full (ffsq_full) + .push (snp_rsp_push), + .data_in (snrq_tag_st2), + .pop 
(snp_rsp_ready), + .data_out(snp_rsp_tag), + .empty (srpq_empty), + .full (srpq_full) ); - assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full) + assign stall_bank_pipe = (is_snp_st2 && valid_st2 && srpq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) - || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full); + || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready); endmodule : VX_bank \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 3eb2c976..92aa3fd9 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -22,7 +22,7 @@ module VX_cache #( parameter MRVQ_SIZE = 8, // Dram Fill Rsp Queue Size parameter DFPQ_SIZE = 2, - // Snoop Req Queue + // Snoop Req Queue Size parameter SNRQ_SIZE = 8, // Queues for writebacks Knobs {1, 2, 4, 8, ...} @@ -34,8 +34,8 @@ module VX_cache #( parameter DFQQ_SIZE = 8, // Lower Level Cache Hit Queue Size parameter LLVQ_SIZE = 16, - // Fill Forward SNP Queue - parameter FFSQ_SIZE = 8, + // Snoop Rsp Queue Size + parameter SRPQ_SIZE = 8, // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 16, @@ -60,7 +60,16 @@ module VX_cache #( parameter CORE_TAG_ID_BITS = 0, // dram request tag size - parameter DRAM_TAG_WIDTH = 1 + parameter DRAM_TAG_WIDTH = 1, + + // Number of snoop forwarding requests + parameter NUM_SNP_REQUESTS = 2, + + // Snooping request tag width + parameter SNP_REQ_TAG_WIDTH = 1, + + // Snooping forward tag width + parameter SNP_FWD_TAG_WIDTH = 1 ) ( input wire clk, input wire reset, @@ -94,56 +103,117 @@ module VX_cache #( input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, - // Snoop Req + // Snoop request input wire snp_req_valid, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, output wire 
snp_req_ready, - // Snoop Forward - output wire snp_fwd_valid, - output wire [`DRAM_ADDR_WIDTH-1:0] snp_fwd_addr, - input wire snp_fwd_ready + // Snoop response + output wire snp_rsp_valid, + output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, + + // Snoop Forwarding out + output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid, + output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr, + output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag, +`IGNORE_WARNINGS_BEGIN + input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready, + + // Snoop forwarding in + input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid, + input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag, +`IGNORE_WARNINGS_END + output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready ); wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids; + + wire [NUM_BANKS-1:0] per_bank_core_req_ready; - wire [NUM_BANKS-1:0] per_bank_core_rsp_pop; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; + wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; - wire dfqq_full; wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid; wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr; + wire dram_fill_req_ready; + wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop; + wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready; wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid; wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr; wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data; - wire [NUM_BANKS-1:0] per_bank_reqq_full; - wire [NUM_BANKS-1:0] per_bank_snp_req_full; + wire [NUM_BANKS-1:0] per_bank_snp_req_ready; - wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid; - wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] 
per_bank_snp_fwd_addr; - wire [NUM_BANKS-1:0] per_bank_snp_fwd_pop; + wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid; + wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag; + wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready; -`DEBUG_BEGIN - wire [NUM_BANKS-1:0] per_bank_dram_fill_req_is_snp; -`DEBUG_END + wire snp_req_valid_qual; + wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual; + wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual; + wire snp_req_ready_qual; - assign dram_req_tag = dram_req_addr; - assign core_req_ready = ~(| per_bank_reqq_full); - assign snp_req_ready = ~(| per_bank_snp_req_full); - assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); + if (SNOOP_FORWARDING) begin + VX_snp_forwarder #( + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_REQUESTS (NUM_SNP_REQUESTS), + .SNRQ_SIZE (SNRQ_SIZE), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH), + .SNP_FWD_TAG_WIDTH (SNP_FWD_TAG_WIDTH) + ) snp_forwarder ( + .clk (clk), + .reset (reset), + + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_tag (snp_req_tag), + .snp_req_ready (snp_req_ready), + + .snp_rsp_valid (snp_req_valid_qual), + .snp_rsp_addr (snp_req_addr_qual), + .snp_rsp_tag (snp_req_tag_qual), + .snp_rsp_ready (snp_req_ready_qual), + + .snp_fwdout_valid (snp_fwdout_valid), + .snp_fwdout_addr (snp_fwdout_addr), + .snp_fwdout_tag (snp_fwdout_tag), + .snp_fwdout_ready (snp_fwdout_ready), + + .snp_fwdin_valid (snp_fwdin_valid), + .snp_fwdin_tag (snp_fwdin_tag), + .snp_fwdin_ready (snp_fwdin_ready) + ); + end else begin + assign snp_fwdout_valid = 0; + assign snp_fwdout_addr = 0; + assign snp_fwdout_tag = 0; + + assign snp_fwdin_ready = 0; + + assign snp_req_valid_qual = snp_req_valid; + assign snp_req_addr_qual = snp_req_addr; + assign snp_req_tag_qual = snp_req_tag; + assign snp_req_ready = snp_req_ready_qual; + end + + assign dram_req_tag = dram_req_addr; + + assign core_req_ready = (& per_bank_core_req_ready); + assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); + assign 
snp_req_ready_qual = (& per_bank_snp_req_ready); VX_cache_core_req_bank_sel #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS) + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQUESTS (NUM_REQUESTS) ) cache_core_req_bank_sell ( .core_req_valid (core_req_valid), .core_req_addr (core_req_addr), @@ -152,7 +222,7 @@ module VX_cache #( genvar i; generate - for (i = 0; i < NUM_BANKS; i = i + 1) begin + for (i = 0; i < NUM_BANKS; i++) begin wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids; wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; @@ -160,58 +230,57 @@ module VX_cache #( wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write; - wire curr_bank_core_rsp_pop; wire curr_bank_core_rsp_valid; wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; + wire curr_bank_core_rsp_ready; wire curr_bank_dram_fill_rsp_valid; wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr; wire curr_bank_dram_fill_rsp_ready; - wire curr_bank_dram_fill_req_full; wire curr_bank_dram_fill_req_valid; - wire curr_bank_dram_fill_req_is_snp; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr; + wire curr_bank_dram_fill_req_ready; - wire curr_bank_dram_wb_req_pop; wire curr_bank_dram_wb_req_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr; wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data; + wire curr_bank_dram_wb_req_ready; wire curr_bank_snp_req_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; - wire curr_bank_snp_req_full; + wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag; + wire curr_bank_snp_req_ready; - wire curr_bank_snp_fwd_valid; - 
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_fwd_addr; - wire curr_bank_snp_fwd_pop; + wire curr_bank_snp_rsp_valid; + wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag; + wire curr_bank_snp_rsp_ready; - wire curr_bank_reqq_full; + wire curr_bank_core_req_ready; // Core Req - assign curr_bank_core_req_valids = per_bank_valids[i]; + assign curr_bank_core_req_valids = per_bank_valids[i] & {NUM_REQUESTS{core_req_ready}}; assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_data = core_req_data; assign curr_bank_core_req_tag = core_req_tag; assign curr_bank_core_req_read = core_req_read; assign curr_bank_core_req_write = core_req_write; - assign per_bank_reqq_full[i] = curr_bank_reqq_full; + assign per_bank_core_req_ready[i] = curr_bank_core_req_ready; // Core WB - assign curr_bank_core_rsp_pop = per_bank_core_rsp_pop[i]; + assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i]; assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid; assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid; assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag; assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data; - // Dram fill request - assign curr_bank_dram_fill_req_full = dfqq_full; + // Dram fill request assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid; assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i); - assign per_bank_dram_fill_req_is_snp[i] = curr_bank_dram_fill_req_is_snp; + assign curr_bank_dram_fill_req_ready = dram_fill_req_ready; // Dram fill response assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); @@ -219,44 +288,46 @@ module VX_cache #( assign curr_bank_dram_fill_rsp_data = dram_rsp_data; assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready; - // Dram writeback request - assign curr_bank_dram_wb_req_pop = per_bank_dram_wb_queue_pop[i]; + // Dram writeback request assign per_bank_dram_wb_req_valid[i] 
= curr_bank_dram_wb_req_valid; assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i); assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data; + assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i]; - // Snoop Request - assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i); - assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr); - assign per_bank_snp_req_full[i] = curr_bank_snp_req_full; + // Snoop request + assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i); + assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual); + assign curr_bank_snp_req_tag = snp_req_tag_qual; + assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; - // Snoop Fwd - assign per_bank_snp_fwd_valid[i] = curr_bank_snp_fwd_valid; - assign per_bank_snp_fwd_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_snp_fwd_addr, i); - assign curr_bank_snp_fwd_pop = per_bank_snp_fwd_pop[i]; + // Snoop response + assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid; + assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag; + assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i]; VX_bank #( - .CACHE_SIZE (CACHE_SIZE), - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQUESTS (NUM_REQUESTS), - .STAGE_1_CYCLES (STAGE_1_CYCLES), - .REQQ_SIZE (REQQ_SIZE), - .MRVQ_SIZE (MRVQ_SIZE), - .DFPQ_SIZE (DFPQ_SIZE), - .SNRQ_SIZE (SNRQ_SIZE), - .CWBQ_SIZE (CWBQ_SIZE), - .DWBQ_SIZE (DWBQ_SIZE), - .DFQQ_SIZE (DFQQ_SIZE), - .LLVQ_SIZE (LLVQ_SIZE), - .FFSQ_SIZE (FFSQ_SIZE), - .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), - .DRAM_ENABLE (DRAM_ENABLE), - .WRITE_ENABLE (WRITE_ENABLE), - .SNOOP_FORWARDING (SNOOP_FORWARDING), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .CACHE_SIZE (CACHE_SIZE), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + 
.NUM_REQUESTS (NUM_REQUESTS), + .STAGE_1_CYCLES (STAGE_1_CYCLES), + .REQQ_SIZE (REQQ_SIZE), + .MRVQ_SIZE (MRVQ_SIZE), + .DFPQ_SIZE (DFPQ_SIZE), + .SNRQ_SIZE (SNRQ_SIZE), + .CWBQ_SIZE (CWBQ_SIZE), + .DWBQ_SIZE (DWBQ_SIZE), + .DFQQ_SIZE (DFQQ_SIZE), + .LLVQ_SIZE (LLVQ_SIZE), + .SRPQ_SIZE (SRPQ_SIZE), + .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), + .DRAM_ENABLE (DRAM_ENABLE), + .WRITE_ENABLE (WRITE_ENABLE), + .SNOOP_FORWARDING (SNOOP_FORWARDING), + .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) ) bank ( .clk (clk), .reset (reset), @@ -267,21 +338,19 @@ module VX_cache #( .core_req_addr (curr_bank_core_req_addr), .core_req_data (curr_bank_core_req_data), .core_req_tag (curr_bank_core_req_tag), - .core_req_full (curr_bank_reqq_full), - .core_req_ready (core_req_ready), + .core_req_ready (curr_bank_core_req_ready), // Core response .core_rsp_valid (curr_bank_core_rsp_valid), .core_rsp_tid (curr_bank_core_rsp_tid), .core_rsp_data (curr_bank_core_rsp_data), .core_rsp_tag (curr_bank_core_rsp_tag), - .core_rsp_pop (curr_bank_core_rsp_pop), + .core_rsp_ready (curr_bank_core_rsp_ready), // Dram fill request .dram_fill_req_valid (curr_bank_dram_fill_req_valid), .dram_fill_req_addr (curr_bank_dram_fill_req_addr), - .dram_fill_req_is_snp (curr_bank_dram_fill_req_is_snp), - .dram_fill_req_full (curr_bank_dram_fill_req_full), + .dram_fill_req_ready (curr_bank_dram_fill_req_ready), // Dram fill response .dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid), @@ -293,20 +362,45 @@ module VX_cache #( .dram_wb_req_valid (curr_bank_dram_wb_req_valid), .dram_wb_req_addr (curr_bank_dram_wb_req_addr), .dram_wb_req_data (curr_bank_dram_wb_req_data), - .dram_wb_req_pop (curr_bank_dram_wb_req_pop), + .dram_wb_req_ready (curr_bank_dram_wb_req_ready), // Snoop request .snp_req_valid (curr_bank_snp_req_valid), .snp_req_addr (curr_bank_snp_req_addr), - .snp_req_full (curr_bank_snp_req_full), + .snp_req_tag 
(curr_bank_snp_req_tag), + .snp_req_ready (curr_bank_snp_req_ready), - // Snoop forwarding - .snp_fwd_valid (curr_bank_snp_fwd_valid), - .snp_fwd_addr (curr_bank_snp_fwd_addr), - .snp_fwd_pop (curr_bank_snp_fwd_pop) + // Snoop response + .snp_rsp_valid (curr_bank_snp_rsp_valid), + .snp_rsp_tag (curr_bank_snp_rsp_tag), + .snp_rsp_ready (curr_bank_snp_rsp_ready) ); end - endgenerate + endgenerate + + VX_cache_dram_req_arb #( + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE), + .DFQQ_SIZE (DFQQ_SIZE), + .PRFQ_SIZE (PRFQ_SIZE), + .PRFQ_STRIDE (PRFQ_STRIDE) + ) cache_dram_req_arb ( + .clk (clk), + .reset (reset), + .per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid), + .per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr), + .dram_fill_req_ready (dram_fill_req_ready), + .per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid), + .per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr), + .per_bank_dram_wb_req_data (per_bank_dram_wb_req_data), + .per_bank_dram_wb_req_ready (per_bank_dram_wb_req_ready), + .dram_req_read (dram_req_read), + .dram_req_write (dram_req_write), + .dram_req_addr (dram_req_addr), + .dram_req_data (dram_req_data), + .dram_req_ready (dram_req_ready) + ); VX_cache_core_rsp_merge #( .NUM_BANKS (NUM_BANKS), @@ -319,48 +413,24 @@ module VX_cache #( .per_bank_core_rsp_valid (per_bank_core_rsp_valid), .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_tag (per_bank_core_rsp_tag), - .per_bank_core_rsp_pop (per_bank_core_rsp_pop), - + .per_bank_core_rsp_ready (per_bank_core_rsp_ready), .core_rsp_valid (core_rsp_valid), .core_rsp_data (core_rsp_data), .core_rsp_tag (core_rsp_tag), .core_rsp_ready (core_rsp_ready) - ); + ); - VX_cache_dram_req_arb #( - .BANK_LINE_SIZE (BANK_LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .DFQQ_SIZE (DFQQ_SIZE), - .PRFQ_SIZE (PRFQ_SIZE), - .PRFQ_STRIDE (PRFQ_STRIDE) - ) cache_dram_req_arb ( - .clk (clk), - .reset (reset), - .dfqq_full 
(dfqq_full), - .per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid), - .per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr), - .per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop), - .per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid), - .per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr), - .per_bank_dram_wb_req_data (per_bank_dram_wb_req_data), - .dram_req_read (dram_req_read), - .dram_req_write (dram_req_write), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_ready (dram_req_ready) - ); - - VX_snp_fwd_arb #( - .NUM_BANKS(NUM_BANKS), - .BANK_LINE_SIZE(BANK_LINE_SIZE) - ) snp_fwd_arb ( - .per_bank_snp_fwd_valid (per_bank_snp_fwd_valid), - .per_bank_snp_fwd_addr (per_bank_snp_fwd_addr), - .per_bank_snp_fwd_pop (per_bank_snp_fwd_pop), - .snp_fwd_valid (snp_fwd_valid), - .snp_fwd_addr (snp_fwd_addr), - .snp_fwd_ready (snp_fwd_ready) + VX_snp_rsp_arb #( + .NUM_BANKS (NUM_BANKS), + .BANK_LINE_SIZE (BANK_LINE_SIZE), + .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH) + ) snp_rsp_arb ( + .per_bank_snp_rsp_valid (per_bank_snp_rsp_valid), + .per_bank_snp_rsp_tag (per_bank_snp_rsp_tag), + .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready), + .snp_rsp_valid (snp_rsp_valid), + .snp_rsp_tag (snp_rsp_tag), + .snp_rsp_ready (snp_rsp_ready) ); endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index a5ea2e32..06fde46f 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -21,7 +21,7 @@ module VX_cache_core_req_bank_sel #( integer i; always @(*) begin per_bank_valids = 0; - for (i = 0; i < NUM_REQUESTS; i = i + 1) begin + for (i = 0; i < NUM_REQUESTS; i++) begin if (NUM_BANKS == 1) begin // If there is only one bank, then only map requests to that bank per_bank_valids[0][i] = core_req_valid[i]; diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 
4ad4b44e..cc2ac2b5 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -17,7 +17,7 @@ module VX_cache_core_rsp_merge #( input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, - output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop, + output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Writeback output reg [NUM_REQUESTS-1:0] core_rsp_valid, @@ -28,7 +28,7 @@ module VX_cache_core_rsp_merge #( reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual; - assign per_bank_core_rsp_pop = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; + assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; wire [`BANK_BITS-1:0] main_bank_index; wire found_bank; @@ -48,7 +48,7 @@ module VX_cache_core_rsp_merge #( always @(*) begin core_rsp_valid = 0; core_rsp_data = 0; - for (i = 0; i < NUM_BANKS; i = i + 1) begin + for (i = 0; i < NUM_BANKS; i++) begin if (found_bank && per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] @@ -68,7 +68,7 @@ module VX_cache_core_rsp_merge #( core_rsp_valid = 0; core_rsp_data = 0; core_rsp_tag = 0; - for (i = 0; i < NUM_BANKS; i = i + 1) begin + for (i = 0; i < NUM_BANKS; i++) begin if (found_bank && per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index 6170f2c0..6072af4b 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -19,13 +19,13 @@ module VX_cache_dram_req_arb #( // Fill Request input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid, input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr, - output wire dfqq_full, + output wire dram_fill_req_ready, // Writeback Request input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid, input 
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr, input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data, - output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop, + output wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready, // Merged Request output wire dram_req_read, @@ -70,6 +70,7 @@ module VX_cache_dram_req_arb #( wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop wire dfqq_push = (| per_bank_dram_fill_req_valid); + wire dfqq_full; VX_cache_dfq_queue #( .BANK_LINE_SIZE(BANK_LINE_SIZE), @@ -100,7 +101,9 @@ module VX_cache_dram_req_arb #( .found (dwb_valid) ); - assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0; + assign dram_fill_req_ready = ~dfqq_full; + + assign per_bank_dram_wb_req_ready = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0; wire dram_req_valid = dwb_valid || dfqq_req || pref_pop; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 08e20199..2e31e8b7 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -66,7 +66,7 @@ module VX_cache_miss_resrv #( reg [MRVQ_SIZE-1:0] make_ready; genvar i; generate - for (i = 0; i < MRVQ_SIZE; i=i+1) begin + for (i = 0; i < MRVQ_SIZE; i++) begin assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1); end endgenerate diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v new file mode 100644 index 00000000..0ee2a953 --- /dev/null +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -0,0 +1,119 @@ +`include "VX_define.vh" + +// Snoop forwarder: broadcasts every accepted snoop request to all NUM_REQUESTS forwarding-out ports and records it in a pending table; +// snp_rsp_valid is raised when a forwarded reply is taken while that entry's pending counter equals 1. +module VX_snp_forwarder #( + parameter BANK_LINE_SIZE = 0, + parameter NUM_REQUESTS = 0, + parameter SNRQ_SIZE = 0, + parameter SNP_REQ_TAG_WIDTH = 0, + parameter SNP_FWD_TAG_WIDTH = 0 +) ( + input wire clk, + input wire reset, + + // Snoop request + input wire snp_req_valid, + input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire
[SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, + output wire snp_req_ready, + + // Snoop response + output wire snp_rsp_valid, + output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr, + output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready, + + // Snoop Forwarding out + output wire [NUM_REQUESTS-1:0] snp_fwdout_valid, + output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr, + output wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag, + input wire [NUM_REQUESTS-1:0] snp_fwdout_ready, + + // Snoop forwarding in + input wire [NUM_REQUESTS-1:0] snp_fwdin_valid, + input wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag, + output wire [NUM_REQUESTS-1:0] snp_fwdin_ready +); + reg [`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH-1:0] pending_reqs [SNRQ_SIZE-1:0]; + reg [`REQS_BITS-1:0] pending_cntrs [SNRQ_SIZE-1:0]; + reg [`LOG2UP(SNRQ_SIZE)-1:0] rd_ptr, wr_ptr; + reg [`LOG2UP(SNRQ_SIZE)-1:0] pending_size; + reg [`REQS_BITS-1:0] fwdin_sel; + wire enqueue, dequeue; + + wire fwdout_ready; + + wire fwdin_valid; + wire [SNP_FWD_TAG_WIDTH-1:0] fwdin_tag; + wire fwdin_ready; + wire fwdin_taken; + + + assign fwdout_ready = (& snp_fwdout_ready); + + assign snp_req_ready = (pending_size != `LOG2UP(SNRQ_SIZE)'(SNRQ_SIZE-1)) // not full + && fwdout_ready; + + genvar i; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign snp_fwdout_valid[i] = enqueue && fwdout_ready; + assign snp_fwdout_addr[i] = snp_req_addr; + assign snp_fwdout_tag[i] = wr_ptr; + end + + assign fwdin_ready = snp_rsp_ready; + + assign fwdin_taken = fwdin_valid && fwdin_ready; + + assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[fwdin_tag]); // send response + assign {snp_rsp_addr, snp_rsp_tag} = pending_reqs[fwdin_tag]; + + assign enqueue = snp_req_valid && snp_req_ready; + assign dequeue = snp_rsp_valid && (rd_ptr == fwdin_tag); + + // Pending-table bookkeeping: enqueue allocates at wr_ptr, dequeue advances rd_ptr, each taken reply decrements its entry's counter + always @(posedge clk) begin + if (reset) begin + rd_ptr <= 0; + wr_ptr <= 0; + pending_size <= 0; + end else begin + if
(enqueue) begin + pending_reqs[wr_ptr] <= {snp_req_addr, snp_req_tag}; + pending_cntrs[wr_ptr] <= `REQS_BITS'(NUM_REQUESTS); + wr_ptr <= wr_ptr + 1; + if (!dequeue) begin + pending_size <= pending_size + 1; + end + end + if (dequeue) begin + rd_ptr <= rd_ptr + 1; + if (!enqueue) begin + pending_size <= pending_size - 1; + end + end + if (fwdin_taken) begin + pending_cntrs[fwdin_tag] <= pending_cntrs[fwdin_tag] - 1; + end + end + end + + // Sole driver of fwdin_sel: steps to the next forwarding-in port every cycle (also resetting it in the block above would multiply-drive the register) + always @(posedge clk) begin + if (reset) begin + fwdin_sel <= 0; + end else begin + fwdin_sel <= fwdin_sel + 1; + end + end + + assign fwdin_valid = snp_fwdin_valid[fwdin_sel]; + assign fwdin_tag = snp_fwdin_tag[fwdin_sel]; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i)); + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_snp_fwd_arb.v b/hw/rtl/cache/VX_snp_fwd_arb.v deleted file mode 100644 index bfd3bb57..00000000 --- a/hw/rtl/cache/VX_snp_fwd_arb.v +++ /dev/null @@ -1,39 +0,0 @@ -`include "VX_cache_config.vh" - -module VX_snp_fwd_arb #( - parameter NUM_BANKS = 1, - parameter BANK_LINE_SIZE = 1 -) ( - input wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid, - input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_snp_fwd_addr, - output reg [NUM_BANKS-1:0] per_bank_snp_fwd_pop, - - output wire snp_fwd_valid, - output wire [`DRAM_ADDR_WIDTH-1:0] snp_fwd_addr, - input wire snp_fwd_ready -); - - wire [NUM_BANKS-1:0] qual_per_bank_snp_fwd = per_bank_snp_fwd_valid & {NUM_BANKS{snp_fwd_ready}}; - - wire [`BANK_BITS-1:0] fsq_bank; - wire fsq_valid; - - VX_generic_priority_encoder #( - .N(NUM_BANKS) - ) sel_ffsq ( - .valids (qual_per_bank_snp_fwd), - .index (fsq_bank), - .found (fsq_valid) - ); - - assign snp_fwd_valid = fsq_valid; - assign snp_fwd_addr = per_bank_snp_fwd_addr[fsq_bank]; - - always @(*) begin - per_bank_snp_fwd_pop = 0; - if (fsq_valid) begin - per_bank_snp_fwd_pop[fsq_bank] = 1; - end - end - -endmodule \ No newline at end of
file diff --git a/hw/rtl/cache/VX_snp_rsp_arb.v b/hw/rtl/cache/VX_snp_rsp_arb.v new file mode 100644 index 00000000..4ddcf82c --- /dev/null +++ b/hw/rtl/cache/VX_snp_rsp_arb.v @@ -0,0 +1,38 @@ +`include "VX_cache_config.vh" + +module VX_snp_rsp_arb #( + parameter NUM_BANKS = 0, + parameter BANK_LINE_SIZE = 0, + parameter SNP_REQ_TAG_WIDTH = 0 +) ( + input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid, + input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag, + output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready, + + output wire snp_rsp_valid, + output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, + input wire snp_rsp_ready +); + + wire [NUM_BANKS-1:0] qual_per_bank_snp_rsp = per_bank_snp_rsp_valid & {NUM_BANKS{snp_rsp_ready}}; + + wire [`BANK_BITS-1:0] fsq_bank; + wire fsq_valid; + + VX_generic_priority_encoder #( + .N(NUM_BANKS) + ) sel_ffsq ( + .valids (qual_per_bank_snp_rsp), + .index (fsq_bank), + .found (fsq_valid) + ); + + assign snp_rsp_valid = fsq_valid; + assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank]; + + genvar i; + for (i = 0; i < NUM_BANKS; i++) begin + assign per_bank_snp_rsp_ready[i] = fsq_valid && (fsq_bank == `BANK_BITS'(i)); + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 4614136e..51330171 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -110,7 +110,7 @@ module VX_tag_data_access #( ); genvar i; - for (i = 1; i < STAGE_1_CYCLES-1; i = i + 1) begin + for (i = 1; i < STAGE_1_CYCLES-1; i++) begin VX_generic_register #( .N( 1 + 1 + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) ) s0_1_cc ( @@ -127,7 +127,7 @@ module VX_tag_data_access #( assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache assign use_read_tag_st1e = DRAM_ENABLE ? 
read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM - for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin + for (i = 0; i < `BANK_LINE_WORDS; i++) begin assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH]; end @@ -144,7 +144,7 @@ module VX_tag_data_access #( && !miss_st1e && !is_snp_st1e; - for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin + for (i = 0; i < `BANK_LINE_WORDS; i++) begin assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000; end @@ -199,7 +199,7 @@ module VX_tag_data_access #( assign readword_st1e = data_Qual; - for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin + for (i = 0; i < `BANK_LINE_WORDS; i++) begin wire normal_write = (block_offset == i[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill; assign we[i] = (force_write) ? 4'b1111 : diff --git a/hw/rtl/cache/VX_tag_data_structure.v b/hw/rtl/cache/VX_tag_data_structure.v index 11be2f05..474f8d38 100644 --- a/hw/rtl/cache/VX_tag_data_structure.v +++ b/hw/rtl/cache/VX_tag_data_structure.v @@ -44,7 +44,7 @@ module VX_tag_data_structure #( integer i; always @(posedge clk) begin if (reset) begin - for (i = 0; i < `BANK_LINE_COUNT; i = i + 1) begin + for (i = 0; i < `BANK_LINE_COUNT; i++) begin valid[i] <= 0; dirty[i] <= 0; end @@ -65,7 +65,7 @@ module VX_tag_data_structure #( valid[write_addr] <= 0; end - for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin + for (i = 0; i < `BANK_LINE_WORDS; i++) begin if (write_enable[i][0]) data[write_addr][i][0] <= write_data[i * `WORD_WIDTH + 0 * `BYTE_WIDTH +: `BYTE_WIDTH]; if (write_enable[i][1]) data[write_addr][i][1] <= write_data[i * `WORD_WIDTH + 1 * `BYTE_WIDTH +: `BYTE_WIDTH]; if (write_enable[i][2]) data[write_addr][i][2] <= write_data[i * `WORD_WIDTH + 2 * `BYTE_WIDTH +: `BYTE_WIDTH]; diff --git a/hw/rtl/interfaces/VX_cache_snp_req_if.v b/hw/rtl/interfaces/VX_cache_snp_req_if.v index 
931520d8..935bd3a7 100644 --- a/hw/rtl/interfaces/VX_cache_snp_req_if.v +++ b/hw/rtl/interfaces/VX_cache_snp_req_if.v @@ -4,11 +4,13 @@ `include "../cache/VX_cache_config.vh" interface VX_cache_snp_req_if #( - parameter DRAM_ADDR_WIDTH = 1 + parameter DRAM_ADDR_WIDTH = 0, + parameter SNP_TAG_WIDTH = 0 ) (); wire snp_req_valid; wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr; + wire [SNP_TAG_WIDTH-1:0] snp_req_tag; wire snp_req_ready; endinterface diff --git a/hw/rtl/interfaces/VX_cache_snp_rsp_if.v b/hw/rtl/interfaces/VX_cache_snp_rsp_if.v new file mode 100644 index 00000000..12f6f733 --- /dev/null +++ b/hw/rtl/interfaces/VX_cache_snp_rsp_if.v @@ -0,0 +1,16 @@ +`ifndef VX_CACHE_SNP_RSP_IF +`define VX_CACHE_SNP_RSP_IF + +`include "../cache/VX_cache_config.vh" + +interface VX_cache_snp_rsp_if #( + parameter SNP_TAG_WIDTH = 0 +) (); + + wire snp_rsp_valid; + wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag; + wire snp_rsp_ready; + +endinterface + +`endif \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 152cfa6d..8573c49d 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -160,21 +160,29 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { // align address to LLC block boundaries auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE; auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; + int outstanding_snp_reqs = 0; // submit snoop requests for the needed blocks - vortex_->snp_req_addr = aligned_addr_start; + vortex_->snp_req_addr = aligned_addr_start; vortex_->snp_req_valid = true; + vortex_->snp_rsp_ready = true; for (;;) { this->step(); + if (vortex_->snp_rsp_valid) { + --outstanding_snp_reqs; + } if (vortex_->snp_req_valid && vortex_->snp_req_ready) { + ++outstanding_snp_reqs; vortex_->snp_req_addr += 1; if (vortex_->snp_req_addr >= aligned_addr_end) { - vortex_->snp_req_valid = false; - break; + vortex_->snp_req_valid = false; } } - } - 
this->wait(PIPELINE_FLUSH_LATENCY); + if (!vortex_->snp_req_valid + && 0 == outstanding_snp_reqs) { + break; + } + } } bool Simulator::run() { diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index b7238996..92057fe3 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -18,7 +18,6 @@ #define DRAM_LATENCY 100 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16 -#define PIPELINE_FLUSH_LATENCY 1000 typedef struct { int cycles_left;