From 330bbc4f5654db8ab2acc7d9c5b94d07c4f72782 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 6 May 2020 09:05:10 -0400 Subject: [PATCH] rtl gpr multicore fix --- driver/rtlsim/Makefile | 3 + driver/tests/demo/demo.cpp | 10 +- hw/rtl/VX_config.vh | 25 ----- hw/rtl/VX_define.vh | 15 +++ hw/rtl/VX_dram_arb.v | 42 +++---- hw/rtl/Vortex.v | 12 +- hw/rtl/Vortex_Cluster.v | 148 ++++++++++++++----------- hw/rtl/Vortex_Socket.v | 58 +++++----- hw/rtl/cache/VX_bank.v | 39 +++---- hw/rtl/cache/VX_cache.v | 4 +- hw/rtl/cache/VX_cache_config.vh | 8 +- hw/rtl/cache/VX_cache_core_rsp_merge.v | 10 +- hw/rtl/cache/VX_cache_dfq_queue.v | 4 +- hw/rtl/cache/VX_cache_dram_req_arb.v | 2 +- hw/rtl/cache/VX_cache_miss_resrv.v | 4 +- hw/rtl/cache/VX_cache_req_queue.v | 6 +- hw/rtl/cache/VX_snp_fwd_arb.v | 4 +- hw/simulate/simulator.cpp | 16 +-- 18 files changed, 210 insertions(+), 200 deletions(-) diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index f3e8f0f5..81a4dcdf 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -4,6 +4,9 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2 +#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 +MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 + DEBUG = 1 CFLAGS += -fPIC diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 08d7c404..32c30dbe 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -116,9 +116,9 @@ int main(int argc, char *argv[]) { // parse command arguments parse_args(argc, argv); - uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); - uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); - uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS); + uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); + uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); + uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS); uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS); if (data_stride == 0xffffffff) { @@ -204,9 +204,9 @@ int main(int argc, char *argv[]) { // upload kernel argument std::cout << "upload kernel argument" << std::endl; { - kernel_arg.num_warps = max_warps; + kernel_arg.num_warps = max_warps; kernel_arg.num_threads = max_threads; - kernel_arg.stride = data_stride; + kernel_arg.stride = data_stride; auto buf_ptr = (int*)vx_host_ptr(buffer); memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a90c5499..26bd155b 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -87,11 +87,6 @@ `define DWORD_SIZE 4 `endif -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef DNUM_REQUESTS -`define DNUM_REQUESTS `NUM_THREADS -`endif - // Number of cycles to complete stage 1 (read from memory) `ifndef DSTAGE_1_CYCLES `define DSTAGE_1_CYCLES 1 @@ -178,11 +173,6 @@ `define IWORD_SIZE 4 `endif -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef INUM_REQUESTS -`define INUM_REQUESTS 1 -`endif - // Number of cycles to complete stage 1 (read from memory) `ifndef ISTAGE_1_CYCLES `define ISTAGE_1_CYCLES 1 @@ -269,11 +259,6 @@ `define SWORD_SIZE 4 `endif -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef SNUM_REQUESTS -`define SNUM_REQUESTS `NUM_THREADS -`endif - // Number of cycles to complete stage 1 (read from memory) `ifndef SSTAGE_1_CYCLES `define SSTAGE_1_CYCLES 1 @@ -360,11 +345,6 @@ `define L2WORD_SIZE `L2BANK_LINE_SIZE `endif -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef L2NUM_REQUESTS -`define L2NUM_REQUESTS (2*`NUM_CORES) -`endif - // Number of cycles to complete stage 1 (read from memory) `ifndef L2STAGE_1_CYCLES `define L2STAGE_1_CYCLES 1 @@ -451,11 +431,6 @@ `define L3WORD_SIZE `L3BANK_LINE_SIZE `endif -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef L3NUM_REQUESTS -`define L3NUM_REQUESTS `NUM_CLUSTERS -`endif - // Number of cycles to complete stage 1 (read from memory) `ifndef L3STAGE_1_CYCLES `define L3STAGE_1_CYCLES 1 diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 188145d0..ac39b969 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -125,6 +125,9 @@ // DRAM request tag bits `define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`define DNUM_REQUESTS `NUM_THREADS + ////////////////////////// Icache Configurable Knobs ////////////////////////// // DRAM request data bits @@ -136,6 +139,9 @@ // DRAM request tag bits `define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`define INUM_REQUESTS 1 + ////////////////////////// SM Configurable Knobs ////////////////////////////// // DRAM request data bits @@ -147,6 +153,9 @@ // DRAM request tag bits `define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`define SNUM_REQUESTS `NUM_THREADS + ////////////////////////// L2cache Configurable Knobs ///////////////////////// // DRAM request data bits @@ -158,6 +167,9 @@ // DRAM request tag bits `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2))) +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`define L2NUM_REQUESTS (2*`NUM_CORES) + ////////////////////////// L3cache Configurable Knobs ///////////////////////// // DRAM request data bits @@ -169,5 +181,8 @@ // DRAM request tag bits `define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`define L3NUM_REQUESTS `NUM_CLUSTERS + // VX_DEFINE `endif diff --git a/hw/rtl/VX_dram_arb.v b/hw/rtl/VX_dram_arb.v index 3dc0cdad..22a42df5 100644 --- a/hw/rtl/VX_dram_arb.v +++ b/hw/rtl/VX_dram_arb.v @@ -37,33 +37,32 @@ module VX_dram_arb #( input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready ); - reg [`LOG2UP(NUM_REQUESTS)-1:0] bus_sel; + reg [`REQS_BITS-1:0] bus_req_idx; always @(posedge clk) begin if (reset) begin - bus_sel <= 0; + bus_req_idx <= 0; end else begin - bus_sel <= bus_sel + 1; + bus_req_idx <= bus_req_idx + 1; end end integer i; - generate always @(*) begin - dram_req_read = 'z; - dram_req_write = 'z; + dram_req_read = 0; + dram_req_write = 0; dram_req_addr = 'z; dram_req_data = 'z; dram_req_tag = 'z; for (i = 0; i < NUM_REQUESTS; i++) begin - if (bus_sel == (`LOG2UP(NUM_REQUESTS))'(i)) begin + if (bus_req_idx == (`REQS_BITS)'(i)) begin dram_req_read = core_req_read[i]; dram_req_write = core_req_write[i]; dram_req_addr = core_req_addr[i]; dram_req_data = core_req_data[i]; - dram_req_tag = {core_req_tag[i], (`LOG2UP(NUM_REQUESTS))'(i)}; + dram_req_tag = {core_req_tag[i], (`REQS_BITS)'(i)}; core_req_ready[i] = dram_req_ready; end else begin core_req_ready[i] = 0; @@ -72,24 +71,13 @@ module VX_dram_arb #( end endgenerate - reg is_valid; - - generate - always @(*) begin - dram_rsp_ready = 0; - - for (i = 0; i < NUM_REQUESTS; i++) begin - is_valid = (dram_rsp_tag[`LOG2UP(NUM_REQUESTS)-1:0] == (`LOG2UP(NUM_REQUESTS))'(i)); - - core_rsp_valid[i] = dram_rsp_valid & is_valid; - core_rsp_data[i] = dram_rsp_data; - core_rsp_tag[i] = dram_rsp_tag[`LOG2UP(NUM_REQUESTS) +: CORE_TAG_WIDTH]; - - if (is_valid) begin - dram_rsp_ready = core_rsp_ready[i]; - end - end - end - endgenerate + genvar j; + wire [`REQS_BITS-1:0] bus_rsp_idx = dram_rsp_tag[`REQS_BITS-1:0]; + for (j = 0; j < NUM_REQUESTS; j++) begin + assign core_rsp_valid[j] = dram_rsp_valid && (bus_rsp_idx == (`REQS_BITS)'(j)); + assign core_rsp_data[j] = dram_rsp_data; + assign core_rsp_tag[j] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH]; + end + assign dram_rsp_ready = core_rsp_ready[bus_rsp_idx]; endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 3a975bd0..ba9c1de7 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -37,9 +37,9 @@ module Vortex #( output wire I_dram_rsp_ready, // Cache Snooping - input wire llc_snp_req_valid, - input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, - output wire llc_snp_req_ready, + input wire snp_req_valid, + input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, + output wire snp_req_ready, // I/O request output wire io_req_read, @@ -174,9 +174,9 @@ module Vortex #( // Cache snooping VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); - assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid; - assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr; - assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready; + assign dcache_snp_req_if.snp_req_valid = snp_req_valid; + assign dcache_snp_req_if.snp_req_addr = snp_req_addr; + assign snp_req_ready = dcache_snp_req_if.snp_req_ready; VX_front_end front_end ( .clk (clk), diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 506578b4..bc234902 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -23,9 +23,9 @@ module Vortex_Cluster #( output wire dram_rsp_ready, // Cache Snooping - input wire llc_snp_req_valid, - input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, - output wire llc_snp_req_ready, + input wire snp_req_valid, + input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, + output wire snp_req_ready, // I/O request output wire io_req_read, @@ -69,8 +69,8 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; - wire snp_fwd_valid; - wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr; + wire[`NUM_CORES-1:0] per_core_snp_fwd_valid; + wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_fwd_addr; wire[`NUM_CORES-1:0] per_core_snp_fwd_ready; `IGNORE_WARNINGS_BEGIN @@ -118,9 +118,9 @@ module Vortex_Cluster #( .I_dram_rsp_data (per_core_I_dram_rsp_data [i]), .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), - .llc_snp_req_valid (snp_fwd_valid), - .llc_snp_req_addr (snp_fwd_addr), - .llc_snp_req_ready (per_core_snp_fwd_ready [i]), + .snp_req_valid (per_core_snp_fwd_valid [i]), + .snp_req_addr (per_core_snp_fwd_addr [i]), + .snp_req_ready (per_core_snp_fwd_ready [i]), .io_req_read (per_core_io_req_read [i]), .io_req_write (per_core_io_req_write [i]), @@ -167,7 +167,11 @@ module Vortex_Cluster #( wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid; wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data; wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag; - wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_ready; + wire l2_core_rsp_ready; + + wire l2_snp_fwd_valid; + wire[`L3DRAM_ADDR_WIDTH-1:0] l2_snp_fwd_addr; + wire l2_snp_fwd_ready; for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]); @@ -191,19 +195,22 @@ module Vortex_Cluster #( assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready; assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready; - assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i]; - assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1]; + assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i] && l2_core_rsp_ready; + assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1] && l2_core_rsp_ready; assign per_core_D_dram_rsp_data [(i/2)] = l2_core_rsp_data[i]; assign per_core_I_dram_rsp_data [(i/2)] = l2_core_rsp_data[i+1]; assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i]; - assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; + assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; - assign l2_core_rsp_ready [i] = per_core_D_dram_rsp_ready [(i/2)]; - assign l2_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; + assign per_core_snp_fwd_valid [(i/2)] = l2_snp_fwd_valid && l2_snp_fwd_ready; + assign per_core_snp_fwd_addr [(i/2)] = l2_snp_fwd_addr; end + assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); + assign l2_snp_fwd_ready = (& per_core_snp_fwd_ready); + VX_cache #( .CACHE_SIZE (`L2CACHE_SIZE), .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), @@ -246,7 +253,7 @@ module Vortex_Cluster #( .core_rsp_valid (l2_core_rsp_valid), .core_rsp_data (l2_core_rsp_data), .core_rsp_tag (l2_core_rsp_tag), - .core_rsp_ready (& l2_core_rsp_ready), + .core_rsp_ready (l2_core_rsp_ready), // DRAM request .dram_req_read (dram_req_read), @@ -263,61 +270,74 @@ module Vortex_Cluster #( .dram_rsp_ready (dram_rsp_ready), // Snoop request - .snp_req_valid (llc_snp_req_valid), - .snp_req_addr (llc_snp_req_addr), - .snp_req_ready (llc_snp_req_ready), + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_ready (snp_req_ready), // Snoop forwarding - .snp_fwd_valid (snp_fwd_valid), - .snp_fwd_addr (snp_fwd_addr), - .snp_fwd_ready (& per_core_snp_fwd_ready) + .snp_fwd_valid (l2_snp_fwd_valid), + .snp_fwd_addr (l2_snp_fwd_addr), + .snp_fwd_ready (l2_snp_fwd_ready) ); end else begin - wire[`L2NUM_REQUESTS-1:0] per_core_req_read; - wire[`L2NUM_REQUESTS-1:0] per_core_req_write; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_req_addr; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_req_tag; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_req_data; - wire[`L2NUM_REQUESTS-1:0] per_core_req_ready; + wire[`L2NUM_REQUESTS-1:0] arb_core_req_read; + wire[`L2NUM_REQUESTS-1:0] arb_core_req_write; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_core_req_addr; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_req_tag; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_req_data; + wire[`L2NUM_REQUESTS-1:0] arb_core_req_ready; - wire[`L2NUM_REQUESTS-1:0] per_core_rsp_valid; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_rsp_data; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_rsp_tag; - wire[`L2NUM_REQUESTS-1:0] per_core_rsp_ready; + wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_valid; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_rsp_data; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag; + wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready; + + wire arb_snp_fwd_valid; + wire[`L3DRAM_ADDR_WIDTH-1:0] arb_snp_fwd_addr; + wire arb_snp_fwd_ready; for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin - assign per_core_req_read [i] = per_core_D_dram_req_read[(i/2)]; - assign per_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)]; + assign arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)]; + assign arb_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)]; - assign per_core_req_write [i] = per_core_D_dram_req_write[(i/2)]; - assign per_core_req_write [i+1] = 0; + assign arb_core_req_write [i] = per_core_D_dram_req_write[(i/2)]; + assign arb_core_req_write [i+1] = 0; - assign per_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; - assign per_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; + assign arb_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; + assign arb_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; - assign per_core_req_data [i] = per_core_D_dram_req_data[(i/2)]; - assign per_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; + assign arb_core_req_data [i] = per_core_D_dram_req_data[(i/2)]; + assign arb_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; - assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; - assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; + assign arb_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; + assign arb_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; - assign per_core_D_dram_req_ready [(i/2)] = per_core_req_ready[i]; - assign per_core_I_dram_req_ready [(i/2)] = per_core_req_ready[i+1]; + assign per_core_D_dram_req_ready [(i/2)] = arb_core_req_ready[i]; + assign per_core_I_dram_req_ready [(i/2)] = arb_core_req_ready[i+1]; - assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i]; - assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1]; + assign per_core_D_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i]; + assign per_core_I_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i+1]; - assign per_core_D_dram_rsp_data [(i/2)] = per_core_rsp_data[i]; - assign per_core_I_dram_rsp_data [(i/2)] = per_core_rsp_data[i+1]; + assign per_core_D_dram_rsp_data [(i/2)] = arb_core_rsp_data[i]; + assign per_core_I_dram_rsp_data [(i/2)] = arb_core_rsp_data[i+1]; - assign per_core_D_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i]; - assign per_core_I_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i+1]; + assign per_core_D_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i]; + assign per_core_I_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i+1]; - assign per_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; - assign per_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; + assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; + assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; + + assign per_core_snp_fwd_valid [(i/2)] = arb_snp_fwd_valid && arb_snp_fwd_ready; + assign per_core_snp_fwd_addr [(i/2)] = arb_snp_fwd_addr; end + + assign arb_snp_fwd_valid = snp_req_valid; + assign arb_snp_fwd_addr = snp_req_addr; + assign arb_snp_fwd_ready = (& per_core_snp_fwd_ready); + + assign snp_req_ready = arb_snp_fwd_ready; VX_dram_arb #( .BANK_LINE_SIZE (`L2BANK_LINE_SIZE), @@ -329,18 +349,18 @@ module Vortex_Cluster #( .reset (reset), // Core request - .core_req_read (per_core_req_read), - .core_req_write (per_core_req_write), - .core_req_addr (per_core_req_addr), - .core_req_data (per_core_req_data), - .core_req_tag (per_core_req_tag), - .core_req_ready (per_core_req_ready), + .core_req_read (arb_core_req_read), + .core_req_write (arb_core_req_write), + .core_req_addr (arb_core_req_addr), + .core_req_data (arb_core_req_data), + .core_req_tag (arb_core_req_tag), + .core_req_ready (arb_core_req_ready), // Core response - .core_rsp_valid (per_core_rsp_valid), - .core_rsp_data (per_core_rsp_data), - .core_rsp_tag (per_core_rsp_tag), - .core_rsp_ready (per_core_rsp_ready), + .core_rsp_valid (arb_core_rsp_valid), + .core_rsp_data (arb_core_rsp_data), + .core_rsp_tag (arb_core_rsp_tag), + .core_rsp_ready (arb_core_rsp_ready), // DRAM request .dram_req_read (dram_req_read), @@ -356,11 +376,7 @@ module Vortex_Cluster #( .dram_rsp_data (dram_rsp_data), .dram_rsp_ready (dram_rsp_ready) ); - - // Cache snooping - assign snp_fwd_valid = llc_snp_req_valid; - assign snp_fwd_addr = llc_snp_req_addr; - assign llc_snp_req_ready = & per_core_snp_fwd_ready; + end endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 7a2d379f..964cc275 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -21,9 +21,9 @@ module Vortex_Socket ( output wire dram_rsp_ready, // Cache snooping - input wire llc_snp_req_valid, - input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr, - output wire llc_snp_req_ready, + input wire snp_req_valid, + input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr, + output wire snp_req_ready, // I/O request output wire io_req_read, @@ -64,9 +64,9 @@ module Vortex_Socket ( .dram_rsp_tag (dram_rsp_tag), .dram_rsp_ready (dram_rsp_ready), - .llc_snp_req_valid (llc_snp_req_valid), - .llc_snp_req_addr (llc_snp_req_addr), - .llc_snp_req_ready (llc_snp_req_ready), + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_ready (snp_req_ready), .io_req_read (io_req_read), .io_req_write (io_req_write), @@ -99,8 +99,8 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; - wire snp_fwd_valid; - wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr; + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_valid; + wire[`NUM_CLUSTERS-1:0][`L3DRAM_ADDR_WIDTH-1:0] per_cluster_snp_fwd_addr; wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready; `IGNORE_WARNINGS_BEGIN @@ -137,9 +137,9 @@ module Vortex_Socket ( .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), - .llc_snp_req_valid (snp_fwd_valid), - .llc_snp_req_addr (snp_fwd_addr), - .llc_snp_req_ready (per_cluster_snp_fwd_ready [i]), + .snp_req_valid (per_cluster_snp_fwd_valid [i]), + .snp_req_addr (per_cluster_snp_fwd_addr [i]), + .snp_req_ready (per_cluster_snp_fwd_ready [i]), .io_req_read (per_cluster_io_req_read [i]), .io_req_write (per_cluster_io_req_write [i]), @@ -183,7 +183,11 @@ module Vortex_Socket ( wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_valid; wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data; wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; - wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_ready; + wire l3_core_rsp_ready; + + wire l3_snp_fwd_valid; + wire[`L3DRAM_ADDR_WIDTH-1:0] l3_snp_fwd_addr; + wire l3_snp_fwd_ready; for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin // Core Request @@ -192,17 +196,21 @@ module Vortex_Socket ( assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `BYTE_EN_LW : `BYTE_EN_NO; assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}}; assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i]; - assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; + assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; - // Core Response - assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i]; - - // Cache Fill Response - assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i]; + // Core Response + assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i] && l3_core_rsp_ready; assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i]; assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i]; + + // Snoop Forwarding + assign per_cluster_snp_fwd_valid [i] = l3_snp_fwd_valid && l3_snp_fwd_ready; + assign per_cluster_snp_fwd_addr [i] = l3_snp_fwd_addr; end + assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); + assign l3_snp_fwd_ready = (& per_cluster_snp_fwd_ready); + VX_cache #( .CACHE_SIZE (`L3CACHE_SIZE), .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), @@ -245,7 +253,7 @@ module Vortex_Socket ( .core_rsp_valid (l3_core_rsp_valid), .core_rsp_data (l3_core_rsp_data), .core_rsp_tag (l3_core_rsp_tag), - .core_rsp_ready (& l3_core_rsp_ready), + .core_rsp_ready (l3_core_rsp_ready), // DRAM request .dram_req_write (dram_req_write), @@ -262,14 +270,14 @@ module Vortex_Socket ( .dram_rsp_ready (dram_rsp_ready), // Snoop request - .snp_req_valid (llc_snp_req_valid), - .snp_req_addr (llc_snp_req_addr), - .snp_req_ready (llc_snp_req_ready), + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_ready (snp_req_ready), // Snoop forwarding - .snp_fwd_valid (snp_fwd_valid), - .snp_fwd_addr (snp_fwd_addr), - .snp_fwd_ready (& per_cluster_snp_fwd_ready) + .snp_fwd_valid (l3_snp_fwd_valid), + .snp_fwd_addr (l3_snp_fwd_addr), + .snp_fwd_ready (l3_snp_fwd_ready) ); end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 93dd3bcf..f03b7874 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -69,7 +69,7 @@ module VX_bank #( // Core Response output wire core_rsp_valid, - output wire [`LOG2UP(NUM_REQUESTS)-1:0] core_rsp_tid, + output wire [`REQS_BITS-1:0] core_rsp_tid, output wire [`WORD_WIDTH-1:0] core_rsp_data, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, input wire core_rsp_pop, @@ -156,18 +156,18 @@ module VX_bank #( .full (dfpq_full) ); - wire reqq_pop; - wire reqq_push; - wire reqq_empty; - wire reqq_req_st0; - wire[`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0; + wire reqq_pop; + wire reqq_push; + wire reqq_empty; + wire reqq_req_st0; + wire[`REQS_BITS-1:0] reqq_req_tid_st0; `IGNORE_WARNINGS_BEGIN - wire [31:0] reqq_req_addr_st0; + wire [31:0] reqq_req_addr_st0; `IGNORE_WARNINGS_END - wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0; - wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0; - wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0; - wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0; + wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0; + wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0; + wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0; + wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0; assign reqq_push = core_req_ready && (| core_req_valids); @@ -218,7 +218,7 @@ module VX_bank #( wire mrvq_full; wire mrvq_stop; wire mrvq_valid_st0; - wire[`LOG2UP(NUM_REQUESTS)-1:0] mrvq_tid_st0; + wire[`REQS_BITS-1:0] mrvq_tid_st0; wire [`LINE_ADDR_WIDTH-1:0] mrvq_addr_st0; wire [`BASE_ADDR_BITS-1:0] mrvq_wsel_st0; wire [`WORD_WIDTH-1:0] mrvq_writeword_st0; @@ -230,7 +230,7 @@ module VX_bank #( wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr; wire[`BASE_ADDR_BITS-1:0] miss_add_wsel; wire[`WORD_WIDTH-1:0] miss_add_data; - wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid; + wire[`REQS_BITS-1:0] miss_add_tid; wire[CORE_TAG_WIDTH-1:0] miss_add_tag; wire[`BYTE_EN_BITS-1:0] miss_add_mem_read; wire[`BYTE_EN_BITS-1:0] miss_add_mem_write; @@ -348,7 +348,7 @@ module VX_bank #( wire dirty_st1e; `DEBUG_BEGIN wire [CORE_TAG_WIDTH-1:0] tag_st1e; - wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e; + wire [`REQS_BITS-1:0] tid_st1e; `DEBUG_END wire [`BYTE_EN_BITS-1:0] mem_read_st1e; wire [`BYTE_EN_BITS-1:0] mem_write_st1e; @@ -515,14 +515,15 @@ module VX_bank #( || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); - wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; - wire [`LOG2UP(NUM_REQUESTS)-1:0] cwbq_tid = miss_add_tid; - wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag; + wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; + wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid; + wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag; - wire cwbq_empty; + wire cwbq_empty; assign core_rsp_valid = !cwbq_empty; + VX_generic_queue #( - .DATAW(`LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `WORD_WIDTH), + .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), .SIZE(CWBQ_SIZE) ) cwb_queue ( .clk (clk), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 759201b2..5eb4e69e 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -109,7 +109,7 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_core_rsp_pop; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; - wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_core_rsp_tid; + wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; @@ -173,7 +173,7 @@ module VX_cache #( wire curr_bank_core_rsp_pop; wire curr_bank_core_rsp_valid; - wire [`LOG2UP(NUM_REQUESTS)-1:0] curr_bank_core_rsp_tid; + wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 9a753d24..5cc6d422 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -12,10 +12,14 @@ `define BYTE_EN_BITS 3 // data tid tag read write base addr -`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS) +`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS) // tag read write reqs -`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `LOG2UP(NUM_REQUESTS)) +`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `REQS_BITS) + +`define REQS_BITS `LOG2UP(NUM_REQUESTS) + +`define BANK_BITS `LOG2UP(NUM_BANKS) `define WORD_WIDTH (8 * WORD_SIZE) `define BYTE_WIDTH (`WORD_WIDTH / 4) diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 68d9b155..acaf1d1c 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -46,7 +46,7 @@ module VX_cache_core_rsp_merge #( parameter DRAM_TAG_WIDTH = 1 ) ( // Per Bank WB - input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_core_rsp_tid, + input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, @@ -63,8 +63,8 @@ module VX_cache_core_rsp_merge #( assign per_bank_core_rsp_pop = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; - wire [`LOG2UP(NUM_BANKS)-1:0] main_bank_index; - wire found_bank; + wire [`BANK_BITS-1:0] main_bank_index; + wire found_bank; VX_generic_priority_encoder #( .N(NUM_BANKS) @@ -86,7 +86,7 @@ module VX_cache_core_rsp_merge #( if (found_bank && per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] - && ((main_bank_index == `LOG2UP(NUM_BANKS)'(i)) + && ((main_bank_index == `BANK_BITS'(i)) || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index])) && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin core_rsp_valid[per_bank_core_rsp_tid[i]] = 1; @@ -106,7 +106,7 @@ module VX_cache_core_rsp_merge #( if (found_bank && per_bank_core_rsp_valid[i] && !core_rsp_valid[per_bank_core_rsp_tid[i]] - && ((main_bank_index == `LOG2UP(NUM_BANKS)'(i)) + && ((main_bank_index == `BANK_BITS'(i)) || (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin core_rsp_valid[per_bank_core_rsp_tid[i]] = 1; core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; diff --git a/hw/rtl/cache/VX_cache_dfq_queue.v b/hw/rtl/cache/VX_cache_dfq_queue.v index d1fad1ed..b651e024 100644 --- a/hw/rtl/cache/VX_cache_dfq_queue.v +++ b/hw/rtl/cache/VX_cache_dfq_queue.v @@ -86,8 +86,8 @@ module VX_cache_dfq_queue #( assign use_per_bqual_bank_dram_fill_req_valid = use_empty ? (out_per_bank_dram_fill_req_valid & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req_valid & {NUM_BANKS{!use_empty}}); assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr; - wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index; - wire qual_has_request; + wire[`BANK_BITS-1:0] qual_request_index; + wire qual_has_request; VX_generic_priority_encoder #( .N(NUM_BANKS) diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index dbe16f49..fa895762 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -113,7 +113,7 @@ module VX_cache_dram_req_arb #( .dfqq_full (dfqq_full) ); - wire [`LOG2UP(NUM_BANKS)-1:0] dwb_bank; + wire [`BANK_BITS-1:0] dwb_bank; wire [NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index eb8e1440..ac444caa 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -48,7 +48,7 @@ module VX_cache_miss_resrv #( input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr, input wire[`BASE_ADDR_BITS-1:0] miss_add_wsel, input wire[`WORD_WIDTH-1:0] miss_add_data, - input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid, + input wire[`REQS_BITS-1:0] miss_add_tid, input wire[CORE_TAG_WIDTH-1:0] miss_add_tag, input wire[`BYTE_EN_BITS-1:0] miss_add_mem_read, input wire[`BYTE_EN_BITS-1:0] miss_add_mem_write, @@ -69,7 +69,7 @@ module VX_cache_miss_resrv #( output wire[`LINE_ADDR_WIDTH-1:0] miss_resrv_addr_st0, output wire[`BASE_ADDR_BITS-1:0] miss_resrv_wsel_st0, output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0, - output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0, + output wire[`REQS_BITS-1:0] miss_resrv_tid_st0, output wire[CORE_TAG_WIDTH-1:0] miss_resrv_tag_st0, output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_read_st0, output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_write_st0 diff --git a/hw/rtl/cache/VX_cache_req_queue.v b/hw/rtl/cache/VX_cache_req_queue.v index 2325e606..a8552e8b 100644 --- a/hw/rtl/cache/VX_cache_req_queue.v +++ b/hw/rtl/cache/VX_cache_req_queue.v @@ -58,7 +58,7 @@ module VX_cache_req_queue #( // Dequeue Data input wire reqq_pop, output wire reqq_req_st0, - output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0, + output wire [`REQS_BITS-1:0] reqq_req_tid_st0, output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0, output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0, output wire [`WORD_WIDTH-1:0] reqq_req_writedata_st0, @@ -126,8 +126,8 @@ module VX_cache_req_queue #( assign qual_mem_read = use_per_mem_read; assign qual_mem_write = use_per_mem_write; - wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index; - wire qual_has_request; + wire[`REQS_BITS-1:0] qual_request_index; + wire qual_has_request; VX_generic_priority_encoder #( .N(NUM_REQUESTS) diff --git a/hw/rtl/cache/VX_snp_fwd_arb.v b/hw/rtl/cache/VX_snp_fwd_arb.v index 2d87bf86..703f9a65 100644 --- a/hw/rtl/cache/VX_snp_fwd_arb.v +++ b/hw/rtl/cache/VX_snp_fwd_arb.v @@ -15,8 +15,8 @@ module VX_snp_fwd_arb #( wire [NUM_BANKS-1:0] qual_per_bank_snp_fwd = per_bank_snp_fwd_valid & {NUM_BANKS{snp_fwd_ready}}; - wire [`LOG2UP(NUM_BANKS)-1:0] fsq_bank; - wire fsq_valid; + wire [`BANK_BITS-1:0] fsq_bank; + wire fsq_valid; VX_generic_priority_encoder #( .N(NUM_BANKS) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 8dee1c7c..ea4f68e8 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -157,18 +157,18 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; // submit snoop requests for the needed blocks - vortex_->llc_snp_req_addr = aligned_addr_start; - vortex_->llc_snp_req_valid = false; + vortex_->snp_req_addr = aligned_addr_start; + vortex_->snp_req_valid = false; for (;;) { this->step(); - if (vortex_->llc_snp_req_valid) { - vortex_->llc_snp_req_valid = false; - if (vortex_->llc_snp_req_addr >= aligned_addr_end) + if (vortex_->snp_req_valid) { + vortex_->snp_req_valid = false; + if (vortex_->snp_req_addr >= aligned_addr_end) break; - vortex_->llc_snp_req_addr += 1; + vortex_->snp_req_addr += 1; } - if (vortex_->llc_snp_req_ready) { - vortex_->llc_snp_req_valid = true; + if (vortex_->snp_req_ready) { + vortex_->snp_req_valid = true; } } this->wait(PIPELINE_FLUSH_LATENCY);