diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 90109354..7b8f01cb 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -6,7 +6,7 @@ set -e show_usage() { echo "Vortex BlackBox Test Driver v1.0" - echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=] [--help]]" + echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=] [--help]]" } DRIVER=vlsim @@ -16,6 +16,7 @@ CORES=2 WARPS=4 THREADS=4 L2=0 +L3=0 DEBUG=0 SCOPE=0 HAS_ARGS=0 @@ -51,6 +52,10 @@ case $i in L2=1 shift ;; + --l3cache) + L3=1 + shift + ;; --debug) DEBUG=1 shift @@ -112,7 +117,7 @@ case $APP in ;; esac -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2" +CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3" echo "CONFIGS=$CONFIGS" diff --git a/hw/opae/VX_avs_wrapper.v b/hw/opae/VX_avs_wrapper.v index ff0b5f98..c24ae25a 100644 --- a/hw/opae/VX_avs_wrapper.v +++ b/hw/opae/VX_avs_wrapper.v @@ -71,7 +71,8 @@ module VX_avs_wrapper #( VX_generic_queue #( .DATAW (REQ_TAGW), - .SIZE (RD_QUEUE_SIZE) + .SIZE (RD_QUEUE_SIZE), + .BUFFERED (1) ) rd_req_queue ( .clk (clk), .reset (reset), @@ -86,7 +87,8 @@ module VX_avs_wrapper #( VX_generic_queue #( .DATAW (AVS_DATAW), - .SIZE (RD_QUEUE_SIZE) + .SIZE (RD_QUEUE_SIZE), + .BUFFERED (1) ) rd_rsp_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 6c72b45c..5ff4a768 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -255,51 +255,55 @@ module VX_cluster #( assign busy = (| per_core_busy); assign ebreak = (| per_core_ebreak); + wire snp_fwd_rsp_valid; + wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; + wire snp_fwd_rsp_inv; + wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; + wire snp_fwd_rsp_ready; + + VX_snp_forwarder #( + .CACHE_ID (`L2CACHE_ID), + .NUM_REQS (`NUM_CORES), + .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), + .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), + .SNRQ_SIZE (`L2SNRQ_SIZE), + .TAG_IN_WIDTH (`L2SNP_TAG_WIDTH), + .TAG_OUT_WIDTH (`DSNP_TAG_WIDTH) + ) snp_forwarder ( + .clk (clk), + .reset (reset), + + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_inv (snp_req_inv), + .snp_req_tag (snp_req_tag), + .snp_req_ready (snp_req_ready), + + .snp_rsp_valid (snp_fwd_rsp_valid), + .snp_rsp_addr (snp_fwd_rsp_addr), + .snp_rsp_inv (snp_fwd_rsp_inv), + .snp_rsp_tag (snp_fwd_rsp_tag), + .snp_rsp_ready (snp_fwd_rsp_ready), + + .snp_fwdout_valid (per_core_snp_req_valid), + .snp_fwdout_addr (per_core_snp_req_addr), + .snp_fwdout_inv (per_core_snp_req_inv), + .snp_fwdout_tag (per_core_snp_req_tag), + .snp_fwdout_ready (per_core_snp_req_ready), + + .snp_fwdin_valid (per_core_snp_rsp_valid), + .snp_fwdin_tag (per_core_snp_rsp_tag), + .snp_fwdin_ready (per_core_snp_rsp_ready) + ); + if (`L2_ENABLE) begin - // L2 Cache /////////////////////////////////////////////////////////// - - wire [`NUM_CORES-1:0] core_dram_req_valid; - wire [`NUM_CORES-1:0] core_dram_req_rw; - wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen; - wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr; - wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag; - wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data; - wire core_dram_req_ready; - wire [`NUM_CORES-1:0] core_dram_rsp_valid; wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data; wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag; wire core_dram_rsp_ready; - wire [`NUM_CORES-1:0] core_snp_fwdout_valid; - wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; - wire [`NUM_CORES-1:0] core_snp_fwdout_inv; - wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; - wire [`NUM_CORES-1:0] core_snp_fwdout_ready; - - wire [`NUM_CORES-1:0] core_snp_fwdin_valid; - wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag; - wire [`NUM_CORES-1:0] core_snp_fwdin_ready; - - wire snp_fwd_rsp_valid; - wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_inv; - wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; - wire snp_fwd_rsp_ready; - - for (genvar i = 0; i < `NUM_CORES; i++) begin - assign core_dram_req_valid [i] = per_core_dram_req_valid [i]; - assign core_dram_req_rw [i] = per_core_dram_req_rw [i]; - assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i]; - assign core_dram_req_addr [i] = per_core_dram_req_addr [i]; - assign core_dram_req_data [i] = per_core_dram_req_data [i]; - assign core_dram_req_tag [i] = per_core_dram_req_tag [i]; - assign per_core_dram_req_ready [i] = core_dram_req_ready; - end - reg [`NUM_CORES-1:0] core_dram_rsp_ready_other; - always @(*) begin core_dram_rsp_ready_other = {`NUM_CORES{1'b1}}; for (integer i = 0; i < `NUM_CORES; i++) begin @@ -318,51 +322,10 @@ module VX_cluster #( end assign core_dram_rsp_ready = & (per_core_dram_rsp_ready | ~core_dram_rsp_valid); + wire core_dram_req_ready; for (genvar i = 0; i < `NUM_CORES; i++) begin - assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i]; - assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i]; - assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i]; - assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i]; - assign core_snp_fwdout_ready [i] = per_core_snp_req_ready[i]; - - assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i]; - assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i]; - assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i]; - end - - VX_snp_forwarder #( - .CACHE_ID (`L2CACHE_ID), - .NUM_REQS (`NUM_CORES), - .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), - .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), - .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH), - .SNRQ_SIZE (`L2SNRQ_SIZE) - ) snp_forwarder ( - .clk (clk), - .reset (reset), - - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), - - .snp_rsp_valid (snp_fwd_rsp_valid), - .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_inv (snp_fwd_rsp_inv), - .snp_rsp_tag (snp_fwd_rsp_tag), - .snp_rsp_ready (snp_fwd_rsp_ready), - - .snp_fwdout_valid (core_snp_fwdout_valid), - .snp_fwdout_addr (core_snp_fwdout_addr), - .snp_fwdout_inv (core_snp_fwdout_inv), - .snp_fwdout_tag (core_snp_fwdout_tag), - .snp_fwdout_ready (core_snp_fwdout_ready), - - .snp_fwdin_valid (core_snp_fwdin_valid), - .snp_fwdin_tag (core_snp_fwdin_tag), - .snp_fwdin_ready (core_snp_fwdin_ready) - ); + assign per_core_dram_req_ready[i] = core_dram_req_ready; + end VX_cache #( .CACHE_ID (`L2CACHE_ID), @@ -392,12 +355,12 @@ module VX_cluster #( .reset (reset), // Core request - .core_req_valid (core_dram_req_valid), - .core_req_rw (core_dram_req_rw), - .core_req_byteen (core_dram_req_byteen), - .core_req_addr (core_dram_req_addr), - .core_req_data (core_dram_req_data), - .core_req_tag (core_dram_req_tag), + .core_req_valid (per_core_dram_req_valid), + .core_req_rw (per_core_dram_req_rw), + .core_req_byteen (per_core_dram_req_byteen), + .core_req_addr (per_core_dram_req_addr), + .core_req_data (per_core_dram_req_data), + .core_req_tag (per_core_dram_req_tag), .core_req_ready (core_dram_req_ready), // Core response @@ -438,100 +401,6 @@ module VX_cluster #( ); end else begin - - wire[`NUM_CORES-1:0] core_dram_req_valid; - wire[`NUM_CORES-1:0] core_dram_req_rw; - wire[`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr; - wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag; - wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data; - wire[`NUM_CORES-1:0] core_dram_req_ready; - - wire[`NUM_CORES-1:0] core_dram_rsp_valid; - wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data; - wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag; - wire[`NUM_CORES-1:0] core_dram_rsp_ready; - - wire[`NUM_CORES-1:0] core_snp_fwdout_valid; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; - wire[`NUM_CORES-1:0] core_snp_fwdout_inv; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; - wire[`NUM_CORES-1:0] core_snp_fwdout_ready; - - wire[`NUM_CORES-1:0] core_snp_fwdin_valid; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag; - wire[`NUM_CORES-1:0] core_snp_fwdin_ready; - - for (genvar i = 0; i < `NUM_CORES; i++) begin - assign core_dram_req_valid [i] = per_core_dram_req_valid [i]; - assign core_dram_req_rw [i] = per_core_dram_req_rw [i]; - assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i]; - assign core_dram_req_addr [i] = per_core_dram_req_addr [i]; - assign core_dram_req_data [i] = per_core_dram_req_data [i]; - assign core_dram_req_tag [i] = per_core_dram_req_tag [i]; - assign per_core_dram_req_ready [i] = core_dram_req_ready [i]; - - assign per_core_dram_rsp_valid [i] = core_dram_rsp_valid [i]; - assign per_core_dram_rsp_data [i] = core_dram_rsp_data [i]; - assign per_core_dram_rsp_tag [i] = core_dram_rsp_tag [i]; - assign core_dram_rsp_ready [i] = per_core_dram_rsp_ready [i]; - - assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i]; - assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i]; - assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i]; - assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i]; - assign core_snp_fwdout_ready [i] = per_core_snp_req_ready [i]; - - assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i]; - assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i]; - assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i]; - end - - if (`NUM_CORES > 1) begin - VX_snp_forwarder #( - .CACHE_ID (`L2CACHE_ID), - .NUM_REQS (`NUM_CORES), - .SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), - .DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH), - .SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH), - .SNRQ_SIZE (`L2SNRQ_SIZE) - ) snp_forwarder ( - .clk (clk), - .reset (reset), - - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), - - .snp_rsp_valid (snp_rsp_valid), - `UNUSED_PIN (snp_rsp_addr), - `UNUSED_PIN (snp_rsp_inv), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready), - - .snp_fwdout_valid (core_snp_fwdout_valid), - .snp_fwdout_addr (core_snp_fwdout_addr), - .snp_fwdout_inv (core_snp_fwdout_inv), - .snp_fwdout_tag (core_snp_fwdout_tag), - .snp_fwdout_ready (core_snp_fwdout_ready), - - .snp_fwdin_valid (core_snp_fwdin_valid), - .snp_fwdin_tag (core_snp_fwdin_tag), - .snp_fwdin_ready (core_snp_fwdin_ready) - ); - end else begin - assign core_snp_fwdout_valid= snp_req_valid; - assign core_snp_fwdout_addr = snp_req_addr; - assign core_snp_fwdout_inv = snp_req_inv; - assign core_snp_fwdout_tag = snp_req_tag; - assign snp_req_ready = core_snp_fwdout_ready; - - assign snp_rsp_valid = core_snp_fwdin_valid; - assign snp_rsp_tag = core_snp_fwdin_tag; - assign core_snp_fwdin_ready = snp_rsp_ready; - end VX_mem_arb #( .NUM_REQS (`NUM_CORES), @@ -543,13 +412,13 @@ module VX_cluster #( .reset (reset), // Core request - .req_valid_in (core_dram_req_valid), - .req_rw_in (core_dram_req_rw), - .req_byteen_in (core_dram_req_byteen), - .req_addr_in (core_dram_req_addr), - .req_data_in (core_dram_req_data), - .req_tag_in (core_dram_req_tag), - .req_ready_in (core_dram_req_ready), + .req_valid_in (per_core_dram_req_valid), + .req_rw_in (per_core_dram_req_rw), + .req_byteen_in (per_core_dram_req_byteen), + .req_addr_in (per_core_dram_req_addr), + .req_data_in (per_core_dram_req_data), + .req_tag_in (per_core_dram_req_tag), + .req_ready_in (per_core_dram_req_ready), // DRAM request .req_valid_out (dram_req_valid), @@ -561,10 +430,10 @@ module VX_cluster #( .req_ready_out (dram_req_ready), // Core response - .rsp_valid_out (core_dram_rsp_valid), - .rsp_data_out (core_dram_rsp_data), - .rsp_tag_out (core_dram_rsp_tag), - .rsp_ready_out (core_dram_rsp_ready), + .rsp_valid_out (per_core_dram_rsp_valid), + .rsp_data_out (per_core_dram_rsp_data), + .rsp_tag_out (per_core_dram_rsp_tag), + .rsp_ready_out (per_core_dram_rsp_ready), // DRAM response .rsp_valid_in (dram_rsp_valid), @@ -573,6 +442,13 @@ module VX_cluster #( .rsp_ready_in (dram_rsp_ready) ); + `UNUSED_VAR (snp_fwd_rsp_addr) + `UNUSED_VAR (snp_fwd_rsp_inv) + + assign snp_rsp_valid = snp_fwd_rsp_valid; + assign snp_rsp_tag = snp_fwd_rsp_tag; + assign snp_fwd_rsp_ready = snp_rsp_ready; + end endmodule diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 92bd362d..bc7a2558 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -24,11 +24,11 @@ `endif `ifndef L2_ENABLE -`define L2_ENABLE (`NUM_CORES > 2) +`define L2_ENABLE (`NUM_CORES >= 4) `endif `ifndef L3_ENABLE -`define L3_ENABLE (`NUM_CLUSTERS > 1) +`define L3_ENABLE (`NUM_CLUSTERS >= 4) `endif `ifndef GLOBAL_BLOCK_SIZE diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index d8796383..1d00afcb 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -366,7 +366,7 @@ `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES))) // Snoop request tag bits -`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH) +`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH) ////////////////////////// L3cache Configurable Knobs ///////////////////////// @@ -392,10 +392,10 @@ `define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE // DRAM request tag bits -`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) +`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS))) // Snoop request tag bits -`define L3SNP_TAG_WIDTH 16 +`define L3SNP_TAG_WIDTH `VX_SNP_TAG_WIDTH /////////////////////////////////////////////////////////////////////////////// @@ -403,7 +403,7 @@ `define VX_DRAM_ADDR_WIDTH `L3DRAM_ADDR_WIDTH `define VX_DRAM_LINE_WIDTH `L3DRAM_LINE_WIDTH `define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH -`define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH +`define VX_SNP_TAG_WIDTH 16 `define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH `define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES) diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 59a40422..cf88ed21 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -66,275 +66,246 @@ module Vortex ( output wire busy, output wire ebreak ); - if (`NUM_CLUSTERS == 1) begin + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; + + wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw; + wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen; + wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr; + wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; + wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready; + + wire [`NUM_CLUSTERS-1:0] per_cluster_busy; + wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; + + wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_io_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CORES)); + wire [`NC_BITS-1:0] csr_io_core_id = `NC_BITS'(csr_io_req_coreid); + + for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin VX_cluster #( - .CLUSTER_ID(0) + .CLUSTER_ID(i) ) cluster ( - `SCOPE_BIND_Vortex_cluster(0) + `SCOPE_BIND_Vortex_cluster(i) .clk (clk), .reset (reset), - - .dram_req_valid (dram_req_valid), - .dram_req_rw (dram_req_rw), - .dram_req_byteen (dram_req_byteen), - .dram_req_addr (dram_req_addr), - .dram_req_data (dram_req_data), - .dram_req_tag (dram_req_tag), - .dram_req_ready (dram_req_ready), - .dram_rsp_valid (dram_rsp_valid), - .dram_rsp_data (dram_rsp_data), - .dram_rsp_tag (dram_rsp_tag), - .dram_rsp_ready (dram_rsp_ready), + .dram_req_valid (per_cluster_dram_req_valid [i]), + .dram_req_rw (per_cluster_dram_req_rw [i]), + .dram_req_byteen (per_cluster_dram_req_byteen[i]), + .dram_req_addr (per_cluster_dram_req_addr [i]), + .dram_req_data (per_cluster_dram_req_data [i]), + .dram_req_tag (per_cluster_dram_req_tag [i]), + .dram_req_ready (per_cluster_dram_req_ready [i]), - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), + .dram_rsp_valid (per_cluster_dram_rsp_valid [i]), + .dram_rsp_data (per_cluster_dram_rsp_data [i]), + .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), + .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), - .snp_rsp_valid (snp_rsp_valid), - .snp_rsp_tag (snp_rsp_tag), - .snp_rsp_ready (snp_rsp_ready), + .snp_req_valid (per_cluster_snp_req_valid [i]), + .snp_req_addr (per_cluster_snp_req_addr [i]), + .snp_req_inv (per_cluster_snp_req_inv [i]), + .snp_req_tag (per_cluster_snp_req_tag [i]), + .snp_req_ready (per_cluster_snp_req_ready [i]), - .io_req_valid (io_req_valid), - .io_req_rw (io_req_rw), - .io_req_byteen (io_req_byteen), - .io_req_addr (io_req_addr), - .io_req_data (io_req_data), - .io_req_tag (io_req_tag), - .io_req_ready (io_req_ready), + .snp_rsp_valid (per_cluster_snp_rsp_valid [i]), + .snp_rsp_tag (per_cluster_snp_rsp_tag [i]), + .snp_rsp_ready (per_cluster_snp_rsp_ready [i]), - .io_rsp_valid (io_rsp_valid), - .io_rsp_data (io_rsp_data), - .io_rsp_tag (io_rsp_tag), - .io_rsp_ready (io_rsp_ready), + .io_req_valid (per_cluster_io_req_valid [i]), + .io_req_rw (per_cluster_io_req_rw [i]), + .io_req_byteen (per_cluster_io_req_byteen [i]), + .io_req_addr (per_cluster_io_req_addr [i]), + .io_req_data (per_cluster_io_req_data [i]), + .io_req_tag (per_cluster_io_req_tag [i]), + .io_req_ready (per_cluster_io_req_ready [i]), - .csr_io_req_valid (csr_io_req_valid), - .csr_io_req_coreid (csr_io_req_coreid), - .csr_io_req_rw (csr_io_req_rw), - .csr_io_req_addr (csr_io_req_addr), - .csr_io_req_data (csr_io_req_data), - .csr_io_req_ready (csr_io_req_ready), + .io_rsp_valid (per_cluster_io_rsp_valid [i]), + .io_rsp_data (per_cluster_io_rsp_data [i]), + .io_rsp_tag (per_cluster_io_rsp_tag [i]), + .io_rsp_ready (per_cluster_io_rsp_ready [i]), - .csr_io_rsp_valid (csr_io_rsp_valid), - .csr_io_rsp_data (csr_io_rsp_data), - .csr_io_rsp_ready (csr_io_rsp_ready), + .csr_io_req_valid (per_cluster_csr_io_req_valid[i]), + .csr_io_req_coreid (csr_io_core_id), + .csr_io_req_rw (per_cluster_csr_io_req_rw [i]), + .csr_io_req_addr (per_cluster_csr_io_req_addr[i]), + .csr_io_req_data (per_cluster_csr_io_req_data[i]), + .csr_io_req_ready (per_cluster_csr_io_req_ready[i]), - .busy (busy), - .ebreak (ebreak) + .csr_io_rsp_valid (per_cluster_csr_io_rsp_valid[i]), + .csr_io_rsp_data (per_cluster_csr_io_rsp_data[i]), + .csr_io_rsp_ready (per_cluster_csr_io_rsp_ready[i]), + + .busy (per_cluster_busy [i]), + .ebreak (per_cluster_ebreak [i]) ); + end - end else begin + VX_databus_arb #( + .NUM_REQS (`NUM_CLUSTERS), + .WORD_SIZE (4), + .TAG_IN_WIDTH (`L2CORE_TAG_WIDTH), + .TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH) + ) io_arb ( + .clk (clk), + .reset (reset), - wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid; - wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; - wire cluster_dram_req_ready; - - wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; + // input requests + .req_valid_in (per_cluster_io_req_valid), + .req_rw_in (per_cluster_io_req_rw), + .req_byteen_in (per_cluster_io_req_byteen), + .req_addr_in (per_cluster_io_req_addr), + .req_data_in (per_cluster_io_req_data), + .req_tag_in (per_cluster_io_req_tag), + .req_ready_in (per_cluster_io_req_ready), - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; + // output request + .req_valid_out (io_req_valid), + .req_rw_out (io_req_rw), + .req_byteen_out (io_req_byteen), + .req_addr_out (io_req_addr), + .req_data_out (io_req_data), + .req_tag_out (io_req_tag), + .req_ready_out (io_req_ready), - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; + // input responses + .rsp_valid_in (per_cluster_io_rsp_valid), + .rsp_data_in (per_cluster_io_rsp_data), + .rsp_tag_in (per_cluster_io_rsp_tag), + .rsp_ready_in (per_cluster_io_rsp_ready), + + // output response + .rsp_valid_out (io_rsp_valid), + .rsp_tag_out (io_rsp_tag), + .rsp_data_out (io_rsp_data), + .rsp_ready_out (io_rsp_ready) + ); - wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid; - wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw; - wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen; - wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr; - wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data; - wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag; - wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready; + VX_csr_io_arb #( + .NUM_REQS (`NUM_CLUSTERS), + .DATA_WIDTH (32), + .ADDR_WIDTH (12) + ) csr_io_arb ( + .clk (clk), + .reset (reset), - wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; + .request_id (csr_io_cluster_id), - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; - wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready; + // input requests + .req_valid_in (csr_io_req_valid), + .req_addr_in (csr_io_req_addr), + .req_rw_in (csr_io_req_rw), + .req_data_in (csr_io_req_data), + .req_ready_in (csr_io_req_ready), - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid; - wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data; - wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready; + // output request + .req_valid_out (per_cluster_csr_io_req_valid), + .req_addr_out (per_cluster_csr_io_req_addr), + .req_rw_out (per_cluster_csr_io_req_rw), + .req_data_out (per_cluster_csr_io_req_data), + .req_ready_out (per_cluster_csr_io_req_ready), - wire [`NUM_CLUSTERS-1:0] per_cluster_busy; - wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; + // input responses + .rsp_valid_in (per_cluster_csr_io_rsp_valid), + .rsp_data_in (per_cluster_csr_io_rsp_data), + .rsp_ready_in (per_cluster_csr_io_rsp_ready), + + // output response + .rsp_valid_out (csr_io_rsp_valid), + .rsp_data_out (csr_io_rsp_data), + .rsp_ready_out (csr_io_rsp_ready) + ); - wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS)); - wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid); + assign busy = (| per_cluster_busy); + assign ebreak = (| per_cluster_ebreak); - for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin - VX_cluster #( - .CLUSTER_ID(i) - ) cluster ( - `SCOPE_BIND_Vortex_cluster(i) + wire snp_fwd_rsp_valid; + wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; + wire snp_fwd_rsp_inv; + wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; + wire snp_fwd_rsp_ready; - .clk (clk), - .reset (reset), + VX_snp_forwarder #( + .CACHE_ID (`L3CACHE_ID), + .NUM_REQS (`NUM_CLUSTERS), + .SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH), + .DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), + .TAG_IN_WIDTH (`L3SNP_TAG_WIDTH), + .TAG_OUT_WIDTH (`L2SNP_TAG_WIDTH), + .SNRQ_SIZE (`L3SNRQ_SIZE) + ) snp_forwarder ( + .clk (clk), + .reset (reset), - .dram_req_valid (per_cluster_dram_req_valid [i]), - .dram_req_rw (per_cluster_dram_req_rw [i]), - .dram_req_byteen (per_cluster_dram_req_byteen[i]), - .dram_req_addr (per_cluster_dram_req_addr [i]), - .dram_req_data (per_cluster_dram_req_data [i]), - .dram_req_tag (per_cluster_dram_req_tag [i]), - .dram_req_ready (cluster_dram_req_ready), + .snp_req_valid (snp_req_valid), + .snp_req_addr (snp_req_addr), + .snp_req_inv (snp_req_inv), + .snp_req_tag (snp_req_tag), + .snp_req_ready (snp_req_ready), - .dram_rsp_valid (per_cluster_dram_rsp_valid [i]), - .dram_rsp_data (per_cluster_dram_rsp_data [i]), - .dram_rsp_tag (per_cluster_dram_rsp_tag [i]), - .dram_rsp_ready (per_cluster_dram_rsp_ready [i]), + .snp_rsp_valid (snp_fwd_rsp_valid), + .snp_rsp_addr (snp_fwd_rsp_addr), + .snp_rsp_inv (snp_fwd_rsp_inv), + .snp_rsp_tag (snp_fwd_rsp_tag), + .snp_rsp_ready (snp_fwd_rsp_ready), - .snp_req_valid (per_cluster_snp_req_valid [i]), - .snp_req_addr (per_cluster_snp_req_addr [i]), - .snp_req_inv (per_cluster_snp_req_inv [i]), - .snp_req_tag (per_cluster_snp_req_tag [i]), - .snp_req_ready (per_cluster_snp_req_ready [i]), + .snp_fwdout_valid (per_cluster_snp_req_valid), + .snp_fwdout_addr (per_cluster_snp_req_addr), + .snp_fwdout_inv (per_cluster_snp_req_inv), + .snp_fwdout_tag (per_cluster_snp_req_tag), + .snp_fwdout_ready (per_cluster_snp_req_ready), - .snp_rsp_valid (per_cluster_snp_rsp_valid [i]), - .snp_rsp_tag (per_cluster_snp_rsp_tag [i]), - .snp_rsp_ready (per_cluster_snp_rsp_ready [i]), + .snp_fwdin_valid (per_cluster_snp_rsp_valid), + .snp_fwdin_tag (per_cluster_snp_rsp_tag), + .snp_fwdin_ready (per_cluster_snp_rsp_ready) + ); - .io_req_valid (per_cluster_io_req_valid [i]), - .io_req_rw (per_cluster_io_req_rw [i]), - .io_req_byteen (per_cluster_io_req_byteen [i]), - .io_req_addr (per_cluster_io_req_addr [i]), - .io_req_data (per_cluster_io_req_data [i]), - .io_req_tag (per_cluster_io_req_tag [i]), - .io_req_ready (per_cluster_io_req_ready [i]), - - .io_rsp_valid (per_cluster_io_rsp_valid [i]), - .io_rsp_data (per_cluster_io_rsp_data [i]), - .io_rsp_tag (per_cluster_io_rsp_tag [i]), - .io_rsp_ready (per_cluster_io_rsp_ready [i]), - - .csr_io_req_valid (per_cluster_csr_io_req_valid[i]), - .csr_io_req_coreid (per_cluster_csr_io_req_coreid), - .csr_io_req_rw (per_cluster_csr_io_req_rw [i]), - .csr_io_req_addr (per_cluster_csr_io_req_addr[i]), - .csr_io_req_data (per_cluster_csr_io_req_data[i]), - .csr_io_req_ready (per_cluster_csr_io_req_ready[i]), - - .csr_io_rsp_valid (per_cluster_csr_io_rsp_valid[i]), - .csr_io_rsp_data (per_cluster_csr_io_rsp_data[i]), - .csr_io_rsp_ready (per_cluster_csr_io_rsp_ready[i]), - - .busy (per_cluster_busy [i]), - .ebreak (per_cluster_ebreak [i]) - ); - end - - VX_databus_arb #( - .NUM_REQS (`NUM_CLUSTERS), - .WORD_SIZE (4), - .TAG_IN_WIDTH (`L2CORE_TAG_WIDTH), - .TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH) - ) io_arb ( - .clk (clk), - .reset (reset), - - // input requests - .req_valid_in (per_cluster_io_req_valid), - .req_rw_in (per_cluster_io_req_rw), - .req_byteen_in (per_cluster_io_req_byteen), - .req_addr_in (per_cluster_io_req_addr), - .req_data_in (per_cluster_io_req_data), - .req_tag_in (per_cluster_io_req_tag), - .req_ready_in (per_cluster_io_req_ready), - - // output request - .req_valid_out (io_req_valid), - .req_rw_out (io_req_rw), - .req_byteen_out (io_req_byteen), - .req_addr_out (io_req_addr), - .req_data_out (io_req_data), - .req_tag_out (io_req_tag), - .req_ready_out (io_req_ready), - - // input responses - .rsp_valid_in (per_cluster_io_rsp_valid), - .rsp_data_in (per_cluster_io_rsp_data), - .rsp_tag_in (per_cluster_io_rsp_tag), - .rsp_ready_in (per_cluster_io_rsp_ready), - - // output response - .rsp_valid_out (io_rsp_valid), - .rsp_tag_out (io_rsp_tag), - .rsp_data_out (io_rsp_data), - .rsp_ready_out (io_rsp_ready) - ); - - VX_csr_io_arb #( - .NUM_REQS (`NUM_CLUSTERS), - .DATA_WIDTH (32), - .ADDR_WIDTH (12) - ) csr_io_arb ( - .clk (clk), - .reset (reset), - - .request_id (csr_io_request_id), - - // input requests - .req_valid_in (csr_io_req_valid), - .req_addr_in (csr_io_req_addr), - .req_rw_in (csr_io_req_rw), - .req_data_in (csr_io_req_data), - .req_ready_in (csr_io_req_ready), - - // output request - .req_valid_out (per_cluster_csr_io_req_valid), - .req_addr_out (per_cluster_csr_io_req_addr), - .req_rw_out (per_cluster_csr_io_req_rw), - .req_data_out (per_cluster_csr_io_req_data), - .req_ready_out (per_cluster_csr_io_req_ready), - - // input responses - .rsp_valid_in (per_cluster_csr_io_rsp_valid), - .rsp_data_in (per_cluster_csr_io_rsp_data), - .rsp_ready_in (per_cluster_csr_io_rsp_ready), - - // output response - .rsp_valid_out (csr_io_rsp_valid), - .rsp_data_out (csr_io_rsp_data), - .rsp_ready_out (csr_io_rsp_ready) - ); - - assign busy = (| per_cluster_busy); - assign ebreak = (| per_cluster_ebreak); - - // L3 Cache /////////////////////////////////////////////////////////// + if (`L3_ENABLE) begin wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data; wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag; - wire cluster_dram_rsp_ready; - - wire snp_fwd_rsp_valid; - wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_inv; - wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; - wire snp_fwd_rsp_ready; + wire cluster_dram_rsp_ready; reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other; - always @(*) begin cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}}; for (integer i = 0; i < `NUM_CLUSTERS; i++) begin @@ -354,39 +325,10 @@ module Vortex ( end assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid); - VX_snp_forwarder #( - .CACHE_ID (`L3CACHE_ID), - .NUM_REQS (`NUM_CLUSTERS), - .SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH), - .DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH), - .SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH), - .SNRQ_SIZE (`L3SNRQ_SIZE) - ) snp_forwarder ( - .clk (clk), - .reset (reset), - - .snp_req_valid (snp_req_valid), - .snp_req_addr (snp_req_addr), - .snp_req_inv (snp_req_inv), - .snp_req_tag (snp_req_tag), - .snp_req_ready (snp_req_ready), - - .snp_rsp_valid (snp_fwd_rsp_valid), - .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_inv (snp_fwd_rsp_inv), - .snp_rsp_tag (snp_fwd_rsp_tag), - .snp_rsp_ready (snp_fwd_rsp_ready), - - .snp_fwdout_valid (per_cluster_snp_req_valid), - .snp_fwdout_addr (per_cluster_snp_req_addr), - .snp_fwdout_inv (per_cluster_snp_req_inv), - .snp_fwdout_tag (per_cluster_snp_req_tag), - .snp_fwdout_ready (per_cluster_snp_req_ready), - - .snp_fwdin_valid (per_cluster_snp_rsp_valid), - .snp_fwdin_tag (per_cluster_snp_rsp_tag), - .snp_fwdin_ready (per_cluster_snp_rsp_ready) - ); + wire cluster_dram_req_ready; + for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin + assign per_cluster_dram_req_ready[i] = cluster_dram_req_ready; + end VX_cache #( .CACHE_ID (`L3CACHE_ID), @@ -460,6 +402,56 @@ module Vortex ( // Miss status `UNUSED_PIN (miss_vec) ); + + end else begin + + VX_mem_arb #( + .NUM_REQS (`NUM_CLUSTERS), + .DATA_WIDTH (`L3DRAM_LINE_WIDTH), + .TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH), + .TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH) + ) dram_arb ( + .clk (clk), + .reset (reset), + + // Core request + .req_valid_in (per_cluster_dram_req_valid), + .req_rw_in (per_cluster_dram_req_rw), + .req_byteen_in (per_cluster_dram_req_byteen), + .req_addr_in (per_cluster_dram_req_addr), + .req_data_in (per_cluster_dram_req_data), + .req_tag_in (per_cluster_dram_req_tag), + .req_ready_in (per_cluster_dram_req_ready), + + // DRAM request + .req_valid_out (dram_req_valid), + .req_rw_out (dram_req_rw), + .req_byteen_out (dram_req_byteen), + .req_addr_out (dram_req_addr), + .req_data_out (dram_req_data), + .req_tag_out (dram_req_tag), + .req_ready_out (dram_req_ready), + + // Core response + .rsp_valid_out (per_cluster_dram_rsp_valid), + .rsp_data_out (per_cluster_dram_rsp_data), + .rsp_tag_out (per_cluster_dram_rsp_tag), + .rsp_ready_out (per_cluster_dram_rsp_ready), + + // DRAM response + .rsp_valid_in (dram_rsp_valid), + .rsp_tag_in (dram_rsp_tag), + .rsp_data_in (dram_rsp_data), + .rsp_ready_in (dram_rsp_ready) + ); + + `UNUSED_VAR (snp_fwd_rsp_addr) + `UNUSED_VAR (snp_fwd_rsp_inv) + + assign snp_rsp_valid = snp_fwd_rsp_valid; + assign snp_rsp_tag = snp_fwd_rsp_tag; + assign snp_fwd_rsp_ready = snp_rsp_ready; + end `SCOPE_ASSIGN (reset, reset); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index eae6de5c..8b7f06ad 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -150,7 +150,7 @@ module VX_cache #( .per_bank_ready (per_bank_core_req_ready) ); - assign dram_req_tag = dram_req_addr; + assign dram_req_tag = dram_req_addr; if (NUM_BANKS == 1) begin assign dram_rsp_ready = per_bank_dram_rsp_ready; end else begin diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index d090cac1..c96ef798 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -5,9 +5,9 @@ module VX_snp_forwarder #( parameter SRC_ADDR_WIDTH = 1, parameter DST_ADDR_WIDTH = 1, parameter NUM_REQS = 1, - parameter SNP_TAG_WIDTH = 1, parameter SNRQ_SIZE = 1, - parameter LOG_SNRQ_SIZE = `LOG2UP(SNRQ_SIZE) + parameter TAG_IN_WIDTH = 1, + parameter TAG_OUT_WIDTH = `LOG2UP(SNRQ_SIZE) ) ( input wire clk, input wire reset, @@ -16,185 +16,204 @@ module VX_snp_forwarder #( input wire snp_req_valid, input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr, input wire snp_req_inv, - input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, + input wire [TAG_IN_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, // Snoop response output wire snp_rsp_valid, output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr, output wire snp_rsp_inv, - output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, + output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready, // Snoop Forwarding out output wire [NUM_REQS-1:0] snp_fwdout_valid, output wire [NUM_REQS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr, output wire [NUM_REQS-1:0] snp_fwdout_inv, - output wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdout_tag, + output wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdout_tag, input wire [NUM_REQS-1:0] snp_fwdout_ready, // Snoop forwarding in input wire [NUM_REQS-1:0] snp_fwdin_valid, - input wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag, + input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag, output wire [NUM_REQS-1:0] snp_fwdin_ready ); localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH; localparam NUM_REQUESTS_QUAL = NUM_REQS * (1 << ADDR_DIFF); localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL); - `STATIC_ASSERT(NUM_REQS > 1, ("invalid value")) + if (NUM_REQS > 1) begin - // Inputs buffering - wire [NUM_REQS-1:0] snp_fwdin_valid_qual; - wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag_qual; - wire [NUM_REQS-1:0] snp_fwdin_ready_qual; - for (genvar i = 0; i < NUM_REQS; ++i) begin - VX_skid_buffer #( - .DATAW (LOG_SNRQ_SIZE), - .PASSTHRU (NUM_REQS < 4) - ) snp_fwdin_buffer ( - .clk (clk), - .reset (reset), - .valid_in (snp_fwdin_valid[i]), - .data_in (snp_fwdin_tag[i]), - .ready_in (snp_fwdin_ready[i]), - .valid_out (snp_fwdin_valid_qual[i]), - .data_out (snp_fwdin_tag_qual[i]), - .ready_out (snp_fwdin_ready_qual[i]) - ); - end + // Inputs buffering + wire [NUM_REQS-1:0] snp_fwdin_valid_qual; + wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag_qual; + wire [NUM_REQS-1:0] snp_fwdin_ready_qual; + for (genvar i = 0; i < NUM_REQS; ++i) begin + VX_skid_buffer #( + .DATAW (TAG_OUT_WIDTH), + .PASSTHRU (NUM_REQS < 4) + ) snp_fwdin_buffer ( + .clk (clk), + .reset (reset), + .valid_in (snp_fwdin_valid[i]), + .data_in (snp_fwdin_tag[i]), + .ready_in (snp_fwdin_ready[i]), + .valid_out (snp_fwdin_valid_qual[i]), + .data_out (snp_fwdin_tag_qual[i]), + .ready_out (snp_fwdin_ready_qual[i]) + ); + end - reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; - - wire [LOG_SNRQ_SIZE-1:0] sfq_write_addr, sfq_read_addr; - wire sfq_full; + reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0]; + + wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr; + wire sfq_full; - wire [LOG_SNRQ_SIZE-1:0] fwdin_tag; - wire fwdin_valid; - - wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]); - wire fwdin_fire = fwdin_valid && fwdin_ready; + wire [TAG_OUT_WIDTH-1:0] fwdin_tag; + wire fwdin_valid; + + wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]); + wire fwdin_fire = fwdin_valid && fwdin_ready; - assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); - - assign sfq_read_addr = fwdin_tag; - - wire sfq_acquire = snp_req_valid && snp_req_ready; - wire sfq_release = snp_rsp_valid && snp_rsp_ready; + assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]); + + assign sfq_read_addr = fwdin_tag; + + wire sfq_acquire = snp_req_valid && snp_req_ready; + wire sfq_release = snp_rsp_valid && snp_rsp_ready; - VX_cam_buffer #( - .DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), - .SIZE (SNRQ_SIZE) - ) req_metadata_buf ( - .clk (clk), - .reset (reset), - .write_addr (sfq_write_addr), - .acquire_slot (sfq_acquire), - .read_addr (sfq_read_addr), - .write_data ({snp_req_addr, snp_req_inv, snp_req_tag}), - .read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}), - .release_addr (sfq_read_addr), - .release_slot (sfq_release), - .full (sfq_full) - ); - - wire fwdout_valid; - wire [LOG_SNRQ_SIZE-1:0] fwdout_tag; - wire [DST_ADDR_WIDTH-1:0] fwdout_addr; - wire fwdout_inv; - wire fwdout_ready; - wire dispatch_hold; + VX_cam_buffer #( + .DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH), + .SIZE (SNRQ_SIZE) + ) req_metadata_buf ( + .clk (clk), + .reset (reset), + .write_addr (sfq_write_addr), + .acquire_slot (sfq_acquire), + .read_addr (sfq_read_addr), + .write_data ({snp_req_addr, snp_req_inv, snp_req_tag}), + .read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}), + .release_addr (sfq_read_addr), + .release_slot (sfq_release), + .full (sfq_full) + ); + + wire fwdout_valid; + wire [TAG_OUT_WIDTH-1:0] fwdout_tag; + wire [DST_ADDR_WIDTH-1:0] fwdout_addr; + wire fwdout_inv; + wire fwdout_ready; + wire dispatch_hold; - if (ADDR_DIFF != 0) begin - reg [LOG_SNRQ_SIZE-1:0] fwdout_tag_r; - reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r; - reg fwdout_inv_r; - reg dispatch_hold_r; + if (ADDR_DIFF != 0) begin + reg [TAG_OUT_WIDTH-1:0] fwdout_tag_r; + reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r; + reg fwdout_inv_r; + reg dispatch_hold_r; - always @(posedge clk) begin - if (reset) begin - dispatch_hold_r <= 0; - end else begin - if (snp_req_valid && snp_req_ready) begin - dispatch_hold_r <= 1; + always @(posedge clk) begin + if (reset) begin + dispatch_hold_r <= 0; + end else begin + if (snp_req_valid && snp_req_ready) begin + dispatch_hold_r <= 1; + end + + if (dispatch_hold_r + && fwdout_ready + && (fwdout_addr[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1))) begin + dispatch_hold_r <= 0; + end end - if (dispatch_hold_r - && fwdout_ready - && (fwdout_addr[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1))) begin - dispatch_hold_r <= 0; - end - end + if (fwdout_valid && fwdout_ready) begin + fwdout_addr_r <= fwdout_addr + DST_ADDR_WIDTH'(1'b1); + end - if (fwdout_valid && fwdout_ready) begin - fwdout_addr_r <= fwdout_addr + DST_ADDR_WIDTH'(1'b1); + if (snp_req_valid && snp_req_ready) begin + fwdout_inv_r <= snp_req_inv; + fwdout_tag_r <= sfq_write_addr; + end end + assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full); + assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr; + assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)}; + assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv; + assign dispatch_hold= dispatch_hold_r; + end else begin + assign fwdout_valid = snp_req_valid && !sfq_full; + assign fwdout_tag = sfq_write_addr; + assign fwdout_addr = snp_req_addr; + assign fwdout_inv = snp_req_inv; + assign dispatch_hold= 1'b0; + end - if (snp_req_valid && snp_req_ready) begin - fwdout_inv_r <= snp_req_inv; - fwdout_tag_r <= sfq_write_addr; + always @(posedge clk) begin + if (sfq_acquire) begin + pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL; + end + if (fwdin_fire) begin + pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; end end - assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full); - assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr; - assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)}; - assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv; - assign dispatch_hold= dispatch_hold_r; - end else begin - assign fwdout_valid = snp_req_valid && !sfq_full; - assign fwdout_tag = sfq_write_addr; - assign fwdout_addr = snp_req_addr; - assign fwdout_inv = snp_req_inv; - assign dispatch_hold= 1'b0; - end - always @(posedge clk) begin - if (sfq_acquire) begin - pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL; - end - if (fwdin_fire) begin - pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1; + reg [NUM_REQS-1:0] snp_fwdout_ready_other; + + for (genvar i = 0; i < NUM_REQS; i++) begin + assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; + assign snp_fwdout_addr[i] = fwdout_addr; + assign snp_fwdout_inv[i] = fwdout_inv; + assign snp_fwdout_tag[i] = fwdout_tag; end - end - reg [NUM_REQS-1:0] snp_fwdout_ready_other; - - for (genvar i = 0; i < NUM_REQS; i++) begin - assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; - assign snp_fwdout_addr[i] = fwdout_addr; - assign snp_fwdout_inv[i] = fwdout_inv; - assign snp_fwdout_tag[i] = fwdout_tag; - end - - always @(*) begin - snp_fwdout_ready_other = {NUM_REQS{1'b1}}; - for (integer i = 0; i < NUM_REQS; i++) begin - for (integer j = 0; j < NUM_REQS; j++) begin - if (i != j) - snp_fwdout_ready_other[i] &= snp_fwdout_ready[j]; + always @(*) begin + snp_fwdout_ready_other = {NUM_REQS{1'b1}}; + for (integer i = 0; i < NUM_REQS; i++) begin + for (integer j = 0; j < NUM_REQS; j++) begin + if (i != j) + snp_fwdout_ready_other[i] &= snp_fwdout_ready[j]; + end end end + + assign fwdout_ready = (& snp_fwdout_ready); + + assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold; + + VX_stream_arbiter #( + .NUM_REQS(NUM_REQS), + .DATAW(TAG_OUT_WIDTH), + .BUFFERED(NUM_REQS >= 4) + ) snp_fwdin_arb ( + .clk (clk), + .reset (reset), + .valid_in (snp_fwdin_valid_qual), + .data_in (snp_fwdin_tag_qual), + .ready_in (snp_fwdin_ready_qual), + .valid_out (fwdin_valid), + .data_out (fwdin_tag), + .ready_out (fwdin_ready) + ); + + end else begin + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + assign snp_fwdout_valid = snp_req_valid; + assign snp_fwdout_addr = snp_req_addr; + assign snp_fwdout_inv = snp_req_inv; + assign snp_fwdout_tag = snp_req_tag; + assign snp_req_ready = snp_fwdout_ready; + + assign snp_rsp_valid = snp_fwdin_valid; + assign snp_rsp_addr = snp_req_addr; + assign snp_rsp_inv = snp_req_inv; + assign snp_rsp_tag = snp_fwdin_tag; + assign snp_fwdin_ready = snp_rsp_ready; + end - assign fwdout_ready = (& snp_fwdout_ready); - - assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold; - - VX_stream_arbiter #( - .NUM_REQS(NUM_REQS), - .DATAW(LOG_SNRQ_SIZE), - .BUFFERED(NUM_REQS >= 4) - ) snp_fwdin_arb ( - .clk (clk), - .reset (reset), - .valid_in (snp_fwdin_valid_qual), - .data_in (snp_fwdin_tag_qual), - .ready_in (snp_fwdin_ready_qual), - .valid_out (fwdin_valid), - .data_out (fwdin_tag), - .ready_out (fwdin_ready) - ); - `ifdef DBG_PRINT_CACHE_SNP always @(posedge clk) begin if (snp_req_valid && snp_req_ready) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index c81c0892..f3796fdb 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -66,8 +66,7 @@ module VX_generic_queue #( empty_r <= 0; if (used_r == ADDRW'(SIZE-1)) begin full_r <= 1; - end - used_r <= used_r + ADDRW'(1); + end end end if (pop) begin @@ -76,10 +75,10 @@ module VX_generic_queue #( full_r <= 0; if (used_r == ADDRW'(1)) begin empty_r <= 1; - end; - used_r <= used_r - ADDRW'(1); + end; end end + used_r <= used_r + ADDRW'(push) - ADDRW'(pop); end end @@ -96,12 +95,8 @@ module VX_generic_queue #( rd_ptr_r <= 0; wr_ptr_r <= 0; end else begin - if (push) begin - wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1); - end - if (pop) begin - rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1); - end + wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(push); + rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(pop); end end