refactoring cores clustering

This commit is contained in:
Blaise Tine
2020-12-06 14:42:12 -08:00
parent b2652527bb
commit 1332970636
9 changed files with 504 additions and 615 deletions

View File

@@ -6,7 +6,7 @@ set -e
# show_usage: print the test-driver banner and the command-line synopsis.
# Takes no arguments; it only echoes fixed strings to stdout.
show_usage()
{
echo "Vortex BlackBox Test Driver v1.0"
# NOTE(review): two synopsis lines follow. They are identical except that the
# second one also lists [--l3cache]. In this commit view they look like the
# before/after versions of a single line -- confirm against the actual script
# before relying on both being printed.
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
}
DRIVER=vlsim
@@ -16,6 +16,7 @@ CORES=2
WARPS=4
THREADS=4
L2=0
L3=0
DEBUG=0
SCOPE=0
HAS_ARGS=0
@@ -51,6 +52,10 @@ case $i in
L2=1
shift
;;
--l3cache)
L3=1
shift
;;
--debug)
DEBUG=1
shift
@@ -112,7 +117,7 @@ case $APP in
;;
esac
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2"
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3"
echo "CONFIGS=$CONFIGS"

View File

@@ -71,7 +71,8 @@ module VX_avs_wrapper #(
VX_generic_queue #(
.DATAW (REQ_TAGW),
.SIZE (RD_QUEUE_SIZE)
.SIZE (RD_QUEUE_SIZE),
.BUFFERED (1)
) rd_req_queue (
.clk (clk),
.reset (reset),
@@ -86,7 +87,8 @@ module VX_avs_wrapper #(
VX_generic_queue #(
.DATAW (AVS_DATAW),
.SIZE (RD_QUEUE_SIZE)
.SIZE (RD_QUEUE_SIZE),
.BUFFERED (1)
) rd_rsp_queue (
.clk (clk),
.reset (reset),

View File

@@ -255,51 +255,55 @@ module VX_cluster #(
assign busy = (| per_core_busy);
assign ebreak = (| per_core_ebreak);
wire snp_fwd_rsp_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.TAG_IN_WIDTH (`L2SNP_TAG_WIDTH),
.TAG_OUT_WIDTH (`DSNP_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (per_core_snp_req_valid),
.snp_fwdout_addr (per_core_snp_req_addr),
.snp_fwdout_inv (per_core_snp_req_inv),
.snp_fwdout_tag (per_core_snp_req_tag),
.snp_fwdout_ready (per_core_snp_req_ready),
.snp_fwdin_valid (per_core_snp_rsp_valid),
.snp_fwdin_tag (per_core_snp_rsp_tag),
.snp_fwdin_ready (per_core_snp_rsp_ready)
);
if (`L2_ENABLE) begin
// L2 Cache ///////////////////////////////////////////////////////////
wire [`NUM_CORES-1:0] core_dram_req_valid;
wire [`NUM_CORES-1:0] core_dram_req_rw;
wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
wire core_dram_req_ready;
wire [`NUM_CORES-1:0] core_dram_rsp_valid;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire core_dram_rsp_ready;
wire [`NUM_CORES-1:0] core_snp_fwdout_valid;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
wire [`NUM_CORES-1:0] core_snp_fwdout_inv;
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
wire [`NUM_CORES-1:0] core_snp_fwdout_ready;
wire [`NUM_CORES-1:0] core_snp_fwdin_valid;
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire [`NUM_CORES-1:0] core_snp_fwdin_ready;
wire snp_fwd_rsp_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign core_dram_req_valid [i] = per_core_dram_req_valid [i];
assign core_dram_req_rw [i] = per_core_dram_req_rw [i];
assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i];
assign core_dram_req_addr [i] = per_core_dram_req_addr [i];
assign core_dram_req_data [i] = per_core_dram_req_data [i];
assign core_dram_req_tag [i] = per_core_dram_req_tag [i];
assign per_core_dram_req_ready [i] = core_dram_req_ready;
end
reg [`NUM_CORES-1:0] core_dram_rsp_ready_other;
always @(*) begin
core_dram_rsp_ready_other = {`NUM_CORES{1'b1}};
for (integer i = 0; i < `NUM_CORES; i++) begin
@@ -318,51 +322,10 @@ module VX_cluster #(
end
assign core_dram_rsp_ready = & (per_core_dram_rsp_ready | ~core_dram_rsp_valid);
wire core_dram_req_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i];
assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i];
assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i];
assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i];
assign core_snp_fwdout_ready [i] = per_core_snp_req_ready[i];
assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i];
assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i];
assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i];
end
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_inv (core_snp_fwdout_inv),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
assign per_core_dram_req_ready[i] = core_dram_req_ready;
end
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
@@ -392,12 +355,12 @@ module VX_cluster #(
.reset (reset),
// Core request
.core_req_valid (core_dram_req_valid),
.core_req_rw (core_dram_req_rw),
.core_req_byteen (core_dram_req_byteen),
.core_req_addr (core_dram_req_addr),
.core_req_data (core_dram_req_data),
.core_req_tag (core_dram_req_tag),
.core_req_valid (per_core_dram_req_valid),
.core_req_rw (per_core_dram_req_rw),
.core_req_byteen (per_core_dram_req_byteen),
.core_req_addr (per_core_dram_req_addr),
.core_req_data (per_core_dram_req_data),
.core_req_tag (per_core_dram_req_tag),
.core_req_ready (core_dram_req_ready),
// Core response
@@ -438,100 +401,6 @@ module VX_cluster #(
);
end else begin
wire[`NUM_CORES-1:0] core_dram_req_valid;
wire[`NUM_CORES-1:0] core_dram_req_rw;
wire[`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
wire[`NUM_CORES-1:0] core_dram_req_ready;
wire[`NUM_CORES-1:0] core_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire[`NUM_CORES-1:0] core_dram_rsp_ready;
wire[`NUM_CORES-1:0] core_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
wire[`NUM_CORES-1:0] core_snp_fwdout_inv;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
wire[`NUM_CORES-1:0] core_snp_fwdout_ready;
wire[`NUM_CORES-1:0] core_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign core_dram_req_valid [i] = per_core_dram_req_valid [i];
assign core_dram_req_rw [i] = per_core_dram_req_rw [i];
assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i];
assign core_dram_req_addr [i] = per_core_dram_req_addr [i];
assign core_dram_req_data [i] = per_core_dram_req_data [i];
assign core_dram_req_tag [i] = per_core_dram_req_tag [i];
assign per_core_dram_req_ready [i] = core_dram_req_ready [i];
assign per_core_dram_rsp_valid [i] = core_dram_rsp_valid [i];
assign per_core_dram_rsp_data [i] = core_dram_rsp_data [i];
assign per_core_dram_rsp_tag [i] = core_dram_rsp_tag [i];
assign core_dram_rsp_ready [i] = per_core_dram_rsp_ready [i];
assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i];
assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i];
assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i];
assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i];
assign core_snp_fwdout_ready [i] = per_core_snp_req_ready [i];
assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i];
assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i];
assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i];
end
if (`NUM_CORES > 1) begin
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_addr),
`UNUSED_PIN (snp_rsp_inv),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_inv (core_snp_fwdout_inv),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
end else begin
assign core_snp_fwdout_valid= snp_req_valid;
assign core_snp_fwdout_addr = snp_req_addr;
assign core_snp_fwdout_inv = snp_req_inv;
assign core_snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = core_snp_fwdout_ready;
assign snp_rsp_valid = core_snp_fwdin_valid;
assign snp_rsp_tag = core_snp_fwdin_tag;
assign core_snp_fwdin_ready = snp_rsp_ready;
end
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
@@ -543,13 +412,13 @@ module VX_cluster #(
.reset (reset),
// Core request
.req_valid_in (core_dram_req_valid),
.req_rw_in (core_dram_req_rw),
.req_byteen_in (core_dram_req_byteen),
.req_addr_in (core_dram_req_addr),
.req_data_in (core_dram_req_data),
.req_tag_in (core_dram_req_tag),
.req_ready_in (core_dram_req_ready),
.req_valid_in (per_core_dram_req_valid),
.req_rw_in (per_core_dram_req_rw),
.req_byteen_in (per_core_dram_req_byteen),
.req_addr_in (per_core_dram_req_addr),
.req_data_in (per_core_dram_req_data),
.req_tag_in (per_core_dram_req_tag),
.req_ready_in (per_core_dram_req_ready),
// DRAM request
.req_valid_out (dram_req_valid),
@@ -561,10 +430,10 @@ module VX_cluster #(
.req_ready_out (dram_req_ready),
// Core response
.rsp_valid_out (core_dram_rsp_valid),
.rsp_data_out (core_dram_rsp_data),
.rsp_tag_out (core_dram_rsp_tag),
.rsp_ready_out (core_dram_rsp_ready),
.rsp_valid_out (per_core_dram_rsp_valid),
.rsp_data_out (per_core_dram_rsp_data),
.rsp_tag_out (per_core_dram_rsp_tag),
.rsp_ready_out (per_core_dram_rsp_ready),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
@@ -573,6 +442,13 @@ module VX_cluster #(
.rsp_ready_in (dram_rsp_ready)
);
`UNUSED_VAR (snp_fwd_rsp_addr)
`UNUSED_VAR (snp_fwd_rsp_inv)
assign snp_rsp_valid = snp_fwd_rsp_valid;
assign snp_rsp_tag = snp_fwd_rsp_tag;
assign snp_fwd_rsp_ready = snp_rsp_ready;
end
endmodule

View File

@@ -24,11 +24,11 @@
`endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 2)
`define L2_ENABLE (`NUM_CORES >= 4)
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`define L3_ENABLE (`NUM_CLUSTERS >= 4)
`endif
`ifndef GLOBAL_BLOCK_SIZE

View File

@@ -366,7 +366,7 @@
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES)))
// Snoop request tag bits
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
////////////////////////// L3cache Configurable Knobs /////////////////////////
@@ -392,10 +392,10 @@
`define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS)))
// Snoop request tag bits
`define L3SNP_TAG_WIDTH 16
`define L3SNP_TAG_WIDTH `VX_SNP_TAG_WIDTH
///////////////////////////////////////////////////////////////////////////////
@@ -403,7 +403,7 @@
`define VX_DRAM_ADDR_WIDTH `L3DRAM_ADDR_WIDTH
`define VX_DRAM_LINE_WIDTH `L3DRAM_LINE_WIDTH
`define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH
`define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH
`define VX_SNP_TAG_WIDTH 16
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)

View File

@@ -66,275 +66,246 @@ module Vortex (
output wire busy,
output wire ebreak
);
if (`NUM_CLUSTERS == 1) begin
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_io_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CORES));
wire [`NC_BITS-1:0] csr_io_core_id = `NC_BITS'(csr_io_req_coreid);
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
VX_cluster #(
.CLUSTER_ID(0)
.CLUSTER_ID(i)
) cluster (
`SCOPE_BIND_Vortex_cluster(0)
`SCOPE_BIND_Vortex_cluster(i)
.clk (clk),
.reset (reset),
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
.dram_req_byteen (dram_req_byteen),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready),
.dram_req_valid (per_cluster_dram_req_valid [i]),
.dram_req_rw (per_cluster_dram_req_rw [i]),
.dram_req_byteen (per_cluster_dram_req_byteen[i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (per_cluster_dram_req_ready [i]),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.snp_req_valid (per_cluster_snp_req_valid [i]),
.snp_req_addr (per_cluster_snp_req_addr [i]),
.snp_req_inv (per_cluster_snp_req_inv [i]),
.snp_req_tag (per_cluster_snp_req_tag [i]),
.snp_req_ready (per_cluster_snp_req_ready [i]),
.io_req_valid (io_req_valid),
.io_req_rw (io_req_rw),
.io_req_byteen (io_req_byteen),
.io_req_addr (io_req_addr),
.io_req_data (io_req_data),
.io_req_tag (io_req_tag),
.io_req_ready (io_req_ready),
.snp_rsp_valid (per_cluster_snp_rsp_valid [i]),
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (io_rsp_ready),
.io_req_valid (per_cluster_io_req_valid [i]),
.io_req_rw (per_cluster_io_req_rw [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (per_cluster_io_req_ready [i]),
.csr_io_req_valid (csr_io_req_valid),
.csr_io_req_coreid (csr_io_req_coreid),
.csr_io_req_rw (csr_io_req_rw),
.csr_io_req_addr (csr_io_req_addr),
.csr_io_req_data (csr_io_req_data),
.csr_io_req_ready (csr_io_req_ready),
.io_rsp_valid (per_cluster_io_rsp_valid [i]),
.io_rsp_data (per_cluster_io_rsp_data [i]),
.io_rsp_tag (per_cluster_io_rsp_tag [i]),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.csr_io_rsp_valid (csr_io_rsp_valid),
.csr_io_rsp_data (csr_io_rsp_data),
.csr_io_rsp_ready (csr_io_rsp_ready),
.csr_io_req_valid (per_cluster_csr_io_req_valid[i]),
.csr_io_req_coreid (csr_io_core_id),
.csr_io_req_rw (per_cluster_csr_io_req_rw [i]),
.csr_io_req_addr (per_cluster_csr_io_req_addr[i]),
.csr_io_req_data (per_cluster_csr_io_req_data[i]),
.csr_io_req_ready (per_cluster_csr_io_req_ready[i]),
.busy (busy),
.ebreak (ebreak)
.csr_io_rsp_valid (per_cluster_csr_io_rsp_valid[i]),
.csr_io_rsp_data (per_cluster_csr_io_rsp_data[i]),
.csr_io_rsp_ready (per_cluster_csr_io_rsp_ready[i]),
.busy (per_cluster_busy [i]),
.ebreak (per_cluster_ebreak [i])
);
end
end else begin
VX_databus_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`L2CORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH)
) io_arb (
.clk (clk),
.reset (reset),
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire cluster_dram_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
// input requests
.req_valid_in (per_cluster_io_req_valid),
.req_rw_in (per_cluster_io_req_rw),
.req_byteen_in (per_cluster_io_req_byteen),
.req_addr_in (per_cluster_io_req_addr),
.req_data_in (per_cluster_io_req_data),
.req_tag_in (per_cluster_io_req_tag),
.req_ready_in (per_cluster_io_req_ready),
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
// input responses
.rsp_valid_in (per_cluster_io_rsp_valid),
.rsp_data_in (per_cluster_io_rsp_data),
.rsp_tag_in (per_cluster_io_rsp_tag),
.rsp_ready_in (per_cluster_io_rsp_ready),
// output response
.rsp_valid_out (io_rsp_valid),
.rsp_tag_out (io_rsp_tag),
.rsp_data_out (io_rsp_data),
.rsp_ready_out (io_rsp_ready)
);
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
VX_csr_io_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32),
.ADDR_WIDTH (12)
) csr_io_arb (
.clk (clk),
.reset (reset),
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
.request_id (csr_io_cluster_id),
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
// input requests
.req_valid_in (csr_io_req_valid),
.req_addr_in (csr_io_req_addr),
.req_rw_in (csr_io_req_rw),
.req_data_in (csr_io_req_data),
.req_ready_in (csr_io_req_ready),
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
// output request
.req_valid_out (per_cluster_csr_io_req_valid),
.req_addr_out (per_cluster_csr_io_req_addr),
.req_rw_out (per_cluster_csr_io_req_rw),
.req_data_out (per_cluster_csr_io_req_data),
.req_ready_out (per_cluster_csr_io_req_ready),
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
// input responses
.rsp_valid_in (per_cluster_csr_io_rsp_valid),
.rsp_data_in (per_cluster_csr_io_rsp_data),
.rsp_ready_in (per_cluster_csr_io_rsp_ready),
// output response
.rsp_valid_out (csr_io_rsp_valid),
.rsp_data_out (csr_io_rsp_data),
.rsp_ready_out (csr_io_rsp_ready)
);
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
assign busy = (| per_cluster_busy);
assign ebreak = (| per_cluster_ebreak);
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
VX_cluster #(
.CLUSTER_ID(i)
) cluster (
`SCOPE_BIND_Vortex_cluster(i)
wire snp_fwd_rsp_valid;
wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
.clk (clk),
.reset (reset),
VX_snp_forwarder #(
.CACHE_ID (`L3CACHE_ID),
.NUM_REQS (`NUM_CLUSTERS),
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L3SNP_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L3SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.dram_req_valid (per_cluster_dram_req_valid [i]),
.dram_req_rw (per_cluster_dram_req_rw [i]),
.dram_req_byteen (per_cluster_dram_req_byteen[i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (cluster_dram_req_ready),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_req_valid (per_cluster_snp_req_valid [i]),
.snp_req_addr (per_cluster_snp_req_addr [i]),
.snp_req_inv (per_cluster_snp_req_inv [i]),
.snp_req_tag (per_cluster_snp_req_tag [i]),
.snp_req_ready (per_cluster_snp_req_ready [i]),
.snp_fwdout_valid (per_cluster_snp_req_valid),
.snp_fwdout_addr (per_cluster_snp_req_addr),
.snp_fwdout_inv (per_cluster_snp_req_inv),
.snp_fwdout_tag (per_cluster_snp_req_tag),
.snp_fwdout_ready (per_cluster_snp_req_ready),
.snp_rsp_valid (per_cluster_snp_rsp_valid [i]),
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.snp_fwdin_valid (per_cluster_snp_rsp_valid),
.snp_fwdin_tag (per_cluster_snp_rsp_tag),
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
);
.io_req_valid (per_cluster_io_req_valid [i]),
.io_req_rw (per_cluster_io_req_rw [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (per_cluster_io_req_ready [i]),
.io_rsp_valid (per_cluster_io_rsp_valid [i]),
.io_rsp_data (per_cluster_io_rsp_data [i]),
.io_rsp_tag (per_cluster_io_rsp_tag [i]),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.csr_io_req_valid (per_cluster_csr_io_req_valid[i]),
.csr_io_req_coreid (per_cluster_csr_io_req_coreid),
.csr_io_req_rw (per_cluster_csr_io_req_rw [i]),
.csr_io_req_addr (per_cluster_csr_io_req_addr[i]),
.csr_io_req_data (per_cluster_csr_io_req_data[i]),
.csr_io_req_ready (per_cluster_csr_io_req_ready[i]),
.csr_io_rsp_valid (per_cluster_csr_io_rsp_valid[i]),
.csr_io_rsp_data (per_cluster_csr_io_rsp_data[i]),
.csr_io_rsp_ready (per_cluster_csr_io_rsp_ready[i]),
.busy (per_cluster_busy [i]),
.ebreak (per_cluster_ebreak [i])
);
end
VX_databus_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`L2CORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH)
) io_arb (
.clk (clk),
.reset (reset),
// input requests
.req_valid_in (per_cluster_io_req_valid),
.req_rw_in (per_cluster_io_req_rw),
.req_byteen_in (per_cluster_io_req_byteen),
.req_addr_in (per_cluster_io_req_addr),
.req_data_in (per_cluster_io_req_data),
.req_tag_in (per_cluster_io_req_tag),
.req_ready_in (per_cluster_io_req_ready),
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input responses
.rsp_valid_in (per_cluster_io_rsp_valid),
.rsp_data_in (per_cluster_io_rsp_data),
.rsp_tag_in (per_cluster_io_rsp_tag),
.rsp_ready_in (per_cluster_io_rsp_ready),
// output response
.rsp_valid_out (io_rsp_valid),
.rsp_tag_out (io_rsp_tag),
.rsp_data_out (io_rsp_data),
.rsp_ready_out (io_rsp_ready)
);
VX_csr_io_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32),
.ADDR_WIDTH (12)
) csr_io_arb (
.clk (clk),
.reset (reset),
.request_id (csr_io_request_id),
// input requests
.req_valid_in (csr_io_req_valid),
.req_addr_in (csr_io_req_addr),
.req_rw_in (csr_io_req_rw),
.req_data_in (csr_io_req_data),
.req_ready_in (csr_io_req_ready),
// output request
.req_valid_out (per_cluster_csr_io_req_valid),
.req_addr_out (per_cluster_csr_io_req_addr),
.req_rw_out (per_cluster_csr_io_req_rw),
.req_data_out (per_cluster_csr_io_req_data),
.req_ready_out (per_cluster_csr_io_req_ready),
// input responses
.rsp_valid_in (per_cluster_csr_io_rsp_valid),
.rsp_data_in (per_cluster_csr_io_rsp_data),
.rsp_ready_in (per_cluster_csr_io_rsp_ready),
// output response
.rsp_valid_out (csr_io_rsp_valid),
.rsp_data_out (csr_io_rsp_data),
.rsp_ready_out (csr_io_rsp_ready)
);
assign busy = (| per_cluster_busy);
assign ebreak = (| per_cluster_ebreak);
// L3 Cache ///////////////////////////////////////////////////////////
if (`L3_ENABLE) begin
wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready;
wire snp_fwd_rsp_valid;
wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
wire cluster_dram_rsp_ready;
reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other;
always @(*) begin
cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}};
for (integer i = 0; i < `NUM_CLUSTERS; i++) begin
@@ -354,39 +325,10 @@ module Vortex (
end
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
VX_snp_forwarder #(
.CACHE_ID (`L3CACHE_ID),
.NUM_REQS (`NUM_CLUSTERS),
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNRQ_SIZE (`L3SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (per_cluster_snp_req_valid),
.snp_fwdout_addr (per_cluster_snp_req_addr),
.snp_fwdout_inv (per_cluster_snp_req_inv),
.snp_fwdout_tag (per_cluster_snp_req_tag),
.snp_fwdout_ready (per_cluster_snp_req_ready),
.snp_fwdin_valid (per_cluster_snp_rsp_valid),
.snp_fwdin_tag (per_cluster_snp_rsp_tag),
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
);
wire cluster_dram_req_ready;
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
assign per_cluster_dram_req_ready[i] = cluster_dram_req_ready;
end
VX_cache #(
.CACHE_ID (`L3CACHE_ID),
@@ -460,6 +402,56 @@ module Vortex (
// Miss status
`UNUSED_PIN (miss_vec)
);
end else begin
// Memory arbiter connecting the `NUM_CLUSTERS per-cluster DRAM request/response
// ports to the single top-level dram_* interface.
// NOTE(review): this sits in the else-branch of a condition that is outside
// this view -- presumably the "L3 cache disabled" path; confirm.
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH)
) dram_arb (
.clk (clk),
.reset (reset),
// Request inputs: one entry per cluster
.req_valid_in (per_cluster_dram_req_valid),
.req_rw_in (per_cluster_dram_req_rw),
.req_byteen_in (per_cluster_dram_req_byteen),
.req_addr_in (per_cluster_dram_req_addr),
.req_data_in (per_cluster_dram_req_data),
.req_tag_in (per_cluster_dram_req_tag),
.req_ready_in (per_cluster_dram_req_ready),
// Request output: top-level DRAM request port
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// Response outputs: one entry per cluster
.rsp_valid_out (per_cluster_dram_rsp_valid),
.rsp_data_out (per_cluster_dram_rsp_data),
.rsp_tag_out (per_cluster_dram_rsp_tag),
.rsp_ready_out (per_cluster_dram_rsp_ready),
// Response input: top-level DRAM response port
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
);
// In this branch the snoop forwarder's response goes straight to the
// top-level snoop response port; only valid/tag/ready are used, so the
// forwarder's response address and invalidate flag are marked unused.
`UNUSED_VAR (snp_fwd_rsp_addr)
`UNUSED_VAR (snp_fwd_rsp_inv)
assign snp_rsp_valid = snp_fwd_rsp_valid;
assign snp_rsp_tag = snp_fwd_rsp_tag;
// Back-pressure from the top-level response port is returned to the forwarder.
assign snp_fwd_rsp_ready = snp_rsp_ready;
end
`SCOPE_ASSIGN (reset, reset);

View File

@@ -150,7 +150,7 @@ module VX_cache #(
.per_bank_ready (per_bank_core_req_ready)
);
assign dram_req_tag = dram_req_addr;
assign dram_req_tag = dram_req_addr;
if (NUM_BANKS == 1) begin
assign dram_rsp_ready = per_bank_dram_rsp_ready;
end else begin

View File

@@ -5,9 +5,9 @@ module VX_snp_forwarder #(
parameter SRC_ADDR_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1,
parameter NUM_REQS = 1,
parameter SNP_TAG_WIDTH = 1,
parameter SNRQ_SIZE = 1,
parameter LOG_SNRQ_SIZE = `LOG2UP(SNRQ_SIZE)
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = `LOG2UP(SNRQ_SIZE)
) (
input wire clk,
input wire reset,
@@ -16,185 +16,204 @@ module VX_snp_forwarder #(
input wire snp_req_valid,
input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_inv,
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
input wire [TAG_IN_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire snp_rsp_inv,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_REQS-1:0] snp_fwdout_valid,
output wire [NUM_REQS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQS-1:0] snp_fwdout_inv,
output wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdout_tag,
output wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdout_tag,
input wire [NUM_REQS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_REQS-1:0] snp_fwdin_valid,
input wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag,
input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag,
output wire [NUM_REQS-1:0] snp_fwdin_ready
);
localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH;
localparam NUM_REQUESTS_QUAL = NUM_REQS * (1 << ADDR_DIFF);
localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL);
`STATIC_ASSERT(NUM_REQS > 1, ("invalid value"))
if (NUM_REQS > 1) begin
// Inputs buffering
wire [NUM_REQS-1:0] snp_fwdin_valid_qual;
wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0] snp_fwdin_ready_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (LOG_SNRQ_SIZE),
.PASSTHRU (NUM_REQS < 4)
) snp_fwdin_buffer (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid[i]),
.data_in (snp_fwdin_tag[i]),
.ready_in (snp_fwdin_ready[i]),
.valid_out (snp_fwdin_valid_qual[i]),
.data_out (snp_fwdin_tag_qual[i]),
.ready_out (snp_fwdin_ready_qual[i])
);
end
// Inputs buffering: each forwarded-in snoop response port gets its own
// VX_skid_buffer; the *_qual signals are the buffered versions that feed
// the response arbiter (snp_fwdin_arb).
wire [NUM_REQS-1:0] snp_fwdin_valid_qual;
wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0] snp_fwdin_ready_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (TAG_OUT_WIDTH),
// NOTE(review): PASSTHRU presumably bypasses the buffer when there are
// fewer than 4 requesters -- confirm against VX_skid_buffer.
.PASSTHRU (NUM_REQS < 4)
) snp_fwdin_buffer (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid[i]),
.data_in (snp_fwdin_tag[i]),
.ready_in (snp_fwdin_ready[i]),
.valid_out (snp_fwdin_valid_qual[i]),
.data_out (snp_fwdin_tag_qual[i]),
.ready_out (snp_fwdin_ready_qual[i])
);
end
reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [LOG_SNRQ_SIZE-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_full;
reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_full;
wire [LOG_SNRQ_SIZE-1:0] fwdin_tag;
wire fwdin_valid;
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
wire fwdin_fire = fwdin_valid && fwdin_ready;
wire [TAG_OUT_WIDTH-1:0] fwdin_tag;
wire fwdin_valid;
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
wire fwdin_fire = fwdin_valid && fwdin_ready;
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
assign sfq_read_addr = fwdin_tag;
wire sfq_acquire = snp_req_valid && snp_req_ready;
wire sfq_release = snp_rsp_valid && snp_rsp_ready;
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
assign sfq_read_addr = fwdin_tag;
wire sfq_acquire = snp_req_valid && snp_req_ready;
wire sfq_release = snp_rsp_valid && snp_rsp_ready;
VX_cam_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) req_metadata_buf (
.clk (clk),
.reset (reset),
.write_addr (sfq_write_addr),
.acquire_slot (sfq_acquire),
.read_addr (sfq_read_addr),
.write_data ({snp_req_addr, snp_req_inv, snp_req_tag}),
.read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}),
.release_addr (sfq_read_addr),
.release_slot (sfq_release),
.full (sfq_full)
);
wire fwdout_valid;
wire [LOG_SNRQ_SIZE-1:0] fwdout_tag;
wire [DST_ADDR_WIDTH-1:0] fwdout_addr;
wire fwdout_inv;
wire fwdout_ready;
wire dispatch_hold;
// Per-request metadata store: keeps {address, invalidate flag, incoming tag}
// of each accepted snoop request so they can be returned with the response.
// A slot is taken when a request is accepted (sfq_acquire) and freed when the
// response is accepted (sfq_release). The slot is read and released at
// sfq_read_addr, which is the tag carried by the forwarded-in response.
// NOTE(review): sfq_write_addr is presumably an output of VX_cam_buffer giving
// the slot being allocated -- confirm against VX_cam_buffer's port directions.
VX_cam_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
.SIZE (SNRQ_SIZE)
) req_metadata_buf (
.clk (clk),
.reset (reset),
.write_addr (sfq_write_addr),
.acquire_slot (sfq_acquire),
.read_addr (sfq_read_addr),
.write_data ({snp_req_addr, snp_req_inv, snp_req_tag}),
.read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}),
.release_addr (sfq_read_addr),
.release_slot (sfq_release),
.full (sfq_full)
);
wire fwdout_valid;
wire [TAG_OUT_WIDTH-1:0] fwdout_tag;
wire [DST_ADDR_WIDTH-1:0] fwdout_addr;
wire fwdout_inv;
wire fwdout_ready;
wire dispatch_hold;
if (ADDR_DIFF != 0) begin
reg [LOG_SNRQ_SIZE-1:0] fwdout_tag_r;
reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r;
reg fwdout_inv_r;
reg dispatch_hold_r;
if (ADDR_DIFF != 0) begin
reg [TAG_OUT_WIDTH-1:0] fwdout_tag_r;
reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r;
reg fwdout_inv_r;
reg dispatch_hold_r;
always @(posedge clk) begin
if (reset) begin
dispatch_hold_r <= 0;
end else begin
if (snp_req_valid && snp_req_ready) begin
dispatch_hold_r <= 1;
always @(posedge clk) begin
if (reset) begin
dispatch_hold_r <= 0;
end else begin
if (snp_req_valid && snp_req_ready) begin
dispatch_hold_r <= 1;
end
if (dispatch_hold_r
&& fwdout_ready
&& (fwdout_addr[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1))) begin
dispatch_hold_r <= 0;
end
end
if (dispatch_hold_r
&& fwdout_ready
&& (fwdout_addr[ADDR_DIFF-1:0] == ((1 << ADDR_DIFF)-1))) begin
dispatch_hold_r <= 0;
end
end
if (fwdout_valid && fwdout_ready) begin
fwdout_addr_r <= fwdout_addr + DST_ADDR_WIDTH'(1'b1);
end
if (fwdout_valid && fwdout_ready) begin
fwdout_addr_r <= fwdout_addr + DST_ADDR_WIDTH'(1'b1);
if (snp_req_valid && snp_req_ready) begin
fwdout_inv_r <= snp_req_inv;
fwdout_tag_r <= sfq_write_addr;
end
end
assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full);
assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr;
assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv;
assign dispatch_hold= dispatch_hold_r;
end else begin
assign fwdout_valid = snp_req_valid && !sfq_full;
assign fwdout_tag = sfq_write_addr;
assign fwdout_addr = snp_req_addr;
assign fwdout_inv = snp_req_inv;
assign dispatch_hold= 1'b0;
end
if (snp_req_valid && snp_req_ready) begin
fwdout_inv_r <= snp_req_inv;
fwdout_tag_r <= sfq_write_addr;
always @(posedge clk) begin
if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
end
end
assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full);
assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr;
assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv;
assign dispatch_hold= dispatch_hold_r;
end else begin
assign fwdout_valid = snp_req_valid && !sfq_full;
assign fwdout_tag = sfq_write_addr;
assign fwdout_addr = snp_req_addr;
assign fwdout_inv = snp_req_inv;
assign dispatch_hold= 1'b0;
end
always @(posedge clk) begin
if (sfq_acquire) begin
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
reg [NUM_REQS-1:0] snp_fwdout_ready_other;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i];
assign snp_fwdout_addr[i] = fwdout_addr;
assign snp_fwdout_inv[i] = fwdout_inv;
assign snp_fwdout_tag[i] = fwdout_tag;
end
end
reg [NUM_REQS-1:0] snp_fwdout_ready_other;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i];
assign snp_fwdout_addr[i] = fwdout_addr;
assign snp_fwdout_inv[i] = fwdout_inv;
assign snp_fwdout_tag[i] = fwdout_tag;
end
always @(*) begin
snp_fwdout_ready_other = {NUM_REQS{1'b1}};
for (integer i = 0; i < NUM_REQS; i++) begin
for (integer j = 0; j < NUM_REQS; j++) begin
if (i != j)
snp_fwdout_ready_other[i] &= snp_fwdout_ready[j];
always @(*) begin
snp_fwdout_ready_other = {NUM_REQS{1'b1}};
for (integer i = 0; i < NUM_REQS; i++) begin
for (integer j = 0; j < NUM_REQS; j++) begin
if (i != j)
snp_fwdout_ready_other[i] &= snp_fwdout_ready[j];
end
end
end
assign fwdout_ready = (& snp_fwdout_ready);
assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;
// Arbitrates the NUM_REQS buffered forwarded-in snoop responses down to a
// single stream (fwdin_valid / fwdin_tag / fwdin_ready). Only the tag is
// carried as data; it is used as the metadata slot index (sfq_read_addr).
// BUFFERED is enabled for 4 or more requesters, the complement of the
// PASSTHRU condition used on the per-input skid buffers.
VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.DATAW(TAG_OUT_WIDTH),
.BUFFERED(NUM_REQS >= 4)
) snp_fwdin_arb (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid_qual),
.data_in (snp_fwdin_tag_qual),
.ready_in (snp_fwdin_ready_qual),
.valid_out (fwdin_valid),
.data_out (fwdin_tag),
.ready_out (fwdin_ready)
);
end else begin
// Passthrough path (the branch taken when NUM_REQS is not greater than 1):
// no tracking state is kept, the request is wired straight to the single
// forwarding port and the forwarded-in response straight to the response port.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign snp_fwdout_valid = snp_req_valid;
// NOTE(review): snp_fwdout_addr is DST_ADDR_WIDTH wide while snp_req_addr is
// SRC_ADDR_WIDTH wide -- this assumes the two widths match here; confirm.
assign snp_fwdout_addr = snp_req_addr;
assign snp_fwdout_inv = snp_req_inv;
// NOTE(review): snp_fwdout_tag is TAG_OUT_WIDTH wide while snp_req_tag is
// TAG_IN_WIDTH wide -- confirm instantiations with one requester set them equal.
assign snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = snp_fwdout_ready;
assign snp_rsp_valid = snp_fwdin_valid;
// NOTE(review): the response address/inv are taken from the request inputs
// as they are at response time, not from the request that produced this
// response -- verify consumers ignore them on this path.
assign snp_rsp_addr = snp_req_addr;
assign snp_rsp_inv = snp_req_inv;
// The tag returned on the forwarding-in port is handed back as the response tag.
assign snp_rsp_tag = snp_fwdin_tag;
assign snp_fwdin_ready = snp_rsp_ready;
end
assign fwdout_ready = (& snp_fwdout_ready);
assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;
VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.DATAW(LOG_SNRQ_SIZE),
.BUFFERED(NUM_REQS >= 4)
) snp_fwdin_arb (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid_qual),
.data_in (snp_fwdin_tag_qual),
.ready_in (snp_fwdin_ready_qual),
.valid_out (fwdin_valid),
.data_out (fwdin_tag),
.ready_out (fwdin_ready)
);
`ifdef DBG_PRINT_CACHE_SNP
always @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin

View File

@@ -66,8 +66,7 @@ module VX_generic_queue #(
empty_r <= 0;
if (used_r == ADDRW'(SIZE-1)) begin
full_r <= 1;
end
used_r <= used_r + ADDRW'(1);
end
end
end
if (pop) begin
@@ -76,10 +75,10 @@ module VX_generic_queue #(
full_r <= 0;
if (used_r == ADDRW'(1)) begin
empty_r <= 1;
end;
used_r <= used_r - ADDRW'(1);
end;
end
end
used_r <= used_r + ADDRW'(push) - ADDRW'(pop);
end
end
@@ -96,12 +95,8 @@ module VX_generic_queue #(
rd_ptr_r <= 0;
wr_ptr_r <= 0;
end else begin
if (push) begin
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
end
if (pop) begin
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
end
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(push);
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(pop);
end
end