refactoring cores clustering

This commit is contained in:
Blaise Tine
2020-12-06 14:42:12 -08:00
parent b2652527bb
commit 1332970636
9 changed files with 504 additions and 615 deletions

View File

@@ -6,7 +6,7 @@ set -e
show_usage()
{
echo "Vortex BlackBox Test Driver v1.0"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
}
DRIVER=vlsim
@@ -16,6 +16,7 @@ CORES=2
WARPS=4
THREADS=4
L2=0
L3=0
DEBUG=0
SCOPE=0
HAS_ARGS=0
@@ -51,6 +52,10 @@ case $i in
L2=1
shift
;;
# --l3cache: record the request by setting L3=1; the value is later placed
# in CONFIGS as -DL3_ENABLE=$L3.
--l3cache)
L3=1
shift
;;
--debug)
DEBUG=1
shift
@@ -112,7 +117,7 @@ case $APP in
;;
esac
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2"
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3"
echo "CONFIGS=$CONFIGS"

View File

@@ -71,7 +71,8 @@ module VX_avs_wrapper #(
VX_generic_queue #(
.DATAW (REQ_TAGW),
.SIZE (RD_QUEUE_SIZE)
.SIZE (RD_QUEUE_SIZE),
.BUFFERED (1)
) rd_req_queue (
.clk (clk),
.reset (reset),
@@ -86,7 +87,8 @@ module VX_avs_wrapper #(
VX_generic_queue #(
.DATAW (AVS_DATAW),
.SIZE (RD_QUEUE_SIZE)
.SIZE (RD_QUEUE_SIZE),
.BUFFERED (1)
) rd_rsp_queue (
.clk (clk),
.reset (reset),

View File

@@ -255,51 +255,55 @@ module VX_cluster #(
assign busy = (| per_core_busy);
assign ebreak = (| per_core_ebreak);
if (`L2_ENABLE) begin
// L2 Cache ///////////////////////////////////////////////////////////
wire [`NUM_CORES-1:0] core_dram_req_valid;
wire [`NUM_CORES-1:0] core_dram_req_rw;
wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
wire core_dram_req_ready;
wire [`NUM_CORES-1:0] core_dram_rsp_valid;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire core_dram_rsp_ready;
wire [`NUM_CORES-1:0] core_snp_fwdout_valid;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
wire [`NUM_CORES-1:0] core_snp_fwdout_inv;
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
wire [`NUM_CORES-1:0] core_snp_fwdout_ready;
wire [`NUM_CORES-1:0] core_snp_fwdin_valid;
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire [`NUM_CORES-1:0] core_snp_fwdin_ready;
wire snp_fwd_rsp_valid;
wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign core_dram_req_valid [i] = per_core_dram_req_valid [i];
assign core_dram_req_rw [i] = per_core_dram_req_rw [i];
assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i];
assign core_dram_req_addr [i] = per_core_dram_req_addr [i];
assign core_dram_req_data [i] = per_core_dram_req_data [i];
assign core_dram_req_tag [i] = per_core_dram_req_tag [i];
assign per_core_dram_req_ready [i] = core_dram_req_ready;
end
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.TAG_IN_WIDTH (`L2SNP_TAG_WIDTH),
.TAG_OUT_WIDTH (`DSNP_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (per_core_snp_req_valid),
.snp_fwdout_addr (per_core_snp_req_addr),
.snp_fwdout_inv (per_core_snp_req_inv),
.snp_fwdout_tag (per_core_snp_req_tag),
.snp_fwdout_ready (per_core_snp_req_ready),
.snp_fwdin_valid (per_core_snp_rsp_valid),
.snp_fwdin_tag (per_core_snp_rsp_tag),
.snp_fwdin_ready (per_core_snp_rsp_ready)
);
if (`L2_ENABLE) begin
wire [`NUM_CORES-1:0] core_dram_rsp_valid;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire core_dram_rsp_ready;
reg [`NUM_CORES-1:0] core_dram_rsp_ready_other;
always @(*) begin
core_dram_rsp_ready_other = {`NUM_CORES{1'b1}};
for (integer i = 0; i < `NUM_CORES; i++) begin
@@ -318,52 +322,11 @@ module VX_cluster #(
end
assign core_dram_rsp_ready = & (per_core_dram_rsp_ready | ~core_dram_rsp_valid);
wire core_dram_req_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i];
assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i];
assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i];
assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i];
assign core_snp_fwdout_ready [i] = per_core_snp_req_ready[i];
assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i];
assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i];
assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i];
assign per_core_dram_req_ready[i] = core_dram_req_ready;
end
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_fwd_rsp_valid),
.snp_rsp_addr (snp_fwd_rsp_addr),
.snp_rsp_inv (snp_fwd_rsp_inv),
.snp_rsp_tag (snp_fwd_rsp_tag),
.snp_rsp_ready (snp_fwd_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_inv (core_snp_fwdout_inv),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
@@ -392,12 +355,12 @@ module VX_cluster #(
.reset (reset),
// Core request
.core_req_valid (core_dram_req_valid),
.core_req_rw (core_dram_req_rw),
.core_req_byteen (core_dram_req_byteen),
.core_req_addr (core_dram_req_addr),
.core_req_data (core_dram_req_data),
.core_req_tag (core_dram_req_tag),
.core_req_valid (per_core_dram_req_valid),
.core_req_rw (per_core_dram_req_rw),
.core_req_byteen (per_core_dram_req_byteen),
.core_req_addr (per_core_dram_req_addr),
.core_req_data (per_core_dram_req_data),
.core_req_tag (per_core_dram_req_tag),
.core_req_ready (core_dram_req_ready),
// Core response
@@ -439,100 +402,6 @@ module VX_cluster #(
end else begin
wire[`NUM_CORES-1:0] core_dram_req_valid;
wire[`NUM_CORES-1:0] core_dram_req_rw;
wire[`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
wire[`NUM_CORES-1:0] core_dram_req_ready;
wire[`NUM_CORES-1:0] core_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire[`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire[`NUM_CORES-1:0] core_dram_rsp_ready;
wire[`NUM_CORES-1:0] core_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
wire[`NUM_CORES-1:0] core_snp_fwdout_inv;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
wire[`NUM_CORES-1:0] core_snp_fwdout_ready;
wire[`NUM_CORES-1:0] core_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign core_dram_req_valid [i] = per_core_dram_req_valid [i];
assign core_dram_req_rw [i] = per_core_dram_req_rw [i];
assign core_dram_req_byteen [i] = per_core_dram_req_byteen [i];
assign core_dram_req_addr [i] = per_core_dram_req_addr [i];
assign core_dram_req_data [i] = per_core_dram_req_data [i];
assign core_dram_req_tag [i] = per_core_dram_req_tag [i];
assign per_core_dram_req_ready [i] = core_dram_req_ready [i];
assign per_core_dram_rsp_valid [i] = core_dram_rsp_valid [i];
assign per_core_dram_rsp_data [i] = core_dram_rsp_data [i];
assign per_core_dram_rsp_tag [i] = core_dram_rsp_tag [i];
assign core_dram_rsp_ready [i] = per_core_dram_rsp_ready [i];
assign per_core_snp_req_valid [i] = core_snp_fwdout_valid [i];
assign per_core_snp_req_addr [i] = core_snp_fwdout_addr [i];
assign per_core_snp_req_inv [i] = core_snp_fwdout_inv [i];
assign per_core_snp_req_tag [i] = core_snp_fwdout_tag [i];
assign core_snp_fwdout_ready [i] = per_core_snp_req_ready [i];
assign core_snp_fwdin_valid [i] = per_core_snp_rsp_valid [i];
assign core_snp_fwdin_tag [i] = per_core_snp_rsp_tag [i];
assign per_core_snp_rsp_ready [i] = core_snp_fwdin_ready [i];
end
if (`NUM_CORES > 1) begin
VX_snp_forwarder #(
.CACHE_ID (`L2CACHE_ID),
.NUM_REQS (`NUM_CORES),
.SRC_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L2SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_addr),
`UNUSED_PIN (snp_rsp_inv),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.snp_fwdout_valid (core_snp_fwdout_valid),
.snp_fwdout_addr (core_snp_fwdout_addr),
.snp_fwdout_inv (core_snp_fwdout_inv),
.snp_fwdout_tag (core_snp_fwdout_tag),
.snp_fwdout_ready (core_snp_fwdout_ready),
.snp_fwdin_valid (core_snp_fwdin_valid),
.snp_fwdin_tag (core_snp_fwdin_tag),
.snp_fwdin_ready (core_snp_fwdin_ready)
);
end else begin
assign core_snp_fwdout_valid= snp_req_valid;
assign core_snp_fwdout_addr = snp_req_addr;
assign core_snp_fwdout_inv = snp_req_inv;
assign core_snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = core_snp_fwdout_ready;
assign snp_rsp_valid = core_snp_fwdin_valid;
assign snp_rsp_tag = core_snp_fwdin_tag;
assign core_snp_fwdin_ready = snp_rsp_ready;
end
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
@@ -543,13 +412,13 @@ module VX_cluster #(
.reset (reset),
// Core request
.req_valid_in (core_dram_req_valid),
.req_rw_in (core_dram_req_rw),
.req_byteen_in (core_dram_req_byteen),
.req_addr_in (core_dram_req_addr),
.req_data_in (core_dram_req_data),
.req_tag_in (core_dram_req_tag),
.req_ready_in (core_dram_req_ready),
.req_valid_in (per_core_dram_req_valid),
.req_rw_in (per_core_dram_req_rw),
.req_byteen_in (per_core_dram_req_byteen),
.req_addr_in (per_core_dram_req_addr),
.req_data_in (per_core_dram_req_data),
.req_tag_in (per_core_dram_req_tag),
.req_ready_in (per_core_dram_req_ready),
// DRAM request
.req_valid_out (dram_req_valid),
@@ -561,10 +430,10 @@ module VX_cluster #(
.req_ready_out (dram_req_ready),
// Core response
.rsp_valid_out (core_dram_rsp_valid),
.rsp_data_out (core_dram_rsp_data),
.rsp_tag_out (core_dram_rsp_tag),
.rsp_ready_out (core_dram_rsp_ready),
.rsp_valid_out (per_core_dram_rsp_valid),
.rsp_data_out (per_core_dram_rsp_data),
.rsp_tag_out (per_core_dram_rsp_tag),
.rsp_ready_out (per_core_dram_rsp_ready),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
@@ -573,6 +442,13 @@ module VX_cluster #(
.rsp_ready_in (dram_rsp_ready)
);
`UNUSED_VAR (snp_fwd_rsp_addr)
`UNUSED_VAR (snp_fwd_rsp_inv)
assign snp_rsp_valid = snp_fwd_rsp_valid;
assign snp_rsp_tag = snp_fwd_rsp_tag;
assign snp_fwd_rsp_ready = snp_rsp_ready;
end
endmodule

View File

@@ -24,11 +24,11 @@
`endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 2)
`define L2_ENABLE (`NUM_CORES >= 4)
`endif
`ifndef L3_ENABLE
`define L3_ENABLE (`NUM_CLUSTERS > 1)
`define L3_ENABLE (`NUM_CLUSTERS >= 4)
`endif
`ifndef GLOBAL_BLOCK_SIZE

View File

@@ -366,7 +366,7 @@
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES)))
// Snoop request tag bits
`define L2SNP_TAG_WIDTH (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)
////////////////////////// L3cache Configurable Knobs /////////////////////////
@@ -392,10 +392,10 @@
`define L3DRAM_BYTEEN_WIDTH `L3BANK_LINE_SIZE
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS)))
// Snoop request tag bits
`define L3SNP_TAG_WIDTH 16
`define L3SNP_TAG_WIDTH `VX_SNP_TAG_WIDTH
///////////////////////////////////////////////////////////////////////////////
@@ -403,7 +403,7 @@
`define VX_DRAM_ADDR_WIDTH `L3DRAM_ADDR_WIDTH
`define VX_DRAM_LINE_WIDTH `L3DRAM_LINE_WIDTH
`define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH
`define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH
`define VX_SNP_TAG_WIDTH 16
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)

View File

@@ -66,76 +66,13 @@ module Vortex (
output wire busy,
output wire ebreak
);
if (`NUM_CLUSTERS == 1) begin
VX_cluster #(
.CLUSTER_ID(0)
) cluster (
`SCOPE_BIND_Vortex_cluster(0)
.clk (clk),
.reset (reset),
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
.dram_req_byteen (dram_req_byteen),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_inv (snp_req_inv),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.io_req_valid (io_req_valid),
.io_req_rw (io_req_rw),
.io_req_byteen (io_req_byteen),
.io_req_addr (io_req_addr),
.io_req_data (io_req_data),
.io_req_tag (io_req_tag),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (io_rsp_ready),
.csr_io_req_valid (csr_io_req_valid),
.csr_io_req_coreid (csr_io_req_coreid),
.csr_io_req_rw (csr_io_req_rw),
.csr_io_req_addr (csr_io_req_addr),
.csr_io_req_data (csr_io_req_data),
.csr_io_req_ready (csr_io_req_ready),
.csr_io_rsp_valid (csr_io_rsp_valid),
.csr_io_rsp_data (csr_io_rsp_data),
.csr_io_rsp_ready (csr_io_rsp_ready),
.busy (busy),
.ebreak (ebreak)
);
end else begin
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire cluster_dram_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
@@ -178,8 +115,8 @@ module Vortex (
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_io_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CORES));
wire [`NC_BITS-1:0] csr_io_core_id = `NC_BITS'(csr_io_req_coreid);
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
VX_cluster #(
@@ -196,7 +133,7 @@ module Vortex (
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (cluster_dram_req_ready),
.dram_req_ready (per_cluster_dram_req_ready [i]),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
@@ -227,7 +164,7 @@ module Vortex (
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.csr_io_req_valid (per_cluster_csr_io_req_valid[i]),
.csr_io_req_coreid (per_cluster_csr_io_req_coreid),
.csr_io_req_coreid (csr_io_core_id),
.csr_io_req_rw (per_cluster_csr_io_req_rw [i]),
.csr_io_req_addr (per_cluster_csr_io_req_addr[i]),
.csr_io_req_data (per_cluster_csr_io_req_data[i]),
@@ -290,7 +227,7 @@ module Vortex (
.clk (clk),
.reset (reset),
.request_id (csr_io_request_id),
.request_id (csr_io_cluster_id),
// input requests
.req_valid_in (csr_io_req_valid),
@@ -320,46 +257,19 @@ module Vortex (
assign busy = (| per_cluster_busy);
assign ebreak = (| per_cluster_ebreak);
// L3 Cache ///////////////////////////////////////////////////////////
wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready;
wire snp_fwd_rsp_valid;
wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr;
wire snp_fwd_rsp_inv;
wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag;
wire snp_fwd_rsp_ready;
reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other;
always @(*) begin
cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}};
for (integer i = 0; i < `NUM_CLUSTERS; i++) begin
for (integer j = 0; j < `NUM_CLUSTERS; j++) begin
if (i != j) begin
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
end
end
end
end
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
VX_snp_forwarder #(
.CACHE_ID (`L3CACHE_ID),
.NUM_REQS (`NUM_CLUSTERS),
.SRC_ADDR_WIDTH (`L3DRAM_ADDR_WIDTH),
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.TAG_IN_WIDTH (`L3SNP_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2SNP_TAG_WIDTH),
.SNRQ_SIZE (`L3SNRQ_SIZE)
) snp_forwarder (
.clk (clk),
@@ -388,6 +298,38 @@ module Vortex (
.snp_fwdin_ready (per_cluster_snp_rsp_ready)
);
if (`L3_ENABLE) begin
wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready;
reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other;
// Combinational: for every cluster i, compute whether each OTHER cluster j
// is either ready to take a response or has no valid response this cycle.
always @(*) begin
// start from all-ones; the loop below can only clear bits
cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}};
for (integer i = 0; i < `NUM_CLUSTERS; i++) begin
for (integer j = 0; j < `NUM_CLUSTERS; j++) begin
if (i != j) begin
// bit i survives only if cluster j (j != i) is ready or not valid
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
end
end
end
end
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
// Core Response
// valid is presented to cluster i only when no other cluster with a
// valid response is stalling; data and tag pass through unchanged
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
// Single shared ready: asserted when every cluster that has a valid response
// is ready for it, so all valid responses are accepted in the same cycle.
// NOTE(review): the driver of cluster_dram_rsp_valid and the consumer of
// cluster_dram_rsp_ready are outside this view - confirm.
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
wire cluster_dram_req_ready;
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
assign per_cluster_dram_req_ready[i] = cluster_dram_req_ready;
end
VX_cache #(
.CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE),
@@ -460,6 +402,56 @@ module Vortex (
// Miss status
`UNUSED_PIN (miss_vec)
);
end else begin
// L3-disabled path (else branch of the L3_ENABLE check): the per-cluster DRAM
// request/response buses are connected to the top-level DRAM interface through
// one VX_mem_arb instance instead of an L3 cache.
// Tags are L2DRAM_TAG_WIDTH bits on the cluster side and L3DRAM_TAG_WIDTH bits
// on the DRAM side.
// NOTE(review): with L3 disabled, L3DRAM_TAG_WIDTH is defined as
// L2DRAM_TAG_WIDTH + CLOG2(NUM_CLUSTERS); presumably the extra bits identify
// the source cluster - confirm against VX_mem_arb.
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH)
) dram_arb (
.clk (clk),
.reset (reset),
// Core request
// one entry per cluster; req_ready_in is a per-cluster vector here
.req_valid_in (per_cluster_dram_req_valid),
.req_rw_in (per_cluster_dram_req_rw),
.req_byteen_in (per_cluster_dram_req_byteen),
.req_addr_in (per_cluster_dram_req_addr),
.req_data_in (per_cluster_dram_req_data),
.req_tag_in (per_cluster_dram_req_tag),
.req_ready_in (per_cluster_dram_req_ready),
// DRAM request
// wired to the module's top-level DRAM request signals
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// Core response
// one entry per cluster, wired straight to the per-cluster response buses
.rsp_valid_out (per_cluster_dram_rsp_valid),
.rsp_data_out (per_cluster_dram_rsp_data),
.rsp_tag_out (per_cluster_dram_rsp_tag),
.rsp_ready_out (per_cluster_dram_rsp_ready),
// DRAM response
// wired to the module's top-level DRAM response signals
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
);
`UNUSED_VAR (snp_fwd_rsp_addr)
`UNUSED_VAR (snp_fwd_rsp_inv)
assign snp_rsp_valid = snp_fwd_rsp_valid;
assign snp_rsp_tag = snp_fwd_rsp_tag;
assign snp_fwd_rsp_ready = snp_rsp_ready;
end
`SCOPE_ASSIGN (reset, reset);

View File

@@ -5,9 +5,9 @@ module VX_snp_forwarder #(
parameter SRC_ADDR_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1,
parameter NUM_REQS = 1,
parameter SNP_TAG_WIDTH = 1,
parameter SNRQ_SIZE = 1,
parameter LOG_SNRQ_SIZE = `LOG2UP(SNRQ_SIZE)
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = `LOG2UP(SNRQ_SIZE)
) (
input wire clk,
input wire reset,
@@ -16,41 +16,41 @@ module VX_snp_forwarder #(
input wire snp_req_valid,
input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_inv,
input wire [SNP_TAG_WIDTH-1:0] snp_req_tag,
input wire [TAG_IN_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire snp_rsp_inv,
output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag,
output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_REQS-1:0] snp_fwdout_valid,
output wire [NUM_REQS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQS-1:0] snp_fwdout_inv,
output wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdout_tag,
output wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdout_tag,
input wire [NUM_REQS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_REQS-1:0] snp_fwdin_valid,
input wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag,
input wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag,
output wire [NUM_REQS-1:0] snp_fwdin_ready
);
localparam ADDR_DIFF = DST_ADDR_WIDTH - SRC_ADDR_WIDTH;
localparam NUM_REQUESTS_QUAL = NUM_REQS * (1 << ADDR_DIFF);
localparam REQ_QUAL_BITS = `LOG2UP(NUM_REQUESTS_QUAL);
`STATIC_ASSERT(NUM_REQS > 1, ("invalid value"))
if (NUM_REQS > 1) begin
// Inputs buffering
wire [NUM_REQS-1:0] snp_fwdin_valid_qual;
wire [NUM_REQS-1:0][LOG_SNRQ_SIZE-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0] snp_fwdin_ready_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (LOG_SNRQ_SIZE),
.DATAW (TAG_OUT_WIDTH),
.PASSTHRU (NUM_REQS < 4)
) snp_fwdin_buffer (
.clk (clk),
@@ -66,10 +66,10 @@ module VX_snp_forwarder #(
reg [REQ_QUAL_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
wire [LOG_SNRQ_SIZE-1:0] sfq_write_addr, sfq_read_addr;
wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr;
wire sfq_full;
wire [LOG_SNRQ_SIZE-1:0] fwdin_tag;
wire [TAG_OUT_WIDTH-1:0] fwdin_tag;
wire fwdin_valid;
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
@@ -83,7 +83,7 @@ module VX_snp_forwarder #(
wire sfq_release = snp_rsp_valid && snp_rsp_ready;
VX_cam_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
.SIZE (SNRQ_SIZE)
) req_metadata_buf (
.clk (clk),
@@ -99,14 +99,14 @@ module VX_snp_forwarder #(
);
wire fwdout_valid;
wire [LOG_SNRQ_SIZE-1:0] fwdout_tag;
wire [TAG_OUT_WIDTH-1:0] fwdout_tag;
wire [DST_ADDR_WIDTH-1:0] fwdout_addr;
wire fwdout_inv;
wire fwdout_ready;
wire dispatch_hold;
if (ADDR_DIFF != 0) begin
reg [LOG_SNRQ_SIZE-1:0] fwdout_tag_r;
reg [TAG_OUT_WIDTH-1:0] fwdout_tag_r;
reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r;
reg fwdout_inv_r;
reg dispatch_hold_r;
@@ -182,7 +182,7 @@ module VX_snp_forwarder #(
VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.DATAW(LOG_SNRQ_SIZE),
.DATAW(TAG_OUT_WIDTH),
.BUFFERED(NUM_REQS >= 4)
) snp_fwdin_arb (
.clk (clk),
@@ -195,6 +195,25 @@ module VX_snp_forwarder #(
.ready_out (fwdin_ready)
);
end else begin
// Pass-through branch, taken when NUM_REQS is not greater than 1. It contains
// only continuous assignments, so clk and reset are marked unused.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
// Request side: drive the forward-out port directly from the snoop request.
// NOTE(review): the ports are declared with different width parameters
// (SRC_ADDR_WIDTH vs DST_ADDR_WIDTH, TAG_IN_WIDTH vs TAG_OUT_WIDTH); these
// direct assigns assume they match in this configuration - confirm.
assign snp_fwdout_valid = snp_req_valid;
assign snp_fwdout_addr = snp_req_addr;
assign snp_fwdout_inv = snp_req_inv;
assign snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = snp_fwdout_ready;
// Response side: hand the forwarded-in response directly back upstream.
assign snp_rsp_valid = snp_fwdin_valid;
// NOTE(review): addr and inv are taken from the live request inputs, not from
// anything carried with the returning response; confirm that consumers of
// snp_rsp_addr / snp_rsp_inv tolerate this.
assign snp_rsp_addr = snp_req_addr;
assign snp_rsp_inv = snp_req_inv;
assign snp_rsp_tag = snp_fwdin_tag;
assign snp_fwdin_ready = snp_rsp_ready;
end
`ifdef DBG_PRINT_CACHE_SNP
always @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin

View File

@@ -67,7 +67,6 @@ module VX_generic_queue #(
if (used_r == ADDRW'(SIZE-1)) begin
full_r <= 1;
end
used_r <= used_r + ADDRW'(1);
end
end
if (pop) begin
@@ -77,9 +76,9 @@ module VX_generic_queue #(
if (used_r == ADDRW'(1)) begin
empty_r <= 1;
end;
used_r <= used_r - ADDRW'(1);
end
end
used_r <= used_r + ADDRW'(push) - ADDRW'(pop);
end
end
@@ -96,12 +95,8 @@ module VX_generic_queue #(
rd_ptr_r <= 0;
wr_ptr_r <= 0;
end else begin
if (push) begin
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
end
if (pop) begin
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
end
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(push);
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(pop);
end
end