cache req datapath optimizations

This commit is contained in:
Blaise Tine
2020-12-08 02:58:08 -08:00
parent 268ad15098
commit d5fa82f5e4
17 changed files with 393 additions and 410 deletions

View File

@@ -149,8 +149,6 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
std::cout << "OK" << std::flush << std::endl;
uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0;
uint64_t frame_offset = 0;
@@ -167,8 +165,6 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true);
std::cout << "OK" << std::flush << std::endl;
// get frame width
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
@@ -239,7 +235,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
signal_id = num_taps;
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
ofs << std::flush;
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::flush << std::endl;
std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
}
}
}

View File

@@ -548,7 +548,7 @@ VX_mem_arb #(
.ADDR_WIDTH ($bits(t_local_mem_addr)),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
) vx_cci_avs_arb (
) dram_arb (
.clk (clk),
.reset (reset),

View File

@@ -204,17 +204,17 @@ module VX_cluster #(
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input responses
.rsp_valid_in (per_core_io_rsp_valid),
.rsp_data_in (per_core_io_rsp_data),
.rsp_tag_in (per_core_io_rsp_tag),
.rsp_ready_in (per_core_io_rsp_ready),
// output response
.rsp_valid_out (io_rsp_valid),
.rsp_tag_out (io_rsp_tag),
.rsp_data_out (io_rsp_data),
.rsp_ready_out (io_rsp_ready)
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_core_io_rsp_valid),
.rsp_data_out (per_core_io_rsp_data),
.rsp_tag_out (per_core_io_rsp_tag),
.rsp_ready_out (per_core_io_rsp_ready)
);
VX_csr_io_arb #(
@@ -298,35 +298,30 @@ module VX_cluster #(
if (`L2_ENABLE) begin
wire [`NUM_CORES-1:0] core_dram_rsp_valid;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
wire core_dram_rsp_ready;
wire [`NUM_CORES-1:0] per_core_dram_req_valid_qual;
wire [`NUM_CORES-1:0] per_core_dram_req_rw_qual;
wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_dram_req_byteen_qual;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_dram_req_addr_qual;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_req_data_qual;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_req_tag_qual;
wire [`NUM_CORES-1:0] per_core_dram_req_ready_qual;
reg [`NUM_CORES-1:0] core_dram_rsp_ready_other;
always @(*) begin
core_dram_rsp_ready_other = {`NUM_CORES{1'b1}};
for (integer i = 0; i < `NUM_CORES; i++) begin
for (integer j = 0; j < `NUM_CORES; j++) begin
if (i != j) begin
core_dram_rsp_ready_other[i] &= (per_core_dram_rsp_ready [j] | !core_dram_rsp_valid [j]);
end
end
end
for (genvar i = 0; i < `NUM_CORES; i++) begin
VX_skid_buffer #(
.DATAW (1 + `DDRAM_BYTEEN_WIDTH + `DDRAM_ADDR_WIDTH + `DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH),
.PASSTHRU (`NUM_CORES < 4)
) dram_req_buffer (
.clk (clk),
.reset (reset),
.valid_in (per_core_dram_req_valid[i]),
.data_in ({per_core_dram_req_rw[i], per_core_dram_req_byteen[i], per_core_dram_req_addr[i], per_core_dram_req_data[i], per_core_dram_req_tag[i]}),
.ready_in (per_core_dram_req_ready[i]),
.valid_out (per_core_dram_req_valid_qual[i]),
.data_out ({per_core_dram_req_rw_qual[i], per_core_dram_req_byteen_qual[i], per_core_dram_req_addr_qual[i], per_core_dram_req_data_qual[i], per_core_dram_req_tag_qual[i]}),
.ready_out (per_core_dram_req_ready_qual[i])
);
end
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_dram_rsp_valid [i] = core_dram_rsp_valid[i] & core_dram_rsp_ready_other [i];
assign per_core_dram_rsp_data [i] = core_dram_rsp_data[i];
assign per_core_dram_rsp_tag [i] = core_dram_rsp_tag[i];
end
assign core_dram_rsp_ready = & (per_core_dram_rsp_ready | ~core_dram_rsp_valid);
wire core_dram_req_ready;
for (genvar i = 0; i < `NUM_CORES; i++) begin
assign per_core_dram_req_ready[i] = core_dram_req_ready;
end
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
@@ -355,19 +350,19 @@ module VX_cluster #(
.reset (reset),
// Core request
.core_req_valid (per_core_dram_req_valid),
.core_req_rw (per_core_dram_req_rw),
.core_req_byteen (per_core_dram_req_byteen),
.core_req_addr (per_core_dram_req_addr),
.core_req_data (per_core_dram_req_data),
.core_req_tag (per_core_dram_req_tag),
.core_req_ready (core_dram_req_ready),
.core_req_valid (per_core_dram_req_valid_qual),
.core_req_rw (per_core_dram_req_rw_qual),
.core_req_byteen (per_core_dram_req_byteen_qual),
.core_req_addr (per_core_dram_req_addr_qual),
.core_req_data (per_core_dram_req_data_qual),
.core_req_tag (per_core_dram_req_tag_qual),
.core_req_ready (per_core_dram_req_ready_qual),
// Core response
.core_rsp_valid (core_dram_rsp_valid),
.core_rsp_data (core_dram_rsp_data),
.core_rsp_tag (core_dram_rsp_tag),
.core_rsp_ready (core_dram_rsp_ready),
.core_rsp_valid (per_core_dram_rsp_valid),
.core_rsp_data (per_core_dram_rsp_data),
.core_rsp_tag (per_core_dram_rsp_tag),
.core_rsp_ready (per_core_dram_rsp_ready),
// DRAM request
.dram_req_valid (dram_req_valid),

View File

@@ -4,14 +4,14 @@ module VX_csr_io_arb #(
parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1,
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `LOG2UP(NUM_REQS)
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [REQS_BITS-1:0] request_id,
input wire [LOG_NUM_REQS-1:0] request_id,
// input requests
input wire req_valid_in,
@@ -40,7 +40,7 @@ module VX_csr_io_arb #(
if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; i++) begin
assign req_valid_out[i] = req_valid_in && (request_id == `REQS_BITS'(i));
assign req_valid_out[i] = req_valid_in && (request_id == LOG_NUM_REQS'(i));
assign req_addr_out[i] = req_addr_in;
assign req_rw_out[i] = req_rw_in;
assign req_data_out[i] = req_data_in;
@@ -50,8 +50,6 @@ module VX_csr_io_arb #(
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (request_id)
assign req_valid_out = req_valid_in;
@@ -64,36 +62,17 @@ module VX_csr_io_arb #(
///////////////////////////////////////////////////////////////////////
// Inputs buffering
wire [NUM_REQS-1:0] rsp_valid_in_qual;
wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_in_qual;
wire [NUM_REQS-1:0] rsp_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATA_WIDTH),
.PASSTHRU (NUM_REQS < 4)
) rsp_buffer (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in[i]),
.data_in (rsp_data_in[i]),
.ready_in (rsp_ready_in[i]),
.valid_out (rsp_valid_in_qual[i]),
.data_out (rsp_data_in_qual[i]),
.ready_out (rsp_ready_in_qual[i])
);
end
VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.DATAW(DATA_WIDTH),
.BUFFERED(NUM_REQS >= 4)
.NUM_REQS (NUM_REQS),
.DATAW (DATA_WIDTH),
.IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in_qual),
.data_in (rsp_data_in_qual),
.ready_in (rsp_ready_in_qual),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out (rsp_data_out),
.ready_out (rsp_ready_out)

View File

@@ -6,9 +6,9 @@ module VX_databus_arb #(
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQS)
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
@@ -32,64 +32,42 @@ module VX_databus_arb #(
input wire req_ready_out,
// input response
output wire [NUM_REQS-1:0] rsp_valid_in,
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_in,
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_in,
input wire [NUM_REQS-1:0] rsp_ready_in,
input wire rsp_valid_in,
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
input wire [WORD_WIDTH-1:0] rsp_data_in,
output wire rsp_ready_in,
// output response
input wire rsp_valid_out,
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_out,
input wire [WORD_WIDTH-1:0] rsp_data_out,
output wire rsp_ready_out
// output responses
output wire [NUM_REQS-1:0] rsp_valid_out,
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out
);
localparam DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0] valids;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valids[i] = (| req_valid_in[i]);
end
wire [NUM_REQS-1:0][DATAW-1:0] data_in;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], REQS_BITS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
// Inputs buffering
wire [NUM_REQS-1:0] req_valid_in_qual;
wire [NUM_REQS-1:0][DATAW-1:0] req_data_in_qual;
wire [NUM_REQS-1:0] req_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (NUM_REQS < 4)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (valids[i]),
.data_in (data_in[i]),
.ready_in (req_ready_in[i]),
.valid_out (req_valid_in_qual[i]),
.data_out (req_data_in_qual[i]),
.ready_out (req_ready_in_qual[i])
);
end
wire [`NUM_THREADS-1:0] req_tmask_out;
wire req_valid_out_unqual;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valids[i] = (| req_valid_in[i]);
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.DATAW (DATAW),
.BUFFERED (NUM_REQS >= 4)
.NUM_REQS (NUM_REQS),
.DATAW (DATAW),
.IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) req_arb (
.clk (clk),
.reset (reset),
.valid_in (req_valid_in_qual),
.data_in (req_data_in_qual),
.ready_in (req_ready_in_qual),
.valid_in (valids),
.data_in (data_in),
.ready_in (req_ready_in),
.valid_out (req_valid_out_unqual),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out)
@@ -99,15 +77,15 @@ module VX_databus_arb #(
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_out[REQS_BITS-1:0];
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
for (genvar i = 0; i < NUM_REQS; i++) begin
assign rsp_valid_in[i] = rsp_valid_out && (rsp_sel == REQS_BITS'(i));
assign rsp_tag_in[i] = rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign rsp_data_in[i] = rsp_data_out;
assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
assign rsp_tag_out[i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
assign rsp_data_out[i] = rsp_data_in;
end
assign rsp_ready_out = rsp_ready_in[rsp_sel];
assign rsp_ready_in = rsp_ready_out[rsp_sel];
end else begin
@@ -122,10 +100,10 @@ module VX_databus_arb #(
assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out;
assign rsp_valid_in = rsp_valid_out;
assign rsp_tag_in = rsp_tag_out;
assign rsp_data_in = rsp_data_out;
assign rsp_ready_out = rsp_ready_in;
assign rsp_valid_out = rsp_valid_in;
assign rsp_tag_out = rsp_tag_in;
assign rsp_data_out = rsp_data_in;
assign rsp_ready_in = rsp_ready_out;
end

View File

@@ -36,7 +36,7 @@ module VX_dcache_arb (
wire core_req_valid;
VX_skid_buffer #(
.DATAW (REQ_DATAW)
.DATAW (REQ_DATAW)
) req_buffer (
.clk (clk),
.reset (reset),
@@ -121,9 +121,10 @@ module VX_dcache_arb (
assign rsp_valid_in[2] = (| io_rsp_if.valid);
VX_stream_arbiter #(
.NUM_REQS (3),
.DATAW (RSP_DATAW),
.BUFFERED (1)
.NUM_REQS (3),
.DATAW (RSP_DATAW),
.IN_BUFFER (1),
.OUT_BUFFER (1)
) rsp_arb (
.clk (clk),
.reset (reset),
@@ -138,6 +139,7 @@ module VX_dcache_arb (
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign io_rsp_if.ready = rsp_ready_in[2];
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule

View File

@@ -6,9 +6,9 @@ module VX_mem_arb #(
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter REQS_BITS = `CLOG2(NUM_REQS)
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
@@ -43,45 +43,27 @@ module VX_mem_arb #(
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out
);
localparam DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_IN_WIDTH + DATA_WIDTH;
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0][DATAW-1:0] data_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign data_in[i] = {{req_tag_in[i], REQS_BITS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
// Inputs buffering
wire [NUM_REQS-1:0] req_valid_in_qual;
wire [NUM_REQS-1:0][DATAW-1:0] req_data_in_qual;
wire [NUM_REQS-1:0] req_ready_in_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (NUM_REQS < 4)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_valid_in[i]),
.data_in (data_in[i]),
.ready_in (req_ready_in[i]),
.valid_out (req_valid_in_qual[i]),
.data_out (req_data_in_qual[i]),
.ready_out (req_ready_in_qual[i])
);
assign data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.DATAW (DATAW),
.BUFFERED (NUM_REQS >= 4)
.NUM_REQS (NUM_REQS),
.DATAW (REQ_DATAW),
.IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) req_arb (
.clk (clk),
.reset (reset),
.valid_in (req_valid_in_qual),
.data_in (req_data_in_qual),
.ready_in (req_ready_in_qual),
.valid_in (req_valid_in),
.data_in (data_in),
.ready_in (req_ready_in),
.valid_out (req_valid_out),
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out)
@@ -89,15 +71,15 @@ module VX_mem_arb #(
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in [REQS_BITS-1:0];
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0];
for (genvar i = 0; i < NUM_REQS; i++) begin
assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i));
assign rsp_tag_out [i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH];
assign rsp_data_out [i] = rsp_data_in;
assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
assign rsp_tag_out [i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
assign rsp_data_out [i] = rsp_data_in;
end
assign rsp_ready_in = rsp_ready_out [rsp_sel];
assign rsp_ready_in = rsp_ready_out [rsp_sel];
end else begin

View File

@@ -206,17 +206,17 @@ module Vortex (
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input responses
.rsp_valid_in (per_cluster_io_rsp_valid),
.rsp_data_in (per_cluster_io_rsp_data),
.rsp_tag_in (per_cluster_io_rsp_tag),
.rsp_ready_in (per_cluster_io_rsp_ready),
// output response
.rsp_valid_out (io_rsp_valid),
.rsp_tag_out (io_rsp_tag),
.rsp_data_out (io_rsp_data),
.rsp_ready_out (io_rsp_ready)
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_cluster_io_rsp_valid),
.rsp_data_out (per_cluster_io_rsp_data),
.rsp_tag_out (per_cluster_io_rsp_tag),
.rsp_ready_out (per_cluster_io_rsp_ready)
);
VX_csr_io_arb #(
@@ -300,36 +300,30 @@ module Vortex (
if (`L3_ENABLE) begin
wire [`NUM_CLUSTERS-1:0] cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
wire cluster_dram_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid_qual;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_qual;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag_qual;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready_qual;
reg [`NUM_CLUSTERS-1:0] cluster_dram_rsp_ready_other;
always @(*) begin
cluster_dram_rsp_ready_other = {`NUM_CLUSTERS{1'b1}};
for (integer i = 0; i < `NUM_CLUSTERS; i++) begin
for (integer j = 0; j < `NUM_CLUSTERS; j++) begin
if (i != j) begin
cluster_dram_rsp_ready_other[i] &= (per_cluster_dram_rsp_ready [j] | !cluster_dram_rsp_valid [j]);
end
end
end
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
VX_skid_buffer #(
.DATAW (1 + `L2DRAM_BYTEEN_WIDTH + `L2DRAM_ADDR_WIDTH + `L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH),
.PASSTHRU (`NUM_CLUSTERS < 4)
) dram_req_buffer (
.clk (clk),
.reset (reset),
.valid_in (per_cluster_dram_req_valid[i]),
.data_in ({per_cluster_dram_req_rw[i], per_cluster_dram_req_byteen[i], per_cluster_dram_req_addr[i], per_cluster_dram_req_data[i], per_cluster_dram_req_tag[i]}),
.ready_in (per_cluster_dram_req_ready[i]),
.valid_out (per_cluster_dram_req_valid_qual[i]),
.data_out ({per_cluster_dram_req_rw_qual[i], per_cluster_dram_req_byteen_qual[i], per_cluster_dram_req_addr_qual[i], per_cluster_dram_req_data_qual[i], per_cluster_dram_req_tag_qual[i]}),
.ready_out (per_cluster_dram_req_ready_qual[i])
);
end
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
// Core Response
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] & cluster_dram_rsp_ready_other [i];
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
end
assign cluster_dram_rsp_ready = & (per_cluster_dram_rsp_ready | ~cluster_dram_rsp_valid);
wire cluster_dram_req_ready;
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
assign per_cluster_dram_req_ready[i] = cluster_dram_req_ready;
end
VX_cache #(
.CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE),
@@ -358,19 +352,19 @@ module Vortex (
.reset (reset),
// Core request
.core_req_valid (per_cluster_dram_req_valid),
.core_req_rw (per_cluster_dram_req_rw),
.core_req_byteen (per_cluster_dram_req_byteen),
.core_req_addr (per_cluster_dram_req_addr),
.core_req_data (per_cluster_dram_req_data),
.core_req_tag (per_cluster_dram_req_tag),
.core_req_ready (cluster_dram_req_ready),
.core_req_valid (per_cluster_dram_req_valid_qual),
.core_req_rw (per_cluster_dram_req_rw_qual),
.core_req_byteen (per_cluster_dram_req_byteen_qual),
.core_req_addr (per_cluster_dram_req_addr_qual),
.core_req_data (per_cluster_dram_req_data_qual),
.core_req_tag (per_cluster_dram_req_tag_qual),
.core_req_ready (per_cluster_dram_req_ready_qual),
// Core response
.core_rsp_valid (cluster_dram_rsp_valid),
.core_rsp_data (cluster_dram_rsp_data),
.core_rsp_tag (cluster_dram_rsp_tag),
.core_rsp_ready (cluster_dram_rsp_ready),
.core_rsp_valid (per_cluster_dram_rsp_valid),
.core_rsp_data (per_cluster_dram_rsp_data),
.core_rsp_tag (per_cluster_dram_rsp_tag),
.core_rsp_ready (per_cluster_dram_rsp_ready),
// DRAM request
.dram_req_valid (dram_req_valid),

View File

@@ -264,7 +264,9 @@ module VX_bank #(
.full (creq_full)
);
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;
wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n;
reg mshr_going_full;
wire mshr_pop;
wire mshr_valid_st0;
wire[`REQS_BITS-1:0] mshr_tid_st0;
@@ -346,14 +348,12 @@ module VX_bank #(
wire dreq_push_stall;
wire srsq_push_stall;
wire pipeline_stall;
wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3);
wire creq_commit = valid_st1 && core_req_hit_st1 && !pipeline_stall;
wire mshr_going_full = (mshr_pending_size == MSHR_SIZE);
// determine which queue to pop next in priority order
wire mshr_pop_unqual = mshr_valid_st0;
wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty;
@@ -367,13 +367,16 @@ module VX_bank #(
assign sreq_pop = sreq_pop_unqual && !pipeline_stall;
// MSHR pending size
assign mshr_pending_size_n = mshr_pending_size +
((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0));
always @(posedge clk) begin
if (reset) begin
mshr_pending_size <= 0;
mshr_going_full <= 0;
end else begin
mshr_pending_size <= mshr_pending_size +
((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0));
end
mshr_pending_size <= mshr_pending_size_n;
mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE);
end
end
assign is_mshr_st0 = mshr_pop_unqual;
@@ -736,7 +739,7 @@ end
.enqueue_byteen_st3 (req_byteen_st3),
.enqueue_is_snp_st3 (is_snp_st3),
.enqueue_snp_inv_st3(snp_inv_st3),
.enqueue_mshr_st3 (is_mshr_st3),
.enqueue_is_mshr_st3(is_mshr_st3),
.enqueue_ready_st3 (mshr_init_ready_state_st3),
.enqueue_full (mshr_full),

View File

@@ -39,11 +39,11 @@ module VX_cache #(
// Enable cache flush
parameter FLUSH_ENABLE = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = $clog2(MSHR_SIZE),
// core request tag size
parameter CORE_TAG_WIDTH = CORE_TAG_ID_BITS,
parameter CORE_TAG_WIDTH = $clog2(MSHR_SIZE),
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = (32 - $clog2(BANK_LINE_SIZE)),
@@ -63,13 +63,13 @@ module VX_cache #(
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
// Core response
output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
// DRAM request
output wire dram_req_valid,
@@ -139,9 +139,10 @@ module VX_cache #(
VX_cache_core_req_bank_sel #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS)
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_req_bank_sel (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
@@ -197,7 +198,7 @@ module VX_cache #(
wire curr_bank_miss;
// Core Req
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQS{core_req_ready}});
assign curr_bank_core_req_valid = per_bank_valid[i];
assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_rw = core_req_rw;
assign curr_bank_core_req_byteen = core_req_byteen;
@@ -355,18 +356,18 @@ module VX_cache #(
end
VX_stream_arbiter #(
.NUM_REQS(NUM_BANKS),
.DATAW(`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
.BUFFERED(NUM_BANKS >= 4)
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
.OUT_BUFFER (NUM_BANKS >= 4)
) dram_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
);
end else begin
`UNUSED_VAR (per_bank_dram_req_valid)
@@ -385,18 +386,18 @@ module VX_cache #(
if (FLUSH_ENABLE) begin
VX_stream_arbiter #(
.NUM_REQS(NUM_BANKS),
.DATAW(SNP_TAG_WIDTH),
.BUFFERED(NUM_BANKS >= 4)
.NUM_REQS (NUM_BANKS),
.DATAW (SNP_TAG_WIDTH),
.OUT_BUFFER (NUM_BANKS >= 4)
) snp_rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_snp_rsp_valid),
.data_in (per_bank_snp_rsp_tag),
.ready_in (per_bank_snp_rsp_ready),
.valid_out (snp_rsp_valid),
.data_out (snp_rsp_tag),
.ready_out (snp_rsp_ready)
.clk (clk),
.reset (reset),
.valid_in (per_bank_snp_rsp_valid),
.data_in (per_bank_snp_rsp_tag),
.ready_in (per_bank_snp_rsp_ready),
.valid_out (snp_rsp_valid),
.data_out (snp_rsp_tag),
.ready_out (snp_rsp_ready)
);
end else begin
`UNUSED_VAR (per_bank_snp_rsp_valid)

View File

@@ -15,7 +15,7 @@
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
// data metadata word_sel is_snp snp_inv
`define MSHR_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
`define BANK_BITS `LOG2UP(NUM_BANKS)

View File

@@ -8,53 +8,72 @@ module VX_cache_core_req_bank_sel #(
// Number of banks
parameter NUM_BANKS = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 1
parameter NUM_REQS = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 1
) (
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
output wire core_req_ready,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid,
input wire [NUM_BANKS-1:0] per_bank_ready
);
if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_ignore;
reg [NUM_BANKS-1:0] per_bank_ready_other;
always @(*) begin
per_bank_valid_r = 0;
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
end
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r;
always @(*) begin
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] & per_bank_ready_other[i];
end
end
if (CORE_TAG_ID_BITS != 0) begin
reg [NUM_BANKS-1:0] per_bank_ready_other, per_bank_ready_ignore;
always @(*) begin
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
assign core_req_ready = & (per_bank_ready | per_bank_ready_ignore);
for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end
end
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] && per_bank_ready_other[i];
end
end
assign core_req_ready[0] = & (per_bank_ready | per_bank_ready_ignore);
end else begin
assign per_bank_valid = per_bank_valid_r;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign core_req_ready[i] = per_bank_ready[core_req_addr[i][`BANK_SELECT_ADDR_RNG]];
end
end
end else begin
`UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid;
assign core_req_ready = per_bank_ready;
assign core_req_ready[0] = per_bank_ready;
end

View File

@@ -26,18 +26,20 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_REQS-1:0] core_rsp_valid,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire core_rsp_ready
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready
);
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
if (CORE_TAG_ID_BITS != 0) begin
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [CORE_TAG_ID_BITS-1:0] sel_tag_id;
wire stall = ~core_rsp_ready && (| core_rsp_valid);
always @(*) begin
core_rsp_valid_unqual = 0;
@@ -60,13 +62,32 @@ module VX_cache_core_rsp_merge #(
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == sel_tag_id)) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1;
core_rsp_bank_select[i] = ~stall;
end
end
end
VX_generic_register #(
.N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + CORE_TAG_WIDTH),
.R(NUM_REQS)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end
end else begin
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0] stall;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
@@ -79,29 +100,32 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1;
core_rsp_bank_select[i] = ~stall[per_bank_core_rsp_tid[i]];
end
end
end
end
for (genvar i = 0; i < NUM_REQS; i++) begin
wire stall = ~core_rsp_ready && (| core_rsp_valid);
assign stall[i] = ~core_rsp_ready[i] && core_rsp_valid[i];
VX_generic_register #(
.N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
.R(NUM_REQS)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.data_out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
VX_generic_register #(
.N(1 + `WORD_WIDTH + CORE_TAG_WIDTH),
.R(1)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall[i]),
.flush (1'b0),
.data_in ({core_rsp_valid_unqual[i], core_rsp_data_unqual[i], core_rsp_tag_unqual[i]}),
.data_out ({core_rsp_valid[i], core_rsp_data[i], core_rsp_tag[i]})
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i] && ~stall;
end
end else begin
@@ -116,14 +140,19 @@ module VX_cache_core_rsp_merge #(
// Only bank index 0 is referenced in this span -- presumably the single-bank configuration; TODO confirm.
// The bank's response is steered into the request slot named by per_bank_core_rsp_tid[0].
// NOTE(review): per_bank_core_rsp_ready[0] is assigned in three places below, which suggests
// lines from two revisions are interleaved; confirm the live version before editing.
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin
always @(*) begin
// defaults: no slot valid, data unknown; the tag is taken directly from bank 0
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[0];
core_rsp_data_unqual = 'x;
// fill only the slot addressed by the bank's thread id
core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid;
core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0];
end
end
// in this branch core_rsp_ready is forwarded to bank 0 without indexing
assign per_bank_core_rsp_ready[0] = core_rsp_ready;
end else begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
@@ -131,14 +160,16 @@ module VX_cache_core_rsp_merge #(
// valid, tag and data are all written to the slot addressed by the bank's thread id
core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid;
core_rsp_tag_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_tag[0];
core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0];
end
end
// bank 0 is ready when the request slot it targets is ready
assign per_bank_core_rsp_ready[0] = core_rsp_ready[per_bank_core_rsp_tid[0]];
end
// no output register in this span: the unqualified signals drive the outputs directly
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual;
assign per_bank_core_rsp_ready[0] = core_rsp_ready;
end else begin
`UNUSED_VAR(per_bank_core_rsp_tid)

View File

@@ -48,7 +48,7 @@ module VX_miss_resrv #(
input wire[WORD_SIZE-1:0] enqueue_byteen_st3,
input wire enqueue_is_snp_st3,
input wire enqueue_snp_inv_st3,
input wire enqueue_mshr_st3,
input wire enqueue_is_mshr_st3,
input wire enqueue_ready_st3,
output wire enqueue_full,
@@ -71,7 +71,7 @@ module VX_miss_resrv #(
output wire dequeue_snp_inv_st0,
input wire dequeue_st3
);
// Read word of the MSHR payload RAM (connected to the VX_dp_ram dout port further down).
// NOTE(review): both the metadata_table and data_table spellings are listed, and
// dequeue_ready is declared twice; this looks like two revisions interleaved -- confirm.
wire [`MSHR_METADATA_WIDTH-1:0] metadata_table;
wire [`MSHR_DATA_WIDTH-1:0] data_table;
// line addresses live in their own array, one entry per MSHR slot
reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
@@ -91,7 +91,7 @@ module VX_miss_resrv #(
// a hazard is pending when any bit of valid_address_match is set
assign pending_hazard_st0 = (| valid_address_match);
// variant requiring both the valid and the ready bit of the entry at schedule_ptr
wire dequeue_ready = valid_table[schedule_ptr] && ready_table[schedule_ptr];
// variant relying on the ready bit alone
wire dequeue_ready = ready_table[schedule_ptr];
assign dequeue_valid_st0 = dequeue_ready;
assign dequeue_addr_st0 = addr_table[schedule_ptr];
@@ -102,9 +102,9 @@ module VX_miss_resrv #(
dequeue_byteen_st0,
dequeue_wsel_st0,
dequeue_is_snp_st0,
dequeue_snp_inv_st0} = metadata_table;
dequeue_snp_inv_st0} = data_table;
// A push is an enqueue that is NOT flagged as coming from the MSHR itself.
// NOTE(review): the two declarations differ only in the flag's name
// (enqueue_mshr_st3 vs enqueue_is_mshr_st3); only one can be live -- confirm.
wire mshr_push = enqueue_st3 && !enqueue_mshr_st3;
wire mshr_push = enqueue_st3 && !enqueue_is_mshr_st3;
// head pointer plus one, with the constant cast to the pointer's own bit width
wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1);
@@ -124,7 +124,7 @@ module VX_miss_resrv #(
// Entry bookkeeping on enqueue (restore path) and on schedule.
// NOTE(review): the MSHR-origin test appears under two flag names; only one can be live -- confirm.
if (enqueue_st3) begin
// enqueueing while the queue reports full is treated as a design error
assert(!enqueue_full);
if (enqueue_mshr_st3) begin
if (enqueue_is_mshr_st3) begin
// returning missed msrq entry, restore schedule
// the entry at restore_ptr becomes valid again; its ready bit comes from the enqueuer
valid_table[restore_ptr] <= 1;
ready_table[restore_ptr] <= enqueue_ready_st3;
@@ -146,19 +146,20 @@ module VX_miss_resrv #(
if (schedule_st0) begin
// scheduling is only legal when the dequeue side reports a valid entry
assert(dequeue_valid_st0);
// consume the scheduled entry: clear its valid and ready bits, then advance the pointer
valid_table[schedule_ptr] <= 0;
ready_table[schedule_ptr] <= 0;
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
end
end
end
// Address array write: only new entries (enqueues not flagged as MSHR-origin) store an
// address, at tail_ptr.
// NOTE(review): the condition appears under two flag names; only one can be live -- confirm.
always @(posedge clk) begin
if (enqueue_st3 && !enqueue_mshr_st3) begin
if (enqueue_st3 && !enqueue_is_mshr_st3) begin
addr_table[tail_ptr] <= enqueue_addr_st3;
end
end
// Payload store: every non-address field of an entry is packed into one RAM word.
// The dequeue side must unpack dout in the same field order as the din concatenation.
// byteen and rden are tied high at the ports shown here.
VX_dp_ram #(
.DATAW(`MSHR_METADATA_WIDTH),
.DATAW(`MSHR_DATA_WIDTH),
.SIZE(MSHR_SIZE),
.BYTEENW(1),
.BUFFERED(0),
@@ -171,7 +172,7 @@ module VX_miss_resrv #(
.byteen(1'b1),
.rden(1'b1),
.din({enqueue_data_st3, enqueue_tid_st3, enqueue_tag_st3, enqueue_rw_st3, enqueue_byteen_st3, enqueue_wsel_st3, enqueue_is_snp_st3, enqueue_snp_inv_st3}),
.dout(metadata_table)
.dout(data_table)
);
`ifdef DBG_PRINT_CACHE_MSHR
@@ -180,7 +181,7 @@ module VX_miss_resrv #(
if (schedule_st0)
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
if (enqueue_st3) begin
if (enqueue_mshr_st3)
if (enqueue_is_mshr_st3)
$display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3);
else
$display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3);

View File

@@ -44,26 +44,6 @@ module VX_snp_forwarder #(
if (NUM_REQS > 1) begin
// Inputs buffering
// One VX_skid_buffer per forwarder input; the *_qual signals are the buffered side.
// PASSTHRU is set when NUM_REQS < 4 -- presumably bypassing the buffer for small fan-in
// (verify against VX_skid_buffer).
// NOTE(review): the snp_fwdin_arb instance in this file is also shown wired directly to
// snp_fwdin_* with IN_BUFFER (NUM_REQS >= 4), so this stage may belong to the older revision -- confirm.
wire [NUM_REQS-1:0] snp_fwdin_valid_qual;
wire [NUM_REQS-1:0][TAG_OUT_WIDTH-1:0] snp_fwdin_tag_qual;
wire [NUM_REQS-1:0] snp_fwdin_ready_qual;
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (TAG_OUT_WIDTH),
.PASSTHRU (NUM_REQS < 4)
) snp_fwdin_buffer (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid[i]),
.data_in (snp_fwdin_tag[i]),
.ready_in (snp_fwdin_ready[i]),
.valid_out (snp_fwdin_valid_qual[i]),
.data_out (snp_fwdin_tag_qual[i]),
.ready_out (snp_fwdin_ready_qual[i])
);
end
reg [REQ_QUAL_BITS:0] pending_cntrs [SREQ_SIZE-1:0];
wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr;
@@ -181,15 +161,16 @@ module VX_snp_forwarder #(
assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;
VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.DATAW(TAG_OUT_WIDTH),
.BUFFERED(NUM_REQS >= 4)
.NUM_REQS (NUM_REQS),
.DATAW (TAG_OUT_WIDTH),
.IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
) snp_fwdin_arb (
.clk (clk),
.reset (reset),
.valid_in (snp_fwdin_valid_qual),
.data_in (snp_fwdin_tag_qual),
.ready_in (snp_fwdin_ready_qual),
.valid_in (snp_fwdin_valid),
.data_in (snp_fwdin_tag),
.ready_in (snp_fwdin_ready),
.valid_out (fwdin_valid),
.data_out (fwdin_tag),
.ready_out (fwdin_ready)

View File

@@ -52,9 +52,7 @@ module VX_cam_buffer #(
write_addr_r <= ADDRW'(1'b0);
end else begin
// Sanity check on release: the free_slots bit of the slot being released must be 0
// (a set bit presumably marks a free slot, so releasing it would be a double release -- confirm).
// NOTE(review): two forms of the same assertion are listed. The first only prints via
// $display on failure; the second reports the failure through $error.
if (release_slot) begin
assert(0 == free_slots[release_addr]) else begin
$display("%t: releasing invalid slot at port %d", $time, release_addr);
end
assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
end
free_slots <= free_slots_n;
write_addr_r <= free_index;

View File

@@ -1,10 +1,11 @@
`include "VX_platform.vh"
// VX_stream_arbiter: merges NUM_REQS valid/ready streams of DATAW-bit data into one output stream.
//   NUM_REQS   - number of input streams; the body passes inputs straight through when it is not > 1
//   DATAW      - payload width in bits
//   TYPE       - selects which arbiter is instantiated ("R", "F" and "M" are tested in the body;
//                the arbiter module names are not visible in this listing)
//   BUFFERED   - single buffering switch; the body tests it for the output register stage
//   IN_BUFFER  - when zero, the per-input skid buffers are put in PASSTHRU
//   OUT_BUFFER - when non-zero, the selected {valid, data} goes through an output register
// NOTE(review): the parameter list below shows both the BUFFERED form and the
// IN_BUFFER/OUT_BUFFER form, so two revisions appear interleaved -- confirm which is live.
module VX_stream_arbiter #(
parameter NUM_REQS = 1,
parameter DATAW = 1,
parameter TYPE = "R",
parameter BUFFERED = 0
parameter NUM_REQS = 1,
parameter DATAW = 1,
parameter TYPE = "R",
parameter IN_BUFFER = 0,
parameter OUT_BUFFER = 0
) (
input wire clk,
input wire reset,
@@ -17,18 +18,30 @@ module VX_stream_arbiter #(
output wire [DATAW-1:0] data_out,
input wire ready_out
);
// Input side of the arbiter.
// NOTE(review): this span mixes two layouts -- one that handles NUM_REQS == 1 first with a
// plain pass-through, and one that opens with NUM_REQS > 1 and declares buffered *_qual
// signals. Confirm the live structure before editing.
localparam LOG_NUM_REQS = $clog2(NUM_REQS);
if (NUM_REQS == 1) begin
if (NUM_REQS > 1) begin
// pass-through layout: no state is needed, so clk and reset are marked unused
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign valid_out = valid_in;
assign data_out = data_in;
assign ready_in = ready_out;
// buffered layout: per-input signals on the arbiter side of the skid buffers
wire [NUM_REQS-1:0] valid_in_qual;
wire [NUM_REQS-1:0][DATAW-1:0] data_in_qual;
wire [NUM_REQS-1:0] ready_in_qual;
end else begin
// one skid buffer per input; PASSTHRU is asserted when IN_BUFFER is zero
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (!IN_BUFFER)
) req_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in[i]),
.data_in (data_in[i]),
.ready_in (ready_in[i]),
.valid_out (valid_in_qual[i]),
.data_out (data_in_qual[i]),
.ready_out (ready_in_qual[i])
);
end
wire sel_enable;
wire sel_valid;
@@ -41,13 +54,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
.clk (clk),
.reset (reset),
.requests (valid_in_qual),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "R") begin
@@ -56,13 +69,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
.clk (clk),
.reset (reset),
.requests (valid_in_qual),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "F") begin
@@ -71,13 +84,13 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
.clk (clk),
.reset (reset),
.requests (valid_in_qual),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end else if (TYPE == "M") begin
@@ -86,18 +99,18 @@ module VX_stream_arbiter #(
.NUM_REQS(NUM_REQS),
.LOCK_ENABLE(1)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (valid_in),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
.clk (clk),
.reset (reset),
.requests (valid_in_qual),
.enable (sel_enable),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
end
// Output side of the arbiter: registered when the buffering parameter is set, otherwise combinational.
// NOTE(review): the guard appears as both BUFFERED and OUT_BUFFER, and several assignments
// appear in both a data_in/ready_in form and a data_in_qual/ready_in_qual form.
// Two revisions look interleaved here -- confirm which lines are live.
if (BUFFERED) begin
if (OUT_BUFFER) begin
// hold the output register while it presents a valid item the consumer has not accepted
wire stall = ~ready_out && valid_out;
// the arbiter enable is deasserted for as long as the output is stalled
assign sel_enable = ~stall;
@@ -110,25 +123,35 @@ module VX_stream_arbiter #(
.reset (reset),
.stall (stall),
.flush (1'b0),
.data_in ({sel_valid, data_in[sel_idx]}),
.data_in ({sel_valid, data_in_qual[sel_idx]}),
.data_out ({valid_out, data_out})
);
// only the granted input sees ready, and only when the register can take new data
for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = sel_1hot[i] && ~stall;
assign ready_in_qual[i] = sel_1hot[i] && ~stall;
end
end else begin
// unregistered path: the granted input is forwarded combinationally to the output
assign sel_enable = ready_out;
assign valid_out = sel_valid;
assign data_out = data_in_qual[sel_idx];
assign valid_out = sel_valid;
assign data_out = data_in[sel_idx];
// only the granted input sees the consumer's ready
for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = sel_1hot[i] && ready_out;
assign ready_in_qual[i] = sel_1hot[i] && ready_out;
end
end
end
end else begin
// NUM_REQS is not greater than 1: wire input to output directly; clk and reset are unused
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign valid_out = valid_in;
assign data_out = data_in;
assign ready_in = ready_out;
end
endmodule