// Verilog source (377 lines, 15 KiB)
`include "VX_cache_define.vh"
// VX_shared_mem: multi-banked word-addressed memory with a core
// request/response interface.
//
// Data flow, as implemented below:
//   1. Incoming core requests go through VX_core_req_bank_sel, which produces
//      one request slot per bank (valid, rw, addr, byteen, data, tag, tid).
//   2. The per-bank request batch is stored in the 'core_req_queue' elastic
//      buffer (registered output).
//   3. Each bank owns one VX_sp_ram instance ('data_store') that is addressed
//      by the batch at the head of the request queue.
//   4. Read responses of the head batch are pushed into the 'core_rsp_req'
//      elastic buffer one tag at a time; the batch is released once all of
//      its read banks have responded. Write-only batches are released
//      immediately and generate no response.
module VX_shared_mem #(
    parameter CACHE_ID                      = 0,

    // Size of cache in bytes
    // NOTE(review): not referenced directly in this module; presumably consumed
    // by the size macros from VX_cache_define.vh (e.g. `LINES_PER_BANK) - confirm.
    parameter CACHE_SIZE                    = (1024*16),
    // Number of banks
    parameter NUM_BANKS                     = 2,
    // Size of a word in bytes
    parameter WORD_SIZE                     = 4,
    // Number of Word requests per cycle
    parameter NUM_REQS                      = 4,

    // Core Request Queue Size
    parameter CREQ_SIZE                     = 2,
    // Core Response Queue Size
    parameter CRSQ_SIZE                     = 2,

    // size of tag id in core request tag
    parameter CORE_TAG_ID_BITS              = 8,

    // core request tag size
    parameter CORE_TAG_WIDTH                = (2 + CORE_TAG_ID_BITS),

    // bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET              = `CLOG2(256)
 ) (
    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    VX_perf_cache_if                        perf_cache_if,
`endif

    // Core request
    input wire [NUM_REQS-1:0]                           core_req_valid,
    input wire [NUM_REQS-1:0]                           core_req_rw,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0]     core_req_addr,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]            core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]          core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]       core_req_tag,
    output wire [NUM_REQS-1:0]                          core_req_ready,

    // Core response
    output wire                                         core_rsp_valid,
    output wire [NUM_REQS-1:0]                          core_rsp_tmask,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]         core_rsp_data,
    output wire [CORE_TAG_WIDTH-1:0]                    core_rsp_tag,
    input  wire                                         core_rsp_ready
);

    `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
    `UNUSED_PARAM (CACHE_ID)
    `UNUSED_PARAM (CORE_TAG_ID_BITS)

    // A line is exactly one word wide in this memory.
    localparam CACHE_LINE_SIZE = WORD_SIZE;

    // ------------------------------------------------------------------
    // Bank selection: per-bank request signals before queuing ("unqual")
    // ------------------------------------------------------------------

    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw_unqual;
    wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0]  per_bank_core_req_addr_unqual;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen_unqual;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data_unqual;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag_unqual;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid_unqual;
    wire                                        per_bank_core_req_ready_unqual;

    VX_core_req_bank_sel #(
        .CACHE_ID        (CACHE_ID),
        .CACHE_LINE_SIZE (WORD_SIZE),
        .NUM_BANKS       (NUM_BANKS),
        .NUM_PORTS       (1),
        .WORD_SIZE       (WORD_SIZE),
        .NUM_REQS        (NUM_REQS),
        .CORE_TAG_WIDTH  (CORE_TAG_WIDTH),
        .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
        .SHARED_BANK_READY(1)
    ) core_req_bank_sel (
        .clk        (clk),
        .reset      (reset),
    `ifdef PERF_ENABLE
        .bank_stalls(perf_cache_if.bank_stalls),
    `endif
        .core_req_valid          (core_req_valid),
        .core_req_rw             (core_req_rw),
        .core_req_addr           (core_req_addr),
        .core_req_byteen         (core_req_byteen),
        .core_req_data           (core_req_data),
        .core_req_tag            (core_req_tag),
        .core_req_ready          (core_req_ready),
        .per_bank_core_req_valid (per_bank_core_req_valid_unqual),
        .per_bank_core_req_tid   (per_bank_core_req_tid_unqual),
        .per_bank_core_req_rw    (per_bank_core_req_rw_unqual),
        .per_bank_core_req_addr  (per_bank_core_req_addr_unqual),
        .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual),
        .per_bank_core_req_tag   (per_bank_core_req_tag_unqual),
        .per_bank_core_req_data  (per_bank_core_req_data_unqual),
        .per_bank_core_req_ready (per_bank_core_req_ready_unqual),
        `UNUSED_PIN (per_bank_core_req_pmask),
        `UNUSED_PIN (per_bank_core_req_wsel)
    );

    // ------------------------------------------------------------------
    // Per-bank request signals at the output of the core request queue
    // ------------------------------------------------------------------

    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw;
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid;

    wire creq_in_ready;
    wire creq_out_valid;
    wire crsq_in_fire_last;

    // Banks of the head batch that carry a read request.
    wire [NUM_BANKS-1:0] per_bank_req_reads = per_bank_core_req_valid & ~per_bank_core_req_rw;

    wire per_bank_req_has_reads = (| per_bank_req_reads);

    // A batch is enqueued whenever any core request lane is valid.
    wire creq_in_valid = (| core_req_valid);

    // The head batch is released when it needs no response, or when the
    // response covering its last outstanding read banks is accepted.
    wire creq_out_ready = ~per_bank_req_has_reads // is write only
                       || crsq_in_fire_last;      // is sending last read response

    // The bank selector is back-pressured by the request queue.
    assign per_bank_core_req_ready_unqual = creq_in_ready;

    wire creq_in_fire = creq_in_valid && creq_in_ready;

    wire creq_out_fire = creq_out_valid && creq_out_ready;

    // Only the low `LINE_SELECT_BITS of the line address are kept: they are
    // what indexes the per-bank data store.
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
    `UNUSED_VAR (per_bank_core_req_addr_unqual)
    for (genvar i = 0; i < NUM_BANKS; i++) begin
        assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
    end

    VX_elastic_buffer #(
        .DATAW      (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)),
        .SIZE       (CREQ_SIZE),
        .OUTPUT_REG (1)   // output should be registered for the data_store addr port
    ) core_req_queue (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (creq_in_ready),
        .valid_in  (creq_in_valid),
        .data_in   ({per_bank_core_req_valid_unqual,
                     per_bank_core_req_rw_unqual,
                     per_bank_core_req_addr_qual,
                     per_bank_core_req_byteen_unqual,
                     per_bank_core_req_data_unqual,
                     per_bank_core_req_tag_unqual,
                     per_bank_core_req_tid_unqual}),
        .data_out  ({per_bank_core_req_valid,
                     per_bank_core_req_rw,
                     per_bank_core_req_addr,
                     per_bank_core_req_byteen,
                     per_bank_core_req_data,
                     per_bank_core_req_tag,
                     per_bank_core_req_tid}),
        .ready_out (creq_out_ready),
        .valid_out (creq_out_valid)
    );

    // creq_in_fire is only consumed by the optional debug trace below.
    `UNUSED_VAR (creq_in_fire)

    // ------------------------------------------------------------------
    // Per-bank data storage
    // ------------------------------------------------------------------

    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;

    for (genvar i = 0; i < NUM_BANKS; i++) begin

        // Commit the write only in the cycle the head batch is released,
        // so a batch waiting in the queue is written exactly once.
        wire wren = per_bank_core_req_rw[i]
                 && per_bank_core_req_valid[i]
                 && creq_out_fire;

        VX_sp_ram #(
            .DATAW   (`WORD_WIDTH),
            .SIZE    (`LINES_PER_BANK),
            .BYTEENW (WORD_SIZE),
            .RWCHECK (1)
        ) data_store (
            .clk   (clk),
            .addr  (per_bank_core_req_addr[i]),
            .wren  (wren),
            .byteen(per_bank_core_req_byteen[i]),
            .rden  (1'b1),
            .din   (per_bank_core_req_data[i]),
            .dout  (per_bank_core_rsp_data[i])
        );
    end

    // ------------------------------------------------------------------
    // Core response generation
    // ------------------------------------------------------------------

    // The core response bus handles a single tag at the time
    // We first need to select the current tag to process,
    // then send all bank responses for that tag as a batch

    wire crsq_in_valid, crsq_in_ready;

    // bank_rsp_sel_prv: banks of the head batch already covered by an
    //                   accepted response (cleared when the batch completes).
    // bank_rsp_sel_cur: banks covered by the response offered this cycle.
    reg [NUM_BANKS-1:0] bank_rsp_sel_prv, bank_rsp_sel_cur;

    wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel_prv | bank_rsp_sel_cur;

    wire crsq_in_fire = crsq_in_valid && crsq_in_ready;

    // The response being accepted completes the batch when every read bank
    // has been covered.
    assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_req_reads);

    always @(posedge clk) begin
        if (reset) begin
            bank_rsp_sel_prv <= 0;
        end else begin
            if (crsq_in_fire) begin
                if (bank_rsp_sel_n == per_bank_req_reads) begin
                    bank_rsp_sel_prv <= 0;
                end else begin
                    bank_rsp_sel_prv <= bank_rsp_sel_n;
                end
            end
        end
    end

    reg [NUM_REQS-1:0]                   core_rsp_valids_in;
    reg [NUM_REQS-1:0][`WORD_WIDTH-1:0]  core_rsp_data_in;
    reg [CORE_TAG_WIDTH-1:0]             core_rsp_tag_in;

    always @(*) begin
        core_rsp_valids_in = 0;
        core_rsp_data_in   = 'x;
        core_rsp_tag_in    = 'x;
        bank_rsp_sel_cur   = 0;

        // Pick the tag of a read bank that has not responded yet. The loop
        // runs from the highest index down, so the lowest-indexed pending
        // bank is assigned last and wins.
        for (integer i = NUM_BANKS-1; i >= 0; --i) begin
            if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
                core_rsp_tag_in = per_bank_core_req_tag[i];
            end
        end

        // Gather every bank whose tag id matches the selected tag and route
        // its data to the requesting lane (tid).
        // NOTE(review): the match is qualified with per_bank_core_req_valid,
        // not per_bank_req_reads. This presumably relies on all banks sharing
        // a tag id having the same rw value; otherwise bank_rsp_sel_n could
        // never equal per_bank_req_reads - confirm against the requester.
        for (integer i = 0; i < NUM_BANKS; i++) begin
            if (per_bank_core_req_valid[i]
             && (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
                core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
                core_rsp_data_in[per_bank_core_req_tid[i]]   = per_bank_core_rsp_data[i];
                bank_rsp_sel_cur[i] = 1;
            end
        end
    end

    // Only batches containing reads generate a response.
    assign crsq_in_valid = creq_out_valid && per_bank_req_has_reads;

    // The payload is indexed per request lane (core_rsp_valids_in and
    // core_rsp_data_in are NUM_REQS wide), so DATAW must be sized with
    // NUM_REQS. Sizing it with NUM_BANKS truncates the response whenever
    // NUM_BANKS < NUM_REQS (which is the case for the default parameters).
    VX_elastic_buffer #(
        .DATAW (NUM_REQS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
        .SIZE  (CRSQ_SIZE)
    ) core_rsp_req (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (crsq_in_valid),
        .data_in   ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
        .ready_in  (crsq_in_ready),
        .valid_out (core_rsp_valid),
        .data_out  ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
        .ready_out (core_rsp_ready)
    );

    // ------------------------------------------------------------------
    // Debug: decode the upper tag bits into {PC, warp id}
    // ------------------------------------------------------------------

`ifdef DBG_CACHE_REQ_INFO
    `IGNORE_WARNINGS_BEGIN
    wire [NUM_BANKS-1:0][31:0]          debug_pc_st0,  debug_pc_st1;
    wire [NUM_BANKS-1:0][`NW_BITS-1:0]  debug_wid_st0, debug_wid_st1;
    `IGNORE_WARNINGS_END

    // st0 = before the request queue, st1 = after the request queue.
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
            assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
            assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
        end else begin
            assign {debug_pc_st0[i], debug_wid_st0[i]} = 0;
            assign {debug_pc_st1[i], debug_wid_st1[i]} = 0;
        end
    end
`endif

    // ------------------------------------------------------------------
    // Debug: request/response trace
    // NOTE(review): this section reads debug_wid_st*/debug_pc_st*, which are
    // declared only under DBG_CACHE_REQ_INFO above - presumably both macros
    // are always defined together; confirm in the build configuration.
    // ------------------------------------------------------------------

`ifdef DBG_PRINT_CACHE_BANK

    reg is_multi_tag_req;
    `IGNORE_WARNINGS_BEGIN
    reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
    `IGNORE_WARNINGS_END

    // Flag a head batch whose valid banks carry more than one tag id.
    always @(*) begin
        core_req_tag_sel ='x;
        for (integer i = NUM_BANKS-1; i >= 0; --i) begin
            if (per_bank_core_req_valid[i]) begin
                core_req_tag_sel = per_bank_core_req_tag[i];
            end
        end
        is_multi_tag_req = 0;
        for (integer i = 0; i < NUM_BANKS; ++i) begin
            if (per_bank_core_req_valid[i]
             && (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
                is_multi_tag_req = creq_out_valid;
            end
        end
    end

    always @(posedge clk) begin
        if (!crsq_in_ready) begin
            $display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID);
        end
        if (is_multi_tag_req) begin
            $display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
        end
        if (creq_in_fire) begin
            for (integer i = 0; i < NUM_BANKS; ++i) begin
                if (per_bank_core_req_valid_unqual[i]) begin
                    if (per_bank_core_req_rw_unqual[i]) begin
                        $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
                            $time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i],
                            debug_wid_st0[i], debug_pc_st0[i]);
                    end else begin
                        $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
                            $time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i],
                            debug_wid_st0[i], debug_pc_st0[i]);
                    end
                end
            end
        end
        if (creq_out_fire) begin
            for (integer i = 0; i < NUM_BANKS; ++i) begin
                if (per_bank_core_req_valid[i]) begin
                    if (per_bank_core_req_rw[i]) begin
                        $display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
                            $time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i],
                            debug_wid_st1[i], debug_pc_st1[i]);
                    end else begin
                        $display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
                            $time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i],
                            debug_wid_st1[i], debug_pc_st1[i]);
                    end
                end
            end
        end
    end
`endif

    // ------------------------------------------------------------------
    // Performance counters
    // ------------------------------------------------------------------

`ifdef PERF_ENABLE
    // per cycle: core_reads, core_writes
    reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
    reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;

    // Count the request lanes accepted this cycle, split by direction.
    assign perf_core_reads_per_cycle  = $countones(core_req_valid & core_req_ready & ~core_req_rw);
    assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);

    // Count response lanes that are valid but not accepted this cycle.
    if (CORE_TAG_ID_BITS != 0) begin
        assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
    end else begin
        assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
    end

    reg [`PERF_CTR_BITS-1:0] perf_core_reads;
    reg [`PERF_CTR_BITS-1:0] perf_core_writes;
    reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;

    always @(posedge clk) begin
        if (reset) begin
            perf_core_reads  <= 0;
            perf_core_writes <= 0;
            perf_crsp_stalls <= 0;
        end else begin
            perf_core_reads  <= perf_core_reads  + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
            perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
            perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
        end
    end

    // Miss and pipeline-stall counters are tied to zero in this module.
    assign perf_cache_if.reads        = perf_core_reads;
    assign perf_cache_if.writes       = perf_core_writes;
    assign perf_cache_if.read_misses  = '0;
    assign perf_cache_if.write_misses = '0;
    assign perf_cache_if.pipe_stalls  = '0;
    assign perf_cache_if.crsp_stalls  = perf_crsp_stalls;
`endif

endmodule