Files
kernels/hw/rtl/cache/VX_cache_wrap.sv
Blaise Tine d47cccc157 Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00

502 lines
20 KiB
Systemverilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// VX_cache_wrap
//
// Wrapper around the cache. It exposes NUM_REQS core-side VX_mem_bus_if
// slave ports and a single memory-side VX_mem_bus_if master port, and
// selects at elaboration time which sub-blocks are built:
//   - VX_cache_bypass is instantiated when NC_ENABLE or PASSTHRU is non-zero;
//   - VX_cache is instantiated only when PASSTHRU is zero;
//   - VX_elastic_buffer instances sit on every core response output and on
//     the memory request output.
// Signal suffix convention used below:
//   (none) : unpacked from core_bus_if
//   _s     : input side of the output elastic buffers
//   _b     : between the bypass stage (or direct wiring) and the inner cache
module VX_cache_wrap #(
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// enable bypass for non-cacheable addresses
parameter NC_TAG_BIT = 0,
parameter NC_ENABLE = 0,
// Force bypass for all requests
parameter PASSTHRU = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
// Memory request output register
parameter MEM_OUT_REG = 0
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_cache_perf_if.master cache_perf_if,
`endif
// Core-side ports (one per requester) and the single memory-side port
VX_mem_bus_if.slave core_bus_if [NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
// Parameter checks: NUM_BANKS may not exceed NUM_REQS, and NUM_BANKS must
// equal 1 << CLOG2(NUM_BANKS) (a power of two if CLOG2 is a ceiling log2;
// the macro is defined outside this file).
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid parameter"))
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
// LOG2UP of the MSHR depth; contributes to the inner memory tag width below
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
// Core tag width on the inner-cache side: TAG_WIDTH minus NC_ENABLE
localparam CORE_TAG_X_WIDTH = TAG_WIDTH - NC_ENABLE;
// Memory tag width on the inner-cache side (mem_req_tag_b / mem_rsp_tag_b)
localparam MEM_TAG_X_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
// Memory tag width on the buffered/external side (mem_req_tag_s); one of four
// externally defined macros is picked from the PASSTHRU / NC_ENABLE combination
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
// True when the VX_cache_bypass stage must be instantiated
localparam NC_BYPASS = (NC_ENABLE || PASSTHRU);
// PASSTHRU with a single requester and zero word-select bits; in this case
// both output elastic buffers below are given SIZE 0
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
// Core request signals unpacked from the core_bus_if interface array
wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_ready;
// Flatten each interface's request fields into the packed vectors above and
// drive req_ready back onto the interface
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid[i] = core_bus_if[i].req_valid;
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
assign core_req_data[i] = core_bus_if[i].req_data.data;
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
assign core_bus_if[i].req_ready = core_req_ready[i];
end
///////////////////////////////////////////////////////////////////////////
// Core response buffering
// *_s signals feed the per-request elastic buffers; the buffer outputs drive
// core_bus_if[i] response fields directly.
wire [NUM_REQS-1:0] core_rsp_valid_s;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
wire [NUM_REQS-1:0] core_rsp_ready_s;
// One buffer per requester carrying {data, tag}. Each iteration uses its own
// core_rsp_reset produced by RESET_RELAY (macro defined outside this file).
// SIZE is 0 unless the bypass stage exists and DIRECT_PASSTHRU is false.
// NOTE(review): SIZE 0 presumably turns VX_elastic_buffer into a pass-through
// - confirm in that module.
for (genvar i = 0; i < NUM_REQS; ++i) begin
`RESET_RELAY (core_rsp_reset, reset);
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(CORE_OUT_REG) : 0),
.OUT_REG (`OUT_REG_TO_EB_REG(CORE_OUT_REG))
) core_rsp_buf (
.clk (clk),
.reset (core_rsp_reset),
.valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
.valid_out (core_bus_if[i].rsp_valid),
.ready_out (core_bus_if[i].rsp_ready)
);
end
///////////////////////////////////////////////////////////////////////////
// Memory request buffering
// *_s signals feed the single memory request buffer; its outputs drive the
// mem_bus_if request fields.
wire mem_req_valid_s;
wire mem_req_rw_s;
wire [LINE_SIZE-1:0] mem_req_byteen_s;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_ready_s;
// DATAW is the sum of the packed fields {rw, byteen, addr, data, tag}.
// SIZE follows the same rule as the core response buffers.
// NOTE(review): this buffer is reset by the raw 'reset' input, whereas the
// core response buffers use a RESET_RELAY output - confirm this is intended.
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
.SIZE ((NC_BYPASS && !DIRECT_PASSTHRU) ? `OUT_REG_TO_EB_SIZE(MEM_OUT_REG) : 0),
.OUT_REG (`OUT_REG_TO_EB_REG(MEM_OUT_REG))
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
.valid_out (mem_bus_if.req_valid),
.ready_out (mem_bus_if.req_ready)
);
///////////////////////////////////////////////////////////////////////////
// *_b signals: the link between the bypass stage (or the direct wiring used
// when there is no bypass) and the inner cache. Core tags here are
// CORE_TAG_X_WIDTH wide and memory tags are MEM_TAG_X_WIDTH wide.
// Core request
wire [NUM_REQS-1:0] core_req_valid_b;
wire [NUM_REQS-1:0] core_req_rw_b;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr_b;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_b;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data_b;
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_b;
wire [NUM_REQS-1:0] core_req_ready_b;
// Core response
wire [NUM_REQS-1:0] core_rsp_valid_b;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_b;
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_b;
wire [NUM_REQS-1:0] core_rsp_ready_b;
// Memory request
wire mem_req_valid_b;
wire mem_req_rw_b;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [LINE_SIZE-1:0] mem_req_byteen_b;
wire [`CS_LINE_WIDTH-1:0] mem_req_data_b;
wire [MEM_TAG_X_WIDTH-1:0] mem_req_tag_b;
wire mem_req_ready_b;
// Memory response
wire mem_rsp_valid_b;
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_b;
wire [MEM_TAG_X_WIDTH-1:0] mem_rsp_tag_b;
wire mem_rsp_ready_b;
// Bypass stage. When present, VX_cache_bypass is wired between:
//   - the unpacked core requests and the *_b core request signals,
//   - the *_b core responses and the *_s core response buffer inputs,
//   - the *_b memory requests and the *_s memory request buffer inputs,
//   - mem_bus_if responses (connected directly, not through a buffer in this
//     file) and the *_b memory response signals.
if (NC_BYPASS) begin
`RESET_RELAY (nc_bypass_reset, reset);
VX_cache_bypass #(
.NUM_REQS (NUM_REQS),
.NC_TAG_BIT (NC_TAG_BIT),
.NC_ENABLE (NC_ENABLE),
.PASSTHRU (PASSTHRU),
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
.CORE_DATA_SIZE (WORD_SIZE),
.CORE_TAG_IN_WIDTH (TAG_WIDTH),
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
.MEM_DATA_SIZE (LINE_SIZE),
.MEM_TAG_IN_WIDTH (MEM_TAG_X_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH)
) cache_bypass (
.clk (clk),
.reset (nc_bypass_reset),
// Core request in
.core_req_valid_in (core_req_valid),
.core_req_rw_in (core_req_rw),
.core_req_byteen_in (core_req_byteen),
.core_req_addr_in (core_req_addr),
.core_req_data_in (core_req_data),
.core_req_tag_in (core_req_tag),
.core_req_ready_in (core_req_ready),
// Core request out
.core_req_valid_out (core_req_valid_b),
.core_req_rw_out (core_req_rw_b),
.core_req_byteen_out(core_req_byteen_b),
.core_req_addr_out (core_req_addr_b),
.core_req_data_out (core_req_data_b),
.core_req_tag_out (core_req_tag_b),
.core_req_ready_out (core_req_ready_b),
// Core response in
.core_rsp_valid_in (core_rsp_valid_b),
.core_rsp_data_in (core_rsp_data_b),
.core_rsp_tag_in (core_rsp_tag_b),
.core_rsp_ready_in (core_rsp_ready_b),
// Core response out
.core_rsp_valid_out (core_rsp_valid_s),
.core_rsp_data_out (core_rsp_data_s),
.core_rsp_tag_out (core_rsp_tag_s),
.core_rsp_ready_out (core_rsp_ready_s),
// Memory request in
.mem_req_valid_in (mem_req_valid_b),
.mem_req_rw_in (mem_req_rw_b),
.mem_req_addr_in (mem_req_addr_b),
.mem_req_byteen_in (mem_req_byteen_b),
.mem_req_data_in (mem_req_data_b),
.mem_req_tag_in (mem_req_tag_b),
.mem_req_ready_in (mem_req_ready_b),
// Memory request out
.mem_req_valid_out (mem_req_valid_s),
.mem_req_addr_out (mem_req_addr_s),
.mem_req_rw_out (mem_req_rw_s),
.mem_req_byteen_out (mem_req_byteen_s),
.mem_req_data_out (mem_req_data_s),
.mem_req_tag_out (mem_req_tag_s),
.mem_req_ready_out (mem_req_ready_s),
// Memory response in
.mem_rsp_valid_in (mem_bus_if.rsp_valid),
.mem_rsp_data_in (mem_bus_if.rsp_data.data),
.mem_rsp_tag_in (mem_bus_if.rsp_data.tag),
.mem_rsp_ready_in (mem_bus_if.rsp_ready),
// Memory response out
.mem_rsp_valid_out (mem_rsp_valid_b),
.mem_rsp_data_out (mem_rsp_data_b),
.mem_rsp_tag_out (mem_rsp_tag_b),
.mem_rsp_ready_out (mem_rsp_ready_b)
);
end else begin
// No bypass stage: NC_BYPASS == 0 implies NC_ENABLE == 0 and PASSTHRU == 0,
// so CORE_TAG_X_WIDTH equals TAG_WIDTH and every core-side signal is wired
// straight through. Only the memory tag needs adapting (see below).
assign core_req_valid_b = core_req_valid;
assign core_req_rw_b = core_req_rw;
assign core_req_addr_b = core_req_addr;
assign core_req_byteen_b= core_req_byteen;
assign core_req_data_b = core_req_data;
assign core_req_tag_b = core_req_tag;
assign core_req_ready = core_req_ready_b;
assign core_rsp_valid_s = core_rsp_valid_b;
assign core_rsp_data_s = core_rsp_data_b;
assign core_rsp_tag_s = core_rsp_tag_b;
assign core_rsp_ready_b = core_rsp_ready_s;
assign mem_req_valid_s = mem_req_valid_b;
assign mem_req_addr_s = mem_req_addr_b;
assign mem_req_rw_s = mem_req_rw_b;
assign mem_req_byteen_s = mem_req_byteen_b;
assign mem_req_data_s = mem_req_data_b;
assign mem_req_ready_b = mem_req_ready_s;
// Add explicit NC=0 flag to the memory request tag
// NOTE(review): mem_req_tag_b is MEM_TAG_X_WIDTH bits wide while N is
// MEM_TAG_WIDTH-1; this relies on CACHE_MEM_TAG_WIDTH yielding exactly
// MEM_TAG_X_WIDTH + 1 - confirm against the macro definition.
VX_bits_insert #(
.N (MEM_TAG_WIDTH-1),
.POS (NC_TAG_BIT)
) mem_req_tag_insert (
.data_in (mem_req_tag_b),
.sel_in (1'b0),
.data_out (mem_req_tag_s)
);
assign mem_rsp_valid_b = mem_bus_if.rsp_valid;
assign mem_rsp_data_b = mem_bus_if.rsp_data.data;
assign mem_bus_if.rsp_ready = mem_rsp_ready_b;
// Remove NC flag from the memory response tag
VX_bits_remove #(
.N (MEM_TAG_WIDTH),
.POS (NC_TAG_BIT)
) mem_rsp_tag_remove (
.data_in (mem_bus_if.rsp_data.tag),
.data_out (mem_rsp_tag_b)
);
end
// Inner cache selection.
// PASSTHRU != 0: no VX_cache is built. PASSTHRU also makes NC_BYPASS true, so
// the bypass stage above exists; the *_b signals it drives toward the cache
// are marked unused, and the *_b signals the cache would have driven are tied
// to zero (never ready, never valid). Perf counters are tied to zero too.
if (PASSTHRU != 0) begin
`UNUSED_VAR (core_req_valid_b)
`UNUSED_VAR (core_req_rw_b)
`UNUSED_VAR (core_req_addr_b)
`UNUSED_VAR (core_req_byteen_b)
`UNUSED_VAR (core_req_data_b)
`UNUSED_VAR (core_req_tag_b)
assign core_req_ready_b = '0;
assign core_rsp_valid_b = '0;
assign core_rsp_data_b = '0;
assign core_rsp_tag_b = '0;
`UNUSED_VAR (core_rsp_ready_b)
assign mem_req_valid_b = 0;
assign mem_req_addr_b = '0;
assign mem_req_rw_b = '0;
assign mem_req_byteen_b = '0;
assign mem_req_data_b = '0;
assign mem_req_tag_b = '0;
`UNUSED_VAR (mem_req_ready_b)
`UNUSED_VAR (mem_rsp_valid_b)
`UNUSED_VAR (mem_rsp_data_b)
`UNUSED_VAR (mem_rsp_tag_b)
assign mem_rsp_ready_b = 0;
`ifdef PERF_ENABLE
assign cache_perf_if.reads = '0;
assign cache_perf_if.writes = '0;
assign cache_perf_if.read_misses = '0;
assign cache_perf_if.write_misses = '0;
assign cache_perf_if.bank_stalls = '0;
assign cache_perf_if.mshr_stalls = '0;
assign cache_perf_if.mem_stalls = '0;
assign cache_perf_if.crsp_stalls = '0;
`endif
end else begin
// PASSTHRU == 0: build the real cache. VX_cache takes interface ports, so the
// packed *_b vectors are re-wrapped into local VX_mem_bus_if instances sized
// with the inner tag widths.
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (CORE_TAG_X_WIDTH)
) core_bus_wrap_if[NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_X_WIDTH)
) mem_bus_wrap_if();
// Core requests: *_b vectors -> core_bus_wrap_if
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_bus_wrap_if[i].req_valid = core_req_valid_b[i];
assign core_bus_wrap_if[i].req_data.rw = core_req_rw_b[i];
assign core_bus_wrap_if[i].req_data.addr = core_req_addr_b[i];
assign core_bus_wrap_if[i].req_data.byteen = core_req_byteen_b[i];
assign core_bus_wrap_if[i].req_data.data = core_req_data_b[i];
assign core_bus_wrap_if[i].req_data.tag = core_req_tag_b[i];
assign core_req_ready_b[i] = core_bus_wrap_if[i].req_ready;
end
// Core responses: core_bus_wrap_if -> *_b vectors
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_valid_b[i] = core_bus_wrap_if[i].rsp_valid;
assign core_rsp_data_b[i] = core_bus_wrap_if[i].rsp_data.data;
assign core_rsp_tag_b[i] = core_bus_wrap_if[i].rsp_data.tag;
assign core_bus_wrap_if[i].rsp_ready = core_rsp_ready_b[i];
end
// Memory requests out of, and memory responses into, mem_bus_wrap_if
assign mem_req_valid_b = mem_bus_wrap_if.req_valid;
assign mem_req_addr_b = mem_bus_wrap_if.req_data.addr;
assign mem_req_rw_b = mem_bus_wrap_if.req_data.rw;
assign mem_req_byteen_b = mem_bus_wrap_if.req_data.byteen;
assign mem_req_data_b = mem_bus_wrap_if.req_data.data;
assign mem_req_tag_b = mem_bus_wrap_if.req_data.tag;
assign mem_bus_wrap_if.req_ready = mem_req_ready_b;
assign mem_bus_wrap_if.rsp_valid = mem_rsp_valid_b;
assign mem_bus_wrap_if.rsp_data.data = mem_rsp_data_b;
assign mem_bus_wrap_if.rsp_data.tag = mem_rsp_tag_b;
assign mem_rsp_ready_b = mem_bus_wrap_if.rsp_ready;
// The cache sees the inner core tag width (CORE_TAG_X_WIDTH). When the bypass
// stage is present, both of its output-register parameters are forced to 1;
// otherwise the wrapper's CORE_OUT_REG / MEM_OUT_REG values are forwarded.
`RESET_RELAY (cache_reset, reset);
VX_cache #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (CORE_TAG_X_WIDTH),
.CORE_OUT_REG (NC_BYPASS ? 1 : CORE_OUT_REG),
.MEM_OUT_REG (NC_BYPASS ? 1 : MEM_OUT_REG)
) cache (
.clk (clk),
.reset (cache_reset),
`ifdef PERF_ENABLE
.cache_perf_if (cache_perf_if),
`endif
.core_bus_if (core_bus_wrap_if),
.mem_bus_if (mem_bus_wrap_if)
);
end
// Debug tracing, compiled in only when DBG_TRACE_CACHE_BANK is defined.
// Every accepted transfer (valid && ready) on the external core and memory
// interfaces is reported through the TRACE macro at posedge clk.
`ifdef DBG_TRACE_CACHE_BANK
for (genvar i = 0; i < NUM_REQS; ++i) begin
// UUID is taken from the top UUID_WIDTH bits of the core tag, or reported
// as 0 when UUID_WIDTH is 0
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
if (UUID_WIDTH != 0) begin
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign core_req_uuid = 0;
assign core_rsp_uuid = 0;
end
wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;
always @(posedge clk) begin
if (core_req_fire) begin
if (core_bus_if[i].req_data.rw)
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
else
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
end
if (core_rsp_fire) begin
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
end
end
end
// Memory-side UUID: extracted from the top UUID_WIDTH bits of the memory tag
// only when UUID_WIDTH is non-zero AND the bypass stage is present; in every
// other configuration it is reported as 0.
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
if ((UUID_WIDTH != 0) && (NC_BYPASS != 0)) begin
assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign mem_req_uuid = 0;
assign mem_rsp_uuid = 0;
end
wire mem_req_fire = mem_bus_if.req_valid && mem_bus_if.req_ready;
wire mem_rsp_fire = mem_bus_if.rsp_valid && mem_bus_if.rsp_ready;
always @(posedge clk) begin
if (mem_req_fire) begin
if (mem_bus_if.req_data.rw)
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
else
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
end
if (mem_rsp_fire) begin
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
end
end
`endif
endmodule