Files
kernels/hw/rtl/cache/VX_cache_cluster.sv
Blaise Tine d47cccc157 Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00

369 lines
13 KiB
Systemverilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
module VX_cache_cluster #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 1,
parameter NUM_INPUTS = 1,
parameter TAG_SEL_IDX = 0,
// Number of requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
// Memory request output register
parameter MEM_OUT_REG = 0
) (
input wire clk,
input wire reset,
// PERF
`ifdef PERF_ENABLE
VX_cache_perf_if.master cache_perf_if,
`endif
VX_mem_bus_if.slave core_bus_if [NUM_INPUTS * NUM_REQS],
VX_mem_bus_if.master mem_bus_if
);
localparam NUM_CACHES = `UP(NUM_UNITS);
localparam PASSTHRU = (NUM_UNITS == 0);
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
`ifdef PERF_ENABLE
VX_cache_perf_if perf_cache_unit_if[NUM_CACHES]();
`PERF_CACHE_ADD (cache_perf_if, perf_cache_unit_if, NUM_CACHES);
`endif
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) cache_mem_bus_if[NUM_CACHES]();
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_if[NUM_CACHES * NUM_REQS]();
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_tmp_if[NUM_INPUTS]();
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (ARB_TAG_WIDTH)
) arb_core_bus_tmp_if[NUM_CACHES]();
for (genvar j = 0; j < NUM_INPUTS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]);
end
`RESET_RELAY (cache_arb_reset, reset);
VX_mem_arb #(
.NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_CACHES),
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX),
.ARBITER ("R"),
.OUT_REG_REQ ((NUM_INPUTS != NUM_CACHES) ? 2 : 0),
.OUT_REG_RSP ((NUM_INPUTS != NUM_CACHES) ? 2 : 0)
) cache_arb (
.clk (clk),
.reset (cache_arb_reset),
.bus_in_if (core_bus_tmp_if),
.bus_out_if (arb_core_bus_tmp_if)
);
for (genvar k = 0; k < NUM_CACHES; ++k) begin
`ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]);
end
end
for (genvar i = 0; i < NUM_CACHES; ++i) begin
`RESET_RELAY (cache_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH),
.CORE_OUT_REG ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_REG),
.MEM_OUT_REG ((NUM_CACHES > 1) ? 2 : MEM_OUT_REG),
.NC_ENABLE (NC_ENABLE),
.PASSTHRU (PASSTHRU)
) cache_wrap (
`ifdef PERF_ENABLE
.cache_perf_if (perf_cache_unit_if[i]),
`endif
.clk (clk),
.reset (cache_reset),
.core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]),
.mem_bus_if (cache_mem_bus_if[i])
);
end
`RESET_RELAY (mem_arb_reset, reset);
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH + `ARB_SEL_BITS(NUM_CACHES, 1))
) mem_bus_tmp_if[1]();
VX_mem_arb #(
.NUM_INPUTS (NUM_CACHES),
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.ARBITER ("R"),
.OUT_REG_REQ ((NUM_CACHES > 1) ? 2 : 0),
.OUT_REG_RSP ((NUM_CACHES > 1) ? 2 : 0)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
.bus_in_if (cache_mem_bus_if),
.bus_out_if (mem_bus_tmp_if)
);
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
endmodule
///////////////////////////////////////////////////////////////////////////////
module VX_cache_cluster_top #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 2,
parameter NUM_INPUTS = 4,
parameter TAG_SEL_IDX = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 16,
// Number of banks
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = 16,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 1,
// Core response output register
parameter CORE_OUT_REG = 2,
// Memory request output register
parameter MEM_OUT_REG = 2,
parameter NUM_CACHES = `UP(NUM_UNITS),
parameter PASSTHRU = (NUM_UNITS == 0),
parameter ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES),
parameter MEM_TAG_WIDTH = PASSTHRU ? (NC_ENABLE ? `CACHE_NC_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH)) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS))
) (
input wire clk,
input wire reset,
// Core request
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_valid,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_rw,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_valid,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_INPUTS-1:0][NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire [NUM_INPUTS-1:0][NUM_REQS-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
VX_mem_bus_if #(
.DATA_SIZE (WORD_SIZE),
.TAG_WIDTH (TAG_WIDTH)
) core_bus_if[NUM_INPUTS * NUM_REQS]();
VX_mem_bus_if #(
.DATA_SIZE (LINE_SIZE),
.TAG_WIDTH (MEM_TAG_WIDTH)
) mem_bus_if();
// Core request
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_bus_if[i * NUM_REQS + r].req_valid = core_req_valid[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.rw = core_req_rw[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.byteen = core_req_byteen[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.addr = core_req_addr[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.data = core_req_data[i][r];
assign core_bus_if[i * NUM_REQS + r].req_data.tag = core_req_tag[i][r];
assign core_req_ready[i][r] = core_bus_if[i * NUM_REQS + r].req_ready;
end
end
// Core response
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
for (genvar r = 0; r < NUM_REQS; ++r) begin
assign core_rsp_valid[i][r] = core_bus_if[i * NUM_REQS + r].rsp_valid;
assign core_rsp_data[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.data;
assign core_rsp_tag[i][r] = core_bus_if[i * NUM_REQS + r].rsp_data.tag;
assign core_bus_if[i * NUM_REQS + r].rsp_ready = core_rsp_ready[i][r];
end
end
// Memory request
assign mem_req_valid = mem_bus_if.req_valid;
assign mem_req_rw = mem_bus_if.req_data.rw;
assign mem_req_byteen = mem_bus_if.req_data.byteen;
assign mem_req_addr = mem_bus_if.req_data.addr;
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;
assign mem_bus_if.rsp_data.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_bus_if.rsp_ready;
VX_cache_cluster #(
.INSTANCE_ID (INSTANCE_ID),
.NUM_UNITS (NUM_UNITS),
.NUM_INPUTS (NUM_INPUTS),
.TAG_SEL_IDX (TAG_SEL_IDX),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_OUT_REG (CORE_OUT_REG),
.MEM_OUT_REG (MEM_OUT_REG)
) cache (
`ifdef PERF_ENABLE
.cache_perf_if (perf_icache_if),
`endif
.clk (clk),
.reset (reset),
.core_bus_if (core_bus_if),
.mem_bus_if (mem_bus_if)
);
endmodule