+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
349 lines
13 KiB
Systemverilog
349 lines
13 KiB
Systemverilog
// Copyright © 2019-2023
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
`include "VX_platform.vh"
|
|
|
|
module VX_cache_bypass #(
|
|
parameter NUM_REQS = 1,
|
|
parameter NC_TAG_BIT = 0,
|
|
|
|
parameter NC_ENABLE = 0,
|
|
parameter PASSTHRU = 0,
|
|
|
|
parameter CORE_ADDR_WIDTH = 1,
|
|
parameter CORE_DATA_SIZE = 1,
|
|
parameter CORE_TAG_IN_WIDTH = 1,
|
|
|
|
parameter MEM_ADDR_WIDTH = 1,
|
|
parameter MEM_DATA_SIZE = 1,
|
|
parameter MEM_TAG_IN_WIDTH = 1,
|
|
parameter MEM_TAG_OUT_WIDTH = 1,
|
|
|
|
parameter UUID_WIDTH = 0,
|
|
|
|
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
|
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
|
parameter CORE_TAG_OUT_WIDTH= CORE_TAG_IN_WIDTH - NC_ENABLE
|
|
) (
|
|
input wire clk,
|
|
input wire reset,
|
|
|
|
// Core request in
|
|
input wire [NUM_REQS-1:0] core_req_valid_in,
|
|
input wire [NUM_REQS-1:0] core_req_rw_in,
|
|
input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in,
|
|
input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in,
|
|
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in,
|
|
input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in,
|
|
output wire [NUM_REQS-1:0] core_req_ready_in,
|
|
|
|
// Core request out
|
|
output wire [NUM_REQS-1:0] core_req_valid_out,
|
|
output wire [NUM_REQS-1:0] core_req_rw_out,
|
|
output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
|
|
output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out,
|
|
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
|
|
output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out,
|
|
input wire [NUM_REQS-1:0] core_req_ready_out,
|
|
|
|
// Core response in
|
|
input wire [NUM_REQS-1:0] core_rsp_valid_in,
|
|
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
|
|
input wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in,
|
|
output wire [NUM_REQS-1:0] core_rsp_ready_in,
|
|
|
|
// Core response out
|
|
output wire [NUM_REQS-1:0] core_rsp_valid_out,
|
|
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
|
|
output wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out,
|
|
input wire [NUM_REQS-1:0] core_rsp_ready_out,
|
|
|
|
// Memory request in
|
|
input wire mem_req_valid_in,
|
|
input wire mem_req_rw_in,
|
|
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
|
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
|
|
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
|
|
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
|
output wire mem_req_ready_in,
|
|
|
|
// Memory request out
|
|
output wire mem_req_valid_out,
|
|
output wire mem_req_rw_out,
|
|
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
|
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
|
|
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
|
|
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
|
input wire mem_req_ready_out,
|
|
|
|
// Memory response in
|
|
input wire mem_rsp_valid_in,
|
|
input wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_in,
|
|
input wire [MEM_TAG_OUT_WIDTH-1:0] mem_rsp_tag_in,
|
|
output wire mem_rsp_ready_in,
|
|
|
|
// Memory response out
|
|
output wire mem_rsp_valid_out,
|
|
output wire [MEM_DATA_WIDTH-1:0] mem_rsp_data_out,
|
|
output wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_out,
|
|
input wire mem_rsp_ready_out
|
|
);
|
|
`UNUSED_VAR (clk)
|
|
`UNUSED_VAR (reset)
|
|
|
|
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
|
localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1;
|
|
|
|
localparam WORDS_PER_LINE = MEM_DATA_SIZE / CORE_DATA_SIZE;
|
|
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
|
|
|
localparam CORE_TAG_ID_BITS = CORE_TAG_IN_WIDTH - UUID_WIDTH;
|
|
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
|
|
|
|
localparam MEM_TAG_OUT_NC_WIDTH = MEM_TAG_OUT_WIDTH - 1 + NC_ENABLE;
|
|
|
|
// core request handling
|
|
|
|
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
|
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
|
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
|
wire [NUM_REQS-1:0] core_req_nc_sel;
|
|
wire core_req_nc_valid;
|
|
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
if (PASSTHRU != 0) begin
|
|
assign core_req_nc_idxs[i] = 1'b1;
|
|
end else begin
|
|
assign core_req_nc_idxs[i] = core_req_tag_in[i][NC_TAG_BIT];
|
|
end
|
|
end
|
|
|
|
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
|
|
|
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
|
|
|
VX_generic_arbiter #(
|
|
.NUM_REQS (NUM_REQS),
|
|
.TYPE (PASSTHRU ? "R" : "P"),
|
|
.LOCK_ENABLE (1)
|
|
) req_arb (
|
|
.clk (clk),
|
|
.reset (reset),
|
|
.unlock (core_req_in_fire),
|
|
.requests (core_req_valid_in_nc),
|
|
.grant_index (core_req_nc_idx),
|
|
.grant_onehot (core_req_nc_sel),
|
|
.grant_valid (core_req_nc_valid)
|
|
);
|
|
|
|
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
|
assign core_req_rw_out = core_req_rw_in;
|
|
assign core_req_addr_out = core_req_addr_in;
|
|
assign core_req_byteen_out = core_req_byteen_in;
|
|
assign core_req_data_out = core_req_data_in;
|
|
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
VX_bits_remove #(
|
|
.N (CORE_TAG_IN_WIDTH),
|
|
.S (NC_ENABLE),
|
|
.POS (NC_TAG_BIT)
|
|
) core_req_tag_nc_remove (
|
|
.data_in (core_req_tag_in[i]),
|
|
.data_out (core_req_tag_out[i])
|
|
);
|
|
end
|
|
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
|
: core_req_ready_out[i];
|
|
end
|
|
|
|
// memory request handling
|
|
|
|
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
|
assign mem_req_ready_in = mem_req_ready_out;
|
|
|
|
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
|
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
|
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
|
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
|
wire core_req_rw_in_sel;
|
|
|
|
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
|
end
|
|
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_idx];
|
|
|
|
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_tag_in_sel[CORE_TAG_ID_BITS-1:0];
|
|
|
|
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
|
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[WSEL_BITS +: MEM_ADDR_WIDTH];
|
|
|
|
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
|
|
|
if (WORDS_PER_LINE > 1) begin
|
|
reg [WORDS_PER_LINE-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
|
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
|
|
|
wire [WSEL_BITS-1:0] req_wsel = core_req_addr_in_sel[WSEL_BITS-1:0];
|
|
|
|
always @(*) begin
|
|
mem_req_byteen_in_r = '0;
|
|
mem_req_byteen_in_r[req_wsel] = core_req_byteen_in_sel;
|
|
|
|
mem_req_data_in_r = 'x;
|
|
mem_req_data_in_r[req_wsel] = core_req_data_in_sel;
|
|
end
|
|
|
|
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
|
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
|
if (NUM_REQS > 1) begin
|
|
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
|
end else begin
|
|
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
|
end
|
|
end else begin
|
|
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
|
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
|
if (NUM_REQS > 1) begin
|
|
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
|
end else begin
|
|
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
|
|
end
|
|
end
|
|
|
|
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_req_tag_bypass;
|
|
|
|
if (UUID_WIDTH != 0) begin
|
|
assign mem_req_tag_bypass = {core_req_tag_in_sel[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
|
end else begin
|
|
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
|
end
|
|
|
|
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_bypass_nc;
|
|
wire [(MEM_TAG_IN_WIDTH + 1)-1:0] mem_req_tag_in_nc;
|
|
|
|
VX_bits_insert #(
|
|
.N (MEM_TAG_OUT_NC_WIDTH),
|
|
.S (NC_ENABLE ? 0 : 1),
|
|
.POS (NC_TAG_BIT)
|
|
) mem_req_tag_bypass_nc_insert (
|
|
.data_in (mem_req_tag_bypass),
|
|
.sel_in (1'b0),
|
|
.data_out (mem_req_tag_bypass_nc)
|
|
);
|
|
|
|
VX_bits_insert #(
|
|
.N (MEM_TAG_IN_WIDTH),
|
|
.POS (NC_TAG_BIT)
|
|
) mem_req_tag_in_nc_insert (
|
|
.data_in (mem_req_tag_in),
|
|
.sel_in (1'b0),
|
|
.data_out (mem_req_tag_in_nc)
|
|
);
|
|
|
|
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : mem_req_tag_bypass_nc;
|
|
|
|
// core response handling
|
|
|
|
wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_in_nc;
|
|
|
|
wire is_mem_rsp_nc;
|
|
if (PASSTHRU != 0) begin
|
|
assign is_mem_rsp_nc = mem_rsp_valid_in;
|
|
end else begin
|
|
assign is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
|
end
|
|
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
VX_bits_insert #(
|
|
.N (CORE_TAG_OUT_WIDTH),
|
|
.S (NC_ENABLE),
|
|
.POS (NC_TAG_BIT)
|
|
) core_rsp_tag_in_nc_insert (
|
|
.data_in (core_rsp_tag_in[i]),
|
|
.sel_in ('0),
|
|
.data_out (core_rsp_tag_in_nc[i])
|
|
);
|
|
end
|
|
|
|
wire [MEM_TAG_OUT_NC_WIDTH-1:0] mem_rsp_tag_in_nc;
|
|
|
|
VX_bits_remove #(
|
|
.N (MEM_TAG_OUT_WIDTH),
|
|
.S (NC_ENABLE ? 0 : 1),
|
|
.POS (NC_TAG_BIT)
|
|
) mem_rsp_tag_in_nc_remove (
|
|
.data_in (mem_rsp_tag_in),
|
|
.data_out (mem_rsp_tag_in_nc)
|
|
);
|
|
|
|
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
|
if (NUM_REQS > 1) begin
|
|
assign rsp_idx = mem_rsp_tag_in_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
|
end else begin
|
|
assign rsp_idx = 1'b0;
|
|
end
|
|
|
|
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
|
always @(*) begin
|
|
rsp_nc_valid_r = '0;
|
|
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
|
end
|
|
|
|
assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
|
|
assign core_rsp_ready_in = core_rsp_ready_out;
|
|
|
|
if (WORDS_PER_LINE > 1) begin
|
|
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_in_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ?
|
|
core_rsp_data_in[i] : mem_rsp_data_in[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
|
end
|
|
end else begin
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
|
end
|
|
end
|
|
|
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
|
if (UUID_WIDTH != 0) begin
|
|
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : {mem_rsp_tag_in_nc[MEM_TAG_OUT_NC_WIDTH-1 -: UUID_WIDTH], mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0]};
|
|
end else begin
|
|
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in_nc[i] : mem_rsp_tag_in_nc[CORE_TAG_ID_BITS-1:0];
|
|
end
|
|
end
|
|
|
|
// memory response handling
|
|
|
|
if (PASSTHRU != 0) begin
|
|
assign mem_rsp_valid_out = 1'b0;
|
|
end else begin
|
|
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
|
|
end
|
|
|
|
assign mem_rsp_data_out = mem_rsp_data_in;
|
|
|
|
VX_bits_remove #(
|
|
.N (MEM_TAG_IN_WIDTH + 1),
|
|
.POS (NC_TAG_BIT)
|
|
) mem_rsp_tag_out_remove (
|
|
.data_in (mem_rsp_tag_in[(MEM_TAG_IN_WIDTH + 1)-1:0]),
|
|
.data_out (mem_rsp_tag_out)
|
|
);
|
|
|
|
assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_idx] && core_rsp_ready_out[rsp_idx]) : mem_rsp_ready_out;
|
|
|
|
endmodule
|