Merge remote-tracking branch 'upstream/master' into vortex2
This commit is contained in:
@@ -37,8 +37,8 @@ jobs:
|
||||
script:
|
||||
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
|
||||
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
|
||||
- make -C $HOME/build32
|
||||
- XLEN=64 make -C $HOME/build64
|
||||
- make -C $HOME/build32 > /dev/null
|
||||
- XLEN=64 make -C $HOME/build64 > /dev/null
|
||||
- stage: test
|
||||
name: unittest
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
|
||||
@@ -35,7 +35,7 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
||||
## Build Instructions
|
||||
More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||
### Supported OS Platforms
|
||||
- Ubuntu 18.04
|
||||
- Ubuntu 18.04, 20.04
|
||||
- CentOS 7
|
||||
### Toolchain Dependencies
|
||||
- [POCL](http://portablecl.org/)
|
||||
@@ -54,9 +54,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd vortex
|
||||
### Install prebuilt toolchain
|
||||
By default, the toolchain will install to /opt folder.
|
||||
You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||
|
||||
By default, the toolchain is installed to the /opt folder, which requires sudo access.
|
||||
You can install the toolchain to a different location of your choice by setting TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||
$ export TOOLDIR=/opt
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
### Build Vortex sources
|
||||
|
||||
4
RELEASE
4
RELEASE
@@ -1,4 +0,0 @@
|
||||
|
||||
Release Notes!
|
||||
|
||||
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.
|
||||
23
TODO
23
TODO
@@ -1,23 +0,0 @@
|
||||
|
||||
|
||||
|
||||
Functionality:
|
||||
1) vx_cl_warpSpawn()
|
||||
-> To be used by pocl->ops->run
|
||||
|
||||
2) newlib Integration (LoadFile(""))
|
||||
-> To be used by the Rhinio benchmarks
|
||||
|
||||
3) POCL OPS Vortex Suite
|
||||
|
||||
Performance:
|
||||
1) Icache doesn't need SEND_MEM_REQUEST Stage
|
||||
-> Blocks are never dirty, so why not evict right away
|
||||
|
||||
2) Branch not taken speculation
|
||||
|
||||
3) Runtime built with -O2 not running on RTL, and with -O3 not running on RTL and Emulator
|
||||
|
||||
|
||||
Vector:
|
||||
1) Cycle accurate simulator (would require Cache Simulator)
|
||||
@@ -22,7 +22,7 @@ rm -f blackbox.*.cache
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
make -C hw/unittest
|
||||
make -C hw/unittest > /dev/null
|
||||
}
|
||||
|
||||
isa()
|
||||
@@ -31,33 +31,36 @@ echo "begin isa tests..."
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
CONFIGS="-DDPI_DISABLE" make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DDPI_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d || true
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
@@ -134,15 +137,16 @@ debug()
|
||||
echo "begin debugging tests..."
|
||||
|
||||
# test CSV trace generation
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim
|
||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
make -C sim/simx clean && make -C sim/simx
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/simx clean && make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright © 2019-2023
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -34,11 +34,11 @@ def monitor(stop):
|
||||
break
|
||||
|
||||
def execute(command):
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE)
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
while True:
|
||||
output = process.stdout.readline()
|
||||
if output:
|
||||
line = output.decode('ascii').rstrip()
|
||||
line = output.decode('utf-8').rstrip()
|
||||
print(">>> " + line)
|
||||
process.stdout.flush()
|
||||
ret = process.poll()
|
||||
|
||||
@@ -136,6 +136,18 @@
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
|
||||
@@ -291,16 +291,11 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||
`ifdef ICACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`else
|
||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
|
||||
10
hw/rtl/cache/VX_cache_bypass.sv
vendored
10
hw/rtl/cache/VX_cache_bypass.sv
vendored
@@ -130,20 +130,20 @@ module VX_cache_bypass #(
|
||||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
||||
|
||||
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
||||
wire core_req_nc_ready = ~mem_req_valid_in && mem_req_ready_out;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P"),
|
||||
.LOCK_ENABLE (1)
|
||||
) req_arb (
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (core_req_in_fire),
|
||||
.requests (core_req_valid_in_nc),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid)
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_unlock (core_req_nc_ready)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
||||
@@ -164,7 +164,7 @@ module VX_cache_bypass #(
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_req_ready_out[i];
|
||||
end
|
||||
|
||||
|
||||
@@ -533,8 +533,9 @@ module VX_decode #(
|
||||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE_VCS
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -32,7 +32,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
|
||||
wire icache_req_valid;
|
||||
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
||||
@@ -44,8 +43,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
|
||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||
|
||||
wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
|
||||
assign req_tag = schedule_if.data.wid;
|
||||
|
||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
@@ -68,9 +65,12 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_pending_size #(
|
||||
@@ -85,13 +85,16 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
end
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||
`else
|
||||
wire ibuf_ready = 1'b1;
|
||||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||
|
||||
@@ -66,8 +66,9 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
.valid_out (ibuffer_if[i].valid),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
);
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||
`endif
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -47,8 +47,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg valid_out_r;
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
@@ -60,7 +58,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire ready_out = operands_if[i].ready;
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||
@@ -85,7 +83,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (valid_out_r && ready_out) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||
@@ -177,33 +175,11 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
state <= STATE_IDLE;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
valid_out_r <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= scoreboard_if[i].valid && data_ready;
|
||||
end else if (ready_out) begin
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= {scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd};
|
||||
end
|
||||
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
@@ -218,8 +194,33 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign operands_if[i].valid = valid_out_r;
|
||||
assign {operands_if[i].data.uuid,
|
||||
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
|
||||
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
|
||||
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) staging_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.wis,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd
|
||||
}),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (operands_if[i].valid),
|
||||
.data_out ({
|
||||
operands_if[i].data.uuid,
|
||||
operands_if[i].data.wis,
|
||||
operands_if[i].data.tmask,
|
||||
operands_if[i].data.PC,
|
||||
@@ -230,13 +231,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
||||
operands_if[i].data.use_PC,
|
||||
operands_if[i].data.use_imm,
|
||||
operands_if[i].data.imm,
|
||||
operands_if[i].data.rd} = data_out_r;
|
||||
operands_if[i].data.rd
|
||||
}),
|
||||
.ready_out (operands_if[i].ready)
|
||||
);
|
||||
|
||||
assign operands_if[i].data.rs1_data = rs1_data;
|
||||
assign operands_if[i].data.rs2_data = rs2_data;
|
||||
assign operands_if[i].data.rs3_data = rs3_data;
|
||||
|
||||
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
|
||||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
|
||||
@@ -111,7 +111,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
case (scoreboard_if[i].data.op_type)
|
||||
case (ibuffer_if[i].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||
@@ -152,50 +152,46 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||
`endif
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg valid_out_r;
|
||||
wire ready_out;
|
||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire operands_ready = ~(| operands_busy);
|
||||
|
||||
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire deps_ready = (& ready_masks);
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
assign stg_valid_in = ibuffer_if[i].valid && operands_ready;
|
||||
assign ibuffer_if[i].ready = stg_ready_in && operands_ready;
|
||||
|
||||
wire valid_in = ibuffer_if[i].valid && deps_ready;
|
||||
wire ready_in = ~valid_out_r && deps_ready;
|
||||
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
|
||||
|
||||
assign ready_out = scoreboard_if[i].ready;
|
||||
VX_stream_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) staging_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.data_out (scoreboard_if[i].data),
|
||||
.ready_out (scoreboard_if[i].ready)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
valid_out_r <= valid_in;
|
||||
end else if (ready_out) begin
|
||||
if (scoreboard_if[i].data.wb) begin
|
||||
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
`ifdef PERF_ENABLE
|
||||
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
|
||||
if (scoreboard_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type;
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
|
||||
if (ibuffer_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
valid_out_r <= 0;
|
||||
end
|
||||
end
|
||||
if (~valid_out_r) begin
|
||||
data_out_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ibuffer_if[i].ready = ready_in;
|
||||
assign scoreboard_if[i].valid = valid_out_r;
|
||||
assign scoreboard_if[i].data = data_out_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
@@ -208,7 +204,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
||||
@@ -220,7 +216,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
~ready_masks, ibuffer_if[i].data.uuid));
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
|
||||
@@ -36,21 +36,26 @@ interface VX_decode_if ();
|
||||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||
`ifndef L1_ENABLE
|
||||
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||
`endif
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
input ibuf_pop,
|
||||
input ready
|
||||
`ifndef L1_ENABLE
|
||||
, input ibuf_pop
|
||||
`endif
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
output ibuf_pop,
|
||||
output ready
|
||||
`ifndef L1_ENABLE
|
||||
, output ibuf_pop
|
||||
`endif
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -26,21 +26,26 @@ interface VX_fetch_if ();
|
||||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
|
||||
`ifndef L1_ENABLE
|
||||
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||
`endif
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
input ibuf_pop,
|
||||
input ready
|
||||
`ifndef L1_ENABLE
|
||||
, input ibuf_pop
|
||||
`endif
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
output ibuf_pop,
|
||||
output ready
|
||||
`ifndef L1_ENABLE
|
||||
, output ibuf_pop
|
||||
`endif
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -11,6 +11,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// A bypass elastic buffer operates at full bandwidth: while the buffer is empty, incoming data is forwarded directly to the output, so a pop can happen in the same cycle as the push.
|
||||
// It has the following benefits:
|
||||
// + Full-bandwidth throughput
|
||||
// + use only one register for storage
|
||||
// It has the following limitations:
|
||||
// + data_out is not registered
|
||||
// + ready_in and ready_out are coupled
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
@@ -35,29 +43,26 @@ module VX_bypass_buffer #(
|
||||
assign data_out = data_in;
|
||||
end else begin
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg buffer_valid;
|
||||
reg has_data;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
buffer_valid <= 0;
|
||||
has_data <= 0;
|
||||
end else begin
|
||||
if (ready_out) begin
|
||||
buffer_valid <= 0;
|
||||
end
|
||||
if (valid_in && ~ready_out) begin
|
||||
`ASSERT(!buffer_valid, ("runtime error"));
|
||||
buffer_valid <= 1;
|
||||
has_data <= 0;
|
||||
end else if (~has_data) begin
|
||||
has_data <= valid_in;
|
||||
end
|
||||
end
|
||||
|
||||
if (valid_in && ~ready_out) begin
|
||||
if (~has_data) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ready_out || !buffer_valid;
|
||||
assign data_out = buffer_valid ? buffer : data_in;
|
||||
assign valid_out = valid_in || buffer_valid;
|
||||
assign ready_in = ready_out || ~has_data;
|
||||
assign data_out = has_data ? buffer : data_in;
|
||||
assign valid_out = valid_in || has_data;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -22,14 +22,11 @@ module VX_cyclic_arbiter #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire unlock,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (unlock)
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
@@ -51,7 +48,7 @@ module VX_cyclic_arbiter #(
|
||||
end else begin
|
||||
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||
grant_index_r <= '0;
|
||||
end else begin
|
||||
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
|
||||
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -42,34 +42,33 @@ module VX_elastic_buffer #(
|
||||
|
||||
end else if (SIZE == 1) begin
|
||||
|
||||
wire stall = valid_out && ~ready_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW),
|
||||
.RESETW (1)
|
||||
) pipe_register (
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) pipe_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, data_in}),
|
||||
.data_out ({valid_out, data_out})
|
||||
.valid_in (valid_in),
|
||||
.data_in (data_in),
|
||||
.ready_in (ready_in),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
|
||||
end else if (SIZE == 2) begin
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.FULL_BW (OUT_REG != 2),
|
||||
.OUT_REG (OUT_REG)
|
||||
) skid_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_out (data_out),
|
||||
.ready_in (ready_in),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
@@ -111,10 +110,10 @@ module VX_elastic_buffer #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (~empty),
|
||||
.ready_in (ready_out_t),
|
||||
.data_in (data_out_t),
|
||||
.data_out (data_out),
|
||||
.ready_in (ready_out_t),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
|
||||
@@ -21,17 +21,17 @@ module VX_fair_arbiter #(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
@@ -48,18 +48,14 @@ module VX_fair_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
buffer <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
buffer <= buffer_n;
|
||||
end
|
||||
end
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) priority_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests_qual),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot),
|
||||
|
||||
@@ -22,21 +22,22 @@ module VX_generic_arbiter #(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (TYPE == "P") begin
|
||||
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE)
|
||||
.NUM_REQS (NUM_REQS)
|
||||
) priority_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
@@ -51,11 +52,11 @@ module VX_generic_arbiter #(
|
||||
) rr_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "F") begin
|
||||
@@ -66,11 +67,11 @@ module VX_generic_arbiter #(
|
||||
) fair_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "M") begin
|
||||
@@ -81,11 +82,11 @@ module VX_generic_arbiter #(
|
||||
) matrix_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else if (TYPE == "C") begin
|
||||
@@ -96,11 +97,11 @@ module VX_generic_arbiter #(
|
||||
) cyclic_arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (unlock),
|
||||
.requests (requests),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
.grant_onehot (grant_onehot)
|
||||
.grant_onehot (grant_onehot),
|
||||
.grant_unlock (grant_unlock)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -21,17 +21,17 @@ module VX_matrix_arbiter #(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
@@ -71,18 +71,18 @@ module VX_matrix_arbiter #(
|
||||
end
|
||||
|
||||
if (LOCK_ENABLE == 0) begin
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
assign grant_onehot = grant_unqual;
|
||||
end else begin
|
||||
reg [NUM_REQS-1:0] grant_unqual_prev;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
grant_unqual_prev <= '0;
|
||||
end else if (unlock) begin
|
||||
end else if (grant_unlock) begin
|
||||
grant_unqual_prev <= grant_unqual;
|
||||
end
|
||||
end
|
||||
assign grant_onehot = unlock ? grant_unqual : grant_unqual_prev;
|
||||
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
|
||||
@@ -46,18 +46,20 @@ input wire clk,
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||
wire grant_valid;
|
||||
wire rsp_fire;
|
||||
wire grant_ready;
|
||||
|
||||
VX_priority_arbiter #(
|
||||
.NUM_REQS (NUM_REQS)
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE ("P")
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.unlock (rsp_fire),
|
||||
.requests (rsp_valid_in),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(grant_ready)
|
||||
);
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_valid_sel;
|
||||
@@ -78,7 +80,7 @@ input wire clk,
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_fire = grant_valid && rsp_ready_unqual;
|
||||
assign grant_ready = rsp_ready_unqual;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
||||
|
||||
63
hw/rtl/libs/VX_pipe_buffer.sv
Normal file
63
hw/rtl/libs/VX_pipe_buffer.sv
Normal file
@@ -0,0 +1,63 @@
|
||||
// Copyright 2024 blaise
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// A pipelined elastic buffer operates at full bandwidth: a push is accepted even when the buffer is not empty, as long as the buffer is being emptied in the same cycle
|
||||
// It has the following benefits:
|
||||
// + Full-bandwidth throughput
|
||||
// + use only one register for storage
|
||||
// + data_out is fully registered
|
||||
// It has the following limitations:
|
||||
// - ready_in and ready_out are coupled
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// Single-stage valid/ready buffer built around one VX_pipe_register instance.
// With PASSTHRU != 0 the module reduces to plain wires between both sides.
module VX_pipe_buffer #(
    parameter DATAW    = 1,  // payload width in bits
    parameter PASSTHRU = 0   // non-zero: bypass the register stage
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU != 0) begin

        // Bypass mode holds no state, so clk and reset are not consumed.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign data_out  = data_in;
        assign valid_out = valid_in;
        assign ready_in  = ready_out;

    end else begin

        // The stage may load when it holds nothing valid, or when the
        // consumer accepts the current entry in this same cycle.
        wire advance = ~valid_out || ready_out;

        // The valid bit travels in the MSB next to the payload.
        // NOTE(review): RESETW=1 presumably resets only that top bit --
        // confirm against VX_pipe_register.
        VX_pipe_register #(
            .DATAW  (DATAW + 1),
            .RESETW (1)
        ) pipe_register (
            .clk      (clk),
            .reset    (reset),
            .enable   (advance),
            .data_in  ({valid_in, data_in}),
            .data_out ({valid_out, data_out})
        );

        // ready_in is a combinational function of ready_out (coupled handshake).
        assign ready_in = advance;

    end

endmodule
|
||||
`TRACING_ON
|
||||
@@ -16,22 +16,13 @@
|
||||
`TRACING_OFF
|
||||
module VX_priority_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
input wire unlock,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
);
|
||||
`UNUSED_PARAM (LOCK_ENABLE)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
assign grant_index = '0;
|
||||
|
||||
@@ -22,17 +22,17 @@ module VX_rr_arbiter #(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire unlock,
|
||||
input wire [NUM_REQS-1:0] requests,
|
||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||
output wire [NUM_REQS-1:0] grant_onehot,
|
||||
output wire grant_valid
|
||||
output wire grant_valid,
|
||||
input wire grant_unlock
|
||||
);
|
||||
if (NUM_REQS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (unlock)
|
||||
`UNUSED_VAR (grant_unlock)
|
||||
|
||||
assign grant_index = '0;
|
||||
assign grant_onehot = requests;
|
||||
@@ -55,7 +55,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -85,7 +85,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -121,7 +121,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -165,7 +165,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -219,7 +219,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -285,7 +285,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -365,7 +365,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
@@ -399,7 +399,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pointer_reg <= {NUM_REQS{1'b1}};
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
if (|req_masked) begin
|
||||
pointer_reg <= mask_higher_pri_regs;
|
||||
end else if (|requests) begin
|
||||
@@ -443,7 +443,7 @@ module VX_rr_arbiter #(
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= '0;
|
||||
end else if (!LOCK_ENABLE || unlock) begin
|
||||
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||
state <= grant_index_r;
|
||||
end
|
||||
end
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
module VX_skid_buffer #(
|
||||
parameter DATAW = 32,
|
||||
parameter PASSTHRU = 0,
|
||||
parameter FULL_BW = 0,
|
||||
parameter OUT_REG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
@@ -30,8 +31,6 @@ module VX_skid_buffer #(
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`STATIC_ASSERT ((OUT_REG <= 2), ("invalid parameter"))
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
@@ -41,112 +40,36 @@ module VX_skid_buffer #(
|
||||
assign data_out = data_in;
|
||||
assign ready_in = ready_out;
|
||||
|
||||
end else if (OUT_REG == 0) begin
|
||||
end else if (FULL_BW != 0) begin
|
||||
|
||||
reg [1:0][DATAW-1:0] shift_reg;
|
||||
reg valid_out_r, ready_in_r, rd_ptr_r;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
wire pop = valid_out_r && ready_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
ready_in_r <= 1;
|
||||
rd_ptr_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
if (!pop) begin
|
||||
ready_in_r <= rd_ptr_r;
|
||||
valid_out_r <= 1;
|
||||
end
|
||||
end else if (pop) begin
|
||||
ready_in_r <= 1;
|
||||
valid_out_r <= rd_ptr_r;
|
||||
end
|
||||
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ready_in_r;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = shift_reg[rd_ptr_r];
|
||||
|
||||
end else if (OUT_REG == 1) begin
|
||||
|
||||
// Full-bandwidth operation: input is consummed every cycle.
|
||||
// However, data_out register has an additional multiplexer.
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg valid_out_r;
|
||||
reg use_buffer;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
wire stall_out = valid_out_r && ~ready_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
use_buffer <= 0;
|
||||
end else begin
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
end else if (valid_in && valid_out) begin
|
||||
use_buffer <= 1;
|
||||
end
|
||||
if (~stall_out) begin
|
||||
valid_out_r <= valid_in || use_buffer;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (~stall_out) begin
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ~use_buffer;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
VX_stream_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.OUT_REG (OUT_REG)
|
||||
) stream_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (data_in),
|
||||
.ready_in (ready_in),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
// Half-bandwidth operation: input is consummed every other cycle.
|
||||
// However, data_out register has no additional multiplexer.
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg has_data;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
has_data <= 0;
|
||||
end else begin
|
||||
if (~has_data) begin
|
||||
has_data <= valid_in;
|
||||
end else if (ready_out) begin
|
||||
has_data <= 0;
|
||||
end
|
||||
end
|
||||
if (~has_data) begin
|
||||
data_out_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ~has_data;
|
||||
assign valid_out = has_data;
|
||||
assign data_out = data_out_r;
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) toggle_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (data_in),
|
||||
.ready_in (ready_in),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ module VX_stream_arb #(
|
||||
parameter NUM_OUTPUTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter `STRING ARBITER = "P",
|
||||
parameter LOCK_ENABLE = 1,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter OUT_REG = 0 ,
|
||||
parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS,
|
||||
@@ -57,7 +56,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) arb_slice (
|
||||
@@ -102,7 +100,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_slice_arb (
|
||||
@@ -129,7 +126,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_join_arb (
|
||||
@@ -158,25 +154,25 @@ module VX_stream_arb #(
|
||||
wire arb_valid;
|
||||
wire [NUM_REQS_W-1:0] arb_index;
|
||||
wire [NUM_REQS-1:0] arb_onehot;
|
||||
wire arb_unlock;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in),
|
||||
.unlock (arb_unlock),
|
||||
.grant_valid (arb_valid),
|
||||
.grant_index (arb_index),
|
||||
.grant_onehot (arb_onehot)
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
);
|
||||
|
||||
assign valid_in_r = arb_valid;
|
||||
assign data_in_r = data_in[arb_index];
|
||||
assign arb_unlock = | (valid_in_r & ready_in_r);
|
||||
assign arb_ready = ready_in_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = ready_in_r & arb_onehot[i];
|
||||
@@ -217,7 +213,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) arb_slice (
|
||||
@@ -252,7 +247,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (NUM_BATCHES),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_fork_arb (
|
||||
@@ -280,7 +274,6 @@ module VX_stream_arb #(
|
||||
.NUM_OUTPUTS (BATCH_SIZE),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) fanout_slice_arb (
|
||||
@@ -305,24 +298,24 @@ module VX_stream_arb #(
|
||||
wire [NUM_OUTPUTS-1:0] arb_requests;
|
||||
wire arb_valid;
|
||||
wire [NUM_OUTPUTS-1:0] arb_onehot;
|
||||
wire arb_unlock;
|
||||
wire arb_ready;
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_OUTPUTS),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.LOCK_ENABLE (1),
|
||||
.TYPE (ARBITER)
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (arb_requests),
|
||||
.unlock (arb_unlock),
|
||||
.grant_valid (arb_valid),
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot (arb_onehot)
|
||||
.grant_onehot (arb_onehot),
|
||||
.grant_unlock (arb_ready)
|
||||
);
|
||||
|
||||
assign arb_requests = ready_in_r;
|
||||
assign arb_unlock = | (valid_in & ready_in);
|
||||
assign arb_ready = valid_in[0];
|
||||
assign ready_in = arb_valid;
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
|
||||
128
hw/rtl/libs/VX_stream_buffer.sv
Normal file
128
hw/rtl/libs/VX_stream_buffer.sv
Normal file
@@ -0,0 +1,128 @@
|
||||
// Copyright 2024 blaise
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously
|
||||
// It has the following benefits:
|
||||
// + full-bandwidth throughput
|
||||
// + ready_in and ready_out are decoupled
|
||||
// + data_out can be fully registered
|
||||
// It has the following limitations:
|
||||
// - requires two registers for storage
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// Two-entry valid/ready buffer.
//   PASSTHRU != 0 : plain wires, no storage.
//   OUT_REG  != 0 : output register plus one side register that catches the
//                   word arriving while the output is blocked.
//   otherwise     : two-entry shift storage read through a one-bit selector.
module VX_stream_buffer #(
    parameter DATAW    = 1,  // payload width in bits
    parameter OUT_REG  = 0,  // non-zero: drive data_out straight from a register
    parameter PASSTHRU = 0   // non-zero: bypass all storage
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU != 0) begin

        // Bypass mode holds no state, so clk and reset are not consumed.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign data_out  = data_in;
        assign valid_out = valid_in;
        assign ready_in  = ready_out;

    end else begin
        if (OUT_REG != 0) begin

            reg [DATAW-1:0] out_data;   // register driving data_out
            reg [DATAW-1:0] skid_data;  // side register
            reg             out_valid;
            reg             skid_full;  // side register holds a pending word

            wire accept   = valid_in && ready_in;
            // Output register may reload: it is empty or being consumed now.
            wire out_free = ~out_valid || ready_out;

            // Side-register occupancy: cleared whenever the consumer is ready,
            // set when a word is offered while the output is valid but blocked.
            always @(posedge clk) begin
                if (reset) begin
                    skid_full <= 1'b0;
                end else if (ready_out) begin
                    skid_full <= 1'b0;
                end else if (valid_in && valid_out) begin
                    skid_full <= 1'b1;
                end
            end

            // Output valid flag: refreshed only when the output may reload.
            always @(posedge clk) begin
                if (reset) begin
                    out_valid <= 1'b0;
                end else if (out_free) begin
                    out_valid <= valid_in || skid_full;
                end
            end

            // Data registers carry no reset.
            always @(posedge clk) begin
                if (accept) begin
                    skid_data <= data_in;
                end
                if (out_free) begin
                    // A pending side-register word takes priority over the input.
                    out_data <= skid_full ? skid_data : data_in;
                end
            end

            assign ready_in  = ~skid_full;
            assign valid_out = out_valid;
            assign data_out  = out_data;

        end else begin

            reg [1:0][DATAW-1:0] slots;  // slots[0] is the newest entry
            reg out_valid;
            reg in_ready;
            reg rd_sel;                  // which slot feeds data_out

            wire push = valid_in && ready_in;
            wire pop  = out_valid && ready_out;

            // Handshake flags. A simultaneous push and pop leaves both unchanged.
            always @(posedge clk) begin
                if (reset) begin
                    out_valid <= 1'b0;
                    in_ready  <= 1'b1;
                end else if (push) begin
                    if (!pop) begin
                        // Still ready afterwards only if rd_sel was 1 before this push.
                        in_ready  <= rd_sel;
                        out_valid <= 1'b1;
                    end
                end else if (pop) begin
                    in_ready  <= 1'b1;
                    // Still valid afterwards only if rd_sel was 1 before this pop.
                    out_valid <= rd_sel;
                end
            end

            // Read selector: resets to 1 and toggles when exactly one of
            // push / pop happens.
            always @(posedge clk) begin
                if (reset) begin
                    rd_sel <= 1'b1;
                end else begin
                    rd_sel <= rd_sel ^ push ^ pop;
                end
            end

            // Storage carries no reset: every push shifts slot 0 into slot 1.
            always @(posedge clk) begin
                if (push) begin
                    slots[1] <= slots[0];
                    slots[0] <= data_in;
                end
            end

            assign ready_in  = in_ready;
            assign valid_out = out_valid;
            assign data_out  = slots[rd_sel];

        end
    end

endmodule
|
||||
`TRACING_ON
|
||||
|
||||
@@ -21,7 +21,6 @@ module VX_stream_xbar #(
|
||||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||
parameter ARBITER = "P",
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter OUT_REG = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
|
||||
@@ -66,7 +65,6 @@ module VX_stream_xbar #(
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) xbar_arb (
|
||||
@@ -95,7 +93,6 @@ module VX_stream_xbar #(
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER (ARBITER),
|
||||
.LOCK_ENABLE (LOCK_ENABLE),
|
||||
.MAX_FANOUT (MAX_FANOUT),
|
||||
.OUT_REG (OUT_REG)
|
||||
) xbar_arb (
|
||||
|
||||
70
hw/rtl/libs/VX_toggle_buffer.sv
Normal file
70
hw/rtl/libs/VX_toggle_buffer.sv
Normal file
@@ -0,0 +1,70 @@
|
||||
// Copyright 2024 blaise
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// A toggle elastic buffer operates at half-bandwidth where push can only trigger after pop
|
||||
// It has the following benefits:
|
||||
// + use only one register for storage
|
||||
// + ready_in and ready_out are decoupled
|
||||
// + data_out is fully registered
|
||||
// It has the following limitations:
|
||||
// - Half-bandwidth throughput
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// One-entry valid/ready buffer: the slot accepts a word only while empty and
// offers it downstream only while full, so ready_in and valid_out are never
// asserted in the same cycle.
module VX_toggle_buffer #(
    parameter DATAW    = 1,  // payload width in bits
    parameter PASSTHRU = 0   // non-zero: bypass the storage
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU != 0) begin

        // Bypass mode holds no state, so clk and reset are not consumed.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign data_out  = data_in;
        assign valid_out = valid_in;
        assign ready_in  = ready_out;

    end else begin

        reg [DATAW-1:0] slot_data;
        reg             slot_full;

        // Occupancy flag: fills from valid_in while empty, drains on
        // ready_out while full.
        always @(posedge clk) begin
            if (reset) begin
                slot_full <= 1'b0;
            end else if (~slot_full) begin
                slot_full <= valid_in;
            end else if (ready_out) begin
                slot_full <= 1'b0;
            end
        end

        // Data register carries no reset; it tracks data_in while the slot is
        // empty, so the word present when slot_full rises is the one kept.
        always @(posedge clk) begin
            if (~slot_full) begin
                slot_data <= data_in;
            end
        end

        assign ready_in  = ~slot_full;
        assign valid_out = slot_full;
        assign data_out  = slot_data;

    end

endmodule
|
||||
`TRACING_ON
|
||||
@@ -64,8 +64,8 @@ Cluster::Cluster(const SimContext& ctx,
|
||||
!L2_ENABLED,
|
||||
log2ceil(L2_CACHE_SIZE),// C
|
||||
log2ceil(MEM_BLOCK_SIZE),// L
|
||||
log2ceil(L2_NUM_WAYS), // W
|
||||
0, // A
|
||||
log2ceil(L1_LINE_SIZE), // W
|
||||
log2ceil(L2_NUM_WAYS), // A
|
||||
log2ceil(L2_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
|
||||
@@ -210,7 +210,7 @@ void Core::schedule() {
|
||||
void Core::fetch() {
|
||||
perf_stats_.ifetch_latency += pending_ifetches_;
|
||||
|
||||
// handle icache reponse
|
||||
// handle icache response
|
||||
auto& icache_rsp_port = icache_rsp_ports.at(0);
|
||||
if (!icache_rsp_port.empty()){
|
||||
auto& mem_rsp = icache_rsp_port.front();
|
||||
|
||||
@@ -207,7 +207,7 @@ void LsuUnit::tick() {
|
||||
for (uint32_t t = 1; t < num_lanes_; ++t) {
|
||||
if (!trace->tmask.test(t0 + t))
|
||||
continue;
|
||||
auto mem_addr = trace_data->mem_addrs.at(t).addr & ~addr_mask;
|
||||
auto mem_addr = trace_data->mem_addrs.at(t + t0).addr & ~addr_mask;
|
||||
matches += (addr0 == mem_addr);
|
||||
}
|
||||
#ifdef LSU_DUP_ENABLE
|
||||
@@ -229,7 +229,7 @@ void LsuUnit::tick() {
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->smem_demuxs_.at(t)->ReqIn;
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto mem_addr = trace_data->mem_addrs.at(t + t0);
|
||||
auto type = core_->get_addr_type(mem_addr.addr);
|
||||
|
||||
MemReq mem_req;
|
||||
|
||||
@@ -339,7 +339,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
// RV64I: SLLI
|
||||
// RV32I: SLLI
|
||||
rddata[t].i = rsdata[t][0].i << immsrc;
|
||||
break;
|
||||
}
|
||||
@@ -360,11 +360,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
||||
}
|
||||
case 5: {
|
||||
if (func7) {
|
||||
// RV64I: SRAI
|
||||
// RV32I: SRAI
|
||||
Word result = rsdata[t][0].i >> immsrc;
|
||||
rddata[t].i = result;
|
||||
} else {
|
||||
// RV64I: SRLI
|
||||
// RV32I: SRLI
|
||||
Word result = rsdata[t][0].u >> immsrc;
|
||||
rddata[t].i = result;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ static void show_usage() {
|
||||
uint32_t num_threads = NUM_THREADS;
|
||||
uint32_t num_warps = NUM_WARPS;
|
||||
uint32_t num_cores = NUM_CORES;
|
||||
bool showStats = false;;
|
||||
bool showStats = false;
|
||||
bool riscv_test = false;
|
||||
const char* program = nullptr;
|
||||
|
||||
|
||||
@@ -33,8 +33,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||
!L3_ENABLED,
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // L
|
||||
log2ceil(L3_NUM_WAYS), // W
|
||||
0, // A
|
||||
log2ceil(L2_LINE_SIZE), // W
|
||||
log2ceil(L3_NUM_WAYS), // A
|
||||
log2ceil(L3_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
@@ -58,7 +58,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||
}
|
||||
|
||||
// set up memory perf recording
|
||||
// set up memory profiling
|
||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
|
||||
@@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx,
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
1, // number of inputs
|
||||
true, // write-through
|
||||
false, // write-through
|
||||
false, // write response
|
||||
(uint8_t)arch.num_warps(), // mshr
|
||||
2, // pipeline latency
|
||||
|
||||
Reference in New Issue
Block a user