Merge remote-tracking branch 'upstream/master' into vortex2
This commit is contained in:
@@ -37,8 +37,8 @@ jobs:
|
|||||||
script:
|
script:
|
||||||
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
|
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
|
||||||
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
|
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
|
||||||
- make -C $HOME/build32
|
- make -C $HOME/build32 > /dev/null
|
||||||
- XLEN=64 make -C $HOME/build64
|
- XLEN=64 make -C $HOME/build64 > /dev/null
|
||||||
- stage: test
|
- stage: test
|
||||||
name: unittest
|
name: unittest
|
||||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
|
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ Vortex is a full-stack open-source RISC-V GPGPU.
|
|||||||
## Build Instructions
|
## Build Instructions
|
||||||
More detailed build instructions can be found [here](docs/install_vortex.md).
|
More detailed build instructions can be found [here](docs/install_vortex.md).
|
||||||
### Supported OS Platforms
|
### Supported OS Platforms
|
||||||
- Ubuntu 18.04
|
- Ubuntu 18.04, 20.04
|
||||||
- Centos 7
|
- Centos 7
|
||||||
### Toolchain Dependencies
|
### Toolchain Dependencies
|
||||||
- [POCL](http://portablecl.org/)
|
- [POCL](http://portablecl.org/)
|
||||||
@@ -54,9 +54,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||||||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||||
$ cd vortex
|
$ cd vortex
|
||||||
### Install prebuilt toolchain
|
### Install prebuilt toolchain
|
||||||
By default, the toolchain will install to /opt folder.
|
By default, the toolchain is installed to the /opt folder, which requires sudo access.
|
||||||
You can install the toolchain to a different directory by overriding TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
You can install the toolchain to a different location of your choice by setting TOOLDIR (e.g. export TOOLDIR=$HOME/tools).
|
||||||
|
$ export TOOLDIR=/opt
|
||||||
$ ./ci/toolchain_install.sh --all
|
$ ./ci/toolchain_install.sh --all
|
||||||
$ source ./ci/toolchain_env.sh
|
$ source ./ci/toolchain_env.sh
|
||||||
### Build Vortex sources
|
### Build Vortex sources
|
||||||
|
|||||||
4
RELEASE
4
RELEASE
@@ -1,4 +0,0 @@
|
|||||||
|
|
||||||
Release Notes!
|
|
||||||
|
|
||||||
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.
|
|
||||||
23
TODO
23
TODO
@@ -1,23 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
Functionality:
|
|
||||||
1) vx_cl_warpSpawn()
|
|
||||||
-> To be used by pocl->ops->run
|
|
||||||
|
|
||||||
2) newlib Integration (LoadFile(""))
|
|
||||||
-> To be used by the Rhinio benchmarks
|
|
||||||
|
|
||||||
3) POCL OPS Vortex Suite
|
|
||||||
|
|
||||||
Performance:
|
|
||||||
1) Icache doesn't need SEND_MEM_REQUEST Stage
|
|
||||||
-> Blocks are never dirty, so why not evict right away
|
|
||||||
|
|
||||||
2) Branch not taken speculation
|
|
||||||
|
|
||||||
3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator
|
|
||||||
|
|
||||||
|
|
||||||
Vector:
|
|
||||||
1) Cycle accurate simulator (would require Cache Simulator)
|
|
||||||
@@ -22,7 +22,7 @@ rm -f blackbox.*.cache
|
|||||||
unittest()
|
unittest()
|
||||||
{
|
{
|
||||||
make -C tests/unittest run
|
make -C tests/unittest run
|
||||||
make -C hw/unittest
|
make -C hw/unittest > /dev/null
|
||||||
}
|
}
|
||||||
|
|
||||||
isa()
|
isa()
|
||||||
@@ -31,33 +31,36 @@ echo "begin isa tests..."
|
|||||||
|
|
||||||
make -C tests/riscv/isa run-simx
|
make -C tests/riscv/isa run-simx
|
||||||
make -C tests/riscv/isa run-rtlsim
|
make -C tests/riscv/isa run-rtlsim
|
||||||
CONFIGS="-DDPI_DISABLE" make -C tests/riscv/isa run-rtlsim
|
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DDPI_DISABLE" make -C sim/rtlsim > /dev/null
|
||||||
|
make -C tests/riscv/isa run-rtlsim
|
||||||
|
|
||||||
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-32f
|
make -C tests/riscv/isa run-rtlsim-32f
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-32f
|
make -C tests/riscv/isa run-rtlsim-32f
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-32f
|
make -C tests/riscv/isa run-rtlsim-32f
|
||||||
|
|
||||||
if [ "$XLEN" == "64" ]
|
if [ "$XLEN" == "64" ]
|
||||||
then
|
then
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-64f
|
make -C tests/riscv/isa run-rtlsim-64f
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-64d || true
|
make -C tests/riscv/isa run-rtlsim-64d || true
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-64f
|
make -C tests/riscv/isa run-rtlsim-64f
|
||||||
|
|
||||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim
|
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-rtlsim-64fx
|
make -C tests/riscv/isa run-rtlsim-64fx
|
||||||
fi
|
fi
|
||||||
|
|
||||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
# restore default prebuilt configuration
|
||||||
|
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||||
|
|
||||||
echo "isa tests done!"
|
echo "isa tests done!"
|
||||||
}
|
}
|
||||||
@@ -134,15 +137,16 @@ debug()
|
|||||||
echo "begin debugging tests..."
|
echo "begin debugging tests..."
|
||||||
|
|
||||||
# test CSV trace generation
|
# test CSV trace generation
|
||||||
make -C sim/simx clean && DEBUG=3 make -C sim/simx
|
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
|
||||||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim
|
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||||
diff trace_rtlsim.csv trace_simx.csv
|
diff trace_rtlsim.csv trace_simx.csv
|
||||||
make -C sim/simx clean && make -C sim/simx
|
# restore default prebuilt configuration
|
||||||
make -C sim/rtlsim clean && make -C sim/rtlsim
|
make -C sim/simx clean && make -C sim/simx > /dev/null
|
||||||
|
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||||
|
|
||||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf=1 --app=demo --args="-n1"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Copyright © 2019-2023
|
# Copyright 2019-2023
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
# You may obtain a copy of the License at
|
# You may obtain a copy of the License at
|
||||||
@@ -34,11 +34,11 @@ def monitor(stop):
|
|||||||
break
|
break
|
||||||
|
|
||||||
def execute(command):
|
def execute(command):
|
||||||
process = subprocess.Popen(command, stdout=subprocess.PIPE)
|
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||||
while True:
|
while True:
|
||||||
output = process.stdout.readline()
|
output = process.stdout.readline()
|
||||||
if output:
|
if output:
|
||||||
line = output.decode('ascii').rstrip()
|
line = output.decode('utf-8').rstrip()
|
||||||
print(">>> " + line)
|
print(">>> " + line)
|
||||||
process.stdout.flush()
|
process.stdout.flush()
|
||||||
ret = process.poll()
|
ret = process.poll()
|
||||||
|
|||||||
@@ -136,6 +136,18 @@
|
|||||||
`endif
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
`ifdef L2_ENABLE
|
||||||
|
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
||||||
|
`else
|
||||||
|
`define L2_LINE_SIZE `L1_LINE_SIZE
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifdef L3_ENABLE
|
||||||
|
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||||
|
`else
|
||||||
|
`define L3_LINE_SIZE `L2_LINE_SIZE
|
||||||
|
`endif
|
||||||
|
|
||||||
`ifdef XLEN_64
|
`ifdef XLEN_64
|
||||||
|
|
||||||
`ifndef STARTUP_ADDR
|
`ifndef STARTUP_ADDR
|
||||||
|
|||||||
@@ -291,16 +291,11 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`ifdef L2_ENABLE
|
`ifdef ICACHE_ENABLE
|
||||||
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
|
`define L1_ENABLE
|
||||||
`else
|
|
||||||
`define L2_LINE_SIZE `L1_LINE_SIZE
|
|
||||||
`endif
|
`endif
|
||||||
|
`ifdef DCACHE_ENABLE
|
||||||
`ifdef L3_ENABLE
|
`define L1_ENABLE
|
||||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
|
||||||
`else
|
|
||||||
`define L3_LINE_SIZE `L2_LINE_SIZE
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||||
|
|||||||
12
hw/rtl/cache/VX_cache_bypass.sv
vendored
12
hw/rtl/cache/VX_cache_bypass.sv
vendored
@@ -130,20 +130,20 @@ module VX_cache_bypass #(
|
|||||||
|
|
||||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_idxs;
|
||||||
|
|
||||||
wire core_req_in_fire = | (core_req_valid_in & core_req_ready_in);
|
wire core_req_nc_ready = ~mem_req_valid_in && mem_req_ready_out;
|
||||||
|
|
||||||
VX_generic_arbiter #(
|
VX_generic_arbiter #(
|
||||||
.NUM_REQS (NUM_REQS),
|
.NUM_REQS (NUM_REQS),
|
||||||
.TYPE (PASSTHRU ? "R" : "P"),
|
.TYPE (PASSTHRU ? "R" : "P"),
|
||||||
.LOCK_ENABLE (1)
|
.LOCK_ENABLE (1)
|
||||||
) req_arb (
|
) core_req_nc_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (core_req_in_fire),
|
|
||||||
.requests (core_req_valid_in_nc),
|
.requests (core_req_valid_in_nc),
|
||||||
.grant_index (core_req_nc_idx),
|
.grant_index (core_req_nc_idx),
|
||||||
.grant_onehot (core_req_nc_sel),
|
.grant_onehot (core_req_nc_sel),
|
||||||
.grant_valid (core_req_nc_valid)
|
.grant_valid (core_req_nc_valid),
|
||||||
|
.grant_unlock (core_req_nc_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_idxs;
|
||||||
@@ -164,7 +164,7 @@ module VX_cache_bypass #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i])
|
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||||
: core_req_ready_out[i];
|
: core_req_ready_out[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -533,8 +533,9 @@ module VX_decode #(
|
|||||||
assign decode_sched_if.valid = fetch_fire;
|
assign decode_sched_if.valid = fetch_fire;
|
||||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||||
assign decode_sched_if.is_wstall = is_wstall;
|
assign decode_sched_if.is_wstall = is_wstall;
|
||||||
|
`ifndef L1_ENABLE
|
||||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||||
|
`endif
|
||||||
|
|
||||||
`ifdef DBG_TRACE_CORE_PIPELINE_VCS
|
`ifdef DBG_TRACE_CORE_PIPELINE_VCS
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||||||
);
|
);
|
||||||
`UNUSED_PARAM (CORE_ID)
|
`UNUSED_PARAM (CORE_ID)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
|
||||||
|
|
||||||
wire icache_req_valid;
|
wire icache_req_valid;
|
||||||
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
|
||||||
@@ -44,8 +43,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||||
|
|
||||||
wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
|
||||||
|
|
||||||
assign req_tag = schedule_if.data.wid;
|
assign req_tag = schedule_if.data.wid;
|
||||||
|
|
||||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||||
@@ -68,9 +65,12 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||||||
.rdata ({rsp_PC, rsp_tmask})
|
.rdata ({rsp_PC, rsp_tmask})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
`ifndef L1_ENABLE
|
||||||
// Ensure that the ibuffer doesn't fill up.
|
// Ensure that the ibuffer doesn't fill up.
|
||||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||||
|
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||||
|
|
||||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||||
VX_pending_size #(
|
VX_pending_size #(
|
||||||
@@ -85,13 +85,16 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||||||
`UNUSED_PIN (empty)
|
`UNUSED_PIN (empty)
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||||
|
`else
|
||||||
|
wire ibuf_ready = 1'b1;
|
||||||
|
`endif
|
||||||
|
|
||||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||||
|
|
||||||
// Icache Request
|
// Icache Request
|
||||||
|
|
||||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
|
||||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||||
|
|||||||
@@ -66,8 +66,9 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||||||
.valid_out (ibuffer_if[i].valid),
|
.valid_out (ibuffer_if[i].valid),
|
||||||
.ready_out(ibuffer_if[i].ready)
|
.ready_out(ibuffer_if[i].ready)
|
||||||
);
|
);
|
||||||
|
`ifndef L1_ENABLE
|
||||||
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -47,8 +47,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||||
|
|
||||||
reg valid_out_r;
|
|
||||||
reg [DATAW-1:0] data_out_r;
|
|
||||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||||
@@ -60,7 +58,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
reg rs3_ready, rs3_ready_n;
|
reg rs3_ready, rs3_ready_n;
|
||||||
reg data_ready, data_ready_n;
|
reg data_ready, data_ready_n;
|
||||||
|
|
||||||
wire ready_out = operands_if[i].ready;
|
wire stg_valid_in, stg_ready_in;
|
||||||
|
|
||||||
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
wire is_rs1_zero = (scoreboard_if[i].data.rs1 == 0);
|
||||||
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
wire is_rs2_zero = (scoreboard_if[i].data.rs2 == 0);
|
||||||
@@ -85,7 +83,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
case (state)
|
case (state)
|
||||||
STATE_IDLE: begin
|
STATE_IDLE: begin
|
||||||
if (valid_out_r && ready_out) begin
|
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||||
data_ready_n = 0;
|
data_ready_n = 0;
|
||||||
end
|
end
|
||||||
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
if (scoreboard_if[i].valid && data_ready_n == 0) begin
|
||||||
@@ -173,37 +171,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= STATE_IDLE;
|
state <= STATE_IDLE;
|
||||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||||
data_ready <= 0;
|
data_ready <= 0;
|
||||||
valid_out_r <= 0;
|
|
||||||
end else begin
|
end else begin
|
||||||
state <= state_n;
|
state <= state_n;
|
||||||
cache_eop <= cache_eop_n;
|
cache_eop <= cache_eop_n;
|
||||||
data_ready <= data_ready_n;
|
data_ready <= data_ready_n;
|
||||||
if (~valid_out_r) begin
|
|
||||||
valid_out_r <= scoreboard_if[i].valid && data_ready;
|
|
||||||
end else if (ready_out) begin
|
|
||||||
valid_out_r <= 0;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if (~valid_out_r) begin
|
|
||||||
data_out_r <= {scoreboard_if[i].data.uuid,
|
|
||||||
scoreboard_if[i].data.wis,
|
|
||||||
scoreboard_if[i].data.tmask,
|
|
||||||
scoreboard_if[i].data.PC,
|
|
||||||
scoreboard_if[i].data.wb,
|
|
||||||
scoreboard_if[i].data.ex_type,
|
|
||||||
scoreboard_if[i].data.op_type,
|
|
||||||
scoreboard_if[i].data.op_mod,
|
|
||||||
scoreboard_if[i].data.use_PC,
|
|
||||||
scoreboard_if[i].data.use_imm,
|
|
||||||
scoreboard_if[i].data.imm,
|
|
||||||
scoreboard_if[i].data.rd};
|
|
||||||
end
|
|
||||||
|
|
||||||
gpr_rd_rid <= gpr_rd_rid_n;
|
gpr_rd_rid <= gpr_rd_rid_n;
|
||||||
gpr_rd_wis <= gpr_rd_wis_n;
|
gpr_rd_wis <= gpr_rd_wis_n;
|
||||||
rs2_ready <= rs2_ready_n;
|
rs2_ready <= rs2_ready_n;
|
||||||
@@ -216,10 +192,35 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
cache_data <= cache_data_n;
|
cache_data <= cache_data_n;
|
||||||
cache_reg <= cache_reg_n;
|
cache_reg <= cache_reg_n;
|
||||||
cache_tmask <= cache_tmask_n;
|
cache_tmask <= cache_tmask_n;
|
||||||
end
|
end
|
||||||
|
|
||||||
assign operands_if[i].valid = valid_out_r;
|
assign stg_valid_in = scoreboard_if[i].valid && data_ready;
|
||||||
assign {operands_if[i].data.uuid,
|
assign scoreboard_if[i].ready = stg_ready_in && data_ready;
|
||||||
|
|
||||||
|
VX_toggle_buffer #(
|
||||||
|
.DATAW (DATAW)
|
||||||
|
) staging_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (stg_valid_in),
|
||||||
|
.data_in ({
|
||||||
|
scoreboard_if[i].data.uuid,
|
||||||
|
scoreboard_if[i].data.wis,
|
||||||
|
scoreboard_if[i].data.tmask,
|
||||||
|
scoreboard_if[i].data.PC,
|
||||||
|
scoreboard_if[i].data.wb,
|
||||||
|
scoreboard_if[i].data.ex_type,
|
||||||
|
scoreboard_if[i].data.op_type,
|
||||||
|
scoreboard_if[i].data.op_mod,
|
||||||
|
scoreboard_if[i].data.use_PC,
|
||||||
|
scoreboard_if[i].data.use_imm,
|
||||||
|
scoreboard_if[i].data.imm,
|
||||||
|
scoreboard_if[i].data.rd
|
||||||
|
}),
|
||||||
|
.ready_in (stg_ready_in),
|
||||||
|
.valid_out (operands_if[i].valid),
|
||||||
|
.data_out ({
|
||||||
|
operands_if[i].data.uuid,
|
||||||
operands_if[i].data.wis,
|
operands_if[i].data.wis,
|
||||||
operands_if[i].data.tmask,
|
operands_if[i].data.tmask,
|
||||||
operands_if[i].data.PC,
|
operands_if[i].data.PC,
|
||||||
@@ -230,13 +231,15 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||||||
operands_if[i].data.use_PC,
|
operands_if[i].data.use_PC,
|
||||||
operands_if[i].data.use_imm,
|
operands_if[i].data.use_imm,
|
||||||
operands_if[i].data.imm,
|
operands_if[i].data.imm,
|
||||||
operands_if[i].data.rd} = data_out_r;
|
operands_if[i].data.rd
|
||||||
|
}),
|
||||||
|
.ready_out (operands_if[i].ready)
|
||||||
|
);
|
||||||
|
|
||||||
assign operands_if[i].data.rs1_data = rs1_data;
|
assign operands_if[i].data.rs1_data = rs1_data;
|
||||||
assign operands_if[i].data.rs2_data = rs2_data;
|
assign operands_if[i].data.rs2_data = rs2_data;
|
||||||
assign operands_if[i].data.rs3_data = rs3_data;
|
assign operands_if[i].data.rs3_data = rs3_data;
|
||||||
|
|
||||||
assign scoreboard_if[i].ready = ~valid_out_r && data_ready;
|
|
||||||
|
|
||||||
// GPR banks
|
// GPR banks
|
||||||
|
|
||||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (scoreboard_if[i].data.op_type)
|
case (ibuffer_if[i].data.op_type)
|
||||||
`INST_SFU_CSRRW,
|
`INST_SFU_CSRRW,
|
||||||
`INST_SFU_CSRRS,
|
`INST_SFU_CSRRS,
|
||||||
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
`INST_SFU_CSRRC: sfu_type = `SFU_CSRS;
|
||||||
@@ -152,51 +152,47 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
reg [DATAW-1:0] data_out_r;
|
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||||
reg valid_out_r;
|
wire operands_ready = ~(| operands_busy);
|
||||||
wire ready_out;
|
|
||||||
|
wire stg_valid_in, stg_ready_in;
|
||||||
|
assign stg_valid_in = ibuffer_if[i].valid && operands_ready;
|
||||||
|
assign ibuffer_if[i].ready = stg_ready_in && operands_ready;
|
||||||
|
|
||||||
wire [3:0] ready_masks = ~{inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
VX_stream_buffer #(
|
||||||
wire deps_ready = (& ready_masks);
|
.DATAW (DATAW)
|
||||||
|
) staging_buffer (
|
||||||
wire valid_in = ibuffer_if[i].valid && deps_ready;
|
.clk (clk),
|
||||||
wire ready_in = ~valid_out_r && deps_ready;
|
.reset (reset),
|
||||||
wire [DATAW-1:0] data_in = ibuffer_if[i].data;
|
.valid_in (stg_valid_in),
|
||||||
|
.data_in (ibuffer_if[i].data),
|
||||||
assign ready_out = scoreboard_if[i].ready;
|
.ready_in (stg_ready_in),
|
||||||
|
.valid_out (scoreboard_if[i].valid),
|
||||||
|
.data_out (scoreboard_if[i].data),
|
||||||
|
.ready_out (scoreboard_if[i].ready)
|
||||||
|
);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
valid_out_r <= 0;
|
|
||||||
inuse_regs <= '0;
|
inuse_regs <= '0;
|
||||||
end else begin
|
end else begin
|
||||||
if (writeback_fire) begin
|
if (writeback_fire) begin
|
||||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||||
end
|
end
|
||||||
if (~valid_out_r) begin
|
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||||
valid_out_r <= valid_in;
|
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
|
||||||
end else if (ready_out) begin
|
|
||||||
if (scoreboard_if[i].data.wb) begin
|
|
||||||
inuse_regs[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= 1;
|
|
||||||
`ifdef PERF_ENABLE
|
|
||||||
inuse_units[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= scoreboard_if[i].data.ex_type;
|
|
||||||
if (scoreboard_if[i].data.ex_type == `EX_SFU) begin
|
|
||||||
inuse_sfu[scoreboard_if[i].data.wis][scoreboard_if[i].data.rd] <= sfu_type;
|
|
||||||
end
|
|
||||||
`endif
|
|
||||||
end
|
|
||||||
valid_out_r <= 0;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (~valid_out_r) begin
|
`ifdef PERF_ENABLE
|
||||||
data_out_r <= data_in;
|
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||||
|
inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
|
||||||
|
if (ibuffer_if[i].data.ex_type == `EX_SFU) begin
|
||||||
|
inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
assign ibuffer_if[i].ready = ready_in;
|
|
||||||
assign scoreboard_if[i].valid = valid_out_r;
|
|
||||||
assign scoreboard_if[i].data = data_out_r;
|
|
||||||
|
|
||||||
`ifdef SIMULATION
|
`ifdef SIMULATION
|
||||||
reg [31:0] timeout_ctr;
|
reg [31:0] timeout_ctr;
|
||||||
|
|
||||||
@@ -208,7 +204,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||||
~ready_masks, ibuffer_if[i].data.uuid));
|
operands_busy, ibuffer_if[i].data.uuid));
|
||||||
`endif
|
`endif
|
||||||
timeout_ctr <= timeout_ctr + 1;
|
timeout_ctr <= timeout_ctr + 1;
|
||||||
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
end else if (ibuffer_if[i].valid && ibuffer_if[i].ready) begin
|
||||||
@@ -220,7 +216,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||||
~ready_masks, ibuffer_if[i].data.uuid));
|
operands_busy, ibuffer_if[i].data.uuid));
|
||||||
|
|
||||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||||
|
|||||||
@@ -36,21 +36,26 @@ interface VX_decode_if ();
|
|||||||
logic valid;
|
logic valid;
|
||||||
data_t data;
|
data_t data;
|
||||||
logic ready;
|
logic ready;
|
||||||
|
`ifndef L1_ENABLE
|
||||||
wire [`ISSUE_WIDTH-1:0] ibuf_pop;
|
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||||
|
`endif
|
||||||
|
|
||||||
modport master (
|
modport master (
|
||||||
output valid,
|
output valid,
|
||||||
output data,
|
output data,
|
||||||
input ibuf_pop,
|
|
||||||
input ready
|
input ready
|
||||||
|
`ifndef L1_ENABLE
|
||||||
|
, input ibuf_pop
|
||||||
|
`endif
|
||||||
);
|
);
|
||||||
|
|
||||||
modport slave (
|
modport slave (
|
||||||
input valid,
|
input valid,
|
||||||
input data,
|
input data,
|
||||||
output ibuf_pop,
|
|
||||||
output ready
|
output ready
|
||||||
|
`ifndef L1_ENABLE
|
||||||
|
, output ibuf_pop
|
||||||
|
`endif
|
||||||
);
|
);
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -26,21 +26,26 @@ interface VX_fetch_if ();
|
|||||||
logic valid;
|
logic valid;
|
||||||
data_t data;
|
data_t data;
|
||||||
logic ready;
|
logic ready;
|
||||||
|
`ifndef L1_ENABLE
|
||||||
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||||
|
`endif
|
||||||
|
|
||||||
modport master (
|
modport master (
|
||||||
output valid,
|
output valid,
|
||||||
output data,
|
output data,
|
||||||
input ibuf_pop,
|
|
||||||
input ready
|
input ready
|
||||||
|
`ifndef L1_ENABLE
|
||||||
|
, input ibuf_pop
|
||||||
|
`endif
|
||||||
);
|
);
|
||||||
|
|
||||||
modport slave (
|
modport slave (
|
||||||
input valid,
|
input valid,
|
||||||
input data,
|
input data,
|
||||||
output ibuf_pop,
|
|
||||||
output ready
|
output ready
|
||||||
|
`ifndef L1_ENABLE
|
||||||
|
, output ibuf_pop
|
||||||
|
`endif
|
||||||
);
|
);
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -11,6 +11,14 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
// A bypass elastic buffer operates at full bandwidth: a pop can happen in the same cycle that data is pushed into an empty buffer.
|
||||||
|
// It has the following benefits:
|
||||||
|
// + Full-bandwidth throughput
|
||||||
|
// + Uses only one register for storage
|
||||||
|
// It has the following limitations:
|
||||||
|
// + data_out is not registered
|
||||||
|
// + ready_in and ready_out are coupled
|
||||||
|
|
||||||
`include "VX_platform.vh"
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
`TRACING_OFF
|
`TRACING_OFF
|
||||||
@@ -35,30 +43,27 @@ module VX_bypass_buffer #(
|
|||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-1:0] buffer;
|
reg [DATAW-1:0] buffer;
|
||||||
reg buffer_valid;
|
reg has_data;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
buffer_valid <= 0;
|
has_data <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (ready_out) begin
|
if (ready_out) begin
|
||||||
buffer_valid <= 0;
|
has_data <= 0;
|
||||||
end
|
end else if (~has_data) begin
|
||||||
if (valid_in && ~ready_out) begin
|
has_data <= valid_in;
|
||||||
`ASSERT(!buffer_valid, ("runtime error"));
|
|
||||||
buffer_valid <= 1;
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
if (~has_data) begin
|
||||||
if (valid_in && ~ready_out) begin
|
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign ready_in = ready_out || !buffer_valid;
|
assign ready_in = ready_out || ~has_data;
|
||||||
assign data_out = buffer_valid ? buffer : data_in;
|
assign data_out = has_data ? buffer : data_in;
|
||||||
assign valid_out = valid_in || buffer_valid;
|
assign valid_out = valid_in || has_data;
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
`TRACING_ON
|
`TRACING_ON
|
||||||
|
|||||||
@@ -21,15 +21,12 @@ module VX_cyclic_arbiter #(
|
|||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire [NUM_REQS-1:0] requests,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
input wire unlock,
|
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid,
|
||||||
|
input wire grant_unlock
|
||||||
);
|
);
|
||||||
`UNUSED_PARAM (LOCK_ENABLE)
|
|
||||||
`UNUSED_VAR (unlock)
|
|
||||||
|
|
||||||
if (NUM_REQS == 1) begin
|
if (NUM_REQS == 1) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
@@ -51,7 +48,7 @@ module VX_cyclic_arbiter #(
|
|||||||
end else begin
|
end else begin
|
||||||
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin
|
||||||
grant_index_r <= '0;
|
grant_index_r <= '0;
|
||||||
end else begin
|
end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin
|
||||||
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
grant_index_r <= grant_index_r + LOG_NUM_REQS'(1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -42,34 +42,33 @@ module VX_elastic_buffer #(
|
|||||||
|
|
||||||
end else if (SIZE == 1) begin
|
end else if (SIZE == 1) begin
|
||||||
|
|
||||||
wire stall = valid_out && ~ready_out;
|
VX_pipe_buffer #(
|
||||||
|
.DATAW (DATAW)
|
||||||
VX_pipe_register #(
|
) pipe_buffer (
|
||||||
.DATAW (1 + DATAW),
|
.clk (clk),
|
||||||
.RESETW (1)
|
.reset (reset),
|
||||||
) pipe_register (
|
.valid_in (valid_in),
|
||||||
.clk (clk),
|
.data_in (data_in),
|
||||||
.reset (reset),
|
.ready_in (ready_in),
|
||||||
.enable (~stall),
|
.valid_out (valid_out),
|
||||||
.data_in ({valid_in, data_in}),
|
.data_out (data_out),
|
||||||
.data_out ({valid_out, data_out})
|
.ready_out (ready_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ready_in = ~stall;
|
|
||||||
|
|
||||||
end else if (SIZE == 2) begin
|
end else if (SIZE == 2) begin
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
|
.FULL_BW (OUT_REG != 2),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) skid_buffer (
|
) skid_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (valid_in),
|
.valid_in (valid_in),
|
||||||
|
.data_in (data_in),
|
||||||
.ready_in (ready_in),
|
.ready_in (ready_in),
|
||||||
.data_in (data_in),
|
|
||||||
.data_out (data_out),
|
|
||||||
.valid_out (valid_out),
|
.valid_out (valid_out),
|
||||||
|
.data_out (data_out),
|
||||||
.ready_out (ready_out)
|
.ready_out (ready_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -111,10 +110,10 @@ module VX_elastic_buffer #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (~empty),
|
.valid_in (~empty),
|
||||||
.ready_in (ready_out_t),
|
|
||||||
.data_in (data_out_t),
|
.data_in (data_out_t),
|
||||||
.data_out (data_out),
|
.ready_in (ready_out_t),
|
||||||
.valid_out (valid_out),
|
.valid_out (valid_out),
|
||||||
|
.data_out (data_out),
|
||||||
.ready_out (ready_out)
|
.ready_out (ready_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -21,17 +21,17 @@ module VX_fair_arbiter #(
|
|||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire unlock,
|
|
||||||
input wire [NUM_REQS-1:0] requests,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid,
|
||||||
|
input wire grant_unlock
|
||||||
);
|
);
|
||||||
if (NUM_REQS == 1) begin
|
if (NUM_REQS == 1) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
`UNUSED_VAR (unlock)
|
`UNUSED_VAR (grant_unlock)
|
||||||
|
|
||||||
assign grant_index = '0;
|
assign grant_index = '0;
|
||||||
assign grant_onehot = requests;
|
assign grant_onehot = requests;
|
||||||
@@ -48,18 +48,14 @@ module VX_fair_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
buffer <= '0;
|
buffer <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
buffer <= buffer_n;
|
buffer <= buffer_n;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_priority_arbiter #(
|
VX_priority_arbiter #(
|
||||||
.NUM_REQS (NUM_REQS),
|
.NUM_REQS (NUM_REQS)
|
||||||
.LOCK_ENABLE (LOCK_ENABLE)
|
|
||||||
) priority_arbiter (
|
) priority_arbiter (
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests_qual),
|
.requests (requests_qual),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot (grant_onehot),
|
.grant_onehot (grant_onehot),
|
||||||
|
|||||||
@@ -21,22 +21,23 @@ module VX_generic_arbiter #(
|
|||||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire unlock,
|
|
||||||
input wire [NUM_REQS-1:0] requests,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid,
|
||||||
|
input wire grant_unlock
|
||||||
);
|
);
|
||||||
if (TYPE == "P") begin
|
if (TYPE == "P") begin
|
||||||
|
|
||||||
|
`UNUSED_PARAM (LOCK_ENABLE)
|
||||||
|
`UNUSED_VAR (clk)
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
`UNUSED_VAR (grant_unlock)
|
||||||
|
|
||||||
VX_priority_arbiter #(
|
VX_priority_arbiter #(
|
||||||
.NUM_REQS (NUM_REQS),
|
.NUM_REQS (NUM_REQS)
|
||||||
.LOCK_ENABLE (LOCK_ENABLE)
|
|
||||||
) priority_arbiter (
|
) priority_arbiter (
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests),
|
.requests (requests),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
@@ -50,12 +51,12 @@ module VX_generic_arbiter #(
|
|||||||
.LOCK_ENABLE (LOCK_ENABLE)
|
.LOCK_ENABLE (LOCK_ENABLE)
|
||||||
) rr_arbiter (
|
) rr_arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests),
|
.requests (requests),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot (grant_onehot)
|
.grant_onehot (grant_onehot),
|
||||||
|
.grant_unlock (grant_unlock)
|
||||||
);
|
);
|
||||||
|
|
||||||
end else if (TYPE == "F") begin
|
end else if (TYPE == "F") begin
|
||||||
@@ -66,11 +67,11 @@ module VX_generic_arbiter #(
|
|||||||
) fair_arbiter (
|
) fair_arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests),
|
.requests (requests),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot (grant_onehot)
|
.grant_onehot (grant_onehot),
|
||||||
|
.grant_unlock (grant_unlock)
|
||||||
);
|
);
|
||||||
|
|
||||||
end else if (TYPE == "M") begin
|
end else if (TYPE == "M") begin
|
||||||
@@ -81,11 +82,11 @@ module VX_generic_arbiter #(
|
|||||||
) matrix_arbiter (
|
) matrix_arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests),
|
.requests (requests),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot (grant_onehot)
|
.grant_onehot (grant_onehot),
|
||||||
|
.grant_unlock (grant_unlock)
|
||||||
);
|
);
|
||||||
|
|
||||||
end else if (TYPE == "C") begin
|
end else if (TYPE == "C") begin
|
||||||
@@ -96,11 +97,11 @@ module VX_generic_arbiter #(
|
|||||||
) cyclic_arbiter (
|
) cyclic_arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (unlock),
|
|
||||||
.requests (requests),
|
.requests (requests),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
.grant_onehot (grant_onehot)
|
.grant_onehot (grant_onehot),
|
||||||
|
.grant_unlock (grant_unlock)
|
||||||
);
|
);
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|||||||
@@ -20,18 +20,18 @@ module VX_matrix_arbiter #(
|
|||||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire unlock,
|
|
||||||
input wire [NUM_REQS-1:0] requests,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid,
|
||||||
|
input wire grant_unlock
|
||||||
);
|
);
|
||||||
if (NUM_REQS == 1) begin
|
if (NUM_REQS == 1) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
`UNUSED_VAR (unlock)
|
`UNUSED_VAR (grant_unlock)
|
||||||
|
|
||||||
assign grant_index = '0;
|
assign grant_index = '0;
|
||||||
assign grant_onehot = requests;
|
assign grant_onehot = requests;
|
||||||
@@ -71,18 +71,18 @@ module VX_matrix_arbiter #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
if (LOCK_ENABLE == 0) begin
|
if (LOCK_ENABLE == 0) begin
|
||||||
`UNUSED_VAR (unlock)
|
`UNUSED_VAR (grant_unlock)
|
||||||
assign grant_onehot = grant_unqual;
|
assign grant_onehot = grant_unqual;
|
||||||
end else begin
|
end else begin
|
||||||
reg [NUM_REQS-1:0] grant_unqual_prev;
|
reg [NUM_REQS-1:0] grant_unqual_prev;
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
grant_unqual_prev <= '0;
|
grant_unqual_prev <= '0;
|
||||||
end else if (unlock) begin
|
end else if (grant_unlock) begin
|
||||||
grant_unqual_prev <= grant_unqual;
|
grant_unqual_prev <= grant_unqual;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign grant_onehot = unlock ? grant_unqual : grant_unqual_prev;
|
assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev;
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_onehot_encoder #(
|
VX_onehot_encoder #(
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ module VX_mem_rsp_sel #(
|
|||||||
parameter TAG_SEL_BITS = 0,
|
parameter TAG_SEL_BITS = 0,
|
||||||
parameter OUT_REG = 0
|
parameter OUT_REG = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
// input response
|
// input response
|
||||||
@@ -46,18 +46,20 @@ input wire clk,
|
|||||||
|
|
||||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||||
wire grant_valid;
|
wire grant_valid;
|
||||||
wire rsp_fire;
|
wire grant_ready;
|
||||||
|
|
||||||
VX_priority_arbiter #(
|
VX_generic_arbiter #(
|
||||||
.NUM_REQS (NUM_REQS)
|
.NUM_REQS (NUM_REQS),
|
||||||
|
.LOCK_ENABLE (1),
|
||||||
|
.TYPE ("P")
|
||||||
) arbiter (
|
) arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.unlock (rsp_fire),
|
|
||||||
.requests (rsp_valid_in),
|
.requests (rsp_valid_in),
|
||||||
.grant_valid (grant_valid),
|
.grant_valid (grant_valid),
|
||||||
.grant_index (grant_index),
|
.grant_index (grant_index),
|
||||||
`UNUSED_PIN (grant_onehot)
|
`UNUSED_PIN (grant_onehot),
|
||||||
|
.grant_unlock(grant_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [NUM_REQS-1:0] rsp_valid_sel;
|
reg [NUM_REQS-1:0] rsp_valid_sel;
|
||||||
@@ -78,7 +80,7 @@ input wire clk,
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign rsp_fire = grant_valid && rsp_ready_unqual;
|
assign grant_ready = rsp_ready_unqual;
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
||||||
|
|||||||
63
hw/rtl/libs/VX_pipe_buffer.sv
Normal file
63
hw/rtl/libs/VX_pipe_buffer.sv
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
// Copyright 2024 blaise
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// A pipelined elastic buffer operates at full bandwidth: a push can happen while the buffer is not empty, provided its data is being popped in the same cycle
|
||||||
|
// It has the following benefits:
|
||||||
|
// + Full-bandwidth throughput
|
||||||
|
// + uses only one register for storage
|
||||||
|
// + data_out is fully registered
|
||||||
|
// It has the following limitations:
|
||||||
|
// + ready_in and ready_out are coupled
|
||||||
|
|
||||||
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
`TRACING_OFF
|
||||||
|
module VX_pipe_buffer #(
|
||||||
|
parameter DATAW = 1,
|
||||||
|
parameter PASSTHRU = 0
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
input wire valid_in,
|
||||||
|
output wire ready_in,
|
||||||
|
input wire [DATAW-1:0] data_in,
|
||||||
|
output wire [DATAW-1:0] data_out,
|
||||||
|
input wire ready_out,
|
||||||
|
output wire valid_out
|
||||||
|
);
|
||||||
|
if (PASSTHRU != 0) begin
|
||||||
|
`UNUSED_VAR (clk)
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
assign ready_in = ready_out;
|
||||||
|
assign valid_out = valid_in;
|
||||||
|
assign data_out = data_in;
|
||||||
|
end else begin
|
||||||
|
wire stall = valid_out && ~ready_out;
|
||||||
|
|
||||||
|
VX_pipe_register #(
|
||||||
|
.DATAW (1 + DATAW),
|
||||||
|
.RESETW (1)
|
||||||
|
) pipe_register (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (~stall),
|
||||||
|
.data_in ({valid_in, data_in}),
|
||||||
|
.data_out ({valid_out, data_out})
|
||||||
|
);
|
||||||
|
|
||||||
|
assign ready_in = ~stall;
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
`TRACING_ON
|
||||||
@@ -16,22 +16,13 @@
|
|||||||
`TRACING_OFF
|
`TRACING_OFF
|
||||||
module VX_priority_arbiter #(
|
module VX_priority_arbiter #(
|
||||||
parameter NUM_REQS = 1,
|
parameter NUM_REQS = 1,
|
||||||
parameter LOCK_ENABLE = 0,
|
|
||||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
input wire reset,
|
|
||||||
input wire [NUM_REQS-1:0] requests,
|
|
||||||
input wire unlock,
|
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid
|
||||||
);
|
);
|
||||||
`UNUSED_PARAM (LOCK_ENABLE)
|
|
||||||
`UNUSED_VAR (clk)
|
|
||||||
`UNUSED_VAR (reset)
|
|
||||||
`UNUSED_VAR (unlock)
|
|
||||||
|
|
||||||
if (NUM_REQS == 1) begin
|
if (NUM_REQS == 1) begin
|
||||||
|
|
||||||
assign grant_index = '0;
|
assign grant_index = '0;
|
||||||
|
|||||||
@@ -21,18 +21,18 @@ module VX_rr_arbiter #(
|
|||||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire unlock,
|
|
||||||
input wire [NUM_REQS-1:0] requests,
|
input wire [NUM_REQS-1:0] requests,
|
||||||
output wire [LOG_NUM_REQS-1:0] grant_index,
|
output wire [LOG_NUM_REQS-1:0] grant_index,
|
||||||
output wire [NUM_REQS-1:0] grant_onehot,
|
output wire [NUM_REQS-1:0] grant_onehot,
|
||||||
output wire grant_valid
|
output wire grant_valid,
|
||||||
|
input wire grant_unlock
|
||||||
);
|
);
|
||||||
if (NUM_REQS == 1) begin
|
if (NUM_REQS == 1) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
`UNUSED_VAR (unlock)
|
`UNUSED_VAR (grant_unlock)
|
||||||
|
|
||||||
assign grant_index = '0;
|
assign grant_index = '0;
|
||||||
assign grant_onehot = requests;
|
assign grant_onehot = requests;
|
||||||
@@ -55,7 +55,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -85,7 +85,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -121,7 +121,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -165,7 +165,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -219,7 +219,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -285,7 +285,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -365,7 +365,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -399,7 +399,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
pointer_reg <= {NUM_REQS{1'b1}};
|
pointer_reg <= {NUM_REQS{1'b1}};
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
if (|req_masked) begin
|
if (|req_masked) begin
|
||||||
pointer_reg <= mask_higher_pri_regs;
|
pointer_reg <= mask_higher_pri_regs;
|
||||||
end else if (|requests) begin
|
end else if (|requests) begin
|
||||||
@@ -443,7 +443,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= '0;
|
state <= '0;
|
||||||
end else if (!LOCK_ENABLE || unlock) begin
|
end else if (!LOCK_ENABLE || grant_unlock) begin
|
||||||
state <= grant_index_r;
|
state <= grant_index_r;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
module VX_skid_buffer #(
|
module VX_skid_buffer #(
|
||||||
parameter DATAW = 32,
|
parameter DATAW = 32,
|
||||||
parameter PASSTHRU = 0,
|
parameter PASSTHRU = 0,
|
||||||
|
parameter FULL_BW = 0,
|
||||||
parameter OUT_REG = 0
|
parameter OUT_REG = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
@@ -30,8 +31,6 @@ module VX_skid_buffer #(
|
|||||||
input wire ready_out,
|
input wire ready_out,
|
||||||
output wire valid_out
|
output wire valid_out
|
||||||
);
|
);
|
||||||
`STATIC_ASSERT ((OUT_REG <= 2), ("invalid parameter"))
|
|
||||||
|
|
||||||
if (PASSTHRU != 0) begin
|
if (PASSTHRU != 0) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
@@ -41,112 +40,36 @@ module VX_skid_buffer #(
|
|||||||
assign data_out = data_in;
|
assign data_out = data_in;
|
||||||
assign ready_in = ready_out;
|
assign ready_in = ready_out;
|
||||||
|
|
||||||
end else if (OUT_REG == 0) begin
|
end else if (FULL_BW != 0) begin
|
||||||
|
|
||||||
reg [1:0][DATAW-1:0] shift_reg;
|
VX_stream_buffer #(
|
||||||
reg valid_out_r, ready_in_r, rd_ptr_r;
|
.DATAW (DATAW),
|
||||||
|
.OUT_REG (OUT_REG)
|
||||||
wire push = valid_in && ready_in;
|
) stream_buffer (
|
||||||
wire pop = valid_out_r && ready_out;
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
always @(posedge clk) begin
|
.valid_in (valid_in),
|
||||||
if (reset) begin
|
.data_in (data_in),
|
||||||
valid_out_r <= 0;
|
.ready_in (ready_in),
|
||||||
ready_in_r <= 1;
|
.valid_out (valid_out),
|
||||||
rd_ptr_r <= 1;
|
.data_out (data_out),
|
||||||
end else begin
|
.ready_out (ready_out)
|
||||||
if (push) begin
|
);
|
||||||
if (!pop) begin
|
|
||||||
ready_in_r <= rd_ptr_r;
|
|
||||||
valid_out_r <= 1;
|
|
||||||
end
|
|
||||||
end else if (pop) begin
|
|
||||||
ready_in_r <= 1;
|
|
||||||
valid_out_r <= rd_ptr_r;
|
|
||||||
end
|
|
||||||
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (push) begin
|
|
||||||
shift_reg[1] <= shift_reg[0];
|
|
||||||
shift_reg[0] <= data_in;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign ready_in = ready_in_r;
|
|
||||||
assign valid_out = valid_out_r;
|
|
||||||
assign data_out = shift_reg[rd_ptr_r];
|
|
||||||
|
|
||||||
end else if (OUT_REG == 1) begin
|
|
||||||
|
|
||||||
// Full-bandwidth operation: input is consumed every cycle.
|
|
||||||
// However, data_out register has an additional multiplexer.
|
|
||||||
|
|
||||||
reg [DATAW-1:0] data_out_r;
|
|
||||||
reg [DATAW-1:0] buffer;
|
|
||||||
reg valid_out_r;
|
|
||||||
reg use_buffer;
|
|
||||||
|
|
||||||
wire push = valid_in && ready_in;
|
|
||||||
wire stall_out = valid_out_r && ~ready_out;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (reset) begin
|
|
||||||
valid_out_r <= 0;
|
|
||||||
use_buffer <= 0;
|
|
||||||
end else begin
|
|
||||||
if (ready_out) begin
|
|
||||||
use_buffer <= 0;
|
|
||||||
end else if (valid_in && valid_out) begin
|
|
||||||
use_buffer <= 1;
|
|
||||||
end
|
|
||||||
if (~stall_out) begin
|
|
||||||
valid_out_r <= valid_in || use_buffer;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (push) begin
|
|
||||||
buffer <= data_in;
|
|
||||||
end
|
|
||||||
if (~stall_out) begin
|
|
||||||
data_out_r <= use_buffer ? buffer : data_in;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign ready_in = ~use_buffer;
|
|
||||||
assign valid_out = valid_out_r;
|
|
||||||
assign data_out = data_out_r;
|
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
// Half-bandwidth operation: input is consumed every other cycle.
|
VX_toggle_buffer #(
|
||||||
// However, data_out register has no additional multiplexer.
|
.DATAW (DATAW)
|
||||||
|
) toggle_buffer (
|
||||||
reg [DATAW-1:0] data_out_r;
|
.clk (clk),
|
||||||
reg has_data;
|
.reset (reset),
|
||||||
|
.valid_in (valid_in),
|
||||||
always @(posedge clk) begin
|
.data_in (data_in),
|
||||||
if (reset) begin
|
.ready_in (ready_in),
|
||||||
has_data <= 0;
|
.valid_out (valid_out),
|
||||||
end else begin
|
.data_out (data_out),
|
||||||
if (~has_data) begin
|
.ready_out (ready_out)
|
||||||
has_data <= valid_in;
|
);
|
||||||
end else if (ready_out) begin
|
|
||||||
has_data <= 0;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if (~has_data) begin
|
|
||||||
data_out_r <= data_in;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign ready_in = ~has_data;
|
|
||||||
assign valid_out = has_data;
|
|
||||||
assign data_out = data_out_r;
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ module VX_stream_arb #(
|
|||||||
parameter NUM_OUTPUTS = 1,
|
parameter NUM_OUTPUTS = 1,
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter `STRING ARBITER = "P",
|
parameter `STRING ARBITER = "P",
|
||||||
parameter LOCK_ENABLE = 1,
|
|
||||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||||
parameter OUT_REG = 0 ,
|
parameter OUT_REG = 0 ,
|
||||||
parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS,
|
parameter NUM_REQS = (NUM_INPUTS + NUM_OUTPUTS - 1) / NUM_OUTPUTS,
|
||||||
@@ -57,7 +56,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) arb_slice (
|
) arb_slice (
|
||||||
@@ -102,7 +100,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) fanout_slice_arb (
|
) fanout_slice_arb (
|
||||||
@@ -129,7 +126,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW + LOG_NUM_REQS2),
|
.DATAW (DATAW + LOG_NUM_REQS2),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) fanout_join_arb (
|
) fanout_join_arb (
|
||||||
@@ -158,25 +154,25 @@ module VX_stream_arb #(
|
|||||||
wire arb_valid;
|
wire arb_valid;
|
||||||
wire [NUM_REQS_W-1:0] arb_index;
|
wire [NUM_REQS_W-1:0] arb_index;
|
||||||
wire [NUM_REQS-1:0] arb_onehot;
|
wire [NUM_REQS-1:0] arb_onehot;
|
||||||
wire arb_unlock;
|
wire arb_ready;
|
||||||
|
|
||||||
VX_generic_arbiter #(
|
VX_generic_arbiter #(
|
||||||
.NUM_REQS (NUM_REQS),
|
.NUM_REQS (NUM_REQS),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
.LOCK_ENABLE (1),
|
||||||
.TYPE (ARBITER)
|
.TYPE (ARBITER)
|
||||||
) arbiter (
|
) arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.requests (valid_in),
|
.requests (valid_in),
|
||||||
.unlock (arb_unlock),
|
|
||||||
.grant_valid (arb_valid),
|
.grant_valid (arb_valid),
|
||||||
.grant_index (arb_index),
|
.grant_index (arb_index),
|
||||||
.grant_onehot (arb_onehot)
|
.grant_onehot (arb_onehot),
|
||||||
|
.grant_unlock (arb_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign valid_in_r = arb_valid;
|
assign valid_in_r = arb_valid;
|
||||||
assign data_in_r = data_in[arb_index];
|
assign data_in_r = data_in[arb_index];
|
||||||
assign arb_unlock = | (valid_in_r & ready_in_r);
|
assign arb_ready = ready_in_r;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||||
assign ready_in[i] = ready_in_r & arb_onehot[i];
|
assign ready_in[i] = ready_in_r & arb_onehot[i];
|
||||||
@@ -217,7 +213,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (BATCH_SIZE),
|
.NUM_OUTPUTS (BATCH_SIZE),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) arb_slice (
|
) arb_slice (
|
||||||
@@ -252,7 +247,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (NUM_BATCHES),
|
.NUM_OUTPUTS (NUM_BATCHES),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) fanout_fork_arb (
|
) fanout_fork_arb (
|
||||||
@@ -280,7 +274,6 @@ module VX_stream_arb #(
|
|||||||
.NUM_OUTPUTS (BATCH_SIZE),
|
.NUM_OUTPUTS (BATCH_SIZE),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) fanout_slice_arb (
|
) fanout_slice_arb (
|
||||||
@@ -305,24 +298,24 @@ module VX_stream_arb #(
|
|||||||
wire [NUM_OUTPUTS-1:0] arb_requests;
|
wire [NUM_OUTPUTS-1:0] arb_requests;
|
||||||
wire arb_valid;
|
wire arb_valid;
|
||||||
wire [NUM_OUTPUTS-1:0] arb_onehot;
|
wire [NUM_OUTPUTS-1:0] arb_onehot;
|
||||||
wire arb_unlock;
|
wire arb_ready;
|
||||||
|
|
||||||
VX_generic_arbiter #(
|
VX_generic_arbiter #(
|
||||||
.NUM_REQS (NUM_OUTPUTS),
|
.NUM_REQS (NUM_OUTPUTS),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
.LOCK_ENABLE (1),
|
||||||
.TYPE (ARBITER)
|
.TYPE (ARBITER)
|
||||||
) arbiter (
|
) arbiter (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.requests (arb_requests),
|
.requests (arb_requests),
|
||||||
.unlock (arb_unlock),
|
|
||||||
.grant_valid (arb_valid),
|
.grant_valid (arb_valid),
|
||||||
`UNUSED_PIN (grant_index),
|
`UNUSED_PIN (grant_index),
|
||||||
.grant_onehot (arb_onehot)
|
.grant_onehot (arb_onehot),
|
||||||
|
.grant_unlock (arb_ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign arb_requests = ready_in_r;
|
assign arb_requests = ready_in_r;
|
||||||
assign arb_unlock = | (valid_in & ready_in);
|
assign arb_ready = valid_in[0];
|
||||||
assign ready_in = arb_valid;
|
assign ready_in = arb_valid;
|
||||||
|
|
||||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||||
|
|||||||
128
hw/rtl/libs/VX_stream_buffer.sv
Normal file
128
hw/rtl/libs/VX_stream_buffer.sv
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
// Copyright 2024 blaise
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously
|
||||||
|
// It has the following benefits:
|
||||||
|
// + full-bandwidth throughput
|
||||||
|
// + ready_in and ready_out are decoupled
|
||||||
|
// + data_out can be fully registered
|
||||||
|
// It has the following limitations:
|
||||||
|
// - requires two registers for storage
|
||||||
|
|
||||||
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
`TRACING_OFF
|
||||||
|
// Stream buffer with a valid/ready handshake on both the input and output side.
// Parameters:
//   DATAW    - width in bits of data_in and data_out.
//   OUT_REG  - non-zero: data_out is driven from a dedicated register (data_out_r) with one extra holding register;
//              zero: data_out is read from a two-entry shift register selected by a read pointer.
//   PASSTHRU - non-zero: no storage at all, the input side is wired directly to the output side.
module VX_stream_buffer #(
|
||||||
|
parameter DATAW = 1,
|
||||||
|
parameter OUT_REG = 0,
|
||||||
|
parameter PASSTHRU = 0
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
input wire valid_in,
|
||||||
|
output wire ready_in,
|
||||||
|
input wire [DATAW-1:0] data_in,
|
||||||
|
output wire [DATAW-1:0] data_out,
|
||||||
|
input wire ready_out,
|
||||||
|
output wire valid_out
|
||||||
|
);
|
||||||
|
// Pass-through variant: purely combinational, clk and reset are not referenced by any logic here.
if (PASSTHRU != 0) begin
|
||||||
|
`UNUSED_VAR (clk)
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
assign ready_in = ready_out;
|
||||||
|
assign valid_out = valid_in;
|
||||||
|
assign data_out = data_in;
|
||||||
|
end else begin
|
||||||
|
// Registered-output variant: data_out comes straight from data_out_r.
if (OUT_REG != 0) begin
|
||||||
|
|
||||||
|
reg [DATAW-1:0] data_out_r;
|
||||||
|
reg [DATAW-1:0] buffer;
|
||||||
|
reg valid_out_r;
|
||||||
|
reg use_buffer;
|
||||||
|
|
||||||
|
// push: the input handshake completes this cycle.
wire push = valid_in && ready_in;
|
||||||
|
// stall_out: the output holds a valid word that the consumer is not accepting.
wire stall_out = valid_out_r && ~ready_out;
|
||||||
|
|
||||||
|
// Control flags (the only state in this variant that is reset).
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
valid_out_r <= 0;
|
||||||
|
use_buffer <= 0;
|
||||||
|
end else begin
|
||||||
|
// use_buffer is cleared whenever ready_out is high; otherwise it is set when
// valid_in is high while valid_out is already asserted.
if (ready_out) begin
|
||||||
|
use_buffer <= 0;
|
||||||
|
end else if (valid_in && valid_out) begin
|
||||||
|
use_buffer <= 1;
|
||||||
|
end
|
||||||
|
// While the output is not stalled, it becomes valid if valid_in is high or use_buffer is set.
if (~stall_out) begin
|
||||||
|
valid_out_r <= valid_in || use_buffer;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// Data registers; this block has no reset branch.
always @(posedge clk) begin
|
||||||
|
// Every accepted input word is also captured in the holding register.
if (push) begin
|
||||||
|
buffer <= data_in;
|
||||||
|
end
|
||||||
|
// While not stalled, the output register loads the held word when use_buffer is set, else the incoming word.
if (~stall_out) begin
|
||||||
|
data_out_r <= use_buffer ? buffer : data_in;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// Input is accepted only while use_buffer is clear.
assign ready_in = ~use_buffer;
|
||||||
|
assign valid_out = valid_out_r;
|
||||||
|
assign data_out = data_out_r;
|
||||||
|
|
||||||
|
// Shift-register variant: data_out is read combinationally from shift_reg[rd_ptr_r].
end else begin
|
||||||
|
|
||||||
|
reg [1:0][DATAW-1:0] shift_reg;
|
||||||
|
reg valid_out_r, ready_in_r, rd_ptr_r;
|
||||||
|
|
||||||
|
// push / pop: the input / output handshake completes this cycle.
wire push = valid_in && ready_in;
|
||||||
|
wire pop = valid_out_r && ready_out;
|
||||||
|
|
||||||
|
// Control state: valid/ready flags and the read pointer.
always @(posedge clk) begin
|
||||||
|
// After reset: output not valid, input ready, read pointer at entry 1.
if (reset) begin
|
||||||
|
valid_out_r <= 0;
|
||||||
|
ready_in_r <= 1;
|
||||||
|
rd_ptr_r <= 1;
|
||||||
|
end else begin
|
||||||
|
// Push without pop: ready_in_r takes the current rd_ptr_r value and the output becomes valid.
// Push together with pop leaves both flags unchanged.
if (push) begin
|
||||||
|
if (!pop) begin
|
||||||
|
ready_in_r <= rd_ptr_r;
|
||||||
|
valid_out_r <= 1;
|
||||||
|
end
|
||||||
|
// Pop without push: the input becomes ready; the output stays valid only if rd_ptr_r is 1.
end else if (pop) begin
|
||||||
|
ready_in_r <= 1;
|
||||||
|
valid_out_r <= rd_ptr_r;
|
||||||
|
end
|
||||||
|
// The read pointer toggles when exactly one of push / pop occurs.
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// Data storage (no reset): on push, entry 0 moves to entry 1 and the new word enters entry 0.
always @(posedge clk) begin
|
||||||
|
if (push) begin
|
||||||
|
shift_reg[1] <= shift_reg[0];
|
||||||
|
shift_reg[0] <= data_in;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
assign ready_in = ready_in_r;
|
||||||
|
assign valid_out = valid_out_r;
|
||||||
|
assign data_out = shift_reg[rd_ptr_r];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
`TRACING_ON
|
||||||
|
|
||||||
@@ -21,8 +21,7 @@ module VX_stream_xbar #(
|
|||||||
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
parameter IN_WIDTH = `LOG2UP(NUM_INPUTS),
|
||||||
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS),
|
||||||
parameter ARBITER = "P",
|
parameter ARBITER = "P",
|
||||||
parameter LOCK_ENABLE = 0,
|
parameter OUT_REG = 0,
|
||||||
parameter OUT_REG = 0,
|
|
||||||
parameter MAX_FANOUT = `MAX_FANOUT,
|
parameter MAX_FANOUT = `MAX_FANOUT,
|
||||||
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
|
parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1)
|
||||||
) (
|
) (
|
||||||
@@ -66,7 +65,6 @@ module VX_stream_xbar #(
|
|||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) xbar_arb (
|
) xbar_arb (
|
||||||
@@ -95,7 +93,6 @@ module VX_stream_xbar #(
|
|||||||
.NUM_OUTPUTS (1),
|
.NUM_OUTPUTS (1),
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
.ARBITER (ARBITER),
|
.ARBITER (ARBITER),
|
||||||
.LOCK_ENABLE (LOCK_ENABLE),
|
|
||||||
.MAX_FANOUT (MAX_FANOUT),
|
.MAX_FANOUT (MAX_FANOUT),
|
||||||
.OUT_REG (OUT_REG)
|
.OUT_REG (OUT_REG)
|
||||||
) xbar_arb (
|
) xbar_arb (
|
||||||
|
|||||||
70
hw/rtl/libs/VX_toggle_buffer.sv
Normal file
70
hw/rtl/libs/VX_toggle_buffer.sv
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
// Copyright 2024 blaise
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// A toggle elastic buffer operates at half-bandwidth where push can only trigger after pop
|
||||||
|
// It has the following benefits:
|
||||||
|
// + use only one register for storage
|
||||||
|
// + ready_in and ready_out are decoupled
|
||||||
|
// + data_out is fully registered
|
||||||
|
// It has the following limitations:
|
||||||
|
// - Half-bandwidth throughput
|
||||||
|
|
||||||
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
`TRACING_OFF
|
||||||
|
// Single-register stream buffer with a valid/ready handshake on both sides.
// It is ready for input only while empty and presents valid output only while full.
// Parameters:
//   DATAW    - width in bits of data_in and data_out.
//   PASSTHRU - non-zero: no storage, the input side is wired directly to the output side.
module VX_toggle_buffer #(
|
||||||
|
parameter DATAW = 1,
|
||||||
|
parameter PASSTHRU = 0
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
input wire valid_in,
|
||||||
|
output wire ready_in,
|
||||||
|
input wire [DATAW-1:0] data_in,
|
||||||
|
output wire [DATAW-1:0] data_out,
|
||||||
|
input wire ready_out,
|
||||||
|
output wire valid_out
|
||||||
|
);
|
||||||
|
// Pass-through variant: purely combinational, clk and reset are not referenced by any logic here.
if (PASSTHRU != 0) begin
|
||||||
|
`UNUSED_VAR (clk)
|
||||||
|
`UNUSED_VAR (reset)
|
||||||
|
assign ready_in = ready_out;
|
||||||
|
assign valid_out = valid_in;
|
||||||
|
assign data_out = data_in;
|
||||||
|
end else begin
|
||||||
|
reg [DATAW-1:0] buffer;
|
||||||
|
// has_data: 1 while buffer holds a word that has not yet been taken by the consumer.
reg has_data;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
has_data <= 0;
|
||||||
|
end else begin
|
||||||
|
// Empty: become full when valid_in is high. Full: become empty when ready_out is high.
if (~has_data) begin
|
||||||
|
has_data <= valid_in;
|
||||||
|
end else if (ready_out) begin
|
||||||
|
has_data <= 0;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
// The data register samples data_in on every cycle while empty, independent of
// valid_in and of reset; it is only meaningful once has_data is set.
if (~has_data) begin
|
||||||
|
buffer <= data_in;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// ready_in and valid_out are complementary, so a push and a pop never complete in the same cycle.
assign ready_in = ~has_data;
|
||||||
|
assign valid_out = has_data;
|
||||||
|
assign data_out = buffer;
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
`TRACING_ON
|
||||||
@@ -62,10 +62,10 @@ Cluster::Cluster(const SimContext& ctx,
|
|||||||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||||
!L2_ENABLED,
|
!L2_ENABLED,
|
||||||
log2ceil(L2_CACHE_SIZE), // C
|
log2ceil(L2_CACHE_SIZE),// C
|
||||||
log2ceil(MEM_BLOCK_SIZE), // L
|
log2ceil(MEM_BLOCK_SIZE),// L
|
||||||
log2ceil(L2_NUM_WAYS), // W
|
log2ceil(L1_LINE_SIZE), // W
|
||||||
0, // A
|
log2ceil(L2_NUM_WAYS), // A
|
||||||
log2ceil(L2_NUM_BANKS), // B
|
log2ceil(L2_NUM_BANKS), // B
|
||||||
XLEN, // address bits
|
XLEN, // address bits
|
||||||
1, // number of ports
|
1, // number of ports
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ void Core::schedule() {
|
|||||||
void Core::fetch() {
|
void Core::fetch() {
|
||||||
perf_stats_.ifetch_latency += pending_ifetches_;
|
perf_stats_.ifetch_latency += pending_ifetches_;
|
||||||
|
|
||||||
// handle icache reponse
|
// handle icache response
|
||||||
auto& icache_rsp_port = icache_rsp_ports.at(0);
|
auto& icache_rsp_port = icache_rsp_ports.at(0);
|
||||||
if (!icache_rsp_port.empty()){
|
if (!icache_rsp_port.empty()){
|
||||||
auto& mem_rsp = icache_rsp_port.front();
|
auto& mem_rsp = icache_rsp_port.front();
|
||||||
|
|||||||
@@ -207,7 +207,7 @@ void LsuUnit::tick() {
|
|||||||
for (uint32_t t = 1; t < num_lanes_; ++t) {
|
for (uint32_t t = 1; t < num_lanes_; ++t) {
|
||||||
if (!trace->tmask.test(t0 + t))
|
if (!trace->tmask.test(t0 + t))
|
||||||
continue;
|
continue;
|
||||||
auto mem_addr = trace_data->mem_addrs.at(t).addr & ~addr_mask;
|
auto mem_addr = trace_data->mem_addrs.at(t + t0).addr & ~addr_mask;
|
||||||
matches += (addr0 == mem_addr);
|
matches += (addr0 == mem_addr);
|
||||||
}
|
}
|
||||||
#ifdef LSU_DUP_ENABLE
|
#ifdef LSU_DUP_ENABLE
|
||||||
@@ -229,7 +229,7 @@ void LsuUnit::tick() {
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
auto& dcache_req_port = core_->smem_demuxs_.at(t)->ReqIn;
|
auto& dcache_req_port = core_->smem_demuxs_.at(t)->ReqIn;
|
||||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
auto mem_addr = trace_data->mem_addrs.at(t + t0);
|
||||||
auto type = core_->get_addr_type(mem_addr.addr);
|
auto type = core_->get_addr_type(mem_addr.addr);
|
||||||
|
|
||||||
MemReq mem_req;
|
MemReq mem_req;
|
||||||
@@ -324,4 +324,4 @@ void SfuUnit::tick() {
|
|||||||
break; // single block
|
break; // single block
|
||||||
}
|
}
|
||||||
++input_idx_;
|
++input_idx_;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -339,7 +339,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 1: {
|
case 1: {
|
||||||
// RV64I: SLLI
|
// RV32I: SLLI
|
||||||
rddata[t].i = rsdata[t][0].i << immsrc;
|
rddata[t].i = rsdata[t][0].i << immsrc;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -360,11 +360,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
|
|||||||
}
|
}
|
||||||
case 5: {
|
case 5: {
|
||||||
if (func7) {
|
if (func7) {
|
||||||
// RV64I: SRAI
|
// RV32I: SRAI
|
||||||
Word result = rsdata[t][0].i >> immsrc;
|
Word result = rsdata[t][0].i >> immsrc;
|
||||||
rddata[t].i = result;
|
rddata[t].i = result;
|
||||||
} else {
|
} else {
|
||||||
// RV64I: SRLI
|
// RV32I: SRLI
|
||||||
Word result = rsdata[t][0].u >> immsrc;
|
Word result = rsdata[t][0].u >> immsrc;
|
||||||
rddata[t].i = result;
|
rddata[t].i = result;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ static void show_usage() {
|
|||||||
uint32_t num_threads = NUM_THREADS;
|
uint32_t num_threads = NUM_THREADS;
|
||||||
uint32_t num_warps = NUM_WARPS;
|
uint32_t num_warps = NUM_WARPS;
|
||||||
uint32_t num_cores = NUM_CORES;
|
uint32_t num_cores = NUM_CORES;
|
||||||
bool showStats = false;;
|
bool showStats = false;
|
||||||
bool riscv_test = false;
|
bool riscv_test = false;
|
||||||
const char* program = nullptr;
|
const char* program = nullptr;
|
||||||
|
|
||||||
|
|||||||
@@ -33,8 +33,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||||||
!L3_ENABLED,
|
!L3_ENABLED,
|
||||||
log2ceil(L3_CACHE_SIZE), // C
|
log2ceil(L3_CACHE_SIZE), // C
|
||||||
log2ceil(MEM_BLOCK_SIZE), // L
|
log2ceil(MEM_BLOCK_SIZE), // L
|
||||||
log2ceil(L3_NUM_WAYS), // W
|
log2ceil(L2_LINE_SIZE), // W
|
||||||
0, // A
|
log2ceil(L3_NUM_WAYS), // A
|
||||||
log2ceil(L3_NUM_BANKS), // B
|
log2ceil(L3_NUM_BANKS), // B
|
||||||
XLEN, // address bits
|
XLEN, // address bits
|
||||||
1, // number of ports
|
1, // number of ports
|
||||||
@@ -58,7 +58,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||||||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||||
}
|
}
|
||||||
|
|
||||||
// set up memory perf recording
|
// set up memory profiling
|
||||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||||
__unused (cycle);
|
__unused (cycle);
|
||||||
perf_mem_reads_ += !req.write;
|
perf_mem_reads_ += !req.write;
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx,
|
|||||||
XLEN, // address bits
|
XLEN, // address bits
|
||||||
1, // number of ports
|
1, // number of ports
|
||||||
1, // number of inputs
|
1, // number of inputs
|
||||||
true, // write-through
|
false, // write-through
|
||||||
false, // write response
|
false, // write response
|
||||||
(uint8_t)arch.num_warps(), // mshr
|
(uint8_t)arch.num_warps(), // mshr
|
||||||
2, // pipeline latency
|
2, // pipeline latency
|
||||||
|
|||||||
Reference in New Issue
Block a user