Merge branch 'master' of https://github.com/vortexgpgpu/vortex-dev
This commit is contained in:
@@ -15,6 +15,9 @@ set -e
|
|||||||
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||||
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||||
|
|
||||||
|
# disable shared memory
|
||||||
|
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
|
||||||
|
|
||||||
# Blackbox tests
|
# Blackbox tests
|
||||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||||
|
|||||||
@@ -5,16 +5,19 @@ Description: Makes the build in the opae directory with the specified core
|
|||||||
exists, a make clean command is ran before the build. Script waits
|
exists, a make clean command is ran before the build. Script waits
|
||||||
until the inteldev script or quartus program is finished running.
|
until the inteldev script or quartus program is finished running.
|
||||||
|
|
||||||
Usage: ./build.sh -c [1|2|4|8|16] [-p [y|n]]
|
Usage: ./build.sh -c [1|2|4|8|16] [-p perf] [-w wait]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-c
|
-c
|
||||||
Core count (1, 2, 4, 8, or 16).
|
Core count (1, 2, 4, 8, or 16).
|
||||||
|
|
||||||
-p
|
-p
|
||||||
Performance profiling enable (y or n). Changes the source file in the
|
Performance profiling enable. Changes the source file in the
|
||||||
opae directory to include/exclude "+define+PERF_ENABLE".
|
opae directory to include/exclude "+define+PERF_ENABLE".
|
||||||
|
|
||||||
|
-w
|
||||||
|
Wait for the build to complete
|
||||||
|
|
||||||
_______________________________________________________________________________
|
_______________________________________________________________________________
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,23 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
while getopts c:p: flag
|
BUILD_DIR=../../hw/syn/opae
|
||||||
|
|
||||||
|
perf=0
|
||||||
|
wait=0
|
||||||
|
|
||||||
|
while getopts c:pwh flag
|
||||||
do
|
do
|
||||||
case "${flag}" in
|
case "${flag}" in
|
||||||
c) cores=${OPTARG};; #1, 2, 4, 8, 16
|
c) cores=${OPTARG};; #1, 2, 4, 8, 16
|
||||||
p) perf=${OPTARG};; #perf counters enable (y/n)
|
p) perf=1;; #perf counters enable
|
||||||
|
w) wait=1;; # wait for build to complete
|
||||||
|
h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
\?)
|
||||||
|
echo "Invalid option: -$OPTARG" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -13,25 +26,22 @@ if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cd ../../hw/syn/opae
|
cd ${BUILD_DIR}
|
||||||
|
|
||||||
sources_file="./sources_${cores}c.txt"
|
sources_file="./sources_${cores}c.txt"
|
||||||
|
|
||||||
if [ ${perf:0:1} = "n" ]; then
|
if [ ${perf} = 1 ]; then
|
||||||
if grep -v '^ *#' ${sources_file} | grep -Fxq '+define+SYNTHESIS'; then
|
|
||||||
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
|
|
||||||
elif ! grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
|
|
||||||
sed -i '1s/^/#+define+PERF_ENABLE\n/' ${sources_file}
|
|
||||||
fi
|
|
||||||
elif [ ${perf:0:1} = "y" ]; then
|
|
||||||
if grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
|
if grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
|
||||||
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
|
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
|
||||||
elif ! grep -Fxq '+define+PERF_ENABLE' ${sources_file}; then
|
elif ! grep -Fxq '+define+PERF_ENABLE' ${sources_file}; then
|
||||||
sed -i '1s/^/+define+PERF_ENABLE\n/' ${sources_file}
|
sed -i '1s/^/+define+PERF_ENABLE\n/' ${sources_file}
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo 'Invalid parameter for argument -p (y/n expected)'
|
if grep -v '^ *#' ${sources_file} | grep -Fxq '+define+SYNTHESIS'; then
|
||||||
exit 1
|
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
|
||||||
|
elif ! grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
|
||||||
|
sed -i '1s/^/#+define+PERF_ENABLE\n/' ${sources_file}
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -d "./build_fpga_{$cores}c" ]; then
|
if [ -d "./build_fpga_{$cores}c" ]; then
|
||||||
@@ -39,12 +49,12 @@ if [ -d "./build_fpga_{$cores}c" ]; then
|
|||||||
fi
|
fi
|
||||||
make "fpga-${cores}c"
|
make "fpga-${cores}c"
|
||||||
|
|
||||||
sleep 30
|
if [ ${wait} = 1 ]; then
|
||||||
|
sleep 30
|
||||||
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
|
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
|
||||||
for pid in ${pids[@]}; do
|
for pid in ${pids[@]}; do
|
||||||
while kill -0 ${pid} 2> /dev/null; do
|
while kill -0 ${pid} 2> /dev/null; do
|
||||||
sleep 30
|
sleep 30
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
fi
|
||||||
|
|||||||
@@ -2,6 +2,6 @@
|
|||||||
|
|
||||||
for ((i=1; i <= 16; i=i*2)); do
|
for ((i=1; i <= 16; i=i*2)); do
|
||||||
echo "Building ${i} core build..."
|
echo "Building ${i} core build..."
|
||||||
./build.sh -c ${i} -p y
|
./build.sh -c ${i} -p -w
|
||||||
echo "Done ${i} core build."
|
echo "Done ${i} core build."
|
||||||
done
|
done
|
||||||
|
|||||||
@@ -26,9 +26,9 @@ extern "C" {
|
|||||||
void dpi_utof(int a, int frm, int* result, int* fflags);
|
void dpi_utof(int a, int frm, int* result, int* fflags);
|
||||||
|
|
||||||
void dpi_fclss(int a, int* result);
|
void dpi_fclss(int a, int* result);
|
||||||
void dpi_fsgnj(int a, int* result);
|
void dpi_fsgnj(int a, int b, int* result);
|
||||||
void dpi_fsgnjn(int a, int* result);
|
void dpi_fsgnjn(int a, int b, int* result);
|
||||||
void dpi_fsgnjx(int a, int* result);
|
void dpi_fsgnjx(int a, int b, int* result);
|
||||||
|
|
||||||
void dpi_flt(int a, int b, int* result, int* fflags);
|
void dpi_flt(int a, int b, int* result, int* fflags);
|
||||||
void dpi_fle(int a, int b, int* result, int* fflags);
|
void dpi_fle(int a, int b, int* result, int* fflags);
|
||||||
@@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void dpi_fclss(int a, int* result) {
|
void dpi_fclss(int a, int* result) {
|
||||||
// TODO
|
|
||||||
*result = 0;
|
int r = 0; // clear all bits
|
||||||
|
|
||||||
|
bool fsign = (a >> 31);
|
||||||
|
uint32_t expo = (a >> 23) & 0xFF;
|
||||||
|
uint32_t fraction = a & 0x7FFFFF;
|
||||||
|
|
||||||
|
if ((expo == 0) && (fraction == 0)) {
|
||||||
|
r = fsign ? (1 << 3) : (1 << 4); // +/- 0
|
||||||
|
} else if ((expo == 0) && (fraction != 0)) {
|
||||||
|
r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
|
||||||
|
} else if ((expo == 0xFF) && (fraction == 0)) {
|
||||||
|
r = fsign ? (1<<0) : (1<<7); // +/- infinity
|
||||||
|
} else if ((expo == 0xFF ) && (fraction != 0)) {
|
||||||
|
if (!fsign && (fraction == 0x00400000)) {
|
||||||
|
r = (1 << 9); // quiet NaN
|
||||||
|
} else {
|
||||||
|
r = (1 << 8); // signaling NaN
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
r = fsign ? (1 << 1) : (1 << 6); // +/- normal
|
||||||
|
}
|
||||||
|
|
||||||
|
*result = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dpi_fsgnj(int a, int* result) {
|
void dpi_fsgnj(int a, int b, int* result) {
|
||||||
// TODO
|
|
||||||
*result = 0;
|
int sign = b & 0x80000000;
|
||||||
|
int r = sign | (a & 0x7FFFFFFF);
|
||||||
|
|
||||||
|
*result = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dpi_fsgnjn(int a, int* result) {
|
void dpi_fsgnjn(int a, int b, int* result) {
|
||||||
// TODO
|
|
||||||
*result = 0;
|
int sign = ~b & 0x80000000;
|
||||||
|
int r = sign | (a & 0x7FFFFFFF);
|
||||||
|
|
||||||
|
*result = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dpi_fsgnjx(int a, int* result) {
|
void dpi_fsgnjx(int a, int b, int* result) {
|
||||||
// TODO
|
|
||||||
*result = 0;
|
int sign1 = a & 0x80000000;
|
||||||
|
int sign2 = b & 0x80000000;
|
||||||
|
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
|
||||||
|
|
||||||
|
*result = r;
|
||||||
}
|
}
|
||||||
@@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
|
|||||||
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||||
|
|
||||||
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
||||||
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
|
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
|
||||||
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
|
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
|
||||||
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
|
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
|
||||||
|
|
||||||
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||||
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ module VX_cluster #(
|
|||||||
.DATA_WIDTH (32),
|
.DATA_WIDTH (32),
|
||||||
.ADDR_WIDTH (12),
|
.ADDR_WIDTH (12),
|
||||||
.BUFFERED_REQ (1),
|
.BUFFERED_REQ (1),
|
||||||
.BUFFERED_RSP (`NUM_CORES >= 4)
|
.BUFFERED_RSP (1)
|
||||||
) csr_arb (
|
) csr_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -225,7 +225,7 @@ module VX_cluster #(
|
|||||||
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
||||||
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
|
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
|
||||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
|
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||||
.BUFFERED_REQ (`NUM_CORES >= 4),
|
.BUFFERED_REQ (1),
|
||||||
.BUFFERED_RSP (1)
|
.BUFFERED_RSP (1)
|
||||||
) dram_arb (
|
) dram_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ module VX_databus_arb (
|
|||||||
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
|
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
|
||||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||||
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -30,41 +30,42 @@ module VX_databus_arb (
|
|||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||||
|
|
||||||
wire cache_req_ready_in;
|
wire cache_req_valid_out, cache_req_ready_out;
|
||||||
wire smem_req_ready_in;
|
wire is_smem_addr_in, is_smem_addr_out;
|
||||||
|
|
||||||
// select shared memory bus
|
// select shared memory bus
|
||||||
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
|
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
|
||||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (REQ_DATAW)
|
.DATAW (REQ_DATAW)
|
||||||
) cache_out_buffer (
|
) out_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (core_req_if.valid[i] && !is_smem_addr),
|
.valid_in (core_req_if.valid[i]),
|
||||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||||
.ready_in (cache_req_ready_in),
|
.ready_in (core_req_if.ready[i]),
|
||||||
.valid_out (cache_req_if.valid[i]),
|
.valid_out (cache_req_valid_out),
|
||||||
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||||
.ready_out (cache_req_if.ready[i])
|
.ready_out (cache_req_ready_out)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
if (`SM_ENABLE ) begin
|
||||||
.DATAW (REQ_DATAW)
|
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
|
||||||
) smem_out_buffer (
|
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
|
||||||
.clk (clk),
|
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
|
||||||
.reset (reset),
|
|
||||||
.valid_in (core_req_if.valid[i] && is_smem_addr),
|
|
||||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
|
||||||
.ready_in (smem_req_ready_in),
|
|
||||||
.valid_out (smem_req_if.valid[i]),
|
|
||||||
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
|
|
||||||
.ready_out (smem_req_if.ready[i])
|
|
||||||
);
|
|
||||||
|
|
||||||
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
|
assign smem_req_if.addr[i] = cache_req_if.addr[i];
|
||||||
|
assign smem_req_if.rw[i] = cache_req_if.rw[i];
|
||||||
|
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
|
||||||
|
assign smem_req_if.data[i] = cache_req_if.data[i];
|
||||||
|
assign smem_req_if.tag[i] = cache_req_if.tag[i];
|
||||||
|
end else begin
|
||||||
|
`UNUSED_VAR (is_smem_addr_out)
|
||||||
|
assign cache_req_if.valid[i] = cache_req_valid_out;
|
||||||
|
assign cache_req_ready_out = cache_req_if.ready[i];
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -1,6 +1,12 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
`include "VX_print_instr.vh"
|
`include "VX_print_instr.vh"
|
||||||
|
|
||||||
|
`ifdef EXT_F_ENABLE
|
||||||
|
`define USED_REGS(f,r) used_regs[{f,r}] = 1
|
||||||
|
`else
|
||||||
|
`define USED_REGS(f,r) used_regs[r] = 1
|
||||||
|
`endif
|
||||||
|
|
||||||
module VX_decode #(
|
module VX_decode #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
@@ -22,10 +28,12 @@ module VX_decode #(
|
|||||||
reg [`EX_BITS-1:0] ex_type;
|
reg [`EX_BITS-1:0] ex_type;
|
||||||
reg [`OP_BITS-1:0] op_type;
|
reg [`OP_BITS-1:0] op_type;
|
||||||
reg [`MOD_BITS-1:0] op_mod;
|
reg [`MOD_BITS-1:0] op_mod;
|
||||||
|
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||||
reg [31:0] imm;
|
reg [31:0] imm;
|
||||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
reg use_rd, use_PC, use_imm;
|
||||||
reg rd_fp, rs1_fp, rs2_fp;
|
reg rd_fp, rs1_fp, rs2_fp;
|
||||||
reg is_join, is_wstall;
|
reg is_join, is_wstall;
|
||||||
|
reg [`NUM_REGS-1:0] used_regs;
|
||||||
|
|
||||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||||
wire [6:0] opcode = instr[6:0];
|
wire [6:0] opcode = instr[6:0];
|
||||||
@@ -45,14 +53,11 @@ module VX_decode #(
|
|||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
|
|
||||||
ex_type = `EX_NOP;
|
ex_type = 0;
|
||||||
op_type = 'x;
|
op_type = 'x;
|
||||||
op_mod = 'x;
|
op_mod = 'x;
|
||||||
imm = 'x;
|
imm = 'x;
|
||||||
use_rd = 0;
|
use_rd = 0;
|
||||||
use_rs1 = 0;
|
|
||||||
use_rs2 = 0;
|
|
||||||
use_rs3 = 0;
|
|
||||||
use_PC = 0;
|
use_PC = 0;
|
||||||
use_imm = 0;
|
use_imm = 0;
|
||||||
rd_fp = 0;
|
rd_fp = 0;
|
||||||
@@ -60,6 +65,11 @@ module VX_decode #(
|
|||||||
rs2_fp = 0;
|
rs2_fp = 0;
|
||||||
is_join = 0;
|
is_join = 0;
|
||||||
is_wstall = 0;
|
is_wstall = 0;
|
||||||
|
used_regs = 0;
|
||||||
|
rd_r = rd;
|
||||||
|
rs1_r = rs1;
|
||||||
|
rs2_r = rs2;
|
||||||
|
rs3_r = rs3;
|
||||||
|
|
||||||
case (opcode)
|
case (opcode)
|
||||||
`INST_I: begin
|
`INST_I: begin
|
||||||
@@ -78,8 +88,9 @@ module VX_decode #(
|
|||||||
op_mod = 0;
|
op_mod = 0;
|
||||||
imm = {{20{alu_imm[11]}}, alu_imm};
|
imm = {{20{alu_imm[11]}}, alu_imm};
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
`INST_R: begin
|
`INST_R: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
@@ -114,17 +125,20 @@ module VX_decode #(
|
|||||||
op_mod = 0;
|
op_mod = 0;
|
||||||
end
|
end
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
`USED_REGS (1'b0, rd);
|
||||||
use_rs2 = 1;
|
`USED_REGS (1'b0, rs1);
|
||||||
|
`USED_REGS (1'b0, rs2);
|
||||||
end
|
end
|
||||||
`INST_LUI: begin
|
`INST_LUI: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
op_type = `OP_BITS'(`ALU_LUI);
|
op_type = `OP_BITS'(`ALU_LUI);
|
||||||
op_mod = 0;
|
op_mod = 0;
|
||||||
|
rs1_r = 0;
|
||||||
imm = {upper_imm, 12'(0)};
|
imm = {upper_imm, 12'(0)};
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b0, 5'b0);
|
||||||
end
|
end
|
||||||
`INST_AUIPC: begin
|
`INST_AUIPC: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
@@ -134,6 +148,7 @@ module VX_decode #(
|
|||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_PC = 1;
|
use_PC = 1;
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
end
|
end
|
||||||
`INST_JAL: begin
|
`INST_JAL: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
@@ -144,6 +159,7 @@ module VX_decode #(
|
|||||||
use_PC = 1;
|
use_PC = 1;
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
end
|
end
|
||||||
`INST_JALR: begin
|
`INST_JALR: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
@@ -151,9 +167,10 @@ module VX_decode #(
|
|||||||
op_mod = 1;
|
op_mod = 1;
|
||||||
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
`INST_B: begin
|
`INST_B: begin
|
||||||
ex_type = `EX_ALU;
|
ex_type = `EX_ALU;
|
||||||
@@ -168,11 +185,11 @@ module VX_decode #(
|
|||||||
endcase
|
endcase
|
||||||
op_mod = 1;
|
op_mod = 1;
|
||||||
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
use_PC = 1;
|
use_PC = 1;
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
|
`USED_REGS (1'b0, rs2);
|
||||||
end
|
end
|
||||||
`INST_SYS : begin
|
`INST_SYS : begin
|
||||||
if (func3 == 0) begin
|
if (func3 == 0) begin
|
||||||
@@ -190,6 +207,7 @@ module VX_decode #(
|
|||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_PC = 1;
|
use_PC = 1;
|
||||||
use_imm = 1;
|
use_imm = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
end else begin
|
end else begin
|
||||||
ex_type = `EX_CSR;
|
ex_type = `EX_CSR;
|
||||||
case (func3[1:0])
|
case (func3[1:0])
|
||||||
@@ -201,8 +219,10 @@ module VX_decode #(
|
|||||||
endcase
|
endcase
|
||||||
imm = 32'(u_12);
|
imm = 32'(u_12);
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = !func3[2];
|
|
||||||
use_imm = func3[2];
|
use_imm = func3[2];
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
if (!func3[2])
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
@@ -213,7 +233,8 @@ module VX_decode #(
|
|||||||
op_type = `OP_BITS'({1'b0, func3});
|
op_type = `OP_BITS'({1'b0, func3});
|
||||||
imm = {{20{u_12[11]}}, u_12};
|
imm = {{20{u_12[11]}}, u_12};
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
`USED_REGS (1'b0, rs1);
|
||||||
|
`USED_REGS ((opcode == `INST_FL), rd);
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
rd_fp = (opcode == `INST_FL);
|
rd_fp = (opcode == `INST_FL);
|
||||||
`endif
|
`endif
|
||||||
@@ -225,8 +246,8 @@ module VX_decode #(
|
|||||||
ex_type = `EX_LSU;
|
ex_type = `EX_LSU;
|
||||||
op_type = `OP_BITS'({1'b1, func3});
|
op_type = `OP_BITS'({1'b1, func3});
|
||||||
imm = {{20{func7[6]}}, func7, rd};
|
imm = {{20{func7[6]}}, func7, rd};
|
||||||
use_rs1 = 1;
|
`USED_REGS (1'b0, rs1);
|
||||||
use_rs2 = 1;
|
`USED_REGS ((opcode == `INST_FS), rs2);
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
rs2_fp = (opcode == `INST_FS);
|
rs2_fp = (opcode == `INST_FS);
|
||||||
`endif
|
`endif
|
||||||
@@ -240,12 +261,13 @@ module VX_decode #(
|
|||||||
op_type = `OP_BITS'(opcode[3:0]);
|
op_type = `OP_BITS'(opcode[3:0]);
|
||||||
op_mod = func3;
|
op_mod = func3;
|
||||||
use_rd = 1;
|
use_rd = 1;
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
use_rs3 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
rs2_fp = 1;
|
rs2_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
|
`USED_REGS (1'b1, rs2);
|
||||||
|
`USED_REGS (1'b1, rs3);
|
||||||
end
|
end
|
||||||
`INST_FCI: begin
|
`INST_FCI: begin
|
||||||
ex_type = `EX_FPU;
|
ex_type = `EX_FPU;
|
||||||
@@ -258,55 +280,61 @@ module VX_decode #(
|
|||||||
7'h0C: // FDIV
|
7'h0C: // FDIV
|
||||||
begin
|
begin
|
||||||
op_type = `OP_BITS'(func7[3:0]);
|
op_type = `OP_BITS'(func7[3:0]);
|
||||||
use_rd = 1;
|
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
rs2_fp = 1;
|
rs2_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
|
`USED_REGS (1'b1, rs2);
|
||||||
end
|
end
|
||||||
7'h2C: begin
|
7'h2C: begin
|
||||||
op_type = `OP_BITS'(`FPU_SQRT);
|
op_type = `OP_BITS'(`FPU_SQRT);
|
||||||
use_rs1 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
end
|
end
|
||||||
7'h50: begin
|
7'h50: begin
|
||||||
op_type = `OP_BITS'(`FPU_CMP);
|
op_type = `OP_BITS'(`FPU_CMP);
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
rs2_fp = 1;
|
rs2_fp = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
|
`USED_REGS (1'b1, rs2);
|
||||||
end
|
end
|
||||||
7'h60: begin
|
7'h60: begin
|
||||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||||
use_rs1 = 1;
|
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
end
|
end
|
||||||
7'h68: begin
|
7'h68: begin
|
||||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||||
use_rs1 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
7'h10: begin
|
7'h10: begin
|
||||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||||
op_type = `OP_BITS'(`FPU_MISC);
|
op_type = `OP_BITS'(`FPU_MISC);
|
||||||
op_mod = {1'b0, func3[1:0]};
|
op_mod = {1'b0, func3[1:0]};
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
rs2_fp = 1;
|
rs2_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
|
`USED_REGS (1'b1, rs2);
|
||||||
end
|
end
|
||||||
7'h14: begin
|
7'h14: begin
|
||||||
// FMIN=3, FMAX=4
|
// FMIN=3, FMAX=4
|
||||||
op_type = `OP_BITS'(`FPU_MISC);
|
op_type = `OP_BITS'(`FPU_MISC);
|
||||||
op_mod = func3[0] ? 4 : 3;
|
op_mod = func3[0] ? 4 : 3;
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
rs2_fp = 1;
|
rs2_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
|
`USED_REGS (1'b1, rs2);
|
||||||
end
|
end
|
||||||
7'h70: begin
|
7'h70: begin
|
||||||
if (func3[0]) begin
|
if (func3[0]) begin
|
||||||
@@ -317,14 +345,16 @@ module VX_decode #(
|
|||||||
op_type = `OP_BITS'(`FPU_MISC);
|
op_type = `OP_BITS'(`FPU_MISC);
|
||||||
op_mod = 5;
|
op_mod = 5;
|
||||||
end
|
end
|
||||||
use_rs1 = 1;
|
|
||||||
rs1_fp = 1;
|
rs1_fp = 1;
|
||||||
|
`USED_REGS (1'b0, rd);
|
||||||
|
`USED_REGS (1'b1, rs1);
|
||||||
end
|
end
|
||||||
7'h78: begin
|
7'h78: begin
|
||||||
// FMV.W.X=6
|
// FMV.W.X=6
|
||||||
op_type = `OP_BITS'(`FPU_MISC);
|
op_type = `OP_BITS'(`FPU_MISC);
|
||||||
op_mod = 6;
|
op_mod = 6;
|
||||||
rd_fp = 1;
|
rd_fp = 1;
|
||||||
|
`USED_REGS (1'b1, rd);
|
||||||
end
|
end
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
@@ -335,18 +365,18 @@ module VX_decode #(
|
|||||||
case (func3)
|
case (func3)
|
||||||
3'h0: begin
|
3'h0: begin
|
||||||
op_type = `OP_BITS'(`GPU_TMC);
|
op_type = `OP_BITS'(`GPU_TMC);
|
||||||
use_rs1 = 1;
|
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
3'h1: begin
|
3'h1: begin
|
||||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||||
use_rs1 = 1;
|
`USED_REGS (1'b0, rs1);
|
||||||
use_rs2 = 1;
|
`USED_REGS (1'b0, rs2);
|
||||||
end
|
end
|
||||||
3'h2: begin
|
3'h2: begin
|
||||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||||
use_rs1 = 1;
|
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
end
|
end
|
||||||
3'h3: begin
|
3'h3: begin
|
||||||
op_type = `OP_BITS'(`GPU_JOIN);
|
op_type = `OP_BITS'(`GPU_JOIN);
|
||||||
@@ -354,9 +384,9 @@ module VX_decode #(
|
|||||||
end
|
end
|
||||||
3'h4: begin
|
3'h4: begin
|
||||||
op_type = `OP_BITS'(`GPU_BAR);
|
op_type = `OP_BITS'(`GPU_BAR);
|
||||||
use_rs1 = 1;
|
|
||||||
use_rs2 = 1;
|
|
||||||
is_wstall = 1;
|
is_wstall = 1;
|
||||||
|
`USED_REGS (1'b0, rs1);
|
||||||
|
`USED_REGS (1'b0, rs2);
|
||||||
end
|
end
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
@@ -366,10 +396,7 @@ module VX_decode #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
// disable write to integer register r0
|
// disable write to integer register r0
|
||||||
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
|
wire wb = use_rd && (rd_fp || (rd_r != 0));
|
||||||
|
|
||||||
// EX_ALU needs rs1=0 for LUI operation
|
|
||||||
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
|
|
||||||
|
|
||||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||||
@@ -378,31 +405,27 @@ module VX_decode #(
|
|||||||
assign decode_if.ex_type = ex_type;
|
assign decode_if.ex_type = ex_type;
|
||||||
assign decode_if.op_type = op_type;
|
assign decode_if.op_type = op_type;
|
||||||
assign decode_if.op_mod = op_mod;
|
assign decode_if.op_mod = op_mod;
|
||||||
assign decode_if.wb = use_rd_qual;
|
assign decode_if.wb = wb;
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
assign decode_if.rd = {rd_fp, rd};
|
assign decode_if.rd = {rd_fp, rd_r};
|
||||||
assign decode_if.rs1 = {rs1_fp, rs1_qual};
|
assign decode_if.rs1 = {rs1_fp, rs1_r};
|
||||||
assign decode_if.rs2 = {rs2_fp, rs2};
|
assign decode_if.rs2 = {rs2_fp, rs2_r};
|
||||||
assign decode_if.rs3 = {1'b1, rs3};
|
assign decode_if.rs3 = {1'b1, rs3_r};
|
||||||
`else
|
`else
|
||||||
`UNUSED_VAR (rd_fp)
|
`UNUSED_VAR (rd_fp)
|
||||||
`UNUSED_VAR (rs1_fp)
|
`UNUSED_VAR (rs1_fp)
|
||||||
`UNUSED_VAR (rs2_fp)
|
`UNUSED_VAR (rs2_fp)
|
||||||
assign decode_if.rd = rd;
|
assign decode_if.rd = rd_r;
|
||||||
assign decode_if.rs1 = rs1_qual;
|
assign decode_if.rs1 = rs1_r;
|
||||||
assign decode_if.rs2 = rs2;
|
assign decode_if.rs2 = rs2_r;
|
||||||
assign decode_if.rs3 = rs3;
|
assign decode_if.rs3 = rs3_r;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign decode_if.imm = imm;
|
assign decode_if.imm = imm;
|
||||||
assign decode_if.use_PC = use_PC;
|
assign decode_if.use_PC = use_PC;
|
||||||
assign decode_if.use_imm = use_imm;
|
assign decode_if.use_imm = use_imm;
|
||||||
|
assign decode_if.used_regs = used_regs;
|
||||||
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
|
||||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
|
||||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
|
||||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,8 @@ module VX_fpu_unit #(
|
|||||||
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||||
.release_slot (fpuq_pop),
|
.release_slot (fpuq_pop),
|
||||||
.full (fpuq_full)
|
.full (fpuq_full),
|
||||||
|
`UNUSED_PIN (empty)
|
||||||
);
|
);
|
||||||
|
|
||||||
// can accept new request?
|
// can accept new request?
|
||||||
|
|||||||
@@ -82,8 +82,7 @@ module VX_ibuffer #(
|
|||||||
|
|
||||||
if (writing && is_slot0) begin
|
if (writing && is_slot0) begin
|
||||||
q_data_out[i] <= q_data_in;
|
q_data_out[i] <= q_data_in;
|
||||||
end
|
end else if (pop) begin
|
||||||
if (pop) begin
|
|
||||||
q_data_out[i] <= q_data_prev[i];
|
q_data_out[i] <= q_data_prev[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -38,7 +38,8 @@ module VX_instr_demux (
|
|||||||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||||
|
.BUFFERED (1)
|
||||||
) alu_buffer (
|
) alu_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -55,7 +56,8 @@ module VX_instr_demux (
|
|||||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||||
|
.BUFFERED (1)
|
||||||
) lsu_buffer (
|
) lsu_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -72,7 +74,8 @@ module VX_instr_demux (
|
|||||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||||
|
.BUFFERED (1)
|
||||||
) csr_buffer (
|
) csr_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -90,7 +93,8 @@ module VX_instr_demux (
|
|||||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||||
|
.BUFFERED (1)
|
||||||
) fpu_buffer (
|
) fpu_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -111,7 +115,8 @@ module VX_instr_demux (
|
|||||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||||
|
.BUFFERED (1)
|
||||||
) gpu_buffer (
|
) gpu_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -183,19 +183,44 @@ module VX_issue #(
|
|||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
|
||||||
|
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
|
||||||
|
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||||
|
$write(", rs2_data=");
|
||||||
|
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||||
|
$write("\n");
|
||||||
end
|
end
|
||||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||||
|
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
|
||||||
|
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||||
|
$write(", data=");
|
||||||
|
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||||
|
$write("\n");
|
||||||
end
|
end
|
||||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data);
|
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||||
|
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
|
||||||
|
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||||
|
$write("\n");
|
||||||
end
|
end
|
||||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
|
||||||
|
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
|
||||||
|
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
|
||||||
|
$write(", rs2_data=");
|
||||||
|
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||||
|
$write(", rs3_data=");
|
||||||
|
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||||
|
$write("\n");
|
||||||
end
|
end
|
||||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
|
||||||
|
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
|
||||||
|
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
|
||||||
|
$write(", rs2_data=");
|
||||||
|
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||||
|
$write("\n");
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -75,10 +75,11 @@ module VX_lsu_unit #(
|
|||||||
`UNUSED_VAR (rsp_type)
|
`UNUSED_VAR (rsp_type)
|
||||||
|
|
||||||
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
||||||
reg [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||||
|
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||||
wire req_sent_all;
|
wire sent_all_ready;
|
||||||
|
|
||||||
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||||
wire mbuf_full;
|
wire mbuf_full;
|
||||||
@@ -88,18 +89,20 @@ module VX_lsu_unit #(
|
|||||||
assign req_offset[i] = req_addr[i][1:0];
|
assign req_offset[i] = req_addr[i][1:0];
|
||||||
end
|
end
|
||||||
|
|
||||||
wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready))
|
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||||
|
|
||||||
|
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||||
|
|
||||||
|
wire mbuf_push = (| dcache_req_fire)
|
||||||
&& (0 == req_sent_mask) // first submission only
|
&& (0 == req_sent_mask) // first submission only
|
||||||
&& req_wb; // loads only
|
&& req_wb; // loads only
|
||||||
|
|
||||||
wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||||
|
|
||||||
wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup);
|
|
||||||
|
|
||||||
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
|
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
|
||||||
|
|
||||||
VX_index_buffer #(
|
VX_index_buffer #(
|
||||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
|
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
|
||||||
.SIZE (`LSUQ_SIZE)
|
.SIZE (`LSUQ_SIZE)
|
||||||
) req_metadata (
|
) req_metadata (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
@@ -107,24 +110,32 @@ module VX_lsu_unit #(
|
|||||||
.write_addr (mbuf_waddr),
|
.write_addr (mbuf_waddr),
|
||||||
.acquire_slot (mbuf_push),
|
.acquire_slot (mbuf_push),
|
||||||
.read_addr (mbuf_raddr),
|
.read_addr (mbuf_raddr),
|
||||||
.write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}),
|
.write_data ({req_wid, req_pc, req_tmask, req_rd, req_wb, req_type, req_offset, req_is_dup}),
|
||||||
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
|
.read_data ({rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
|
||||||
.release_addr (mbuf_raddr),
|
.release_addr (mbuf_raddr),
|
||||||
.release_slot (mbuf_pop),
|
.release_slot (mbuf_pop),
|
||||||
.full (mbuf_full)
|
.full (mbuf_full),
|
||||||
|
`UNUSED_PIN (empty)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|
always @(posedge clk) begin
|
||||||
|| (req_is_dup && dcache_req_if.ready[0]);
|
if (mbuf_push) begin
|
||||||
|
pending_tags[mbuf_waddr] <= req_tag;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
req_sent_mask <= 0;
|
req_sent_mask <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (req_sent_all)
|
if (sent_all_ready)
|
||||||
req_sent_mask <= 0;
|
req_sent_mask <= 0;
|
||||||
else
|
else
|
||||||
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready);
|
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -136,20 +147,21 @@ module VX_lsu_unit #(
|
|||||||
req_tag_hold <= mbuf_waddr;
|
req_tag_hold <= mbuf_waddr;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||||
|
|
||||||
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
|
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mbuf_push) begin
|
if (mbuf_push) begin
|
||||||
rsp_rem_mask[mbuf_waddr] <= req_tmask;
|
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
|
||||||
pending_tags[mbuf_waddr] <= req_tag;
|
|
||||||
end
|
end
|
||||||
if (mbuf_pop_part) begin
|
if (dcache_rsp_fire) begin
|
||||||
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
|
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready);
|
wire req_ready_dep = (req_wb && ~mbuf_full)
|
||||||
|
|| (~req_wb && st_commit_if.ready);
|
||||||
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
|
||||||
|
|
||||||
// DCache Request
|
// DCache Request
|
||||||
|
|
||||||
@@ -181,23 +193,23 @@ module VX_lsu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask;
|
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
|
||||||
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
|
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
|
||||||
assign dcache_req_if.addr = mem_req_addr;
|
assign dcache_req_if.addr = mem_req_addr;
|
||||||
assign dcache_req_if.byteen = mem_req_byteen;
|
assign dcache_req_if.byteen = mem_req_byteen;
|
||||||
assign dcache_req_if.data = mem_req_data;
|
assign dcache_req_if.data = mem_req_data;
|
||||||
|
|
||||||
`ifdef DBG_CACHE_REQ_INFO
|
`ifdef DBG_CACHE_REQ_INFO
|
||||||
assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}};
|
assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}};
|
||||||
`else
|
`else
|
||||||
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
|
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign ready_in = req_ready_dep && req_sent_all;
|
assign ready_in = req_ready_dep && sent_all_ready;
|
||||||
|
|
||||||
// send store commit
|
// send store commit
|
||||||
|
|
||||||
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
|
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
|
||||||
|
|
||||||
assign st_commit_if.valid = is_store_rsp;
|
assign st_commit_if.valid = is_store_rsp;
|
||||||
assign st_commit_if.wid = req_wid;
|
assign st_commit_if.wid = req_wid;
|
||||||
@@ -211,7 +223,7 @@ module VX_lsu_unit #(
|
|||||||
// load response formatting
|
// load response formatting
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] rsp_data;
|
reg [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||||
@@ -234,7 +246,7 @@ module VX_lsu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid;
|
assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid;
|
||||||
|
|
||||||
// send load commit
|
// send load commit
|
||||||
|
|
||||||
@@ -247,7 +259,7 @@ module VX_lsu_unit #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (!load_rsp_stall),
|
.enable (!load_rsp_stall),
|
||||||
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -255,7 +267,7 @@ module VX_lsu_unit #(
|
|||||||
assign dcache_rsp_if.ready = ~load_rsp_stall;
|
assign dcache_rsp_if.ready = ~load_rsp_stall;
|
||||||
|
|
||||||
// scope registration
|
// scope registration
|
||||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready);
|
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
|
||||||
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
|
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
|
||||||
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
|
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
|
||||||
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
|
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
|
||||||
@@ -269,15 +281,15 @@ module VX_lsu_unit #(
|
|||||||
|
|
||||||
`ifdef DBG_PRINT_CORE_DCACHE
|
`ifdef DBG_PRINT_CORE_DCACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin
|
if ((| dcache_req_fire)) begin
|
||||||
if ((| dcache_req_if.rw))
|
if ((| dcache_req_if.rw))
|
||||||
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
|
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
|
||||||
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
|
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
|
||||||
else
|
else
|
||||||
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
|
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
|
||||||
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
|
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
|
||||||
end
|
end
|
||||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
if (dcache_rsp_fire) begin
|
||||||
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
|
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
|
||||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
|
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -81,4 +81,25 @@
|
|||||||
|
|
||||||
`define LTRIM(x,s) x[s-1:0]
|
`define LTRIM(x,s) x[s-1:0]
|
||||||
|
|
||||||
|
`define PRINT_ARRAY1D(a, m) \
|
||||||
|
$write("{"); \
|
||||||
|
for (integer i = (m-1); i >= 0; --i) begin \
|
||||||
|
if (i != (m-1)) $write(", "); \
|
||||||
|
$write("0x%0h", a[i]); \
|
||||||
|
end \
|
||||||
|
$write("}"); \
|
||||||
|
|
||||||
|
`define PRINT_ARRAY2D(a, m, n) \
|
||||||
|
$write("{"); \
|
||||||
|
for (integer i = n-1; i >= 0; --i) begin \
|
||||||
|
if (i != (n-1)) $write(", "); \
|
||||||
|
$write("{"); \
|
||||||
|
for (integer j = (m-1); j >= 0; --j) begin \
|
||||||
|
if (j != (m-1)) $write(", "); \
|
||||||
|
$write("0x%0h", a[i][j]); \
|
||||||
|
end \
|
||||||
|
$write("}"); \
|
||||||
|
end \
|
||||||
|
$write("}")
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
@@ -121,7 +121,7 @@ module Vortex (
|
|||||||
.NUM_REQS (`NUM_CLUSTERS),
|
.NUM_REQS (`NUM_CLUSTERS),
|
||||||
.DATA_WIDTH (32),
|
.DATA_WIDTH (32),
|
||||||
.ADDR_WIDTH (12),
|
.ADDR_WIDTH (12),
|
||||||
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
|
.BUFFERED_REQ (1),
|
||||||
.BUFFERED_RSP (1)
|
.BUFFERED_RSP (1)
|
||||||
) csr_arb (
|
) csr_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
@@ -228,7 +228,7 @@ module Vortex (
|
|||||||
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
|
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||||
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
|
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
|
||||||
.BUFFERED_REQ (1),
|
.BUFFERED_REQ (1),
|
||||||
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
|
.BUFFERED_RSP (1)
|
||||||
) dram_arb (
|
) dram_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
`ifndef NOPAE
|
`ifndef NOPAE
|
||||||
import local_mem_cfg_pkg::*;
|
|
||||||
`include "afu_json_info.vh"
|
`include "afu_json_info.vh"
|
||||||
`else
|
`else
|
||||||
`include "vortex_afu.vh"
|
`include "vortex_afu.vh"
|
||||||
|
`endif
|
||||||
/* verilator lint_off IMPORTSTAR */
|
/* verilator lint_off IMPORTSTAR */
|
||||||
import ccip_if_pkg::*;
|
import ccip_if_pkg::*;
|
||||||
import local_mem_cfg_pkg::*;
|
import local_mem_cfg_pkg::*;
|
||||||
/* verilator lint_on IMPORTSTAR */
|
/* verilator lint_on IMPORTSTAR */
|
||||||
`endif
|
|
||||||
|
|
||||||
module vortex_afu #(
|
module vortex_afu #(
|
||||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||||
|
|||||||
3
hw/rtl/cache/VX_bank.v
vendored
3
hw/rtl/cache/VX_bank.v
vendored
@@ -487,7 +487,8 @@ module VX_bank #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
|
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||||
|
.BUFFERED (NUM_BANKS == 1)
|
||||||
) core_rsp_req (
|
) core_rsp_req (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
3
hw/rtl/cache/VX_cache.v
vendored
3
hw/rtl/cache/VX_cache.v
vendored
@@ -168,8 +168,7 @@ module VX_cache #(
|
|||||||
.NUM_BANKS (NUM_BANKS)
|
.NUM_BANKS (NUM_BANKS)
|
||||||
) flush_ctrl (
|
) flush_ctrl (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset || flush),
|
||||||
.flush (flush),
|
|
||||||
.addr_out (flush_addr),
|
.addr_out (flush_addr),
|
||||||
.valid_out (flush_enable)
|
.valid_out (flush_enable)
|
||||||
);
|
);
|
||||||
|
|||||||
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
|
|||||||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||||
|
.BUFFERED (1)
|
||||||
) pipe_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
@@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
|
|||||||
|
|
||||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||||
|
.BUFFERED (1)
|
||||||
) pipe_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
3
hw/rtl/cache/VX_flush_ctrl.v
vendored
3
hw/rtl/cache/VX_flush_ctrl.v
vendored
@@ -10,7 +10,6 @@ module VX_flush_ctrl #(
|
|||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire flush,
|
|
||||||
output wire [`LINE_SELECT_BITS-1:0] addr_out,
|
output wire [`LINE_SELECT_BITS-1:0] addr_out,
|
||||||
output wire valid_out
|
output wire valid_out
|
||||||
);
|
);
|
||||||
@@ -18,7 +17,7 @@ module VX_flush_ctrl #(
|
|||||||
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
|
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset || flush) begin
|
if (reset) begin
|
||||||
flush_enable <= 1;
|
flush_enable <= 1;
|
||||||
flush_ctr <= 0;
|
flush_ctr <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
|
|||||||
@@ -3,10 +3,6 @@
|
|||||||
/// Modified port of cast module from fpnew Libray
|
/// Modified port of cast module from fpnew Libray
|
||||||
/// reference: https://github.com/pulp-platform/fpnew
|
/// reference: https://github.com/pulp-platform/fpnew
|
||||||
|
|
||||||
`ifndef SYNTHESIS
|
|
||||||
`include "float_dpi.vh"
|
|
||||||
`endif
|
|
||||||
|
|
||||||
module VX_fp_cvt #(
|
module VX_fp_cvt #(
|
||||||
parameter TAGW = 1,
|
parameter TAGW = 1,
|
||||||
parameter LANES = 1
|
parameter LANES = 1
|
||||||
@@ -74,7 +70,7 @@ module VX_fp_cvt #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||||
wire [LANES-1:0] input_sign;
|
wire [LANES-1:0] input_sign;
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; ++i) begin
|
for (genvar i = 0; i < LANES; ++i) begin
|
||||||
@@ -82,10 +78,10 @@ module VX_fp_cvt #(
|
|||||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||||
wire fmt_sign = dataa[i][31];
|
wire fmt_sign = dataa[i][31];
|
||||||
wire int_sign = dataa[i][31] & is_signed;
|
wire int_sign = dataa[i][31] & is_signed;
|
||||||
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i];
|
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||||
|
|
||||||
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]});
|
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||||
end
|
end
|
||||||
@@ -115,7 +111,7 @@ module VX_fp_cvt #(
|
|||||||
wire [2:0] rnd_mode_s0;
|
wire [2:0] rnd_mode_s0;
|
||||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||||
wire [LANES-1:0] input_sign_s0;
|
wire [LANES-1:0] input_sign_s0;
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
|
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
|
||||||
wire [LANES-1:0] mant_is_zero_s0;
|
wire [LANES-1:0] mant_is_zero_s0;
|
||||||
@@ -136,37 +132,92 @@ module VX_fp_cvt #(
|
|||||||
// Normalization
|
// Normalization
|
||||||
|
|
||||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; ++i) begin
|
for (genvar i = 0; i < LANES; ++i) begin
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
// Input mantissa needs to be normalized
|
// Input mantissa needs to be normalized
|
||||||
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp;
|
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||||
wire signed [INT_EXP_WIDTH-1:0] int_input_exp;
|
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||||
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
|
|
||||||
|
|
||||||
// signed form for calculations
|
|
||||||
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
|
|
||||||
|
|
||||||
// Realign input mantissa, append zeroes if destination is wider
|
// Realign input mantissa, append zeroes if destination is wider
|
||||||
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||||
|
|
||||||
// Unbias exponent and compensate for shift
|
// Unbias exponent and compensate for shift
|
||||||
assign fp_input_exp = $signed(fmt_exponent_s0[i] +
|
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||||
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) +
|
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||||
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) -
|
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||||
renorm_shamt_sgn));
|
{1'b0, renorm_shamt_s0[i]};
|
||||||
|
|
||||||
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
|
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||||
|
|
||||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||||
|
|
||||||
// Rebias the exponent
|
// Rebias the exponent
|
||||||
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS);
|
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// Perform adjustments to mantissa and exponent
|
||||||
|
|
||||||
|
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
|
||||||
|
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
|
||||||
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
|
||||||
|
wire [LANES-1:0] of_before_round_s0;
|
||||||
|
|
||||||
|
for (genvar i = 0; i < LANES; ++i) begin
|
||||||
|
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||||
|
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||||
|
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||||
|
reg of_before_round;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
// Default assignment
|
||||||
|
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
|
||||||
|
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||||
|
denorm_shamt = 0; // right of mantissa
|
||||||
|
of_before_round = 1'b0;
|
||||||
|
|
||||||
|
// Handle INT casts
|
||||||
|
if (is_itof_s0) begin
|
||||||
|
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||||
|
// Overflow or infinities (for proper rounding)
|
||||||
|
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||||
|
preshift_mant = ~0; // largest normal value and RS bits set
|
||||||
|
of_before_round = 1'b1;
|
||||||
|
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
|
||||||
|
// Limit the shift to retain sticky bits
|
||||||
|
final_exp = 0; // denormal result
|
||||||
|
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||||
|
end else if ($signed(destination_exp[i]) < $signed(1)) begin
|
||||||
|
// Denormalize underflowing values
|
||||||
|
final_exp = 0; // denormal result
|
||||||
|
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
|
||||||
|
end
|
||||||
|
end else begin
|
||||||
|
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
|
||||||
|
// overflow: when converting to unsigned the range is larger by one
|
||||||
|
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
|
||||||
|
of_before_round = 1'b1;
|
||||||
|
end else if ($signed(input_exp[i]) < $signed(-1)) begin
|
||||||
|
// underflow
|
||||||
|
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||||
|
end else begin
|
||||||
|
// By default right shift mantissa to be an integer
|
||||||
|
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
`IGNORE_WARNINGS_END
|
||||||
|
end
|
||||||
|
|
||||||
|
assign preshift_mant_s0[i] = preshift_mant;
|
||||||
|
assign denorm_shamt_s0[i] = denorm_shamt;
|
||||||
|
assign final_exp_s0[i] = final_exp;
|
||||||
|
assign of_before_round_s0[i] = of_before_round;
|
||||||
|
end
|
||||||
|
|
||||||
// Pipeline stage1
|
// Pipeline stage1
|
||||||
|
|
||||||
wire valid_in_s1;
|
wire valid_in_s1;
|
||||||
@@ -177,108 +228,55 @@ module VX_fp_cvt #(
|
|||||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||||
wire [LANES-1:0] mant_is_zero_s1;
|
wire [LANES-1:0] mant_is_zero_s1;
|
||||||
wire [LANES-1:0] input_sign_s1;
|
wire [LANES-1:0] input_sign_s1;
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
|
||||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1;
|
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
|
||||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||||
|
wire [LANES-1:0] of_before_round_s1;
|
||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)),
|
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
|
||||||
.RESETW (1)
|
.RESETW (1)
|
||||||
) pipe_reg1 (
|
) pipe_reg1 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (~stall),
|
.enable (~stall),
|
||||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}),
|
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
|
||||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
|
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
|
||||||
);
|
);
|
||||||
|
|
||||||
// Casting
|
|
||||||
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
|
||||||
|
|
||||||
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
|
||||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
|
|
||||||
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
|
||||||
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
|
||||||
|
|
||||||
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
|
||||||
|
|
||||||
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
|
|
||||||
reg [LANES-1:0] of_before_round;
|
|
||||||
|
|
||||||
// Perform adjustments to mantissa and exponent
|
|
||||||
for (genvar i = 0; i < LANES; ++i) begin
|
|
||||||
always @(*) begin
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
|
||||||
// Default assignment
|
|
||||||
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
|
|
||||||
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
|
|
||||||
denorm_shamt[i] = 0; // right of mantissa
|
|
||||||
of_before_round[i] = 1'b0;
|
|
||||||
|
|
||||||
// Place mantissa to the left of the shifter
|
|
||||||
preshift_mant[i] = {input_mant_s1[i], 33'b0};
|
|
||||||
|
|
||||||
// Handle INT casts
|
|
||||||
if (is_itof_s1) begin
|
|
||||||
// Overflow or infinities (for proper rounding)
|
|
||||||
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
|
|
||||||
final_exp[i] = (2**EXP_BITS-2); // largest normal value
|
|
||||||
preshift_mant[i] = ~0; // largest normal value and RS bits set
|
|
||||||
of_before_round[i] = 1'b1;
|
|
||||||
// Denormalize underflowing values
|
|
||||||
end else if (($signed(destination_exp_s1[i]) < $signed(1))
|
|
||||||
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
|
|
||||||
final_exp[i] = 0; // denormal result
|
|
||||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
|
|
||||||
// Limit the shift to retain sticky bits
|
|
||||||
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
|
|
||||||
final_exp[i] = 0; // denormal result
|
|
||||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
|
|
||||||
end
|
|
||||||
end else begin
|
|
||||||
// By default right shift mantissa to be an integer
|
|
||||||
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
|
||||||
// overflow: when converting to unsigned the range is larger by one
|
|
||||||
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
|
|
||||||
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
|
|
||||||
of_before_round[i] = 1'b1;
|
|
||||||
// underflow
|
|
||||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
|
||||||
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
|
||||||
end
|
|
||||||
end
|
|
||||||
`IGNORE_WARNINGS_END
|
|
||||||
end
|
|
||||||
|
|
||||||
// Mantissa adjustment shift
|
|
||||||
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
|
|
||||||
|
|
||||||
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
|
||||||
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
|
||||||
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
|
||||||
|
|
||||||
// Collapse sticky bits
|
|
||||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
|
|
||||||
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
|
|
||||||
|
|
||||||
// select RS bits for destination operation
|
|
||||||
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
|
||||||
end
|
|
||||||
|
|
||||||
// Rouding and classification
|
|
||||||
|
|
||||||
wire [LANES-1:0] rounded_sign;
|
wire [LANES-1:0] rounded_sign;
|
||||||
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
||||||
|
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
|
||||||
|
|
||||||
|
// Rouding and classification
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; ++i) begin
|
for (genvar i = 0; i < LANES; ++i) begin
|
||||||
// Pack exponent and mantissa into proper rounding form
|
wire [2*INT_MAN_WIDTH:0] destination_mant;
|
||||||
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]};
|
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||||
|
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||||
|
wire [1:0] round_sticky_bits;
|
||||||
|
wire [31:0] fmt_pre_round_abs;
|
||||||
|
wire [31:0] pre_round_abs;
|
||||||
|
|
||||||
// Sign-extend integer result
|
// Mantissa adjustment shift
|
||||||
wire [31:0] ifmt_pre_round_abs = final_int[i];
|
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
|
||||||
|
|
||||||
|
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
||||||
|
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||||
|
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||||
|
|
||||||
|
// Collapse sticky bits
|
||||||
|
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
|
||||||
|
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
|
||||||
|
|
||||||
|
// select RS bits for destination operation
|
||||||
|
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||||
|
|
||||||
|
// Pack exponent and mantissa into proper rounding form
|
||||||
|
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
|
||||||
|
|
||||||
// Select output with destination format and operation
|
// Select output with destination format and operation
|
||||||
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs;
|
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
|
||||||
|
|
||||||
// Perform the rounding
|
// Perform the rounding
|
||||||
VX_fp_rounding #(
|
VX_fp_rounding #(
|
||||||
@@ -286,9 +284,9 @@ module VX_fp_cvt #(
|
|||||||
) fp_rounding (
|
) fp_rounding (
|
||||||
.abs_value_i (pre_round_abs),
|
.abs_value_i (pre_round_abs),
|
||||||
.sign_i (input_sign_s1[i]),
|
.sign_i (input_sign_s1[i]),
|
||||||
.round_sticky_bits_i (round_sticky_bits[i]),
|
.round_sticky_bits_i(round_sticky_bits),
|
||||||
.rnd_mode_i (rnd_mode_s1),
|
.rnd_mode_i (rnd_mode_s1),
|
||||||
.effective_subtraction_i (1'b0),
|
.effective_subtraction_i(1'b0),
|
||||||
.abs_rounded_o (rounded_abs[i]),
|
.abs_rounded_o (rounded_abs[i]),
|
||||||
.sign_o (rounded_sign[i]),
|
.sign_o (rounded_sign[i]),
|
||||||
`UNUSED_PIN (exact_zero_o)
|
`UNUSED_PIN (exact_zero_o)
|
||||||
@@ -306,23 +304,22 @@ module VX_fp_cvt #(
|
|||||||
wire [LANES-1:0] input_sign_s2;
|
wire [LANES-1:0] input_sign_s2;
|
||||||
wire [LANES-1:0] rounded_sign_s2;
|
wire [LANES-1:0] rounded_sign_s2;
|
||||||
wire [LANES-1:0][31:0] rounded_abs_s2;
|
wire [LANES-1:0][31:0] rounded_abs_s2;
|
||||||
|
wire [LANES-1:0] of_before_round_s2;
|
||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)),
|
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||||
.RESETW (1)
|
.RESETW (1)
|
||||||
) pipe_reg2 (
|
) pipe_reg2 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (~stall),
|
.enable (~stall),
|
||||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}),
|
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
|
||||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2})
|
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [LANES-1:0] of_after_round;
|
wire [LANES-1:0] of_after_round;
|
||||||
wire [LANES-1:0] uf_after_round;
|
wire [LANES-1:0] uf_after_round;
|
||||||
|
|
||||||
wire [LANES-1:0][31:0] fmt_result;
|
wire [LANES-1:0][31:0] fmt_result;
|
||||||
|
|
||||||
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
|
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
|
||||||
wire [LANES-1:0] rounded_int_res_zero; // after rounding
|
wire [LANES-1:0] rounded_int_res_zero; // after rounding
|
||||||
|
|
||||||
@@ -335,7 +332,7 @@ module VX_fp_cvt #(
|
|||||||
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
||||||
|
|
||||||
// Negative integer result needs to be brought into two's complement
|
// Negative integer result needs to be brought into two's complement
|
||||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||||
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
|
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -373,7 +370,7 @@ module VX_fp_cvt #(
|
|||||||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||||
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
|
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
|
||||||
end else begin
|
end else begin
|
||||||
int_special_result[i][30:0] = 2**(31) -1; // alone yields 2**(31)-1
|
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
|
||||||
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
|
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -381,7 +378,7 @@ module VX_fp_cvt #(
|
|||||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||||
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
|
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
|
||||||
| in_a_type_s2[i].is_inf
|
| in_a_type_s2[i].is_inf
|
||||||
| of_before_round[i]
|
| of_before_round_s2[i]
|
||||||
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
|
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
|
||||||
|
|
||||||
// All integer special cases are invalid
|
// All integer special cases are invalid
|
||||||
@@ -399,11 +396,11 @@ module VX_fp_cvt #(
|
|||||||
wire [31:0] fp_result, int_result;
|
wire [31:0] fp_result, int_result;
|
||||||
|
|
||||||
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i]));
|
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
|
||||||
|
|
||||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF
|
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
|
||||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||||
assign fp_regular_status.NX = inexact;
|
assign fp_regular_status.NX = inexact;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef SYNTHESIS
|
||||||
|
`include "float_dpi.vh"
|
||||||
|
`endif
|
||||||
|
|
||||||
module VX_fp_div #(
|
module VX_fp_div #(
|
||||||
parameter TAGW = 1,
|
parameter TAGW = 1,
|
||||||
parameter LANES = 1
|
parameter LANES = 1
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef SYNTHESIS
|
||||||
|
`include "float_dpi.vh"
|
||||||
|
`endif
|
||||||
|
|
||||||
module VX_fp_fma #(
|
module VX_fp_fma #(
|
||||||
parameter TAGW = 1,
|
parameter TAGW = 1,
|
||||||
parameter LANES = 1
|
parameter LANES = 1
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`ifndef SYNTHESIS
|
||||||
|
`include "float_dpi.vh"
|
||||||
|
`endif
|
||||||
|
|
||||||
module VX_fp_sqrt #(
|
module VX_fp_sqrt #(
|
||||||
parameter TAGW = 1,
|
parameter TAGW = 1,
|
||||||
parameter LANES = 1
|
parameter LANES = 1
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ module VX_fp_type (
|
|||||||
);
|
);
|
||||||
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
|
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
|
||||||
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
|
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
|
||||||
wire is_subnormal = (exp_i == 8'd0) && !is_zero;
|
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
|
||||||
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
|
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
|
||||||
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
|
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
|
||||||
wire is_signaling = is_nan && (man_i[22] == 1'b0);
|
wire is_signaling = is_nan && (man_i[22] == 1'b0);
|
||||||
|
|||||||
@@ -330,9 +330,9 @@ module VX_fpu_dpi #(
|
|||||||
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
||||||
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
||||||
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
||||||
dpi_fsgnj (dataa[i], result_fsgnj[i]);
|
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
|
||||||
dpi_fsgnjn (dataa[i], result_fsgnjn[i]);
|
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
|
||||||
dpi_fsgnjx (dataa[i], result_fsgnjx[i]);
|
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
|
||||||
result_fmv[i] = dataa[i];
|
result_fmv[i] = dataa[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -18,11 +18,12 @@ module VX_index_buffer #(
|
|||||||
input wire [ADDRW-1:0] release_addr,
|
input wire [ADDRW-1:0] release_addr,
|
||||||
input wire release_slot,
|
input wire release_slot,
|
||||||
|
|
||||||
|
output wire empty,
|
||||||
output wire full
|
output wire full
|
||||||
);
|
);
|
||||||
reg [SIZE-1:0] free_slots, free_slots_n;
|
reg [SIZE-1:0] free_slots, free_slots_n;
|
||||||
reg [ADDRW-1:0] write_addr_r;
|
reg [ADDRW-1:0] write_addr_r;
|
||||||
reg full_r;
|
reg empty_r, full_r;
|
||||||
|
|
||||||
wire free_valid;
|
wire free_valid;
|
||||||
wire [ADDRW-1:0] free_index;
|
wire [ADDRW-1:0] free_index;
|
||||||
@@ -51,6 +52,7 @@ module VX_index_buffer #(
|
|||||||
if (reset) begin
|
if (reset) begin
|
||||||
write_addr_r <= ADDRW'(1'b0);
|
write_addr_r <= ADDRW'(1'b0);
|
||||||
free_slots <= {SIZE{1'b1}};
|
free_slots <= {SIZE{1'b1}};
|
||||||
|
empty_r <= 1'b1;
|
||||||
full_r <= 1'b0;
|
full_r <= 1'b0;
|
||||||
end else begin
|
end else begin
|
||||||
if (release_slot) begin
|
if (release_slot) begin
|
||||||
@@ -60,6 +62,7 @@ module VX_index_buffer #(
|
|||||||
write_addr_r <= free_index;
|
write_addr_r <= free_index;
|
||||||
end
|
end
|
||||||
free_slots <= free_slots_n;
|
free_slots <= free_slots_n;
|
||||||
|
empty_r <= (& free_slots_n);
|
||||||
full_r <= ~free_valid;
|
full_r <= ~free_valid;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -81,6 +84,7 @@ module VX_index_buffer #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
assign write_addr = write_addr_r;
|
assign write_addr = write_addr_r;
|
||||||
|
assign empty = empty_r;
|
||||||
assign full = full_r;
|
assign full = full_r;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -67,8 +67,7 @@ module VX_skid_buffer #(
|
|||||||
end else begin
|
end else begin
|
||||||
if (ready_out) begin
|
if (ready_out) begin
|
||||||
use_buffer <= 0;
|
use_buffer <= 0;
|
||||||
end
|
end else if (push && valid_out_r) begin
|
||||||
if (push && !pop) begin
|
|
||||||
assert(!use_buffer);
|
assert(!use_buffer);
|
||||||
use_buffer <= 1;
|
use_buffer <= 1;
|
||||||
end
|
end
|
||||||
@@ -82,8 +81,10 @@ module VX_skid_buffer #(
|
|||||||
if (push) begin
|
if (push) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
end
|
end
|
||||||
if (pop) begin
|
if (pop && !use_buffer) begin
|
||||||
data_out_r <= use_buffer ? buffer : data_in;
|
data_out_r <= data_in;
|
||||||
|
end else if (pop) begin
|
||||||
|
data_out_r <= buffer;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -139,4 +139,3 @@ clean-fpga-64c:
|
|||||||
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
|
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
|
||||||
|
|
||||||
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
||||||
rm sources.txt
|
|
||||||
@@ -6,7 +6,7 @@
|
|||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
#+define+PERF_ENABLE
|
#+define+PERF_ENABLE
|
||||||
|
|
||||||
vortex_afu.json
|
vortex_afu16.json
|
||||||
QI:vortex_afu.qsf
|
QI:vortex_afu.qsf
|
||||||
|
|
||||||
C:sources.txt
|
C:sources.txt
|
||||||
@@ -2,6 +2,8 @@
|
|||||||
+define+NUM_CLUSTERS=4
|
+define+NUM_CLUSTERS=4
|
||||||
#+define+L3_ENABLE=1
|
#+define+L3_ENABLE=1
|
||||||
|
|
||||||
|
+define+GLOBAL_BLOCK_SIZE=16
|
||||||
|
|
||||||
+define+SYNTHESIS
|
+define+SYNTHESIS
|
||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
#+define+PERF_ENABLE
|
#+define+PERF_ENABLE
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
+define+NUM_CLUSTERS=8
|
+define+NUM_CLUSTERS=8
|
||||||
#+define+L3_ENABLE=1
|
#+define+L3_ENABLE=1
|
||||||
|
|
||||||
|
+define+GLOBAL_BLOCK_SIZE=16
|
||||||
|
|
||||||
+define+SYNTHESIS
|
+define+SYNTHESIS
|
||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
#+define+PERF_ENABLE
|
#+define+PERF_ENABLE
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
#+define+PERF_ENABLE
|
#+define+PERF_ENABLE
|
||||||
|
|
||||||
vortex_afu.json
|
vortex_afu8.json
|
||||||
QI:vortex_afu.qsf
|
QI:vortex_afu.qsf
|
||||||
|
|
||||||
C:sources.txt
|
C:sources.txt
|
||||||
56
hw/syn/opae/vortex_afu16.json
Normal file
56
hw/syn/opae/vortex_afu16.json
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"afu-image": {
|
||||||
|
"power": 0,
|
||||||
|
"clock-frequency-high": "auto-200",
|
||||||
|
"clock-frequency-low": "auto-200",
|
||||||
|
|
||||||
|
"cmd-mem-read": 1,
|
||||||
|
"cmd-mem-write": 2,
|
||||||
|
"cmd-run": 3,
|
||||||
|
"cmd-csr-read": 4,
|
||||||
|
"cmd-csr-write": 5,
|
||||||
|
|
||||||
|
"mmio-cmd-type": 10,
|
||||||
|
"mmio-io-addr": 12,
|
||||||
|
"mmio-mem-addr": 14,
|
||||||
|
"mmio-data-size": 16,
|
||||||
|
"mmio-status": 18,
|
||||||
|
"mmio-scope-read": 20,
|
||||||
|
"mmio-scope-write": 22,
|
||||||
|
"mmio-csr-core": 24,
|
||||||
|
"mmio-csr-addr": 26,
|
||||||
|
"mmio-csr-data": 28,
|
||||||
|
"mmio-csr-read": 30,
|
||||||
|
|
||||||
|
"afu-top-interface":
|
||||||
|
{
|
||||||
|
"class": "ccip_std_afu_avalon_mm",
|
||||||
|
"module-ports" :
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"class": "cci-p",
|
||||||
|
"params":
|
||||||
|
{
|
||||||
|
"clock": "uClk_usr"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"class": "local-memory",
|
||||||
|
"params":
|
||||||
|
{
|
||||||
|
"clock": "uClk_usr"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"accelerator-clusters":
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "vortex_afu",
|
||||||
|
"total-contexts": 1,
|
||||||
|
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
57
hw/syn/opae/vortex_afu8.json
Normal file
57
hw/syn/opae/vortex_afu8.json
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"afu-image": {
|
||||||
|
"power": 0,
|
||||||
|
"clock-frequency-high": "auto-210",
|
||||||
|
"clock-frequency-low": "auto-210",
|
||||||
|
|
||||||
|
"cmd-mem-read": 1,
|
||||||
|
"cmd-mem-write": 2,
|
||||||
|
"cmd-run": 3,
|
||||||
|
"cmd-csr-read": 4,
|
||||||
|
"cmd-csr-write": 5,
|
||||||
|
|
||||||
|
"mmio-cmd-type": 10,
|
||||||
|
"mmio-io-addr": 12,
|
||||||
|
"mmio-mem-addr": 14,
|
||||||
|
"mmio-data-size": 16,
|
||||||
|
"mmio-status": 18,
|
||||||
|
"mmio-scope-read": 20,
|
||||||
|
"mmio-scope-write": 22,
|
||||||
|
"mmio-csr-core": 24,
|
||||||
|
"mmio-csr-addr": 26,
|
||||||
|
"mmio-csr-data": 28,
|
||||||
|
"mmio-csr-read": 30,
|
||||||
|
|
||||||
|
"afu-top-interface":
|
||||||
|
{
|
||||||
|
"class": "ccip_std_afu_avalon_mm",
|
||||||
|
"module-ports" :
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"class": "cci-p",
|
||||||
|
"params":
|
||||||
|
{
|
||||||
|
"clock": "uClk_usr"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"class": "local-memory",
|
||||||
|
"params":
|
||||||
|
{
|
||||||
|
"clock": "uClk_usr"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"accelerator-clusters":
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "vortex_afu",
|
||||||
|
"total-contexts": 1,
|
||||||
|
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
37
hw/syn/quartus/Makefile
Normal file
37
hw/syn/quartus/Makefile
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
.PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64
|
||||||
|
|
||||||
|
unittest:
|
||||||
|
$(MAKE) -C unittest clean && $(MAKE) -C unittest > unittest/build.log 2>&1 &
|
||||||
|
|
||||||
|
pipeline:
|
||||||
|
$(MAKE) -C pipeline clean && $(MAKE) -C pipeline > pipeline/build.log 2>&1 &
|
||||||
|
|
||||||
|
cache:
|
||||||
|
$(MAKE) -C cache clean && $(MAKE) -C cache > cache/build.log 2>&1 &
|
||||||
|
|
||||||
|
core:
|
||||||
|
$(MAKE) -C core clean && $(MAKE) -C core > core/build.log 2>&1 &
|
||||||
|
|
||||||
|
vortex:
|
||||||
|
$(MAKE) -C vortex clean && $(MAKE) -C vortex > vortex/build.log 2>&1 &
|
||||||
|
|
||||||
|
top1:
|
||||||
|
$(MAKE) -C top1 clean && $(MAKE) -C top1 > top1/build.log 2>&1 &
|
||||||
|
|
||||||
|
top2:
|
||||||
|
$(MAKE) -C top2 clean && $(MAKE) -C top2 > top2/build.log 2>&1 &
|
||||||
|
|
||||||
|
top4:
|
||||||
|
$(MAKE) -C top4 clean && $(MAKE) -C top4 > top4/build.log 2>&1 &
|
||||||
|
|
||||||
|
top8:
|
||||||
|
$(MAKE) -C top8 clean && $(MAKE) -C top8 > top8/build.log 2>&1 &
|
||||||
|
|
||||||
|
top16:
|
||||||
|
$(MAKE) -C top16 clean && $(MAKE) -C top16 > top16/build.log 2>&1 &
|
||||||
|
|
||||||
|
top32:
|
||||||
|
$(MAKE) -C top32 clean && $(MAKE) -C top32 > top32/build.log 2>&1 &
|
||||||
|
|
||||||
|
top64:
|
||||||
|
$(MAKE) -C top64 clean && $(MAKE) -C top64 > top64/build.log 2>&1 &
|
||||||
@@ -41,10 +41,6 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||||
|
|
||||||
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
|
|
||||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
|
||||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
|
||||||
|
|
||||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||||
|
|||||||
@@ -1,13 +1,20 @@
|
|||||||
PROJECT = Vortex
|
|
||||||
TOP_LEVEL_ENTITY = Vortex
|
|
||||||
SRC_FILE = Vortex.v
|
|
||||||
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
|
|
||||||
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
|
|
||||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
|
||||||
|
|
||||||
# Part, Family
|
|
||||||
FAMILY = "Arria 10"
|
FAMILY = "Arria 10"
|
||||||
DEVICE = 10AX115N3F40E2SG
|
DEVICE = 10AX115N3F40E2SG
|
||||||
|
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
|
||||||
|
|
||||||
|
#FAMILY = "Stratix 10"
|
||||||
|
#DEVICE = 1SX280HN2F43E2VG
|
||||||
|
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
|
||||||
|
|
||||||
|
PROJECT = Vortex
|
||||||
|
TOP_LEVEL_ENTITY = Vortex
|
||||||
|
SRC_FILE = Vortex.sv
|
||||||
|
|
||||||
|
RTL_DIR=../../../rtl
|
||||||
|
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
|
||||||
|
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE)
|
||||||
|
|
||||||
|
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||||
|
|
||||||
# Executable Configuration
|
# Executable Configuration
|
||||||
SYN_ARGS = --parallel --read_settings_files=on
|
SYN_ARGS = --parallel --read_settings_files=on
|
||||||
|
|||||||
Reference in New Issue
Block a user