This commit is contained in:
Malik Aki Burton
2021-04-05 14:05:22 -04:00
39 changed files with 667 additions and 362 deletions

View File

@@ -15,6 +15,9 @@ set -e
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# Blackbox tests # Blackbox tests
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1" ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1" ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"

View File

@@ -5,16 +5,19 @@ Description: Makes the build in the opae directory with the specified core
exists, a make clean command is run before the build. Script waits exists, a make clean command is run before the build. Script waits
until the inteldev script or quartus program is finished running. until the inteldev script or quartus program is finished running.
Usage: ./build.sh -c [1|2|4|8|16] [-p [y|n]] Usage: ./build.sh -c [1|2|4|8|16] [-p perf] [-w wait]
Options: Options:
-c -c
Core count (1, 2, 4, 8, or 16). Core count (1, 2, 4, 8, or 16).
-p -p
Performance profiling enable (y or n). Changes the source file in the Performance profiling enable. Changes the source file in the
opae directory to include/exclude "+define+PERF_ENABLE". opae directory to include/exclude "+define+PERF_ENABLE".
-w
Wait for the build to complete
_______________________________________________________________________________ _______________________________________________________________________________

View File

@@ -1,10 +1,23 @@
#!/bin/bash #!/bin/bash
while getopts c:p: flag BUILD_DIR=../../hw/syn/opae
perf=0
wait=0
while getopts c:pwh flag
do do
case "${flag}" in case "${flag}" in
c) cores=${OPTARG};; #1, 2, 4, 8, 16 c) cores=${OPTARG};; #1, 2, 4, 8, 16
p) perf=${OPTARG};; #perf counters enable (y/n) p) perf=1;; #perf counters enable
w) wait=1;; # wait for build to complete
h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
exit 0
;;
\?)
echo "Invalid option: -$OPTARG" 1>&2
exit 1
;;
esac esac
done done
@@ -13,25 +26,22 @@ if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
exit 1 exit 1
fi fi
cd ../../hw/syn/opae cd ${BUILD_DIR}
sources_file="./sources_${cores}c.txt" sources_file="./sources_${cores}c.txt"
if [ ${perf:0:1} = "n" ]; then if [ ${perf} = 1 ]; then
if grep -v '^ *#' ${sources_file} | grep -Fxq '+define+SYNTHESIS'; then
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
elif ! grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
sed -i '1s/^/#+define+PERF_ENABLE\n/' ${sources_file}
fi
elif [ ${perf:0:1} = "y" ]; then
if grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then if grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file} sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
elif ! grep -Fxq '+define+PERF_ENABLE' ${sources_file}; then elif ! grep -Fxq '+define+PERF_ENABLE' ${sources_file}; then
sed -i '1s/^/+define+PERF_ENABLE\n/' ${sources_file} sed -i '1s/^/+define+PERF_ENABLE\n/' ${sources_file}
fi fi
else else
echo 'Invalid parameter for argument -p (y/n expected)' if grep -v '^ *#' ${sources_file} | grep -Fxq '+define+SYNTHESIS'; then
exit 1 sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
elif ! grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
sed -i '1s/^/#+define+PERF_ENABLE\n/' ${sources_file}
fi
fi fi
if [ -d "./build_fpga_{$cores}c" ]; then if [ -d "./build_fpga_{$cores}c" ]; then
@@ -39,12 +49,12 @@ if [ -d "./build_fpga_{$cores}c" ]; then
fi fi
make "fpga-${cores}c" make "fpga-${cores}c"
if [ ${wait} = 1 ]; then
sleep 30 sleep 30
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus")) pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
for pid in ${pids[@]}; do for pid in ${pids[@]}; do
while kill -0 ${pid} 2> /dev/null; do while kill -0 ${pid} 2> /dev/null; do
sleep 30 sleep 30
done done
done done
fi

View File

@@ -2,6 +2,6 @@
for ((i=1; i <= 16; i=i*2)); do for ((i=1; i <= 16; i=i*2)); do
echo "Building ${i} core build..." echo "Building ${i} core build..."
./build.sh -c ${i} -p y ./build.sh -c ${i} -p -w
echo "Done ${i} core build." echo "Done ${i} core build."
done done

View File

@@ -26,9 +26,9 @@ extern "C" {
void dpi_utof(int a, int frm, int* result, int* fflags); void dpi_utof(int a, int frm, int* result, int* fflags);
void dpi_fclss(int a, int* result); void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int* result); void dpi_fsgnj(int a, int b, int* result);
void dpi_fsgnjn(int a, int* result); void dpi_fsgnjn(int a, int b, int* result);
void dpi_fsgnjx(int a, int* result); void dpi_fsgnjx(int a, int b, int* result);
void dpi_flt(int a, int b, int* result, int* fflags); void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags); void dpi_fle(int a, int b, int* result, int* fflags);
@@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
} }
void dpi_fclss(int a, int* result) { void dpi_fclss(int a, int* result) {
// TODO
*result = 0; int r = 0; // clear all bits
bool fsign = (a >> 31);
uint32_t expo = (a >> 23) & 0xFF;
uint32_t fraction = a & 0x7FFFFF;
if ((expo == 0) && (fraction == 0)) {
r = fsign ? (1 << 3) : (1 << 4); // +/- 0
} else if ((expo == 0) && (fraction != 0)) {
r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
} else if ((expo == 0xFF) && (fraction == 0)) {
r = fsign ? (1<<0) : (1<<7); // +/- infinity
} else if ((expo == 0xFF ) && (fraction != 0)) {
if (!fsign && (fraction == 0x00400000)) {
r = (1 << 9); // quiet NaN
} else {
r = (1 << 8); // signaling NaN
}
} else {
r = fsign ? (1 << 1) : (1 << 6); // +/- normal
} }
void dpi_fsgnj(int a, int* result) { *result = r;
// TODO
*result = 0;
} }
void dpi_fsgnjn(int a, int* result) { void dpi_fsgnj(int a, int b, int* result) {
// TODO
*result = 0; int sign = b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
} }
void dpi_fsgnjx(int a, int* result) { void dpi_fsgnjn(int a, int b, int* result) {
// TODO
*result = 0; int sign = ~b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
}
void dpi_fsgnjx(int a, int b, int* result) {
int sign1 = a & 0x80000000;
int sign2 = b & 0x80000000;
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
*result = r;
} }

View File

@@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fclss(input int a, output int result); import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, output int result); import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result); import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result); import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags); import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);

View File

@@ -120,7 +120,7 @@ module VX_cluster #(
.DATA_WIDTH (32), .DATA_WIDTH (32),
.ADDR_WIDTH (12), .ADDR_WIDTH (12),
.BUFFERED_REQ (1), .BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CORES >= 4) .BUFFERED_RSP (1)
) csr_arb ( ) csr_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -225,7 +225,7 @@ module VX_cluster #(
.DATA_WIDTH (`L2DRAM_LINE_WIDTH), .DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH), .TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH), .TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
.BUFFERED_REQ (`NUM_CORES >= 4), .BUFFERED_REQ (1),
.BUFFERED_RSP (1) .BUFFERED_RSP (1)
) dram_arb ( ) dram_arb (
.clk (clk), .clk (clk),

View File

@@ -21,7 +21,7 @@ module VX_databus_arb (
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN); localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE); localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
localparam REQ_ADDRW = 32 - REQ_ASHIFT; localparam REQ_ADDRW = 32 - REQ_ASHIFT;
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
// //
@@ -30,41 +30,42 @@ module VX_databus_arb (
for (genvar i = 0; i < `NUM_THREADS; ++i) begin for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire cache_req_ready_in; wire cache_req_valid_out, cache_req_ready_out;
wire smem_req_ready_in; wire is_smem_addr_in, is_smem_addr_out;
// select shared memory bus // select shared memory bus
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT)) && (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT)); && (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (REQ_DATAW) .DATAW (REQ_DATAW)
) cache_out_buffer ( ) out_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (core_req_if.valid[i] && !is_smem_addr), .valid_in (core_req_if.valid[i]),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}), .data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (cache_req_ready_in), .ready_in (core_req_if.ready[i]),
.valid_out (cache_req_if.valid[i]), .valid_out (cache_req_valid_out),
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}), .data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_if.ready[i]) .ready_out (cache_req_ready_out)
); );
VX_skid_buffer #( if (`SM_ENABLE ) begin
.DATAW (REQ_DATAW) assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
) smem_out_buffer ( assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
.clk (clk), assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
.reset (reset),
.valid_in (core_req_if.valid[i] && is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_if.valid[i]),
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
.ready_out (smem_req_if.ready[i])
);
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in; assign smem_req_if.addr[i] = cache_req_if.addr[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
assign smem_req_if.data[i] = cache_req_if.data[i];
assign smem_req_if.tag[i] = cache_req_if.tag[i];
end else begin
`UNUSED_VAR (is_smem_addr_out)
assign cache_req_if.valid[i] = cache_req_valid_out;
assign cache_req_ready_out = cache_req_if.ready[i];
end
end end
// //

View File

@@ -1,6 +1,12 @@
`include "VX_define.vh" `include "VX_define.vh"
`include "VX_print_instr.vh" `include "VX_print_instr.vh"
`ifdef EXT_F_ENABLE
`define USED_REGS(f,r) used_regs[{f,r}] = 1
`else
`define USED_REGS(f,r) used_regs[r] = 1
`endif
module VX_decode #( module VX_decode #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
@@ -22,10 +28,12 @@ module VX_decode #(
reg [`EX_BITS-1:0] ex_type; reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type; reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod; reg [`MOD_BITS-1:0] op_mod;
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm; reg [31:0] imm;
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm; reg use_rd, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp; reg rd_fp, rs1_fp, rs2_fp;
reg is_join, is_wstall; reg is_join, is_wstall;
reg [`NUM_REGS-1:0] used_regs;
wire [31:0] instr = ifetch_rsp_if.instr; wire [31:0] instr = ifetch_rsp_if.instr;
wire [6:0] opcode = instr[6:0]; wire [6:0] opcode = instr[6:0];
@@ -45,14 +53,11 @@ module VX_decode #(
always @(*) begin always @(*) begin
ex_type = `EX_NOP; ex_type = 0;
op_type = 'x; op_type = 'x;
op_mod = 'x; op_mod = 'x;
imm = 'x; imm = 'x;
use_rd = 0; use_rd = 0;
use_rs1 = 0;
use_rs2 = 0;
use_rs3 = 0;
use_PC = 0; use_PC = 0;
use_imm = 0; use_imm = 0;
rd_fp = 0; rd_fp = 0;
@@ -60,6 +65,11 @@ module VX_decode #(
rs2_fp = 0; rs2_fp = 0;
is_join = 0; is_join = 0;
is_wstall = 0; is_wstall = 0;
used_regs = 0;
rd_r = rd;
rs1_r = rs1;
rs2_r = rs2;
rs3_r = rs3;
case (opcode) case (opcode)
`INST_I: begin `INST_I: begin
@@ -78,8 +88,9 @@ module VX_decode #(
op_mod = 0; op_mod = 0;
imm = {{20{alu_imm[11]}}, alu_imm}; imm = {{20{alu_imm[11]}}, alu_imm};
use_rd = 1; use_rd = 1;
use_rs1 = 1;
use_imm = 1; use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end end
`INST_R: begin `INST_R: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
@@ -114,17 +125,20 @@ module VX_decode #(
op_mod = 0; op_mod = 0;
end end
use_rd = 1; use_rd = 1;
use_rs1 = 1; `USED_REGS (1'b0, rd);
use_rs2 = 1; `USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end end
`INST_LUI: begin `INST_LUI: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI); op_type = `OP_BITS'(`ALU_LUI);
op_mod = 0; op_mod = 0;
rs1_r = 0;
imm = {upper_imm, 12'(0)}; imm = {upper_imm, 12'(0)};
use_rd = 1; use_rd = 1;
use_rs1 = 1;
use_imm = 1; use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, 5'b0);
end end
`INST_AUIPC: begin `INST_AUIPC: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
@@ -134,6 +148,7 @@ module VX_decode #(
use_rd = 1; use_rd = 1;
use_PC = 1; use_PC = 1;
use_imm = 1; use_imm = 1;
`USED_REGS (1'b0, rd);
end end
`INST_JAL: begin `INST_JAL: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
@@ -144,6 +159,7 @@ module VX_decode #(
use_PC = 1; use_PC = 1;
use_imm = 1; use_imm = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rd);
end end
`INST_JALR: begin `INST_JALR: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
@@ -151,9 +167,10 @@ module VX_decode #(
op_mod = 1; op_mod = 1;
imm = {{20{jalr_imm[11]}}, jalr_imm}; imm = {{20{jalr_imm[11]}}, jalr_imm};
use_rd = 1; use_rd = 1;
use_rs1 = 1;
use_imm = 1; use_imm = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end end
`INST_B: begin `INST_B: begin
ex_type = `EX_ALU; ex_type = `EX_ALU;
@@ -168,11 +185,11 @@ module VX_decode #(
endcase endcase
op_mod = 1; op_mod = 1;
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0}; imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
use_rs1 = 1;
use_rs2 = 1;
use_PC = 1; use_PC = 1;
use_imm = 1; use_imm = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end end
`INST_SYS : begin `INST_SYS : begin
if (func3 == 0) begin if (func3 == 0) begin
@@ -190,6 +207,7 @@ module VX_decode #(
use_rd = 1; use_rd = 1;
use_PC = 1; use_PC = 1;
use_imm = 1; use_imm = 1;
`USED_REGS (1'b0, rd);
end else begin end else begin
ex_type = `EX_CSR; ex_type = `EX_CSR;
case (func3[1:0]) case (func3[1:0])
@@ -201,8 +219,10 @@ module VX_decode #(
endcase endcase
imm = 32'(u_12); imm = 32'(u_12);
use_rd = 1; use_rd = 1;
use_rs1 = !func3[2];
use_imm = func3[2]; use_imm = func3[2];
`USED_REGS (1'b0, rd);
if (!func3[2])
`USED_REGS (1'b0, rs1);
end end
end end
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
@@ -213,7 +233,8 @@ module VX_decode #(
op_type = `OP_BITS'({1'b0, func3}); op_type = `OP_BITS'({1'b0, func3});
imm = {{20{u_12[11]}}, u_12}; imm = {{20{u_12[11]}}, u_12};
use_rd = 1; use_rd = 1;
use_rs1 = 1; `USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FL), rd);
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL); rd_fp = (opcode == `INST_FL);
`endif `endif
@@ -225,8 +246,8 @@ module VX_decode #(
ex_type = `EX_LSU; ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3}); op_type = `OP_BITS'({1'b1, func3});
imm = {{20{func7[6]}}, func7, rd}; imm = {{20{func7[6]}}, func7, rd};
use_rs1 = 1; `USED_REGS (1'b0, rs1);
use_rs2 = 1; `USED_REGS ((opcode == `INST_FS), rs2);
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
rs2_fp = (opcode == `INST_FS); rs2_fp = (opcode == `INST_FS);
`endif `endif
@@ -240,12 +261,13 @@ module VX_decode #(
op_type = `OP_BITS'(opcode[3:0]); op_type = `OP_BITS'(opcode[3:0]);
op_mod = func3; op_mod = func3;
use_rd = 1; use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
rd_fp = 1; rd_fp = 1;
rs1_fp = 1; rs1_fp = 1;
rs2_fp = 1; rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REGS (1'b1, rs3);
end end
`INST_FCI: begin `INST_FCI: begin
ex_type = `EX_FPU; ex_type = `EX_FPU;
@@ -258,55 +280,61 @@ module VX_decode #(
7'h0C: // FDIV 7'h0C: // FDIV
begin begin
op_type = `OP_BITS'(func7[3:0]); op_type = `OP_BITS'(func7[3:0]);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1; rd_fp = 1;
rs1_fp = 1; rs1_fp = 1;
rs2_fp = 1; rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end end
7'h2C: begin 7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT); op_type = `OP_BITS'(`FPU_SQRT);
use_rs1 = 1;
rd_fp = 1; rd_fp = 1;
rs1_fp = 1; rs1_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
end end
7'h50: begin 7'h50: begin
op_type = `OP_BITS'(`FPU_CMP); op_type = `OP_BITS'(`FPU_CMP);
use_rs1 = 1;
use_rs2 = 1;
rs1_fp = 1; rs1_fp = 1;
rs2_fp = 1; rs2_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end end
7'h60: begin 7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS); op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
use_rs1 = 1;
rs1_fp = 1; rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end end
7'h68: begin 7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW); op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
use_rs1 = 1;
rd_fp = 1; rd_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b0, rs1);
end end
7'h10: begin 7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2 // FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC); op_type = `OP_BITS'(`FPU_MISC);
op_mod = {1'b0, func3[1:0]}; op_mod = {1'b0, func3[1:0]};
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1; rd_fp = 1;
rs1_fp = 1; rs1_fp = 1;
rs2_fp = 1; rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end end
7'h14: begin 7'h14: begin
// FMIN=3, FMAX=4 // FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC); op_type = `OP_BITS'(`FPU_MISC);
op_mod = func3[0] ? 4 : 3; op_mod = func3[0] ? 4 : 3;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1; rd_fp = 1;
rs1_fp = 1; rs1_fp = 1;
rs2_fp = 1; rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end end
7'h70: begin 7'h70: begin
if (func3[0]) begin if (func3[0]) begin
@@ -317,14 +345,16 @@ module VX_decode #(
op_type = `OP_BITS'(`FPU_MISC); op_type = `OP_BITS'(`FPU_MISC);
op_mod = 5; op_mod = 5;
end end
use_rs1 = 1;
rs1_fp = 1; rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end end
7'h78: begin 7'h78: begin
// FMV.W.X=6 // FMV.W.X=6
op_type = `OP_BITS'(`FPU_MISC); op_type = `OP_BITS'(`FPU_MISC);
op_mod = 6; op_mod = 6;
rd_fp = 1; rd_fp = 1;
`USED_REGS (1'b1, rd);
end end
default:; default:;
endcase endcase
@@ -335,18 +365,18 @@ module VX_decode #(
case (func3) case (func3)
3'h0: begin 3'h0: begin
op_type = `OP_BITS'(`GPU_TMC); op_type = `OP_BITS'(`GPU_TMC);
use_rs1 = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rs1);
end end
3'h1: begin 3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN); op_type = `OP_BITS'(`GPU_WSPAWN);
use_rs1 = 1; `USED_REGS (1'b0, rs1);
use_rs2 = 1; `USED_REGS (1'b0, rs2);
end end
3'h2: begin 3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT); op_type = `OP_BITS'(`GPU_SPLIT);
use_rs1 = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rs1);
end end
3'h3: begin 3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN); op_type = `OP_BITS'(`GPU_JOIN);
@@ -354,9 +384,9 @@ module VX_decode #(
end end
3'h4: begin 3'h4: begin
op_type = `OP_BITS'(`GPU_BAR); op_type = `OP_BITS'(`GPU_BAR);
use_rs1 = 1;
use_rs2 = 1;
is_wstall = 1; is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end end
default:; default:;
endcase endcase
@@ -366,10 +396,7 @@ module VX_decode #(
end end
// disable write to integer register r0 // disable write to integer register r0
wire use_rd_qual = use_rd && (rd_fp || (rd != 0)); wire wb = use_rd && (rd_fp || (rd_r != 0));
// EX_ALU needs rs1=0 for LUI operation
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
assign decode_if.valid = ifetch_rsp_if.valid; assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid; assign decode_if.wid = ifetch_rsp_if.wid;
@@ -378,31 +405,27 @@ module VX_decode #(
assign decode_if.ex_type = ex_type; assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type; assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod; assign decode_if.op_mod = op_mod;
assign decode_if.wb = use_rd_qual; assign decode_if.wb = wb;
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd}; assign decode_if.rd = {rd_fp, rd_r};
assign decode_if.rs1 = {rs1_fp, rs1_qual}; assign decode_if.rs1 = {rs1_fp, rs1_r};
assign decode_if.rs2 = {rs2_fp, rs2}; assign decode_if.rs2 = {rs2_fp, rs2_r};
assign decode_if.rs3 = {1'b1, rs3}; assign decode_if.rs3 = {1'b1, rs3_r};
`else `else
`UNUSED_VAR (rd_fp) `UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp) `UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp) `UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd; assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_qual; assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2; assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3; assign decode_if.rs3 = rs3_r;
`endif `endif
assign decode_if.imm = imm; assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC; assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm; assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = used_regs;
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////

View File

@@ -54,7 +54,8 @@ module VX_fpu_unit #(
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}), .write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}), .read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
.release_slot (fpuq_pop), .release_slot (fpuq_pop),
.full (fpuq_full) .full (fpuq_full),
`UNUSED_PIN (empty)
); );
// can accept new request? // can accept new request?

View File

@@ -82,8 +82,7 @@ module VX_ibuffer #(
if (writing && is_slot0) begin if (writing && is_slot0) begin
q_data_out[i] <= q_data_in; q_data_out[i] <= q_data_in;
end end else if (pop) begin
if (pop) begin
q_data_out[i] <= q_data_prev[i]; q_data_out[i] <= q_data_prev[i];
end end
end end

View File

@@ -38,7 +38,8 @@ module VX_instr_demux (
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU); wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)) .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) alu_buffer ( ) alu_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -55,7 +56,8 @@ module VX_instr_demux (
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU); wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) lsu_buffer ( ) lsu_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -72,7 +74,8 @@ module VX_instr_demux (
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR); wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32) .DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.BUFFERED (1)
) csr_buffer ( ) csr_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -90,7 +93,8 @@ module VX_instr_demux (
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU); wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) .DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
) fpu_buffer ( ) fpu_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -111,7 +115,8 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU); wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)) .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.BUFFERED (1)
) gpu_buffer ( ) gpu_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -183,19 +183,44 @@ module VX_issue #(
`ifdef DBG_PRINT_PIPELINE `ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin always @(posedge clk) begin
if (alu_req_if.valid && alu_req_if.ready) begin if (alu_req_if.valid && alu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data); $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
$write("\n");
end end
if (lsu_req_if.valid && lsu_req_if.ready) begin if (lsu_req_if.valid && lsu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data); $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
$write(", data=");
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
$write("\n");
end end
if (csr_req_if.valid && csr_req_if.ready) begin if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data); $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
$write("\n");
end end
if (fpu_req_if.valid && fpu_req_if.ready) begin if (fpu_req_if.valid && fpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
$write(", rs3_data=");
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
$write("\n");
end end
if (gpu_req_if.valid && gpu_req_if.ready) begin if (gpu_req_if.valid && gpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data); $write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
$write("\n");
end end
end end
`endif `endif

View File

@@ -75,10 +75,11 @@ module VX_lsu_unit #(
`UNUSED_VAR (rsp_type) `UNUSED_VAR (rsp_type)
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask; reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
reg [`NUM_THREADS-1:0] rsp_rem_mask_n; wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
wire [`NUM_THREADS-1:0] rsp_tmask;
reg [`NUM_THREADS-1:0] req_sent_mask; reg [`NUM_THREADS-1:0] req_sent_mask;
wire req_sent_all; wire sent_all_ready;
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr; wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full; wire mbuf_full;
@@ -88,18 +89,20 @@ module VX_lsu_unit #(
assign req_offset[i] = req_addr[i][1:0]; assign req_offset[i] = req_addr[i][1:0];
end end
wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready)) wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire mbuf_push = (| dcache_req_fire)
&& (0 == req_sent_mask) // first submission only && (0 == req_sent_mask) // first submission only
&& req_wb; // loads only && req_wb; // loads only
wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup);
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0]; assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
VX_index_buffer #( VX_index_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1), .DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
.SIZE (`LSUQ_SIZE) .SIZE (`LSUQ_SIZE)
) req_metadata ( ) req_metadata (
.clk (clk), .clk (clk),
@@ -107,24 +110,32 @@ module VX_lsu_unit #(
.write_addr (mbuf_waddr), .write_addr (mbuf_waddr),
.acquire_slot (mbuf_push), .acquire_slot (mbuf_push),
.read_addr (mbuf_raddr), .read_addr (mbuf_raddr),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}), .write_data ({req_wid, req_pc, req_tmask, req_rd, req_wb, req_type, req_offset, req_is_dup}),
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}), .read_data ({rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
.release_addr (mbuf_raddr), .release_addr (mbuf_raddr),
.release_slot (mbuf_pop), .release_slot (mbuf_pop),
.full (mbuf_full) .full (mbuf_full),
`UNUSED_PIN (empty)
); );
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask)) always @(posedge clk) begin
|| (req_is_dup && dcache_req_if.ready[0]); if (mbuf_push) begin
pending_tags[mbuf_waddr] <= req_tag;
end
end
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
req_sent_mask <= 0; req_sent_mask <= 0;
end else begin end else begin
if (req_sent_all) if (sent_all_ready)
req_sent_mask <= 0; req_sent_mask <= 0;
else else
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready); req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
end end
end end
@@ -136,20 +147,21 @@ module VX_lsu_unit #(
req_tag_hold <= mbuf_waddr; req_tag_hold <= mbuf_waddr;
end end
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid; assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
always @(posedge clk) begin always @(posedge clk) begin
if (mbuf_push) begin if (mbuf_push) begin
rsp_rem_mask[mbuf_waddr] <= req_tmask; rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
pending_tags[mbuf_waddr] <= req_tag;
end end
if (mbuf_pop_part) begin if (dcache_rsp_fire) begin
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n; rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
end end
end end
wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready); wire req_ready_dep = (req_wb && ~mbuf_full)
|| (~req_wb && st_commit_if.ready);
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
// DCache Request // DCache Request
@@ -181,23 +193,23 @@ module VX_lsu_unit #(
end end
end end
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask; assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}}; assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
assign dcache_req_if.addr = mem_req_addr; assign dcache_req_if.addr = mem_req_addr;
assign dcache_req_if.byteen = mem_req_byteen; assign dcache_req_if.byteen = mem_req_byteen;
assign dcache_req_if.data = mem_req_data; assign dcache_req_if.data = mem_req_data;
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}}; assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}};
`else `else
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}}; assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif `endif
assign ready_in = req_ready_dep && req_sent_all; assign ready_in = req_ready_dep && sent_all_ready;
// send store commit // send store commit
wire is_store_rsp = req_valid && ~req_wb && req_sent_all; wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
assign st_commit_if.valid = is_store_rsp; assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid; assign st_commit_if.wid = req_wid;
@@ -211,7 +223,7 @@ module VX_lsu_unit #(
// load response formatting // load response formatting
reg [`NUM_THREADS-1:0][31:0] rsp_data; reg [`NUM_THREADS-1:0][31:0] rsp_data;
wire [`NUM_THREADS-1:0] rsp_tmask; wire [`NUM_THREADS-1:0] rsp_tmask_qual;
for (genvar i = 0; i < `NUM_THREADS; i++) begin for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
@@ -234,7 +246,7 @@ module VX_lsu_unit #(
end end
end end
assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid; assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid;
// send load commit // send load commit
@@ -247,7 +259,7 @@ module VX_lsu_unit #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (!load_rsp_stall), .enable (!load_rsp_stall),
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), .data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
); );
@@ -255,7 +267,7 @@ module VX_lsu_unit #(
assign dcache_rsp_if.ready = ~load_rsp_stall; assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration // scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready); `SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
`SCOPE_ASSIGN (dcache_req_wid, req_wid); `SCOPE_ASSIGN (dcache_req_wid, req_wid);
`SCOPE_ASSIGN (dcache_req_pc, req_pc); `SCOPE_ASSIGN (dcache_req_pc, req_pc);
`SCOPE_ASSIGN (dcache_req_addr, req_addr); `SCOPE_ASSIGN (dcache_req_addr, req_addr);
@@ -269,15 +281,15 @@ module VX_lsu_unit #(
`ifdef DBG_PRINT_CORE_DCACHE `ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin always @(posedge clk) begin
if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin if ((| dcache_req_fire)) begin
if ((| dcache_req_if.rw)) if ((| dcache_req_if.rw))
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
else else
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b", $display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup); $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
end end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin if (dcache_rsp_fire) begin
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b", $display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup); $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
end end

View File

@@ -81,4 +81,25 @@
`define LTRIM(x,s) x[s-1:0] `define LTRIM(x,s) x[s-1:0]
`define PRINT_ARRAY1D(a, m) \
$write("{"); \
for (integer i = (m-1); i >= 0; --i) begin \
if (i != (m-1)) $write(", "); \
$write("0x%0h", a[i]); \
end \
$write("}"); \
`define PRINT_ARRAY2D(a, m, n) \
$write("{"); \
for (integer i = n-1; i >= 0; --i) begin \
if (i != (n-1)) $write(", "); \
$write("{"); \
for (integer j = (m-1); j >= 0; --j) begin \
if (j != (m-1)) $write(", "); \
$write("0x%0h", a[i][j]); \
end \
$write("}"); \
end \
$write("}")
`endif `endif

View File

@@ -121,7 +121,7 @@ module Vortex (
.NUM_REQS (`NUM_CLUSTERS), .NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32), .DATA_WIDTH (32),
.ADDR_WIDTH (12), .ADDR_WIDTH (12),
.BUFFERED_REQ (`NUM_CLUSTERS >= 4), .BUFFERED_REQ (1),
.BUFFERED_RSP (1) .BUFFERED_RSP (1)
) csr_arb ( ) csr_arb (
.clk (clk), .clk (clk),
@@ -228,7 +228,7 @@ module Vortex (
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH), .TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH), .TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
.BUFFERED_REQ (1), .BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CLUSTERS >= 4) .BUFFERED_RSP (1)
) dram_arb ( ) dram_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -1,14 +1,13 @@
`include "VX_define.vh" `include "VX_define.vh"
`ifndef NOPAE `ifndef NOPAE
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh" `include "afu_json_info.vh"
`else `else
`include "vortex_afu.vh" `include "vortex_afu.vh"
`endif
/* verilator lint_off IMPORTSTAR */ /* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*; import ccip_if_pkg::*;
import local_mem_cfg_pkg::*; import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */ /* verilator lint_on IMPORTSTAR */
`endif
module vortex_afu #( module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2 parameter NUM_LOCAL_MEM_BANKS = 2

View File

@@ -487,7 +487,8 @@ module VX_bank #(
end end
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS) .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.BUFFERED (NUM_BANKS == 1)
) core_rsp_req ( ) core_rsp_req (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -168,8 +168,7 @@ module VX_cache #(
.NUM_BANKS (NUM_BANKS) .NUM_BANKS (NUM_BANKS)
) flush_ctrl ( ) flush_ctrl (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset || flush),
.flush (flush),
.addr_out (flush_addr), .addr_out (flush_addr),
.valid_out (flush_enable) .valid_out (flush_enable)
); );

View File

@@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
wire core_rsp_valid_any = (| per_bank_core_rsp_valid); wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)) .DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.BUFFERED (1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #( VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH) .DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.BUFFERED (1)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -10,7 +10,6 @@ module VX_flush_ctrl #(
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire flush,
output wire [`LINE_SELECT_BITS-1:0] addr_out, output wire [`LINE_SELECT_BITS-1:0] addr_out,
output wire valid_out output wire valid_out
); );
@@ -18,7 +17,7 @@ module VX_flush_ctrl #(
reg [`LINE_SELECT_BITS-1:0] flush_ctr; reg [`LINE_SELECT_BITS-1:0] flush_ctr;
always @(posedge clk) begin always @(posedge clk) begin
if (reset || flush) begin if (reset) begin
flush_enable <= 1; flush_enable <= 1;
flush_ctr <= 0; flush_ctr <= 0;
end else begin end else begin

View File

@@ -3,10 +3,6 @@
/// Modified port of cast module from fpnew Libray /// Modified port of cast module from fpnew Libray
/// reference: https://github.com/pulp-platform/fpnew /// reference: https://github.com/pulp-platform/fpnew
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_cvt #( module VX_fp_cvt #(
parameter TAGW = 1, parameter TAGW = 1,
parameter LANES = 1 parameter LANES = 1
@@ -74,7 +70,7 @@ module VX_fp_cvt #(
end end
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent; wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
wire [LANES-1:0] input_sign; wire [LANES-1:0] input_sign;
for (genvar i = 0; i < LANES; ++i) begin for (genvar i = 0; i < LANES; ++i) begin
@@ -82,10 +78,10 @@ module VX_fp_cvt #(
wire [INT_MAN_WIDTH-1:0] fmt_mantissa; wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
wire fmt_sign = dataa[i][31]; wire fmt_sign = dataa[i][31];
wire int_sign = dataa[i][31] & is_signed; wire int_sign = dataa[i][31] & is_signed;
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i]; assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]}); assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]}); assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa; assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
assign input_sign[i] = is_itof ? int_sign : fmt_sign; assign input_sign[i] = is_itof ? int_sign : fmt_sign;
end end
@@ -115,7 +111,7 @@ module VX_fp_cvt #(
wire [2:0] rnd_mode_s0; wire [2:0] rnd_mode_s0;
fp_type_t [LANES-1:0] in_a_type_s0; fp_type_t [LANES-1:0] in_a_type_s0;
wire [LANES-1:0] input_sign_s0; wire [LANES-1:0] input_sign_s0;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0; wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0; wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
wire [LANES-1:0] mant_is_zero_s0; wire [LANES-1:0] mant_is_zero_s0;
@@ -136,37 +132,92 @@ module VX_fp_cvt #(
// Normalization // Normalization
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
for (genvar i = 0; i < LANES; ++i) begin for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// Input mantissa needs to be normalized // Input mantissa needs to be normalized
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp; wire [INT_EXP_WIDTH-1:0] fp_input_exp;
wire signed [INT_EXP_WIDTH-1:0] int_input_exp; wire [INT_EXP_WIDTH-1:0] int_input_exp;
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
// signed form for calculations
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
// Realign input mantissa, append zeroes if destination is wider // Realign input mantissa, append zeroes if destination is wider
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i]; assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
// Unbias exponent and compensate for shift // Unbias exponent and compensate for shift
assign fp_input_exp = $signed(fmt_exponent_s0[i] + assign fp_input_exp = fmt_exponent_s0[i] +
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) + {1'b0, in_a_type_s0[i].is_subnormal} +
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) - (FMT_SHIFT_COMPENSATION - EXP_BIAS) -
renorm_shamt_sgn)); {1'b0, renorm_shamt_s0[i]};
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp; assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
// Rebias the exponent // Rebias the exponent
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS); assign destination_exp[i] = input_exp[i] + EXP_BIAS;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
end end
// Perform adjustments to mantissa and exponent
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
wire [LANES-1:0] of_before_round_s0;
for (genvar i = 0; i < LANES; ++i) begin
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s0) begin
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp[i]) < $signed(1)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
end
end else begin
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
// overflow: when converting to unsigned the range is larger by one
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
of_before_round = 1'b1;
end else if ($signed(input_exp[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
end
end
`IGNORE_WARNINGS_END
end
assign preshift_mant_s0[i] = preshift_mant;
assign denorm_shamt_s0[i] = denorm_shamt;
assign final_exp_s0[i] = final_exp;
assign of_before_round_s0[i] = of_before_round;
end
// Pipeline stage1 // Pipeline stage1
wire valid_in_s1; wire valid_in_s1;
@@ -177,108 +228,55 @@ module VX_fp_cvt #(
fp_type_t [LANES-1:0] in_a_type_s1; fp_type_t [LANES-1:0] in_a_type_s1;
wire [LANES-1:0] mant_is_zero_s1; wire [LANES-1:0] mant_is_zero_s1;
wire [LANES-1:0] input_sign_s1; wire [LANES-1:0] input_sign_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1; wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1; wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1; wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)), .DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
.RESETW (1) .RESETW (1)
) pipe_reg1 ( ) pipe_reg1 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~stall), .enable (~stall),
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}), .data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1}) .data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
); );
// Casting
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
reg [LANES-1:0] of_before_round;
// Perform adjustments to mantissa and exponent
for (genvar i = 0; i < LANES; ++i) begin
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
denorm_shamt[i] = 0; // right of mantissa
of_before_round[i] = 1'b0;
// Place mantissa to the left of the shifter
preshift_mant[i] = {input_mant_s1[i], 33'b0};
// Handle INT casts
if (is_itof_s1) begin
// Overflow or infinities (for proper rounding)
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
final_exp[i] = (2**EXP_BITS-2); // largest normal value
preshift_mant[i] = ~0; // largest normal value and RS bits set
of_before_round[i] = 1'b1;
// Denormalize underflowing values
end else if (($signed(destination_exp_s1[i]) < $signed(1))
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
// Limit the shift to retain sticky bits
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
end
end else begin
// By default right shift mantissa to be an integer
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
// overflow: when converting to unsigned the range is larger by one
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
of_before_round[i] = 1'b1;
// underflow
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
end
end
`IGNORE_WARNINGS_END
end
// Mantissa adjustment shift
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
// Collapse sticky bits
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
// select RS bits for destination operation
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
end
// Rouding and classification
wire [LANES-1:0] rounded_sign; wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
// Rouding and classification
for (genvar i = 0; i < LANES; ++i) begin for (genvar i = 0; i < LANES; ++i) begin
// Pack exponent and mantissa into proper rounding form wire [2*INT_MAN_WIDTH:0] destination_mant;
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]}; wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
wire [1:0] round_sticky_bits;
wire [31:0] fmt_pre_round_abs;
wire [31:0] pre_round_abs;
// Sign-extend integer result // Mantissa adjustment shift
wire [31:0] ifmt_pre_round_abs = final_int[i]; assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
// Collapse sticky bits
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
// select RS bits for destination operation
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
// Pack exponent and mantissa into proper rounding form
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
// Select output with destination format and operation // Select output with destination format and operation
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs; assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
// Perform the rounding // Perform the rounding
VX_fp_rounding #( VX_fp_rounding #(
@@ -286,7 +284,7 @@ module VX_fp_cvt #(
) fp_rounding ( ) fp_rounding (
.abs_value_i (pre_round_abs), .abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]), .sign_i (input_sign_s1[i]),
.round_sticky_bits_i (round_sticky_bits[i]), .round_sticky_bits_i(round_sticky_bits),
.rnd_mode_i (rnd_mode_s1), .rnd_mode_i (rnd_mode_s1),
.effective_subtraction_i(1'b0), .effective_subtraction_i(1'b0),
.abs_rounded_o (rounded_abs[i]), .abs_rounded_o (rounded_abs[i]),
@@ -306,23 +304,22 @@ module VX_fp_cvt #(
wire [LANES-1:0] input_sign_s2; wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0] rounded_sign_s2; wire [LANES-1:0] rounded_sign_s2;
wire [LANES-1:0][31:0] rounded_abs_s2; wire [LANES-1:0][31:0] rounded_abs_s2;
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #( VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)), .DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
.RESETW (1) .RESETW (1)
) pipe_reg2 ( ) pipe_reg2 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (~stall), .enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}), .data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2}) .data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
); );
wire [LANES-1:0] of_after_round; wire [LANES-1:0] of_after_round;
wire [LANES-1:0] uf_after_round; wire [LANES-1:0] uf_after_round;
wire [LANES-1:0][31:0] fmt_result; wire [LANES-1:0][31:0] fmt_result;
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
wire [LANES-1:0] rounded_int_res_zero; // after rounding wire [LANES-1:0] rounded_int_res_zero; // after rounding
@@ -335,7 +332,7 @@ module VX_fp_cvt #(
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp. assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
// Negative integer result needs to be brought into two's complement // Negative integer result needs to be brought into two's complement
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i]; assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0); assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
end end
@@ -381,7 +378,7 @@ module VX_fp_cvt #(
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special[i] = in_a_type_s2[i].is_nan assign int_result_is_special[i] = in_a_type_s2[i].is_nan
| in_a_type_s2[i].is_inf | in_a_type_s2[i].is_inf
| of_before_round[i] | of_before_round_s2[i]
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]); | (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
// All integer special cases are invalid // All integer special cases are invalid
@@ -399,11 +396,11 @@ module VX_fp_cvt #(
wire [31:0] fp_result, int_result; wire [31:0] fp_result, int_result;
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f; wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); : (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.UF = uf_after_round[i] & inexact; assign fp_regular_status.UF = uf_after_round[i] & inexact;
assign fp_regular_status.NX = inexact; assign fp_regular_status.NX = inexact;

View File

@@ -1,5 +1,9 @@
`include "VX_define.vh" `include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_div #( module VX_fp_div #(
parameter TAGW = 1, parameter TAGW = 1,
parameter LANES = 1 parameter LANES = 1

View File

@@ -1,5 +1,9 @@
`include "VX_define.vh" `include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_fma #( module VX_fp_fma #(
parameter TAGW = 1, parameter TAGW = 1,
parameter LANES = 1 parameter LANES = 1

View File

@@ -1,5 +1,9 @@
`include "VX_define.vh" `include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_sqrt #( module VX_fp_sqrt #(
parameter TAGW = 1, parameter TAGW = 1,
parameter LANES = 1 parameter LANES = 1

View File

@@ -10,7 +10,7 @@ module VX_fp_type (
); );
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff); wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0); wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
wire is_subnormal = (exp_i == 8'd0) && !is_zero; wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0); wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0); wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
wire is_signaling = is_nan && (man_i[22] == 1'b0); wire is_signaling = is_nan && (man_i[22] == 1'b0);

View File

@@ -330,9 +330,9 @@ module VX_fpu_dpi #(
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]); dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]); dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]); dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (dataa[i], result_fsgnj[i]); dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], result_fsgnjn[i]); dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], result_fsgnjx[i]); dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
result_fmv[i] = dataa[i]; result_fmv[i] = dataa[i];
end end
end end

View File

@@ -18,11 +18,12 @@ module VX_index_buffer #(
input wire [ADDRW-1:0] release_addr, input wire [ADDRW-1:0] release_addr,
input wire release_slot, input wire release_slot,
output wire empty,
output wire full output wire full
); );
reg [SIZE-1:0] free_slots, free_slots_n; reg [SIZE-1:0] free_slots, free_slots_n;
reg [ADDRW-1:0] write_addr_r; reg [ADDRW-1:0] write_addr_r;
reg full_r; reg empty_r, full_r;
wire free_valid; wire free_valid;
wire [ADDRW-1:0] free_index; wire [ADDRW-1:0] free_index;
@@ -51,6 +52,7 @@ module VX_index_buffer #(
if (reset) begin if (reset) begin
write_addr_r <= ADDRW'(1'b0); write_addr_r <= ADDRW'(1'b0);
free_slots <= {SIZE{1'b1}}; free_slots <= {SIZE{1'b1}};
empty_r <= 1'b1;
full_r <= 1'b0; full_r <= 1'b0;
end else begin end else begin
if (release_slot) begin if (release_slot) begin
@@ -60,6 +62,7 @@ module VX_index_buffer #(
write_addr_r <= free_index; write_addr_r <= free_index;
end end
free_slots <= free_slots_n; free_slots <= free_slots_n;
empty_r <= (& free_slots_n);
full_r <= ~free_valid; full_r <= ~free_valid;
end end
end end
@@ -81,6 +84,7 @@ module VX_index_buffer #(
); );
assign write_addr = write_addr_r; assign write_addr = write_addr_r;
assign empty = empty_r;
assign full = full_r; assign full = full_r;
endmodule endmodule

View File

@@ -67,8 +67,7 @@ module VX_skid_buffer #(
end else begin end else begin
if (ready_out) begin if (ready_out) begin
use_buffer <= 0; use_buffer <= 0;
end end else if (push && valid_out_r) begin
if (push && !pop) begin
assert(!use_buffer); assert(!use_buffer);
use_buffer <= 1; use_buffer <= 1;
end end
@@ -82,8 +81,10 @@ module VX_skid_buffer #(
if (push) begin if (push) begin
buffer <= data_in; buffer <= data_in;
end end
if (pop) begin if (pop && !use_buffer) begin
data_out_r <= use_buffer ? buffer : data_in; data_out_r <= data_in;
end else if (pop) begin
data_out_r <= buffer;
end end
end end

View File

@@ -139,4 +139,3 @@ clean-fpga-64c:
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
rm sources.txt

View File

@@ -6,7 +6,7 @@
+define+QUARTUS +define+QUARTUS
#+define+PERF_ENABLE #+define+PERF_ENABLE
vortex_afu.json vortex_afu16.json
QI:vortex_afu.qsf QI:vortex_afu.qsf
C:sources.txt C:sources.txt

View File

@@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=4 +define+NUM_CLUSTERS=4
#+define+L3_ENABLE=1 #+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS +define+SYNTHESIS
+define+QUARTUS +define+QUARTUS
#+define+PERF_ENABLE #+define+PERF_ENABLE

View File

@@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=8 +define+NUM_CLUSTERS=8
#+define+L3_ENABLE=1 #+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS +define+SYNTHESIS
+define+QUARTUS +define+QUARTUS
#+define+PERF_ENABLE #+define+PERF_ENABLE

View File

@@ -6,7 +6,7 @@
+define+QUARTUS +define+QUARTUS
#+define+PERF_ENABLE #+define+PERF_ENABLE
vortex_afu.json vortex_afu8.json
QI:vortex_afu.qsf QI:vortex_afu.qsf
C:sources.txt C:sources.txt

View File

@@ -0,0 +1,56 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-200",
"clock-frequency-low": "auto-200",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View File

@@ -0,0 +1,57 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-210",
"clock-frequency-low": "auto-210",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

37
hw/syn/quartus/Makefile Normal file
View File

@@ -0,0 +1,37 @@
# Background build launcher: `make <target>` first runs `clean` in the
# sub-directory named after the target, then rebuilds it with the build's
# stdout/stderr redirected to <target>/build.log. The trailing '&' puts the
# whole clean-and-build list in the background, so this make invocation does
# not wait for the build to finish.
BUILD_TARGETS = unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64

.PHONY: $(BUILD_TARGETS)

# One shared recipe for every target; $@ expands to the requested target,
# which is also the name of the sub-directory being built.
$(BUILD_TARGETS):
	$(MAKE) -C $@ clean && $(MAKE) -C $@ > $@/build.log 2>&1 &

View File

@@ -41,10 +41,6 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED #set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" #set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS #set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS

View File

@@ -1,13 +1,20 @@
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10" FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.sv
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration # Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on SYN_ARGS = --parallel --read_settings_files=on