minor updates

This commit is contained in:
Blaise Tine
2020-07-31 13:39:52 -07:00
parent c9755a0c48
commit 836a735555
19 changed files with 710 additions and 189 deletions

View File

@@ -1,6 +1,6 @@
#!/bin/bash
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src'
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores ../rtl/fp_cores/altera ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src'
inc_list=""
for dir in $dir_list; do

View File

@@ -42,7 +42,7 @@ module VX_commit #(
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
assign cmt_to_csr_if.num_commits = num_commits;
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
assign cmt_to_csr_if.has_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.has_fflags;
integer i;

View File

@@ -37,7 +37,7 @@ module VX_csr_data #(
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
always @(posedge clk) begin
if (cmt_to_csr_if.upd_fflags) begin
if (cmt_to_csr_if.has_fflags) begin
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
end

View File

@@ -41,9 +41,9 @@
`define LATENCY_IDIV 24
`define LATENCY_IMUL 2
`define LATENCY_FMULADD 2
`define LATENCY_FDIVSQRT 2
`define LATENCY_FCONV 2
`define LATENCY_FMULADD 16
`define LATENCY_FDIVSQRT 16
`define LATENCY_FCONV 16
`define LATENCY_FNONCOMP 1
///////////////////////////////////////////////////////////////////////////////

View File

@@ -109,7 +109,7 @@ module VX_execute #(
assign fpu_commit_if.valid = 0;
assign fpu_commit_if.issue_tag = 0;
assign fpu_commit_if.data = 0;
assign fpu_commit_if.upd_fflags = 0;
assign fpu_commit_if.has_fflags = 0;
assign fpu_commit_if.fflags_NV = 0;
assign fpu_commit_if.fflags_DZ = 0;
assign fpu_commit_if.fflags_OF = 0;

View File

@@ -1,6 +1,4 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_fpu_unit #(
parameter CORE_ID = 0
@@ -15,186 +13,72 @@ module VX_fpu_unit #(
// outputs
VX_fpu_to_cmt_if fpu_commit_if
);
localparam FOP_BITS = fpnew_pkg::OP_BITS;
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
localparam FPU_DPATHW = 32'd32;
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: FPU_DPATHW,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
};
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
'{default: `LATENCY_FNONCOMP}, // NONCOMP
'{default: `LATENCY_FCONV}}, // CONV
UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
);
wire fpu_in_ready, fpu_in_valid;
wire fpu_out_ready, fpu_out_valid;
reg [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
wire [`FRM_BITS-1:0] frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
wire is_class_op_i, is_class_op_o;
assign is_class_op_i = (fpu_req_if.fpu_op == `FPU_CLASS);
`ifdef SYNTHESIS
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fflags_en, fflags_en_o;
VX_fp_fpga fp_core (
.clk (clk),
.reset (reset),
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
fpu_rnd = real_frm;
fpu_op_mod = 0;
fflags_en = 1;
fpu_operands[0] = fpu_req_if.rs1_data;
fpu_operands[1] = fpu_req_if.rs2_data;
fpu_operands[2] = fpu_req_if.rs3_data;
case (fpu_req_if.fpu_op)
`FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = fpu_req_if.rs1_data;
fpu_operands[2] = fpu_req_if.rs2_data;
end
`FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = fpu_req_if.rs1_data;
fpu_operands[2] = fpu_req_if.rs2_data;
fpu_op_mod = 1;
end
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
default:;
endcase
end
.in_valid (fpu_req_if.valid),
.in_ready (fpu_req_if.ready),
genvar i;
`DISABLE_TRACING
for (i = 0; i < `NUM_THREADS; i++) begin
if (0 == i) begin
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status[0]),
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end else begin
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic)
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i (1'b0),
.in_valid_i (fpu_in_valid),
`UNUSED_PIN (in_ready_o),
.flush_i (reset),
.result_o (fpu_result[i]),
.status_o (fpu_status[i]),
`UNUSED_PIN (tag_o),
`UNUSED_PIN (out_valid_o),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end
end
`ENABLE_TRACING
assign fpu_in_valid = fpu_req_if.valid;
assign fpu_in_tag = fpu_req_if.issue_tag;
// can accept new request?
assign fpu_req_if.ready = fpu_in_ready;
assign fpu_commit_if.valid = fpu_out_valid;
assign fpu_commit_if.issue_tag = fpu_out_tag;
assign fpu_commit_if.data = fpu_result;
assign fpu_commit_if.upd_fflags = fflags_en_o;
for (i = 0; i < `NUM_THREADS; i++) begin
assign fpu_commit_if.fflags[i][0] = fpu_status[i].NX;
assign fpu_commit_if.fflags[i][1] = fpu_status[i].UF;
assign fpu_commit_if.fflags[i][2] = fpu_status[i].OF;
assign fpu_commit_if.fflags[i][3] = fpu_status[i].DZ;
assign fpu_commit_if.fflags[i][4] = fpu_status[i].NV;
end
.in_tag (fpu_req_if.issue_tag),
assign fpu_out_ready = fpu_commit_if.ready;
.op (fpu_req_if.fpu_op),
.frm (frm),
.dataa (fpu_req_if.rs1_data),
.datab (fpu_req_if.rs2_data),
.datac (fpu_req_if.rs3_data),
.result (fpu_commit_if.data),
.has_fflags (fpu_commit_if.has_fflags),
.fflags (fpu_commit_if.fflags),
.out_tag (fpu_commit_if.issue_tag),
.out_ready (fpu_commit_if.ready),
.out_valid (fpu_commit_if.valid)
);
`else
VX_fpnew #(
.FMULADD (0),
.FDIVSQRT (1),
.FNONCOMP (1),
.FCONV (1)
) fp_core (
.clk (clk),
.reset (reset),
.in_valid (fpu_req_if.valid),
.in_ready (fpu_req_if.ready),
.in_tag (fpu_req_if.issue_tag),
.op (fpu_req_if.fpu_op),
.frm (frm),
.dataa (fpu_req_if.rs1_data),
.datab (fpu_req_if.rs2_data),
.datac (fpu_req_if.rs3_data),
.result (fpu_commit_if.data),
.has_fflags (fpu_commit_if.has_fflags),
.fflags (fpu_commit_if.fflags),
.out_tag (fpu_commit_if.issue_tag),
.out_ready (fpu_commit_if.ready),
.out_valid (fpu_commit_if.valid)
);
`endif
endmodule

View File

@@ -0,0 +1,87 @@
`include "VX_define.vh"
module VX_fp_fpga (
input wire clk,
input wire reset,
output wire in_ready,
input wire in_valid,
input wire [`ISTAG_BITS-1:0] in_tag,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
input wire [`NUM_THREADS-1:0][31:0] datac,
output wire [`NUM_THREADS-1:0][31:0] result,
output wire has_fflags,
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
output wire [`ISTAG_BITS-1:0] out_tag,
input wire out_ready,
output wire out_valid
);
wire fpnew_in_ready;
wire [`NUM_THREADS-1:0][31:0] fpnew_result;
wire fpnew_has_fflags;
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags;
wire [`ISTAG_BITS-1:0] fpnew_out_tag;
wire fpnew_out_ready;
wire fpnew_out_valid;
wire [`NUM_THREADS-1:0][31:0] add_result;
wire add_out_ready;
VX_fpnew #(
.FMULADD (0),
.FDIVSQRT (1),
.FNONCOMP (1),
.FCONV (1)
) fp_core (
.clk (clk),
.reset (reset),
.in_valid (in_valid),
.in_ready (fpnew_in_ready),
.in_tag (in_tag),
.op (op),
.frm (frm),
.dataa (dataa),
.datab (datab),
.datac (datac),
.result (fpnew_result),
.has_fflags (fpnew_has_fflags),
.fflags (fpnew_fflags),
.out_tag (fpnew_out_tag),
.out_ready (fpnew_out_ready),
.out_valid (fpnew_out_valid)
);
acl_fp_add fp_add (
.clock (clk),
.dataa (dataa),
.datab (datab),
.enable (add_out_ready),
.result (add_result)
);
assign in_reqady = fpnew_in_ready;
assign has_fflags = fpnew_has_fflags;
assign fflags = fpnew_fflags;
assign out_tag = fpnew_out_tag;
assign fpnew_out_ready = out_ready;
assign result = fpnew_out_valid ? fpnew_result : add_result;
assign out_valid = fpnew_out_valid;
endmodule

217
hw/rtl/fp_cores/VX_fpnew.v Normal file
View File

@@ -0,0 +1,217 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_fpnew #(
parameter FMULADD = 1,
parameter FDIVSQRT = 1,
parameter FNONCOMP = 1,
parameter FCONV = 1
) (
input wire clk,
input wire reset,
output wire in_ready,
input wire in_valid,
input wire [`ISTAG_BITS-1:0] in_tag,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
input wire [`NUM_THREADS-1:0][31:0] datac,
output wire [`NUM_THREADS-1:0][31:0] result,
output wire has_fflags,
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
output wire [`ISTAG_BITS-1:0] out_tag,
input wire out_ready,
output wire out_valid
);
localparam UNIT_FMULADD = FMULADD ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
localparam UNIT_FDIVSQRT = FDIVSQRT ? fpnew_pkg::MERGED : fpnew_pkg::DISABLED;
localparam UNIT_FNONCOMP = FNONCOMP ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
localparam UNIT_FCONV = FCONV ? fpnew_pkg::MERGED : fpnew_pkg::DISABLED;
localparam FOP_BITS = fpnew_pkg::OP_BITS;
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
localparam FPU_DPATHW = 32'd32;
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: FPU_DPATHW,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
};
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
'{default: `LATENCY_FNONCOMP}, // NONCOMP
'{default: `LATENCY_FCONV}}, // CONV
UnitTypes:'{'{default: UNIT_FMULADD}, // ADDMUL
'{default: UNIT_FDIVSQRT}, // DIVSQRT
'{default: UNIT_FNONCOMP}, // NONCOMP
'{default: UNIT_FCONV}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
wire fpu_in_ready, fpu_in_valid;
wire fpu_out_ready, fpu_out_valid;
reg [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
wire is_class_op_i, is_class_op_o;
assign is_class_op_i = (fpu_op == `FPU_CLASS);
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fflags_en, fflags_en_o;
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
fpu_rnd = frm;
fpu_op_mod = 0;
fflags_en = 1;
fpu_operands[0] = dataa;
fpu_operands[1] = datab;
fpu_operands[2] = datac;
case (op)
`FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
end
`FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
fpu_op_mod = 1;
end
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
default:;
endcase
end
genvar i;
`DISABLE_TRACING
for (i = 0; i < `NUM_THREADS; i++) begin
if (0 == i) begin
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result[0]),
.status_o (fpu_status[0]),
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end else begin
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic)
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i (1'b0),
.in_valid_i (fpu_in_valid),
`UNUSED_PIN (in_ready_o),
.flush_i (reset),
.result_o (fpu_result[i]),
.status_o (fpu_status[i]),
`UNUSED_PIN (tag_o),
`UNUSED_PIN (out_valid_o),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end
end
`ENABLE_TRACING
assign fpu_in_valid = in_valid;
assign in_ready = fpu_in_ready;
assign fpu_in_tag = in_tag;
assign out_tag = fpu_out_tag;
assign result = fpu_result;
assign has_fflags = fflags_en_o;
for (i = 0; i < `NUM_THREADS; i++) begin
assign fflags[i][0] = fpu_status[i].NX;
assign fflags[i][1] = fpu_status[i].UF;
assign fflags[i][2] = fpu_status[i].OF;
assign fflags[i][3] = fpu_status[i].DZ;
assign fflags[i][4] = fpu_status[i].NV;
end
assign out_valid = fpu_out_valid;
assign fpu_out_ready = out_ready;
endmodule

View File

@@ -0,0 +1,67 @@
// (C) 1992-2016 Intel Corporation.
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
// and/or other countries. Other marks and brands may be claimed as the property
// of others. See Trademarks on intel.com for full list of Intel trademarks or
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly subject
// to the terms and conditions of the Altera Program License Subscription
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for the
// sole purpose of programming logic devices manufactured by Intel and sold by
// Intel or its authorized distributors. Please refer to the applicable
// agreement for further details.
module acl_fp_add(dataa, datab, clock, enable, result);
input [31:0] dataa;
input [31:0] datab;
input clock, enable;
output [31:0] result;
// FP MAC wysiwyg
twentynm_fp_mac mac_fp_wys (
// inputs
.accumulate(),
.chainin_overflow(),
.chainin_invalid(),
.chainin_underflow(),
.chainin_inexact(),
.ax(dataa),
.ay(datab),
.az(),
.clk({2'b00,clock}),
.ena({2'b11,enable}),
.aclr(2'b00),
.chainin(),
// outputs
.overflow(),
.invalid(),
.underflow(),
.inexact(),
.chainout_overflow(),
.chainout_invalid(),
.chainout_underflow(),
.chainout_inexact(),
.resulta(result),
.chainout()
);
defparam mac_fp_wys.operation_mode = "sp_add";
defparam mac_fp_wys.use_chainin = "false";
defparam mac_fp_wys.adder_subtract = "false";
defparam mac_fp_wys.ax_clock = "0";
defparam mac_fp_wys.ay_clock = "0";
defparam mac_fp_wys.az_clock = "none";
defparam mac_fp_wys.output_clock = "0";
defparam mac_fp_wys.accumulate_clock = "none";
defparam mac_fp_wys.ax_chainin_pl_clock = "none";
defparam mac_fp_wys.accum_pipeline_clock = "none";
defparam mac_fp_wys.mult_pipeline_clock = "none";
defparam mac_fp_wys.adder_input_clock = "0";
defparam mac_fp_wys.accum_adder_clock = "none";
endmodule

View File

@@ -0,0 +1,63 @@
// (C) 1992-2014 Altera Corporation. All rights reserved.
// Your use of Altera Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly subject
// to the terms and conditions of the Altera Program License Subscription
// Agreement, Altera MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for the
// sole purpose of programming logic devices manufactured by Altera and sold by
// Altera or its authorized distributors. Please refer to the applicable
// agreement for further details.
module acl_fp_multadd(dataa, datab, datac, clock, enable, result);
// a*b + c
input [31:0] dataa;
input [31:0] datab;
input [31:0] datac;
input clock;
input enable;
output [31:0] result;
// FP MAC wysiwyg
twentynm_fp_mac mac_fp_wys (
// inputs
.accumulate(),
.chainin_overflow(),
.chainin_invalid(),
.chainin_underflow(),
.chainin_inexact(),
.ax(datac),
.ay(datab),
.az(dataa),
.clk({2'b00,clock}),
.ena({2'b11,enable}),
.aclr(2'b00),
.chainin(),
// outputs
.overflow(),
.invalid(),
.underflow(),
.inexact(),
.chainout_overflow(),
.chainout_invalid(),
.chainout_underflow(),
.chainout_inexact(),
.resulta(result),
.chainout()
);
defparam mac_fp_wys.operation_mode = "sp_mult_add";
defparam mac_fp_wys.use_chainin = "false";
defparam mac_fp_wys.adder_subtract = "true";
defparam mac_fp_wys.ax_clock = "0";
defparam mac_fp_wys.ay_clock = "0";
defparam mac_fp_wys.az_clock = "0";
defparam mac_fp_wys.output_clock = "0";
defparam mac_fp_wys.accumulate_clock = "none";
defparam mac_fp_wys.ax_chainin_pl_clock = "0";
defparam mac_fp_wys.accum_pipeline_clock = "none";
defparam mac_fp_wys.mult_pipeline_clock = "0";
defparam mac_fp_wys.adder_input_clock = "0";
defparam mac_fp_wys.accum_adder_clock = "none";
endmodule

View File

@@ -0,0 +1,67 @@
// (C) 1992-2016 Intel Corporation.
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
// and/or other countries. Other marks and brands may be claimed as the property
// of others. See Trademarks on intel.com for full list of Intel trademarks or
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly subject
// to the terms and conditions of the Altera Program License Subscription
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for the
// sole purpose of programming logic devices manufactured by Intel and sold by
// Intel or its authorized distributors. Please refer to the applicable
// agreement for further details.
module acl_fp_mul(dataa, datab, clock, enable, result);
input [31:0] dataa;
input [31:0] datab;
input clock, enable;
output [31:0] result;
// FP MAC wysiwyg
twentynm_fp_mac mac_fp_wys (
// inputs
.accumulate(),
.chainin_overflow(),
.chainin_invalid(),
.chainin_underflow(),
.chainin_inexact(),
.ax(),
.ay(datab),
.az(dataa),
.clk({2'b00,clock}),
.ena({2'b11,enable}),
.aclr(2'b00),
.chainin(),
// outputs
.overflow(),
.invalid(),
.underflow(),
.inexact(),
.chainout_overflow(),
.chainout_invalid(),
.chainout_underflow(),
.chainout_inexact(),
.resulta(result),
.chainout()
);
defparam mac_fp_wys.operation_mode = "sp_mult";
defparam mac_fp_wys.use_chainin = "false";
defparam mac_fp_wys.adder_subtract = "false";
defparam mac_fp_wys.ax_clock = "none";
defparam mac_fp_wys.ay_clock = "0";
defparam mac_fp_wys.az_clock = "0";
defparam mac_fp_wys.output_clock = "0";
defparam mac_fp_wys.accumulate_clock = "none";
defparam mac_fp_wys.ax_chainin_pl_clock = "none";
defparam mac_fp_wys.accum_pipeline_clock = "none";
defparam mac_fp_wys.mult_pipeline_clock = "0";
defparam mac_fp_wys.adder_input_clock = "none";
defparam mac_fp_wys.accum_adder_clock = "none";
endmodule

View File

@@ -0,0 +1,63 @@
// (C) 1992-2014 Altera Corporation. All rights reserved.
// Your use of Altera Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly subject
// to the terms and conditions of the Altera Program License Subscription
// Agreement, Altera MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for the
// sole purpose of programming logic devices manufactured by Altera and sold by
// Altera or its authorized distributors. Please refer to the applicable
// agreement for further details.
module acl_fp_multadd(dataa, datab, datac, clock, enable, result);
// a*b + c
input [31:0] dataa;
input [31:0] datab;
input [31:0] datac;
input clock;
input enable;
output [31:0] result;
// FP MAC wysiwyg
twentynm_fp_mac mac_fp_wys (
// inputs
.accumulate(),
.chainin_overflow(),
.chainin_invalid(),
.chainin_underflow(),
.chainin_inexact(),
.ax(datac),
.ay(datab),
.az(dataa),
.clk({2'b00,clock}),
.ena({2'b11,enable}),
.aclr(2'b00),
.chainin(),
// outputs
.overflow(),
.invalid(),
.underflow(),
.inexact(),
.chainout_overflow(),
.chainout_invalid(),
.chainout_underflow(),
.chainout_inexact(),
.resulta(result),
.chainout()
);
defparam mac_fp_wys.operation_mode = "sp_mult_add";
defparam mac_fp_wys.use_chainin = "false";
defparam mac_fp_wys.adder_subtract = "false";
defparam mac_fp_wys.ax_clock = "0";
defparam mac_fp_wys.ay_clock = "0";
defparam mac_fp_wys.az_clock = "0";
defparam mac_fp_wys.output_clock = "0";
defparam mac_fp_wys.accumulate_clock = "none";
defparam mac_fp_wys.ax_chainin_pl_clock = "0";
defparam mac_fp_wys.accum_pipeline_clock = "none";
defparam mac_fp_wys.mult_pipeline_clock = "0";
defparam mac_fp_wys.adder_input_clock = "0";
defparam mac_fp_wys.accum_adder_clock = "none";
endmodule

View File

@@ -0,0 +1,67 @@
// (C) 1992-2016 Intel Corporation.
// Intel, the Intel logo, Intel, MegaCore, NIOS II, Quartus and TalkBack words
// and logos are trademarks of Intel Corporation or its subsidiaries in the U.S.
// and/or other countries. Other marks and brands may be claimed as the property
// of others. See Trademarks on intel.com for full list of Intel trademarks or
// the Trademarks & Brands Names Database (if Intel) or See www.Intel.com/legal (if Altera)
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly subject
// to the terms and conditions of the Altera Program License Subscription
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for the
// sole purpose of programming logic devices manufactured by Intel and sold by
// Intel or its authorized distributors. Please refer to the applicable
// agreement for further details.
module acl_fp_add(dataa, datab, clock, enable, result);
input [31:0] dataa;
input [31:0] datab;
input clock, enable;
output [31:0] result;
// FP MAC wysiwyg
twentynm_fp_mac mac_fp_wys (
// inputs
.accumulate(),
.chainin_overflow(),
.chainin_invalid(),
.chainin_underflow(),
.chainin_inexact(),
.ax(dataa),
.ay(datab),
.az(),
.clk({2'b00,clock}),
.ena({2'b11,enable}),
.aclr(2'b00),
.chainin(),
// outputs
.overflow(),
.invalid(),
.underflow(),
.inexact(),
.chainout_overflow(),
.chainout_invalid(),
.chainout_underflow(),
.chainout_inexact(),
.resulta(result),
.chainout()
);
defparam mac_fp_wys.operation_mode = "sp_add";
defparam mac_fp_wys.use_chainin = "false";
defparam mac_fp_wys.adder_subtract = "true";
defparam mac_fp_wys.ax_clock = "0";
defparam mac_fp_wys.ay_clock = "0";
defparam mac_fp_wys.az_clock = "none";
defparam mac_fp_wys.output_clock = "0";
defparam mac_fp_wys.accumulate_clock = "none";
defparam mac_fp_wys.ax_chainin_pl_clock = "none";
defparam mac_fp_wys.accum_pipeline_clock = "none";
defparam mac_fp_wys.mult_pipeline_clock = "none";
defparam mac_fp_wys.adder_input_clock = "0";
defparam mac_fp_wys.accum_adder_clock = "none";
endmodule

View File

@@ -11,7 +11,7 @@ interface VX_cmt_to_csr_if ();
wire [`NE_BITS:0] num_commits;
wire upd_fflags;
wire has_fflags;
wire [`FFG_BITS-1:0] fflags;
endinterface

View File

@@ -8,7 +8,7 @@ interface VX_fpu_to_cmt_if ();
wire valid;
wire [`ISTAG_BITS-1:0] issue_tag;
wire [`NUM_THREADS-1:0][31:0] data;
wire upd_fflags;
wire has_fflags;
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
wire ready;

View File

@@ -1,6 +1,8 @@
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
@@ -49,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,7 +1,7 @@
PROJECT = VX_pipeline
TOP_LEVEL_ENTITY = VX_pipeline
SRC_FILE = VX_pipeline.v
FPU_INCLUDE = ../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

View File

@@ -1,6 +1,8 @@
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../opae;../../../opae/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
@@ -49,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -set "NOPAE" -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../opae;../../../opae/ccip"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,6 +1,8 @@
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
@@ -49,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg