diff --git a/hw/opae/README b/hw/opae/README index 3596f509..f4f0d3ce 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -62,7 +62,7 @@ make ase # tests ./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256 ./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16 -./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n 16 +./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n 1 -s4 -e4 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd # modify "vsim_run.tcl" to dump VCD trace diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index a07c4d79..f3e7c840 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -2,6 +2,8 @@ #+define+SCOPE +define+SYNTHESIS ++define+QUARTUS ++define+FPU_FAST #+define+DBG_PRINT_CORE_ICACHE #+define+DBG_PRINT_CORE_DCACHE diff --git a/hw/opae/sources_2c.txt b/hw/opae/sources_2c.txt index ca991ef9..34a20a0b 100644 --- a/hw/opae/sources_2c.txt +++ b/hw/opae/sources_2c.txt @@ -1,5 +1,8 @@ +define+NUM_CORES=2 +define+L2_ENABLE=0 ++define+SYNTHESIS ++define+QUARTUS ++define+FPU_FAST vortex_afu.json QI:vortex_afu.qsf diff --git a/hw/opae/sources_4c.txt b/hw/opae/sources_4c.txt index 6ee3aa06..4772d476 100644 --- a/hw/opae/sources_4c.txt +++ b/hw/opae/sources_4c.txt @@ -1,5 +1,8 @@ +define+NUM_CORES=4 +define+L2_ENABLE=0 ++define+SYNTHESIS ++define+QUARTUS ++define+FPU_FAST vortex_afu.json QI:vortex_afu.qsf diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index a2c68273..96b1c98b 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -5,4 +5,5 @@ set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009 set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG -set_global_assignment -name MESSAGE_DISABLE 16818 \ No newline at end of file +set_global_assignment -name MESSAGE_DISABLE 16818 +set_global_assignment -name VERILOG_MACRO FPU_FAST \ No newline at end of file diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a25280b8..358ec37d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -59,10 +59,6 @@ `define EXT_F_ENABLE `endif -`ifndef FPNEW_DISABLE -`define FPNEW_ENABLE -`endif - // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 5cf7aaa4..bb31f75f 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -56,7 +56,7 @@ module VX_fpu_unit #( // can accept new request? assign fpu_req_if.ready = ready_in && ~fpuq_full; -`ifndef FPNEW_ENABLE +`ifdef FPU_FAST VX_fp_fpga #( .TAGW (FPUQ_BITS) diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index 41dd024f..38552a37 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -23,7 +23,11 @@ module VX_gpr_fp_ctrl ( always @(posedge clk) begin if (reset) begin - read_rs1 <= 1; + rs1_tmp_data <= 0; + rs2_tmp_data <= 0; + rs3_tmp_data <= 0; + read_rs1 <= 1; + rs3_wid <= 0; end else begin if (rs3_delay) begin read_rs1 <= 0; @@ -32,16 +36,18 @@ module VX_gpr_fp_ctrl ( read_rs1 <= 1; end + if (read_rs1) begin + rs1_tmp_data <= rs1_data; + end + rs2_tmp_data <= rs2_data; + rs3_tmp_data <= rs1_data; + assert(read_rs1 || rs3_wid == gpr_read_if.wid); end end always @(posedge clk) begin - if (read_rs1) begin - rs1_tmp_data <= rs1_data; - end - rs2_tmp_data <= rs2_data; - rs3_tmp_data <= rs1_data; + end // outputs diff --git a/hw/rtl/fp_cores/VX_fp_madd.v b/hw/rtl/fp_cores/VX_fp_madd.v index e77ebdd3..0d86f509 100644 --- a/hw/rtl/fp_cores/VX_fp_madd.v +++ b/hw/rtl/fp_cores/VX_fp_madd.v @@ -51,9 +51,9 @@ module VX_fp_madd #( .chainin_invalid(), .chainin_underflow(), .chainin_inexact(), - .ax(), + .ax(dataa[i]), .ay(datab[i]), - .az(dataa[i]), + .az(), .clk({2'b00,clk}), .ena({2'b11,~stall}), .aclr(2'b00), @@ -75,12 +75,12 @@ module VX_fp_madd #( defparam mac_fp_add.adder_subtract = "false"; defparam mac_fp_add.ax_clock = "0"; defparam mac_fp_add.ay_clock = "0"; - defparam mac_fp_add.az_clock = "0"; + defparam mac_fp_add.az_clock = "none"; defparam mac_fp_add.output_clock = "0"; defparam mac_fp_add.accumulate_clock = "none"; - defparam mac_fp_add.ax_chainin_pl_clock = "0"; + defparam mac_fp_add.ax_chainin_pl_clock = "none"; defparam mac_fp_add.accum_pipeline_clock = "none"; - defparam mac_fp_add.mult_pipeline_clock = "0"; + defparam mac_fp_add.mult_pipeline_clock = "none"; defparam mac_fp_add.adder_input_clock = "0"; defparam mac_fp_add.accum_adder_clock = "none"; @@ -91,9 +91,9 @@ module VX_fp_madd #( .chainin_invalid(), .chainin_underflow(), .chainin_inexact(), - .ax(), + .ax(dataa[i]), .ay(datab[i]), - .az(dataa[i]), + .az(), .clk({2'b00,clk}), .ena({2'b11,~stall}), .aclr(2'b00), diff --git a/hw/rtl/fp_cores/VX_fp_nmadd.v b/hw/rtl/fp_cores/VX_fp_nmadd.v index 29a4cc1b..2236ef69 100644 --- a/hw/rtl/fp_cores/VX_fp_nmadd.v +++ b/hw/rtl/fp_cores/VX_fp_nmadd.v @@ -52,7 +52,7 @@ module VX_fp_nmadd #( .ay(datab[i]), .az(dataa[i]), .clk({2'b00,clk}), - .ena({2'b11,~stall), + .ena({2'b11,~stall}), .aclr(2'b00), .chainin(), // outputs diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 030bd254..93102ec5 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -38,6 +38,8 @@ set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 +set_global_assignment -name VERILOG_MACRO FPU_FAST + set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"