diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 749ae8a8..6c1bb622 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -83,7 +83,7 @@ VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE # use DPI FPU -VL_FLAGS += -DFPU_FAST +VL_FLAGS += -DFPU_DPI PROJECT = libopae-c-vlsim.so diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 414616d8..d201c51a 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -72,7 +72,7 @@ ifdef PERF endif # use DPI FPU -VL_FLAGS += -DFPU_FAST +VL_FLAGS += -DFPU_DPI PROJECT = libvortex.so # PROJECT = libvortex.dylib diff --git a/hw/opae/vortex_afu.qsf b/hw/opae/vortex_afu.qsf index 5c79c605..af8af68d 100644 --- a/hw/opae/vortex_afu.qsf +++ b/hw/opae/vortex_afu.qsf @@ -8,7 +8,6 @@ set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 -set_global_assignment -name VERILOG_MACRO FPU_FAST set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index ba320873..0c721548 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -65,11 +65,11 @@ module VX_fpu_unit #( assign fpu_to_csr_if.read_wid = fpu_req_if.wid; wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? 
fpu_to_csr_if.read_frm : fpu_req_if.op_mod; -`ifdef FPU_FAST +`ifdef FPU_DPI - VX_fp_dpi #( + VX_fpu_dpi #( .TAGW (FPUQ_BITS) - ) fp_core ( + ) fpu_dpi ( .clk (clk), .reset (reset), @@ -97,13 +97,13 @@ module VX_fpu_unit #( `elsif FPU_FPNEW - VX_fpnew #( + VX_fpu_fpnew #( .FMULADD (1), .FDIVSQRT (1), .FNONCOMP (1), .FCONV (1), .TAGW (FPUQ_BITS) - ) fp_core ( + ) fpu_fpnew ( .clk (clk), .reset (reset), @@ -131,9 +131,9 @@ module VX_fpu_unit #( `else - VX_fp_fpga #( + VX_fpu_fpga #( .TAGW (FPUQ_BITS) - ) fp_core ( + ) fpu_fpga ( .clk (clk), .reset (fpu_reset), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index eb868aee..c207186b 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -127,7 +127,8 @@ module VX_cache #( .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) + .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET), + .BUFFERED ((NUM_BANKS > 1) && DRAM_ENABLE) ) cache_core_req_bank_sel ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index f6c3b34c..04f6e86e 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -11,9 +11,10 @@ module VX_cache_core_req_bank_sel #( parameter NUM_REQS = 4, // core request tag size parameter CORE_TAG_WIDTH = 3, - // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = 0, + // buffer the output + parameter BUFFERED = 0 ) ( input wire clk, input wire reset, @@ -46,6 +47,8 @@ module VX_cache_core_req_bank_sel #( reg [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; reg [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; + reg [NUM_BANKS-1:0] per_bank_core_req_stall; + reg [NUM_REQS-1:0] core_req_ready_r; reg [NUM_BANKS-1:0] core_req_sel_r; wire [NUM_REQS-1:0][`BANK_SELECT_BITS-1:0] 
core_req_bid; @@ -83,7 +86,7 @@ module VX_cache_core_req_bank_sel #( for (integer j = 0; j < NUM_BANKS; ++j) begin for (integer i = 0; i < NUM_REQS; ++i) begin if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_ready_r[i] = per_bank_core_req_ready[j]; + core_req_ready_r[i] = ~per_bank_core_req_stall[j]; core_req_sel_r[i] = 1; break; end @@ -100,15 +103,23 @@ module VX_cache_core_req_bank_sel #( end end - assign bank_stalls = bank_stalls_r; - assign per_bank_core_req_valid = per_bank_core_req_valid_r; - assign per_bank_core_req_tid = per_bank_core_req_tid_r; - assign per_bank_core_req_rw = per_bank_core_req_rw_r; - assign per_bank_core_req_byteen = per_bank_core_req_byteen_r; - assign per_bank_core_req_addr = per_bank_core_req_addr_r; - assign per_bank_core_req_tag = per_bank_core_req_tag_r; - assign per_bank_core_req_data = per_bank_core_req_data_r; - assign core_req_ready = core_req_ready_r; + for (genvar i = 0; i < NUM_BANKS; ++i) begin + assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] & per_bank_core_req_valid[i]; + VX_pipe_register #( + .DATAW (1 + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + CORE_TAG_WIDTH + `WORD_WIDTH), + .RESETW (1), + .DEPTH (BUFFERED) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .enable (~per_bank_core_req_stall[i]), + .data_in ({per_bank_core_req_valid_r[i], per_bank_core_req_tid_r[i], per_bank_core_req_rw_r[i], per_bank_core_req_byteen_r[i], per_bank_core_req_addr_r[i], per_bank_core_req_tag_r[i], per_bank_core_req_data_r[i]}), + .data_out ({per_bank_core_req_valid[i], per_bank_core_req_tid[i], per_bank_core_req_rw[i], per_bank_core_req_byteen[i], per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_data[i]}) + ); + end + + assign core_req_ready = core_req_ready_r; + assign bank_stalls = bank_stalls_r; end else begin diff --git a/hw/rtl/fp_cores/VX_fp_dpi.v b/hw/rtl/fp_cores/VX_fpu_dpi.v similarity index 99% rename from hw/rtl/fp_cores/VX_fp_dpi.v rename to 
hw/rtl/fp_cores/VX_fpu_dpi.v index 6f0ffb28..c18be4ec 100644 --- a/hw/rtl/fp_cores/VX_fp_dpi.v +++ b/hw/rtl/fp_cores/VX_fpu_dpi.v @@ -3,7 +3,7 @@ `include "VX_define.vh" `include "float_dpi.vh" -module VX_fp_dpi #( +module VX_fpu_dpi #( parameter TAGW = 1 ) ( input wire clk, diff --git a/hw/rtl/fp_cores/VX_fp_fpga.v b/hw/rtl/fp_cores/VX_fpu_fpga.v similarity index 99% rename from hw/rtl/fp_cores/VX_fp_fpga.v rename to hw/rtl/fp_cores/VX_fpu_fpga.v index c96399b4..123d4750 100644 --- a/hw/rtl/fp_cores/VX_fp_fpga.v +++ b/hw/rtl/fp_cores/VX_fpu_fpga.v @@ -1,6 +1,6 @@ `include "VX_define.vh" -module VX_fp_fpga #( +module VX_fpu_fpga #( parameter TAGW = 1 ) ( input wire clk, diff --git a/hw/rtl/fp_cores/VX_fpnew.v b/hw/rtl/fp_cores/VX_fpu_fpnew.v similarity index 99% rename from hw/rtl/fp_cores/VX_fpnew.v rename to hw/rtl/fp_cores/VX_fpu_fpnew.v index a9f349ab..3a8a8106 100644 --- a/hw/rtl/fp_cores/VX_fpnew.v +++ b/hw/rtl/fp_cores/VX_fpu_fpnew.v @@ -3,7 +3,7 @@ `include "defs_div_sqrt_mvp.sv" `TRACING_OFF -module VX_fpnew +module VX_fpu_fpnew #( parameter TAGW = 1, parameter FMULADD = 1, diff --git a/hw/rtl/libs/VX_reset_relay.v b/hw/rtl/libs/VX_reset_relay.v new file mode 100644 index 00000000..9fdbda8f --- /dev/null +++ b/hw/rtl/libs/VX_reset_relay.v @@ -0,0 +1,27 @@ +`include "VX_platform.vh" + +module VX_reset_relay #( + parameter NUM_NODES = 1, + parameter PASSTHRU = 0 +) ( + input wire clk, + input wire reset, + output wire [NUM_NODES-1:0] reset_out +); + + if (PASSTHRU == 0) begin + reg [NUM_NODES-1:0] reset_r; + always @(posedge clk) begin + for (integer i = 0; i < NUM_NODES; ++i) begin + reset_r[i] <= reset; + end + end + assign reset_out = reset_r; + end else begin + `UNUSED_VAR (clk) + for (genvar i = 0; i < NUM_NODES; ++i) begin + assign reset_out[i] = reset; + end + end + +endmodule \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 287271ca..307f78af 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ 
-47,6 +47,9 @@ VL_FLAGS += verilator.vlt VL_FLAGS += --exe $(SRCS) $(RTL_INCLUDE) VL_FLAGS += --cc Vortex.v --top-module $(TOP) +# Use FPNEW FPU core +VL_FLAGS += -DFPU_FPNEW + DBG_FLAGS += -DVCD_OUTPUT $(DBG_FLAGS) THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 16722a29..2a4d8a1a 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -86,7 +86,6 @@ void Simulator::reset() { vortex_->reset = 0; // Turn on assertion after reset - printf("*** enabling assertion at tick: %ld", timestamp); Verilated::assertOn(true); } diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 061841b8..b080e3bf 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -39,7 +39,6 @@ set_global_assignment -name VERILOG_MACRO QUARTUS set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 -set_global_assignment -name VERILOG_MACRO FPU_FAST set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED