From 461be0880dd0c10edf1984028d9d944f88dac31a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 25 Nov 2020 09:05:38 -0800 Subject: [PATCH] fixed FPU-CSR data dependence --- .travis.yml | 6 +- driver/opae/vlsim/Makefile | 5 +- driver/rtlsim/Makefile | 5 +- hw/rtl/VX_alu_unit.v | 2 +- hw/rtl/VX_commit.v | 51 +++------------ hw/rtl/VX_csr_arb.v | 4 +- hw/rtl/VX_csr_data.v | 10 +-- hw/rtl/VX_csr_unit.v | 33 ++++++++-- hw/rtl/VX_execute.v | 26 +++++--- hw/rtl/VX_fpu_unit.v | 65 +++++++++++++++---- hw/rtl/VX_gpu_unit.v | 10 +-- hw/rtl/VX_lsu_unit.v | 4 +- hw/rtl/VX_mul_unit.v | 4 +- hw/rtl/VX_pipeline.v | 12 ++-- hw/rtl/VX_writeback.v | 18 ++--- hw/rtl/interfaces/VX_cmt_to_csr_if.v | 6 +- .../{VX_exu_to_cmt_if.v => VX_commit_if.v} | 6 +- hw/rtl/interfaces/VX_csr_to_fpu_if.v | 17 ----- hw/rtl/interfaces/VX_fpu_to_csr_if.v | 17 ++--- hw/simulate/Makefile | 28 ++++---- hw/simulate/simulator.cpp | 14 +--- hw/simulate/simulator.h | 4 -- hw/simulate/testbench.cpp | 26 +++----- simX/Makefile | 5 +- 24 files changed, 191 insertions(+), 187 deletions(-) rename hw/rtl/interfaces/{VX_exu_to_cmt_if.v => VX_commit_if.v} (80%) delete mode 100644 hw/rtl/interfaces/VX_csr_to_fpu_if.v diff --git a/.travis.yml b/.travis.yml index 7051e6bf..ae0d98da 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,12 +22,12 @@ install: script: - make -s - ./ci/test_runtime.sh - - ./ci/test_driver.sh - - ./ci/test_riscv_isa.sh + - ./ci/test_riscv_isa.sh - ./ci/test_opencl.sh - - ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=4 --l2cache + - ./ci/test_driver.sh - ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --debug - ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1" + - ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache - ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 after_success: diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 2a3da54e..3d132af3 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -83,6 +83,9 @@ VL_FLAGS += -DFPU_FAST RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip +OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized" +OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized" + PROJECT = libopae-c-vlsim.so all: $(PROJECT) @@ -95,7 +98,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json $(PROJECT): $(SRCS) $(SCOPE_VH) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) - make OPT_FAST="-Wno-aligned-new -Wmaybe-uninitialized" OPT_SLOW="-Wno-aligned-new -Wmaybe-uninitialized" -j -C obj_dir -f V$(TOP).mk + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f V$(TOP).mk clean: rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index e77afa9d..064d098d 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -67,6 +67,9 @@ endif # use DPI FPU VL_FLAGS += -DFPU_FAST +OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized" +OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized" + PROJECT = libvortex.so # PROJECT = libvortex.dylib @@ -74,7 +77,7 @@ all: $(PROJECT) $(PROJECT): $(SRCS) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) - make OPT_FAST="-Wno-aligned-new -Wmaybe-uninitialized" OPT_SLOW="-Wno-aligned-new -Wmaybe-uninitialized" -j -C obj_dir -f V$(TOP).mk + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f V$(TOP).mk clean: rm -rf $(PROJECT) obj_dir diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 8140eeb1..24eebc52 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -11,7 +11,7 @@ module VX_alu_unit #( // Outputs VX_branch_ctl_if branch_ctl_if, - VX_exu_to_cmt_if alu_commit_if + VX_commit_if alu_commit_if ); reg [`NUM_THREADS-1:0][31:0] alu_result; reg [`NUM_THREADS-1:0][31:0] add_result; diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 732e36b4..5958f75a 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -3,16 +3,16 @@ module VX_commit #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_exu_to_cmt_if alu_commit_if, - VX_exu_to_cmt_if lsu_commit_if, - VX_exu_to_cmt_if mul_commit_if, - VX_exu_to_cmt_if csr_commit_if, - VX_fpu_to_cmt_if fpu_commit_if, - VX_exu_to_cmt_if gpu_commit_if, + VX_commit_if alu_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if csr_commit_if, + VX_commit_if fpu_commit_if, + VX_commit_if gpu_commit_if, // outputs VX_writeback_if writeback_if, @@ -52,39 +52,8 @@ module VX_commit #( .count (commit_size) ); - fflags_t fflags; - always @(*) begin - fflags = 0; - for (integer i = 0; i < `NUM_THREADS; i++) begin - if (fpu_commit_if.tmask[i]) begin - fflags.NX |= fpu_commit_if.fflags[i].NX; - fflags.UF |= fpu_commit_if.fflags[i].UF; - fflags.OF |= fpu_commit_if.fflags[i].OF; - fflags.DZ |= fpu_commit_if.fflags[i].DZ; - fflags.NV |= fpu_commit_if.fflags[i].NV; - end - end - end - - reg csr_update_r; - reg [`NW_BITS-1:0] wid_r; - reg [CMTW-1:0] commit_size_r; - reg has_fflags_r; - fflags_t fflags_r; - - always @(posedge clk) begin - csr_update_r <= commit_fire; - wid_r <= fpu_commit_if.wid; - commit_size_r <= commit_size; - has_fflags_r <= fpu_commit_if.has_fflags; - fflags_r <= fflags; - end - - assign cmt_to_csr_if.valid = csr_update_r; - assign cmt_to_csr_if.wid = wid_r; - assign cmt_to_csr_if.commit_size = commit_size_r; - assign cmt_to_csr_if.has_fflags = has_fflags_r; - assign cmt_to_csr_if.fflags = fflags_r; + assign cmt_to_csr_if.valid = commit_fire; + assign cmt_to_csr_if.commit_size = commit_size; // Writeback diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index c059113e..9801976a 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -9,10 +9,10 @@ module VX_csr_arb ( VX_csr_req_if csr_req_if, // input - VX_exu_to_cmt_if csr_rsp_if, + VX_commit_if csr_rsp_if, // outputs - VX_exu_to_cmt_if csr_commit_if, + VX_commit_if csr_commit_if, VX_csr_io_rsp_if csr_io_rsp_if, input wire select_io_req, diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 655f155c..bd878237 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -7,7 +7,7 @@ module VX_csr_data #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_fpu_if csr_to_fpu_if, + VX_fpu_to_csr_if fpu_to_csr_if, input wire read_enable, input wire[`CSR_ADDR_BITS-1:0] read_addr, @@ -40,9 +40,9 @@ module VX_csr_data #( reg [31:0] read_data_r; always @(posedge clk) begin - if (cmt_to_csr_if.valid && cmt_to_csr_if.has_fflags) begin - csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags; - csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags; + if (fpu_to_csr_if.write_enable) begin + csr_fflags[fpu_to_csr_if.write_wid] <= fpu_to_csr_if.write_fflags; + csr_fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fpu_to_csr_if.write_fflags; end if (write_enable) begin @@ -144,6 +144,6 @@ module VX_csr_data #( end assign read_data = read_data_r; - assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; + assign fpu_to_csr_if.read_frm = csr_frm[fpu_to_csr_if.read_wid]; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index e39820e4..61755266 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -7,18 +7,20 @@ module VX_csr_unit #( input wire reset, VX_cmt_to_csr_if cmt_to_csr_if, - VX_csr_to_fpu_if csr_to_fpu_if, + VX_fpu_to_csr_if fpu_to_csr_if, VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_req_if csr_req_if, - VX_exu_to_cmt_if csr_commit_if, + VX_commit_if csr_commit_if, - input wire busy + input wire busy, + input wire[`NUM_WARPS-1:0] fpu_pending, + output wire[`NUM_WARPS-1:0] pending ); - VX_csr_req_if csr_pipe_req_if(); - VX_exu_to_cmt_if csr_pipe_rsp_if(); + VX_csr_req_if csr_pipe_req_if(); + VX_commit_if csr_pipe_rsp_if(); wire select_io_req = csr_io_req_if.valid; wire select_io_rsp; @@ -47,7 +49,7 @@ module VX_csr_unit #( .clk (clk), .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), - .csr_to_fpu_if (csr_to_fpu_if), + .fpu_to_csr_if (fpu_to_csr_if), .read_enable (csr_pipe_req_if.valid), .read_addr (csr_pipe_req_if.csr_addr), .read_wid (csr_pipe_req_if.wid), @@ -90,7 +92,8 @@ module VX_csr_unit #( wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid; - wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid; + wire stall = (~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid) + || fpu_pending[csr_pipe_req_if.wid]; VX_generic_register #( .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32) @@ -112,4 +115,20 @@ module VX_csr_unit #( // can accept new request? assign csr_pipe_req_if.ready = ~stall; + // pending request + reg [`NUM_WARPS-1:0] pending_r; + always @(posedge clk) begin + if (reset) begin + pending_r <= 0; + end else begin + if (csr_pipe_rsp_if.valid && csr_pipe_rsp_if.ready) begin + pending_r[csr_pipe_rsp_if.wid] <= 0; + end + if (csr_pipe_req_if.valid && csr_pipe_req_if.ready) begin + pending_r[csr_pipe_req_if.wid] <= 1; + end + end + end + assign pending = pending_r; + endmodule diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index db27c44d..026b73c5 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -30,17 +30,19 @@ module VX_execute #( // outputs VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, - VX_exu_to_cmt_if alu_commit_if, - VX_exu_to_cmt_if lsu_commit_if, - VX_exu_to_cmt_if csr_commit_if, - VX_exu_to_cmt_if mul_commit_if, - VX_fpu_to_cmt_if fpu_commit_if, - VX_exu_to_cmt_if gpu_commit_if, + VX_commit_if alu_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if csr_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if fpu_commit_if, + VX_commit_if gpu_commit_if, input wire busy, output wire ebreak ); - VX_csr_to_fpu_if csr_to_fpu_if(); + VX_fpu_to_csr_if fpu_to_csr_if(); + wire[`NUM_WARPS-1:0] csr_pending; + wire[`NUM_WARPS-1:0] fpu_pending; VX_alu_unit #( .CORE_ID(CORE_ID) @@ -70,11 +72,13 @@ module VX_execute #( .clk (clk), .reset (reset), .cmt_to_csr_if (cmt_to_csr_if), - .csr_to_fpu_if (csr_to_fpu_if), + .fpu_to_csr_if (fpu_to_csr_if), .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), .csr_commit_if (csr_commit_if), + .fpu_pending (fpu_pending), + .pending (csr_pending), .busy (busy) ); @@ -105,8 +109,10 @@ module VX_execute #( .clk (clk), .reset (reset), .fpu_req_if (fpu_req_if), - .csr_to_fpu_if (csr_to_fpu_if), - .fpu_commit_if (fpu_commit_if) + .fpu_to_csr_if (fpu_to_csr_if), + .fpu_commit_if (fpu_commit_if), + .csr_pending (csr_pending), + .pending (fpu_pending) ); `else assign fpu_req_if.ready = 0; diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index 79e0a63b..8540a9c5 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -9,10 +9,13 @@ module VX_fpu_unit #( // inputs VX_fpu_req_if fpu_req_if, - VX_csr_to_fpu_if csr_to_fpu_if, - // outputs - VX_fpu_to_cmt_if fpu_commit_if + // outputs + VX_fpu_to_csr_if fpu_to_csr_if, + VX_commit_if fpu_commit_if, + + input wire[`NUM_WARPS-1:0] csr_pending, + output wire[`NUM_WARPS-1:0] pending ); localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE); @@ -53,13 +56,13 @@ module VX_fpu_unit #( ); // can accept new request? - assign fpu_req_if.ready = ready_in && ~fpuq_full; + assign fpu_req_if.ready = ready_in && ~fpuq_full && !csr_pending[fpu_req_if.wid]; - wire valid_in = fpu_req_if.valid && ~fpuq_full; + wire valid_in = fpu_req_if.valid && ~fpuq_full && !csr_pending[fpu_req_if.wid]; - // resolve dynamic FRM - assign csr_to_fpu_if.wid = fpu_req_if.wid; - wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.op_mod; + // resolve dynamic FRM from CSR + assign fpu_to_csr_if.read_wid = fpu_req_if.wid; + wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod; `ifdef FPU_FAST @@ -127,19 +130,57 @@ module VX_fpu_unit #( `endif - wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid; + reg has_fflags_r; + fflags_t fflags_r; + + fflags_t rsp_fflags; + always @(*) begin + rsp_fflags = 0; + for (integer i = 0; i < `NUM_THREADS; i++) begin + if (rsp_tmask[i]) begin + rsp_fflags.NX |= fflags[i].NX; + rsp_fflags.UF |= fflags[i].UF; + rsp_fflags.OF |= fflags[i].OF; + rsp_fflags.DZ |= fflags[i].DZ; + rsp_fflags.NV |= fflags[i].NV; + end + end + end + + wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid; VX_generic_register #( - .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS)) + .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS) ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall_out), .flush (1'b0), - .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, fflags}), - .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, fpu_commit_if.has_fflags, fpu_commit_if.fflags}) + .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}), + .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r}) ); assign ready_out = ~stall_out; + // CSR fflags Update + assign fpu_to_csr_if.write_enable = fpu_commit_if.valid && fpu_commit_if.ready && has_fflags_r; + assign fpu_to_csr_if.write_wid = fpu_commit_if.wid; + assign fpu_to_csr_if.write_fflags = fflags_r; + + // pending request + reg [`NUM_WARPS-1:0] pending_r; + always @(posedge clk) begin + if (reset) begin + pending_r <= 0; + end else begin + if (fpu_commit_if.valid && fpu_commit_if.ready) begin + pending_r[fpu_commit_if.wid] <= 0; + end + if (fpu_req_if.valid && fpu_req_if.ready) begin + pending_r[fpu_req_if.wid] <= 1; + end + end + end + assign pending = pending_r; + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index cea69845..cfbc704f 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -5,15 +5,15 @@ module VX_gpu_unit #( ) ( `SCOPE_IO_VX_gpu_unit - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Inputs - VX_gpu_req_if gpu_req_if, + VX_gpu_req_if gpu_req_if, // Outputs - VX_warp_ctl_if warp_ctl_if, - VX_exu_to_cmt_if gpu_commit_if + VX_warp_ctl_if warp_ctl_if, + VX_commit_if gpu_commit_if ); `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 73fb1b84..e39dbd1a 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -13,10 +13,10 @@ module VX_lsu_unit #( VX_cache_core_rsp_if dcache_rsp_if, // inputs - VX_lsu_req_if lsu_req_if, + VX_lsu_req_if lsu_req_if, // outputs - VX_exu_to_cmt_if lsu_commit_if + VX_commit_if lsu_commit_if ); wire [`NUM_THREADS-1:0] req_tmask; wire req_rw; diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 49c0541e..39c0432b 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -7,10 +7,10 @@ module VX_mul_unit #( input wire reset, // Inputs - VX_mul_req_if mul_req_if, + VX_mul_req_if mul_req_if, // Outputs - VX_exu_to_cmt_if mul_commit_if + VX_commit_if mul_commit_if ); localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE); diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index e528a411..f7c1e4b5 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -112,12 +112,12 @@ module VX_pipeline #( VX_writeback_if writeback_if(); VX_wstall_if wstall_if(); VX_join_if join_if(); - VX_exu_to_cmt_if alu_commit_if(); - VX_exu_to_cmt_if lsu_commit_if(); - VX_exu_to_cmt_if csr_commit_if(); - VX_exu_to_cmt_if mul_commit_if(); - VX_fpu_to_cmt_if fpu_commit_if(); - VX_exu_to_cmt_if gpu_commit_if(); + VX_commit_if alu_commit_if(); + VX_commit_if lsu_commit_if(); + VX_commit_if csr_commit_if(); + VX_commit_if mul_commit_if(); + VX_commit_if fpu_commit_if(); + VX_commit_if gpu_commit_if(); VX_fetch #( .CORE_ID(CORE_ID) diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 3d763781..d771f167 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -3,19 +3,19 @@ module VX_writeback #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_exu_to_cmt_if alu_commit_if, - VX_exu_to_cmt_if lsu_commit_if, - VX_exu_to_cmt_if csr_commit_if, - VX_exu_to_cmt_if mul_commit_if, - VX_fpu_to_cmt_if fpu_commit_if, - VX_exu_to_cmt_if gpu_commit_if, + VX_commit_if alu_commit_if, + VX_commit_if lsu_commit_if, + VX_commit_if csr_commit_if, + VX_commit_if mul_commit_if, + VX_commit_if fpu_commit_if, + VX_commit_if gpu_commit_if, // outputs - VX_writeback_if writeback_if + VX_writeback_if writeback_if ); wire alu_valid = alu_commit_if.valid && alu_commit_if.wb; wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb; diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index a67a3d85..ce2549d6 100644 --- a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -5,12 +5,8 @@ interface VX_cmt_to_csr_if (); - wire valid; - - wire [`NW_BITS-1:0] wid; + wire valid; wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; - wire has_fflags; - fflags_t fflags; endinterface diff --git a/hw/rtl/interfaces/VX_exu_to_cmt_if.v b/hw/rtl/interfaces/VX_commit_if.v similarity index 80% rename from hw/rtl/interfaces/VX_exu_to_cmt_if.v rename to hw/rtl/interfaces/VX_commit_if.v index abb8b487..1108e811 100644 --- a/hw/rtl/interfaces/VX_exu_to_cmt_if.v +++ b/hw/rtl/interfaces/VX_commit_if.v @@ -1,9 +1,9 @@ -`ifndef VX_EXU_TO_CMT_IF -`define VX_EXU_TO_CMT_IF +`ifndef VX_COMMIT_IF +`define VX_COMMIT_IF `include "VX_define.vh" -interface VX_exu_to_cmt_if (); +interface VX_commit_if (); wire valid; diff --git a/hw/rtl/interfaces/VX_csr_to_fpu_if.v b/hw/rtl/interfaces/VX_csr_to_fpu_if.v deleted file mode 100644 index 2b1aac5a..00000000 --- a/hw/rtl/interfaces/VX_csr_to_fpu_if.v +++ /dev/null @@ -1,17 +0,0 @@ -`ifndef VX_CSR_TO_FPU_IF -`define VX_CSR_TO_FPU_IF - -`include "VX_define.vh" - -`ifndef EXTF_F_ENABLE - `IGNORE_WARNINGS_BEGIN -`endif - -interface VX_csr_to_fpu_if (); - - wire [`NW_BITS-1:0] wid; - wire [`FRM_BITS-1:0] frm; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_fpu_to_csr_if.v b/hw/rtl/interfaces/VX_fpu_to_csr_if.v index d3bff987..ccb8856e 100644 --- a/hw/rtl/interfaces/VX_fpu_to_csr_if.v +++ b/hw/rtl/interfaces/VX_fpu_to_csr_if.v @@ -3,19 +3,14 @@ `include "VX_define.vh" -`ifndef EXTF_F_ENABLE - `IGNORE_WARNINGS_BEGIN -`endif - interface VX_fpu_to_csr_if (); - wire valid; - wire [`NW_BITS-1:0] wid; - wire fflags_NV; - wire fflags_DZ; - wire fflags_OF; - wire fflags_UF; - wire fflags_NX; + wire write_enable; + wire [`NW_BITS-1:0] write_wid; + fflags_t write_fflags; + + wire [`NW_BITS-1:0] read_wid; + wire [`FRM_BITS-1:0] read_frm; endinterface diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index d88bef75..c03aca63 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -29,9 +29,10 @@ SRCS += ../rtl/fp_cores/svdpi/float_dpi.cpp all: build-s -CF += -std=c++11 -fms-extensions -I../.. +CF += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors -I../.. +#CF += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors -I../.. -VF += --language 1800-2009 --assert -Wall -Wpedantic +VF += -O2 --language 1800-2009 --assert -Wall -Wpedantic VF += -Wno-DECLFILENAME VF += --x-initial unique --x-assign unique VF += --exe $(SRCS) $(INCLUDE) @@ -42,41 +43,44 @@ DBG += -DVCD_OUTPUT $(DBG_FLAGS) THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') +OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized" +OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized" + gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG) + verilator $(VF) $(SINGLECORE) -CFLAGS '$(CF) $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG) gen-st: - verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) + verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' --threads $(THREADS) gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG) gen-mt: - verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) + verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' --threads $(THREADS) build-s: gen-s - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk build-sd: gen-sd - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk build-st: gen-st - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk build-m: gen-m - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk build-md: gen-md - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk build-mt: gen-mt - (cd obj_dir && make -j -f VVortex.mk) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk run: run-s diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 45fc9056..ed22d3a3 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -61,10 +61,6 @@ void Simulator::reset() { print_bufs_.clear(); dram_rsp_vec_.clear(); - dram_rsp_active_ = false; - snp_req_active_ = false; - csr_req_active_ = false; - snp_req_size_ = 0; pending_snp_reqs_ = 0; csr_rsp_value_ = nullptr; @@ -95,10 +91,6 @@ void Simulator::step() { vortex_->clk = 0; this->eval(); - - dram_rsp_ready_ = vortex_->dram_rsp_ready; - snp_req_ready_ = vortex_->snp_req_ready; - csr_io_req_ready_ = vortex_->csr_io_req_ready; vortex_->clk = 1; this->eval(); @@ -140,7 +132,7 @@ void Simulator::eval_dram_bus() { // send DRAM response if (dram_rsp_active_ - && vortex_->dram_rsp_valid && dram_rsp_ready_) { + && vortex_->dram_rsp_valid && vortex_->dram_rsp_ready) { dram_rsp_active_ = false; } if (!dram_rsp_active_) { @@ -213,7 +205,7 @@ void Simulator::eval_io_bus() { void Simulator::eval_snp_bus() { if (snp_req_active_) { - if (vortex_->snp_req_valid && snp_req_ready_) { + if (vortex_->snp_req_valid && vortex_->snp_req_ready) { assert(snp_req_size_); #ifdef DBG_PRINT_CACHE_SNP std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl; @@ -246,7 +238,7 @@ void Simulator::eval_snp_bus() { void Simulator::eval_csr_bus() { if (csr_req_active_) { - if (vortex_->csr_io_req_valid && csr_io_req_ready_) { + if (vortex_->csr_io_req_valid && vortex_->csr_io_req_ready) { #ifndef NDEBUG if (vortex_->csr_io_req_rw) std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl; diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index a1613d98..9330c265 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -66,10 +66,6 @@ private: std::list dram_rsp_vec_; bool dram_rsp_active_; - bool dram_rsp_ready_; - bool snp_req_ready_; - bool csr_io_req_ready_; - bool snp_req_active_; bool csr_req_active_; diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 45f78798..9c071003 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -3,10 +3,6 @@ #include #include -#define GREEN "\\033[32m" -#define RED "\\033[31m" -#define DEFAULT "\\033[39m" - #define ALL_TESTS int main(int argc, char **argv) { @@ -14,7 +10,7 @@ int main(int argc, char **argv) { if (argc == 1) { #ifdef ALL_TESTS std::string tests[] = { - "../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex", + "../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex", @@ -81,7 +77,7 @@ int main(int argc, char **argv) { }; for (std::string test : tests) { - std::cout << DEFAULT << "\n---------------------------------------\n"; + std::cout << "\n---------------------------------------\n"; std::cout << test << std::endl; @@ -93,16 +89,15 @@ int main(int argc, char **argv) { bool status = (1 == simulator.get_last_wb_value(3)); - if (status) std::cout << GREEN << "Test Passed: " << test << std::endl; - if (!status) std::cout << RED << "Test Failed: " << test << std::endl; - std::cout << DEFAULT; + if (status) std::cout << "Passed: " << test << std::endl; + if (!status) std::cout << "Failed: " << test << std::endl; passed = passed && status; if (!passed) break; } for (std::string test : tests_fp) { - std::cout << DEFAULT << "\n---------------------------------------\n"; + std::cout << "\n---------------------------------------\n"; std::cout << test << std::endl; @@ -114,18 +109,17 @@ int main(int argc, char **argv) { bool status = (1 == simulator.get_last_wb_value(3)); - if (status) std::cout << GREEN << "Test Passed: " << test << std::endl; - if (!status) std::cout << RED << "Test Failed: " << test << std::endl; - std::cout << DEFAULT; + if (status) std::cout << "Passed: " << test << std::endl; + if (!status) std::cout << "Failed: " << test << std::endl; passed = passed && status; if (!passed) break; } - std::cout << DEFAULT << "\n***************************************\n"; + std::cout << "\n***************************************\n"; - if (passed) std::cout << DEFAULT << "PASSED ALL TESTS\n"; - if (!passed) std::cout << DEFAULT << "Failed one or more tests\n"; + if (passed) std::cout << "PASSED ALL TESTS\n"; + if (!passed) std::cout << "Failed one or more tests\n"; return !passed; diff --git a/simX/Makefile b/simX/Makefile index ea1701ea..a4adc3fa 100644 --- a/simX/Makefile +++ b/simX/Makefile @@ -20,6 +20,9 @@ LIGHTW=-Wno-UNOPTFLAT -Wno-WIDTH DEB=--trace -DVL_DEBUG=1 EXE=--exe $(LIB_OBJS) +OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized" +OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized" + all: simX # simX: @@ -28,7 +31,7 @@ all: simX simX: verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW) $(DEB) - (cd obj_dir && make -j`nproc` -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1) + make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1 clean: rm -rf *~ \#* *.o *.a *.so include/*~ include/\#* simX.run obj_dir