diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 8eeaa44e..3bc7e4e7 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -13,7 +13,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -DBG_FLAGS += $(DBG_PRINT_FLAGS) +#DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 diff --git a/hw/opae/gen_sources.sh b/hw/opae/gen_sources.sh index ae9df189..3f6b5f42 100755 --- a/hw/opae/gen_sources.sh +++ b/hw/opae/gen_sources.sh @@ -2,11 +2,6 @@ dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores ../rtl/fp_cores/altera ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src' -inc_list="" -for dir in $dir_list; do - inc_list="$inc_list -I$dir" -done - # read design sources for dir in $dir_list; do echo "+incdir+$dir" diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 36a1ce57..39037f51 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -65,11 +65,6 @@ `define MULRQ_SIZE 8 `endif -// Size of FPU Request Queue Size -`ifndef FPURQ_SIZE -`define FPURQ_SIZE 8 -`endif - // Size of issue queue `ifndef ISSUEQ_SIZE `define ISSUEQ_SIZE (8 + `NUM_WARPS) diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 983f520f..ecb1038a 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -237,7 +237,6 @@ module VX_decode #( wire is_fl = 0; wire is_fs = 0; wire is_fci = 0; - wire is_fcmp = 0; wire is_fcvti = 0; wire is_fcvtf = 0; wire is_fmvcls = 0; @@ -287,10 +286,6 @@ module VX_decode #( wire use_rs3 = is_fr4; - wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS)); - wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX))); - wire rs2_is_fp = is_fs || is_fr4 || is_fci; - wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1; /////////////////////////////////////////////////////////////////////////// @@ -324,6 +319,11 @@ module VX_decode #( assign decode_tmp_if.wb = use_rd; `ifdef EXT_F_ENABLE + + wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS)); + wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX))); + wire rs2_is_fp = is_fs || is_fr4 || is_fci; + assign decode_tmp_if.rd = {rd_is_fp, rd}; assign decode_tmp_if.rs1 = {rs1_is_fp, rs1_qual}; assign decode_tmp_if.rs2 = {rs2_is_fp, rs2}; diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index ca609d31..3ace93f6 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -110,11 +110,7 @@ module VX_execute #( assign fpu_commit_if.issue_tag = 0; assign fpu_commit_if.data = 0; assign fpu_commit_if.has_fflags = 0; - assign fpu_commit_if.fflags_NV = 0; - assign fpu_commit_if.fflags_DZ = 0; - assign fpu_commit_if.fflags_OF = 0; - assign fpu_commit_if.fflags_UF = 0; - assign fpu_commit_if.fflags_NX = 0; + assign fpu_commit_if.fflags = 0; `endif VX_gpu_unit #( diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index 1a87a5c3..55e3873f 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -10,7 +10,7 @@ module VX_gpr_fp_ctrl ( input wire [`NUM_THREADS-1:0][31:0] rs2_data, // outputs - output wire [`NR_BITS-1:0] raddr1, + output wire [`NW_BITS+`NR_BITS-1:0] raddr1, VX_gpr_read_if gpr_read_if ); @@ -18,32 +18,30 @@ module VX_gpr_fp_ctrl ( reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data; reg read_rs3; - wire gpr_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3; + wire delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3; - wire gpr_fire = gpr_read_if.valid && gpr_read_if.ready; + wire read_fire = gpr_read_if.valid && gpr_read_if.out_ready; always @(posedge clk) begin if (reset) begin read_rs3 <= 0; - end else if (gpr_delay) begin + end else if (delay) begin read_rs3 <= 1; - end else if (gpr_fire) begin + end else if (read_fire) begin read_rs3 <= 0; end end // backup original rs1 data always @(posedge clk) begin - if (gpr_delay) begin + if (delay) begin tmp_rs1_data <= rs1_data; end end // outputs - - assign raddr1 = read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1; - - assign gpr_read_if.ready = ~gpr_delay; + assign raddr1 = {gpr_read_if.warp_num, (read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1)}; + assign gpr_read_if.in_ready = ~delay; assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data; assign gpr_read_if.rs2_data = rs2_data; assign gpr_read_if.rs3_data = rs1_data; diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index a9f3e50c..00c5f1f0 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -3,26 +3,28 @@ module VX_gpr_ram ( input wire clk, input wire [`NUM_THREADS-1:0] we, - input wire [`NR_BITS-1:0] waddr, + input wire [`NW_BITS+`NR_BITS-1:0] waddr, input wire [`NUM_THREADS-1:0][31:0] wdata, - input wire [`NR_BITS-1:0] rs1, - input wire [`NR_BITS-1:0] rs2, + input wire [`NW_BITS+`NR_BITS-1:0] rs1, + input wire [`NW_BITS+`NR_BITS-1:0] rs2, output wire [`NUM_THREADS-1:0][31:0] rs1_data, output wire [`NUM_THREADS-1:0][31:0] rs2_data ); `ifndef ASIC - reg [`NUM_THREADS-1:0][3:0][7:0] ram [`NUM_REGS-1:0]; + reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0]; - integer i; + integer i, j; initial begin // initialize r0 to 0 - for (i = 0; i < `NUM_THREADS; i++) begin - ram[0][i][0] = 0; - ram[0][i][1] = 0; - ram[0][i][2] = 0; - ram[0][i][3] = 0; + for (j = 0; j < `NUM_WARPS; j++) begin + for (i = 0; i < `NUM_THREADS; i++) begin + ram[j * `NUM_REGS][i][0] = 8'h0; + ram[j * `NUM_REGS][i][1] = 8'h0; + ram[j * `NUM_REGS][i][2] = 8'h0; + ram[j * `NUM_REGS][i][3] = 8'h0; + end end end diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 06e7d344..0b1870b1 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -14,52 +14,43 @@ module VX_gpr_stage #( ); `UNUSED_VAR (reset) - wire [`NUM_THREADS-1:0][31:0] rs1_data [`NUM_WARPS-1:0]; - wire [`NUM_THREADS-1:0][31:0] rs2_data [`NUM_WARPS-1:0]; + wire [`NUM_THREADS-1:0][31:0] rs1_data; + wire [`NUM_THREADS-1:0][31:0] rs2_data; + wire [`NW_BITS+`NR_BITS-1:0] raddr1; - wire [`NR_BITS-1:0] raddr1; - - genvar i; - - for (i = 0; i < `NUM_WARPS; i++) begin - wire [`NUM_THREADS-1:0] we = writeback_if.thread_mask - & {`NUM_THREADS{writeback_if.valid && (i == writeback_if.warp_num)}}; - VX_gpr_ram gpr_int_ram ( - .clk (clk), - .we (we), - .waddr (writeback_if.rd), - .wdata (writeback_if.data), - .rs1 (raddr1), - .rs2 (gpr_read_if.rs2), - .rs1_data (rs1_data[i]), - .rs2_data (rs2_data[i]) - ); - end + VX_gpr_ram gpr_int_ram ( + .clk (clk), + .we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask), + .waddr ({writeback_if.warp_num, writeback_if.rd}), + .wdata (writeback_if.data), + .rs1 (raddr1), + .rs2 ({gpr_read_if.warp_num, gpr_read_if.rs2}), + .rs1_data (rs1_data), + .rs2_data (rs2_data) + ); `ifdef EXT_F_ENABLE VX_gpr_fp_ctrl VX_gpr_fp_ctrl ( .clk (clk), .reset (reset), - - //inputs - .rs1_data (rs1_data[gpr_read_if.warp_num]), - .rs2_data (rs2_data[gpr_read_if.warp_num]), - - // outputs + .rs1_data (rs1_data), + .rs2_data (rs2_data), .raddr1 (raddr1), .gpr_read_if(gpr_read_if) ); `else - assign raddr1 = gpr_read_if.rs1; - assign gpr_read_if.rs1_data = rs1_data[gpr_read_if.warp_num]; - assign gpr_read_if.rs2_data = rs2_data[gpr_read_if.warp_num]; + assign raddr1 = {gpr_read_if.warp_num, gpr_read_if.rs1}; + assign gpr_read_if.rs1_data = rs1_data; + assign gpr_read_if.rs2_data = rs2_data; assign gpr_read_if.rs3_data = 0; - assign gpr_read_if.ready = 1; + assign gpr_read_if.in_ready = 1; wire valid = gpr_read_if.valid; + wire out_ready = gpr_read_if.out_ready; wire use_rs3 = gpr_read_if.use_rs3; wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3; `UNUSED_VAR (valid); + `UNUSED_VAR (out_ready); `UNUSED_VAR (use_rs3); `UNUSED_VAR (rs3); `endif diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 78b8d2d2..7bf96889 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -24,10 +24,11 @@ module VX_issue #( assign gpr_read_if.rs2 = decode_if.rs2; assign gpr_read_if.rs3 = decode_if.rs3; assign gpr_read_if.use_rs3 = decode_if.use_rs3; + assign gpr_read_if.out_ready = decode_if.ready; wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag; - wire gpr_busy = ~gpr_read_if.ready; + wire gpr_busy = ~gpr_read_if.in_ready; wire alu_busy = ~alu_req_if.ready; wire lsu_busy = ~lsu_req_if.ready; wire csr_busy = ~csr_req_if.ready; @@ -63,7 +64,7 @@ module VX_issue #( ); VX_decode_if decode_tmp_if(); - VX_gpr_read_if gpr_data_tmp_if(); + VX_gpr_read_if gpr_read_tmp_if(); wire stall = ~alu_req_if.ready || ~decode_if.ready; wire flush = alu_req_if.ready && ~decode_if.ready; @@ -75,13 +76,13 @@ module VX_issue #( .reset (reset), .stall (stall), .flush (flush), - .in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}), - .out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data}) + .in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}), + .out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data}) ); VX_issue_demux issue_demux ( .decode_if (decode_tmp_if), - .gpr_read_if (gpr_data_tmp_if), + .gpr_read_if (gpr_read_tmp_if), .issue_tag (issue_tmp_tag), .alu_req_if (alu_req_if), .lsu_req_if (lsu_req_if), diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 051fe41c..8519b9f7 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -19,14 +19,15 @@ module VX_writeback #( VX_wb_if writeback_if ); - reg [`NUM_THREADS-1:0][31:0] wb_data [`ISSUEQ_SIZE-1:0]; - reg [`NW_BITS-1:0] wb_warp_num [`ISSUEQ_SIZE-1:0]; - reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0]; - reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0]; - reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0]; + reg [`NUM_THREADS-1:0][31:0] wb_data_table [`ISSUEQ_SIZE-1:0]; + reg [`NW_BITS-1:0] wb_warp_num_table [`ISSUEQ_SIZE-1:0]; + reg [`NUM_THREADS-1:0] wb_thread_mask_table [`ISSUEQ_SIZE-1:0]; + reg [31:0] wb_curr_PC_table [`ISSUEQ_SIZE-1:0]; + reg [`NR_BITS-1:0] wb_rd_table [`ISSUEQ_SIZE-1:0]; reg [`ISSUEQ_SIZE-1:0] wb_pending; reg [`ISSUEQ_SIZE-1:0] wb_pending_n; + reg [`ISTAG_BITS-1:0] wb_index; wire [`ISTAG_BITS-1:0] wb_index_n; @@ -72,39 +73,43 @@ module VX_writeback #( wb_valid <= 0; end else begin if (alu_commit_if.valid) begin - wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data; - wb_warp_num [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num; - wb_thread_mask [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask; - wb_curr_PC [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC; - wb_rd [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd; + wb_data_table [alu_commit_if.issue_tag] <= alu_commit_if.data; + wb_warp_num_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num; + wb_thread_mask_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask; + wb_curr_PC_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC; + wb_rd_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd; end + if (lsu_commit_if.valid) begin - wb_data [lsu_commit_if.issue_tag] <= lsu_commit_if.data; - wb_warp_num [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num; - wb_thread_mask [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask; - wb_curr_PC [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC; - wb_rd [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd; + wb_data_table [lsu_commit_if.issue_tag] <= lsu_commit_if.data; + wb_warp_num_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num; + wb_thread_mask_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask; + wb_curr_PC_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC; + wb_rd_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd; end + if (csr_commit_if.valid) begin - wb_data [csr_commit_if.issue_tag] <= csr_commit_if.data; - wb_warp_num [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num; - wb_thread_mask [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask; - wb_curr_PC [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC; - wb_rd [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd; + wb_data_table [csr_commit_if.issue_tag] <= csr_commit_if.data; + wb_warp_num_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num; + wb_thread_mask_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask; + wb_curr_PC_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC; + wb_rd_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd; end + if (mul_commit_if.valid) begin - wb_data [mul_commit_if.issue_tag] <= mul_commit_if.data; - wb_warp_num [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num; - wb_thread_mask [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask; - wb_curr_PC [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC; - wb_rd [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd; + wb_data_table [mul_commit_if.issue_tag] <= mul_commit_if.data; + wb_warp_num_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num; + wb_thread_mask_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask; + wb_curr_PC_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC; + wb_rd_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd; end + if (fpu_commit_if.valid) begin - wb_data [fpu_commit_if.issue_tag] <= fpu_commit_if.data; - wb_warp_num [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num; - wb_thread_mask [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask; - wb_curr_PC [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC; - wb_rd [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd; + wb_data_table [fpu_commit_if.issue_tag] <= fpu_commit_if.data; + wb_warp_num_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num; + wb_thread_mask_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask; + wb_curr_PC_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC; + wb_rd_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd; end wb_pending <= wb_pending_n; @@ -115,11 +120,11 @@ module VX_writeback #( // writeback request assign writeback_if.valid = wb_valid; - assign writeback_if.warp_num = wb_warp_num [wb_index]; - assign writeback_if.thread_mask = wb_thread_mask [wb_index]; - assign writeback_if.curr_PC = wb_curr_PC [wb_index]; - assign writeback_if.rd = wb_rd [wb_index]; - assign writeback_if.data = wb_data [wb_index]; + assign writeback_if.warp_num = wb_warp_num_table [wb_index]; + assign writeback_if.thread_mask = wb_thread_mask_table [wb_index]; + assign writeback_if.curr_PC = wb_curr_PC_table [wb_index]; + assign writeback_if.rd = wb_rd_table [wb_index]; + assign writeback_if.data = wb_data_table [wb_index]; // commit back-pressure assign alu_commit_if.ready = 1'b1; diff --git a/hw/rtl/fp_cores/VX_fp_fpga.v b/hw/rtl/fp_cores/VX_fp_fpga.v index 0412a268..01c65105 100644 --- a/hw/rtl/fp_cores/VX_fp_fpga.v +++ b/hw/rtl/fp_cores/VX_fp_fpga.v @@ -38,8 +38,8 @@ module VX_fp_fpga ( VX_fpnew #( .FMULADD (0), - .FDIVSQRT (1), - .FNONCOMP (1), + .FDIVSQRT (0), + .FNONCOMP (0), .FCONV (1) ) fp_core ( .clk (clk), @@ -67,13 +67,15 @@ module VX_fp_fpga ( .out_valid (fpnew_out_valid) ); - acl_fp_add fp_add ( - .clock (clk), - .dataa (dataa), - .datab (datab), - .enable (add_out_ready), - .result (add_result) - ); + for (i = 0; i < `NUM_THREADS; i++) begin + acl_fp_add fp_add ( + .clock (clk), + .dataa (dataa), + .datab (datab), + .enable (add_out_ready), + .result (add_result[i]) + ); + end assign in_reqady = fpnew_in_ready; assign has_fflags = fpnew_has_fflags; @@ -81,6 +83,8 @@ module VX_fp_fpga ( assign out_tag = fpnew_out_tag; assign fpnew_out_ready = out_ready; + assign add_out_ready = out_ready; + assign result = fpnew_out_valid ? fpnew_result : add_result; assign out_valid = fpnew_out_valid; diff --git a/hw/rtl/fp_cores/VX_fpnew.v b/hw/rtl/fp_cores/VX_fpnew.v index 1df511e4..71ad066d 100644 --- a/hw/rtl/fp_cores/VX_fpnew.v +++ b/hw/rtl/fp_cores/VX_fpnew.v @@ -66,7 +66,7 @@ module VX_fpnew #( wire fpu_in_ready, fpu_in_valid; wire fpu_out_ready, fpu_out_valid; - reg [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag; + reg [`ISTAG_BITS-1:0] fpu_in_tag, fpu_out_tag; reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands; @@ -138,7 +138,7 @@ module VX_fpnew #( fpnew_top #( .Features (FPU_FEATURES), .Implementation (FPU_IMPLEMENTATION), - .TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0]) + .TagType (logic[`ISTAG_BITS+1+1-1:0]) ) fpnew_core ( .clk_i (clk), .rst_ni (1'b1), diff --git a/hw/rtl/interfaces/VX_gpr_read_if.v b/hw/rtl/interfaces/VX_gpr_read_if.v index 9b24ce56..27310af2 100644 --- a/hw/rtl/interfaces/VX_gpr_read_if.v +++ b/hw/rtl/interfaces/VX_gpr_read_if.v @@ -19,7 +19,8 @@ interface VX_gpr_read_if (); wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs3_data; - wire ready; + wire in_ready; + wire out_ready; endinterface diff --git a/hw/scripts/gen_config.py b/hw/scripts/gen_config.py index 426c83a8..0f80b093 100755 --- a/hw/scripts/gen_config.py +++ b/hw/scripts/gen_config.py @@ -58,13 +58,12 @@ if args.outc != 'none': translation_rules = [ (re.compile(r'^$'), r''), - (re.compile(r'^( *)`ifndef ([^ ]+)$'), r'\1#ifndef \2'), - (re.compile(r'^( *)`define ([^ ]+)$'), r'\1#define \2'), - # (re.compile(r'^( *)`include "\./VX_define_synth\.v"$'), r'\1#include "VX_define_synth.h"'), - (re.compile(r'^( *)`include "VX_user_config\.vh"$'), r''), - (re.compile(r'^( *)`define ([^ ]+) (.+)$'), r'\1#define \2 \3'), - (re.compile(r'^( *)`endif$'), r'\1#endif'), - (re.compile(r'^( *)// (.*)$'), r'\1// \2'), + (re.compile(r'^(\s*)`ifndef\s+([^ ]+)'), r'\1#ifndef \2'), + (re.compile(r'^(\s*)`define\s+([^ ]+)'), r'\1#define \2'), + (re.compile(r'^(\s*)`include "VX_user_config\.vh"'), r''), + (re.compile(r'^(\s*)`define\s+([^ ]+) (.+)'), r'\1#define \2 \3'), + (re.compile(r'^(\s*)`endif\s+'), r'\1#endif'), + (re.compile(r'^(\s*)//(.*)'), r'\1// \2'), ] post_rules = [ @@ -78,7 +77,6 @@ post_rules = [ (re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'), ] - def post_process_line(line): for pat, repl in post_rules: line = pat.sub(repl, line) diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index ea6d87a1..7788d0b6 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -14,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -DBG_FLAGS += $(DBG_PRINT_FLAGS) +#DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 1c30a870..ab4846cd 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -25,7 +25,6 @@ Simulator::Simulator() { #ifdef VCD_OUTPUT Verilated::traceEverOn(true); trace_ = new VerilatedVcdC(); - trace_->set_time_unit("1ns"); vortex_->trace(trace_, 99); trace_->open("trace.vcd"); #endif diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 4cb89a0e..c68062c8 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -3,9 +3,10 @@ #include #include +#define ALL_TESTS + int main(int argc, char **argv) { if (argc == 1) { -#define ALL_TESTS #ifdef ALL_TESTS bool passed = true; @@ -137,12 +138,7 @@ int main(int argc, char **argv) { simulator.load_ihex(test); simulator.run(); - bool status = (1 == simulator.get_last_wb_value(3)); - - if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl; - if (!status) std::cerr << RED << "Test Failed: " << test << std::endl; - - return !status; + return 0; #endif