From 0a0b28aac0a5a411b25e326562c3db8780ed6c11 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 29 Aug 2020 05:14:08 -0700 Subject: [PATCH] minor update - 206-214 mhz --- hw/rtl/VX_alu_unit.v | 4 +-- hw/rtl/VX_commit.v | 2 +- hw/rtl/VX_define.vh | 2 +- hw/rtl/VX_fpu_unit.v | 2 +- hw/rtl/VX_gpr_bypass.v | 4 ++- hw/rtl/VX_gpr_fp_ctrl.v | 7 ++-- hw/rtl/VX_gpr_stage.v | 8 ++--- hw/rtl/VX_ibuffer.v | 19 ++++++---- hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/VX_mul_unit.v | 2 +- hw/rtl/VX_platform.vh | 8 +---- hw/rtl/VX_writeback.v | 64 ++++++++++++++++----------------- hw/rtl/cache/VX_snp_forwarder.v | 2 +- hw/rtl/fp_cores/VX_fp_noncomp.v | 44 +++++++++++------------ hw/rtl/interfaces/VX_issue_if.v | 38 -------------------- hw/rtl/libs/VX_skid_buffer.v | 4 ++- 16 files changed, 90 insertions(+), 122 deletions(-) delete mode 100644 hw/rtl/interfaces/VX_issue_if.v diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index b813c882..935ad44f 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -72,9 +72,9 @@ module VX_alu_unit #( for (genvar i = 0; i < `NUM_THREADS; i++) begin always @(*) begin case (alu_op_class) - 0: alu_result[i] = is_sub ? sub_result[i][31:0] : add_result[i]; + 0: alu_result[i] = add_result[i]; 1: alu_result[i] = {31'b0, sub_result[i][32]}; - 2: alu_result[i] = shift_result[i]; + 2: alu_result[i] = is_sub ? sub_result[i][31:0] : shift_result[i]; default: alu_result[i] = misc_result[i]; endcase end diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index d1027dc8..2afb1f4a 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -111,7 +111,7 @@ module VX_commit #( end end `else - `UNUSED_FIELD(fpu_commit_if, curr_PC) + `UNUSED_VAR(fpu_commit_if.curr_PC) `endif endmodule diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 5bf9733e..66017c93 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -111,13 +111,13 @@ `define MOD_BITS 3 `define ALU_ADD 4'b0000 -`define ALU_SUB 4'b0001 `define ALU_LUI 4'b0010 `define ALU_AUIPC 4'b0011 `define ALU_SLTU 4'b0100 `define ALU_SLT 4'b0101 `define ALU_SRL 4'b1000 `define ALU_SRA 4'b1001 +`define ALU_SUB 4'b1011 `define ALU_AND 4'b1100 `define ALU_OR 4'b1101 `define ALU_XOR 4'b1110 diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index e93b5543..bdbcd7bf 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -38,7 +38,7 @@ module VX_fpu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`FPUQ_SIZE) - ) mul_queue ( + ) fpu_cam ( .clk (clk), .reset (reset), .acquire_slot (fpuq_push), diff --git a/hw/rtl/VX_gpr_bypass.v b/hw/rtl/VX_gpr_bypass.v index b3d2a67c..65d13339 100644 --- a/hw/rtl/VX_gpr_bypass.v +++ b/hw/rtl/VX_gpr_bypass.v @@ -35,7 +35,9 @@ module VX_gpr_bypass #( if (reset) begin delayed_push <= 0; use_buffer <= 0; - use_buffer2 <= 0; + use_buffer2 <= 0; + buffer <= 0; + buffer2 <= 0; end else begin delayed_push <= push; assert(!use_buffer2 || use_buffer); diff --git a/hw/rtl/VX_gpr_fp_ctrl.v b/hw/rtl/VX_gpr_fp_ctrl.v index f0c176c7..f38126d5 100644 --- a/hw/rtl/VX_gpr_fp_ctrl.v +++ b/hw/rtl/VX_gpr_fp_ctrl.v @@ -23,8 +23,11 @@ module VX_gpr_fp_ctrl ( always @(posedge clk) begin if (reset) begin - read_rs3 <= 0; - rs3_wid <= 0; + read_rs3 <= 0; + rs3_wid <= 0; + rs1_tmp_data <= 0; + rs2_tmp_data <= 0; + rs3_tmp_data <= 0; end else begin if (rs3_delay) begin read_rs3 <= 1; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index acb2fa08..86f17ad4 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -52,10 +52,10 @@ module VX_gpr_stage #( assign gpr_read_if.rs3_data = 0; assign gpr_read_if.ready_in = 1; - `UNUSED_FIELD (gpr_read_if, valid); - `UNUSED_FIELD (gpr_read_if, use_rs3); - `UNUSED_FIELD (gpr_read_if, rs3); - `UNUSED_FIELD (gpr_read_if, ready_out); + `UNUSED_VAR (gpr_read_if.valid); + `UNUSED_VAR (gpr_read_if.use_rs3); + `UNUSED_VAR (gpr_read_if.rs3); + `UNUSED_VAR (gpr_read_if.ready_out); `endif assign writeback_if.ready = 1'b1; diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 96dc15b6..55478fbc 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -83,6 +83,8 @@ module VX_ibuffer #( reg [`NW_BITS-1:0] deq_wid, deq_wid_n; reg deq_valid, deq_valid_n; reg [DATAW-1:0] deq_instr, deq_instr_n; + + reg [DATAW-1:0] q_data_prev_r, q_data_out_r; always @(*) begin valid_table_n = valid_table; @@ -94,6 +96,8 @@ module VX_ibuffer #( end end + // schedule the next instruction to issue + // does round-robin scheduling when multiple warps are present always @(*) begin deq_valid_n = 0; deq_wid_n = 'x; @@ -108,7 +112,7 @@ module VX_ibuffer #( end else if ((1 == num_warps) || freeze) begin deq_valid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) || enq_fire; deq_wid_n = (!deq_fire || (q_size[deq_wid] != SIZEW'(1))) ? deq_wid : ibuf_enq_if.wid; - deq_instr_n = deq_fire ? ((q_size[deq_wid] != SIZEW'(1)) ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid]; + deq_instr_n = deq_fire ? ((q_size[deq_wid] != SIZEW'(1)) ? q_data_prev_r : q_data_in) : q_data_out_r; end else begin for (integer i = 0; i < `NUM_WARPS; i++) begin if (schedule_table_n[i]) begin @@ -130,9 +134,9 @@ module VX_ibuffer #( valid_table <= 0; schedule_table <= 0; deq_valid <= 0; - num_warps <= 0; + num_warps <= 0; end else begin - valid_table <= valid_table_n; + valid_table <= valid_table_n; if ((| schedule_table_n)) begin schedule_table <= schedule_table_n; @@ -141,9 +145,12 @@ module VX_ibuffer #( schedule_table[deq_wid_n] <= 0; end - deq_valid <= deq_valid_n; - deq_wid <= deq_wid_n; - deq_instr <= deq_instr_n; + q_data_out_r <= (0 == num_warps) ? q_data_in : q_data_out[deq_wid_n]; + q_data_prev_r <= q_data_prev[deq_wid_n]; + + deq_valid <= deq_valid_n; + deq_wid <= deq_wid_n; + deq_instr <= deq_instr_n; if (warp_added && !warp_removed) begin num_warps <= num_warps + NWARPSW'(1); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 575fa204..0f68f381 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -110,7 +110,7 @@ module VX_lsu_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .SIZE (`LSUQ_SIZE) - ) lsu_queue ( + ) lsu_cam ( .clk (clk), .reset (reset), .write_addr (req_tag), diff --git a/hw/rtl/VX_mul_unit.v b/hw/rtl/VX_mul_unit.v index 770d92e7..50f07644 100644 --- a/hw/rtl/VX_mul_unit.v +++ b/hw/rtl/VX_mul_unit.v @@ -35,7 +35,7 @@ module VX_mul_unit #( VX_cam_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .SIZE (`MULQ_SIZE) - ) mul_queue ( + ) mul_cam ( .clk (clk), .reset (reset), .acquire_slot (mulq_push), diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 7e59af79..3f158408 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -31,13 +31,7 @@ /* verilator lint_on UNDRIVEN */ \ /* verilator lint_on DECLFILENAME */ -`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \ - wire [$bits(x)-1:0] __``x``__ = x; \ - /* verilator lint_on UNUSED */ - -`define UNUSED_FIELD(x,y) /* verilator lint_off UNUSED */ \ - wire [$bits(x.y)-1:0] __``y``__ = x.y; \ - /* verilator lint_on UNUSED */ +`define UNUSED_VAR(x) always @(x) begin end `define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \ . x () \ diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index ee24cb88..f59109a3 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -25,44 +25,44 @@ module VX_writeback #( wire wb_valid; wire [`NW_BITS-1:0] wb_wid; - wire [`NUM_THREADS-1:0] wb_thread_mask; + wire [`NUM_THREADS-1:0] wb_tmask; wire [`NR_BITS-1:0] wb_rd; wire [`NUM_THREADS-1:0][31:0] wb_data; - assign wb_valid = alu_valid ? alu_commit_if.valid : - lsu_valid ? lsu_commit_if.valid : - csr_valid ? csr_commit_if.valid : - mul_valid ? mul_commit_if.valid : - fpu_valid ? fpu_commit_if.valid : - 0; + assign wb_valid = alu_valid ? alu_commit_if.valid : + lsu_valid ? lsu_commit_if.valid : + csr_valid ? csr_commit_if.valid : + mul_valid ? mul_commit_if.valid : + fpu_valid ? fpu_commit_if.valid : + 0; - assign wb_wid = alu_valid ? alu_commit_if.wid : - lsu_valid ? lsu_commit_if.wid : - csr_valid ? csr_commit_if.wid : - mul_valid ? mul_commit_if.wid : - fpu_valid ? fpu_commit_if.wid : - 0; + assign wb_wid = alu_valid ? alu_commit_if.wid : + lsu_valid ? lsu_commit_if.wid : + csr_valid ? csr_commit_if.wid : + mul_valid ? mul_commit_if.wid : + fpu_valid ? fpu_commit_if.wid : + 0; - assign wb_thread_mask = alu_valid ? alu_commit_if.thread_mask : - lsu_valid ? lsu_commit_if.thread_mask : - csr_valid ? csr_commit_if.thread_mask : - mul_valid ? mul_commit_if.thread_mask : - fpu_valid ? fpu_commit_if.thread_mask : - 0; + assign wb_tmask = alu_valid ? alu_commit_if.thread_mask : + lsu_valid ? lsu_commit_if.thread_mask : + csr_valid ? csr_commit_if.thread_mask : + mul_valid ? mul_commit_if.thread_mask : + fpu_valid ? fpu_commit_if.thread_mask : + 0; - assign wb_rd = alu_valid ? alu_commit_if.rd : - lsu_valid ? lsu_commit_if.rd : - csr_valid ? csr_commit_if.rd : - mul_valid ? mul_commit_if.rd : - fpu_valid ? fpu_commit_if.rd : - 0; + assign wb_rd = alu_valid ? alu_commit_if.rd : + lsu_valid ? lsu_commit_if.rd : + csr_valid ? csr_commit_if.rd : + mul_valid ? mul_commit_if.rd : + fpu_valid ? fpu_commit_if.rd : + 0; - assign wb_data = alu_valid ? alu_commit_if.data : - lsu_valid ? lsu_commit_if.data : - csr_valid ? csr_commit_if.data : - mul_valid ? mul_commit_if.data : - fpu_valid ? fpu_commit_if.data : - 0; + assign wb_data = alu_valid ? alu_commit_if.data : + lsu_valid ? lsu_commit_if.data : + csr_valid ? csr_commit_if.data : + mul_valid ? mul_commit_if.data : + fpu_valid ? fpu_commit_if.data : + 0; wire stall = ~writeback_if.ready && writeback_if.valid; @@ -73,7 +73,7 @@ module VX_writeback #( .reset (reset), .stall (stall), .flush (1'b0), - .in ({wb_valid, wb_wid, wb_thread_mask, wb_rd, wb_data}), + .in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}), .out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data}) ); diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 14df2031..d1a7433a 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -62,7 +62,7 @@ module VX_snp_forwarder #( VX_cam_buffer #( .DATAW (`DRAM_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH), .SIZE (SNRQ_SIZE) - ) snp_fwd_buffer ( + ) snp_fwd_cam ( .clk (clk), .reset (reset), .write_addr (sfq_write_addr), diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index 45f7335b..ac30b1b6 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -85,30 +85,28 @@ module VX_fp_noncomp #( wire tmp_a_smaller = $signed(dataa[i]) < $signed(datab[i]); wire tmp_ab_equal = (dataa[i] == datab[i]) | (tmp_a_type[4] & tmp_b_type[4]); - always @(posedge clk) begin - if (~stall) begin - a_sign[i] <= tmp_a_sign; - b_sign[i] <= tmp_b_sign; - a_exponent[i] <= tmp_a_exponent; - b_exponent[i] <= tmp_b_exponent; - a_mantissa[i] <= tmp_a_mantissa; - b_mantissa[i] <= tmp_b_mantissa; - a_type[i] <= tmp_a_type; - b_type[i] <= tmp_b_type; - a_smaller[i] <= tmp_a_smaller; - ab_equal[i] <= tmp_ab_equal; - end - end - end + VX_generic_register #( + .N(1 + 1 + 8 + 8 + 23 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1) + ) fnc1_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_b_exponent, tmp_a_mantissa, tmp_b_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}), + .out ({a_sign[i], b_sign[i], a_exponent[i], b_exponent[i], a_mantissa[i], b_mantissa[i], a_type[i], b_type[i], a_smaller[i], ab_equal[i]}) + ); + end - always @(posedge clk) begin - if (~stall) begin - op_type_r <= op_type; - frm_r <= frm; - dataa_r <= dataa; - datab_r <= datab; - end - end + VX_generic_register #( + .N(`FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32)) + ) fnc2_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({op_type, frm, dataa, datab}), + .out ({op_type_r, frm_r, dataa_r, datab_r}) + ); // FCLASS for (genvar i = 0; i < LANES; i++) begin diff --git a/hw/rtl/interfaces/VX_issue_if.v b/hw/rtl/interfaces/VX_issue_if.v deleted file mode 100644 index e3757ce1..00000000 --- a/hw/rtl/interfaces/VX_issue_if.v +++ /dev/null @@ -1,38 +0,0 @@ -`ifndef VX_ISSUE_IF -`define VX_ISSUE_IF - -`include "VX_define.vh" - -interface VX_issue_if (); - - wire valid; - - wire [`ITAG_BITS-1:0] issue_tag; - wire [`NW_BITS-1:0] wid; - wire [`NUM_THREADS-1:0] thread_mask; - wire [31:0] curr_PC; - - wire [`EX_BITS-1:0] ex_type; - wire [`OP_BITS-1:0] op_type; - - wire [`FRM_BITS-1:0] frm; - - wire wb; - - wire [`NR_BITS-1:0] rd; - - wire [`NUM_THREADS-1:0][31:0] rs1_data; - wire [`NUM_THREADS-1:0][31:0] rs2_data; - wire [`NUM_THREADS-1:0][31:0] rs3_data; - - wire [`NR_BITS-1:0] rs1; - wire [31:0] imm; - - wire rs1_is_PC; - wire rs2_is_imm; - - wire [1NT_BITS-1:0] tid; - -endinterface - -`endif \ No newline at end of file diff --git a/hw/rtl/libs/VX_skid_buffer.v b/hw/rtl/libs/VX_skid_buffer.v index a8290f30..d179ed31 100644 --- a/hw/rtl/libs/VX_skid_buffer.v +++ b/hw/rtl/libs/VX_skid_buffer.v @@ -18,7 +18,9 @@ module VX_skid_buffer #( reg use_buffer; always @(posedge clk) begin - if (reset) begin + if (reset) begin + data_out_r <= 0; + buffer <= 0; use_buffer <= 0; valid_out_r <= 0; end else begin