From feca2db24e120119dd63bf07322a3a3e931851f6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 15 Sep 2021 04:50:45 -0700 Subject: [PATCH] critical path optimizations --- hw/rtl/cache/VX_nc_bypass.v | 12 ++++++---- hw/rtl/fp_cores/VX_fp_cvt.v | 22 +++++++---------- hw/rtl/libs/VX_priority_encoder.v | 40 ++++++++++++++----------------- hw/rtl/libs/VX_stream_arbiter.v | 25 +++++++++++-------- 4 files changed, 49 insertions(+), 50 deletions(-) diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index d307b448..ca6f6be0 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -107,6 +107,7 @@ module VX_nc_bypass #( wire [NUM_REQS-1:0] core_req_valid_in_nc; wire [NUM_REQS-1:0] core_req_nc_tids; wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid; + wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_valid; for (genvar i = 0; i < NUM_REQS; ++i) begin @@ -115,12 +116,13 @@ module VX_nc_bypass #( assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids; - VX_lzc #( + VX_priority_encoder #( .N (NUM_REQS) ) core_req_sel ( - .in_i (core_req_valid_in_nc), - .cnt_o (core_req_nc_tid), - .valid_o (core_req_nc_valid) + .data_in (core_req_valid_in_nc), + .index (core_req_nc_tid), + .onehot (core_req_nc_sel), + .valid_out (core_req_nc_valid) ); assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids; @@ -143,7 +145,7 @@ module VX_nc_bypass #( if (NUM_REQS > 1) begin for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_ready_in[i] = core_req_valid_in_nc[i] ? - (~mem_req_valid_in && mem_req_ready_out && (core_req_nc_tid == i)) : core_req_ready_out[i]; + (~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i]; end end else begin assign core_req_ready_in = core_req_valid_in_nc ? 
(~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out; diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index 2362f65c..2f435f39 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -180,39 +180,35 @@ module VX_fp_cvt #( wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1; wire [LANES-1:0] of_before_round_s1; - for (genvar i = 0; i < LANES; ++i) begin - wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination + for (genvar i = 0; i < LANES; ++i) begin reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments reg of_before_round; - // Rebias the exponent - assign destination_exp = input_exp_s1[i] + EXP_BIAS; - always @(*) begin `IGNORE_WARNINGS_BEGIN // Default assignment - final_exp = destination_exp; // take exponent as is, only look at lower bits - preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter + final_exp = input_exp_s1[i] + EXP_BIAS; // rebias exponent for destination, only look at lower bits + preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter denorm_shamt = 0; // right of mantissa of_before_round = 1'b0; // Handle INT casts if (is_itof_s1) begin - if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin + if ($signed(input_exp_s1[i]) >= $signed(2**EXP_BITS-1-EXP_BIAS)) begin // Overflow or infinities (for proper rounding) final_exp = (2**EXP_BITS-2); // largest normal value preshift_mant = ~0; // largest normal value and RS bits set of_before_round = 1'b1; - end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin + end else if ($signed(input_exp_s1[i]) < $signed(-MAN_BITS-EXP_BIAS)) begin // Limit the shift to retain sticky bits final_exp = 0; // denormal result - denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky - end else if ($signed(destination_exp) 
< $signed(1)) begin + denorm_shamt = (2 + MAN_BITS); // to sticky + end else if ($signed(input_exp_s1[i]) < $signed(1-EXP_BIAS)) begin // Denormalize underflowing values final_exp = 0; // denormal result - denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting + denorm_shamt = (1-EXP_BIAS) - input_exp_s1[i]; // same as 1 - (input_exp_s1[i] + EXP_BIAS): adjust right shifting end end else begin if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin @@ -221,7 +217,7 @@ module VX_fp_cvt #( of_before_round = 1'b1; end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin // underflow - denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky + denorm_shamt = MAX_INT_WIDTH+1; // all bits go to the sticky end else begin // By default right shift mantissa to be an integer denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i]; diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index 15968a7c..ea5e27d7 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -46,17 +46,17 @@ module VX_priority_encoder #( .data_out (scan_lo) ); + VX_lzc #( + .N (N) + ) lzc ( + .in_i (reversed), + .cnt_o (index), + `UNUSED_PIN (valid_o) + ); + assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; assign valid_out = scan_lo[N-1]; - VX_onehot_encoder #( - .N (N) - ) onehot_encoder ( - .data_in (onehot), - .data_out (index), - `UNUSED_PIN (valid_out) - ); - end else if (MODEL == 2) begin `IGNORE_WARNINGS_BEGIN @@ -66,30 +66,26 @@ module VX_priority_encoder #( assign higher_pri_regs[0] = 1'b0; assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; - VX_onehot_encoder #( + VX_lzc #( .N (N) - ) onehot_encoder ( - .data_in (onehot), - .data_out (index), - `UNUSED_PIN (valid_out) + ) lzc ( + .in_i (reversed), + .cnt_o (index), + .valid_o (valid_out) ); - - assign valid_out = (| reversed); end else if (MODEL == 3) begin assign onehot = reversed & ~(reversed-1); - VX_onehot_encoder #( + 
VX_lzc #( .N (N) - ) onehot_encoder 
( - .data_in (onehot), - .data_out (index), - `UNUSED_PIN (valid_out) + ) lzc ( + .in_i (reversed), + .cnt_o (index), + .valid_o (valid_out) ); - assign valid_out = (| reversed); - end else begin reg [LN-1:0] index_r; diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index 1c89bb4f..faca5d66 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -25,6 +25,7 @@ module VX_stream_arbiter #( wire sel_valid; wire sel_ready; wire [LOG_NUM_REQS-1:0] sel_index; + wire [NUM_REQS-1:0] sel_onehot; wire [NUM_REQS-1:0] valid_in_any; wire [LANES-1:0] ready_in_sel; @@ -42,13 +43,17 @@ module VX_stream_arbiter #( end if (TYPE == "P") begin - `UNUSED_VAR (sel_ready) - VX_lzc #( - .N (NUM_REQS) + VX_fixed_arbiter #( + .NUM_REQS (NUM_REQS), + .LOCK_ENABLE (LOCK_ENABLE) ) sel_arb ( - .in_i (valid_in_any), - .cnt_o (sel_index), - .valid_o (sel_valid) + .clk (clk), + .reset (reset), + .requests (valid_in_any), + .enable (sel_ready), + .grant_valid (sel_valid), + .grant_index (sel_index), + .grant_onehot (sel_onehot) ); end else if (TYPE == "R") begin VX_rr_arbiter #( @@ -61,7 +66,7 @@ module VX_stream_arbiter #( .enable (sel_ready), .grant_valid (sel_valid), .grant_index (sel_index), - `UNUSED_PIN (grant_onehot) + .grant_onehot (sel_onehot) ); end else if (TYPE == "F") begin VX_fair_arbiter #( @@ -74,7 +79,7 @@ module VX_stream_arbiter #( .enable (sel_ready), .grant_valid (sel_valid), .grant_index (sel_index), - `UNUSED_PIN (grant_onehot) + .grant_onehot (sel_onehot) ); end else if (TYPE == "M") begin VX_matrix_arbiter #( @@ -87,7 +92,7 @@ module VX_stream_arbiter #( .enable (sel_ready), .grant_valid (sel_valid), .grant_index (sel_index), - `UNUSED_PIN (grant_onehot) + .grant_onehot (sel_onehot) ); end else begin $error ("invalid parameter"); @@ -109,7 +114,7 @@ module VX_stream_arbiter #( end for (genvar i = 0; i < NUM_REQS; i++) begin - assign ready_in[i] = ready_in_sel & {LANES{(sel_index == LOG_NUM_REQS'(i))}}; + assign 
ready_in[i] = ready_in_sel & {LANES{sel_onehot[i]}}; end for (genvar i = 0; i < LANES; ++i) begin