From 7e93d253f250184ca6d5331e4786a2d6bcb3fd8f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 10 Jan 2021 22:03:23 -0800 Subject: [PATCH] minor update --- hw/rtl/fp_cores/VX_fp_cvt.v | 13 +++-- hw/rtl/fp_cores/VX_fp_div.v | 22 ++++++++ hw/rtl/fp_cores/VX_fp_fma.v | 22 ++++++++ hw/rtl/fp_cores/VX_fp_ncomp.v | 6 ++- hw/rtl/fp_cores/VX_fp_rounding.v | 39 +++++++------- hw/rtl/fp_cores/VX_fp_sqrt.v | 22 ++++++++ hw/rtl/libs/VX_lzc.v | 89 ++++++++++++++++++++++++++------ 7 files changed, 174 insertions(+), 39 deletions(-) diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index 43fd6dd3..8d58cca3 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -1,5 +1,8 @@ `include "VX_define.vh" +/// Modified port of cast module from fpnew Library +/// reference: https://github.com/pulp-platform/fpnew + `ifndef SYNTHESIS `include "float_dpi.vh" `endif @@ -91,14 +94,14 @@ module VX_fp_cvt #( wire [LANES-1:0] mant_is_zero; // for integer zeroes for (genvar i = 0; i < LANES; ++i) begin - // Leading zero counter for cancellations wire mant_is_nonzero; VX_lzc #( - .DATAW (INT_MAN_WIDTH) + .WIDTH (INT_MAN_WIDTH), + .MODE (1) ) lzc ( - .data_in (encoded_mant[i]), - .data_out (renorm_shamt[i]), - .valid_out (mant_is_nonzero) + .in_i (encoded_mant[i]), + .cnt_o (renorm_shamt[i]), + .valid_o (mant_is_nonzero) ); assign mant_is_zero[i] = ~mant_is_nonzero; end diff --git a/hw/rtl/fp_cores/VX_fp_div.v b/hw/rtl/fp_cores/VX_fp_div.v index be06b7e2..5d3eaafe 100644 --- a/hw/rtl/fp_cores/VX_fp_div.v +++ b/hw/rtl/fp_cores/VX_fp_div.v @@ -38,6 +38,27 @@ module VX_fp_div #( ); for (genvar i = 0; i < LANES; i++) begin + `ifdef VERILATOR + reg [31:0] r; + fflags_t f; + + always @(*) begin + dpi_fdiv (dataa[i], datab[i], frm, r, f); + end + `UNUSED_VAR (f) + + VX_shift_register #( + .DATAW (32), + .DEPTH (`LATENCY_FDIV), + .RESETW (1) + ) shift_req_dpi ( + .clk (clk), + .reset (_reset), + .enable (enable), + .data_in (r), + .data_out (result[i]) + ); + 
`else acl_fdiv fdiv ( .clk (clk), .areset (_reset), @@ -46,6 +67,7 @@ module VX_fp_div #( .b (datab[i]), .q (result[i]) ); + `endif end VX_shift_register #( diff --git a/hw/rtl/fp_cores/VX_fp_fma.v b/hw/rtl/fp_cores/VX_fp_fma.v index 3cd1b2e5..ce7efb24 100644 --- a/hw/rtl/fp_cores/VX_fp_fma.v +++ b/hw/rtl/fp_cores/VX_fp_fma.v @@ -59,6 +59,27 @@ module VX_fp_fma #( end end + `ifdef VERILATOR + reg [31:0] r; + fflags_t f; + + always @(*) begin + dpi_fmadd (a, b, c, frm, r, f); + end + `UNUSED_VAR (f) + + VX_shift_register #( + .DATAW (32), + .DEPTH (`LATENCY_FMA), + .RESETW (1) + ) shift_req_dpi ( + .clk (clk), + .reset (reset), + .enable (enable), + .data_in (r), + .data_out (result[i]) + ); + `else acl_fmadd fmadd ( .clk (clk), .areset (reset), @@ -68,6 +89,7 @@ module VX_fp_fma #( .c (c), .q (result[i]) ); + `endif end VX_shift_register #( diff --git a/hw/rtl/fp_cores/VX_fp_ncomp.v b/hw/rtl/fp_cores/VX_fp_ncomp.v index 57792d15..7f4406ce 100644 --- a/hw/rtl/fp_cores/VX_fp_ncomp.v +++ b/hw/rtl/fp_cores/VX_fp_ncomp.v @@ -1,5 +1,8 @@ `include "VX_define.vh" +/// Modified port of noncomp module from fpnew Library +/// reference: https://github.com/pulp-platform/fpnew + module VX_fp_ncomp #( parameter TAGW = 1, parameter LANES = 1 @@ -87,7 +90,8 @@ module VX_fp_ncomp #( VX_pipe_register #( .DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)), - .RESETW (1) + .RESETW (1), + .DEPTH (0) ) pipe_reg0 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fp_cores/VX_fp_rounding.v b/hw/rtl/fp_cores/VX_fp_rounding.v index d899c3d1..9e544e44 100644 --- a/hw/rtl/fp_cores/VX_fp_rounding.v +++ b/hw/rtl/fp_cores/VX_fp_rounding.v @@ -1,6 +1,9 @@ `include "VX_define.vh" +/// Modified port of rounding module from fpnew Library +/// reference: https://github.com/pulp-platform/fpnew + module VX_fp_rounding #( parameter DAT_WIDTH = 2 // Width of the abolute value, without sign bit ) ( @@ -17,17 +20,17 @@ module VX_fp_rounding #( output 
wire exact_zero_o // output is an exact zero ); - reg round_up; // Rounding decision + reg round_up; // Rounding decision - // Take the rounding decision according to RISC-V spec - // RoundMode | Mnemonic | Meaning - // :--------:|:--------:|:------- - // 000 | RNE | Round to Nearest, ties to Even - // 001 | RTZ | Round towards Zero - // 010 | RDN | Round Down (towards -\infty) - // 011 | RUP | Round Up (towards \infty) - // 100 | RMM | Round to Nearest, ties to Max Magnitude - // others | | *invalid* + // Take the rounding decision according to RISC-V spec + // RoundMode | Mnemonic | Meaning + // :--------:|:--------:|:------- + // 000 | RNE | Round to Nearest, ties to Even + // 001 | RTZ | Round towards Zero + // 010 | RDN | Round Down (towards -\infty) + // 011 | RUP | Round Up (towards \infty) + // 100 | RMM | Round to Nearest, ties to Max Magnitude + // others | | *invalid* always @(*) begin case (rnd_mode_i) @@ -47,15 +50,15 @@ module VX_fp_rounding #( endcase end - // Perform the rounding, exponent change and overflow to inf happens automagically - assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up); + // Perform the rounding, exponent change and overflow to inf happens automagically + assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up); - // True zero result is a zero result without dirty round/sticky bits - assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0); + // True zero result is a zero result without dirty round/sticky bits + assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0); - // In case of effective subtraction (thus signs of addition operands must have differed) and a - // true zero result, the result sign is '-' in case of RDN and '+' for other modes. - assign sign_o = (exact_zero_o && effective_subtraction_i) ? 
(rnd_mode_i == `FRM_RDN) - : sign_i; + // In case of effective subtraction (thus signs of addition operands must have differed) and a + // true zero result, the result sign is '-' in case of RDN and '+' for other modes. + assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN) + : sign_i; endmodule \ No newline at end of file diff --git a/hw/rtl/fp_cores/VX_fp_sqrt.v b/hw/rtl/fp_cores/VX_fp_sqrt.v index 132319f4..a00a9a37 100644 --- a/hw/rtl/fp_cores/VX_fp_sqrt.v +++ b/hw/rtl/fp_cores/VX_fp_sqrt.v @@ -37,6 +37,27 @@ module VX_fp_sqrt #( ); for (genvar i = 0; i < LANES; i++) begin + `ifdef VERILATOR + reg [31:0] r; + fflags_t f; + + always @(*) begin + dpi_fsqrt (dataa[i], frm, r, f); + end + `UNUSED_VAR (f) + + VX_shift_register #( + .DATAW (32), + .DEPTH (`LATENCY_FSQRT), + .RESETW (1) + ) shift_req_dpi ( + .clk (clk), + .reset (_reset), + .enable (enable), + .data_in (r), + .data_out (result[i]) + ); + `else acl_fsqrt fsqrt ( .clk (clk), .areset (_reset), @@ -44,6 +65,7 @@ module VX_fp_sqrt #( .a (dataa[i]), .q (result[i]) ); + `endif end VX_shift_register #( diff --git a/hw/rtl/libs/VX_lzc.v b/hw/rtl/libs/VX_lzc.v index 469c587c..0ee0737a 100644 --- a/hw/rtl/libs/VX_lzc.v +++ b/hw/rtl/libs/VX_lzc.v @@ -1,27 +1,86 @@ `include "VX_platform.vh" +/// Modified port of lzc module from fpnew Library +/// reference: https://github.com/pulp-platform/fpnew +/// A trailing zero counter / leading zero counter. +/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB) +/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB) +/// If the input does not contain a one (i.e. it is all zeros), `valid_o` is deasserted. Additionally `cnt_o` contains +/// the maximum number of zeros - 1. 
For example: +/// in_i = 000_0000, valid_o = 0, cnt_o = 6 (mode = 0) +/// in_i = 000_0001, valid_o = 1, cnt_o = 0 (mode = 0) +/// in_i = 000_1000, valid_o = 1, cnt_o = 3 (mode = 0) +/// Note: the upstream fpnew module also provides a faster Verilator-only (simulation) implementation; +/// that variant is not included in this port. + module VX_lzc #( - parameter DATAW = 32, - parameter LDATAW = `LOG2UP(DATAW) + /// The width of the input vector. + parameter int unsigned WIDTH = 2, + parameter bit MODE = 1'b0 // 0 -> trailing zero, 1 -> leading zero ) ( - input wire [DATAW-1:0] data_in, - output wire [LDATAW-1:0] data_out, - output wire valid_out -); + input logic [WIDTH-1:0] in_i, + output logic [$clog2(WIDTH)-1:0] cnt_o, + output logic valid_o +); +`IGNORE_WARNINGS_BEGIN - reg [LDATAW-1:0] data_out_r; + localparam int unsigned NUM_LEVELS = $clog2(WIDTH); - always @(*) begin - data_out_r = 'x; - for (integer i = DATAW-1; i >= 0; --i) begin - if (data_in[i]) begin - data_out_r = LDATAW'(DATAW-1-i); - break; + // pragma translate_off + initial begin + assert(WIDTH > 0) else $fatal("input must be at least one bit wide"); + end + // pragma translate_on + + logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; + logic [2**NUM_LEVELS-1:0] sel_nodes; + logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + // reverse vector if required + always_comb begin : flip_vector + for (int unsigned i = 0; i < WIDTH; i++) begin + in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i]; + end + end + + for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut + assign index_lut[j] = NUM_LEVELS'(unsigned'(j)); + end + + for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels + if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level + for (genvar k = 0; k < 2**level; k++) begin : g_level + // if two successive indices are still in the vector... 
+ if (unsigned'(k) * 2 < WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; + assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : + index_lut[k*2+1]; + end + // if only the first index is still in the vector... + if (unsigned'(k) * 2 == WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2]; + assign index_nodes[2**level-1+k] = index_lut[k*2]; + end + // if index is out of range + if (unsigned'(k) * 2 > WIDTH-1) begin + assign sel_nodes[2**level-1+k] = 1'b0; + assign index_nodes[2**level-1+k] = '0; + end + end + end else begin + for (genvar l = 0; l < 2**level; l++) begin : g_level + assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; + assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] : + index_nodes[2**(level+1)-1+l*2+1]; end end end - assign data_out = data_out_r; - assign valid_out = (| data_in); + assign cnt_o = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0); + assign valid_o = NUM_LEVELS > unsigned'(0) ? sel_nodes[0] : (|in_i); + +`IGNORE_WARNINGS_END endmodule \ No newline at end of file