minor update

2021-01-10 22:03:23 -08:00
parent e770824d47
commit 7e93d253f2
7 changed files with 174 additions and 39 deletions
--- a/hw/rtl/fp_cores/VX_fp_cvt.v
+++ b/hw/rtl/fp_cores/VX_fp_cvt.v
@@ -1,5 +1,8 @@
 `include "VX_define.vh"
 /// Modified port of cast module from fpnew Library
 /// reference: https://github.com/pulp-platform/fpnew
 `ifndef SYNTHESIS
 `include "float_dpi.vh"
 `endif
@@ -91,14 +94,14 @@ module VX_fp_cvt #(
    wire [LANES-1:0] mant_is_zero;                       // for integer zeroes
    for (genvar i = 0; i < LANES; ++i) begin
        // Leading zero counter for cancellations
        wire mant_is_nonzero;
        VX_lzc #(
-            .DATAW (INT_MAN_WIDTH)
+            .WIDTH (INT_MAN_WIDTH),
            .MODE  (1)
        ) lzc (
-            .data_in   (encoded_mant[i]),
+            .in_i    (encoded_mant[i]),
-            .data_out  (renorm_shamt[i]),
+            .cnt_o   (renorm_shamt[i]),
-            .valid_out (mant_is_nonzero)
+            .valid_o (mant_is_nonzero)
        );
        assign mant_is_zero[i] = ~mant_is_nonzero;
    end
--- a/hw/rtl/fp_cores/VX_fp_div.v
+++ b/hw/rtl/fp_cores/VX_fp_div.v
@@ -38,6 +38,27 @@ module VX_fp_div #(
    );
    for (genvar i = 0; i < LANES; i++) begin
    `ifdef VERILATOR
        reg [31:0] r;
        fflags_t f;
        always @(*) begin        
            dpi_fdiv (dataa[i], datab[i], frm, r, f);
        end
        `UNUSED_VAR (f)
        VX_shift_register #(
            .DATAW  (32),
            .DEPTH  (`LATENCY_FDIV),
            .RESETW (1)
        ) shift_req_dpi (
            .clk      (clk),
            .reset    (_reset),
            .enable   (enable),
            .data_in  (r),
            .data_out (result[i])
        );
    `else
        acl_fdiv fdiv (
            .clk    (clk),
            .areset (_reset),
@@ -46,6 +67,7 @@ module VX_fp_div #(
            .b      (datab[i]),
            .q      (result[i])
        );
    `endif
    end
    VX_shift_register #(
--- a/hw/rtl/fp_cores/VX_fp_fma.v
+++ b/hw/rtl/fp_cores/VX_fp_fma.v
@@ -59,6 +59,27 @@ module VX_fp_fma #(
            end    
        end
    `ifdef VERILATOR
        reg [31:0] r;
        fflags_t f;
        always @(*) begin        
            dpi_fmadd (a, b, c, frm, r, f);
        end
        `UNUSED_VAR (f)
        VX_shift_register #(
            .DATAW  (32),
            .DEPTH  (`LATENCY_FMA),
            .RESETW (1)
        ) shift_req_dpi (
            .clk      (clk),
            .reset    (reset),
            .enable   (enable),
            .data_in  (r),
            .data_out (result[i])
        );
    `else
        acl_fmadd fmadd (
            .clk    (clk),
            .areset (reset),
@@ -68,6 +89,7 @@ module VX_fp_fma #(
            .c      (c),
            .q      (result[i])
        );
    `endif
    end
    VX_shift_register #(
--- a/hw/rtl/fp_cores/VX_fp_ncomp.v
+++ b/hw/rtl/fp_cores/VX_fp_ncomp.v
@@ -1,5 +1,8 @@
 `include "VX_define.vh"
 /// Modified port of noncomp module from fpnew Library
 /// reference: https://github.com/pulp-platform/fpnew
 module VX_fp_ncomp #( 
    parameter TAGW = 1,
    parameter LANES = 1
@@ -87,7 +90,8 @@ module VX_fp_ncomp #(
    VX_pipe_register #(
        .DATAW  (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
-        .RESETW (1)
+        .RESETW (1),
        .DEPTH  (0)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
--- a/hw/rtl/fp_cores/VX_fp_rounding.v
+++ b/hw/rtl/fp_cores/VX_fp_rounding.v
@@ -1,6 +1,9 @@
 `include "VX_define.vh"
 /// Modified port of rounding module from fpnew Library
 /// reference: https://github.com/pulp-platform/fpnew
 module VX_fp_rounding #(
    parameter DAT_WIDTH = 2 // Width of the absolute value, without sign bit
 ) (
@@ -17,17 +20,17 @@ module VX_fp_rounding #(
    output wire                 exact_zero_o             // output is an exact zero
 );
-  reg round_up; // Rounding decision
+    reg round_up; // Rounding decision
-  // Take the rounding decision according to RISC-V spec
+    // Take the rounding decision according to RISC-V spec
-  // RoundMode | Mnemonic | Meaning
+    // RoundMode | Mnemonic | Meaning
-  // :--------:|:--------:|:-------
+    // :--------:|:--------:|:-------
-  //    000    |   RNE    | Round to Nearest, ties to Even
+    //    000    |   RNE    | Round to Nearest, ties to Even
-  //    001    |   RTZ    | Round towards Zero
+    //    001    |   RTZ    | Round towards Zero
-  //    010    |   RDN    | Round Down (towards -\infty)
+    //    010    |   RDN    | Round Down (towards -\infty)
-  //    011    |   RUP    | Round Up (towards \infty)
+    //    011    |   RUP    | Round Up (towards \infty)
-  //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
+    //    100    |   RMM    | Round to Nearest, ties to Max Magnitude
-  //  others   |          | *invalid*
+    //  others   |          | *invalid*
    always @(*) begin
        case (rnd_mode_i)
@@ -47,15 +50,15 @@ module VX_fp_rounding #(
        endcase
    end
-  // Perform the rounding, exponent change and overflow to inf happens automagically
+    // Perform the rounding, exponent change and overflow to inf happens automagically
-  assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up);
+    assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up);
-  // True zero result is a zero result without dirty round/sticky bits
+    // True zero result is a zero result without dirty round/sticky bits
-  assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0);
+    assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0);
-  // In case of effective subtraction (thus signs of addition operands must have differed) and a
+    // In case of effective subtraction (thus signs of addition operands must have differed) and a
-  // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
+    // true zero result, the result sign is '-' in case of RDN and '+' for other modes.
-  assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
+    assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
-                                                            : sign_i;
+                                                              : sign_i;
 endmodule
--- a/hw/rtl/fp_cores/VX_fp_sqrt.v
+++ b/hw/rtl/fp_cores/VX_fp_sqrt.v
@@ -37,6 +37,27 @@ module VX_fp_sqrt #(
    );  
    for (genvar i = 0; i < LANES; i++) begin
    `ifdef VERILATOR
        reg [31:0] r;
        fflags_t f;
        always @(*) begin        
            dpi_fsqrt  (dataa[i], frm, r, f);
        end
        `UNUSED_VAR (f)
        VX_shift_register #(
            .DATAW  (32),
            .DEPTH  (`LATENCY_FSQRT),
            .RESETW (1)
        ) shift_req_dpi (
            .clk      (clk),
            .reset    (_reset),
            .enable   (enable),
            .data_in  (r),
            .data_out (result[i])
        );
    `else
        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (_reset),
@@ -44,6 +65,7 @@ module VX_fp_sqrt #(
            .a      (dataa[i]),
            .q      (result[i])
        );
    `endif
    end
    VX_shift_register #(
--- a/hw/rtl/libs/VX_lzc.v
+++ b/hw/rtl/libs/VX_lzc.v
@@ -1,27 +1,86 @@
 `include "VX_platform.vh"
 /// Modified port of lzc module from fpnew Library
 /// reference: https://github.com/pulp-platform/fpnew
 /// A trailing zero counter / leading zero counter.
 /// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
 /// Set MODE to 1 for leading zero counter  => cnt_o is the number of leading zeros  (from the MSB)
 /// If the input contains no set bit (all zeros), `valid_o` is deasserted. In that case `cnt_o`
 /// contains the maximum number of zeros - 1. For example:
 ///   in_i = 000_0000, valid_o = 0, cnt_o = 6 (mode = 0)
 ///   in_i = 000_0001, valid_o = 1, cnt_o = 0 (mode = 0)
 ///   in_i = 000_1000, valid_o = 1, cnt_o = 3 (mode = 0)
 /// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
 /// This speeds up simulation significantly.
 module VX_lzc #(
-    parameter DATAW  = 32,
+    /// The width of the input vector.
-    parameter LDATAW = `LOG2UP(DATAW)
+    parameter int unsigned WIDTH = 2,
    parameter bit          MODE  = 1'b0 // 0 -> trailing zero, 1 -> leading zero
 ) (
-    input wire  [DATAW-1:0]  data_in,
+    input  logic [WIDTH-1:0]         in_i,
-    output wire [LDATAW-1:0] data_out,
+    output logic [$clog2(WIDTH)-1:0] cnt_o,
-    output wire              valid_out
+    output logic                     valid_o
 );
 `IGNORE_WARNINGS_BEGIN
-    reg [LDATAW-1:0] data_out_r;
+    localparam int unsigned NUM_LEVELS = $clog2(WIDTH);
-    always @(*) begin
+    // pragma translate_off
-        data_out_r = 'x;
+    initial begin
-        for (integer i = DATAW-1; i >= 0; --i) begin
+        assert(WIDTH > 0) else $fatal("input must be at least one bit wide");
-            if (data_in[i]) begin
+    end
-                data_out_r = LDATAW'(DATAW-1-i);
+    // pragma translate_on
-                break;
+
    logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;
    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes;
    logic [WIDTH-1:0] in_tmp;
    // reverse vector if required
    always_comb begin : flip_vector
        for (int unsigned i = 0; i < WIDTH; i++) begin
            in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
        end
    end
    for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
        assign index_lut[j] = NUM_LEVELS'(unsigned'(j));
    end
    for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels
        if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level
            for (genvar k = 0; k < 2**level; k++) begin : g_level
                // if two successive indices are still in the vector...
                if (unsigned'(k) * 2 < WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] :
                                                                               index_lut[k*2+1];
                end
                // if only the first index is still in the vector...
                if (unsigned'(k) * 2 == WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
                    assign index_nodes[2**level-1+k] = index_lut[k*2];
                end
                // if index is out of range
                if (unsigned'(k) * 2 > WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = 1'b0;
                    assign index_nodes[2**level-1+k] = '0;
                end
            end
        end else begin
            for (genvar l = 0; l < 2**level; l++) begin : g_level
                assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] :
                                                                                             index_nodes[2**(level+1)-1+l*2+1];
            end
        end
    end
-    assign data_out  = data_out_r;
+    assign cnt_o   = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0);
-    assign valid_out = (| data_in);
+    assign valid_o = NUM_LEVELS > unsigned'(0) ? sel_nodes[0]  : (|in_i);
 `IGNORE_WARNINGS_END
 endmodule