From 377466ed1ce7da0bb92ba996380b6541cb5bca30 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 5 Sep 2021 21:01:52 -0700 Subject: [PATCH] fpu area optimization --- hw/rtl/VX_types.vh | 2 +- hw/rtl/fp_cores/VX_fp_class.v | 28 +++++++++++ hw/rtl/fp_cores/VX_fp_cvt.v | 86 ++++++++++++++++--------------- hw/rtl/fp_cores/VX_fp_ncomp.v | 59 +++++++++++++--------- hw/rtl/fp_cores/VX_fp_type.v | 27 ---------- hw/rtl/fp_cores/VX_fpu_fpga.v | 2 +- hw/rtl/libs/VX_find_first.v | 61 ++++++++++++++++++++++ hw/rtl/libs/VX_lzc.v | 95 +++++++---------------------------- 8 files changed, 186 insertions(+), 174 deletions(-) create mode 100644 hw/rtl/fp_cores/VX_fp_class.v delete mode 100644 hw/rtl/fp_cores/VX_fp_type.v create mode 100644 hw/rtl/libs/VX_find_first.v diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 785ce444..4654daae 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -11,7 +11,7 @@ typedef struct packed { logic is_nan; logic is_quiet; logic is_signaling; -} fp_type_t; +} fp_class_t; typedef struct packed { logic NV; // 4-Invalid diff --git a/hw/rtl/fp_cores/VX_fp_class.v b/hw/rtl/fp_cores/VX_fp_class.v new file mode 100644 index 00000000..d30247e5 --- /dev/null +++ b/hw/rtl/fp_cores/VX_fp_class.v @@ -0,0 +1,28 @@ + +`include "VX_define.vh" + +module VX_fp_class # ( + parameter MAN_BITS = 23, + parameter EXP_BITS = 8 +) ( + input [EXP_BITS-1:0] exp_i, + input [MAN_BITS-1:0] man_i, + output fp_class_t clss_o +); + wire is_normal = (exp_i != '0) && (exp_i != '1); + wire is_zero = (exp_i == '0) && (man_i == '0); + wire is_subnormal = (exp_i == '0) && (man_i != '0); + wire is_inf = (exp_i == '1) && (man_i == '0); + wire is_nan = (exp_i == '1) && (man_i != '0); + wire is_signaling = is_nan && ~man_i[MAN_BITS-1]; + wire is_quiet = is_nan && ~is_signaling; + + assign clss_o.is_normal = is_normal; + assign clss_o.is_zero = is_zero; + assign clss_o.is_subnormal = is_subnormal; + assign clss_o.is_inf = is_inf; + assign clss_o.is_nan = is_nan; + assign clss_o.is_quiet = is_quiet; + assign clss_o.is_signaling = is_signaling; + +endmodule \ No newline at end of file diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index 25e178c3..98cdded2 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -59,13 +59,16 @@ module VX_fp_cvt #( // Input processing - fp_type_t [LANES-1:0] in_a_type; + fp_class_t [LANES-1:0] fp_clss; for (genvar i = 0; i < LANES; ++i) begin - VX_fp_type fp_type ( + VX_fp_class #( + .EXP_BITS (EXP_BITS), + .MAN_BITS (MAN_BITS) + ) fp_class ( .exp_i (dataa[i][30:23]), .man_i (dataa[i][22:0]), - .type_o (in_a_type[i]) + .clss_o (fp_clss[i]) ); end @@ -74,16 +77,19 @@ module VX_fp_cvt #( wire [LANES-1:0] input_sign; for (genvar i = 0; i < LANES; ++i) begin + `IGNORE_WARNINGS_BEGIN wire [INT_MAN_WIDTH-1:0] int_mantissa; wire [INT_MAN_WIDTH-1:0] fmt_mantissa; wire fmt_sign = dataa[i][31]; wire int_sign = dataa[i][31] & is_signed; assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i]; - assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]}); - - assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]}; + assign fmt_mantissa = INT_MAN_WIDTH'({fp_clss[i].is_normal, dataa[i][MAN_BITS-1:0]}); + assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} + + {1'b0, fp_clss[i].is_subnormal} + + (FMT_SHIFT_COMPENSATION - EXP_BIAS); assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa; assign input_sign[i] = is_itof ? int_sign : fmt_sign; + `IGNORE_WARNINGS_END end // Pipeline stage0 @@ -93,7 +99,7 @@ module VX_fp_cvt #( wire is_itof_s0; wire unsigned_s0; wire [2:0] rnd_mode_s0; - fp_type_t [LANES-1:0] in_a_type_s0; + fp_class_t [LANES-1:0] fp_clss_s0; wire [LANES-1:0] input_sign_s0; wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0; wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0; @@ -101,14 +107,14 @@ module VX_fp_cvt #( wire stall; VX_pipe_register #( - .DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)), + .DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall), - .data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}), - .data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0}) + .data_in ({valid_in, tag_in, is_itof, !is_signed, frm, fp_clss, input_sign, fmt_exponent, encoded_mant}), + .data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0}) ); // Normalization @@ -119,8 +125,8 @@ module VX_fp_cvt #( for (genvar i = 0; i < LANES; ++i) begin wire mant_is_nonzero; VX_lzc #( - .WIDTH (INT_MAN_WIDTH), - .MODE (1) + .N (INT_MAN_WIDTH), + .MODE (1) ) lzc ( .in_i (encoded_mant_s0[i]), .cnt_o (renorm_shamt_s0[i]), @@ -134,20 +140,12 @@ module VX_fp_cvt #( for (genvar i = 0; i < LANES; ++i) begin `IGNORE_WARNINGS_BEGIN - // Input mantissa needs to be normalized - wire [INT_EXP_WIDTH-1:0] fp_input_exp; - wire [INT_EXP_WIDTH-1:0] int_input_exp; - - // Realign input mantissa, append zeroes if destination is wider + // Realign input mantissa, append zeroes if destination is wider assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i]; // Unbias exponent and compensate for shift - assign fp_input_exp = fmt_exponent_s0[i] + - {1'b0, in_a_type_s0[i].is_subnormal} + - (FMT_SHIFT_COMPENSATION - EXP_BIAS) - - {1'b0, renorm_shamt_s0[i]}; - - assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]}; + wire [INT_EXP_WIDTH-1:0] fp_input_exp = fmt_exponent_s0[i] - {1'b0, renorm_shamt_s0[i]}; + wire [INT_EXP_WIDTH-1:0] int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]}; assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp; `IGNORE_WARNINGS_END @@ -160,21 +158,21 @@ module VX_fp_cvt #( wire is_itof_s1; wire unsigned_s1; wire [2:0] rnd_mode_s1; - fp_type_t [LANES-1:0] in_a_type_s1; + fp_class_t [LANES-1:0] fp_clss_s1; wire [LANES-1:0] input_sign_s1; wire [LANES-1:0] mant_is_zero_s1; wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1; wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1; VX_pipe_register #( - .DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)), + .DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall), - .data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}), - .data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1}) + .data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}), + .data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1}) ); // Perform adjustments to mantissa and exponent @@ -245,7 +243,7 @@ module VX_fp_cvt #( wire is_itof_s2; wire unsigned_s2; wire [2:0] rnd_mode_s2; - fp_type_t [LANES-1:0] in_a_type_s2; + fp_class_t [LANES-1:0] fp_clss_s2; wire [LANES-1:0] mant_is_zero_s2; wire [LANES-1:0] input_sign_s2; wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2; @@ -253,14 +251,14 @@ module VX_fp_cvt #( wire [LANES-1:0] of_before_round_s2; VX_pipe_register #( - .DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)), + .DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_class_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)), .RESETW (1) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (~stall), - .data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}), - .data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) + .data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}), + .data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) ); wire [LANES-1:0] rounded_sign; @@ -314,7 +312,7 @@ module VX_fp_cvt #( wire [TAGW-1:0] tag_in_s3; wire is_itof_s3; wire unsigned_s3; - fp_type_t [LANES-1:0] in_a_type_s3; + fp_class_t [LANES-1:0] fp_clss_s3; wire [LANES-1:0] mant_is_zero_s3; wire [LANES-1:0] input_sign_s3; wire [LANES-1:0] rounded_sign_s3; @@ -322,14 +320,14 @@ module VX_fp_cvt #( wire [LANES-1:0] of_before_round_s3; VX_pipe_register #( - .DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)), + .DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + 32 + 1 + 1)), .RESETW (1) ) pipe_reg3 ( .clk (clk), .reset (reset), .enable (~stall), - .data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}), - .data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3}) + .data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}), + .data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, fp_clss_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3}) ); wire [LANES-1:0] of_after_round; @@ -362,14 +360,14 @@ module VX_fp_cvt #( for (genvar i = 0; i < LANES; ++i) begin // Detect special case from source format, I2F casts don't produce a special result - assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan); + assign fp_result_is_special[i] = ~is_itof_s3 & (fp_clss_s3[i].is_zero | fp_clss_s3[i].is_nan); // Signalling input NaNs raise invalid flag, otherwise no flags set - assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation + assign fp_special_status[i] = fp_clss_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation // Assemble result according to destination format - assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero - : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN + assign fp_special_result[i] = fp_clss_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero + : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN end // INT Special case handling @@ -381,7 +379,7 @@ module VX_fp_cvt #( for (genvar i = 0; i < LANES; ++i) begin // Assemble result according to destination format always @(*) begin - if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin + if (input_sign_s3[i] && !fp_clss_s3[i].is_nan) begin int_special_result[i][30:0] = 0; // alone yields 2**(31)-1 int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31 end else begin @@ -391,8 +389,8 @@ module VX_fp_cvt #( end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - assign int_result_is_special[i] = in_a_type_s3[i].is_nan - | in_a_type_s3[i].is_inf + assign int_result_is_special[i] = fp_clss_s3[i].is_nan + | fp_clss_s3[i].is_inf | of_before_round_s3[i] | (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]); @@ -411,11 +409,11 @@ module VX_fp_cvt #( wire [31:0] fp_result, int_result; wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f; - : (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); + : (| fp_round_sticky_bits[i]) | (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts assign fp_regular_status.DZ = 1'b0; // no divisions - assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF + assign fp_regular_status.OF = ~is_itof_s3 & (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF assign fp_regular_status.UF = uf_after_round[i] & inexact; assign fp_regular_status.NX = inexact; diff --git a/hw/rtl/fp_cores/VX_fp_ncomp.v b/hw/rtl/fp_cores/VX_fp_ncomp.v index 11300a87..a96c05eb 100644 --- a/hw/rtl/fp_cores/VX_fp_ncomp.v +++ b/hw/rtl/fp_cores/VX_fp_ncomp.v @@ -30,6 +30,9 @@ module VX_fp_ncomp #( input wire ready_out, output wire valid_out ); + localparam EXP_BITS = 8; + localparam MAN_BITS = 23; + localparam NEG_INF = 32'h00000001, NEG_NORM = 32'h00000002, NEG_SUBNORM = 32'h00000004, @@ -44,7 +47,7 @@ module VX_fp_ncomp #( wire [LANES-1:0] tmp_a_sign, tmp_b_sign; wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent; wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa; - fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type; + fp_class_t [LANES-1:0] tmp_a_clss, tmp_b_clss; wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal; // Setup @@ -57,20 +60,26 @@ module VX_fp_ncomp #( assign tmp_b_exponent[i] = datab[i][30:23]; assign tmp_b_mantissa[i] = datab[i][22:0]; - VX_fp_type fp_type_a ( + VX_fp_class #( + .EXP_BITS (EXP_BITS), + .MAN_BITS (MAN_BITS) + ) fp_class_a ( .exp_i (tmp_a_exponent[i]), .man_i (tmp_a_mantissa[i]), - .type_o (tmp_a_type[i]) + .clss_o (tmp_a_clss[i]) ); - VX_fp_type fp_type_b ( + VX_fp_class #( + .EXP_BITS (EXP_BITS), + .MAN_BITS (MAN_BITS) + ) fp_class_b ( .exp_i (tmp_b_exponent[i]), .man_i (tmp_b_mantissa[i]), - .type_o (tmp_b_type[i]) + .clss_o (tmp_b_clss[i]) ); assign tmp_a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]); - assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero); + assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_clss[i].is_zero & tmp_b_clss[i].is_zero); end // Pipeline stage0 @@ -83,41 +92,41 @@ module VX_fp_ncomp #( wire [LANES-1:0] a_sign_s0, b_sign_s0; wire [LANES-1:0][7:0] a_exponent_s0; wire [LANES-1:0][22:0] a_mantissa_s0; - fp_type_t [LANES-1:0] a_type_s0, b_type_s0; + fp_class_t [LANES-1:0] a_clss_s0, b_clss_s0; wire [LANES-1:0] a_smaller_s0, ab_equal_s0; wire stall; VX_pipe_register #( - .DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)), + .DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)), .RESETW (1), .DEPTH (0) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (!stall), - .data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}), - .data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0}) + .data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_clss, tmp_b_clss, tmp_a_smaller, tmp_ab_equal}), + .data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_clss_s0, b_clss_s0, a_smaller_s0, ab_equal_s0}) ); // FCLASS reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg for (genvar i = 0; i < LANES; i++) begin always @(*) begin - if (a_type_s0[i].is_normal) begin + if (a_clss_s0[i].is_normal) begin fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM; end - else if (a_type_s0[i].is_inf) begin + else if (a_clss_s0[i].is_inf) begin fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF; end - else if (a_type_s0[i].is_zero) begin + else if (a_clss_s0[i].is_zero) begin fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO; end - else if (a_type_s0[i].is_subnormal) begin + else if (a_clss_s0[i].is_subnormal) begin fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM; end - else if (a_type_s0[i].is_nan) begin - fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0}; + else if (a_clss_s0[i].is_nan) begin + fclass_mask[i] = {22'h0, a_clss_s0[i].is_quiet, a_clss_s0[i].is_signaling, 8'h0}; end else begin fclass_mask[i] = QUT_NAN; @@ -129,11 +138,11 @@ module VX_fp_ncomp #( reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax for (genvar i = 0; i < LANES; i++) begin always @(*) begin - if (a_type_s0[i].is_nan && b_type_s0[i].is_nan) + if (a_clss_s0[i].is_nan && b_clss_s0[i].is_nan) fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN - else if (a_type_s0[i].is_nan) + else if (a_clss_s0[i].is_nan) fminmax_res[i] = datab_s0[i]; - else if (b_type_s0[i].is_nan) + else if (b_clss_s0[i].is_nan) fminmax_res[i] = dataa_s0[i]; else begin case (frm_s0) // use LSB to distinguish MIN and MAX @@ -166,7 +175,7 @@ module VX_fp_ncomp #( case (frm_s0) `INST_FRM_RNE: begin // LE fcmp_fflags[i] = 5'h0; - if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin + if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin fcmp_res[i] = 32'h0; fcmp_fflags[i].NV = 1'b1; end else begin @@ -175,7 +184,7 @@ module VX_fp_ncomp #( end `INST_FRM_RTZ: begin // LS fcmp_fflags[i] = 5'h0; - if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin + if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin fcmp_res[i] = 32'h0; fcmp_fflags[i].NV = 1'b1; end else begin @@ -184,9 +193,9 @@ module VX_fp_ncomp #( end `INST_FRM_RDN: begin // EQ fcmp_fflags[i] = 5'h0; - if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin + if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin fcmp_res[i] = 32'h0; - fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling; + fcmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling; end else begin fcmp_res[i] = {31'h0, ab_equal_s0[i]}; end @@ -225,11 +234,11 @@ module VX_fp_ncomp #( 3,4: begin tmp_result[i] = fminmax_res[i]; tmp_fflags[i] = 0; - tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling; + tmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling; end //5,6,7: MOVE default: begin - tmp_result[i] = dataa[i]; + tmp_result[i] = dataa_s0[i]; tmp_fflags[i] = 'x; end endcase diff --git a/hw/rtl/fp_cores/VX_fp_type.v b/hw/rtl/fp_cores/VX_fp_type.v deleted file mode 100644 index bdc41b86..00000000 --- a/hw/rtl/fp_cores/VX_fp_type.v +++ /dev/null @@ -1,27 +0,0 @@ - -`include "VX_define.vh" - -module VX_fp_type ( - // inputs - input [7:0] exp_i, - input [22:0] man_i, - // outputs - output fp_type_t type_o -); - wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff); - wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0); - wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0); - wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0); - wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0); - wire is_signaling = is_nan && (man_i[22] == 1'b0); - wire is_quiet = is_nan && !is_signaling; - - assign type_o.is_normal = is_normal; - assign type_o.is_zero = is_zero; - assign type_o.is_subnormal = is_subnormal; - assign type_o.is_inf = is_inf; - assign type_o.is_nan = is_nan; - assign type_o.is_quiet = is_quiet; - assign type_o.is_signaling = is_signaling; - -endmodule \ No newline at end of file diff --git a/hw/rtl/fp_cores/VX_fpu_fpga.v b/hw/rtl/fp_cores/VX_fpu_fpga.v index f07afb0d..029f8976 100644 --- a/hw/rtl/fp_cores/VX_fpu_fpga.v +++ b/hw/rtl/fp_cores/VX_fpu_fpga.v @@ -1,7 +1,7 @@ `include "VX_define.vh" module VX_fpu_fpga #( - parameter TAGW = 1 + parameter TAGW = 4 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/libs/VX_find_first.v b/hw/rtl/libs/VX_find_first.v new file mode 100644 index 00000000..ab724c7e --- /dev/null +++ b/hw/rtl/libs/VX_find_first.v @@ -0,0 +1,61 @@ +`include "VX_platform.vh" + +`TRACING_OFF +module VX_find_first #( + parameter N = 1, + parameter DATAW = 1, + parameter REVERSE = 0, + localparam LOGN = $clog2(N) +) ( + input wire [N-1:0][DATAW-1:0] data_i, + input wire [N-1:0] valid_i, + output wire [DATAW-1:0] data_o, + output wire valid_o +); + if (N > 1) begin + wire [N-1:0] valid_r; + wire [N-1:0][DATAW-1:0] data_r; + + for (genvar i = 0; i < N; ++i) begin + assign valid_r[i] = REVERSE ? valid_i[N-1-i] : valid_i[i]; + assign data_r[i] = REVERSE ? data_i[N-1-i] : data_i[i]; + end + + `IGNORE_WARNINGS_BEGIN + wire [2**LOGN-1:0] s_n; + wire [2**LOGN-1:0][DATAW-1:0] d_n; + `IGNORE_WARNINGS_END + + for (genvar i = 0; i < LOGN; ++i) begin + if (i == (LOGN-1)) begin + for (genvar j = 0; j < 2**i; ++j) begin + if ((j*2) < (N-1)) begin + assign s_n[2**i-1+j] = valid_r[j*2] | valid_r[j*2+1]; + assign d_n[2**i-1+j] = valid_r[j*2] ? data_r[j*2] : data_r[j*2+1]; + end + if ((j*2) == (N-1)) begin + assign s_n[2**i-1+j] = valid_r[j*2]; + assign d_n[2**i-1+j] = data_r[j*2]; + end + if ((j*2) > (N-1)) begin + assign s_n[2**i-1+j] = 0; + assign d_n[2**i-1+j] = 'x; + end + end + end else begin + for (genvar j = 0; j < 2**i; ++j) begin + assign s_n[2**i-1+j] = s_n[2**(i+1)-1+j*2] | s_n[2**(i+1)-1+j*2+1]; + assign d_n[2**i-1+j] = s_n[2**(i+1)-1+j*2] ? d_n[2**(i+1)-1+j*2] : d_n[2**(i+1)-1+j*2+1]; + end + end + end + + assign valid_o = s_n[0]; + assign data_o = d_n[0]; + end else begin + assign valid_o = valid_i; + assign data_o = data_i[0]; + end + +endmodule +`TRACING_ON \ No newline at end of file diff --git a/hw/rtl/libs/VX_lzc.v b/hw/rtl/libs/VX_lzc.v index 816d65b6..5b97028b 100644 --- a/hw/rtl/libs/VX_lzc.v +++ b/hw/rtl/libs/VX_lzc.v @@ -1,88 +1,31 @@ `include "VX_platform.vh" -/// Modified port of lzc module from fpnew Libray -/// reference: https://github.com/pulp-platform/fpnew -/// A trailing zero counter / leading zero counter. -/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB) -/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB) -/// If the input does not contain a zero, `empty_o` is asserted. Additionally `cnt_o` contains -/// the maximum number of zeros - 1. For example: -/// in_i = 000_0000, empty_o = 1, cnt_o = 6 (mode = 0) -/// in_i = 000_0001, empty_o = 0, cnt_o = 0 (mode = 0) -/// in_i = 000_1000, empty_o = 0, cnt_o = 3 (mode = 0) -/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only). -/// This speeds up simulation significantly. - `TRACING_OFF module VX_lzc #( - /// The width of the input vector. - parameter int unsigned WIDTH = 2, - parameter bit MODE = 1'b0 // 0 -> trailing zero, 1 -> leading zero + parameter N = 2, + parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero + localparam LOGN = $clog2(N) ) ( - input logic [WIDTH-1:0] in_i, - output logic [$clog2(WIDTH)-1:0] cnt_o, - output logic valid_o + input wire [N-1:0] in_i, + output wire [LOGN-1:0] cnt_o, + output wire valid_o ); -`IGNORE_WARNINGS_BEGIN + wire [N-1:0][LOGN-1:0] indices; - localparam int unsigned NUM_LEVELS = $clog2(WIDTH); - - // pragma translate_off - initial begin - assert(WIDTH > 0) else $fatal("input must be at least one bit wide"); - end - // pragma translate_on - - logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; - logic [2**NUM_LEVELS-1:0] sel_nodes; - logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; - - logic [WIDTH-1:0] in_tmp; - - // reverse vector if required - always_comb begin : flip_vector - for (int unsigned i = 0; i < WIDTH; i++) begin - in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i]; - end + for (genvar i = 0; i < N; ++i) begin + assign indices[i] = MODE ? LOGN'(N-1-i) : LOGN'(i); end - for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut - assign index_lut[j] = NUM_LEVELS'(unsigned'(j)); - end - - for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels - if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level - for (genvar k = 0; k < 2**level; k++) begin : g_level - // if two successive indices are still in the vector... - if (unsigned'(k) * 2 < WIDTH-1) begin - assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; - assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : - index_lut[k*2+1]; - end - // if only the first index is still in the vector... - if (unsigned'(k) * 2 == WIDTH-1) begin - assign sel_nodes[2**level-1+k] = in_tmp[k*2]; - assign index_nodes[2**level-1+k] = index_lut[k*2]; - end - // if index is out of range - if (unsigned'(k) * 2 > WIDTH-1) begin - assign sel_nodes[2**level-1+k] = 1'b0; - assign index_nodes[2**level-1+k] = '0; - end - end - end else begin - for (genvar l = 0; l < 2**level; l++) begin : g_level - assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; - assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] : - index_nodes[2**(level+1)-1+l*2+1]; - end - end - end - - assign cnt_o = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0); - assign valid_o = NUM_LEVELS > unsigned'(0) ? sel_nodes[0] : (|in_i); - -`IGNORE_WARNINGS_END + VX_find_first #( + .N (N), + .DATAW (LOGN), + .REVERSE (MODE) + ) find_first ( + .data_i (indices), + .valid_i (in_i), + .data_o (cnt_o), + .valid_o (valid_o) + ); endmodule `TRACING_ON \ No newline at end of file