From e43116234732548350ce030f51dc6cc030c7cab6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 30 Dec 2020 04:09:21 -0800 Subject: [PATCH] minor update --- hw/rtl/fp_cores/VX_fp_cvt.v | 32 +++----- hw/rtl/fp_cores/VX_fp_fma.v | 7 +- hw/rtl/fp_cores/VX_fp_ncomp.v | 125 ++++++++++++++++--------------- hw/rtl/fp_cores/VX_fp_rounding.v | 2 +- 4 files changed, 81 insertions(+), 85 deletions(-) diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index bf306148..3d99d178 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -32,7 +32,7 @@ module VX_fp_cvt #( input wire ready_out, output wire valid_out ); - //! Constants + // Constants localparam MAN_BITS = 23; localparam EXP_BITS = 8; @@ -58,8 +58,7 @@ module VX_fp_cvt #( localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R - //*------------------------------------------------ - //! Input processing + // Input processing fp_type_t [LANES-1:0] in_a_type; @@ -104,8 +103,7 @@ module VX_fp_cvt #( assign mant_is_zero[i] = ~mant_is_nonzero; end - //*------------------------------------------------ - //! Stage0 pipeline + // Pipeline stage0 wire valid_in_s0; wire [TAGW-1:0] tag_in_s0; @@ -133,8 +131,7 @@ module VX_fp_cvt #( .data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0, renorm_shamt_s0, mant_is_zero_s0}) ); - //*------------------------------------------------ - //! Normalization + // Normalization wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent @@ -169,8 +166,7 @@ module VX_fp_cvt #( `IGNORE_WARNINGS_END end - //*------------------------------------------------ - //! 
Stage1 pipeline + // Pipeline stage1 wire valid_in_s1; wire [TAGW-1:0] tag_in_s1; @@ -196,8 +192,7 @@ module VX_fp_cvt #( .data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1}) ); - //*------------------------------------------------ - //! Casting + // Casting reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift @@ -271,8 +266,7 @@ module VX_fp_cvt #( `IGNORE_WARNINGS_END end - //*------------------------------------------------ - //! Rouding and classification + // Rounding and classification wire [LANES-1:0] rounded_sign; wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding @@ -302,8 +296,7 @@ module VX_fp_cvt #( ); end - //*------------------------------------------------ - //! Stage2 pipeline + // Pipeline stage2 wire valid_in_s2; wire [TAGW-1:0] tag_in_s2; @@ -348,8 +341,7 @@ module VX_fp_cvt #( assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0); end - //*------------------------------------------------ - //! FP Special case handling + // FP Special case handling wire [LANES-1:0][31:0] fp_special_result; fflags_t [LANES-1:0] fp_special_status; @@ -370,8 +362,7 @@ module VX_fp_cvt #( : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN end - //*------------------------------------------------ - //! INT Special case handling + // INT Special case handling reg [LANES-1:0][31:0] int_special_result; fflags_t [LANES-1:0] int_special_status; @@ -399,8 +390,7 @@ module VX_fp_cvt #( assign int_special_status[i] = {1'b1, 4'h0}; end - //*------------------------------------------------ - //! 
Result selection and Output handshake
+    // Result selection and Output handshake
 
     fflags_t [LANES-1:0] tmp_fflags;
     wire [LANES-1:0][31:0] tmp_result;
diff --git a/hw/rtl/fp_cores/VX_fp_fma.v b/hw/rtl/fp_cores/VX_fp_fma.v
index c0f0ba98..1029c2c2 100644
--- a/hw/rtl/fp_cores/VX_fp_fma.v
+++ b/hw/rtl/fp_cores/VX_fp_fma.v
@@ -34,13 +34,12 @@ module VX_fp_fma #(
 
     input wire  ready_out,
     output wire valid_out
-);
-
+);
+
     wire stall = ~ready_out && valid_out;
     wire enable = ~stall;
 
-    for (genvar i = 0; i < LANES; i++) begin
-
+    for (genvar i = 0; i < LANES; i++) begin
         reg [31:0] a, b, c;
 
         always @(*) begin
diff --git a/hw/rtl/fp_cores/VX_fp_ncomp.v b/hw/rtl/fp_cores/VX_fp_ncomp.v
index b5cb2707..bdd4b199 100644
--- a/hw/rtl/fp_cores/VX_fp_ncomp.v
+++ b/hw/rtl/fp_cores/VX_fp_ncomp.v
@@ -38,27 +38,17 @@ module VX_fp_ncomp #(
             SIG_NAN     = 32'h00000100,
             QUT_NAN     = 32'h00000200;
 
-    reg valid_in_r;
-    reg [TAGW-1:0] tag_in_r;
-    reg [`FPU_BITS-1:0] op_type_r;
-    reg [`FRM_BITS-1:0] frm_r;
+    wire [LANES-1:0]       tmp_a_sign, tmp_b_sign;
+    wire [LANES-1:0][7:0]  tmp_a_exponent, tmp_b_exponent;
+    wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
+    fp_type_t [LANES-1:0]  tmp_a_type, tmp_b_type;
+    wire [LANES-1:0]       tmp_a_smaller, tmp_ab_equal;
 
-    reg [LANES-1:0][31:0] dataa_r;
-    reg [LANES-1:0][31:0] datab_r;
-
-    reg [LANES-1:0]       a_sign, b_sign, tmp_a_sign, tmp_b_sign;
-    reg [LANES-1:0][7:0]  a_exponent, tmp_a_exponent, tmp_b_exponent;
-    reg [LANES-1:0][22:0] a_mantissa, tmp_a_mantissa, tmp_b_mantissa;
-    fp_type_t [LANES-1:0] a_type, b_type, tmp_a_type, tmp_b_type;
-    reg [LANES-1:0]       a_smaller, ab_equal, tmp_a_smaller, tmp_ab_equal;
-
-    reg [LANES-1:0][31:0] fclass_mask;  // generate a 10-bit mask for integer reg
-    reg [LANES-1:0][31:0] fminmax_res;  // result of fmin/fmax
-    reg [LANES-1:0][31:0] fsgnj_res;    // result of sign injection
-    reg [LANES-1:0][31:0] fcmp_res;     // result of comparison
-    fflags_t [LANES-1:0]  fcmp_fflags;  // comparison fflags
-
-    wire stall = ~ready_out && valid_out;
+    reg [LANES-1:0][31:0] fclass_mask;  // generate a 10-bit mask for integer reg; reg because it is assigned in always @(*)
+    reg [LANES-1:0][31:0] fminmax_res;  // result of fmin/fmax (assigned in always @(*))
+    reg [LANES-1:0][31:0] fsgnj_res;    // result of sign injection (assigned in always @(*))
+    reg [LANES-1:0][31:0] fcmp_res;     // result of comparison (assigned in always @(*))
+    fflags_t [LANES-1:0]  fcmp_fflags;  // comparison fflags
 
     // Setup
     for (genvar i = 0; i < LANES; i++) begin
@@ -86,6 +76,21 @@ module VX_fp_ncomp #(
         assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
     end
 
+    // Pipeline stage0
+
+    wire                    valid_in_s0;
+    wire [TAGW-1:0]         tag_in_s0;
+    wire [`FPU_BITS-1:0]    op_type_s0;
+    wire [`FRM_BITS-1:0]    frm_s0;
+    wire [LANES-1:0][31:0]  dataa_s0, datab_s0;
+    wire [LANES-1:0]        a_sign_s0, b_sign_s0;
+    wire [LANES-1:0][7:0]   a_exponent_s0;
+    wire [LANES-1:0][22:0]  a_mantissa_s0;
+    fp_type_t [LANES-1:0]   a_type_s0, b_type_s0;
+    wire [LANES-1:0]        a_smaller_s0, ab_equal_s0;
+
+    wire stall;
+
     VX_pipe_register #(
         .DATAW  (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
         .RESETW (1)
@@ -93,27 +98,27 @@ module VX_fp_ncomp #(
         .clk      (clk),
         .reset    (reset),
         .enable   (!stall),
-        .data_in  ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
-        .data_out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r, a_sign, b_sign, a_exponent, a_mantissa, a_type, b_type, a_smaller, ab_equal})
+        .data_in  ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
+        .data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
     );
 
     // FCLASS
     for (genvar i = 0; i < LANES; i++) begin
         always @(*) begin
-            if (a_type[i].is_normal) begin
-                fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
+            if (a_type_s0[i].is_normal) begin
+                fclass_mask[i] = a_sign_s0[i] ? 
NEG_NORM : POS_NORM;
             end
-            else if (a_type[i].is_inf) begin
-                fclass_mask[i] = a_sign[i] ? NEG_INF : POS_INF;
+            else if (a_type_s0[i].is_inf) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
             end
-            else if (a_type[i].is_zero) begin
-                fclass_mask[i] = a_sign[i] ? NEG_ZERO : POS_ZERO;
+            else if (a_type_s0[i].is_zero) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
             end
-            else if (a_type[i].is_subnormal) begin
-                fclass_mask[i] = a_sign[i] ? NEG_SUBNORM : POS_SUBNORM;
+            else if (a_type_s0[i].is_subnormal) begin
+                fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
             end
-            else if (a_type[i].is_nan) begin
-                fclass_mask[i] = {22'h0, a_type[i].is_quiet, a_type[i].is_signaling, 8'h0};
+            else if (a_type_s0[i].is_nan) begin
+                fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
             end
             else begin
                 fclass_mask[i] = QUT_NAN;
@@ -124,16 +129,16 @@ module VX_fp_ncomp #(
     // Min/Max
     for (genvar i = 0; i < LANES; i++) begin
         always @(*) begin
-            if (a_type[i].is_nan && b_type[i].is_nan)
+            if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
                 fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
-            else if (a_type[i].is_nan)
-                fminmax_res[i] = datab_r[i];
-            else if (b_type[i].is_nan)
-                fminmax_res[i] = dataa_r[i];
+            else if (a_type_s0[i].is_nan)
+                fminmax_res[i] = datab_s0[i];
+            else if (b_type_s0[i].is_nan)
+                fminmax_res[i] = dataa_s0[i];
             else begin
-                case (frm_r) // use LSB to distinguish MIN and MAX
-                    3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
-                    4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
+                case (frm_s0) // use LSB to distinguish MIN and MAX
+                    3: fminmax_res[i] = a_smaller_s0[i] ? dataa_s0[i] : datab_s0[i];
+                    4: fminmax_res[i] = a_smaller_s0[i] ? datab_s0[i] : dataa_s0[i];
                     default: fminmax_res[i] = 'x; // don't care value
                 endcase
             end
@@ -143,10 +148,10 @@ module VX_fp_ncomp #(
     // Sign injection
     for (genvar i = 0; i < LANES; i++) begin
         always @(*) begin
-            case (frm_r)
-                0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
-                1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
-                2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
+            case (frm_s0)
+                0: fsgnj_res[i] = { b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
+                1: fsgnj_res[i] = {~b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
+                2: fsgnj_res[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
                 default: fsgnj_res[i] = 'x; // don't care value
             endcase
         end
@@ -155,32 +160,32 @@ module VX_fp_ncomp #(
     // Comparison
     for (genvar i = 0; i < LANES; i++) begin
         always @(*) begin
-            case (frm_r)
+            case (frm_s0)
                 `FRM_RNE: begin
                     fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                         fcmp_res[i] = 32'h0;
                         fcmp_fflags[i].NV = 1'b1;
                     end else begin
-                        fcmp_res[i] = {31'h0, (a_smaller[i] | ab_equal[i])};
+                        fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
                     end
                 end
                 `FRM_RTZ: begin
                     fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                         fcmp_res[i] = 32'h0;
                         fcmp_fflags[i].NV = 1'b1;
                     end else begin
-                        fcmp_res[i] = {31'h0, (a_smaller[i] & ~ab_equal[i])};
+                        fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
                     end
                 end
                 `FRM_RDN: begin
                     fcmp_fflags[i] = 5'h0;
-                    if (a_type[i].is_nan || b_type[i].is_nan) begin
+                    if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
                         fcmp_res[i] = 32'h0;
-                        fcmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling;
+                        fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
                     end else begin
-                        fcmp_res[i] = {31'h0, ab_equal[i]};
+                        fcmp_res[i] = {31'h0, ab_equal_s0[i]};
                     end
                 end
                 default: begin
@@ -198,7 +203,7 @@ module VX_fp_ncomp #(
 
     for (genvar i = 0; i < LANES; i++) begin
         always @(*) begin
-            case (op_type_r)
+            case (op_type_s0)
                 `FPU_CLASS: begin
                     tmp_result[i] = fclass_mask[i];
                     tmp_fflags[i] = 'x;
@@ -209,7 +214,7 @@ module VX_fp_ncomp #(
                 end
                 //`FPU_MISC:
                 default: begin
-                    case (frm_r)
+                    case (frm_s0)
                         0,1,2: begin
                             tmp_result[i] = fsgnj_res[i];
                             tmp_fflags[i] = 'x;
@@ -217,7 +222,7 @@ module VX_fp_ncomp #(
                         3,4: begin
                             tmp_result[i] = fminmax_res[i];
                             tmp_fflags[i] = 0;
-                            tmp_fflags[i].NV = a_type[i].is_signaling | b_type[i].is_signaling;
+                            tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
                         end
                         //5,6,7:
                         default: begin
@@ -230,8 +235,10 @@ module VX_fp_ncomp #(
         end
     end
 
-    wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm == 3 || frm == 4)) // MIN/MAX
-                       || (op_type_r == `FPU_CMP); // CMP
+    wire tmp_has_fflags = ((op_type_s0 == `FPU_MISC) && (frm_s0 == 3 || frm_s0 == 4)) // MIN/MAX (stage-0 registered frm, same stage as op_type_s0)
+                       || (op_type_s0 == `FPU_CMP); // CMP
+
+    assign stall = ~ready_out && valid_out;
 
     VX_pipe_register #(
         .DATAW  (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
@@ -240,8 +247,8 @@ module VX_fp_ncomp #(
         .clk      (clk),
         .reset    (reset),
         .enable   (!stall),
-        .data_in  ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
-        .data_out ({valid_out, tag_out, result, has_fflags, fflags})
+        .data_in  ({valid_in_s0, tag_in_s0, tmp_result, tmp_has_fflags, tmp_fflags}),
+        .data_out ({valid_out, tag_out, result, has_fflags, fflags})
     );
 
     assign ready_in = ~stall;
diff --git a/hw/rtl/fp_cores/VX_fp_rounding.v b/hw/rtl/fp_cores/VX_fp_rounding.v
index 53ea2435..d899c3d1 100644
--- a/hw/rtl/fp_cores/VX_fp_rounding.v
+++ b/hw/rtl/fp_cores/VX_fp_rounding.v
@@ -34,7 +34,7 @@ module VX_fp_rounding #(
             `FRM_RNE: // Decide accoring to round/sticky bits
                 case (round_sticky_bits_i)
                     2'b00,
-                    2'b01: round_up = 1'b0; // < ulp/2 away, round down
+                    2'b01: round_up = 1'b0; // < ulp/2 away, round down
                     2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result
                     2'b11: round_up = 1'b1; // > ulp/2 away, round up
                     default: round_up = 1'bx;