From a08d3ebd4250a7d52ee7741712dfb612c163be2f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 12 Nov 2023 23:40:59 -0800 Subject: [PATCH] minor update --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 2 +- hw/rtl/{fpu => core}/VX_fpu_unit.sv | 0 hw/rtl/core/VX_muldiv_unit.sv | 5 + hw/rtl/core/VX_scoreboard.sv | 3 + hw/rtl/fpu/VX_fpu_cvt.sv | 206 ++++++++++------------------ hw/rtl/fpu/VX_fpu_rounding.sv | 1 - 6 files changed, 81 insertions(+), 136 deletions(-) rename hw/rtl/{fpu => core}/VX_fpu_unit.sv (100%) diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 3c4b3947..2abbbe43 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -262,7 +262,7 @@ module VX_afu_wrap #( .m_axi_awready (m_axi_mem_awready_a), .m_axi_awaddr (m_axi_mem_awaddr_w), .m_axi_awid (m_axi_mem_awid_a), - `UNUSED_PIN (m_axi_awlen), + .m_axi_awlen (m_axi_mem_awlen_a), `UNUSED_PIN (m_axi_awsize), `UNUSED_PIN (m_axi_awburst), `UNUSED_PIN (m_axi_awlock), diff --git a/hw/rtl/fpu/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv similarity index 100% rename from hw/rtl/fpu/VX_fpu_unit.sv rename to hw/rtl/core/VX_fpu_unit.sv diff --git a/hw/rtl/core/VX_muldiv_unit.sv b/hw/rtl/core/VX_muldiv_unit.sv index f3c730d4..141cdb55 100644 --- a/hw/rtl/core/VX_muldiv_unit.sv +++ b/hw/rtl/core/VX_muldiv_unit.sv @@ -220,8 +220,13 @@ module VX_muldiv_unit #( wire [NUM_LANES-1:0][`XLEN-1:0] div_in2; for (genvar i = 0; i < NUM_LANES; ++i) begin + `ifdef XLEN_64 assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i]; assign div_in2[i] = is_alu_w ? 
{{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i]; + `else + assign div_in1[i] = execute_if.data.rs1_data[i]; + assign div_in2[i] = execute_if.data.rs2_data[i]; + `endif end `ifdef IDIV_DPI diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index ee5ae2ec..90a58134 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -107,6 +107,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .ready_out (scoreboard_if[i].ready) ); + `ifdef SIMULATION reg [31:0] timeout_ctr; always @(posedge clk) begin @@ -134,6 +135,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0, ("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", $time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i), writeback_if[i].data.PC, writeback_if[i].data.tmask, writeback_if[i].data.rd, writeback_if[i].data.uuid)); + `endif + end endmodule diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 34b2ed28..d668539b 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -52,30 +52,27 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( localparam MAN_BITS = 23; localparam EXP_BITS = 8; - localparam EXP_BIAS = 2**(EXP_BITS-1)-1; - - localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = 2**EXP_BITS-1; - localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); + localparam EXP_BIAS = 2**(EXP_BITS-1)-1; // Use 32-bit integer - localparam MAX_INT_WIDTH = 32; + localparam INT_WIDTH = 32; // The internal mantissa includes normal bit or an entire integer - localparam INT_MAN_WIDTH = `MAX(MAN_BITS + 1, MAX_INT_WIDTH); + localparam INT_MAN_WIDTH = `MAX(MAN_BITS + 1, INT_WIDTH); // The lower 2p+3 bits of the internal FMA result will be needed for leading-zero detection localparam LZC_RESULT_WIDTH = `CLOG2(INT_MAN_WIDTH); // The internal 
exponent must be able to represent the smallest denormal input value as signed // or the number of bits in an integer - localparam INT_EXP_WIDTH = `MAX(`CLOG2(MAX_INT_WIDTH), `MAX(EXP_BITS, `CLOG2(EXP_BIAS + MAN_BITS))) + 1; + localparam INT_EXP_WIDTH = `MAX(`CLOG2(INT_WIDTH), `MAX(EXP_BITS, `CLOG2(EXP_BIAS + MAN_BITS))) + 1; // shift amount for denormalization localparam SHAMT_BITS = `CLOG2(INT_MAN_WIDTH+1); localparam FMT_SHIFT_COMPENSATION = INT_MAN_WIDTH - 1 - MAN_BITS; localparam NUM_FP_STICKY = 2 * INT_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R - localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - MAX_INT_WIDTH; // removed int and R + localparam NUM_INT_STICKY = 2 * INT_MAN_WIDTH - INT_WIDTH; // removed int and R // Input processing @@ -86,8 +83,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class ( - .exp_i (dataa[i][30:23]), - .man_i (dataa[i][22:0]), + .exp_i (dataa[i][INT_WIDTH-2:MAN_BITS]), + .man_i (dataa[i][MAN_BITS-1:0]), .clss_o (fclass[i]) ); end @@ -97,15 +94,13 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0] input_sign; for (genvar i = 0; i < NUM_LANES; ++i) begin - wire [INT_MAN_WIDTH-1:0] int_mantissa; - wire [INT_MAN_WIDTH-1:0] fmt_mantissa; - wire fmt_sign = dataa[i][31]; - wire int_sign = dataa[i][31] && is_signed; - assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i]; - assign fmt_mantissa = INT_MAN_WIDTH'({fclass[i].is_normal, dataa[i][MAN_BITS-1:0]}); + wire i2f_sign = dataa[i][INT_WIDTH-1]; + wire f2i_sign = dataa[i][INT_WIDTH-1] && is_signed; + wire [INT_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa[i]) : dataa[i]; + wire [INT_MAN_WIDTH-1:0] i2f_mantissa = INT_MAN_WIDTH'({fclass[i].is_normal, dataa[i][MAN_BITS-1:0]}); assign input_exp[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} + INT_EXP_WIDTH'({1'b0, fclass[i].is_subnormal}); - assign input_mant[i] = is_itof ? int_mantissa : fmt_mantissa; - assign input_sign[i] = is_itof ? 
int_sign : fmt_sign; + assign input_mant[i] = is_itof ? f2i_mantissa : i2f_mantissa; + assign input_sign[i] = is_itof ? f2i_sign : i2f_sign; end // Pipeline stage0 @@ -159,9 +154,9 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( assign input_mant_n_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i]; // Unbias exponent and compensate for shift - wire [INT_EXP_WIDTH-1:0] fp_input_exp_s0 = fmt_exponent_s0[i] + INT_EXP_WIDTH'(FMT_SHIFT_COMPENSATION - EXP_BIAS) - INT_EXP_WIDTH'({1'b0, renorm_shamt_s0[i]}); - wire [INT_EXP_WIDTH-1:0] int_input_exp_s0 = INT_EXP_WIDTH'(INT_MAN_WIDTH-1) - INT_EXP_WIDTH'({1'b0, renorm_shamt_s0[i]}); - assign input_exp_n_s0[i] = is_itof_s0 ? int_input_exp_s0 : fp_input_exp_s0; + wire [INT_EXP_WIDTH-1:0] i2f_input_exp_s0 = fmt_exponent_s0[i] + INT_EXP_WIDTH'(FMT_SHIFT_COMPENSATION - EXP_BIAS) - INT_EXP_WIDTH'({1'b0, renorm_shamt_s0[i]}); + wire [INT_EXP_WIDTH-1:0] f2i_input_exp_s0 = INT_EXP_WIDTH'(INT_MAN_WIDTH-1) - INT_EXP_WIDTH'({1'b0, renorm_shamt_s0[i]}); + assign input_exp_n_s0[i] = is_itof_s0 ? 
f2i_input_exp_s0 : i2f_input_exp_s0; end // Pipeline stage1 @@ -193,51 +188,32 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s1; wire [NUM_LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1; - wire [NUM_LANES-1:0] of_before_round_s1; + wire [NUM_LANES-1:0] of_before_round_s1; - for (genvar i = 0; i < NUM_LANES; ++i) begin - reg [2*INT_MAN_WIDTH:0] preshift_mant_s1; // mantissa before final shift - reg [SHAMT_BITS-1:0] denorm_shamt_s1; // shift amount for denormalization - reg [INT_EXP_WIDTH-1:0] final_exp_tmp_s1; // after eventual adjustments - reg of_before_round_tmp_s1; + for (genvar i = 0; i < NUM_LANES; ++i) begin + reg [SHAMT_BITS-1:0] denorm_shamt_s1; // shift amount for denormalization + reg of_before_round_tmp_s1; always @(*) begin - final_exp_tmp_s1 = input_exp_s1[i] + INT_EXP_WIDTH'(EXP_BIAS); // take exponent as is, only look at lower bits - preshift_mant_s1 = {input_mant_s1[i], 33'b0}; denorm_shamt_s1 = '0; of_before_round_tmp_s1 = 1'b0; - if (is_itof_s1) begin - if ($signed(input_exp_s1[i]) >= INT_EXP_WIDTH'($signed(2**EXP_BITS-1-EXP_BIAS))) begin - // Overflow or infinities (for proper rounding) - final_exp_tmp_s1 = (2**EXP_BITS-2); // largest normal value - preshift_mant_s1 = ~0; // largest normal value and RS bits set - of_before_round_tmp_s1 = 1'b1; - end else if ($signed(input_exp_s1[i]) < INT_EXP_WIDTH'($signed(-MAN_BITS-EXP_BIAS))) begin - // Limit the shift to retain sticky bits - final_exp_tmp_s1 = '0; // denormal result - denorm_shamt_s1 = (2 + MAN_BITS); // to sticky - end else if ($signed(input_exp_s1[i]) < INT_EXP_WIDTH'($signed(1-EXP_BIAS))) begin - // Denormalize underflowing values - final_exp_tmp_s1 = '0; // denormal result - denorm_shamt_s1 = SHAMT_BITS'(1-EXP_BIAS) - SHAMT_BITS'(input_exp_s1[i]); // adjust right shifting - end - end else begin - if ($signed(input_exp_s1[i]) >= $signed(INT_EXP_WIDTH'(MAX_INT_WIDTH-1) + INT_EXP_WIDTH'(unsigned_s1))) begin - // overflow: when converting 
to unsigned the range is larger by one + if (!is_itof_s1) begin + if ($signed(input_exp_s1[i]) >= $signed(INT_EXP_WIDTH'(INT_WIDTH-1) + INT_EXP_WIDTH'(unsigned_s1))) begin + // overflow of_before_round_tmp_s1 = 1'b1; end else if ($signed(input_exp_s1[i]) < INT_EXP_WIDTH'($signed(-1))) begin // underflow - denorm_shamt_s1 = MAX_INT_WIDTH+1; // all bits go to the sticky + denorm_shamt_s1 = INT_WIDTH+1; // all bits go to the sticky end else begin // By default right shift mantissa to be an integer - denorm_shamt_s1 = SHAMT_BITS'(MAX_INT_WIDTH-1) - SHAMT_BITS'(input_exp_s1[i]); + denorm_shamt_s1 = SHAMT_BITS'(INT_WIDTH-1) - SHAMT_BITS'(input_exp_s1[i]); end end end - assign destination_mant_s1[i] = preshift_mant_s1 >> denorm_shamt_s1; - assign final_exp_s1[i] = final_exp_tmp_s1; + assign destination_mant_s1[i] = {input_mant_s1[i], 33'b0} >> denorm_shamt_s1; + assign final_exp_s1[i] = input_exp_s1[i] + INT_EXP_WIDTH'(EXP_BIAS); assign of_before_round_s1[i] = of_before_round_tmp_s1; end @@ -267,33 +243,33 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .data_out ({valid_in_s2, lane_mask_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) ); - wire [NUM_LANES-1:0] rounded_sign_s2; - wire [NUM_LANES-1:0][31:0] rounded_abs_s2; // absolute value of result after rounding - wire [NUM_LANES-1:0] int_round_has_sticky_s2; - wire [NUM_LANES-1:0] fp_round_has_sticky_s2; + wire [NUM_LANES-1:0] rounded_sign_s2; + wire [NUM_LANES-1:0][INT_WIDTH-1:0] rounded_abs_s2; // absolute value of result after rounding + wire [NUM_LANES-1:0] f2i_round_has_sticky_s2; + wire [NUM_LANES-1:0] i2f_round_has_sticky_s2; // Rouding and classification for (genvar i = 0; i < NUM_LANES; ++i) begin - wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments - wire [MAX_INT_WIDTH-1:0] final_int_s2; // integer shifted in position - wire [1:0] round_sticky_bits_s2; - wire [31:0] fmt_pre_round_abs_s2; - wire 
[31:0] pre_round_abs_s2; - wire [1:0] int_round_sticky_bits_s2, fp_round_sticky_bits_s2; + wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments + wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position + wire [1:0] round_sticky_bits_s2; + wire [INT_WIDTH-1:0] fmt_pre_round_abs_s2; + wire [INT_WIDTH-1:0] pre_round_abs_s2; + wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2; // Extract final mantissa and round bit, discard the normal bit (for FP) - assign {final_mant_s2, fp_round_sticky_bits_s2[1]} = destination_mant_s2[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1]; - assign {final_int_s2, int_round_sticky_bits_s2[1]} = destination_mant_s2[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1]; + assign {final_mant_s2, i2f_round_sticky_bits_s2[1]} = destination_mant_s2[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1]; + assign {final_int_s2, f2i_round_sticky_bits_s2[1]} = destination_mant_s2[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (INT_WIDTH+1) + 1]; // Collapse sticky bits - assign fp_round_sticky_bits_s2[0] = (| destination_mant_s2[i][NUM_FP_STICKY-1:0]); - assign int_round_sticky_bits_s2[0] = (| destination_mant_s2[i][NUM_INT_STICKY-1:0]); - assign fp_round_has_sticky_s2[i] = (| fp_round_sticky_bits_s2); - assign int_round_has_sticky_s2[i] = (| int_round_sticky_bits_s2); + assign i2f_round_sticky_bits_s2[0] = (| destination_mant_s2[i][NUM_FP_STICKY-1:0]); + assign f2i_round_sticky_bits_s2[0] = (| destination_mant_s2[i][NUM_INT_STICKY-1:0]); + assign i2f_round_has_sticky_s2[i] = (| i2f_round_sticky_bits_s2); + assign f2i_round_has_sticky_s2[i] = (| f2i_round_sticky_bits_s2); // select RS bits for destination operation - assign round_sticky_bits_s2 = is_itof_s2 ? fp_round_sticky_bits_s2 : int_round_sticky_bits_s2; + assign round_sticky_bits_s2 = is_itof_s2 ? 
i2f_round_sticky_bits_s2 : f2i_round_sticky_bits_s2; // Pack exponent and mantissa into proper rounding form assign fmt_pre_round_abs_s2 = {1'b0, final_exp_s2[i][EXP_BITS-1:0], final_mant_s2[MAN_BITS-1:0]}; @@ -327,10 +303,10 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0] mant_is_zero_s3; wire [NUM_LANES-1:0] input_sign_s3; wire [NUM_LANES-1:0] rounded_sign_s3; - wire [NUM_LANES-1:0][31:0] rounded_abs_s3; + wire [NUM_LANES-1:0][INT_WIDTH-1:0] rounded_abs_s3; wire [NUM_LANES-1:0] of_before_round_s3; - wire [NUM_LANES-1:0] int_round_has_sticky_s3; - wire [NUM_LANES-1:0] fp_round_has_sticky_s3; + wire [NUM_LANES-1:0] f2i_round_has_sticky_s3; + wire [NUM_LANES-1:0] i2f_round_has_sticky_s3; VX_pipe_register #( .DATAW (1 + NUM_LANES + TAGW + 1 + 1 + NUM_LANES * ($bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1)), @@ -339,105 +315,68 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .enable (~stall), - .data_in ({valid_in_s2, lane_mask_s2, tag_in_s2, is_itof_s2, unsigned_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, int_round_has_sticky_s2, fp_round_has_sticky_s2}), - .data_out ({valid_in_s3, lane_mask_s3, tag_in_s3, is_itof_s3, unsigned_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, int_round_has_sticky_s3, fp_round_has_sticky_s3}) + .data_in ({valid_in_s2, lane_mask_s2, tag_in_s2, is_itof_s2, unsigned_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}), + .data_out ({valid_in_s3, lane_mask_s3, tag_in_s3, is_itof_s3, unsigned_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3}) ); - wire [NUM_LANES-1:0] of_after_round_s3; - wire [NUM_LANES-1:0] uf_after_round_s3; - wire [NUM_LANES-1:0][31:0] fmt_result_s3; - wire 
[NUM_LANES-1:0][31:0] rounded_int_res_s3; // after possible inversion + wire [NUM_LANES-1:0][INT_WIDTH-1:0] fmt_result_s3; + wire [NUM_LANES-1:0][INT_WIDTH-1:0] rounded_int_res_s3; // after possible inversion wire [NUM_LANES-1:0] rounded_int_res_zero_s3; // after rounding for (genvar i = 0; i < NUM_LANES; ++i) begin // Assemble regular result, nan box short ones. Int zeroes need to be detected - assign fmt_result_s3[i] = (is_itof_s3 & mant_is_zero_s3[i]) ? 0 : {rounded_sign_s3[i], rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:0]}; - - // Classification after rounding select by destination format - assign uf_after_round_s3[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == 0); // denormal - assign of_after_round_s3[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp. + assign fmt_result_s3[i] = mant_is_zero_s3[i] ? 0 : {rounded_sign_s3[i], rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:0]}; // Negative integer result needs to be brought into two's complement assign rounded_int_res_s3[i] = rounded_sign_s3[i] ? (-rounded_abs_s3[i]) : rounded_abs_s3[i]; assign rounded_int_res_zero_s3[i] = (rounded_int_res_s3[i] == 0); end - // FP Special case handling + // F2I Special case handling - wire [NUM_LANES-1:0][31:0] fp_special_result_s3; - fflags_t [NUM_LANES-1:0] fp_special_status_s3; - wire [NUM_LANES-1:0] fp_result_is_special_s3; - - for (genvar i = 0; i < NUM_LANES; ++i) begin - // Detect special case from source format, I2F casts don't produce a special result - assign fp_result_is_special_s3[i] = ~is_itof_s3 & (fclass_s3[i].is_zero | fclass_s3[i].is_nan); - - // Signalling input NaNs raise invalid flag, otherwise no flags set - assign fp_special_status_s3[i] = fclass_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation - - // Assemble result according to destination format - assign fp_special_result_s3[i] = fclass_s3[i].is_zero ? 
(32'(input_sign_s3) << 31) // signed zero - : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN - end - - // INT Special case handling - - reg [NUM_LANES-1:0][31:0] int_special_result_s3; - fflags_t [NUM_LANES-1:0] int_special_status_s3; - wire [NUM_LANES-1:0] int_result_is_special_s3; + reg [NUM_LANES-1:0][INT_WIDTH-1:0] f2i_special_result_s3; + fflags_t [NUM_LANES-1:0] f2i_special_status_s3; + wire [NUM_LANES-1:0] f2i_result_is_special_s3; for (genvar i = 0; i < NUM_LANES; ++i) begin // Assemble result according to destination format always @(*) begin if (input_sign_s3[i] && !fclass_s3[i].is_nan) begin - int_special_result_s3[i][30:0] = '0; // alone yields 2**(31)-1 - int_special_result_s3[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31 + f2i_special_result_s3[i][INT_WIDTH-2:0] = '0; // low bits cleared: alone yields 0 + f2i_special_result_s3[i][INT_WIDTH-1] = ~unsigned_s3; // signed casts yield -2**(INT_WIDTH-1), unsigned casts yield 0 end else begin - int_special_result_s3[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1 - int_special_result_s3[i][31] = unsigned_s3; // for unsigned casts yields 2**31 + f2i_special_result_s3[i][INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(INT_WIDTH-1)-1 + f2i_special_result_s3[i][INT_WIDTH-1] = unsigned_s3; // for unsigned casts yields 2**INT_WIDTH-1 end end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - assign int_result_is_special_s3[i] = fclass_s3[i].is_nan + assign f2i_result_is_special_s3[i] = fclass_s3[i].is_nan | fclass_s3[i].is_inf | of_before_round_s3[i] | (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero_s3[i]); // All integer special cases are invalid - assign int_special_status_s3[i] = {1'b1, 4'h0}; + assign f2i_special_status_s3[i] = {1'b1, 4'h0}; end // Result selection and Output handshake fflags_t [NUM_LANES-1:0] tmp_fflags_s3; - wire [NUM_LANES-1:0][31:0] tmp_result_s3; + wire [NUM_LANES-1:0][INT_WIDTH-1:0] tmp_result_s3; - for (genvar i = 0; i < NUM_LANES; ++i) begin - fflags_t 
fp_regular_status_s3, int_regular_status_s3; - fflags_t fp_status_s3, int_status_s3; - wire [31:0] fp_result_s3, int_result_s3; + for (genvar i = 0; i < NUM_LANES; ++i) begin + fflags_t i2f_regular_status_s3 = i2f_round_has_sticky_s3[i] ? 5'h1 : 5'h0; + fflags_t f2i_regular_status_s3 = f2i_round_has_sticky_s3[i] ? 5'h1 : 5'h0; - wire inexact_s3 = is_itof_s3 ? fp_round_has_sticky_s3[i] // overflow is invalid in i2f; - : (fp_round_has_sticky_s3[i] || (~fclass_s3[i].is_inf && (of_before_round_s3[i] || of_after_round_s3[i]))); - - assign fp_regular_status_s3.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round_s3[i]); // overflow is invalid for I2F casts - assign fp_regular_status_s3.DZ = 1'b0; // no divisions - assign fp_regular_status_s3.OF = ~is_itof_s3 & (~fclass_s3[i].is_inf & (of_before_round_s3[i] | of_after_round_s3[i])); // inf casts no OF - assign fp_regular_status_s3.UF = uf_after_round_s3[i] & inexact_s3; - assign fp_regular_status_s3.NX = inexact_s3; + fflags_t i2f_status_s3 = i2f_regular_status_s3; + fflags_t f2i_status_s3 = f2i_result_is_special_s3[i] ? f2i_special_status_s3[i] : f2i_regular_status_s3; - assign int_regular_status_s3 = int_round_has_sticky_s3[i] ? {4'h0, 1'b1} : 5'h0; + wire [INT_WIDTH-1:0] i2f_result_s3 = fmt_result_s3[i]; + wire [INT_WIDTH-1:0] f2i_result_s3 = f2i_result_is_special_s3[i] ? f2i_special_result_s3[i] : rounded_int_res_s3[i]; - assign fp_result_s3 = fp_result_is_special_s3[i] ? fp_special_result_s3[i] : fmt_result_s3[i]; - assign int_result_s3 = int_result_is_special_s3[i] ? int_special_result_s3[i] : rounded_int_res_s3[i]; - - assign fp_status_s3 = fp_result_is_special_s3[i] ? fp_special_status_s3[i] : fp_regular_status_s3; - assign int_status_s3 = int_result_is_special_s3[i] ? int_special_status_s3[i] : int_regular_status_s3; - - // Select output depending on special case detection - assign tmp_result_s3[i] = is_itof_s3 ? fp_result_s3 : int_result_s3; - assign tmp_fflags_s3[i] = is_itof_s3 ? 
fp_status_s3 : int_status_s3; + assign tmp_result_s3[i] = is_itof_s3 ? i2f_result_s3 : f2i_result_s3; + assign tmp_fflags_s3[i] = is_itof_s3 ? i2f_status_s3 : f2i_status_s3; end assign stall = ~ready_out && valid_out; @@ -457,7 +396,6 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( ); assign ready_in = ~stall; - assign has_fflags = 1'b1; endmodule diff --git a/hw/rtl/fpu/VX_fpu_rounding.sv b/hw/rtl/fpu/VX_fpu_rounding.sv index 5168fada..877b4eb6 100644 --- a/hw/rtl/fpu/VX_fpu_rounding.sv +++ b/hw/rtl/fpu/VX_fpu_rounding.sv @@ -54,7 +54,6 @@ module VX_fpu_rounding #( 2'b01: round_up = 1'b0; // < ulp/2 away, round down 2'b10: round_up = abs_value_i[0]; // = ulp/2 away, round towards even result 2'b11: round_up = 1'b1; // > ulp/2 away, round up - default: round_up = 1'bx; endcase `INST_FRM_RTZ: round_up = 1'b0; // always round down `INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -