// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_fpu_define.vh"

`ifdef FPU_DSP

/// Modified port of noncomp module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
///
/// Per-lane single-precision "non-computational" FP operations: sign
/// injection, comparison, classification, raw move and min/max.
///
/// Structure:
///   setup (combinational) -> pipe_reg0 -> operation logic (combinational) -> pipe_reg1
///
/// Operation selection uses the 4-bit op_mod = {op_type == INST_FPU_CMP, frm}:
///   op_mod[2:0] = 0,1,2 : op_mod[3] ? compare (0=LE, 1=LT, 2=EQ)
///                                   : sign injection (0=copy, 1=negate, 2=xor)
///   op_mod[2:0] = 3     : classify
///   op_mod[2:0] = 4,5   : move (dataa passed through unchanged)
///   op_mod[2:0] = 6,7   : 6 = min, 7 = max
///
/// Parameters:
///   NUM_LANES : number of independent 32-bit lanes processed in parallel
///   TAGW      : width of the opaque tag carried from tag_in to tag_out
///
/// Handshake: the whole pipeline advances unless the output holds a valid
/// result that the consumer is not ready to take (stall); ready_in = ~stall.

module VX_fpu_ncomp import VX_fpu_pkg::*; #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    // only used to select which lanes contribute to the merged NV flag
    input wire [NUM_LANES-1:0] lane_mask,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FPU_BITS-1:0] op_type,
    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output wire [`FP_FLAGS_BITS-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    localparam EXP_BITS = 8;
    localparam MAN_BITS = 23;

    // One-hot classification masks written to the integer result by CLASS
    localparam  NEG_INF     = 32'h00000001,
                NEG_NORM    = 32'h00000002,
                NEG_SUBNORM = 32'h00000004,
                NEG_ZERO    = 32'h00000008,
                POS_ZERO    = 32'h00000010,
                POS_SUBNORM = 32'h00000020,
                POS_NORM    = 32'h00000040,
                POS_INF     = 32'h00000080,
                //SIG_NAN   = 32'h00000100,
                QUT_NAN     = 32'h00000200;

    wire [NUM_LANES-1:0]       a_sign, b_sign;
    wire [NUM_LANES-1:0][7:0]  a_exponent, b_exponent;
    wire [NUM_LANES-1:0][22:0] a_mantissa, b_mantissa;
    fclass_t [NUM_LANES-1:0]   a_fclass, b_fclass;
    wire [NUM_LANES-1:0]       a_smaller, ab_equal;

    // Setup: split operands into fields, classify them and pre-compute
    // the ordering/equality predicates ahead of the first register stage.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        assign a_sign[i]     = dataa[i][31];
        assign a_exponent[i] = dataa[i][30:23];
        assign a_mantissa[i] = dataa[i][22:0];

        assign b_sign[i]     = datab[i][31];
        assign b_exponent[i] = datab[i][30:23];
        assign b_mantissa[i] = datab[i][22:0];

        VX_fpu_class #(
            .EXP_BITS (EXP_BITS),
            .MAN_BITS (MAN_BITS)
        ) fp_class_a (
            .exp_i  (a_exponent[i]),
            .man_i  (a_mantissa[i]),
            .clss_o (a_fclass[i])
        );

        VX_fpu_class #(
            .EXP_BITS (EXP_BITS),
            .MAN_BITS (MAN_BITS)
        ) fp_class_b (
            .exp_i  (b_exponent[i]),
            .man_i  (b_mantissa[i]),
            .clss_o (b_fclass[i])
        );

        // Unsigned compare of the raw bit patterns, inverted whenever either
        // operand has its sign bit set. Consumers check for NaN first and
        // combine this with ab_equal, so the value for equal operands is
        // never used on its own by LE/LT.
        assign a_smaller[i] = (dataa[i] < datab[i]) ^ (a_sign[i] || b_sign[i]);
        assign ab_equal[i]  = (dataa[i] == datab[i])
                           || (a_fclass[i].is_zero && b_fclass[i].is_zero); // +0 == -0
    end

    // Pipeline stage0

    wire                        valid_in_s0;
    wire [NUM_LANES-1:0]        lane_mask_s0;
    wire [TAGW-1:0]             tag_in_s0;
    wire [3:0]                  op_mod_s0;
    wire [NUM_LANES-1:0][31:0]  dataa_s0, datab_s0;
    fclass_t [NUM_LANES-1:0]    a_fclass_s0, b_fclass_s0;
    wire [NUM_LANES-1:0]        a_smaller_s0, ab_equal_s0;

    wire stall;

    wire [3:0] op_mod = {(op_type == `INST_FPU_CMP), frm};

    // The sign/exponent/mantissa fields are not registered separately: they
    // are plain bit slices of dataa_s0/datab_s0, so carrying them through the
    // register as well would only duplicate 33 flops per lane.
    VX_pipe_register #(
        .DATAW  (1 + NUM_LANES + TAGW + 4 + NUM_LANES * (2 * 32 + 2 * $bits(fclass_t) + 1 + 1)),
        .RESETW (1)
    ) pipe_reg0 (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall),
        .data_in  ({valid_in,    lane_mask,    tag_in,    op_mod,    dataa,    datab,    a_fclass,    b_fclass,    a_smaller,    ab_equal}),
        .data_out ({valid_in_s0, lane_mask_s0, tag_in_s0, op_mod_s0, dataa_s0, datab_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
    );

    // FCLASS
    reg [NUM_LANES-1:0][31:0] fclass_mask_s0;  // generate a 10-bit mask for integer reg
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            if (a_fclass_s0[i].is_normal) begin
                fclass_mask_s0[i] = dataa_s0[i][31] ? NEG_NORM : POS_NORM;
            end
            else if (a_fclass_s0[i].is_inf) begin
                fclass_mask_s0[i] = dataa_s0[i][31] ? NEG_INF : POS_INF;
            end
            else if (a_fclass_s0[i].is_zero) begin
                fclass_mask_s0[i] = dataa_s0[i][31] ? NEG_ZERO : POS_ZERO;
            end
            else if (a_fclass_s0[i].is_subnormal) begin
                fclass_mask_s0[i] = dataa_s0[i][31] ? NEG_SUBNORM : POS_SUBNORM;
            end
            else if (a_fclass_s0[i].is_nan) begin
                // bit 9 = quiet NaN, bit 8 = signaling NaN
                fclass_mask_s0[i] = {22'h0, a_fclass_s0[i].is_quiet, a_fclass_s0[i].is_signaling, 8'h0};
            end
            else begin
                fclass_mask_s0[i] = QUT_NAN;
            end
        end
    end

    // Min/Max
    reg [NUM_LANES-1:0][31:0] fminmax_res_s0;
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            if (a_fclass_s0[i].is_nan && b_fclass_s0[i].is_nan)
                fminmax_res_s0[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
            else if (a_fclass_s0[i].is_nan)
                fminmax_res_s0[i] = datab_s0[i];    // single NaN: return the other operand
            else if (b_fclass_s0[i].is_nan)
                fminmax_res_s0[i] = dataa_s0[i];
            else begin
                // FMIN, FMAX: op_mod[0] = 0 picks the smaller operand, 1 the larger
                fminmax_res_s0[i] = (op_mod_s0[0] ^ a_smaller_s0[i]) ? dataa_s0[i] : datab_s0[i];
            end
        end
    end

    // Sign injection: magnitude always comes from operand a
    reg [NUM_LANES-1:0][31:0] fsgnj_res_s0; // result of sign injection
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            case (op_mod_s0[1:0])
                0: fsgnj_res_s0[i] = { datab_s0[i][31], dataa_s0[i][30:0]};
                1: fsgnj_res_s0[i] = {~datab_s0[i][31], dataa_s0[i][30:0]};
            default: fsgnj_res_s0[i] = { dataa_s0[i][31] ^ datab_s0[i][31], dataa_s0[i][30:0]};
            endcase
        end
    end

    // Comparison
    reg [NUM_LANES-1:0] fcmp_res_s0;        // result of comparison
    reg [NUM_LANES-1:0] fcmp_fflags_NV_s0;  // comparison fflags
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            case (op_mod_s0[1:0])
                0: begin // LE: any NaN operand yields 0 and raises NV
                    if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
                        fcmp_res_s0[i]       = 0;
                        fcmp_fflags_NV_s0[i] = 1;
                    end else begin
                        fcmp_res_s0[i]       = (a_smaller_s0[i] | ab_equal_s0[i]);
                        fcmp_fflags_NV_s0[i] = 0;
                    end
                end
                1: begin // LT: any NaN operand yields 0 and raises NV
                    if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
                        fcmp_res_s0[i]       = 0;
                        fcmp_fflags_NV_s0[i] = 1;
                    end else begin
                        fcmp_res_s0[i]       = (a_smaller_s0[i] & ~ab_equal_s0[i]);
                        fcmp_fflags_NV_s0[i] = 0;
                    end
                end
                2: begin // EQ: NaN yields 0; NV only for signaling NaNs
                    if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
                        fcmp_res_s0[i]       = 0;
                        fcmp_fflags_NV_s0[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
                    end else begin
                        fcmp_res_s0[i]       = ab_equal_s0[i];
                        fcmp_fflags_NV_s0[i] = 0;
                    end
                end
                default: begin
                    fcmp_res_s0[i]       = 'x;
                    fcmp_fflags_NV_s0[i] = 'x;
                end
            endcase
        end
    end

    // outputs

    reg [NUM_LANES-1:0][31:0] result_s0;
    reg [NUM_LANES-1:0] fflags_NV_s0;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        always @(*) begin
            case (op_mod_s0[2:0])
                0,1,2: begin // SGNJ, CMP
                    result_s0[i]    = op_mod_s0[3] ? 32'(fcmp_res_s0[i]) : fsgnj_res_s0[i];
                    fflags_NV_s0[i] = fcmp_fflags_NV_s0[i];
                end
                3: begin // CLASS
                    result_s0[i]    = fclass_mask_s0[i];
                    fflags_NV_s0[i] = 'x;   // don't-care: has_fflags is 0 for this op
                end
                4,5: begin // FMV
                    result_s0[i]    = dataa_s0[i];
                    fflags_NV_s0[i] = 'x;   // don't-care: has_fflags is 0 for this op
                end
                6,7: begin // MIN/MAX
                    result_s0[i]    = fminmax_res_s0[i];
                    fflags_NV_s0[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
                end
            endcase
        end
    end

    // only MIN/MAX and CMP return status flags
    wire has_fflags_s0 = (op_mod_s0[2:0] >= 6) || op_mod_s0[3];

    // Hold both stages while a valid result is waiting on the consumer
    assign stall = ~ready_out && valid_out;

    wire fflags_NV;

    // OR-reduce the per-lane NV flag over the lanes enabled in lane_mask
    reg fflags_merged;
    always @(*) begin
        fflags_merged = 0;
        for (integer i = 0; i < NUM_LANES; ++i) begin
            if (lane_mask_s0[i]) begin
                fflags_merged |= fflags_NV_s0[i];
            end
        end
    end

    VX_pipe_register #(
        .DATAW  (1 + TAGW + (NUM_LANES * 32) + 1 + 1),
        .RESETW (1)
    ) pipe_reg1 (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall),
        .data_in  ({valid_in_s0, tag_in_s0, result_s0, has_fflags_s0, fflags_merged}),
        .data_out ({valid_out,   tag_out,   result,    has_fflags,    fflags_NV})
    );

    assign ready_in = ~stall;

    //               NV,        DZ,   OF,   UF,   NX
    assign fflags = {fflags_NV, 1'b0, 1'b0, 1'b0, 1'b0};

endmodule
`endif