Files
vortex/hw/rtl/fpu/VX_fpu_ncomp.sv
Blaise Tine d47cccc157 Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00

293 lines
10 KiB
Systemverilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
/// Modified port of the noncomp module from the fpnew library
/// reference: https://github.com/pulp-platform/fpnew
module VX_fpu_ncomp import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter TAGW = 1
) (
input wire clk,
input wire reset,
output wire ready_in,
input wire valid_in,
input wire [NUM_LANES-1:0] lane_mask,
input wire [TAGW-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
output wire [TAGW-1:0] tag_out,
input wire ready_out,
output wire valid_out
);
localparam EXP_BITS = 8;
localparam MAN_BITS = 23;
localparam NEG_INF = 32'h00000001,
NEG_NORM = 32'h00000002,
NEG_SUBNORM = 32'h00000004,
NEG_ZERO = 32'h00000008,
POS_ZERO = 32'h00000010,
POS_SUBNORM = 32'h00000020,
POS_NORM = 32'h00000040,
POS_INF = 32'h00000080,
//SIG_NAN = 32'h00000100,
QUT_NAN = 32'h00000200;
wire [NUM_LANES-1:0] a_sign, b_sign;
wire [NUM_LANES-1:0][7:0] a_exponent, b_exponent;
wire [NUM_LANES-1:0][22:0] a_mantissa, b_mantissa;
fclass_t [NUM_LANES-1:0] a_fclass, b_fclass;
wire [NUM_LANES-1:0] a_smaller, ab_equal;
// Setup
for (genvar i = 0; i < NUM_LANES; ++i) begin
assign a_sign[i] = dataa[i][31];
assign a_exponent[i] = dataa[i][30:23];
assign a_mantissa[i] = dataa[i][22:0];
assign b_sign[i] = datab[i][31];
assign b_exponent[i] = datab[i][30:23];
assign b_mantissa[i] = datab[i][22:0];
VX_fpu_class #(
.EXP_BITS (EXP_BITS),
.MAN_BITS (MAN_BITS)
) fp_class_a (
.exp_i (a_exponent[i]),
.man_i (a_mantissa[i]),
.clss_o (a_fclass[i])
);
VX_fpu_class #(
.EXP_BITS (EXP_BITS),
.MAN_BITS (MAN_BITS)
) fp_class_b (
.exp_i (b_exponent[i]),
.man_i (b_mantissa[i]),
.clss_o (b_fclass[i])
);
assign a_smaller[i] = (dataa[i] < datab[i]) ^ (a_sign[i] || b_sign[i]);
assign ab_equal[i] = (dataa[i] == datab[i])
|| (a_fclass[i].is_zero && b_fclass[i].is_zero); // +0 == -0
end
// Pipeline stage0
wire valid_in_s0;
wire [NUM_LANES-1:0] lane_mask_s0;
wire [TAGW-1:0] tag_in_s0;
wire [3:0] op_mod_s0;
wire [NUM_LANES-1:0][31:0] dataa_s0, datab_s0;
wire [NUM_LANES-1:0] a_sign_s0, b_sign_s0;
wire [NUM_LANES-1:0][7:0] a_exponent_s0;
wire [NUM_LANES-1:0][22:0] a_mantissa_s0;
fclass_t [NUM_LANES-1:0] a_fclass_s0, b_fclass_s0;
wire [NUM_LANES-1:0] a_smaller_s0, ab_equal_s0;
wire stall;
wire [3:0] op_mod = {(op_type == `INST_FPU_CMP), frm};
VX_pipe_register #(
.DATAW (1 + NUM_LANES + TAGW + 4 + NUM_LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in, lane_mask, tag_in, op_mod, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
.data_out ({valid_in_s0, lane_mask_s0, tag_in_s0, op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
);
// FCLASS
reg [NUM_LANES-1:0][31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
if (a_fclass_s0[i].is_normal) begin
fclass_mask_s0[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
end
else if (a_fclass_s0[i].is_inf) begin
fclass_mask_s0[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
end
else if (a_fclass_s0[i].is_zero) begin
fclass_mask_s0[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
end
else if (a_fclass_s0[i].is_subnormal) begin
fclass_mask_s0[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
end
else if (a_fclass_s0[i].is_nan) begin
fclass_mask_s0[i] = {22'h0, a_fclass_s0[i].is_quiet, a_fclass_s0[i].is_signaling, 8'h0};
end
else begin
fclass_mask_s0[i] = QUT_NAN;
end
end
end
// Min/Max
reg [NUM_LANES-1:0][31:0] fminmax_res_s0;
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
if (a_fclass_s0[i].is_nan && b_fclass_s0[i].is_nan)
fminmax_res_s0[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
else if (a_fclass_s0[i].is_nan)
fminmax_res_s0[i] = datab_s0[i];
else if (b_fclass_s0[i].is_nan)
fminmax_res_s0[i] = dataa_s0[i];
else begin
// FMIN, FMAX
fminmax_res_s0[i] = (op_mod_s0[0] ^ a_smaller_s0[i]) ? dataa_s0[i] : datab_s0[i];
end
end
end
// Sign injection
reg [NUM_LANES-1:0][31:0] fsgnj_res_s0; // result of sign injection
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (op_mod_s0[1:0])
0: fsgnj_res_s0[i] = { b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
1: fsgnj_res_s0[i] = {~b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
default: fsgnj_res_s0[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
endcase
end
end
// Comparison
reg [NUM_LANES-1:0] fcmp_res_s0; // result of comparison
reg [NUM_LANES-1:0] fcmp_fflags_NV_s0; // comparison fflags
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (op_mod_s0[1:0])
0: begin // LE
if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
fcmp_res_s0[i] = 0;
fcmp_fflags_NV_s0[i] = 1;
end else begin
fcmp_res_s0[i] = (a_smaller_s0[i] | ab_equal_s0[i]);
fcmp_fflags_NV_s0[i] = 0;
end
end
1: begin // LT
if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
fcmp_res_s0[i] = 0;
fcmp_fflags_NV_s0[i] = 1;
end else begin
fcmp_res_s0[i] = (a_smaller_s0[i] & ~ab_equal_s0[i]);
fcmp_fflags_NV_s0[i] = 0;
end
end
2: begin // EQ
if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
fcmp_res_s0[i] = 0;
fcmp_fflags_NV_s0[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
end else begin
fcmp_res_s0[i] = ab_equal_s0[i];
fcmp_fflags_NV_s0[i] = 0;
end
end
default: begin
fcmp_res_s0[i] = 'x;
fcmp_fflags_NV_s0[i] = 'x;
end
endcase
end
end
// outputs
reg [NUM_LANES-1:0][31:0] result_s0;
reg [NUM_LANES-1:0] fflags_NV_s0;
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (op_mod_s0[2:0])
0,1,2: begin
// SGNJ, CMP
result_s0[i] = op_mod_s0[3] ? 32'(fcmp_res_s0[i]) : fsgnj_res_s0[i];
fflags_NV_s0[i] = fcmp_fflags_NV_s0[i];
end
3: begin
// CLASS
result_s0[i] = fclass_mask_s0[i];
fflags_NV_s0[i] = 'x;
end
4,5: begin
// FMV
result_s0[i] = dataa_s0[i];
fflags_NV_s0[i] = 'x;
end
6,7: begin
// MIN/MAX
result_s0[i] = fminmax_res_s0[i];
fflags_NV_s0[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
end
endcase
end
end
// only MIN/MAX and CMP return status flags
wire has_fflags_s0 = (op_mod_s0[2:0] >= 6) || op_mod_s0[3];
assign stall = ~ready_out && valid_out;
wire fflags_NV;
reg fflags_merged;
always @(*) begin
fflags_merged = 0;
for (integer i = 0; i < NUM_LANES; ++i) begin
if (lane_mask_s0[i]) begin
fflags_merged |= fflags_NV_s0[i];
end
end
end
VX_pipe_register #(
.DATAW (1 + TAGW + (NUM_LANES * 32) + 1 + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in_s0, tag_in_s0, result_s0, has_fflags_s0, fflags_merged}),
.data_out ({valid_out, tag_out, result, has_fflags, fflags_NV})
);
assign ready_in = ~stall;
// NV, DZ, OF, UF, NX
assign fflags = {fflags_NV, 1'b0, 1'b0, 1'b0, 1'b0};
endmodule
`endif