Merge branch 'master' into graphics
This commit is contained in:
28
hw/rtl/fp_cores/VX_fp_class.sv
Normal file
28
hw/rtl/fp_cores/VX_fp_class.sv
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Classifies a floating-point operand from its raw exponent and mantissa
// fields and reports the result as a set of one-bit flags in `clss_o`.
//
// Parameters:
//   MAN_BITS - width of the mantissa (fraction) field
//   EXP_BITS - width of the exponent field
// Ports:
//   exp_i  - raw exponent field
//   man_i  - raw mantissa field
//   clss_o - class flags (normal, zero, subnormal, inf, nan, quiet, signaling)
module VX_fp_class # (
    parameter MAN_BITS = 23,
    parameter EXP_BITS = 8
) (
    input  [EXP_BITS-1:0] exp_i,
    input  [MAN_BITS-1:0] man_i,
    output fp_class_t     clss_o
);
    // Field-level predicates, evaluated once and shared by every flag below.
    wire exp_all_zeros = (exp_i == '0);
    wire exp_all_ones  = (exp_i == '1);
    wire man_all_zeros = (man_i == '0);

    // NaN: exponent all ones with a non-zero mantissa.
    wire nan_w = exp_all_ones && !man_all_zeros;

    // A NaN whose mantissa MSB is clear is signaling; any other NaN is quiet.
    wire snan_w = nan_w && ~man_i[MAN_BITS-1];
    wire qnan_w = nan_w && ~snan_w;

    // Exponent is neither all zeros nor all ones.
    assign clss_o.is_normal    = !exp_all_zeros && !exp_all_ones;

    // Exponent all zeros: zero or subnormal depending on the mantissa.
    assign clss_o.is_zero      = exp_all_zeros && man_all_zeros;
    assign clss_o.is_subnormal = exp_all_zeros && !man_all_zeros;

    // Exponent all ones: infinity or NaN depending on the mantissa.
    assign clss_o.is_inf       = exp_all_ones && man_all_zeros;
    assign clss_o.is_nan       = nan_w;

    assign clss_o.is_quiet     = qnan_w;
    assign clss_o.is_signaling = snan_w;

endmodule
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of cast module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
@@ -15,7 +15,7 @@ module VX_fp_cvt #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire is_itof,
|
||||
input wire is_signed,
|
||||
@@ -59,13 +59,16 @@ module VX_fp_cvt #(
|
||||
|
||||
// Input processing
|
||||
|
||||
fp_type_t [LANES-1:0] in_a_type;
|
||||
fp_class_t [LANES-1:0] fp_clss;
|
||||
|
||||
// Classify the floating-point input of every lane.
// The exponent and mantissa slices are derived from EXP_BITS / MAN_BITS so
// that they always match the port widths of the parameterised classifier
// (and the slices used by the mantissa/exponent extraction further down),
// instead of being hard-coded to the 8/23-bit layout.
for (genvar i = 0; i < LANES; ++i) begin
    VX_fp_class #(
        .EXP_BITS (EXP_BITS),
        .MAN_BITS (MAN_BITS)
    ) fp_class (
        .exp_i  (dataa[i][MAN_BITS +: EXP_BITS]),  // exponent field, directly above the mantissa
        .man_i  (dataa[i][MAN_BITS-1:0]),          // mantissa field
        .clss_o (fp_clss[i])
    );
end
|
||||
|
||||
@@ -74,16 +77,18 @@ module VX_fp_cvt #(
|
||||
wire [LANES-1:0] input_sign;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [INT_MAN_WIDTH-1:0] int_mantissa;
|
||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
wire fmt_sign = dataa[i][31];
|
||||
wire int_sign = dataa[i][31] & is_signed;
|
||||
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({fp_clss[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} +
|
||||
{1'b0, fp_clss[i].is_subnormal};
|
||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Pipeline stage0
|
||||
@@ -93,7 +98,7 @@ module VX_fp_cvt #(
|
||||
wire is_itof_s0;
|
||||
wire unsigned_s0;
|
||||
wire [2:0] rnd_mode_s0;
|
||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
fp_class_t [LANES-1:0] fp_clss_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
@@ -101,14 +106,14 @@ module VX_fp_cvt #(
|
||||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, fp_clss, input_sign, fmt_exponent, encoded_mant}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
|
||||
);
|
||||
|
||||
// Normalization
|
||||
@@ -119,8 +124,8 @@ module VX_fp_cvt #(
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire mant_is_nonzero;
|
||||
VX_lzc #(
|
||||
.WIDTH (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
.N (INT_MAN_WIDTH),
|
||||
.MODE (1)
|
||||
) lzc (
|
||||
.in_i (encoded_mant_s0[i]),
|
||||
.cnt_o (renorm_shamt_s0[i]),
|
||||
@@ -134,20 +139,12 @@ module VX_fp_cvt #(
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||
{1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp = fmt_exponent_s0[i] + (FMT_SHIFT_COMPENSATION - EXP_BIAS) - {1'b0, renorm_shamt_s0[i]};
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
`IGNORE_WARNINGS_END
|
||||
@@ -160,21 +157,21 @@ module VX_fp_cvt #(
|
||||
wire is_itof_s1;
|
||||
wire unsigned_s1;
|
||||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
fp_class_t [LANES-1:0] fp_clss_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
|
||||
);
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
@@ -183,39 +180,35 @@ module VX_fp_cvt #(
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
final_exp = input_exp_s1[i] + EXP_BIAS; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed(2**EXP_BITS-1-EXP_BIAS)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-MAN_BITS-EXP_BIAS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp) < $signed(1)) begin
|
||||
denorm_shamt = (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(1-EXP_BIAS)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
|
||||
denorm_shamt = (1-EXP_BIAS) - input_exp_s1[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
|
||||
@@ -224,7 +217,7 @@ module VX_fp_cvt #(
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
denorm_shamt = MAX_INT_WIDTH+1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
@@ -245,7 +238,7 @@ module VX_fp_cvt #(
|
||||
wire is_itof_s2;
|
||||
wire unsigned_s2;
|
||||
wire [2:0] rnd_mode_s2;
|
||||
fp_type_t [LANES-1:0] in_a_type_s2;
|
||||
fp_class_t [LANES-1:0] fp_clss_s2;
|
||||
wire [LANES-1:0] mant_is_zero_s2;
|
||||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
|
||||
@@ -253,14 +246,14 @@ module VX_fp_cvt #(
|
||||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_class_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
@@ -314,7 +307,7 @@ module VX_fp_cvt #(
|
||||
wire [TAGW-1:0] tag_in_s3;
|
||||
wire is_itof_s3;
|
||||
wire unsigned_s3;
|
||||
fp_type_t [LANES-1:0] in_a_type_s3;
|
||||
fp_class_t [LANES-1:0] fp_clss_s3;
|
||||
wire [LANES-1:0] mant_is_zero_s3;
|
||||
wire [LANES-1:0] input_sign_s3;
|
||||
wire [LANES-1:0] rounded_sign_s3;
|
||||
@@ -322,14 +315,14 @@ module VX_fp_cvt #(
|
||||
wire [LANES-1:0] of_before_round_s3;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg3 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
|
||||
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, fp_clss_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
@@ -362,14 +355,14 @@ module VX_fp_cvt #(
|
||||
|
||||
// Per-lane handling of floating-point special results.
for (genvar i = 0; i < LANES; ++i) begin
    // Detect special case from source format; I2F casts don't produce a special result.
    assign fp_result_is_special[i] = ~is_itof_s3 & (fp_clss_s3[i].is_zero | fp_clss_s3[i].is_nan);

    // Signalling input NaNs raise the invalid flag, otherwise no flags are set.
    assign fp_special_status[i] = fp_clss_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation

    // Assemble result according to destination format.
    // The sign must be taken from this lane's own bit: casting the whole
    // input_sign_s3 vector would place lane 0's sign into bit 31 for every lane.
    assign fp_special_result[i] = fp_clss_s3[i].is_zero ? (32'(input_sign_s3[i]) << 31)          // signed zero
                                                       : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
end
|
||||
|
||||
// INT Special case handling
|
||||
@@ -381,7 +374,7 @@ module VX_fp_cvt #(
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Assemble result according to destination format
|
||||
always @(*) begin
|
||||
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
|
||||
if (input_sign_s3[i] && !fp_clss_s3[i].is_nan) begin
|
||||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
@@ -391,8 +384,8 @@ module VX_fp_cvt #(
|
||||
end
|
||||
|
||||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
|
||||
| in_a_type_s3[i].is_inf
|
||||
assign int_result_is_special[i] = fp_clss_s3[i].is_nan
|
||||
| fp_clss_s3[i].is_inf
|
||||
| of_before_round_s3[i]
|
||||
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
|
||||
|
||||
@@ -411,11 +404,11 @@ module VX_fp_cvt #(
|
||||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
: (| fp_round_sticky_bits[i]) | (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s3 & (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
@@ -435,7 +428,7 @@ module VX_fp_cvt #(
|
||||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
@@ -1,8 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
||||
@@ -16,7 +12,7 @@ module VX_fp_div #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
@@ -39,7 +35,7 @@ module VX_fp_div #(
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fdiv (dataa[i], datab[i], frm, r, f);
|
||||
dpi_fdiv (enable && valid_in, dataa[i], datab[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_fma #(
|
||||
parameter TAGW = 1,
|
||||
@@ -16,7 +12,7 @@ module VX_fp_fma #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire do_madd,
|
||||
input wire do_sub,
|
||||
@@ -68,7 +64,7 @@ module VX_fp_fma #(
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fmadd (a, b, c, frm, r, f);
|
||||
dpi_fmadd (enable && valid_in, a, b, c, frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of noncomp module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
@@ -15,8 +15,8 @@ module VX_fp_ncomp #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
@@ -30,6 +30,9 @@ module VX_fp_ncomp #(
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam EXP_BITS = 8;
|
||||
localparam MAN_BITS = 23;
|
||||
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
NEG_SUBNORM = 32'h00000004,
|
||||
@@ -38,86 +41,92 @@ module VX_fp_ncomp #(
|
||||
POS_SUBNORM = 32'h00000020,
|
||||
POS_NORM = 32'h00000040,
|
||||
POS_INF = 32'h00000080,
|
||||
SIG_NAN = 32'h00000100,
|
||||
//SIG_NAN = 32'h00000100,
|
||||
QUT_NAN = 32'h00000200;
|
||||
|
||||
wire [LANES-1:0] tmp_a_sign, tmp_b_sign;
|
||||
wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent;
|
||||
wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
|
||||
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
|
||||
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
|
||||
wire [LANES-1:0] a_sign, b_sign;
|
||||
wire [LANES-1:0][7:0] a_exponent, b_exponent;
|
||||
wire [LANES-1:0][22:0] a_mantissa, b_mantissa;
|
||||
fp_class_t [LANES-1:0] a_clss, b_clss;
|
||||
wire [LANES-1:0] a_smaller, ab_equal;
|
||||
|
||||
// Setup
|
||||
// Per-lane operand setup: split both operands into sign / exponent / mantissa,
// classify them, and pre-compute the ordering predicates consumed after the
// first pipeline register.
for (genvar i = 0; i < LANES; i++) begin
    assign a_sign[i]     = dataa[i][31];
    assign a_exponent[i] = dataa[i][30:23];
    assign a_mantissa[i] = dataa[i][22:0];

    assign b_sign[i]     = datab[i][31];
    assign b_exponent[i] = datab[i][30:23];
    assign b_mantissa[i] = datab[i][22:0];

    VX_fp_class #(
        .EXP_BITS (EXP_BITS),
        .MAN_BITS (MAN_BITS)
    ) fp_class_a (
        .exp_i  (a_exponent[i]),
        .man_i  (a_mantissa[i]),
        .clss_o (a_clss[i])
    );

    VX_fp_class #(
        .EXP_BITS (EXP_BITS),
        .MAN_BITS (MAN_BITS)
    ) fp_class_b (
        .exp_i  (b_exponent[i]),
        .man_i  (b_mantissa[i]),
        .clss_o (b_clss[i])
    );

    // Floats are sign-magnitude, so a two's-complement ($signed) compare of
    // the raw words orders two negative operands backwards (it would report
    // -1.0 < -2.0). Compare the raw bits as unsigned instead and invert the
    // outcome whenever either operand is negative.
    // For two equal negative operands this yields 1; NOTE(review): consumers
    // are expected to combine it with ab_equal, as the compare logic does.
    assign a_smaller[i] = (dataa[i] < datab[i]) ^ (a_sign[i] | b_sign[i]);

    // Bitwise-identical operands are equal, and +0 / -0 compare equal as well.
    assign ab_equal[i]  = (dataa[i] == datab[i]) | (a_clss[i].is_zero & b_clss[i].is_zero);
end
|
||||
|
||||
// Pipeline stage0
|
||||
|
||||
wire valid_in_s0;
|
||||
wire [TAGW-1:0] tag_in_s0;
|
||||
wire [`FPU_BITS-1:0] op_type_s0;
|
||||
wire [`FRM_BITS-1:0] frm_s0;
|
||||
wire [`INST_FPU_BITS-1:0] op_type_s0;
|
||||
wire [`INST_FRM_BITS-1:0] frm_s0;
|
||||
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
|
||||
wire [LANES-1:0] a_sign_s0, b_sign_s0;
|
||||
wire [LANES-1:0][7:0] a_exponent_s0;
|
||||
wire [LANES-1:0][22:0] a_mantissa_s0;
|
||||
fp_type_t [LANES-1:0] a_type_s0, b_type_s0;
|
||||
fp_class_t [LANES-1:0] a_clss_s0, b_clss_s0;
|
||||
wire [LANES-1:0] a_smaller_s0, ab_equal_s0;
|
||||
|
||||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)),
|
||||
.RESETW (1),
|
||||
.DEPTH (0)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
|
||||
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_clss, b_clss, a_smaller, ab_equal}),
|
||||
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_clss_s0, b_clss_s0, a_smaller_s0, ab_equal_s0})
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_normal) begin
|
||||
if (a_clss_s0[i].is_normal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_inf) begin
|
||||
else if (a_clss_s0[i].is_inf) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
|
||||
end
|
||||
else if (a_type_s0[i].is_zero) begin
|
||||
else if (a_clss_s0[i].is_zero) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
|
||||
end
|
||||
else if (a_type_s0[i].is_subnormal) begin
|
||||
else if (a_clss_s0[i].is_subnormal) begin
|
||||
fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
|
||||
end
|
||||
else if (a_type_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
|
||||
else if (a_clss_s0[i].is_nan) begin
|
||||
fclass_mask[i] = {22'h0, a_clss_s0[i].is_quiet, a_clss_s0[i].is_signaling, 8'h0};
|
||||
end
|
||||
else begin
|
||||
fclass_mask[i] = QUT_NAN;
|
||||
@@ -129,11 +138,11 @@ module VX_fp_ncomp #(
|
||||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
|
||||
if (a_clss_s0[i].is_nan && b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
else if (a_type_s0[i].is_nan)
|
||||
else if (a_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = datab_s0[i];
|
||||
else if (b_type_s0[i].is_nan)
|
||||
else if (b_clss_s0[i].is_nan)
|
||||
fminmax_res[i] = dataa_s0[i];
|
||||
else begin
|
||||
case (frm_s0) // use LSB to distinguish MIN and MAX
|
||||
@@ -160,33 +169,33 @@ module VX_fp_ncomp #(
|
||||
|
||||
// Comparison
|
||||
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_s0)
|
||||
`FRM_RNE: begin // LE
|
||||
`INST_FRM_RNE: begin // LE
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RTZ: begin // LS
|
||||
`INST_FRM_RTZ: begin // LS
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = 1'b1;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RDN: begin // EQ
|
||||
`INST_FRM_RDN: begin // EQ
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
fcmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end else begin
|
||||
fcmp_res[i] = {31'h0, ab_equal_s0[i]};
|
||||
end
|
||||
@@ -207,11 +216,11 @@ module VX_fp_ncomp #(
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (op_type_s0)
|
||||
`FPU_CLASS: begin
|
||||
`INST_FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
`INST_FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
tmp_fflags[i] = fcmp_fflags[i];
|
||||
end
|
||||
@@ -225,11 +234,11 @@ module VX_fp_ncomp #(
|
||||
3,4: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
tmp_fflags[i] = 0;
|
||||
tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
|
||||
tmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
|
||||
end
|
||||
//5,6,7: MOVE
|
||||
default: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
tmp_result[i] = dataa_s0[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
endcase
|
||||
@@ -238,15 +247,15 @@ module VX_fp_ncomp #(
|
||||
end
|
||||
end
|
||||
|
||||
wire has_fflags_s0 = ((op_type_s0 == `FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `FPU_CMP); // CMP
|
||||
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `INST_FPU_CMP); // CMP
|
||||
|
||||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
@@ -1,5 +1,4 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of rounding module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
@@ -34,7 +33,7 @@ module VX_fp_rounding #(
|
||||
|
||||
always @(*) begin
|
||||
case (rnd_mode_i)
|
||||
`FRM_RNE: // Decide accoring to round/sticky bits
|
||||
`INST_FRM_RNE: // Decide accoring to round/sticky bits
|
||||
case (round_sticky_bits_i)
|
||||
2'b00,
|
||||
2'b01: round_up = 1'b0; // < ulp/2 away, round down
|
||||
@@ -42,10 +41,10 @@ module VX_fp_rounding #(
|
||||
2'b11: round_up = 1'b1; // > ulp/2 away, round up
|
||||
default: round_up = 1'bx;
|
||||
endcase
|
||||
`FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
`INST_FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`INST_FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`INST_FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
default: round_up = 1'bx; // propagate x
|
||||
endcase
|
||||
end
|
||||
@@ -58,7 +57,7 @@ module VX_fp_rounding #(
|
||||
|
||||
// In case of effective subtraction (thus signs of addition operands must have differed) and a
|
||||
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `INST_FRM_RDN)
|
||||
: sign_i;
|
||||
|
||||
endmodule
|
||||
@@ -1,8 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
||||
@@ -16,7 +12,7 @@ module VX_fp_sqrt #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
@@ -38,7 +34,7 @@ module VX_fp_sqrt #(
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
dpi_fsqrt (enable && valid_in, dataa[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Classifies a single-precision operand (8-bit exponent, 23-bit mantissa)
// from its raw fields and reports the result as one-bit flags in `type_o`.
module VX_fp_type (
    input  [7:0]  exp_i,   // raw exponent field
    input  [22:0] man_i,   // raw mantissa field
    output fp_type_t type_o // class flags
);
    // Field-level predicates shared by the flags below.
    wire exp_is_min  = (exp_i == 8'd0);
    wire exp_is_max  = (exp_i == 8'hff);
    wire man_is_zero = (man_i == 23'd0);

    // NaN: maximum exponent with a non-zero mantissa.
    wire nan_w = exp_is_max && !man_is_zero;

    // A NaN with mantissa bit 22 clear is signaling; any other NaN is quiet.
    wire snan_w = nan_w && (man_i[22] == 1'b0);
    wire qnan_w = nan_w && !snan_w;

    assign type_o.is_normal    = !exp_is_min && !exp_is_max;
    assign type_o.is_zero      = exp_is_min && man_is_zero;
    assign type_o.is_subnormal = exp_is_min && !man_is_zero;
    assign type_o.is_inf       = exp_is_max && man_is_zero;
    assign type_o.is_nan       = nan_w;
    assign type_o.is_quiet     = qnan_w;
    assign type_o.is_signaling = snan_w;

endmodule
|
||||
14
hw/rtl/fp_cores/VX_fpu_define.vh
Normal file
14
hw/rtl/fp_cores/VX_fpu_define.vh
Normal file
@@ -0,0 +1,14 @@
|
||||
// Common header for the FPU cores: pulls in the global defines, the DPI
// declarations (non-synthesis builds only) and the fpu_types package contents.
// The VX_FPU_DEFINE guard makes repeated inclusion a no-op.
`ifndef VX_FPU_DEFINE
|
||||
`define VX_FPU_DEFINE
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// float_dpi.vh is included only when SYNTHESIS is not defined.
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
// Wildcard import of the fpu_types package, wrapped in the warning-suppression
// macros. NOTE(review): nothing in this header includes the file that declares
// fpu_types, so the package presumably has to be compiled beforehand - confirm
// against the build file list.
`IGNORE_WARNINGS_BEGIN
|
||||
import fpu_types::*;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
`endif
|
||||
@@ -1,7 +1,4 @@
|
||||
`ifndef SYNTHESIS
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "float_dpi.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_dpi #(
|
||||
parameter TAGW = 1
|
||||
@@ -14,8 +11,8 @@ module VX_fpu_dpi #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
@@ -76,21 +73,21 @@ module VX_fpu_dpi #(
|
||||
is_fsgnjx = 0;
|
||||
|
||||
case (op_type)
|
||||
`FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
|
||||
`FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
|
||||
`FPU_CMP: begin core_select = FPU_NCP;
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
|
||||
`INST_FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`INST_FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
|
||||
`INST_FPU_CMP: begin core_select = FPU_NCP;
|
||||
is_fle = (frm == 0);
|
||||
is_flt = (frm == 1);
|
||||
is_feq = (frm == 2);
|
||||
@@ -126,15 +123,20 @@ module VX_fpu_dpi #(
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fnmadd;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fnmsub;
|
||||
|
||||
wire fma_valid = (valid_in && core_select == FPU_FMA);
|
||||
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
|
||||
|
||||
wire fma_fire = fma_valid && fma_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
dpi_fadd (dataa[i], datab[i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (dataa[i], datab[i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (dataa[i], datab[i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (dataa[i], datab[i], datac[i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (dataa[i], datab[i], datac[i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (dataa[i], datab[i], datac[i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (dataa[i], datab[i], datac[i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
dpi_fadd (fma_fire, dataa[i], datab[i], frm, result_fadd[i], fflags_fadd[i]);
|
||||
dpi_fsub (fma_fire, dataa[i], datab[i], frm, result_fsub[i], fflags_fsub[i]);
|
||||
dpi_fmul (fma_fire, dataa[i], datab[i], frm, result_fmul[i], fflags_fmul[i]);
|
||||
dpi_fmadd (fma_fire, dataa[i], datab[i], datac[i], frm, result_fmadd[i], fflags_fmadd[i]);
|
||||
dpi_fmsub (fma_fire, dataa[i], datab[i], datac[i], frm, result_fmsub[i], fflags_fmsub[i]);
|
||||
dpi_fnmadd (fma_fire, dataa[i], datab[i], datac[i], frm, result_fnmadd[i], fflags_fnmadd[i]);
|
||||
dpi_fnmsub (fma_fire, dataa[i], datab[i], datac[i], frm, result_fnmsub[i], fflags_fnmsub[i]);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -154,10 +156,7 @@ module VX_fpu_dpi #(
|
||||
is_fmsub ? fflags_fmsub :
|
||||
is_fnmadd ? fflags_fnmadd :
|
||||
is_fnmsub ? fflags_fnmsub :
|
||||
0;
|
||||
|
||||
wire enable = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
|
||||
wire valid = (valid_in && core_select == FPU_FMA);
|
||||
0;
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
|
||||
@@ -166,13 +165,13 @@ module VX_fpu_dpi #(
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({valid, tag_in, result_fma, fflags_fma}),
|
||||
.enable (fma_ready),
|
||||
.data_in ({fma_valid, tag_in, result_fma, fflags_fma}),
|
||||
.data_out ({per_core_valid_out[FPU_FMA], per_core_tag_out[FPU_FMA], per_core_result[FPU_FMA], per_core_fflags[FPU_FMA]})
|
||||
);
|
||||
|
||||
assign per_core_has_fflags[FPU_FMA] = 1;
|
||||
assign per_core_ready_in[FPU_FMA] = enable;
|
||||
assign per_core_ready_in[FPU_FMA] = fma_ready;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
@@ -182,16 +181,18 @@ module VX_fpu_dpi #(
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_fdiv;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fdiv;
|
||||
|
||||
wire fdiv_valid = (valid_in && core_select == FPU_DIV);
|
||||
wire fdiv_ready = per_core_ready_out[FPU_DIV] || ~per_core_valid_out[FPU_DIV];
|
||||
|
||||
wire fdiv_fire = fdiv_valid && fdiv_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
dpi_fdiv (dataa[i], datab[i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
dpi_fdiv (fdiv_fire, dataa[i], datab[i], frm, result_fdiv[i], fflags_fdiv[i]);
|
||||
end
|
||||
end
|
||||
|
||||
wire enable = per_core_ready_out[FPU_DIV] || ~per_core_valid_out[FPU_DIV];
|
||||
wire valid = (valid_in && core_select == FPU_DIV);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
|
||||
.DEPTH (`LATENCY_FDIV),
|
||||
@@ -199,13 +200,13 @@ module VX_fpu_dpi #(
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({valid, tag_in, result_fdiv, fflags_fdiv}),
|
||||
.enable (fdiv_ready),
|
||||
.data_in ({fdiv_valid, tag_in, result_fdiv, fflags_fdiv}),
|
||||
.data_out ({per_core_valid_out[FPU_DIV], per_core_tag_out[FPU_DIV], per_core_result[FPU_DIV], per_core_fflags[FPU_DIV]})
|
||||
);
|
||||
|
||||
assign per_core_has_fflags[FPU_DIV] = 1;
|
||||
assign per_core_ready_in[FPU_DIV] = enable;
|
||||
assign per_core_ready_in[FPU_DIV] = fdiv_ready;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
@@ -215,16 +216,18 @@ module VX_fpu_dpi #(
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_fsqrt;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fsqrt;
|
||||
|
||||
wire fsqrt_valid = (valid_in && core_select == FPU_SQRT);
|
||||
wire fsqrt_ready = per_core_ready_out[FPU_SQRT] || ~per_core_valid_out[FPU_SQRT];
|
||||
|
||||
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
dpi_fsqrt (dataa[i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
dpi_fsqrt (fsqrt_fire, dataa[i], frm, result_fsqrt[i], fflags_fsqrt[i]);
|
||||
end
|
||||
end
|
||||
|
||||
wire enable = per_core_ready_out[FPU_SQRT] || ~per_core_valid_out[FPU_SQRT];
|
||||
wire valid = (valid_in && core_select == FPU_SQRT);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
|
||||
.DEPTH (`LATENCY_FSQRT),
|
||||
@@ -232,13 +235,13 @@ module VX_fpu_dpi #(
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({valid, tag_in, result_fsqrt, fflags_fsqrt}),
|
||||
.enable (fsqrt_ready),
|
||||
.data_in ({fsqrt_valid, tag_in, result_fsqrt, fflags_fsqrt}),
|
||||
.data_out ({per_core_valid_out[FPU_SQRT], per_core_tag_out[FPU_SQRT], per_core_result[FPU_SQRT], per_core_fflags[FPU_SQRT]})
|
||||
);
|
||||
|
||||
assign per_core_has_fflags[FPU_SQRT] = 1;
|
||||
assign per_core_ready_in[FPU_SQRT] = enable;
|
||||
assign per_core_ready_in[FPU_SQRT] = fsqrt_ready;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
@@ -257,13 +260,18 @@ module VX_fpu_dpi #(
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_utof;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_ftoi;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_ftou;
|
||||
|
||||
|
||||
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
|
||||
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
|
||||
|
||||
wire fcvt_fire = fcvt_valid && fcvt_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
dpi_itof (dataa[i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (dataa[i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (dataa[i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (dataa[i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
dpi_itof (fcvt_fire, dataa[i], frm, result_itof[i], fflags_itof[i]);
|
||||
dpi_utof (fcvt_fire, dataa[i], frm, result_utof[i], fflags_utof[i]);
|
||||
dpi_ftoi (fcvt_fire, dataa[i], frm, result_ftoi[i], fflags_ftoi[i]);
|
||||
dpi_ftou (fcvt_fire, dataa[i], frm, result_ftou[i], fflags_ftou[i]);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -279,9 +287,6 @@ module VX_fpu_dpi #(
|
||||
is_ftou ? fflags_ftou :
|
||||
0;
|
||||
|
||||
wire enable = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
|
||||
wire valid = (valid_in && core_select == FPU_CVT);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
|
||||
.DEPTH (`LATENCY_FCVT),
|
||||
@@ -289,13 +294,13 @@ module VX_fpu_dpi #(
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({valid, tag_in, result_fcvt, fflags_fcvt}),
|
||||
.enable (fcvt_ready),
|
||||
.data_in ({fcvt_valid, tag_in, result_fcvt, fflags_fcvt}),
|
||||
.data_out ({per_core_valid_out[FPU_CVT], per_core_tag_out[FPU_CVT], per_core_result[FPU_CVT], per_core_fflags[FPU_CVT]})
|
||||
);
|
||||
|
||||
assign per_core_has_fflags[FPU_CVT] = 1;
|
||||
assign per_core_ready_in[FPU_CVT] = enable;
|
||||
assign per_core_ready_in[FPU_CVT] = fcvt_ready;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
@@ -321,18 +326,23 @@ module VX_fpu_dpi #(
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_feq;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fmin;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags_fmax;
|
||||
|
||||
|
||||
wire fncp_valid = (valid_in && core_select == FPU_NCP);
|
||||
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
|
||||
|
||||
wire fncp_fire = fncp_valid && fncp_ready;
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
dpi_fclss (dataa[i], result_fclss[i]);
|
||||
dpi_flt (dataa[i], datab[i], result_flt[i], fflags_flt[i]);
|
||||
dpi_fle (dataa[i], datab[i], result_fle[i], fflags_fle[i]);
|
||||
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
|
||||
dpi_fclss (fncp_fire, dataa[i], result_fclss[i]);
|
||||
dpi_flt (fncp_fire, dataa[i], datab[i], result_flt[i], fflags_flt[i]);
|
||||
dpi_fle (fncp_fire, dataa[i], datab[i], result_fle[i], fflags_fle[i]);
|
||||
dpi_feq (fncp_fire, dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (fncp_fire, dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (fncp_fire, dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (fncp_fire, dataa[i], datab[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (fncp_fire, dataa[i], datab[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (fncp_fire, dataa[i], datab[i], result_fsgnjx[i]);
|
||||
result_fmv[i] = dataa[i];
|
||||
end
|
||||
end
|
||||
@@ -357,9 +367,6 @@ module VX_fpu_dpi #(
|
||||
is_fmax ? fflags_fmax :
|
||||
0;
|
||||
|
||||
wire enable = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
|
||||
wire valid = (valid_in && core_select == FPU_NCP);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + TAGW + 1 + `NUM_THREADS * (32 + $bits(fflags_t))),
|
||||
.DEPTH (`LATENCY_FNCP),
|
||||
@@ -367,12 +374,12 @@ module VX_fpu_dpi #(
|
||||
) shift_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (enable),
|
||||
.data_in ({valid, tag_in, has_fflags_fncp, result_fncp, fflags_fncp}),
|
||||
.enable (fncp_ready),
|
||||
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_fncp}),
|
||||
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
|
||||
);
|
||||
|
||||
assign per_core_ready_in[FPU_NCP] = enable;
|
||||
assign per_core_ready_in[FPU_NCP] = fncp_ready;
|
||||
|
||||
end
|
||||
endgenerate
|
||||
@@ -410,6 +417,4 @@ module VX_fpu_dpi #(
|
||||
|
||||
assign ready_in = per_core_ready_in[core_select];
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
endmodule
|
||||
@@ -1,7 +1,7 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_fpga #(
|
||||
parameter TAGW = 1
|
||||
parameter TAGW = 4
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -11,8 +11,8 @@ module VX_fpu_fpga #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
@@ -54,19 +54,19 @@ module VX_fpu_fpga #(
|
||||
is_itof = 0;
|
||||
is_signed = 0;
|
||||
case (op_type)
|
||||
`FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
|
||||
`FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
|
||||
`FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
|
||||
`FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
|
||||
`FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
|
||||
`FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
|
||||
`FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`FPU_CVTWUS: begin core_select = FPU_CVT; end
|
||||
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`INST_FPU_CVTWUS: begin core_select = FPU_CVT; end
|
||||
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`INST_FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
@@ -1,4 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
@@ -18,8 +18,8 @@ module VX_fpu_fpnew #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
@@ -80,7 +80,7 @@ module VX_fpu_fpnew #(
|
||||
fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;
|
||||
|
||||
reg [FOP_BITS-1:0] fpu_op;
|
||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||
reg [`INST_FRM_BITS-1:0] fpu_rnd;
|
||||
reg fpu_op_mod;
|
||||
reg fpu_has_fflags, fpu_has_fflags_out;
|
||||
|
||||
@@ -94,38 +94,38 @@ module VX_fpu_fpnew #(
|
||||
fpu_operands[2] = datac;
|
||||
|
||||
case (op_type)
|
||||
`FPU_ADD: begin
|
||||
`INST_FPU_ADD: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
end
|
||||
`FPU_SUB: begin
|
||||
`INST_FPU_SUB: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
fpu_op_mod = 1;
|
||||
end
|
||||
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
|
||||
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||
`FPU_MISC: begin
|
||||
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`INST_FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||
`INST_FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||
`INST_FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||
`INST_FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`INST_FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
|
||||
`INST_FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||
`INST_FPU_MISC: begin
|
||||
case (frm)
|
||||
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
|
||||
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
|
||||
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
|
||||
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
||||
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
||||
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
|
||||
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end
|
||||
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end
|
||||
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end
|
||||
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end
|
||||
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end
|
||||
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
32
hw/rtl/fp_cores/VX_fpu_types.vh
Normal file
32
hw/rtl/fp_cores/VX_fpu_types.vh
Normal file
@@ -0,0 +1,32 @@
|
||||
// Shared FPU type declarations: the operand-class flags (fp_class_t) and the
// exception-flag bundle (fflags_t), plus macros giving their bit widths.
// The VX_FPU_TYPES guard makes repeated inclusion a no-op.
`ifndef VX_FPU_TYPES
|
||||
`define VX_FPU_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package fpu_types;
|
||||
|
||||
// Operand classification flags. Being a packed struct, the first member
// listed occupies the most significant bit.
typedef struct packed {
|
||||
logic is_normal; // exponent neither all-zeros nor all-ones
|
||||
logic is_zero; // all-zeros exponent, zero mantissa
|
||||
logic is_subnormal; // all-zeros exponent, non-zero mantissa
|
||||
logic is_inf; // all-ones exponent, zero mantissa
|
||||
logic is_nan; // all-ones exponent, non-zero mantissa
|
||||
logic is_quiet; // NaN with mantissa MSB set
|
||||
logic is_signaling; // NaN with mantissa MSB clear
|
||||
} fp_class_t;
|
||||
|
||||
// Width in bits of fp_class_t. Text macros are not scoped to the package,
// hence the fully qualified type name.
`define FP_CLASS_BITS $bits(fpu_types::fp_class_t)
|
||||
|
||||
// Floating-point exception flags; the number in each comment is the bit
// position within the packed value (NV is the MSB, NX the LSB).
typedef struct packed {
|
||||
logic NV; // 4-Invalid
|
||||
logic DZ; // 3-Divide by zero
|
||||
logic OF; // 2-Overflow
|
||||
logic UF; // 1-Underflow
|
||||
logic NX; // 0-Inexact
|
||||
} fflags_t;
|
||||
|
||||
// Width in bits of fflags_t.
`define FFLAGS_BITS $bits(fpu_types::fflags_t)
|
||||
|
||||
endpackage
|
||||
|
||||
`endif
|
||||
Reference in New Issue
Block a user