Merge branch 'master' into graphics

This commit is contained in:
Blaise Tine
2021-10-15 19:32:11 -07:00
542 changed files with 124552 additions and 18682 deletions

View File

@@ -0,0 +1,28 @@
`include "VX_fpu_define.vh"
// Classifies a floating-point operand from its raw exponent and mantissa
// fields. The field widths are parameterized; the defaults are 8 exponent
// bits and 23 mantissa bits.
//
//   exp_i  : exponent field of the operand
//   man_i  : mantissa field of the operand
//   clss_o : one flag per category (normal, zero, subnormal, inf, nan,
//            quiet, signaling), derived purely combinationally
module VX_fp_class # (
    parameter MAN_BITS = 23,
    parameter EXP_BITS = 8
) (
    input  [EXP_BITS-1:0] exp_i,
    input  [MAN_BITS-1:0] man_i,
    output fp_class_t     clss_o
);
    // Decode each field once; every category below is a combination of these.
    wire exp_all_zeros = (exp_i == {EXP_BITS{1'b0}});
    wire exp_all_ones  = (exp_i == {EXP_BITS{1'b1}});
    wire man_all_zeros = (man_i == {MAN_BITS{1'b0}});

    // NaN: exponent all ones with a non-zero mantissa.
    wire nan_w  = exp_all_ones && !man_all_zeros;
    // A NaN is signaling when the mantissa's top bit is clear.
    wire snan_w = nan_w && !man_i[MAN_BITS-1];

    assign clss_o.is_normal    = !exp_all_zeros && !exp_all_ones;
    assign clss_o.is_zero      = exp_all_zeros && man_all_zeros;
    assign clss_o.is_subnormal = exp_all_zeros && !man_all_zeros;
    assign clss_o.is_inf       = exp_all_ones && man_all_zeros;
    assign clss_o.is_nan       = nan_w;
    assign clss_o.is_quiet     = nan_w && !snan_w;
    assign clss_o.is_signaling = snan_w;
endmodule

View File

@@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_fpu_define.vh"
/// Modified port of cast module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
@@ -15,7 +15,7 @@ module VX_fp_cvt #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire is_itof,
input wire is_signed,
@@ -59,13 +59,16 @@ module VX_fp_cvt #(
// Input processing
fp_type_t [LANES-1:0] in_a_type;
fp_class_t [LANES-1:0] fp_clss;
for (genvar i = 0; i < LANES; ++i) begin
VX_fp_type fp_type (
VX_fp_class #(
.EXP_BITS (EXP_BITS),
.MAN_BITS (MAN_BITS)
) fp_class (
.exp_i (dataa[i][30:23]),
.man_i (dataa[i][22:0]),
.type_o (in_a_type[i])
.clss_o (fp_clss[i])
);
end
@@ -74,16 +77,18 @@ module VX_fp_cvt #(
wire [LANES-1:0] input_sign;
for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN
wire [INT_MAN_WIDTH-1:0] int_mantissa;
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
wire fmt_sign = dataa[i][31];
wire int_sign = dataa[i][31] & is_signed;
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
assign fmt_mantissa = INT_MAN_WIDTH'({fp_clss[i].is_normal, dataa[i][MAN_BITS-1:0]});
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS +: EXP_BITS]} +
{1'b0, fp_clss[i].is_subnormal};
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
`IGNORE_WARNINGS_END
end
// Pipeline stage0
@@ -93,7 +98,7 @@ module VX_fp_cvt #(
wire is_itof_s0;
wire unsigned_s0;
wire [2:0] rnd_mode_s0;
fp_type_t [LANES-1:0] in_a_type_s0;
fp_class_t [LANES-1:0] fp_clss_s0;
wire [LANES-1:0] input_sign_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
@@ -101,14 +106,14 @@ module VX_fp_cvt #(
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, fp_clss, input_sign, fmt_exponent, encoded_mant}),
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
);
// Normalization
@@ -119,8 +124,8 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
wire mant_is_nonzero;
VX_lzc #(
.WIDTH (INT_MAN_WIDTH),
.MODE (1)
.N (INT_MAN_WIDTH),
.MODE (1)
) lzc (
.in_i (encoded_mant_s0[i]),
.cnt_o (renorm_shamt_s0[i]),
@@ -134,20 +139,12 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN
// Input mantissa needs to be normalized
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
wire [INT_EXP_WIDTH-1:0] int_input_exp;
// Realign input mantissa, append zeroes if destination is wider
// Realign input mantissa, append zeroes if destination is wider
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
// Unbias exponent and compensate for shift
assign fp_input_exp = fmt_exponent_s0[i] +
{1'b0, in_a_type_s0[i].is_subnormal} +
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
{1'b0, renorm_shamt_s0[i]};
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
wire [INT_EXP_WIDTH-1:0] fp_input_exp = fmt_exponent_s0[i] + (FMT_SHIFT_COMPENSATION - EXP_BIAS) - {1'b0, renorm_shamt_s0[i]};
wire [INT_EXP_WIDTH-1:0] int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
`IGNORE_WARNINGS_END
@@ -160,21 +157,21 @@ module VX_fp_cvt #(
wire is_itof_s1;
wire unsigned_s1;
wire [2:0] rnd_mode_s1;
fp_type_t [LANES-1:0] in_a_type_s1;
fp_class_t [LANES-1:0] fp_clss_s1;
wire [LANES-1:0] input_sign_s1;
wire [LANES-1:0] mant_is_zero_s1;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, fp_clss_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
);
// Perform adjustments to mantissa and exponent
@@ -183,39 +180,35 @@ module VX_fp_cvt #(
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
for (genvar i = 0; i < LANES; ++i) begin
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
for (genvar i = 0; i < LANES; ++i) begin
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
// Rebias the exponent
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp; // take exponent as is, only look at lower bits
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
final_exp = input_exp_s1[i] + EXP_BIAS; // take exponent as is, only look at lower bits
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s1) begin
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
if ($signed(input_exp_s1[i]) >= $signed(2**EXP_BITS-1-EXP_BIAS)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
end else if ($signed(input_exp_s1[i]) < $signed(-MAN_BITS-EXP_BIAS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp) < $signed(1)) begin
denorm_shamt = (2 + MAN_BITS); // to sticky
end else if ($signed(input_exp_s1[i]) < $signed(1-EXP_BIAS)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
denorm_shamt = (1-EXP_BIAS) - input_exp_s1[i]; // adjust right shifting
end
end else begin
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
@@ -224,7 +217,7 @@ module VX_fp_cvt #(
of_before_round = 1'b1;
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
denorm_shamt = MAX_INT_WIDTH+1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
@@ -245,7 +238,7 @@ module VX_fp_cvt #(
wire is_itof_s2;
wire unsigned_s2;
wire [2:0] rnd_mode_s2;
fp_type_t [LANES-1:0] in_a_type_s2;
fp_class_t [LANES-1:0] fp_clss_s2;
wire [LANES-1:0] mant_is_zero_s2;
wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
@@ -253,14 +246,14 @@ module VX_fp_cvt #(
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_class_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, fp_clss_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
);
wire [LANES-1:0] rounded_sign;
@@ -314,7 +307,7 @@ module VX_fp_cvt #(
wire [TAGW-1:0] tag_in_s3;
wire is_itof_s3;
wire unsigned_s3;
fp_type_t [LANES-1:0] in_a_type_s3;
fp_class_t [LANES-1:0] fp_clss_s3;
wire [LANES-1:0] mant_is_zero_s3;
wire [LANES-1:0] input_sign_s3;
wire [LANES-1:0] rounded_sign_s3;
@@ -322,14 +315,14 @@ module VX_fp_cvt #(
wire [LANES-1:0] of_before_round_s3;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_class_t) + 1 + 1 + 32 + 1 + 1)),
.RESETW (1)
) pipe_reg3 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, fp_clss_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, fp_clss_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
);
wire [LANES-1:0] of_after_round;
@@ -362,14 +355,14 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
// Detect special case from source format, I2F casts don't produce a special result
assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan);
assign fp_result_is_special[i] = ~is_itof_s3 & (fp_clss_s3[i].is_zero | fp_clss_s3[i].is_nan);
// Signalling input NaNs raise invalid flag, otherwise no flags set
assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
assign fp_special_status[i] = fp_clss_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
// Assemble result according to destination format
assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
assign fp_special_result[i] = fp_clss_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
end
// INT Special case handling
@@ -381,7 +374,7 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
// Assemble result according to destination format
always @(*) begin
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
if (input_sign_s3[i] && !fp_clss_s3[i].is_nan) begin
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
end else begin
@@ -391,8 +384,8 @@ module VX_fp_cvt #(
end
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
| in_a_type_s3[i].is_inf
assign int_result_is_special[i] = fp_clss_s3[i].is_nan
| fp_clss_s3[i].is_inf
| of_before_round_s3[i]
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
@@ -411,11 +404,11 @@ module VX_fp_cvt #(
wire [31:0] fp_result, int_result;
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
: (| fp_round_sticky_bits[i]) | (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.OF = ~is_itof_s3 & (~fp_clss_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.UF = uf_after_round[i] & inexact;
assign fp_regular_status.NX = inexact;
@@ -435,7 +428,7 @@ module VX_fp_cvt #(
assign stall = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFLAGS_BITS)),
.RESETW (1)
) pipe_reg4 (
.clk (clk),

View File

@@ -1,8 +1,4 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
`include "VX_fpu_define.vh"
module VX_fp_div #(
parameter TAGW = 1,
@@ -16,7 +12,7 @@ module VX_fp_div #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
input wire [LANES-1:0][31:0] datab,
@@ -39,7 +35,7 @@ module VX_fp_div #(
fflags_t f;
always @(*) begin
dpi_fdiv (dataa[i], datab[i], frm, r, f);
dpi_fdiv (enable && valid_in, dataa[i], datab[i], frm, r, f);
end
`UNUSED_VAR (f)

View File

@@ -1,8 +1,4 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
`include "VX_fpu_define.vh"
module VX_fp_fma #(
parameter TAGW = 1,
@@ -16,7 +12,7 @@ module VX_fp_fma #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire do_madd,
input wire do_sub,
@@ -68,7 +64,7 @@ module VX_fp_fma #(
fflags_t f;
always @(*) begin
dpi_fmadd (a, b, c, frm, r, f);
dpi_fmadd (enable && valid_in, a, b, c, frm, r, f);
end
`UNUSED_VAR (f)

View File

@@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_fpu_define.vh"
/// Modified port of noncomp module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
@@ -15,8 +15,8 @@ module VX_fp_ncomp #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
input wire [LANES-1:0][31:0] datab,
@@ -30,6 +30,9 @@ module VX_fp_ncomp #(
input wire ready_out,
output wire valid_out
);
localparam EXP_BITS = 8;
localparam MAN_BITS = 23;
localparam NEG_INF = 32'h00000001,
NEG_NORM = 32'h00000002,
NEG_SUBNORM = 32'h00000004,
@@ -38,86 +41,92 @@ module VX_fp_ncomp #(
POS_SUBNORM = 32'h00000020,
POS_NORM = 32'h00000040,
POS_INF = 32'h00000080,
SIG_NAN = 32'h00000100,
//SIG_NAN = 32'h00000100,
QUT_NAN = 32'h00000200;
wire [LANES-1:0] tmp_a_sign, tmp_b_sign;
wire [LANES-1:0][7:0] tmp_a_exponent, tmp_b_exponent;
wire [LANES-1:0][22:0] tmp_a_mantissa, tmp_b_mantissa;
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
wire [LANES-1:0] a_sign, b_sign;
wire [LANES-1:0][7:0] a_exponent, b_exponent;
wire [LANES-1:0][22:0] a_mantissa, b_mantissa;
fp_class_t [LANES-1:0] a_clss, b_clss;
wire [LANES-1:0] a_smaller, ab_equal;
// Setup
for (genvar i = 0; i < LANES; i++) begin
assign tmp_a_sign[i] = dataa[i][31];
assign tmp_a_exponent[i] = dataa[i][30:23];
assign tmp_a_mantissa[i] = dataa[i][22:0];
assign a_sign[i] = dataa[i][31];
assign a_exponent[i] = dataa[i][30:23];
assign a_mantissa[i] = dataa[i][22:0];
assign tmp_b_sign[i] = datab[i][31];
assign tmp_b_exponent[i] = datab[i][30:23];
assign tmp_b_mantissa[i] = datab[i][22:0];
assign b_sign[i] = datab[i][31];
assign b_exponent[i] = datab[i][30:23];
assign b_mantissa[i] = datab[i][22:0];
VX_fp_type fp_type_a (
.exp_i (tmp_a_exponent[i]),
.man_i (tmp_a_mantissa[i]),
.type_o (tmp_a_type[i])
VX_fp_class #(
.EXP_BITS (EXP_BITS),
.MAN_BITS (MAN_BITS)
) fp_class_a (
.exp_i (a_exponent[i]),
.man_i (a_mantissa[i]),
.clss_o (a_clss[i])
);
VX_fp_type fp_type_b (
.exp_i (tmp_b_exponent[i]),
.man_i (tmp_b_mantissa[i]),
.type_o (tmp_b_type[i])
VX_fp_class #(
.EXP_BITS (EXP_BITS),
.MAN_BITS (MAN_BITS)
) fp_class_b (
.exp_i (b_exponent[i]),
.man_i (b_mantissa[i]),
.clss_o (b_clss[i])
);
assign tmp_a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]);
assign tmp_ab_equal[i] = (dataa[i] == datab[i]) | (tmp_a_type[i].is_zero & tmp_b_type[i].is_zero);
assign a_smaller[i] = $signed(dataa[i]) < $signed(datab[i]);
assign ab_equal[i] = (dataa[i] == datab[i]) | (a_clss[i].is_zero & b_clss[i].is_zero);
end
// Pipeline stage0
wire valid_in_s0;
wire [TAGW-1:0] tag_in_s0;
wire [`FPU_BITS-1:0] op_type_s0;
wire [`FRM_BITS-1:0] frm_s0;
wire [`INST_FPU_BITS-1:0] op_type_s0;
wire [`INST_FRM_BITS-1:0] frm_s0;
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
wire [LANES-1:0] a_sign_s0, b_sign_s0;
wire [LANES-1:0][7:0] a_exponent_s0;
wire [LANES-1:0][22:0] a_mantissa_s0;
fp_type_t [LANES-1:0] a_type_s0, b_type_s0;
fp_class_t [LANES-1:0] a_clss_s0, b_clss_s0;
wire [LANES-1:0] a_smaller_s0, ab_equal_s0;
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)),
.RESETW (1),
.DEPTH (0)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, tmp_a_sign, tmp_b_sign, tmp_a_exponent, tmp_a_mantissa, tmp_a_type, tmp_b_type, tmp_a_smaller, tmp_ab_equal}),
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_type_s0, b_type_s0, a_smaller_s0, ab_equal_s0})
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_clss, b_clss, a_smaller, ab_equal}),
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_clss_s0, b_clss_s0, a_smaller_s0, ab_equal_s0})
);
// FCLASS
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type_s0[i].is_normal) begin
if (a_clss_s0[i].is_normal) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_NORM : POS_NORM;
end
else if (a_type_s0[i].is_inf) begin
else if (a_clss_s0[i].is_inf) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_INF : POS_INF;
end
else if (a_type_s0[i].is_zero) begin
else if (a_clss_s0[i].is_zero) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_ZERO : POS_ZERO;
end
else if (a_type_s0[i].is_subnormal) begin
else if (a_clss_s0[i].is_subnormal) begin
fclass_mask[i] = a_sign_s0[i] ? NEG_SUBNORM : POS_SUBNORM;
end
else if (a_type_s0[i].is_nan) begin
fclass_mask[i] = {22'h0, a_type_s0[i].is_quiet, a_type_s0[i].is_signaling, 8'h0};
else if (a_clss_s0[i].is_nan) begin
fclass_mask[i] = {22'h0, a_clss_s0[i].is_quiet, a_clss_s0[i].is_signaling, 8'h0};
end
else begin
fclass_mask[i] = QUT_NAN;
@@ -129,11 +138,11 @@ module VX_fp_ncomp #(
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
if (a_clss_s0[i].is_nan && b_clss_s0[i].is_nan)
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
else if (a_type_s0[i].is_nan)
else if (a_clss_s0[i].is_nan)
fminmax_res[i] = datab_s0[i];
else if (b_type_s0[i].is_nan)
else if (b_clss_s0[i].is_nan)
fminmax_res[i] = dataa_s0[i];
else begin
case (frm_s0) // use LSB to distinguish MIN and MAX
@@ -160,33 +169,33 @@ module VX_fp_ncomp #(
// Comparison
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_s0)
`FRM_RNE: begin // LE
`INST_FRM_RNE: begin // LE
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = 1'b1;
end else begin
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
end
end
`FRM_RTZ: begin // LS
`INST_FRM_RTZ: begin // LS
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = 1'b1;
end else begin
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
end
end
`FRM_RDN: begin // EQ
`INST_FRM_RDN: begin // EQ
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
if (a_clss_s0[i].is_nan || b_clss_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
fcmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
fcmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
end else begin
fcmp_res[i] = {31'h0, ab_equal_s0[i]};
end
@@ -207,11 +216,11 @@ module VX_fp_ncomp #(
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (op_type_s0)
`FPU_CLASS: begin
`INST_FPU_CLASS: begin
tmp_result[i] = fclass_mask[i];
tmp_fflags[i] = 'x;
end
`FPU_CMP: begin
`INST_FPU_CMP: begin
tmp_result[i] = fcmp_res[i];
tmp_fflags[i] = fcmp_fflags[i];
end
@@ -225,11 +234,11 @@ module VX_fp_ncomp #(
3,4: begin
tmp_result[i] = fminmax_res[i];
tmp_fflags[i] = 0;
tmp_fflags[i].NV = a_type_s0[i].is_signaling | b_type_s0[i].is_signaling;
tmp_fflags[i].NV = a_clss_s0[i].is_signaling | b_clss_s0[i].is_signaling;
end
//5,6,7: MOVE
default: begin
tmp_result[i] = dataa[i];
tmp_result[i] = dataa_s0[i];
tmp_fflags[i] = 'x;
end
endcase
@@ -238,15 +247,15 @@ module VX_fp_ncomp #(
end
end
wire has_fflags_s0 = ((op_type_s0 == `FPU_MISC)
&& (frm_s0 == 3 // MIN
|| frm_s0 == 4)) // MAX
|| (op_type_s0 == `FPU_CMP); // CMP
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
&& (frm_s0 == 3 // MIN
|| frm_s0 == 4)) // MAX
|| (op_type_s0 == `INST_FPU_CMP); // CMP
assign stall = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFLAGS_BITS)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),

View File

@@ -1,5 +1,4 @@
`include "VX_define.vh"
`include "VX_fpu_define.vh"
/// Modified port of rounding module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
@@ -34,7 +33,7 @@ module VX_fp_rounding #(
always @(*) begin
case (rnd_mode_i)
`FRM_RNE: // Decide accoring to round/sticky bits
`INST_FRM_RNE: // Decide accoring to round/sticky bits
case (round_sticky_bits_i)
2'b00,
2'b01: round_up = 1'b0; // < ulp/2 away, round down
@@ -42,10 +41,10 @@ module VX_fp_rounding #(
2'b11: round_up = 1'b1; // > ulp/2 away, round up
default: round_up = 1'bx;
endcase
`FRM_RTZ: round_up = 1'b0; // always round down
`FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
`FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
`FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
`INST_FRM_RTZ: round_up = 1'b0; // always round down
`INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
`INST_FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
`INST_FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
default: round_up = 1'bx; // propagate x
endcase
end
@@ -58,7 +57,7 @@ module VX_fp_rounding #(
// In case of effective subtraction (thus signs of addition operands must have differed) and a
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `INST_FRM_RDN)
: sign_i;
endmodule

View File

@@ -1,8 +1,4 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
`include "VX_fpu_define.vh"
module VX_fp_sqrt #(
parameter TAGW = 1,
@@ -16,7 +12,7 @@ module VX_fp_sqrt #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
output wire [LANES-1:0][31:0] result,
@@ -38,7 +34,7 @@ module VX_fp_sqrt #(
fflags_t f;
always @(*) begin
dpi_fsqrt (dataa[i], frm, r, f);
dpi_fsqrt (enable && valid_in, dataa[i], frm, r, f);
end
`UNUSED_VAR (f)

View File

@@ -1,27 +0,0 @@
`include "VX_define.vh"
// Classifies a value from an 8-bit exponent field and a 23-bit mantissa
// field, producing one flag per category in type_o. Purely combinational.
module VX_fp_type (
    // inputs
    input [7:0]  exp_i,
    input [22:0] man_i,
    // outputs
    output fp_type_t type_o
);
    // Field decodes shared by all categories.
    wire exp_min  = (exp_i == 8'h00);
    wire exp_max  = (exp_i == 8'hff);
    wire man_none = (man_i == 23'h0);

    // NaN: maximum exponent with a non-zero mantissa.
    wire nan_w  = exp_max && !man_none;
    // Signaling NaN: mantissa bit 22 is clear.
    wire snan_w = nan_w && !man_i[22];

    assign type_o.is_normal    = !exp_min && !exp_max;
    assign type_o.is_zero      = exp_min && man_none;
    assign type_o.is_subnormal = exp_min && !man_none;
    assign type_o.is_inf       = exp_max && man_none;
    assign type_o.is_nan       = nan_w;
    assign type_o.is_quiet     = nan_w && !snan_w;
    assign type_o.is_signaling = snan_w;
endmodule

View File

@@ -0,0 +1,14 @@
// Include guard: the contents below are processed only the first time this
// header is included in a compilation unit.
`ifndef VX_FPU_DEFINE
`define VX_FPU_DEFINE
`include "VX_define.vh"
// float_dpi.vh is included only when SYNTHESIS is not defined.
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// Wildcard import of the fpu_types package. The IGNORE_WARNINGS_* macros are
// defined elsewhere; presumably they suppress tool warnings around the
// import -- TODO confirm against their definition.
`IGNORE_WARNINGS_BEGIN
import fpu_types::*;
`IGNORE_WARNINGS_END
`endif

View File

@@ -1,7 +1,4 @@
`ifndef SYNTHESIS
`include "VX_define.vh"
`include "float_dpi.vh"
`include "VX_fpu_define.vh"
module VX_fpu_dpi #(
parameter TAGW = 1
@@ -14,8 +11,8 @@ module VX_fpu_dpi #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -76,21 +73,21 @@ module VX_fpu_dpi #(
is_fsgnjx = 0;
case (op_type)
`FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
`FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
`FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
`FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
`FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
`FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
`FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
`FPU_DIV: begin core_select = FPU_DIV; end
`FPU_SQRT: begin core_select = FPU_SQRT; end
`FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
`FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
`FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
`FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
`FPU_CMP: begin core_select = FPU_NCP;
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
`INST_FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
`INST_FPU_CMP: begin core_select = FPU_NCP;
is_fle = (frm == 0);
is_flt = (frm == 1);
is_feq = (frm == 2);
@@ -126,15 +123,20 @@ module VX_fpu_dpi #(
fflags_t [`NUM_THREADS-1:0] fflags_fnmadd;
fflags_t [`NUM_THREADS-1:0] fflags_fnmsub;
wire fma_valid = (valid_in && core_select == FPU_FMA);
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
wire fma_fire = fma_valid && fma_ready;
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
dpi_fadd (dataa[i], datab[i], frm, result_fadd[i], fflags_fadd[i]);
dpi_fsub (dataa[i], datab[i], frm, result_fsub[i], fflags_fsub[i]);
dpi_fmul (dataa[i], datab[i], frm, result_fmul[i], fflags_fmul[i]);
dpi_fmadd (dataa[i], datab[i], datac[i], frm, result_fmadd[i], fflags_fmadd[i]);
dpi_fmsub (dataa[i], datab[i], datac[i], frm, result_fmsub[i], fflags_fmsub[i]);
dpi_fnmadd (dataa[i], datab[i], datac[i], frm, result_fnmadd[i], fflags_fnmadd[i]);
dpi_fnmsub (dataa[i], datab[i], datac[i], frm, result_fnmsub[i], fflags_fnmsub[i]);
dpi_fadd (fma_fire, dataa[i], datab[i], frm, result_fadd[i], fflags_fadd[i]);
dpi_fsub (fma_fire, dataa[i], datab[i], frm, result_fsub[i], fflags_fsub[i]);
dpi_fmul (fma_fire, dataa[i], datab[i], frm, result_fmul[i], fflags_fmul[i]);
dpi_fmadd (fma_fire, dataa[i], datab[i], datac[i], frm, result_fmadd[i], fflags_fmadd[i]);
dpi_fmsub (fma_fire, dataa[i], datab[i], datac[i], frm, result_fmsub[i], fflags_fmsub[i]);
dpi_fnmadd (fma_fire, dataa[i], datab[i], datac[i], frm, result_fnmadd[i], fflags_fnmadd[i]);
dpi_fnmsub (fma_fire, dataa[i], datab[i], datac[i], frm, result_fnmsub[i], fflags_fnmsub[i]);
end
end
@@ -154,10 +156,7 @@ module VX_fpu_dpi #(
is_fmsub ? fflags_fmsub :
is_fnmadd ? fflags_fnmadd :
is_fnmsub ? fflags_fnmsub :
0;
wire enable = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
wire valid = (valid_in && core_select == FPU_FMA);
0;
VX_shift_register #(
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
@@ -166,13 +165,13 @@ module VX_fpu_dpi #(
) shift_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in ({valid, tag_in, result_fma, fflags_fma}),
.enable (fma_ready),
.data_in ({fma_valid, tag_in, result_fma, fflags_fma}),
.data_out ({per_core_valid_out[FPU_FMA], per_core_tag_out[FPU_FMA], per_core_result[FPU_FMA], per_core_fflags[FPU_FMA]})
);
assign per_core_has_fflags[FPU_FMA] = 1;
assign per_core_ready_in[FPU_FMA] = enable;
assign per_core_ready_in[FPU_FMA] = fma_ready;
end
endgenerate
@@ -182,16 +181,18 @@ module VX_fpu_dpi #(
wire [`NUM_THREADS-1:0][31:0] result_fdiv;
fflags_t [`NUM_THREADS-1:0] fflags_fdiv;
wire fdiv_valid = (valid_in && core_select == FPU_DIV);
wire fdiv_ready = per_core_ready_out[FPU_DIV] || ~per_core_valid_out[FPU_DIV];
wire fdiv_fire = fdiv_valid && fdiv_ready;
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
dpi_fdiv (dataa[i], datab[i], frm, result_fdiv[i], fflags_fdiv[i]);
dpi_fdiv (fdiv_fire, dataa[i], datab[i], frm, result_fdiv[i], fflags_fdiv[i]);
end
end
wire enable = per_core_ready_out[FPU_DIV] || ~per_core_valid_out[FPU_DIV];
wire valid = (valid_in && core_select == FPU_DIV);
VX_shift_register #(
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
.DEPTH (`LATENCY_FDIV),
@@ -199,13 +200,13 @@ module VX_fpu_dpi #(
) shift_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in ({valid, tag_in, result_fdiv, fflags_fdiv}),
.enable (fdiv_ready),
.data_in ({fdiv_valid, tag_in, result_fdiv, fflags_fdiv}),
.data_out ({per_core_valid_out[FPU_DIV], per_core_tag_out[FPU_DIV], per_core_result[FPU_DIV], per_core_fflags[FPU_DIV]})
);
assign per_core_has_fflags[FPU_DIV] = 1;
assign per_core_ready_in[FPU_DIV] = enable;
assign per_core_ready_in[FPU_DIV] = fdiv_ready;
end
endgenerate
@@ -215,16 +216,18 @@ module VX_fpu_dpi #(
wire [`NUM_THREADS-1:0][31:0] result_fsqrt;
fflags_t [`NUM_THREADS-1:0] fflags_fsqrt;
wire fsqrt_valid = (valid_in && core_select == FPU_SQRT);
wire fsqrt_ready = per_core_ready_out[FPU_SQRT] || ~per_core_valid_out[FPU_SQRT];
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
dpi_fsqrt (dataa[i], frm, result_fsqrt[i], fflags_fsqrt[i]);
dpi_fsqrt (fsqrt_fire, dataa[i], frm, result_fsqrt[i], fflags_fsqrt[i]);
end
end
wire enable = per_core_ready_out[FPU_SQRT] || ~per_core_valid_out[FPU_SQRT];
wire valid = (valid_in && core_select == FPU_SQRT);
VX_shift_register #(
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
.DEPTH (`LATENCY_FSQRT),
@@ -232,13 +235,13 @@ module VX_fpu_dpi #(
) shift_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in ({valid, tag_in, result_fsqrt, fflags_fsqrt}),
.enable (fsqrt_ready),
.data_in ({fsqrt_valid, tag_in, result_fsqrt, fflags_fsqrt}),
.data_out ({per_core_valid_out[FPU_SQRT], per_core_tag_out[FPU_SQRT], per_core_result[FPU_SQRT], per_core_fflags[FPU_SQRT]})
);
assign per_core_has_fflags[FPU_SQRT] = 1;
assign per_core_ready_in[FPU_SQRT] = enable;
assign per_core_ready_in[FPU_SQRT] = fsqrt_ready;
end
endgenerate
@@ -257,13 +260,18 @@ module VX_fpu_dpi #(
fflags_t [`NUM_THREADS-1:0] fflags_utof;
fflags_t [`NUM_THREADS-1:0] fflags_ftoi;
fflags_t [`NUM_THREADS-1:0] fflags_ftou;
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
wire fcvt_fire = fcvt_valid && fcvt_ready;
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
dpi_itof (dataa[i], frm, result_itof[i], fflags_itof[i]);
dpi_utof (dataa[i], frm, result_utof[i], fflags_utof[i]);
dpi_ftoi (dataa[i], frm, result_ftoi[i], fflags_ftoi[i]);
dpi_ftou (dataa[i], frm, result_ftou[i], fflags_ftou[i]);
dpi_itof (fcvt_fire, dataa[i], frm, result_itof[i], fflags_itof[i]);
dpi_utof (fcvt_fire, dataa[i], frm, result_utof[i], fflags_utof[i]);
dpi_ftoi (fcvt_fire, dataa[i], frm, result_ftoi[i], fflags_ftoi[i]);
dpi_ftou (fcvt_fire, dataa[i], frm, result_ftou[i], fflags_ftou[i]);
end
end
@@ -279,9 +287,6 @@ module VX_fpu_dpi #(
is_ftou ? fflags_ftou :
0;
wire enable = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
wire valid = (valid_in && core_select == FPU_CVT);
VX_shift_register #(
.DATAW (1 + TAGW + `NUM_THREADS * (32 + $bits(fflags_t))),
.DEPTH (`LATENCY_FCVT),
@@ -289,13 +294,13 @@ module VX_fpu_dpi #(
) shift_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in ({valid, tag_in, result_fcvt, fflags_fcvt}),
.enable (fcvt_ready),
.data_in ({fcvt_valid, tag_in, result_fcvt, fflags_fcvt}),
.data_out ({per_core_valid_out[FPU_CVT], per_core_tag_out[FPU_CVT], per_core_result[FPU_CVT], per_core_fflags[FPU_CVT]})
);
assign per_core_has_fflags[FPU_CVT] = 1;
assign per_core_ready_in[FPU_CVT] = enable;
assign per_core_ready_in[FPU_CVT] = fcvt_ready;
end
endgenerate
@@ -321,18 +326,23 @@ module VX_fpu_dpi #(
fflags_t [`NUM_THREADS-1:0] fflags_feq;
fflags_t [`NUM_THREADS-1:0] fflags_fmin;
fflags_t [`NUM_THREADS-1:0] fflags_fmax;
wire fncp_valid = (valid_in && core_select == FPU_NCP);
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
wire fncp_fire = fncp_valid && fncp_ready;
always @(*) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
dpi_fclss (dataa[i], result_fclss[i]);
dpi_flt (dataa[i], datab[i], result_flt[i], fflags_flt[i]);
dpi_fle (dataa[i], datab[i], result_fle[i], fflags_fle[i]);
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
dpi_fclss (fncp_fire, dataa[i], result_fclss[i]);
dpi_flt (fncp_fire, dataa[i], datab[i], result_flt[i], fflags_flt[i]);
dpi_fle (fncp_fire, dataa[i], datab[i], result_fle[i], fflags_fle[i]);
dpi_feq (fncp_fire, dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (fncp_fire, dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (fncp_fire, dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (fncp_fire, dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (fncp_fire, dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (fncp_fire, dataa[i], datab[i], result_fsgnjx[i]);
result_fmv[i] = dataa[i];
end
end
@@ -357,9 +367,6 @@ module VX_fpu_dpi #(
is_fmax ? fflags_fmax :
0;
wire enable = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
wire valid = (valid_in && core_select == FPU_NCP);
VX_shift_register #(
.DATAW (1 + TAGW + 1 + `NUM_THREADS * (32 + $bits(fflags_t))),
.DEPTH (`LATENCY_FNCP),
@@ -367,12 +374,12 @@ module VX_fpu_dpi #(
) shift_reg (
.clk (clk),
.reset (reset),
.enable (enable),
.data_in ({valid, tag_in, has_fflags_fncp, result_fncp, fflags_fncp}),
.enable (fncp_ready),
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_fncp}),
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
);
assign per_core_ready_in[FPU_NCP] = enable;
assign per_core_ready_in[FPU_NCP] = fncp_ready;
end
endgenerate
@@ -410,6 +417,4 @@ module VX_fpu_dpi #(
assign ready_in = per_core_ready_in[core_select];
endmodule
`endif
endmodule

View File

@@ -1,7 +1,7 @@
`include "VX_define.vh"
`include "VX_fpu_define.vh"
module VX_fpu_fpga #(
parameter TAGW = 1
parameter TAGW = 4
) (
input wire clk,
input wire reset,
@@ -11,8 +11,8 @@ module VX_fpu_fpga #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -54,19 +54,19 @@ module VX_fpu_fpga #(
is_itof = 0;
is_signed = 0;
case (op_type)
`FPU_ADD: begin core_select = FPU_FMA; end
`FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
`FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
`FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
`FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
`FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
`FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
`FPU_DIV: begin core_select = FPU_DIV; end
`FPU_SQRT: begin core_select = FPU_SQRT; end
`FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
`FPU_CVTWUS: begin core_select = FPU_CVT; end
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_ADD: begin core_select = FPU_FMA; end
`INST_FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
`INST_FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
`INST_FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
`INST_FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
`INST_FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
`INST_FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
`INST_FPU_CVTWUS: begin core_select = FPU_CVT; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`INST_FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
default: begin core_select = FPU_NCP; end
endcase
end

View File

@@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_fpu_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
@@ -18,8 +18,8 @@ module VX_fpu_fpnew #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -80,7 +80,7 @@ module VX_fpu_fpnew #(
fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg [`INST_FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fpu_has_fflags, fpu_has_fflags_out;
@@ -94,38 +94,38 @@ module VX_fpu_fpnew #(
fpu_operands[2] = datac;
case (op_type)
`FPU_ADD: begin
`INST_FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
end
`FPU_SUB: begin
`INST_FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
fpu_op_mod = 1;
end
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`FPU_MISC: begin
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`INST_FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`INST_FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`INST_FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`INST_FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`INST_FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`INST_FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`INST_FPU_MISC: begin
case (frm)
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end
endcase
end
default:;

View File

@@ -0,0 +1,32 @@
`ifndef VX_FPU_TYPES
`define VX_FPU_TYPES
`include "VX_define.vh"
// Shared floating-point type definitions: the operand classification record
// (fp_class_t) and the exception-flag record (fflags_t), plus macros giving
// the bit width of each.
package fpu_types;
// Classification of one floating-point operand, one flag per category.
// The VX_fp_class module drives these fields from the operand's exponent
// and mantissa.
typedef struct packed {
// exponent is neither all-zeros nor all-ones
logic is_normal;
// exponent all-zeros and mantissa zero
logic is_zero;
// exponent all-zeros and mantissa non-zero
logic is_subnormal;
// exponent all-ones and mantissa zero
logic is_inf;
// exponent all-ones and mantissa non-zero
logic is_nan;
// NaN that is not signaling
logic is_quiet;
// NaN whose most significant mantissa bit is clear
logic is_signaling;
} fp_class_t;
// Width of fp_class_t in bits (seven 1-bit members). The type is referenced
// through its package scope so the macro also expands correctly outside
// this package.
`define FP_CLASS_BITS $bits(fpu_types::fp_class_t)
// Floating-point exception flags. The struct is packed, so the
// first-declared member (NV) is the most significant bit; the number in
// each member comment is that member's bit position.
typedef struct packed {
logic NV; // 4-Invalid
logic DZ; // 3-Divide by zero
logic OF; // 2-Overflow
logic UF; // 1-Underflow
logic NX; // 0-Inexact
} fflags_t;
// Width of fflags_t in bits (five 1-bit members).
`define FFLAGS_BITS $bits(fpu_types::fflags_t)
endpackage
`endif