Files
kernels/hw/rtl/fpu/VX_fpu_fpnew.sv
Blaise Tine c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactoring

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00

287 lines
9.8 KiB
Systemverilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_FPNEW
// FPU backend built on top of the FPnew core (fpnew_top).
//
// Requests are accepted through a valid/ready handshake (valid_in/ready_in),
// decoded from the Vortex op_type/fmt/frm encoding into FPnew's
// operation/op_mod/format/rounding-mode controls, and the result is returned
// through an elastic buffer (valid_out/ready_out).
//
// Two build configurations are supported:
//   * XLEN_64 defined : one scalar fpnew_top instance per lane. The handshake
//                       and tag are taken from lane 0; exception flags are
//                       OR-merged over the lanes enabled in lane_mask.
//   * otherwise       : a single vectorial fpnew_top instance that is
//                       XLEN*NUM_LANES bits wide, with lane_mask driving
//                       simd_mask_i.
//
// Parameters:
//   NUM_LANES - number of data lanes processed per request
//   TAGW      - width of the request tag forwarded from tag_in to tag_out
//   OUT_REG   - output buffering selector, translated by the
//               OUT_REG_TO_EB_SIZE / OUT_REG_TO_EB_REG macros
module VX_fpu_fpnew
    import VX_fpu_pkg::*;
    import fpnew_pkg::*;
    import cf_math_pkg::*;
    import defs_div_sqrt_mvp::*;
#(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter OUT_REG   = 0
) (
    input wire clk,
    input wire reset,

    // request handshake
    input wire  valid_in,
    output wire ready_in,

    // request payload
    input wire [NUM_LANES-1:0]              lane_mask,
    input wire [TAGW-1:0]                   tag_in,
    input wire [`INST_FPU_BITS-1:0]         op_type,
    input wire [`INST_FMT_BITS-1:0]         fmt,
    input wire [`INST_FRM_BITS-1:0]         frm,
    input wire [NUM_LANES-1:0][`XLEN-1:0]   dataa,
    input wire [NUM_LANES-1:0][`XLEN-1:0]   datab,
    input wire [NUM_LANES-1:0][`XLEN-1:0]   datac,

    // response payload
    output wire [NUM_LANES-1:0][`XLEN-1:0]  result,
    output wire                             has_fflags,
    output wire [`FP_FLAGS_BITS-1:0]        fflags,
    output wire [TAGW-1:0]                  tag_out,

    // response handshake
    input wire  ready_out,
    output wire valid_out
);
    // DIV and SQRT share one FPnew unit, so its pipeline depth is the larger of the two
    localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);

    // response word: {result, has_fflags, fflags, tag}
    localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAGW;

`ifdef XLEN_64
    // use scalar configuration for mixed formats
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         unsigned'(`XLEN),
        EnableVectors: 1'b0,
        EnableNanBox:  1'b1,
    `ifdef FLEN_64
        FpFmtMask:     5'b11000,
    `else
        FpFmtMask:     5'b11000, // TODO: added FP64 to fix CVT bug in FpNew
    `endif
        IntFmtMask:    4'b0011
    };
`else
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         unsigned'(`XLEN * NUM_LANES),
        EnableVectors: 1'b1,
        EnableNanBox:  1'b0,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };
`endif

    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0},               // ADDMUL
                   '{default: unsigned'(LATENCY_FDIVSQRT)},   // DIVSQRT
                   '{default: `LATENCY_FNCP},                 // NONCOMP
                   '{default: `LATENCY_FCVT}},                // CONV
        UnitTypes:'{'{default: fpnew_pkg::PARALLEL},          // ADDMUL
                    '{default: fpnew_pkg::MERGED},            // DIVSQRT
                    '{default: fpnew_pkg::PARALLEL},          // NONCOMP
                    '{default: fpnew_pkg::MERGED}},           // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    wire fpu_ready_in, fpu_valid_in;
    wire fpu_ready_out, fpu_valid_out;

    reg [TAGW-1:0] fpu_tag_in, fpu_tag_out;

    reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;

    wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
    fpnew_pkg::status_t fpu_status;

    fpnew_pkg::operation_e fpu_op;
    reg [`INST_FRM_BITS-1:0] fpu_rnd;
    reg fpu_op_mod;
    reg fpu_has_fflags, fpu_has_fflags_out;
    fpnew_pkg::fp_format_e fpu_src_fmt, fpu_dst_fmt;
    fpnew_pkg::int_format_e fpu_int_fmt;

    `UNUSED_VAR (fmt)

    // Decode the request into FPnew controls and operand placement.
    always @(*) begin
        // defaults; individual operations override what they need
        fpu_op          = 'x;
        fpu_rnd         = frm;
        fpu_op_mod      = 0;
        fpu_has_fflags  = 1;
        fpu_operands[0] = dataa;
        fpu_operands[1] = datab;
        fpu_operands[2] = datac;
        fpu_dst_fmt     = fpnew_pkg::FP32;
        fpu_int_fmt     = fpnew_pkg::INT32;

    `ifdef FLEN_64
        // fmt[0] selects double precision
        if (fmt[0]) begin
            fpu_dst_fmt = fpnew_pkg::FP64;
        end
    `endif

    `ifdef XLEN_64
        // fmt[1] selects a 64-bit integer operand/result
        if (fmt[1]) begin
            fpu_int_fmt = fpnew_pkg::INT64;
        end
    `endif

        // source format follows the destination unless the operation says otherwise (F2F)
        fpu_src_fmt = fpu_dst_fmt;

        case (op_type)
            `INST_FPU_ADD: begin
                // addends are routed to operand slots 1 and 2
                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
            end
            `INST_FPU_SUB: begin
                // same routing as ADD, with the modifier bit set
                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
                fpu_op_mod = 1;
            end
            `INST_FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
            `INST_FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
            `INST_FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
            `INST_FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
            `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
        `ifdef FLEN_64
            // precision conversion: the source is the opposite format of the destination
            `INST_FPU_F2F:   begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? fpnew_pkg::FP32 : fpnew_pkg::FP64; end
        `endif
            // op_type[0] is forwarded as the modifier for the int<->float conversions
            `INST_FPU_F2I,
            `INST_FPU_F2U:   begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = op_type[0]; end
            `INST_FPU_I2F,
            `INST_FPU_U2F:   begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = op_type[0]; end
            `INST_FPU_CMP:   begin fpu_op = fpnew_pkg::CMP; end
            `INST_FPU_MISC:  begin
                // for MISC operations frm carries the sub-opcode instead of a rounding mode
                case (frm)
                    0,1,2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = {1'b0, frm[1:0]}; fpu_has_fflags = 0; end // FSGNJ
                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
                    4,5:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = 3'b011; fpu_op_mod = ~frm[0]; fpu_has_fflags = 0; end // FMV.X.W, FMV.W.X
                    6,7:   begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = {2'b00, frm[0]}; end // MIN, MAX
                endcase
            end
            default:;
        endcase

    `ifdef FPU_RV64F
        // apply nan-boxing to floating-point operands
        for (integer i = 0; i < NUM_LANES; ++i) begin
            // operand 0 of I2F/U2F is an integer and must not be boxed
            if (op_type != `INST_FPU_I2F && op_type != `INST_FPU_U2F) begin
                fpu_operands[0][i] |= 64'hffffffff00000000;
            end
            fpu_operands[1][i] |= 64'hffffffff00000000;
            fpu_operands[2][i] |= 64'hffffffff00000000;
        end
    `endif
    end

`ifdef XLEN_64

    localparam STATUS_BITS = $bits(fpnew_pkg::status_t);

    // Per-lane exception flags, forced to zero for lanes that were not
    // enabled in lane_mask when the request was issued.
    wire [NUM_LANES-1:0][STATUS_BITS-1:0] fpu_status_lanes;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // tag layout through the core: {request tag, has_fflags, lane enable}
        wire [(TAGW+2)-1:0] fpu_tag;
        wire fpu_valid_out_uq;
        wire fpu_ready_in_uq;
        fpnew_pkg::status_t fpu_status_uq;
        `UNUSED_VAR (fpu_tag)
        `UNUSED_VAR (fpu_valid_out_uq)
        `UNUSED_VAR (fpu_ready_in_uq)

        fpnew_top #(
            .Features       (FPU_FEATURES),
            .Implementation (FPU_IMPLEMENTATION),
            .TagType        (logic[(TAGW+2)-1:0])
        ) fpnew_core (
            .clk_i          (clk),
            .rst_ni         (~reset),
            .operands_i     ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
            .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
            .op_i           (fpu_op),
            .op_mod_i       (fpu_op_mod),
            .src_fmt_i      (fpu_src_fmt),
            .dst_fmt_i      (fpu_dst_fmt),
            .int_fmt_i      (fpu_int_fmt),
            `UNUSED_PIN (vectorial_op_i),
            `UNUSED_PIN (simd_mask_i),
            // the lane's enable bit rides along with the tag so that it is
            // available, pipeline-aligned, when the lane's status comes out
            .tag_i          ({fpu_tag_in, fpu_has_fflags, lane_mask[i]}),
            .in_valid_i     (fpu_valid_in),
            .in_ready_o     (fpu_ready_in_uq),
            .flush_i        (reset),
            .result_o       (fpu_result[i]),
            .status_o       (fpu_status_uq),
            .tag_o          (fpu_tag),
            .out_valid_o    (fpu_valid_out_uq),
            .out_ready_i    (fpu_ready_out),
            `UNUSED_PIN (busy_o)
        );

        // drop the flags of disabled lanes: they computed on don't-care operands
        assign fpu_status_lanes[i] = {STATUS_BITS{fpu_tag[0]}} & fpu_status_uq;

        // All lanes receive the same operation and handshake inputs, so the
        // handshake and tag are sampled from lane 0 only.
        if (i == 0) begin
            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag[(TAGW+2)-1:1];
            assign fpu_valid_out = fpu_valid_out_uq;
            assign fpu_ready_in  = fpu_ready_in_uq;
        end
    end

    // Accumulate (bitwise OR) the exception flags of all enabled lanes so that
    // an exception raised by any active lane is reported, not just lane 0's.
    reg [STATUS_BITS-1:0] fpu_status_merged;
    always @(*) begin
        fpu_status_merged = '0;
        for (integer i = 0; i < NUM_LANES; ++i) begin
            fpu_status_merged |= fpu_status_lanes[i];
        end
    end

    assign fpu_status = fpnew_pkg::status_t'(fpu_status_merged);

`else

    // single vectorial core; lane_mask is applied by FPnew through simd_mask_i
    fpnew_top #(
        .Features       (FPU_FEATURES),
        .Implementation (FPU_IMPLEMENTATION),
        .TagType        (logic[(TAGW+1)-1:0]),
        .TrueSIMDClass  (1),
        .EnableSIMDMask (1)
    ) fpnew_core (
        .clk_i          (clk),
        .rst_ni         (~reset),
        .operands_i     (fpu_operands),
        .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
        .op_i           (fpu_op),
        .op_mod_i       (fpu_op_mod),
        .src_fmt_i      (fpu_src_fmt),
        .dst_fmt_i      (fpu_dst_fmt),
        .int_fmt_i      (fpu_int_fmt),
        .vectorial_op_i (1'b1),
        .simd_mask_i    (lane_mask),
        .tag_i          ({fpu_tag_in, fpu_has_fflags}),
        .in_valid_i     (fpu_valid_in),
        .in_ready_o     (fpu_ready_in),
        .flush_i        (reset),
        .result_o       (fpu_result),
        .status_o       (fpu_status),
        .tag_o          ({fpu_tag_out, fpu_has_fflags_out}),
        .out_valid_o    (fpu_valid_out),
        .out_ready_i    (fpu_ready_out),
        `UNUSED_PIN (busy_o)
    );

`endif

    // request side is passed straight through to the core
    assign fpu_valid_in = valid_in;
    assign ready_in     = fpu_ready_in;
    assign fpu_tag_in   = tag_in;

    // response buffering between the core and the module outputs
    VX_elastic_buffer #(
        .DATAW   (RSP_DATAW),
        .SIZE    (`OUT_REG_TO_EB_SIZE(OUT_REG)),
        .OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
    ) rsp_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (fpu_valid_out),
        .ready_in  (fpu_ready_out),
        .data_in   ({fpu_result, fpu_has_fflags_out, fpu_status, fpu_tag_out}),
        .data_out  ({result, has_fflags, fflags, tag_out}),
        .valid_out (valid_out),
        .ready_out (ready_out)
    );

endmodule
`endif