+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
287 lines
9.8 KiB
Systemverilog
287 lines
9.8 KiB
Systemverilog
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_fpu_define.vh"

`ifdef FPU_FPNEW

// VX_fpu_fpnew
//
// Wraps the FPnew core (fpnew_top) behind a valid/ready request/response
// interface operating on NUM_LANES operands per request.
//
// Structure:
//   1. A combinational decoder translates (op_type, fmt, frm) into the FPnew
//      operation, op modifier, rounding mode and source/destination/integer
//      formats, and routes dataa/datab/datac onto the three FPnew operands.
//   2. The core is instantiated in one of two ways:
//        - XLEN_64 defined : one scalar fpnew_top per lane (EnableVectors = 0).
//                            The handshake, tag and status of lane 0 are used
//                            for the whole request; lane_mask is not used.
//        - otherwise       : a single fpnew_top in vectorial mode, with
//                            lane_mask driving simd_mask_i.
//   3. The response {result, has_fflags, fflags, tag} goes through a
//      VX_elastic_buffer configured from OUT_REG.
//
// Parameters:
//   NUM_LANES : number of operand lanes processed per request.
//   TAGW      : width of the opaque tag carried from tag_in to tag_out.
//   OUT_REG   : output buffering selector, mapped through
//               `OUT_REG_TO_EB_SIZE / `OUT_REG_TO_EB_REG.
//
// The whole module is compiled only when FPU_FPNEW is defined.
module VX_fpu_fpnew
    import VX_fpu_pkg::*;
    import fpnew_pkg::*;
    import cf_math_pkg::*;
    import defs_div_sqrt_mvp::*;
#(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter OUT_REG   = 0
) (
    input wire clk,
    input wire reset,

    // request handshake
    input wire  valid_in,
    output wire ready_in,

    // per-lane active mask (only consumed by the vectorial, non-XLEN_64 path)
    input wire [NUM_LANES-1:0] lane_mask,

    // opaque tag returned unchanged on tag_out with the matching response
    input wire [TAGW-1:0] tag_in,

    // operation selector, format selector and rounding mode / sub-operation
    input wire [`INST_FPU_BITS-1:0] op_type,
    input wire [`INST_FMT_BITS-1:0] fmt,
    input wire [`INST_FRM_BITS-1:0] frm,

    // per-lane operands and result
    input wire [NUM_LANES-1:0][`XLEN-1:0]  dataa,
    input wire [NUM_LANES-1:0][`XLEN-1:0]  datab,
    input wire [NUM_LANES-1:0][`XLEN-1:0]  datac,
    output wire [NUM_LANES-1:0][`XLEN-1:0] result,

    // has_fflags tells the consumer whether fflags is meaningful for this op
    output wire has_fflags,
    output wire [`FP_FLAGS_BITS-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    // response handshake
    input wire  ready_out,
    output wire valid_out
);
    // DIV and SQRT share one FPnew operation group, so a single pipeline
    // depth (the larger of the two) is configured for it.
    localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);

    // Response payload: result + has_fflags bit + fflags + tag
    // (matches the concatenation fed to rsp_buf below).
    localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAGW;

`ifdef XLEN_64
    // use scalar configuration for mixed formats
    // NOTE(review): mask bit positions follow fpnew_pkg's format ordering
    // (expected FP32/FP64 and INT32/INT64 here) - confirm against the FPnew
    // version in use.
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         unsigned'(`XLEN),
        EnableVectors: 1'b0,
        EnableNanBox:  1'b1,
    `ifdef FLEN_64
        FpFmtMask:     5'b11000,
    `else
        FpFmtMask:     5'b11000, // TODO: added FP64 to fix CVT bug in FpNew
    `endif
        IntFmtMask:    4'b0011
    };
`else
    // single vectorial core whose datapath spans all lanes
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         unsigned'(`XLEN * NUM_LANES),
        EnableVectors: 1'b1,
        EnableNanBox:  1'b0,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };
`endif

    // Per-operation-group pipeline depth and unit type.
    // NOTE(review): the ADDMUL row assigns `LATENCY_FMA to its first format
    // entry only and 0 to the others; when a second FP format is enabled in
    // FpFmtMask that format gets no pipeline registers - confirm intended.
    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0},               // ADDMUL
                   '{default: unsigned'(LATENCY_FDIVSQRT)},   // DIVSQRT
                   '{default: `LATENCY_FNCP},                 // NONCOMP
                   '{default: `LATENCY_FCVT}},                // CONV
        UnitTypes:'{'{default: fpnew_pkg::PARALLEL},          // ADDMUL
                    '{default: fpnew_pkg::MERGED},            // DIVSQRT
                    '{default: fpnew_pkg::PARALLEL},          // NONCOMP
                    '{default: fpnew_pkg::MERGED}},           // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    // core-side handshake
    wire fpu_ready_in, fpu_valid_in;
    wire fpu_ready_out, fpu_valid_out;

    reg [TAGW-1:0] fpu_tag_in, fpu_tag_out;

    // operand index (0..2) is the outer dimension, lane the inner one
    reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;

    wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
    fpnew_pkg::status_t fpu_status;

    // decoded FPnew controls
    fpnew_pkg::operation_e fpu_op;
    reg [`INST_FRM_BITS-1:0] fpu_rnd;
    reg fpu_op_mod;
    reg fpu_has_fflags, fpu_has_fflags_out;
    fpnew_pkg::fp_format_e fpu_src_fmt, fpu_dst_fmt;
    fpnew_pkg::int_format_e fpu_int_fmt;

    // fmt is only read under FLEN_64 / XLEN_64; silence lint otherwise
    `UNUSED_VAR (fmt)

    // Request decoder: (op_type, fmt, frm) -> FPnew controls and operands.
    always @(*) begin
        // defaults; fpu_op stays 'x for op_type values not listed below
        fpu_op          = 'x;
        fpu_rnd         = frm;
        fpu_op_mod      = 0;
        fpu_has_fflags  = 1;
        fpu_operands[0] = dataa;
        fpu_operands[1] = datab;
        fpu_operands[2] = datac;
        fpu_dst_fmt     = fpnew_pkg::FP32;
        fpu_int_fmt     = fpnew_pkg::INT32;

    `ifdef FLEN_64
        // fmt[0] selects a double-precision destination
        if (fmt[0]) begin
            fpu_dst_fmt = fpnew_pkg::FP64;
        end
    `endif

    `ifdef XLEN_64
        // fmt[1] selects a 64-bit integer for int<->float conversions
        if (fmt[1]) begin
            fpu_int_fmt = fpnew_pkg::INT64;
        end
    `endif

        // source format follows the destination unless an op overrides it (F2F)
        fpu_src_fmt = fpu_dst_fmt;

        case (op_type)
            // ADD/SUB place their two inputs on operands 1 and 2 rather than
            // 0 and 1; SUB is ADD with the op modifier set.
            `INST_FPU_ADD: begin
                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
            end
            `INST_FPU_SUB: begin
                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
                fpu_op_mod = 1;
            end
            `INST_FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
            `INST_FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
            `INST_FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
            // fused multiply-add family: the op modifier distinguishes
            // MADD from MSUB and NMSUB from NMADD
            `INST_FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD;  fpu_op_mod = 1; end
            `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
        `ifdef FLEN_64
            // float-to-float conversion: source is the opposite precision of
            // the destination chosen by fmt[0]
            `INST_FPU_F2F:   begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? fpnew_pkg::FP32 : fpnew_pkg::FP64; end
        `endif
            // conversions: op_type[0] is forwarded as the op modifier for the
            // paired opcodes (F2I/F2U and I2F/U2F)
            `INST_FPU_F2I,
            `INST_FPU_F2U:   begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = op_type[0]; end
            `INST_FPU_I2F,
            `INST_FPU_U2F:   begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = op_type[0]; end
            `INST_FPU_CMP:   begin fpu_op = fpnew_pkg::CMP; end
            // MISC: frm is reused as a sub-operation selector, and the
            // rounding-mode input is rewritten per sub-operation
            `INST_FPU_MISC:begin
                case (frm)
                    0,1,2: begin fpu_op = fpnew_pkg::SGNJ;     fpu_rnd = {1'b0, frm[1:0]}; fpu_has_fflags = 0; end // FSGNJ
                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
                    4,5:   begin fpu_op = fpnew_pkg::SGNJ;     fpu_rnd = 3'b011; fpu_op_mod = ~frm[0]; fpu_has_fflags = 0; end // FMV.X.W, FMV.W.X
                    6,7:   begin fpu_op = fpnew_pkg::MINMAX;   fpu_rnd = {2'b00, frm[0]}; end // MIN, MAX
                endcase
            end
            default:;
        endcase

    `ifdef FPU_RV64F
        // apply nan-boxing to floating-point operands
        // (operand 0 is left untouched for I2F/U2F, where it is an integer)
        for (integer i = 0; i < NUM_LANES; ++i) begin
            if (op_type != `INST_FPU_I2F && op_type != `INST_FPU_U2F) begin
                fpu_operands[0][i] |= 64'hffffffff00000000;
            end
            fpu_operands[1][i] |= 64'hffffffff00000000;
            fpu_operands[2][i] |= 64'hffffffff00000000;
        end
    `endif
    end

`ifdef XLEN_64
    // Scalar configuration: one FPnew core per lane, all fed the same
    // controls and handshake.
    // NOTE(review): lane_mask is ignored here and tag/valid/ready/status are
    // taken from lane 0 only, so status flags produced by other lanes are not
    // reported - confirm this is acceptable for multi-lane configurations.
    `UNUSED_VAR (lane_mask)
    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [(TAGW+1)-1:0] fpu_tag;
        wire fpu_valid_out_uq;
        wire fpu_ready_in_uq;
        fpnew_pkg::status_t fpu_status_uq;
        `UNUSED_VAR (fpu_tag)
        `UNUSED_VAR (fpu_valid_out_uq)
        `UNUSED_VAR (fpu_ready_in_uq)
        `UNUSED_VAR (fpu_status_uq)

        fpnew_top #(
            .Features       (FPU_FEATURES),
            .Implementation (FPU_IMPLEMENTATION),
            .TagType        (logic[(TAGW+1)-1:0])
        ) fpnew_core (
            .clk_i          (clk),
            .rst_ni         (~reset),
            .operands_i     ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
            .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
            .op_i           (fpu_op),
            .op_mod_i       (fpu_op_mod),
            .src_fmt_i      (fpu_src_fmt),
            .dst_fmt_i      (fpu_dst_fmt),
            .int_fmt_i      (fpu_int_fmt),
            `UNUSED_PIN     (vectorial_op_i),
            `UNUSED_PIN     (simd_mask_i),
            // has_fflags rides along in the tag LSB
            .tag_i          ({fpu_tag_in, fpu_has_fflags}),
            .in_valid_i     (fpu_valid_in),
            .in_ready_o     (fpu_ready_in_uq),
            .flush_i        (reset),
            .result_o       (fpu_result[i]),
            .status_o       (fpu_status_uq),
            .tag_o          (fpu_tag),
            .out_valid_o    (fpu_valid_out_uq),
            .out_ready_i    (fpu_ready_out),
            `UNUSED_PIN     (busy_o)
        );

        // lane 0 drives the shared tag, handshake and status
        if (i == 0) begin
            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
            assign fpu_valid_out = fpu_valid_out_uq;
            assign fpu_ready_in = fpu_ready_in_uq;
            assign fpu_status = fpu_status_uq;
        end
    end
`else
    // Vectorial configuration: one FPnew core processes all lanes, with
    // lane_mask supplied as the SIMD mask.
    fpnew_top #(
        .Features       (FPU_FEATURES),
        .Implementation (FPU_IMPLEMENTATION),
        .TagType        (logic[(TAGW+1)-1:0]),
        .TrueSIMDClass  (1),
        .EnableSIMDMask (1)
    ) fpnew_core (
        .clk_i          (clk),
        .rst_ni         (~reset),
        .operands_i     (fpu_operands),
        .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
        .op_i           (fpu_op),
        .op_mod_i       (fpu_op_mod),
        .src_fmt_i      (fpu_src_fmt),
        .dst_fmt_i      (fpu_dst_fmt),
        .int_fmt_i      (fpu_int_fmt),
        .vectorial_op_i (1'b1),
        .simd_mask_i    (lane_mask),
        // has_fflags rides along in the tag LSB
        .tag_i          ({fpu_tag_in, fpu_has_fflags}),
        .in_valid_i     (fpu_valid_in),
        .in_ready_o     (fpu_ready_in),
        .flush_i        (reset),
        .result_o       (fpu_result),
        .status_o       (fpu_status),
        .tag_o          ({fpu_tag_out, fpu_has_fflags_out}),
        .out_valid_o    (fpu_valid_out),
        .out_ready_i    (fpu_ready_out),
        `UNUSED_PIN     (busy_o)
    );
`endif

    // request side is passed straight through to the core
    assign fpu_valid_in = valid_in;
    assign ready_in     = fpu_ready_in;
    assign fpu_tag_in   = tag_in;

    // Response buffer between the core and the module outputs; field order
    // of data_in and data_out must stay in sync.
    VX_elastic_buffer #(
        .DATAW   (RSP_DATAW),
        .SIZE    (`OUT_REG_TO_EB_SIZE(OUT_REG)),
        .OUT_REG (`OUT_REG_TO_EB_REG(OUT_REG))
    ) rsp_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (fpu_valid_out),
        .ready_in  (fpu_ready_out),
        .data_in   ({fpu_result, fpu_has_fflags_out, fpu_status, fpu_tag_out}),
        .data_out  ({result, has_fflags, fflags, tag_out}),
        .valid_out (valid_out),
        .ready_out (ready_out)
    );

endmodule

`endif