minor update
This commit is contained in:
@@ -1,5 +1,8 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
/// Modified port of cast module from fpnew Library
|
||||||
|
/// reference: https://github.com/pulp-platform/fpnew
|
||||||
|
|
||||||
`ifndef SYNTHESIS
|
`ifndef SYNTHESIS
|
||||||
`include "float_dpi.vh"
|
`include "float_dpi.vh"
|
||||||
`endif
|
`endif
|
||||||
@@ -91,14 +94,14 @@ module VX_fp_cvt #(
|
|||||||
wire [LANES-1:0] mant_is_zero; // for integer zeroes
|
wire [LANES-1:0] mant_is_zero; // for integer zeroes
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; ++i) begin
|
for (genvar i = 0; i < LANES; ++i) begin
|
||||||
// Leading zero counter for cancellations
|
|
||||||
wire mant_is_nonzero;
|
wire mant_is_nonzero;
|
||||||
VX_lzc #(
|
VX_lzc #(
|
||||||
.DATAW (INT_MAN_WIDTH)
|
.WIDTH (INT_MAN_WIDTH),
|
||||||
|
.MODE (1)
|
||||||
) lzc (
|
) lzc (
|
||||||
.data_in (encoded_mant[i]),
|
.in_i (encoded_mant[i]),
|
||||||
.data_out (renorm_shamt[i]),
|
.cnt_o (renorm_shamt[i]),
|
||||||
.valid_out (mant_is_nonzero)
|
.valid_o (mant_is_nonzero)
|
||||||
);
|
);
|
||||||
assign mant_is_zero[i] = ~mant_is_nonzero;
|
assign mant_is_zero[i] = ~mant_is_nonzero;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -38,6 +38,27 @@ module VX_fp_div #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
|
`ifdef VERILATOR
|
||||||
|
reg [31:0] r;
|
||||||
|
fflags_t f;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
dpi_fdiv (dataa[i], datab[i], frm, r, f);
|
||||||
|
end
|
||||||
|
`UNUSED_VAR (f)
|
||||||
|
|
||||||
|
VX_shift_register #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (`LATENCY_FDIV),
|
||||||
|
.RESETW (1)
|
||||||
|
) shift_req_dpi (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (_reset),
|
||||||
|
.enable (enable),
|
||||||
|
.data_in (r),
|
||||||
|
.data_out (result[i])
|
||||||
|
);
|
||||||
|
`else
|
||||||
acl_fdiv fdiv (
|
acl_fdiv fdiv (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (_reset),
|
.areset (_reset),
|
||||||
@@ -46,6 +67,7 @@ module VX_fp_div #(
|
|||||||
.b (datab[i]),
|
.b (datab[i]),
|
||||||
.q (result[i])
|
.q (result[i])
|
||||||
);
|
);
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_shift_register #(
|
VX_shift_register #(
|
||||||
|
|||||||
@@ -59,6 +59,27 @@ module VX_fp_fma #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
`ifdef VERILATOR
|
||||||
|
reg [31:0] r;
|
||||||
|
fflags_t f;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
dpi_fmadd (a, b, c, frm, r, f);
|
||||||
|
end
|
||||||
|
`UNUSED_VAR (f)
|
||||||
|
|
||||||
|
VX_shift_register #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (`LATENCY_FMA),
|
||||||
|
.RESETW (1)
|
||||||
|
) shift_req_dpi (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.enable (enable),
|
||||||
|
.data_in (r),
|
||||||
|
.data_out (result[i])
|
||||||
|
);
|
||||||
|
`else
|
||||||
acl_fmadd fmadd (
|
acl_fmadd fmadd (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (reset),
|
.areset (reset),
|
||||||
@@ -68,6 +89,7 @@ module VX_fp_fma #(
|
|||||||
.c (c),
|
.c (c),
|
||||||
.q (result[i])
|
.q (result[i])
|
||||||
);
|
);
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_shift_register #(
|
VX_shift_register #(
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
/// Modified port of noncomp module from fpnew Library
|
||||||
|
/// reference: https://github.com/pulp-platform/fpnew
|
||||||
|
|
||||||
module VX_fp_ncomp #(
|
module VX_fp_ncomp #(
|
||||||
parameter TAGW = 1,
|
parameter TAGW = 1,
|
||||||
parameter LANES = 1
|
parameter LANES = 1
|
||||||
@@ -87,7 +90,8 @@ module VX_fp_ncomp #(
|
|||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||||
.RESETW (1)
|
.RESETW (1),
|
||||||
|
.DEPTH (0)
|
||||||
) pipe_reg0 (
|
) pipe_reg0 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
/// Modified port of rounding module from fpnew Library
|
||||||
|
/// reference: https://github.com/pulp-platform/fpnew
|
||||||
|
|
||||||
module VX_fp_rounding #(
|
module VX_fp_rounding #(
|
||||||
parameter DAT_WIDTH = 2 // Width of the absolute value, without sign bit
|
parameter DAT_WIDTH = 2 // Width of the absolute value, without sign bit
|
||||||
) (
|
) (
|
||||||
@@ -17,17 +20,17 @@ module VX_fp_rounding #(
|
|||||||
output wire exact_zero_o // output is an exact zero
|
output wire exact_zero_o // output is an exact zero
|
||||||
);
|
);
|
||||||
|
|
||||||
reg round_up; // Rounding decision
|
reg round_up; // Rounding decision
|
||||||
|
|
||||||
// Take the rounding decision according to RISC-V spec
|
// Take the rounding decision according to RISC-V spec
|
||||||
// RoundMode | Mnemonic | Meaning
|
// RoundMode | Mnemonic | Meaning
|
||||||
// :--------:|:--------:|:-------
|
// :--------:|:--------:|:-------
|
||||||
// 000 | RNE | Round to Nearest, ties to Even
|
// 000 | RNE | Round to Nearest, ties to Even
|
||||||
// 001 | RTZ | Round towards Zero
|
// 001 | RTZ | Round towards Zero
|
||||||
// 010 | RDN | Round Down (towards -\infty)
|
// 010 | RDN | Round Down (towards -\infty)
|
||||||
// 011 | RUP | Round Up (towards \infty)
|
// 011 | RUP | Round Up (towards \infty)
|
||||||
// 100 | RMM | Round to Nearest, ties to Max Magnitude
|
// 100 | RMM | Round to Nearest, ties to Max Magnitude
|
||||||
// others | | *invalid*
|
// others | | *invalid*
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (rnd_mode_i)
|
case (rnd_mode_i)
|
||||||
@@ -47,15 +50,15 @@ module VX_fp_rounding #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
// Perform the rounding, exponent change and overflow to inf happens automagically
|
// Perform the rounding, exponent change and overflow to inf happens automagically
|
||||||
assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up);
|
assign abs_rounded_o = abs_value_i + DAT_WIDTH'(round_up);
|
||||||
|
|
||||||
// True zero result is a zero result without dirty round/sticky bits
|
// True zero result is a zero result without dirty round/sticky bits
|
||||||
assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0);
|
assign exact_zero_o = (abs_value_i == 0) && (round_sticky_bits_i == 0);
|
||||||
|
|
||||||
// In case of effective subtraction (thus signs of addition operands must have differed) and a
|
// In case of effective subtraction (thus signs of addition operands must have differed) and a
|
||||||
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
|
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
|
||||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
|
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
|
||||||
: sign_i;
|
: sign_i;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -37,6 +37,27 @@ module VX_fp_sqrt #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
|
`ifdef VERILATOR
|
||||||
|
reg [31:0] r;
|
||||||
|
fflags_t f;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
dpi_fsqrt (dataa[i], frm, r, f);
|
||||||
|
end
|
||||||
|
`UNUSED_VAR (f)
|
||||||
|
|
||||||
|
VX_shift_register #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (`LATENCY_FSQRT),
|
||||||
|
.RESETW (1)
|
||||||
|
) shift_req_dpi (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (_reset),
|
||||||
|
.enable (enable),
|
||||||
|
.data_in (r),
|
||||||
|
.data_out (result[i])
|
||||||
|
);
|
||||||
|
`else
|
||||||
acl_fsqrt fsqrt (
|
acl_fsqrt fsqrt (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (_reset),
|
.areset (_reset),
|
||||||
@@ -44,6 +65,7 @@ module VX_fp_sqrt #(
|
|||||||
.a (dataa[i]),
|
.a (dataa[i]),
|
||||||
.q (result[i])
|
.q (result[i])
|
||||||
);
|
);
|
||||||
|
`endif
|
||||||
end
|
end
|
||||||
|
|
||||||
VX_shift_register #(
|
VX_shift_register #(
|
||||||
|
|||||||
@@ -1,27 +1,86 @@
|
|||||||
`include "VX_platform.vh"
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
/// Modified port of lzc module from fpnew Library
|
||||||
|
/// reference: https://github.com/pulp-platform/fpnew
|
||||||
|
/// A trailing zero counter / leading zero counter.
|
||||||
|
/// Set MODE to 0 for trailing zero counter => cnt_o is the number of trailing zeros (from the LSB)
|
||||||
|
/// Set MODE to 1 for leading zero counter => cnt_o is the number of leading zeros (from the MSB)
|
||||||
|
/// If the input is all zeros (contains no set bit), `valid_o` is deasserted. Additionally `cnt_o` contains
|
||||||
|
/// the maximum number of zeros - 1. For example:
|
||||||
|
/// in_i = 000_0000, valid_o = 0, cnt_o = 6 (mode = 0)
|
||||||
|
/// in_i = 000_0001, valid_o = 1, cnt_o = 0 (mode = 0)
|
||||||
|
/// in_i = 000_1000, valid_o = 1, cnt_o = 3 (mode = 0)
|
||||||
|
/// Furthermore, this unit contains a more efficient implementation for Verilator (simulation only).
|
||||||
|
/// This speeds up simulation significantly.
|
||||||
|
|
||||||
module VX_lzc #(
|
module VX_lzc #(
|
||||||
parameter DATAW = 32,
|
/// The width of the input vector.
|
||||||
parameter LDATAW = `LOG2UP(DATAW)
|
parameter int unsigned WIDTH = 2,
|
||||||
|
parameter bit MODE = 1'b0 // 0 -> trailing zero, 1 -> leading zero
|
||||||
) (
|
) (
|
||||||
input wire [DATAW-1:0] data_in,
|
input logic [WIDTH-1:0] in_i,
|
||||||
output wire [LDATAW-1:0] data_out,
|
output logic [$clog2(WIDTH)-1:0] cnt_o,
|
||||||
output wire valid_out
|
output logic valid_o
|
||||||
);
|
);
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
|
||||||
reg [LDATAW-1:0] data_out_r;
|
localparam int unsigned NUM_LEVELS = $clog2(WIDTH);
|
||||||
|
|
||||||
always @(*) begin
|
// pragma translate_off
|
||||||
data_out_r = 'x;
|
initial begin
|
||||||
for (integer i = DATAW-1; i >= 0; --i) begin
|
assert(WIDTH > 0) else $fatal("input must be at least one bit wide");
|
||||||
if (data_in[i]) begin
|
end
|
||||||
data_out_r = LDATAW'(DATAW-1-i);
|
// pragma translate_on
|
||||||
break;
|
|
||||||
|
logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut;
|
||||||
|
logic [2**NUM_LEVELS-1:0] sel_nodes;
|
||||||
|
logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes;
|
||||||
|
|
||||||
|
logic [WIDTH-1:0] in_tmp;
|
||||||
|
|
||||||
|
// reverse vector if required
|
||||||
|
always_comb begin : flip_vector
|
||||||
|
for (int unsigned i = 0; i < WIDTH; i++) begin
|
||||||
|
in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut
|
||||||
|
assign index_lut[j] = NUM_LEVELS'(unsigned'(j));
|
||||||
|
end
|
||||||
|
|
||||||
|
for (genvar level = 0; unsigned'(level) < NUM_LEVELS; level++) begin : g_levels
|
||||||
|
if (unsigned'(level) == NUM_LEVELS-1) begin : g_last_level
|
||||||
|
for (genvar k = 0; k < 2**level; k++) begin : g_level
|
||||||
|
// if two successive indices are still in the vector...
|
||||||
|
if (unsigned'(k) * 2 < WIDTH-1) begin
|
||||||
|
assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1];
|
||||||
|
assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] :
|
||||||
|
index_lut[k*2+1];
|
||||||
|
end
|
||||||
|
// if only the first index is still in the vector...
|
||||||
|
if (unsigned'(k) * 2 == WIDTH-1) begin
|
||||||
|
assign sel_nodes[2**level-1+k] = in_tmp[k*2];
|
||||||
|
assign index_nodes[2**level-1+k] = index_lut[k*2];
|
||||||
|
end
|
||||||
|
// if index is out of range
|
||||||
|
if (unsigned'(k) * 2 > WIDTH-1) begin
|
||||||
|
assign sel_nodes[2**level-1+k] = 1'b0;
|
||||||
|
assign index_nodes[2**level-1+k] = '0;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end else begin
|
||||||
|
for (genvar l = 0; l < 2**level; l++) begin : g_level
|
||||||
|
assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
|
||||||
|
assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? index_nodes[2**(level+1)-1+l*2] :
|
||||||
|
index_nodes[2**(level+1)-1+l*2+1];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign data_out = data_out_r;
|
assign cnt_o = NUM_LEVELS > unsigned'(0) ? index_nodes[0] : $clog2(WIDTH)'(0);
|
||||||
assign valid_out = (| data_in);
|
assign valid_o = NUM_LEVELS > unsigned'(0) ? sel_nodes[0] : (|in_i);
|
||||||
|
|
||||||
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
Reference in New Issue
Block a user