Fixed lpm_mult parameters and RAM init file path
This commit is contained in:
187
hw/rtl/fp_cores/VX_fp_addmul.v
Normal file
187
hw/rtl/fp_cores/VX_fp_addmul.v
Normal file
@@ -0,0 +1,187 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
// VX_fp_addmul
//
// Multi-lane 32-bit floating-point add / subtract / multiply unit.
//
// For every lane, the three candidate results (add, sub, mul) are computed
// side by side from dataa[i] and datab[i]. The operation selectors
// (do_sub, do_mul) are pushed, together with the tag and the valid bit,
// through a VX_shift_register of depth `LATENCY_FADDMUL; the delayed
// selectors (do_sub_r, do_mul_r) pick which candidate drives result[i].
// NOTE(review): this relies on the datapath latency of the MAC primitives /
// DPI models being equal to `LATENCY_FADDMUL -- confirm against VX_define.vh.
//
// Parameters:
//   TAGW  - width in bits of the tag carried alongside each request.
//   LANES - number of independent 32-bit lanes processed in parallel.
//
// Handshake:
//   The unit stalls (enable = 0) while valid_out is high and ready_out is
//   low. ready_in mirrors enable, and the same enable gates the shift
//   register and every per-lane arithmetic element.
module VX_fp_addmul #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,               // high whenever the unit is not stalled
    input wire  valid_in,               // request valid

    input wire [TAGW-1:0] tag_in,       // tag forwarded unchanged to tag_out

    input wire do_sub,                  // select subtract (when do_mul is 0)
    input wire do_mul,                  // select multiply (takes priority over do_sub)

    input wire [LANES-1:0][31:0]  dataa,
    input wire [LANES-1:0][31:0]  datab,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,              // downstream is able to accept a result
    output wire valid_out               // delayed copy of valid_in
);

    // Back-pressure: hold everything while a valid result is not being accepted.
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Delayed operation selectors, driven by the shift register output below.
    reg do_sub_r, do_mul_r;

    for (genvar i = 0; i < LANES; i++) begin

    `ifdef QUARTUS
        // Synthesis path: results are driven by primitive output ports, so
        // they are nets.
        wire [31:0] result_add;
        wire [31:0] result_sub;
        wire [31:0] result_mul;

        // Adder: operation_mode "sp_add", adder_subtract "false".
        twentynm_fp_mac mac_fp_add (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(dataa[i]),
            .ay(datab[i]),
            .az(),
            .clk({2'b00,clk}),          // only bit 0 carries the real clock
            .ena({2'b11,enable}),       // bit 0 is the stall-aware enable
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result_add),
            .chainout()
        );
        defparam mac_fp_add.operation_mode = "sp_add";
        defparam mac_fp_add.use_chainin = "false";
        defparam mac_fp_add.adder_subtract = "false";
        defparam mac_fp_add.ax_clock = "0";
        defparam mac_fp_add.ay_clock = "0";
        defparam mac_fp_add.az_clock = "none";
        defparam mac_fp_add.output_clock = "0";
        defparam mac_fp_add.accumulate_clock = "none";
        defparam mac_fp_add.ax_chainin_pl_clock = "none";
        defparam mac_fp_add.accum_pipeline_clock = "none";
        defparam mac_fp_add.mult_pipeline_clock = "none";
        defparam mac_fp_add.adder_input_clock = "0";
        defparam mac_fp_add.accum_adder_clock = "none";

        // Subtractor: same wiring as the adder, but adder_subtract "true".
        twentynm_fp_mac mac_fp_sub (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(dataa[i]),
            .ay(datab[i]),
            .az(),
            .clk({2'b00,clk}),
            .ena({2'b11,enable}),
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result_sub),
            .chainout()
        );
        defparam mac_fp_sub.operation_mode = "sp_add";
        defparam mac_fp_sub.use_chainin = "false";
        defparam mac_fp_sub.adder_subtract = "true";
        defparam mac_fp_sub.ax_clock = "0";
        defparam mac_fp_sub.ay_clock = "0";
        defparam mac_fp_sub.az_clock = "none";
        defparam mac_fp_sub.output_clock = "0";
        defparam mac_fp_sub.accumulate_clock = "none";
        defparam mac_fp_sub.ax_chainin_pl_clock = "none";
        defparam mac_fp_sub.accum_pipeline_clock = "none";
        defparam mac_fp_sub.mult_pipeline_clock = "none";
        defparam mac_fp_sub.adder_input_clock = "0";
        defparam mac_fp_sub.accum_adder_clock = "none";

        // Multiplier: operation_mode "sp_mult"; operands enter on ay/az and
        // ax is left unconnected.
        twentynm_fp_mac mac_fp_mul (
            // inputs
            .accumulate(),
            .chainin_overflow(),
            .chainin_invalid(),
            .chainin_underflow(),
            .chainin_inexact(),
            .ax(),
            .ay(datab[i]),
            .az(dataa[i]),
            .clk({2'b00,clk}),
            .ena({2'b11,enable}),
            .aclr(2'b00),
            .chainin(),
            // outputs
            .overflow(),
            .invalid(),
            .underflow(),
            .inexact(),
            .chainout_overflow(),
            .chainout_invalid(),
            .chainout_underflow(),
            .chainout_inexact(),
            .resulta(result_mul),
            .chainout()
        );
        defparam mac_fp_mul.operation_mode = "sp_mult";
        defparam mac_fp_mul.use_chainin = "false";
        defparam mac_fp_mul.adder_subtract = "false";
        defparam mac_fp_mul.ax_clock = "none";
        defparam mac_fp_mul.ay_clock = "0";
        defparam mac_fp_mul.az_clock = "0";
        defparam mac_fp_mul.output_clock = "0";
        defparam mac_fp_mul.accumulate_clock = "none";
        defparam mac_fp_mul.ax_chainin_pl_clock = "none";
        defparam mac_fp_mul.accum_pipeline_clock = "none";
        defparam mac_fp_mul.mult_pipeline_clock = "0";
        defparam mac_fp_mul.adder_input_clock = "none";
        defparam mac_fp_mul.accum_adder_clock = "none";
    `else
        // Simulation path: the DPI calls write their result argument from
        // inside a procedural block, so the targets must be variables.
        // Declaring them as nets (as before) is a procedural assignment to a
        // wire, which SystemVerilog does not allow.
        reg [31:0] result_add;
        reg [31:0] result_sub;
        reg [31:0] result_mul;

        // The first argument gives each (operation, lane) pair a distinct
        // index. NOTE(review): presumably used by the DPI side to keep
        // per-instance pipeline state -- confirm in float_dpi.vh.
        always @(posedge clk) begin
            dpi_fadd(0*LANES+i, enable, dataa[i], datab[i], result_add);
            dpi_fsub(1*LANES+i, enable, dataa[i], datab[i], result_sub);
            dpi_fmul(2*LANES+i, enable, dataa[i], datab[i], result_mul);
        end
    `endif

        // Multiply has priority; otherwise choose between subtract and add.
        assign result[i] = do_mul_r ? result_mul : (do_sub_r ? result_sub : result_add);
    end

    // Carries tag, valid and the two operation selectors alongside the
    // datapath; advances only when the unit is not stalled.
    VX_shift_register #(
        .DATAW(TAGW + 1 + 1 + 1),
        .DEPTH(`LATENCY_FADDMUL)
    ) shift_reg (
        .clk(clk),
        .reset(reset),
        .enable(enable),
        .in({tag_in, valid_in, do_sub, do_mul}),
        .out({tag_out, valid_out, do_sub_r, do_mul_r})
    );

    // New requests are accepted exactly when the pipeline can advance.
    assign ready_in = enable;

endmodule
|
||||
@@ -26,20 +26,21 @@ module VX_fp_div #(
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
`ifdef QUARTUS
|
||||
acl_fp_div fdiv (
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.b (datab[i]),
|
||||
.q (result[i])
|
||||
);
|
||||
`else
|
||||
always @(posedge clk) begin
|
||||
dpi_fdiv(8*LANES+i, ~stall, valid_in, dataa[i], datab[i], result[i]);
|
||||
dpi_fdiv(8*LANES+i, enable, dataa[i], datab[i], result[i]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
@@ -50,11 +51,11 @@ module VX_fp_div #(
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.enable(enable),
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -27,7 +27,7 @@ module VX_fp_fpga #(
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam NUM_FPC = 7;
|
||||
localparam NUM_FPC = 8;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
@@ -40,29 +40,28 @@ module VX_fp_fpga #(
|
||||
fflags_t [`NUM_THREADS-1:0] fpnew_fflags;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg do_add, do_sub, do_mul;
|
||||
reg do_sub, do_mul;
|
||||
reg is_signed;
|
||||
|
||||
always @(*) begin
|
||||
core_select = 'x;
|
||||
do_add = 'x;
|
||||
do_sub = 'x;
|
||||
do_mul = 'x;
|
||||
is_signed = 'x;
|
||||
case (op_type)
|
||||
`FPU_ADD: begin core_select = 1; do_mul = 0; do_add = 1; do_sub = 0; end
|
||||
`FPU_SUB: begin core_select = 1; do_mul = 0; do_add = 0; do_sub = 1; end
|
||||
`FPU_MUL: begin core_select = 1; do_mul = 1; do_add = 0; do_sub = 0; end
|
||||
`FPU_MADD: begin core_select = 1; do_mul = 1; do_add = 1; do_sub = 0; end
|
||||
`FPU_MSUB: begin core_select = 1; do_mul = 1; do_add = 0; do_sub = 1; end
|
||||
`FPU_NMSUB: begin core_select = 2; do_sub = 1; end
|
||||
`FPU_NMADD: begin core_select = 2; do_sub = 0; end
|
||||
`FPU_DIV: begin core_select = 3; end
|
||||
`FPU_SQRT: begin core_select = 4; end
|
||||
`FPU_CVTWS: begin core_select = 5; is_signed = 1; end
|
||||
`FPU_CVTWUS: begin core_select = 5; is_signed = 0; end
|
||||
`FPU_CVTSW: begin core_select = 6; is_signed = 1; end
|
||||
`FPU_CVTSWU: begin core_select = 6; is_signed = 0; end
|
||||
`FPU_ADD: begin core_select = 1; do_mul = 0; do_sub = 0; end
|
||||
`FPU_SUB: begin core_select = 1; do_mul = 0; do_sub = 1; end
|
||||
`FPU_MUL: begin core_select = 1; do_mul = 1; do_sub = 0; end
|
||||
`FPU_MADD: begin core_select = 2; do_sub = 0; end
|
||||
`FPU_MSUB: begin core_select = 2; do_sub = 1; end
|
||||
`FPU_NMADD: begin core_select = 3; do_sub = 0; end
|
||||
`FPU_NMSUB: begin core_select = 3; do_sub = 1; end
|
||||
`FPU_DIV: begin core_select = 4; end
|
||||
`FPU_SQRT: begin core_select = 5; end
|
||||
`FPU_CVTWS: begin core_select = 6; is_signed = 1; end
|
||||
`FPU_CVTWUS: begin core_select = 6; is_signed = 0; end
|
||||
`FPU_CVTSW: begin core_select = 7; is_signed = 1; end
|
||||
`FPU_CVTSWU: begin core_select = 7; is_signed = 0; end
|
||||
default: begin core_select = 0; end
|
||||
endcase
|
||||
end
|
||||
@@ -88,25 +87,42 @@ module VX_fp_fpga #(
|
||||
.valid_out (per_core_valid_out[0])
|
||||
);
|
||||
|
||||
VX_fp_addmul #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_addmul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 1)),
|
||||
.ready_in (per_core_ready_in[1]),
|
||||
.tag_in (tag_in),
|
||||
.do_sub (do_sub),
|
||||
.do_mul (do_mul),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (per_core_result[1]),
|
||||
.tag_out (per_core_tag_out[1]),
|
||||
.ready_out (per_core_ready_out[1]),
|
||||
.valid_out (per_core_valid_out[1])
|
||||
);
|
||||
|
||||
VX_fp_madd #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_madd (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 1)),
|
||||
.ready_in (per_core_ready_in[1]),
|
||||
.tag_in (tag_in),
|
||||
.do_add (do_add),
|
||||
.valid_in (valid_in && (core_select == 2)),
|
||||
.ready_in (per_core_ready_in[2]),
|
||||
.tag_in (tag_in),
|
||||
.do_sub (do_sub),
|
||||
.do_mul (do_mul),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.result (per_core_result[1]),
|
||||
.tag_out (per_core_tag_out[1]),
|
||||
.ready_out (per_core_ready_out[1]),
|
||||
.valid_out (per_core_valid_out[1])
|
||||
.result (per_core_result[2]),
|
||||
.tag_out (per_core_tag_out[2]),
|
||||
.ready_out (per_core_ready_out[2]),
|
||||
.valid_out (per_core_valid_out[2])
|
||||
);
|
||||
|
||||
VX_fp_nmadd #(
|
||||
@@ -115,17 +131,17 @@ module VX_fp_fpga #(
|
||||
) fp_nmadd (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 2)),
|
||||
.ready_in (per_core_ready_in[2]),
|
||||
.valid_in (valid_in && (core_select == 3)),
|
||||
.ready_in (per_core_ready_in[3]),
|
||||
.tag_in (tag_in),
|
||||
.do_sub (do_sub),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.result (per_core_result[2]),
|
||||
.tag_out (per_core_tag_out[2]),
|
||||
.ready_out (per_core_ready_out[2]),
|
||||
.valid_out (per_core_valid_out[2])
|
||||
.result (per_core_result[3]),
|
||||
.tag_out (per_core_tag_out[3]),
|
||||
.ready_out (per_core_ready_out[3]),
|
||||
.valid_out (per_core_valid_out[3])
|
||||
);
|
||||
|
||||
VX_fp_div #(
|
||||
@@ -134,15 +150,15 @@ module VX_fp_fpga #(
|
||||
) fp_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 3)),
|
||||
.ready_in (per_core_ready_in[3]),
|
||||
.valid_in (valid_in && (core_select == 4)),
|
||||
.ready_in (per_core_ready_in[4]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (per_core_result[3]),
|
||||
.tag_out (per_core_tag_out[3]),
|
||||
.ready_out (per_core_ready_out[3]),
|
||||
.valid_out (per_core_valid_out[3])
|
||||
.result (per_core_result[4]),
|
||||
.tag_out (per_core_tag_out[4]),
|
||||
.ready_out (per_core_ready_out[4]),
|
||||
.valid_out (per_core_valid_out[4])
|
||||
);
|
||||
|
||||
VX_fp_sqrt #(
|
||||
@@ -151,14 +167,14 @@ module VX_fp_fpga #(
|
||||
) fp_sqrt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 4)),
|
||||
.ready_in (per_core_ready_in[4]),
|
||||
.valid_in (valid_in && (core_select == 5)),
|
||||
.ready_in (per_core_ready_in[5]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (per_core_result[4]),
|
||||
.tag_out (per_core_tag_out[4]),
|
||||
.ready_out (per_core_ready_out[4]),
|
||||
.valid_out (per_core_valid_out[4])
|
||||
.result (per_core_result[5]),
|
||||
.tag_out (per_core_tag_out[5]),
|
||||
.ready_out (per_core_ready_out[5]),
|
||||
.valid_out (per_core_valid_out[5])
|
||||
);
|
||||
|
||||
VX_fp_ftoi #(
|
||||
@@ -167,15 +183,15 @@ module VX_fp_fpga #(
|
||||
) fp_ftoi (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 5)),
|
||||
.ready_in (per_core_ready_in[5]),
|
||||
.valid_in (valid_in && (core_select == 6)),
|
||||
.ready_in (per_core_ready_in[6]),
|
||||
.tag_in (tag_in),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa),
|
||||
.result (per_core_result[5]),
|
||||
.tag_out (per_core_tag_out[5]),
|
||||
.ready_out (per_core_ready_out[5]),
|
||||
.valid_out (per_core_valid_out[5])
|
||||
.result (per_core_result[6]),
|
||||
.tag_out (per_core_tag_out[6]),
|
||||
.ready_out (per_core_ready_out[6]),
|
||||
.valid_out (per_core_valid_out[6])
|
||||
);
|
||||
|
||||
VX_fp_itof #(
|
||||
@@ -184,15 +200,15 @@ module VX_fp_fpga #(
|
||||
) fp_itof (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 6)),
|
||||
.ready_in (per_core_ready_in[6]),
|
||||
.valid_in (valid_in && (core_select == 7)),
|
||||
.ready_in (per_core_ready_in[7]),
|
||||
.tag_in (tag_in),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa),
|
||||
.result (per_core_result[6]),
|
||||
.tag_out (per_core_tag_out[6]),
|
||||
.ready_out (per_core_ready_out[6]),
|
||||
.valid_out (per_core_valid_out[6])
|
||||
.result (per_core_result[7]),
|
||||
.tag_out (per_core_tag_out[7]),
|
||||
.ready_out (per_core_ready_out[7]),
|
||||
.valid_out (per_core_valid_out[7])
|
||||
);
|
||||
|
||||
reg valid_out_n;
|
||||
|
||||
@@ -27,6 +27,7 @@ module VX_fp_ftoi #(
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
reg is_signed_r;
|
||||
|
||||
@@ -36,25 +37,25 @@ module VX_fp_ftoi #(
|
||||
wire [31:0] result_u;
|
||||
|
||||
`ifdef QUARTUS
|
||||
acl_fp_ftoi ftoi (
|
||||
acl_ftoi ftoi (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result_s)
|
||||
);
|
||||
|
||||
acl_fp_ftou ftou (
|
||||
acl_ftou ftou (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result_u)
|
||||
);
|
||||
`else
|
||||
always @(posedge clk) begin
|
||||
dpi_ftoi(10*LANES+i, ~stall, valid_in, dataa[i], result_s);
|
||||
dpi_ftou(11*LANES+i, ~stall, valid_in, dataa[i], result_u);
|
||||
dpi_ftoi(10*LANES+i, enable, dataa[i], result_s);
|
||||
dpi_ftou(11*LANES+i, enable, dataa[i], result_u);
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -67,11 +68,11 @@ module VX_fp_ftoi #(
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.enable(enable),
|
||||
.in ({tag_in, valid_in, is_signed}),
|
||||
.out({tag_out, valid_out, is_signed_r})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -27,6 +27,7 @@ module VX_fp_itof #(
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
reg is_signed_r;
|
||||
|
||||
@@ -36,25 +37,25 @@ module VX_fp_itof #(
|
||||
wire [31:0] result_u;
|
||||
|
||||
`ifdef QUARTUS
|
||||
acl_fp_itof itof (
|
||||
acl_itof itof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result_s)
|
||||
);
|
||||
|
||||
acl_fp_utof utof (
|
||||
acl_utof utof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result_u)
|
||||
);
|
||||
`else
|
||||
always @(posedge clk) begin
|
||||
dpi_itof(12*LANES+i, ~stall, valid_in, dataa[i], result_s);
|
||||
dpi_utof(13*LANES+i, ~stall, valid_in, dataa[i], result_u);
|
||||
dpi_itof(12*LANES+i, enable, dataa[i], result_s);
|
||||
dpi_utof(13*LANES+i, enable, dataa[i], result_u);
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -67,11 +68,11 @@ module VX_fp_itof #(
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.enable(enable),
|
||||
.in ({tag_in, valid_in, is_signed}),
|
||||
.out({tag_out, valid_out, is_signed_r})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -16,9 +16,7 @@ module VX_fp_madd #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire do_add,
|
||||
input wire do_sub,
|
||||
input wire do_mul,
|
||||
input wire do_sub,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
@@ -32,138 +30,16 @@ module VX_fp_madd #(
|
||||
);
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
reg do_add_r, do_sub_r, do_mul_r;
|
||||
reg do_sub_r;
|
||||
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
|
||||
wire [31:0] result_add;
|
||||
wire [31:0] result_sub;
|
||||
wire [31:0] result_mul;
|
||||
wire [31:0] result_madd;
|
||||
wire [31:0] result_msub;
|
||||
|
||||
`ifdef QUARTUS
|
||||
twentynm_fp_mac mac_fp_add (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
.chainin_overflow(),
|
||||
.chainin_invalid(),
|
||||
.chainin_underflow(),
|
||||
.chainin_inexact(),
|
||||
.ax(dataa[i]),
|
||||
.ay(datab[i]),
|
||||
.az(),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
.overflow(),
|
||||
.invalid(),
|
||||
.underflow(),
|
||||
.inexact(),
|
||||
.chainout_overflow(),
|
||||
.chainout_invalid(),
|
||||
.chainout_underflow(),
|
||||
.chainout_inexact(),
|
||||
.resulta(result_add),
|
||||
.chainout()
|
||||
);
|
||||
defparam mac_fp_add.operation_mode = "sp_add";
|
||||
defparam mac_fp_add.use_chainin = "false";
|
||||
defparam mac_fp_add.adder_subtract = "false";
|
||||
defparam mac_fp_add.ax_clock = "0";
|
||||
defparam mac_fp_add.ay_clock = "0";
|
||||
defparam mac_fp_add.az_clock = "none";
|
||||
defparam mac_fp_add.output_clock = "0";
|
||||
defparam mac_fp_add.accumulate_clock = "none";
|
||||
defparam mac_fp_add.ax_chainin_pl_clock = "none";
|
||||
defparam mac_fp_add.accum_pipeline_clock = "none";
|
||||
defparam mac_fp_add.mult_pipeline_clock = "none";
|
||||
defparam mac_fp_add.adder_input_clock = "0";
|
||||
defparam mac_fp_add.accum_adder_clock = "none";
|
||||
|
||||
twentynm_fp_mac mac_fp_sub (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
.chainin_overflow(),
|
||||
.chainin_invalid(),
|
||||
.chainin_underflow(),
|
||||
.chainin_inexact(),
|
||||
.ax(dataa[i]),
|
||||
.ay(datab[i]),
|
||||
.az(),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
.overflow(),
|
||||
.invalid(),
|
||||
.underflow(),
|
||||
.inexact(),
|
||||
.chainout_overflow(),
|
||||
.chainout_invalid(),
|
||||
.chainout_underflow(),
|
||||
.chainout_inexact(),
|
||||
.resulta(result_sub),
|
||||
.chainout()
|
||||
);
|
||||
defparam mac_fp_sub.operation_mode = "sp_add";
|
||||
defparam mac_fp_sub.use_chainin = "false";
|
||||
defparam mac_fp_sub.adder_subtract = "true";
|
||||
defparam mac_fp_sub.ax_clock = "0";
|
||||
defparam mac_fp_sub.ay_clock = "0";
|
||||
defparam mac_fp_sub.az_clock = "none";
|
||||
defparam mac_fp_sub.output_clock = "0";
|
||||
defparam mac_fp_sub.accumulate_clock = "none";
|
||||
defparam mac_fp_sub.ax_chainin_pl_clock = "none";
|
||||
defparam mac_fp_sub.accum_pipeline_clock = "none";
|
||||
defparam mac_fp_sub.mult_pipeline_clock = "none";
|
||||
defparam mac_fp_sub.adder_input_clock = "0";
|
||||
defparam mac_fp_sub.accum_adder_clock = "none";
|
||||
|
||||
twentynm_fp_mac mac_fp_mul (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
.chainin_overflow(),
|
||||
.chainin_invalid(),
|
||||
.chainin_underflow(),
|
||||
.chainin_inexact(),
|
||||
.ax(),
|
||||
.ay(datab[i]),
|
||||
.az(dataa[i]),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
.overflow(),
|
||||
.invalid(),
|
||||
.underflow(),
|
||||
.inexact(),
|
||||
.chainout_overflow(),
|
||||
.chainout_invalid(),
|
||||
.chainout_underflow(),
|
||||
.chainout_inexact(),
|
||||
.resulta(result_mul),
|
||||
.chainout()
|
||||
);
|
||||
defparam mac_fp_mul.operation_mode = "sp_mult";
|
||||
defparam mac_fp_mul.use_chainin = "false";
|
||||
defparam mac_fp_mul.adder_subtract = "false";
|
||||
defparam mac_fp_mul.ax_clock = "none";
|
||||
defparam mac_fp_mul.ay_clock = "0";
|
||||
defparam mac_fp_mul.az_clock = "0";
|
||||
defparam mac_fp_mul.output_clock = "0";
|
||||
defparam mac_fp_mul.accumulate_clock = "none";
|
||||
defparam mac_fp_mul.ax_chainin_pl_clock = "none";
|
||||
defparam mac_fp_mul.accum_pipeline_clock = "none";
|
||||
defparam mac_fp_mul.mult_pipeline_clock = "0";
|
||||
defparam mac_fp_mul.adder_input_clock = "none";
|
||||
defparam mac_fp_mul.accum_adder_clock = "none";
|
||||
|
||||
twentynm_fp_mac mac_fp_madd (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
@@ -175,7 +51,7 @@ module VX_fp_madd #(
|
||||
.ay(datab[i]),
|
||||
.az(dataa[i]),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.ena({2'b11,enable}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
@@ -215,7 +91,7 @@ module VX_fp_madd #(
|
||||
.ay(datab[i]),
|
||||
.az(dataa[i]),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.ena({2'b11,enable}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
@@ -245,47 +121,25 @@ module VX_fp_madd #(
|
||||
defparam mac_fp_msub.accum_adder_clock = "none";
|
||||
`else
|
||||
always @(posedge clk) begin
|
||||
dpi_fadd(0*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_add);
|
||||
dpi_fsub(1*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_sub);
|
||||
dpi_fmul(2*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_mul);
|
||||
dpi_fmadd(3*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(4*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
|
||||
dpi_fmadd(3*LANES+i, enable, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(4*LANES+i, enable, dataa[i], datab[i], datac[i], result_msub);
|
||||
end
|
||||
`endif
|
||||
|
||||
reg [31:0] result_r;
|
||||
|
||||
always @(*) begin
|
||||
result_r = 'x;
|
||||
if (do_mul_r) begin
|
||||
if (do_add_r)
|
||||
result_r = result_madd;
|
||||
else if (do_sub_r)
|
||||
result_r = result_msub;
|
||||
else
|
||||
result_r = result_mul;
|
||||
end else begin
|
||||
if (do_add_r)
|
||||
result_r = result_add;
|
||||
else if (do_sub_r)
|
||||
result_r = result_sub;
|
||||
end
|
||||
end
|
||||
|
||||
assign result[i] = result_r;
|
||||
assign result[i] = do_sub_r ? result_msub : result_madd;
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(TAGW + 1 + 1 + 1 + 1),
|
||||
.DATAW(TAGW + 1 + 1),
|
||||
.DEPTH(`LATENCY_FMADD)
|
||||
) shift_reg1 (
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.in({tag_in, valid_in, do_add, do_sub, do_mul}),
|
||||
.out({tag_out, valid_out, do_add_r, do_sub_r, do_mul_r})
|
||||
.enable(enable),
|
||||
.in({tag_in, valid_in, do_sub}),
|
||||
.out({tag_out, valid_out, do_sub_r})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -30,13 +30,14 @@ module VX_fp_nmadd #(
|
||||
);
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
reg do_sub_r;
|
||||
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
|
||||
wire [31:0] result_madd;
|
||||
wire [31:0] result_msub;
|
||||
wire [31:0] result_msub;
|
||||
|
||||
wire [31:0] result_st0 = do_sub_r ? result_msub : result_madd;
|
||||
|
||||
@@ -52,7 +53,7 @@ module VX_fp_nmadd #(
|
||||
.ay(datab[i]),
|
||||
.az(dataa[i]),
|
||||
.clk({2'b00,clk}),
|
||||
.ena({2'b11,~stall}),
|
||||
.ena({2'b11,enable}),
|
||||
.aclr(2'b00),
|
||||
.chainin(),
|
||||
// outputs
|
||||
@@ -161,33 +162,36 @@ module VX_fp_nmadd #(
|
||||
defparam mac_fp_neg.adder_input_clock = "0";
|
||||
defparam mac_fp_neg.accum_adder_clock = "none";
|
||||
`else
|
||||
reg valid_in_st0;
|
||||
always @(posedge clk) begin
|
||||
valid_in_st0 <= reset ? 0 : valid_in;
|
||||
dpi_fmadd(5*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(6*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
|
||||
dpi_fsub(7*LANES+i, ~stall, valid_in_st0, 32'b0, result_st0, result[i]);
|
||||
dpi_fmadd(5*LANES+i, enable, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(6*LANES+i, enable, dataa[i], datab[i], datac[i], result_msub);
|
||||
dpi_fsub(7*LANES+i, enable, 32'b0, result_st0, result[i]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (~stall) begin
|
||||
do_sub_r <= do_sub;
|
||||
end
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1),
|
||||
.DEPTH(`LATENCY_FMADD)
|
||||
) shift_reg0 (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({do_sub}),
|
||||
.out({do_sub_r})
|
||||
);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_FNMADD)
|
||||
.DEPTH(`LATENCY_FMADD + `LATENCY_FADDMUL)
|
||||
) shift_reg1 (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.enable(enable),
|
||||
.in({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
@@ -25,19 +25,20 @@ module VX_fp_sqrt #(
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
`ifdef QUARTUS
|
||||
acl_fp_sqrt fsqrt (
|
||||
acl_fsqrt fsqrt (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
.en (~stall),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result[i])
|
||||
);
|
||||
`else
|
||||
always @(posedge clk) begin
|
||||
dpi_fsqrt(9*LANES+i, ~stall, valid_in, dataa[i], result[i]);
|
||||
dpi_fsqrt(9*LANES+i, enable, dataa[i], result[i]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
@@ -48,11 +49,11 @@ module VX_fp_sqrt #(
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall),
|
||||
.enable(enable),
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_div
|
||||
// SystemVerilog created on Mon Aug 31 06:15:17 2020
|
||||
// SystemVerilog created from acl_fdiv
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_div (
|
||||
module acl_fdiv (
|
||||
input wire [31:0] a,
|
||||
input wire [31:0] b,
|
||||
input wire [0:0] en,
|
||||
@@ -623,7 +623,7 @@ module acl_fp_div (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_div_memoryC2_uid118_invTables_lutmem.hex"),
|
||||
.init_file("acl_fdiv_memoryC2_uid118_invTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC2_uid118_invTables_lutmem_dmem (
|
||||
@@ -755,7 +755,7 @@ module acl_fp_div (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_div_memoryC1_uid115_invTables_lutmem.hex"),
|
||||
.init_file("acl_fdiv_memoryC1_uid115_invTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC1_uid115_invTables_lutmem_dmem (
|
||||
@@ -1060,7 +1060,7 @@ module acl_fp_div (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_div_memoryC0_uid112_invTables_lutmem.hex"),
|
||||
.init_file("acl_fdiv_memoryC0_uid112_invTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC0_uid112_invTables_lutmem_dmem (
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_sqrt
|
||||
// SystemVerilog created on Mon Aug 31 06:15:18 2020
|
||||
// SystemVerilog created from acl_fsqrt
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_sqrt (
|
||||
module acl_fsqrt (
|
||||
input wire [31:0] a,
|
||||
input wire [0:0] en,
|
||||
output wire [31:0] q,
|
||||
@@ -279,7 +279,7 @@ module acl_fp_sqrt (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_sqrt_memoryC2_uid68_sqrtTables_lutmem.hex"),
|
||||
.init_file("acl_fsqrt_memoryC2_uid68_sqrtTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC2_uid68_sqrtTables_lutmem_dmem (
|
||||
@@ -412,7 +412,7 @@ module acl_fp_sqrt (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_sqrt_memoryC1_uid65_sqrtTables_lutmem.hex"),
|
||||
.init_file("acl_fsqrt_memoryC1_uid65_sqrtTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC1_uid65_sqrtTables_lutmem_dmem (
|
||||
@@ -723,7 +723,7 @@ module acl_fp_sqrt (
|
||||
.outdata_aclr_a("CLEAR0"),
|
||||
.clock_enable_input_a("NORMAL"),
|
||||
.power_up_uninitialized("FALSE"),
|
||||
.init_file("acl_fp_sqrt_memoryC0_uid62_sqrtTables_lutmem.hex"),
|
||||
.init_file("acl_fsqrt_memoryC0_uid62_sqrtTables_lutmem.hex"),
|
||||
.init_file_layout("PORT_A"),
|
||||
.intended_device_family("Arria 10")
|
||||
) memoryC0_uid62_sqrtTables_lutmem_dmem (
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_ftoi
|
||||
// SystemVerilog created on Mon Aug 31 06:15:18 2020
|
||||
// SystemVerilog created from acl_ftoi
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_ftoi (
|
||||
module acl_ftoi (
|
||||
input wire [31:0] a,
|
||||
input wire [0:0] en,
|
||||
output wire [31:0] q,
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_ftou
|
||||
// SystemVerilog created on Mon Aug 31 06:15:18 2020
|
||||
// SystemVerilog created from acl_ftou
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_ftou (
|
||||
module acl_ftou (
|
||||
input wire [31:0] a,
|
||||
input wire [0:0] en,
|
||||
output wire [31:0] q,
|
||||
169
hw/rtl/fp_cores/altera/acl_gen.log
Normal file
169
hw/rtl/fp_cores/altera/acl_gen.log
Normal file
@@ -0,0 +1,169 @@
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=21
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_fdiv
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 539, DSPs 5, RAMBits 32768, RAMBlocks 3
|
||||
The pipeline depth of the block is 15 cycle(s)
|
||||
@@start
|
||||
@name FPDiv@
|
||||
@latency 15@
|
||||
@LUT 539@
|
||||
@DSP 5@
|
||||
@RAMBits 32768@
|
||||
@RAMBlockUsage 3@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method polynomial approximation@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@inPort 1 fpieee 8 23@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=20
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_fsqrt
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 271, DSPs 3, RAMBits 15872, RAMBlocks 3
|
||||
The pipeline depth of the block is 10 cycle(s)
|
||||
@@start
|
||||
@name FPSqrt@
|
||||
@latency 10@
|
||||
@LUT 271@
|
||||
@DSP 3@
|
||||
@RAMBits 15872@
|
||||
@RAMBlockUsage 3@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method polynomial approximation@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=23
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_ftoi
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 327, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPToFXP@
|
||||
@latency 3@
|
||||
@LUT 327@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method default@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@outPort 0 fxp 32 0 1@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=23
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_ftou
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 287, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPToFXP@
|
||||
@latency 3@
|
||||
@LUT 287@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method default@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@outPort 0 fxp 32 0 0@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=23
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_itof
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 397, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 7 cycle(s)
|
||||
@@start
|
||||
@name FXPToFP@
|
||||
@latency 7@
|
||||
@LUT 397@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method default@
|
||||
@inPort 0 fxp 32 0 1@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=23
|
||||
Generation context:
|
||||
HardFP is enabled enabling set to true
|
||||
Faithful rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_utof
|
||||
Frequency 300MHz
|
||||
Deployment FPGA Arria10
|
||||
Estimated resources LUTs 363, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 7 cycle(s)
|
||||
@@start
|
||||
@name FXPToFP@
|
||||
@latency 7@
|
||||
@LUT 363@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 1.00@
|
||||
@rounding NA@
|
||||
@method default@
|
||||
@inPort 0 fxp 32 0 0@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
25
hw/rtl/fp_cores/altera/acl_gen.sh
Executable file
25
hw/rtl/fp_cores/altera/acl_gen.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
CMD_POLY_EVAL_PATH=$QUARTUS_HOME/dspba/backend/linux64
|
||||
|
||||
OPTIONS="-target Arria10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2"
|
||||
|
||||
export LD_LIBRARY_PATH=$CMD_POLY_EVAL_PATH:$LD_LIBRARY_PATH
|
||||
|
||||
CMD="$CMD_POLY_EVAL_PATH/cmdPolyEval $OPTIONS"
|
||||
|
||||
EXP_BITS=8
|
||||
MAN_BITS=23
|
||||
FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
|
||||
|
||||
echo Generating IP cores for $FBITS
|
||||
{
|
||||
$CMD -name acl_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
|
||||
$CMD -name acl_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS
|
||||
$CMD -name acl_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1
|
||||
$CMD -name acl_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0
|
||||
$CMD -name acl_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS
|
||||
$CMD -name acl_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS
|
||||
} > acl_gen.log 2>&1
|
||||
|
||||
#cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv .
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_itof
|
||||
// SystemVerilog created on Mon Aug 31 06:15:18 2020
|
||||
// SystemVerilog created from acl_itof
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_itof (
|
||||
module acl_itof (
|
||||
input wire [31:0] a,
|
||||
input wire [0:0] en,
|
||||
output wire [31:0] q,
|
||||
@@ -15,12 +15,12 @@
|
||||
// applicable agreement for further details.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fp_utof
|
||||
// SystemVerilog created on Mon Aug 31 06:15:18 2020
|
||||
// SystemVerilog created from acl_utof
|
||||
// SystemVerilog created on Wed Sep 2 07:11:09 2020
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
module acl_fp_utof (
|
||||
module acl_utof (
|
||||
input wire [31:0] a,
|
||||
input wire [0:0] en,
|
||||
output wire [31:0] q,
|
||||
@@ -1,25 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
CMD_POLY_EVAL_PATH=$QUARTUS_HOME/dspba/backend/linux64
|
||||
|
||||
OPTIONS="-target Arria10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2"
|
||||
|
||||
export LD_LIBRARY_PATH=$CMD_POLY_EVAL_PATH:$LD_LIBRARY_PATH
|
||||
|
||||
CMD="$CMD_POLY_EVAL_PATH/cmdPolyEval $OPTIONS"
|
||||
|
||||
EXP_BITS=8
|
||||
MAN_BITS=23
|
||||
FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
|
||||
|
||||
echo Generating IP cores for $FBITS
|
||||
{
|
||||
$CMD -name acl_fp_div -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
|
||||
$CMD -name acl_fp_sqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS
|
||||
$CMD -name acl_fp_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1
|
||||
$CMD -name acl_fp_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0
|
||||
$CMD -name acl_fp_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS
|
||||
$CMD -name acl_fp_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS
|
||||
} > log.txt 2>&1
|
||||
|
||||
cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv .
|
||||
@@ -8,21 +8,19 @@
|
||||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result);
|
||||
void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result);
|
||||
void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result);
|
||||
void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result);
|
||||
void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result);
|
||||
void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result);
|
||||
void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result);
|
||||
void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result);
|
||||
void dpi_ftou(int inst, bool enable, bool valid, int a, int* result);
|
||||
void dpi_itof(int inst, bool enable, bool valid, int a, int* result);
|
||||
void dpi_utof(int inst, bool enable, bool valid, int a, int* result);
|
||||
void dpi_fadd(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fsub(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fmul(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result);
|
||||
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result);
|
||||
void dpi_fdiv(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fsqrt(int inst, bool enable, int a, int* result);
|
||||
void dpi_ftoi(int inst, bool enable, int a, int* result);
|
||||
void dpi_ftou(int inst, bool enable, int a, int* result);
|
||||
void dpi_itof(int inst, bool enable, int a, int* result);
|
||||
void dpi_utof(int inst, bool enable, int a, int* result);
|
||||
}
|
||||
|
||||
extern double sc_time_stamp();
|
||||
|
||||
class ShiftRegister {
|
||||
public:
|
||||
ShiftRegister() : init_(false), depth_(0) {}
|
||||
@@ -35,37 +33,36 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void push(int value, bool enable, bool valid) {
|
||||
void push(int value, bool enable) {
|
||||
if (!enable)
|
||||
return;
|
||||
for (unsigned i = 0; i < depth_-1; ++i) {
|
||||
buffer_[i] = buffer_[i+1];
|
||||
}
|
||||
buffer_[depth_-1].value = value;
|
||||
buffer_[depth_-1].valid = valid;
|
||||
buffer_[depth_-1] = value;
|
||||
}
|
||||
|
||||
int top() const {
|
||||
return buffer_[0].value;
|
||||
}
|
||||
|
||||
bool valid() const {
|
||||
return buffer_[0].valid;
|
||||
return buffer_[0];
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
struct entry_t {
|
||||
int value;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
std::vector<entry_t> buffer_;
|
||||
int top_;
|
||||
unsigned depth_;
|
||||
std::vector<int> buffer_;
|
||||
bool init_;
|
||||
unsigned depth_;
|
||||
};
|
||||
|
||||
union Float_t {
|
||||
float f;
|
||||
int i;
|
||||
struct {
|
||||
uint32_t man : 23;
|
||||
uint32_t exp : 8;
|
||||
uint32_t sign : 1;
|
||||
} parts;
|
||||
};
|
||||
|
||||
class Instances {
|
||||
public:
|
||||
ShiftRegister& get(int inst) {
|
||||
@@ -82,130 +79,152 @@ private:
|
||||
|
||||
Instances instances;
|
||||
|
||||
void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result) {
|
||||
void dpi_fadd(int inst, bool enable, int a, int b, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fr = fa + fb;
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
sr.ensure_init(LATENCY_FMADD);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f + fb.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FADDMUL);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result) {
|
||||
void dpi_fsub(int inst, bool enable, int a, int b, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fr = fa - fb;
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
sr.ensure_init(LATENCY_FMADD);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f - fb.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FADDMUL);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result) {
|
||||
void dpi_fmul(int inst, bool enable, int a, int b, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fr = fa * fb;
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
sr.ensure_init(LATENCY_FMADD);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f * fb.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FADDMUL);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
|
||||
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fc = *(float*)&c;
|
||||
float fr = fa * fb + fc;
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f + fc.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FMADD);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
|
||||
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fc = *(float*)&c;
|
||||
float fr = fa * fb - fc;
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f - fc.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FMADD);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result) {
|
||||
void dpi_fdiv(int inst, bool enable, int a, int b, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fb = *(float*)&b;
|
||||
float fr = fa / fb;
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f / fb.f;
|
||||
|
||||
sr.ensure_init(LATENCY_FDIV);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result) {
|
||||
void dpi_fsqrt(int inst, bool enable, int a, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
float fr = sqrtf(fa);
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.f = sqrtf(fa.f);
|
||||
|
||||
sr.ensure_init(LATENCY_FSQRT);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result) {
|
||||
void dpi_ftoi(int inst, bool enable, int a, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
int ir = int(fa);
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = int(fa.f);
|
||||
|
||||
sr.ensure_init(LATENCY_FTOI);
|
||||
sr.push(ir, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_ftou(int inst, bool enable, bool valid, int a, int* result) {
|
||||
void dpi_ftou(int inst, bool enable, int a, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fa = *(float*)&a;
|
||||
unsigned ir = unsigned(fa);
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = unsigned(fa.f);
|
||||
|
||||
sr.ensure_init(LATENCY_FTOI);
|
||||
sr.push(ir, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_itof(int inst, bool enable, bool valid, int a, int* result) {
|
||||
void dpi_itof(int inst, bool enable, int a, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
float fr = (float)a;
|
||||
Float_t fa, fr;
|
||||
|
||||
fr.f = (float)a;
|
||||
|
||||
sr.ensure_init(LATENCY_ITOF);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
|
||||
void dpi_utof(int inst, bool enable, bool valid, int a, int* result) {
|
||||
void dpi_utof(int inst, bool enable, int a, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
unsigned ua = *(unsigned*)&a;
|
||||
float fr = (float)ua;
|
||||
Float_t fa, fr;
|
||||
|
||||
unsigned ua = a;
|
||||
fr.f = (float)ua;
|
||||
|
||||
sr.ensure_init(LATENCY_ITOF);
|
||||
sr.push(*(int*)&fr, enable, valid);
|
||||
sr.push(fr.i, enable);
|
||||
*result = sr.top();
|
||||
}
|
||||
@@ -1,16 +1,16 @@
|
||||
`ifndef FLOAT_DPI
|
||||
`define FLOAT_DPI
|
||||
|
||||
import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fmadd(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
|
||||
import "DPI-C" context function void dpi_fmsub(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
|
||||
import "DPI-C" context function void dpi_fdiv(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsqrt(int inst, input logic enable, input logic valid, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_ftoi(int inst, input logic enable, input logic valid, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_ftou(int inst, input logic enable, input logic valid, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_itof(int inst, input logic enable, input logic valid, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_utof(int inst, input logic enable, input logic valid, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fmadd(int inst, input logic enable, input int a, input int b, input int c, output int result);
|
||||
import "DPI-C" context function void dpi_fmsub(int inst, input logic enable, input int a, input int b, input int c, output int result);
|
||||
import "DPI-C" context function void dpi_fdiv(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsqrt(int inst, input logic enable, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_ftoi(int inst, input logic enable, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_ftou(int inst, input logic enable, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_itof(int inst, input logic enable, input int a, output int result);
|
||||
import "DPI-C" context function void dpi_utof(int inst, input logic enable, input int a, output int result);
|
||||
|
||||
`endif
|
||||
Reference in New Issue
Block a user