FPU area optimization sharing fmadd hard block

This commit is contained in:
Blaise Tine
2020-12-27 17:31:10 -08:00
parent 25df233005
commit e83c4638a0
15 changed files with 111 additions and 856 deletions

View File

@@ -96,10 +96,6 @@
`define LATENCY_FNONCOMP 1
`endif
// Number of stages of the VX_fp_addmul tag/valid shift register (used as its
// DEPTH parameter). Guarded so a build can override it before this point.
`ifndef LATENCY_FADDMUL
`define LATENCY_FADDMUL 3
`endif
// Number of stages of the VX_fp_madd tag/valid shift register (used as its
// DEPTH parameter). Guarded so a build can override it before this point.
`ifndef LATENCY_FMADD
`define LATENCY_FMADD 4
`endif

View File

@@ -1,101 +0,0 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
// VX_fp_addmul
//
// Per-lane single-precision add / subtract / multiply unit with a
// valid/ready handshake on both sides.
//
// For every lane the sum, difference and product of dataa/datab are all
// computed in parallel (vendor cores when QUARTUS is defined, DPI models
// otherwise). The do_sub/do_mul selectors travel through a shift register of
// depth `LATENCY_FADDMUL together with the valid bit and the tag, and the
// delayed selectors pick which of the three results is driven on `result`.
//
// Parameters:
//   TAGW  - width of the opaque tag carried from tag_in to tag_out
//   LANES - number of independent 32-bit lanes
//
// Handshake:
//   ready_in is the pipeline enable: it is low only while a valid result is
//   pending and the consumer is not ready.
module VX_fp_addmul #(
    parameter TAGW  = 1,
    parameter LANES = 1
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire do_sub,
    input wire do_mul,

    input wire [LANES-1:0][31:0] dataa,
    input wire [LANES-1:0][31:0] datab,
    output wire [LANES-1:0][31:0] result,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    // The whole pipeline advances unless a valid output is being held
    // because the consumer is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // Operation selectors, delayed to line up with the arithmetic results.
    reg sel_sub, sel_mul;

    for (genvar lane = 0; lane < LANES; lane++) begin

        wire [31:0] sum_q, diff_q, prod_q;

    `ifdef QUARTUS

        // Vendor floating-point cores, one of each kind per lane.
        acl_fadd fadd (
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (sum_q),
            .en     (pipe_en),
            .clk    (clk),
            .areset (reset)
        );

        acl_fsub fsub (
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (diff_q),
            .en     (pipe_en),
            .clk    (clk),
            .areset (reset)
        );

        acl_fmul fmul (
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (prod_q),
            .en     (pipe_en),
            .clk    (clk),
            .areset (reset)
        );

    `else

        // Simulation path: each operator gets its own handle from
        // dpi_register() and is evaluated once per clock edge.
        // NOTE(review): the DPI calls write into nets declared as wire;
        // confirm the target simulator accepts this.
        integer fadd_h, fsub_h, fmul_h;

        initial begin
            fadd_h = dpi_register();
            fsub_h = dpi_register();
            fmul_h = dpi_register();
        end

        always @(posedge clk) begin
            dpi_fadd (fadd_h, pipe_en, dataa[lane], datab[lane], sum_q);
            dpi_fsub (fsub_h, pipe_en, dataa[lane], datab[lane], diff_q);
            dpi_fmul (fmul_h, pipe_en, dataa[lane], datab[lane], prod_q);
        end

    `endif

        // Multiply has priority; otherwise choose between subtract and add.
        wire [31:0] addsub_q = sel_sub ? diff_q : sum_q;
        assign result[lane] = sel_mul ? prod_q : addsub_q;
    end

    // Carries {valid, tag, do_sub, do_mul} alongside the arithmetic pipeline.
    // NOTE(review): RESETW(1) presumably limits the reset to the valid bit;
    // confirm against VX_shift_register.
    VX_shift_register #(
        .DEPTH  (`LATENCY_FADDMUL),
        .DATAW  (TAGW + 3),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in,  tag_in,  do_sub,  do_mul}),
        .data_out ({valid_out, tag_out, sel_sub, sel_mul})
    );

    // New inputs are accepted exactly when the pipeline is allowed to move.
    assign ready_in = pipe_en;

endmodule

View File

@@ -27,7 +27,7 @@ module VX_fp_fpga #(
input wire ready_out,
output wire valid_out
);
localparam NUM_FPC = 7;
localparam NUM_FPC = 6;
localparam FPC_BITS = `LOG2UP(NUM_FPC);
wire [NUM_FPC-1:0] per_core_ready_in;
@@ -40,90 +40,50 @@ module VX_fp_fpga #(
fflags_t [`NUM_THREADS-1:0] fpnew_fflags;
reg [FPC_BITS-1:0] core_select;
reg do_sub, do_mul, do_neg;
reg is_signed;
reg do_madd, do_sub, do_neg, is_signed;
always @(*) begin
do_sub = 'x;
do_mul = 'x;
do_madd = 'x;
do_sub = 'x;
do_neg = 'x;
is_signed = 'x;
case (op_type)
`FPU_ADD: begin core_select = 1; do_mul = 0; do_sub = 0; end
`FPU_SUB: begin core_select = 1; do_mul = 0; do_sub = 1; end
`FPU_MUL: begin core_select = 1; do_mul = 1; do_sub = 0; end
`FPU_MADD: begin core_select = 2; do_sub = 0; do_neg = 0; end
`FPU_MSUB: begin core_select = 2; do_sub = 1; do_neg = 0; end
`FPU_NMADD: begin core_select = 2; do_sub = 0; do_neg = 1; end
`FPU_NMSUB: begin core_select = 2; do_sub = 1; do_neg = 1; end
`FPU_DIV: begin core_select = 3; end
`FPU_SQRT: begin core_select = 4; end
`FPU_CVTWS: begin core_select = 5; is_signed = 1; end
`FPU_CVTWUS: begin core_select = 5; is_signed = 0; end
`FPU_CVTSW: begin core_select = 6; is_signed = 1; end
`FPU_CVTSWU: begin core_select = 6; is_signed = 0; end
default: begin core_select = 0; end
`FPU_ADD: begin core_select = 0; do_madd = 0; do_sub = 0; do_neg = 0; end
`FPU_SUB: begin core_select = 0; do_madd = 0; do_sub = 1; do_neg = 0; end
`FPU_MUL: begin core_select = 0; do_madd = 0; do_sub = 0; do_neg = 1; end
`FPU_MADD: begin core_select = 0; do_madd = 1; do_sub = 0; do_neg = 0; end
`FPU_MSUB: begin core_select = 0; do_madd = 1; do_sub = 1; do_neg = 0; end
`FPU_NMADD: begin core_select = 0; do_madd = 1; do_sub = 0; do_neg = 1; end
`FPU_NMSUB: begin core_select = 0; do_madd = 1; do_sub = 1; do_neg = 1; end
`FPU_DIV: begin core_select = 1; end
`FPU_SQRT: begin core_select = 2; end
`FPU_CVTWS: begin core_select = 3; is_signed = 1; end
`FPU_CVTWUS: begin core_select = 3; is_signed = 0; end
`FPU_CVTSW: begin core_select = 4; is_signed = 1; end
`FPU_CVTSWU: begin core_select = 4; is_signed = 0; end
default: begin core_select = 5; end
endcase
end
VX_fp_noncomp #(
.TAGW (TAGW),
.LANES(`NUM_THREADS)
) fp_noncomp (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 0)),
.ready_in (per_core_ready_in[0]),
.tag_in (tag_in),
.op_type (op_type),
.frm (frm),
.dataa (dataa),
.datab (datab),
.result (per_core_result[0]),
.has_fflags (fpnew_has_fflags),
.fflags (fpnew_fflags),
.tag_out (per_core_tag_out[0]),
.ready_out (per_core_ready_out[0]),
.valid_out (per_core_valid_out[0])
);
VX_fp_addmul #(
.TAGW (TAGW),
.LANES(`NUM_THREADS)
) fp_addmul (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 1)),
.ready_in (per_core_ready_in[1]),
.tag_in (tag_in),
.do_sub (do_sub),
.do_mul (do_mul),
.dataa (dataa),
.datab (datab),
.result (per_core_result[1]),
.tag_out (per_core_tag_out[1]),
.ready_out (per_core_ready_out[1]),
.valid_out (per_core_valid_out[1])
);
VX_fp_madd #(
.TAGW (TAGW),
.LANES(`NUM_THREADS)
) fp_madd (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 2)),
.ready_in (per_core_ready_in[2]),
.valid_in (valid_in && (core_select == 0)),
.ready_in (per_core_ready_in[0]),
.tag_in (tag_in),
.do_madd (do_madd),
.do_sub (do_sub),
.do_neg (do_neg),
.dataa (dataa),
.datab (datab),
.datab (datab),
.datac (datac),
.result (per_core_result[2]),
.tag_out (per_core_tag_out[2]),
.ready_out (per_core_ready_out[2]),
.valid_out (per_core_valid_out[2])
.result (per_core_result[0]),
.tag_out (per_core_tag_out[0]),
.ready_out (per_core_ready_out[0]),
.valid_out (per_core_valid_out[0])
);
VX_fp_div #(
@@ -132,15 +92,15 @@ module VX_fp_fpga #(
) fp_div (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 3)),
.ready_in (per_core_ready_in[3]),
.tag_in (tag_in),
.valid_in (valid_in && (core_select == 1)),
.ready_in (per_core_ready_in[1]),
.tag_in (tag_in),
.dataa (dataa),
.datab (datab),
.result (per_core_result[3]),
.tag_out (per_core_tag_out[3]),
.ready_out (per_core_ready_out[3]),
.valid_out (per_core_valid_out[3])
.datab (datab),
.result (per_core_result[1]),
.tag_out (per_core_tag_out[1]),
.ready_out (per_core_ready_out[1]),
.valid_out (per_core_valid_out[1])
);
VX_fp_sqrt #(
@@ -149,14 +109,14 @@ module VX_fp_fpga #(
) fp_sqrt (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 4)),
.ready_in (per_core_ready_in[4]),
.valid_in (valid_in && (core_select == 2)),
.ready_in (per_core_ready_in[2]),
.tag_in (tag_in),
.dataa (dataa),
.result (per_core_result[4]),
.tag_out (per_core_tag_out[4]),
.ready_out (per_core_ready_out[4]),
.valid_out (per_core_valid_out[4])
.dataa (dataa),
.result (per_core_result[2]),
.tag_out (per_core_tag_out[2]),
.ready_out (per_core_ready_out[2]),
.valid_out (per_core_valid_out[2])
);
VX_fp_ftoi #(
@@ -165,15 +125,15 @@ module VX_fp_fpga #(
) fp_ftoi (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 5)),
.ready_in (per_core_ready_in[5]),
.valid_in (valid_in && (core_select == 3)),
.ready_in (per_core_ready_in[3]),
.tag_in (tag_in),
.is_signed (is_signed),
.dataa (dataa),
.result (per_core_result[5]),
.tag_out (per_core_tag_out[5]),
.ready_out (per_core_ready_out[5]),
.valid_out (per_core_valid_out[5])
.result (per_core_result[3]),
.tag_out (per_core_tag_out[3]),
.ready_out (per_core_ready_out[3]),
.valid_out (per_core_valid_out[3])
);
VX_fp_itof #(
@@ -182,15 +142,36 @@ module VX_fp_fpga #(
) fp_itof (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 6)),
.ready_in (per_core_ready_in[6]),
.valid_in (valid_in && (core_select == 4)),
.ready_in (per_core_ready_in[4]),
.tag_in (tag_in),
.is_signed (is_signed),
.dataa (dataa),
.result (per_core_result[6]),
.tag_out (per_core_tag_out[6]),
.ready_out (per_core_ready_out[6]),
.valid_out (per_core_valid_out[6])
.result (per_core_result[4]),
.tag_out (per_core_tag_out[4]),
.ready_out (per_core_ready_out[4]),
.valid_out (per_core_valid_out[4])
);
VX_fp_noncomp #(
.TAGW (TAGW),
.LANES(`NUM_THREADS)
) fp_noncomp (
.clk (clk),
.reset (reset),
.valid_in (valid_in && (core_select == 5)),
.ready_in (per_core_ready_in[5]),
.tag_in (tag_in),
.op_type (op_type),
.frm (frm),
.dataa (dataa),
.datab (datab),
.result (per_core_result[5]),
.has_fflags (fpnew_has_fflags),
.fflags (fpnew_fflags),
.tag_out (per_core_tag_out[5]),
.ready_out (per_core_ready_out[5]),
.valid_out (per_core_valid_out[5])
);
reg valid_out_n;
@@ -208,7 +189,7 @@ module VX_fp_fpga #(
if (per_core_valid_out[i]) begin
per_core_ready_out[i] = ready_out;
valid_out_n = 1;
has_fflags_n = fpnew_has_fflags && (i == 0);
has_fflags_n = fpnew_has_fflags && (i == 5);
result_n = per_core_result[i];
tag_out_n = per_core_tag_out[i];
break;

View File

@@ -16,9 +16,10 @@ module VX_fp_madd #(
input wire [TAGW-1:0] tag_in,
input wire do_sub,
input wire do_neg,
input wire do_madd,
input wire do_sub,
input wire do_neg,
input wire [LANES-1:0][31:0] dataa,
input wire [LANES-1:0][31:0] datab,
input wire [LANES-1:0][31:0] datac,
@@ -33,60 +34,62 @@ module VX_fp_madd #(
wire stall = ~ready_out && valid_out;
wire enable = ~stall;
reg do_sub_r, do_neg_r;
for (genvar i = 0; i < LANES; i++) begin
wire [31:0] result_madd;
wire [31:0] result_msub;
reg [31:0] a, b, c;
always @(*) begin
if (do_madd) begin
// MADD/MSUB/NMADD/NMSUB
a = do_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
b = datab[i];
c = (do_neg ^ do_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
end else begin
if (do_neg) begin
// MUL
a = dataa[i];
b = datab[i];
c = 0;
end else begin
// ADD/SUB
a = 32'h3f800000; // 1.0f
b = dataa[i];
c = do_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
end
end
end
`ifdef QUARTUS
acl_fmadd fmadd (
.clk (clk),
.areset (reset),
.en (enable),
.a (dataa[i]),
.b (datab[i]),
.c (datac[i]),
.q (result_madd)
);
acl_fmsub fmsub (
.clk (clk),
.areset (reset),
.en (enable),
.a (dataa[i]),
.b (datab[i]),
.c (datac[i]),
.q (result_msub)
.a (a),
.b (b),
.c (c),
.q (result[i])
);
`else
integer fmadd_h, fmsub_h;
integer fmadd_h;
initial begin
fmadd_h = dpi_register();
fmsub_h = dpi_register();
end
always @(posedge clk) begin
dpi_fmadd (fmadd_h, enable, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub (fmsub_h, enable, dataa[i], datab[i], datac[i], result_msub);
dpi_fmadd (fmadd_h, enable, a, b, c, result[i]);
end
`endif
wire [31:0] result_unqual = do_sub_r ? result_msub : result_madd;
assign result[i][31] = result_unqual[31] ^ do_neg_r;
assign result[i][30:0] = result_unqual[30:0];
end
VX_shift_register #(
.DATAW (1 + TAGW + 1 + 1),
.DATAW (1 + TAGW),
.DEPTH (`LATENCY_FMADD),
.RESETW (1)
) shift_reg (
.clk(clk),
.reset (reset),
.enable (enable),
.data_in ({valid_in, tag_in, do_sub, do_neg}),
.data_out ({valid_out, tag_out, do_sub_r, do_neg_r})
.data_in ({valid_in, tag_in}),
.data_out ({valid_out, tag_out})
);
assign ready_in = enable;

View File

@@ -1,67 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fadd
// SystemVerilog created on Sun Dec 27 09:47:20 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fadd (twentynm variant)
//
// Thin wrapper that maps a 32-bit two-operand interface onto a single
// twentynm_fp_mac primitive configured with operation_mode "sp_add".
//   a -> ay, b -> ax, q <- resulta
// `en` gates clock-enable bit 0 of the primitive; `areset` drives both bits
// of its aclr input. The generator annotated the output as available at
// cycle 3 ("q0@3").
module acl_fadd (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    twentynm_fp_mac #(
        .operation_mode    ("sp_add"),
        .adder_input_clock ("0"),
        .output_clock      ("0"),
        .ay_clock          ("0"),
        .ax_clock          ("0")
    ) fpAddTest_impl_DSP0 (
        // clocking / control: only index 0 of each bus is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, en[0]}),
        .aclr       ({areset, areset}),
        // operands and result
        .ay         (a),
        .ax         (b),
        .resulta    (q),
        // unused primitive ports, left open
        .az         (),
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,75 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fmsub
// SystemVerilog created on Sun Dec 27 07:07:02 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fmsub (twentynm variant)
//
// Thin wrapper that maps a 32-bit three-operand interface onto a single
// twentynm_fp_mac primitive configured with operation_mode "sp_mult_add",
// adder_subtract "true" and chain-in disabled.
//   a -> az, b -> ay, c -> ax, q <- resulta
// NOTE(review): presumably this yields a*b - c; confirm operand roles
// against the primitive's documentation.
// `en` gates clock-enable bit 0 of the primitive; `areset` drives both bits
// of its aclr input. The generator annotated the output as available at
// cycle 4 ("q0@4").
module acl_fmsub (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [31:0] c,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    twentynm_fp_mac #(
        .operation_mode      ("sp_mult_add"),
        .adder_subtract      ("true"),
        .use_chainin         ("false"),
        .mult_pipeline_clock ("0"),
        .adder_input_clock   ("0"),
        .ax_chainin_pl_clock ("0"),
        .output_clock        ("0"),
        .az_clock            ("0"),
        .ay_clock            ("0"),
        .ax_clock            ("0")
    ) fpMultAddTest_impl_DSP0 (
        // clocking / control: only index 0 of each bus is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, en[0]}),
        .aclr       ({areset, areset}),
        // operands and result
        .az         (a),
        .ay         (b),
        .ax         (c),
        .resulta    (q),
        // unused primitive ports, left open
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,67 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fmul
// SystemVerilog created on Sun Dec 27 09:47:20 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fmul (twentynm variant)
//
// Thin wrapper that maps a 32-bit two-operand interface onto a single
// twentynm_fp_mac primitive configured with operation_mode "sp_mult".
//   a -> az, b -> ay, q <- resulta
// `en` gates clock-enable bit 0 of the primitive; `areset` drives both bits
// of its aclr input. The generator annotated the output as available at
// cycle 3 ("q0@3").
module acl_fmul (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    twentynm_fp_mac #(
        .operation_mode      ("sp_mult"),
        .mult_pipeline_clock ("0"),
        .output_clock        ("0"),
        .az_clock            ("0"),
        .ay_clock            ("0")
    ) fpMulTest_impl_DSP0 (
        // clocking / control: only index 0 of each bus is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, en[0]}),
        .aclr       ({areset, areset}),
        // operands and result
        .az         (a),
        .ay         (b),
        .resulta    (q),
        // unused primitive ports, left open
        .ax         (),
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,68 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 17.1 (Release Build #273)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2017 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fsub
// SystemVerilog created on Sun Dec 27 09:47:20 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fsub (twentynm variant)
//
// Thin wrapper that maps a 32-bit two-operand interface onto a single
// twentynm_fp_mac primitive configured with operation_mode "sp_add" and
// adder_subtract "true".
//   a -> ay, b -> ax, q <- resulta
// NOTE(review): presumably this yields a - b; confirm operand roles against
// the primitive's documentation.
// `en` gates clock-enable bit 0 of the primitive; `areset` drives both bits
// of its aclr input. The generator annotated the output as available at
// cycle 3 ("q0@3").
module acl_fsub (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    twentynm_fp_mac #(
        .operation_mode    ("sp_add"),
        .adder_subtract    ("true"),
        .adder_input_clock ("0"),
        .output_clock      ("0"),
        .ay_clock          ("0"),
        .ax_clock          ("0")
    ) fpSubTest_impl_DSP0 (
        // clocking / control: only index 0 of each bus is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, en[0]}),
        .aclr       ({areset, areset}),
        // operands and result
        .ay         (a),
        .ax         (b),
        .resulta    (q),
        // unused primitive ports, left open
        .az         (),
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -18,9 +18,6 @@ FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
echo Generating IP cores for $FBITS
{
$CMD -name "$PREFIX"_fadd -frequency 250 FPAdd $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fsub -frequency 250 FPSub $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fmul -frequency 250 FPMul $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fmadd -frequency 250 FPMultAdd $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
$CMD -name "$PREFIX"_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS

View File

@@ -1,68 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 18.1 (Release Build #277)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2019 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fadd
// SystemVerilog created on Sun Dec 27 09:48:57 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fadd (fourteennm variant)
//
// Thin wrapper that maps a 32-bit two-operand interface onto a single
// fourteennm_fp_mac primitive configured with operation_mode "sp_add" and
// clear_type "sclr".
//   a -> ay, b -> ax, q <- resulta
// The primitive's clock enable is forced on while `areset` is asserted, so
// the clear is applied even if `en` is low. The generator annotated the
// output as available at cycle 3 ("q0@3").
module acl_fadd (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    // Enable the DSP registers on a normal enable or during reset.
    wire dsp_ena = en[0] | areset;

    fourteennm_fp_mac #(
        .operation_mode    ("sp_add"),
        .clear_type        ("sclr"),
        .adder_input_clock ("0"),
        .output_clock      ("0"),
        .ay_clock          ("0"),
        .ax_clock          ("0")
    ) fpAddTest_impl_DSP0 (
        // clocking / control: only index 0 of clk/ena is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, dsp_ena}),
        .clr        ({areset, areset}),
        // operands and result
        .ay         (a),
        .ax         (b),
        .resulta    (q),
        // unused primitive ports, left open
        .az         (),
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,75 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 18.1 (Release Build #277)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2019 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fmsub
// SystemVerilog created on Sun Dec 27 07:06:39 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fmsub (fourteennm variant)
//
// Thin wrapper that maps a 32-bit three-operand interface onto a single
// fourteennm_fp_mac primitive configured with operation_mode "sp_mult_add",
// adder_subtract "true" and clear_type "sclr".
//   a -> az, b -> ay, c -> ax, q <- resulta
// NOTE(review): presumably this yields a*b - c; confirm operand roles
// against the primitive's documentation.
// The primitive's clock enable is forced on while `areset` is asserted, so
// the clear is applied even if `en` is low. The generator annotated the
// output as available at cycle 4 ("q0@4").
module acl_fmsub (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [31:0] c,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    // Enable the DSP registers on a normal enable or during reset.
    wire dsp_ena = en[0] | areset;

    fourteennm_fp_mac #(
        .operation_mode          ("sp_mult_add"),
        .adder_subtract          ("true"),
        .clear_type              ("sclr"),
        .mult_2nd_pipeline_clock ("0"),
        .adder_input_clock       ("0"),
        .ax_chainin_pl_clock     ("0"),
        .output_clock            ("0"),
        .az_clock                ("0"),
        .ay_clock                ("0"),
        .ax_clock                ("0")
    ) fpMultAddTest_impl_DSP0 (
        // clocking / control: only index 0 of clk/ena is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, dsp_ena}),
        .clr        ({areset, areset}),
        // operands and result
        .az         (a),
        .ay         (b),
        .ax         (c),
        .resulta    (q),
        // unused primitive ports, left open
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,68 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 18.1 (Release Build #277)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2019 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fmul
// SystemVerilog created on Sun Dec 27 09:48:57 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
// acl_fmul (fourteennm variant)
//
// Thin wrapper that maps a 32-bit two-operand interface onto a single
// fourteennm_fp_mac primitive configured with operation_mode "sp_mult" and
// clear_type "sclr".
//   a -> az, b -> ay, q <- resulta
// The primitive's clock enable is forced on while `areset` is asserted, so
// the clear is applied even if `en` is low. The generator annotated the
// output as available at cycle 3 ("q0@3").
module acl_fmul (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);

    // Enable the DSP registers on a normal enable or during reset.
    wire dsp_ena = en[0] | areset;

    fourteennm_fp_mac #(
        .operation_mode          ("sp_mult"),
        .clear_type              ("sclr"),
        .mult_2nd_pipeline_clock ("0"),
        .output_clock            ("0"),
        .az_clock                ("0"),
        .ay_clock                ("0")
    ) fpMulTest_impl_DSP0 (
        // clocking / control: only index 0 of clk/ena is used
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, dsp_ena}),
        .clr        ({areset, areset}),
        // operands and result
        .az         (a),
        .ay         (b),
        .resulta    (q),
        // unused primitive ports, left open
        .ax         (),
        .accumulate (),
        .chainin    (),
        .chainout   ()
    );

endmodule

View File

@@ -1,69 +0,0 @@
// -------------------------------------------------------------------------
// High Level Design Compiler for Intel(R) FPGAs Version 18.1 (Release Build #277)
// Quartus Prime development tool and MATLAB/Simulink Interface
//
// Legal Notice: Copyright 2019 Intel Corporation. All rights reserved.
// Your use of Intel Corporation's design tools, logic functions and other
// software and tools, and its AMPP partner logic functions, and any output
// files any of the foregoing (including device programming or simulation
// files), and any associated documentation or information are expressly
// subject to the terms and conditions of the Intel FPGA Software License
// Agreement, Intel MegaCore Function License Agreement, or other applicable
// license agreement, including, without limitation, that your use is for
// the sole purpose of programming logic devices manufactured by Intel
// and sold by Intel or its authorized distributors. Please refer to the
// applicable agreement for further details.
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fsub
// SystemVerilog created on Sun Dec 27 09:48:57 2020
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
module acl_fsub (
    input wire [31:0] a,
    input wire [31:0] b,
    input wire [0:0] en,
    output wire [31:0] q,
    input wire clk,
    input wire areset
);
    // Single-precision floating-point subtract mapped onto one hard
    // floating-point DSP block (fourteennm_fp_mac in "sp_add" mode with
    // adder_subtract set to "true").
    //
    //   a  -> DSP input ay
    //   b  -> DSP input ax
    //   q  <- DSP output resulta
    //
    // NOTE(review): presumably q = a - b (ay - ax), matching the module
    // name; confirm the operand order against the Intel FP DSP documentation.
    // The generator annotated the output as valid at cycle 3 relative to the
    // inputs at cycle 0 ("out q0@3").

    // The clock enable is forced high whenever areset is asserted, so the
    // DSP registers are enabled while the clear input is active.
    wire dsp_clk_en = en[0] | areset;

    fourteennm_fp_mac #(
        .operation_mode    ("sp_add"),
        .adder_subtract    ("true"),
        .ax_clock          ("0"),
        .ay_clock          ("0"),
        .adder_input_clock ("0"),
        .output_clock      ("0"),
        .clear_type        ("sclr")
    ) fpSubTest_impl_DSP0 (
        // Only clock/enable index 0 is used; the other two are tied low.
        .clk        ({1'b0, 1'b0, clk}),
        .ena        ({1'b0, 1'b0, dsp_clk_en}),
        // Both clear inputs are driven by the same reset signal.
        .clr        ({2{areset}}),
        .ax         (b),
        .ay         (a),
        .resulta    (q),
        // Unused ports are intentionally left unconnected.
        .accumulate (),
        .az         (),
        .chainin    (),
        .chainout   ()
    );
endmodule

View File

@@ -18,9 +18,6 @@ FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
echo Generating IP cores for $FBITS
{
$CMD -name "$PREFIX"_fadd -frequency 250 FPAdd $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fsub -frequency 250 FPSub $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fmul -frequency 250 FPMul $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fmadd -frequency 250 FPMultAdd $EXP_BITS $MAN_BITS
$CMD -name "$PREFIX"_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
$CMD -name "$PREFIX"_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS

View File

@@ -10,11 +10,7 @@
extern "C" {
int dpi_register();
void dpi_fadd(int inst, bool enable, int a, int b, int* result);
void dpi_fsub(int inst, bool enable, int a, int b, int* result);
void dpi_fmul(int inst, bool enable, int a, int b, int* result);
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result);
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result);
void dpi_fdiv(int inst, bool enable, int a, int b, int* result);
void dpi_fsqrt(int inst, bool enable, int a, int* result);
void dpi_ftoi(int inst, bool enable, int a, int* result);
@@ -91,48 +87,6 @@ int dpi_register() {
return instances.allocate();
}
void dpi_fadd(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f + fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fsub(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f - fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fmul(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f * fb.f;
sr.ensure_init(LATENCY_FADDMUL);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
@@ -148,21 +102,6 @@ void dpi_fmadd(int inst, bool enable, int a, int b, int c, int* result) {
*result = sr.top();
}
void dpi_fmsub(int inst, bool enable, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = fa.f * fb.f - fc.f;
sr.ensure_init(LATENCY_FMADD);
sr.push(fr.i, enable);
*result = sr.top();
}
void dpi_fdiv(int inst, bool enable, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);