Multiple fixes: parallel printf, fixed cycle in cache, refactored OpenCL vecadd and sgemm, regenerated OpenCL kernels with hard-float, fixed Vortex IO bus interface, fixed DPI floats API to support multicore mode, made vlsim multicore by default, made rtlsim multicore by default, removed POCL binaries from repository, updated Makefiles to use external POCL
This commit is contained in:
@@ -161,10 +161,16 @@ module VX_fp_addmul #(
|
||||
defparam mac_fp_mul.adder_input_clock = "none";
|
||||
defparam mac_fp_mul.accum_adder_clock = "none";
|
||||
`else
|
||||
integer fadd_h, fsub_h, fmul_h;
|
||||
initial begin
|
||||
fadd_h = dpi_register();
|
||||
fsub_h = dpi_register();
|
||||
fmul_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_fadd(0*LANES+i, enable, dataa[i], datab[i], result_add);
|
||||
dpi_fsub(1*LANES+i, enable, dataa[i], datab[i], result_sub);
|
||||
dpi_fmul(2*LANES+i, enable, dataa[i], datab[i], result_mul);
|
||||
dpi_fadd(fadd_h, enable, dataa[i], datab[i], result_add);
|
||||
dpi_fsub(fsub_h, enable, dataa[i], datab[i], result_sub);
|
||||
dpi_fmul(fmul_h, enable, dataa[i], datab[i], result_mul);
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
@@ -39,8 +39,12 @@ module VX_fp_div #(
|
||||
.q (result[i])
|
||||
);
|
||||
`else
|
||||
integer fdiv_h;
|
||||
initial begin
|
||||
fdiv_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_fdiv(8*LANES+i, enable, dataa[i], datab[i], result[i]);
|
||||
dpi_fdiv(fdiv_h, enable, dataa[i], datab[i], result[i]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -53,9 +53,14 @@ module VX_fp_ftoi #(
|
||||
.q (result_u)
|
||||
);
|
||||
`else
|
||||
integer ftoi_h, ftou_h;
|
||||
initial begin
|
||||
ftoi_h = dpi_register();
|
||||
ftou_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_ftoi(10*LANES+i, enable, dataa[i], result_s);
|
||||
dpi_ftou(11*LANES+i, enable, dataa[i], result_u);
|
||||
dpi_ftoi(ftoi_h, enable, dataa[i], result_s);
|
||||
dpi_ftou(ftou_h, enable, dataa[i], result_u);
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
@@ -53,9 +53,14 @@ module VX_fp_itof #(
|
||||
.q (result_u)
|
||||
);
|
||||
`else
|
||||
integer itof_h, utof_h;
|
||||
initial begin
|
||||
itof_h = dpi_register();
|
||||
utof_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_itof(12*LANES+i, enable, dataa[i], result_s);
|
||||
dpi_utof(13*LANES+i, enable, dataa[i], result_u);
|
||||
dpi_itof(itof_h, enable, dataa[i], result_s);
|
||||
dpi_utof(utof_h, enable, dataa[i], result_u);
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
@@ -121,9 +121,14 @@ module VX_fp_madd #(
|
||||
defparam mac_fp_msub.adder_input_clock = "0";
|
||||
defparam mac_fp_msub.accum_adder_clock = "none";
|
||||
`else
|
||||
integer fmadd_h, fmsub_h;
|
||||
initial begin
|
||||
fmadd_h = dpi_register();
|
||||
fmsub_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_fmadd(3*LANES+i, enable, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(4*LANES+i, enable, dataa[i], datab[i], datac[i], result_msub);
|
||||
dpi_fmadd(fmadd_h, enable, dataa[i], datab[i], datac[i], result_madd);
|
||||
dpi_fmsub(fmsub_h, enable, dataa[i], datab[i], datac[i], result_msub);
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
@@ -37,8 +37,12 @@ module VX_fp_sqrt #(
|
||||
.q (result[i])
|
||||
);
|
||||
`else
|
||||
integer fsqrt_h;
|
||||
initial begin
|
||||
fsqrt_h = dpi_register();
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
dpi_fsqrt(9*LANES+i, enable, dataa[i], result[i]);
|
||||
dpi_fsqrt(fsqrt_h, enable, dataa[i], result[i]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -3,11 +3,13 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
int dpi_register();
|
||||
void dpi_fadd(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fsub(int inst, bool enable, int a, int b, int* result);
|
||||
void dpi_fmul(int inst, bool enable, int a, int b, int* result);
|
||||
@@ -66,19 +68,28 @@ union Float_t {
|
||||
class Instances {
|
||||
public:
|
||||
ShiftRegister& get(int inst) {
|
||||
mutex_.lock();
|
||||
ShiftRegister& sr = instances_[inst];
|
||||
return instances_.at(inst);
|
||||
}
|
||||
|
||||
int allocate() {
|
||||
mutex_.lock();
|
||||
int inst = instances_.size();
|
||||
instances_.resize(inst + 1);
|
||||
mutex_.unlock();
|
||||
return sr;
|
||||
return inst;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<int, ShiftRegister> instances_;
|
||||
std::vector<ShiftRegister> instances_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
Instances instances;
|
||||
|
||||
// DPI entry point that hands out a new instance handle by forwarding to
// instances.allocate(). The returned int is what callers pass as the `inst`
// argument of the dpi_f* functions, which look it up via instances.get(inst).
int dpi_register() {
|
||||
return instances.allocate();
|
||||
}
|
||||
|
||||
void dpi_fadd(int inst, bool enable, int a, int b, int* result) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
`ifndef FLOAT_DPI
|
||||
`define FLOAT_DPI
|
||||
|
||||
import "DPI-C" context function int dpi_register();
|
||||
|
||||
import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input int a, input int b, output int result);
|
||||
|
||||
Reference in New Issue
Block a user