Speed up simulation by using a dedicated, fully DPI-based FPU core
This commit is contained in:
264
hw/dpi/float_dpi.cpp
Normal file
264
hw/dpi/float_dpi.cpp
Normal file
@@ -0,0 +1,264 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
// C-linkage entry points for the DPI floating-point model.
// Float operands and float results travel as raw 32-bit patterns in `int`.
//
// NOTE(review): the SystemVerilog imports declare `frm` as bit[2:0] and
// `fflags` as bit[4:0]; under the DPI-C type mapping, packed bit vectors are
// normally exchanged through svBitVecVal pointers. Confirm these prototypes
// agree with the header the simulator generates.
extern "C" {
  // Two-operand add/sub/mul.
  void dpi_fadd(int a, int b, int frm, int* result, int* fflags);
  void dpi_fsub(int a, int b, int frm, int* result, int* fflags);
  void dpi_fmul(int a, int b, int frm, int* result, int* fflags);

  // Three-operand multiply-add family.
  void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags);
  void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags);
  void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags);
  void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags);

  // Division and square root.
  void dpi_fdiv(int a, int b, int frm, int* result, int* fflags);
  void dpi_fsqrt(int a, int frm, int* result, int* fflags);

  // Float <-> integer conversions.
  void dpi_ftoi(int a, int frm, int* result, int* fflags);
  void dpi_ftou(int a, int frm, int* result, int* fflags);
  void dpi_itof(int a, int frm, int* result, int* fflags);
  void dpi_utof(int a, int frm, int* result, int* fflags);

  // Classification and sign manipulation (single operand, no flags).
  void dpi_fclss(int a, int* result);
  void dpi_fsgnj(int a, int* result);
  void dpi_fsgnjn(int a, int* result);
  void dpi_fsgnjx(int a, int* result);

  // Comparisons and min/max.
  void dpi_flt(int a, int b, int* result, int* fflags);
  void dpi_fle(int a, int b, int* result, int* fflags);
  void dpi_feq(int a, int b, int* result, int* fflags);
  void dpi_fmin(int a, int b, int* result, int* fflags);
  void dpi_fmax(int a, int b, int* result, int* fflags);
}
|
||||
|
||||
// Overlay used to move between a float value and its 32-bit representation.
//   f     - the value viewed as a float
//   i     - the same storage viewed as an int
//   parts - the same storage split into 23/8/1-bit fields
// NOTE(review): which bits each `parts` field covers depends on the
// compiler's bit-field allocation order; the man/exp/sign naming presumably
// assumes low-to-high allocation. Confirm for the target toolchain.
union Float_t {
  float f;
  int   i;
  struct {
    uint32_t man  : 23;
    uint32_t exp  : 8;
    uint32_t sign : 1;
  } parts;
};
|
||||
|
||||
void dpi_fadd(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f + fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fsub(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f - fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fmul(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f * fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
/**
 * Fused multiply-add: a * b + c with a single rounding step.
 *
 * The separate multiply and add used previously either rounded twice or was
 * silently fused by the compiler depending on build flags; fmaf() gives the
 * fused result deterministically.
 * NOTE(review): the fmadd/frm/fflags naming suggests RISC-V F semantics,
 * where this operation is fused. Confirm against the RTL being replaced.
 *
 * @param a, b, c operands as raw float bit patterns
 * @param frm     rounding-mode selector; not consulted by this implementation
 * @param result  receives the raw bit pattern of the result
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa, fb, fc, fr;
  fa.i = a;
  fb.i = b;
  fc.i = c;
  fr.f = fmaf(fa.f, fb.f, fc.f);

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Fused multiply-subtract: a * b - c with a single rounding step.
 *
 * Uses fmaf() so the result does not depend on whether the compiler chooses
 * to contract a separate multiply and subtract.
 * NOTE(review): the fmsub/frm/fflags naming suggests RISC-V F semantics,
 * where this operation is fused. Confirm against the RTL being replaced.
 *
 * @param a, b, c operands as raw float bit patterns
 * @param frm     rounding-mode selector; not consulted by this implementation
 * @param result  receives the raw bit pattern of the result
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa, fb, fc, fr;
  fa.i = a;
  fb.i = b;
  fc.i = c;
  fr.f = fmaf(fa.f, fb.f, -fc.f);

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Negated fused multiply-add: -(a * b) - c with a single rounding step.
 *
 * The product is negated before the fused add rather than negating the final
 * sum. The two forms agree except for the sign of an exact-zero result
 * (e.g. a*b == -c), where negating the sum yields -0 instead of +0.
 * NOTE(review): this follows the RISC-V F definition of FNMADD, which the
 * naming suggests is being modeled. Confirm against the RTL being replaced.
 *
 * @param a, b, c operands as raw float bit patterns
 * @param frm     rounding-mode selector; not consulted by this implementation
 * @param result  receives the raw bit pattern of the result
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa, fb, fc, fr;
  fa.i = a;
  fb.i = b;
  fc.i = c;
  fr.f = fmaf(-fa.f, fb.f, -fc.f);

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Negated fused multiply-subtract: -(a * b) + c with a single rounding step.
 *
 * The product is negated before the fused add rather than negating the final
 * difference. The two forms agree except for the sign of an exact-zero result
 * (e.g. a*b == c), where negating the difference yields -0 instead of +0.
 * NOTE(review): this follows the RISC-V F definition of FNMSUB, which the
 * naming suggests is being modeled. Confirm against the RTL being replaced.
 *
 * @param a, b, c operands as raw float bit patterns
 * @param frm     rounding-mode selector; not consulted by this implementation
 * @param result  receives the raw bit pattern of the result
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa, fb, fc, fr;
  fa.i = a;
  fb.i = b;
  fc.i = c;
  fr.f = fmaf(-fa.f, fb.f, fc.f);

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f / fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fsqrt(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.f = sqrtf(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
/**
 * Converts a single-precision value to a signed 32-bit integer, truncating
 * toward zero.
 *
 * A plain float-to-int cast is undefined behaviour when the value is NaN or
 * does not fit, so those cases are saturated explicitly: NaN and values
 * >= 2^31 give 0x7fffffff, values below -2^31 give 0x80000000.
 * NOTE(review): the saturation values follow the RISC-V FCVT.W.S convention,
 * which the naming suggests is being modeled. Confirm.
 *
 * @param a       operand as a raw float bit pattern
 * @param frm     rounding-mode selector; not consulted (always truncates)
 * @param result  receives the converted integer
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_ftoi(int a, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa;
  fa.i = a;

  int converted;
  if (fa.f != fa.f || fa.f >= 2147483648.0f) {
    // NaN (compares unequal to itself) or too large for int.
    converted = 0x7fffffff;
  } else if (fa.f < -2147483648.0f) {
    converted = -0x7fffffff - 1;
  } else {
    converted = static_cast<int>(fa.f);
  }

  *result = converted;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Converts a single-precision value to an unsigned 32-bit integer, truncating
 * toward zero. The unsigned value is returned through the int-typed `result`.
 *
 * A plain float-to-unsigned cast is undefined behaviour when the value is
 * NaN, negative past -1, or too large, so those cases are saturated
 * explicitly: NaN and values >= 2^32 give 0xffffffff, values <= -1 give 0.
 * NOTE(review): the saturation values follow the RISC-V FCVT.WU.S convention,
 * which the naming suggests is being modeled. Confirm.
 *
 * @param a       operand as a raw float bit pattern
 * @param frm     rounding-mode selector; not consulted (always truncates)
 * @param result  receives the converted value's 32-bit pattern
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_ftou(int a, int frm, int* result, int* fflags) {
  (void)frm;

  Float_t fa;
  fa.i = a;

  unsigned converted;
  if (fa.f != fa.f || fa.f >= 4294967296.0f) {
    // NaN (compares unequal to itself) or too large for unsigned.
    converted = 0xffffffffu;
  } else if (fa.f <= -1.0f) {
    converted = 0u;
  } else {
    // Values in (-1, 0) truncate to 0, which is representable.
    converted = static_cast<unsigned>(fa.f);
  }

  *result = static_cast<int>(converted);
  *fflags = 0;
}
|
||||
|
||||
void dpi_itof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fr.f = (float)a;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_utof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
unsigned ua = a;
|
||||
fr.f = (float)ua;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
/**
 * Less-than comparison of two single-precision values.
 *
 * The outcome is written as the integer 1 or 0. Previously the boolean was
 * stored into a float and that float's bit pattern was returned, so "true"
 * came back as 0x3f800000 (the encoding of 1.0f) instead of 1.
 *
 * @param a, b    operands as raw float bit patterns
 * @param result  receives 1 when a < b, otherwise 0 (0 if either is NaN)
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_flt(int a, int b, int* result, int* fflags) {
  Float_t fa, fb;
  fa.i = a;
  fb.i = b;

  *result = (fa.f < fb.f) ? 1 : 0;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Less-than-or-equal comparison of two single-precision values.
 *
 * The outcome is written as the integer 1 or 0. Previously the boolean was
 * stored into a float and that float's bit pattern was returned, so "true"
 * came back as 0x3f800000 (the encoding of 1.0f) instead of 1.
 *
 * @param a, b    operands as raw float bit patterns
 * @param result  receives 1 when a <= b, otherwise 0 (0 if either is NaN)
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fle(int a, int b, int* result, int* fflags) {
  Float_t fa, fb;
  fa.i = a;
  fb.i = b;

  *result = (fa.f <= fb.f) ? 1 : 0;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Equality comparison of two single-precision values.
 *
 * The outcome is written as the integer 1 or 0. Previously the boolean was
 * stored into a float and that float's bit pattern was returned, so "true"
 * came back as 0x3f800000 (the encoding of 1.0f) instead of 1.
 *
 * @param a, b    operands as raw float bit patterns
 * @param result  receives 1 when a == b as floats, otherwise 0
 *                (0 if either is NaN; +0 and -0 compare equal)
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_feq(int a, int b, int* result, int* fflags) {
  Float_t fa, fb;
  fa.i = a;
  fb.i = b;

  *result = (fa.f == fb.f) ? 1 : 0;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Minimum of two single-precision values.
 *
 * Replaces std::min, which was used without <algorithm>, returned NaN
 * whenever the first operand was NaN, and did not order -0 below +0.
 *   - exactly one NaN operand: the other operand is returned (fminf)
 *   - both NaN: the quiet NaN pattern 0x7fc00000 is returned
 *   - operands comparing equal (including -0 vs +0): the one with the sign
 *     bit set is returned, so -0 wins over +0
 * NOTE(review): these rules follow the RISC-V FMIN.S definition, which the
 * naming suggests is being modeled. Confirm.
 *
 * @param a, b    operands as raw float bit patterns
 * @param result  receives the raw bit pattern of the minimum
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fmin(int a, int b, int* result, int* fflags) {
  Float_t fa, fb, fr;
  fa.i = a;
  fb.i = b;

  if (fa.f == fb.f) {
    // Equal non-zero values have identical bits; for +/-0 prefer the
    // negative one (sign bit set makes the int view negative).
    fr.i = (a < 0) ? a : b;
  } else {
    fr.f = fminf(fa.f, fb.f);
    if (fr.f != fr.f) {
      fr.i = 0x7fc00000;  // both operands were NaN
    }
  }

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Maximum of two single-precision values.
 *
 * Replaces std::max, which was used without <algorithm>, returned NaN
 * whenever the first operand was NaN, and did not order +0 above -0.
 *   - exactly one NaN operand: the other operand is returned (fmaxf)
 *   - both NaN: the quiet NaN pattern 0x7fc00000 is returned
 *   - operands comparing equal (including -0 vs +0): the one with the sign
 *     bit clear is returned, so +0 wins over -0
 * NOTE(review): these rules follow the RISC-V FMAX.S definition, which the
 * naming suggests is being modeled. Confirm.
 *
 * @param a, b    operands as raw float bit patterns
 * @param result  receives the raw bit pattern of the maximum
 * @param fflags  always written as 0 (exception flags are not computed)
 */
void dpi_fmax(int a, int b, int* result, int* fflags) {
  Float_t fa, fb, fr;
  fa.i = a;
  fb.i = b;

  if (fa.f == fb.f) {
    // Equal non-zero values have identical bits; for +/-0 prefer the
    // positive one (sign bit set makes the int view negative).
    fr.i = (a < 0) ? b : a;
  } else {
    fr.f = fmaxf(fa.f, fb.f);
    if (fr.f != fr.f) {
      fr.i = 0x7fc00000;  // both operands were NaN
    }
  }

  *result = fr.i;
  *fflags = 0;
}
|
||||
|
||||
/**
 * Classifies a single-precision value and returns a one-hot mask.
 *
 * Replaces the stub that always returned 0. Exactly one bit is set:
 *   bit 0: -infinity          bit 5: positive subnormal
 *   bit 1: negative normal    bit 6: positive normal
 *   bit 2: negative subnormal bit 7: +infinity
 *   bit 3: -0                 bit 8: signaling NaN
 *   bit 4: +0                 bit 9: quiet NaN
 * NOTE(review): this is the RISC-V FCLASS.S encoding, which the naming
 * suggests is being modeled. Confirm the consumer expects this layout.
 *
 * @param a       operand as a raw float bit pattern
 * @param result  receives the one-hot class mask
 */
void dpi_fclss(int a, int* result) {
  const unsigned bits     = static_cast<unsigned>(a);
  const bool     negative = (bits >> 31) != 0;
  const unsigned exponent = (bits >> 23) & 0xffu;
  const unsigned mantissa = bits & 0x7fffffu;

  unsigned cls;
  if (exponent == 0xffu) {
    if (mantissa == 0) {
      cls = negative ? 0u : 7u;                // infinities
    } else {
      cls = (mantissa & 0x400000u) ? 9u : 8u;  // top mantissa bit marks quiet NaN
    }
  } else if (exponent == 0) {
    if (mantissa == 0) {
      cls = negative ? 3u : 4u;                // zeros
    } else {
      cls = negative ? 2u : 5u;                // subnormals
    }
  } else {
    cls = negative ? 1u : 6u;                  // normals
  }

  *result = 1 << cls;
}
|
||||
|
||||
/**
 * Placeholder for sign injection; not implemented yet.
 *
 * NOTE(review): only one operand is passed in, so a two-operand sign
 * injection cannot be expressed through this signature as it stands.
 *
 * @param a       operand (currently unused)
 * @param result  always receives 0
 */
void dpi_fsgnj(int a, int* result) {
  (void)a;
  // TODO: implement; the output is a constant until then.
  *result = 0;
}
|
||||
|
||||
/**
 * Placeholder for negated sign injection; not implemented yet.
 *
 * NOTE(review): only one operand is passed in, so a two-operand sign
 * injection cannot be expressed through this signature as it stands.
 *
 * @param a       operand (currently unused)
 * @param result  always receives 0
 */
void dpi_fsgnjn(int a, int* result) {
  (void)a;
  // TODO: implement; the output is a constant until then.
  *result = 0;
}
|
||||
|
||||
/**
 * Placeholder for XOR sign injection; not implemented yet.
 *
 * NOTE(review): only one operand is passed in, so a two-operand sign
 * injection cannot be expressed through this signature as it stands.
 *
 * @param a       operand (currently unused)
 * @param result  always receives 0
 */
void dpi_fsgnjx(int a, int* result) {
  (void)a;
  // TODO: implement; the output is a constant until then.
  *result = 0;
}
|
||||
31
hw/dpi/float_dpi.vh
Normal file
31
hw/dpi/float_dpi.vh
Normal file
@@ -0,0 +1,31 @@
|
||||
`ifndef FLOAT_DPI
`define FLOAT_DPI

// DPI-C imports for the C++ floating-point model.
// Float operands and results are carried as 32-bit `int` bit patterns.

// Two-operand add/sub/mul.
import "DPI-C" context function void dpi_fadd(
  input int a, input int b, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsub(
  input int a, input int b, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmul(
  input int a, input int b, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);

// Three-operand multiply-add family.
import "DPI-C" context function void dpi_fmadd(
  input int a, input int b, input int c, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmsub(
  input int a, input int b, input int c, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmadd(
  input int a, input int b, input int c, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmsub(
  input int a, input int b, input int c, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);

// Division and square root.
import "DPI-C" context function void dpi_fdiv(
  input int a, input int b, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsqrt(
  input int a, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);

// Float <-> integer conversions.
import "DPI-C" context function void dpi_ftoi(
  input int a, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_ftou(
  input int a, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_itof(
  input int a, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_utof(
  input int a, input bit[2:0] frm,
  output int result, output bit[4:0] fflags);

// Classification and sign manipulation (single operand, no flags).
import "DPI-C" context function void dpi_fclss(
  input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(
  input int a, output int result);
import "DPI-C" context function void dpi_fsgnjn(
  input int a, output int result);
import "DPI-C" context function void dpi_fsgnjx(
  input int a, output int result);

// Comparisons and min/max.
import "DPI-C" context function void dpi_flt(
  input int a, input int b,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(
  input int a, input int b,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_feq(
  input int a, input int b,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmin(
  input int a, input int b,
  output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmax(
  input int a, input int b,
  output int result, output bit[4:0] fflags);

`endif
|
||||
84
hw/dpi/util_dpi.cpp
Normal file
84
hw/dpi/util_dpi.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
// C-linkage entry points for the delayed-assertion helpers.
extern "C" {
  // Hands out a handle identifying one per-call-site shift register.
  int dpi_register();

  // Records `cond` for handle `inst` and aborts once a failure has aged
  // through the `delay`-deep history.
  // NOTE(review): the SystemVerilog import passes `cond` as `logic`; confirm
  // `bool` matches the simulator's generated prototype for that type.
  void dpi_assert(int inst, bool cond, int delay);
}
|
||||
|
||||
// Fixed-depth history of ints: push() inserts the newest value and discards
// the oldest; top() reads the oldest value still held.
//
// Stored as a ring buffer so push() is O(1) instead of shifting every slot.
// A non-positive depth is clamped to 1, and push()/top() are safe before
// ensure_init(); previously both cases underflowed `depth_-1` and indexed an
// empty vector.
class ShiftRegister {
public:
  ShiftRegister() : init_(false), depth_(0), head_(0) {}

  // Sizes the register on the first call (all slots zero); later calls are
  // ignored, even if `depth` differs.
  void ensure_init(int depth) {
    if (init_)
      return;
    depth_ = (depth > 0) ? static_cast<unsigned>(depth) : 1u;
    buffer_.assign(depth_, 0);
    head_ = 0;
    init_ = true;
  }

  // Inserts `value` as the newest entry when `enable` is set; no-op otherwise
  // or when the register has not been sized yet.
  void push(int value, bool enable) {
    if (!enable || depth_ == 0)
      return;
    // head_ marks the oldest slot: overwrite it, then advance to the next
    // oldest entry.
    buffer_[head_] = value;
    head_ = (head_ + 1) % depth_;
  }

  // Returns the oldest entry, or 0 if the register has not been sized yet.
  int top() const {
    return (depth_ != 0) ? buffer_[head_] : 0;
  }

private:
  std::vector<int> buffer_;
  bool init_;
  unsigned depth_;
  unsigned head_;  // index of the oldest entry
};
|
||||
|
||||
class Instances {
|
||||
public:
|
||||
ShiftRegister& get(int inst) {
|
||||
return instances_.at(inst);
|
||||
}
|
||||
|
||||
int allocate() {
|
||||
mutex_.lock();
|
||||
int inst = instances_.size();
|
||||
instances_.resize(inst + 1);
|
||||
mutex_.unlock();
|
||||
return inst;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<ShiftRegister> instances_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
Instances instances;
|
||||
|
||||
int dpi_register() {
|
||||
return instances.allocate();
|
||||
}
|
||||
|
||||
void dpi_assert(int inst, bool cond, int delay) {
|
||||
ShiftRegister& sr = instances.get(inst);
|
||||
|
||||
sr.ensure_init(delay);
|
||||
sr.push(!cond, 1);
|
||||
|
||||
auto status = sr.top();
|
||||
if (status) {
|
||||
printf("delayed assertion at %s!\n", svGetNameFromScope(svGetScope()));
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
7
hw/dpi/util_dpi.vh
Normal file
7
hw/dpi/util_dpi.vh
Normal file
@@ -0,0 +1,7 @@
|
||||
`ifndef UTIL_DPI
`define UTIL_DPI

// DPI-C imports for the delayed-assertion helpers.

// Returns a handle identifying one assertion site.
import "DPI-C" context function int dpi_register();

// Reports `cond` for handle `inst`, checked with a `delay`-deep history.
import "DPI-C" context function void dpi_assert(
  input int inst, input logic cond, input int delay);

`endif
|
||||
Reference in New Issue
Block a user