speeding up simulation using dedicated full dpi-based FPU core

This commit is contained in:
Blaise Tine
2021-01-06 18:44:06 -08:00
parent 2058718f0f
commit 2b8435471a
26 changed files with 990 additions and 430 deletions

264
hw/dpi/float_dpi.cpp Normal file
View File

@@ -0,0 +1,264 @@
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
// C-linkage prototypes for the floating-point DPI entry points defined below.
// Every operand and result is carried as a 32-bit int; the definitions in
// this file reinterpret those ints as single-precision floats.
// NOTE(review): float_dpi.vh declares frm as bit[2:0] and fflags as bit[4:0].
// Packed bit vectors are usually passed through DPI-C as svBitVecVal
// pointers, so confirm that the int / int* parameters used here match the
// prototypes the simulator generates.
extern "C" {
// Arithmetic: two or three operands, rounding mode in, result and flags out.
void dpi_fadd(int a, int b, int frm, int* result, int* fflags);
void dpi_fsub(int a, int b, int frm, int* result, int* fflags);
void dpi_fmul(int a, int b, int frm, int* result, int* fflags);
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags);
void dpi_fsqrt(int a, int frm, int* result, int* fflags);
// Conversions between float and signed/unsigned integer.
void dpi_ftoi(int a, int frm, int* result, int* fflags);
void dpi_ftou(int a, int frm, int* result, int* fflags);
void dpi_itof(int a, int frm, int* result, int* fflags);
void dpi_utof(int a, int frm, int* result, int* fflags);
// Single-operand helpers without rounding mode or flags.
void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int* result);
void dpi_fsgnjn(int a, int* result);
void dpi_fsgnjx(int a, int* result);
// Comparisons and min/max: two operands, result and flags out.
void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags);
void dpi_feq(int a, int b, int* result, int* fflags);
void dpi_fmin(int a, int b, int* result, int* fflags);
void dpi_fmax(int a, int b, int* result, int* fflags);
}
// Overlays a float, a raw int and a bit-field view on the same storage.
// Code in this file stores through one member and reads through another to
// reinterpret the bits of a value.
// NOTE(review): reading a member other than the one last written is
// undefined in standard C++ (compilers commonly support it as an extension).
// NOTE(review): `parts` presumably mirrors the binary32 layout (23-bit
// mantissa, 8-bit exponent, sign) and relies on bit-fields being allocated
// from the least significant bit upward - confirm for the target compiler.
union Float_t {
float f;
int i;
struct {
uint32_t man : 23;
uint32_t exp : 8;
uint32_t sign : 1;
} parts;
};
// Single-precision addition on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied - the host's current
//           rounding mode is used
//   result  receives the bits of a + b
//   fflags  always written as 0; exception flags are not modeled
void dpi_fadd(int a, int b, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  // memcpy reinterprets the bits without reading an inactive union member.
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  const float sum = lhs + rhs;
  memcpy(result, &sum, sizeof(sum));
  *fflags = 0;
}
// Single-precision subtraction on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied
//   result  receives the bits of a - b
//   fflags  always written as 0; exception flags are not modeled
void dpi_fsub(int a, int b, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  // memcpy reinterprets the bits without reading an inactive union member.
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  const float diff = lhs - rhs;
  memcpy(result, &diff, sizeof(diff));
  *fflags = 0;
}
// Single-precision multiplication on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied
//   result  receives the bits of a * b
//   fflags  always written as 0; exception flags are not modeled
void dpi_fmul(int a, int b, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  // memcpy reinterprets the bits without reading an inactive union member.
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  const float product = lhs * rhs;
  memcpy(result, &product, sizeof(product));
  *fflags = 0;
}
// Fused multiply-add on raw single-precision bit patterns: a * b + c.
//   a, b, c  operand bits (host float is assumed to be IEEE-754 binary32)
//   frm      rounding mode; accepted but not applied
//   result   receives the bits of the fused result
//   fflags   always written as 0; exception flags are not modeled
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float fa, fb, fc;
  memcpy(&fa, &a, sizeof(fa));
  memcpy(&fb, &b, sizeof(fb));
  memcpy(&fc, &c, sizeof(fc));
  // fmaf rounds once; a separate multiply and add would round the product
  // first and can lose the low-order bits of the result.
  const float fused = fmaf(fa, fb, fc);
  memcpy(result, &fused, sizeof(fused));
  *fflags = 0;
}
// Fused multiply-subtract on raw single-precision bit patterns: a * b - c.
//   a, b, c  operand bits (host float is assumed to be IEEE-754 binary32)
//   frm      rounding mode; accepted but not applied
//   result   receives the bits of the fused result
//   fflags   always written as 0; exception flags are not modeled
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float fa, fb, fc;
  memcpy(&fa, &a, sizeof(fa));
  memcpy(&fb, &b, sizeof(fb));
  memcpy(&fc, &c, sizeof(fc));
  // Negating the addend is exact, so one fmaf call yields a*b - c with a
  // single rounding.
  const float fused = fmaf(fa, fb, -fc);
  memcpy(result, &fused, sizeof(fused));
  *fflags = 0;
}
// Negated fused multiply-add on raw single-precision bit patterns:
// -(a * b) - c.
//   a, b, c  operand bits (host float is assumed to be IEEE-754 binary32)
//   frm      rounding mode; accepted but not applied
//   result   receives the bits of the fused result
//   fflags   always written as 0; exception flags are not modeled
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float fa, fb, fc;
  memcpy(&fa, &a, sizeof(fa));
  memcpy(&fb, &b, sizeof(fb));
  memcpy(&fc, &c, sizeof(fc));
  // The product and the addend are negated, not the final result, so an
  // exact-zero result keeps the sign of (-a*b) + (-c) and only one rounding
  // takes place.
  const float fused = fmaf(-fa, fb, -fc);
  memcpy(result, &fused, sizeof(fused));
  *fflags = 0;
}
// Negated fused multiply-subtract on raw single-precision bit patterns:
// -(a * b) + c.
//   a, b, c  operand bits (host float is assumed to be IEEE-754 binary32)
//   frm      rounding mode; accepted but not applied
//   result   receives the bits of the fused result
//   fflags   always written as 0; exception flags are not modeled
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float fa, fb, fc;
  memcpy(&fa, &a, sizeof(fa));
  memcpy(&fb, &b, sizeof(fb));
  memcpy(&fc, &c, sizeof(fc));
  // Only the product is negated, so an exact-zero result keeps the sign of
  // (-a*b) + c and only one rounding takes place.
  const float fused = fmaf(-fa, fb, fc);
  memcpy(result, &fused, sizeof(fused));
  *fflags = 0;
}
// Single-precision division on raw bit patterns.
//   a, b    dividend and divisor bits (host float is assumed to be IEEE-754
//           binary32)
//   frm     rounding mode; accepted but not applied
//   result  receives the bits of a / b
//   fflags  always written as 0; exception flags (including divide-by-zero)
//           are not modeled
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float dividend, divisor;
  // memcpy reinterprets the bits without reading an inactive union member.
  memcpy(&dividend, &a, sizeof(dividend));
  memcpy(&divisor, &b, sizeof(divisor));
  const float quotient = dividend / divisor;
  memcpy(result, &quotient, sizeof(quotient));
  *fflags = 0;
}
// Single-precision square root on a raw bit pattern.
//   a       operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied
//   result  receives the bits of sqrtf(a)
//   fflags  always written as 0; exception flags are not modeled
void dpi_fsqrt(int a, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float operand;
  // memcpy reinterprets the bits without reading an inactive union member.
  memcpy(&operand, &a, sizeof(operand));
  const float root = sqrtf(operand);
  memcpy(result, &root, sizeof(root));
  *fflags = 0;
}
// Converts a single-precision bit pattern to a signed 32-bit integer,
// truncating toward zero.
//   a       operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied (always truncates)
//   result  receives the integer; inputs that do not fit saturate:
//           NaN and values >= 2^31 give INT32_MAX, values < -2^31 give
//           INT32_MIN
//   fflags  always written as 0; exception flags are not modeled
void dpi_ftoi(int a, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float value;
  memcpy(&value, &a, sizeof(value));
  // NaN is detected on the bit pattern: exponent all ones, non-zero mantissa.
  const bool is_nan = (a & 0x7fffffff) > 0x7f800000;
  int32_t converted;
  if (is_nan || value >= 2147483648.0f) {
    converted = INT32_MAX;
  } else if (value < -2147483648.0f) {
    converted = INT32_MIN;
  } else {
    // In range, so the conversion is well defined.
    converted = static_cast<int32_t>(value);
  }
  *result = converted;
  *fflags = 0;
}
// Converts a single-precision bit pattern to an unsigned 32-bit integer,
// truncating toward zero.
//   a       operand bits (host float is assumed to be IEEE-754 binary32)
//   frm     rounding mode; accepted but not applied (always truncates)
//   result  receives the unsigned value's bit pattern; inputs that do not
//           fit saturate: NaN and values >= 2^32 give 0xFFFFFFFF, negative
//           values give 0
//   fflags  always written as 0; exception flags are not modeled
void dpi_ftou(int a, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float value;
  memcpy(&value, &a, sizeof(value));
  // NaN is detected on the bit pattern: exponent all ones, non-zero mantissa.
  const bool is_nan = (a & 0x7fffffff) > 0x7f800000;
  uint32_t converted;
  if (is_nan || value >= 4294967296.0f) {
    converted = UINT32_MAX;
  } else if (value <= 0.0f) {
    // Covers -0, (-1, 0) which truncates to 0, and out-of-range negatives.
    converted = 0;
  } else {
    // In range, so the conversion is well defined.
    converted = static_cast<uint32_t>(value);
  }
  memcpy(result, &converted, sizeof(converted));
  *fflags = 0;
}
// Converts a signed 32-bit integer to single precision.
//   a       signed integer operand
//   frm     rounding mode; accepted but not applied - the host's current
//           rounding mode is used
//   result  receives the bits of the converted float
//   fflags  always written as 0; exception flags are not modeled
void dpi_itof(int a, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  const float converted = static_cast<float>(a);
  memcpy(result, &converted, sizeof(converted));
  *fflags = 0;
}
// Converts an unsigned 32-bit integer to single precision.
//   a       operand; its bit pattern is interpreted as unsigned
//   frm     rounding mode; accepted but not applied - the host's current
//           rounding mode is used
//   result  receives the bits of the converted float
//   fflags  always written as 0; exception flags are not modeled
void dpi_utof(int a, int frm, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  const unsigned magnitude = static_cast<unsigned>(a);
  const float converted = static_cast<float>(magnitude);
  memcpy(result, &converted, sizeof(converted));
  *fflags = 0;
}
// Single-precision "less than" on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   result  receives integer 1 when a < b, otherwise 0 (any NaN operand
//           compares false)
//   fflags  always written as 0; exception flags are not modeled
void dpi_flt(int a, int b, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  // The result is an integer truth value, not the bit pattern of 1.0f.
  *result = (lhs < rhs) ? 1 : 0;
  *fflags = 0;
}
// Single-precision "less than or equal" on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   result  receives integer 1 when a <= b, otherwise 0 (any NaN operand
//           compares false)
//   fflags  always written as 0; exception flags are not modeled
void dpi_fle(int a, int b, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  // The result is an integer truth value, not the bit pattern of 1.0f.
  *result = (lhs <= rhs) ? 1 : 0;
  *fflags = 0;
}
// Single-precision equality on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   result  receives integer 1 when a == b as floats, otherwise 0
//           (+0 equals -0; any NaN operand compares false)
//   fflags  always written as 0; exception flags are not modeled
void dpi_feq(int a, int b, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  // The result is an integer truth value, not the bit pattern of 1.0f.
  *result = (lhs == rhs) ? 1 : 0;
  *fflags = 0;
}
// Single-precision minimum on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   result  receives the bits of the smaller operand. If exactly one
//           operand is NaN the other operand is returned; if both are NaN
//           the quiet NaN 0x7fc00000 is returned; -0 is treated as smaller
//           than +0.
//   fflags  always written as 0; exception flags are not modeled
void dpi_fmin(int a, int b, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  // NaN is detected on the bit pattern: exponent all ones, non-zero mantissa.
  const bool lhs_nan = (a & 0x7fffffff) > 0x7f800000;
  const bool rhs_nan = (b & 0x7fffffff) > 0x7f800000;
  int chosen;
  if (lhs_nan && rhs_nan) {
    chosen = 0x7fc00000;
  } else if (lhs_nan) {
    chosen = b;
  } else if (rhs_nan) {
    chosen = a;
  } else if (lhs < rhs) {
    chosen = a;
  } else if (rhs < lhs) {
    chosen = b;
  } else {
    // Equal values have identical bits except for +0 / -0; OR-ing keeps the
    // sign bit so the minimum of the two zeros is -0.
    chosen = a | b;
  }
  *result = chosen;
  *fflags = 0;
}
// Single-precision maximum on raw bit patterns.
//   a, b    operand bits (host float is assumed to be IEEE-754 binary32)
//   result  receives the bits of the larger operand. If exactly one
//           operand is NaN the other operand is returned; if both are NaN
//           the quiet NaN 0x7fc00000 is returned; +0 is treated as larger
//           than -0.
//   fflags  always written as 0; exception flags are not modeled
void dpi_fmax(int a, int b, int* result, int* fflags) {
  static_assert(sizeof(float) == sizeof(int), "float and int must have the same size");
  float lhs, rhs;
  memcpy(&lhs, &a, sizeof(lhs));
  memcpy(&rhs, &b, sizeof(rhs));
  // NaN is detected on the bit pattern: exponent all ones, non-zero mantissa.
  const bool lhs_nan = (a & 0x7fffffff) > 0x7f800000;
  const bool rhs_nan = (b & 0x7fffffff) > 0x7f800000;
  int chosen;
  if (lhs_nan && rhs_nan) {
    chosen = 0x7fc00000;
  } else if (lhs_nan) {
    chosen = b;
  } else if (rhs_nan) {
    chosen = a;
  } else if (lhs > rhs) {
    chosen = a;
  } else if (rhs > lhs) {
    chosen = b;
  } else {
    // Equal values have identical bits except for +0 / -0; AND-ing clears
    // the sign bit so the maximum of the two zeros is +0.
    chosen = a & b;
  }
  *result = chosen;
  *fflags = 0;
}
// Classifies a single-precision bit pattern and writes a one-hot mask.
// NOTE(review): implemented as the RISC-V FCLASS.S encoding on the
// assumption that "fclss" stands for that instruction - confirm with the
// caller. Bit positions:
//   0 -inf         1 negative normal   2 negative subnormal   3 -0
//   4 +0           5 positive subnormal 6 positive normal     7 +inf
//   8 signaling NaN                     9 quiet NaN
//   a       operand bits (binary32 layout: sign, 8-bit exponent, 23-bit
//           mantissa)
//   result  receives the mask with exactly one bit set
void dpi_fclss(int a, int* result) {
  const uint32_t bits = static_cast<uint32_t>(a);
  const bool negative = (bits >> 31) != 0;
  const uint32_t exponent = (bits >> 23) & 0xffu;
  const uint32_t mantissa = bits & 0x7fffffu;

  int class_bit;
  if (exponent == 0xffu) {
    if (mantissa == 0) {
      class_bit = negative ? 0 : 7;                  // infinities
    } else {
      // The top mantissa bit distinguishes quiet from signaling NaNs.
      class_bit = (mantissa & 0x400000u) ? 9 : 8;
    }
  } else if (exponent == 0) {
    if (mantissa == 0) {
      class_bit = negative ? 3 : 4;                  // zeros
    } else {
      class_bit = negative ? 2 : 5;                  // subnormals
    }
  } else {
    class_bit = negative ? 1 : 6;                    // normals
  }
  *result = 1 << class_bit;
}
// Placeholder: not implemented yet; always writes 0 to *result.
// NOTE(review): the name suggests sign injection, which normally needs a
// second operand to take the sign from; this interface only carries `a` -
// confirm the intended signature before implementing.
void dpi_fsgnj(int a, int* result) {
  // TODO
  (void)a;  // operand intentionally unused by the stub
  *result = 0;
}
// Placeholder: not implemented yet; always writes 0 to *result.
// NOTE(review): the name suggests negated sign injection, which normally
// needs a second operand to take the sign from; this interface only carries
// `a` - confirm the intended signature before implementing.
void dpi_fsgnjn(int a, int* result) {
  // TODO
  (void)a;  // operand intentionally unused by the stub
  *result = 0;
}
// Placeholder: not implemented yet; always writes 0 to *result.
// NOTE(review): the name suggests XOR sign injection, which normally needs
// a second operand to take the sign from; this interface only carries `a` -
// confirm the intended signature before implementing.
void dpi_fsgnjx(int a, int* result) {
  // TODO
  (void)a;  // operand intentionally unused by the stub
  *result = 0;
}

31
hw/dpi/float_dpi.vh Normal file
View File

@@ -0,0 +1,31 @@
`ifndef FLOAT_DPI
`define FLOAT_DPI
// DPI-C imports for the floating-point helpers. Operands and results are
// passed as 32-bit ints. Arithmetic and conversion imports also take a 3-bit
// frm and return a 5-bit fflags; comparison and min/max imports return
// fflags only.
// Arithmetic
import "DPI-C" context function void dpi_fadd(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsub(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmul(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fdiv(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsqrt(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
// Conversions
import "DPI-C" context function void dpi_ftoi(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_ftou(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
// Single-operand helpers (no frm, no fflags)
import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
// Comparisons and min/max
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_feq(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmin(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmax(input int a, input int b, output int result, output bit[4:0] fflags);
`endif

84
hw/dpi/util_dpi.cpp Normal file
View File

@@ -0,0 +1,84 @@
#include <stdio.h>
#include <math.h>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <iostream>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
// C-linkage prototypes for the utility DPI entry points defined below.
extern "C" {
// Allocates a new instance slot and returns its index.
int dpi_register();
// Records `cond` for instance `inst` and aborts once a failed condition
// reaches the front of that instance's `delay`-deep shift register.
void dpi_assert(int inst, bool cond, int delay);
}
// Fixed-depth shift register of ints. A value pushed at the back reaches the
// front (top()) after depth-1 further enabled pushes. The depth is chosen on
// the first call to ensure_init(); entries start at 0.
class ShiftRegister {
public:
  ShiftRegister() : init_(false), depth_(0) {}

  // Sizes the register on the first call; later calls are ignored, even if
  // `depth` differs. Depths below 1 are treated as 1 so that push() and
  // top() always have a slot to work with.
  void ensure_init(int depth) {
    if (init_)
      return;
    depth_ = (depth > 1) ? static_cast<unsigned>(depth) : 1u;
    buffer_.resize(depth_);
    init_ = true;
  }

  // Shifts every entry one slot toward the front and stores `value` at the
  // back. Does nothing when `enable` is false or the register has not been
  // initialized yet.
  void push(int value, bool enable) {
    if (!enable || buffer_.empty())
      return;
    // Starting at 1 avoids the unsigned wrap-around of `depth_ - 1`.
    for (unsigned i = 1; i < depth_; ++i) {
      buffer_[i - 1] = buffer_[i];
    }
    buffer_[depth_ - 1] = value;
  }

  // Returns the oldest entry, or 0 if the register has not been initialized.
  int top() const {
    return buffer_.empty() ? 0 : buffer_[0];
  }

private:
  std::vector<int> buffer_;
  bool init_;
  unsigned depth_;
};
// Registry of ShiftRegister objects addressed by the index that allocate()
// hands out.
class Instances {
public:
  // Returns the register stored at index `inst`; std::vector::at throws
  // std::out_of_range for an index that was never allocated.
  // This lookup does not take the mutex, and the returned reference is
  // invalidated if a later allocate() reallocates the vector.
  ShiftRegister& get(int inst) {
    return instances_.at(inst);
  }

  // Appends a default-constructed register and returns its index.
  int allocate() {
    // lock_guard releases the mutex even if resize() throws.
    std::lock_guard<std::mutex> guard(mutex_);
    const int inst = static_cast<int>(instances_.size());
    instances_.resize(inst + 1);
    return inst;
  }

private:
  std::vector<ShiftRegister> instances_;
  std::mutex mutex_;
};
// Registry shared by dpi_register() and dpi_assert().
Instances instances;
// Reserves a new slot in the global registry and returns its index, which
// the caller later passes to dpi_assert() as `inst`.
int dpi_register() {
  const int handle = instances.allocate();
  return handle;
}
// Delayed assertion check.
//   inst   index previously returned by dpi_register()
//   cond   condition sampled on this call; a false value is recorded as a
//          failure
//   delay  depth of the per-instance shift register; only the value from
//          the first call for a given instance is used
// Each call pushes the new failure bit and inspects the oldest one. When the
// oldest entry is a failure, the DPI scope name is printed and the process
// aborts.
void dpi_assert(int inst, bool cond, int delay) {
  ShiftRegister& sr = instances.get(inst);
  sr.ensure_init(delay);
  sr.push(!cond, true);
  const int failed = sr.top();
  if (failed) {
    // Guard against a missing scope name so "%s" never receives NULL.
    const char* scope_name = svGetNameFromScope(svGetScope());
    printf("delayed assertion at %s!\n", scope_name ? scope_name : "(unknown scope)");
    // abort() is not required to flush stdio buffers; flush so the message
    // is not lost when stdout is buffered.
    fflush(stdout);
    std::abort();
  }
}

7
hw/dpi/util_dpi.vh Normal file
View File

@@ -0,0 +1,7 @@
`ifndef UTIL_DPI
`define UTIL_DPI
// DPI-C imports for the utility helpers.
// dpi_register returns an int that is passed back to dpi_assert as `inst`.
import "DPI-C" context function int dpi_register();
// dpi_assert takes the instance index, the condition to check and a delay.
import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay);
`endif