FPU SVDPI support complete

This commit is contained in:
Blaise Tine
2020-09-01 00:59:37 -04:00
parent c1df08843c
commit 4e8b9fb296
16 changed files with 17598 additions and 28978 deletions

View File

@@ -8,7 +8,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a

View File

@@ -40,7 +40,7 @@ public:
this->add_test("fsqrt", new Test_FSQRT()); this->add_test("fsqrt", new Test_FSQRT());
this->add_test("ftoi", new Test_FTOI()); this->add_test("ftoi", new Test_FTOI());
this->add_test("ftou", new Test_FTOU()); this->add_test("ftou", new Test_FTOU());
this->add_test("tof", new Test_ITOF()); this->add_test("itof", new Test_ITOF());
this->add_test("utof", new Test_UTOF()); this->add_test("utof", new Test_UTOF());
#endif #endif
} }
@@ -257,14 +257,14 @@ int main(int argc, char *argv[]) {
(void*)vx_host_ptr(src2_buf)); (void*)vx_host_ptr(src2_buf));
if (errors != 0) { if (errors != 0) {
std::cout << "found " << errors << " errors!" << std::endl; std::cout << "found " << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl << std::flush; std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
if (stop_on_error) { if (stop_on_error) {
cleanup(); cleanup();
exit(1); exit(1);
} }
exitcode = 1; exitcode = 1;
} else { } else {
std::cout << "PASSED!" << std::endl << std::flush; std::cout << "Test" << t << "-" << name << " PASSED!" << std::endl << std::flush;
} }
} }

Binary file not shown.

View File

@@ -247,7 +247,7 @@ void kernel_fsqrt(void* arg) {
for (uint32_t i = 0; i < count; ++i) { for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i]; float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i]; float b = src1_ptr[offset+i];
float c = sqrt(a * b); float c = sqrtf(a * b);
dst_ptr[offset+i] = c; dst_ptr[offset+i] = c;
} }
} }

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -59,7 +59,9 @@
`define EXT_F_ENABLE `define EXT_F_ENABLE
`endif `endif
`ifndef FPNEW_DISABLE
`define FPNEW_ENABLE `define FPNEW_ENABLE
`endif
// Device identification // Device identification
`define VENDOR_ID 0 `define VENDOR_ID 0

View File

@@ -64,13 +64,13 @@ module VX_scoreboard #(
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay); assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
`ifdef DBG_PRINT_PIPELINE `ifdef DBG_PRINT_PIPELINE
/*always @(posedge clk) begin always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb, $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end end
end*/ end
`endif `endif
endmodule endmodule

View File

@@ -39,7 +39,7 @@ module VX_fp_div #(
); );
`else `else
always @(posedge clk) begin always @(posedge clk) begin
dpi_fdiv(clk, ~stall, dataa[i], datab[i], result[i]); dpi_fdiv(8*LANES+i, ~stall, valid_in, dataa[i], datab[i], result[i]);
end end
`endif `endif
end end

View File

@@ -53,8 +53,8 @@ module VX_fp_ftoi #(
); );
`else `else
always @(posedge clk) begin always @(posedge clk) begin
dpi_ftoi(clk, ~stall, dataa[i], result_s); dpi_ftoi(10*LANES+i, ~stall, valid_in, dataa[i], result_s);
dpi_ftou(clk, ~stall, dataa[i], result_u); dpi_ftou(11*LANES+i, ~stall, valid_in, dataa[i], result_u);
end end
`endif `endif

View File

@@ -53,8 +53,8 @@ module VX_fp_itof #(
); );
`else `else
always @(posedge clk) begin always @(posedge clk) begin
dpi_itof(clk, ~stall, dataa[i], result_s); dpi_itof(12*LANES+i, ~stall, valid_in, dataa[i], result_s);
dpi_utof(clk, ~stall, dataa[i], result_u); dpi_utof(13*LANES+i, ~stall, valid_in, dataa[i], result_u);
end end
`endif `endif
@@ -63,7 +63,7 @@ module VX_fp_itof #(
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1 + 1), .DATAW(TAGW + 1 + 1),
.DEPTH(`LATENCY_FTOI) .DEPTH(`LATENCY_ITOF)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),

View File

@@ -245,11 +245,11 @@ module VX_fp_madd #(
defparam mac_fp_msub.accum_adder_clock = "none"; defparam mac_fp_msub.accum_adder_clock = "none";
`else `else
always @(posedge clk) begin always @(posedge clk) begin
dpi_fadd(clk, ~stall, dataa[i], datab[i], result_add); dpi_fadd(0*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_add);
dpi_fsub(clk, ~stall, dataa[i], datab[i], result_sub); dpi_fsub(1*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_sub);
dpi_fmul(clk, ~stall, dataa[i], datab[i], result_mul); dpi_fmul(2*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_mul);
dpi_fmadd(clk, ~stall, dataa[i], datab[i], datac[i], result_madd); dpi_fmadd(3*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub(clk, ~stall, dataa[i], datab[i], datac[i], result_msub); dpi_fmsub(4*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
end end
`endif `endif

View File

@@ -161,10 +161,12 @@ module VX_fp_nmadd #(
defparam mac_fp_neg.adder_input_clock = "0"; defparam mac_fp_neg.adder_input_clock = "0";
defparam mac_fp_neg.accum_adder_clock = "none"; defparam mac_fp_neg.accum_adder_clock = "none";
`else `else
reg valid_in_st0;
always @(posedge clk) begin always @(posedge clk) begin
dpi_fmadd(clk, ~stall, dataa[i], datab[i], datac[i], result_madd); valid_in_st0 <= reset ? 0 : valid_in;
dpi_fmsub(clk, ~stall, dataa[i], datab[i], datac[i], result_msub); dpi_fmadd(5*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
dpi_fsub(clk, ~stall, 32'b0, result_st0, result[i]); dpi_fmsub(6*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
dpi_fsub(7*LANES+i, ~stall, valid_in_st0, 32'b0, result_st0, result[i]);
end end
`endif `endif
end end

View File

@@ -37,7 +37,7 @@ module VX_fp_sqrt #(
); );
`else `else
always @(posedge clk) begin always @(posedge clk) begin
dpi_fsqrt(clk, ~stall, dataa[i], result[i]); dpi_fsqrt(9*LANES+i, ~stall, valid_in, dataa[i], result[i]);
end end
`endif `endif
end end

View File

@@ -4,22 +4,25 @@
#include <vector> #include <vector>
#include <mutex> #include <mutex>
#include "svdpi.h" #include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h" #include "VX_config.h"
extern "C" { extern "C" {
void dpi_fadd(bool clk, bool enable, int a, int b, int* result); void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fsub(bool clk, bool enable, int a, int b, int* result); void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fmul(bool clk, bool enable, int a, int b, int* result); void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fmadd(bool clk, bool enable, int a, int b, int c, int* result); void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result);
void dpi_fmsub(bool clk, bool enable, int a, int b, int c, int* result); void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result);
void dpi_fdiv(bool clk, bool enable, int a, int b, int* result); void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fsqrt(bool clk, bool enable, int a, int* result); void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result);
void dpi_ftoi(bool clk, bool enable, int a, int* result); void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result);
void dpi_ftou(bool clk, bool enable, int a, int* result); void dpi_ftou(int inst, bool enable, bool valid, int a, int* result);
void dpi_itof(bool clk, bool enable, int a, int* result); void dpi_itof(int inst, bool enable, bool valid, int a, int* result);
void dpi_utof(bool clk, bool enable, int a, int* result); void dpi_utof(int inst, bool enable, bool valid, int a, int* result);
} }
extern double sc_time_stamp();
class ShiftRegister { class ShiftRegister {
public: public:
ShiftRegister() : init_(false), depth_(0) {} ShiftRegister() : init_(false), depth_(0) {}
@@ -32,179 +35,177 @@ public:
} }
} }
void push(int value, bool clk, bool enable) { void push(int value, bool enable, bool valid) {
if (clk || !enable) if (!enable)
return; return;
for (unsigned i = 0; i < depth_-1; ++i) { for (unsigned i = 0; i < depth_-1; ++i) {
buffer_[i] = buffer_[i+1]; buffer_[i] = buffer_[i+1];
} }
buffer_[depth_-1] = value; buffer_[depth_-1].value = value;
buffer_[depth_-1].valid = valid;
} }
int top() const { int top() const {
return buffer_[0]; return buffer_[0].value;
}
bool valid() const {
return buffer_[0].valid;
} }
private: private:
std::vector<int> buffer_; struct entry_t {
int value;
bool valid;
};
std::vector<entry_t> buffer_;
int top_;
unsigned depth_; unsigned depth_;
bool init_; bool init_;
}; };
class Instances { class Instances {
public: public:
ShiftRegister& get(svScope scope) { ShiftRegister& get(int inst) {
mutex_.lock(); mutex_.lock();
ShiftRegister& reg = instances_[scope]; ShiftRegister& sr = instances_[inst];
mutex_.unlock(); mutex_.unlock();
return reg; return sr;
} }
private: private:
std::unordered_map<svScope, ShiftRegister> instances_; std::unordered_map<int, ShiftRegister> instances_;
std::mutex mutex_; std::mutex mutex_;
}; };
Instances instances; Instances instances;
void dpi_fadd(bool clk, bool enable, int a, int b, int* result) { void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fr = fa + fb; float fr = fa + fb;
inst.ensure_init(LATENCY_FMADD); sr.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_fsub(bool clk, bool enable, int a, int b, int* result) { void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fr = fa - fb; float fr = fa - fb;
inst.ensure_init(LATENCY_FMADD); sr.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_fmul(bool clk, bool enable, int a, int b, int* result) { void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fr = fa * fb; float fr = fa * fb;
inst.ensure_init(LATENCY_FMADD); sr.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_fmadd(bool clk, bool enable, int a, int b, int c, int* result) { void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fc = *(float*)&c; float fc = *(float*)&c;
float fr = fa * fb + fc; float fr = fa * fb + fc;
inst.ensure_init(LATENCY_FMADD); sr.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_fmsub(bool clk, bool enable, int a, int b, int c, int* result) { void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fc = *(float*)&c; float fc = *(float*)&c;
float fr = fa * fb - fc; float fr = fa * fb - fc;
inst.ensure_init(LATENCY_FMADD); sr.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_fdiv(bool clk, bool enable, int a, int b, int* result) { void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fb = *(float*)&b; float fb = *(float*)&b;
float fr = fa / fb; float fr = fa / fb;
inst.ensure_init(LATENCY_FDIV); sr.ensure_init(LATENCY_FDIV);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst. *result = sr.top();
top();
} }
void dpi_fsqrt(bool clk, bool enable, int a, int* result) { void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
float fr = sqrt(fa); float fr = sqrtf(fa);
inst.ensure_init(LATENCY_FSQRT); sr.ensure_init(LATENCY_FSQRT);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_ftoi(bool clk, bool enable, int a, int* result) { void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
int ir = int(fa); int ir = int(fa);
inst.ensure_init(LATENCY_FTOI); sr.ensure_init(LATENCY_FTOI);
inst.push(ir, clk, enable); sr.push(ir, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_ftou(bool clk, bool enable, int a, int* result) { void dpi_ftou(int inst, bool enable, bool valid, int a, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fa = *(float*)&a; float fa = *(float*)&a;
unsigned ir = unsigned(fa); unsigned ir = unsigned(fa);
inst.ensure_init(LATENCY_FTOI); sr.ensure_init(LATENCY_FTOI);
inst.push(ir, clk, enable); sr.push(ir, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_itof(bool clk, bool enable, int a, int* result) { void dpi_itof(int inst, bool enable, bool valid, int a, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
float fr = float(a); float fr = (float)a;
inst.ensure_init(LATENCY_ITOF); sr.ensure_init(LATENCY_ITOF);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }
void dpi_utof(bool clk, bool enable, int a, int* result) { void dpi_utof(int inst, bool enable, bool valid, int a, int* result) {
auto scope = svGetScope(); ShiftRegister& sr = instances.get(inst);
ShiftRegister& inst = instances.get(scope);
unsigned ua = *(unsigned*)&a; unsigned ua = *(unsigned*)&a;
float fr = float(ua); float fr = (float)ua;
inst.ensure_init(LATENCY_ITOF); sr.ensure_init(LATENCY_ITOF);
inst.push(*(int*)&fr, clk, enable); sr.push(*(int*)&fr, enable, valid);
*result = inst.top(); *result = sr.top();
} }

View File

@@ -1,16 +1,16 @@
`ifndef FLOAT_DPI `ifndef FLOAT_DPI
`define FLOAT_DPI `define FLOAT_DPI
import "DPI-C" context function void dpi_fadd(input logic clk, input logic enable, input int a, input int b, output int result); import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsub(input logic clk, input logic enable, input int a, input int b, output int result); import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmul(input logic clk, input logic enable, input int a, input int b, output int result); import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmadd(input logic clk, input logic enable, input int a, input int b, input int c, output int result); import "DPI-C" context function void dpi_fmadd(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fmsub(input logic clk, input logic enable, input int a, input int b, input int c, output int result); import "DPI-C" context function void dpi_fmsub(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fdiv(input logic clk, input logic enable, input int a, input int b, output int result); import "DPI-C" context function void dpi_fdiv(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsqrt(input logic clk, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_fsqrt(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_ftoi(input logic clk, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_ftoi(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_ftou(input logic clk, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_ftou(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_itof(input logic clk, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_itof(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_utof(input logic clk, input logic enable, input int a, output int result); import "DPI-C" context function void dpi_utof(int inst, input logic enable, input logic valid, input int a, output int result);
`endif `endif