+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes minor update minor update minor update minor update minor update minor update cleanup cleanup cache bindings and memory perf refactory minor update minor update hw unit tests fixes minor update minor update minor update minor update minor update minor udpate minor update minor update minor update minor update minor update minor update minor update minor updates minor updates minor update minor update minor update minor update minor update minor update minor updates minor updates minor updates minor updates minor update minor update
821 lines
22 KiB
C++
821 lines
22 KiB
C++
#pragma once
|
|
|
|
#include <iostream>
|
|
#include <math.h>
|
|
#include <limits>
|
|
#include <assert.h>
|
|
|
|
void cleanup();
|
|
|
|
#define RT_CHECK(_expr) \
|
|
do { \
|
|
int _ret = _expr; \
|
|
if (0 == _ret) \
|
|
break; \
|
|
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
|
cleanup(); \
|
|
exit(-1); \
|
|
} while (false)
|
|
|
|
union Float_t {
|
|
float f;
|
|
int i;
|
|
struct {
|
|
uint32_t man : 23;
|
|
uint32_t exp : 8;
|
|
uint32_t sign : 1;
|
|
} parts;
|
|
};
|
|
|
|
inline float fround(float x, int32_t precision = 8) {
|
|
auto power_of_10 = std::pow(10, precision);
|
|
return std::round(x * power_of_10) / power_of_10;
|
|
}
|
|
|
|
inline bool almost_equal_eps(float a, float b, int ulp = 128) {
|
|
auto eps = std::numeric_limits<float>::epsilon() * (std::max(fabs(a), fabs(b)) * ulp);
|
|
auto d = fabs(a - b);
|
|
if (d > eps) {
|
|
std::cout << "*** almost_equal_eps: d=" << d << ", eps=" << eps << std::endl;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 6) {
|
|
Float_t fa{a}, fb{b};
|
|
auto d = std::abs(fa.i - fb.i);
|
|
if (d > ulp) {
|
|
std::cout << "*** almost_equal_ulp: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
inline bool almost_equal(float a, float b) {
|
|
if (a == b)
|
|
return true;
|
|
/*if (almost_equal_eps(a, b))
|
|
return true;*/
|
|
return almost_equal_ulp(a, b);
|
|
}
|
|
|
|
class ITestCase;
|
|
|
|
class TestSuite {
|
|
public:
|
|
TestSuite(vx_device_h device);
|
|
~TestSuite();
|
|
|
|
ITestCase* get_test(int testid) const;
|
|
|
|
void add_test(ITestCase* test);
|
|
|
|
size_t size() const;
|
|
|
|
vx_device_h device() const;
|
|
|
|
private:
|
|
std::vector<ITestCase*> _tests;
|
|
vx_device_h device_;
|
|
};
|
|
|
|
class ITestCase {
|
|
public:
|
|
ITestCase(TestSuite* suite, const char* name)
|
|
: suite_(suite)
|
|
, name_(name)
|
|
{}
|
|
|
|
virtual ~ITestCase() {}
|
|
|
|
TestSuite* suite() const {
|
|
return suite_;
|
|
}
|
|
|
|
const char* name() const {
|
|
return name_;
|
|
}
|
|
|
|
virtual int setup(uint32_t n, void* src1, void* src2) = 0;
|
|
|
|
virtual int verify(uint32_t n, void* dst, const void* src1, const void* src2) = 0;
|
|
|
|
protected:
|
|
TestSuite* suite_;
|
|
const char* const name_;
|
|
};
|
|
|
|
class Test_IADD : public ITestCase {
|
|
public:
|
|
Test_IADD(TestSuite* suite) : ITestCase(suite, "iadd") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = n/2 - i;
|
|
b[i] = n/2 + i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
auto c = (int32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] + b[i];
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_IMUL : public ITestCase {
|
|
public:
|
|
Test_IMUL(TestSuite* suite) : ITestCase(suite, "imul") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = n/2 - i;
|
|
b[i] = n/2 + i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
auto c = (int32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] * b[i];
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_IDIV : public ITestCase {
|
|
public:
|
|
Test_IDIV(TestSuite* suite) : ITestCase(suite, "idiv") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = n/2 - i;
|
|
b[i] = n/2 + i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
auto c = (int32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] / b[i];
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_IDIV_MUL : public ITestCase {
|
|
public:
|
|
Test_IDIV_MUL(TestSuite* suite) : ITestCase(suite, "idiv-mul") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = n/2 - i;
|
|
b[i] = n/2 + i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
auto c = (int32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] / b[i];
|
|
auto y = a[i] * b[i];
|
|
auto ref = x + y;
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FADD : public ITestCase {
|
|
public:
|
|
Test_FADD(TestSuite* suite) : ITestCase(suite, "fadd") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] + b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FSUB : public ITestCase {
|
|
public:
|
|
Test_FSUB(TestSuite* suite) : ITestCase(suite, "fsub") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] - b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FMUL : public ITestCase {
|
|
public:
|
|
Test_FMUL(TestSuite* suite) : ITestCase(suite, "fmul") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] * b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FMADD : public ITestCase {
|
|
public:
|
|
Test_FMADD(TestSuite* suite) : ITestCase(suite, "fmadd") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] * b[i] + b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FMSUB : public ITestCase {
|
|
public:
|
|
Test_FMSUB(TestSuite* suite) : ITestCase(suite, "fmsub") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] * b[i] - b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FNMADD : public ITestCase {
|
|
public:
|
|
Test_FNMADD(TestSuite* suite) : ITestCase(suite, "fnmadd") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = -a[i] * b[i] - b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FNMSUB : public ITestCase {
|
|
public:
|
|
Test_FNMSUB(TestSuite* suite) : ITestCase(suite, "fnmsub") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = -a[i] * b[i] + b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FNMADD_MADD : public ITestCase {
|
|
public:
|
|
Test_FNMADD_MADD(TestSuite* suite) : ITestCase(suite, "fnmadd-madd") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = -a[i] * b[i] - b[i];
|
|
auto y = a[i] * b[i] + b[i];
|
|
auto ref = x + y;
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FDIV : public ITestCase {
|
|
public:
|
|
Test_FDIV(TestSuite* suite) : ITestCase(suite, "fdiv") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = a[i] / b[i];
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FDIV2 : public ITestCase {
|
|
public:
|
|
Test_FDIV2(TestSuite* suite) : ITestCase(suite, "fdiv2") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = fround((n - i) * (1.0f/n));
|
|
b[i] = fround((n + i) * (1.0f/n));
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] / b[i];
|
|
auto y = b[i] / a[i];
|
|
auto ref = x + y;
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FSQRT : public ITestCase {
|
|
public:
|
|
Test_FSQRT(TestSuite* suite) : ITestCase(suite, "fsqrt") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
float q = 1.0f + (i % 64);
|
|
a[i] = q;
|
|
b[i] = q;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto ref = sqrt(a[i] * b[i]);
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FTOI : public ITestCase {
|
|
public:
|
|
Test_FTOI(TestSuite* suite) : ITestCase(suite, "ftoi") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
float q = fround(float(n/2) - i + (float(i) / n));
|
|
a[i] = q;
|
|
b[i] = q;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (int32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] + b[i];
|
|
auto ref = (int32_t)x;
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_FTOU : public ITestCase {
|
|
public:
|
|
Test_FTOU(TestSuite* suite) : ITestCase(suite, "ftou") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
float q = fround(i + (float(i) / n));
|
|
a[i] = q;
|
|
b[i] = q;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (float*)src1;
|
|
auto b = (float*)src2;
|
|
auto c = (uint32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] + b[i];
|
|
auto ref = (uint32_t)x;
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_ITOF : public ITestCase {
|
|
public:
|
|
Test_ITOF(TestSuite* suite) : ITestCase(suite, "itof") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = n/2 - i;
|
|
b[i] = n/2 - i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (int32_t*)src1;
|
|
auto b = (int32_t*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] + b[i];
|
|
auto ref = (float)x;
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_UTOF : public ITestCase {
|
|
public:
|
|
Test_UTOF(TestSuite* suite) : ITestCase(suite, "utof") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* src2) override {
|
|
auto a = (uint32_t*)src1;
|
|
auto b = (uint32_t*)src2;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = i;
|
|
b[i] = i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
|
int errors = 0;
|
|
auto a = (uint32_t*)src1;
|
|
auto b = (uint32_t*)src2;
|
|
auto c = (float*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto x = a[i] + b[i];
|
|
auto ref = (float)x;
|
|
if (!almost_equal(c[i], ref)) {
|
|
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
};
|
|
|
|
class Test_BAR : public ITestCase {
|
|
public:
|
|
Test_BAR(TestSuite* suite) : ITestCase(suite, "bar") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* /*src2*/) override {
|
|
RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_WARPS, &num_warps_));
|
|
if (num_warps_ == 1) {
|
|
std::cout << "Error: multiple warps configuration required!" << std::endl;
|
|
return -1;
|
|
}
|
|
RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_THREADS, &num_threads_));
|
|
auto a = (uint32_t*)src1;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* /*src2*/) override {
|
|
int errors = 0;
|
|
auto a = (uint32_t*)src1;
|
|
auto c = (uint32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto tid = i % num_threads_;
|
|
auto wid = (i / num_threads_) % num_warps_;
|
|
auto cid = i / (num_warps_ * num_threads_);
|
|
auto src_idx = (cid * num_warps_ + (num_warps_ - 1 - wid)) * num_threads_ + tid;
|
|
uint32_t ref = a[src_idx];
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
uint64_t num_warps_;
|
|
uint64_t num_threads_;
|
|
};
|
|
|
|
class Test_GBAR : public ITestCase {
|
|
public:
|
|
Test_GBAR(TestSuite* suite) : ITestCase(suite, "gbar") {}
|
|
|
|
int setup(uint32_t n, void* src1, void* /*src2*/) override {
|
|
RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_CORES, &num_cores_));
|
|
if (num_cores_ == 1) {
|
|
std::cout << "Error: multiple cores configuration required!" << std::endl;
|
|
return -1;
|
|
}
|
|
RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_WARPS, &num_warps_));
|
|
RT_CHECK(vx_dev_caps(suite_->device(), VX_CAPS_NUM_THREADS, &num_threads_));
|
|
auto a = (uint32_t*)src1;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
a[i] = i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int verify(uint32_t n, void* dst, const void* src1, const void* /*src2*/) override {
|
|
int errors = 0;
|
|
auto a = (uint32_t*)src1;
|
|
auto c = (uint32_t*)dst;
|
|
for (uint32_t i = 0; i < n; ++i) {
|
|
auto tid = i % num_threads_;
|
|
auto wid = (i / num_threads_) % num_warps_;
|
|
auto cid = i / (num_warps_ * num_threads_);
|
|
auto src_idx = ((num_cores_ - 1 - cid) * num_warps_ + (num_warps_ - 1 - wid)) * num_threads_ + tid;
|
|
uint32_t ref = a[src_idx];
|
|
if (c[i] != ref) {
|
|
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
uint64_t num_cores_;
|
|
uint64_t num_warps_;
|
|
uint64_t num_threads_;
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
TestSuite::TestSuite(vx_device_h device)
|
|
: device_(device) {
|
|
this->add_test(new Test_IADD(this));
|
|
this->add_test(new Test_IMUL(this));
|
|
this->add_test(new Test_IDIV(this));
|
|
this->add_test(new Test_IDIV_MUL(this));
|
|
this->add_test(new Test_FADD(this));
|
|
this->add_test(new Test_FSUB(this));
|
|
this->add_test(new Test_FMUL(this));
|
|
this->add_test(new Test_FMADD(this));
|
|
this->add_test(new Test_FMSUB(this));
|
|
this->add_test(new Test_FNMADD(this));
|
|
this->add_test(new Test_FNMSUB(this));
|
|
this->add_test(new Test_FNMADD_MADD(this));
|
|
this->add_test(new Test_FDIV(this));
|
|
this->add_test(new Test_FDIV2(this));
|
|
this->add_test(new Test_FSQRT(this));
|
|
this->add_test(new Test_FTOI(this));
|
|
this->add_test(new Test_FTOU(this));
|
|
this->add_test(new Test_ITOF(this));
|
|
this->add_test(new Test_UTOF(this));
|
|
this->add_test(new Test_BAR(this));
|
|
this->add_test(new Test_GBAR(this));
|
|
}
|
|
|
|
TestSuite::~TestSuite() {
|
|
for (size_t i = 0; i < _tests.size(); ++i) {
|
|
delete _tests[i];
|
|
}
|
|
}
|
|
|
|
ITestCase* TestSuite::get_test(int testid) const {
|
|
return _tests.at(testid);
|
|
}
|
|
|
|
void TestSuite::add_test(ITestCase* test) {
|
|
_tests.push_back(test);
|
|
}
|
|
|
|
size_t TestSuite::size() const {
|
|
return _tests.size();
|
|
}
|
|
|
|
vx_device_h TestSuite::device() const {
|
|
return device_;
|
|
} |