fixed l3cache hang using memory arbiter in afu

This commit is contained in:
Blaise Tine
2020-11-15 06:36:32 -08:00
parent 2e0f51af80
commit 5d58bf3d11
20 changed files with 514 additions and 388 deletions

View File

@@ -14,17 +14,29 @@ union Float_t {
} parts;
};
// Rounds `x` to `precision` decimal digits (default 4).
// The scaling and rounding are carried out in double precision and the
// result is narrowed back to float on return. Ties are rounded away from
// zero, as done by std::round.
inline float fround(float x, int32_t precision = 4) {
  const double scale = std::pow(10.0, static_cast<double>(precision));
  const double scaled = std::round(static_cast<double>(x) * scale);
  return static_cast<float>(scaled / scale);
}
// Relative-epsilon comparison of two floats.
// Returns true when |a - b| is no larger than `eps` scaled by the SMALLER of
// |a| and |b|. `eps` defaults to the machine epsilon for float.
// Note: because the tolerance scales with the smaller magnitude, it is zero
// whenever either operand is zero, so in that case only exact equality passes.
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
  // Only one tolerance definition is kept; the previous text declared
  // `tolerance` twice (std::max and std::min variants), which does not compile.
  const auto tolerance = std::min(std::fabs(a), std::fabs(b)) * eps;
  return std::fabs(a - b) <= tolerance;
}
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) {
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
Float_t fa{a}, fb{b};
return std::abs(fa.i - fb.i) <= ulp;
auto d = std::abs(fa.i - fb.i);
if (d > ulp) {
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
return false;
}
return true;
}
// Combined float comparison: accepts the pair if the relative-epsilon check
// passes, and only otherwise falls back to the ULP-distance check.
inline bool almost_equal(float a, float b) {
  return almost_equal_eps(a, b) || almost_equal_ulp(a, b);
}
@@ -158,8 +170,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -186,8 +198,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -214,8 +226,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -242,8 +254,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -270,8 +282,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -298,8 +310,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -326,8 +338,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -354,8 +366,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -384,8 +396,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -412,8 +424,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n - i) * (1.0f/n);
b[i] = (n + i) * (1.0f/n);
a[i] = fround((n - i) * (1.0f/n));
b[i] = fround((n + i) * (1.0f/n));
}
}
@@ -442,7 +454,7 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
int q = 1.0f + (i % 64);
float q = 1.0f + (i % 64);
a[i] = q;
b[i] = q;
}
@@ -471,8 +483,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = (n/2 - i) * (1.0f/n);
b[i] = (n/2 - i) * (1.0f/n);
a[i] = fround((n/2 - i) * (1.0f/n));
b[i] = fround((n/2 - i) * (1.0f/n));
}
}
@@ -500,8 +512,8 @@ public:
auto a = (float*)src1;
auto b = (float*)src2;
for (int i = 0; i < n; ++i) {
a[i] = i * (1.0f/n);
b[i] = i * (1.0f/n);
a[i] = fround(i * (1.0f/n));
b[i] = fround(i * (1.0f/n));
}
}