Adding OpenCL convolution benchmark

This commit is contained in:
Blaise Tine
2023-11-14 22:31:30 -08:00
parent 4e7a536918
commit 61e3442ef8
16 changed files with 490 additions and 170 deletions

View File

@@ -1,12 +1,8 @@
#ifndef COMMON_H
#define COMMON_H
#define USE_FLOAT
#ifdef USE_FLOAT
#ifndef TYPE
#define TYPE float
#else
#define TYPE int
#endif
#endif // COMMON_H

View File

@@ -11,6 +11,8 @@
#define KERNEL_NAME "sgemm"
#define FLOAT_ULP 6
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
@@ -33,6 +35,66 @@
_ret; \
})
// Per-element-type helper used to generate test inputs and to check results.
// The primary template is intentionally empty; each supported element type
// supplies its own specialization.
template <typename Type>
class Comparator {};

// Integer flavour: raw rand() inputs and exact-match comparison.
template <>
class Comparator<int> {
public:
  // Name of the element type handled by this specialization.
  static const char* type_str() { return "integer"; }

  // Produces the next input value straight from rand().
  static int generate() { return rand(); }

  // Returns true when a and b are identical. On a mismatch the pair is
  // printed together with index, but only while errors is below 100 so the
  // log stays bounded.
  static bool compare(int a, int b, int index, int errors) {
    if (a == b) {
      return true;
    }
    const bool report = (errors < 100);
    if (report) {
      printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b);
    }
    return false;
  }
};
// Float flavour: inputs are random fractions in [0, 1] and results are
// compared by the distance between their 32-bit patterns, tolerating up to
// FLOAT_ULP units of difference.
template <>
class Comparator<float> {
public:
  // Name of the element type handled by this specialization.
  static const char* type_str() {
    return "float";
  }

  // Returns rand() scaled by RAND_MAX, i.e. a value in [0, 1].
  // The return type must be float: when declared as int the fraction was
  // truncated, yielding 0 for nearly every draw.
  static float generate() {
    return static_cast<float>(rand()) / RAND_MAX;
  }

  // Returns true when the bit patterns of a and b differ by at most
  // FLOAT_ULP. On a mismatch the pair is printed together with index, but
  // only while errors is below 100 so the log stays bounded.
  static bool compare(float a, float b, int index, int errors) {
    union fi_t { float f; int32_t i; };
    fi_t fa, fb;
    fa.f = a;
    fb.f = b;
    // Widen to 64 bits before subtracting: the 32-bit difference overflows
    // (undefined behaviour) when the two patterns have opposite signs.
    const int64_t ia = fa.i;
    const int64_t ib = fb.i;
    const int64_t d = (ia > ib) ? (ia - ib) : (ib - ia);
    if (d > FLOAT_ULP) {
      if (errors < 100) {
        printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b);
      }
      return false;
    }
    return true;
  }
};
/*static void sgemm_cpu(TYPE *C, const TYPE* A, const TYPE *B, int M, int N, int K) {
for (int m = 0; m < M; ++m) {
for (int n = 0; n < N; ++n) {
TYPE acc = 0;
for (int k = 0; k < K; ++k) {
acc += A[k * M + m] * B[n * K + k];
}
C[n * M + m] = acc;
}
}
}*/
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
@@ -54,32 +116,6 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
return 0;
}
/*static void matmul(TYPE *C, const TYPE* A, const TYPE *B, int M, int N, int K) {
for (int m = 0; m < M; ++m) {
for (int n = 0; n < N; ++n) {
TYPE acc = 0;
for (int k = 0; k < K; ++k) {
acc += A[k * M + m] * B[n * K + k];
}
C[n * M + m] = acc;
}
}
}*/
#ifdef USE_FLOAT
// Float variant: reinterprets both values as integers and accepts them as
// equal when the two bit patterns are no more than ulp apart.
static bool compare_equal(float a, float b, int ulp = 21) {
  union fi_t { int i; float f; };
  fi_t lhs, rhs;
  lhs.f = a;
  rhs.f = b;
  const auto distance = std::abs(lhs.i - rhs.i);
  return !(distance > ulp);
}
#else
// Integer variant: values must match exactly. The ulp parameter exists only
// so the signature mirrors the float variant; it is not consulted.
static bool compare_equal(int a, int b, int ulp = 21) {
  (void)ulp;
  const bool identical = (a == b);
  return identical;
}
#endif
// OpenCL handles, all initialised to NULL.
cl_device_id device_id = NULL;
// Assigned in main from clCreateContext.
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
@@ -145,6 +181,8 @@ int main (int argc, char **argv) {
// parse command arguments
parse_args(argc, argv);
uint32_t num_points = size * size;
cl_platform_id platform_id;
size_t kernel_size;
cl_int binary_status;
@@ -163,7 +201,7 @@ int main (int argc, char **argv) {
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
// Allocate device buffers
size_t nbytes = size * size * sizeof(TYPE);
size_t nbytes = num_points * sizeof(TYPE);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
@@ -194,23 +232,17 @@ int main (int argc, char **argv) {
h_b = (TYPE*)malloc(nbytes);
h_c = (TYPE*)malloc(nbytes);
// Initialize values for array members.
for (int i = 0; i < (size * size); ++i) {
#ifdef USE_FLOAT
h_a[i] = (float)rand() / (float)RAND_MAX;
h_b[i] = (float)rand() / (float)RAND_MAX;
#else
h_a[i] = rand();
h_b[i] = rand();
#endif
h_c[i] = 0xdeadbeef;
// Generate input values
for (uint32_t i = 0; i < num_points; ++i) {
h_a[i] = Comparator<TYPE>::generate();
h_b[i] = Comparator<TYPE>::generate();
}
size_t global_offset[2] = {0, 0};
size_t global_work_size[2] = {size, size};
size_t local_work_size[2] = {1, 1};
std::vector<float> ref_vec(size * size);
std::vector<float> ref_vec(num_points);
// reference generation
size_t num_groups_y = global_work_size[1] / local_work_size[1];
@@ -228,12 +260,7 @@ int main (int argc, char **argv) {
TYPE acc = 0;
for (int k = 0; k < width; k++) {
acc += h_a[k * width + r] * h_b[c * width + k];
}
/*#ifdef USE_FLOAT
printf("*** r=%d, c=%d, v=%f\n", r, c, acc);
#else
printf("*** r=%d, c=%d, v=%d\n", r, c, acc);
#endif*/
}
ref_vec[c * width + r] = acc;
}
}
@@ -260,14 +287,8 @@ int main (int argc, char **argv) {
printf("Verify result\n");
int errors = 0;
for (int i = 0; i < (size * size); i++) {
if (!compare_equal(h_c[i], ref_vec[i])) {
if (errors < 100)
#ifdef USE_FLOAT
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
#else
printf("*** error: [%d] expected=%d, actual=%d\n", i, ref_vec[i], h_c[i]);
#endif
for (uint32_t i = 0; i < num_points; ++i) {
if (!Comparator<TYPE>::compare(h_c[i], ref_vec[i], i, errors)) {
++errors;
}
}