adding opencl convolution benchmark
This commit is contained in:
@@ -1,12 +1,8 @@
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#define USE_FLOAT
|
||||
|
||||
#ifdef USE_FLOAT
|
||||
#ifndef TYPE
|
||||
#define TYPE float
|
||||
#else
|
||||
#define TYPE int
|
||||
#endif
|
||||
|
||||
#endif // COMMON_H
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#define KERNEL_NAME "sgemm"
|
||||
|
||||
#define FLOAT_ULP 6
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
cl_int _err = _expr; \
|
||||
@@ -33,6 +35,66 @@
|
||||
_ret; \
|
||||
})
|
||||
|
||||
template <typename Type>
|
||||
class Comparator {};
|
||||
|
||||
template <>
|
||||
class Comparator<int> {
|
||||
public:
|
||||
static const char* type_str() {
|
||||
return "integer";
|
||||
}
|
||||
static int generate() {
|
||||
return rand();
|
||||
}
|
||||
static bool compare(int a, int b, int index, int errors) {
|
||||
if (a != b) {
|
||||
if (errors < 100) {
|
||||
printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class Comparator<float> {
|
||||
public:
|
||||
static const char* type_str() {
|
||||
return "float";
|
||||
}
|
||||
static int generate() {
|
||||
return static_cast<float>(rand()) / RAND_MAX;
|
||||
}
|
||||
static bool compare(float a, float b, int index, int errors) {
|
||||
union fi_t { float f; int32_t i; };
|
||||
fi_t fa, fb;
|
||||
fa.f = a;
|
||||
fb.f = b;
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
if (d > FLOAT_ULP) {
|
||||
if (errors < 100) {
|
||||
printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/*static void sgemm_cpu(TYPE *C, const TYPE* A, const TYPE *B, int M, int N, int K) {
|
||||
for (int m = 0; m < M; ++m) {
|
||||
for (int n = 0; n < N; ++n) {
|
||||
TYPE acc = 0;
|
||||
for (int k = 0; k < K; ++k) {
|
||||
acc += A[k * M + m] * B[n * K + k];
|
||||
}
|
||||
C[n * M + m] = acc;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
@@ -54,32 +116,6 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*static void matmul(TYPE *C, const TYPE* A, const TYPE *B, int M, int N, int K) {
|
||||
for (int m = 0; m < M; ++m) {
|
||||
for (int n = 0; n < N; ++n) {
|
||||
TYPE acc = 0;
|
||||
for (int k = 0; k < K; ++k) {
|
||||
acc += A[k * M + m] * B[n * K + k];
|
||||
}
|
||||
C[n * M + m] = acc;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
#ifdef USE_FLOAT
|
||||
static bool compare_equal(float a, float b, int ulp = 21) {
|
||||
union fi_t { int i; float f; };
|
||||
fi_t fa, fb;
|
||||
fa.f = a;
|
||||
fb.f = b;
|
||||
return std::abs(fa.i - fb.i) <= ulp;
|
||||
}
|
||||
#else
|
||||
static bool compare_equal(int a, int b, int ulp = 21) {
|
||||
return (a == b);
|
||||
}
|
||||
#endif
|
||||
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
@@ -145,6 +181,8 @@ int main (int argc, char **argv) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t num_points = size * size;
|
||||
|
||||
cl_platform_id platform_id;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status;
|
||||
@@ -163,7 +201,7 @@ int main (int argc, char **argv) {
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
// Allocate device buffers
|
||||
size_t nbytes = size * size * sizeof(TYPE);
|
||||
size_t nbytes = num_points * sizeof(TYPE);
|
||||
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||
@@ -194,23 +232,17 @@ int main (int argc, char **argv) {
|
||||
h_b = (TYPE*)malloc(nbytes);
|
||||
h_c = (TYPE*)malloc(nbytes);
|
||||
|
||||
// Initialize values for array members.
|
||||
for (int i = 0; i < (size * size); ++i) {
|
||||
#ifdef USE_FLOAT
|
||||
h_a[i] = (float)rand() / (float)RAND_MAX;
|
||||
h_b[i] = (float)rand() / (float)RAND_MAX;
|
||||
#else
|
||||
h_a[i] = rand();
|
||||
h_b[i] = rand();
|
||||
#endif
|
||||
h_c[i] = 0xdeadbeef;
|
||||
// Generate input values
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
h_a[i] = Comparator<TYPE>::generate();
|
||||
h_b[i] = Comparator<TYPE>::generate();
|
||||
}
|
||||
|
||||
size_t global_offset[2] = {0, 0};
|
||||
size_t global_work_size[2] = {size, size};
|
||||
size_t local_work_size[2] = {1, 1};
|
||||
|
||||
std::vector<float> ref_vec(size * size);
|
||||
std::vector<float> ref_vec(num_points);
|
||||
|
||||
// reference generation
|
||||
size_t num_groups_y = global_work_size[1] / local_work_size[1];
|
||||
@@ -228,12 +260,7 @@ int main (int argc, char **argv) {
|
||||
TYPE acc = 0;
|
||||
for (int k = 0; k < width; k++) {
|
||||
acc += h_a[k * width + r] * h_b[c * width + k];
|
||||
}
|
||||
/*#ifdef USE_FLOAT
|
||||
printf("*** r=%d, c=%d, v=%f\n", r, c, acc);
|
||||
#else
|
||||
printf("*** r=%d, c=%d, v=%d\n", r, c, acc);
|
||||
#endif*/
|
||||
}
|
||||
ref_vec[c * width + r] = acc;
|
||||
}
|
||||
}
|
||||
@@ -260,14 +287,8 @@ int main (int argc, char **argv) {
|
||||
|
||||
printf("Verify result\n");
|
||||
int errors = 0;
|
||||
for (int i = 0; i < (size * size); i++) {
|
||||
if (!compare_equal(h_c[i], ref_vec[i])) {
|
||||
if (errors < 100)
|
||||
#ifdef USE_FLOAT
|
||||
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
|
||||
#else
|
||||
printf("*** error: [%d] expected=%d, actual=%d\n", i, ref_vec[i], h_c[i]);
|
||||
#endif
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
if (!Comparator<TYPE>::compare(h_c[i], ref_vec[i], i, errors)) {
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user