OpenCL benchmark fixes
This commit is contained in:
@@ -69,7 +69,7 @@ static cl_uint numPlatforms;
|
||||
|
||||
//! All discoverable OpenCL devices (one pointer per platform)
|
||||
static cl_device_id* devices = NULL;
|
||||
static cl_uint* numDevices;
|
||||
static cl_uint* numDevices = NULL;
|
||||
|
||||
//! The chosen OpenCL platform
|
||||
static cl_platform_id platform = NULL;
|
||||
@@ -265,9 +265,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
||||
if (platform >= 0 && dev >= 0) printInfo = 0;
|
||||
cl_int status;
|
||||
// Used to iterate through the platforms and devices, respectively
|
||||
cl_uint numPlatforms;
|
||||
cl_uint numDevices;
|
||||
|
||||
|
||||
// These will hold the platform and device we select (can potentially be
|
||||
// multiple, but we're just doing one for now)
|
||||
// cl_platform_id platform = NULL;
|
||||
@@ -397,23 +395,24 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
||||
// Getting platform and device information
|
||||
|
||||
numPlatforms = 1;
|
||||
numDevices = 1;
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
|
||||
|
||||
status = clGetPlatformIDs(1, platforms, NULL);
|
||||
numDevices = (cl_uint*)malloc(sizeof(cl_uint)*numPlatforms);
|
||||
numDevices[0] = 1;
|
||||
devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices[0]);
|
||||
|
||||
int platform_touse = 0;
|
||||
int device_touse = 0;
|
||||
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL);
|
||||
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, numDevices[0], devices, NULL);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
|
||||
context = clCreateContext(NULL, numDevices[0], devices, NULL, NULL, &status);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
|
||||
device=devices[device_touse];
|
||||
|
||||
#define PROFILING
|
||||
|
||||
#ifdef PROFILING
|
||||
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
@@ -421,7 +420,7 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
||||
|
||||
#else
|
||||
|
||||
clCommandQueue = clCreateCommandQueue(clGPUContext,
|
||||
commandQueue = clCreateCommandQueue(context,
|
||||
devices[device_touse], NULL, &status);
|
||||
|
||||
#endif // PROFILING
|
||||
@@ -434,22 +433,34 @@ cl_context cl_init_context(int platform, int dev,int quiet) {
|
||||
/*!
|
||||
Release all resources that the user doesn't have access to: the command queue, the context, the devices, and the device/platform bookkeeping arrays.
|
||||
*/
|
||||
void cl_cleanup()
|
||||
void cl_cleanup()
|
||||
{
|
||||
cl_int status;
|
||||
|
||||
// Free the command queue
|
||||
if(commandQueue) {
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
if (commandQueue) {
|
||||
status = clReleaseCommandQueue(commandQueue);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseCommandQueue()\n");
|
||||
}
|
||||
|
||||
// Free the context
|
||||
if(context) {
|
||||
clReleaseContext(context);
|
||||
if (context) {
|
||||
status = clReleaseContext(context);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseContext()\n");
|
||||
}
|
||||
|
||||
for (int p = 0; p < numPlatforms; ++p) {
|
||||
for (int d = 0; d < numDevices[p]; ++d) {
|
||||
status = clReleaseDevice(devices[d]);
|
||||
cl_errChk(status, "Oops!", true);
|
||||
printf("clReleaseDevice()\n");
|
||||
}
|
||||
}
|
||||
|
||||
free(devices);
|
||||
free(numDevices);
|
||||
|
||||
// Free the platforms
|
||||
free(platforms);
|
||||
}
|
||||
|
||||
@@ -464,6 +475,7 @@ void cl_freeKernel(cl_kernel kernel)
|
||||
if(kernel != NULL) {
|
||||
status = clReleaseKernel(kernel);
|
||||
cl_errChk(status, "Releasing kernel object", true);
|
||||
printf("clReleaseKernel()\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -478,6 +490,7 @@ void cl_freeMem(cl_mem mem)
|
||||
if(mem != NULL) {
|
||||
status = clReleaseMemObject(mem);
|
||||
cl_errChk(status, "Releasing mem object", true);
|
||||
printf("clReleaseMemObject()\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -492,6 +505,7 @@ void cl_freeProgram(cl_program program)
|
||||
if(program != NULL) {
|
||||
status = clReleaseProgram(program);
|
||||
cl_errChk(status, "Releasing program object", true);
|
||||
printf("clReleaseProgram()\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -49,25 +49,27 @@ int main(int argc, char *argv[]) {
|
||||
printf("%s --> Distance=%f\n", records[i].recString, records[i].distance);
|
||||
}
|
||||
free(recordDistances);
|
||||
|
||||
cl_cleanup();
|
||||
|
||||
printf("Passed!\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
std::vector<LatLong> &locations, float lat,
|
||||
float lng, int timing) {
|
||||
|
||||
// 1. set up kernel
|
||||
cl_kernel NN_kernel;
|
||||
cl_int status;
|
||||
|
||||
// 1. set up kernel
|
||||
cl_kernel NN_kernel;
|
||||
cl_program cl_NN_program;
|
||||
cl_NN_program = cl_compileProgram((char *)"nearestNeighbor_kernel.cl", NULL);
|
||||
|
||||
NN_kernel = clCreateKernel(cl_NN_program, "NearestNeighbor", &status);
|
||||
status =
|
||||
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
cl_errChk(status, (char *)"Error Creating Nearest Neighbor kernel", true);
|
||||
|
||||
// 2. set up memory on device and send ipts data to device
|
||||
// copy ipts(1,2) to device
|
||||
// also need to allocate memory for the distancePoints
|
||||
@@ -78,9 +80,11 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
|
||||
d_locations = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
||||
sizeof(LatLong) * numRecords, NULL, &error);
|
||||
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
|
||||
|
||||
d_distances = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * numRecords, NULL, &error);
|
||||
cl_errChk(error, "ERROR: clCreateBuffer() failed", true);
|
||||
|
||||
cl_command_queue command_queue = cl_getCommandQueue();
|
||||
cl_event writeEvent, kernelEvent, readEvent;
|
||||
@@ -89,6 +93,7 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
0, // offset
|
||||
sizeof(LatLong) * numRecords, &locations[0], 0,
|
||||
NULL, &writeEvent);
|
||||
cl_errChk(error, "ERROR: clEnqueueWriteBuffer() failed", true);
|
||||
|
||||
// 3. send arguments to device
|
||||
cl_int argchk;
|
||||
@@ -124,8 +129,10 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
&readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing) {
|
||||
clFinish(command_queue);
|
||||
|
||||
clFinish(command_queue);
|
||||
|
||||
if (timing) {
|
||||
cl_ulong eventStart, eventEnd, totalTime = 0;
|
||||
printf("# Records\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||
"[size]\t\tTotal(s)\n");
|
||||
@@ -166,8 +173,14 @@ float *OpenClFindNearestNeighbors(cl_context context, int numRecords,
|
||||
printf("%f\n\n", (float)(totalTime / 1e9));
|
||||
}
|
||||
// 6. return finalized data and release buffers
|
||||
clReleaseMemObject(d_locations);
|
||||
clReleaseMemObject(d_distances);
|
||||
clReleaseEvent(writeEvent);
|
||||
clReleaseEvent(kernelEvent);
|
||||
clReleaseEvent(readEvent);
|
||||
cl_freeMem(d_locations);
|
||||
cl_freeMem(d_distances);
|
||||
cl_freeKernel(NN_kernel);
|
||||
cl_freeProgram(cl_NN_program);
|
||||
|
||||
return distances;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user