project directories reorganization
This commit is contained in:
412
benchmarks/old_opencl/guassian/main.cc
Executable file
412
benchmarks/old_opencl/guassian/main.cc
Executable file
@@ -0,0 +1,412 @@
|
||||
#ifndef __GAUSSIAN_ELIMINATION__
|
||||
#define __GAUSSIAN_ELIMINATION__
|
||||
|
||||
#include "gaussianElim.h"
|
||||
|
||||
cl_context context = NULL;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("enter demo main\n");
|
||||
float *a = NULL, *b = NULL, *finalVec = NULL;
|
||||
float *m = NULL;
|
||||
int size;
|
||||
|
||||
FILE *fp;
|
||||
|
||||
// args
|
||||
char filename[100];
|
||||
int quiet = 0, timing = 0, platform = -1, device = -1;
|
||||
|
||||
// parse command line
|
||||
if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform,
|
||||
&device)) {
|
||||
printUsage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
context = cl_init_context(platform, device, quiet);
|
||||
|
||||
fp = fopen(filename, "r");
|
||||
fscanf(fp, "%d", &size);
|
||||
|
||||
a = (float *)malloc(size * size * sizeof(float));
|
||||
|
||||
printf("OK\n");
|
||||
|
||||
InitMat(fp, size, a, size, size);
|
||||
// printf("The input matrix a is:\n");
|
||||
// PrintMat(a, size, size, size);
|
||||
b = (float *)malloc(size * sizeof(float));
|
||||
|
||||
InitAry(fp, b, size);
|
||||
// printf("The input array b is:\n");
|
||||
// PrintAry(b, size);
|
||||
|
||||
// create the solution matrix
|
||||
m = (float *)malloc(size * size * sizeof(float));
|
||||
|
||||
// create a new vector to hold the final answer
|
||||
finalVec = (float *)malloc(size * sizeof(float));
|
||||
|
||||
InitPerRun(size, m);
|
||||
|
||||
// begin timing
|
||||
|
||||
// run kernels
|
||||
ForwardSub(context, a, b, m, size, timing);
|
||||
|
||||
// end timing
|
||||
if (!quiet) {
|
||||
printf("The result of matrix m is: \n");
|
||||
|
||||
PrintMat(m, size, size, size);
|
||||
printf("The result of matrix a is: \n");
|
||||
PrintMat(a, size, size, size);
|
||||
printf("The result of array b is: \n");
|
||||
PrintAry(b, size);
|
||||
|
||||
BackSub(a, b, finalVec, size);
|
||||
printf("The final solution is: \n");
|
||||
PrintAry(finalVec, size);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
free(m);
|
||||
free(a);
|
||||
free(b);
|
||||
free(finalVec);
|
||||
// OpenClGaussianElimination(context,timing);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** ForwardSub() -- Forward substitution of Gaussian
|
||||
** elimination.
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void ForwardSub(cl_context context, float *a, float *b, float *m, int size,
|
||||
int timing) {
|
||||
// 1. set up kernels
|
||||
cl_kernel fan1_kernel, fan2_kernel;
|
||||
cl_int status = 0;
|
||||
cl_program gaussianElim_program;
|
||||
cl_event writeEvent, kernelEvent, readEvent;
|
||||
float writeTime = 0, readTime = 0, kernelTime = 0;
|
||||
float writeMB = 0, readMB = 0;
|
||||
|
||||
gaussianElim_program =
|
||||
cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL);
|
||||
|
||||
fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status);
|
||||
status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
|
||||
fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status);
|
||||
status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true);
|
||||
if (status)
|
||||
exit(1);
|
||||
|
||||
// 2. set up memory on device and send ipts data to device
|
||||
|
||||
cl_mem a_dev, b_dev, m_dev;
|
||||
|
||||
cl_int error = 0;
|
||||
|
||||
a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * size * size, NULL, &error);
|
||||
|
||||
b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL,
|
||||
&error);
|
||||
|
||||
m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(float) * size * size, NULL, &error);
|
||||
|
||||
cl_command_queue command_queue = cl_getCommandQueue();
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, a_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, a, 0, NULL,
|
||||
&writeEvent);
|
||||
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, b_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size, b, 0, NULL, &writeEvent);
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
|
||||
error = clEnqueueWriteBuffer(command_queue, m_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, m, 0, NULL,
|
||||
&writeEvent);
|
||||
if (timing)
|
||||
writeTime += eventTime(writeEvent, command_queue);
|
||||
clReleaseEvent(writeEvent);
|
||||
writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||
|
||||
// 3. Determine block sizes
|
||||
size_t globalWorksizeFan1[1];
|
||||
size_t globalWorksizeFan2[2];
|
||||
|
||||
globalWorksizeFan1[0] = size;
|
||||
globalWorksizeFan2[0] = size;
|
||||
globalWorksizeFan2[1] = size;
|
||||
|
||||
int t;
|
||||
// 4. Setup and Run kernels
|
||||
for (t = 0; t < (size - 1); t++) {
|
||||
// kernel args
|
||||
cl_int argchk;
|
||||
argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size);
|
||||
argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t);
|
||||
|
||||
cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true);
|
||||
|
||||
// launch kernel
|
||||
error =
|
||||
clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0,
|
||||
globalWorksizeFan1, NULL, 0, NULL, &kernelEvent);
|
||||
|
||||
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||
if (timing) {
|
||||
// printf("here1a\n");
|
||||
kernelTime += eventTime(kernelEvent, command_queue);
|
||||
// printf("here1b\n");
|
||||
}
|
||||
clReleaseEvent(kernelEvent);
|
||||
// Fan1<<<dimGrid,dimBlock>>>(m_cuda,a_cuda,Size,t);
|
||||
// cudaThreadSynchronize();
|
||||
|
||||
// kernel args
|
||||
argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size);
|
||||
argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t);
|
||||
|
||||
cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true);
|
||||
|
||||
// launch kernel
|
||||
error =
|
||||
clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0,
|
||||
globalWorksizeFan2, NULL, 0, NULL, &kernelEvent);
|
||||
|
||||
cl_errChk(error, "ERROR in Executing Fan1 Kernel", true);
|
||||
if (timing) {
|
||||
// printf("here2a\n");
|
||||
kernelTime += eventTime(kernelEvent, command_queue);
|
||||
// printf("here2b\n");
|
||||
}
|
||||
clReleaseEvent(kernelEvent);
|
||||
// Fan2<<<dimGridXY,dimBlockXY>>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t);
|
||||
// cudaThreadSynchronize();
|
||||
}
|
||||
// 5. transfer data off of device
|
||||
error =
|
||||
clEnqueueReadBuffer(command_queue, a_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, a, 0, NULL, &readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
|
||||
error = clEnqueueReadBuffer(command_queue, b_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size, b, 0, NULL, &readEvent);
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
|
||||
error =
|
||||
clEnqueueReadBuffer(command_queue, m_dev,
|
||||
1, // change to 0 for nonblocking write
|
||||
0, // offset
|
||||
sizeof(float) * size * size, m, 0, NULL, &readEvent);
|
||||
|
||||
cl_errChk(error, "ERROR with clEnqueueReadBuffer", true);
|
||||
if (timing)
|
||||
readTime += eventTime(readEvent, command_queue);
|
||||
clReleaseEvent(readEvent);
|
||||
readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6);
|
||||
|
||||
if (timing) {
|
||||
printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) "
|
||||
"[size]\t\tTotal(s)\n");
|
||||
printf("%dx%d \t", size, size);
|
||||
|
||||
printf("%f [%.2fMB]\t", writeTime, writeMB);
|
||||
|
||||
printf("%f\t", kernelTime);
|
||||
|
||||
printf("%f [%.2fMB]\t", readTime, readMB);
|
||||
|
||||
printf("%f\n\n", writeTime + kernelTime + readTime);
|
||||
}
|
||||
}
|
||||
|
||||
float eventTime(cl_event event, cl_command_queue command_queue) {
|
||||
cl_int error = 0;
|
||||
cl_ulong eventStart, eventEnd;
|
||||
clFinish(command_queue);
|
||||
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_ulong), &eventStart, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling.", true);
|
||||
error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &eventEnd, NULL);
|
||||
cl_errChk(error, "ERROR in Event Profiling.", true);
|
||||
|
||||
return (float)((eventEnd - eventStart) / 1e9);
|
||||
}
|
||||
|
||||
int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t,
|
||||
int *p, int *d) {
|
||||
int i;
|
||||
// if (argc < 2) return 1; // error
|
||||
strncpy(filename, "matrix4.txt", 100);
|
||||
char flag;
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (argv[i][0] == '-') { // flag
|
||||
flag = argv[i][1];
|
||||
switch (flag) {
|
||||
case 'h': // help
|
||||
return 1;
|
||||
break;
|
||||
case 'q': // quiet
|
||||
*q = 1;
|
||||
break;
|
||||
case 't': // timing
|
||||
*t = 1;
|
||||
break;
|
||||
case 'p': // platform
|
||||
i++;
|
||||
*p = atoi(argv[i]);
|
||||
break;
|
||||
case 'd': // device
|
||||
i++;
|
||||
*d = atoi(argv[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((*d >= 0 && *p < 0) ||
|
||||
(*p >= 0 &&
|
||||
*d < 0)) // both p and d must be specified if either are specified
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void printUsage() {
|
||||
printf("Gaussian Elimination Usage\n");
|
||||
printf("\n");
|
||||
printf("gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n");
|
||||
printf("\n");
|
||||
printf("example:\n");
|
||||
printf("$ ./gaussianElimination matrix4.txt\n");
|
||||
printf("\n");
|
||||
printf("filename the filename that holds the matrix data\n");
|
||||
printf("\n");
|
||||
printf("-h Display the help file\n");
|
||||
printf("-q Quiet mode. Suppress all text output.\n");
|
||||
printf("-t Print timing information.\n");
|
||||
printf("\n");
|
||||
printf("-p [int] Choose the platform (must choose both platform and "
|
||||
"device)\n");
|
||||
printf("-d [int] Choose the device (must choose both platform and "
|
||||
"device)\n");
|
||||
printf("\n");
|
||||
printf("\n");
|
||||
printf("Notes: 1. The filename is required as the first parameter.\n");
|
||||
printf(" 2. If you declare either the device or the platform,\n");
|
||||
printf(" you must declare both.\n\n");
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** InitPerRun() -- Initialize the contents of the
|
||||
** multipier matrix **m
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitPerRun(int size, float *m) {
|
||||
int i;
|
||||
for (i = 0; i < size * size; i++)
|
||||
*(m + i) = 0.0;
|
||||
}
|
||||
void BackSub(float *a, float *b, float *finalVec, int size) {
|
||||
// solve "bottom up"
|
||||
int i, j;
|
||||
for (i = 0; i < size; i++) {
|
||||
finalVec[size - i - 1] = b[size - i - 1];
|
||||
for (j = 0; j < i; j++) {
|
||||
finalVec[size - i - 1] -= *(a + size * (size - i - 1) + (size - j - 1)) *
|
||||
finalVec[size - j - 1];
|
||||
}
|
||||
finalVec[size - i - 1] =
|
||||
finalVec[size - i - 1] / *(a + size * (size - i - 1) + (size - i - 1));
|
||||
}
|
||||
}
|
||||
void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < nrow; i++) {
|
||||
for (j = 0; j < ncol; j++) {
|
||||
fscanf(fp, "%f", ary + size * i + j);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*------------------------------------------------------
|
||||
** InitAry() -- Initialize the array (vector) by reading
|
||||
** data from the data file
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void InitAry(FILE *fp, float *ary, int ary_size) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ary_size; i++) {
|
||||
fscanf(fp, "%f", &ary[i]);
|
||||
}
|
||||
}
|
||||
/*------------------------------------------------------
|
||||
** PrintMat() -- Print the contents of the matrix
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void PrintMat(float *ary, int size, int nrow, int ncol) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < nrow; i++) {
|
||||
for (j = 0; j < ncol; j++) {
|
||||
printf("%8.2f ", *(ary + size * i + j));
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*------------------------------------------------------
|
||||
** PrintAry() -- Print the contents of the array (vector)
|
||||
**------------------------------------------------------
|
||||
*/
|
||||
void PrintAry(float *ary, int ary_size) {
|
||||
int i;
|
||||
for (i = 0; i < ary_size; i++) {
|
||||
printf("%.2f ", ary[i]);
|
||||
}
|
||||
printf("\n\n");
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user