diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index b085b35f..98594a82 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -31,18 +31,20 @@ CXXFLAGS += -I$(POCL_INC_PATH) VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a -PROJECT=bfs +PROJECT = bfs + +SRCS = main.cc all: $(PROJECT).dump $(PROJECT).hex lib$(PROJECT).a: kernel.cl POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl -$(PROJECT).elf: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf -$(PROJECT).qemu: main.cc lib$(PROJECT).a - $(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu $(PROJECT).hex: $(PROJECT).elf $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex @@ -63,4 +65,4 @@ gdb-c: $(PROJECT).qemu $(GDB) $(PROJECT).qemu clean: - rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu \ No newline at end of file + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile new file mode 100644 index 00000000..5e5d6b79 --- /dev/null +++ b/benchmarks/opencl/guassian/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) +POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) +POCL_INC_PATH = $(wildcard ../include) +POCL_LIB_PATH = $(wildcard ../lib) +VX_RT_PATH = $(wildcard ../../../runtime) +VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a + +PROJECT = gaussian + +SRCS = main.cc clutils.cpp utils.cpp + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/opencl/guassian/OriginalParallel.c b/benchmarks/opencl/guassian/OriginalParallel.c new file mode 100755 index 00000000..6a899b68 --- /dev/null +++ b/benchmarks/opencl/guassian/OriginalParallel.c @@ -0,0 +1,241 @@ +/*----------------------------------------------------------- +** ge_p.c -- The program is to solve a linear system Ax = b +** by using Gaussian Elimination. The algorithm on page 101 +** ("Foundations of Parallel Programming") is used. +** The sequential version is ge_s.c. This parallel +** implementation converts three independent for() loops +** into three Fans. Use the data file ge_3.dat to verify +** the correction of the output. +** +** Written by Andreas Kura, 02/15/95 +** Modified by Chong-wei Xu, /04/20/95 +**----------------------------------------------------------- +*/ +#include +#include + +int Size, t; +float **a, *b; +BEGIN_SHARED_DECL + float **m; +END_SHARED_DECL; +FILE *fp; + +void InitProblemOnce(); +void InitPerRun(); +void ForwardSub(); +void Fan1(); +void Fan2(); +void Fan3(); +void InitMat(); +void InitAry(); +void PrintMat(); +void PrintAry(); + +main () +{ + InitializeUs(); + MakeSharedVariables; /* to make SHARED m */ + + InitProblemOnce(); + InitPerRun(); + ForwardSub(); + + printf("The result of matrix m is: \n"); + PrintMat(SHARED m, Size, Size); + printf("The result of matrix a is: \n"); + PrintMat(a, Size, Size); + printf("The result of array b is: \n"); + PrintAry(b, Size); +} + +/*------------------------------------------------------ +** InitProblemOnce -- Initialize all of matrices and +** vectors by opening a data file specified by the user. +** +** We used dynamic array **a, *b, and **m to allocate +** the memory storages. +**------------------------------------------------------ +*/ +void InitProblemOnce() +{ + char filename[30]; + + printf("Enter the data file name: "); + scanf("%s", filename); + printf("The file name is: %s\n", filename); + + fp = fopen(filename, "r"); + + fscanf(fp, "%d", &Size); + a = (float **) UsAllocScatterMatrix(Size, Size, sizeof(float)); + /* + a = (float **) malloc(Size * sizeof(float *)); + for (i=0; i +#include +#include +#include + +#include + +#include "clutils.h" +#include "utils.h" + + +// The following variables have file scope to simplify +// the utility functions + +//! All discoverable OpenCL platforms +static cl_platform_id* platforms = NULL; +static cl_uint numPlatforms; + +//! All discoverable OpenCL devices (one pointer per platform) +static cl_device_id* devices = NULL; +static cl_uint* numDevices; + +//! The chosen OpenCL platform +static cl_platform_id platform = NULL; + +//! The chosen OpenCL device +static cl_device_id device = NULL; + +//! OpenCL context +static cl_context context = NULL; + +//! OpenCL command queue +static cl_command_queue commandQueue = NULL; +static cl_command_queue commandQueueProf = NULL; +static cl_command_queue commandQueueNoProf = NULL; + +//! Global status of events +static bool eventsEnabled = false; + + +//------------------------------------------------------- +// Initialization and Cleanup +//------------------------------------------------------- + +//! Initialize OpenCl environment on one device +/*! + Init function for one device. Looks for supported devices and creates a context + \return returns a context initialized +*/ +/*cl_context cl_init(char devicePreference) +{ + cl_int status; + + // Discover and populate the platforms + status = clGetPlatformIDs(0, NULL, &numPlatforms); + cl_errChk(status, "Getting platform IDs", true); + if (numPlatforms > 0) + { + // Get all the platforms + platforms = (cl_platform_id*)alloc(numPlatforms * + sizeof(cl_platform_id)); + + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + cl_errChk(status, "Getting platform IDs", true); + } + else + { + // If no platforms are available, we shouldn't continue + printf("No OpenCL platforms found\n"); + exit(-1); + } + + // Allocate space for the device lists and lengths + numDevices = (cl_uint*)alloc(sizeof(cl_uint)*numPlatforms); + devices = (cl_device_id**)alloc(sizeof(cl_device_id*)*numPlatforms); + + // If a device preference was supplied, we'll limit the search of devices + // based on type + cl_device_type deviceType = CL_DEVICE_TYPE_ALL; + if(devicePreference == 'c') { + deviceType = CL_DEVICE_TYPE_CPU; + } + if(devicePreference == 'g') { + deviceType = CL_DEVICE_TYPE_GPU; + } + + // Traverse the platforms array printing information and + // populating devices + for(unsigned int i = 0; i < numPlatforms ; i++) + { + // Print out some basic info about the platform + char* platformName = NULL; + char* platformVendor = NULL; + + platformName = cl_getPlatformName(platforms[i]); + platformVendor = cl_getPlatformVendor(platforms[i]); + + status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &numDevices[i]); + cl_errChk(status, "Getting device IDs", false); + if(status != CL_SUCCESS) { + printf("This is a known NVIDIA bug (if platform == AMD then die)\n"); + printf("Setting number of devices to 0 and continuing\n"); + numDevices[i] = 0; + } + + printf("Platform %d (%d devices):\n", i, numDevices[i]); + printf("\tName: %s\n", platformName); + printf("\tVendor: %s\n", platformVendor); + + free(platformName); + free(platformVendor); + + // Populate OpenCL devices if any exist + if(numDevices[i] != 0) + { + // Allocate an array of devices of size "numDevices" + devices[i] = (cl_device_id*)alloc(sizeof(cl_device_id)*numDevices[i]); + + // Populate Arrray with devices + status = clGetDeviceIDs(platforms[i], deviceType, numDevices[i], + devices[i], NULL); + cl_errChk(status, "Getting device IDs", true); + } + + // Print some information about each device + for( unsigned int j = 0; j < numDevices[i]; j++) + { + char* deviceName = NULL; + char* deviceVendor = NULL; + + printf("\tDevice %d:\n", j); + + deviceName = cl_getDeviceName(devices[i][j]); + deviceVendor = cl_getDeviceVendor(devices[i][j]); + + printf("\t\tName: %s\n", deviceName); + printf("\t\tVendor: %s\n", deviceVendor); + + free(deviceName); + free(deviceVendor); + } + } + + // Hard-code in the platform/device to use, or uncomment 'scanf' + // to decide at runtime + cl_uint chosen_platform, chosen_device; + // UNCOMMENT the following two lines to manually select device each time + //printf("Enter Platform and Device No (Seperated by Space) \n"); + //scanf("%d %d", &chosen_platform, &chosen_device); + chosen_platform = 0; + chosen_device = 0; + printf("Using Platform %d, Device %d \n", chosen_platform, chosen_device); + + // Do a sanity check of platform/device selection + if(chosen_platform >= numPlatforms || + chosen_device >= numDevices[chosen_platform]) { + printf("Invalid platform/device combination\n"); + exit(-1); + } + + // Set the selected platform and device + platform = platforms[chosen_platform]; + device = devices[chosen_platform][chosen_device]; + + // Create the context + cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platform), 0}; + context = clCreateContext(cps, 1, &device, NULL, NULL, &status); + cl_errChk(status, "Creating context", true); + + // Create the command queue + commandQueueProf = clCreateCommandQueue(context, device, + CL_QUEUE_PROFILING_ENABLE, &status); + cl_errChk(status, "creating command queue", true); + + commandQueueNoProf = clCreateCommandQueue(context, device, 0, &status); + cl_errChk(status, "creating command queue", true); + + if(eventsEnabled) { + printf("Profiling enabled\n"); + commandQueue = commandQueueProf; + } + else { + printf("Profiling disabled\n"); + commandQueue = commandQueueNoProf; + } + + return context; +}*/ + +cl_context cl_init_context(int platform, int dev,int quiet) { + int printInfo=1; + if (platform >= 0 && dev >= 0) printInfo = 0; + cl_int status; + // Used to iterate through the platforms and devices, respectively + cl_uint numPlatforms; + cl_uint numDevices; + + // These will hold the platform and device we select (can potentially be + // multiple, but we're just doing one for now) + // cl_platform_id platform = NULL; + + /*status = clGetPlatformIDs(0, NULL, &numPlatforms); + if (printInfo) printf("Number of platforms detected:%d\n", numPlatforms); + + // Print some information about the available platforms + cl_platform_id *platforms = NULL; + cl_device_id * devices = NULL; + if (numPlatforms > 0) + { + // get all the platforms + platforms = (cl_platform_id*)malloc(numPlatforms * + sizeof(cl_platform_id)); + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + + // Traverse the platforms array + if (printInfo) printf("Checking For OpenCl Compatible Devices\n"); + for(unsigned int i = 0; i < numPlatforms ; i++) + { + char pbuf[100]; + if (printInfo) printf("Platform %d:\t", i); + status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, + sizeof(pbuf), pbuf, NULL); + if (printInfo) printf("Vendor: %s\n", pbuf); + + //unsigned int numDevices; + + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); + if(cl_errChk(status, "checking for devices",true)) + exit(1); + if(numDevices == 0) { + printf("There are no devices for Platform %d\n",i); + exit(0); + } + else + { + if (printInfo) printf("\tNo of devices for Platform %d is %u\n",i, numDevices); + //! Allocate an array of devices of size "numDevices" + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); + //! Populate Arrray with devices + status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, + devices, NULL); + if(cl_errChk(status, "getting device IDs",true)) { + exit(1); + } + } + for( unsigned int j = 0; j < numDevices; j++) + { + char dbuf[100]; + char deviceStr[100]; + if (printInfo) printf("\tDevice: %d\t", j); + status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuf), + deviceStr, NULL); + cl_errChk(status, "Getting Device Info\n",true); + if (printInfo) printf("Vendor: %s", deviceStr); + status = clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(dbuf), + dbuf, NULL); + if (printInfo) printf("\n\t\tName: %s\n", dbuf); + } + } + } + else + { + // If no platforms are available, we're sunk! + printf("No OpenCL platforms found\n"); + exit(0); + } + + int platform_touse; + unsigned int device_touse; + if (printInfo) printf("Enter Platform and Device No (Seperated by Space) \n"); + if (printInfo) scanf("%d %d", &platform_touse, &device_touse); + else { + platform_touse = platform; + device_touse = dev; + } + if (!quiet) printf("Using Platform %d \t Device No %d \n",platform_touse, device_touse); + + //! Recheck how many devices does our chosen platform have + status = clGetDeviceIDs(platforms[platform_touse], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); + + if(device_touse > numDevices) + { + printf("Invalid Device Number\n"); + exit(1); + } + + //! Populate devices array with all the visible devices of our chosen platform + devices = (cl_device_id *)malloc(sizeof(cl_device_id)*numDevices); + status = clGetDeviceIDs(platforms[platform_touse], + CL_DEVICE_TYPE_ALL, numDevices, + devices, NULL); + if(cl_errChk(status,"Error in Getting Devices\n",true)) exit(1); + + + //!Check if Device requested is a CPU or a GPU + cl_device_type dtype; + device = devices[device_touse]; + status = clGetDeviceInfo(devices[device_touse], + CL_DEVICE_TYPE, + sizeof(dtype), + (void *)&dtype, + NULL); + if(cl_errChk(status,"Error in Getting Device Info\n",true)) exit(1); + if(dtype == CL_DEVICE_TYPE_GPU) { + if (!quiet) printf("Creating GPU Context\n\n"); + } + else if (dtype == CL_DEVICE_TYPE_CPU) { + if (!quiet) printf("Creating CPU Context\n\n"); + } + else perror("This Context Type Not Supported\n"); + + cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)(platforms[platform_touse]), 0}; + + cl_context_properties *cprops = cps; + + context = clCreateContextFromType( + cprops, (cl_device_type)dtype, + NULL, NULL, &status); + if(cl_errChk(status, "creating Context",true)) { + exit(1); + }*/ + + // Getting platform and device information + + numPlatforms = 1; + numDevices = 1; + int platform_touse = 0; + int device_touse = 0; + platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); + devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices); + + status = clGetPlatformIDs(1, platforms, NULL); + cl_errChk(status, "Oops!", true); + status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, devices, NULL); + cl_errChk(status, "Oops!", true); + context = clCreateContext(NULL, 1, devices, NULL, NULL, &status); + cl_errChk(status, "Oops!", true); + + device=devices[device_touse]; + +#define PROFILING + +#ifdef PROFILING + + commandQueue = clCreateCommandQueue(context, + devices[device_touse], CL_QUEUE_PROFILING_ENABLE, &status); + +#else + + clCommandQueue = clCreateCommandQueue(clGPUContext, + devices[device_touse], NULL, &status); + +#endif // PROFILING + + if(cl_errChk(status, "creating command queue",true)) { + exit(1); + } + return context; +} +/*! + Release all resources that the user doesn't have access to. +*/ +void cl_cleanup() +{ + // Free the command queue + if(commandQueue) { + clReleaseCommandQueue(commandQueue); + } + + // Free the context + if(context) { + clReleaseContext(context); + } + + free(devices); + free(numDevices); + + // Free the platforms + free(platforms); +} + +//! Release a kernel object +/*! + \param mem The kernel object to release +*/ +void cl_freeKernel(cl_kernel kernel) +{ + cl_int status; + + if(kernel != NULL) { + status = clReleaseKernel(kernel); + cl_errChk(status, "Releasing kernel object", true); + } +} + +//! Release memory allocated on the device +/*! + \param mem The device pointer to release +*/ +void cl_freeMem(cl_mem mem) +{ + cl_int status; + + if(mem != NULL) { + status = clReleaseMemObject(mem); + cl_errChk(status, "Releasing mem object", true); + } +} + +//! Release a program object +/*! + \param mem The program object to release +*/ +void cl_freeProgram(cl_program program) +{ + cl_int status; + + if(program != NULL) { + status = clReleaseProgram(program); + cl_errChk(status, "Releasing program object", true); + } +} + +//! Returns a reference to the command queue +/*! + Returns a reference to the command queue \n + Used for any OpenCl call that needs the command queue declared in clutils.cpp +*/ +cl_command_queue cl_getCommandQueue() +{ + return commandQueue; +} + +//------------------------------------------------------- +// Synchronization functions +//------------------------------------------------------- + +/*! + Wait till all pending commands in queue are finished +*/ +void cl_sync() +{ + clFinish(commandQueue); +} + + +//------------------------------------------------------- +// Memory allocation +//------------------------------------------------------- + +//! Allocate a buffer on a device +/*! + \param mem_size Size of memory in bytes + \param flags Optional cl_mem_flags + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocBuffer(size_t mem_size, cl_mem_flags flags) +{ + cl_mem mem; + cl_int status; + + /*! + Logging information for keeping track of device memory + */ + static int allocationCount = 1; + static size_t allocationSize = 0; + + allocationCount++; + allocationSize += mem_size; + + mem = clCreateBuffer(context, flags, mem_size, NULL, &status); + + cl_errChk(status, "creating buffer", true); + + return mem; +} + +//! Allocate constant memory on device +/*! + \param mem_size Size of memory in bytes + \param host_ptr Host pointer that contains the data + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr) +{ + cl_mem mem; + cl_int status; + + mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + mem_size, host_ptr, &status); + cl_errChk(status, "Error creating const mem buffer", true); + + return mem; +} + +//! Allocate a buffer on device pinning the host memory at host_ptr +/*! + \param mem_size Size of memory in bytes + \return Returns a cl_mem object that points to pinned memory on the host +*/ +cl_mem cl_allocBufferPinned(size_t mem_size) +{ + cl_mem mem; + cl_int status; + + mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + mem_size, NULL, &status); + cl_errChk(status, "Error allocating pinned memory", true); + + return mem; +} + +//! Allocate an image on a device +/*! + \param height Number of rows in the image + \param width Number of columns in the image + \param elemSize Size of the elements in the image + \param flags Optional cl_mem_flags + \return Returns a cl_mem object that points to device memory +*/ +cl_mem cl_allocImage(size_t height, size_t width, char type, cl_mem_flags flags) +{ + cl_mem mem; + cl_int status; + + size_t elemSize = 0; + + cl_image_format format; + format.image_channel_order = CL_R; + + switch(type) { + case 'f': + elemSize = sizeof(float); + format.image_channel_data_type = CL_FLOAT; + break; + case 'i': + elemSize = sizeof(int); + format.image_channel_data_type = CL_SIGNED_INT32; + break; + default: + printf("Error creating image: Unsupported image type.\n"); + exit(-1); + } + + /*! + Logging information for keeping track of device memory + */ + static int allocationCount = 1; + static size_t allocationSize = 0; + + allocationCount++; + allocationSize += height*width*elemSize; + + // Create the image + mem = clCreateImage2D(context, flags, &format, width, height, 0, NULL, &status); + + //cl_errChk(status, "creating image", true); + if(status != CL_SUCCESS) { + printf("Error creating image: Images may not be supported for this device.\n"); + printSupportedImageFormats(); + getchar(); + exit(-1); + } + + return mem; +} + + +//------------------------------------------------------- +// Data transfers +//------------------------------------------------------- + + +// Copy and map a buffer +void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size) { + + void* ptr; // Pointer to the pinned memory that will be returned + + cl_copyBufferToBuffer(dst, src, size); + + ptr = cl_mapBuffer(dst, size, CL_MAP_READ); + + return ptr; +} + +// Copy a buffer +void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size) +{ + cl_int status; + status = clEnqueueCopyBuffer(commandQueue, src, dst, 0, 0, size, 0, NULL, + NULL); + cl_errChk(status, "Copying buffer", true); + +} + +//! Copy a buffer to the device +/*! + \param dst Valid device pointer + \param src Host pointer that contains the data + \param mem_size Size of data to copy + \param blocking Blocking or non-blocking operation +*/ +void cl_copyBufferToDevice(cl_mem dst, void* src, size_t mem_size, cl_bool blocking) +{ + cl_int status; + status = clEnqueueWriteBuffer(commandQueue, dst, blocking, 0, + mem_size, src, 0, NULL, NULL); + cl_errChk(status, "Writing buffer", true); + +} + +//! Copy a buffer to the host +/*! + \param dst Valid host pointer + \param src Device pointer that contains the data + \param mem_size Size of data to copy + \param blocking Blocking or non-blocking operation +*/ +void cl_copyBufferToHost(void* dst, cl_mem src, size_t mem_size, cl_bool blocking) +{ + cl_int status; + status = clEnqueueReadBuffer(commandQueue, src, blocking, 0, + mem_size, dst, 0, NULL, NULL); + cl_errChk(status, "Reading buffer", true); + +} + +//! Copy a buffer to a 2D image +/*! + \param src Valid device buffer + \param dst Empty device image + \param mem_size Size of data to copy +*/ +void cl_copyBufferToImage(cl_mem buffer, cl_mem image, int height, int width) +{ + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + cl_int status; + status = clEnqueueCopyBufferToImage(commandQueue, buffer, image, 0, + origin, region, 0, NULL, NULL); + cl_errChk(status, "Copying buffer to image", true); + +} + +// Copy data to an image on the device +/*! + \param dst Valid device pointer + \param src Host pointer that contains the data + \param height Height of the image + \param width Width of the image +*/ +void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width) +{ + cl_int status; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + status = clEnqueueWriteImage(commandQueue, dst, CL_TRUE, origin, + region, 0, 0, src, 0, NULL, NULL); + cl_errChk(status, "Writing image", true); +} + +//! Copy an image to the host +/*! + \param dst Valid host pointer + \param src Device pointer that contains the data + \param height Height of the image + \param width Width of the image +*/ +void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width) +{ + cl_int status; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + + status = clEnqueueReadImage(commandQueue, src, CL_TRUE, origin, + region, 0, 0, dst, 0, NULL, NULL); + cl_errChk(status, "Reading image", true); +} + +//! Map a buffer into a host address +/*! + \param mem cl_mem object + \param mem_size Size of memory in bytes + \param flags Optional cl_mem_flags + \return Returns a host pointer that points to the mapped region +*/ +void *cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags) +{ + cl_int status; + void *ptr; + + ptr = (void *)clEnqueueMapBuffer(commandQueue, mem, CL_TRUE, flags, + 0, mem_size, 0, NULL, NULL, &status); + + cl_errChk(status, "Error mapping a buffer", true); + + return ptr; +} + +//! Unmap a buffer or image +/*! + \param mem cl_mem object + \param ptr A host pointer that points to the mapped region +*/ +void cl_unmapBuffer(cl_mem mem, void *ptr) +{ + + // TODO It looks like AMD doesn't support profiling unmapping yet. Leaving the + // commented code here until it's supported + + cl_int status; + + status = clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL); + + cl_errChk(status, "Error unmapping a buffer or image", true); +} + +void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size) +{ + + void* ptr; + + ptr = cl_mapBuffer(mem, size, CL_MAP_WRITE); + + memcpy(ptr, data, size); + + cl_unmapBuffer(mem, ptr); +} + +//------------------------------------------------------- +// Program and kernels +//------------------------------------------------------- + +//! Convert source code file into cl_program +/*! +Compile Opencl source file into a cl_program. The cl_program will be made into a kernel in PrecompileKernels() + +\param kernelPath Filename of OpenCl code +\param compileoptions Compilation options +\param verbosebuild Switch to enable verbose Output +*/ +cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbosebuild ) +{ + cl_int status; + FILE *fp = NULL; + char *source = NULL; + long int size; + + /*printf("\t%s\n", kernelPath); + + // Determine the size of the source file +#ifdef _WIN32 + fopen_s(&fp, kernelPath, "rb"); +#else + fp = fopen(kernelPath, "rb"); +#endif + if(!fp) { + printf("Could not open kernel file\n"); + exit(-1); + } + status = fseek(fp, 0, SEEK_END); + if(status != 0) { + printf("Error seeking to end of file\n"); + exit(-1); + } + size = ftell(fp); + if(size < 0) { + printf("Error getting file position\n"); + exit(-1); + } + rewind(fp); + + // Allocate enough space for the source code + source = (char *)alloc(size + 1); + + // fill with NULLs (just for fun) + for (int i = 0; i < size+1; i++) { + source[i] = '\0'; + } + + // Read in the source code + fread(source, 1, size, fp); + source[size] = '\0';*/ + + // Create the program object + //cl_program clProgramReturn = clCreateProgramWithSource(context, 1, (const char **)&source, NULL, &status); + cl_program clProgramReturn = clCreateProgramWithBuiltInKernels(context, 1, &device, "Fan1;Fan2", &status); + cl_errChk(status, "Creating program", true); + + free(source); + fclose(fp); + + // Try to compile the program + status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL); + if(cl_errChk(status, "Building program", false) || verbosebuild == 1) + { + + cl_build_status build_status; + + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_STATUS, + sizeof(cl_build_status), &build_status, NULL); + + if(build_status == CL_SUCCESS && verbosebuild == 0) { + return clProgramReturn; + } + + //char *build_log; + size_t ret_val_size; + printf("Device: %p",device); + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG, 0, + NULL, &ret_val_size); + + char *build_log = (char*)alloc(ret_val_size+1); + + clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG, + ret_val_size+1, build_log, NULL); + + // to be careful, terminate with \0 + // there's no information in the reference whether the string is 0 + // terminated or not + build_log[ret_val_size] = '\0'; + + printf("Build log:\n %s...\n", build_log); + if(build_status != CL_SUCCESS) { + getchar(); + exit(-1); + } + else + return clProgramReturn; + } + + // print the ptx information + // printBinaries(clProgram); + + return clProgramReturn; +} + +//! Create a kernel from compiled source +/*! +Create a kernel from compiled source + +\param program Compiled OpenCL program +\param kernel_name Name of the kernel in the program +\return Returns a cl_kernel object for the specified kernel +*/ +cl_kernel cl_createKernel(cl_program program, const char* kernel_name) { + + cl_kernel kernel; + cl_int status; + + kernel = clCreateKernel(program, kernel_name, &status); + cl_errChk(status, "Creating kernel", true); + + return kernel; +} + +//! Set an argument for a OpenCL kernel +/*! +Set an argument for a OpenCL kernel + +\param kernel The kernel for which the argument is being set +\param index The argument index +\param size The size of the argument +\param data A pointer to the argument +*/ +void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size, + void* data) +{ + cl_int status; + status = clSetKernelArg(kernel, index, size, data); + + cl_errChk(status, "Setting kernel arg", true); +} + + +//------------------------------------------------------- +// Profiling/events +//------------------------------------------------------- + + +//! Time kernel execution using cl_event +/*! + Prints out the time taken between the start and end of an event + \param event_time +*/ +double cl_computeExecTime(cl_event event_time) +{ + cl_int status; + cl_ulong starttime; + cl_ulong endtime; + + double elapsed; + + status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &starttime, NULL); + cl_errChk(status, "profiling start", true); + + status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &endtime, NULL); + cl_errChk(status, "profiling end", true); + + // Convert to ms + elapsed = (double)(endtime-starttime)/1000000.0; + + return elapsed; +} + +//! Compute the elapsed time between two timer values +double cl_computeTime(cl_time start, cl_time end) +{ +#ifdef _WIN32 + __int64 freq; + int status; + + status = QueryPerformanceFrequency((LARGE_INTEGER*)&freq); + if(status == 0) { + perror("QueryPerformanceFrequency"); + exit(-1); + } + + // Return time in ms + return double(end-start)/(double(freq)/1000.0); +#else + + return end-start; +#endif +} + +//! Grab the current time using a system-specific timer +void cl_getTime(cl_time* time) +{ + +#ifdef _WIN32 + int status = QueryPerformanceCounter((LARGE_INTEGER*)time); + if(status == 0) { + perror("QueryPerformanceCounter"); + exit(-1); + } +#else + // Use gettimeofday to get the current time + struct timeval curTime; + gettimeofday(&curTime, NULL); + + // Convert timeval into double + *time = curTime.tv_sec * 1000 + (double)curTime.tv_usec/1000; +#endif +} + + + +//------------------------------------------------------- +// Error handling +//------------------------------------------------------- + +//! OpenCl error code list +/*! + An array of character strings used to give the error corresponding to the error code \n + + The error code is the index within this array +*/ +char *cl_errs[MAX_ERR_VAL] = { + (char *)"CL_SUCCESS", // 0 + (char *)"CL_DEVICE_NOT_FOUND", //-1 + (char *)"CL_DEVICE_NOT_AVAILABLE", //-2 + (char *)"CL_COMPILER_NOT_AVAILABLE", //-3 + (char *)"CL_MEM_OBJECT_ALLOCATION_FAILURE", //-4 + (char *)"CL_OUT_OF_RESOURCES", //-5 + (char *)"CL_OUT_OF_HOST_MEMORY", //-6 + (char *)"CL_PROFILING_INFO_NOT_AVAILABLE", //-7 + (char *)"CL_MEM_COPY_OVERLAP", //-8 + (char *)"CL_IMAGE_FORMAT_MISMATCH", //-9 + (char *)"CL_IMAGE_FORMAT_NOT_SUPPORTED", //-10 + (char *)"CL_BUILD_PROGRAM_FAILURE", //-11 + (char *)"CL_MAP_FAILURE", //-12 + (char *)"", //-13 + (char *)"", //-14 + (char *)"", //-15 + (char *)"", //-16 + (char *)"", //-17 + (char *)"", //-18 + (char *)"", //-19 + (char *)"", //-20 + (char *)"", //-21 + (char *)"", //-22 + (char *)"", //-23 + (char *)"", //-24 + (char *)"", //-25 + (char *)"", //-26 + (char *)"", //-27 + (char *)"", //-28 + (char *)"", //-29 + (char *)"CL_INVALID_VALUE", //-30 + (char *)"CL_INVALID_DEVICE_TYPE", //-31 + (char *)"CL_INVALID_PLATFORM", //-32 + (char *)"CL_INVALID_DEVICE", //-33 + (char *)"CL_INVALID_CONTEXT", //-34 + (char *)"CL_INVALID_QUEUE_PROPERTIES", //-35 + (char *)"CL_INVALID_COMMAND_QUEUE", //-36 + (char *)"CL_INVALID_HOST_PTR", //-37 + (char *)"CL_INVALID_MEM_OBJECT", //-38 + (char *)"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", //-39 + (char *)"CL_INVALID_IMAGE_SIZE", //-40 + (char *)"CL_INVALID_SAMPLER", //-41 + (char *)"CL_INVALID_BINARY", //-42 + (char *)"CL_INVALID_BUILD_OPTIONS", //-43 + (char *)"CL_INVALID_PROGRAM", //-44 + (char *)"CL_INVALID_PROGRAM_EXECUTABLE", //-45 + (char *)"CL_INVALID_KERNEL_NAME", //-46 + (char *)"CL_INVALID_KERNEL_DEFINITION", //-47 + (char *)"CL_INVALID_KERNEL", //-48 + (char *)"CL_INVALID_ARG_INDEX", //-49 + (char *)"CL_INVALID_ARG_VALUE", //-50 + (char *)"CL_INVALID_ARG_SIZE", //-51 + (char *)"CL_INVALID_KERNEL_ARGS", //-52 + (char *)"CL_INVALID_WORK_DIMENSION ", //-53 + (char *)"CL_INVALID_WORK_GROUP_SIZE", //-54 + (char *)"CL_INVALID_WORK_ITEM_SIZE", //-55 + (char *)"CL_INVALID_GLOBAL_OFFSET", //-56 + (char *)"CL_INVALID_EVENT_WAIT_LIST", //-57 + (char *)"CL_INVALID_EVENT", //-58 + (char *)"CL_INVALID_OPERATION", //-59 + (char *)"CL_INVALID_GL_OBJECT", //-60 + (char *)"CL_INVALID_BUFFER_SIZE", //-61 + (char *)"CL_INVALID_MIP_LEVEL", //-62 + (char *)"CL_INVALID_GLOBAL_WORK_SIZE"}; //-63 + +//! OpenCl Error checker +/*! +Checks for error code as per cl_int returned by OpenCl +\param status Error value as cl_int +\param msg User provided error message +\return True if Error Seen, False if no error +*/ +int cl_errChk(const cl_int status, const char * msg, bool exitOnErr) +{ + + if(status != CL_SUCCESS) { + printf("OpenCL Error: %d %s %s\n", status, cl_errs[-status], msg); + + if(exitOnErr) { + exit(-1); + } + + return true; + } + return false; +} + +// Queries the supported image formats for the device and prints +// them to the screen + void printSupportedImageFormats() +{ + cl_uint numFormats; + cl_int status; + + status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, + 0, NULL, &numFormats); + cl_errChk(status, "getting supported image formats", true); + + cl_image_format* imageFormats = NULL; + imageFormats = (cl_image_format*)alloc(sizeof(cl_image_format)*numFormats); + + status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, + numFormats, imageFormats, NULL); + + printf("There are %d supported image formats\n", numFormats); + + cl_uint orders[]={CL_R, CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, + CL_RA, CL_RGB, CL_RGBA, CL_ARGB, CL_BGRA}; + char *orderstr[]={(char *)"CL_R", (char *)"CL_A",(char *)"CL_INTENSITY", (char *)"CL_LUMINANCE", (char *)"CL_RG", + (char *)"CL_RA", (char *)"CL_RGB", (char *)"CL_RGBA", (char *)"CL_ARGB", (char *)"CL_BGRA"}; + + cl_uint types[]={ + CL_SNORM_INT8 , CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, + CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,CL_SIGNED_INT8, + CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, + CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; + + char * typesstr[]={ + (char *)"CL_SNORM_INT8" ,(char *)"CL_SNORM_INT16",(char *)"CL_UNORM_INT8",(char *)"CL_UNORM_INT16", + (char *)"CL_UNORM_SHORT_565",(char *)"CL_UNORM_SHORT_555",(char *)"CL_UNORM_INT_101010", + (char *)"CL_SIGNED_INT8",(char *)"CL_SIGNED_INT16",(char *)"CL_SIGNED_INT32",(char *)"CL_UNSIGNED_INT8", + (char *)"CL_UNSIGNED_INT16",(char *)"CL_UNSIGNED_INT32",(char *)"CL_HALF_FLOAT",(char *)"CL_FLOAT"}; + + printf("Supported Formats:\n"); + for(int i = 0; i < (int)numFormats; i++) { + printf("\tFormat %d: ", i); + + for(int j = 0; j < (int)(sizeof(orders)/sizeof(cl_int)); j++) { + if(imageFormats[i].image_channel_order == orders[j]) { + printf("%s, ", orderstr[j]); + } + } + for(int j = 0; j < (int)(sizeof(types)/sizeof(cl_int)); j++) { + if(imageFormats[i].image_channel_data_type == types[j]) { + printf("%s, ", typesstr[j]); + } + } + printf("\n"); + } + + free(imageFormats); +} + + +//------------------------------------------------------- +// Platform and device information +//------------------------------------------------------- + +//! Returns true if AMD is the device vendor +bool cl_deviceIsAMD(cl_device_id dev) { + + bool retval = false; + + char* vendor = cl_getDeviceVendor(dev); + + if(strncmp(vendor, "Advanced", 8) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Returns true if NVIDIA is the device vendor +bool cl_deviceIsNVIDIA(cl_device_id dev) { + + bool retval = false; + + char* vendor = cl_getDeviceVendor(dev); + + if(strncmp(vendor, "NVIDIA", 6) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Returns true if NVIDIA is the device vendor +bool cl_platformIsNVIDIA(cl_platform_id plat) { + + bool retval = false; + + char* vendor = cl_getPlatformVendor(plat); + + if(strncmp(vendor, "NVIDIA", 6) == 0) { + retval = true; + } + + free(vendor); + + return retval; +} + +//! Get the name of the vendor for a device +char* cl_getDeviceDriverVersion(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getDeviceName(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the name + status = clGetDeviceInfo(dev, CL_DEVICE_NAME, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting device name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_NAME, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting device name", true); + + return(devInfoStr); +} + +//! Get the name of the vendor for a device +char* cl_getDeviceVendor(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! Get the name of the vendor for a device +char* cl_getDeviceVersion(cl_device_id dev) +{ + cl_int status; + size_t devInfoSize; + char* devInfoStr = NULL; + + // If dev is NULL, set it to the default device + if(dev == NULL) { + dev = device; + } + + // Print the vendor + status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, + NULL, &devInfoSize); + cl_errChk(status, "Getting vendor name", true); + + devInfoStr = (char*)alloc(devInfoSize); + + status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devInfoSize, + devInfoStr, NULL); + cl_errChk(status, "Getting vendor name", true); + + return devInfoStr; +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getPlatformName(cl_platform_id platform) +{ + cl_int status; + size_t platformInfoSize; + char* platformInfoStr = NULL; + + // Print the name + status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, + NULL, &platformInfoSize); + cl_errChk(status, "Getting platform name", true); + + platformInfoStr = (char*)alloc(platformInfoSize); + + status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformInfoSize, + platformInfoStr, NULL); + cl_errChk(status, "Getting platform name", true); + + return(platformInfoStr); +} + +//! The the name of the device as supplied by the OpenCL implementation +char* cl_getPlatformVendor(cl_platform_id platform) +{ + cl_int status; + size_t platformInfoSize; + char* platformInfoStr = NULL; + + // Print the name + status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0, + NULL, &platformInfoSize); + cl_errChk(status, "Getting platform name", true); + + platformInfoStr = (char*)alloc(platformInfoSize); + + status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, platformInfoSize, + platformInfoStr, NULL); + cl_errChk(status, "Getting platform name", true); + + return(platformInfoStr); +} + +//------------------------------------------------------- +// Utility functions +//------------------------------------------------------- + +//! Take a string and an int, and return a string +char* catStringWithInt(const char* string, int integer) { + + if(integer > 99999) { + printf("Can't handle event identifiers with 6 digits\n"); + exit(-1); + } + + // 5 characters for the identifier, 1 for the null terminator + int strLen = strlen(string)+5+1; + char* eventStr = (char*)alloc(sizeof(char)*strLen); + + char tmp[6]; + + strcpy(eventStr, string); + strncat(eventStr, itoa_portable(integer, tmp, 10), 5); + + return eventStr; +} + +/** + ** C++ version 0.4 char* style "itoa": + ** Written by Lukás Chmela + ** Released under GPLv3. + **/ +//portable itoa function +char* itoa_portable(int value, char* result, int base) { + // check that the base if valid + if (base < 2 || base > 36) { *result = '\0'; return result; } + + char* ptr = result, *ptr1 = result, tmp_char; + int tmp_value; + + do { + tmp_value = value; + value /= base; + *ptr++ = "zyxwvutsrqponmlkjihgfedcba9876543210123456789abcdefghijklmnopqrstuvwxyz" [35 + (tmp_value - value * base)]; + } while ( value ); + + //Apply negative sign + if (tmp_value < 0) *ptr++ = '-'; + *ptr-- = '\0'; + + while(ptr1 < ptr) { + tmp_char = *ptr; + *ptr--= *ptr1; + *ptr1++ = tmp_char; + } + + return result; +} diff --git a/benchmarks/opencl/guassian/clutils.h b/benchmarks/opencl/guassian/clutils.h new file mode 100755 index 00000000..51177d07 --- /dev/null +++ b/benchmarks/opencl/guassian/clutils.h @@ -0,0 +1,281 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#ifndef __CL_UTILS_H__ +#define __CL_UTILS_H__ + +#include + +// The cl_time type is OS specific +#ifdef _WIN32 +#include +#include +typedef __int64 cl_time; +#else +#include +typedef double cl_time; +#endif + +//------------------------------------------------------- +// Initialization and Cleanup +//------------------------------------------------------- + +// Detects platforms and devices, creates context and command queue +cl_context cl_init(char devicePreference='\0'); + +// Creates a context given a platform and a device +cl_context cl_init_context(int platform,int dev,int quiet=0); + +// Releases resources used by clutils +void cl_cleanup(); + +// Releases a kernel object +void cl_freeKernel(cl_kernel kernel); + +// Releases a memory object +void cl_freeMem(cl_mem mem); + +// Releases a program object +void cl_freeProgram(cl_program program); + +// Returns the global command queue +cl_command_queue cl_getCommandQueue(); + + +//------------------------------------------------------- +// Synchronization functions +//------------------------------------------------------- + +// Performs a clFinish on the command queue +void cl_sync(); + + +//------------------------------------------------------- +// Memory allocation +//------------------------------------------------------- + +// Allocates a regular buffer on the device +cl_mem cl_allocBuffer(size_t mem_size, + cl_mem_flags flags = CL_MEM_READ_WRITE); + +// XXX I don't think this does exactly what we want it to do +// Allocates a read-only buffer and transfers the data +cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr); + +// Allocates pinned memory on the host +cl_mem cl_allocBufferPinned(size_t mem_size); + +// Allocates an image on the device +cl_mem cl_allocImage(size_t height, size_t width, char type, + cl_mem_flags flags = CL_MEM_READ_WRITE); + + + +//------------------------------------------------------- +// Data transfers +//------------------------------------------------------- + +// Copies a buffer from the device to pinned memory on the host and +// maps it so it can be read +void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size); + +// Copies from one buffer to another +void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size); + +// Copies data to a buffer on the device +void cl_copyBufferToDevice(cl_mem dst, void *src, size_t mem_size, + cl_bool blocking = CL_TRUE); + +// Copies data to an image on the device +void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width); + +// Copies an image from the device to the host +void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width); + +// Copies data from a device buffer to the host +void cl_copyBufferToHost(void *dst, cl_mem src, size_t mem_size, + cl_bool blocking = CL_TRUE); + +// Copies data from a buffer on the device to an image on the device +void cl_copyBufferToImage(cl_mem src, cl_mem dst, int height, int width); + +// Maps a buffer +void* cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags); + +// Unmaps a buffer +void cl_unmapBuffer(cl_mem mem, void *ptr); + +// Writes data to a zero-copy buffer on the device +void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size); + +//------------------------------------------------------- +// Program and kernels +//------------------------------------------------------- + +// Compiles a program +cl_program cl_compileProgram(char* kernelPath, char* compileoptions, + bool verboseoptions = 0); + +// Creates a kernel +cl_kernel cl_createKernel(cl_program program, const char* kernelName); + + +// Sets a kernel argument +void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size, + void* data); + + +//------------------------------------------------------- +// Profiling/events +//------------------------------------------------------- + +// Computes the execution time (start to end) for an event +double cl_computeExecTime(cl_event); + +// Compute the elapsed time between two CPU timer values +double cl_computeTime(cl_time start, cl_time end); + +// Creates an event from CPU timers +void cl_createUserEvent(cl_time start, cl_time end, char* desc); + +// Disable logging of events +void cl_disableEvents(); + +// Enable logging of events +void cl_enableEvents(); + +// Query the current system time +void cl_getTime(cl_time* time); + +// Calls a function which prints events to the terminal +void cl_printEvents(); + +// Calls a function which writes the events to a file +void cl_writeEventsToFile(char* path); + + +//------------------------------------------------------- +// Error handling +//------------------------------------------------------- + +// Compare a status value to CL_SUCCESS and optionally exit on error +int cl_errChk(const cl_int status, const char *msg, bool exitOnErr); + +// Queries the supported image formats for the device and prints +// them to the screen +void printSupportedImageFormats(); + +//------------------------------------------------------- +// Platform and device information +//------------------------------------------------------- + +bool cl_deviceIsAMD(cl_device_id dev=NULL); +bool cl_deviceIsNVIDIA(cl_device_id dev=NULL); +bool cl_platformIsNVIDIA(cl_platform_id plat=NULL); +char* cl_getDeviceDriverVersion(cl_device_id dev=NULL); +char* cl_getDeviceName(cl_device_id dev=NULL); +char* cl_getDeviceVendor(cl_device_id dev=NULL); +char* cl_getDeviceVersion(cl_device_id dev=NULL); +char* cl_getPlatformName(cl_platform_id platform); +char* cl_getPlatformVendor(cl_platform_id platform); + +//------------------------------------------------------- +// Utility functions +//------------------------------------------------------- + +char* catStringWithInt(const char* str, int integer); + +char* itoa_portable(int value, char* result, int base); + +//------------------------------------------------------- +// Data types +//------------------------------------------------------- +typedef struct{ + int x; + int y; +} int2; + +typedef struct{ + float x; + float y; +}float2; + +typedef struct{ + float x; + float y; + float z; + float w; +}float4; + +//------------------------------------------------------- +// Defines +//------------------------------------------------------- + +#define MAX_ERR_VAL 64 + +#define NUM_PROGRAMS 7 + +#define NUM_KERNELS 13 +#define KERNEL_INIT_DET 0 +#define KERNEL_BUILD_DET 1 +#define KERNEL_SURF_DESC 2 +#define KERNEL_NORM_DESC 3 +#define KERNEL_NON_MAX_SUP 4 +#define KERNEL_GET_ORIENT1 5 +#define KERNEL_GET_ORIENT2 6 +#define KERNEL_NN 7 +#define KERNEL_SCAN 8 +#define KERNEL_SCAN4 9 +#define KERNEL_TRANSPOSE 10 +#define KERNEL_SCANIMAGE 11 +#define KERNEL_TRANSPOSEIMAGE 12 + +#endif diff --git a/benchmarks/opencl/guassian/gaussianElim.h b/benchmarks/opencl/guassian/gaussianElim.h new file mode 100755 index 00000000..5d905d7e --- /dev/null +++ b/benchmarks/opencl/guassian/gaussianElim.h @@ -0,0 +1,40 @@ +#ifndef _GAUSSIANELIM +#define _GAUSSIANELIM + +#include +#include +#include +#include +#include +#include +#include + +#include "clutils.h" + +// All OpenCL headers +#if defined (__APPLE__) || defined(MACOSX) + #include +#else + #include +#endif + +float *OpenClGaussianElimination( + cl_context context, + int timing); + +void printUsage(); +int parseCommandline(int argc, char *argv[], char* filename, + int *q, int *t, int *p, int *d); + +void InitPerRun(int size,float *m); +void ForwardSub(cl_context context, float *a, float *b, float *m, int size,int timing); +void BackSub(float *a, float *b, float *finalVec, int size); +void Fan1(float *m, float *a, int Size, int t); +void Fan2(float *m, float *a, float *b,int Size, int j1, int t); +//void Fan3(float *m, float *b, int Size, int t); +void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol); +void InitAry(FILE *fp, float *ary, int ary_size); +void PrintMat(float *ary, int size, int nrow, int ncolumn); +void PrintAry(float *ary, int ary_size); +float eventTime(cl_event event,cl_command_queue command_queue); +#endif diff --git a/benchmarks/opencl/guassian/gettimeofday.cpp b/benchmarks/opencl/guassian/gettimeofday.cpp new file mode 100755 index 00000000..a0486593 --- /dev/null +++ b/benchmarks/opencl/guassian/gettimeofday.cpp @@ -0,0 +1,74 @@ +#include "stdio.h" +#include +#include +#include +//using namespace System; +using namespace std; + +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else + #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +struct timezone +{ + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + + +// Definition of a gettimeofday function + int gettimeofday(struct timeval *tv, struct timezone *tz) +{ +// Define a structure to receive the current Windows filetime + FILETIME ft; + +// Initialize the present time to 0 and the timezone to UTC + unsigned __int64 tmpres = 0; + static int tzflag = 0; + + if (NULL != tv) + { + GetSystemTimeAsFileTime(&ft); + +// The GetSystemTimeAsFileTime returns the number of 100 nanosecond +// intervals since Jan 1, 1601 in a structure. Copy the high bits to +// the 64 bit tmpres, shift it left by 32 then or in the low 32 bits. + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + +// Convert to microseconds by dividing by 10 + tmpres /= 10; + +// The Unix epoch starts on Jan 1 1970. Need to subtract the difference +// in seconds from Jan 1 1601. + tmpres -= DELTA_EPOCH_IN_MICROSECS; + +// Finally change microseconds to seconds and place in the seconds value. +// The modulus picks up the microseconds. + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + + if (NULL != tz) + { + if (!tzflag) + { + _tzset(); + tzflag++; + } + +// Adjust for the timezone west of Greenwich + long seconds_diff; + _get_timezone(&seconds_diff); + tz->tz_minuteswest = seconds_diff / 60; + int hours_offset; + _get_daylight(&hours_offset); + tz->tz_dsttime = hours_offset; + } + + return 0; +} + diff --git a/benchmarks/opencl/guassian/gettimeofday.h b/benchmarks/opencl/guassian/gettimeofday.h new file mode 100755 index 00000000..8db1f7a9 --- /dev/null +++ b/benchmarks/opencl/guassian/gettimeofday.h @@ -0,0 +1,17 @@ + +#ifdef _WIN32 +#include +/** +Based on code seen at. + +http://www.winehq.org/pipermail/wine-devel/2003-June/018082.html + +http://msdn.microsoft.com/en-us/library/ms740560 + +*/ +int gettimeofday(struct timeval *tv, struct timezone *tz); +#else +#include +#endif + + diff --git a/benchmarks/opencl/guassian/kernel.cl b/benchmarks/opencl/guassian/kernel.cl new file mode 100755 index 00000000..c370e9b2 --- /dev/null +++ b/benchmarks/opencl/guassian/kernel.cl @@ -0,0 +1,49 @@ +//#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + +typedef struct latLong + { + float lat; + float lng; + } LatLong; + +__kernel void Fan1(__global float *m_dev, + __global float *a_dev, + __global float *b_dev, + const int size, + const int t) { + int globalId = get_global_id(0); + + if (globalId < size-1-t) { + *(m_dev + size * (globalId + t + 1)+t) = *(a_dev + size * (globalId + t + 1) + t) / *(a_dev + size * t + t); + } +} + + +__kernel void Fan2(__global float *m_dev, + __global float *a_dev, + __global float *b_dev, + const int size, + const int t) { + int globalId = get_global_id(0); + + int globalIdx = get_global_id(0); + int globalIdy = get_global_id(1); + if (globalIdx < size-1-t && globalIdy < size-t) { + a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)]; + + if(globalIdy == 0){ + b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t]; + } + } +// One dimensional +// int globalIdx = globalId % size; +// int globalIdy = globalId / size; +// +// if (globalIdx < size-1-t && globalIdy < size-t) { +// a_dev[size*(globalIdx+1+t)+(globalIdy+t)] -= m_dev[size*(globalIdx+1+t)+t] * a_dev[size*t+(globalIdy+t)]; +// } +// if(globalIdy == 0){ +// b_dev[globalIdx+1+t] -= m_dev[size*(globalIdx+1+t)+(globalIdy+t)] * b_dev[t]; +// } + +} diff --git a/benchmarks/opencl/guassian/libgaussian.a b/benchmarks/opencl/guassian/libgaussian.a new file mode 100644 index 00000000..95920728 Binary files /dev/null and b/benchmarks/opencl/guassian/libgaussian.a differ diff --git a/benchmarks/opencl/guassian/main.cc b/benchmarks/opencl/guassian/main.cc new file mode 100755 index 00000000..1b852908 --- /dev/null +++ b/benchmarks/opencl/guassian/main.cc @@ -0,0 +1,412 @@ +#ifndef __GAUSSIAN_ELIMINATION__ +#define __GAUSSIAN_ELIMINATION__ + +#include "gaussianElim.h" + +cl_context context = NULL; + +int main(int argc, char *argv[]) { + printf("enter demo main\n"); + float *a = NULL, *b = NULL, *finalVec = NULL; + float *m = NULL; + int size; + + FILE *fp; + + // args + char filename[100]; + int quiet = 0, timing = 0, platform = -1, device = -1; + + // parse command line + if (parseCommandline(argc, argv, filename, &quiet, &timing, &platform, + &device)) { + printUsage(); + return 0; + } + + context = cl_init_context(platform, device, quiet); + + fp = fopen(filename, "r"); + fscanf(fp, "%d", &size); + + a = (float *)malloc(size * size * sizeof(float)); + + printf("OK\n"); + + InitMat(fp, size, a, size, size); + // printf("The input matrix a is:\n"); + // PrintMat(a, size, size, size); + b = (float *)malloc(size * sizeof(float)); + + InitAry(fp, b, size); + // printf("The input array b is:\n"); + // PrintAry(b, size); + + // create the solution matrix + m = (float *)malloc(size * size * sizeof(float)); + + // create a new vector to hold the final answer + finalVec = (float *)malloc(size * sizeof(float)); + + InitPerRun(size, m); + + // begin timing + + // run kernels + ForwardSub(context, a, b, m, size, timing); + + // end timing + if (!quiet) { + printf("The result of matrix m is: \n"); + + PrintMat(m, size, size, size); + printf("The result of matrix a is: \n"); + PrintMat(a, size, size, size); + printf("The result of array b is: \n"); + PrintAry(b, size); + + BackSub(a, b, finalVec, size); + printf("The final solution is: \n"); + PrintAry(finalVec, size); + } + + fclose(fp); + free(m); + free(a); + free(b); + free(finalVec); + // OpenClGaussianElimination(context,timing); + + return 0; +} + +/*------------------------------------------------------ + ** ForwardSub() -- Forward substitution of Gaussian + ** elimination. + **------------------------------------------------------ + */ +void ForwardSub(cl_context context, float *a, float *b, float *m, int size, + int timing) { + // 1. set up kernels + cl_kernel fan1_kernel, fan2_kernel; + cl_int status = 0; + cl_program gaussianElim_program; + cl_event writeEvent, kernelEvent, readEvent; + float writeTime = 0, readTime = 0, kernelTime = 0; + float writeMB = 0, readMB = 0; + + gaussianElim_program = + cl_compileProgram((char *)"gaussianElim_kernels.cl", NULL); + + fan1_kernel = clCreateKernel(gaussianElim_program, "Fan1", &status); + status = cl_errChk(status, (char *)"Error Creating Fan1 kernel", true); + if (status) + exit(1); + + fan2_kernel = clCreateKernel(gaussianElim_program, "Fan2", &status); + status = cl_errChk(status, (char *)"Error Creating Fan2 kernel", true); + if (status) + exit(1); + + // 2. set up memory on device and send ipts data to device + + cl_mem a_dev, b_dev, m_dev; + + cl_int error = 0; + + a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(float) * size * size, NULL, &error); + + b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * size, NULL, + &error); + + m_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(float) * size * size, NULL, &error); + + cl_command_queue command_queue = cl_getCommandQueue(); + + error = clEnqueueWriteBuffer(command_queue, a_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, a, 0, NULL, + &writeEvent); + + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + + error = clEnqueueWriteBuffer(command_queue, b_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size, b, 0, NULL, &writeEvent); + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + + error = clEnqueueWriteBuffer(command_queue, m_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, m, 0, NULL, + &writeEvent); + if (timing) + writeTime += eventTime(writeEvent, command_queue); + clReleaseEvent(writeEvent); + writeMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6); + + // 3. Determine block sizes + size_t globalWorksizeFan1[1]; + size_t globalWorksizeFan2[2]; + + globalWorksizeFan1[0] = size; + globalWorksizeFan2[0] = size; + globalWorksizeFan2[1] = size; + + int t; + // 4. Setup and Run kernels + for (t = 0; t < (size - 1); t++) { + // kernel args + cl_int argchk; + argchk = clSetKernelArg(fan1_kernel, 0, sizeof(cl_mem), (void *)&m_dev); + argchk |= clSetKernelArg(fan1_kernel, 1, sizeof(cl_mem), (void *)&a_dev); + argchk |= clSetKernelArg(fan1_kernel, 2, sizeof(cl_mem), (void *)&b_dev); + argchk |= clSetKernelArg(fan1_kernel, 3, sizeof(int), (void *)&size); + argchk |= clSetKernelArg(fan1_kernel, 4, sizeof(int), (void *)&t); + + cl_errChk(argchk, "ERROR in Setting Fan1 kernel args", true); + + // launch kernel + error = + clEnqueueNDRangeKernel(command_queue, fan1_kernel, 1, 0, + globalWorksizeFan1, NULL, 0, NULL, &kernelEvent); + + cl_errChk(error, "ERROR in Executing Fan1 Kernel", true); + if (timing) { + // printf("here1a\n"); + kernelTime += eventTime(kernelEvent, command_queue); + // printf("here1b\n"); + } + clReleaseEvent(kernelEvent); + // Fan1<<>>(m_cuda,a_cuda,Size,t); + // cudaThreadSynchronize(); + + // kernel args + argchk = clSetKernelArg(fan2_kernel, 0, sizeof(cl_mem), (void *)&m_dev); + argchk |= clSetKernelArg(fan2_kernel, 1, sizeof(cl_mem), (void *)&a_dev); + argchk |= clSetKernelArg(fan2_kernel, 2, sizeof(cl_mem), (void *)&b_dev); + argchk |= clSetKernelArg(fan2_kernel, 3, sizeof(int), (void *)&size); + argchk |= clSetKernelArg(fan2_kernel, 4, sizeof(int), (void *)&t); + + cl_errChk(argchk, "ERROR in Setting Fan2 kernel args", true); + + // launch kernel + error = + clEnqueueNDRangeKernel(command_queue, fan2_kernel, 2, 0, + globalWorksizeFan2, NULL, 0, NULL, &kernelEvent); + + cl_errChk(error, "ERROR in Executing Fan1 Kernel", true); + if (timing) { + // printf("here2a\n"); + kernelTime += eventTime(kernelEvent, command_queue); + // printf("here2b\n"); + } + clReleaseEvent(kernelEvent); + // Fan2<<>>(m_cuda,a_cuda,b_cuda,Size,Size-t,t); + // cudaThreadSynchronize(); + } + // 5. transfer data off of device + error = + clEnqueueReadBuffer(command_queue, a_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, a, 0, NULL, &readEvent); + + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + + error = clEnqueueReadBuffer(command_queue, b_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size, b, 0, NULL, &readEvent); + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + + error = + clEnqueueReadBuffer(command_queue, m_dev, + 1, // change to 0 for nonblocking write + 0, // offset + sizeof(float) * size * size, m, 0, NULL, &readEvent); + + cl_errChk(error, "ERROR with clEnqueueReadBuffer", true); + if (timing) + readTime += eventTime(readEvent, command_queue); + clReleaseEvent(readEvent); + readMB = (float)(sizeof(float) * size * (size + size + 1) / 1e6); + + if (timing) { + printf("Matrix Size\tWrite(s) [size]\t\tKernel(s)\tRead(s) " + "[size]\t\tTotal(s)\n"); + printf("%dx%d \t", size, size); + + printf("%f [%.2fMB]\t", writeTime, writeMB); + + printf("%f\t", kernelTime); + + printf("%f [%.2fMB]\t", readTime, readMB); + + printf("%f\n\n", writeTime + kernelTime + readTime); + } +} + +float eventTime(cl_event event, cl_command_queue command_queue) { + cl_int error = 0; + cl_ulong eventStart, eventEnd; + clFinish(command_queue); + error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &eventStart, NULL); + cl_errChk(error, "ERROR in Event Profiling.", true); + error = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &eventEnd, NULL); + cl_errChk(error, "ERROR in Event Profiling.", true); + + return (float)((eventEnd - eventStart) / 1e9); +} + +int parseCommandline(int argc, char *argv[], char *filename, int *q, int *t, + int *p, int *d) { + int i; + // if (argc < 2) return 1; // error + strncpy(filename, "matrix4.txt", 100); + char flag; + + for (i = 1; i < argc; i++) { + if (argv[i][0] == '-') { // flag + flag = argv[i][1]; + switch (flag) { + case 'h': // help + return 1; + break; + case 'q': // quiet + *q = 1; + break; + case 't': // timing + *t = 1; + break; + case 'p': // platform + i++; + *p = atoi(argv[i]); + break; + case 'd': // device + i++; + *d = atoi(argv[i]); + break; + } + } + } + if ((*d >= 0 && *p < 0) || + (*p >= 0 && + *d < 0)) // both p and d must be specified if either are specified + return 1; + return 0; +} + +void printUsage() { + printf("Gaussian Elimination Usage\n"); + printf("\n"); + printf("gaussianElimination [filename] [-hqt] [-p [int] -d [int]]\n"); + printf("\n"); + printf("example:\n"); + printf("$ ./gaussianElimination matrix4.txt\n"); + printf("\n"); + printf("filename the filename that holds the matrix data\n"); + printf("\n"); + printf("-h Display the help file\n"); + printf("-q Quiet mode. Suppress all text output.\n"); + printf("-t Print timing information.\n"); + printf("\n"); + printf("-p [int] Choose the platform (must choose both platform and " + "device)\n"); + printf("-d [int] Choose the device (must choose both platform and " + "device)\n"); + printf("\n"); + printf("\n"); + printf("Notes: 1. The filename is required as the first parameter.\n"); + printf(" 2. If you declare either the device or the platform,\n"); + printf(" you must declare both.\n\n"); +} + +/*------------------------------------------------------ + ** InitPerRun() -- Initialize the contents of the + ** multipier matrix **m + **------------------------------------------------------ + */ +void InitPerRun(int size, float *m) { + int i; + for (i = 0; i < size * size; i++) + *(m + i) = 0.0; +} +void BackSub(float *a, float *b, float *finalVec, int size) { + // solve "bottom up" + int i, j; + for (i = 0; i < size; i++) { + finalVec[size - i - 1] = b[size - i - 1]; + for (j = 0; j < i; j++) { + finalVec[size - i - 1] -= *(a + size * (size - i - 1) + (size - j - 1)) * + finalVec[size - j - 1]; + } + finalVec[size - i - 1] = + finalVec[size - i - 1] / *(a + size * (size - i - 1) + (size - i - 1)); + } +} +void InitMat(FILE *fp, int size, float *ary, int nrow, int ncol) { + int i, j; + + for (i = 0; i < nrow; i++) { + for (j = 0; j < ncol; j++) { + fscanf(fp, "%f", ary + size * i + j); + } + } +} +/*------------------------------------------------------ + ** InitAry() -- Initialize the array (vector) by reading + ** data from the data file + **------------------------------------------------------ + */ +void InitAry(FILE *fp, float *ary, int ary_size) { + int i; + + for (i = 0; i < ary_size; i++) { + fscanf(fp, "%f", &ary[i]); + } +} +/*------------------------------------------------------ + ** PrintMat() -- Print the contents of the matrix + **------------------------------------------------------ + */ +void PrintMat(float *ary, int size, int nrow, int ncol) { + int i, j; + + for (i = 0; i < nrow; i++) { + for (j = 0; j < ncol; j++) { + printf("%8.2f ", *(ary + size * i + j)); + } + printf("\n"); + } + printf("\n"); +} + +/*------------------------------------------------------ + ** PrintAry() -- Print the contents of the array (vector) + **------------------------------------------------------ + */ +void PrintAry(float *ary, int ary_size) { + int i; + for (i = 0; i < ary_size; i++) { + printf("%.2f ", ary[i]); + } + printf("\n\n"); +} +#endif diff --git a/benchmarks/opencl/guassian/matrix4.txt b/benchmarks/opencl/guassian/matrix4.txt new file mode 100755 index 00000000..abf30b49 --- /dev/null +++ b/benchmarks/opencl/guassian/matrix4.txt @@ -0,0 +1,11 @@ +4 + +-0.6 -0.5 0.7 0.3 +-0.3 -0.9 0.3 0.7 +-0.4 -0.5 -0.3 -0.8 +0.0 -0.1 0.2 0.9 + +-0.85 -0.68 0.24 -0.53 + +0.7 0.0 -0.4 -0.5 + diff --git a/benchmarks/opencl/guassian/run b/benchmarks/opencl/guassian/run new file mode 100755 index 00000000..31683b1b --- /dev/null +++ b/benchmarks/opencl/guassian/run @@ -0,0 +1 @@ +./gaussian ../../data/gaussian/matrix4.txt \ No newline at end of file diff --git a/benchmarks/opencl/guassian/utils.cpp b/benchmarks/opencl/guassian/utils.cpp new file mode 100755 index 00000000..b0f9115f --- /dev/null +++ b/benchmarks/opencl/guassian/utils.cpp @@ -0,0 +1,204 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#include +#include +#include +#include + +#include "utils.h" + +static bool usingImages = true; + +//! A wrapper for malloc that checks the return value +void* alloc(size_t size) { + + void* ptr = NULL; + ptr = malloc(size); + if(ptr == NULL) { + perror("malloc"); + exit(-1); + } + + return ptr; +} + +// This function checks to make sure a file exists before we open it +void checkFile(char* filename) +{ + + struct stat fileStatus; + if(stat(filename, &fileStatus) != 0) { + printf("Error opening file: %s\n", filename); + exit(-1); + } + else { + if(!(S_IFREG & fileStatus.st_mode)) { + printf("File %s is not a regular file\n", filename); + exit(-1); + } + } +} + + +// This function checks to make sure a directory exists +void checkDir(char* dirpath) +{ + + struct stat fileStatus; + if(stat(dirpath, &fileStatus) != 0) { + printf("Directory does not exist: %s\n", dirpath); + exit(-1); + } + else { + if(!(S_IFDIR & fileStatus.st_mode)) { + printf("Directory was not provided: %s\n", dirpath); + exit(-1); + } + } +} + +// Parse the command line arguments +void parseArguments(int argc, char** argv, char** input, char** events, + char** ipts, char* devicePref, bool* verifyResults) +{ + + for(int i = 2; i < argc; i++) { + if(strcmp(argv[i], "-d") == 0) { // Event dump found + if(i == argc-1) { + printf("Usage: -e Needs directory path\n"); + exit(-1); + } + devicePref[0] = argv[i+1][0]; + i++; + continue; + } + if(strcmp(argv[i], "-e") == 0) { // Event dump found + if(i == argc-1) { + printf("Usage: -e Needs directory path\n"); + exit(-1); + } + *events = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-i") == 0) { // Input found + if(i == argc-1) { + printf("Usage: -i Needs directory path\n"); + exit(-1); + } + *input = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-l") == 0) { // Ipts dump found + if(i == argc-1) { + printf("Usage: -l Needs directory path\n"); + exit(-1); + } + *ipts = argv[i+1]; + i++; + continue; + } + if(strcmp(argv[i], "-n") == 0) { // Don't use OpenCL images + setUsingImages(false); + continue; + } + if(strcmp(argv[i], "-v") == 0) { // Verify results + *verifyResults = true; + continue; + } + } +} + + +// This function that takes a positive integer 'value' and returns +// the nearest multiple of 'multiple' (used for padding columns) +unsigned int roundUp(unsigned int value, unsigned int multiple) { + + unsigned int remainder = value % multiple; + + // Make the value a multiple of multiple + if(remainder != 0) { + value += (multiple-remainder); + } + + return value; +} + + +// Concatenate two strings and return a pointer to the new string +char* smartStrcat(char* str1, char* str2) +{ + char* newStr = NULL; + + newStr = (char*)alloc((strlen(str1)+strlen(str2)+1)*sizeof(char)); + + strcpy(newStr, str1); + strcat(newStr, str2); + + return newStr; +} + + +// Set the value of using images to true if they are being +// used, or false if they are not +void setUsingImages(bool val) +{ + usingImages = val; +} + + +// Return whether or not images are being used +bool isUsingImages() +{ + return usingImages; +} diff --git a/benchmarks/opencl/guassian/utils.h b/benchmarks/opencl/guassian/utils.h new file mode 100755 index 00000000..1e901ced --- /dev/null +++ b/benchmarks/opencl/guassian/utils.h @@ -0,0 +1,84 @@ +/****************************************************************************\ + * Copyright (c) 2011, Advanced Micro Devices, Inc. * + * All rights reserved. * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted provided that the following conditions * + * are met: * + * * + * Redistributions of source code must retain the above copyright notice, * + * this list of conditions and the following disclaimer. * + * * + * Redistributions in binary form must reproduce the above copyright notice, * + * this list of conditions and the following disclaimer in the documentation * + * and/or other materials provided with the distribution. * + * * + * Neither the name of the copyright holder nor the names of its contributors * + * may be used to endorse or promote products derived from this software * + * without specific prior written permission. * + * * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * * + * If you use the software (in whole or in part), you shall adhere to all * + * applicable U.S., European, and other export laws, including but not * + * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. * + * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 * + * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you * + * hereby certify that, except pursuant to a license granted by the United * + * States Department of Commerce Bureau of Industry and Security or as * + * otherwise permitted pursuant to a License Exception under the U.S. Export * + * Administration Regulations ("EAR"), you will not (1) export, re-export or * + * release to a national of a country in Country Groups D:1, E:1 or E:2 any * + * restricted technology, software, or source code you receive hereunder, * + * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such * + * technology or software, if such foreign produced direct product is subject * + * to national security controls as identified on the Commerce Control List * + *(currently found in Supplement 1 to Part 774 of EAR). For the most current * + * Country Group listings, or for additional information about the EAR or * + * your obligations under those regulations, please refer to the U.S. Bureau * + * of Industry and Security’s website at http://www.bis.doc.gov/. * + \****************************************************************************/ + +#ifndef _UTILS_ +#define _UTILS_ + +// Wrapper for malloc +void* alloc(size_t size); + +// Checks for existence of directory +void checkDir(char* dirpath); + +// Check for existence of file +void checkFile(char* filename); + +// Parse the input command line options to the program +void parseArguments(int argc, char** argv, char** input, char** events, + char** ipts, char* devicePref, bool* verifyResults); + + +// Print the program usage information +void printUsage(); + +// Rounds up size to the nearest multiple of multiple +unsigned int roundUp(unsigned int value, unsigned int multiple); + +// Concatenate two strings, creating a new one +char* smartStrcat(char* str1, char* str2); + +// Set the value of usingImages +void setUsingImages(bool val); + +// Return whether or not images are being used +bool isUsingImages(); + +#endif diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index b60de117..f3d35062 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -31,7 +31,8 @@ CXXFLAGS += -I$(POCL_INC_PATH) VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a -PROJECT=kmeans +PROJECT = kmeans +SRCS = main.cc read_input.c rmse.c cluster.c kmeans_clustering.c all: $(PROJECT).dump $(PROJECT).hex @@ -50,11 +51,11 @@ read_input.o: read_input.c rmse.o: rmse.c $(CC) $(CXXFLAGS) -c rmse.c -$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o - $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf -$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o - $(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu $(PROJECT).hex: $(PROJECT).elf $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex @@ -75,4 +76,4 @@ gdb-c: $(PROJECT).qemu $(GDB) $(PROJECT).qemu clean: - rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu \ No newline at end of file + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug \ No newline at end of file diff --git a/benchmarks/opencl/kmeans/cluster.c b/benchmarks/opencl/kmeans/cluster.c new file mode 100755 index 00000000..bc3c7c59 --- /dev/null +++ b/benchmarks/opencl/kmeans/cluster.c @@ -0,0 +1,155 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: cluster.c **/ +/** Description: Takes as input a file, containing 1 data point per **/ +/** per line, and performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Brendan McCane **/ +/** James Cook University of North Queensland. **/ +/** Australia. email: mccane@cs.jcu.edu.au **/ +/** **/ +/** Edited by: Jay Pisharath, Wei-keng Liao **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/ +/** University of Virginia **/ +/** **/ +/** Description: No longer supports fuzzy c-means clustering; **/ +/** only regular k-means clustering. **/ +/** No longer performs "validity" function to analyze **/ +/** compactness and separation crietria; instead **/ +/** calculate root mean squared error. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include "kmeans.h" + +float min_rmse_ref = FLT_MAX; +extern double wtime(void); + /* reference min_rmse value */ + +/*---< cluster() >-----------------------------------------------------------*/ +int cluster(int npoints, /* number of data points */ + int nfeatures, /* number of attributes for each point */ + float **features, /* array: [npoints][nfeatures] */ + int min_nclusters, /* range of min to max number of clusters */ + int max_nclusters, + float threshold, /* loop terminating factor */ + int *best_nclusters, /* out: number between min and max with lowest RMSE */ + float ***cluster_centres, /* out: [best_nclusters][nfeatures] */ + float *min_rmse, /* out: minimum RMSE */ + int isRMSE, /* calculate RMSE */ + int nloops /* number of iteration for each number of clusters */ + ) +{ + int nclusters; /* number of clusters k */ + int index =0; /* number of iteration to reach the best RMSE */ + int rmse; /* RMSE for each clustering */ + int *membership; /* which cluster a data point belongs to */ + float **tmp_cluster_centres; /* hold coordinates of cluster centers */ + int i; + + /* allocate memory for membership */ + membership = (int*) malloc(npoints * sizeof(int)); + + /* sweep k from min to max_nclusters to find the best number of clusters */ + for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) + { + if (nclusters > npoints) break; /* cannot have more clusters than points */ + + /* allocate device memory, invert data array (@ kmeans_cuda.cu) */ + allocate(npoints, nfeatures, nclusters, features); + + /* iterate nloops times for each number of clusters */ + for(i = 0; i < nloops; i++) + { + /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */ + tmp_cluster_centres = kmeans_clustering(features, + nfeatures, + npoints, + nclusters, + threshold, + membership); + + if (*cluster_centres) { + free((*cluster_centres)[0]); + free(*cluster_centres); + } + *cluster_centres = tmp_cluster_centres; + + + /* find the number of clusters with the best RMSE */ + if(isRMSE) + { + rmse = rms_err(features, + nfeatures, + npoints, + tmp_cluster_centres, + nclusters); + + if(rmse < min_rmse_ref){ + min_rmse_ref = rmse; //update reference min RMSE + *min_rmse = min_rmse_ref; //update return min RMSE + *best_nclusters = nclusters; //update optimum number of clusters + index = i; //update number of iteration to reach best RMSE + } + } + } + + deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */ + } + + free(membership); + + return index; +} + diff --git a/benchmarks/opencl/kmeans/getopt.c b/benchmarks/opencl/kmeans/getopt.c new file mode 100755 index 00000000..fa2f3137 --- /dev/null +++ b/benchmarks/opencl/kmeans/getopt.c @@ -0,0 +1,1184 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. */ +# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifndef _ +# define _(msgid) gettext (msgid) +# endif +# else +# define _(msgid) (msgid) +# endif +# if defined _LIBC && defined USE_IN_LIBIO +# include +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +//# if HAVE_STRING_H || WIN32 /* Pete Wilson mod 7/28/02 */ +# include +//# else +//# include +//# endif + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Stored original parameters. + XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ +extern int __libc_argc; +extern char **__libc_argv; + +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +# ifdef USE_NONOPTION_FLAGS +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; +# endif + +# ifdef USE_NONOPTION_FLAGS +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +# else +# define SWAP_FLAGS(ch1, ch2) +# endif +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#if defined _LIBC && defined USE_NONOPTION_FLAGS + if (posixly_correct == NULL + && argc == __libc_argc && argv == __libc_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int print_errors = opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#if defined _LIBC && defined USE_NONOPTION_FLAGS +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT and LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind - 1][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#else + fprintf (stderr, _("\ +%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], + pfound->name); +#else + fprintf (stderr, _("\ +%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (argv[optind][1] == '-') + { + /* --option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); +#endif + } + else + { + /* +option or -option */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#else + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; +#endif + + if (posixly_correct) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: illegal option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); +#endif + } + else + { +#if defined _LIBC && defined USE_IN_LIBIO + __asprintf (&buf, _("%s: invalid option -- %c\n"), + argv[0], c); +#else + fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c); +#endif + } + +#if defined _LIBC && defined USE_IN_LIBIO + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#endif + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); +#endif + } + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); +#endif + } + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (print_errors) + { +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, _("\ +%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); +#endif + } + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (print_errors) + { + /* 1003.2 specifies the format of this message. */ +#if defined _LIBC && defined USE_IN_LIBIO + char *buf; + + __asprintf (&buf, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + + if (_IO_fwide (stderr, 0) > 0) + __fwprintf (stderr, L"%s", buf); + else + fputs (buf, stderr); + + free (buf); +#else + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ \ No newline at end of file diff --git a/benchmarks/opencl/kmeans/getopt.h b/benchmarks/opencl/kmeans/getopt.h new file mode 100755 index 00000000..bae04bf7 --- /dev/null +++ b/benchmarks/opencl/kmeans/getopt.h @@ -0,0 +1,191 @@ + + +/* getopt.h */ +/* Declarations for getopt. + Copyright (C) 1989-1994, 1996-1999, 2001 Free Software + Foundation, Inc. This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute + it and/or modify it under the terms of the GNU Lesser + General Public License as published by the Free Software + Foundation; either version 2.1 of the License, or + (at your option) any later version. + + The GNU C Library is distributed in the hope that it will + be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A + PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General + Public License along with the GNU C Library; if not, write + to the Free Software Foundation, Inc., 59 Temple Place, + Suite 330, Boston, MA 02111-1307 USA. */ + + + + + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +/* If __GNU_LIBRARY__ is not already defined, either we are being used + standalone, or this is the first header included in the source file. + If we are being used with glibc, we need to include , but + that does not exist if we are standalone. So: if __GNU_LIBRARY__ is + not defined, include , which will pull in for us + if it's from glibc. (Why ctype.h? It's guaranteed to exist and it + doesn't flood the namespace with stuff the way some other headers do.) */ +#if !defined __GNU_LIBRARY__ +# include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if (defined __STDC__ && __STDC__) || defined __cplusplus + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined __cplusplus +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int ___argc, char *const *___argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ + diff --git a/benchmarks/opencl/kmeans/kernel.cl b/benchmarks/opencl/kmeans/kernel.cl new file mode 100755 index 00000000..11ca065e --- /dev/null +++ b/benchmarks/opencl/kmeans/kernel.cl @@ -0,0 +1,61 @@ +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +__kernel void +kmeans_kernel_c(__global float *feature, + __global float *clusters, + __global int *membership, + int npoints, + int nclusters, + int nfeatures, + int offset, + int size + ) +{ + unsigned int point_id = get_global_id(0); + int index = 0; + //const unsigned int point_id = get_global_id(0); + if (point_id < npoints) + { + float min_dist=FLT_MAX; + for (int i=0; i < nclusters; i++) { + + float dist = 0; + float ans = 0; + for (int l=0; l +#include +#include +#include +#include "kmeans.h" + +#define RANDOM_MAX 2147483647 + +extern double wtime(void); + +/*----< kmeans_clustering() >---------------------------------------------*/ +float** kmeans_clustering(float **feature, /* in: [npoints][nfeatures] */ + int nfeatures, + int npoints, + int nclusters, + float threshold, + int *membership) /* out: [npoints] */ +{ + int i, j, n = 0; /* counters */ + int loop=0, temp; + int *new_centers_len; /* [nclusters]: no. of points in each cluster */ + float delta; /* if the point moved */ + float **clusters; /* out: [nclusters][nfeatures] */ + float **new_centers; /* [nclusters][nfeatures] */ + + int *initial; /* used to hold the index of points not yet selected + prevents the "birthday problem" of dual selection (?) + considered holding initial cluster indices, but changed due to + possible, though unlikely, infinite loops */ + int initial_points; + int c = 0; + + /* nclusters should never be > npoints + that would guarantee a cluster without points */ + if (nclusters > npoints) + nclusters = npoints; + + /* allocate space for and initialize returning variable clusters[] */ + clusters = (float**) malloc(nclusters * sizeof(float*)); + clusters[0] = (float*) malloc(nclusters * nfeatures * sizeof(float)); + for (i=1; i= 0; i++) { + //n = (int)rand() % initial_points; + + for (j=0; j 0) + clusters[i][j] = new_centers[i][j] / new_centers_len[i]; /* take average i.e. sum/n */ + new_centers[i][j] = 0.0; /* set back to 0 */ + } + new_centers_len[i] = 0; /* set back to 0 */ + } + c++; + } while ((delta > threshold) && (loop++ < 500)); /* makes sure loop terminates */ + printf("iterated %d times\n", c); + free(new_centers[0]); + free(new_centers); + free(new_centers_len); + + return clusters; +} + diff --git a/benchmarks/opencl/kmeans/libkmeans.a b/benchmarks/opencl/kmeans/libkmeans.a new file mode 100644 index 00000000..74640506 Binary files /dev/null and b/benchmarks/opencl/kmeans/libkmeans.a differ diff --git a/benchmarks/opencl/kmeans/main.cc b/benchmarks/opencl/kmeans/main.cc new file mode 100755 index 00000000..a5ddcbf1 --- /dev/null +++ b/benchmarks/opencl/kmeans/main.cc @@ -0,0 +1,359 @@ +#include "kmeans.h" +#include +#include +#include +#include +#include +#include + +#ifdef WIN +#include +#else +#include +#include +double gettime() { + struct timeval t; + gettimeofday(&t, NULL); + return t.tv_sec + t.tv_usec * 1e-6; +} +#endif + +#ifdef NV +#include +#else +#include +#endif + +#ifndef FLT_MAX +#define FLT_MAX 3.40282347e+38 +#endif + +#ifdef RD_WG_SIZE_0_0 +#define BLOCK_SIZE RD_WG_SIZE_0_0 +#elif defined(RD_WG_SIZE_0) +#define BLOCK_SIZE RD_WG_SIZE_0 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE RD_WG_SIZE +#else +#define BLOCK_SIZE 256 +#endif + +#ifdef RD_WG_SIZE_1_0 +#define BLOCK_SIZE2 RD_WG_SIZE_1_0 +#elif defined(RD_WG_SIZE_1) +#define BLOCK_SIZE2 RD_WG_SIZE_1 +#elif defined(RD_WG_SIZE) +#define BLOCK_SIZE2 RD_WG_SIZE +#else +#define BLOCK_SIZE2 256 +#endif + +// local variables +static cl_context context; +static cl_command_queue cmd_queue; +static cl_device_type device_type; +static cl_device_id *device_list; +static cl_int num_devices; + +static int initialize(int use_gpu) { + cl_int result; + size_t size; + + /*// create OpenCL context + cl_platform_id platform_id; + if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { + printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); + return -1; + } + cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, + (cl_context_properties)platform_id, 0}; + device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; + context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, NULL); + if (!context) { + printf("ERROR: clCreateContextFromType(%s) failed\n", + use_gpu ? "GPU" : "CPU"); + return -1; + } + + // get the list of GPUs + result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + num_devices = (int)(size / sizeof(cl_device_id)); + + if (result != CL_SUCCESS || num_devices < 1) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + } + device_list = new cl_device_id[num_devices]; + if (!device_list) { + printf("ERROR: new cl_device_id[] failed\n"); + return -1; + } + result = + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL); + if (result != CL_SUCCESS) { + printf("ERROR: clGetContextInfo() failed\n"); + return -1; + }*/ + + cl_platform_id platform_id; + num_devices = 1; + device_list = new cl_device_id[num_devices]; + + result = clGetPlatformIDs(1, &platform_id, NULL); + result = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, device_list, NULL); + context = clCreateContext(NULL, 1, device_list, NULL, NULL, &result); + + // create command queue for the first device + cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL); + if (!cmd_queue) { + printf("ERROR: clCreateCommandQueue() failed\n"); + return -1; + } + + return 0; +} + +static int shutdown() { + // release resources + if (cmd_queue) + clReleaseCommandQueue(cmd_queue); + if (context) + clReleaseContext(context); + if (device_list) + delete device_list; + + // reset all variables + cmd_queue = 0; + context = 0; + device_list = 0; + num_devices = 0; + device_type = 0; + + return 0; +} + +cl_mem d_feature; +cl_mem d_feature_swap; +cl_mem d_cluster; +cl_mem d_membership; + +cl_kernel kernel; +cl_kernel kernel_s; +cl_kernel kernel2; + +int *membership_OCL; +int *membership_d; +float *feature_d; +float *clusters_d; +float *center_d; + +int allocate(int n_points, int n_features, int n_clusters, float **feature) { + /*int sourcesize = 1024 * 1024; + char *source = (char *)calloc(sourcesize, sizeof(char)); + if (!source) { + printf("ERROR: calloc(%d) failed\n", sourcesize); + return -1; + } + + // read the kernel core source + char *tempchar = "./kmeans.cl"; + FILE *fp = fopen(tempchar, "rb"); + if (!fp) { + printf("ERROR: unable to open '%s'\n", tempchar); + return -1; + } + fread(source + strlen(source), sourcesize, 1, fp); + fclose(fp);*/ + + // OpenCL initialization + int use_gpu = 1; + if (initialize(use_gpu)) + return -1; + + // compile kernel + cl_int err = 0; + //const char *slist[2] = {source, 0}; + //cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); + cl_program prog = clCreateProgramWithBuiltInKernels(context, 1, device_list, "kmeans_kernel_c;kmeans_swap", &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateProgramWithSource() => %d\n", err); + return -1; + } + err = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); + { // show warnings/errors + // static char log[65536]; memset(log, 0, sizeof(log)); + // cl_device_id device_id = 0; + // err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), + //&device_id, NULL); + // clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, + // sizeof(log)-1, log, NULL); + // if(err || strstr(log,"warning:") || strstr(log, "error:")) + // printf("<<<<\n%s\n>>>>\n", log); + } + if (err != CL_SUCCESS) { + printf("ERROR: clBuildProgram() => %d\n", err); + return -1; + } + + char *kernel_kmeans_c = "kmeans_kernel_c"; + char *kernel_swap = "kmeans_swap"; + + kernel_s = clCreateKernel(prog, kernel_kmeans_c, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + kernel2 = clCreateKernel(prog, kernel_swap, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateKernel() 0 => %d\n", err); + return -1; + } + + clReleaseProgram(prog); + + d_feature = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_feature_swap = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_feature_swap (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + d_cluster = + clCreateBuffer(context, CL_MEM_READ_WRITE, + n_clusters * n_features * sizeof(float), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_cluster (size:%d) => %d\n", + n_clusters * n_features, err); + return -1; + } + d_membership = clCreateBuffer(context, CL_MEM_READ_WRITE, + n_points * sizeof(int), NULL, &err); + if (err != CL_SUCCESS) { + printf("ERROR: clCreateBuffer d_membership (size:%d) => %d\n", n_points, + err); + return -1; + } + + // write buffers + err = clEnqueueWriteBuffer(cmd_queue, d_feature, 1, 0, + n_points * n_features * sizeof(float), feature[0], + 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_feature (size:%d) => %d\n", + n_points * n_features, err); + return -1; + } + + clSetKernelArg(kernel2, 0, sizeof(void *), (void *)&d_feature); + clSetKernelArg(kernel2, 1, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel2, 2, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel2, 3, sizeof(cl_int), (void *)&n_features); + + size_t global_work[3] = {n_points, 1, 1}; + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE; // work group size is defined by + // RD_WG_SIZE_0 or RD_WG_SIZE_0_0 + // 2014/06/10 17:00:51 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueNDRangeKernel(cmd_queue, kernel2, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + + membership_OCL = (int *)malloc(n_points * sizeof(int)); +} + +void deallocateMemory() { + clReleaseMemObject(d_feature); + clReleaseMemObject(d_feature_swap); + clReleaseMemObject(d_cluster); + clReleaseMemObject(d_membership); + free(membership_OCL); +} + +int main(int argc, char **argv) { + printf("WG size of kernel_swap = %d, WG size of kernel_kmeans = %d \n", + BLOCK_SIZE, BLOCK_SIZE2); + setup(argc, argv); + shutdown(); +} + +int kmeansOCL(float **feature, /* in: [npoints][nfeatures] */ + int n_features, int n_points, int n_clusters, int *membership, + float **clusters, int *new_centers_len, float **new_centers) { + + int delta = 0; + int i, j, k; + cl_int err = 0; + + size_t global_work[3] = {n_points, 1, 1}; + + /// Ke Wang adjustable local group size 2013/08/07 10:37:33 + size_t local_work_size = BLOCK_SIZE2; // work group size is defined by + // RD_WG_SIZE_1 or RD_WG_SIZE_1_0 + // 2014/06/10 17:00:41 + if (global_work[0] % local_work_size != 0) + global_work[0] = (global_work[0] / local_work_size + 1) * local_work_size; + + err = clEnqueueWriteBuffer(cmd_queue, d_cluster, 1, 0, + n_clusters * n_features * sizeof(float), + clusters[0], 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueWriteBuffer d_cluster (size:%d) => %d\n", n_points, + err); + return -1; + } + + int size = 0; + int offset = 0; + + clSetKernelArg(kernel_s, 0, sizeof(void *), (void *)&d_feature_swap); + clSetKernelArg(kernel_s, 1, sizeof(void *), (void *)&d_cluster); + clSetKernelArg(kernel_s, 2, sizeof(void *), (void *)&d_membership); + clSetKernelArg(kernel_s, 3, sizeof(cl_int), (void *)&n_points); + clSetKernelArg(kernel_s, 4, sizeof(cl_int), (void *)&n_clusters); + clSetKernelArg(kernel_s, 5, sizeof(cl_int), (void *)&n_features); + clSetKernelArg(kernel_s, 6, sizeof(cl_int), (void *)&offset); + clSetKernelArg(kernel_s, 7, sizeof(cl_int), (void *)&size); + + err = clEnqueueNDRangeKernel(cmd_queue, kernel_s, 1, NULL, global_work, + &local_work_size, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: clEnqueueNDRangeKernel()=>%d failed\n", err); + return -1; + } + clFinish(cmd_queue); + err = clEnqueueReadBuffer(cmd_queue, d_membership, 1, 0, + n_points * sizeof(int), membership_OCL, 0, 0, 0); + if (err != CL_SUCCESS) { + printf("ERROR: Memcopy Out\n"); + return -1; + } + + delta = 0; + for (i = 0; i < n_points; i++) { + int cluster_id = membership_OCL[i]; + new_centers_len[cluster_id]++; + if (membership_OCL[i] != membership[i]) { + delta++; + membership[i] = membership_OCL[i]; + } + for (j = 0; j < n_features; j++) { + new_centers[cluster_id][j] += feature[i][j]; + } + } + + return delta; +} diff --git a/benchmarks/opencl/kmeans/read_input.c b/benchmarks/opencl/kmeans/read_input.c new file mode 100755 index 00000000..6845453e --- /dev/null +++ b/benchmarks/opencl/kmeans/read_input.c @@ -0,0 +1,338 @@ +/*****************************************************************************/ +/*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */ +/*By downloading, copying, installing or using the software you agree */ +/*to this license. If you do not agree to this license, do not download, */ +/*install, copy or use the software. */ +/* */ +/* */ +/*Copyright (c) 2005 Northwestern University */ +/*All rights reserved. */ + +/*Redistribution of the software in source and binary forms, */ +/*with or without modification, is permitted provided that the */ +/*following conditions are met: */ +/* */ +/*1 Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* */ +/*2 Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution.*/ +/* */ +/*3 Neither the name of Northwestern University nor the names of its */ +/* contributors may be used to endorse or promote products derived */ +/* from this software without specific prior written permission. */ +/* */ +/*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */ +/*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */ +/*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */ +/*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */ +/*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */ +/*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */ +/*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */ +/*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */ +/*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/*POSSIBILITY OF SUCH DAMAGE. */ +/******************************************************************************/ + +/*************************************************************************/ +/** File: example.c **/ +/** Description: Takes as input a file: **/ +/** ascii file: containing 1 data point per line **/ +/** binary file: first int is the number of objects **/ +/** 2nd int is the no. of features of each **/ +/** object **/ +/** This example performs a fuzzy c-means clustering **/ +/** on the data. Fuzzy clustering is performed using **/ +/** min to max clusters and the clustering that gets **/ +/** the best score according to a compactness and **/ +/** separation criterion are returned. **/ +/** Author: Wei-keng Liao **/ +/** ECE Department Northwestern University **/ +/** email: wkliao@ece.northwestern.edu **/ +/** **/ +/** Edited by: Jay Pisharath **/ +/** Northwestern University. **/ +/** **/ +/** ================================================================ **/ +/** + * **/ +/** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee + * **/ +/** University of Virginia + * **/ +/** + * **/ +/** Description: No longer supports fuzzy c-means clustering; + * **/ +/** only regular k-means clustering. + * **/ +/** No longer performs "validity" function to + * analyze **/ +/** compactness and separation crietria; instead + * **/ +/** calculate root mean squared error. + * **/ +/** **/ +/*************************************************************************/ +#define _CRT_SECURE_NO_DEPRECATE 1 + +#include "kmeans.h" +#include +#include +#include +#include +#include +#include +#include + +extern double wtime(void); + +/*---< usage() >------------------------------------------------------------*/ +void usage(char *argv0) { + char *help = "\nUsage: %s [switches] -i filename\n\n" + " -i filename :file containing data to be clustered\n" + " -m max_nclusters :maximum number of clusters allowed " + "[default=5]\n" + " -n min_nclusters :minimum number of clusters allowed " + "[default=5]\n" + " -t threshold :threshold value " + "[default=0.001]\n" + " -l nloops :iteration for each number of clusters " + "[default=1]\n" + " -b :input file is in binary format\n" + " -r :calculate RMSE " + "[default=off]\n" + " -o :output cluster center coordinates " + "[default=off]\n"; + fprintf(stderr, help, argv0); + exit(-1); +} + +/*---< main() >-------------------------------------------------------------*/ +int setup(int argc, char **argv) { + int opt; + extern char *optarg; + char *filename = 0; + float *buf; + char line[1024]; + int isBinaryFile = 0; + + float threshold = 0.001; /* default value */ + int max_nclusters = 5; /* default value */ + int min_nclusters = 5; /* default value */ + int best_nclusters = 0; + int nfeatures = 0; + int npoints = 0; + float len; + + float **features; + float **cluster_centres = NULL; + int i, j, index; + int nloops = 1; /* default value */ + + int isRMSE = 0; + float rmse; + + int isOutput = 0; + // float cluster_timing, io_timing; + + /* obtain command line arguments and change appropriate options */ + while ((opt = getopt(argc, argv, "i:t:m:n:l:bro")) != EOF) { + switch (opt) { + case 'i': + filename = optarg; + break; + case 'b': + isBinaryFile = 1; + break; + case 't': + threshold = atof(optarg); + break; + case 'm': + max_nclusters = atoi(optarg); + break; + case 'n': + min_nclusters = atoi(optarg); + break; + case 'r': + isRMSE = 1; + break; + case 'o': + isOutput = 1; + break; + case 'l': + nloops = atoi(optarg); + break; + case '?': + usage(argv[0]); + break; + default: + usage(argv[0]); + break; + } + } + + /* ============== I/O begin ==============*/ + /* get nfeatures and npoints */ + // io_timing = omp_get_wtime(); + + /*if (isBinaryFile) { // Binary file input + FILE *infile; + if ((infile = fopen("100", "r")) == NULL) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + fread(&npoints, 1, sizeof(int), infile); + fread(&nfeatures, 1, sizeof(int), infile); + + // allocate space for features[][] and read attributes of all objects + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) { + features[i] = features[i - 1] + nfeatures; + } + fread(buf, 1, npoints * nfeatures * sizeof(float), infile); + fclose(infile); + } else { + FILE *infile; + if ((infile = fopen("100", "r")) == NULL) { + fprintf(stderr, "Error: no such file (%s)\n", filename); + exit(1); + } + while (fgets(line, 1024, infile) != NULL) + if (strtok(line, " \t\n") != 0) { + npoints++; + } + rewind(infile); + while (fgets(line, 1024, infile) != NULL) { + if (strtok(line, " \t\n") != 0) { + // ignore the id (first attribute): nfeatures = 1; + while (strtok(NULL, " ,\t\n") != NULL) + nfeatures++; + break; + } + } + + // allocate space for features[] and read attributes of all objects + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) + features[i] = features[i - 1] + nfeatures; + rewind(infile); + i = 0; + while (fgets(line, 1024, infile) != NULL) { + if (strtok(line, " \t\n") == NULL) + continue; + for (j = 0; j < nfeatures; j++) { + buf[i] = atof(strtok(NULL, " ,\t\n")); + i++; + } + } + fclose(infile); + }*/ + + npoints = 100; + nfeatures = 100; + buf = (float *)malloc(npoints * nfeatures * sizeof(float)); + features = (float **)malloc(npoints * sizeof(float *)); + features[0] = (float *)malloc(npoints * nfeatures * sizeof(float)); + for (i = 1; i < npoints; i++) { + features[i] = features[i - 1] + nfeatures; + } + for (i = 0; i < npoints * nfeatures; ++i) { + buf[i] = (i % 64); + } + + // io_timing = omp_get_wtime() - io_timing; + + printf("\nI/O completed\n"); + printf("\nNumber of objects: %d\n", npoints); + printf("Number of features: %d\n", nfeatures); + /* ============== I/O end ==============*/ + + // error check for clusters + if (npoints < min_nclusters) { + printf("Error: min_nclusters(%d) > npoints(%d) -- cannot proceed\n", + min_nclusters, npoints); + exit(0); + } + + srand(7); /* seed for future random number generator */ + memcpy( + features[0], buf, + npoints * nfeatures * + sizeof( + float)); /* now features holds 2-dimensional array of features */ + free(buf); + + /* ======================= core of the clustering ===================*/ + + // cluster_timing = omp_get_wtime(); /* Total clustering time */ + cluster_centres = NULL; + index = cluster(npoints, /* number of data points */ + nfeatures, /* number of features for each point */ + features, /* array: [npoints][nfeatures] */ + min_nclusters, /* range of min to max number of clusters */ + max_nclusters, threshold, /* loop termination factor */ + &best_nclusters, /* return: number between min and max */ + &cluster_centres, /* return: [best_nclusters][nfeatures] */ + &rmse, /* Root Mean Squared Error */ + isRMSE, /* calculate RMSE */ + nloops); /* number of iteration for each number of clusters */ + + // cluster_timing = omp_get_wtime() - cluster_timing; + + /* =============== Command Line Output =============== */ + + /* cluster center coordinates + :displayed only for when k=1*/ + if ((min_nclusters == max_nclusters) && (isOutput == 1)) { + printf("\n================= Centroid Coordinates =================\n"); + for (i = 0; i < max_nclusters; i++) { + printf("%d:", i); + for (j = 0; j < nfeatures; j++) { + printf(" %.2f", cluster_centres[i][j]); + } + printf("\n\n"); + } + } + + len = (float)((max_nclusters - min_nclusters + 1) * nloops); + + printf("Number of Iteration: %d\n", nloops); + // printf("Time for I/O: %.5fsec\n", io_timing); + // printf("Time for Entire Clustering: %.5fsec\n", cluster_timing); + + if (min_nclusters != max_nclusters) { + if (nloops != 1) { // range of k, multiple iteration + // printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } else { // range of k, single iteration + // printf("Average Clustering Time: %fsec\n", + // cluster_timing / len); + printf("Best number of clusters is %d\n", best_nclusters); + } + } else { + if (nloops != 1) { // single k, multiple iteration + // printf("Average Clustering Time: %.5fsec\n", + // cluster_timing / nloops); + if (isRMSE) // if calculated RMSE + printf("Number of trials to approach the best RMSE of %.3f is %d\n", + rmse, index + 1); + } else { // single k, single iteration + if (isRMSE) // if calculated RMSE + printf("Root Mean Squared Error: %.3f\n", rmse); + } + } + + /* free up memory */ + free(features[0]); + free(features); + return (0); +} diff --git a/benchmarks/opencl/kmeans/rmse.c b/benchmarks/opencl/kmeans/rmse.c new file mode 100755 index 00000000..03d614a6 --- /dev/null +++ b/benchmarks/opencl/kmeans/rmse.c @@ -0,0 +1,94 @@ +/*************************************************************************/ +/** File: rmse.c **/ +/** Description: calculate root mean squared error of particular **/ +/** clustering. **/ +/** Author: Sang-Ha Lee **/ +/** University of Virginia. **/ +/** **/ +/** Note: euclid_dist_2() and find_nearest_point() adopted from **/ +/** Minebench code. **/ +/** **/ +/*************************************************************************/ + +#include +#include +#include +#include + +#include "kmeans.h" + +extern double wtime(void); + +/*----< euclid_dist_2() >----------------------------------------------------*/ +/* multi-dimensional spatial Euclid distance square */ +__inline +float euclid_dist_2(float *pt1, + float *pt2, + int numdims) +{ + int i; + float ans=0.0; + + for (i=0; i-----------------------------------------------*/ +__inline +int find_nearest_point(float *pt, /* [nfeatures] */ + int nfeatures, + float **pts, /* [npts][nfeatures] */ + int npts) +{ + int index, i; + float max_dist=FLT_MAX; + + /* find the cluster center id with min distance to pt */ + for (i=0; i-------------------------------------*/ +float rms_err (float **feature, /* [npoints][nfeatures] */ + int nfeatures, + int npoints, + float **cluster_centres, /* [nclusters][nfeatures] */ + int nclusters) +{ + int i; + int nearest_cluster_index; /* cluster center id with min distance to pt */ + float sum_euclid = 0.0; /* sum of Euclidean distance squares */ + float ret; /* return value */ + + /* calculate and sum the sqaure of euclidean distance*/ + #pragma omp parallel for \ + shared(feature,cluster_centres) \ + firstprivate(npoints,nfeatures,nclusters) \ + private(i, nearest_cluster_index) \ + schedule (static) + for (i=0; i