diff --git a/tests/opencl/fft/Makefile b/tests/opencl/fft/Makefile deleted file mode 100644 index fd039b0e..00000000 --- a/tests/opencl/fft/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -PROJECT = fft4 - -SRCS = main.cc - -OPTS ?= -n32 - -include ../common.mk diff --git a/tests/opencl/fft/common.h b/tests/opencl/fft/common.h deleted file mode 100644 index 8c8e3344..00000000 --- a/tests/opencl/fft/common.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#define LOCAL_SIZE 16 \ No newline at end of file diff --git a/tests/opencl/fft/kernel.cl b/tests/opencl/fft/kernel.cl deleted file mode 100644 index 3e47282c..00000000 --- a/tests/opencl/fft/kernel.cl +++ /dev/null @@ -1,63 +0,0 @@ -#include "common.h" - -__kernel void fft_radix4(__global float2* input, __global float2* output, const unsigned int N) { - int globalId = get_global_id(0); - int localId = get_local_id(0); - int groupId = get_group_id(0); - - // Allocate local memory to store intermediate results and twiddle factors - __local float2 localData[LOCAL_SIZE]; - __local float2 twiddleFactors[LOCAL_SIZE / 4]; - - // Calculate twiddle factors for this FFT stage and store in local memory - if (localId < LOCAL_SIZE / 4) { - float angle = -2 * M_PI * localId / LOCAL_SIZE; - twiddleFactors[localId] = (float2)(cos(angle), sin(angle)); - } - barrier(CLK_LOCAL_MEM_FENCE); - - // Calculate the offset for the data this work-group will process - int offset = groupId * LOCAL_SIZE; - - // Load a chunk of input into local memory for faster access - if (globalId < N) { - localData[localId] = input[globalId]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - // Perform the Radix-4 FFT on the data chunk in local memory - for (unsigned int stride = 1; stride < LOCAL_SIZE; stride *= 4) { - int twiddleIndex = (localId / stride) % 4; - float2 twiddle = twiddleFactors[twiddleIndex * (LOCAL_SIZE / (4 * stride))]; - - // Load data - float2 data0 = localData[localId]; - float2 data1 = localData[localId + stride]; - float2 data2 = localData[localId + 2 * stride]; - float2 data3 = localData[localId + 3 * stride]; - - // Apply twiddle factors - data1 *= twiddle; - data2 *= twiddle * twiddle; - data3 *= twiddle * twiddle * twiddle; - - // Radix-4 butterfly operations - float2 t0 = data0 + data2; - float2 t1 = data0 - data2; - float2 t2 = data1 + data3; - float2 t3 = (data1 - data3) * (float2)(0, -1); - - // Store results - localData[localId] = t0 + t2; - localData[localId + stride] = t1 + t3; - localData[localId + 2 * stride] = t0 - t2; - localData[localId + 3 * stride] = t1 - t3; - - barrier(CLK_LOCAL_MEM_FENCE); - } - - // Write the results back to global memory - if (globalId < N) { - output[globalId] = localData[localId]; - } -} diff --git a/tests/opencl/fft/main.cc b/tests/opencl/fft/main.cc deleted file mode 100644 index b10b225a..00000000 --- a/tests/opencl/fft/main.cc +++ /dev/null @@ -1,240 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define KERNEL_NAME "fft_radix4" - -#define FLOAT_ULP 6 - -struct float2 { - float x; - float y; - - float2(float real = 0.0f, float imag = 0.0f) : x(real), y(imag) {} - - float2 operator+(const float2& other) const { - return {x + other.x, y + other.y}; - } - - float2 operator-(const float2& other) const { - return {x - other.x, y - other.y}; - } - - float2 operator*(const float2& other) const { - return {x * other.x - y * other.y, x * other.y + y * other.x}; - } -}; - -#define CL_CHECK(_expr) \ - do { \ - cl_int _err = _expr; \ - if (_err == CL_SUCCESS) \ - break; \ - printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - cleanup(); \ - exit(-1); \ - } while (0) - -#define CL_CHECK2(_expr) \ - ({ \ - cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ - if (_err != CL_SUCCESS) { \ - printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - cleanup(); \ - exit(-1); \ - } \ - _ret; \ - }) - -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -static std::vector referenceDFT(const std::vector& input) { - std::vector output(input.size()); - for (unsigned int k = 0; k < input.size(); ++k) { // For each output element - output[k] = {0, 0}; // Initialize to zero - for (unsigned int n = 0; n < input.size(); ++n) { // For each input element - float angle = -2 * M_PI * k * n / input.size(); - float2 twiddle = {cos(angle), sin(angle)}; - output[k].x += input[n].x * twiddle.x - input[n].y * twiddle.y; - output[k].y += input[n].x * twiddle.y + input[n].y * twiddle.x; - } - } - return output; -} - -static int verifyOutput(const std::vector& output, - const std::vector& reference, - unsigned int N) { - int errors = 0; - for (unsigned int i = 0; i < N; ++i) { - float2 diff = {output[i].x - reference[i].x, output[i].y - reference[i].y}; - float error = sqrt(diff.x * diff.x + diff.y * diff.y); - if (error > 1e-5) { - printf("*** error: [%d] expected=(%f,%f), actual=(%f,%f)\n", i, reference[i].x, reference[i].y, output[i].x, output[i].y); - ++errors; - } - } - return errors; -} - -cl_device_id device_id = NULL; -cl_context context = NULL; -cl_command_queue commandQueue = NULL; -cl_program program = NULL; -cl_kernel kernel = NULL; -cl_mem i_memobj = NULL; -cl_mem o_memobj = NULL; -uint8_t *kernel_bin = NULL; - -static void cleanup() { - if (commandQueue) clReleaseCommandQueue(commandQueue); - if (kernel) clReleaseKernel(kernel); - if (program) clReleaseProgram(program); - if (i_memobj) clReleaseMemObject(i_memobj); - if (o_memobj) clReleaseMemObject(o_memobj); - if (context) clReleaseContext(context); - if (device_id) clReleaseDevice(device_id); - if (kernel_bin) free(kernel_bin); -} - -int size = 64; - -static void show_usage() { - printf("Usage: [-n size] [-h: help]\n"); -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { - switch (c) { - case 'n': - size = atoi(optarg); - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } - - printf("Workload size=%d\n", size); -} - -int main (int argc, char **argv) { - // parse command arguments - parse_args(argc, argv); - - cl_platform_id platform_id; - size_t kernel_size; - - // Getting platform and device information - CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); - CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); - - printf("Create context\n"); - context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err)); - - printf("Allocate device buffers\n"); - size_t nbytes = size * sizeof(float2); - i_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err)); - o_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err)); - - printf("Create program from kernel source\n"); -#ifdef HOSTGPU - if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size)) - return -1; - program = CL_CHECK2(clCreateProgramWithSource( - context, 1, (const char**)&kernel_bin, &kernel_size, &_err)); -#else - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - return -1; - program = CL_CHECK2(clCreateProgramWithBinary( - context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err)); -#endif - - // Build program - CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); - - // Create kernel - kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err)); - - // Set kernel arguments - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&i_memobj)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&o_memobj)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), (void *)&size)); - - // Allocate memories for input arrays and output arrays. - std::vector h_i(size); - std::vector h_o(size); - - // Generate input values - for (int i = 0; i < size; ++i) { - h_i[i].x = sin(2 * M_PI * i / size); // Sine wave as an example - h_i[i].y = 0.0f; // Zero imaginary part - } - - // Creating command queue - commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err)); - - printf("Upload source buffers\n"); - CL_CHECK(clEnqueueWriteBuffer(commandQueue, i_memobj, CL_TRUE, 0, nbytes, h_i.data(), 0, NULL, NULL)); - - printf("Execute the kernel\n"); - size_t global_work_size[1] = {size}; - size_t local_work_size[1] = {LOCAL_SIZE}; - auto time_start = std::chrono::high_resolution_clock::now(); - CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL)); - CL_CHECK(clFinish(commandQueue)); - auto time_end = std::chrono::high_resolution_clock::now(); - double elapsed = std::chrono::duration_cast(time_end - time_start).count(); - printf("Elapsed time: %lg ms\n", elapsed); - - printf("Download destination buffer\n"); - CL_CHECK(clEnqueueReadBuffer(commandQueue, o_memobj, CL_TRUE, 0, nbytes, h_o.data(), 0, NULL, NULL)); - - printf("Verify result\n"); - std::vector reference = referenceDFT(h_i); - auto errors = verifyOutput(h_o, reference, size); - if (0 == errors) { - printf("PASSED!\n"); - } else { - printf("FAILED! - %d errors\n", errors); - } - - // Clean up - cleanup(); - - return errors; -} diff --git a/tests/opencl/fft/main.cc.o b/tests/opencl/fft/main.cc.o deleted file mode 100644 index e3f30c90..00000000 Binary files a/tests/opencl/fft/main.cc.o and /dev/null differ