From bd18b03cc33e97a060eef4bdd23493c2608c6e35 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 31 Dec 2023 15:29:04 -0800 Subject: [PATCH] minor update --- tests/opencl/fft/Makefile | 7 -- tests/opencl/fft/common.h | 3 - tests/opencl/fft/kernel.cl | 63 ---------- tests/opencl/fft/main.cc | 240 ------------------------------------- tests/opencl/fft/main.cc.o | Bin 15368 -> 0 bytes 5 files changed, 313 deletions(-) delete mode 100644 tests/opencl/fft/Makefile delete mode 100644 tests/opencl/fft/common.h delete mode 100644 tests/opencl/fft/kernel.cl delete mode 100644 tests/opencl/fft/main.cc delete mode 100644 tests/opencl/fft/main.cc.o diff --git a/tests/opencl/fft/Makefile b/tests/opencl/fft/Makefile deleted file mode 100644 index fd039b0e..00000000 --- a/tests/opencl/fft/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -PROJECT = fft4 - -SRCS = main.cc - -OPTS ?= -n32 - -include ../common.mk diff --git a/tests/opencl/fft/common.h b/tests/opencl/fft/common.h deleted file mode 100644 index 8c8e3344..00000000 --- a/tests/opencl/fft/common.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#define LOCAL_SIZE 16 \ No newline at end of file diff --git a/tests/opencl/fft/kernel.cl b/tests/opencl/fft/kernel.cl deleted file mode 100644 index 3e47282c..00000000 --- a/tests/opencl/fft/kernel.cl +++ /dev/null @@ -1,63 +0,0 @@ -#include "common.h" - -__kernel void fft_radix4(__global float2* input, __global float2* output, const unsigned int N) { - int globalId = get_global_id(0); - int localId = get_local_id(0); - int groupId = get_group_id(0); - - // Allocate local memory to store intermediate results and twiddle factors - __local float2 localData[LOCAL_SIZE]; - __local float2 twiddleFactors[LOCAL_SIZE / 4]; - - // Calculate twiddle factors for this FFT stage and store in local memory - if (localId < LOCAL_SIZE / 4) { - float angle = -2 * M_PI * localId / LOCAL_SIZE; - twiddleFactors[localId] = (float2)(cos(angle), sin(angle)); - } - barrier(CLK_LOCAL_MEM_FENCE); - - // Calculate the offset for the data this work-group will process - int offset = groupId * LOCAL_SIZE; - - // Load a chunk of input into local memory for faster access - if (globalId < N) { - localData[localId] = input[globalId]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - // Perform the Radix-4 FFT on the data chunk in local memory - for (unsigned int stride = 1; stride < LOCAL_SIZE; stride *= 4) { - int twiddleIndex = (localId / stride) % 4; - float2 twiddle = twiddleFactors[twiddleIndex * (LOCAL_SIZE / (4 * stride))]; - - // Load data - float2 data0 = localData[localId]; - float2 data1 = localData[localId + stride]; - float2 data2 = localData[localId + 2 * stride]; - float2 data3 = localData[localId + 3 * stride]; - - // Apply twiddle factors - data1 *= twiddle; - data2 *= twiddle * twiddle; - data3 *= twiddle * twiddle * twiddle; - - // Radix-4 butterfly operations - float2 t0 = data0 + data2; - float2 t1 = data0 - data2; - float2 t2 = data1 + data3; - float2 t3 = (data1 - data3) * (float2)(0, -1); - - // Store results - localData[localId] = t0 + t2; - localData[localId + stride] = t1 + t3; - localData[localId + 2 * stride] = t0 - t2; - localData[localId + 3 * stride] = t1 - t3; - - barrier(CLK_LOCAL_MEM_FENCE); - } - - // Write the results back to global memory - if (globalId < N) { - output[globalId] = localData[localId]; - } -} diff --git a/tests/opencl/fft/main.cc b/tests/opencl/fft/main.cc deleted file mode 100644 index b10b225a..00000000 --- a/tests/opencl/fft/main.cc +++ /dev/null @@ -1,240 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common.h" - -#define KERNEL_NAME "fft_radix4" - -#define FLOAT_ULP 6 - -struct float2 { - float x; - float y; - - float2(float real = 0.0f, float imag = 0.0f) : x(real), y(imag) {} - - float2 operator+(const float2& other) const { - return {x + other.x, y + other.y}; - } - - float2 operator-(const float2& other) const { - return {x - other.x, y - other.y}; - } - - float2 operator*(const float2& other) const { - return {x * other.x - y * other.y, x * other.y + y * other.x}; - } -}; - -#define CL_CHECK(_expr) \ - do { \ - cl_int _err = _expr; \ - if (_err == CL_SUCCESS) \ - break; \ - printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - cleanup(); \ - exit(-1); \ - } while (0) - -#define CL_CHECK2(_expr) \ - ({ \ - cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ - if (_err != CL_SUCCESS) { \ - printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - cleanup(); \ - exit(-1); \ - } \ - _ret; \ - }) - -static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) { - if (nullptr == filename || nullptr == data || 0 == size) - return -1; - - FILE* fp = fopen(filename, "r"); - if (NULL == fp) { - fprintf(stderr, "Failed to load kernel."); - return -1; - } - fseek(fp , 0 , SEEK_END); - long fsize = ftell(fp); - rewind(fp); - - *data = (uint8_t*)malloc(fsize); - *size = fread(*data, 1, fsize, fp); - - fclose(fp); - - return 0; -} - -static std::vector referenceDFT(const std::vector& input) { - std::vector output(input.size()); - for (unsigned int k = 0; k < input.size(); ++k) { // For each output element - output[k] = {0, 0}; // Initialize to zero - for (unsigned int n = 0; n < input.size(); ++n) { // For each input element - float angle = -2 * M_PI * k * n / input.size(); - float2 twiddle = {cos(angle), sin(angle)}; - output[k].x += input[n].x * twiddle.x - input[n].y * twiddle.y; - output[k].y += input[n].x * twiddle.y + input[n].y * twiddle.x; - } - } - return output; -} - -static int verifyOutput(const std::vector& output, - const std::vector& reference, - unsigned int N) { - int errors = 0; - for (unsigned int i = 0; i < N; ++i) { - float2 diff = {output[i].x - reference[i].x, output[i].y - reference[i].y}; - float error = sqrt(diff.x * diff.x + diff.y * diff.y); - if (error > 1e-5) { - printf("*** error: [%d] expected=(%f,%f), actual=(%f,%f)\n", i, reference[i].x, reference[i].y, output[i].x, output[i].y); - ++errors; - } - } - return errors; -} - -cl_device_id device_id = NULL; -cl_context context = NULL; -cl_command_queue commandQueue = NULL; -cl_program program = NULL; -cl_kernel kernel = NULL; -cl_mem i_memobj = NULL; -cl_mem o_memobj = NULL; -uint8_t *kernel_bin = NULL; - -static void cleanup() { - if (commandQueue) clReleaseCommandQueue(commandQueue); - if (kernel) clReleaseKernel(kernel); - if (program) clReleaseProgram(program); - if (i_memobj) clReleaseMemObject(i_memobj); - if (o_memobj) clReleaseMemObject(o_memobj); - if (context) clReleaseContext(context); - if (device_id) clReleaseDevice(device_id); - if (kernel_bin) free(kernel_bin); -} - -int size = 64; - -static void show_usage() { - printf("Usage: [-n size] [-h: help]\n"); -} - -static void parse_args(int argc, char **argv) { - int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { - switch (c) { - case 'n': - size = atoi(optarg); - break; - case 'h': - case '?': { - show_usage(); - exit(0); - } break; - default: - show_usage(); - exit(-1); - } - } - - printf("Workload size=%d\n", size); -} - -int main (int argc, char **argv) { - // parse command arguments - parse_args(argc, argv); - - cl_platform_id platform_id; - size_t kernel_size; - - // Getting platform and device information - CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); - CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); - - printf("Create context\n"); - context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err)); - - printf("Allocate device buffers\n"); - size_t nbytes = size * sizeof(float2); - i_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err)); - o_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err)); - - printf("Create program from kernel source\n"); -#ifdef HOSTGPU - if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size)) - return -1; - program = CL_CHECK2(clCreateProgramWithSource( - context, 1, (const char**)&kernel_bin, &kernel_size, &_err)); -#else - if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size)) - return -1; - program = CL_CHECK2(clCreateProgramWithBinary( - context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err)); -#endif - - // Build program - CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); - - // Create kernel - kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err)); - - // Set kernel arguments - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&i_memobj)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&o_memobj)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), (void *)&size)); - - // Allocate memories for input arrays and output arrays. - std::vector h_i(size); - std::vector h_o(size); - - // Generate input values - for (int i = 0; i < size; ++i) { - h_i[i].x = sin(2 * M_PI * i / size); // Sine wave as an example - h_i[i].y = 0.0f; // Zero imaginary part - } - - // Creating command queue - commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err)); - - printf("Upload source buffers\n"); - CL_CHECK(clEnqueueWriteBuffer(commandQueue, i_memobj, CL_TRUE, 0, nbytes, h_i.data(), 0, NULL, NULL)); - - printf("Execute the kernel\n"); - size_t global_work_size[1] = {size}; - size_t local_work_size[1] = {LOCAL_SIZE}; - auto time_start = std::chrono::high_resolution_clock::now(); - CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL)); - CL_CHECK(clFinish(commandQueue)); - auto time_end = std::chrono::high_resolution_clock::now(); - double elapsed = std::chrono::duration_cast(time_end - time_start).count(); - printf("Elapsed time: %lg ms\n", elapsed); - - printf("Download destination buffer\n"); - CL_CHECK(clEnqueueReadBuffer(commandQueue, o_memobj, CL_TRUE, 0, nbytes, h_o.data(), 0, NULL, NULL)); - - printf("Verify result\n"); - std::vector reference = referenceDFT(h_i); - auto errors = verifyOutput(h_o, reference, size); - if (0 == errors) { - printf("PASSED!\n"); - } else { - printf("FAILED! - %d errors\n", errors); - } - - // Clean up - cleanup(); - - return errors; -} diff --git a/tests/opencl/fft/main.cc.o b/tests/opencl/fft/main.cc.o deleted file mode 100644 index e3f30c90822ce9fddb7745f61c868737af43a331..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15368 zcmcgzdvsIBnIBsQ92<~%r0YDkT-cb{#3+^x39xxsSc2hx zoCH$WBzILM>FK6vwxOk`o9=15?DlLAdq}c$O-SQ%cAE#?2MI}&CS(^v+7L(x&7!^*_ts?w4YCQ}K!jTLGVWlUXKC^z$BGrNXe z&AH`JzXA^IHye&QaLA+%y7=(QfxK){Hr%a43;T8cH~& zhYVdw8<7<&cIsgxnNs4RRIi>=jJ{Ap=?@KR>Daw`ZJpJNu`bm--@ylU^vo^p;RgU$ zN2g({YJQ>SuKk!7M^|G^pk;Lx&xbD=x2fiv*5Gqb#G+9{ONAn_!6mF~z%rD~q^L|_P!DH7-ssa6 zt5*dJ$5+J?v2&KBG#ZKZ!>DTFy-Gj$;ZN>LaOH@eHe!j8 z5lbe7mlbTio{B|xD=9sliNj#pd>tJD|NIKJ%GVl%g0fhti70w1mBbN&-PO9$77rQG zWU9Z_pRV=RD+>oiU5iEPm9<^LU>#dGpeI^_N&x$k4+(D`?URH8Ggivu3Bonw(nnrf3h#QBcxgY;`a$ ztKUjKcMw%RxAhA6uqLoZYY+JR+PbyDjrB^RXSboJZQcL3{u|m`I|Gvmh&45kO7^Bg z{Yo^I>=$#br1^3X5tDwlS;P%7qi-e5Xli$@lK0sbpM}<%;U{LjfSBhMr%2g>$-Ik>lfwv9rAk z0tO;-8TM&0Pb6f7YQZajt@U1*g1r7(e|sp=tBZM{SB-kH(DtmaKa7nrW4fB(0rRm2iU?Z*-15ryJfTj=>DgixIsP> z=5p8^%<-k;>#_~T*PUO6qrQz8<7MC-mjN+3C*w?7%hp-~l|UATg64Z-z%~;F11SG0 z_@U-z!06dC=g&WjXbN zd(+6M|0MdwF1lxxom7X8mH3vwzUPRVrAswvp7ov>9l!oDY8txhxbW8pvpIBVjd|XC zd^^O9OY$=jYy8=DyLB?cGp~Xt1G%{d6Lwfv@SMJsGt1)g6dv0ayWkhDduN3 zu(y4(;kKGbVd&=Y8MvTxgK%w?V}Hz5eC*l%eGo+ThW!{zHTT#$$b#kv>e0_!aFJHc z{R#kFY;^b|yis6;vVj9&2JahL4ywzCk>=i)1poN}vyWjbACp`oYHsdxYL*jK^RX&4 zH;NYocELSsxE!kxtcUP*4-|fjUC%(vK?W@VobsLuc*ne#+_monbBC4z7|b1PfQ(=7 z(Pq&61XkHowSZyg7lP*RpuWEz>+X%*0<6$s)Wel|5e_~s@CnC`5^x;9=&rIf+?$5c zjJx(c&~s=?&E~ky09Rs!X|GWe-#HQ$7oU371J1&)v5_~k|SmjAfu?tU8_ znGbb14Z4rQ0o`Y9EK`^B;JP2Y4YY@NAWP2v7{aDzAH#5>N2>Yq(Ay>ID`(UzUKY{p z9>IX&FwUL^vfoIEho|0`q$tm_*$8} z_BjYC_`_!e{QWr`zk&KebOG}H7*NP_7)ThyX*eKw-PY9_L_|ZCAZ~&v-pEfVXcHn; zT-?2HGf3d5{N{Un;UC6p155h)vA{1P>J8RoN@ly7(@*-%w>JB>__q4)_Gw#>6)}3wG>^(_ z{KSU{?vi}e5SIhN=GSxHe(ph@*>$`lJT$z`;{D@flyj#hZ9Ot+>vx@paBc{0(`WJY zbjruZN*3M!9NY_EoHphCIqZ1(w_T^_uwPY8d3_H1r0U*(o5TKM_9Dhk%-&l9IIzo> z@nwOU)rz)h_Pz7aq06Ul!#18)x3sKKYP)(e2_vI;y&kWpVX-&EtKR#) z%RCKBJl;Cd03GIvZYB4&vy#E8l53_hC#sa`R&8W5nWl^gSAUu9mia(F*!vo3*AJEJ z5P|g36^!9Jx3pVkOkMSe>*2E8lvQ7wIyB8?mU$G-!%F=gr5m#QJxY&q0e&mrSlU956kT~=Idx-5>%U2NvK zx~#Yu|EeadJpU5>mJ%KQR3R8HHftPLR$TmRrK}Zl7rU}hAte_>E8~;Gs}ti>!|@9C zr{n1i&NE08<8!0H7x29BW37>iaa%6s#Z6#bRh6JwJ@Lpc8WP5Eo{Q^I4!_z|ND>j|og5SU&PzsHE z1isCN4-5Q&4bKYvEgSxIj!#@ak8`|&<#=BB*)Qw!88uB$Q*^2)dbaQmE~{2dPXHlb53e2_Z54*Xq$uh{r|9Qb&zML(raQvV4D{@)6| zB;#W`=s)U!Kkk5k2k=?sJ}yt2?>q2+;(#A=z+Z5{ z|J?z9*8%^?0sqti$48A~^D@-|pW%Sdb-=H7z~?*Qf9`ei`ob2Un@6EiGck=RH zjl5SQ@72hAHCkqPug05@-^lq*oYTZPO}w>vS-=m zl*Xkn?zJ7pP1c@u>rGME4mBD(LaiMQS|HG}G|=CEXNOl~bYFyNtAp!S`hwcJRjWDz zom!`FWiSAgc)JeqOY3y2M{M*K8m)a6Cicq;xY$Tza#N?!vPSP;*RvfqHJIEf0dM#& zWP#Hnfc$<^p~>1q6@F;2na83jU7vi|jdIx9ro^s(;C2{|4Vq^3rINd}xSr@W`fztN znQFTe7tSOB^hzaMX3W3AlhUIeP3s*T)COQHJedf^W5#Z6X9MfijpP7~alnAT8(?W8 zWhCPachQ)p4WwXqEvkk4b})S~X29UaZ#G+iHB#6uhU&P@-hAEKe`3)jY(leWTGw~5 zsG-N>ET!*?B_gowamOA56pFBDIG#-F`MzR;3LC4?LbpKkL@~qTHymLki9BQ60?BWX zk&QkP(=fz4k!;e|H+mb>yVHi=uYphD9ZiYkuE0)^6I+Diw(tP9{sN%TUf6m=H^Rv@ zOW%_+qF^NwZ`%p6j=*F>n4=`lnwB;~;T;%rEee02z_hLe#zDzL*h*cLPCBE91>rFASC4b z;gC3%F(E$yhs5th5EAlN!69+$y=)?X3z8ure>EJEA4L!n>Z@=_+ghTSX5rl;P;2cT(ZUiAA ze?1%$A4Cum;&b7Uxb;43>1-hQJ}6^CeY7R{-$4+PUFRta=6tGiklZlKdqE$8$>Jw-6k^JxP2u!M7279l=8cj|dzydVPx#eh=Xf5ZXJDgO5T^nXbB zHxT>~;Zr}~B7D03m8sAf62=Xm@udHU1&(n`68!#YP=SPeI=>$a9QgxqNS#^Y10eDj z6MP-PJp}I*IO?4Yaa9vOeO{eM_$lZk?JguZzKcn`f#{?Oelx)hf;ST# z+K|0bgI5W%U=UpnY~m+)^UIzJ#d z)%gbpoo5Mu8PR!;;8f>D2c46IkGrTc{%_fEaXPazHhebp!F1k+D;zES;NxtjP-TiJ z%XKK5nh7p02^<+Bc)KWL_AP=}68w7vpHA>|1gAPx=pZEY2jd{qRRl*Jyw>pKN$>7A zLOslbpQw1=Crc?k9`fM7elY$g%goc0PBTv`iGPOene;Q6{NRS7UJrc2%uD@o{qk0L zfB0A`j82rxB;v6hdVDuE0bhDjMrMF{dc$E&9}MdQh6Y26>v^4U+K^81!GL=n{+HjL z$v)|M@T;kwfN;RS27q|4$OWcg`xJ)GRQ$Ch(mU zzgggQ5|hlG`Ufpq{x2i2KdvL$Pv#0>xOC)s14W*;iTn|f%dAQkM2_!>sMRd$GCRM( zD#?9*w2zOP;x@prj%gtrXkR>rBE-{qkxM^CHSfQy*Fym{?fowcLbJ$aChf~Q-lNcd zqXa}Q`(ryzvOo60gwIqwet1HX)F2242!=&_{Gx#I_}TgoTxWGut&GPDZA<$yevboI z%>S2#{ToFtWH=Yg68?che;Lpp*(N~Rm;Ld35SH!h=SSkAh|hR5$$k<$1O+~R35fi# z82>g=$MNF1AnnWXzX(_{`{#vyxnC<}CdB_ZG!(O6U1RmQJj2S--b^^`_Fo4K?c1i0 zofaoG-h*kfkN&