From 8d26e402caa1c4c776a3b7fc5f93501a7b21f959 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 25 Nov 2019 13:15:03 -0500 Subject: [PATCH] mri-q --- benchmarks/opencl/mri-q/32_32_32_dataset.bin | Bin 0 -> 454664 bytes benchmarks/opencl/mri-q/Makefile | 68 + benchmarks/opencl/mri-q/args.c | 617 ++++++++ benchmarks/opencl/mri-q/computeQ.c | 118 ++ benchmarks/opencl/mri-q/computeQ.h | 14 + benchmarks/opencl/mri-q/file.cc | 78 + benchmarks/opencl/mri-q/file.h | 22 + benchmarks/opencl/mri-q/gpu_info.c | 55 + benchmarks/opencl/mri-q/gpu_info.h | 20 + benchmarks/opencl/mri-q/kernel.cl | 51 + benchmarks/opencl/mri-q/libmri-q.a | Bin 0 -> 15812 bytes benchmarks/opencl/mri-q/libsgemm.a | Bin 0 -> 15812 bytes benchmarks/opencl/mri-q/macros.h | 21 + benchmarks/opencl/mri-q/main.cc | 293 ++++ benchmarks/opencl/mri-q/ocl copy.c | 50 + benchmarks/opencl/mri-q/ocl copy.h | 21 + benchmarks/opencl/mri-q/ocl.c | 50 + benchmarks/opencl/mri-q/ocl.h | 23 + benchmarks/opencl/mri-q/parboil.h | 348 +++++ benchmarks/opencl/mri-q/parboil_opencl.c | 1394 ++++++++++++++++++ benchmarks/opencl/sad/parboil.c | 427 ------ benchmarks/opencl/spmv/parboil.c | 427 ------ benchmarks/opencl/stencil/parboil.c | 427 ------ 23 files changed, 3243 insertions(+), 1281 deletions(-) create mode 100755 benchmarks/opencl/mri-q/32_32_32_dataset.bin create mode 100644 benchmarks/opencl/mri-q/Makefile create mode 100644 benchmarks/opencl/mri-q/args.c create mode 100644 benchmarks/opencl/mri-q/computeQ.c create mode 100644 benchmarks/opencl/mri-q/computeQ.h create mode 100644 benchmarks/opencl/mri-q/file.cc create mode 100644 benchmarks/opencl/mri-q/file.h create mode 100644 benchmarks/opencl/mri-q/gpu_info.c create mode 100644 benchmarks/opencl/mri-q/gpu_info.h create mode 100644 benchmarks/opencl/mri-q/kernel.cl create mode 100644 benchmarks/opencl/mri-q/libmri-q.a create mode 100644 benchmarks/opencl/mri-q/libsgemm.a create mode 100644 benchmarks/opencl/mri-q/macros.h create mode 100644 benchmarks/opencl/mri-q/main.cc create mode 100644 benchmarks/opencl/mri-q/ocl copy.c create mode 100644 benchmarks/opencl/mri-q/ocl copy.h create mode 100644 benchmarks/opencl/mri-q/ocl.c create mode 100644 benchmarks/opencl/mri-q/ocl.h create mode 100644 benchmarks/opencl/mri-q/parboil.h create mode 100644 benchmarks/opencl/mri-q/parboil_opencl.c delete mode 100644 benchmarks/opencl/sad/parboil.c delete mode 100644 benchmarks/opencl/spmv/parboil.c delete mode 100644 benchmarks/opencl/stencil/parboil.c diff --git a/benchmarks/opencl/mri-q/32_32_32_dataset.bin b/benchmarks/opencl/mri-q/32_32_32_dataset.bin new file mode 100755 index 0000000000000000000000000000000000000000..db8385bb0ea6916e969fdba41a0355ab2a1ca471 GIT binary patch literal 454664 zcmeF(-Qb^ z^Zxz=?mfmHgBNG;VxRpv*NQn;vNXw(B^!ks*WJW~&^?>hhM7;nj#Q0iI0kGv;dtws z%&aQnXPUJxYf?{cY-DdgQ@;NMGk4+~^KR)XQ)AB#v*xtZ{Jo!Ont!`#c6q%pUirS6 zl1p8w!-iBeeYZCqc4nsMr*e|-mHbrkeqrkOsst_lTAoh3RHScdYEZr`^{GMLW)vOL zmbR4ZLN_Y+qU7}lQi)bWsp#%ebfWAyYU7+ppDRtFm{Zfqzuqi5aCt7(ZMl%1+*?A! zx~!lKuU6B>e(TA6-9%T1ZKGZ;yU1F&j>kf5=(d zzxx8!Eq8^UIIq*N%C{-Q>3g)H-Xk(sp3;?;FR9o4H{{m!11)^@nTq!RP6=OsQJdla z=;tUG-dWy_mppgpbW@UZi##d$=(bcGTQoJhY)HfPebe&p!Rff&MQOg%4NG$_JWe;|}e!^R*6sT&h(LUQ|0L=PsR#A7swW-#+B#p~w7r z%#1u-piy2Po-Q9RJeH4V4amaY6R#SBOu1F2oN81aSDv0FG=G z$Qcp>xqe^}J0=D3>w7`mqgY`+I=nD@9x2Q0$w1RI31gxA{3!(fs^(aXvoemX8~B%gad#dAMwzJiKU#KmSV1 z&0YL*^Mh`=IBI)NF8V14`vlFy>6MMEmCwpgI%VOPlQVPMEt$CH z8DHN2G$U{R>%&ESe7JVL4D4AvJ@*Us=82WkanovP`DryTHdWGacaxfXmiEMVm5P0{ zr{r@fQ}C!y9$fU6J3qH>yltf`$KX8}*E$(j4gX8`GXJLg&L7mG{Wog)`!l_t^^pSe zy{F7O-q4BCuc+FA=d`ZmQ~J315ryV_Kr^P?rC*su9Z{5T!$|TvKZa7)oFhh``25`@KKtqLj6<|9wUc&CRNCxzoYqC3q?NzU(2}Tg zH2>2@n$`CzO?!2NCU(0+V;SY@(ciQw zS2C`0(uHrdapO*p+&Q3Sa`rl!f^E)Je7?6QAJ~zapFB&$`+U-Jbjfr)qoy|pH%-rr zn`Pik^?dkRXh!z&_vK%|eAy;t;;Hj8^VFtU_^DS`*0`)(u~RmVammhc^Rx5!{C?bO ziyylOpjWM z5h?w-P=0?NUcsNo)$-?tP5k*=bAO)D)StcU`tus&&rO2-xmbFCu7UjDdL}pbT9KRQ z_RY;@%HeqJmnp=t@MV=E@E{eavv-J@@7Zd2osn-sF^8qFpjc9@cC9H0wR_EPCnyC}8m4r*U| zD^>2gi4tb6r*8+=(34v$X~oZFRK|A+9SmAXr5*F=aP8UDqFF5EY&(szc8H2lg<}iH7LF|(TR66GEO0DvEO0Dv zEO0DvEO0DvEO0DvjNGdua4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{v za4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c}F zaIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2w zaIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2w zaIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2w zaIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2waIA2w zaIA1Fa4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{v za4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{va4c{v za4c{va4c{va4c{va4c{va4c{v|8wl!&KWvs&9bn-*##W!%w$JEDeL$c_{GsS*4I>; zQq&ZhP|J9Y?rMI-k22vErklG57MuE|H=3in_nA_~j+yn_E|_eE@0qC^Uz>jgewuO@ z-RNViCvC2mo;s(^LOGIh(zPl1DYj-|s*$P$xt=UXaTACls#K#8_qz1#NMl+tx)rsg zj^y*NJ0-^VrSZcf$x&eh{r)|gnztTLdk;^d4B4krpP@78%#B%8sN6gnw_qVX__lU@P}zRRY3{j`^dTgX z>P|mLo8DZc6m_mr=Pfr#Qs1E*;rD6i=|^-m;2D*e{E}uoe?zaUeV`iaKhxUe-zg^V zFFKs&FYWA_jKA)2;WZy!`9f}YZcQHiw?T4l)HDS@u9Nb=?3kIwlZU?cWV1Il7ay30 zmlp8i6X(46L!Y$V+&vwaicQDP4Bni7sy8qB?alQ%r|0ma={ZZb3>@7(10Ua(fz!Rg z-+6twOhX@TKE#Jz7W#1goj%;*hz~bJriGouea45w#fKkk^WpvTeAs=k58ub-1WL})qdfTJ zVRzo~(TywScjIY|TzTmT7mi+?jH6@z(Y51$D94Xq6rS%V-Kh4RMs@m1kx`%N(%6r* zebRgKpYWDw_-o1&{*vxCd`>YXo>G|CWAeKHfYNTcN6q`+p}$3LQRe5@Y5&5j^r`Y? zdhp@`&6#wL{QZ+?$?nsXjZV_`gyYn^+EMCxL}-5*Cxx#+O!Lwoq}q}DXyujNw7$qr z>OXE9rN6wH7U$eZxjV0;-D_4;*ZV7|R_0|isK#RY6uE$IET2ndkIkYyk7v+^e^cpJ zrWi^Lm`KyHZd0VvXc{|vIBi`JNx$dzqnT5B(u^UU$+vx5s%)B5SmuV5`*|%&y{iiO zM;MAOR*tGYD^By~6{e}C0M&Syi(Zb*N;7N zop9`gV<#M2IJR(X;n>2lg<}iH7LF|(TR66GY~k3#v4vv`#{$O!#{$O!#{$O!#{$O! z#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O! z#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O!#{$O+#|p;^ z#|p;^#|p;^$Nyyq94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v z94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v z94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0v94j0P919!^919!^ z919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^ z919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^919!^ z919!^919!^919!^919!^919!^919!^919!^919!^99uZHaBSh&!m)*83&$3YEgV}o zws36W*ut@eV++Uje~zyw+Y`DY*}SkrqjESd9UtweerAuudH$IrTez3G@}_{9(T_~c zPc4i`?xwIVbv2@8@|adyN1(y|D-l@hnXR!$axnnTj;Na1AOstsXsm(}ZT$ZB4Yj zBfU-6gO>K|OSLaV(63^{sY#8|v}MRRa@#bKTHTDHU7ph^RfU<ggmV7 z;lfuZB;#1uWSsHCU)q1+4;5ekn|Ad7MJ?t*#{+MpZJ*1-#?^B^<_b9;k4n6X}MG*yV z(62n#Xm;i+)Xe=7b$)T4t|gqMQ%e%5Y4~YcSo|by{&<`=Y(7dG+9(}QX({X0IEt%z zh)z7%PYnm`rO4mAsOG30^z_eG>KL(^t~}j9O&YGFpZizSt;{RQYrryEm#~d^UBY zjr&^DnL|x!`M&y8Yg0}7Jhw8*5C>gpQkI$p7N_uUK~yR}KiwRbi$;{lN);bxpxV>Z zkVS3|UHZ>lJpIv}dG*9>NOr?)PnBr=Q&>~w$4=Ao{u;9?ZocU_J;vh=y7-$aV^)_qPy>x7RxX1DF`v^yYm+2j)!ZU?cdpa%tTzm=V#tyO0C&^AYHy{4v zoKiiT%`T;Ey2=ggw@bb3wld@FQ_oqp=cDD;MYr0>)ral$m{T@G;Ml;ifnx*5296CJ|2M9lmbf;we_%}5_YCPB(cXO>ZWGrzPK>|es8Z3@ ztknKaAxnYcDbOz#|LP0l+v%(*2` z%&68MOfkPd=0k6HI#tJ$Rt0<0P_InX=uLJCO7bW7tp(}!)L`1vs{~D}U5>gGHsnZC zg)+UaN$<|orxTl-(uygqX>fQ)YEY{?1qAgWm(+vk=F1_p>vS|#xiW@|A0JOS_f8_u zwNvQpoayv%+)O$*Xbv6jJfGG#UPQAiFQri>S5U8ftEqX$byV4XBNh9+nQ}bdMyW3E zq%TMJ(1YFk>Fnx5bZB-wts5&eYv56e?vy~`jZRUsN{Ljd#5pRG_agcET%nY1*Xiu| zn-my-n=Z!RrIj=8)2cELY52iMHGMhonlkl&P3g0|rh406QOT08 zDA%T!)YBV#+TC7YEb*M~KYvE=eV);WqE9J^p3q}{Ol?a)q8m9L(wy)2DKX(59iDWT z7CG+Fs^_;T`NW&_H|q^LwDKDD$$6D(#$Kk8-!4*_#usSyx^wjIZ4y-tPNYj6PLp%$ zNh-E2ff^?qqdHfPP~}@f?XEjX6XPiMz#)3K_yD;@?W5N<_R!gkyXe%l?UZcxR+?FL zGcEhLfvV15N9#+jp@*kel26O!)Z*b1I^Ss#&A&UJPBolMV`UaSC>TrsCQYNSk7DR? z*-7+Z;&@7RVGKoN97WM5Z@=9H4q9b?44rFcM@%vkPez)}(_5QBM@6&vT0t|Tm6s_x_lYC6 z<_?E*M}(taw5OwA_V1x3#w?6~ex|Im|Fl`ow^vR&+dBR^uRY9W!%LO69p*K#{)=8lJLCOI))fvOcjH+&|eFLy}3cfhpudpS04jm#@qV z_mk*edF6edKshs@n0$#UEA>YjvE!>sf$4Rm=lsUfXJsqNyrrX5-`_)?AL%DI&qj&I zt&y_s6MnC7!EqcM$HTD`j-7DqgkvWhTR66GY~k3#v4vv`#}!&Y7P1h1yoV1KG^-nVQL6DtG^BA74NIFur*|h(rYeb)?C2TF9dd>|CZ48JXHQX4 z_fyn6;3TzkoS^=d5-4@K;}o0g7`6F!gpx$5)NrAf0hTJ9j;BFw;^_V(%q!a*q+VzC zQ^w$Z^moi2>Y22Q=A_w42WUH`?YWiCPTEXGmTsi3>(|p-?6v1!wwjVpSxJ-oET^m0 zmy%z$#WdmJ0^(KkXmE=;)HvlV`nzogEvqw)+B}Y-_5&x;*`MPnap+iz{xpg*o*qvA z%cH1Q*8x;6cW*kA*p0gM>PS`pwWb~8o6)0x4Jdn`+BE-sRhnIpsph~6^lf)3y7Hn3 zXpuQleKP1Devs>w-%lTo@XpH0c5&rH)cx6JjY7~i!&VkV#3 zYlh_AXlnOaV%l$-VKQDHW5&D-H&c8Xn$Lb^O}{ib&9;Yb#?mE6o2knjK8+eVBD!7- ztCM}zq3ZtE<3|>0=FDDir86M-nzPY@;Ml;ifnx*5296CJ8#p#_Y~YySnBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV; znBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nBbV;nEvN@ zUXzib-##=7yBBjeY+~y&j%5R*9j%V9c04if z-`w%;XjWw!WJ;zTYci*wYJU36HFq*CGm>?^S?#yOOvrW6^vH9>)Xsm#6e)DsWC*%r zz63ut*NVO~he~`m^%f^1@$;bK%Tm$mTxrR7Wd@p%Co_Ftot=8*&rLVi<)hk#0w{iC zFck?ZPAj&QrVJq!XzX@FUy4?yuDhz!^%8Zc#@+^WxO7u0e4r&QE7y+F9qvS^rtHlK~*abr2|Jt(%e;}sm`OZlsw-AvYjW<;I%PS^yyT3QD6qG>Nb;FZJ13y zFXm#8Z~=|)xro9xFQKommeF>sfp_n{nsRSjOV!%1r=5E@(1X((DPq$mD%5H-oqDjD zQdQhSw?}TFy^FTchWT4)RMZwqs<4GVKG{qmQJ6oz-$d0LZK5nIHqy&$8))LM^|UGV zdU8p%j*5I5%j#&5Q<(th@!vt zr6v`^DZEct3XJVQUstrI;wzhxV`f8&>R*Q{RIE-9+$z!0ePL9pRau()p*Uq58cfB$ z6r>!T@=)+GKiZZhGree^o&uJprrk*%wBpG>Gdb&Lv$4!`Gl6cKH6_oR@fnYqjraGP zX$v=-t!B9yeQA~%+hl^NJ9VHrQLCkC@{-K;?gdSelU`;}hUbnYE%rFZcN^u{G%Smw z+y_Tk_U&EbBYxy@o*Xp6IcCXW=l|Z%|9Ac0KY@_;n?f(#ZygrY?MPUSIq4l!8dP)4 zygtfN$7h9OioZHiUVY>k?BQo6YvQ`%Uu^M~%zWbEdz&WoGz3H{GXxHW_9nqp>fO)9wadbmzDaRjQheie1c2 zf4UbWSI-byzo#TM>0W_GT!)Z&J0hIpNU`l)w zO^yL$sr|bNl&en+)pbhRhfsFQ~8eDUIUOVV*P+HL*$$@@IKcyvDQ=3Rg*EiK4}3I(wD&OrWBrZAt63+5%J2sf6Z{N7QVdmk>r zM@yCBw_8i|^Sotw>-=(DCRGIBnn7_U8(32Jrv=8~p!1`M)Os$8m5R z56AIv?1W<{96RCI3CB)2cEYiRV++R?jx8KpIJR(X;n>2lg<}iH7LF|(TR66GY~k3# zv4vxSV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCV zV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCVV}WCV zV}WCVV}WCVV}WCZV})adV})adV})adV})adV})adV})adV})adV})adV})adV})ad zV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})ad zV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})adV})ad zV})adV})adV})adV})adV})adWBuQ|86MlMLgVig+H%?HP}h$I!miKh9(FDJ+^~vo z_J(b7UJc6}`z=h1rg20r^m7z;4R$=}8RnROsg9#|Xj_NN+CGl`UeS)yhbKB_zMJ8A zm3M)oYV#G2l~Xr3u=D0<`sILQXMj4=v_0kMI_rW%6K**Cem!tR7J2Eo(CMS2@PeO? zN?lz{|4|-h*`ieD%pNcE<7|3U;DxVgkU#&Hd`5%(o_EOxjN4O#XfoOob7XOv9KM(_{Wr6TN1-nYT06 zY<12uN6*YLH?Pk#i$^RrP1dY5H{v#!o3nP9w;l&g>wjv}^*LkaRlZ{WMcp$&|6Z83 z7eAX9FaMcgD?Mme&(xGuH$6FPWv0#Ta?qnmdFj^i01EOgO5-9*)2=6>G`Djl%KM=P zb&RP`-HSA%R`=Ra&xM`oV(W0aU1$JZ|2>#;Uyr8LZN`$%g$Z=8S_}p0G};$Bllaga ziY>Q*dLCL#^Fo)?1G|bcS6)ZSPi>@;b+=NZ%R8u9%RSWo{(jV?AEsNcob<2X5t{tv zI5iz|iuiXT1&lmTul`=5?jx_!=-)TVcgS5z`S}5b_I^V19zUm_EnZXYq<1vX@rjyj z{Yp2o{-oz)|IpruWSqBgheBx0`{xaQ@XO&IEv+jHGn+fT7VNiOW zdNu<)`)1^V=`wNrmdrf3dRDIgFdK&r_2bgsIl0uDTwJWQKj%XaUh>BI_}snxJhVqa z{`Krq_2A_XilY1Ab&26LV@UpnN z9Qdyu|8X>6-{B3p&%Q?N@wPEv$`$J6f>c&6a#WS!+I%uMOX-(w4nC zx8t_a?fK074%}#KNB(3x@rHAqx!>(BT>WWRF7l=u=lRf`bAIf>dEfWskXPaCK)%+y z-kUp|?!)~L_T}ic{dnT^{v0!C0FQ4rkR!_s;x-v0xa`{q_BbBN#}-8KfF6VKjEC?N zm!X{T)KDHgZ5W@ZJN&;qhU3F<90$j7a2yZE@o*du$MJ9+56AIv?1W<{96RCI3CB)2 zcEYg}j-7DqgkvWhJK@*~$4)qQ!m$&Mop9`gV<#Lt;n)esPB?bLu@jCh99uZHaBSh& z!m)*83&*I@$YbHy!m)*83&$3YEgV}ows36W*ut@eV++R?jx8KpIJR(X;n>2lg<}iH z7LF|(TR66GY~k3#v4vv`#}SJIRC=pK8i+tSME0 zjw#-5feFDHdeDd!rqI+iCg0)>#(&cmljGnHlkMallj+(4~Trt1u-Z0`M&SL)&7LC;dAK+ke2dYm~mbb|LBMPAMO173NF3MrEP|9GDi*KZC zd$-V&vO8!|{BHVDaX+0nd5BKdbyC7*r710s)8l*SH|(59v!9=%so|HX0|oc?l~R5_KG^!c}IujBdsm=l~S(wK?$k

1r#xbGVmc3tkqp7lI< z#Gm9`ds9m8)5Mdn{7ubK>%6#n?R4xw-$1=t8Mt5ZjJ)8yFUR%D%$JjA<*UoG@u8CF z&pVle-CE}27BA2%7@3DdUC{47IX^#4SCF^NF2w151KD#<5XX84^Rk!_4stEZC8CP) z_7}x@VT+RdDxnmwC{~7dEiB7^|H|?84i)%;hH{Bq4n8%+@X`y!n+sIrcY`Z&r<0Z0 z!?!A*Yg>(-YpZk8iyHhRuoj2)sLk`&)M2+fb$LXF`s`Ar0Z$v$kaMhT#M@3c=FksK zI3Z&*u3oA+OT!i%*t;c<8`p{x7qsSo8{2TU18v#wXghW})1FVA>%jdkbmTARI&sr8 zojLYs7hZj!D^K0jjcY9I&Ns*P;0nEaa_@%W+`D8iF6YymFTU-~rB3wW4omuS(?0z; zeOP~zI6r_N%^k=)nhj#g6v4ADMDUu)ksRfS;#BXWxYe@3+_T0IF8g5!ADcUr zy~+$@pDV-oYTx19G{p#>xp)LmE2lg<}iH7LF|( zTR66GY~k3#v4vv`#}2lg<}iH7LF|(TR66GY~k3#v4vxSV}WCVWArSi5;zt(7C06-7C06-7C06- z7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06- z7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06-7C06- z7C06-RybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBT zRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBT zRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBT zRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBTRybBT zRybBTRybBTRybBTRybBT{x6R=A1xH>xjIc)iOsXayqkRriy9r^=-RA>WAnUmj#Z_M9YQj%xE z)YKziS~}^Uo(km3NaOu7)8nk!s8Z$}v?^n6a>{`h+rAX-89~LK4W;nwOs=`75qFQJ9S3JqIiLCT zY~UigdvhtZthSOiqF=gD$OfvielrcuyPYbp+C`=E?xTlm57Lu>ctRN!O(=Pch8;RV zk#vTNqJF+pt&4Q=+!YFIbc1?dxlMsh?-N~lL?atMqrFKlX@AwX)FS=^4J!79?yUMw z{nP)Z)Fb}Uu>mgJ3B7TRuDbKs@yYpFapddWRGfNJYTjAIiyK`@%Z~?pa|`qUu080( z&ZfS+;6o-3jLE`1a%bb3d$RNKDmnPV<(%BP1A6aY`SYIvdHK_~d|Yp60rvb^kh4bx z@Q4qAT(C!B_PHC(0L}u%;5htz*4Y)_fM*JYSG5c0-!Y<94^6IY5_$Stg-G;Z|!=qcWKXSJC*w*}J zWE*aPb>ddN+HtD3?YU*`4qT^9N4}r46K8Pi%nu)R<~rJi8!zt4U;20BB2~Jxt8Whu ze}H>aPj=}S&V`Hj;#co`v13bb&eypQFU``IH=OFrRipZGgnxf7eX>7K3?IPZsR#1C z^#l0{4dP)}2l49e5#0J)1g{?x$>Thu_~Y~_{*-PoPnb5Cx271v-9`@Ky~yAN9fq>^ z*`b^RIecWvF#hv*7@u!5oI?)|XHGwYA9NYPe)~u8C-;%uz1~P3Hg6;c-yHd09>Z}Q z9LK|PJRHZvu@jD+aO{L*CmcKB*a^o@ICjFZg<}iH7LF|(TR66GY~k3#v4vv`#}2lz_Gxwz_Gxw zz_Gxwz_Gxwz_Gxwz_GwF0y{w9Sm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RT zSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RT zSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RTSm0RUSm9XVSm9XVSm9XV zSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XV zSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XV zSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XV zSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XVSm9XV zSm9XVSm9XVSm9XVSm9XVSm9XV_`f{P(Xmx%{DsnCSH9S=`vEx}SDUwXJgqd%(V^8A z$L#@U9L-{0I$oT2H|ttwG@YO4HU5K2n0x;!nk7>jnwFV6n#`;Fo1XJWnHp^-nL>GE zO^O%u&691*&9OnKbtu2xj83-Sv`=svGd02FY)mM>&G&QnF%n zyG<@P$-RswYlbGFjq-#aKFd=E9czn}WI zI7~IKIw@DvBUJVJaVpgM6qR|9NZ-4jr*E$>(WwE~$m{z}>N)%_P5t+P@{fK(ZC zl_Ou1%g=XoX}~AC^ZYBlYW0(n&itW(CCPY0ee~P8xpRs=9&9_N;I8RX@r#3=T&`mp zu97k>C)<*aTh>X>RbOY|{n&e5nI{u}JD8cb)y>Lr_p@=vetx{|cMcwfb8n)(aE8sy zJp3d>KCUn`KQBpBkX^D#KkL5HI(u$O9Tz;#-p{^Ob{DxZSg=9Fe{{ zXDnNT%eAV>4~Eua&-t}^*Y-Mmo-!)*buMK(Fk4D_$M`K?1r3trv z+my#VYR2iVG-vmtEx7&GmYio!E3OgMnlCqP!&cU%GyYkL`UD>BwH@@i8o!`ZE=N=tw@EcJa|0If6yp3Yt&rzKFYt(;v z499VB91q9waO{L*CmcKB*a^o@ICjFZg<}iH7LF|(TR66GY~k3#v4vv`#}P=vyQVjqaFChn|`(x!;>g6Mvwu!G+2OCa1A?Jn3v~I?7f# zBX#+fg?4PpLBHDMp=#+1(yU`abSJ7P6$&av1Mik2JJUg)RVq=F@6~9<=GyeSZ38Nk zp(%|y-jb3Ax1%hDJ5%R--D&%*-t@D|0IKpMl4fohMsJIZqB)btP`O9r=w9eV8aHn; z1-_p`=W9%-h}E&^;hII#bS`z>F`v?SE~1^CmQb_!W%MW0N?O%#HB~>omfqyuK(j|| zqH@=^(Aw48Y0BnZROs?v`t5U&99`n*(@{%7!AEKNvIH8L=`_`xokYidF3|mXmuXk_ z>(qbcE$UV99;Mj&koFaOO278Jpl+q#(CoeMDPQr=bZqlCDv;+Fxz7DdX!Ar<9X|Rf6sURK5xygx_@kI*WOjL#^ueJ zSFc4gvF?enn1bJ}R$$sf91&_JhD~fGjxTI2mhEXPUNdbkf~UEt8`M$k`M8s~NZmzT z65CbmX46f)u5=d_uBUkBK`-%7uRdb+rM}{+`u#=k^#jB)X#>UiBL<6~J%)&%zYY~= z*$fx^Tpc0KFUEW0k)y@TeIT}qwG{6fT8sO}aN@naL~I_R5~V_3^lL4M>1t7g)nmkr zlVe4%02|RS!B%|p)=m`4?Zp=j#*3#~Ob|DlPZaldnIwjGoh&x)>>wU$J4NhnJXL)E zcd8gyGEGd#nl65lW{7=m&lHdBoh8=C9rxy-IpPe1xniHxxnle4^Ta+&=Zn*OE)Z{g zSRfkQStuUH?(Jo?SS*ZQEXJ(+_##Pwpgi|fVsSL?+yTp|a;!s+b;z*} zIo2V^I^ycwUa;!&=^~kXvIo2b`dgNG-9P5!|J#wr^j`hf~ z9y!({$9m*gj~wffV?A=LM~?N#F@A>YydF8$BgcB=SdSd*kz+k_tVfRZ$gv(d)+5Jy zycwUa;!&=^~kXvIo2b`dgNG-9P5!|J#wr^j`hf~9y!({$9m*gj~wffV?A=L zM~?N#u^u_rBgcB=SdSbl$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQz zv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e z94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$T5DQu)TsDE6A~e94pANf*dQz zv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e z94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c z$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pAN zf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSPE6A~e94pANf*dQzv4R{c$gzSP zE6A~e9RE)r`#ir&qJ#IVQXe+ti)Sw6U0ts6r<3FO>mADY8IjG^^A`0`J2tdZuL^ch zJIz|5-uid9nx-F9>wNG%Czo651P4ELQuhe;(<-exHTj7;{dR^rYwKI}E4$C?ybcxW zci;c1w-gu(&J&GU(vEZq~@piL?lWq$H_Yz0p^sH6FS>1Z!T)!>C`3pOQ3xD_EXWLxyGkJ%E zr=5=q&(iU;W5-Sj&&e4f^~X8kg%5t0$%4y5dW&m9di)I`!{xS+IpnU8_3^%tbg3G85-_ZY2s2 zTZ_T=ZAFil?ZkrR=Hl6}9mI26I*B#Y`zG=kVASrqHC#)yG$#){nwY{V7$w&JE-J8^8f zy;u-8PPC60FYdfGLELm`qBv~jB$1ApEShz45QqJ65Ie?95!2306=%$uCf@5fUDV}H z7Xz-$5LZo^DgLZKOPuy#mUwW{Z1IHQ9C4-Z9MRlvuIQ6LS8Tp>-v9K>*8OX&!(@@W zyD^3Hxz&f%9$HKKMP4E^{i4W$8!w6fncw7#-dN>o-C5P<;UHDOAYLW-I;gTNmZ;WW z-=s1ya#f|+9amjGbWY`{zp5JW*Hcwx<*(B32v?noh*r%mQ&imsC96u-rmLcSa#U^~ z-m4~Bl&IP+DpwU;t5MA^smDuA8}Z%x8u3T%O!$%&E%|AO+VW9%JMbO#U3j;*-T9*0 z-u%Rl1Nd;OA$;4pBltZ#fG@bn@pchvJ}7N0Z&qf{?=YFf=M9|7ixX$^K5OUl#>W@( zTYMb(?6?&?`LKq+TW(-VA3 z`&0bCKBxI%aF$afz>SzQRZEy2g*$e1l)T;1*wKeTQFVdY3=)`W~Nt z&5KW+>cdz3^W{Um{Q0$G0{MNff_U52A$+USP=5I4aQ@4e2mHPz5BYx2Bl&*th>yJf znE&@XnuqBU@9snS{#9DuO4RZB$Mn2SGJf@UlUV-Qm^gm;?sz^sFoDm?N#rLqO5&#s zPUinjPvJLgd&)mN|BNpVe$IDENafe%zTn?|OXJV}P3PT>GWk!fv-q%1+5E3=FZq<7 zuXyWTuleaca`9|{EiQQmCvWg7x0b4-}CdXf8f*H zKJvR)7xMEb7V&!red3Fo7xO+}i+NqrXWq*D3*YHL3BP&HSDqXEjX%=hJ3s5?cRtL& zl)vRt#&@6agSTz}lW$)5liwd!&fj*a;GM@-^7Wck@l(>O_@$SB@!b61{Krmz_{DGk z@S(S>`MgCnd~wHG{^hG$KJ?;We(RKfe6OZ;{8Oclk0b`_`d1Cq&wm=IJ5H{rF7c_S zRvXk;o6oDS-Wy(Dy}D@w^&7_q>ikCy)LTpp)gCJh)uP5web}_2dj5)r>iEYE)rrQ9 z)Jq&2sXZbZsTVeCtd5zBpSucZtX^5$SnV~riTcp(ChGd7P1HTrP1VI`nyRP0ZK_@} z$VlCCuaSB~vXOdo`)2A+tD32|Ml@498yKrCCL61FUo%#_78aC;9)bd_4 z{GLKH^>L$?>PFLAs>LfU)s}Bss=s${rCz$GmHJ|EEA`Qzt<;v#S{=Bjwfdc|wK}Uo z8}(`XHtHs)+NkZGxA~v%VL^^H$gzYROUN-rjwy0XkzyTp|a;!s+b;z*}Io2V^ zI^5yX`a;!s+b;z*}Io2V^I^G4sdFA2WZ<{4w*#%pWs<%--i>?{l&Dx!C($?0qiwJ{NnR zi@jgX-mm`udcXSd3|nE1m96k`h>Z|5OAw}2gWy(gpm64ACn0HDGohJ#nYy9l6ZO;T zJL)27i+XEfUv)zCbKdtI;T;}oIiG0_!((Ii=3bu zV>U*0-La3VO=K+z^1nhnEBv`Zi|VA}akuFA+l4geOjGT|9cJ3ISB)}BN3Qd^in z!wpZ<$M~CaW)y|cYs>v~RX>@gytJaduaBkMO6P$6~Bkp{Y58Q;+4aw^Z?Z~3P zeaQOTqe z(~WD$ulXy8@y#V9)owls-87Sw^>ZLz$Lz_yYXUjG$%;trhLd%7`jeY2dyt`fJCe%6 zR>WtnF)_?DBnjWDx#f$$asyhwlXB!^_j92)@y$BxAxK%SNe%2)Noz9JN;S(jNEd52 zOMmZflg#Grl5X_gEBQ8dm9m;0luWuGmX=RCDy6v}mwFbSlqzSRmL7jND|x%0m-N#w zNxIQjrS4WYq;B(XOOLMImCpV0lDatiNsMF0G2{4u2giQc$Ns}@ghxvSp=k$8VGCNF@IKXa=olM1 z@m(4hQs``cPiPo zYYM?{;31{;97qyBnRMPYi5Q1YBxgTPAlu9*kfjdeNyedZ#9U)f=KQcDNBh|k=Vi7; zzF|WOGRKmB&Bqem*fGRsr%0%;K;qKXWce?i^l!}*k3K5Wcqk#?2XMsK+?t&IWl2oq zfLz@-nuK*9MZSfPAPxHuCr?idCBt6~CbMb>lC8}LkTN`rg?YWn>T5kou~m1Hq3udW z_UcSVZ0aUlnzxE0Ys+<*F*am+Yo9RK%oyv*8Gh_tp5=wU%{*kLK` z_%u)$F}sT(Ep9Fpt*li0b$hN(+w7x$cM~kuw9b(DG@qvW_sLf^`@&Nd{gSQf zxhF|Qj{B*;HFQ?({AQ?nb@mFkZ%rs|dVQGIF2-4V_n*7g{Gf-ns^4*Kz0{Rj`w{)L z<3GgEQL&-Y#DDhWS)Y8epX;jn*^Q{kY+F_2L{8OWsD&!bs+7E&?@qq;JkFg z@muRh)m*eTEfn(^f1=jTGrs1^zw*GYPmhN=crVh04_LlV9aU)t3TAwz4Qz`vDS0p`l zdnq+|_(VE9KqF;*36yS@-;#pt&PnY)9+X@zub19dPLn!*u#{TNw~$87ZX!+bD$o?1 zi_kn+byPETQ-P7glJ*?GHj*QzdZx4YfqX7uvrG7N&bWf2cK2ST}_ zH)6T0)MRc^P8v5zdc_$!zvIg57jbKxzH;5de{w@IesiUn|F~TX>tp{nBs0vLkOrTO z$g;rZM7z2LfzGW+;?p*y$IkX7s#8aDM%$Upn`c3$m3Aj1_xB<;hJDG11N}+=uLDV< z{Sean%rJ6UGlJMZ8AW`JhkLBiGFU4>dtQ(;f98uj7YO!ch6A?m`m$JKYf+N&SMHB>Jt zxy*Cz(p9xHrfK`G?I-`9xKsYL>74vcxFj#lc9;L2+a|yF7v)j28_RVSm$ln3UZWvX zO32QfqpCm~jq1E{rfT8*EY+$(it5(a8>;BkS*mZ-QpwaQUo|rpDyi(Q z@@ic}vb=7QZq|8RpIS+>+^lR)E>Vm&|^yca2^zG^}$$5scWby8x zwds2|Zlqf-_p3`=vhOPXefwn&Y4mq9`R#O&bjUnGR-~UHUS~YW`BCRd_^)&1Oye`e z_UZ{TBkvHgZR$ekfptXVJdZ?;wkL~QjU;;~TaZuPj0u0blpC0x!2R5Oo0~Om17|#^ zEBEldk2M_WV)d%|HceNHdQ!%?F;e^CuF|sbTT<5TXvwEKLu&KntF*mQeY&sMn7%h@ zN9!H1p!m%?wC~JeG&bInHomT=dtcg8@52*miSJZ;#eNn|UOSIEw_ZeNPIjb!%$HN? zz)Bi(Xf8rVG z=-skabkgFL)Zc44ot3bZhQ%+T*@k~ms+&QLrca|+Pdd=$eiP}v zfN`|bIa@kt&KUaaFHeuTarDePpt{y0X<+A}^lsGv8h@h?ZQZm7-DTZ{77Z|`j;~wM zW@-~Ub6yi#(z8BIRsE9gpZF~8=$9*9?(kIdT0kYQuYpph)az2uMkl1qfg7ZU#`aP_ z+Ew~~>$_&f;~N_D>gF07w@S;RZOQGfT}jH1-egp#!DPj`kz}r5O>TBklMP+QlAIBC zqnB(w%wfUd}2aM zdN=r=K4u&1D& z&ngDWx}l%6&-<>`Cf*8>T1RzO)qi(E^=PxAYFm}98W)|V`gvWidJ%R*)jfQU%Bs_A zax{6d<*K?w+J3sF*0Xwt)oH8FHff`3=9xn}tl3Sz2aMyaTba^>qi@l?`33aOwZ__p z&0A{C(oD7A_4TyNrf1OP{ugP5*q$brE{eWr)s)!xaU=M>=cL806jJpllbHG@6aP`+ zq{W2O`svCxn1mBMKDwg{ToKAe_K z2%sU4?$E}@=jpLhHyTy9gx^$a@dp*{Z*6sI@yBoRf^NM)uS z*?5d2H;jgot806bQ6cS#QGqe>Xj+e~T=<1c^-Je2)RVYL8}D<&Do=4Pqt|ls6KigU zuQ3-BthMgtWNU4Zb=1;)N_S0ag8+?%Sv_f~JWxt%I9D3I#Z7AAds({f87K{&r-ML#ua~*8yw%AE@AD*k>`uVi+ z8As|obQN`&u%3oxY@(myoayeiJ1MX1rcUx6>eYB3ZFprL9ld%V-R``XdOmWYc0+g3 z;GAu=@Zn~fqFqm0Rj#Cq7c8Npe$Js5bPByk?dY`Mgl23TPD?HO&@+=d(kG8i=(}U} z==qbMq_s84lIi|nNjLPY)NGBT6g;b&-=HGv!H*)t<*X>oQGOZpEX;+;z({ zT*{aruGBt>8_=eZ8@scPbMI_S8eeHghM9IJuQv=J(v1-$;h;5HVImMZz=oJ4+mXr~ zd!kIYCvkmk$-Oxux$%Z08FVDMR5O5_^6ExDCA1+YcQhduSIfERqzuj=A)FhQbDG=o zcq%u1W&_T9+%)U|?8l5_#xdjgzn^1o-d4z8Ya@)FE(#MbSqpu0hYCS^dkXVEw-JWl zYbemw#p)EtIQ89Wp6cQoF6y{Dmg)x4zxgIJ&+uk-sVe`vzS=gc%;hh3t7Ny6C*+lv zJ><`8Ps{dY`{jDtneyEI-Q@V4nc55^2krGE*EH20y;XV9msFZPF{-kXSE@h8IjSi8 zC#s2^d{t9CwyS<;nW~lzHzyrV?xtO&wpv}zLT#AiQSBtF^IFs29@@3Xk7_SlE!Q@v z>#tq8IfFJD)S4Q0{mQ+n2q)R3sjARnfXdB9rK(+^R=H;mQ*AD6s|qiDPj(JENooy) zxi@2(h)=zl_F}_E+MNxHXnwyiI>uxPz23rCYN(mdIRuX+ z?rvvD^w5XIz~~7%?e&!Cuf&laP7lbnJ6FjPGiS2>&|sq9`kXT^H{jkhY9YyE5~UtC zo#}~xQ|L3hJ=A#XS-N@CEjm8hmu_o`@fmgElUL4J1_L5lSv*)((tx$CrmtlPbc%v!LStZcWNY@g>! zqOKhx3{rJf>VjZP8z;soj0`#5oGew17}afryb zTuG1XE+lD`Gnv|EJtHLU`>3*FXtl!05JxVyEi*?qGT#i`x*!jRR=jS4g(Vk+B?~b98_WOG2(x?lP z!K+ZIvpz-2dtM|h-D*I;lo`{?^!9XOlb$rnbueu<(2{c10zHsBjxNzpq3^EEqBrNw zr|CzS`~sKs z^f9+NHIF;~=pWbD#FVs{)R{Ca>_-vj7Am`le$*LJ+ z2nn_(Y5qgWi={nDW>p(9>Ow=ZKck47vQEbpY`nw``8b{Xpf0nH%zkTS)Xz<$PwgnJ z-?2?fz2qesIXsn`#*|4Df}7CFgtk;fXYijsW*jq)8OQ(q9M2bQh4j5P!aFZfaNW!a zbo+3D81xb5Z!#A;4L1^6A1PN)64TVXuZO6woN!k+E19G2JFBC5h(i(|a(@Qz^?8zN z%FOH9Rci;y4)-?7n~tB5yO*7jC%BxD+lTJJ@8z8=x479!?k1;d8`ezL`n~3)(!QNk znJ>?&HdW*A)$(qZs?B|kYEs+V`iwY{5a zFPGHPOjR~btGP>$&mK=z%}z@LAFSn`J?}#fT{=KI>O9H0KT)JzuUN8+PayT274p1c z6uHBBlQomhk;M_4$+$E^IJpt2HS*+)ZlqfG%HiJRg-A?w~e&; zm=Few+4_eCW;b!L)u+I9(n1kp7K|r1pIx>6VTWH2y&-wKoo=bM3sT+y2{> zq+O;3^Ul!2!;jEUpLWod>zC2ej+5z@9zYlSThQm}O=kvuf`c~Q%?}Bst)EP)xn}ib@1|D9mJig zgWg-~;P0$D7!P$|)V2=B{`?0&Q~rVZ{eKX>^B<&-{|770|ABMSU-%pL7Yuj*h2#8R zaIg0lTEx^sYu8%1$<=~SMGcG&tbs{OYT$E=8fc-f2E9`?{B2teiSi$?Sn&sp8vlVK zzQ5s;?Qh@;e!*^+Uoge&7bxCUkOoz7Ah8lsXI4VYy9$`UrUI^iFNZ^0%b|V6Pgv;u z6Q-8^fU0#rVETtLST(NI||k31+g$b-IHbK(8R959`i13u4R!yVh# zP_BIiH{lg{Ja`Gu2ET;yLD^t1ARE30WI>aGSui;y6CMrA1TiWDint8$j7bNl3F%;) zkp{gNr9rde7f|Z_0-hVZfcs}sVSVRR=ob7O6xDOEPkjcVOP@hS#Zzc`;wd!h@)S}c zQee@96o@WJhS$53LDMoB<_0A}s9h3h3ZB5ueNQ0Y{0aPsN(8@|iSX}d0{rkufPKRf z;C5O(IBbmv_g3*REiw*n&WVH7)v*wAD;741vEWr00~1chz~Uh>kdvu^)Q8Zk5@EFMA$57SuG02gRz+>qn*wp+H z%!-Zzab*;YFpGjd+DPcKHWJ#ji3B6%A^dZC2&L^GLT+3H#B7d$`<)_S|C0wWb=w2z zXz>8@Qo`ZV&Tz2l5e{FThrtn-Flg6149_4GI_(REbA3ahCL;tE9SDKQ{vpsPI~b<8 z1%t=HV0iX22pSv;f>DEmVEOAnICD4<9uEnGf}8+ob~FHBSOBcb^M@P9{2^<&KeT)6 z2g^_RLG(yJ=#=jZr%(BUDfq&@_dYQGj1M%j_JQ0&Z%Fd+hIExTRDbpY@v;{LjPU{} zy$_6I#xdjge?P}_v5y!3zkM8!ecT88_#gK11ngsD>||F+kCU*E7h@m0U>|ojtAPpF#|<`AgMGVd=%D)p z`>>C1iN@vB_!up!0xpbkXBj_na<^K zr1B>eZu<$pe*6I0=?9R)GH_miot#<Lcv^@BvCyA0YhtdkFkf z015L8;D}(=-ML|7R1NE`_(ZJW)uUl z!3wmWra(Z29(G*S!yclChxt0_dPE1&{dC~^T!zigG8}6!L#$Q{-Ir_OZWAr2u#c-| zP)PbC!NWTeMA}M_Qlfzh4-E{q)WGhy(U5*P8mtFILrB_V7_{p#Jnr%sCdEI3&+8w- z*_Mw$g z+s69A?{B^kdDRy}#`;3xcOO`I-3Qv*`9Q0m-Z1C3H+-Dn4fK~66nJ^T?CD<6(ZCBv zhTMny3-80aX8-p-W*jq)8OQ&7IDUouaRT;nKkQ?J*>x}(``8Tocp~=kU+m*P*vBc@ z$HmykZrI0{u#bCVACJO5j>SGs$39+)eY_3(*of3Z@5&l@f_+?$ee8>UT!eibfqmS( zeKq{XKHk0(cjG31V1WN`SY!Vi_z%AzZ0|47wfY4c{Hj1}T?Jvul`wv8C7k+D0Xv*3 zz@)q!X74PADSv)K&90xYr0NIk+42Ksd?|y^j%6TbmBK>%Qcykm4#hpc!;*8~!28cv z&^msFfcO&F)VBm$ocID~K7EGoV?V?2+r_ZB>Jx0A^a=KO7Qxn^g}~VrLdwmLP+9o_ zu1>>*zK1ua?{Q}=fY;giutt>+CEo8~YKwPpbnjbm{qhEe%zpz%Q}SRJp9jAm3wyZ9OO z-TMqCwRi^2y`RDg>!;xGA_d}Cr9kZ8WU#%I40eN(K_8z4VT+T%?DrFR=kWyE4txS( z35gK2G!YE{CSYGA03DeCt#L=zY>0;-qj)Ivjf3U3anSukELdP4&+i%w&$ThIYhet` zsli{-dR+lOD+SuV#$Rv0OApTN^|1J{4mQu#f$wh_46ezr*GdNS94%zIXd$el7W^p% zc@c%Oe-iMX68MM`^!uQJ#KRgm&|3r3o|(z%v9o*oQ#-@4+zlQZT$B!QlHb z2%_DCpv|Zt(7p+T2S)?pH}1k`UkAXRLjgbs1;FfA{xJTKKU^K`54JgeF#V_>D8v2W z;yYh>>h25ct$krzu@4kq_JJf@A26@EH!lpWcU( z1NUL|i2Jbg(>=(!aqs`SAOD{|W*jq)8OQ(K952EB_%-(N6YS#w*vAd9kEdfFx5GZ3 zfqmTM=|32SeVm7V{15y12KI3z_VHxw;}zJ)HQ2|k>i>mM?BnOy$6K+FZ(|=%52}H4 zjx{j9WetQY)o^8FHJF%JLwn^9M6LP*Uz+}b#{s{g>-gW$qwp6b?Ei&#1`PfV!u4-3(C{1lSpO9+rjG}*hK6na0Y@UMfDg_!ir2rVDK=#FCc+)Q##_5uv z^{gaNl{^8x+Y`9f{t3i~Cc+r|L}>ml0eb9A05{VF=5QUk@ebdMJw2LCfhn*z{e7zb9po`p6Iv zr-h`YTIf(m;pR;WQ>-bBd?mr89THq=EkWA|4J1#~Ktx$IWSotL{zIZc^ZYR!-uM`f zn>>c3kVjxY60+9-53V>t-~N!3I)sMp-^oU3f&_@;K70r2r>)- zvyfo;Feey#)DMQNpdk1?CkW0q2!b1-fzV}PAarPoT^tnvt}6oITgw1=rTD{`E&ed5 zn?GDj^Mexy{h)S)A5;|j!dBdgcTMz#hIKyBA;JfuSNp(+PCjrs-5auwd&6C|H~g;h zg1itfn7rBxCR%vG%h&gz?81Gx>~J4an%)QZ`2T->%s6HoGmigvaGZmEoP>Qm2>ZAZ z_VH}&<4)Me3$TyPu#ZLT;}Yy+vkCto0{b`}`*<_<@fqynLF&KYfPGw!ecZ*Z7E-Z~ zn^x7pGwkC|OKac{_VKEiYFNIh8p{9E$1#5(bM+tS-|P=`2>K0v6MjQh(J%0F{RQpX z|AJw`RZy<3f~7AiVgKSvnDC_nQk*NGUQIca9w>+F^~<6Dp`S3Y-cRVT?+3i7DuXE- z%i!jRQiz^i3W15=!Flv|XmkG?Tx|XgDz|?H>%0Pv56dE+-w=aL5v%5q`Fs$6LOHV2x_$${Z%ui@mR*HAy{6`Z$w1w-Rs zLYmD>aEi}{YP)RMoRkGmCS}2&7nx8qD-#lPGhoiL47mR}9lW-r!|dub2s@MpAw#a26N9ng$@=^VM1666bdQuDJ>bM zEKkOLED6ROOM)cxBnSw40)=Yy$Qg;Scy%INtHBOFmjH&n5@5I{9zv$X!|uX3IJYMb z%FVEo{bJ#^Iu<@;$H2CYF)*c33~axxK+$Lg?xpI%e~lh$>!CNkp@U{YMOAkS1{Sd~!ivam>1cdjG zfR1Soz+l${aP0a3NMbl#-V_d-+J{4u5(eJu!@#g@7~a=}!W6ulR<#d>h`104-x30r zT|;2PvtTIN6AY#Uf+0312pXOWf-EiwMtlo|u6F|A#6c~!I6hvK(~3p_EBCCT6G`RJiZTiT>k&vSO2Gv8OMxc#_@kI$Meu1 z{~wN%v5yC1A2-21o{N2KfqlFL`?wwU@i^?`3hd)f*vES8;mv5&`LA8*1w zHjk}^3E0OCRJCA(eQXw71M9Glr?skqB<$l1?BlH+s=){Q*m=z#$iY5-gMI8e@i)YM z`UQb*zrfP`7i{toLUn?MDdj+)oTMh#cmV;r#a=3j2@2ngA1Pj+6 zU|L-UDO<{5?x#|CJiipmQocid?mPVS{RXiuzrpT3U!nc`61Znq0-gN7z=@_`;M?lY zFfpze{JIoFlbxSn!?PlI)2#@KcNW6g7at*Hz(?qH;sazAzXz`g@4+Rc0QlAgkh>=z zef2wVnEDP3wD^I=0dJw^`WxtF{093w58Qv|Lh$NbXoP)yYE}*m!*{yA+rNfKF|Xht z{|aoSm+;5(C0u-z4JMEcj*qh7pk)?p)MNs_p$SLy8Q^V`0Xve?;fF&ym}R9w#)32$ z`r!riTK@t<%2Od>Zz`;9lnR&6;rm?Zk6nDQkB2{l1@cococI)a;chIgOo4>*WEgWW z8HSi8!zs@sm^m~FPU)Y(plMI=eY8XfcTR-!4HF^n3VP%I2@s%-hr!e0p#ygD=G}2H z$RrMC-j9Vs%UF2#JO*r5#z5;@1*Tq5;CFuoOk(wLet{kiSLoo=X&vsuIxvWlA$p+< zMZdIg;sUzjAzB#zlmfRF@2DG5pk5NdSP6!{*MP5^25xuLK)aY|Xt_KZ&KX9-MW4se zc|7{!ua6+%>?8Oz;t^bU6$Qz7KYhAK6ue1_1kL71=+izDdgvcQ*qVp%%E(ClXj z{PhZfgL6aRx^W2DD#37YM=(sq_qIab2f@v2L0~>D2zoRP0!aykw=RKjYGfe9d=CIO z{{Tq9J=wz|0P^wut*~qUP;ZVuq_y^kt{Hyt$-@u&P4j~fE&QNWnlD6o_(Jk@UvOyc z3qrOJ+`Q@o%NF>+=`KFd`Mo!o`gnumI&Ww`#2eaRKW~rrg8m1*{;&7djAOT?tpzPV;|>ZAG={6-@`t(!9HGxecT%R*arK!2KQqQ`?xXo z@nY;_8|-5V`#2W+cnS8g2lnv->|;Ilu^#)_5&PH^`}i>SadYfrHTH2w>|-zN<2Oqx zA*rMS)?pvV)|SIdw{q|^EC&wz_#F0e@PQw2;ZGUZVILDiBU=dr4?vySxR_{SD;Sq-z<6-IYc$o4l4!uPjJhg~}g;BAvdr~a)_z(jNU1FenOZ3J83M?M0 zKxUpE@^;`mTxNQ>7o>yb<8<)oqYQW5Wa!pa2Ecc>J}%Hg-x>-HZ&29FQ8<++!KnQb zxL8OqFIEGYt2F=V8J&SanQUN?k9mR7W5O_=m~$9cj3Y+CSYQk=dM*Ar#^68s&!fVB z#^^ByJpOwa0po~q#hk-#mj~+Q-{mXv- z@WeS_KYy{GKV5MS95DjM0%L$-KYz3RgY93)0^2|F5b$C9H@kme_b=@JiQT`k`$wML zzgplK2pC62VI2e`Eg56X$^WH|F15aSj|Y0>%PkfMNcP`8Vd@kOk)7@DT7}{*C!J=HHlqWB!f# zH=g-73p@h>2VI2e`EeFALoGiH{2bWe`EelX8tW7&w%+i z=HK)<2h6`Q|K^Ev!2BEYZ>~58ju-)Bfib`^|Hk|q^KZxk^KWbn{*C!JWP$lNJOq50e`EfQ`8Vd@ zn15sbjc5MN0?$CeIAUBe%)c@J=81E_{2TLcdYl91-MJJ|Hk~A9_N7hH|F0waSoV&WB$z*=fDvoU@R~O80O!Ye`Ed)Sz!JR4*?(M-zggfJ2pC6O6hWR(<- zjrlj3`L}#L1LohDf79a}F#pEzab0Ezu_U^ z!~7fbZ_K|j|Hk|q^KU%!Zx(n40>%;JiedhZ`8Q9T1LohDf79a}F#pEo6P)MKAr*dZ_K~xaSoV&WB$z(=YaV)=HFa#4jeH8#sXu2Vg8NzH|F1v1?J!I z5b$CCjrlj`-K%)c@J=81E_{2TLct~dvd7y)B}F~BhY#{3)eZ^#1k zZ+Hm!F#pE<8}o0>zcK&D{2R~wn+2YMfN{jQVwitp{>>BTfcZD(-}E>K%)c@JmXCA5 z{2T6$%)c@JCNuw*k7vOA8}n~^oCD_Hn1A!cIbi;c`8QXb14oR2vA`H$n15sbjrliZ zf%!K)1bmo(WB!f#H|F1%e`EfQXa3Cs&p^O9Vq7uIzcK&jiF3gG8}n~^oCD_Hn19R1 zIbi+`cSq*mn17R*f6K=+VE&EyH$Bb)^KZ<*dEy)}|Hk~AE6#xq^780O!YfAhpSVE&EyH$Bb)^KZ<* z<>MSM|AxCG^KZ<*$;`jy;~6mj#{8Qe=YaV)=HEPV4w!#q{>>HVz!4*0EHDNb=HHlq zWBv_UVEzpc0Uzeyn15sbjrlj`-=HF!I-}3Pcn15sbO^zwyk!S>PE67)OjNhWR(<-#l>+n15sbO^f9w6N_qX2PdVlNv?bZ9+#0_RxV1?e_dVkyLp!c`l-%dK{{jK-6FCFy$ z#&+!et@pQ2?{8mj(ED5OZzmn}{?_~3P6xfe_5QZf!2&Z(FrfFh-rstE9rXUz``bwey}$MT_N9Z~-`I}5zxDq1 z>HY1?4SIj;{q3ZK-rstE+v%Y9x8C1YI#^(a2?q53*85xUZ~U{+`x{5#>;0|wx8C1+ zf9w6N_qSK?Zxc6|VSyETf9w5ir-R<#dVf3Vp!c`l-@bIv`y1P__qX2PKE1zvxk2x5 zy}zAw(ED5OZ#y0I{?_~3N(T$fFu{P{-+F)R{f&PXdVk{xe7(Q@U+-_v2DP~P&vTP6 zev=Eo$@%>EsmOubWXEr^;Wt_Fn=JTEX8a}-ev^UU zS#g`3_)W%(`-@DtO=kQi3x1Oozsd9e4KdmAn;iH}PW&bpev>bLlN-N@wfleG`_ulv zxPirq+vLD+vg0?|@SCjoO&0tnGk%i^zsbOFa=zRD?SJn#0CW34ZgcxT?(uS&@SDu| zO&0tnD}Iv=zsZi@6K<0kzsZ8%WW{f?;Wydw zn;iH}PW&bpev>bLlN-N@wfleG`_ulvxPirq+vLD+vg0?|@SCjoO&0tnGk%i^zsbOF z(*EcEZxPh$CvN{CZ~y0a2Xp&B?(uS&@SDu|O&0tnD}Iv=zsZi@ev=cw$%WtKi{IqNZ}QvW*5CWn{=c|^#fjVGz;CkSH`(x; ztoTh9{3bJglL^1cz;Dw2=l*XI)av(aP?2YYn>-ubWW2b)$b{Qu#&5FVH(BwUZ1_!f z{3Zu}lM}zmh2P|h-{i(`V(tE)_x`m1FK%FQ;x;+(o9y^aHvA?lev<{i$&BA*!f!J0 zo3#JA|62sL`ia|r$lL$<-ND@ck9)jaCj2Hdev<{i$%@}(!*8P@ta)uO}_X|Zu};{9d7--KkfgE8(5sUO%D7fJARW5 zzsZWN!A-`C`-@DtO=kQi3x1OozsZK* zWXErE;5Rw(n_T!!zW7aU{3h1!|9S6E`~Ttw7AJ0#1HZ|R-(P^+J~{fE5$pWhwK?f+k((|6km|;>2xo;5XUvn{4<^R{SOlev=u$$%Nlz;5TXibN{ypYV~_I zsK~RyO`Z*IGG5$YWWsGS<2PCGo2>XvHvA?#ev<>g$%)_O!f*1$Z*t=|v3CE@dw<&h z7dNmtahn|YO?Lby8-9}&zsZ8%WX5kY;WruhP1^t5|1E-A{lx7*bLlN-OuZ--ld?@#;x;szEcZj%GQ$&TM- z!*8fyIg2Xv7W^hNev=8m z$-r;Y{^$O05!C7@ZvP=~|L1oHbNfH;@p75)o6Pu47W^hFev=Kq$&TOTz;ANmH@Wbe zeDRyy_)UI0-1>We+W!|fusCs>9QaLk{3aWIlNGi29=k!OROJR96(ytu!}gxh4sZ?fPwS@D}} z_)T{FCI^0#6TiuY-{gzmAe?f#$l{778{FjC;3nh6{Y56+CNqAM1;5FP-(7@SB|YO)mTf#0P4&;8#bsMSy0{zKmW&+iWA_J7>t zP@SE)T zO*Z@{D}Iv&zsZc>WWsMU@SC*%x&K=Pwfa39ROH#p887ZHGT}Cv@tZ98O;-FS z8-9}=zsZ5$H@WefSiAq{y+7^$iyK&+xJ?fHCOdwU4Zq2X-(`4*VuN zev=Kq$%@}(!EZ9-H<|F84E!eTfA0SlL9Kqz1{HZWxXH7@O~#A+i%hspX8a}#ev=iy z$%fx#$8U1rH#zZ}T=-4C_)Tv7Cf4r%dGAmA|KbJ~CvKAizsZi@WW#T=;x}3Fo6Pu4 zCj2G?ze)R_`@cm{tDm_2hrIot-yO{D|G3A?Wx{VV<2PCGo2>XvHvA?#ev<>g$%)_O z!f*1$Z*t=|`R(xj^5291MmMN|8mNI9sDT=&ff}fR8mNI9_?r#fANTz|fA_C}8mNI9 zsDT=&ff}fR8mNI9sDb~!fiL?%xJD4b(sl)Ibf?Kn>JD4gBo}F7|(PgBqxT z8mNI9sDT=&ff}fR8mNI9_}dMf?EmNnHBbXJPy;nk12s?sHBbXJPy;paw;MRv|IrO< zpayE725O)NYM=&cpayE725R7MH?Xt+qZ`yf4b(sl)Ibf?Kn>JD4b(sl)WF|vU}OJB zH>iOcsDT=&ff}fR8mNI9sDT=&fxq3r%KndTPy;nk12s?sHBbXJPy;nk12s?sf4hN& z{U6<+25O)NYM=&cpayE725O)NYM=)Gb^|l}Ke|B;)Ibf?Kn>JD4b(sl)Ibf?Kn?uu z1}64@bb}hGff}fR8mNI9sDT=&ff}fR8u;4{4EBF?gBqxT8mNI9sDT=&ff}fR8mNI9 z_}dMf?=hkq)Ibf?Kn>JD4b(sl)Ibf?Kn>Ku-)>-K|3^2dff}fR8mNI9sDT=&ff}fR z8mNK4-N0b~M>nW}8mNI9sDT=&ff}fR8mNI9sDVG;0RQs(xIvf$`%1{~z6;25O)N zYM=&cpayE725O)NYM=)GbORIr{zf;bff}fR8mNI9sDT=&ff}fR8mNK4-N4NLk8V%{ zHBbXJPy;nk12s?sHBbXJPy>Ivfrb4a-Jk|)payE725O)NYM=&cpayE72L5&fEBimX zK@HSE4b(sl)Ibf?Kn>JD4b(sl{Otxd_J4GP8mNI9sDT=&ff}fR8mNI9sDT>z+YRjO z|L6uaPy;nk12s?sHBbXJPy;nk12yos8#vhi(G6;#25O)NYM=&cpayE725O)NYT$1- zaI*iS8`MAz)Ibf?Kn>JD4b(sl)Ibf?z~64*V*f`ssDT=&ff}fR8mNI9sDT=&ff}fR zzumx>{U6<+25O)NYM=&cpayE725O)NYM=)Gb^|y2Ke|B;)Ibf?Kn>JD4b(sl)Ibf? KKn?um2L1=Sy+cv} literal 0 HcmV?d00001 diff --git a/benchmarks/opencl/mri-q/Makefile b/benchmarks/opencl/mri-q/Makefile new file mode 100644 index 00000000..55c9b3c6 --- /dev/null +++ b/benchmarks/opencl/mri-q/Makefile @@ -0,0 +1,68 @@ +RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops) +POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc) +POCL_INC_PATH = $(wildcard ../include) +POCL_LIB_PATH = $(wildcard ../lib) +VX_RT_PATH = $(wildcard ../../../runtime) +VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir) + +CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc +CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++ +DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump +HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy +GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb + +VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c +VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s +VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s +VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c +VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s +VX_SRCS += $(VX_RT_PATH)/tests/tests.c +VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c +VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) + +VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld + +CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32 +CXXFLAGS += -ffreestanding # program may not begin at main() +CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections +CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions +CXXFLAGS += -I$(POCL_INC_PATH) -I. + +VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a +QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a + +PROJECT = mri-q + +SRCS = main.cc args.c parboil_opencl.c ocl.c gpu_info.c file.cc computeQ.c + +all: $(PROJECT).dump $(PROJECT).hex + +lib$(PROJECT).a: kernel.cl + POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl + +$(PROJECT).elf: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) $(SRCS) $(VX_LIBS) -o $(PROJECT).elf + +$(PROJECT).qemu: $(SRCS) lib$(PROJECT).a + $(CXX) $(CXXFLAGS) $(SRCS) $(QEMU_LIBS) -o $(PROJECT).qemu + +$(PROJECT).hex: $(PROJECT).elf + $(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex + +$(PROJECT).dump: $(PROJECT).elf + $(DMP) -D $(PROJECT).elf > $(PROJECT).dump + +run: $(PROJECT).hex + POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug + +qemu: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-s: $(PROJECT).qemu + POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu + +gdb-c: $(PROJECT).qemu + $(GDB) $(PROJECT).qemu + +clean: + rm -rf *.o *.elf *.dump *.hex *.qemu *.log *.debug diff --git a/benchmarks/opencl/mri-q/args.c b/benchmarks/opencl/mri-q/args.c new file mode 100644 index 00000000..9d751e29 --- /dev/null +++ b/benchmarks/opencl/mri-q/args.c @@ -0,0 +1,617 @@ + +#include +#include +#include +#include +#include +#include + +/*****************************************************************************/ +/* Memory management routines */ + +/* Free an array of owned strings. */ +void +pb_FreeStringArray(char **string_array) +{ + char **p; + + if (!string_array) return; + for (p = string_array; *p; p++) free(*p); + free(string_array); +} + +struct pb_PlatformParam * +pb_PlatformParam(char *name, char *version) +{ + if (name == NULL) { + fprintf(stderr, "pb_PlatformParam: Invalid argument\n"); + exit(-1); + } + + struct pb_PlatformParam *ret = + (struct pb_PlatformParam *)malloc(sizeof (struct pb_PlatformParam)); + + ret->name = name; + ret->version = version; + return ret; +} + +void +pb_FreePlatformParam(struct pb_PlatformParam *p) +{ + if (p == NULL) return; + + free(p->name); + free(p->version); + free(p); +} + +struct pb_DeviceParam * +pb_DeviceParam_index(int index) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_INDEX; + ret->index = index; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_cpu(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_CPU; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_gpu(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_GPU; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_accelerator(void) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_ACCELERATOR; + return ret; +} + +struct pb_DeviceParam * +pb_DeviceParam_name(char *name) +{ + struct pb_DeviceParam *ret = + (struct pb_DeviceParam *)malloc(sizeof (struct pb_DeviceParam)); + ret->criterion = pb_Device_NAME; + ret->name = name; + return ret; +} + +void +pb_FreeDeviceParam(struct pb_DeviceParam *p) +{ + if (p == NULL) return; + + switch(p->criterion) { + case pb_Device_NAME: + free(p->name); + break; + case pb_Device_INDEX: + case pb_Device_CPU: + case pb_Device_ACCELERATOR: + break; + default: + fprintf(stderr, "pb_FreeDeviceParam: Invalid argument\n"); + exit(-1); + } +} + +void +pb_FreeParameters(struct pb_Parameters *p) +{ + free(p->outFile); + pb_FreeStringArray(p->inpFiles); + pb_FreePlatformParam(p->platform); + pb_FreeDeviceParam(p->device); + free(p); +} + +/*****************************************************************************/ + +/* Parse a comma-delimited list of strings into an + * array of strings. */ +static char ** +read_string_array(char *in) +{ + char **ret; + int i; + int count; /* Number of items in the input */ + char *substring; /* Current substring within 'in' */ + + /* Count the number of items in the string */ + count = 1; + for (i = 0; in[i]; i++) if (in[i] == ',') count++; + + /* Allocate storage */ + ret = (char **)malloc((count + 1) * sizeof(char *)); + + /* Create copies of the strings from the list */ + substring = in; + for (i = 0; i < count; i++) { + char *substring_end; + int substring_length; + + /* Find length of substring */ + for (substring_end = substring; + (*substring_end != ',') && (*substring_end != 0); + substring_end++); + + substring_length = substring_end - substring; + + /* Allocate memory and copy the substring */ + ret[i] = (char *)malloc(substring_length + 1); + memcpy(ret[i], substring, substring_length); + ret[i][substring_length] = 0; + + /* go to next substring */ + substring = substring_end + 1; + } + ret[i] = NULL; /* Write the sentinel value */ + + return ret; +} + +static void +report_parse_error(const char *str) +{ + fputs(str, stderr); +} + +/* Interpret a string as a 'pb_DeviceParam' value. + * Return a pointer to a new value, or NULL on failure. + */ +static struct pb_DeviceParam * +read_device_param(char *str) +{ + /* Try different ways of interpreting 'device_string' until one works */ + + /* If argument is an integer, then interpret it as a device index */ + errno = 0; + char *end; + long device_int = strtol(str, &end, 10); + if (!errno) { + /* Negative numbers are not valid */ + if (device_int < 0 || device_int > INT_MAX) return NULL; + + return pb_DeviceParam_index(device_int); + } + + /* Match against predefined strings */ + if (strcmp(str, "CPU") == 0) + return pb_DeviceParam_cpu(); + if (strcmp(str, "GPU") == 0) + return pb_DeviceParam_gpu(); + if (strcmp(str, "ACCELERATOR") == 0) + return pb_DeviceParam_accelerator(); + + /* Assume any other string is a device name */ + return pb_DeviceParam_name(strdup(str)); +} + +/* Interpret a string as a 'pb_PlatformParam' value. + * Return a pointer to a new value, or NULL on failure. + */ +static struct pb_PlatformParam * +read_platform_param(char *str) +{ + int separator_index; /* Index of the '-' character separating + * name and version number. It's -1 if + * there's no '-' character. */ + + /* Find the last occurrence of '-' in 'str' */ + { + char *cur; + separator_index = -1; + for (cur = str; *cur; cur++) { + if (*cur == '-') separator_index = cur - str; + } + } + + /* The platform name is either the entire string, or all characters before + * the separator */ + int name_length = separator_index == -1 ? strlen(str) : separator_index; + char *name_str = (char *)malloc(name_length + 1); + memcpy(name_str, str, name_length); + name_str[name_length] = 0; + + /* The version is either NULL, or all characters after the separator */ + char *version_str; + if (separator_index == -1) { + version_str = NULL; + } + else { + const char *version_input_str = str + separator_index + 1; + int version_length = strlen(version_input_str); + + version_str = (char *)malloc(version_length + 1); + memcpy(version_str, version_input_str, version_length); + version_str[version_length] = 0; + } + + /* Create output structure */ + return pb_PlatformParam(name_str, version_str); +} + +/****************************************************************************/ +/* Argument parsing state */ + +/* Argument parsing state. + * + * Arguments that are interpreted by the argument parser are removed from + * the list. Variables 'argc' and 'argn' do not count arguments that have + * been removed. + * + * During argument parsing, the array of arguments is compacted, overwriting + * the erased arguments. Variable 'argv_put' points to the array element + * where the next argument will be written. Variable 'argv_get' points to + * the array element where the next argument will be read from. + */ +struct argparse { + int argc; /* Number of arguments. Mutable. */ + int argn; /* Current argument index. */ + char **argv_get; /* Argument value being read. */ + char **argv_put; /* Argument value being written. + * argv_put <= argv_get. */ +}; + +static void +initialize_argparse(struct argparse *ap, int argc, char **argv) +{ + ap->argc = argc; + ap->argn = 0; + ap->argv_get = ap->argv_put = argv; +} + +/* Finish argument parsing, without processing the remaining arguments. + * Write new argument count into _argc. */ +static void +finalize_argparse(struct argparse *ap, int *_argc, char **argv) +{ + /* Move the remaining arguments */ + for(; ap->argn < ap->argc; ap->argn++) + *ap->argv_put++ = *ap->argv_get++; + + /* Update the argument count */ + *_argc = ap->argc; + + /* Insert a terminating NULL */ + argv[ap->argc] = NULL; +} + +/* Delete the current argument. The argument will not be visible + * when argument parsing is done. */ +static void +delete_argument(struct argparse *ap) +{ + if (ap->argn >= ap->argc) { + fprintf(stderr, "delete_argument\n"); + } + ap->argc--; + ap->argv_get++; +} + +/* Go to the next argument. Also, move the current argument to its + * final location in argv. */ +static void +next_argument(struct argparse *ap) +{ + if (ap->argn >= ap->argc) { + fprintf(stderr, "next_argument\n"); + } + /* Move argument to its new location. */ + *ap->argv_put++ = *ap->argv_get++; + ap->argn++; +} + +static int +is_end_of_arguments(struct argparse *ap) +{ + return ap->argn == ap->argc; +} + +/* Get the current argument */ +static char * +get_argument(struct argparse *ap) +{ + return *ap->argv_get; +} + +/* Get the current argument, and also delete it */ +static char * +consume_argument(struct argparse *ap) +{ + char *ret = get_argument(ap); + delete_argument(ap); + return ret; +} + +/****************************************************************************/ + +/* The result of parsing a command-line argument */ +typedef enum { + ARGPARSE_OK, /* Success */ + ARGPARSE_ERROR, /* Error */ + ARGPARSE_DONE /* Success, and do not continue parsing */ +} result; + +typedef result parse_action(struct argparse *ap, struct pb_Parameters *params); + + +/* A command-line option */ +struct option { + char short_name; /* If not 0, the one-character + * name of this option */ + const char *long_name; /* If not NULL, the long name of this option */ + parse_action *action; /* What to do when this option occurs. + * Sentinel value is NULL. + */ +}; + +/* Output file + * + * -o FILE + */ +static result +parse_output_file(struct argparse *ap, struct pb_Parameters *params) +{ + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting file name after '-o'\n"); + return ARGPARSE_ERROR; + } + + /* Replace the output file name */ + free(params->outFile); + params->outFile = strdup(consume_argument(ap)); + + return ARGPARSE_OK; +} + +/* Input files + * + * -i FILE,FILE,... + */ +static result +parse_input_files(struct argparse *ap, struct pb_Parameters *params) +{ + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting file name after '-i'\n"); + return ARGPARSE_ERROR; + } + + /* Replace the input file list */ + pb_FreeStringArray(params->inpFiles); + params->inpFiles = read_string_array(consume_argument(ap)); + return ARGPARSE_OK; +} + +/* End of options + * + * -- + */ + +static result +parse_end_options(struct argparse *ap, struct pb_Parameters *params) +{ + return ARGPARSE_DONE; +} + +/* OpenCL device + * + * --device X + */ + +static result +parse_device(struct argparse *ap, struct pb_Parameters *params) +{ + /* Read the next argument, which specifies a device */ + + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting device specification after '--device'\n"); + return ARGPARSE_ERROR; + } + + char *device_string = consume_argument(ap); + struct pb_DeviceParam *device_param = read_device_param(device_string); + + if (!device_param) { + report_parse_error("Unrecognized device specification format on command line\n"); + return ARGPARSE_ERROR; + } + + /* Save the result */ + pb_FreeDeviceParam(params->device); + params->device = device_param; + + return ARGPARSE_OK; +} + +static result +parse_platform(struct argparse *ap, struct pb_Parameters *params) +{ + /* Read the next argument, which specifies a platform */ + + if (is_end_of_arguments(ap)) + { + report_parse_error("Expecting device specification after '--platform'\n"); + return ARGPARSE_ERROR; + } + + char *platform_string = consume_argument(ap); + struct pb_PlatformParam *platform_param = read_platform_param(platform_string); + + if (!platform_param) { + report_parse_error("Unrecognized platform specification format on command line\n"); + return ARGPARSE_ERROR; + } + + /* Save the result */ + pb_FreePlatformParam(params->platform); + params->platform = platform_param; + + return ARGPARSE_OK; +} + + +static struct option options[] = { + { 'o', NULL, &parse_output_file }, + { 'i', NULL, &parse_input_files }, + { '-', NULL, &parse_end_options }, + { 0, "device", &parse_device }, + { 0, "platform", &parse_platform }, + { 0, NULL, NULL } +}; + +static int +is_last_option(struct option *op) +{ + return op->action == NULL; +} + +/****************************************************************************/ + +/* Parse command-line parameters. + * Return zero on error, nonzero otherwise. + * On error, the other outputs may be invalid. + * + * The information collected from parameters is used to update + * 'ret'. 'ret' should be initialized. + * + * '_argc' and 'argv' are updated to contain only the unprocessed arguments. + */ +static int +pb_ParseParameters (struct pb_Parameters *ret, int *_argc, char **argv) +{ + char *err_message; + struct argparse ap; + + /* Each argument */ + initialize_argparse(&ap, *_argc, argv); + while(!is_end_of_arguments(&ap)) { + result arg_result; /* Result of parsing this option */ + char *arg = get_argument(&ap); + + /* Process this argument */ + if (arg[0] == '-') { + /* Single-character flag */ + if ((arg[1] != 0) && (arg[2] == 0)) { + delete_argument(&ap); /* This argument is consumed here */ + + /* Find a matching short option */ + struct option *op; + for (op = options; !is_last_option(op); op++) { + if (op->short_name == arg[1]) { + arg_result = (*op->action)(&ap, ret); + goto option_was_processed; + } + } + + /* No option matches */ + report_parse_error("Unexpected command-line parameter\n"); + arg_result = ARGPARSE_ERROR; + goto option_was_processed; + } + + /* Long flag */ + if (arg[1] == '-') { + delete_argument(&ap); /* This argument is consumed here */ + + /* Find a matching long option */ + struct option *op; + for (op = options; !is_last_option(op); op++) { + if (op->long_name && strcmp(&arg[2], op->long_name) == 0) { + arg_result = (*op->action)(&ap, ret); + goto option_was_processed; + } + } + + /* No option matches */ + report_parse_error("Unexpected command-line parameter\n"); + arg_result = ARGPARSE_ERROR; + goto option_was_processed; + } + } + else { + /* Other arguments are ignored */ + next_argument(&ap); + arg_result = ARGPARSE_OK; + goto option_was_processed; + } + + option_was_processed: + /* Decide what to do next based on 'arg_result' */ + switch(arg_result) { + case ARGPARSE_OK: + /* Continue processing */ + break; + + case ARGPARSE_ERROR: + /* Error exit from the function */ + return 0; + + case ARGPARSE_DONE: + /* Normal exit from the argument parsing loop */ + goto end_of_options; + } + } /* end for each argument */ + + /* If all arguments were processed, then normal exit from the loop */ + + end_of_options: + finalize_argparse(&ap, _argc, argv); + return 1; +} + +/*****************************************************************************/ +/* Other exported functions */ + +struct pb_Parameters * +pb_ReadParameters(int *_argc, char **argv) +{ + struct pb_Parameters *ret = + (struct pb_Parameters *)malloc(sizeof(struct pb_Parameters)); + + /* Initialize the parameters structure */ + ret->outFile = NULL; + ret->inpFiles = (char **)malloc(sizeof(char *)); + ret->inpFiles[0] = NULL; + ret->platform = NULL; + ret->device = NULL; + + /* Read parameters and update _argc, argv */ + if (!pb_ParseParameters(ret, _argc, argv)) { + /* Parse error */ + pb_FreeParameters(ret); + return NULL; + } + + return ret; +} + +int +pb_Parameters_CountInputs(struct pb_Parameters *p) +{ + int n; + + for (n = 0; p->inpFiles[n]; n++); + return n; +} + diff --git a/benchmarks/opencl/mri-q/computeQ.c b/benchmarks/opencl/mri-q/computeQ.c new file mode 100644 index 00000000..65ed6f4d --- /dev/null +++ b/benchmarks/opencl/mri-q/computeQ.c @@ -0,0 +1,118 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include +#include +#include +#include "ocl.h" +#include "macros.h" +#include "computeQ.h" +#include "parboil.h" + +#define NC 1 + +void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm) +{ + int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK; + if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK) + phiMagBlocks++; + + size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK; + size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK; + + cl_int clStatus; + clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(cl_mem),&phiR_d); + clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(cl_mem),&phiI_d); + clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&phiMag_d); + clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(int),&numK); + CHECK_ERROR("clSetKernelArg") + + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimPhiMagGrid,&DimPhiMagBlock,0,NULL,NULL); + CHECK_ERROR("clEnqueueNDRangeKernel") +} + +static +unsigned long long int +readElapsedTime(cl_event internal) +{ + cl_int status; + cl_ulong t_begin, t_end; + status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &t_begin, NULL); + if (status != CL_SUCCESS) return 0; + status = clGetEventProfilingInfo(internal, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &t_end, NULL); + if (status != CL_SUCCESS) return 0; + return (unsigned long long int)(t_end - t_begin); +} + + +void computeQ_GPU (int numK,int numX, + cl_mem x_d, cl_mem y_d, cl_mem z_d, + struct kValues* kVals, + cl_mem Qr_d, cl_mem Qi_d, + clPrmtr* clPrm) +{ + int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID; + if (numK % KERNEL_Q_K_ELEMS_PER_GRID) + QGrids++; + int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK; + if (numX % KERNEL_Q_THREADS_PER_BLOCK) + QBlocks++; + + size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC; + size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC; + + cl_int clStatus; + cl_mem ck; + ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus); + + int QGrid; + for (QGrid = 0; QGrid < QGrids; QGrid++) { + // Put the tile of K values into constant mem + int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID; + struct kValues* kValsTile = kVals + QGridBase; + int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase); + + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + + clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK); + clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase); + clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d); + clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d); + clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d); + clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d); + clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d); + clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck); + CHECK_ERROR("clSetKernelArg") + + printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock); + + #define TIMED_EXECUTION + #ifdef TIMED_EXECUTION + cl_event e; + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,&e); + CHECK_ERROR("clEnqueueNDRangeKernel") + clWaitForEvents(1, &e); + printf ("%llu\n", readElapsedTime(e)); + #else + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL); + CHECK_ERROR("clEnqueueNDRangeKernel") + #endif + } +} + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi) +{ + *phiMag = (float* ) memalign(16, numK * sizeof(float)); + *Qr = (float*) memalign(16, numX * sizeof (float)); + *Qi = (float*) memalign(16, numX * sizeof (float)); +} + diff --git a/benchmarks/opencl/mri-q/computeQ.h b/benchmarks/opencl/mri-q/computeQ.h new file mode 100644 index 00000000..ec919220 --- /dev/null +++ b/benchmarks/opencl/mri-q/computeQ.h @@ -0,0 +1,14 @@ +#ifndef __COMPUTEQ__ +#define __COMPUTEQ__ + +void computePhiMag_GPU(int numK,cl_mem phiR_d,cl_mem phiI_d,cl_mem phiMag_d,clPrmtr* clPrm); +void computeQ_GPU (int numK,int numX, + cl_mem x_d, cl_mem y_d, cl_mem z_d, + struct kValues* kVals, + cl_mem Qr_d, cl_mem Qi_d, + clPrmtr* clPrm); + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi); + +#endif diff --git a/benchmarks/opencl/mri-q/file.cc b/benchmarks/opencl/mri-q/file.cc new file mode 100644 index 00000000..15b07075 --- /dev/null +++ b/benchmarks/opencl/mri-q/file.cc @@ -0,0 +1,78 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +//#include +#include +#include +#include +#include + +#include "file.h" + +#if __BYTE_ORDER != __LITTLE_ENDIAN +# error "File I/O is not implemented for this system: wrong endianness." +#endif + +extern "C" +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI) +{ + int numK, numX; + FILE* fid = fopen(fName, "r"); + + if (fid == NULL) + { + fprintf(stderr, "Cannot open input file\n"); + exit(-1); + } + fread (&numK, sizeof (int), 1, fid); + *_numK = numK; + fread (&numX, sizeof (int), 1, fid); + *_numX = numX; + *kx = (float *) memalign(16, numK * sizeof (float)); + fread (*kx, sizeof (float), numK, fid); + *ky = (float *) memalign(16, numK * sizeof (float)); + fread (*ky, sizeof (float), numK, fid); + *kz = (float *) memalign(16, numK * sizeof (float)); + fread (*kz, sizeof (float), numK, fid); + *x = (float *) memalign(16, numX * sizeof (float)); + fread (*x, sizeof (float), numX, fid); + *y = (float *) memalign(16, numX * sizeof (float)); + fread (*y, sizeof (float), numX, fid); + *z = (float *) memalign(16, numX * sizeof (float)); + fread (*z, sizeof (float), numX, fid); + *phiR = (float *) memalign(16, numK * sizeof (float)); + fread (*phiR, sizeof (float), numK, fid); + *phiI = (float *) memalign(16, numK * sizeof (float)); + fread (*phiI, sizeof (float), numK, fid); + fclose (fid); +} + +extern "C" +void outputData(char* fName, float* outR, float* outI, int numX) +{ + FILE* fid = fopen(fName, "w"); + uint32_t tmp32; + + if (fid == NULL) + { + fprintf(stderr, "Cannot open output file\n"); + exit(-1); + } + + /* Write the data size */ + tmp32 = numX; + fwrite(&tmp32, sizeof(uint32_t), 1, fid); + + /* Write the reconstructed data */ + fwrite (outR, sizeof (float), numX, fid); + fwrite (outI, sizeof (float), numX, fid); + fclose (fid); +} diff --git a/benchmarks/opencl/mri-q/file.h b/benchmarks/opencl/mri-q/file.h new file mode 100644 index 00000000..c6a61ef4 --- /dev/null +++ b/benchmarks/opencl/mri-q/file.h @@ -0,0 +1,22 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI); + +void outputData(char* fName, float* outR, float* outI, int numX); + +#ifdef __cplusplus +} +#endif diff --git a/benchmarks/opencl/mri-q/gpu_info.c b/benchmarks/opencl/mri-q/gpu_info.c new file mode 100644 index 00000000..4d641f81 --- /dev/null +++ b/benchmarks/opencl/mri-q/gpu_info.c @@ -0,0 +1,55 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ +//#include +#include +#include +#include +#include + +#include "gpu_info.h" + +void compute_active_thread(size_t *thread, + size_t *grid, + int task, + int pad, + int major, + int minor, + int sm) +{ + int max_thread; + int max_block=8; + if(major==1) + { + if(minor>=2) + max_thread=1024; + else + max_thread=768; + } + else if(major==2) + max_thread=1536; + else + //newer GPU //keep using 2.0 + max_thread=1536; + + int _grid; + int _thread; + + if(task*pad>sm*max_thread) + { + _thread=max_thread/max_block; + _grid = ((task*pad+_thread-1)/_thread)*_thread; + } + else + { + _thread=pad; + _grid=task*pad; + } + + thread[0]=_thread; + grid[0]=_grid; +} diff --git a/benchmarks/opencl/mri-q/gpu_info.h b/benchmarks/opencl/mri-q/gpu_info.h new file mode 100644 index 00000000..4219cda9 --- /dev/null +++ b/benchmarks/opencl/mri-q/gpu_info.h @@ -0,0 +1,20 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef __GPUINFOH__ +#define __GPUINFOH__ + +void compute_active_thread(size_t *thread, + size_t *grid, + int task, + int pad, + int major, + int minor, + int sm); + +#endif diff --git a/benchmarks/opencl/mri-q/kernel.cl b/benchmarks/opencl/mri-q/kernel.cl new file mode 100644 index 00000000..39a1842e --- /dev/null +++ b/benchmarks/opencl/mri-q/kernel.cl @@ -0,0 +1,51 @@ +#include "macros.h" + +__kernel void +ComputePhiMag_GPU(__global float* phiR, __global float* phiI, __global float* phiMag, int numK) { + int indexK = get_global_id(0); + float real = indexK; + float imag = indexK; + if (indexK < numK) { + /*float*/ real = phiR[indexK]; + /*float*/ imag = phiI[indexK]; + phiMag[indexK] = real*real + imag*imag; + } +} + +__kernel void +ComputeQ_GPU(int numK, int kGlobalIndex, + __global float* x, __global float* y, __global float* z, + __global float* Qr, __global float* Qi, __global struct kValues* ck) +{ + float sX; + float sY; + float sZ; + float sQr; + float sQi; + + // Determine the element of the X arrays computed by this thread + int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0); + + // Read block's X values from global mem to shared mem + sX = x[xIndex]; + sY = y[xIndex]; + sZ = z[xIndex]; + sQr = Qr[xIndex]; + sQi = Qi[xIndex]; + + int kIndex = 0; + for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID); kIndex++) { + if (kGlobalIndex < numK) { + float expArg; + expArg = PIx2 * (ck[kIndex].Kx * sX + + ck[kIndex].Ky * sY + + ck[kIndex].Kz * sZ); + sQr = sQr + ck[kIndex].PhiMag * cos(expArg); // native_cos(expArg); + sQi = sQi + ck[kIndex].PhiMag * sin(expArg); // native_sin(expArg); + } + kGlobalIndex++; + } + + Qr[xIndex] = sQr; + Qi[xIndex] = sQi; +} diff --git a/benchmarks/opencl/mri-q/libmri-q.a b/benchmarks/opencl/mri-q/libmri-q.a new file mode 100644 index 0000000000000000000000000000000000000000..372b194145ec5f7e3554e3e9c135131e04a0ab5e GIT binary patch literal 15812 zcmeHO4|G)3nZIw|yqS;$p*|86bd3%H44IJRWirI2np7g)np&(3xLxZonaqULKropA zdUP9{3_-wZ_Yt9g+ygFy)lw^cgkfFJF7W`ayWO*P4}W^jdR%%!N_P*ZhgL$O24{cY zd+%m$CLma?J=^WgdGmhvyWjopz2E)rz2Cj>=H`W=_VCKvbLSV059LDC!s2gHQKgS_ ztjJ}Ixfq*tD)U@_kTL5Cu4xOm1|N#Fua2|^?`&JOrZW~HRe+61j#3RYB9+2j?uvB;`5(U#VT z*XQ&4E4-0v?}CUo94Tv?pGorjz12Q%7ZS$%**jn+a{W)*%-#ct)bP;MQbDN9W8CEi)zZs%PNZI zu8hUjblf(7esfD~WoKhqxNX(^*4DMFysKK;+uPdbn|aERSkxG4()>+TjTH;Rk?NWn zEgG)$Rr@0gnj+N|(dtI6Jla$qZuI+Zy`oH#Z;A|8Mtxd&O=D$6b-1y-GE^Q4MJoK7 zPxDpPG}SEdHToLMebt)pR=WY2*P}wGIoWU0XYHhG@s^mIEkSxW@LW6&{1Lqz&r&$gXFzS5j#x9`vzMcktj~j4BM6nYj`M3!!y&S z6Xx{mfUl*(5D0VnBH$SgqB2I9)4vEj)20*V^akJ=lkkH;nA4X4&$Q`;Ii2(|!;K#V z!km6T@JyRdnA0Bso&h_hK$z240ME4PggLzx_*$A-5eReoHsEV>h{_mYPN%pe(=TH= zxszlCW23A+(i$pjS=|x~hT7XhkFem^?y0B@E^heB7wQ^u z)n!!FudT1BXl)BaR7cCBk&m6ZCe+>>j6JdjIm;T#I%4gYocA-U&4ejFtKF<@bz3Y_ zw)mcaw<8t`Kg7ytzy(81O&IviY)o+-osGf22=e(m=&KI;e8K8aFcb<#wX$%S@g%RW znr8RvkT(?aT1iyRSf~*ydt{aMZVJUh6MM!N46Y9b>(L2q#6a09$||h>y_(-%j4Rt5 zOm+j2W5QVDupX&|Z4+cwzrRC%K1|Bj`UiK+q zUKZdOPJ&2ukx!J*^Q-}nDDSi&18}TPM{IeOL_{Dyn!9))dGt^_uffC19RU{0<(POm z24p*tf!asyrTS1B#qY*LyybW(jN*CCMAi>4w-SIb4+q5vh&=iQ0J$OE22vOC?nSvY zolv9-EdSMCETql^Pw4WJ?wA)cKtv9@zJ zeV|It58~c_N%l&PcxjzOF5TdW&+B){^CUT5QYXtL8|3({{jz+k-;ozs}`0*uL)*sJPJMj%+#^AJDa2LG?G(=A;g}|P z)t=aE@#gfU0$xGb=6MD(IL~eYze3pN5k0x9-EdnQ+>GD%k=uB|vC4L#)`Z$at*zMm zH-_Q!jQ4#$x-RfFS9P{_L^S}oU<5u!*aTi%y=Ja|#{BiBkRNI`11({M?WN50e@2Cj z+y@{Yjw#~?Zh?n~V3fTc?4Hp=eyBX3Vfaa?DqCupUU1Inl^*Izqby8tS0?Y?d>}SgA(`lcc%PZ#P#HcHQ*_V(7i@u$80V|4twwih%W_;OPV0>_m0N%!?TQ30w0uj46TnlE_Z^`kY6=$!9Yz&wq3d7Tc~ z^7y`<`1oF!_$UuOw0{2~_Vr%7ekk4ym)qBW9(-4`uipVaalOCnzJ7tdO}BUVSK8Ot zR8%jx+V%e6Emm9jdT;ORFJfQs@XW$K^cL(xi*ClgvjY3hI_x{|#lACyeJA#zvo=Ub zm$0v=J-w{v;f(%C>>nrL>|XKQp)0-}s$!ns-@(t~^}NhCfiW8P+G-+iY(K5$TJe<9 zmF7I*T*XN|pnY?U#uR5Nmv!mg@29n)?;(xd&C_({TsnLx>7YEik~=2b82T$o>+EI5 zLw(FYlujG(|IjR3Ph@zge1E~4sDJXJST8*v^vSGCyDKTF;7=)UjB6G;d}yTRCNsJ6Ab4Q4CfOP&LVvsjVdm%F9ZVwAJVf7s}8$%b>7`Q7lj-NR{P-36xo`V3Q#Ph;xx zYa~Ni?$A#bFijp}#)rKuH63y`Z~v;%y-e1RIVFD(arrB!jl^1qv1LC?H6i}={@b*~ z1z8O!lF?l#Yq`BRBf9BvEwRU>rG`=Vmc{p}Q~tco==r6rDML&j{xg*EE=!$3{%_4W zplm&^L`nM^-6of(@kZa-Kj8O|4EJ%IXM+zt9PW+Xs& z4$J!T8D<-_#0$(AUL@(m({Ub!a@O6*jHju*nUL{5^4!1-r3Z2$WAlEdFMStrx2(nY zGc|xVJEzM=PA}8Dp9mO<6SC3sEcAgoJwf$pM%~ZIhO*9K$R}mvn(Yq#*!xU-8gh=O zQBIQS0mL^CLH~CyR5ukkQ`;K@omxI;RPUMoc&cF0J$j<|tEuky zp4YoiS8I8H>oD^E(V^vk=rHnMb{NWo=%;IBZPN~BR9%O5>r6j+E!qgZnQeR!ZL-=( z_12V5W^CHejJ_SfeP~m!r1d3%UxM$D0RB7V`!U+r3A`KhA4C5UNnN(KPIYY@(H-Zc z@V~t7O7)ze?pKolu;4n76!}NCwnc;dv)=%J#IL;Iu`U#vT zZu}+Fd?;h<9_;0EU~4%{$H!tlAu~PhWV%ykTJm+K76fp{yQemM_;m-)5@mnn-Var! z?>58rvO~?!;&})BTamY$WRNV#%puvXY*{X`ys>f=l=C0YyDS-2er?b5Of6i7IvlAD zKX}M#xDM57y@#20@;K8XgLoff#`K>u?d%z5Ec-b#dXF$;@Hm}0%e*e4{DIe>f1k=0 z=U7CpiAUf^ZFl8j#-hn)>ddI8R32a_0wtjic3Wa`PGI-`D& z8F!sz+Ffs8j${s;XFI}ON{&k7@ysC8m(O8Z^Dy3v?RSs;c1uK0Ee{0rp5*~`@o-vS ziZL^Mp8BUY<<~U5$FCW&VT^IeSU1eHp3{~boC&9I(j1XGdEp#~CCYAF3rM(kCLjUUt+ zfdfpP1ikjomW&q2!o03$t$n5+kOx1&OcT~D@(~pH0!LrMyziX2#uH6ku`S4e@KjPPrP{1sAp#q zZOq6u*GHkv@#1ciM-7~n^v)YGUnH5@>BDyctYx3^**mRms_9FzE@l0`YQ0D{KpsnNLL19{>~{Csw?sp>i@Aiz_;=D{>^^XmGn%Y7rfOm{^uChMtFFG6vgu-+{5P%hdr~SA?bj2Lr~&);M_qJZdF){eA}b1NhBXU8Q*p?$~gKDCS4c(MAMI(Jl1xqODeDH z4nJ)3a=z-AzbWbr`TWPr=3~F`oZolyC#tLdU9pX1qyE9_YSk6^?F4$klD`N4a}(&j zISW-+=ca7BD)`viU43YiX)j}KrLwf6ubOS;eM-KezpvzWU7^0n_^IaD5p84WdKP|G zajAhvrIho~H+ALIl<-K>r7Dxme&F)Lupxa5)&`8%Pcp8w2Gn0DYv62hMD6`=>~H!c zV-eOF)WQprQGb9Lhq0b!)nQ;Njmw*8T#n%T zzspm)!G-VpE`0xY56CWE(PEJ6|9pvbf3agD&gM6 zZ*Xtm7q~0&Go0V_JM=A>YtW}^ViWV+vdbYC*X5`M35GKpU@0eFjPL)m_px~K5uEMp z!?*r~;CylvbeuKqbDI7}K{4th$p(B6dG-eEf%eeb zfjtAhv-e{Ug7};|?89C_JHG9V|H#QuXWRq8TK*$EFVejd7uNMU>?d$1A+bdYH}pzs zUY?}p-@pcPM=ua<;Qropy!c9a-g5+JgZ(U?OyIrG5r<91 zlM3vQaJ_|#2rCvY+eh{Xe(=C%z;9vLBlyie1uv8L;(?vY!0>|^?BBA}NqrZ`+o*7F zg=`e}RxI4xvkz$ zlD-u-x*0YK+Q9vZbUgWQuu;KQ$Oe;7!A1p|V1uTOCMTnvf(?u5g1wu(yT5GP=%ISc z?gW1}d%Ykzrr*sTo4~6GYYS|Y&hf_D^IgkEVV|<>m7K6oC+yQ1pV#k%eJZd|1@@`H zJ{8!f3-;-PeY#+uF4(90_idltHa)go!mdr5Z*kK>$%1hW`@fGZ__+!}NEN+LjH%_pv%i3YreG_;uVTWVw z`4sH%kLm8y2iE5w^Ox=s9JAvw*}6B7M4a&3_~=IW2fq>~-qH14sdW*1K?rDK%M8Hny zhs(TyuwA!u0WT8p{Q{=%$UN_Q!gjgO5Vp(RE$|!^@L>TTBW&kAOW3XheK_WLntcZ5U@fPh;G+j+YL`ZfXY z74U0>?fRT1Z0m3(=0J|mk$I&T|3dZENKQfu~IYAo)BHANdQEPGwV_Fx=Vy0%2o`-T(jq literal 0 HcmV?d00001 diff --git a/benchmarks/opencl/mri-q/libsgemm.a b/benchmarks/opencl/mri-q/libsgemm.a new file mode 100644 index 0000000000000000000000000000000000000000..372b194145ec5f7e3554e3e9c135131e04a0ab5e GIT binary patch literal 15812 zcmeHO4|G)3nZIw|yqS;$p*|86bd3%H44IJRWirI2np7g)np&(3xLxZonaqULKropA zdUP9{3_-wZ_Yt9g+ygFy)lw^cgkfFJF7W`ayWO*P4}W^jdR%%!N_P*ZhgL$O24{cY zd+%m$CLma?J=^WgdGmhvyWjopz2E)rz2Cj>=H`W=_VCKvbLSV059LDC!s2gHQKgS_ ztjJ}Ixfq*tD)U@_kTL5Cu4xOm1|N#Fua2|^?`&JOrZW~HRe+61j#3RYB9+2j?uvB;`5(U#VT z*XQ&4E4-0v?}CUo94Tv?pGorjz12Q%7ZS$%**jn+a{W)*%-#ct)bP;MQbDN9W8CEi)zZs%PNZI zu8hUjblf(7esfD~WoKhqxNX(^*4DMFysKK;+uPdbn|aERSkxG4()>+TjTH;Rk?NWn zEgG)$Rr@0gnj+N|(dtI6Jla$qZuI+Zy`oH#Z;A|8Mtxd&O=D$6b-1y-GE^Q4MJoK7 zPxDpPG}SEdHToLMebt)pR=WY2*P}wGIoWU0XYHhG@s^mIEkSxW@LW6&{1Lqz&r&$gXFzS5j#x9`vzMcktj~j4BM6nYj`M3!!y&S z6Xx{mfUl*(5D0VnBH$SgqB2I9)4vEj)20*V^akJ=lkkH;nA4X4&$Q`;Ii2(|!;K#V z!km6T@JyRdnA0Bso&h_hK$z240ME4PggLzx_*$A-5eReoHsEV>h{_mYPN%pe(=TH= zxszlCW23A+(i$pjS=|x~hT7XhkFem^?y0B@E^heB7wQ^u z)n!!FudT1BXl)BaR7cCBk&m6ZCe+>>j6JdjIm;T#I%4gYocA-U&4ejFtKF<@bz3Y_ zw)mcaw<8t`Kg7ytzy(81O&IviY)o+-osGf22=e(m=&KI;e8K8aFcb<#wX$%S@g%RW znr8RvkT(?aT1iyRSf~*ydt{aMZVJUh6MM!N46Y9b>(L2q#6a09$||h>y_(-%j4Rt5 zOm+j2W5QVDupX&|Z4+cwzrRC%K1|Bj`UiK+q zUKZdOPJ&2ukx!J*^Q-}nDDSi&18}TPM{IeOL_{Dyn!9))dGt^_uffC19RU{0<(POm z24p*tf!asyrTS1B#qY*LyybW(jN*CCMAi>4w-SIb4+q5vh&=iQ0J$OE22vOC?nSvY zolv9-EdSMCETql^Pw4WJ?wA)cKtv9@zJ zeV|It58~c_N%l&PcxjzOF5TdW&+B){^CUT5QYXtL8|3({{jz+k-;ozs}`0*uL)*sJPJMj%+#^AJDa2LG?G(=A;g}|P z)t=aE@#gfU0$xGb=6MD(IL~eYze3pN5k0x9-EdnQ+>GD%k=uB|vC4L#)`Z$at*zMm zH-_Q!jQ4#$x-RfFS9P{_L^S}oU<5u!*aTi%y=Ja|#{BiBkRNI`11({M?WN50e@2Cj z+y@{Yjw#~?Zh?n~V3fTc?4Hp=eyBX3Vfaa?DqCupUU1Inl^*Izqby8tS0?Y?d>}SgA(`lcc%PZ#P#HcHQ*_V(7i@u$80V|4twwih%W_;OPV0>_m0N%!?TQ30w0uj46TnlE_Z^`kY6=$!9Yz&wq3d7Tc~ z^7y`<`1oF!_$UuOw0{2~_Vr%7ekk4ym)qBW9(-4`uipVaalOCnzJ7tdO}BUVSK8Ot zR8%jx+V%e6Emm9jdT;ORFJfQs@XW$K^cL(xi*ClgvjY3hI_x{|#lACyeJA#zvo=Ub zm$0v=J-w{v;f(%C>>nrL>|XKQp)0-}s$!ns-@(t~^}NhCfiW8P+G-+iY(K5$TJe<9 zmF7I*T*XN|pnY?U#uR5Nmv!mg@29n)?;(xd&C_({TsnLx>7YEik~=2b82T$o>+EI5 zLw(FYlujG(|IjR3Ph@zge1E~4sDJXJST8*v^vSGCyDKTF;7=)UjB6G;d}yTRCNsJ6Ab4Q4CfOP&LVvsjVdm%F9ZVwAJVf7s}8$%b>7`Q7lj-NR{P-36xo`V3Q#Ph;xx zYa~Ni?$A#bFijp}#)rKuH63y`Z~v;%y-e1RIVFD(arrB!jl^1qv1LC?H6i}={@b*~ z1z8O!lF?l#Yq`BRBf9BvEwRU>rG`=Vmc{p}Q~tco==r6rDML&j{xg*EE=!$3{%_4W zplm&^L`nM^-6of(@kZa-Kj8O|4EJ%IXM+zt9PW+Xs& z4$J!T8D<-_#0$(AUL@(m({Ub!a@O6*jHju*nUL{5^4!1-r3Z2$WAlEdFMStrx2(nY zGc|xVJEzM=PA}8Dp9mO<6SC3sEcAgoJwf$pM%~ZIhO*9K$R}mvn(Yq#*!xU-8gh=O zQBIQS0mL^CLH~CyR5ukkQ`;K@omxI;RPUMoc&cF0J$j<|tEuky zp4YoiS8I8H>oD^E(V^vk=rHnMb{NWo=%;IBZPN~BR9%O5>r6j+E!qgZnQeR!ZL-=( z_12V5W^CHejJ_SfeP~m!r1d3%UxM$D0RB7V`!U+r3A`KhA4C5UNnN(KPIYY@(H-Zc z@V~t7O7)ze?pKolu;4n76!}NCwnc;dv)=%J#IL;Iu`U#vT zZu}+Fd?;h<9_;0EU~4%{$H!tlAu~PhWV%ykTJm+K76fp{yQemM_;m-)5@mnn-Var! z?>58rvO~?!;&})BTamY$WRNV#%puvXY*{X`ys>f=l=C0YyDS-2er?b5Of6i7IvlAD zKX}M#xDM57y@#20@;K8XgLoff#`K>u?d%z5Ec-b#dXF$;@Hm}0%e*e4{DIe>f1k=0 z=U7CpiAUf^ZFl8j#-hn)>ddI8R32a_0wtjic3Wa`PGI-`D& z8F!sz+Ffs8j${s;XFI}ON{&k7@ysC8m(O8Z^Dy3v?RSs;c1uK0Ee{0rp5*~`@o-vS ziZL^Mp8BUY<<~U5$FCW&VT^IeSU1eHp3{~boC&9I(j1XGdEp#~CCYAF3rM(kCLjUUt+ zfdfpP1ikjomW&q2!o03$t$n5+kOx1&OcT~D@(~pH0!LrMyziX2#uH6ku`S4e@KjPPrP{1sAp#q zZOq6u*GHkv@#1ciM-7~n^v)YGUnH5@>BDyctYx3^**mRms_9FzE@l0`YQ0D{KpsnNLL19{>~{Csw?sp>i@Aiz_;=D{>^^XmGn%Y7rfOm{^uChMtFFG6vgu-+{5P%hdr~SA?bj2Lr~&);M_qJZdF){eA}b1NhBXU8Q*p?$~gKDCS4c(MAMI(Jl1xqODeDH z4nJ)3a=z-AzbWbr`TWPr=3~F`oZolyC#tLdU9pX1qyE9_YSk6^?F4$klD`N4a}(&j zISW-+=ca7BD)`viU43YiX)j}KrLwf6ubOS;eM-KezpvzWU7^0n_^IaD5p84WdKP|G zajAhvrIho~H+ALIl<-K>r7Dxme&F)Lupxa5)&`8%Pcp8w2Gn0DYv62hMD6`=>~H!c zV-eOF)WQprQGb9Lhq0b!)nQ;Njmw*8T#n%T zzspm)!G-VpE`0xY56CWE(PEJ6|9pvbf3agD&gM6 zZ*Xtm7q~0&Go0V_JM=A>YtW}^ViWV+vdbYC*X5`M35GKpU@0eFjPL)m_px~K5uEMp z!?*r~;CylvbeuKqbDI7}K{4th$p(B6dG-eEf%eeb zfjtAhv-e{Ug7};|?89C_JHG9V|H#QuXWRq8TK*$EFVejd7uNMU>?d$1A+bdYH}pzs zUY?}p-@pcPM=ua<;Qropy!c9a-g5+JgZ(U?OyIrG5r<91 zlM3vQaJ_|#2rCvY+eh{Xe(=C%z;9vLBlyie1uv8L;(?vY!0>|^?BBA}NqrZ`+o*7F zg=`e}RxI4xvkz$ zlD-u-x*0YK+Q9vZbUgWQuu;KQ$Oe;7!A1p|V1uTOCMTnvf(?u5g1wu(yT5GP=%ISc z?gW1}d%Ykzrr*sTo4~6GYYS|Y&hf_D^IgkEVV|<>m7K6oC+yQ1pV#k%eJZd|1@@`H zJ{8!f3-;-PeY#+uF4(90_idltHa)go!mdr5Z*kK>$%1hW`@fGZ__+!}NEN+LjH%_pv%i3YreG_;uVTWVw z`4sH%kLm8y2iE5w^Ox=s9JAvw*}6B7M4a&3_~=IW2fq>~-qH14sdW*1K?rDK%M8Hny zhs(TyuwA!u0WT8p{Q{=%$UN_Q!gjgO5Vp(RE$|!^@L>TTBW&kAOW3XheK_WLntcZ5U@fPh;G+j+YL`ZfXY z74U0>?fRT1Z0m3(=0J|mk$I&T|3dZENKQfu~IYAo)BHANdQEPGwV_Fx=Vy0%2o`-T(jq literal 0 HcmV?d00001 diff --git a/benchmarks/opencl/mri-q/macros.h b/benchmarks/opencl/mri-q/macros.h new file mode 100644 index 00000000..501ead7e --- /dev/null +++ b/benchmarks/opencl/mri-q/macros.h @@ -0,0 +1,21 @@ +#ifndef __MACROS__ +#define __MACROS__ + +#define PI 3.1415926535897932384626433832795029f +#define PIx2 6.2831853071795864769252867665590058f + +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define K_ELEMS_PER_GRID 2048 + +#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 +#define KERNEL_Q_THREADS_PER_BLOCK 256 +#define KERNEL_Q_K_ELEMS_PER_GRID 1024 + +struct kValues { + float Kx; + float Ky; + float Kz; + float PhiMag; +}; + +#endif diff --git a/benchmarks/opencl/mri-q/main.cc b/benchmarks/opencl/mri-q/main.cc new file mode 100644 index 00000000..9288845f --- /dev/null +++ b/benchmarks/opencl/mri-q/main.cc @@ -0,0 +1,293 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * C code for creating the Q data structure for fast convolution-based + * Hessian multiplication for arbitrary k-space trajectories. + * + * Inputs: + * kx - VECTOR of kx values, same length as ky and kz + * ky - VECTOR of ky values, same length as kx and kz + * kz - VECTOR of kz values, same length as kx and ky + * x - VECTOR of x values, same length as y and z + * y - VECTOR of y values, same length as x and z + * z - VECTOR of z values, same length as x and y + * phi - VECTOR of the Fourier transform of the spatial basis + * function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz. + * + * recommended g++ options: + * -O3 -lm -ffast-math -funroll-all-loops + */ + +#include +#include +#include +#include + +#include "ocl.h" +#include "file.h" +#include "macros.h" +#include "computeQ.h" + +static void +setupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr,clPrmtr* clPrm) +{ + cl_int clStatus; + *dev_ptr = clCreateBuffer(clPrm->clContext,CL_MEM_READ_ONLY,num*size,NULL,&clStatus); + CHECK_ERROR("clCreateBuffer"); + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL); + CHECK_ERROR("clEnequeueWriteBuffer"); +} + +static void +cleanupMemoryGPU(int num, int size, cl_mem* dev_ptr, float* host_ptr, clPrmtr* clPrm) +{ + cl_int clStatus; + clStatus = clEnqueueReadBuffer(clPrm->clCommandQueue,*dev_ptr,CL_TRUE,0,num*size,host_ptr,0,NULL,NULL); + CHECK_ERROR("clEnqueueReadBuffer") + clStatus = clReleaseMemObject(*dev_ptr); + CHECK_ERROR("clReleaseMemObject") +} + +int +main (int argc, char *argv[]) { + int numX, numK; /* Number of X and K values */ + int original_numK; /* Number of K values in input file */ + float *kx, *ky, *kz; /* K trajectory (3D vectors) */ + float *x, *y, *z; /* X coordinates (3D vectors) */ + float *phiR, *phiI; /* Phi values (complex) */ + float *phiMag; /* Magnitude of Phi */ + float *Qr, *Qi; /* Q signal (complex) */ + + struct kValues* kVals; + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + pb_InitializeTimerSet(&timers); + + /* Read command line */ + params = pb_ReadParameters(&argc, argv); + /*if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL)) + { + fprintf(stderr, "Expecting one input filename\n"); + exit(-1); + }*/ + params->inpFiles = (char **)malloc(sizeof(char *) * 2); + params->inpFiles[0] = (char *)malloc(100); + params->inpFiles[1] = NULL; + strncpy(params->inpFiles[0], "32_32_32_dataset.bin", 100); + + /* Read in data */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + inputData(params->inpFiles[0], + &original_numK, &numX, + &kx, &ky, &kz, + &x, &y, &z, + &phiR, &phiI); + + printf("OK\n"); + + /* Reduce the number of k-space samples if a number is given + * on the command line */ + if (argc < 2) + numK = original_numK; + else + { + int inputK; + char *end; + inputK = strtol(argv[1], &end, 10); + if (end == argv[1]) + { + fprintf(stderr, "Expecting an integer parameter\n"); + exit(-1); + } + + numK = MIN(inputK, original_numK); + } + + printf("%d pixels in output; %d samples in trajectory; using %d samples\n", + numX, original_numK, numK); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + clPrmtr clPrm; + + pb_Context* pb_context; + pb_context = pb_InitOpenCLContext(params); + if (pb_context == NULL) { + fprintf (stderr, "Error: No OpenCL platform/device can be found."); + return -1; + } + + cl_int clStatus; + cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId; + cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId; + clPrm.clContext = (cl_context) pb_context->clContext; + + clPrm.clCommandQueue = clCreateCommandQueue(clPrm.clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus); + CHECK_ERROR("clCreateCommandQueue") + + pb_SetOpenCL(&(clPrm.clContext), &(clPrm.clCommandQueue)); + + printf("OK\n"); + + //const char* clSource[] = {readFile("src/opencl_base/kernels.cl")}; + //cl_program clProgram = clCreateProgramWithSource(clPrm.clContext,1,clSource,NULL,&clStatus); + cl_program clProgram = clCreateProgramWithBuiltInKernels( + clPrm.clContext, 1, &clDevice, "ComputePhiMag_GPU;ComputeQ_GPU", &clStatus); + CHECK_ERROR("clCreateProgramWithSource") + + char options[50]; + sprintf(options,"-I src/opencl_nvidia"); + clStatus = clBuildProgram(clProgram,0,NULL,options,NULL,NULL); + if (clStatus != CL_SUCCESS) { + char buf[4096]; + clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 4096, buf, NULL); + printf ("%s\n", buf); + CHECK_ERROR("clBuildProgram") + } + + /* Create CPU data structures */ + createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi); + + /* GPU section 1 (precompute PhiMag) */ + { + clPrm.clKernel = clCreateKernel(clProgram,"ComputePhiMag_GPU",&clStatus); + CHECK_ERROR("clCreateKernel") + + /* Mirror several data structures on the device */ + cl_mem phiR_d; + cl_mem phiI_d; + cl_mem phiMag_d; + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + setupMemoryGPU(numK,sizeof(float),&phiR_d,phiR,&clPrm); + setupMemoryGPU(numK,sizeof(float),&phiI_d,phiI,&clPrm); + phiMag_d = clCreateBuffer(clPrm.clContext,CL_MEM_WRITE_ONLY,numK*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computePhiMag_GPU(numK, phiR_d, phiI_d, phiMag_d, &clPrm); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + cleanupMemoryGPU(numK,sizeof(float),&phiMag_d,phiMag,&clPrm); + + clStatus = clReleaseMemObject(phiR_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(phiI_d); + CHECK_ERROR("clReleaseMemObject") + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + kVals = (struct kValues*)calloc(numK, sizeof (struct kValues)); + + int k; + for (k = 0; k < numK; k++) { + kVals[k].Kx = kx[k]; + kVals[k].Ky = ky[k]; + kVals[k].Kz = kz[k]; + kVals[k].PhiMag = phiMag[k]; + } + + free(phiMag); + + clStatus = clReleaseKernel(clPrm.clKernel); + + /* GPU section 2 */ + { + clPrm.clKernel = clCreateKernel(clProgram,"ComputeQ_GPU",&clStatus); + CHECK_ERROR("clCreateKernel") + + cl_mem x_d; + cl_mem y_d; + cl_mem z_d; + cl_mem Qr_d; + cl_mem Qi_d; + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + setupMemoryGPU(numX,sizeof(float),&x_d,x,&clPrm); + setupMemoryGPU(numX,sizeof(float),&y_d,y,&clPrm); + setupMemoryGPU(numX,sizeof(float),&z_d,z,&clPrm); + + Qr_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + clMemSet(&clPrm,Qr_d,0,numX*sizeof(float)); + Qi_d = clCreateBuffer(clPrm.clContext,CL_MEM_READ_WRITE,numX*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + clMemSet(&clPrm,Qi_d,0,numX*sizeof(float)); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computeQ_GPU(numK, numX, x_d, y_d, z_d, kVals, Qr_d, Qi_d, &clPrm); + + clStatus = clFinish(clPrm.clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + clStatus = clReleaseMemObject(x_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(y_d); + CHECK_ERROR("clReleaseMemObject") + clStatus = clReleaseMemObject(z_d); + CHECK_ERROR("clReleaseMemObject") + cleanupMemoryGPU(numX,sizeof(float),&Qr_d,Qr,&clPrm); + cleanupMemoryGPU(numX,sizeof(float),&Qi_d,Qi,&clPrm); + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + if (params->outFile) + { + /* Write Q to file */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + outputData(params->outFile, Qr, Qi, numX); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + } + + free (kx); + free (ky); + free (kz); + free (x); + free (y); + free (z); + free (phiR); + free (phiI); + free (kVals); + free (Qr); + free (Qi); + + //free((void*)clSource[0]); + + clStatus = clReleaseKernel(clPrm.clKernel); + clStatus = clReleaseProgram(clProgram); + clStatus = clReleaseCommandQueue(clPrm.clCommandQueue); + clStatus = clReleaseContext(clPrm.clContext); + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + pb_PrintTimerSet(&timers); + + pb_FreeParameters(params); + + return 0; +} diff --git a/benchmarks/opencl/mri-q/ocl copy.c b/benchmarks/opencl/mri-q/ocl copy.c new file mode 100644 index 00000000..9ce9a2f5 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl copy.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include "ocl.h" + +char* readFile(const char* fileName) +{ + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} + +void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size) +{ + cl_int clStatus; + char* temp = (char*)malloc(size); + memset(temp,val,size); + clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + free(temp); +} diff --git a/benchmarks/opencl/mri-q/ocl copy.h b/benchmarks/opencl/mri-q/ocl copy.h new file mode 100644 index 00000000..8840a868 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl copy.h @@ -0,0 +1,21 @@ +#ifndef __OCLH__ +#define __OCLH__ + +typedef struct { + cl_uint major; + cl_uint minor; + cl_uint multiProcessorCount; +} OpenCLDeviceProp; + +void clMemSet(cl_command_queue, cl_mem, int, size_t); +char* readFile(const char*); + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +#endif diff --git a/benchmarks/opencl/mri-q/ocl.c b/benchmarks/opencl/mri-q/ocl.c new file mode 100644 index 00000000..61cd5fe6 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include "ocl.h" +#include + +char* readFile(const char* fileName) +{ + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} + +void clMemSet(clPrmtr* clPrm, cl_mem buf, int val, size_t size) +{ + cl_int clStatus; + char* temp = (char*)malloc(size); + memset(temp,val,size); + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + free(temp); +} diff --git a/benchmarks/opencl/mri-q/ocl.h b/benchmarks/opencl/mri-q/ocl.h new file mode 100644 index 00000000..04c33cd3 --- /dev/null +++ b/benchmarks/opencl/mri-q/ocl.h @@ -0,0 +1,23 @@ +#ifndef __OCLH__ +#define __OCLH__ + +#include + +typedef struct { + cl_context clContext; + cl_command_queue clCommandQueue; + cl_kernel clKernel; +} clPrmtr; + +void clMemSet(clPrmtr*, cl_mem, int, size_t); +char* readFile(const char*); + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +#endif diff --git a/benchmarks/opencl/mri-q/parboil.h b/benchmarks/opencl/mri-q/parboil.h new file mode 100644 index 00000000..4c9a8b5e --- /dev/null +++ b/benchmarks/opencl/mri-q/parboil.h @@ -0,0 +1,348 @@ +/* + * (c) 2010 The Board of Trustees of the University of Illinois. + */ +#ifndef PARBOIL_HEADER +#define PARBOIL_HEADER + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* A platform as specified by the user on the command line */ +struct pb_PlatformParam { + char *name; /* The platform name. This string is owned. */ + char *version; /* The platform version; may be NULL. + * This string is owned. */ +}; + +/* Create a PlatformParam from the given strings. + * 'name' must not be NULL. 'version' may be NULL. + * If not NULL, the strings should have been allocated by malloc(), + * and they will be owned by the returned object. + */ +struct pb_PlatformParam * +pb_PlatformParam(char *name, char *version); + +void +pb_FreePlatformParam(struct pb_PlatformParam *); + +/* A criterion for how to select a device */ +enum pb_DeviceSelectionCriterion { + pb_Device_INDEX, /* Enumerate the devices and select one + * by its number */ + pb_Device_CPU, /* Select a CPU device */ + pb_Device_GPU, /* Select a GPU device */ + pb_Device_ACCELERATOR, /* Select an accelerator device */ + pb_Device_NAME /* Select a device by name */ +}; + +/* A device as specified by the user on the command line */ +struct pb_DeviceParam { + enum pb_DeviceSelectionCriterion criterion; + union { + int index; /* If criterion == pb_Device_INDEX, + * the index of the device */ + char *name; /* If criterion == pb_Device_NAME, + * the name of the device. + * This string is owned. */ + }; +}; + +struct pb_DeviceParam * +pb_DeviceParam_index(int index); + +struct pb_DeviceParam * +pb_DeviceParam_cpu(void); + +struct pb_DeviceParam * +pb_DeviceParam_gpu(void); + +struct pb_DeviceParam * +pb_DeviceParam_accelerator(void); + +/* Create a by-name device selection criterion. + * The string should have been allocated by malloc(), and it will will be + * owned by the returned object. + */ +struct pb_DeviceParam * +pb_DeviceParam_name(char *name); + +void +pb_FreeDeviceParam(struct pb_DeviceParam *); + +/* Command line parameters for benchmarks */ +struct pb_Parameters { + char *outFile; /* If not NULL, the raw output of the + * computation should be saved to this + * file. The string is owned. */ + char **inpFiles; /* A NULL-terminated array of strings + * holding the input file(s) for the + * computation. The array and strings + * are owned. */ + struct pb_PlatformParam *platform; /* If not NULL, the platform + * specified on the command line. */ + struct pb_DeviceParam *device; /* If not NULL, the device + * specified on the command line. */ +}; + +/* Read command-line parameters. + * + * The argc and argv parameters to main are read, and any parameters + * interpreted by this function are removed from the argument list. + * + * A new instance of struct pb_Parameters is returned. + * If there is an error, then an error message is printed on stderr + * and NULL is returned. + */ +struct pb_Parameters * +pb_ReadParameters(int *_argc, char **argv); + +/* Free an instance of struct pb_Parameters. + */ +void +pb_FreeParameters(struct pb_Parameters *p); + +void +pb_FreeStringArray(char **); + +/* Count the number of input files in a pb_Parameters instance. + */ +int +pb_Parameters_CountInputs(struct pb_Parameters *p); + +/* A time or duration. */ +//#if _POSIX_VERSION >= 200112L +typedef unsigned long long pb_Timestamp; /* time in microseconds */ +//#else +//# error "Timestamps not implemented" +//#endif + +enum pb_TimerState { + pb_Timer_STOPPED, + pb_Timer_RUNNING, +}; + +struct pb_Timer { + enum pb_TimerState state; + pb_Timestamp elapsed; /* Amount of time elapsed so far */ + pb_Timestamp init; /* Beginning of the current time interval, + * if state is RUNNING. End of the last + * recorded time interfal otherwise. */ +}; + +/* Reset a timer. + * Use this to initialize a timer or to clear + * its elapsed time. The reset timer is stopped. + */ +void +pb_ResetTimer(struct pb_Timer *timer); + +/* Start a timer. The timer is set to RUNNING mode and + * time elapsed while the timer is running is added to + * the timer. + * The timer should not already be running. + */ +void +pb_StartTimer(struct pb_Timer *timer); + +/* Stop a timer. + * This stops adding elapsed time to the timer. + * The timer should not already be stopped. + */ +void +pb_StopTimer(struct pb_Timer *timer); + +/* Get the elapsed time in seconds. */ +double +pb_GetElapsedTime(struct pb_Timer *timer); + +/* Execution time is assigned to one of these categories. */ +enum pb_TimerID { + pb_TimerID_NONE = 0, + pb_TimerID_IO, /* Time spent in input/output */ + pb_TimerID_KERNEL, /* Time spent computing on the device, + * recorded asynchronously */ + pb_TimerID_COPY, /* Time spent synchronously moving data + * to/from device and allocating/freeing + * memory on the device */ + pb_TimerID_DRIVER, /* Time spent in the host interacting with the + * driver, primarily for recording the time + * spent queueing asynchronous operations */ + pb_TimerID_COPY_ASYNC, /* Time spent in asynchronous transfers */ + pb_TimerID_COMPUTE, /* Time for all program execution other + * than parsing command line arguments, + * I/O, kernel, and copy */ + pb_TimerID_OVERLAP, /* Time double-counted in asynchronous and + * host activity: automatically filled in, + * not intended for direct usage */ + pb_TimerID_LAST /* Number of timer IDs */ +}; + +/* Dynamic list of asynchronously tracked times between events */ +struct pb_async_time_marker_list { + char *label; // actually just a pointer to a string + enum pb_TimerID timerID; /* The ID to which the interval beginning + * with this marker should be attributed */ + void * marker; + //cudaEvent_t marker; /* The driver event for this marker */ + struct pb_async_time_marker_list *next; +}; + +struct pb_SubTimer { + char *label; + struct pb_Timer timer; + struct pb_SubTimer *next; +}; + +struct pb_SubTimerList { + struct pb_SubTimer *current; + struct pb_SubTimer *subtimer_list; +}; + +/* A set of timers for recording execution times. */ +struct pb_TimerSet { + enum pb_TimerID current; + struct pb_async_time_marker_list* async_markers; + pb_Timestamp async_begin; + pb_Timestamp wall_begin; + struct pb_Timer timers[pb_TimerID_LAST]; + struct pb_SubTimerList *sub_timer_list[pb_TimerID_LAST]; +}; + +/* Reset all timers in the set. */ +void +pb_InitializeTimerSet(struct pb_TimerSet *timers); + +void +pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category); + +/* Select which timer the next interval of time should be accounted + * to. The selected timer is started and other timers are stopped. + * Using pb_TimerID_NONE stops all timers. */ +void +pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer); + +void +pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category); + +/* Print timer values to standard output. */ +void +pb_PrintTimerSet(struct pb_TimerSet *timers); + +/* Release timer resources */ +void +pb_DestroyTimerSet(struct pb_TimerSet * timers); + +void +pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr); + + +typedef struct pb_Device_tag { + char* name; + void* clDevice; + int id; + unsigned int in_use; + unsigned int available; +} pb_Device; + +struct pb_Context_tag; +typedef struct pb_Context_tag pb_Context; + +typedef struct pb_Platform_tag { + char* name; + char* version; + void* clPlatform; + unsigned int in_use; + pb_Context** contexts; + pb_Device** devices; +} pb_Platform; + +struct pb_Context_tag { + void* clPlatformId; + void* clContext; + void* clDeviceId; + pb_Platform* pb_platform; + pb_Device* pb_device; +}; + +// verbosely print out list of platforms and their devices to the console. +pb_Platform** +pb_GetPlatforms(); + +// Choose a platform according to the given platform specification +pb_Platform* +pb_GetPlatform(struct pb_PlatformParam *platform); + +// choose a platform: by name, name & version +pb_Platform* +pb_GetPlatformByName(const char* name); + +pb_Platform* +pb_GetPlatformByNameAndVersion(const char* name, const char* version); + +// Choose a device according to the given device specification +pb_Device* +pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device); + +pb_Device** +pb_GetDevices(pb_Platform* pb_platform); + +// choose a device by name. +pb_Device* +pb_GetDeviceByName(pb_Platform* pb_platform, const char* name); + +pb_Platform* +pb_GetPlatformByEnvVars(); + +pb_Context* +pb_InitOpenCLContext(struct pb_Parameters* parameters); + +void +pb_ReleasePlatforms(); + +void +pb_ReleaseContext(pb_Context* c); + +void +pb_PrintPlatformInfo(pb_Context* c); + +void +perf_init(); + +//#define MEASURE_KERNEL_TIME + +#include + +#ifdef MEASURE_KERNEL_TIME +#define clEnqueueNDRangeKernel(q,k,d,o,dg,db,a,b,c) pb_clEnqueueNDRangeKernel((q), (k), (d), (o), (dg), (db), (a), (b), (c)) +cl_int +pb_clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* work_dim */, + const size_t * /* global_work_offset */, + const size_t * /* global_work_size */, + const size_t * /* local_work_size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */); +#endif + +enum { T_FLOAT, T_DOUBLE, T_SHORT, T_INT, T_UCHAR }; +void pb_sig_float(char*, float*, int); +void pb_sig_double(char*, double*, int); +void pb_sig_short(char*, short*, int); +void pb_sig_int(char*, int*, int); +void pb_sig_uchar(char*, unsigned char*, unsigned int); +void pb_sig_clmem(char*, cl_command_queue, cl_mem, int); + +#ifdef __cplusplus +} +#endif + +#endif //PARBOIL_HEADER + diff --git a/benchmarks/opencl/mri-q/parboil_opencl.c b/benchmarks/opencl/mri-q/parboil_opencl.c new file mode 100644 index 00000000..a4db1680 --- /dev/null +++ b/benchmarks/opencl/mri-q/parboil_opencl.c @@ -0,0 +1,1394 @@ +/* + * (c) 2007 The Board of Trustees of the University of Illinois. + */ + +#include +#include +#include +#include +#include +#include + +#if _POSIX_VERSION >= 200112L +# include +#endif + +//#include "perfmon.h" + +cl_context *clContextPtr; +cl_command_queue *clCommandQueuePtr; + +// #define DISABLE_PARBOIL_TIMER + +/*****************************************************************************/ +/* Timer routines */ + +static int is_async(enum pb_TimerID timer) +{ + return (timer == pb_TimerID_KERNEL) || + (timer == pb_TimerID_COPY_ASYNC); +} + +static int is_blocking(enum pb_TimerID timer) +{ + return (timer == pb_TimerID_COPY) || (timer == pb_TimerID_NONE); +} + +#define INVALID_TIMERID pb_TimerID_LAST + +static int asyncs_outstanding(struct pb_TimerSet* timers) +{ + return (timers->async_markers != NULL) && + (timers->async_markers->timerID != INVALID_TIMERID); +} + +static struct pb_async_time_marker_list * +get_last_async(struct pb_TimerSet* timers) +{ + /* Find the last event recorded thus far */ + struct pb_async_time_marker_list * last_event = timers->async_markers; + if(last_event != NULL && last_event->timerID != INVALID_TIMERID) { + while(last_event->next != NULL && + last_event->next->timerID != INVALID_TIMERID) + last_event = last_event->next; + return last_event; + } else + return NULL; +} + +static void insert_marker(struct pb_TimerSet* tset, enum pb_TimerID timer) +{ + cl_int ciErrNum = CL_SUCCESS; + struct pb_async_time_marker_list ** new_event = &(tset->async_markers); + + while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { + new_event = &((*new_event)->next); + } + + if(*new_event == NULL) { + *new_event = (struct pb_async_time_marker_list *) + malloc(sizeof(struct pb_async_time_marker_list)); + (*new_event)->marker = calloc(1, sizeof(cl_event)); + /* + // I don't think this is needed at all. I believe clEnqueueMarker 'creates' the event +#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) +fprintf(stderr, "Creating Marker [%d]\n", timer); + *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Creating User Event Object!\n"); + } + ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Setting User Event Status!\n"); + } +#endif +*/ + (*new_event)->next = NULL; + } + + /* valid event handle now aquired: insert the event record */ + (*new_event)->label = NULL; + (*new_event)->timerID = timer; + ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Enqueueing Marker!\n"); + } + +} + +static void insert_submarker(struct pb_TimerSet* tset, char *label, enum pb_TimerID timer) +{ + cl_int ciErrNum = CL_SUCCESS; + struct pb_async_time_marker_list ** new_event = &(tset->async_markers); + + while(*new_event != NULL && (*new_event)->timerID != INVALID_TIMERID) { + new_event = &((*new_event)->next); + } + + if(*new_event == NULL) { + *new_event = (struct pb_async_time_marker_list *) + malloc(sizeof(struct pb_async_time_marker_list)); + (*new_event)->marker = calloc(1, sizeof(cl_event)); + /* +#if ( __OPENCL_VERSION__ >= CL_VERSION_1_1 ) +fprintf(stderr, "Creating SubMarker %s[%d]\n", label, timer); + *((cl_event *)((*new_event)->marker)) = clCreateUserEvent(*clContextPtr, &ciErrNum); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Creating User Event Object!\n"); + } + ciErrNum = clSetUserEventStatus(*((cl_event *)((*new_event)->marker)), CL_QUEUED); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Setting User Event Status!\n"); + } +#endif +*/ + (*new_event)->next = NULL; + } + + /* valid event handle now aquired: insert the event record */ + (*new_event)->label = label; + (*new_event)->timerID = timer; + ciErrNum = clEnqueueMarker(*clCommandQueuePtr, (cl_event *)(*new_event)->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Enqueueing Marker!\n"); + } + +} + + +/* Assumes that all recorded events have completed */ +static pb_Timestamp record_async_times(struct pb_TimerSet* tset) +{ + struct pb_async_time_marker_list * next_interval = NULL; + struct pb_async_time_marker_list * last_marker = get_last_async(tset); + pb_Timestamp total_async_time = 0; + enum pb_TimerID timer; + + for(next_interval = tset->async_markers; next_interval != last_marker; + next_interval = next_interval->next) { + cl_ulong command_start=0, command_end=0; + cl_int ciErrNum = CL_SUCCESS; + + ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_start, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error getting first EventProfilingInfo: %d\n", ciErrNum); + } + + ciErrNum = clGetEventProfilingInfo(*((cl_event *)next_interval->next->marker), CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &command_end, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error getting second EventProfilingInfo: %d\n", ciErrNum); + } + + pb_Timestamp interval = (pb_Timestamp) (((double)(command_end - command_start)) / 1e3); + tset->timers[next_interval->timerID].elapsed += interval; + if (next_interval->label != NULL) { + struct pb_SubTimer *subtimer = tset->sub_timer_list[next_interval->timerID]->subtimer_list; + while (subtimer != NULL) { + if ( strcmp(subtimer->label, next_interval->label) == 0) { + subtimer->timer.elapsed += interval; + break; + } + subtimer = subtimer->next; + } + } + total_async_time += interval; + next_interval->timerID = INVALID_TIMERID; + } + + if(next_interval != NULL) + next_interval->timerID = INVALID_TIMERID; + + return total_async_time; +} + +static void +accumulate_time(pb_Timestamp *accum, + pb_Timestamp start, + pb_Timestamp end) +{ +//#if _POSIX_VERSION >= 200112L + *accum += end - start; +//#else +//# error "Timestamps not implemented for this system" +//#endif +} + +//#if _POSIX_VERSION >= 200112L +static pb_Timestamp get_time() +{ + //struct timeval tv; + //gettimeofday(&tv, NULL); + //return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); + return 0; +} +//#else +//# error "no supported time libraries are available on this platform" +//#endif + +void +pb_ResetTimer(struct pb_Timer *timer) +{ +//#ifndef DISABLE_PARBOIL_TIMER + timer->state = pb_Timer_STOPPED; + +//#if _POSIX_VERSION >= 200112L + timer->elapsed = 0; +//#else +//# error "pb_ResetTimer: not implemented for this system" +//#endif +//#endif +} + +void +pb_StartTimer(struct pb_Timer *timer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + if (timer->state != pb_Timer_STOPPED) { + fputs("Ignoring attempt to start a running timer\n", stderr); + return; + } + + timer->state = pb_Timer_RUNNING; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StartTimer: not implemented for this system" +#endif +#endif*/ +} + +void +pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + + unsigned int numNotStopped = 0x3; // 11 + if (timer->state != pb_Timer_STOPPED) { + fputs("Warning: Timer was not stopped\n", stderr); + numNotStopped &= 0x1; // Zero out 2^1 + } + if (subtimer->state != pb_Timer_STOPPED) { + fputs("Warning: Subtimer was not stopped\n", stderr); + numNotStopped &= 0x2; // Zero out 2^0 + } + if (numNotStopped == 0x0) { + fputs("Ignoring attempt to start running timer and subtimer\n", stderr); + return; + } + + timer->state = pb_Timer_RUNNING; + subtimer->state = pb_Timer_RUNNING; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + + if (numNotStopped & 0x2) { + timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } + + if (numNotStopped & 0x1) { + subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; + } + } +#else +# error "pb_StartTimer: not implemented for this system" +#endif + +#endif*/ +} + +void +pb_StopTimer(struct pb_Timer *timer) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + + pb_Timestamp fini; + + if (timer->state != pb_Timer_RUNNING) { + fputs("Ignoring attempt to stop a stopped timer\n", stderr); + return; + } + + timer->state = pb_Timer_STOPPED; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + fini = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StopTimer: not implemented for this system" +#endif + + accumulate_time(&timer->elapsed, timer->init, fini); + timer->init = fini; + +#endif*/ +} + +void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { +/*#ifndef DISABLE_PARBOIL_TIMER + + pb_Timestamp fini; + + unsigned int numNotRunning = 0x3; // 11 + if (timer->state != pb_Timer_RUNNING) { + fputs("Warning: Timer was not running\n", stderr); + numNotRunning &= 0x1; // Zero out 2^1 + } + if (subtimer->state != pb_Timer_RUNNING) { + fputs("Warning: Subtimer was not running\n", stderr); + numNotRunning &= 0x2; // Zero out 2^0 + } + if (numNotRunning == 0x0) { + fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); + return; + } + + + timer->state = pb_Timer_STOPPED; + subtimer->state = pb_Timer_STOPPED; + +#if _POSIX_VERSION >= 200112L + { + struct timeval tv; + gettimeofday(&tv, NULL); + fini = tv.tv_sec * 1000000LL + tv.tv_usec; + } +#else +# error "pb_StopTimer: not implemented for this system" +#endif + + if (numNotRunning & 0x2) { + accumulate_time(&timer->elapsed, timer->init, fini); + timer->init = fini; + } + + if (numNotRunning & 0x1) { + accumulate_time(&subtimer->elapsed, subtimer->init, fini); + subtimer->init = fini; + } + +#endif*/ +} + +/* Get the elapsed time in seconds. */ +double +pb_GetElapsedTime(struct pb_Timer *timer) +{ + /*double ret; +#ifndef DISABLE_PARBOIL_TIMER + + if (timer->state != pb_Timer_STOPPED) { + fputs("Elapsed time from a running timer is inaccurate\n", stderr); + } + +#if _POSIX_VERSION >= 200112L + ret = timer->elapsed / 1e6; +#else +# error "pb_GetElapsedTime: not implemented for this system" +#endif +#endif + return ret;*/ + return 0; +} + +void +pb_InitializeTimerSet(struct pb_TimerSet *timers) +{ +/*#ifndef DISABLE_PARBOIL_TIMER + int n; + + timers->wall_begin = 0; //get_time(); + timers->current = pb_TimerID_NONE; + + timers->async_markers = NULL; + + for (n = 0; n < pb_TimerID_LAST; n++) { + pb_ResetTimer(&timers->timers[n]); + timers->sub_timer_list[n] = NULL; + } +#endif*/ +} + +void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) { + clContextPtr = ((cl_context *)p_clContextPtr); + clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr); +} + +void +pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { +/*#ifndef DISABLE_PARBOIL_TIMER + + struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc + (sizeof(struct pb_SubTimer)); + + int len = strlen(label); + + subtimer->label = (char *) malloc (sizeof(char)*(len+1)); + sprintf(subtimer->label, "%s\0", label); + + pb_ResetTimer(&subtimer->timer); + subtimer->next = NULL; + + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; + if (subtimerlist == NULL) { + subtimerlist = (struct pb_SubTimerList *) calloc + (1, sizeof(struct pb_SubTimerList)); + subtimerlist->subtimer_list = subtimer; + timers->sub_timer_list[pb_Category] = subtimerlist; + } else { + // Append to list + struct pb_SubTimer *element = subtimerlist->subtimer_list; + while (element->next != NULL) { + element = element->next; + } + element->next = subtimer; + } + +#endif*/ +} + +void +pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + + /* Stop the currently running timer */ + if (timers->current != pb_TimerID_NONE) { + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; + struct pb_SubTimer *currSubTimer = (subtimerlist != NULL) ? subtimerlist->current : NULL; + + if (!is_async(timers->current) ) { + if (timers->current != timer) { + if (currSubTimer != NULL) { + pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); + } else { + pb_StopTimer(&timers->timers[timers->current]); + } + } else { + if (currSubTimer != NULL) { + pb_StopTimer(&currSubTimer->timer); + } + } + } else { + insert_marker(timers, timer); + if (!is_async(timer)) { // if switching to async too, keep driver going + pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + pb_Timestamp currentTime = 0; //get_time(); + + /* The only cases we check for asynchronous task completion is + * when an overlapping CPU operation completes, or the next + * segment blocks on completion of previous async operations */ + if( asyncs_outstanding(timers) && + (!is_async(timers->current) || is_blocking(timer) ) ) { + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + /* CL_COMPLETE if completed */ + + cl_int ciErrNum = CL_SUCCESS; + cl_int async_done = CL_COMPLETE; + + ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Querying EventInfo!\n"); + } + + + if(is_blocking(timer)) { + /* Async operations completed after previous CPU operations: + * overlapped time is the total CPU time since this set of async + * operations were first issued */ + + // timer to switch to is COPY or NONE + if(async_done != CL_COMPLETE) { + accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), + timers->async_begin,currentTime); + } + + /* Wait on async operation completion */ + ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Waiting for Events!\n"); + } + + pb_Timestamp total_async_time = record_async_times(timers); + + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + if(async_done == CL_COMPLETE) { + //fprintf(stderr, "Async_done: total_async_type = %lld\n", total_async_time); + timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; + } + + } else + /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ + // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding + // so something is deeper in stack + if(async_done == CL_COMPLETE ) { + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); + } + } + + /* Start the new timer */ + if (timer != pb_TimerID_NONE) { + if(!is_async(timer)) { + pb_StartTimer(&timers->timers[timer]); + } else { + // toSwitchTo Is Async (KERNEL/COPY_ASYNC) + if (!asyncs_outstanding(timers)) { + /* No asyncs outstanding, insert a fresh async marker */ + + insert_marker(timers, timer); + timers->async_begin = currentTime; + } else if(!is_async(timers->current)) { + /* Previous asyncs still in flight, but a previous SwitchTo + * already marked the end of the most recent async operation, + * so we can rename that marker as the beginning of this async + * operation */ + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + last_event->label = NULL; + last_event->timerID = timer; + } + if (!is_async(timers->current)) { + pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + timers->current = timer; + +#endif +#endif +} + +void +pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; + struct pb_SubTimer *curr = (subtimerlist != NULL) ? subtimerlist->current : NULL; + + if (timers->current != pb_TimerID_NONE) { + if (!is_async(timers->current) ) { + if (timers->current != category) { + if (curr != NULL) { + pb_StopTimerAndSubTimer(&timers->timers[timers->current], &curr->timer); + } else { + pb_StopTimer(&timers->timers[timers->current]); + } + } else { + if (curr != NULL) { + pb_StopTimer(&curr->timer); + } + } + } else { + insert_submarker(timers, label, category); + if (!is_async(category)) { // if switching to async too, keep driver going + pb_StopTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + pb_Timestamp currentTime = 0; //get_time(); + + /* The only cases we check for asynchronous task completion is + * when an overlapping CPU operation completes, or the next + * segment blocks on completion of previous async operations */ + if( asyncs_outstanding(timers) && + (!is_async(timers->current) || is_blocking(category) ) ) { + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + /* CL_COMPLETE if completed */ + + cl_int ciErrNum = CL_SUCCESS; + cl_int async_done = CL_COMPLETE; + + ciErrNum = clGetEventInfo(*((cl_event *)last_event->marker), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &async_done, NULL); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Querying EventInfo!\n"); + } + + if(is_blocking(category)) { + /* Async operations completed after previous CPU operations: + * overlapped time is the total CPU time since this set of async + * operations were first issued */ + + // timer to switch to is COPY or NONE + // if it hasn't already finished, then just take now and use that as the elapsed time in OVERLAP + // anything happening after now isn't OVERLAP because everything is being stopped to wait for synchronization + // it seems that the extra sync wall time isn't being recorded anywhere + if(async_done != CL_COMPLETE) + accumulate_time(&(timers->timers[pb_TimerID_OVERLAP].elapsed), + timers->async_begin,currentTime); + + /* Wait on async operation completion */ + ciErrNum = clWaitForEvents(1, (cl_event *)last_event->marker); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Waiting for Events!\n"); + } + pb_Timestamp total_async_time = record_async_times(timers); + + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + // If it did finish, then accumulate all the async time that did happen into OVERLAP + // the immediately preceding EventSynchronize theoretically didn't have any effect since it was already completed. + if(async_done == CL_COMPLETE /*cudaSuccess*/) + timers->timers[pb_TimerID_OVERLAP].elapsed += total_async_time; + + } else + /* implies (!is_async(timers->current) && asyncs_outstanding(timers)) */ + // i.e. Current Not Async (not KERNEL/COPY_ASYNC) but there are outstanding + // so something is deeper in stack + if(async_done == CL_COMPLETE /*cudaSuccess*/) { + /* Async operations completed before previous CPU operations: + * overlapped time is the total async time */ + timers->timers[pb_TimerID_OVERLAP].elapsed += record_async_times(timers); + } + // else, this isn't blocking, so just check the next time around + } + + subtimerlist = timers->sub_timer_list[category]; + struct pb_SubTimer *subtimer = NULL; + + if (label != NULL) { + subtimer = subtimerlist->subtimer_list; + while (subtimer != NULL) { + if (strcmp(subtimer->label, label) == 0) { + break; + } else { + subtimer = subtimer->next; + } + } + } + + /* Start the new timer */ + if (category != pb_TimerID_NONE) { + if(!is_async(category)) { + if (subtimerlist != NULL) { + subtimerlist->current = subtimer; + } + + if (category != timers->current && subtimer != NULL) { + pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); + } else if (subtimer != NULL) { + pb_StartTimer(&subtimer->timer); + } else { + pb_StartTimer(&timers->timers[category]); + } + } else { + if (subtimerlist != NULL) { + subtimerlist->current = subtimer; + } + + // toSwitchTo Is Async (KERNEL/COPY_ASYNC) + if (!asyncs_outstanding(timers)) { + /* No asyncs outstanding, insert a fresh async marker */ + insert_submarker(timers, label, category); + timers->async_begin = currentTime; + } else if(!is_async(timers->current)) { + /* Previous asyncs still in flight, but a previous SwitchTo + * already marked the end of the most recent async operation, + * so we can rename that marker as the beginning of this async + * operation */ + + struct pb_async_time_marker_list * last_event = get_last_async(timers); + last_event->timerID = category; + last_event->label = label; + } // else, marker for switchToThis was already inserted + + //toSwitchto is already asynchronous, but if current/prev state is async too, then DRIVER is already running + if (!is_async(timers->current)) { + pb_StartTimer(&timers->timers[pb_TimerID_DRIVER]); + } + } + } + + timers->current = category; +#endif +#endif +} + +void +pb_PrintTimerSet(struct pb_TimerSet *timers) +{ +#if 0 +#ifndef DISABLE_PARBOIL_TIMER + pb_Timestamp wall_end = 0; //get_time(); + + struct pb_Timer *t = timers->timers; + struct pb_SubTimer* sub = NULL; + + int maxSubLength; + + const char *categories[] = { + "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" + }; + + const int maxCategoryLength = 10; + + int i; + for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format + if(pb_GetElapsedTime(&t[i]) != 0) { + + // Print Category Timer + printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); + + if (timers->sub_timer_list[i] != NULL) { + sub = timers->sub_timer_list[i]->subtimer_list; + maxSubLength = 0; + while (sub != NULL) { + // Find longest SubTimer label + if (strlen(sub->label) > maxSubLength) { + maxSubLength = strlen(sub->label); + } + sub = sub->next; + } + + // Fit to Categories + if (maxSubLength <= maxCategoryLength) { + maxSubLength = maxCategoryLength; + } + + sub = timers->sub_timer_list[i]->subtimer_list; + + // Print SubTimers + while (sub != NULL) { + printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); + sub = sub->next; + } + } + } + } + + if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) + printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); + + float walltime = (wall_end - timers->wall_begin)/ 1e6; + printf("Timer Wall Time: %f\n", walltime); + +#endif +#endif +} + +void pb_DestroyTimerSet(struct pb_TimerSet * timers) +{ +#ifndef DISABLE_PARBOIL_TIMER + /* clean up all of the async event markers */ + struct pb_async_time_marker_list* event = timers->async_markers; + while(event != NULL) { + + cl_int ciErrNum = CL_SUCCESS; + ciErrNum = clWaitForEvents(1, (cl_event *)(event)->marker); + if (ciErrNum != CL_SUCCESS) { + //fprintf(stderr, "Error Waiting for Events!\n"); + } + + ciErrNum = clReleaseEvent( *((cl_event *)(event)->marker) ); + if (ciErrNum != CL_SUCCESS) { + fprintf(stderr, "Error Release Events!\n"); + } + + free((event)->marker); + struct pb_async_time_marker_list* next = ((event)->next); + + free(event); + + // (*event) = NULL; + event = next; + } + + int i = 0; + for(i = 0; i < pb_TimerID_LAST; ++i) { + if (timers->sub_timer_list[i] != NULL) { + struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; + struct pb_SubTimer *prev = NULL; + while (subtimer != NULL) { + free(subtimer->label); + prev = subtimer; + subtimer = subtimer->next; + free(prev); + } + free(timers->sub_timer_list[i]); + } + } +#endif +} + +static pb_Platform** ptr = NULL; + +// verbosely print out list of platforms and their devices to the console. +pb_Platform** +pb_GetPlatforms() { + if (ptr == NULL) { + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + if (num_platforms == 0) return NULL; + + ptr = (pb_Platform **) malloc(sizeof(pb_Platform *) * (num_platforms + 1)); + cl_platform_id* ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, ids, NULL); + + unsigned int i; + for (i = 0; i < num_platforms; i++) { + ptr[i] = (pb_Platform *) malloc(sizeof(pb_Platform)); + ptr[i]->clPlatform = ids[i]; + ptr[i]->contexts = NULL; + ptr[i]->in_use = 0; + ptr[i]->devices = NULL; + + size_t sz; + clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, 0, NULL, &sz); + char* name = (char *) malloc(sz + 1); + clGetPlatformInfo(ids[i], CL_PLATFORM_NAME, sz, name, NULL); + name[sz] = '\0'; + ptr[i]->name = name; + + clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, 0, NULL, &sz); + char* version = (char *) malloc(sz + 1); + clGetPlatformInfo(ids[i], CL_PLATFORM_VERSION, sz, version, NULL); + version[sz] = '\0'; + ptr[i]->version = version; + } + ptr[i] = NULL; + + free(ids); + } + + return (pb_Platform**) ptr; +} + +pb_Context* +createContext(pb_Platform* pb_platform, pb_Device* pb_device) { + pb_Context* c = (pb_Context*) malloc(sizeof(pb_Context)); + cl_int clStatus; + cl_context_properties clCps[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)(pb_platform->clPlatform), 0 + }; + c->clContext = + clCreateContext(clCps, 1, (cl_device_id*)&pb_device->clDevice, NULL, NULL, &clStatus); + c->clPlatformId = pb_platform->clPlatform; + c->clDeviceId = pb_device->clDevice; + c->pb_platform = pb_platform; + c->pb_device = pb_device; + pb_platform->in_use = 1; + pb_device->in_use = 1; + unsigned int i = 0; + if (pb_platform->contexts == NULL) { + pb_platform->contexts = (pb_Context**) malloc(2*sizeof(pb_Context*)); + } else { + for (i = 0; pb_platform->contexts[i] != NULL; i++) {}; + pb_platform->contexts = (pb_Context**) realloc(pb_platform->contexts, + (i+1)*sizeof(pb_Context*)); + } + pb_platform->contexts[i+1] = NULL; + pb_platform->contexts[i] = c; + return c; +} + +// choose a platform by name. +pb_Platform* +pb_GetPlatformByName(const char* name) { + pb_Platform** ps = (pb_Platform **) pb_GetPlatforms(); + if (ps == NULL) return NULL; + if (name == NULL) { + return *ps; + } + + while (*ps) { + if (strstr((*ps)->name, name)) break; + ps++; + } + return (pb_Platform*) *ps; +} + +pb_Device** +pb_GetDevices(pb_Platform* pb_platform) { + if (pb_platform->devices == NULL) { + cl_uint num_devs; + cl_device_id* dev_ids; + clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform, + CL_DEVICE_TYPE_ALL, 0, NULL, &num_devs); + if (num_devs == 0) return NULL; + + pb_platform->devices = + (pb_Device **) malloc((num_devs + 1) * sizeof(pb_Device *)); + dev_ids = (cl_device_id *) malloc(sizeof(cl_device_id) * num_devs); + clGetDeviceIDs((cl_platform_id) pb_platform->clPlatform, + CL_DEVICE_TYPE_ALL, num_devs, dev_ids, NULL); + + unsigned int i; + for (i = 0; i < num_devs; i++) { + pb_platform->devices[i] = (pb_Device *) malloc(sizeof(pb_Device)); + + pb_platform->devices[i]->clDevice = dev_ids[i]; + pb_platform->devices[i]->id = i; + + size_t sz; + clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, 0, NULL, &sz); + char* name = (char *) malloc(sz + 1); + clGetDeviceInfo(dev_ids[i], CL_DEVICE_NAME, sz, name, NULL); + name[sz] = '\0'; + pb_platform->devices[i]->name = (char *) name; + + cl_bool available; + clGetDeviceInfo(dev_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL); + pb_platform->devices[i]->available = (int) available; + + pb_platform->devices[i]->in_use = 0; + } + pb_platform->devices[i] = NULL; + } + return (pb_Device **) pb_platform->devices; +} + +// choose a device by name. +static pb_Device* +pb_SelectDeviceByName(pb_Device **ds, const char* name) { + if (ds == NULL) return NULL; + if (name == NULL) return *ds; + while (*ds) { + if (strstr((*ds)->name, name)) break; + ds++; + } + + return *ds; +} + +// choose a device by name and set the device's 'in_use' flag. +pb_Device* +pb_GetDeviceByName(pb_Platform* pb_platform, const char* name) { + pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform); + pb_Device *d = pb_SelectDeviceByName(ds, name); + + if (d) d->in_use = 1; + + return d; +} + +void +pb_ReleasePlatforms() { + if (!ptr) return; + pb_Platform** cur_ptr = ptr; + while (*cur_ptr) { + pb_Platform* pfptr = *cur_ptr++; + if (pfptr->devices) { + pb_Device** dvptr = pfptr->devices; + while (*dvptr) { + pb_Device* d = *dvptr++; + free(d->name); + free(d); + } + free(pfptr->devices); + } + if (pfptr->contexts) { + pb_Context** cptr = pfptr->contexts; + while (*cptr) { + free(*cptr++); + } + free(pfptr->contexts); + } + free(pfptr->name); + free(pfptr); + } + free(ptr); + ptr = NULL; +} + +pb_Platform* +pb_GetPlatformByNameAndVersion(const char* name, const char* version) { + pb_Platform** ps = (pb_Platform **) pb_GetPlatforms(); + if (ps == NULL) return NULL; + if (name == NULL) return *ps; + while (*ps) { + if (strstr((*ps)->name, name) && strstr((*ps)->version, version)) break; + ps++; + } + return (pb_Platform*) *ps; +} + +/* Return a pointer to the device at the specified index, or NULL. + * Used by pb_GetDevice. */ +static pb_Device * +select_device_by_index(pb_Device** ds, int id) +{ + int i = 0; + pb_Device** p = ds; + while (*p && (i < id)) { p++; i++; } + return *p; +} + +/* Return a pointer to the device with the specified type, or NULL. + * Used by pb_GetDevice. */ +static pb_Device * +select_device_by_type(pb_Device** ds, + enum pb_DeviceSelectionCriterion criterion) +{ + cl_device_type sought_type; + + /* Determine the OpenCL device type to search for */ + switch(criterion) { + case pb_Device_CPU: + sought_type = CL_DEVICE_TYPE_CPU; + break; + case pb_Device_GPU: + sought_type = CL_DEVICE_TYPE_GPU; + break; + case pb_Device_ACCELERATOR: + sought_type = CL_DEVICE_TYPE_ACCELERATOR; + break; + default: + fprintf(stderr, "pb_GetDevice: Invalid device type"); + exit(-1); + } + + /* Find the device */ + { + pb_Device** p = ds; + cl_device_type type; + while (*p) { + clGetDeviceInfo(((cl_device_id) ((*p)->clDevice)), CL_DEVICE_TYPE, + sizeof(cl_device_type), &type, NULL); + if (type == sought_type) break; + } + + return *p; + } +} + +pb_Device* +pb_GetDevice(pb_Platform* pb_platform, struct pb_DeviceParam *device) +{ + pb_Device** ds = (pb_Device **) pb_GetDevices(pb_platform); + + // The list of devices must be nonempty + if (ds == NULL || *ds == NULL) { + fprintf(stderr, "Error: No device is found in platform: name = %s, version = %s\n.", pb_platform->name, pb_platform->version); + exit(-1); + } + + pb_Device *selected_device = NULL; + + if (device != NULL) { + /* Use 'device' to select and return a device. + * If unable to select a device, fall + * back on the default selection mechanism. */ + switch(device->criterion) { + case pb_Device_INDEX: + selected_device = select_device_by_index(ds, device->index); + break; + case pb_Device_GPU: + case pb_Device_CPU: + case pb_Device_ACCELERATOR: + selected_device = select_device_by_type(ds, device->criterion); + break; + case pb_Device_NAME: + selected_device = pb_SelectDeviceByName(ds, device->name); + break; + default: + fprintf(stderr, "pb_GetDevice: Invalid argument"); + exit(-1); + } + } + + /* By default or if user-specified selection failed, + * select the first device */ + if (selected_device == NULL) + selected_device = *ds; + + /* Set the in_use flag */ + selected_device->in_use = 1; + + return selected_device; +} + +pb_Device* +pb_GetDeviceByEnvVars(pb_Platform* pb_platform) { + + /* Convert environment variables to a 'pb_DeviceParam' */ + struct pb_DeviceParam *param = NULL; + + char* device_num = getenv("PARBOIL_DEVICE_NUMBER"); + if (device_num && strcmp(device_num, "")) { + int id = atoi(device_num); + param = pb_DeviceParam_index(id); + } + else { + char* device_name = getenv("PARBOIL_DEVICE_NAME"); + if (device_name && strcmp(device_name, "")) { + param = pb_DeviceParam_name(strdup(device_name)); + } + else { + char* device_type = getenv("PARBOIL_DEVICE_TYPE"); + if (device_type && strcmp(device_type, "")) { + if (strcmp(device_type, "CPU") == 0) + param = pb_DeviceParam_cpu(); + else if (strcmp(device_type, "GPU") == 0) + param = pb_DeviceParam_gpu(); + else if (strcmp(device_type, "ACCELERATOR") == 0) + param = pb_DeviceParam_accelerator(); + } + } + } + + /* Get a device */ + pb_Device *d = pb_GetDevice(pb_platform, param); + pb_FreeDeviceParam(param); + + return d; +} + +pb_Platform* +pb_GetPlatformByEnvVars() { + char* name = getenv("PARBOIL_PLATFORM_NAME"); + char* version = getenv("PARBOIL_PLATFORM_VERSION"); + + /* Create a pb_PlatformParam object (or NULL) representing the data from the + * environment variables */ + struct pb_PlatformParam *platform; + + if (name) { + if (version) { + platform = pb_PlatformParam(strdup(name), strdup(version)); + } + else { + platform = pb_PlatformParam(strdup(name), NULL); + } + } + else { + platform = NULL; + } + + /* Convert to a platform */ + pb_Platform *p = pb_GetPlatform(platform); + pb_FreePlatformParam(platform); + + return p; +} + +/* Choose an OpenCL platform based on the given command-line parameters. + * If NULL, use the default OpenCL platform. */ +pb_Platform* +pb_GetPlatform(struct pb_PlatformParam *platform) { + if (platform != NULL) { + /* Try to use command-line parameters to choose platform */ + char *name = platform->name; + char *version = platform->version; + + if (!name) { + fprintf(stderr, "Internal error: NULL pointer"); + exit(-1); + } + + if (version) { + pb_Platform* p = pb_GetPlatformByNameAndVersion(name, version); + if (p) return p; + } + + pb_Platform* p = pb_GetPlatformByName(name); + if (p) return p; + } + + pb_Platform* p = pb_GetPlatformByName(NULL); + if (p == NULL) { + fprintf(stderr, "Error: No OpenCL platform in this system. Exiting."); + exit(-1); + } + return p; +} + +//extern void perf_init(); +//extern void mxpa_scheduler_init(); + +pb_Context* +pb_InitOpenCLContext(struct pb_Parameters* parameters) { +#if 0 + pb_Platform* ps = pb_GetPlatform(parameters->platform); + if (!ps) return NULL; + pb_Device* ds = pb_GetDevice(ps, parameters->device); + if (!ds) return NULL; + + /* HERE INITIALIZE TIMER */ + //perf_init(); + //mxpa_scheduler_init(); + + pb_Context* c = createContext(ps, ds); + pb_PrintPlatformInfo(c); + return c; +#endif + cl_int _err; + cl_platform_id platform_id; + cl_device_id device_id; + cl_context context; + clGetPlatformIDs(1, &platform_id, NULL); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL); + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err); + + pb_Context* c = (pb_Context*)malloc(sizeof(pb_Context)); + c->clContext = context; + c->clDeviceId = device_id; + c->clPlatformId = platform_id; + c->pb_platform = (pb_Platform*)malloc(sizeof(pb_Platform)); + c->pb_device = (pb_Device*)malloc(sizeof(pb_Device)); + c->pb_platform->devices = (pb_Device**)malloc(sizeof(pb_Device*) * 2); + c->pb_platform->devices[0] = c->pb_device; + c->pb_platform->devices[1] = NULL; + c->pb_platform->contexts = (pb_Context**)malloc(sizeof(pb_Context*) * 2); + c->pb_platform->contexts[0] = c; + c->pb_platform->contexts[1] = NULL; + c->pb_platform->in_use = 1; + c->pb_device->in_use = 1; + return c; +} + +void +pb_ReleaseOpenCLContext(pb_Context* c) { + pb_ReleasePlatforms(); +} + +void +pb_PrintPlatformInfo(pb_Context* c) { + /*pb_Platform** ps = pb_GetPlatforms(); + if (!ps) { + fprintf (stderr, "No platform found"); + return; + } + + printf ("********************************************************\n"); + printf ("DETECTED OPENCL PLATFORMS AND DEVICES:\n"); + printf ("--------------------------------------------------------\n"); + + while (*ps) { + printf ("PLATFORM = %s, %s", (*ps)->name, (*ps)->version); + if (c->pb_platform == *ps) printf (" (SELECTED)"); + printf ("\n"); + + pb_Device** ds = (pb_Device **) pb_GetDevices((*ps)); + if (ds == NULL) { + printf (" + (No devices)\n"); + } else { + while (*ds) { + printf (" + %d: %s", (*ds)->id, (*ds)->name); + if (c->pb_device == *ds) printf (" (SELECTED)"); + printf ("\n"); + ds++; + } + } + + ps++; + } + printf ("********************************************************\n");*/ +} + +#ifdef MEASURE_KERNEL_TIME + +#undef clEnqueueNDRangeKernel + +//extern void pin_trace_enable(char*); +//extern void pin_trace_disable(char*); + +cl_int +pb_clEnqueueNDRangeKernel(cl_command_queue q/* command_queue */, + cl_kernel k/* kernel */, + cl_uint d/* work_dim */, + const size_t * o/* global_work_offset */, + const size_t * gws/* global_work_size */, + const size_t * lws/* local_work_size */, + cl_uint n/* num_events_in_wait_list */, + const cl_event * w/* event_wait_list */, + cl_event * e/* event */) { + + char buf[128]; + struct timeval begin, end; + clGetKernelInfo(k, CL_KERNEL_FUNCTION_NAME, 128, buf, NULL); + +#if 0 + int i; + for (i = 0; i < d; i++) { + printf ("%s: %d: %d / %d\n", buf, i, gws[i], (lws == NULL ? 0 : lws[i])); + } +#endif + + clFinish(q); clFlush(q); + //pin_trace_enable(buf); + //gettimeofday(&begin, NULL); + cl_int result = clEnqueueNDRangeKernel(q, k, d, o, gws, lws, n, w, e); + clFinish(q); clFlush(q); + //gettimeofday(&end, NULL); + //pin_trace_disable(buf); + //float t = (float)(end.tv_sec - begin.tv_sec) + (end.tv_usec - begin.tv_usec) / 1000000.0f; + fflush(stdout); + fflush(stderr); + //printf ("PBTIMER: %s: %f\n", buf, t); + return result; +} + +#endif + +void +pb_sig_float(char* c, float* p, int sz) { + int i; + double s = 0.0; + for (i = 0; i < sz; i++) s += p[i] * (float)(i+1); + printf ("[Signature] %s = %lf\n", c, s); +} + +void +pb_sig_double(char* c, double* p, int sz) { + int i; + double s = 0.0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lf\n", c, s); +} + +void +pb_sig_short(char* c, short* p, int sz) { + int i; + long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void +pb_sig_int(char* c, int* p, int sz) { + int i; + long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void +pb_sig_uchar(char* c, unsigned char* p, unsigned int sz) { + int i; + unsigned long long int s = 0; + for (i = 0; i < sz; i++) s += p[i]; + printf ("[Signature] %s = %lld\n", c, s); +} + +void pb_sig_clmem(char* s, cl_command_queue command_queue, cl_mem memobj, int ty) { + size_t sz; + if (clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &sz, NULL) != CL_SUCCESS) { + printf ("Something wrong.\n"); + assert(0); + } else { + printf ("size = %d\n", sz); + } + char* hp; // = (char*) malloc(sz); + //posix_memalign((void**)&hp, 64, sz); + hp = (char*)malloc(sz); + + clEnqueueReadBuffer (command_queue, + memobj, + CL_TRUE, + 0, + sz, + hp, + 0, + NULL, + NULL); + + if (ty == T_FLOAT) pb_sig_float(s, (float*)hp, sz/sizeof(float)); + if (ty == T_DOUBLE) pb_sig_double(s, (double*)hp, sz/sizeof(double)); + if (ty == T_INT) pb_sig_int(s, (int*)hp, sz/sizeof(int)); + if (ty == T_SHORT) pb_sig_short(s, (short*)hp, sz/sizeof(short)); + if (ty == T_UCHAR) pb_sig_uchar(s, (unsigned char*)hp, sz/sizeof(char)); + + free(hp); +} + diff --git a/benchmarks/opencl/sad/parboil.c b/benchmarks/opencl/sad/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/sad/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - - diff --git a/benchmarks/opencl/spmv/parboil.c b/benchmarks/opencl/spmv/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/spmv/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - - diff --git a/benchmarks/opencl/stencil/parboil.c b/benchmarks/opencl/stencil/parboil.c deleted file mode 100644 index 54fca9d0..00000000 --- a/benchmarks/opencl/stencil/parboil.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * (c) 2007 The Board of Trustees of the University of Illinois. - */ - -#include -#include -#include -#include - -#if _POSIX_VERSION >= 200112L -# include -#endif - - -/*****************************************************************************/ -/* Timer routines */ - -static void -accumulate_time(pb_Timestamp *accum, - pb_Timestamp start, - pb_Timestamp end) -{ -#if _POSIX_VERSION >= 200112L - *accum += end - start; -#else -# error "Timestamps not implemented for this system" -#endif -} - -#if _POSIX_VERSION >= 200112L -static pb_Timestamp get_time() -{ - struct timeval tv; - gettimeofday(&tv, NULL); - return (pb_Timestamp) (tv.tv_sec * 1000000LL + tv.tv_usec); -} -#else -# error "no supported time libraries are available on this platform" -#endif - -void -pb_ResetTimer(struct pb_Timer *timer) -{ - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - timer->elapsed = 0; -#else -# error "pb_ResetTimer: not implemented for this system" -#endif -} - -void -pb_StartTimer(struct pb_Timer *timer) -{ - if (timer->state != pb_Timer_STOPPED) { - fputs("Ignoring attempt to start a running timer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif -} - -void -pb_StartTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) -{ - unsigned int numNotStopped = 0x3; // 11 - if (timer->state != pb_Timer_STOPPED) { - fputs("Warning: Timer was not stopped\n", stderr); - numNotStopped &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_STOPPED) { - fputs("Warning: Subtimer was not stopped\n", stderr); - numNotStopped &= 0x2; // Zero out 2^0 - } - if (numNotStopped == 0x0) { - fputs("Ignoring attempt to start running timer and subtimer\n", stderr); - return; - } - - timer->state = pb_Timer_RUNNING; - subtimer->state = pb_Timer_RUNNING; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - - if (numNotStopped & 0x2) { - timer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - - if (numNotStopped & 0x1) { - subtimer->init = tv.tv_sec * 1000000LL + tv.tv_usec; - } - } -#else -# error "pb_StartTimer: not implemented for this system" -#endif - -} - -void -pb_StopTimer(struct pb_Timer *timer) -{ - - pb_Timestamp fini; - - if (timer->state != pb_Timer_RUNNING) { - fputs("Ignoring attempt to stop a stopped timer\n", stderr); - return; - } - - timer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - -} - -void pb_StopTimerAndSubTimer(struct pb_Timer *timer, struct pb_Timer *subtimer) { - - pb_Timestamp fini; - - unsigned int numNotRunning = 0x3; // 0b11 - if (timer->state != pb_Timer_RUNNING) { - fputs("Warning: Timer was not running\n", stderr); - numNotRunning &= 0x1; // Zero out 2^1 - } - if (subtimer->state != pb_Timer_RUNNING) { - fputs("Warning: Subtimer was not running\n", stderr); - numNotRunning &= 0x2; // Zero out 2^0 - } - if (numNotRunning == 0x0) { - fputs("Ignoring attempt to stop stopped timer and subtimer\n", stderr); - return; - } - - - timer->state = pb_Timer_STOPPED; - subtimer->state = pb_Timer_STOPPED; - -#if _POSIX_VERSION >= 200112L - { - struct timeval tv; - gettimeofday(&tv, NULL); - fini = tv.tv_sec * 1000000LL + tv.tv_usec; - } -#else -# error "pb_StopTimer: not implemented for this system" -#endif - - if (numNotRunning & 0x2) { - accumulate_time(&timer->elapsed, timer->init, fini); - timer->init = fini; - } - - if (numNotRunning & 0x1) { - accumulate_time(&subtimer->elapsed, subtimer->init, fini); - subtimer->init = fini; - } - -} - -/* Get the elapsed time in seconds. */ -double -pb_GetElapsedTime(struct pb_Timer *timer) -{ - double ret; - - if (timer->state != pb_Timer_STOPPED) { - fputs("Elapsed time from a running timer is inaccurate\n", stderr); - } - -#if _POSIX_VERSION >= 200112L - ret = timer->elapsed / 1e6; -#else -# error "pb_GetElapsedTime: not implemented for this system" -#endif - return ret; -} - -void -pb_InitializeTimerSet(struct pb_TimerSet *timers) -{ - int n; - - timers->wall_begin = get_time(); - - timers->current = pb_TimerID_NONE; - - timers->async_markers = NULL; - - - for (n = 0; n < pb_TimerID_LAST; n++) { - pb_ResetTimer(&timers->timers[n]); - timers->sub_timer_list[n] = NULL; // free first? - } -} - -void -pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) { - - struct pb_SubTimer *subtimer = (struct pb_SubTimer *) malloc - (sizeof(struct pb_SubTimer)); - - int len = strlen(label); - - subtimer->label = (char *) malloc (sizeof(char)*(len+1)); - sprintf(subtimer->label, "%s\0", label); - - pb_ResetTimer(&subtimer->timer); - subtimer->next = NULL; - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[pb_Category]; - if (subtimerlist == NULL) { - subtimerlist = (struct pb_SubTimerList *) malloc - (sizeof(struct pb_SubTimerList)); - subtimerlist->subtimer_list = subtimer; - timers->sub_timer_list[pb_Category] = subtimerlist; - } else { - // Append to list - struct pb_SubTimer *element = subtimerlist->subtimer_list; - while (element->next != NULL) { - element = element->next; - } - element->next = subtimer; - } - -} - -void -pb_SwitchToSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID category) -{ - -// switchToSub( NULL, NONE -// switchToSub( NULL, some -// switchToSub( some, some -// switchToSub( some, NONE -- tries to find "some" in NONE's sublist, which won't be printed - - struct pb_Timer *topLevelToStop = NULL; - if (timers->current != category && timers->current != pb_TimerID_NONE) { - // Switching to subtimer in a different category needs to stop the top-level current, different categoried timer. - // NONE shouldn't have a timer associated with it, so exclude from branch - topLevelToStop = &timers->timers[timers->current]; - } - - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - struct pb_SubTimer *curr = (subtimerlist == NULL) ? NULL : subtimerlist->current; - - if (timers->current != pb_TimerID_NONE) { - if (curr != NULL && topLevelToStop != NULL) { - pb_StopTimerAndSubTimer(topLevelToStop, &curr->timer); - } else if (curr != NULL) { - pb_StopTimer(&curr->timer); - } else { - pb_StopTimer(topLevelToStop); - } - } - - subtimerlist = timers->sub_timer_list[category]; - struct pb_SubTimer *subtimer = NULL; - - if (label != NULL) { - subtimer = subtimerlist->subtimer_list; - while (subtimer != NULL) { - if (strcmp(subtimer->label, label) == 0) { - break; - } else { - subtimer = subtimer->next; - } - } - } - - if (category != pb_TimerID_NONE) { - - if (subtimerlist != NULL) { - subtimerlist->current = subtimer; - } - - if (category != timers->current && subtimer != NULL) { - pb_StartTimerAndSubTimer(&timers->timers[category], &subtimer->timer); - } else if (subtimer != NULL) { - // Same category, different non-NULL subtimer - pb_StartTimer(&subtimer->timer); - } else{ - // Different category, but no subtimer (not found or specified as NULL) -- unprefered way of setting topLevel timer - pb_StartTimer(&timers->timers[category]); - } - } - - timers->current = category; - -} - -void -pb_SwitchToTimer(struct pb_TimerSet *timers, enum pb_TimerID timer) -{ - /* Stop the currently running timer */ - /*if (timers->current != pb_TimerID_NONE) { - struct pb_SubTimer *currSubTimer = NULL; - struct pb_SubTimerList *subtimerlist = timers->sub_timer_list[timers->current]; - - if ( subtimerlist != NULL) { - currSubTimer = timers->sub_timer_list[timers->current]->current; - } - if ( currSubTimer!= NULL) { - pb_StopTimerAndSubTimer(&timers->timers[timers->current], &currSubTimer->timer); - } else { - pb_StopTimer(&timers->timers[timers->current]); - } - } - - timers->current = timer; - - if (timer != pb_TimerID_NONE) { - pb_StartTimer(&timers->timers[timer]); - }*/ -} - -void -pb_PrintTimerSet(struct pb_TimerSet *timers) -{ - - pb_Timestamp wall_end = get_time(); - - struct pb_Timer *t = timers->timers; - struct pb_SubTimer* sub = NULL; - - int maxSubLength; - - const char *categories[] = { - "IO", "Kernel", "Copy", "Driver", "Copy Async", "Compute" - }; - - const int maxCategoryLength = 10; - - int i; - for(i = 1; i < pb_TimerID_LAST-1; ++i) { // exclude NONE and OVRELAP from this format - if(pb_GetElapsedTime(&t[i]) != 0) { - - // Print Category Timer - printf("%-*s: %f\n", maxCategoryLength, categories[i-1], pb_GetElapsedTime(&t[i])); - - if (timers->sub_timer_list[i] != NULL) { - sub = timers->sub_timer_list[i]->subtimer_list; - maxSubLength = 0; - while (sub != NULL) { - // Find longest SubTimer label - if (strlen(sub->label) > maxSubLength) { - maxSubLength = strlen(sub->label); - } - sub = sub->next; - } - - // Fit to Categories - if (maxSubLength <= maxCategoryLength) { - maxSubLength = maxCategoryLength; - } - - sub = timers->sub_timer_list[i]->subtimer_list; - - // Print SubTimers - while (sub != NULL) { - printf(" -%-*s: %f\n", maxSubLength, sub->label, pb_GetElapsedTime(&sub->timer)); - sub = sub->next; - } - } - } - } - - if(pb_GetElapsedTime(&t[pb_TimerID_OVERLAP]) != 0) - printf("CPU/Kernel Overlap: %f\n", pb_GetElapsedTime(&t[pb_TimerID_OVERLAP])); - - float walltime = (wall_end - timers->wall_begin)/ 1e6; - printf("Timer Wall Time: %f\n", walltime); - -} - -void pb_DestroyTimerSet(struct pb_TimerSet * timers) -{ - /* clean up all of the async event markers */ - struct pb_async_time_marker_list ** event = &(timers->async_markers); - while( *event != NULL) { - struct pb_async_time_marker_list ** next = &((*event)->next); - free(*event); - (*event) = NULL; - event = next; - } - - int i = 0; - for(i = 0; i < pb_TimerID_LAST; ++i) { - if (timers->sub_timer_list[i] != NULL) { - struct pb_SubTimer *subtimer = timers->sub_timer_list[i]->subtimer_list; - struct pb_SubTimer *prev = NULL; - while (subtimer != NULL) { - free(subtimer->label); - prev = subtimer; - subtimer = subtimer->next; - free(prev); - } - free(timers->sub_timer_list[i]); - } - } -} - -