From f63ef8110caa449a20267da98f30ddb843e08a69 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 22 Feb 2021 00:59:39 -0500 Subject: [PATCH] update opencl kernels --- benchmarks/opencl/guassian/Fan1.dump | 56 +++++++++---------- benchmarks/opencl/guassian/Fan2.dump | 55 +++++++++--------- benchmarks/opencl/guassian/kernel.pocl | Bin 39039 -> 38747 bytes benchmarks/opencl/nearn/NearestNeighbor.dump | 55 +++++++++--------- benchmarks/opencl/nearn/kernel.pocl | Bin 17251 -> 17147 bytes benchmarks/opencl/saxpy/kernel.pocl | Bin 20099 -> 20019 bytes benchmarks/opencl/saxpy/saxpy.dump | 53 +++++++++--------- benchmarks/opencl/sfilter/kernel.pocl | Bin 18090 -> 17930 bytes benchmarks/opencl/sfilter/sfilter.dump | 55 +++++++++--------- benchmarks/opencl/sgemm/kernel.pocl | Bin 17331 -> 17091 bytes benchmarks/opencl/sgemm/sgemm.dump | 51 +++++++++-------- benchmarks/opencl/vecadd/kernel.pocl | Bin 20125 -> 20049 bytes benchmarks/opencl/vecadd/vecadd.dump | 52 ++++++++--------- 13 files changed, 190 insertions(+), 187 deletions(-) diff --git a/benchmarks/opencl/guassian/Fan1.dump b/benchmarks/opencl/guassian/Fan1.dump index c23e12d5..bff54585 100644 --- a/benchmarks/opencl/guassian/Fan1.dump +++ b/benchmarks/opencl/guassian/Fan1.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-db-03-14-35-2b.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-00-08-9c-7c-dc.elf: file format ELF32-riscv Disassembly of section .init: @@ -774,7 +774,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -792,25 +792,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1258,13 +1258,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 32 34 - 48: 2d 37 - 4a: 61 2d - 4c: 61 31 - 4e: 2d 63 - 50: 30 2d - 52: 33 31 2e 63 + 46: 31 38 + 48: 2d 63 + 4a: 33 2d 38 61 + 4e: 2d 37 + 50: 35 2d + 52: 31 33 + 54: 2e 63 56: 00 70 58: 61 72 5a: 61 6c diff --git a/benchmarks/opencl/guassian/Fan2.dump b/benchmarks/opencl/guassian/Fan2.dump index 4384c0e4..8c52171f 100644 --- a/benchmarks/opencl/guassian/Fan2.dump +++ b/benchmarks/opencl/guassian/Fan2.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-6b-12-be-02-10.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-1b-12-83-97-df.elf: file format ELF32-riscv Disassembly of section .init: @@ -989,7 +989,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -1007,25 +1007,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1473,12 +1473,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 61 34 - 48: 2d 61 - 4a: 61 2d - 4c: 37 62 2d 65 lui tp, 414422 - 50: 61 2d - 52: 31 31 + 46: 34 35 + 48: 2d 30 + 4a: 34 2d + 4c: 64 61 + 4e: 2d 31 + 50: 35 2d + 52: 36 65 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/guassian/kernel.pocl b/benchmarks/opencl/guassian/kernel.pocl index 3b14c4b7a3f4d865c84f2eb281f2b9040191063e..23ce271bbf404d6f8c2d159ee5c7f226a71f3e9a 100644 GIT binary patch delta 2374 zcmb`Ie^8Tk9LJyU28KDiAu~6@7&kQ)jb}f=QMj@8AWX*|26buD>cb7P?w0ouN&KN!qDklyxYA=62xGch9r&L&4~3e|?^PKl^+? zpZELqc{Y5QbNO?QOT0KsmqM&wovmG+qt|G12?J>$G@9&H*@Qu#t0ivNNjwaDnf4V3qQFwRXU!ch zkitvrlNZ??a+x(vDm9yBQiaWKR;H%P(-d}xZMns)NR`^HsdlT}v0OnXo8AUsKYgGu z1%NErv0+QXM-_L8stRR$k<78MadTvy?34a0pvlA`Ro$mQ(RuBcv#Z^~tx&86m&5_?IJ#mX9oUB=nqbPD~mW7}-b+W1EuETP}pp^D-h7tvpo|M71v zgvlliy}X(S07(-yg8&?YnN(s~YXWD$-|Ae8RPf%Cs1^k4ZO`%eSy&BkP2nKbj^_Ve z)m5;E)NSt*SF7g{-E59(7L&90vq6f~sTRzu5M{)g#fE`7hP!d?0<+4ekL?L%l0-rj z&aeAMKin1L>6duRg3Y0PQ7zzUZ%a;#tt&=vzSM1?tz6XrZR41gb)E)w5UWAN$V5nAtopJKiNbgO`b0?IX z!$WO-ud2E(xDPOmpg9&B*>O{EOIR~6-ouPh#ci8|6q3~@r-*2zFXa=@ntcuJi~BC- z&AW{P1_z^nvHB;31LMJ-PBE{g51kr&y4flnp?$kI--G*`QSe|sLsWp8na;q&DjfZK z->85ERY5|Y)qs*iFoYrT&HOs_-Y5(8#F}-g1@j(eZnb5^;Jj96plz`-=Q{a>FTnJj z*%6+$2u;iQ$iFzO{I)-t@bk|Mv*LH|<`N<8?V)+@<705UCQ(^dhA|Fz(wHZ~P0sK` zA@}}h$H?+9Y_=CDPsW$%apSVccugANM4mDLt@Pu5BqS4OmqQ;EW`}(~av*Zjk$_%R zlyhwzyMs*bc79TL-I&K70x*Vk9*cD6fv@a}2e+WewMgJa7+cJ-?Ao?7ifx4(TuXom ze&SN%P1*@3X};WWo?pY%JBpT@1sPc!1$$iEfPpsEeZ?~77{^%ZSjHS9jLmzPv4i6n z!^4c73^V2!$C%+^#@Zu{9U05m%?M+6B8-9m%9wkUvGj=;TQLR3M2}@mJD#ylate$o z!;BS9iLs|AWvpg|vF};N8eEezcI2NK+wcfuiEz4m23QA|xhLl=elpHV-8-W_JIc8q O(?PcuX#a=5(Z2y;$mrMr delta 2577 zcmb`IeM}Q~7{_~s7PMDrp-K^$g$~7m<7f-isK;BYyp&!Ml`;|R3py4_pwlJGBrx7~ zld(e%W6fsf-jijSBo|!NxE1Rri$hWr2b*kAg-tgd3Mveh{d!lRFvB!k|N8ZJ&+mKB z^L;+g?|%A%J=Mx?lC09HVs%*?HfCU{nYwhHN}Hz9q-s+zZKf_uovOvu*=q1GeM1(D zI-(ePhFY^+CtAEw&Z2*E8AZyMNZ8K?lF+_jKa7`CrGdbcaG1p^p&Yp)ZzzEp&b#de zA!P5i=%_@wDPCTj5GOMj;$-m#nWESzGbP5$6mdpbvC(LbPe6>ycvB)3UGORdeMWt} zJr;tp$ezOT5dAp_t|7+p#wZ&y-X*$@8bf{3R-;bo%49O!80x=EvR-JAXh(yzV}YHz zNwT77g-*Pf4X53dqQ;e-Un|+I4?0GQ(9#vtYZF=;IlbP6R``DBqiDklI?T)KRh%|L zpBau2gcWbs@oq`)ZGzK>q%87A(I)69xqip*P#(F;a3N8j$)=Mc#C~3I&6LLBdRZ)t zz>nnWlf-yOuntp+@rw`Y0w1d6mq4=h&B!IElnga$ct*uy1a1Qr5iGLAm;}8Cbj4)5 zu?uJ)FkMLO&2Vd5`Lo)q7tr46p)H%$c9=DsrkASN=c$O|5GWgwJZTdwB`=#^k;}>q z7Q^n{#k-4)CZ_4EV{y#l9faDybMF#%Oz1zm|2`G95A|j@FQZLwxRD{|zG~*{gj&3Jj;Ly1S9TUD9hf4wc0wgrTU{|p!g7Hv zS;dt=|f6Y$i~MKp4v#T>&$;L}8Jz!_Zw2s1y!fcGt>s2wDe@g=!O?DuB%0Hh69 z*PX4?{1u*@J{Z_p_6w8peSrs{1Jb3{EPEBm(Fz!?xdpI^@?g=cl&l~^E3>+)HE3iX z9-h;FQ@U%!pt91(yaIPMyPY`#FP1EPmqyPT58`<2@~ zr8=9!@)G91%N0-a9(V`oSa?()JzZpV9ID z1aYVzt&#_9CG;3bIvXr{u?Pg~cP*O0R8FhpwQ@ZMU2UbQauoo$bX(MKBS-MCN<4Nw zPX%@D|pMC>A$9GOH!h1Iz3WP_0DWhbp|etr@C=|C;yYs7Qp z&qNUWqY&~FLdoy5(z63V04S1vO?x2&RoisxacCxZqPcYrnp>yQ>=VvM^94^dxo$M| zup7+{o@frM=A+pMyU|?hfu>dKMpHbErue_1nLmT(+cEF>jF@vVkjb{IRH2ZpH(BwP~nuZxPw|Jl_W6)f&0Gg{8LeuI$2hEsyXco>zGkONi kQ!Q>Zw~@mwN`CMhG_?n-Ijt2{UhP!R!89oCfB35Z4dUJ<&j0`b diff --git a/benchmarks/opencl/nearn/NearestNeighbor.dump b/benchmarks/opencl/nearn/NearestNeighbor.dump index 2c461925..c63db6b8 100644 --- a/benchmarks/opencl/nearn/NearestNeighbor.dump +++ b/benchmarks/opencl/nearn/NearestNeighbor.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-7f-1f-8c-52-07.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-c7-de-a7-37-23.elf: file format ELF32-riscv Disassembly of section .init: @@ -987,7 +987,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -1005,25 +1005,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1530,12 +1530,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 61 62 - 48: 2d 63 - 4a: 37 2d 35 39 lui s10, 234322 - 4e: 2d 32 - 50: 32 2d - 52: 32 35 + 46: 35 35 + 48: 2d 30 + 4a: 39 2d + 4c: 39 61 + 4e: 2d 61 + 50: 36 2d + 52: 38 38 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/nearn/kernel.pocl b/benchmarks/opencl/nearn/kernel.pocl index 97b4afb8a00cbd8f48ff60382a8b429e5f7ecb25..0cd2e947daa911860f0b5b3a005b87768c676bcf 100644 GIT binary patch delta 997 zcmaFd#`wFHae{(|v#Y+dvxmR4lZTI+vxk$1ud}D0v%9C0ua}>{m$$FKM}V)ZhqIIa zMvrU}P7gf>Fc8_iM`X9CW`hI+gOl@td!`}`46HzjBpw45ugHCAzQ4D`a|br~FapIC zc^DYxZC;?I%*nW5^AY2B%#4PUH=Ao~q^21gC7T%g9@Y-Y2V$|%VKG)b7{V25L3i-H7??*@>`i3$u1D>k38jAvxDo2+9U z%3s8GNm!8WkxBhS7yFAmTPM%Ab_qD47@*GR(qb}~B}t}H_~(&UAGJ$=x|JDD_w)RJ zdRYv{wtsecT9Quh#NQo_CjTFC9QdSsc>d2V6GfOl|L9mNAtd_3E=h;YS$A*$WE&fQ z#>ta=Y<}0f1KptZ8E%-eU9REz{T56c{+#%bZ1?E$v}Uk*{Ss$B>-RC}*c<(s+5cn9 zMH!Hh{WtiO0vq2c+a=j>fK2^yLb2uGe~Uw(v|X+UoURvgaPhe8sc=ltVxoV{i4=== z7goDR%H|tv1rrtgK8yQ(5k4!DSZ{aHNAMZY2(uGRKHGpsu+BM%@IFw=9;RBE*XSWo z|Fgv$2R2>V(*L95s=)z+ALm7`0tM3@kPHp@-O)d%LDB}M5oFCjxS5v}CIf8|)bV)e z&wlYohhl$>V~dF0MV`7Z!s13hJNoY&{NmEmXmK{);-bGvbN>&H$%pJL87EEtWA_~# zR%|F?rH>p|WzeviaT+C#Ghnu5gfQz56zqhZykE^qnmy4T|r?0=4OMttVtCy$y zMvrU}&JB7DU{JGpkH~IO%@-033{K7m?wN`(Ft7q8l6VYMydw9d`TpMG&mGv{!w3{p z}Xk?mbWRjX>lx%KlWMY_P zl$w;3VrpiPWMP_YzM0KpDx)D6&?I4&gB^~EEeaAmz8f4Z1Q=L=V%G#1*aHO4iX=XB z;5e`eC;$YB3JeVEHgBHg7h_6+~%u8k&c@08gt{pnU_INi@v|LpR%B#)2lZtw$vigx zjI$Yc?C_611G8vG|Bo$C6}*$5 z0Num(8E(F^U9L&{0UMy;wq~$J{Sy3#0=(ZV+ZAynez5P`@P|v#^vI|AsP30Mrx@7S z|KkL2j=zgUeaGcLt-^m@j9q8=n*_J~?C77tyNO})Tc8n)=^tEL8ZFMoTU-Q70aZg3 zdo=s(0J`<32Ga%|Q=rNuQyv4Jf5xs`fr5Kg4j2Ff^2Z6u0JwS=V^BCgn`Ch1kus2> zp1_8XvIScFA7)Lz$%!XGi&?mXeq44m0V)j_P;B)#X_o(cq}2z=EjK`lwFk<;uq?B) zWSj{M%kSWD6oZH3LF90}eHu9&??S`z*(c<1G=hZVG-x=^;3FK4Gtc4;M+suX@gqDO zjS=Cv6=ygeqeVC#b8zD{sBK_r=8X8Ta^A17abCr=k=S3f^jPhT%LPk)z< z?lN+m7xWpxfNOJ(+-Eta3j&i9Z6*2QxdR(~7=aRsJPZt{Hh0)su`rr#KJ56KnbC0a zdRJ|Y)HGwGWHUp<#6%-Qla$m%3sX~LGn3S`6!WA+6H~*~WYg4S<1}-V%}j2-jEv=z z1Kb}o>P|NCFjQ$Z`TvOHz$fLy^*^^1!r1*kw@lS>5Mh)sirLm5<1qQIhm^MPJLPQ` z|8y%eobKnTe|Fhi!B#L)!SA!U-xuMt0lzzXH6ZHkikMO+2lz-cGEHXk^kmGL?Bltf z(RlJ7&uqr~lMB5JCa>}`n*7Ymk}+$tg7uKMxNtKX>l{S0`5&XCTYX!`IWt(Zk={HDIH= zj2x$e0RtFBZO)PVEXSlEI62W)lFy$zu)&8BD51#1z;JJKhpiP0W83D#j<1;+4JNO5 z)z&aKPBt}8H8V6yOf)nyO*Aq|O)^S0H#IUbOfpJMN=h*`Gf1*9O*Y@mp ze-4ABjbNgJ-)C{ZFT!U<68{McvOO}Xf9PU=Nnvt-#Ib_`-Tgl}7$(2+@MJ8WY~s0| z(Q5J&&up&S9!wyEJNoZT4)8KyQ=4RP<&pB_S}#+s8(mC1&To~0j-I^9%aXBh@*A(^ zKuuG;v$>*CG;y(kH31bao80JS$Q6OmnB?QeIw8J+`Q7HVK5mQ-1`5Q6U{Pjqa+$GF RL1Iy2PEKl0d{Qzfz5v;Rt_A=A diff --git a/benchmarks/opencl/saxpy/saxpy.dump b/benchmarks/opencl/saxpy/saxpy.dump index 0ed173a8..8fab3750 100644 --- a/benchmarks/opencl/saxpy/saxpy.dump +++ b/benchmarks/opencl/saxpy/saxpy.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-5f-33-e1-2a-a5.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-1b-e7-8a-9c-af.elf: file format ELF32-riscv Disassembly of section .init: @@ -720,7 +720,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -738,25 +738,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1199,12 +1199,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 36 35 - 48: 2d 66 - 4a: 33 2d 64 35 - 4e: 2d 36 + 46: 35 36 + 48: 2d 63 + 4a: 30 2d + 4c: 66 37 + 4e: 2d 38 50: 34 2d - 52: 31 37 + 52: 36 39 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/sfilter/kernel.pocl b/benchmarks/opencl/sfilter/kernel.pocl index bae293fa09006a8d01ab3e8a452f5b44dcff5d22..e172666627d2cdda41ae9eaa84b625c96aab2c73 100644 GIT binary patch delta 1601 zcmbu=Z)_7~7y$6Qjs0^6p=sR?baCY7g2RRWxo$;WyS2Tw*0yVz(i*#UcU`N-j?77r z;fEGsc&X|=m(84F@$s-vM^k&Ax*XsmJwE!k!_h6Lk5Y0wkc%3ckOj0QDb61 zJ$>%I_j%su_on@75dQE8e5`t_v1Pl-tTmZ*1YxsUtY(wZND$fP^V(sRrr|= z3ST20KBut2@blqSys`iu3RO1t1fowOk!$13jo>}~p#!REw`!&h*R5ce`%10tKd31y zvXOueBPV*&@lDd52c8v+Qr{1lDPE_bypOZVNu3>%w7WVQA8O6xuG z^a6HkFBJQ9PEB=|=<4^aV8gQyKEj|$23h@icGLqlPtl*_t4o2>iQll;nbe5A3W+2x zGHfDhE5DScqq1fRjLKbOcd+Y}@#Wq$ovMlVHXV;8>5lB9KUXl{9w=r5HD1hg>G(QL zAFt<^EsrwJnW*h0OPcP=cZ|z?#phCZx+381_1h*kjXadXW1bVb~vGL{_AGfgw7>pQ+LhIN6=Ajq&}^Fd@zyklxUByOq; zBzA!X^IYD`y>o?&x|>1nD2SX-@Jo@qn3_5Pmr0R+E^>UZfDf&JpnrqlMKg(x0^}eV z0K-z5{gKn$24@QOqTrU8_pW*gJG|)_P?`miK`Q74cs@1t3Ag)xZqt)oG{!~mmFepL z-~t~F3NlIh;nO@@lI|&1<;2v~?)7{d4z9O1N~8!V&@ji}nf6~mT#GJ1_H|wH-ud%P zGXA)~O^?#E4+g{!^$LP*ZNu9SrNErJ!0BThpyh-o)4zE)*S`Z~0V3lhx#xh7K}j&3 zNq&+J5R8MAYa(<-RrR{MukhL$9ufi7%GiJAoH+ov^h?;N zaDc@zCRqxFnlx|2_V8IvNde$K-*TZA&2h7@Ww}v-8?-BM4{PfJPJtWEaiclz#O5{b zp&a)s0{6Rl+)jZT&F4;>&g1s|3pan6cy2bw-7j!=XSweS+}=mH{dwHJ40lBycd;<7 zzvCvd+^jpR?abFs?8<5<_}jo~vt5EV`oFc=jx}xPBidi&X%7qiXSKi82aB%m4;H*1 i))O_bR#yFAe}DVle8;m_A@3#oNWb6fcT=9sU%+2JO_Wgp delta 1795 zcma)-e@q)?7{~7@>mb|~8#=jV5Q{~`;pFHKie?mO56YmW#m%h?wa}}@EJ4GVpwVb6 z1-v-m&@y&Qgk*79)Wk$mzyaF=)M&}Xh;(o$PVJD5ET|dkN;-q@UFog-v4sB7Joh}$ z`##U}eLlH+zXSfP6YiE|6&rK(T2ryXRBSv{c1WiI|3;0b+)!bx)Zt~Nx^itrvEEpw zA1FH}PE3+P-0!yc6LHt0Pc`g=AdPnXhEfbcJ3xRyu#EEKSL+PnL1R*tOAiBIIs!p& zdTX-9@vz$aTF#09#ynPec9u$RSIQlExiXtASEjVd6b?dWS1DzRTtem`h?kUk7?H2E ztGokp0}SiD8;X=T_`D}Y^@_7kf($LHgHqusEb#n2b)(>iWRWnaaQ=oD&LBgSd1}IR znhKEvsV%cdhABE(fQww|KF3e$Mq$Sh$u%5TpG^0Z9Pl?3eBfh7DQ3O}nNNNe=@0uw zxDiLm+52tD|Kaiwxgr_I0-(@x6BcNhR})zY7;=enhREL)8^MI}yHcNXeQ*^8Wze1(RcXi&bD5%<2~*P5FgcPoj@<@5m9OP=K~ERT?w>POpH8ouSd|LDfvVzb{S7Ol z6m8@>=%AQAkzSjAQ4^&*F{Ei#+WS=`?fNK%E^ujj{i|Ve4=58Dq?kF*_3h7Law>Hk zTLeiz!ics%Or8LR7cWx`10Jnm@>B4*G)6HC6Q+u;FgcwHBpjnGMV=~k+P=&S65l`g zZeN%jVAHNp^c0M^sgOUh+cT|A{rGM^4BI@q!VGW9b9Z2P(jz!Hnu%H_O%;YPNzt|< zH!C391Vvlo5N#)G*o%N=_1rwT-%0mOny#%%fs(^#xRnK^nR~mjz=t=1Ywqi4af)`Z zXTAfc8P2-FS}#1FZ*Ty^+|TbZC~ZCq`D$mu>5J(1+;R6U_NrSEtro_>B5cA8ApIbi z!YR^kSu16(19jX$6^iC8peMyxCrzi{W{sXWMDPdOY+ZXo<>7UZbOvN|o`26`0b&h= zhWGx;Z!biSD2v1}DzjVxWzPczL*Nz*r-{A~+~}lhUHo2y(qmr~h-{L|wUo-glc&Wt z^^9J$EL9UOI1xi$2PzR1yh~;=E@30=!;8TCB`8j`v2oR(z-omPwFgwJ&kBrtz-WYD z{tIt?H-EY&f5xad+`7)sTjthZa)P!>p5WRF`FEN47%pew7J`)o*>Rt&6~=^0=#A)& zl)}FRioFr(s+sNTJ$UycB~Uw(QVHfmYq(iEK4;Y({{voy_d);w diff --git a/benchmarks/opencl/sfilter/sfilter.dump b/benchmarks/opencl/sfilter/sfilter.dump index 244b713a..163eb04a 100644 --- a/benchmarks/opencl/sfilter/sfilter.dump +++ b/benchmarks/opencl/sfilter/sfilter.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-9f-b2-9e-1e-b6.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-a9-15-33-2e-25.elf: file format ELF32-riscv Disassembly of section .init: @@ -1076,7 +1076,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -1094,25 +1094,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1558,12 +1558,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 37 35 2d 38 lui a0, 230099 - 4a: 62 2d - 4c: 32 61 - 4e: 2d 35 - 50: 34 2d - 52: 35 38 + 46: 30 63 + 48: 2d 32 + 4a: 61 2d + 4c: 64 30 + 4e: 2d 33 + 50: 39 2d + 52: 36 61 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/sgemm/kernel.pocl b/benchmarks/opencl/sgemm/kernel.pocl index 7944ce9809ce587b5ae2f0f2e8bb321750e978c1..689300e22d9f0b7e0ce1e1101cc07e78691359c3 100644 GIT binary patch delta 1778 zcmdno&Um<$ae{(|zmvX?qlc@9x1+0%XMnGpzlXE4hog(1m!qq@lc$@blZU6XyO*2) zMvr(AE*U)rFtF0yyip`bRBMj}1A~+EfqSMR3=FJ5nIs+q6|cyBX}-U=#B&EW_%H&+ z6nPjJjsq4$cJ(QtCGk+w!^nz2!`nW14~qLHCVN@}8osj0D zwg@v9+_rn9-}hNS-01GMuM!~Bk|y6Z%d219*`UE>kg}sd{-fr}bHC@OaIwzeAmMOB~hYKhE#4;7G_;Sd={j$*c>x z@;h0K@Mzm8!O@|_;M{iFy~gX4=E<^IaA*DEIQO8FgSl|l{(?pGGnR3Do)2^4M7EhV zN0zHh_O*xtM&}BPV#fM4!06oejeUxb@rzS@H`e4H=4^JgF7mL=Kps& z;>aM%EEqWvo(>*?V#|F3+hjj8nR;L(8#Ww-$Nxp1U2l~UQFk`v|09kApOg>R15@8` zV9L5A>(MIwv(Y%g?hzzG*$D}_GPLnb2Yba%HL~}iev=AgxB#2Ki$ureKdr)lL8%g) zJRzQ#^wz>d`A0|p6>xgwv*C5tg_>2uTs*nVUcUaPzl#iufxtvTQ?No%g1)3%fn?qb zP{8fCIOiO5qJ%T;14jESn`v^X>hb3HPUZa;r zJAtN~DjqO+1JwU)633ZjU|LpwZh(}!fte}Eme+s>>Ts9I{En)V8*Jhfo75D#7B>*ucL>fhp(TLw~w!vw~KFprHp5*V@#w;jq#OY`Jqr>0RPE}I)Boj~bJ7tjELWzG$9zq3- zX_E!4r5vi>DThrmxRUf6sLot~-QPu`T z$S6OTy2I5BJyHY7$fEV&IX~yXWqAfOlSpfxz;g-0HtN)4B})1NGK}HOpsC zIO+aJU5SrLqT_|sGIb@MBbomWb=EDCEV=D|<}pLtLHC*`LMI*|EL+C$c|OdniEJ}# zjwC5Stdakyd2+eR?aY5i1sk5ax5dYcn0iW%z<0u#=$B@BlHfNA7Mhpz4aM;r$}DIc!?+2KF?%%Onpx8O+KYXcRZ z4-}t&P(sA-2{$nE4>>XLe*Mw?*yc#94>ZC`_^i;q2N${kN8a!L?fQ&6-$pj`Q`FdGq2^&NTUvL{J_wSJf zf1%>#+S@)~_4^of?2Z0R@Bgs{lqT;s!V^3=#XeJJ3}i_-QScM!Pmt+bH4Yd&0GXa- z+P0r#5=)FDvf;4o14-@M9xL=dP!_cd`VG?g(S&E~4~Qw3c+X7MGLta^842};+ZB*n zh`#L)ka7~TdsSzEwAJ@V_%K8A4KT4wUiu96!VI7nG!=Qa-T*pJynzj8rdjeFsNh-) z!{H6U$eZD1dsc%6lR?Uk0-y*ahqNpd1Iccj%?Zvqz^npM$uPOa#+^}na)O1# z<|j7(kR&w?o}?6!lhp0g@Fca79!W|KElDjUAxTLSOj6s>lGI9gk}@Kgq}ouD)Kk1k zN{o~wHNzW`q#`dOCn;HClhiV3lDgrAD@naXN>Y+2Ny^!w!6^Wiq>@mQ)aRL~Novz% zXp%CVg^{G5IE1hkR5dVP-)!sX#^_+6Ky0m8lv$iyW^7cDSd^HPlbREslngB37yxLV BOeO#T diff --git a/benchmarks/opencl/sgemm/sgemm.dump b/benchmarks/opencl/sgemm/sgemm.dump index caa5cd1c..7c1fe6f4 100644 --- a/benchmarks/opencl/sgemm/sgemm.dump +++ b/benchmarks/opencl/sgemm/sgemm.dump @@ -1,5 +1,5 @@ -/tmp/pocl_vortex_kernel-c0-b8-f5-98-dd.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-31-f9-9a-78-75.elf: file format ELF32-riscv Disassembly of section .init: @@ -887,7 +887,7 @@ Disassembly of section .comment: c: 6e 20 e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -905,25 +905,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1370,12 +1370,11 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 63 34 2d 66 + 46: 63 65 2d 36 bltu s10, sp, 874 4a: 30 2d - 4c: 35 34 - 4e: 2d 38 - 50: 30 2d - 52: 32 36 + 4c: 37 34 2d 32 lui s0, 205523 + 50: 62 2d + 52: 38 35 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/vecadd/kernel.pocl b/benchmarks/opencl/vecadd/kernel.pocl index 9fd25c4f81c2b3927b966b36cdc835c536345fe5..a2aef8b285a8372a77b7736049e86de2388d3bf2 100644 GIT binary patch delta 441 zcmbO`m+|5p#t8}<9&Y*pZqDxBo}SJ=z5(7&?p`j=eqN5AF5d3W&i?K`0Zx7au70kb z8$ER8IPd5)fPw7hI=TOHOcw+u*V{_+#d8NX_%H$`6nPjJE^l6D%gw?R_=85$-g8X20Tq$XOJni`v#q^6~qCncJg8m1;OfiB zSU1_u{W+u2WHk?6J+@1Kx|JDD_w)RJdf9CgLm*4SiGr(kN$T7Jt_*EF)88tmZwP5& z;A!@cIZ?uxI{Bf8ROpWW8*ZQV`xtcWjsDE=pABcv^uOQm#igau;%vOdMSm0Lx5{dZ zAO`+C!8zfeuprwbllq4)_7{BwpV0BGg?gM z@=9j>GCAJIaPmekUq+_MoZc3Ug_E7U*8}A~dBfyp`hes<`na+_h-+YayxH1Uky=?%u9$o{m1g?mhwTPR`!0o~}-=9{Z1PPfaIWCyBzq-?HW zE10O@_gUQUi||>XEu9(=)9i|vQYJTgNO9UFNw7QX0##1_;Nit@KskoVH@z$vD<%thuLtTn e: 31 30 10: 2e 30 - 12: 2e 30 + 12: 2e 31 14: 20 28 16: 68 74 18: 74 70 @@ -743,25 +743,25 @@ Disassembly of section .comment: 36: 6a 65 38: 63 74 2e 67 bgeu t3, s2, 1640 3c: 69 74 - 3e: 20 37 - 40: 33 63 35 33 - 44: 65 36 - 46: 31 32 - 48: 61 61 - 4a: 31 32 - 4c: 35 61 - 4e: 32 34 - 50: 65 62 - 52: 32 63 - 54: 37 35 32 34 lui a0, 213795 - 58: 31 62 - 5a: 32 65 - 5c: 62 62 - 5e: 64 35 - 60: 36 30 - 62: 62 38 - 64: 35 63 - 66: 37 29 00 47 lui s2, 290818 + 3e: 20 65 + 40: 66 33 + 42: 32 63 + 44: 36 31 + 46: 31 61 + 48: 61 32 + 4a: 31 34 + 4c: 64 65 + 4e: 61 38 + 50: 35 35 + 52: 33 36 34 65 + 56: 66 64 + 58: 37 62 61 34 lui tp, 214550 + 5c: 35 31 + 5e: 65 63 + 60: 35 65 + 62: 63 33 66 37 + 66: 34 29 + 68: 00 47 6a: 43 43 3a 20 fmadd.s ft6, fs4, ft3, ft4, rmm 6e: 28 47 70: 4e 55 @@ -1207,12 +1207,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 65 39 - 48: 2d 38 - 4a: 63 2d 32 66 + 46: 65 31 + 48: 2d 37 + 4a: 63 2d 39 39 4e: 2d 38 - 50: 30 2d - 52: 64 35 + 50: 66 2d + 52: 32 31 54: 2e 63 56: 00 70 58: 61 72