From 676a13f30d946ad109c08c3ab3d789cf8f8c398a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 16 Mar 2021 09:25:57 -0400 Subject: [PATCH] tex refactoring and bug fixes --- ci/blackbox.sh | 20 ++--- driver/opae/vlsim/Makefile | 1 + driver/rtlsim/Makefile | 1 + driver/tests/tex_demo/demo | Bin 48696 -> 48672 bytes driver/tests/tex_demo/demo.cpp | 1 - hw/rtl/VX_alu_unit.v | 4 +- hw/rtl/VX_config.vh | 4 + hw/rtl/VX_csr_data.v | 11 ++- hw/rtl/VX_csr_unit.v | 7 +- hw/rtl/VX_decode.v | 6 +- hw/rtl/VX_define.vh | 2 - hw/rtl/VX_execute.v | 11 ++- hw/rtl/VX_gpu_unit.v | 124 ++++++++++++++++++++-------- hw/rtl/VX_issue.v | 2 +- hw/rtl/VX_platform.vh | 6 +- hw/rtl/VX_print_instr.vh | 1 + hw/rtl/interfaces/VX_tex_req_if.v | 24 +++--- hw/rtl/interfaces/VX_tex_rsp_if.v | 15 +++- hw/rtl/tex_unit/VX_tex_unit.v | 132 ++++++++++++++++-------------- hw/scripts/scope.json | 8 +- hw/simulate/Makefile | 1 + 21 files changed, 227 insertions(+), 154 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 8a7d3657..d9d56b7e 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -120,20 +120,12 @@ case $DRIVER in ;; esac -case $APP in - basic) - APP_PATH=$VORTEX_HOME/driver/tests/basic - ;; - demo) - APP_PATH=$VORTEX_HOME/driver/tests/demo - ;; - dogfood) - APP_PATH=$VORTEX_HOME/driver/tests/dogfood - ;; - *) - APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP - ;; -esac +if [ -d "$VORTEX_HOME/driver/tests/$APP" ]; +then + APP_PATH=$VORTEX_HOME/driver/tests/$APP +else + APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP +fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG" diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 47611fe5..932dac05 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE 
+DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index c665ffcb..06d5a93f 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -16,6 +16,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO diff --git a/driver/tests/tex_demo/demo b/driver/tests/tex_demo/demo index 922905c550b4e28bf17d96db10408ae0bb1461bc..69c2c6e82b9d04b09ed18fd211a977351fb32995 100755 GIT binary patch delta 12220 zcmZ`<30zdw{=erA%YZlxn+)5WVP`-^5yTAybzl(O07Xl~1r-b2Ld{Y}MGYJ&=~$^@ zY5G#jHO$n^?PX|LPg~4=~6a4}QYLBVB6=G$eiM=h}3LI`;q_P6h?Iq6~NB-T` z<`{FYt?f`7F2~W^ZEY*8jTtLNx1c9kqSzi3!#ar5K}Mz#H-ZKRzu#Ulb34K$IYvMf z&@p0gu#vqas)J+M0?`!QGx;zSH@sT}eb$Dza?n*+?H=(-;946jx4Is-+Mg$`eMynH z863wX5f+kWssX3&U23J8RQnPl8jeE!Nk7HnKJi#cVz*Lo4>Cnjt7F@rfGl}Jk)u|T zBSMjTT9qpo$3hbQN0OZElq>mD)Q3iDC#nEX!8;_{MiW5e1pIjmIxGw7<}Z+HI@&D@ z6n#Z1e7CPFa})kRzS@w3Ly&EAR6uA&t7S2HXZJi9Zj$W77yIux+F+@-Tk4NolpB_e z1r8DQR5jIwA%|Z>iM8RV4UmTStqor|IztT@R=ej-g9-Ukm?sF;m=UZB7Lwp=s-Rs_ zs-k{XQFq!=o?9J@RRlD>eqWPK#atjvV-_P#qrtMNCKufYmJ!e%vXeM=&p!dQHk=`~ z+kdCzEU%ltTuIJyB{}O=eXD(3$*}_ExP%2#lT!d~R8|{R+%jl&Tn0on+sbFrzScA!=+J>T$934pan_R2H&`y(Js*SO2FoQ%r7GSp z6scSbL9kAGK_eo$cT>r%-QrMl`ZZ0%rrmR62l%Bj=c+Q36*ac3+n8~TPvSgS=mlC(_LzAsA`6~)pT&Fv3=pVN~p$+Ygk2OrJD2{ zR|VnsPa$r<{|;vBk_v34VA`&Y&l72FR5zDeqCa_J3nER;_Cd`M&P9r>jUkjNXBm}4 zM}uWu-TZaRdaP@((MhvRHGK|l?^V{L;a99jJW5Vok7baeA{Hv{L-^!KaJyJ^-^FNR z+C4u}qC?Rt7|UU6iOffs{5_j<+tUL~cl{U7>ng>K`|&WaCimGMcqE`Yj;@ z96N4h#~bu@3Y%R$iAZmwHm!}etWSdxYG*|&RZ%I1&MGp4>8$&sYQ5P#KSCyySSW(M zaJ9j*xo-YuCDofzhK(8TD0;IdV4c)V2U0-WQ5R}JLC%2cl$_dH9iL(z5uIvXQss__ zC}%`OHV$QWW%e%-opMG*^J+v-sg^Yq&?Aac3TSbVJD^cu-VcZh-){4~3%ENX$Ilee z*1Gvym58<~5%pH}3J+IA{eW_Ojg>zH+ii~iP)keHqNJ|Xp-}@m+qz^rrqv}PiUNwW 
zD@^IVF&py1~+Gx3`7QGiVHCT4n&EKt9-K|)CNmW+jvg(k@=*y@C+aV}ub7-7aUsizw z$m%P=s8Jsaa7Vp{1Se6oHfGFK!J!16=?1U553UANiPCBxbgRPJ%};qwsqu8wVB*Bz zhg%yii?znc*q=zlYSPdc@+65jX8e-^wR?_Ml(jm}i}S`eL_Lqmo(gvn6ZV6(?yx~U zlU@YRKK!EHa;f1=L#re4zd$SmvD^`Qli@j2Rp;EY+z|y0U*rzYEiGz{uk|xY!>x^- zmZC4oZMa*|@OzQ@vbFA-)_UZw#xbPjWmB=It)ru@?GCExuOuuX!Kv9pNBsDT3cH)dVamF?Ew1yxcS^cufYv25-VXq-v@e<(T_&o2mL(s z1JO65pNswg`pxLiGbvE=#q-b|Fw)RRLeWEFVM3;?{Xy(c=qx9FFRmmcuyrCJF_e8K z%!!=6FAC7^6;sf@CK}MbCAOe_PntS!jnbnUaK~1(%jx4+5Q>B1!wit_?vVxvL-hr6segr(EEOkt8+% zS@&t8=q2AI(CzGS&1=k3w1pV@EsT&P@ANLv*1is;)_X`SXkA_$*u&nWc>U|dl5VlK zeYm5yKw=*$e329ecwpLl#C-U863kEB6M-^F?~%{KGt%$_M%QU_ElCBD8Sk-M@D{@~ zs3{-%Q}n4b;{!WjCJNjn*NlEcqy+2g{KV;^Yr4T((K01L3{B6qU52wepqs6S#8$V| z7R6i$nOhj8$Mj)LedwC@;c!n15=eyi;!1x^U%=EA*R-A#N+B!^(<_SOCFw0PAR{+g z{HdXIa6rs^H5(`bGLpRSz(sG%OCl#D)_l?gu-5Vxpy6|%N1J~D;szt0*tJ&D6nFzq zlG=gah~*hw%(qcCKKW0FQjy8P0jr<0HEgV+X~i7`LNq5wiz5Onx{3s}rM z#OS~%Nxr{=5&LB_7?CV20mlQLJoN3NBuRv4_Sde4_w75f3^6e?HsT@ZCWWgB5(K;@S9nTeRf}ZEfEPaBk3l6yKb=T1FJ|*_ zLZ~q>WqBb8@g3+^V}6?L2GaK=7;4OoID@|Lk;$3_?-}Z&M7x@q{Y2lMF%eVY2ZcKm zw_bh5{cu~@RWYw;EY7fXJ#*A>KVeS^M^9(C=U62q$Y>-y(dXU|_iOg88T$z{-DxKE zez@PVbdl4`7%%{pL-T65SJ*-^t(SwXX&Kr30JDW5Wu6|iYOLpOR2%;nHXh^o*i$m^ zjzoFzyLe~Uax3_HUT8pZa2?tl-Wz%sB=@Hyj3s{xs3a%<7LCp$IXDrWHn|U~NYW=y zG@&ykpT~kh>~S!6@&e?0VDfQ2mfwS=+z!VSN-brw*pQWK%f}Ng8z&z(Nm4338L{y) z?SU@kh=PrgpN~Oz7tU(nnNJ@aCsNl(XzCGpGo1y&F1EU5r8FNbFze3(A;P+SgAB13 z<>~O$1R(22=a-LA9{?tu=BNskx$Uqlb;?6j zXI*m-$oa$6P1oFi${D;d7rczLcAqe1s^rn94EqJ(Sjq4=?BU?X#h7Xrj)Z$;g`m~J zbi30iIFIa;qHEvK2rp9bCYf$m7Y}y7TC{mU88(UueUl2_$3f;{If8a9n3=!BTaST9 zk%=B{AT|G73-3Qqm81_a8&O_5U3v}5^~6ycDXwzj6IdCKz2dXJ9c|BInc3t%@NsZ= z)FM38mx9n!YNZ~xkP|i`)1=P!dSEse?z3`#6da*TXjT>DG*LE!sS`6?6Y6Zw7m!(2 z(g*qj{cu((Ty;wF8xMw0TPi$``bDhCNx*&jNRBBc64?mtm4MajtxE*iTOm`ie4%~m zTyj@x6ZMag>*(!fG9Z`?QJpggGAQON(T;Ts^OtXpWI!RGXH0d}lYTp|Cr_*`Xg zXPY$dVX?CH9*5viwueUvPOWUM#~i2yb0#|F+%=q~b2nS-U4#3qFVWt;Q!p2%)TQoG z8%jr|C0#j|S*1O6Yhfi!d3w<5wn9{;^|0hUmo!mT_thFc!fputb~ajNLfz-i;}ie6nL&M4#UHWa&7 
z$FW+*J4EYsOQ6qqmuQ1-D$Zczy*4^)OuA!8ps|f6lJeHz*fUCu23_*fv7L-e8T80I zkF(M!D}$W8E*cCxlz}y`NSq%KHo6zKy-~})!8trOuM*)KecAUIRObCoPWUM|^yzv2 z{Iu$qhVCW&E|u+e1%j%UXJUXk|< zZUM$lj2`k<&dOk2UNa3+mBHq`4K(PY40h(dPJ=X744J)o{#XfPH^q;`dHLdf zOEOz2eztUFjUqNLF`*ku#g|T4>s9P5(`A`%&D9v|iK!1qBUv-V^gI)LM>OSyvlOv4 zFM_=*j^}0J7jCcoc)VA1%#UNwiDCH>tXNd%C#PK_Ro^M9b|TMzRaJ!(z;sQSdfzoA zS-%n|@=bWiyOJLfS%w-lX*A@A5v!vo!s$n{h_=SF10vtb*?BS98pjI6a%->1Z%{m@ zEi#qG1rn(z*Rw%M)*r;T*7%6W2rw=OY&tphL?|Gsuw29xbYmZj5op(nWd#`p*YF+? zt=SYutHO04jeJnGCFX3!Je9hFi=wARgqO_WR+rcVRX9#2;P?*vd$It zh1sFO$dk#W6v#OeqkKzH`BfpuCy2Xf&xoiZ&c=$YB5vpih^bIX^$b*&Vx&N_9uzf2 z>E1W-bZ9zuQ|u_phz~>>O{b`()u5L{)gfin<1)?z)1Q}x&!EW2{Wxt*3mBd6*>0HP zw^bIogQC6Oh63v_Q8_5udm6BV?u3dJgUrlV>>m^r_c}CXGq0`G$)0_%5q;_S&DYXX z`OqV1{Q8UAg9h8qV$EXWXrmcIz>SIjl*oQK-eM9+!teqP%9upjmWDtaEip;7j=qmW z%&-I=#F*)nGqhq6V^S!8hEm)cVmi`^YcOMNVzLzOPuS5h*$USe(Z?)TxLO;Im6%3_ zJ_nVt2D&902Ecr5*cc*%VKX*dF?RwsVk4d*ZYW%hGZYiK3jW1~RTB9${D}*%BXR_s zh>KWF!^81_^!xZ5}v44ok)*tzaAE$)WkH`u|E(NKGpSF=4x`9;0Pu~GD zTh{+bE99a7C(`BpGXfl&(5ggPbj9_ zQK@bZ);pn;=;6A}#DAFR5xOhl$Dy%l^~9M#oQHI85^^HZ59>A(zl!M5y69Y_r8Zkqno7e-)PKzXjU3Nb7Hk*&f+!0v4jRxCaAa&hrHZ_(3D!V!0bX_z`RoCw>Ugz?T7G9@-0- z7+>5c&GDMqp!2o!#FNDa)=P-uS-36ul+0rP5Vn$V)={h};Y^7AXtTw+k~B`=Ld?gM zC`U~qx|JoN%Jfu=QsFL9Zr-RD<2t%i>twpCuP0K{wTcgi4-AZMhcqw1E-?0GK~Pk( zS|pB03_XP^N_m=Y=*6SVAnO$puihg}p&tO&G2#id#k)O08kJQ$A(}_T^!pm+ozhB- z)0o%qK_giYVZWvPp`4cA-{%^kJmpV?d)Lh^40*0B7-O=6eCr@PnQxPM!+4 zJox~$UVJCRk-$Hev5dS58S~+@kW62G2FYp1PvVKekAI8GYR^A`&pPf8=luC-=+yIC zlwts%2|a;)wijMNxj7kkU%m+y6vAiN;70o25k&J#Fc!l%pf+Q92AqiF+1@zY_+l7N;ItHpyeIC; zN&Hn5jhQzClgzj3ZAc?`pd35$i-@BWKMuMxe;8+EDyMsX7j8!Sx^ns@EsYOH^xgOc zBs-lS2S0hM4mC+sKfWN5M=1UxZ~I z$opcK6!Ptesfd3M*9Y;Pcx)WZcOdQ|{0L-+@_INojOReMn12A-68;^Wu$A%Wz8u3nOp^2nuL51p+ps8O`8?3$ zcr8we@w^f31m29cf@j0P{>4kmM?L#pIsfuCaI=V0Jbz7nxM#^(ZB#cLo| z%_HF5RDJ_aOylc8Pv;ll^$gwzMLLro# zycz9Wei)8E$sR>H@4N@=jN*H*B`vSj?2O`$>{0F%5D&K{A+Q4Ix(TzM7hX2Mt 
zfd1F`V3^;;KZMhpxi%8@zl9G)8eivIPCAOPiMCRV&uOP}jJPZN6%`1?Dy?iWkwvYdgh1kzCVe%cWhtvTc zhI)FJf2>9QALKJow}<#236gY}cS3we=rEIxa!`IB z5g5M8hr-=!JOBaw$OSMz@x3s7ov*^O-r$RnkDqw}kiYO{So>f3HiU7L4*~Kweh+rL z{?31cu|N0@tj8^>co*DDi&GBIO8}*%kuSw-peHR;A-4-&9%lZN2BioJGjb+ zpd963g867hc6BnpLu%v>i4gM1(u{R+N1-_6&@|#|DxJ)+#}FC4k&Y{e-f9Fcja!dG zdmj`V|CDGqHr%!b$3zp7mryYstNbabbXM_@4u>+9Wmz%08C&7SgR4EPCZiDtI8{n_|vk-~mV$G3VaMy7C z?)PYU`8fP0NE+p)N{q%UBiNUl#!8P3!cLMa$jIN3D!P;}XXsh;Kf1w-b7WruXqpbx zR;~w1uIPtS)->&`t(pU%Jds}cG)H!h^r)n1dat%B9;N3R z9-mfQb%jhjr%%>aQ7OrDY0WiFH*2fhM-=)olv@DRp?Nwkiv?Ccmf-m)okd zD5bax)lTy2igWZ?tf@SMLen%|5&`3bY$^#Fxn$I(^ntU0N(8?{PBtpJMNap+RY5`7JUR^r%_*HRT= zt!-2HJ25Vw)C9kxDp?81Y8|#MUBlE)+UwS`k+j$}O?z!qqn)5^CurIfN&cIFI<}K>JiX7s*Zxwm4m7uIk+8Hgr%mw9Y0? zUGGE%b};#x&moGt?!wAe%pA}PI<%0|gn&F{8V_7~=f zMz6wj3QY_GIZY*JfD|hE{6rq|2WcHb2IQt9HIYOAAx=yjF_@0|86~JRIejDoT^OMH zm*sSmHP0MEvc_+)+vr-Sv~+yTA@7vqLwOC6n?_fz7uA!(@eWWwDbnWrzU}_J&6?kV zz^tzzk8cy*aUa5YCDmNQ5%=Jyan}5u8+uFQg63=Y!4qGan%xP9NpN?198}#I@Aitg z^r55UoHK#Abpk9mjhwXx*~b?o`dlP8mCpJI&kpY4&?lY}Lnd?MgCFo&^K;OnoUd$h zsaQTa-1gu%f7bkg8!~~us<@6|Hi5pA$W0Z}6#Ay};QtP==9_N0aur!6O`-2A5B}Ey zYwnooa-vMcS4OY{ExDC_z1g;wh0|_phP(c2LG!7^UY4~IW0H^TQlk0dFHDyYnhO%E zk-b%S&ECiNg$Hn!AP+?w@AWScMwZtvj?%m?GOlz>YGUn=C$#V6s zJh^&Tid?;`OkBMyPp;mT8dvYy2k!s(aNW>deS*l{7=-^&iEa7p)l$YbVi=;)OFPkr zhPnQW+*#jBr@?>SjBJ^)F;T|9(X4*0JnX>-3=_p{SihFJ#q}&e*B4Eoj`6r`Tv?WizXcQl8$-{rn1iWrzY`bWB zBNe}cA9>>t>mn9zO=U%5*VghdR}QiBl#71-B$exgdD|TM3~zaL+w68MMuhE4_1S1r zF&o9OeVINxoui$i2_sk38^vjiM&iktu8}PP`}1WsPK-K`$;OMO1ATpFPE)6Li7yXi l`ozvwM<0l=cQfT5*NftJv+=)2P4D(a$fwcjMZm$t{{xfuNLK&= delta 12286 zcmZ`<30zdw{=erA$bdMGATlh@8TL)Z1w;`L6ox@G756P%Akn}rH1#Qiq2`G6#8W*j zEG;va77Hyk7qkU^^-5dKthDSoT0O1ICAIl~f9KAOJwN|n=RHE6W9c?-59-)PlfM;VQJkY8 z3g`xLUq>_hN>p`>VJ)JuW8dTxQ0zFE1${Qhp6=+%Z1p~IiQw8C1vh%M*y^7kZvBcZ zakXPC%N1cEJuPM6)E%T&sZOn5L4@Nd)Cabc9^VuXh9vaL2lo(@idvm7-vneug_NUK z$`K*uS_7rrv*KOI*+@=x%Z-q7l~RscDMy5qdqI;c5WG`@eK5)C+;TT>ki9ulj#?>4 zgp{k(@Yt!-`&W-TRxAWjFWVz$ 
zRmX~97~1R&>1CSQSz$={acHzTj%p~!dp5@x&Q8z@maX3RjLCxZO6F^XYe>)1M0=6w zV;)h`tD(NqP_cw^v^py_1T?MlYRY#`*Fb)ns*9L$=6iuNy+CJwV%CSOCr-WZIheFL zJ}0$LUK6)MBJC}0VWlB^ufHcd3D=O5x3Ln3;gHLj2zjczw>8`hGV~50T5Oj>NuP}E zFqri+JB}l;G?^?ljM`>%d`Q+9)Hn)i>z39k)m`cfYLS_S^fhf2I~mXN8sGUlOcr{L zzXY9YZS@7)90jNTqTBbsb#DdAQLwyj>2j%ixzyc5^Y@(I6W1_O{S#J2i)*;1+D^L9 z{6+T;O4MDyi>2;WbxT)C-K(VT&rwQnd!<+R?^mI_(X0C!Ou5}YTQ3XCQE;N(w-Blw z1)m~01!Tlg;IuW^)Awk~f{{t*EC?bmU$%jMB<&-prLrnGrKR{)nuZ7UzVkbg0-5(g zn#@8?%?FrpCTOg6ZZ)esYR+g96<#&nI@!H?bed2`C@;rLZYI>r) zdM<=`)f~`d?qErgq8w7QL6d{qhwtJZ2eWm>NIPoGLfg3EiIE^}4ch+FO7xa5wj)xu zrhdp8NC#1|HH1*6+*Q>bN*o1i>z1yStFu-Z^&+fQ_kzaXsPXd*T-LpS?~M2Om?gDH;F0OWosC60!6uNzjR!y??*N0LO}PC zQ8{-=x#F_ZLXN|%qu}wnrH@NLAD4bo)sy{vucOVs!qK5#M|VRCW#k*{wqLE;-`l!k zEK)^Q>Lt}r9!?J>X`_Z=*oFkKH3Y685M14BW5*oEI>}}~9!K#0T9P)~J0keou&V_h zjiI})u7l~`7w!tI_k9|OELD*@SFE2`WWP z{y7LyOv7cCTb=gJ(sn^h>xzlqpoqE~6pb2&v3d#kOHeKDplDtTiZ&#Se;ACIlB8bR zD?>4K#}o_Z-I!=Q*z0|dA!ct-&2ZOI&{VgyNe0y<(W+cN^zt`#a*qA@ zAr?um*;Oc>2~sD;vn|=DHg|fQ=R`7JL2Qn zWN|V+lzk*F#&dQ=_$P4IERxV}7DZ@xi5X~HgafTAwxbOY$I(s|7twAI{)wE0i6peW zMNuM;iulRp`Wz>cl4mfkR0pw{J+N$?y(!LB)P|_Zv5DL#;e@psqXnp z?s1}fj2faVNbxW-HHMNXPO{L@2Qw4}rw1-QyB-X>XNsac6D&+U%?4K$AcY-H;cdll^a?om0hk|qCxTRveq*P>Ez)oiqsuh8k)(pjjNjy? zkYzd)lRy1QecH^_@J^WN3~r)l#`t`s0_)m*`9jen&EzMw%;+YDrRCXwhO@6iH`@$} zm%LKjrMb>zZrLw*KVmJIdfzkc&*7dD{4^5bw>*oczs1xg&$N*gDk1)1m|i;`uXcOL zfQsDc@u!8-=>Ww1Hn0-WAwAL056_2w1<#Az^q8biEdc8a_5d3GF!bmP4?x`XDom>Wtkrsx8YuE3sA3(3cJ!HkZ^=$kPpfDjf89Rc3~ z7X3CcIx$L8z;9s0oJ$5Hl10TU3di$>v2ABX5#bpF^|V3Sbz#|}JR>GzDs+>=4Fm}V zM2YjoO1H~r4~yqB3fcSO$Ban5)HRiz67Bm$W?dsoGubc5G+7=9X=99(Y@wMs%z?@# z7bek62RYLlZndjsp<;R;^VDV-p!tM{Q6%k_uv_qy#+nwzlnyWaFHAxuM*Tg3VqVUc z;AqfdUdgN>2=OiG)?$8??FKU712D9h8`wc{p-%=58QwS4Uq-uu-DxZO_l=Gi06!?) 
zdAP3{)9;4c%npksePh@wVpHE-E!>Y;opAPbhkKe;K!S`$!V_cO-EhBQmy)obFw>i6 zQtyWQ9m^8AndS~dQ8_fPg?ouP#OzEbd#-tGzXQx3g_QaF(5kV%yYVK+zsJU7d?@ym z$^(!nAAS;V*?MjR-^fRpP#nAy+FWjh-rD3)I=@)*i-0O>@|`GjKFN^@==90?s3OIf zTxCILNj{GSgV^7|+{KHK??K5Yj97jjmZC!uLW`0zTRfkgY9HDWgDL7Ui=w3VL)>ht zN_(JtDWYJL)Q!>T?!Z|cJoC@Oaia9NpQi3tchOnU(ZklZtkSas7MKlWK@efRE+Rwh zAL>GQY5|Z9AoNwNxsis{h~YUo_U(v^syqE(aON|3t8{Hdzz2XyqdBSqIrj=ID_!qH zRHr<1_sE69)K$;if6JNLW3JK*9OsY!W^ChejAk!RutYZG9Zr16HE70%ck#ru9!Odddi9!4qgFgPCy+-uetW zicIuzfz*Aq5#FCmRg`~XHllRo9Azt%8;K(uCAOk`Ay&rcZSiUUF7|0yW;Q(^K2Ge7 zT7-wjsUVD$TBVOd*`L0yBWP9=OQ4A{qnS1_*E6Bb_I(DK zWh1hnKPV7oBDva>5;zkKe^)9z?)-;%DmR`T5=U|^(N~~9G&3Hn*RN9o$bOQ{#PWsq zr*pO6UZKk?R@HbhEU%NP3V{SUE-?0f%yYd?w)3%F+ASp&ZP`=;XO;Iz{Jhj^1wx?y~_z`wdLNT$rp&LyjFw ztI>*~7m8k^eGD(aN|=0l&>MC`RHKcsOY0Rr(x1kmp`pwE1VXp>nEJ1{Kyn$%#ODL!V>)9))8z z%0abt0(>_IvHi%BP=}S8H91v&S6DPGdD6vCb013&JLr5;%^%3-DU&leh(# zyE1ynTW{S+gKl!L$-0FGsdBK*x|0Uo{>P4HvICe&KDU4-LiQ$D2Y`mx{OzwG=RQ+43qT9<~RpsIYu>2&a-t$Z;w%5e* zLJJ=9E)_;Zrl3YGIvx38#_AY}aQo3yMA_olJEG7=uPf7Sv23_lW6O-Zh~lwqSE($% zC6QI+dJZVX_KWz=78mgl0jA`F&9T%*A|yyDo++Y>da>ukXtXbg)kWz=SMin*rQHV& z*RXe@WUcxbs-P~aE!of;=3;H40*MYZ1S5>7c0?Nu@u;vU1JNA~?c zFoFN{fHeu*5y6Trkv2GDsbO^BX2PD9g)mS`@oyr#*v#gLiN#TE-$C#J<}N5SUV(%1&RGbI>+vTrX1GhP3lzNd~C!3I(`fF^i)3V z2%5l9acjtZ_On>C=vdllrVwzW<31sB5YCzCc#<&9#9~J$(6%&%<7kOaq;(8<2x6uN zco3aLr<~~{EMjyD<^O86HWRzc+7;ZJOM9g)d!A~s?J zk^8YAaVauFE4Xo8WLQNIi|fio;n4Pd_7S4D47`9488TWMA^OPZ*5a&<%aqQ4gW8PC zm)g3E$A(%0zb3|TDV!?y54G4sk)OCJGNevKmPxr@NJZT2E#%M*q#|z4s~~e!;|*FN zALFY?m)}hUI61yoe+!QEe;}3d8FUaUhR)dg@qLKa8=i-7d=Ak@1AT3Z&nMbs=!zwd zF94l}BVCZNl_nAmVI*8gba%tEBy1zPkKq{3+xQ})a}CQx!?1pFkHdU?3EhrH8V+H- z<3|!b%CL+06Nnyd_*MKoET-pL;!GpXc*A}|mJ>a}@DlMWiJoZq503r#DxxPT#bsi^ z@J#zPY=neoXCl0k13)HhqEafkgXAV`rhJv$Ai!1$I7EO~B)~<0R|zm;2a=XxlDpDW zfXYa;5Ta23C3KF=)hG4)7>nkj{HZ9Rq%4|MM$Id)Qj)T{9RnZ5^a&tIIi~^2H6{%l zj>M+|s|nz?%gkzPHJJUl0yy2*k zWb-0a$AnS@Zhp$ImmliIyov7Ah5)*&ZzfVPd?elWpO?3@@kHXe8C+Oc6NUnB68H`MK3 
zgs#r!|jg0y8S|l@oe}m+-<(u%t5Xdi~vfA;}@Y%pa;aqz@5ju^0B}%aa zUkp7#d_fz$fbt&6xcl481S0qgn2qF> z2&*%H0~|ANkK>i|w_(!4UxE`+{5*_B^PQ;87@iF$VtI}q&NjXVhU57%EJXs(!(BO% z??BNc@h!k4^ID@FY2*h`j$Qa}#L<;Ef$qkq;fze>Wf17j>9xEEe+>@xwxUb&msqzyaH`M{xnQx@qxf+@gGsp*_>W(b9hl4 zK2`MRr;yBCJ{rdI_!-1BfTzKgfxHN9KF>m;3V0)uX5}v zU??r-Zy=_@{4!i0!uR5_@jm`0;vUM6Lv|Qn1?PtIT*#L2k0CpP@5h6(eI)OU2uAT+ zsDshG2J{$y5h)$ZCnBbCyaJr@d=?B$;HgOOL>`8HIf(~a6y<*YFz8b5!lF#(kAj}U z9XKVX^7Uw^@fXpS@f;W~=Vwtv4{-W8uc;AhzRcQ7!MS0L60`656oxgBCv z{4$`k_!T%Yo6m=*bNB^#J(uUBNayimc$RvIchloP?febk=W{(4WdXm9=Z}Z^BxH0U ze-Z5>ehiK-=CLV?@;BZC(J$fug2koW4lioBeHsMp{12>aE#I1~D9iX`xQ#r*w*XYf zzeakO^Nny~1^)^HEBPVh=21Q#nOMc?Yp8=?0==5=fV=g44x(Jc2gA_@UJSP$<35=E zJ6{9jS{@B;jXVRI*YP{R3*L=h{vPL_!Q>PC8dmyAUXF$M2mdA(?-TrMU;G=9zl_R% ziqA)ep62vl!6r`MkT&vHAoUEN6RjxE@*vbtnIDAe=lDL<)AKwA8QsEDVE6_8 zF*3B3kB0dd`6qB|8#hLx{Scqmm8zr`zJCV6P{8>c#29H7jZ}LjyU@xyg&i3I$1-{+#{xJC#H$mzE z{~5s?(O803 z{s0XB$YY`CC*A=8{LH5V^B>*Ly~U@UK(q+r0Lgn!%?>msZR3` zf-%LgTm>CKw2$FUP>CM6&nU1=2-OAV&? z4}YDpJ8a@A9fQ(TJCcaMZfp;YSycKTu%*p>kc3(#* zQ>H_26_TJ#nUBQ22Z~J{DqN-E_UCY9G$N7lWpl9NpMXkZW!(%opt0@qgQsjm`HTUX zJEa%awhoki>gU*i`$5&#)|7=|ecuJ`vQM1@RW~HNR=e!w|AgyQH~6MUm;HhJ5$1Pb z(uHD_pKBG?H2rtPw870UlCyBJiT-a_vE72mKA;6q8!Ia^VZbx0Q0~*e4DB3*2`3ya4 z`nngq_>SyLfUfbSnu_f}sbvFD*1E<`HI)wosFu?+nyzuZy>h$-!v|9*183?qtf@MeLen*VTBB6SmplN*6A1Au8D3PihB^fl z9nEw}6#9u?(0mPa2B0hy0$m=^GB0R>209DSTvQNUW3^7%>IFT*gJECS_!CgSxKShB zsQ107M>SPf02+)6qss%j;srT0&{cq5yayE0)02dH4Riybd~8O#>?K;SWO`8>G*qo! zsXF9Axk0rAdKP|(k+hN*eF8iz) zH%2oA|H#rV&C&;CX^k7;u~b4zWD)JrEUkg1@7o9*W8#aH--%D z*9@H?L*udMhtZtJ(pz3jhc!!U?aHh#fuc(y(QB@}tpOf~|Al&@=QQJBQ?t4FrYtNr zG95csZo+ILb&Y%N6>{g~Xy$j@D~d%#xw*}_G*T$~mz$H9N|N@9mGr5NW0k5egQRI? 
zwALw`HTa|DJmh^cH&jCo)^a@TYw=e3==x2Bx^ng zTXb!bEgdu~$ZdJh$d?zjabm@GQTjkQ-W(P`5NQvxXvn)K+;IrZ{~BC;(dZ2}PBx|b zJPyNqzoxS$`Xr=5Z|FSGLj68?568~T zjlRWr4r?}zzOJZ^Wy%ct5_9j*4Oo+!;Za?xAuE*`^i}5GA1bh>OfPbb2(5@<*PDA) z^!H=Cnje~dOK11|&_VZ!!k$s}3S)}D>QSQm;x9~(54vv^R;~JJNKZhz?-h1lwP;LF zB6Ju2!t`WX_oOe=M?b4SuEqpstK#Q}{K7rUr@P_D-qMf5w;fGSkVP1(Pnna0)YPCh zG0*CY|E=~5RtJTsiNT8Mqqb2KF(Y*IyXzVoUlHl^v+;7jetxXCJvg81)R-Xg&HQlF zpEYZJRNW&?O=4MtC`=z60?o8E8yBqd_b)47Fk@PI>7453C+${Mo zv#ZKeA6zu0a!S>ddDRt*dZ#X!J+HcaVOL0`md%^7pnP6xb@@Zp520OHT2@{)yZI-F zzsZ|cPkKEW_4J-(dU{VDJ-sJ|p8ipHoqDq9NtvhjY!-JP}UVFn%D`y!zz@ZSiv-wlCHHygP#P zqVK*8|10j%6|o4TyT88QhtYaGSJSn=`R2Yt6~Cy@JCFghiw^Ynubr(;?G{H4Wca5o i&_*8%b}&P2^PI>$n8WspMF;z{V`3lLL~-+A!v6tu3xY!c diff --git a/driver/tests/tex_demo/demo.cpp b/driver/tests/tex_demo/demo.cpp index 229734a5..a28d675d 100644 --- a/driver/tests/tex_demo/demo.cpp +++ b/driver/tests/tex_demo/demo.cpp @@ -141,7 +141,6 @@ int main(int argc, char *argv[]) { kernel_arg.num_tasks = num_tasks; kernel_arg.task_size = count; - kernel_arg.device_ptr = device; std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 9751bb48..2b6c2719 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -153,7 +153,7 @@ module VX_alu_unit #( assign mul_ready_out = !stall_out; - assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op); + assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op); assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid; assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask; assign result_PC = mul_valid_out ? 
mul_PC : alu_req_if.PC; @@ -164,7 +164,7 @@ module VX_alu_unit #( `else - assign stall_in = 0; + assign stall_in = stall_out; assign result_valid = alu_req_if.valid; assign result_wid = alu_req_if.wid; diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index fa9d2af4..2fc873fc 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -85,6 +85,10 @@ `define EXT_F_ENABLE `endif +`ifndef EXT_TEX_DISABLE +`define EXT_TEX_ENABLE +`endif + // Device identification `define VENDOR_ID 0 `define ARCHITECTURE_ID 0 diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 05fe8250..c9ac3357 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -13,7 +13,10 @@ module VX_csr_data #( VX_cmt_to_csr_if cmt_to_csr_if, VX_fpu_to_csr_if fpu_to_csr_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, +`endif input wire read_enable, input wire[`CSR_ADDR_BITS-1:0] read_addr, @@ -80,10 +83,12 @@ module VX_csr_data #( end end - //write tex csrs - assign tex_csr_if.write_addr = write_addr; - assign tex_csr_if.write_data = write_data; + // TEX CSRs +`ifdef EXT_TEX_ENABLE assign tex_csr_if.write_enable = write_enable; + assign tex_csr_if.write_addr = write_addr; + assign tex_csr_if.write_data = write_data; +`endif always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 325dbdd1..c6e1fbcf 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -13,8 +13,11 @@ module VX_csr_unit #( VX_cmt_to_csr_if cmt_to_csr_if, VX_fpu_to_csr_if fpu_to_csr_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, - +`endif + VX_csr_io_req_if csr_io_req_if, VX_csr_io_rsp_if csr_io_rsp_if, @@ -63,7 +66,9 @@ module VX_csr_unit #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), + `ifdef EXT_TEX_ENABLE .tex_csr_if (tex_csr_if), + `endif .read_enable (csr_pipe_req_if.valid), .read_addr (csr_pipe_req_if.addr), .read_wid (csr_pipe_req_if.wid), diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 
7ddc8000..d30ca443 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -358,6 +358,7 @@ module VX_decode #( use_rs2 = 1; is_wstall = 1; end + `ifdef EXT_TEX_ENABLE 3'h5: begin op_type = `OP_BITS'(`GPU_TEX); use_rd = 1; @@ -365,6 +366,7 @@ module VX_decode #( use_rs2 = 1; use_rs3 = 1; end + `endif default:; endcase end @@ -373,7 +375,7 @@ module VX_decode #( end // disable write to integer register r0 - wire use_rd_qual = use_rd && (rd_fp || (rd != 0)); + wire wb = use_rd && (rd_fp || (rd != 0)); // EX_ALU needs rs1=0 for LUI operation wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1; @@ -385,7 +387,7 @@ module VX_decode #( assign decode_if.ex_type = ex_type; assign decode_if.op_type = op_type; assign decode_if.op_mod = op_mod; - assign decode_if.wb = use_rd_qual; + assign decode_if.wb = wb; `ifdef EXT_F_ENABLE assign decode_if.rd = {rd_fp, rd}; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 139b2a05..e8b7668a 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -156,7 +156,6 @@ `define CSR_RW 2'h0 `define CSR_RS 2'h1 `define CSR_RC 2'h2 -`define CSR_OTHER 2'h3 `define CSR_BITS 2 `define CSR_OP(x) x[`CSR_BITS-1:0] @@ -185,7 +184,6 @@ `define GPU_JOIN 3'h3 `define GPU_BAR 3'h4 `define GPU_TEX 3'h5 -`define GPU_OTHER 3'h7 `define GPU_BITS 3 `define GPU_OP(x) x[`GPU_BITS-1:0] diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 5ac78f35..224d0731 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -45,7 +45,10 @@ module VX_execute #( output wire ebreak ); VX_fpu_to_csr_if fpu_to_csr_if(); + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if(); +`endif wire[`NUM_WARPS-1:0] csr_pending; wire[`NUM_WARPS-1:0] fpu_pending; @@ -84,7 +87,9 @@ module VX_execute #( `endif .cmt_to_csr_if (cmt_to_csr_if), .fpu_to_csr_if (fpu_to_csr_if), + `ifdef EXT_TEX_ENABLE .tex_csr_if (tex_csr_if), + `endif .csr_io_req_if (csr_io_req_if), .csr_io_rsp_if (csr_io_rsp_if), .csr_req_if (csr_req_if), @@ -131,9 +136,11 @@ module VX_execute #( 
.clk (clk), .reset (reset), .gpu_req_if (gpu_req_if), + `ifdef EXT_TEX_ENABLE + .tex_csr_if (tex_csr_if), + `endif .warp_ctl_if (warp_ctl_if), - .gpu_commit_if (gpu_commit_if), - .tex_csr_if (tex_csr_if) + .gpu_commit_if (gpu_commit_if) ); assign ebreak = alu_req_if.valid diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 9cad586d..1469423d 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -10,7 +10,10 @@ module VX_gpu_unit #( // Inputs VX_gpu_req_if gpu_req_if, + +`ifdef EXT_TEX_ENABLE VX_tex_csr_if tex_csr_if, +`endif // Outputs VX_warp_ctl_if warp_ctl_if, @@ -18,23 +21,30 @@ module VX_gpu_unit #( ); `UNUSED_PARAM (CORE_ID) - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) + + wire rsp_valid; + wire [`NW_BITS-1:0] rsp_wid; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; + wire [`NR_BITS-1:0] rsp_rd; + wire rsp_wb; + wire [`NUM_THREADS-1:0][31:0] rsp_data; gpu_tmc_t tmc; gpu_wspawn_t wspawn; gpu_barrier_t barrier; gpu_split_t split; - VX_tex_req_if tex_req_if; - VX_tex_rsp_if tex_rsp_if; + wire [(`NUM_THREADS * 32)-1:0] warp_ctl_data; + wire is_warp_ctl; + + wire stall_in, stall_out; wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN); wire is_tmc = (gpu_req_if.op_type == `GPU_TMC); wire is_split = (gpu_req_if.op_type == `GPU_SPLIT); wire is_bar = (gpu_req_if.op_type == `GPU_BAR); - wire is_tex = (gpu_req_if.op_type == `GPU_TEX); - + // tmc wire [`NUM_THREADS-1:0] tmc_new_mask; @@ -76,10 +86,28 @@ module VX_gpu_unit #( assign barrier.valid = is_bar; assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0]; - assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1); + + // pack warp ctl result + `IGNORE_WARNINGS_BEGIN + assign warp_ctl_data = {tmc, wspawn, barrier, split}; + `IGNORE_WARNINGS_END // texture - assign tex_req_if.valid = is_tex; + +`ifdef EXT_TEX_ENABLE + + VX_tex_req_if tex_req_if; + VX_tex_rsp_if tex_rsp_if; + + wire is_tex = (gpu_req_if.op_type 
== `GPU_TEX); + + assign tex_req_if.valid = gpu_req_if.valid && is_tex; + assign tex_req_if.wid = gpu_req_if.wid; + assign tex_req_if.tmask = gpu_req_if.tmask; + assign tex_req_if.PC = gpu_req_if.PC; + assign tex_req_if.rd = gpu_req_if.rd; + assign tex_req_if.wb = gpu_req_if.wb; for (genvar i = 0; i < `NUM_THREADS; i++) begin assign tex_req_if.u[i] = gpu_req_if.rs1_data[i]; @@ -87,54 +115,78 @@ module VX_gpu_unit #( assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i]; end - `UNUSED_VAR (tex_req_if.u) - `UNUSED_VAR (tex_req_if.v) - `UNUSED_VAR (tex_req_if.valid) - `UNUSED_VAR (tex_req_if.lod_t) - - VX_tex_unit #( .CORE_ID(CORE_ID) ) texture_unit ( - .clk (clk), - .reset (reset), - - .tex_req_if (tex_req_if), - .tex_csr_if (tex_csr_if), - .tex_rsp_if (tex_rsp_if) + .clk (clk), + .reset (reset), + .tex_req_if (tex_req_if), + .tex_csr_if (tex_csr_if), + .tex_rsp_if (tex_rsp_if) ); - assign gpu_req_if.valid = is_tex; - assign gpu_req_if.wb = tex_rsp_if.ready; + assign tex_rsp_if.ready = !stall_out; + + assign stall_in = (is_tex && ~tex_req_if.ready) + || (~is_tex && (tex_rsp_if.valid || stall_out)); + + assign is_warp_ctl = !(is_tex || tex_rsp_if.valid); + + assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex); + assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid; + assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask; + assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC; + assign rsp_rd = tex_rsp_if.rd; + assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb; + assign rsp_data = tex_rsp_if.valid ? 
tex_rsp_if.data : warp_ctl_data; + +`else + + assign stall_in = stall_out; + assign is_warp_ctl = 1; + + assign rsp_valid = gpu_req_if.valid; + assign rsp_wid = gpu_req_if.wid; + assign rsp_tmask = gpu_req_if.tmask; + assign rsp_PC = gpu_req_if.PC; + assign rsp_rd = 0; + assign rsp_wb = 0; + assign rsp_data = warp_ctl_data; + + `UNUSED_VAR (gpu_req_if.rd) + `UNUSED_VAR (gpu_req_if.wb) + +`endif + + wire is_warp_ctl_r; // output - wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid; + assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), - .enable (!stall), - .data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}), - .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier}) + .enable (!stall_out), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}), + .data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, gpu_commit_if.data, is_warp_ctl_r}) ); assign gpu_commit_if.eop = 1'b1; - assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready; - assign warp_ctl_if.wid = gpu_commit_if.wid; + // warp control reponse + + `IGNORE_WARNINGS_BEGIN + assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.barrier, warp_ctl_if.split} = gpu_commit_if.data; + `IGNORE_WARNINGS_END + assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r; + assign 
warp_ctl_if.wid = gpu_commit_if.wid; // can accept new request? - assign gpu_req_if.ready = ~stall; + assign gpu_req_if.ready = ~stall_in; - `SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready); - `SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid); - `SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask); - `SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type); - `SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]); - `SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]); `SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid); `SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid); `SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc); diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 7f35602b..37300527 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -195,7 +195,7 @@ module VX_issue #( $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data); end if (gpu_req_if.valid && gpu_req_if.ready) begin - $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data); + $display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data); end end `endif diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 5ec9c74a..ca1fbc79 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -75,10 +75,10 @@ `define UP(x) (((x) > 0) ? 
x : 1) -`define SAFE_RNG(h,l) `MAX(h,l) : l +`define SAFE_RNG(h, l) `MAX(h,l) : l -`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-s)] +`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)] -`define LTRIM(x,s) x[s-1:0] +`define LTRIM(x, s) x[s-1:0] `endif \ No newline at end of file diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 81f63aeb..36c6dbc9 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -128,6 +128,7 @@ task print_ex_op ( `GPU_SPLIT: $write("SPLIT"); `GPU_JOIN: $write("JOIN"); `GPU_BAR: $write("BAR"); + `GPU_TEX: $write("TEX"); default: $write("?"); endcase end diff --git a/hw/rtl/interfaces/VX_tex_req_if.v b/hw/rtl/interfaces/VX_tex_req_if.v index e8290587..7d4d9af8 100644 --- a/hw/rtl/interfaces/VX_tex_req_if.v +++ b/hw/rtl/interfaces/VX_tex_req_if.v @@ -4,19 +4,17 @@ `include "VX_define.vh" interface VX_tex_req_if (); - wire valid; - wire [`NUM_THREADS-1:0][31:0] u; - wire [`NUM_THREADS-1:0][31:0] v; - wire [`NUM_THREADS-1:0][31:0] lod_t; - // wire [`NUM_THREADS-1:0][7:0] t; - // wire [`MADDRW-1:0] addr; - // wire [`MAXWTW-1:0] width; - // wire [`MAXHTW-1:0] height; - // wire [`MAXFTW-1:0] format; - // wire [`MAXFMW-1:0] filter; - // wire [`MAXAMW-1:0] clamp; - // wire [`TAGW-1:0] tag; - // wire ready; + + wire valid; + wire [`NW_BITS-1:0] wid; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; + wire [`NR_BITS-1:0] rd; + wire wb; + wire [`NUM_THREADS-1:0][31:0] u; + wire [`NUM_THREADS-1:0][31:0] v; + wire [`NUM_THREADS-1:0][31:0] lod_t; + wire ready; endinterface `endif diff --git a/hw/rtl/interfaces/VX_tex_rsp_if.v b/hw/rtl/interfaces/VX_tex_rsp_if.v index 3ca929d5..e0e3cbea 100644 --- a/hw/rtl/interfaces/VX_tex_rsp_if.v +++ b/hw/rtl/interfaces/VX_tex_rsp_if.v @@ -4,11 +4,18 @@ `include "VX_define.vh" interface VX_tex_rsp_if (); - // wire valid; - // wire [`TAGW-1:0] tag; - wire [`NUM_THREADS-1:0][31:0] data; - wire ready; + + wire valid; + wire [`NW_BITS-1:0] wid; + wire [`NUM_THREADS-1:0] tmask; + wire [31:0] PC; + 
wire [`NR_BITS-1:0] rd; + wire wb; + wire [`NUM_THREADS-1:0][31:0] data; + wire ready; + endinterface + `endif diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index dd6ba8a3..1485e32c 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -6,56 +6,26 @@ module VX_tex_unit #( ) ( input wire clk, input wire reset, + // Inputs VX_tex_req_if tex_req_if, VX_tex_csr_if tex_csr_if, // Outputs VX_tex_rsp_if tex_rsp_if - // VX_commit_if gpu_commit_if - // // Texture Request - // input wire tex_req_valid, - // input wire [`TADDRW-1:0] tex_req_u, - // input wire [`TADDRW-1:0] tex_req_v, - // input wire [`MADDRW-1:0] tex_req_addr, - // input wire [`MAXWTW-1:0] tex_req_width, - // input wire [`MAXHTW-1:0] tex_req_height, - // input wire [`MAXFTW-1:0] tex_req_format, - // input wire [`MAXFMW-1:0] tex_req_filter, - // input wire [`MAXAMW-1:0] tex_req_clamp, - // input wire [`TAGW-1:0] tex_req_tag, - // output wire tex_req_ready, - - // // Texture Response - // output wire tex_rsp_valid, - // output wire [`TAGW-1:0] tex_rsp_tag, - // input wire [`DATAW-1:0] tex_rsp_data, - // input wire tex_rsp_ready, - - // Cache Request - // output wire [NUMCRQS-1:0] cache_req_valids, - // output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs, - // input wire cache_req_ready, - - // Cache Response - // input wire cache_rsp_valid, - // input wire [MADDRW-1:0] cache_rsp_addr, - // input wire [DATAW-1:0] cache_rsp_data, - // output wire cache_rsp_ready ); `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) - `UNUSED_VAR(tex_addr) - `UNUSED_VAR(tex_format) - `UNUSED_VAR(tex_width) - `UNUSED_VAR(tex_height) - `UNUSED_VAR(tex_stride) - `UNUSED_VAR(tex_wrap_u) - `UNUSED_VAR(tex_wrap_v) - `UNUSED_VAR(tex_min_filter) - `UNUSED_VAR(tex_max_filter) + wire rsp_valid; + wire [`NW_BITS-1:0] rsp_wid; + wire [`NUM_THREADS-1:0] rsp_tmask; + wire [31:0] rsp_PC; + wire [`NR_BITS-1:0] rsp_rd; + wire rsp_wb; + wire [`NUM_THREADS-1:0][31:0] rsp_data; + wire stall_in, 
stall_out; reg [`CSR_WIDTH-1:0] tex_addr [`NUM_TEX_UNITS-1: 0]; reg [`CSR_WIDTH-1:0] tex_format [`NUM_TEX_UNITS-1: 0]; @@ -67,44 +37,81 @@ module VX_tex_unit #( reg [`CSR_WIDTH-1:0] tex_min_filter [`NUM_TEX_UNITS-1: 0]; reg [`CSR_WIDTH-1:0] tex_max_filter [`NUM_TEX_UNITS-1: 0]; + `UNUSED_VAR (tex_addr) + `UNUSED_VAR (tex_format) + `UNUSED_VAR (tex_width) + `UNUSED_VAR (tex_height) + `UNUSED_VAR (tex_stride) + `UNUSED_VAR (tex_wrap_u) + `UNUSED_VAR (tex_wrap_v) + `UNUSED_VAR (tex_min_filter) + `UNUSED_VAR (tex_max_filter) + //tex csr programming, need to make make consistent with `NUM_TEX_UNITS always @(posedge clk ) begin if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) - `CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data; - `CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; - `CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; - `CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; - `CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data; - `CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; - `CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; + `CSR_TEX0_ADDR : tex_addr[0] <= tex_csr_if.write_data; + `CSR_TEX0_FORMAT : tex_format[0] <= tex_csr_if.write_data; + `CSR_TEX0_WIDTH : tex_width[0] <= tex_csr_if.write_data; + `CSR_TEX0_HEIGHT : tex_height[0] <= tex_csr_if.write_data; + `CSR_TEX0_STRIDE : tex_stride[0] <= tex_csr_if.write_data; + `CSR_TEX0_WRAP_U : tex_wrap_u[0] <= tex_csr_if.write_data; + `CSR_TEX0_WRAP_V : tex_wrap_v[0] <= tex_csr_if.write_data; `CSR_TEX0_MIN_FILTER : tex_min_filter[0] <= tex_csr_if.write_data; `CSR_TEX0_MAX_FILTER : tex_max_filter[0] <= tex_csr_if.write_data; - `CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data; - `CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; - `CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; - `CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; - `CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data; - `CSR_TEX1_WRAP_U : 
tex_wrap_u[1] <= tex_csr_if.write_data; - `CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; + `CSR_TEX1_ADDR : tex_addr[1] <= tex_csr_if.write_data; + `CSR_TEX1_FORMAT : tex_format[1] <= tex_csr_if.write_data; + `CSR_TEX1_WIDTH : tex_width[1] <= tex_csr_if.write_data; + `CSR_TEX1_HEIGHT : tex_height[1] <= tex_csr_if.write_data; + `CSR_TEX1_STRIDE : tex_stride[1] <= tex_csr_if.write_data; + `CSR_TEX1_WRAP_U : tex_wrap_u[1] <= tex_csr_if.write_data; + `CSR_TEX1_WRAP_V : tex_wrap_v[1] <= tex_csr_if.write_data; `CSR_TEX1_MIN_FILTER : tex_min_filter[1] <= tex_csr_if.write_data; `CSR_TEX1_MAX_FILTER : tex_max_filter[1] <= tex_csr_if.write_data; - default: - assert(tex_csr_if.write_addr > `CSR_TEX_END || tex_csr_if.write_addr < `CSR_TEX_BEGIN) else $error("%t: invalid CSR write address: %0h", $time, tex_csr_if.write_addr); + default:; endcase end end - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign tex_rsp_if.data[i] = 32'hFAAF; - end + // texture response + `UNUSED_VAR (tex_req_if.u) + `UNUSED_VAR (tex_req_if.v) + `UNUSED_VAR (tex_req_if.lod_t) - assign tex_rsp_if.ready = 1'b1; + assign stall_in = stall_out; - `ifdef DBG_PRINT_TEX_CSRS + assign rsp_valid = tex_req_if.valid; + assign rsp_wid = tex_req_if.wid; + assign rsp_tmask = tex_req_if.tmask; + assign rsp_PC = tex_req_if.PC; + assign rsp_rd = tex_req_if.rd; + assign rsp_wb = tex_req_if.wb; + assign rsp_data = {`NUM_THREADS{32'hFAAF}}; // dummy color value + + // output + assign stall_out = ~tex_rsp_if.ready && tex_rsp_if.valid; + + VX_pipe_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), + .RESETW (1) + ) pipe_reg ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}), + .data_out ({tex_rsp_if.valid, tex_rsp_if.wid, tex_rsp_if.tmask, tex_rsp_if.PC, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.data}) + ); + + // can accept new request? 
+ assign tex_req_if.ready = ~stall_in; + +`ifdef DBG_PRINT_TEX always @(posedge clk) begin - if (tex_csr_if.write_addr <= `CSR_TEX_END || tex_csr_if.write_addr >= `CSR_TEX_BEGIN) begin + if (tex_csr_if.write_enable + && (tex_csr_if.write_addr <= `CSR_TEX_END + && tex_csr_if.write_addr >= `CSR_TEX_BEGIN)) begin $display("%t: core%0d-tex_csr: csr_tex0_addr, csr_data=%0h", $time, CORE_ID, tex_addr[0]); $display("%t: core%0d-tex_csr: csr_tex0_format, csr_data=%0h", $time, CORE_ID, tex_format[0]); $display("%t: core%0d-tex_csr: csr_tex0_width, csr_data=%0h", $time, CORE_ID, tex_width[0]); @@ -116,7 +123,6 @@ module VX_tex_unit #( $display("%t: core%0d-tex_csr: csr_tex0_max_filter, csr_data=%0h", $time, CORE_ID, tex_max_filter[0]); end end - `endif - +`endif endmodule \ No newline at end of file diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index d4d8033f..7003c846 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -147,18 +147,12 @@ "wsched_warp_pc": "32" }, "afu/vortex/cluster/core/pipeline/execute/gpu_unit": { - "?gpu_req_fire": 1, - "gpu_req_wid": "`NW_BITS", - "gpu_req_tmask": "`NUM_THREADS", - "gpu_req_op_type": "`GPU_BITS", - "gpu_req_rs1": "32", - "gpu_req_rs2": "32", "?gpu_rsp_valid": 1, "gpu_rsp_wid": "`NW_BITS", "gpu_rsp_tmc": "`GPU_TMC_SIZE", "gpu_rsp_wspawn": "`GPU_WSPAWN_SIZE", "gpu_rsp_split": "`GPU_SPLIT_SIZE", - "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" + "gpu_rsp_barrier": "`GPU_BARRIER_SIZE" }, "afu/vortex/cluster/core/pipeline/execute/lsu_unit": { "?dcache_req_fire":"`NUM_THREADS", diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index b468ea62..82422c27 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -17,6 +17,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE +DBG_PRINT_FLAGS += -DDBG_PRINT_TEX DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CACHE_REQ_INFO