From d6c87dbb0a97621edcf6055539e744824fb11264 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 16 May 2020 14:19:17 -0400 Subject: [PATCH] added debug print states or rtl --- driver/rtlsim/Makefile | 11 +- driver/tests/demo/Makefile | 2 +- driver/tests/demo/common.h | 2 - driver/tests/demo/demo.cpp | 21 +- driver/tests/demo/kernel.bin | Bin 92640 -> 92648 bytes driver/tests/demo/kernel.c | 29 +- driver/tests/demo/kernel.elf | Bin 252488 -> 252496 bytes driver/tests/demo/run.log | 12843 +++++++++++---------- hw/rtl/VX_icache_stage.v | 6 +- hw/rtl/VX_lsu_unit.v | 6 +- hw/rtl/Vortex_Socket.v | 6 +- hw/rtl/cache/VX_bank.v | 14 + hw/rtl/cache/VX_cache_config.vh | 4 +- hw/rtl/cache/VX_snp_forwarder.v | 6 +- hw/rtl/libs/VX_encoder_onehot.v | 25 + hw/rtl/libs/VX_matrix_arbiter.v | 63 +- runtime/intrinsics/vx_intrinsics.S | 4 +- runtime/intrinsics/vx_intrinsics.h | 12 +- runtime/io/vx_io.h | 7 +- runtime/tests/simple/vx_simple_main.dump | 2 +- runtime/tests/simple/vx_simple_main.elf | Bin 257244 -> 257244 bytes runtime/tests/simple/vx_simple_main.hex | 2 +- runtime/vx_api/vx_api.c | 8 +- runtime/vx_api/vx_api.h | 7 +- 24 files changed, 7100 insertions(+), 5980 deletions(-) create mode 100644 hw/rtl/libs/VX_encoder_onehot.v diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index c98bf2b5..b1521078 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -3,11 +3,18 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime +# control RTL debug print states +DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \ + -DDBG_PRINT_CORE_DCACHE \ + -DDBG_PRINT_BANK \ + -DDBG_PRINT_DRAM \ + -DDBG_PRINT_SNP_FWD + #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#DEBUG = 1 +DEBUG = 1 CFLAGS += -fPIC @@ -31,7 +38,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64 # Debugigng ifdef DEBUG - VL_FLAGS += --trace -DVL_DEBUG=1 + VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT) CFLAGS += -DVCD_OUTPUT else CFLAGS += -DNDEBUG diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index 1d03d777..9069a094 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -46,7 +46,7 @@ run-ase: $(PROJECT) ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 run-rtlsim: $(PROJECT) - LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 4 run-simx: $(PROJECT) LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 diff --git a/driver/tests/demo/common.h b/driver/tests/demo/common.h index 6913c88f..51969fed 100644 --- a/driver/tests/demo/common.h +++ b/driver/tests/demo/common.h @@ -4,8 +4,6 @@ #define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 struct kernel_arg_t { - uint32_t num_warps; - uint32_t num_threads; uint32_t stride; uint32_t src0_ptr; uint32_t src1_ptr; diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 4f937d51..d8c25771 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -6,7 +6,7 @@ #define RT_CHECK(_expr) \ do { \ - int _ret = _expr; \ + int _ret = _expr; \ if (0 == _ret) \ break; \ printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ @@ -15,7 +15,7 @@ } while (false) const char* program_file = "kernel.bin"; -uint32_t data_stride = 0xffffffff; +uint32_t data_stride = 0; static void show_usage() { std::cout << "Vortex Driver Test." << std::endl; @@ -111,19 +111,22 @@ int main(int argc, char *argv[]) { // parse command arguments parse_args(argc, argv); - uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES); uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS); uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS); - if (data_stride == 0xffffffff) { - data_stride = block_size / sizeof(uint32_t); + if (data_stride == 0) { + data_stride = 1; } - uint32_t num_points = max_cores * max_warps * max_threads * data_stride; - uint32_t buf_size = num_points * sizeof(uint32_t); + kernel_arg.stride = data_stride; + + uint32_t num_points = max_cores * max_warps * max_threads; + uint32_t buf_size = num_points * data_stride * sizeof(uint32_t); std::cout << "number of workitems: " << num_points << std::endl; + std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; // open device connection std::cout << "open device connection" << std::endl; @@ -167,10 +170,6 @@ int main(int argc, char *argv[]) { // upload kernel argument std::cout << "upload kernel argument" << std::endl; { - kernel_arg.num_warps = max_warps; - kernel_arg.num_threads = max_threads; - kernel_arg.stride = data_stride; - auto buf_ptr = (int*)vx_host_ptr(buffer); memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin index 2a81970a1eba760d63397030623a227cff1198e1..827bb41775cdb4802313bbe4d85e3c7c134b2da0 100755 GIT binary patch delta 3194 zcmai0e@s+Y7QXlWm|^zVHsxtp;&bVI~$XYgU(b}+Dfb2YHI^6Y^235?6PUc z)a^VM2C%jTd)toI8eL6=)^2rsWgl&gVj@nvB`J-oF}o&^YP)RN1jzOWZA{s7X5M3l z`J+s7a?bs}@1F0xd*4iE^s+p5Sq_F}LRiS3+EP?2G%9^$f_QDJtdK?`6E(EQp|(?3 zx#fhHfeyHhiy*6VX!QgcST#YkH4vnUc<)jvopBdu5O?7o&}j=*NyfZ@$Dv(R8Ro235==1xo=84=&S;B1;4`QGEF*lBSS8CV83Kf0IhA?`c& z6L-mC>Ms5{Rc$h9E%n1#A<|knNZiXZ2<~q!wvlrAicqInB;J04u-A%-nq5S=brt39 zchU0Bipp{s!ukzqebY+BC00^ynI^1*JubSK=C`r^V&!c|hN9;I&#dpDnx`IykuFl! z5nZ!c61?^9$7I^*T>NHgzY=k!b zQN*L#{G1*acjqIsNP9*oHC;#{Ku=D1yBFwp!%G2gC~G%4ryJ$^pDxFHA6*EoVQuw1H0RoR@Wsnk;nG2NcA=jieDyF~J3FbY<-5Y!aP_?F^?i7Ed58apH=ceh zh&R}M>}kC5l;b%lDLBFLwpqYg%zx;)2v+R;TPI;t>D~^yd`FmJB zJGnVJ!tm6Cl2FsbOTPV^1s-qY))73Y;7k{M3-okKHwpA3y+)*PejWO_Oh+t&|4V(y zPQS7Vmap{w9J;cMM#AHjv`8ZG2`g)9AUKdgG;c~Q&Lq@`RiwtbZs1lJyh9<{J*F12 z0^0e)^hnCVsQGEbXlFXnsxyg(YcrBIXg=Ug?3y`*@c;*4KXOWf#-1P7oBQIqF-9g1 zpykEQ5qE$IVgX~w06aZ5D1q#R<}W}tIUJxFbVnU=(|3%zS)W=*JyNj#0PJ}w7F!c) z#45VuTm^7L2H*P{(K?NKH{|W0oi9v}q#TTzpEitkL*whv5Z7iTZO}>#W5j#I@d0r? z{c3Febs~Oju4ZmOIB^6L2dMaS+#K;yXogt87*aBtpMhbILi1c$n;Z_%3_6lvYB%cU z@cv&>cd+d|(Y}5^7J-Bsv9lk>xqE=?HTX9m_Zzhr@>$f*6Q)N}4$hh*(Rq$&_Oo%* zB=HkGlgLl?g9A@A!2y({rnd}?82FfBG7Toz06Pt9lfwa~;nb!t z#rtkI2AB@#SJ9SW{eOs-bu$)Q6KcfX_#)0N15-I!v*t+D ze+ggTf5%Oe#B;RBDM{K~*pK)YV>qm($)D=Cb1sPkykImj&3D7oA~tHA#*lFe-H@B1 zc_)~e91d^|boB|Q0i(Y5Mfhh`)TKM_r4kJer&4VojcTJteg|rEjT{DI$oD417}T#Q zvFL}~X+$7{YUUbi#Btr05Qx{iqOnq|g(Ay{n}u|CjWy!PD+zH^|Li^r{}qP9Q=6!2 z`S0nO8AZ}Fs_9I6xY|dpxoreqJmX`pquL)Ju0w=Db3h-crJbT)T~DV)o40{#80*57 tK3WiNXrKeDZhiyr)YVvr!h7NQs;IVQz6d}11Z`2AwN%5Hfe#@9`9G6sSXTf5 delta 3252 zcmai0e^6A{6~6cV*tftER~`@&U|eDOVW7GVO_p)e2?)z#OyahhXc9YY2OWvhZl*FR zoto@3u&i2BO|E7&(b>d+*p$Si7yC$cj7~#gVy0mR>ZDVr&|(_XKf3Fb+SoFo=dka& z%T6ck%<-K2ednC-ym!xIygn{ZjmyD^Ob82kth>tm!cO~H5+ikrD%;6UA`>;Vr%3Ig zB{h~)bqw-iggkU7cvbeSjuGuX$bAqz2@rC>{_nY9SD%-NeF||~h^F713#yi;mB8gQ zg^mRRXki&L5$_o$T~3*bD?Bt7Z~3z-$pn2@ z|60;@Y9FbQW`r84mvCi5Xw@td@3~3XE9HbmDhLmIDeq~bH5qH`d3R%dO?ZZM!Sjd} z3aPQ&V%xh`ya_?yWG6)JR#_%VOeFvP9L447OgnlS(Wq z-NiI1GY_$~(EC=M`oxO5R+1s8lEkA@SXdgabbF~*T7844Ss8>WUhY=H3T@&a+!3SH z%g@+1iEvVKqoUw_kU36#kIMybOVrDFQHu~RXSu~c6hk@>VTdM}@GH?{?%< zi&7SdNB_Zm)eb(i{80hADb7^_6!F^fm9jA6WNlSDplG~mi3sHM%6@C!@G}A86XALY zUX%U@yoC^px@$mj+`T<}-7&U*e^kxPJk9p)QzgJI|HPy5iY4v_h6BS zf4;gfR~T;K()!0m@NCO>e!0j7yD0=6{5{q!Tfj2){BA{>I@~k9;8u>zDzy* zzuQDX7_NW0Z4LFXhPEmpw1z#?R)U#_+kOKtfUo~SVOEaiY1IO2Ec>!xy>`TW#P#z2 zJpNK=HqADvDH~Aymp|E=gh(zzr9UkdZfA||5UEwl`KpX<>rIpG)Fn-*OAtE^V}27m9x zK1FCc#xCCB;U|uqfCXS@>}&XQkvv!ersS6M_g~$H7rE@jKXC`tlR?~p!zZ7`9k9Kb zFA7bS>~(7yUwi6BTm-h;QO>{W|1hx(Eq}pfVAp@q11$n?jfl7dwePI9G#v{}9h_lT zolbuIo%^A%oYj^W@%WjQGK!r&DCT)zb3X--DP59E@3}$TPe80>vEL3=(F`6Nn#8ki$Cs~fq*`l^ET|!Ah&iNOX1|h|FF}d~v|XLflxGudhX=Xq$^{!$ zSnRJgmdl^8?hnU!<@ED7RsZ!?1*fvbe*~vse8c=y6E_~0glHeHzWHkld;-(6QGBgk za|huo(&77FFVJSaRir-?sG`SYI%yGxKhwu-^zRu$&lmdne0pyc^+sa#v@BAvhIR(e zFD2Ro)`Y#A5+mlyOmYW+t2X!oJJFiWT<|u?UCd04G#{K7A25X4mJsdX9HQabw6p@P z4tNurFo!T6;4su9XG_Vk7ANz~x}*;#$kYLL7be9KF8~q5I*cJZ;OVh}6MPXg-wWR4 zaDaBmJ6@C&J#EB@JJNnGJ-d;XNU>o=r1OM==5M2o+lu#S`% zv7vKGt{J#qgFgy>(1;%ZKZLo9n5mKGgSW+z=)OQS>-nT;TJnNiLId&te+iN1b<`L1 ze?c_+wS;X{0DL<%?*=iG!vQ8B&z~aN zVZ;+UeS}uf;K!L%JD)|htA;-fv3W)g4Kd_!|kd VViwusrc0_ptr; - int* y = (int*)_arg->src1_ptr; - int* z = (int*)_arg->dst_ptr; + int* src0_ptr = (int*)_arg->src0_ptr; + int* src1_ptr = (int*)_arg->src1_ptr; + int* dst_ptr = (int*)_arg->dst_ptr; - unsigned wid = vx_warp_gid(); - unsigned tid = vx_thread_id(); + unsigned offset = vx_thread_gid() * _arg->stride; - unsigned i = ((wid * _arg->num_threads) + tid) * _arg->stride; - - for (unsigned j = 0; j < _arg->stride; ++j) { - z[i+j] = x[i+j] + y[i+j]; + for (unsigned i = 0; i < _arg->stride; ++i) { + dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; } } void main() { struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; - /*printf("num_warps=%d\n", arg->num_warps); - printf("num_threads=%d\n", arg->num_threads); - printf("stride=%d\n", arg->stride); - printf("src0_ptr=0x%x\n", arg->src0_ptr); - printf("src1_ptr=0x%x\n", arg->src1_ptr); - printf("dst_ptr=0x%x\n", arg->dst_ptr);*/ - vx_spawn_warps(arg->num_warps, arg->num_threads, kernel_body, arg); + /*printf("stride=%d\n", arg->stride); + printf("src0_ptr=0x%src0\n", arg->src0_ptr); + printf("src1_ptr=0x%src0\n", arg->src1_ptr); + printf("dst_ptr=0x%src0\n", arg->dst_ptr);*/ + int num_warps = vx_num_warps(); + int num_threads = vx_num_threads(); + vx_spawn_warps(num_warps, num_threads, kernel_body, arg); } \ No newline at end of file diff --git a/driver/tests/demo/kernel.elf b/driver/tests/demo/kernel.elf index 855033d66789b7d57636385ef85de25a0ec25d97..74677f44f3ff766ef07ce1204da9ea30d8532505 100755 GIT binary patch delta 8943 zcmai)4|q&x_Q&6I?@ay)CYglLVB`-$5T%k*1VO4)v|6;f3DQ>3vdgxkO*>ZwQE4R| zb$QB4Tdg4NYA4(vDz-tiR@3b&D!)fo(^ktaTH8kJ_w)OnnfID2-Df*_?sMMHIp;m+ z{deE{-b;N|`-ZCar7o&2>z>#pE{6nPc7dNm2!e>qgD#J>xc6qh-QE+iC^=7==h!7K ziOg`P-67_|*|~gaymJL53=DZY(~G*WjqeEQv@aiWNz~qPNoXT*i3K8au#@OUVvi`1 zn6wmrT05uB&a$o7jyQIkP@!MuIw;aZgl7DB!{ZDYn}#~HK!^_B(0)S9E)EsxcA+8o zFeuX=*Eo^zH)K(Aj=~MnXOi$P9r?6xtz$B^`C_XuP{F(WP^|ZI)EQ6QrOTVos&5^1 zEx3I4TKX8`P~8`Z+J4CMe~GC}MPlzVk(kn*5_=3FXSiJ~>oo@LS}v9)9TJKCq6F7p z)+1aDw4YFBYauq@inoP#T@T@mO%}e;JIJ?UFbymoG;*LFSLnaRvbREo+A~xP3~3SG zmEKv-1h4i$xVOwXveV{c{sBR!jmJq#&qrfyb>v-n#CyT{aF-&aUqu7MuVwm&ETK#| zr}xU7Ey5M2t;qXYOeb$)Ohhbu9rb%ztiIP`Rw~h)`{uj?hEBfcH3sVW`zuG!wh*DGm>svH^NjeqA6AEmrwqa)rwr{*f5J-v>; zPb%Y5d|Kb_1mAAEd-x{zPbMWa+2>B{K(x}gX22o^!PG(h74~m95bw(w9Bo&|C42LS z&PBFQhPGGXaCZ3Cuvq)=7l<5{r;Vr2IN_fi?`z`*(n{|q;~sCjdV}}Hk2X6a##eY( z9rcaQ&Q<7;_s#MBqwGtth_Wzww;5x&(sJLI<8O;m#$>&p`=AQpoi^)zWqg(|a&|Pa zbmb?6DEDOfvgRfzEa%LB%BI+}d`$~G*y)h(r$4HSGG>(b`qQJ3V1&DmVju0D?@nOX zQukgP`gPB9x3y`zqQDs>7yq#PU4(FE<_UnNdV|Mvczy2T+ZNKpSz>s?yyhT}``jUQl zo}2Jy%j{4z;ngtu10C7*EZxsdFdB#+v?;k;jg%1D9YTkVvyrqJV`X^S(;D0fM*Aq5 zq)@$)>m-cfO5;K-oefdOd~FgGYO1F9P*Q3tMfoqzj%(_9PLE9kMj!ApPI-wQIav|gl`2N;q=4d57 z{^UDlqCy02L_lWqYE?l?k%?Jfp$8kt-12IGuUhqQ-0E{{Jd8LitPL3SdN4Wbn-Gk3 zZrMni3|@uCmLqr^*tBs27Q#i!a;$-pV%{^;z$bjpMR7^55cNz^zHR z0ghHduS=bf`tveL3zD3IZYS8ZaRXW~YAuI@ac9T|wCG(PrbV9oZ-o}q9Jr=g%Bj@#f!Zc~ry+v0+BT#$Nm+@q$!z0`b!@M&T<1|<>j+%ckj3(HN0DYSms!tTWVu+MpWwf4 z*YZ$^kympP-DEDap6w3H)ogtFJHmP4n0_UfoJkjV-nilLKbwRo5?FHb7de}>#kd-A zl`UOw)aFsKYUJcoiyEFehcv!Mmo4q4&zVEDcQpSSE9vthc)vTsNW&gw4cX3 z)ZyT1{sQ(t43u_{sKme?Gp&#Thls&u0CzNLS^Q=se*t@WhH0;H{wMq3p{8BLl4irp zmH7+Qe`R98?tw_gV8ohcvjOisYz9agf=T^V1LpqDv}X(@4KHywe}M!kvrK#OU8G(8 zmA&g@emjqBq5kGPN~e1L%sfh=I>R=fn%hvF{_WG025o0I?V?np!A<>%Vs<}6**0o1 z?tY$~TJ(M|(g=trUxd>PJ!v(~r&PUcHSMP&eNqW^rYe1I3GI(>u%Q=7)~>w}{hr12 z-p5@YunMJSWAsb(wn|~fccs*ZAb%{QwF=qv)obY>RT>jtrTYk2zK%YkY<-?i@6vgr z^LlnZZ}iweM{RUjzq|?77=7QLXfNtKwV4JGWJNhWO5`@$y}|4@GT)@x6k$%<&w~xm zX6i!vM{iLZrN*g`e3R}odc92!QbepVbSo-x;mHb0ZP(fDA}t%O{wMm-X+s2FnER>8 z;M?4K)oyx-E*k9(3L~o3;(?lm_k~oPZ0@0 z%KM=F6TgA?Q{#H0{CPk>s3&RB2LgIdwB(``e&cha4IsQPtwb6s8N6Y z9HzY9IB<@hXam{zUo^5S*8PG=B{>{R`_)(_BTNai>j&p3tMsM1%jA8wh`$_-bu1dZ!-KDf|tDJB1pK2JWF|w^v8O6padfx)2yK!i!GEc!` z957s|QF++~O1(-{deqx9F$eR}WxuZ1Ac5vGE-Bsh;jlX+o=oS;60Y$m^r=B!MJ^ zbwft#1y$;#?xpEM<0$M_1-S)Wgsw7w2u{&YSE(aBi!+5bN4i~xYy>Zq{1fDakwRN9 z`8s4X7!N6bmtC@YMCoRN(KFou@{Oa#xFfDTCegOCjqao&{iW`nhRap0H!?6>|toY62H zT>lBWUxS$HLQKG+7y_Lm?R;=_i?Yr%{4g5Bx@EWgvc z)iqHY?+(sRH*~8~Ut*};23*`NG#+kFIukPIqu`_tgIfv?91iF?$=%=#Ery->3HG0hxdlgwa8VSA)%zpP-O~OP>_zB+*}&()l|!(& zqF*JPp}1)2(J-*N=Q%{Em|hzfpTTt) z!U}kB!cc7de}5*Jur2s18dxn8bOui=4&Frx;Kiqdi>^2LE=z;b!55EV5n>H^IKfCL z7Gg+UGGI14SX!J1zHXVK0&t3DPM3mR%dmyYMYR%azPs>+e{gM1Nc-Rz6V1R zDk`C9M2i<=@o|F;FkXyqZX8F!=F1WX{0vFR-6N`9T?n5 zegHS=dk*4}Qi7?@kzw103sL&K&`M=LI)JS=+lP3QsKyQ*BmKL=Zr!2rPfy}l6TXYd z`nQ9x-yS$_a5Kk(i*WDc%lebTF{LTb;yW@PeC+TL9MZ5oR7i(e;1;aC6fp1Dh2V^$ z!0~|nUjR2@Ce3jv1DkJ=Jg}R=rMP*_N7fSDtTW)BCgWFxV@k^hz(KaS3Kb?O!MpVU z*nDy4fc4;wcVghB|8ejj%gmkxH{&5XMkbpMgD*dU zHGrFz8~6s8Kgh@_@QC-I2s@6OM+VprK88hR-u@qf<3|LaVV{EQEo=J>_##G-PyL+W zJ8;g~z!8)A3OECYF*DvTVDnQ)w1P*9tqtbfvK>T&T^Mq=Y*1%#4N9Z+hmbCjHDHTw}SSnP3TP)AG z17N(FXbm#JF|hgO5%ZDqG8D(Kh|EQ!gB#H!bJe~9wqXQHWslxvJ8mQMwyOqbguvcA zFamYpq-TQrz;SRh?kWiX2%SbD2lYKAQt=NccH+2SA^972z~OqnzsDisCvZJpRLus7 zwir_RRf8Ad1)efI zaFVoVf~Qy(sp!A>iG}@V&t8zRaNOXT#w+#e3+?dTIQFF4&Caj!ar&Jn)kHhL!8htN zQ4IJ0Z;?C_Hof~6)v1@CR3q(y54`WOC!b?$_0wGB#d}_-Qz-J|=e*wQl-d&?9wxK6 zrY~X9S%00y_xe#5Mfw#Mck7)_t34I5O&@hy?LnvX0w6u7m$8_j?+J*1uqe^Pzkv8c KAMl0R@&5r5REZh@ delta 8646 zcmai)4_s8$w#WB6XJ+^#>cF6)qK*nm{v8v=|6#}h!6G!{=i5%3C1nt79bG{}%vlS)S!k#`cA=;k^0dZrgr}Y7V%w!F+@L<2glpxo z9CxYpA!_sME@7aCYju^)WsNxBtz%@E?R8?PsaY9nDi-dbT4j!AF}c_LEL@xW z2$$YhxbsrUy(Wc*hD;pq&YduRXkN2eg#oF3gTzqFE!WewSl7P%wyxJ~<2zqGiDqy2 zj#(*;r%6l5M0ZRR$d#AsYP3CJHjynP#5Fat6Pa8^k)2&yWM~K0O+@TliMGe?Nt`1> z6r0K9E{e=k!t>%1Qc3GQ;JUE22@$TKRChwBydX+(AMM^qlU}Qz zJAZVW?lKmXg=U6*=%($O+`Pt(#P1{ zWkViR5G^Qvu!2hN5yJX)k*G#MIFtQ?EdX} zRZ*(OznXJDCA*Sx`YWsNcP+~4#h!&ZhcE%|smr^ziLlJGS%h$z)7v1w<~ODb$E&Y* zaBo`MmfHGE^F)v(>EG^=Yno*=~}QANRpMPw_(bc;^P^fb9b(=Rn~H zOE?F@+#OXVCC;@YD%L&mzy?kP*D^;R_rHuvZ!*lf#L3{gdTfP_UNO%n8pf;}K z)%_{N-B^8*{XRRL5X@DB&pi&Bb8avxr~A4K>mt-%L8ZnYP-C_EL)+?LY$p|QjrN+@ zgc%&Ay3`h2UFxnr-zBt5a+%F+5jK+rKIt8eX{61GFe|pzrZSg3GSX%JHngJ&E+&|* zwkx6L7H`<9Qn%yVkHRR()%ep;%cq~Yaw}`xaW__QuXf#>6NFy97Dj)j)DNBX2=~gU zC3@7P6zwzySm=O-Qj9BMRD_u|R<);Ba3>kIaGI)6nlZ;l>lF$z8avQMi&FKCQ4>u+ zgeYsiH_H3az5OXwZ+wVi^{9s_$5|dOv@yZnYkxqQb&gQKEg!Z4(jH|MTAI%nG7P@I zclp|AC7qte_sNFlwinvu2%$0iu2vPaMA&`0T3;vR018o`ZD9dE!QK4+zIuM?^cn*@ zu&k@Uan?tn5!PkPPG)2D93TcV8o}2>`fS{REck7U_BUE0%dEGz`Z#b)qqEx(JQD1)aR;*C z*8+cEL&35GtZ%glc888tXz$dH3a#m&_sR|^v##v0-N=aW>?BB(&gif1|CO@A73^O)K9CsL%Q^**Pw^{Po|(bKY&_4urJ|dQ<9J%+Xn0Z??uqlo}hGR#`i+AHh8aT0cF;0`NnUHgDpzh z!yr#x_1Y>R>#qF%c^lF$ zNLhlikJ-i->)2i#a5bP@VD(<5kh${84kyjWTxLD97I3jXqnGEpSDT4~RbKTmbceaj zdbWoJT=mA6|3N)yM9se2x8qHCMZga)3t|ASrv0~XTcs{jV7BGYG>5b-T(!71t=wW% zXV6+TI1#5|zKrxtx~b~vbE$q<{r@1DzVOzJoyuc->kC-q z>g!qL=zAAX0#)i)7El~j7-3n|&<0IiUrG*Wd*#rF6lc`s&|o5~5&R;hnW))#Xa##U z>%(8B`yiIQ46hmbfc2C`ar)NvbR=3+G0~W)VeNz%q=gHSZUYV3h|+%j&-rwOPU(&f z)P?GeNgL>GHPmD#tsV#2A0qUzrzu%a+yoz!@vlu(s9*%HLOMnz#vfjzhY47+nNHFK z{W+c94Mk71N*vc$_wv?Uv;{RAj6qxJ6BAw2Z*GUHRX_eF9YTN=MKpvU_wS&mh_a22 zZ-JHU2>s$tdf0g5zsO2dX6!3wuQKE7oiv9qk4Dlic0*xF2@Po1CEHF~8piX__Cj>t z8j82dL24|xYqnl~fF7r-Mkj-UiOO|*89j#?DlDV%n4vGq=mZJl&4Uz+Q5cnnfU#TR zNb7tCkq~5Cf0TdW*VI93WIvR@@ai4<{&I?>ZF*HXjiq8;J%X_1`qCqyX8l7JCB}^- zG?ge*PdbWhDK<78rGAt$*?}k*y-~u3qI}33y?zMFE#C671a8S&cJw7J;;6UGLt0oa zM3({`vp)p`n0gHDF4B*CDg0w7BlTGyLz$?bWlGa~9)~E=7qV~|M~)NTCo1)0mB1LI zeHHzU;PlmB={(tugC~$bkhiL78Bx5k;8BwNpC8{ zLNV1Zlqf~m9y;$5uK*5@`iol~jBGyRR-HXBID}RKvx2`Y$mS&P z_Cg4o12*Z4OVz18^Am8)k#41sJHWa4;=|RBgJbkdrRsgoMTtV2E8UtP>%fa7{{opd z9tQ)-Es*u#7|EdsXSyFR7Lwb8)4_?7JAw*Xs0j7X6Cu0E8{wUCb+J5917g z<>30y&?7<=PZ!z*8GsuaGl8^71mO9637j6ldEiX236kv_Am`$R-^YdEEPe3-bs`J{hQ$HL8yN1|1BVj|dKoH39B;VqN> z`Ww)P>EQ>}S$NVcKByMaH6#3xihVCipM6;E;f$2&CXjxsFa?3UxfSmM`_h&D54Yme zt+)X^VGZ68(8&l)<#ljcp3pKS|7yb8&czp_nNnfGoY&#&Rvb7SQ`Z69^s;xYI6yQw z`d4qxGRJ^t;-J9E$vhC89YLO@icl0W41DdTzrnHK5}bQZu|Mojp?z`H?9EONkPL^Y zxun64^UU?z)4^dRJk0hd!FA1e*MXh+8TOx#wFQTZ5Rn^(?EP72`O>}wb`1mYb#Mi^ zWE7H1+SfRtNW9^_>o{NmxDqq6Ogd}_4;YD4rQ|(qM@*on1Et`ZON6HbT)z^`dmz5x zdPZ0Sjt=PHci{9oq1o9Ee`g~UTLOA?6P$s7C9(nDh00p|QyvVC1Q$y?e@tt@kvUj$ zFYwG7|CB4>_<#=d2RE&|!;vx*=wg#(=PZV24tY2ZTaQS=}K_)Dr})Lsq(pkF$|{+#v((F!?u*PrwyOI)2sQfM>y_pW&034EPmTr1|%a ze}Z#z;4eFL18l`@872L1f#ZJT__+VM!P`(o224>qq;s_9pVAKCYXLK02UiE&HG{w{ z0rt`0TGd~F3OFWU#vTXndjSJR`<@PDLSY|;hYI%?BYYM-ZMo1!$Pv5%&JAc_6*w*+ z;0AEzc)TWYJk;L|E<*<~VV?NAx&HHb6P3IlydOC;B}*y}LlHGbXh>jBh2vm2YcPBG zG&u2#JNK=)3@#Yz-$-tN>-1yC@JLyUrB0XC+J*>`zd~pQavUAOfj8T6ybP3LhmMr~ z-C+;hq5FWV>xEV>>)#8$d9U}l!3&oH)^P7+%KB47u%t2niLX_7@Ug?=a2SB?p+q`7 z32s8}#esRpUIvcO^&St{|0Qq*GQl@5o4~naNNbi0Qv}Y(%`;QRdjU7=82CFRmxN$R zw+(@V>~Se73{d=c>rt@p<&9UW8oXsBCR+NRX8(YdJqNDGLv*fe;4*jw>_9xCLd1_? zet`})@qjGiHzyR80ZVVi6#L%!xWUfgLhQi45%dJl2zZ1H28Urc$Up-;BO}2Vu;1;M zoCJ0RbnsEI?@($L!NU9`6sPn{$5op%TGp8-eb7gAGFWKfN<2q>%f1$T70*c|k*Cc~ z;G2K*Z}MBf*Pg+$<38m+y#?m41io2+A8a~}+eOwt0H=se?7PkGlXy+ zeghm6U@rkT2W%LnU}tl{W9=vuE#Mj%;1t;RvSUT!o(ETAQ%;xm%V2+A1&I|%p1{Qa z9`?0Zb4nf6;&Ip9a@sw}$Xc`Ijh6M*V!rm%#gQz%P;P{0@pV zoTqUB^K4GPfGZvT4vDsyQoNH-=BEyBqkr(43f~$fg5&j-)vC=h`>|=y%$RJvUah{=j;i%;=hPl%ep$Eb z i) - pri[j][i] <= 1; - else if (j < i) - pri[i][j] <= 0; - end - end - end end - end - - genvar i, j; + end for (i = 0; i < N; ++i) begin - - wire [N-1:0] dis; - for (j = 0; j < N; ++j) begin if (j > i) begin - assign dis[j] = inputs[j] & pri[j][i]; + assign dis[j][i] = requests[i] & state[i][j]; end else if (j < i) begin - assign dis[j] = inputs[j] & ~pri[i][j]; + assign dis[j][i] = requests[i] & ~state[j][i]; end else begin - assign dis[j] = 0; + assign dis[j][i] = 0; end end - assign grant[i] = inputs[i] & ~(| dis); + assign grant_onehot[i] = requests[i] & ~(| dis[i]); end + + VX_encoder_onehot #( + .N(N) + ) encoder ( + .onehot(grant_onehot), + .valid(grant_valid), + .value(grant_index) + ); endmodule \ No newline at end of file diff --git a/runtime/intrinsics/vx_intrinsics.S b/runtime/intrinsics/vx_intrinsics.S index dfdb54b0..3e09b4b7 100644 --- a/runtime/intrinsics/vx_intrinsics.S +++ b/runtime/intrinsics/vx_intrinsics.S @@ -5,7 +5,7 @@ .type vx_wspawn, @function .global vx_wspawn vx_wspawn: - .word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN) + .word 0x00b5106b # wspawn a0(num_warps), a1(func_ptr) ret .type vx_tmc, @function @@ -17,7 +17,7 @@ vx_tmc: .type vx_barrier, @function .global vx_barrier vx_barrier: - .word 0x00b5406b # barrier a0(barrier id), a1(numWarps) + .word 0x00b5406b # barrier a0(barrier_id), a1(num_warps) ret .type vx_split, @function diff --git a/runtime/intrinsics/vx_intrinsics.h b/runtime/intrinsics/vx_intrinsics.h index b7ffdfc5..bfcd36fa 100644 --- a/runtime/intrinsics/vx_intrinsics.h +++ b/runtime/intrinsics/vx_intrinsics.h @@ -1,20 +1,18 @@ - -#ifndef VX_INTRINSICS - -#define VX_INTRINSICS +#ifndef VX_INTRINSICS_H +#define VX_INTRINSICS_H #ifdef __cplusplus extern "C" { #endif // Spawn warps -void vx_wspawn(int numWarps, int PC_spawn); +void vx_wspawn(int num_warps, unsigned func_ptr); // Set thread mask -void vx_tmc(int numThreads); +void vx_tmc(int num_threads); // Warp Barrier -void vx_barrier(int barriedID, int numWarps); +void vx_barrier(int barried_id, int num_warps); // Split on a predicate void vx_split(int predicate); diff --git a/runtime/io/vx_io.h b/runtime/io/vx_io.h index 67068554..5c1069eb 100644 --- a/runtime/io/vx_io.h +++ b/runtime/io/vx_io.h @@ -1,5 +1,5 @@ - -#pragma once +#ifndef VX_IO_H +#define VX_IO_H #include @@ -15,7 +15,8 @@ void vx_printf(const char *, unsigned); void vx_print_str(const char *); void vx_printc(unsigned, char c); - #ifdef __cplusplus } +#endif + #endif \ No newline at end of file diff --git a/runtime/tests/simple/vx_simple_main.dump b/runtime/tests/simple/vx_simple_main.dump index 7a71df5a..45dbc8ba 100644 --- a/runtime/tests/simple/vx_simple_main.dump +++ b/runtime/tests/simple/vx_simple_main.dump @@ -625,7 +625,7 @@ Disassembly of section .text: 8000083c: 28d1a023 sw a3,640(gp) # 80016a88 80000840: 26b1ae23 sw a1,636(gp) # 80016a84 80000844: 00100793 li a5,1 -80000848: 00a7fa63 bgeu a5,a0,8000085c +80000848: 00a7da63 bge a5,a0,8000085c 8000084c: 800005b7 lui a1,0x80000 80000850: 7a058593 addi a1,a1,1952 # 800007a0 <__BSS_END__+0xfffe9c78> 80000854: d55ff0ef jal ra,800005a8 diff --git a/runtime/tests/simple/vx_simple_main.elf b/runtime/tests/simple/vx_simple_main.elf index 136d74fd3fb6ec667c92c672445bf6fca9f53ece..3d72d2fd565c94200c4346c37351487173629eb5 100755 GIT binary patch delta 53 zcmca}lmE_5{tccIjJKM-B-*_s7=f5+yO#tr(;>ECPoI)1kLf|TnA6#c%%hw#g0}C! I#VjiX02t2_=Kufz delta 53 zcmca}lmE_5{tccIjK7+_B-*_s7=f5+yO#tr(;>FVTqk2w%jrS4nA6!(gK`{m3byaR I#VjiX02gx-(*OVf diff --git a/runtime/tests/simple/vx_simple_main.hex b/runtime/tests/simple/vx_simple_main.hex index 89025351..8c422eb0 100644 --- a/runtime/tests/simple/vx_simple_main.hex +++ b/runtime/tests/simple/vx_simple_main.hex @@ -132,7 +132,7 @@ :1008180013351500130101016FF01FD9130101FFF2 :100828002324810023229100232021012326110063 :1008380023A2C12823A0D12823AEB12693071000F4 -:1008480063FAA700B70500809385057AEFF05FD5B6 +:1008480063DAA700B70500809385057AEFF05FD5D6 :1008580083A5C12713850500EFF01FD503A501283F :1008680083A74128E7800700EFF01FD60324810003 :100878008320C1008324410003290100133515009A diff --git a/runtime/vx_api/vx_api.c b/runtime/vx_api/vx_api.c index 1f4940ca..27930c85 100644 --- a/runtime/vx_api/vx_api.c +++ b/runtime/vx_api/vx_api.c @@ -9,7 +9,7 @@ extern "C" { func_t global_function_pointer; void * global_argument_struct; -unsigned global_num_threads; +int global_num_threads; void spawn_warp_runonce() { // active all threads @@ -19,12 +19,12 @@ void spawn_warp_runonce() { global_function_pointer(global_argument_struct); // resume single-thread execution on exit - unsigned wid = vx_warp_id(); + int wid = vx_warp_id(); unsigned tmask = (0 == wid) ? 0x1 : 0x0; vx_tmc(tmask); } -void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) { +void vx_spawn_warps(int numWarps, int numThreads, func_t func_ptr, void * args) { global_function_pointer = func_ptr; global_argument_struct = args; global_num_threads = numThreads; @@ -34,7 +34,7 @@ void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, voi spawn_warp_runonce(); } -unsigned pocl_threads; +int pocl_threads; struct context_t * pocl_ctx; vx_pocl_workgroup_func pocl_pfn; const void * pocl_args; diff --git a/runtime/vx_api/vx_api.h b/runtime/vx_api/vx_api.h index 1dd266c0..5acbfecc 100644 --- a/runtime/vx_api/vx_api.h +++ b/runtime/vx_api/vx_api.h @@ -1,6 +1,5 @@ - -#ifndef VX_API_ -#define VX_API_ +#ifndef VX_API_H +#define VX_API_H #include #include @@ -11,7 +10,7 @@ extern "C" { typedef void (*func_t)(void *); -void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args); +void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args); struct context_t { uint32_t num_groups[3];