From 178ec59af610a35e50c515d4b1e893e9ea81bbd9 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Mon, 1 Mar 2021 23:01:53 +0100 Subject: [PATCH] Implement bfi instruction --- ptx/lib/zluda_ptx_impl.cl | 8 ++ ptx/lib/zluda_ptx_impl.spv | Bin 49500 -> 50100 bytes ptx/src/ast.rs | 9 ++ ptx/src/ptx.lalrpop | 12 ++ ptx/src/test/spirv_run/bfi.ptx | 24 ++++ ptx/src/test/spirv_run/bfi.spvtxt | 82 +++++++++++++ ptx/src/test/spirv_run/mod.rs | 5 + ptx/src/translate.rs | 192 +++++++++++++++++++++++++++--- 8 files changed, 317 insertions(+), 15 deletions(-) create mode 100644 ptx/src/test/spirv_run/bfi.ptx create mode 100644 ptx/src/test/spirv_run/bfi.spvtxt diff --git a/ptx/lib/zluda_ptx_impl.cl b/ptx/lib/zluda_ptx_impl.cl index 94d02ec..a878ddd 100644 --- a/ptx/lib/zluda_ptx_impl.cl +++ b/ptx/lib/zluda_ptx_impl.cl @@ -136,6 +136,14 @@ long FUNC(bfe_s64)(long base, uint pos, uint len) { return intel_sbfe(base, pos, len); } +uint FUNC(bfi_b32)(uint base, uint insert, uint offset, uint count) { + return intel_bfi(base, insert, offset, count); +} + +ulong FUNC(bfi_b64)(ulong base, ulong insert, uint offset, uint count) { + return intel_bfi(base, insert, offset, count); +} + void FUNC(__assertfail)( __private ulong* message, __private ulong* file, diff --git a/ptx/lib/zluda_ptx_impl.spv b/ptx/lib/zluda_ptx_impl.spv index 731966e2aa649dfd179822a634c86aaa5dfecb0c..8a2d6970af32987b1750771d168825f50eb83971 100644 GIT binary patch literal 50100 zcmb821%RGKvUUe|cU#=uU4py2g#ZD9LvVL@cXxMpcXxMpcm8u``scY5oo z>guX~>pcnI&9X4qppy>O(J@HJh#f4s*=*?<;iP;gf&JEFNa_B(9Sr{9+UzQ__CBX`UTaNWv&@-*2P;T)_J;#$9^azn$;8fag!<7g`U` z_us6~_I)@0PkY|u$Q{@4i|+sLd{_I<^H=w5{+n*QWB2oat7r2s=LhV+HouU2aR1Kp z-Oc~xb{3q?%`|rPN|L*6%c+cK{IX__k z$9gvZ&hy>Pf25wh|9Zab0?vQ$p3UF$-OhiXp3T3U?`q#U{~ZU)fA8)2i`4)8{pYfQ zKL3B8AF%%<1D*eW&UZKeT?Wek|2f}v0q6hhK>6!B-|hTA7$|@LeZH%G=lnnI+59_y z{dV{J|EN9dU(OHM|G1vbzw>-|^MA8v^RMT-F5vtZ8z}#s^YeVS^IvwL{Qvj)uJ)bt zU$1BHzw_5`ck|z^XY()T2kbv+&)$FM`R?Z5t7pId>iMn*&UZKe_5Ydw?#_2z!1>?s&-~T% 
z-Om59f99{7^Ih#b=l?{{=3jpQ81Vb&FFl)oIX__koq9I^`uj)M1)SdlJ)3_$-*o}c zPxa6IXMUdVcK);cGymP3?`q#U|Al(?{yTsDcK7?w20eTK<@|vCKkC{1JI{B=|5QDD z|Mh&=1)TpGJ)6JhyPf}OJ)3_y-_^cz{{I>%|9y7h|LORj-+$H}==1;g`2qVsHPHM2 z=X`hbpKPG~|DW?+7jXW&4V1sG^WDz>q=EAH-{-s9ch3K~p3T4f{?YZzzw`V9J)3_y zKVbg_2Ficu{p;@^T^DeE$MtOf^?cU_JpWbC=I{A#=l@sF=3mZtweOtYkUi_)`Rlj4 z-+!j-S^si=!2VbDZ2q0+yW{_-p3T3W@4A5V-*BM6|8%~8&v!fjO?vk0x18^4-?23R z=bsDC$TO_K+Kjay>kQV7tfyEXv;0H0aagmkmSJte+J|)->ju_StWQ{j@-D__&B0oc zwK?km);X-(STC@?VhzomCt=OUTAj5m>oC>@th-pRvVLTZ!n>KCwG^u_YhTuxtXo+x zvc6@F;ICiS;;fBW`>@Vr-Nt&E^#f}Z7@L8$3~MvifvodbceCDL{mL4X_b@AKMb=iV z!&n!y9$<~ZpQ3W@j)|NFAAQEwv5*cQ{h6@F{7l>O=Rz}FPRySbGmg$&^RtW(({jxFPkgA3F^iwLcH)kqI>s+&>bZ&NK1tjAckU1!leRj> zjhQn8ma%mCGdJHEj81fRta)qm@QlARZ(`Is_|`C;Lpqcgl z#M<}YGa0($FC6!byS!KQ)J)8K^kU6HU2ECLkF%VU|1{?PqQRg`Q?6q!-kxXe=bvT! zao*+|^UvmUcKD)4Kr8^3ylMBR zVEw{dnG!oMZ_D@T?@hjoJ@DnsKgT!LG00$@+IKoXzYaV|o_G6=s`~zU=`<{BsI56|^NpwGoakC}A8J~EZkBug{!q_a z<42y>oOxOPeqnv{t!u9TosNCAyKxS$pP%n(zLFEw(V)xWAs9gTC{dp5=YCdG1Bf^||xz7H#oCcz0^w(O;Z3zjCq| zzMsm;;`m{B3A8VYdkS$y&=AR zw0X8I(B>vDoNP(Vy$x7$qQ4PqW0p14*6iDST!}IN1(uESzkQb}ll~zgg>>Cnx5b6Haz$zWd>1N3`*9qVN1RWqBWM zo_i;BeeQ6wbBoJ~+IQ#gC+1g9cER^EDJy^7*|llk&u%Px?v4#7`=Ki*`{Nsv6Sci^ zzXQ)K}@tbMiNBEhpw0Ps~|gjc*@q&fbDHH+kXY zR$}gXH|L{&4eMH#HPqI;uKC7O^EPy?8BT5|?w;?hp0!3exdZL{w7&V)HTQa~eYN4_ zPPBc($z5n;;pA?#bFrVlr?{+do}8F#PB^)z`R<34d(pTqIy<>Uc;Kb4aQ@x#eOXnQ`44JS{bD<@Cm8VQB>*Za`iMhrTbJmaG z+ee#cdlqeO^1{h;#N6|4a-#nz>oJx!)Yg2w`NmW8d33EAPF^7HUO35FZ8&)mEhpAD z-@5jB0&8DwIC%+epK$Ur+E_Sw1?^ny*PkUP);CX1%rz&RyxM&C!^vxC*)I2;pB}LmlJh3F~4&1CcdA_$y@m0zi*~`+SJCuQr@~jkZrX`37w)oP3LRF7`W;B`4N5PfpA=C!BoOeD}l2 z_h{qcMBn*+#PUAcJogXi`rP5<#}=0pweQZq?wDUW`3c`o<>Y7laPkYh=n@vWD4Ehpw0Ps~~WMvi^7IXf6Row*omhLgdGyXW2HME`g6A1rIA zt@&s3ji=@i|5Y=b3`yL*aFVmya55D8a$(TJ-qs_C8k2W`X z;ba10?s+%oqdz)p43;(2)*Q3>##8fO=vp(JOi0|laFVmya553v_i26et!tmLu=dr4 zlZnyx2`7`FjfIm*(ayzw{$Ap;zIk$Dt~udkvgW%VP9{ei@5Nc?pzr*~W_cfNo_h*( zeeS%wDO+4l)ZxVZ%E?stekvzZ6ALHPpzS#=Hk|n10aZ?B#WyA=>dMJ%_}0t2mJ@T0 
zC+4iD!?%w%&o(>S+~kFmIf%LE-JFm9^sE_J)=*n>#^xJO%{kGvW;mIPxO?FwXSLyE zZnT_O-+b%ZXC|zDwc%tQw0*+Kyl7+LWInWWv0uKQSl>K3G1r`MGJo^k4<`$tjrXF4 zbI^BwGqb#pHqX5vx;}R}S*XS3L>*4dublM4_ft7pm{>Sj1Z~envEju3p0IMVEWR;0 zQCCiu!?#}EwVar1JTYgz7`}b9dA8-z<|Z$ktU$~?@8*2;7iTTOvWD84OE%wlYOaW` zHN(kD#N7)gIjapPE2HJa`sQ2LK1*Tks|_cspzRY*Rz({NC##{Ii~aKb#QNsRiMi&4 zlhvE=emGeJZM+vXoP)meTbkv4w0Z6|(e=5*$yzNgC+cuwe&u9cd_U`8Wn*pZdQJO& z)<@g3H#VGXg07rwif>F#)RmLX@U543Ehpw0Ps~|wfNvjdo~<9++~kFm&560^-JFkp zAJ&E}YpAWcQS*(b<`(E$Gn{Nm+`Vv;v)XX76(KiYUN&N>%;=h>I#{j{0G4(K|EaI<5J z%Z)nRm|wZs3ExlUW@lpg{bd)lJ$J>1n|;ugn|<+($&I>lvmd_o^6urvT;qv3``z&E zqs_DJk2W`X;pPBh?s+%oqrW?A50*95*4(rC##8e^bgda~4kGSexXD>pvo;pRBBJ&(tRo72#ho73@)$&I>l za|XWk^6urvT;qv3`xEf(qs_COi8eQR;pQx2?s+%oqkkgnB$hSQ);ziS##8fbbgda~ z&LQqzxXD>)Pi!tbMiN8{ZIoyY?a|k#0x47J> z!;Sfsn+NdyRBj$57H%Fw+w);;xOoa)xp^AjnB1r)_nKF&Fg66y*TS!^quFE zEbphy9Ns|JIfR=xTU>6`;l}*R&0F|>DmQNv3peke?fEV?+3D>x2}CY!rE6GPQF3=-0{Cf+c(^Nhc*^&zDIk;{`rn$Kl9|qTyw(B z56yQ!-28|(-isQ}Mc;XT%<_KP%;6_=okO_!xy9v19d698-28&?r*iWvv2gPn+Md5- z!;OCrZsle$d}DH>uG|ccZ#}n-%Z<6l6La=|v_9HA+YsiIyl^ulG55Ti^U?nk{a34@ zw&vf>H=de9S?+y5pd7DIUoJ8SYxxSp|<8Y%{QK!6QgU*a5D*U_rguiYQxQ>Xt}Yz`PQ}1 zxLEsY!^vc5pF95KX#0knDbU8k&6H@**gxNI>}Q_bm}^eBnX38jhnuO<#>0)i^Bj-m z{j{0GH0U~qa5HU-%Z)nRm|wZ^e@o$~ax*=#a5Dqio-<;@jeoCyooB<%JOteHZgQhPGiw%>HPqIewfV+Vb6#|<8E)ny z?q0abS#7wPA1yc5H{ZJUnGI`SZ8%u~?Q_Rp5N+RZvk=-?xao!VjQ#E5vY&Z!W3D;j zX5r?$A8r;wcg8vET=bpi>@4r6%^Vg**Exin#adi$)Zxbb%FW{VekwOh5DPa;qV2g9 zHr%X;uH3AIZ%l5~m7A6Et><>QG1quv&VFee`)KoQtDw#0r}J#MS(Si$-p%>wFT+}v zWev48mutT9)Lac+YlfTEiMtnWa#kB|)XRzWd>3J+$%sexvU^S73QRZRW5(y3Qfo^lovvQHLAz zD>ob9`>EXYAr@{nMB8&CY`EDRUAfr;-qx>ThHxqW3KVUoc+c)_R;3qwnCfB zPv_Zivo!(tyqnzU_hoIuvWD84n>OEgYHowBHN(xe#N7)wIjapf+o9#g`sQ2LKAU0f zs|_dHqkZo9{n7RfH#?w>g_|AGp0R(v-`LMQxiQzAaI;hM-48cAqm73fedpPa<^8mo z!!GDLhj6oNi_48V+?Zdv*$v-M?hn*-35n*;HU$&I>la}d7u+zvP9 z8c)pG?}cL@ZJzC5w7L9ro((sL5OB}C$&LQrtbJJ4P+N1~<{MAVL(#QnxH*itd*LQ$ zwc+M)wA@(VeCyh0KdgPV;p7Ok&mI3rw0*gzx7>tgM`XJ-KPOPT@c7c`7#C 
zoP)01oQrQvZq${V^YE?bcDOOucw)}}G#vYA^K9p%&E==_Y`D3AfP3CeZuC!Qox!q( z+L~uJ-*{?Xh^{rm%|*oB3pY8d4L28~<;MEvTh~5kVeP98Czqgo?)aCY?Hg_`LmLY> zm!m!7U!&~dvY&Z!W3D;j=8E{(aC0Tvc(~Dbo@cYXpEh&23hjOP>6~r2xw^&WMjdX< zuiRYYK|Yn6Yl-Feo9oc_ydE2FZbMgYZpSw!H|omG9r)IBJKUISJTYf~1CD*PdA2*z z=JL~dHr(7rz&-CKH~KfSZem$OZOxmTZ#*^cM%S9*<{skig`1q!hMRlQzEA6$Z(aM` zg0-(UoZN@@x#QoDwr{w30BtPXJc#y8zT0T+XP(@cYfiX%sQK=Pn}^ZH!;QZ4yp`qs zw3)*r=sJgR^Jt69jXK+UGf}eYN4_4Ybc4|4p=g!_8Z0W8vm)v}fcc-*4<^p4^ygPPloe z`R<3CchSbfjlT1Ip5^_tnZtYNI)`xcev8YEI^39Fx%mL!Pvz!AV&Uc^v^_t@hMTX@ zm7A~ejmeF=a`O$o_1q3O<{D4T*?)p#A8nrPTeP|Sbe;`2-w|-nyUC6Ir>xId)=*pX z^X3~*&F|5*X1Mu*xO?FyXSLzxN3`5n-+b%Z=L@WTwc+F^w9g&?XS998%`a$U;pSJg zXXGZ|Z|rBD+?Z=lxcRO5?uVP-(Z<7#zVrN&<^8mo!yo86hj8;}i_48V+?Zdv`3v7q z<>qf<;f5=B_M}V~H~zC2DmTO68)9sUm}@*SXFn*u&!)|@4Tm;2dEsVw zV(xi2xzQhtH8{%}YHJSBeB-G(0=m`=HzN{vFWls;Hr$MamK*DvZ(aKgiM6jboQ#b2 zx#N$5Z{Kh;D%x1M84c|j``g21Kl9|qTyw(B=*@RO+>C)X9&YrV=TI!~r_CJ3MAtcl zo3UD4Zq(t%{K}303{5|kn{kMRn{mXojVEv0_4Yf5VY`*c-oDyAYhMTE~yBBV9RvT`n zM*BXkZ@zWyGZEIl+Hf)r+UJfxE!w`}W;(R7a5FvHGxpDS9Q&CkH|ClXZf0n{`{8Cr zwDEAG?>r}Fc|UFDFcZ4YA>7Q|;&P)7H|AGv{AZ8(socy;EZoe7w&(2FaN|GorE)Vr zzA?E`S8f)-w_e`8+?Z=TF=syqzJ0WLwgu7VCNJD9M9e+!CO7(XvgTr0Lv78un{PZd zd!cL1aI-LR_rguiYQxPUXt}Yz`PQ}1JXrf`!^xs(pF93yX#0kn#nHyX%@SzO*gxNI z>}Q_bm}^eBS+e==hnuC)#>0)i^PHFE{j{0G(&##eaI;K{%Z)nRm|wYB7T-_hW;tTv zW_h$dSHOmw)zOukHSmqejkzi*~`>cw!uQr^lkM_Ca_eR?{ z+-!h07H;~WJ!AiTzplvpv4`^6urvT;qv3`_1v~qs_DR zN1L0xaI*t3_q?0j=x@Q=l4T9GHMeTM@zmTAU2BG$ort>^ZgN%|Zgxh?jrGm9u6?$~ z+E*J+c0v2x@pnbrH{9&z9xL4Jj`ocG?cuVYd2(Z}IpJoH=DQzm_Cy;GH~P+V8m0((-YqUS>TqLz z8*_~(=Ijr^w~sc@b|l)|OGW z+Hi9$+V^RF^Q~*2L$LPMhLhvaK6m`%(e@2DC!mdmn-kHVv46hb*v~wu>HjfWe3=Xog0`)MUqJmNnGYJh%DA zQ}a@Etr>1EBko?f$ysfsodO1 zEZp3Lw&%^*aB~;Ba&tGnF}YD!ZtlUiUf#Xjm}@*SXMYR6eYAPDd(q}5FWlTm%suZW zH~P1-Zev+PZOz-8Z#*^cN7tI+<^kgFg`1q!hMNb`a$|k-t!tk&~*;s=E)Y9 z8+EubzjE^wzMn~1vN8$wX=35#8MHm0#fF<#(UqIm@Qulhx^nY6zV-6%<;Gm&i8=e{ z@a?0`v%P^fH+h-ao5bAn9mYcYK$)dtonUwPEi)v^}kFzIE;MGSHXhdWeFv|wypJ}|{Rz50cUb$h#br$$*37T0eTMI6 
zQdU^|oLE@<0&UMPv0?2;bY<-)d}FevuB`oxZ@us$YvvkH%vpbhZy#-*?H9DU$;-@s zHJ26E^uJ+!ZJcavYW#+-HNx8O#NG4xWKG{1-?FTs_FmsL-*{^Nfvz>f+MmST3u`&6 z4Qqd)?P-1Ut!tm}vG&!5wZGB!32P44SXdhb-`VCnl-By@$(p(5WNw3^_1zC^gQ1Ow zHGSW~4=nGa&2taVUVZMcHbjfdnwp=^e{O~Ol{Nnz2Yx1Hg|(rGg|(s4_8bNq*8FGx zR@O$vHzsRpW$rf`j`hNeteI;(F=st2zJ0VgJ389j~Z%o$I%G_^89P5P_Su@vo zV$OOBeEVo~b|$pB$;-@UCgxsP)1QhpCCeJx)R+ZbYlO8~iM!|X$(p`3re;|~?Y*XH zzVXzY4P9%7wb_Zg7uIrC8`kDP+td2yTh~6*V(qI9YjdLQ6V~QJ8w+c5qn)j+<@=iT z&672A&B@&6X}9cxO+aItm#{0F_tyd-fQvZ8&Ay@(Y0n+TZy=PVJ&C1VQpo! zJ*{uPb?vhR*1p=XwhG!lVQp2kv9Pup+S%IAS-7lko~)T`PUg0H^W6_?YoLvXHGSW~ zk}U6|&2z7buFoCT)@pHCQ}ffwn)#KrwcX%TSzCu#SX&ou&-JijtuMN=wh6v5SyL-> zzfEzh7hYt|T;qv3>-F*Nqs`gP(B>vDGwVmpy|AXg0joF58rsy@99?UKwJnId=kv*$ zzBT%=tfBT^8#do~YHo?HHN)Cg#N7*PIjaq8Tcho1ee?B5URvPs~~GhHoEj&K`g^ zH+h-afyCSkYx;Yzc4t{bn;Hk9YmKmWFmd;MK3UVZ#-1!|sJ+)-%{QK!hoEcCuy!bM z_rhAvYQx%LXnR`UeCyh0Z>)W_VeN3VeZtxiXk%gRNVK!HU%szd-#l3}*PP7lsOGyL z){aIS4{Q3qgMC=uN1Nw923?;!tR36pvZm&zlQr`zYsb04r?PfDv9NXm+MXw3!`hkX z%Gz1@#$-*c%>B;Bv0iwQHFJ$8=B!V`w~sbw&q14;yv*!eV(x`C{Zm*cv#g;_jq}j8 zMp!$axO+aItm#|hRF*Z=-s`mH8&Ay((6we*yO6khVJ&C1VeKNcJ*{uPb?tLH*1p=X zb}`yMVeJyMv9NY2+S%GK-`A{fo~)T`PUd!5^W6_?m%GmjYx=%}Gg#h7o9Dg)U7tIw zUD@KYrsk)UHS;TLSGmEbvUWAGuyzgFp4Vc-+O6oy+HLs8WKFHi{cgvxUU-o;bB!nF ztgpkjk2YuTK%1Mq%09GQmNnGg>!#)# zPtAMKwPskmm$-XjEoZf1?LM?Ut#7_{?Q=8MzS^*MKiWQF?E$p0u=XI@+1k%pxU6rU zteI<0=JrtY-4AOIqm73(ec!1Mv@8AWN z_tECLKS0;#4r?E_xU8x9>156P%GyV6@TshQOf0N@g0|yG}wfFj}`NmW87j&%|)_ygY71nZA8`ge9+q1Ji*1Goj8f#x|SohzWLU*&y-mE zYQx&BXlECHHne@i-t1^&VQ&t!XZ-7_vvJwaJlQkXoXl~~=DQ#E=0Y0}d-}eUsaW1m zn>oylu5$=`^R&3^sl%T6mA!fK{Z#hmBNq1NN857&Y}i{IUD;a#-Yarb;a+0(bi!Ypg3 zz1JemH=dfyp=-^sw>)w8!d}j5!`=#Lds^Ro>)K~gtbMg%ZAG-Ri@y@uzF}`=w6U6= z`>E`$MJ(*Cjkf1H*s!+|y0W)1zA@QTSN8hiTQB^`p1H;obN1`v+ee#c+XQWH@-nkc ziMbc{^w(#t$FhbtH8w-n8ey*=arb;a+0(a1ZClb4wtK+L_cr@uREH)(CqC z5qHn$lRbTF?7^~z+I#KUeB-HkFuK+ZdxsErFYM*4HtZdWwx{*Yx2}Em!rE6G)(%5E zyZDEr?Hl%vKpP8tN1{C=d-=X+Kl5bITyrwVqnhu2*gG0+JnZTFPWEPbKW*l447$!C z>>b^Xsjc>j1BYWl=Pt4h$ 
zgl`{hp6wj8xyj4S&L!qv*wa6Sbu!Bu+SE7?U2BBB^NG9X^U0pRHBMz&L+!mzYrgT+ zyZ~KmhP?}kyBGFyRvY#%Lfg~&=3Cc3r(^A_4Qm&pon8D((Dn^`m!ge@z01&^vA?r% z+0Q)LGuNEV@$%-oANH<58xMQ>zLPUp-cOr3T#2r82zytxxa_IJp81u%tMUC*_O2lo z_O3wu{K%fU#uIb)*W=qqn`gTdZEo^1v%8477xwgT zWZl5BhBh_sM%Nl)?;hgr`FygcZ;hK+)=+z|o11SuHSa~&nqlug;_ijLoYjWC`_cBa zzWLU*&n;N{YQx$CXlEDyL9~6t-a}|(VeetIXY8NveD*U>_RKXWb9|)v?uWfc(Z<7` zzVGB#miNdp@7+ z>09G@mNnGg>xJeUPtCW`wPx6Ro49*nFK4x3?;W%~t#7_{?eikmzS^+%F520}e-CZn zu=hUNSlIgj?HT*$`=0&GlRb0I$s9jyzWZVCBee0br|&y?iRJyYnZw8EI)||LNsG&# zI_#NW+4~gVPi5~jVqx!dv^~GThP@xqmAxPFjme(6viB3d^}>(rnQJ^TXa6O>eYAPD zpV8(fFEjgvn0sMQ|7+G)ENf^}<5zU85%zu~?w-#ld-~S+hGh-4_xiT^##8fmbgdco z{vhsN*vnaM*!vT0PwSg+UHg28wXZg;{e^aR@&88KH|%u`$=?@@ANB^p_l*7Xeb0X8 z$)369WR8QP_1zD9gQ1OwJ$>KF_bl(H%^U`2ug)Rt4bkGVrw)7OSN4X)_fy#$idfhi z8g0*Euwic$bY*W;d}FewuI!CwE-U=Vp1H;obN0jH+ee#c8y#(K@-nkAh`AT`^oM5+ z$FhbtHO55O8ewlN;_mr;vZrs25m?qxd#@3jZ#*@}M%S8QZye(8g}t2BhP`po_O!nF z*0s+_So>>_fUfM#h;K~x)Rn!N z@U0hqWY1jVi8=cz@$I9{v(1b)H+h-aEX3Rkd-_wereaw`n;NsCYmKls8*%r1KH1Z^ z#xyKzsJ+*;%{QK!v!iRxur~*B_rhMzYQx@~XnR`UeCyh0I;?%QVQnt7vx`4B+P-0L z9<;HrH!s>V_Rse{`wjN4Yl`LviZhSb7gd`8TM8o z?q1l-S#8)`6>U%Jn{QqFEQPhNHmt3Nc6RYsN82~-t${Wc_SQsu#{T)fXFu~~&s=jd z$F-X8e%Md*)a6*2DKx*;}7j*z1k9=LXoY zw;8&!*AL&A?5Qhzo8wzA{K%fU#uIb)eemt0&9iNRHaB^h*_OoI3w!z-u{LB`Lz@~~ zp=*t>w>5G1d_LLJx5maSYpA_f-{u=n&27-NX4u=7xO-tQXSHE(JG4EmZ@zWyvkBI| z+OW1g+S$eLkG5~v+W~DX?Cps5jQ#U{&wl2~p1I~^jypBq{jj$)+IZO0_nmCY@_yRP zVHb3rL)hE3#br+&_RO#B?S}8C{_mo9Cl>bhK-+UqY}h*xUD-Pb-#ehK+0Q)LGuNEV@%ZMuANEc_8xMQ>zLNu3-cOr3oQSS-2zw{Bxa_IJp81u% zlkxpj_D&%d_D)6H^E7POI}csiJ0IVe?5Qhz7vNhj{K%fU#uIb)r{mj4n`gTaZEo^1 zvx|ti7xwheWSzmXhBh@WM%Nl)?-Jtf`FygcZ;i89)=+z|vzu=`H7`ZinqluU;_ijL zoYjWC%hC3kNA?#h>;d*)a6Zov0b*}IWg*t-dB&zrGf?{0Ku z?;d<(vZt=>-HUI%@FRQX8c)pG--2%+ZJzBuw7JR4%r+8hduKvd(Yzgsq8&REbKjxw&x4ju=fVKviBywG1*gB_TIv`UigtcbB!nF z>|eyUk2cTtHrm|eWoGXXb1&@azs!1xWesg=yo;_i!rptt-ShcmPv07^u&kl>UavOa zcxt|nt~JBn2gKbAdpWBOdmp0hX?^ppYoFJ!_SJ^9kI>F8{>NzhhP_YF#=_pGYL@Kf 
z`=0&GlRb0I$s9jxzWZVCbF}fWr|&y?o#p+snZp<8I)||LWsA$6I_#NW+4~CLPi60G zVqxzav^~GYhP_|WmA&8ajme(6viCc_^}>(rnQJ^TXa60(eYAPDKhWkTFEjg-n0sMQ z{|DChENf^}<1cir5%&Hj?w-#ld-~S+k!20F_xh>%##7UWuQkKoAP$8U_HtGm_6B9& zp4K?(zq4`K&pg>P*PP68sOGyL_J&3q z4}1EZ@21`Fm(Kn}a#Q|3SN7^0!rrhgE_>>*XMSaGID9{qz2S+4y%EsPWJGM(8xze> zM|zEgV@&qcmA$d?trvb|&s^h)Is1|D?W4^*9tUl1@-nk=iMbc{^haTh%(8|yHO52N z8ewmI;_mr;vZrs2QCZedd#}-&Z#*?8K-ZdK?_b2-3wt@M4SN%!?P-1Ut!tmrvG&!5 zwTaNqF8;)5`-Z(q(8j{vq-f9BKi~Q6XP)etYfk1kS@Ycwdy}J$hdq7Y$rvo}r_CIu zK-W2hhbdcJ_S9j|{L0=`_vDGn%UNyMn-6VI>zi*~`^=2BuQsgBk9KzP7eL!L>@A2k7WNi$ zk7fUS-?Lw59_*QGPUhIF`R<3kh0(^tp1$v77MAzZW)6#>>m0(~qAf0a>ab^iWp6Qj zKb5`3iG{r-(Dqys8}?R2^V7xNN;t-3PhHtt8Q*&0NA}D$o|v;=3g153JliU0bCZ{u ztxC+ju&2KaYiX7>w5hQgy4DDLs}pz6=aW5sYb?vMhT3~A*L>rtxdyt{40~%5cQ5Sa ztTybeg|?^l&9|<7mdDyx8`joFJG=PnpzRy>)y~A;g$)38hcLcum!jJ5k zYdkS$zdycxw0X88(dH&EGdqfydtp!iK-K{)YiLvBXmqU+_KqR$p3f(H`qnszWev6W zI=K19Q}bAKtr_-?Bko?<%UNyMJ05LM>zi*~`y7I`uQsfmfOdBAPej`{?45)*7WPg? zd&d6xzGpx4WY1i4GRISz?|#@j6>U81>HAI&WqChs=5QLi&LQlb-r};S4twTT_Rhff zQ`tL{SlBxYZO^l@VeeuzKV9ryf@4hf)RnzU@vRqrWY1jVi8=do@a?0`vt5QZH+h-a z<;2_zd-~_G&ShCcn;KW3YmKmXC2{wBKH1Z^#`!F3sJ+((%{QK!SD|aouy-|a_rhMz zYQx?&XnR`UeCyihLacqYVeMM9vx|Qn+P-1$dbF{ycLUlp_Rse{`^+2Uz3?M@<{D4T+24t8A8nrPVYIo)%gi1j=3dy-zngUz%Np9$cobc0 zguTazyXW)Cp1w8iVOc}%z3y$k@zi`AU2BHDCy2Wj_HtGm_MSxB)B5IH*FN`Q?W+xI zPobS%{HM|O4SUa^jfK5u(Vnq?zVF%3JlQkXoXqjL=DQ#Eo<|!Gd-}eU`&r&kn>oCI zu5$=`FSfYssl%T6mA#kn{S3wmZ!Z&zex>MF(e{50J2&*n#Or8ld&XRI{$j^?`V7tf z8~A>PVOiVQo5a%Rt)kyX+vgpueaw9qJ#0H;t~vHGo<82^J$yg)dEO_MJ|7hQA=*A4 zVe>p6qetYtXUsLnKE~6>`+S1$r#{c8#M0-pqCZF5=L>9}=S%dcocD~m=Ge!0`uIFw z;rkhrmD*nui~gqQZ;Sp8ZO`wqb2C9}{eT_|>v_+aZ=QYKOJASwM|?lyv+QN;Ct}e* zH*MWtivAUC-`}wI_1y313EFwjnQxwb-Ams;(0{Z1{DrmGpZXMu9;E3R2oGA~gB3kE P+B*z^{j+0`&ielkM)_!# literal 49500 zcmb821%RK`@pcz?cXxMpcXyW%AcR2BB)Ge~ySux)ySuwqC{Uotch2@c+e!ahvfr?j?> zQK6$_jh*}KxXh}v4;Z+02la-bN=N5ffok7Sdsckwu0&LSg^rQP>*$z?)obIq=IpoQ zz&`zZ4cKy%Q=(xr0jX zs2!jF&)ihj?b@f;Rs(zY 
z>f3*(EqnLxwRNAZy6!e+$9BAw|G)G7cC6>$8>Z*y`|Q|f*M3|5=K|h;A@0)C{_T9% z1w4N=AouY6fNgv4+Gp$kwCCN7*>MR_|NouuYTtSO;-1aF@6Lm|pZ`len}0b!c>mS; zh0=rjcb@NV{^$2>{`Gv<1)Sd>J)6JhyPf~2J>#dG?`q#U{|S25f54stx}X2TJ$wJ< z{NVlX>)HG}&v!TfVS4ud>-nw=IR63v^ZUhMzy3Yn?feJ!Z2skZSNqQS_ZuSry>{g< z0{`>-&v`?9{{KEdc>nQ-IRF2g?{5BnEB*6dKmX@^*9DyaqeJAc>wLHKe`ARJ{rCB< z_MP*8w`cS3{Po-2_y6I0*1wz|y#HZ6n}6r|?&kku&*opxcU{2wFEm8{JLl*5Zs)(` z5c&V_^Ih#b=f76Z-hbz>-|ptWW6$Pa&JW)I$DYl<^L%&npRZ@X{_6Rz3poF4dp3X1 zcRT+pdN%)ZzN>xb{8#;R{`ub1r}uzA`T6INKY#vi&JW)Imp{M%VCTD=|Jr}fe|P7* zF5vvH_;dd1`EKWb-=Fi>&H1kOo%4U7XY()LKL-E#^P8T{znmYu|8{@Qf9_wue{@~I z`Q6d8`PcJZ7x4U~f6jmH-|hUT?-@V#Z|A$(cg}y_p1uFhU%%ab|5>+Z@4uWMy#HH0 zn}6r|?)aayXYaqB@4A5VKcQ#y_k6eWKdxupI`<{Er$UfB${Ht9|GE59`_d%lD72 zU;drv@95e5%lX0k&pkx`JMUk=e{@~I`5o4?`PcJZ7x4T?J)6JhyPf}cJ)3_y-_^cz ze!ui={++*myZin#WzYJT^Mm)lsAu!sU{)K4SgK8lQX5$y%AUHETcC8LXRF&$7N`4ad8ginSPP zBi10+iL7f`PqIE`4Fx}wu@+`+z#7Opk#!yGY1S94;b44f))K5uS$nchW8KVpf%PqG zWacmfYgtxr)_$zBS$D9;<98{!a>tY`Kl&qLm(<~-KP}dnpIKVIzrV>yy8PMQzi{VLTy|-4*&fT&8+t^*1rFKwh=o% z!Ew*HYijnQXX44d$9$~$scS9!_;Dr+@ZZL~Uo@|?OH;06LEeIA?dR|L`f+aN8}s+D zb9U6CM?>eVJ^a0CKeg`|_&Gaf(PN==)*k+zwV&E|9Q>Rex9IWEIcs148tcb*Xuj`o zqNd%SnDrA=m;}2B)0rAO4a;ZW2jBNPeT#JrHB6^=hKs?E=VxG9Pi=g0X61R~iT#Y4 z6yHADoShMEZt||vQMbE!g5i4K)m0{QDjK&RO#- zC;nczpUTO?_+e}jv^^KahLdH`m6K)hjdjpVT{&3}-+Im_oS17oF=xFPzJ0WLw&l_0 zCNG?rI9U~KES#){b}n*~WqtGH#9VX2$?DB_Kb)+AHXcs&o!`{BW`(+MYXM!^v*w%E|8d#^gk8 zuiS4BwDt0?<-}a$i8PU_q>~&=pW5GhGh-4 zHIHq+@zgvOU2BGu(}=qlPI6WoPEJS5iS^C5u6>Tf+E*J+&OqBIoSca^7EaDWI~V&| z(`9}03D>x30ODW9_RACpV() z6HabI8w)2lqc33D&+jQN>zgMh=9&{uZfU;z;pA4d@o=K={H|bmA8nrdHgtXNaB_Q# z%Zb`|=U;csubkY0@27HdCw@4&3vJK4vEk$)bmin>d}DH=wpZ@=2-X#0edr_sj3$unr@V!ysDIkCQZa$>GI;pExoyB|)TLmLk#`p)k` zmiN)-xt~YZ=ME*ICw3Tl0)Pi{tbMiNoTzC+cuwe&xi!m&#A& zWLRS1WH_`vhsTB!e_y^rvD?&9a8tn$tAjcxujqt~JBSti;_5CpoJPC$pjD#QNr2 z*FMu??W+wZv!m@3PUb)x3nz1;os0eYvgE}2=E;e<=7f{En(ux%nHz09oaj5h=~&)J zo9CVfU7tIg%-iB}q7EnKS5EwUqx@7(<|h_T7C_r`L2Nkj|6i+|EQxPSPSll?rSPqn 
zcP%I88c)nwFNAL&ZJupuw7JO(C(96X&%4Qq{=%$9Sk_QmbJ6A-Pt9e~wPrY3j<|c_ zBxkkZWO=llSl@i>+GjDWeYN3a1+;y_$%<%W;bbMWbFtrnEIF~hd2(W|IpJjG=DQzG zRzVvNC;HBBahCVd=DAlz*XIr=tF^eCsKbf*m6J8`{j7nNjn%PhHSPOZ8*R^Zu;FB5 zbme3dd}DH=uAFR&Z@s*0IWgCGV$OP9eEVqgY@4CYO+yNVI2BIrBgYb>Xjk&9m)}HaB_UW)EWS zc{jPy--)#|%NlBH?$UhYsktY*)(kg$5qB@# z+UFRoeYN4_G_=nh|8%r{!_66JW8vmZv}a_dFH37b^W?@{bHdG8&38ZCoQ*afZuFh! zu`KVW%^c1_*Exinb6Z?))Zxbb%FTKBekwQT6AL#NpzV1fHr!l+uH0OSZ%l5~m7A;Z zt(SK%H|82o%-LUrZy#-*?P|2S$qP5v5OdGF$&LQStV>wdP+RlT<{MAVYtgl4xVetF zd*LQ$wc+M^wA@(VeCyihGOT^I;p7Ii&mI3pw0*_6Z9LrQJI~8m-cOr3+=i}m2sgL4xZJ42jroP+;pQE5<>p;{V{)Ue+`NZxy}Wz5G1quv&i*xg`)KoQ@1xC4Uby*y zn0ww$ZuDPgy}`1E+L~`R-*{?%h^{rm%}2!D3pY8d4L2X7eV^7h-@5jB3u|9(IQaza zbI1P_ZQpS78QNI5`5f&T`}@7dWk2)e#$0p4%@@sgKiqtYHXd&Do#)#u@2AZizCzbI zgqyEhTyE6i#{A07H~4-kH{TKqH-ACf^RL)&^CP-)^G|$Za-*)?{Dg14ynDGZ*LY&i z{%^*b&9nVX%-rOKn_q~z=iTH+|L?5tSk_Qm^B>JOo|^wc*P7wxSK{u4o1E2#o8QoK zV}0|jYoG72_SJ@y-_bsI{6En44L78B7z;N;;d{pZeOYp2Kl9|qTyw(B&}e=4!_6>g z_^72k2cRX4%%G4I?sliaS6ER-Q-4p6xOILYpAU`TJw#k z=6L8@Gu(_%+`VvuH4LuZ#}oejk(4XbN18X*hiaZn+3SB<^0g$ysfPn$U`h^})8 zHw(46+^EBi`IQ_0>;ONNn?;C)n?=#~Tnrm-mPJ=?mcutDH|omG^7z(sJKUISJTYg# zIF5a^dA1eM=JM5fHr%X8z&-EgeDs%KEy=Qm+L}u>-*{@SgswHi&C0~x3pY8d4L7Tx zeV^7h-@5i$8f#x|I9V0#bH`r|ZQpRSI@(ycSp)4E`{(By`*D*V+^k0|+^mnb=LXntvpKqQ(+l62 z+^8!zTi{#I?QmnR@x+|{hB)@o=Gl6q&E>1}Y`EEyfP3CeZuB=|ZOpQU+M1g*-*{?n zg|0Qj&DO--3pY8d4L5zza$|k-t!tl6vG&!5lWovGcl^F+`-Yos(Z<3}KeT7;-OB#!;QJd6La>v;@C%z@nV#CcT=*rEh_{QW$UAZ|8-+FF`8*_~(=IoEd zv5z*-b~@TzzBk#Bu5$=C7q__FsKbrIX2u}hpyaQk8ezF z)RmhX@U7=|xG~pwV$S{w9Q$bVY&W9K<*V~-xVeddd)`fM^si)H#j=LlnpZd9cxv8^ zt~JBWEyUdmH#w^fH@BjFpVl|uy7svSYhP_Rxee`e$G;tI-*9sW+E}=`6YZJ&Y@@ZG zd2(Z}IpOB6=DQzm?nWCAH~P-=T9)_IW)AnD>m0((y)7;`>TqLz<>o$oKb4#NiG`a7 z(Dr-~8*ZLJS8krfHzqgg%FR>w)^j`Dm}@*SXa5k6eYAPDr_tu})p<7DJVU^}2Uv2W z|1j$jmNnGYe6;z-Q}bDLtr>2fBko?f$ysf^W6_OucD2I8-3^bILrHKGl$pEbq?X?^%j>K zb+|FVa`OhhpUTaf#KO&6XnVem4L2X5D>onG8nK7>$x3n%r%~vvwsK2KH5Cn zr)YEe>O326J|p0scat0acUkYTtf98%`^`6=nxCU<&2aMtareSa&T7NWmuR`MzWLU* 
z&j(ohYQxD_XrDX&*J%5On{Uv@!p*m6&&W%DzOkQqa$~MJ;pQ*RcR$?x6>U7+=sV93 zS>8{ZIs6S>=MZlG-r{nj4majkZob3!Q@QyEv2gP}+MYjP!_BYg%FS>1#^gp_x%nO6 zdTxgsbB!nF?0>|uk2cTt2ijb|I?sk1#@OMWcat0af1-bKk*&7o&&@ZUnp| z;_ii;oYjV#Vc3@&>zi*~`}~5nuQr?v%f8PYe>i-fCEN^;Z!Fx5fcA|2?cuVYd2(Z} zIpJo+=DQzmMnW48H~P-=Uo7vZ%^XH%ug*5yjMCzAqYgLbS8n{XC;e1zMk5w(Mn~Io z3~adZ&%LPJOn`69!K*7b6XIKsD|g2I{bF;CC+6(O#J7(&&o&X-+~kFuiHW)A-Q-4p zEY{d8YpAU`PV z9d698+)R(}r*bm`v2ZgZ+MY9E!;ODdO66uQd}DH>uH4LxZ@s*GxiQyxV$ObMeEVqg zZ1bSaOXpOVa>|2hT595HQ#t@&WElw!_EA}-3vE4s|_~`pykH;=3Cc3 zvt#Y64JQkteeU=Rq3s)P7DgKjH;bS>WB zmiNuH0;cZ@s*GxiQyxV$ObZeEVqgY<OGW+HkWyT5hawzIE-hCDy*$aMB;`bI0ESZQpRSBidNF*$M3# z``g21Kl9|qTyw(B&dql}-0Xrj9&YrV=Te&uE$zMsm? zAY$QWH?%!>$A+8z(3PA0@r}uix^i;>zV-6%<;Gm&i8=c{@a?0`vmJ;wH+kXaAY$%$ zH@VT@leHJi8ft6q-F)Mzc`&-x3^#`mcQ4%JtTx;niuQe4-+b%ZXCJJ6wc+G2w9g&? zaI}5H%@Js0;pRxRXY8M!Z|rBD+?Z=lxH+o%?uVPB(Z<7#zVqCd<^8mo!!hVOhj4Rj zi_48V+?ZdvIS$`X<>q)|;pPOiJx|1jn={arn=|o^$&I>la~8h!^6urvT;qv3`;+kP zqs_COjW#!V;pQA-?s+%4(Lb4W3d6UT(}ao|v=02H!r~Jlm~kbCVZtZX@QNcat0aYgyN^tf98%_02b)nzy5C&2V!E zareSa&T7NWooKnSzWLU*&kb1nYQxE0XrDX&-DvxUn|sj4!p*&C&)7dd-`LMQxiQzA zaC2Yt-48eSqm73fedl>2%lm0FhX>Gg4&mm(7MB}!xG}$S^ANtD`p?-tOf1|yg0|3C#=1JHWt?2L_1r*H#rNJ_05wtbIr-z-fF)4VeM_S@vx@v zJ9viWeYAP*chL2@!`izoE^F$rW`1SuJ$yfvwfBjIwGYtt{16+~zC>5nzQQ*qYwF6{ z*Z9^8FS2H?@x+||0{)g*E+8SRb>jp-qjyplgk=_E+NW`Fygb zZ;ek`)=+z|&zf&MHUEaLHN)E9iMtopa#kDGzC+v7`sQ2LKA&Uls|{=aK-(v*eUCO4 z)_y=c+x!frwZ3_>X0ADz+mFq6Kdk)|Z9J^$`wqTfc^_?_`zLgL?y&ZAi_4liteIa~ z`vu=mW$jD%;=RzsT_BNMMR!rCar-ShcmP2U&_ZqtS##3`tbgdcIMkDTC zSj$;$SQ{N}PwSg+UHc4!wXZg;je)jLSQ`^P0d&5pOIpIWzByNj-Sff1jNGHglKzCgbiy` zp(|@s;~SGTwKDgc2FH5gMb^wUo|v4>=(*7PT3O~SH$Ucv#c-9ZbpcKH5C@oap-8 zVQsD!mo+tCovfK(S@X}l_fuJ$hgevf7j4h^uwl(V1H7`fIKDAiQ!8`7C2*`4US!Q& z{N}n0sL@XSHE%d9*#PZ@zWyvk2C{+OW0)+CE`zMYOT7wi4Re+Rs_I ztZ$yInQKnwwsP~`4{NKSjfXXT-@&3R@1xCguc~I{xx?COEiP+nzB*Ymzp}Qv8+1Mv z?_fig_tECLw?)_I4r~2dT-MZlb+TrDWoF>7Qrv0iwQHFJ$8=B$sxw~sbw zPeGfTyv*!WV(x`C{bN{1v#g;_jnmMzMp!$YxO+aItm#|hSe7-^-s`yL8&AzM(6we* 
zJCnG3VJ&C1VeKrmJ*{uPb?tLJ*1p=Xb~f5RVeK5Wv9NY7+S%GKKd)KeJXtf>oXqXK z=DQ!(&PN*$Yx=%}6Ik9yo9Dg&U7tIwUD)EXrsk`YHS;TL7rDWwvUV}CuyzUBo|j_7 z+O_D)+I9HGWKFHi{jSHcUU-o;bB!nFtS`g2k2Ys-K%1Mq%09GUmNnGg>#F7(Pt9AB?GCiDuy!Zf+1k%pxU6rUteI<0=5|-}-4APbqm73(ec!<~EbpVubKirm z&mGq8ZE;ys^VP|k`IWW%+~8ALyPsHCdjM_E2eD!8NpxlHDSTtHrdH;DPvclGyvUll z#uIba58>NKo3qcL%}rir_AD{?!kYdgtcO|F(5A+7=vpJJJx|;{pHJ5Gt??+!8fx$L zSo4ji<_qXrGpxNx+`X`tv)Zus655{DH{ZJUc^qqBZCHC5ZJ)6A3ffp$dll_$?U$d| ztZ$yInQKnw_FD7Z4{NWZjfXXT-@y|s@1xCgzk#mL9oF7#aamLI)ybOqm9@9r;8R(9 zn^;(T2W`)Hv0?2~bY<-`d}FevR_1=6<5(}e$eOvv6LZ$@;oC=>vtOXiO4Kf4UKP1 z*3_2F{f5D}UU-o;bB!nFtbb{Jv^hI0Ip!uWGaHVWdtpugSI+&b)zGHK@a)wZVQmEB z?)iMOrf-ekS`D@L`n~zaQ**@ssu|WsBJN&T%UNw$8<~B3THk!@+UF0fbJm8nQP}tH zVQo~jv9LB8+S%GKKd)KeJXtf>oXlj&vA%_wQ)K}$tbMg%Z3eWni$5dUzF}`Bw6U-^GuktL ze{(i2`04tRmNnGgYu)A>PtDEIwPx7sMcloxm$TZiw*}gs);Hg}_E`^WUu{_HjdphN zw?x}F>}`cM7WTGAdq(E^vb6RyPxj0;Cv)u6eD}lNHfZBvPv3X4KFj-QGl#zDI)|{g zZHvpEI_#NW+3Sbzr?R&lv9Py2+MfNfVQ(P1vNs6dnCz)5d%NLVFZ{@!xyBQ7_B-I) zN1JEc9c^y%GP6C1xfk~IcVg|xvW7M__C(hjVQ(+u?)iMOr*Dm&S=LZ{uU(pNJT>=5 z*P3B(AL8zXy`0sCy?xR4w7&V)wa>0t`)b45erRVGe}A-n!`=aCV`1+=v}a^4KkwPk zJlQkXoXqi{=DQ#E4n`Xfd-}eU0W9yQ%^VIv*Exi}Lt9+-)M3y3%HCo4ekyy16AODs zpzV1iHtd~omA#Yktrvb|&s^h)Is2pV?W4`Joq{$ud70U%#M}#e`p2-2 zW?4g<8mFOajj(q*arb;a+0(biu`Fw-z1MNgH=dejpli*rcP4T7!d}j5!`@kFds^Ro z>)PjdtbMg%?QFEOi+>K{`a?A=D(y|925Vn!Si1x5?Bd^vwr|+G z3vDdy-HrB){r%qOvY&afXRbM!<2}uHKkVI$HXio$eJ9tlyq`96xDQ?D5cckGaoJOc zJ@YGj58(T$>^(><>^+3G=fl{r_cXe)_YA%<*;7~cp2fFb_>nzxjVI>pAHlbeHqZ7P z+T7%2X3rCIFYM_*#(I=x4Q*KFQ!MYN%^coF*Exi}cUoNb)M3y3%HF&9ekyzK5es|oqwVomA$Xi8}|N=wx{*Yx2}CY z!`fFH*1khKyZHY=+c)fek2V(een5N1{`q;&e&)%Zx#nb!KQ`a}u=h{2@vx`wJNcaD z{j{0GPv|;_u=jI|%bq&ynP1ua1>a9)?_b2i-mhqT{)P>E!wo+;d&A=!lRb51Zv=en zg&*1LJWo6^Xa9Taqs_C8NRGM5%gjb1=3dy-?_mAWYG_kqWa70(*c*kodp@7+>04tc zmd~s9UPCwEcxsM{t~JBnXvEzMdpWBOd!wW6X?^ppYoB4T_SJ^9G0@H~{+Rgo4SQpu zjfK6j(VnrtvvJwaJlQkXoXl~Y=DQ#E#zh+sd-}eUVX@v%n>mb!u5$=`1Yz?_?^L_tRz$bEE4V!rnYBE_>>*XMSaGUVJ~5z4?fRz4_7hTmT#P 
zmOxkbmc%zEd+N&GQux*jKeA`8@x+|{g825)=Gm4;o147MY#Czig+2X+SqrhOp-qit z(X~d{TaLJUKA-IATVoNHHPqf~(dHXZ&E?UxX4qSSxO-tQXSHE(MYKJwZ@zWyvl!OC z+OW0~+S$cl8ExONw+h-=*jp9t8T;qwJ^Ps_d*+&xIj+`x_ru=mXyai|-*>V&%lm0F zhc(c34qg&)~7 z*LY&iem#8qX!C5n(B>vDGuwigdtpz11J?R1YiLuWH@emcds`BB&*zgpeQRvUvWD7w zZPa|@sks%p)(m@F6L&A{<*YXB^+DUy`sQ2LJ{x20s|{=0pq*X(zG(Y~y=~FP!d^eL zXY8M!_v~k$?3rs$=D1z+-4A=)qm73>ec#C@Ebphy9Qvc{9KzlXEiQZNuxEZ{Z%2GT z^`DF0iCEa%8Ewy9uwidcbY*WZd}FewuI%lNZ@us%d*&KX%-QdXZy#-*Z6CC`$;-_4 zCFWk((;vthz_NxmHTFZ-8ewmL;_mr;vZrs2K`d*iz1MEdH=ddYpli*rcOY^1!d}j5 z!`?w?ds^Ro>)K~`tbMg%?O?RCi+_lk74{BA8w-1fp*`bYZ=H?He&)%Zx#nb!hd1B- zuy+L7c-YhTo$SH#e%j38NOYY;*gLAlWltUU%&+Vnjqj(jcMP$xcP!eT$6>?XY3Rz{ z>G;NEPhHtN1K)b#NA}D$o|v;g9^XFNJlmOQbCZ{uokh&Ou%~|_>jaiHw5f46y4DDL z=MZ<#=aW5sYn;TghT40b+1Yz@8ndL_tRz$m!az% z!rtXAE_>>*XMSbx3Vc76y(@`@y{pjnyc!$!Zbn!3ZoxMud+N&Gt@zdpKeA`8@x+|{ zHTd??=GksTo147M>~>=Ag+2Z2Sl6l=du5$=`AGWycsl%T6mA#Mf{Z#foCKmQSLEH0FY}or2UD^8!zA@QT zSN8sjZ@us%d*&KX%-Mg2Zy#-*?Qdvvlb4zOotS%JPyY+n=PYYzQ{y{ytr7PALEJr` zPxkb!@g>U|YVY+`^NpwG_vl(P?EOI8y|9Gul|#`vvV8`#T$#{mhd+bIr*d|J8i=!``oG<6%$Vck&I(`)Mxg}n*Tp0U5*`&{-j zPxj0;Cv%*z`R<3kiO|Nwp1$v7B$oHnW)2gh>m0(vBrPs`>ab^iWp7e^Kb5`7h=sk$ z(e|7I8}?>E^VP-Pj5x+*PhHuY3Ez6*NA}D$o|vdM~o_|^+QvS+UG#GL(N`1aA}*;YWCo4m|yMPlxSJ^dwEi?ghuO^ub%wMN)mnYepC zpX}*dV@Z}Z)ZS~U<{MAVRnWC&*jtsjdtonUwP9~Hv^}kFzIE-hG}gY_u(mqd*~MQ2 zZQroBCfZooTMO+O`{(C9`^H`@ zk2cS?HQLHAK$V0k}n=CBL8&LQmW+TyaO4twTT_6Felsq76T7WM|A z?YSE^?CpoQ zd+M-fer4}kd_R@F>xhND>(TbS0UP%2K=akb-kmtcWKUh$y9?iX;Yaq&HJ+HWzY*U) z+C1CcXmgX7ncYLoy|AZ$GwUXnHMFU5FS^zUd-oA{&*zgpeQVspvWD7w-P(NPsd+!T z)(m?O5O*)^<*YXBJ&3la_06}geQv|rR~yzILOZ+o52NiH_8vhS3ww{EJ!Aj;yk|f2 zWY1i4GRMc7?|#^O9Bn-8>HAJ@XL&zu=I{i%&LQkQ+2XRN4twTT_MXD`;~zB{-kv5F z{Y=r%qV4}2b|L7KiRaPQ_Kdma_|E|wPoELle*xdmNGxj`dy!cByj1kdX#2c^wU4>4 zqDO9L%r(b8#?#09yoT?mKF{mK(&vq$-$dKzEo`3WZS?4z_l&vb*vEMKc%OIh{nY1q zmst9|SM>X6`+R`S^L&UNoAaJA*Btv8PoIy_pR)XXg0=R?*w2doyy>aQ`J%+XEcz?7 O{lCWk)-hCP{r?AZm)24M diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 59d9f96..8bbd1d7 
100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -609,6 +609,7 @@ pub enum Instruction { Popc { typ: BitType, arg: Arg2

}, Xor { typ: BooleanType, arg: Arg3

}, Bfe { typ: IntType, arg: Arg4

}, + Bfi { typ: BitType, arg: Arg5

}, Rem { typ: IntType, arg: Arg3

}, } @@ -695,6 +696,14 @@ pub struct Arg4Setp { pub src2: P::Operand, } +pub struct Arg5 { + pub dst: P::Operand, + pub src1: P::Operand, + pub src2: P::Operand, + pub src3: P::Operand, + pub src4: P::Operand, +} + pub struct Arg5Setp { pub dst1: P::Id, pub dst2: Option, diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index 50d6767..ce3e387 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -143,6 +143,7 @@ match { "bar", "barrier", "bfe", + "bfi", "bra", "brev", "call", @@ -196,6 +197,7 @@ ExtendedID : &'input str = { "bar", "barrier", "bfe", + "bfi", "bra", "brev", "call", @@ -727,6 +729,7 @@ Instruction: ast::Instruction> = { InstXor, InstRem, InstBfe, + InstBfi, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -1658,6 +1661,11 @@ InstBfe: ast::Instruction> = { "bfe" => ast::Instruction::Bfe{ <> } } +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi +InstBfi: ast::Instruction> = { + "bfi" => ast::Instruction::Bfi{ <> } +} + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem InstRem: ast::Instruction> = { "rem" => ast::Instruction::Rem{ <> } @@ -1843,6 +1851,10 @@ Arg4Setp: ast::Arg4Setp> = { "," "," => ast::Arg4Setp{<>} }; +Arg5: ast::Arg5> = { + "," "," "," "," => ast::Arg5{<>} +}; + // TODO: pass src3 negation somewhere Arg5Setp: ast::Arg5Setp> = { "," "," "," "!"? 
=> ast::Arg5Setp{<>} diff --git a/ptx/src/test/spirv_run/bfi.ptx b/ptx/src/test/spirv_run/bfi.ptx new file mode 100644 index 0000000..f2bca91 --- /dev/null +++ b/ptx/src/test/spirv_run/bfi.ptx @@ -0,0 +1,24 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry bfi( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .u32 temp<4>; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.u32 temp0, [in_addr]; + ld.u32 temp1, [in_addr+4]; + ld.u32 temp2, [in_addr+8]; + ld.u32 temp3, [in_addr+12]; + bfi.b32 temp0, temp0, temp1, temp2, temp3; + st.u32 [out_addr], temp0; + ret; +} diff --git a/ptx/src/test/spirv_run/bfi.spvtxt b/ptx/src/test/spirv_run/bfi.spvtxt new file mode 100644 index 0000000..a226f78 --- /dev/null +++ b/ptx/src/test/spirv_run/bfi.spvtxt @@ -0,0 +1,82 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %51 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "bfi" + OpDecorate %44 LinkageAttributes "__zluda_ptx_impl__bfi_b32" Import + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %54 = OpTypeFunction %uint %uint %uint %uint %uint + %ulong = OpTypeInt 64 0 + %56 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %ulong_8 = OpConstant %ulong 8 + %ulong_12 = OpConstant %ulong 12 + %44 = OpFunction %uint None %54 + %46 = OpFunctionParameter %uint + %47 = OpFunctionParameter %uint + %48 = OpFunctionParameter %uint + %49 = OpFunctionParameter %uint + OpFunctionEnd + %1 = OpFunction %void None %56 + %10 = OpFunctionParameter %ulong + %11 = OpFunctionParameter %ulong + %43 = OpLabel + %2 = OpVariable 
%_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + %8 = OpVariable %_ptr_Function_uint Function + %9 = OpVariable %_ptr_Function_uint Function + OpStore %2 %10 + OpStore %3 %11 + %12 = OpLoad %ulong %2 Aligned 8 + OpStore %4 %12 + %13 = OpLoad %ulong %3 Aligned 8 + OpStore %5 %13 + %15 = OpLoad %ulong %4 + %35 = OpConvertUToPtr %_ptr_Generic_uint %15 + %14 = OpLoad %uint %35 Aligned 4 + OpStore %6 %14 + %17 = OpLoad %ulong %4 + %30 = OpIAdd %ulong %17 %ulong_4 + %36 = OpConvertUToPtr %_ptr_Generic_uint %30 + %16 = OpLoad %uint %36 Aligned 4 + OpStore %7 %16 + %19 = OpLoad %ulong %4 + %32 = OpIAdd %ulong %19 %ulong_8 + %37 = OpConvertUToPtr %_ptr_Generic_uint %32 + %18 = OpLoad %uint %37 Aligned 4 + OpStore %8 %18 + %21 = OpLoad %ulong %4 + %34 = OpIAdd %ulong %21 %ulong_12 + %38 = OpConvertUToPtr %_ptr_Generic_uint %34 + %20 = OpLoad %uint %38 Aligned 4 + OpStore %9 %20 + %23 = OpLoad %uint %6 + %24 = OpLoad %uint %7 + %25 = OpLoad %uint %8 + %26 = OpLoad %uint %9 + %40 = OpCopyObject %uint %23 + %41 = OpCopyObject %uint %24 + %39 = OpFunctionCall %uint %44 %41 %40 %25 %26 + %22 = OpCopyObject %uint %39 + OpStore %6 %22 + %27 = OpLoad %ulong %5 + %28 = OpLoad %uint %6 + %42 = OpConvertUToPtr %_ptr_Generic_uint %27 + OpStore %42 %28 Aligned 4 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 7c790eb..91e6113 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -139,6 +139,11 @@ test_ptx!( [0b11111000_11000001_00100010_10100000u32, 16u32, 8u32], [0b11000001u32] ); +test_ptx!( + bfi, + [0b10u32, 0b101u32, 0u32, 2u32], + [0b110u32] +); test_ptx!(stateful_ld_st_simple, [121u64], [121u64]); test_ptx!(stateful_ld_st_ntid, [123u64], [123u64]); 
test_ptx!(stateful_ld_st_ntid_chain, [12651u64], [12651u64]); diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 18d750f..de7de82 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1451,6 +1451,9 @@ fn extract_globals<'input, 'b>( Statement::Instruction(ast::Instruction::Bfe { typ, arg }) => { local.push(to_ptx_impl_bfe_call(id_def, ptx_impl_imports, typ, arg)); } + Statement::Instruction(ast::Instruction::Bfi { typ, arg }) => { + local.push(to_ptx_impl_bfi_call(id_def, ptx_impl_imports, typ, arg)); + } Statement::Instruction(ast::Instruction::Atom( d @ @@ -1844,6 +1847,109 @@ fn to_ptx_impl_bfe_call( }) } +fn to_ptx_impl_bfi_call( + id_defs: &mut NumericIdResolver, + ptx_impl_imports: &mut HashMap, + typ: ast::BitType, + arg: ast::Arg5, +) -> ExpandedStatement { + let prefix = "__zluda_ptx_impl__"; + let suffix = match typ { + ast::BitType::B32 => "bfi_b32", + ast::BitType::B64 => "bfi_b64", + ast::BitType::B8 | ast::BitType::B16 => unreachable!(), + }; + let fn_name = format!("{}{}", prefix, suffix); + let fn_id = match ptx_impl_imports.entry(fn_name) { + hash_map::Entry::Vacant(entry) => { + let fn_id = id_defs.new_non_variable(None); + let func_decl = ast::MethodDecl::Func::( + vec![ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_non_variable(None), + array_init: Vec::new(), + }], + fn_id, + vec![ + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_non_variable(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_non_variable(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_non_variable(None), + array_init: Vec::new(), + 
}, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_non_variable(None), + array_init: Vec::new(), + }, + ], + ); + let spirv_decl = SpirvMethodDecl::new(&func_decl); + let func = Function { + func_decl, + globals: Vec::new(), + body: None, + import_as: Some(entry.key().clone()), + spirv_decl, + }; + entry.insert(Directive::Method(func)); + fn_id + } + hash_map::Entry::Occupied(entry) => match entry.get() { + Directive::Method(Function { + func_decl: ast::MethodDecl::Func(_, name, _), + .. + }) => *name, + _ => unreachable!(), + }, + }; + Statement::Call(ResolvedCall { + uniform: false, + func: fn_id, + ret_params: vec![( + arg.dst, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + )], + // Note, for some reason PTX and SPIR-V order base&insert arguments differently + param_list: vec![ + ( + arg.src2, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + ), + ( + arg.src1, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + ), + ( + arg.src3, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ( + arg.src4, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ], + }) +} + fn to_resolved_fn_args( params: Vec, params_decl: &[ast::FnArgumentType], @@ -3102,21 +3208,13 @@ fn emit_function_body_ops( let result_type = map.get_or_add_scalar(builder, (*typ).into()); builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?; } - ast::Instruction::Bfe { typ, arg } => { - let builder_fn = if typ.is_signed() { - dr::Builder::bit_field_s_extract - } else { - dr::Builder::bit_field_u_extract - }; - let result_type = map.get_or_add_scalar(builder, (*typ).into()); - builder_fn( - builder, - result_type, - Some(arg.dst), - arg.src1, - arg.src2, - arg.src3, - )?; + ast::Instruction::Bfe { .. 
} => { + // Should have been replaced with a function call earlier + return Err(error_unreachable()); + } + ast::Instruction::Bfi { .. } => { + // Should have been replaced with a function call earlier + return Err(error_unreachable()); } ast::Instruction::Rem { typ, arg } => { let builder_fn = if typ.is_signed() { @@ -5821,6 +5919,13 @@ impl ast::Instruction { arg: arg.map_bfe(visitor, &full_type)?, } } + ast::Instruction::Bfi { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Bfi { + typ, + arg: arg.map_bfi(visitor, &full_type)?, + } + } ast::Instruction::Rem { typ, arg } => { let full_type = ast::Type::Scalar(typ.into()); ast::Instruction::Rem { @@ -6127,6 +6232,7 @@ impl ast::Instruction { ast::Instruction::Popc { .. } => None, ast::Instruction::Xor { .. } => None, ast::Instruction::Bfe { .. } => None, + ast::Instruction::Bfi { .. } => None, ast::Instruction::Rem { .. } => None, ast::Instruction::Sub(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Add(ast::ArithDetails::Float(float_control), _) @@ -6804,6 +6910,62 @@ impl ast::Arg4Setp { } } +impl ast::Arg5 { + fn map_bfi>( + self, + visitor: &mut V, + base_type: &ast::Type, + ) -> Result, TranslateError> { + let dst = visitor.operand( + ArgumentDescriptor { + op: self.dst, + is_dst: true, + sema: ArgumentSemantics::Default, + }, + base_type, + )?; + let src1 = visitor.operand( + ArgumentDescriptor { + op: self.src1, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + base_type, + )?; + let src2 = visitor.operand( + ArgumentDescriptor { + op: self.src2, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + base_type, + )?; + let src3 = visitor.operand( + ArgumentDescriptor { + op: self.src3, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + &ast::Type::Scalar(ast::ScalarType::U32), + )?; + let src4 = visitor.operand( + ArgumentDescriptor { + op: self.src4, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + 
&ast::Type::Scalar(ast::ScalarType::U32), + )?; + Ok(ast::Arg5 { + dst, + src1, + src2, + src3, + src4, + }) + } +} + impl ast::Arg5Setp { fn map>( self,