From ac6265f257654180f6661c406a025313190448c4 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 6 Nov 2020 00:56:45 +0100 Subject: [PATCH] Implement instructions bfe, rem, xor --- ptx/lib/notcuda_ptx_impl.cl | 22 ++- ptx/lib/notcuda_ptx_impl.spv | Bin 48348 -> 49396 bytes ptx/src/ast.rs | 19 +-- ptx/src/ptx.lalrpop | 63 +++++--- ptx/src/test/spirv_run/bfe.ptx | 23 +++ ptx/src/test/spirv_run/bfe.spvtxt | 70 +++++++++ ptx/src/test/spirv_run/mod.rs | 14 ++ ptx/src/test/spirv_run/rem.ptx | 23 +++ ptx/src/test/spirv_run/rem.spvtxt | 55 +++++++ ptx/src/test/spirv_run/xor.ptx | 23 +++ ptx/src/test/spirv_run/xor.spvtxt | 55 +++++++ ptx/src/translate.rs | 249 +++++++++++++++++++++++++++++- 12 files changed, 576 insertions(+), 40 deletions(-) create mode 100644 ptx/src/test/spirv_run/bfe.ptx create mode 100644 ptx/src/test/spirv_run/bfe.spvtxt create mode 100644 ptx/src/test/spirv_run/rem.ptx create mode 100644 ptx/src/test/spirv_run/rem.spvtxt create mode 100644 ptx/src/test/spirv_run/xor.ptx create mode 100644 ptx/src/test/spirv_run/xor.spvtxt diff --git a/ptx/lib/notcuda_ptx_impl.cl b/ptx/lib/notcuda_ptx_impl.cl index a0d487b..4249f2b 100644 --- a/ptx/lib/notcuda_ptx_impl.cl +++ b/ptx/lib/notcuda_ptx_impl.cl @@ -1,5 +1,5 @@ // Every time this file changes it must te rebuilt: -// ocloc -file notcuda_ptx_impl.cl -64 -options "-cl-std=CL2.0" -out_dir . -device kbl -output_no_suffix -spv_only +// ocloc -file notcuda_ptx_impl.cl -64 -options "-cl-std=CL2.0 -Dcl_intel_bit_instructions" -out_dir . -device kbl -output_no_suffix -spv_only // Additionally you should strip names: // spirv-opt --strip-debug notcuda_ptx_impl.spv -o notcuda_ptx_impl.spv @@ -119,3 +119,23 @@ atomic_dec(atom_relaxed_sys_shared_dec, memory_order_relaxed, memory_order_relax atomic_dec(atom_acquire_sys_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local); atomic_dec(atom_release_sys_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local); atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local); + +uint FUNC(bfe_u32)(uint base, uint pos, uint len) +{ + return intel_ubfe(base, pos, len); +} + +ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) +{ + return intel_ubfe(base, pos, len); +} + +int FUNC(bfe_s32)(int base, uint pos, uint len) +{ + return intel_sbfe(base, pos, len); +} + +long FUNC(bfe_s64)(long base, uint pos, uint len) +{ + return intel_sbfe(base, pos, len); +} \ No newline at end of file diff --git a/ptx/lib/notcuda_ptx_impl.spv b/ptx/lib/notcuda_ptx_impl.spv index 36f37bbc53f9e204d5376588900fed05e6608728..1ef470fcf0077084439be6d03e5ec33f1ad15805 100644 GIT binary patch literal 49396 zcmb821-KqWl6Eid?(XjH?(QxjKnQ^lBnj^B?(XjH?(XjH?lAD5bN_xbMK)~ieot!a zt?KHk?*0zpPS|0F8gi=PIy#2v7`0>Ajt@|N#^@NbW9a6GX@1z|M{0iB=BLAt)G-Vl zIyzR{p+}#kR-UzQzb!lHHxykuI&C?+y-)9%iS4@rS>xq8Mxd^vV+O3-hI7o`t53fk zy}R|@e5)S4w%LZ9J*^pYkTt#kQDfhK_UTWbZT?X+;vj3bD>Wl^yv(^e`*iEGeZMXG zZ`r-u4*hoR)@!>RdUxwa!)|@&nv*V`Yow0H2UFX>)V?{G+P?G7J)rNigQ@L5pmy|* zK729%cHZv&w%@MXPCa^e-?hh<-L~l0y<6X{yYGbDvd0!}*61CF^CkUX_xI}a-~D3( zNSF8b?7jWw-FyFI{_1?m|JVI>{<{CmTm9qyeRu0SsQYg?R2S!${dNBDUA@2i2XX(& zyZZiRf1Uq+SHJ(BJMi1vxt@FR^>i1vv*ZD^b?)|%Ov19*UJN+yF z7Y^?I2eH4-pKWmWulonV|2BiW{~-3)`F#iX{b&Eb@ZWE6-~S-?*ZJdh_5BOK_4{A2 ztM6a-*ZF^Ub$;Qu&L6p}_b>bF{0qAJ{TF`g{O7y+{g?f9{zF~3zdr;0+wcFccH#X8 z-e2eU>cag8>>mXGXLsTL1Mjc%uj&H-o%{cV|4X~@{SUmq&R?Ue?_c<>-+!O3zJJ+Y z=P%IJ`xk!e{KdO^|FXZ%f2*tCf8n>zAGWLaFZ=&He~ga%2Y3GZ^MB7CeR}NFYl{It zfOr1qm@zuW$9Mf_@YW-TXg$v7KY(?qN9XrA=yQDD)pOJy1D#{euAZawJqLY`gS&c; z+GC(|{N9D{<-b4I4f-5wcHtcVqsKt!c)bhnG2rLBf8k@|!TlZuWB;SaKxUhUU5ee0y@~ydjn3ciX2h1pHpOw0r{=~-RuhZGF6|pU` zy|GiV>#?V>&#+;5qbaaOunn;O*m2lZ*kjm7*bw~vc@k_vY(1Ilk^%9oF7nCxTxk0y?uq6~1LFPA_MC!0yX84LYpsu&AEDJ)m;B^(8Mo!l zO~Ievj@Q=Bo3mz0Jl7i#Pt#)WGB#QNesA5XT!Lrpth3i@#P%?sUWXz)yRkKgA=Kve zGjId@&4bO$x;g9pc(?iZzvi41&DSzOd(QcJP4{|N|E$-K_qX1he|DX{BNsgiI(wbr zpGo_vb4Mf2-qDL51D(Cj@XxCK)VX63XYbfWkAu!$=lXM)AHNIh{VpbG+Vu&sUwMUz z@C&u?XDYOJ-;>zyXIji$#}GqxYTs}XIB@@TnElk|7v-(2HJ{vXsELW4qtD*y(blH! zYWDhl&4{_?*?y<1@g&%!m_5|?oUFy>({mK?LQ0Vnty%Nz4rKt+gdXl=3g)N zx8A{=GlB7 z<3+K>Fng%&xp<4sr|0tM+B2N2K;E@*vN+ma`f#!$+V9i;*4x)POW>{5hm)1i&Iu3n!bAbIr5KiSc^a`j|b`_S~Sw=F_t~y7mkwo0E4foMf*)oNR%X6Z>0lU*~Ly zcdkC1Y>9SGIO%~l7f!Z9`z|ulvw8blCnwfg6Hc~nvFqWa7utL{G4}m7!aR>Y_ud9w z?;TFIZFxCS``yWj^_7#}#C|F#+YyJ8K4@odj}Iq1qbn!75Sx<|wX<@*UD5W-cP%H@ znorKR-htRT`t02eZEfnp$?oJ_^K5cryd$;~W)HPJ`?lD8dhUU)J;TYKw-Up)Vy~D{tEiWhPaAJMscYv1*T~*Yr@GH zEp|PeoQXC+59T`<`+mn@o=2a1pM|dX4ku@~yqu`RiS?C}bBO&^PR=C`C+DG^c|Jaz zT!yZkTuy9GPSll?D~Qj<@?Fb`wdRxatuG*Ujy`w05^Zhj!pT+ST=Q(ckMV`rMVLL* z_Pn^o=F{_PbnO{Vt|9N5-&@^lk8pA=+V9i;*4x+GOYqLshm-5j&Iu>iqs@hr8_?%s z&hz&a&HmQOiM7^*lN(#?dN{cWZ9beB`+k>Vo=2a1-;A#J4kx#?yqu{0?)>|X^_7!b ziTzYgZX*sSx1*hT2R@wKkFK0NKx|G<)XvKJ9z@$O-?f}rYd$&O`c7i!=(G1Bw6&=V zCl8Zz&9ljg@m<*6m_5|?yr;$H)AJE@?HNuUCGT1|$zFXpc?>Nl_P5@?&bb%wTzxos z9PONN@&wvkIC&E7yEv~WCMWi{PEM?~CY(IgV%NjT(`fVI#Mt+{5A!_w-1`}Hy>~cy zw&mqS9ZsyToIFSDr*iTXCbb8@1toV-PBzkJtnVy*e)eCwBp zoukj)-bP!Sx^VIiIoCX!oEX21y@J_8ZO>O*Y(72TMc1C;nhm-fwa$>PdWHagnc)P<8V$hqd(ch!6Xy=5Jana_&$#`hr#d-c- zqS@a%IkDE7a58?2T@NP{pv{L9W8ZIN%=74T?+MZM-r;1TmX{NCII+HR;y?HFQ#qN0 zTsWB&?aayW;lzIiQaPE9*qofGD<{(v+b`d>oLFl!b`#NV4 zymR&8WI42R!pZVzbKztKwD01){V+MPzjbnAtu^6f#TL6BPF6yj4=2XH-=di3(dXVP zqwBrH$to=`C+cuwedT0zVn3_lWn)$R8cqBCtciB!TKI6XA-Zz15wSTrQCChjCbnO` zYdNvjd~&|^+QiP$=Wd&ztxa7x*_51Xo=r}S*TL4s?4h>jdM!4ep54&3XE@o6yldek zd-dU@J6cZcZ@qn;vp(Lr`f##2+IuJ70`1&zvnASGxaom*kGwcTbDni_W34seW~&yv z9&UP~&4(Lf-*W@Z^Xc;rTchhcgqvP1FE{FNV}0dj8)83|n{CPE_m|#iXKsfNH~r9+ zoBqV+I_I-$*qtD%TL0g-;aI-5p*F2lt7;lg5fZ0QB&mCKA zK0S9s*Ph{Kck-@{4&mm2mX{lKxUs%+b0D#w%FRLK z!p*^GXC8tNH%Fr@H^&g0lN)vA=2&9;<-3;~Yt1L;+aF5o9DVL~9NOB{g`4Bax#rpA z#`rMoaLgWRdmhnZ^XYj4y7mk=Cz5w9++?pl+?<4#8~a;tU*{Z&cdkC1oQ(F~iBCa0 zH{6_xHWzMAL%T<2dSZI#StmEvS`%(gZ?Ws)<_xs?aAWLy9))>6ecs_rbbW_#b5_gC zjXK;|U%5G(*iYr=9CG32T(mRK!-tzo(UqIah|S53x^ilk@G*Cw7iL zce?^@ZR*0!mE>IWY;t3K0d^r~54AlnYO(qByb4`=hMTL&yB2P;S08S!LCcN(t+%gp zF2*}oA5N}Cd+)^8p`9CUu1A{-H#eZ&BQN`5dgoauH`ZDcZf*3}mwE1vj?0a5< zc|LvK;bwGwhj4RC%gc>A+*n_^xs}*Y<>oeW;pTRl^B}SP z^4-ggwdRxa?e8RZjy`vL2yJcZ!p+0vT=Q&lV|*8OH)apDJ@09;`Sg4QU3-R`N6EVu zZn9S&ZXQF+js2~+uXFCjJ69i09!Goc#804|8*ZLNn+rEjq21$rXK2o|PHwEVCfq#T zV%NjXGidYS#@P3~5A%Hbyu-8T`VQgdxt5n3b-1yxb3 zD>rWuo0A)L<>qZ-`{lcr8*9xc=i9$b>>PdW_72+G)P{ZMjYJ0xc zV)Nv-qt!^wwe@16J~v~$DF$7plm<`cAg zobT^7n)9ra8*8lzH=nlH^>FhU+I+Y%_C4RgJfA-A@Hx7^L%8{(<>f{lZmh4|d`ax5 za`P3raPu|Vncv{U&Clq{%`e2}I_TQRoK6m?#oVBS7H@}l} z&9ljk@pstwm_5|?{Gr9>)AJ8>?HO+VB=1_d$zFZ9`43ud>~Fn&o%18!x%zPO7utI# z{u}Mwa6@^Axo|TCv3s1~6O$X~StmEvS`%)DL>s#vZiYge4>!iX>ra^H)8`$AX05(M zxEZG9lGp4ne%jV_ATJy>I z_9GHFN1wZmg|?PooqNO0*d$!@Y;t2f5;ihs54Al(pq(3TCPbSHHxr@Vc@X zu{pU>S8iq|wx7%4##-~q`Sw#2I7gqm&4RX;U!8ly&8#F`^K5csJPkH2W)HPJr)#nK z^qdV{dxo3Y$-5SAvR5B&=0MAh{jImJbEd~TS07I1M0@YVbD^CZZstat3pewi-Q#>` zXwI`vZmhK?+|1iz*TcGKZrqw70_n*~~4Zq(t%`pS*}9bZ3{ zn}x`Qn}yNNTm&C(mPS`@mLWDLH|omGvc&dtIow!lJ~`ihQ3B`abGPNt*7B=!Z@5{W zglnG7_c2}!TO6~8+MY|a*nE1ffUZ5m&5Gn*3pd%T4>v2J{XXq)y?vdtB;L9DaI!Mm zdnaB6?c8v)D%xDQSq<$T=jZ1e=UFE=)>;#8R&TNE;bslA`TTxk?0YVSc|LvKVNG;> zhj6o2%gc>A+*n_^S)15TadY(vZ*YI|f5x36-${V*9xqZmcz*oNs?Lfphe^+bL*k`PI2M+?-0n zHP0qD#>ZgCV)jtm^SBn9PtViPwP(0FoxE$|CVTba<_xsl*x!2lI_G%2bM@imOtkk- zd=}cd;pS|#xo~q1+CBb0;|$Gt*2#^v)`XjLTkLwcIS*|<+!*_wCt#jWpLaMPUEd+x zT+s4zqYgLLS8gsO_EWjJh+KZZxft!tOYq_5YINo18e(&DqpsXsOKd-v!;Q7(lk@E_ zC2)>Dce@U4Ex$VVhMVh2xaQg9#`rSqa?BoTdtT9E^XYj5y7mk=HpO&-S6W_f)ZxbZ%FV09ekwPwkqbAk zqn-H%KHR*IuH1Y;Y))>}m75QV?dNj1vDSQYzWtj7&e7*?AEB+~SLfbv^Dzn6Je%AY zzlFVx*+Xs5cUo*dJwHL$p5f+G@~(xO?A3>x&(LyXf9vh*oOkig)rXVM(cU}p7ii~( zn=jGk!p&D`_sC0rzHy#)a$~JE;pXcWyB==7L7NXZ#=hr!nCH{y9lk}^cL+D%wY=P@ z!;SToo9~JJRBnDC7jAw;JM$-excL)Zx%m&VIk{0+ZvG;+pUdIKTJy>I_CFIiN1wa> zjkcCwoqNL#&)DIbXOkP_U(mm5RIBazTZ_%7CrA88&u}v&dDp^C_UgmUP^`<1{jImJ zbAHD=S07G>X5D)y9){SvgqvZB&4ruc(C%@*Gc@N}CpXqw6K;lYvFqVx1hn~ZW9)nW zfq6cC-eE-6>f45!ky>7E)ZxbZ%8mbyG(VM_QOJdxQPIvE4Igg&_g7SI#v?Z8!>cPd z;}hGDBX{Qg^I~hwC+FLbPV5|g?luA1+SG-c3CX$U+2qD}3~Wry9%_4z)nfDMIT5<{ z3^x;#cP-pxuRh#Ng7*8gzxDQY&e(Y8>ch#TXz!hPGPHBU&E#lv;bsc7dz|m@IhymV zlN)QT2{%)=*!6HT7215bG4?&j!91Tn?=UsGzC*Z~rsd^E9d4|z+)PXCr*bnLxo|T* z+L<%p!;SyWlgiB;#OCBiUAdW)*nau$<;Gg`$@%s(5<5qqyUm5RHg(}=K1t_hegr#9m36GEiX6f zaASStW^rOam768Vg_|YO&Rhx~ZdOEBZdM{TCpYTK&C0~~%Xcp~)|yYww_lprIr`jf z6|}Xf3pcBhbIr5Kjqx(rvY0*8_FS&T=F@XEbnO{#RwwUTxXE69xLE@&H}<#QzRp=5 z?_7O2SrhHO6R(AKZn#++Z7$rbgLaSe^Ye}KtdkpStqC{lw%GM>vmV-fxH0xUSHL`< zKJTzTy1qlW*`Vd+MjdXfuiR`%?5A?G5xH=)G1{4%;KR+9=*mqGVsmn%uH0-zY`=W> za$~LeT_4VVhz0P}{S6i_NEJFLdo0Znh!sTDZwx zeYn{cEjRYJ-oDP+9PeCxIO&b{-ifzEJ2%|)L7NLV+oRp%d}nCRvrcZTwIa$~LekMr~M zjq|LN8*8lzH;1>_^>A|p+I+Y%_C5E)JfA-Aa3s3EL%2Dr<>f{lZmh4|98K(}a&rv1 zaC0o$naAP7&8g_h&1uBuI_Qw-DN1wZ$fwnev;pR+ou6Z`O zF+Kr15wnNdo+q`~e0rXRu06xe+2madH`%KXH|L<`#{SmZ*EuKSovRNg=c2uL;`7kX z4L9eb&4rr_(C%@5PfT{4XPw+wYfZShu*I&2n~Tuq!;P`;c?#zF^m&Ji(e)j|%_S`_ zH|lU>edXp-Vn3Ce%gBYB%hArf0v~R!M^|ocAT}pA>dMWH#P-W~FE`ejPtLc$lGr)= z-0dc`wW$j?HdN8E$SR?^?LYUVXT^4J|kJx8AA|!+I+Y%_C2q| zJfA-Aa4)*PL%6xG<>f{lZmh4|+)wPM{`c%2AQx^PL_6~#e7JcEUAcLh*qq#`D>u&& z+b`d}+*oTqIp6+aV&~{{w`bATrY>*x968tg4&}!9QS1@S9{Tin9$kA}$^04E_X2s> zyq`Uc?eQ3954Godyv63z^F?&+xj#KU<4fdS3wznC4|^}8ooRpT?dzN;@XpnTwO7#2 z32U#S&4sns(7vs|H~AKt{jHNVYpuz??Avg*D?3u@5kN=+onCbnOw=z9H|L_meeadwhi1L+!afZn63F{1#n% zhPCg=yB5~6S0C2CM?2I0*4x)PpWvOV4{JZ5ofFo6M4Jn1KcRiw{0ybHzjd-^tu=YK zpIhvDSo;NSKCBu09ej#;9)0fpE4to0to_#VvZfAe)>qbkC-zfW`-5Cq`xEWV|KP*g zFf^;I4NGiJ*3`y!@@2mD)L$=s_dX9{)J;T~41(OT@Pzxq0NUiW50u;@t#MYdykE- z_YQ00w7jgT`PKRFNU^@M=D%m#Pi1X9a$#+Jv@<8bhqWotm9;5}&B>Ztne$CWV88Gp zYu1`i&bOYB*g5*_of>Uz>hfmOkaI1p8BdH&gxN!%9@C<0kFYi!dDpz3tQp&563iZI z&oya_&8O$|=-M-^%|PC@u$I00ur?#wnfABdzRsBp?_7OYn+fflur@Q=Tv(e0?c2&& zeqOV`b+TryHF>vLTkLvRn+N1N^DQ*{TPJJQT9bEMvBj>3wUyB3 z!d_aPi1X2a$#+Cv@_ShhqVpRm9-6t z&B>Ztne%N#V85JC)~q$3oNv7*v2*m(%nI zrsh{CYt~oRwsC<^Wo=t>VXZgXncLyRT7Pt9ZD(S0vZhw%e7g|XFTBW_wdRxat@{u= zN1wgBqODC`-fTB=u7x$@9kA^&d+5_+cXaI$*7hLpn)j16V|(m~*+cEQc51Qt^xP9& zdxo{W$h#KSvR5D0_C`C?{?^;qIeqcY)rYlx(9Q{K`=ZT-wf)e(t@C0NQ+5Gxj^^hj|`-?tLJ--aD)v)bg^X=2s_c)>qaJc7ac2?GSQd?NGEc z55tGG&Uql){HO3F2U@fPmk--wMSUHfxK(pPu7g>aT#V0 zwdcCL#pcuVMs)2N)@~y2T3E|oeOS90?M(YyZ(rwJfp@MxtlffkPFTAYZ7!_chW2fp z=UZs@w@%iqwI=U&dy8EUYj>c{hc#orgDWx5qtCtXMAv(VwYyqg*3|s!WX<}@+TAYj zsjS^YF09>)cIJKfu=W_bvi3N!IayOHbG|1C>=#~S&06!x`PTOnJ4c_rPok|&UEb^| za;}9n;|H+^Fnj3J<7ss55!Rj|@0$0MHDh}`gxN#wxgKt@`Sg4iU3-SL=g7Mj*0NV0 z)}BW@)Be`m*Ex^iovROPFQA=iaZP>%GI;Yb`HpYJPRHW_@Mtbr<+l*4`i&*4{)r^DTT>`v_fG`$i!WqtD(?(blFeZ}u5E*TS0dyVyIJJ@o1EIlA@;YhRFe z&HKrku|3|y?4kBt@3+`|dVYznJ;T~p;j+4+Ark7+OKG5{)P{0{%@33)`lcDCu?e3=6pjD+b_JxnziPW^R0hx zbM)CeG&Rp_OLcG+FV#01?}59FF&u@-#S^d z)|$NAs4aFqtc`{?AJ&Zh4)~fn&!f-1M@QGU3~OVwysW9+?!WKa`pVjv#D403&vPtt zVQp-*Z!iu%togr%S6Q2s*qp4Xot5)VMr^5l ztQn7wjfdGopB_`9Ymcxt6?xaZpR5_%V*<<`YR@%ci_NF!)acqXtW87SwXl}G`mi=F z+L`vZ-oDP62=82dSep**+a;bJ?cA_81KM2Jn-T3Ee}D6BH0N0-d)8W$cbuuku7|yu z(dNURvERwWnCH{y9cDq-cL;m4w!G}A!=Ck(J^y##{Z#g5Cl~hSKs$3zeAx4UgR!!= z5V1MgQ&;vDCbnPrkv(h8C+FMGMeH1X?zRZp+SKLE7PSTod&cu%b7S_Bm>08$+H=jN$2(Ua)|N*5 zc8QlkJ2&hti#8YbmP5NIKg;QzXPxX>Yfav9`4+n#_Etcf4|~RbCktSnPoH;K5nbOQ z?5))DvZoGv)>rmcCiYX=TZLTMTNUlh)$n0&J#=MnePVO6r>^X6Ky1J8BYW1GPtLbr zo!B}0+-*a&wW-USZA8wsuxGp`wgzSoeR^z+u06uuCgff7ezIq5kF_v+s6E%(EjFK? zo1$yau-A>eYhf>Y^_nd^z^ zooAiwS!+$+u}6zt4|`jo&4)c>zms(_&!^8j^hDQp2zy(%yzHsNp7oWzUc`PXd)tr; zd)uO&*&84B`k^a({fW)Vp1QKPGqL@`kL+1%J~`ihJ7VYPbGKd4)}}6RwktW;!k+Q= zSRc$D`t;ZhU3-MR-O0P={bbMB9y?(6Pkyo^ts!KXlqlKH#>=(Yhlm$NbCsA9{Th+8C`pXy;I1$=KW;P z*d9k=_E3ASqg!k~Jx@i~o?-7a@~(xw?A3?8)6vefzxDQY&M|oB>ciR@Xx}dJnP}&R zy|d8f!rs|v_c-6T(VS{)9~-tn9kyB_w=MVk+M#(pQqVxCW*cQ_AS-y!Us-}17j z4tv&D_AVgyQ`x(aT-dt^?aYhuVee{mW$zkdbF!zd>|IN2zwje_)|yYwx4(qgIr`k~ zI<&Q^%bQ(K&b6>-d>M8rW)FRO+<>kGKYEqw70_y?a_-_S9j|`pVwD#C|G!_mK;G_oJQp z06y$Jfv)U5No-E`)Rn!bi0v1CWY1dj$@%sV5<5qqyFHDzHg$QkXUMr0_KY9K9>VOQ zPmgEOwMW={j=XE$Pxg%M@d#!QwdZ=Y#pcuVd35a=_Ff?GTG-28eb{>u?M(YyZ(rv; zhIg($ti6Qx?GnF?c5c{v1#K?uy^3~^^Lt{l=RE6V&suBpj<2=Y^|1Fk+I-kE_B(kT z^L+Ze!yD-O4q@-jmX|$s*t5Q}_ZG3A%HG@L!rnV*XTFOMd!M2!d!G@TlRb51?{i}N zg&*0o)_iil{d>gD(dTYopsh_^-t0?qu7y4053u(!d+5{SD|GD<_P!?Xn)j1EV|#pv z*+cEQK5DV~^!x^0dxpJl$-5TzvR5DWzC%0H{?^;qIUnPls}F16qkX%?KcJl(_I^a0 z3wuAI-Q)cHyyraYWY1b_@{T{Z*!8gY3)+0xGxj_A1oM3Qyu+{P`VL|5x0aVZb=b4M zviCc&pUU1JTWmf(M@HA4VQ&=j zu7$nq)rY-N(ayBL_4aknP>MVH|XQhp;zp%gdfR>{(yg8;{seWp8|PVQ&JoGbhA{ zy(!W4_r0lz&B>m+vNtuc{lbszS!+Hy-+m%u=jd~{Y0%cDE^jt1IoHCT@g&&9m_79A zF&(=02z%3$cg_3Bp0Pb9#q6Q>T$8ofe0t7+u06xvjO1Mld)ccGdo!V(X@BeO>zv8) z&eeytnbE#o;#ttn4STbq&4s<$(C%@5e%^DQb+TuzHF?L`TkLw+n*(h=>>2x=Oo4em zecoYCbbW`gH&@Hco;vJVU)h_R*iU6|9&%xCUbHjk!-u`a(3QQ#iOtELy0W(fvHil2 z>{)9*Ip2PMV&~{{w)`6gk(zp7Da%0+>DY>9I7r_6U2+kax}d$)2%27Q*bI z_FN0M*nD~}i>^Jx-g4w!3wznC4|~g_ooRpT?dzOH@XpnTwH465UE&qd&JBAjq0NQ8 zmC^2TetzC_o^`Tktu=YaRa)$N*jp8CKI|F$oh*uZK7HO{HFSN4u(x{4%bq&ySzp;( zgV;}HZ%uMxZ!NSl*T#pvjnS38O^D6Op1QKPDY5;+kL+1%J~`ih9b)I`bGL42Yg3mu z+l-uRVb6FyY+cMA`t<0Iu06uu=Hy-TezIq5kM%Kos6E#PEjFK?TcB&tu(u_7*TP=* z>cd_Sv@`8*y?vdtA>O(Au(lQ2w@cg;?cA`pHQHR*>xFiY^Yinb^Q@CSYpuyUZqs7d z!``-N^I^}}?_?v)^Xc;rz0vg@!rpc*FMH~+XMJU_53!&6-;3U!T-e(I?aUqVVQ)8d zWp8(4bF!zd?Cn8pzwje_)|yYwx8I4_Ir`jfPqekE%bV>*&b6>-+z;!E*+ZWmd!uWQ zu(uC+*Sw$X8QY^jW)HRJ+PTH%({o>R?HTs=Bkx++%U*rh+aK*r`&(~c=j?)au0E_C zfcEVYAE?H{-a%+{VeeqHd;I&YZ=*TSI@z<&v2*mf+i7TPQGKX3qw70_y-QkN_S9j|`pVv=#C|G! zmyrv5m!q9|1wQQEfUfM_NNi5_)Rn!Pi0v1CWY1dj$@%tI5<5qqyWNboHg$QkTgbT< z_KdH_uEOl0Pmf#CwMW>yjl65#Pxg%MaSdhTG-28eb~Db z?M(YyZ(rwJhj*?%tlfq7?GoROc5c|a2W>9w-HUdQ%=N_d&a+PTthFZZcwdWM4}15c z&4)c>zmw}R&!^8jJb^)2_>^*{Z=A-zq_bj@y_Z+b~ z*;7~co+q|n_>nzp%_ryEKSt~veeU)G+S=6R&0ZwuTG%sw0(%^@hdw=CLf0N)?`86? zc|X}Rw#Sp0J=C7-sTP}0&sWg3XV`m{ylY`Ed-Y-OHMBGBZ@qn;^EBSM`mpvo+P6#m z2HLq{?@hG1u=f_)Ju;V{_nc>)>{)9~-tp}gyB_x5L7NYI#(pQyV4hE(cX$_F-y!V1 z*YdKb4tv&D_TDG-{3-ScW)FROe2=a@!rl+$UGsjjXKatpFng#y*XJ!ZpPoOW zYtOLv6M5IdUiRw4-p^=f+TVKnI_C?#bM;~E7qoAe_*b-Z!`^Rbb7Ajyw0oTI+i1?S zPWG&|Chz!1i(L{nhfarw)78SN8rU_EXsttgtr( z+L=S*!`_H!esyHlNCf6&PhHs?**q40WY1dj$@%s}5j#hpyN!ajHg$QkQOUU$_Kb(Y zhQ{onPmj^iwMW<+oxE$_Pxg%MF)U^ewdWeH#pcs<40P=o_QoXdTG-28eb^fd?M(Yy zZ(rvOk9V#{(ygo0!;7Wp5I4VQ*5jGbh7`z3I^W8o=K41mzmsV(&!^8j%!jV;5ccM8 zdD&BkJ?krb3lRIM>@7$x>@9?L=EC@}w=|kx1K3-Jz?|%hfmGlXETX883z{irGV-9xI@0kFd8QdDpz3>>1l*am*fS&$UF0&8O!| z=-M;vtxVpvu$R61u(t}@nfABdzRp<^?_7OYTNUlwC0-5f+_1Mg+FaOM1MME?=jT1= zStoneT9bENv&F85y|vKh!=AC<$x@i-)8`%5M%Q-;d+W5k?5V?^^_9JKiTzae)*~18 z)<-*Y1AN%)j^@_@_BJOlCwuD3-WJ663qP`Ft@-49`wfYmqtD&8L|dD>yjc%&u7y40 zjj@d|d+5_+D|GD<_Ii?c&HKrou{}1y?4kBto3_||dTx!bJ;Pov@~(xw?A3?8ZP3oN zzxDQYPB*-B^>Y(RANGv>PWHk)pFZz!G`hY+*gK}> zWltUUtgq}HOYEnzcO1E}cRbpeC*Z^08EAeDVDC%-d@^xmb7AjNw0oT26O%vZ zStoneT9bFYti`T}z01+&!=AC<$?2Hq)8`$oK-YH&dsnu+?5V?^^_9J=i2YRdt|k}u zu0cEVT71~M70s^!?A=CSPWIH5z1xZH7k*^VTJy>I_SX?RN1wahfwnevd9ypoxfb?} zZ@{j{?4eJOyU?{q*t?s&Yu-=xjO}qFW)HRJx~awH)AJs5?HTs&CGT3;%U*rhyASP5 z`&(~c=iH2Uu0E{YkM`{nKY(^_*n1FdF6=#oc8~M(^PcmplRay#$vZyWV%NjoBWUws z&)Dze7R>YM^A3-q>pO(K$68+Y)M3y1%HHF|e*9NWhPNlkML${eQ)uTujb8xzWa1gL zz1?H2HU9U2&1cT=tUpWaX9UdN=AI*$InNjU0@^t*;+}j`_^-JTDXb zsrPw>T;{x5^lND6ypGR(-awDae)m{wjdRRrj^}xk*iXIBTjVn5?V{g7JLg?|?(-gc PO!m9ST5FtR-Z}pdp)Bqf literal 48348 zcmb821GuJFvPL_$?UNJRwr#W1>2&OlZQHhO+qP}nm?U?tGyBcd?c4L@zyEXg)LXS{ zSM9I9mF_>uOwVA0PBK)lUW4=+vDc8jZbA7OrPrXn25)wVW`}Hc@@A*N4%cf4PWI}x z!nXakUS!$n`|q$(FX{}&>0Ui+398&o&1vziw-izRC3=nAYaI$sMXY!4t+&~s?~WVw z>Amd^yY${{t8KUJ-Mh~Y+icZ)`+i&Y*`?n`z5DLar+5EN`)rTgs9#@B{QLaQH2MFY z-)!suod1gpb$ou4Ew|aQ&zAqI|9m|7|Mz_DzX)^a=zRZO`w#5-ml*8-f4=trm>1h| z|A9RJ2%XN~`?vG8|I?km|4p{tabVB?cBlQz`P#pKr_b;Cfjs|(oxcBazV<({gXizp zr+>fy#{cCVJpX{_YyY`Ac>cf759Il`>)`nZJYW0o*#ZAO=l=`;yLZ6y{^S4C^VjnO;eU;Pdj5f&ul+apr}v-p|AqfP|MdO`a=!K-ywmv? ze(U?6qSN`8^R@pMo%S#M*8abC+P|Ey{g3JN{TF^~|GPVV|K)t`e_aR9?{}d8_Ur$H z9X$Vl=WGAfJ9z%T&kuzE!#jBX0ngX|r**)8&-wqt|A`&&Kj8V=fBsJAU-+%>f1OU} zU(VP5lXv?3h2PqLx=x?JoUi>K>-7B>erx~lJAMD<{D1n7((CGfx_|xtze&HX`)$8j z-@m_r_xwG;DE!?=hu>lSdx+G?C0dQ6JN^5Bp7%KLd%WD~J!*{s?lDoP_vo4D!0*w& z(|gn!1Ki`|4&KXuzSj->9t(7EkN;C+fO|aL!94!{ewTL}K-;?==ARgavHw$JfP1Xb z={@R92e`+DozA1y7~md{|I>TP@AsAU0q!yRKiy+sH3qoH<^ME~)TpcvaF1^~y+^Gv zz&+;a^d5Dl1Ki_?PVZ4`3~-OZI=IJwejgn8UoUHQaF72}V}N_S-|0Q-Ob3`p_dosh z!n^(Z_tg9~H^4pmcY2RH(*f>rSEuu+HOf7f?zME!KW7@5Iy17CV)bS1$vTO3HR}=9 zyR6?>qw(C+vX)@=VeQU3k##leQP%scKUib&d;UzUWmx@K`?5}F-N<^H^(kvG{yLq2 zH3w@Y))uS-S!c6uW4*-sjx{24nwqsZYeUvvtkYOGv7Tdn#Tu61;HF?L%Id?~i*-8d z7S@Zb?^q-8J!V?gQmlk!t3tTFhfa$LDr7t4?S zDA=KUyk5PgY5U{g^Z0-J;|;Lig>RkYuZrgSfBRdbt=UDCNAJ<*Pt5P&=BbVU`!0j` z8VSwy{`N<0zIPj)=$Ke*&BLV_K~J8w=Eb*$@zmN8ksjvkgzuhl?NoHOUN>uY_KjK7 zkLTuK-@7r!9__w&`-M!OX&$n?udEs01K<0O)?&Q|8LUTJYd)%Yesq@g)W+whqvwq$ z_A6=_eEVo~b_}$+$-9cP-hV8Xd*0zUs_GBR8jfWRwKa!tzVXx?8(nMe)8;TParb=x z^{h31q?30TpCu30H{ZJEj)1kVc31A>3@2dOXFhz-PS|1}@r3#lv7Ad6%bG6hn>Rld zJa5ir?U^TTzWW!p*d%D<-7M#z@BBt&c^_?_ds1|L?!3FnT6_@Ro!WQwCuhx{lfM$e z$>jKc+z%&HH0|?D$+G8E*l;oqqm0In$tDkcxujut~JBS?8My*CpoJPCv%{EpVl|uy7rkK zYhP_RnGvM;b`CD90)V?D*F~4%+_hCPklLhf*#`m)j%bp8k!^u+U%E{9B#^gj@IavmOL6)_` ziMhrTbJmOC+ee#cTNZ6@^1{h-#N6|4&PRVy)?zGcsI9qp^NpwG^5|MKoUB0Hy>ODV z+HkTWT28EQzIE-h1lGRVaIzBGKH+3#w6Soq3VK0S7i-mKt#6*3m}^crS*`i*hm+OO z#=BY0LErf;$?`thJog&t`rP4U%@&svweLty%&(lRh407x{Ccu>)4rc|SoT~O8&3M7 zD<>P_8*3o+o3k6E%}ri7*@T#T-p%>wug~huvWD84 z8#Lc|YHo_IHN(kf#N7)g8=$SF4JVtU<;42tTh~5)u;yyR$rfn)gp)1N#=^-~Xy?+! z+PYcmnRD@q zlQYr2PwSg+U2{*s+E*J+&O+NKoScm|7EaDVI~V(PwPk(tzVo|^<$bhy?mN)+xx>kwEiNbOaAJPtIW#GLhg`1aA}*&aumo4j!H1TpvS zVL2cD`&kdLtf98%gUvUdnopu@&2aJ*areSW&T7NS(`Y%dzWLU*&qG-IYQxDhX#0ed zXVJ#O$#ZDuV!y3fT`cRHCnx5b6HcCQzWd?i1+?*QmUGZ|eh;&}k2cT!BDy|zIC-hX zsiaVoS17o zF=zcczJ0Vg`vKbArPuFh zV{)RlR_^yNwDt0?<-}a$i8<>Z@a?0`*+0*4dubgz_`>C8vLM)t2inixu*l^;%_os^%L{&6&`(W;mIdxO?FwXSLyE7POpL-+b%Z zXKJi{wc%t|w0*+KY-nTQWOlT3v0uKQSl>K3G1r`MGDq{>4<~b?jfWF`=Qj<@`)Kpr zbD`^Vhm*NmTu#*C#Qe&M|J`jr{`(@?@ZT5B-?Z;%0kl0A#Dd89)B5IH*FL?m_SJ@y&Cot~{LRt!4L4h$ zjfI;n(Vpp|ewO{rlN)o*2{&6c-~Dj2HQIQ%(RZF3u)Lo(bJzx5=MZkTZE?9#ha2-N zH{0R+soZQ&EZp=*+j9qOxY-?Dx!D8XnB1rWlKH5CnUTAZZ z7jE_@=AL(x8~vSFJF~2zw&pI)H=dgNpli)=voCS?!cER}Q_bm}^eBIk@@mhnqvt#>0)i^W2T){j{0G zq3Ak?aC2CT%Z)nRm|wX$9N$mn<_Kcp=18zi*~`y7w8uQr^ViT1hUpM|z>xH%hbEZm%f_DmP`^Zmwt=E;q@=7gJbo9}+O zIS*|-+~_;c6Ik9)n>n11u5$=C7qqzCsKbrv8TTQBckZp<~Fn6tkW-#*$r+jVGjlNWBTC+41alNJg;JTKW*l4C%Vod+}zdTa-$A6=2vd+#`jaXxrbP| zxfgBE`>^5WQFP_zF??fkqpsXMj&HrZd$}>!cw)}}eti3A^K4I`%}rjod6JlW-c4@w zA7DMmvWD844>jL-YCeUoHN(x*#N7)wIjapf&!Byu);Hg}_IVg`gf<>-^quD;Ebphy99~A(IfR>6T3l|_ z;l}*R&8zr+DmSkY3pcN$?fC{a+`Nyj+)PjC ztbMiN-PBif!nVZ+TI=*rEX_{QW$UAg%S-+FoX za$~OX#GL)l`1aA}*-S7udEsUd11#?*H~PPze`Q&tM`M3$zVXx?l-yb~+zdwCy>OGW z+Hf;C`*LG_^R4UMf5+Na8%~B`-{+1$B))ya%~1Hp!p+cV&)7fTZ|rBD+?Z=lxEZGT z?uVOU(Z;)};av2c=f7CqPn$Um$6lR7xEa31C>7J-3a^jk(4XbM~X)*hiaZ8y9UZKRsu|&3FXd^KQ;Ze^l0J zENiH(IePPrr{?(RS~J{CK-|4>le5}zGa*`TtZ%+`?K1|}zS?jy5!&aD--WhsxS1Gj zEZlUXJ!5}+xa?=1+?Z=lxS6E+?uVO6(LHg_Iv0KCIVQ{dX)}k(&~*;sX7U!78+Eub zzj8ALzMsm?l*Gc#RA_rnjSV;c_x&q3GvgbR8+GMo7JTcu9d67uo|v3e%!~Gn{kz(-pLud)t~udmzUI3hZstcD@8-O7(RZFR zvb>)*b65af=MZidY;n0!ha2-NHw)qWsoX3~EZi)Dw&$YQaI*}$aZ;evl`l1xLF2%Ansnc$ysf<*%B=`);Hg}_UVhYuQr@)h4#7QZ;iHZxY-76 zEZl61_Kf}W{lsgswHi&B4Uo3pY8d4L66N<;MEv zTh~5&W9_RACx@ba?)ZnH?Hg_mM;i+_N1#2^MSXj?>}Q_bm}^eBIkNfghnu6&#=AM| zT=bpiJ}mF2%^Z$K*ExinV_IBp)Zxbb%FVI(ekwP|5zDVP$D{3e0yf;7j;`FCfp1K1 z)Rmhv@vY}}xG~pwV$S|V9Q$bVY-gd(<)`OtxH+4Ed*03Y=%2(onPm;NHBV{2@zgvA zU2BG$bBVhbZgN%|Zq7sdKCN%Qb?tL1*1psppI)YiPN`NmW8Hgv5SZf+;;Ubx9wZMeAuEjQLT-@5j>9&2B1IJpz;bH~36 zZQpQnH`-XZxd-i;F6!s|js47%8*|MGH}^K*{cv+1+ITlLoQuBmyn*HYw3);G=sJgR z^FWKsjXK#=^~OXwP&}Ki_ZcXP(@cYfiX%z4`8kn>WzL zyQ$$^^quEZEbphy9Nt9NIfR?HT3l|_;l}*R&D;2XDmU*C3pekg?fD)y+%6PEYWW)452>m0((FD))N>TqLz<>ps>Kb4!` zh=rTq(f0foHrxz0*#G8caC~EOqpsWxfp0y2dgA_bKXZ*I=IsB#v5z*-HY7RbCNJC! zMa(_#=6v-3ME}LIhT58pyT>=4nnM$-HN(v?#N7)wIjapf!=ink);Hg}_TjO6`e?(= zaA==9{_yzr4L2j8jfI;L(Vnq?S6lWoPj1XLC)|wGeD}l6$Y|r;oOdqz&T~+#bJ1oF zqoC^?!p*2HE;s6MV}9kvKP%BsYv_{JQdx^goSzV*0% zPh4)yHJ+HW9}C|;+B{nq+T7%Yn~90J=iQu-{@AQ>Sk_QmbKK?|Pt9(0tr>16A?{wd z$ysf@`p&3we&3pY8d4L9?n<;MEvTh~6bVeP98Ckvo`?)VF$?Hg_uLK_P= z3!^<_|9ro(pLud)t~udmk>$VGb9R>Z(`F8fq3ayN&EhRCH|lU>e&uEf zd_R?&B|XbseP2tV?YT5I+^mGI+^mdmOm5Vbn^o|wmv=8W<{D4T*)M}{A8np(RkXRu z3pc9~bI-fUjsCK%)K}ptbMiN zWG%GM9e-`KeZ$Q<^8mo zLvM7QL%7+X#pOmFZp^RT^uhO2x!I6dxao_w=SJ9Yvjw_xvn9SUxlvbcw!*hw-o4zI zYdkS$-w)qD+C1CVXmgVnZnh!jo_BLT`Wv%0VOc|M%}twcJTnz> z8p{~>*d|cjk(4XbM|}V+ee#cI|OZR^1{ub#N6|4a-+WwYhRW%)Yjau`NmW8 zFm$aMZVo5zUbx9wZMZoCEjQLT-@5kMA8TK2I5`sSbH_glZQpQnG}>6WIR@<+`{(?0n_k2EU=v(6^mNnGg>*nSg zPtAMLwdQ`*^p5utcQ5SatTyc3kG7}v&9|<7Zo%4D8`d5`+b66&h&C419zr|YE_yo) zm-Wq)HFM3$+#YVe`(f=7wDE4vItP8ri@1dP-7o*RzzIn1{t~r_8`^|ShtbKqs9@g}I z2QRX`k2cT!A-XYu%`bd>kF1Ow5jnky4DD5zYurN=aV&k zYkbABhT405-F)Mz`765C3~Rr6h85OwRvXrSN88i-=3Cc3-(c;l4Qu~G+b693fi@P_ z{zN<5E=Hg4Ue-5H*3300bNj3L?uRvBmhrHr?>qRG<$bhy?m^hA&mGnVZE;ys^V9QZ zUzlH6^Usa;qsi}~gX4s?A<*_55*yb1GlMH@BjFp9HMKJL8(E(fUS!Q&)m?p|2SS#4Mw3vEy9n{QqF43D+1Hmr?}woh0a2W>2@jf-}+UCcY**Q{@zteI<0 z<~CmQ-4AQyqm73(ec!xCCtGuL=x&UzAj`)G4^I<&dT%gm-H=3ZFSpNusr z%Np9$m;qgDgtZxoyXW)Cn!YtAXIVq-y{2fs@zk6NU2BH5nTfj>)^b)G)@DK5)B5IH z*FIBX?W+xIv!d-2)@DN+3v08Zovrgtht5_M9IZ))q%s)|S9GCTnVC?zbe4 z^}>s+nQJ^TXT1QveY82d6x!V6WoAngb1$svFT`4qWesg=EQ78!!rHRL-ShcmP2U;| zv#g=^UW+u}cxo<(t~JBj^2FT>YdNb8Yb&7bX?^ppYoA52_SJ^9718zyYb&9Rg|(H@ z&enciZCT$uSu@w1%x#tCyC2q8MH>%m`o4q3Sl&mQ=Uxq6pF6Cr-r};R=BI}>^DAp> zxWPvg*4D%cYiptHxi&VeZHTU{^~EHTaGuNEVZJXx1AJ(=- z8xL#xzJoq2@1xCgZ-=hW9oDvQaamLI)5Dtim9_qE@X>^|9dN?hj%a)CgbizZp(|^9 z;~SGTwKDhH2giEhMb^wUo|v=V8Q(tIoZS~~Zt^m-{fN02*7SE}?ZUE#HZ}G~*BW8% z0OIcXe6pr*jonz*P+{MceZ@Y*;%DU0FLF-e#lQn&7oW!z*+IyYceB-Hk zF1pqXYv&PnFRbOPHmseGwx{*Yx2}Co!P-|F)-FKXC#+qFHWt<{LOWagISZHd&672A z&B@#@Zod0r?Gm){u%_=jIF;pnw0Z7J(e=5*+GQ;+YifRaSTnz}cDWmTG-2%uoUnE! z+MZWo!`e;g%G%BN#$-*c%>8b`v0iwQHFJ$8=B%&Aw~sbwZ$+D%yv*!2V(x`C{cBm* zu&kj?joZ<+Mp(OpxO+aItm#|hI+ium-s}428&AzU(Y0n+yNkGcVJ&C1VeM|TJ*{uP zb?tKl*1p=Xb`RPvM;-hgw|L)co|YW`1SuVK?|_!rCJ^VeL`0Js-n{wdc^4wde7T$(mZ3`@Mi; zz3?Jy<{D4TSwD_%A8pRQh&DHQnb}Lk+zV^^PqLn1Swou|FQaRXu=WaZ_k2EC)3?S` zENiH}*VD~6o|>8vR=YAJmpF6C**W$9K=BI}>^DArb zyTL~j);_=qYagQR`4Kj(eTA;9eT{ES*3`<}?;9NJg%?>f*LY&i`eS_iXmj>kw7JR4 z%)TS$URcxrl=TVA8rszO9$jmMwI7JP=kv*$zBN8$Swro;K5xG9)cg@$YlgL-h`Sfo za#kDGen#8V`sQ2LK3`z%s|{>`)_y}fTl?kvn)S_-HFM3$+0 zXyai`-*@mO%ll~a+<&0!bBDD*TU^%E{PeJ9er4@1H~7^5ohLZ_&DtPndk%^XYyN+` zRn~^XHzsRpOXhyV8)tVuqK$>UvCy9B;+V5>+0Q)LGuNEVaqQ;1ANIyU8xMQ>zLQ~C-cOr3 zjEk;w2z%qTxa_IJp81tM|9{N=RQ4tymS5*4MB8&BY}lIuUD=xw-20y=95J7xr>i z8}^n%+td2yTh~4dW9_RAYs;gZUHlc$_6>V0qK$>UmC&B);#K7Po&C&{J#)>;99M3> z`(bYtwDGW~?>kw9<^8mo!>Z^yhp@L=i_4xm?3rKLTOHp|Wp52)VQ)>eJ=em9y*}v5 z-iG+bWKUh$>x*x_@FRQX8c)pGuZ?dXZJuo-w7JR4%=!^?FYM{B%UXwJ4Q*;{jIK4p z-X_G|^Z8^?-x}+&tfBT^>o?zcYHo_IHN)O!#N7*fIjaqOo1^V%eeE{hKrHO-h_>fW*s!-3y0W)7zA@QTSN8V7w_f;>J#&pG z=InRIw~sc@wlCV;>F>(gg=GzGYV41$HNxHj#NG4xWKZ83yRodH_FlU; z-*{>sh^{rm-a*9O3wt@M4SNTp?P-1Ut!tk>u=dr4wL{R(F8-lt`-Z*4(8j{v;b_m; z-`TkAXP)etYfk2PMDyJbdq<*;hdq7Y$(}6lr_CIWLf1Kjy`x)P_S9j|{L0=j_)DH+h-a*~Hun zd-^A_PGng_n;Pe!YmKmXE^+sKKH1Z^#>p&esJ+)I%{QK!=b>xOuy;Oj_rhMzYQx?I zXnR`UeCyihRIGiqVeLY+vx|Qb+P-1$VzjZacL~}v_U~%Te&)%Zx#nb!mp0%1uy+~S zc-YhTot(z|WltUU%&+WSiSMVfcNMX)cQx9c*I>imE$GVLt@y@d zPhHu&4c~gpGS-w5f3?y4DDLcM*5b z=aW5sYuv!HhT41G*nH!uc{jS&414zwcQ5SatTyc3i?*lr&9|<7Zo=AE8`kbaJG=Pz zqwO2^9zYuldk>;LWB+{Lv!8jgXRbM!<3r7NKkPk>HXio$eJ3}wyq`96cm!SN5cVEz zaoJOcJ@YGjkKy~N>^)8_>^*_D=abm5_aeHo_Y%G_*;7~cUdFdx_>nzxjVI>ppTf6~ zHqZ77+T7%2X0H-+FYM_*!+M%!4Q*Z!rpsm&)7fT_v~k$ z?3rs$=J?0t`KO!m~3y&v$c7k*^VT;qv3`_J+1qs_DZh&DHQnb}Xo+zWg9U$VYn zSwou|Kcj1nu=fjb_k2Ft)3?S~ENiH}*VoNAo|?a+Yt69tn`c;IFK4x3?{~C4t#7_{ z?eh)RzS^+%FSN6Z{|DN>Vee10v9R|S+B5cdHZJ>_Cwu0alR5Sp^54GuVQ&y(#>1Yz z@8nyS_tRz$gR)oW5cURZaoJOcJ@YGjgX8nzxjVI>phsL*$HqSOH+T7%2W}^{vFYM_L%NmAd4Q*mW#br+& z_RO#BO^olSve!*4>`j8U=cL%MH!ZrdHyyq)*;7~crpLEl_>nzxjVI>pC&RaoHqSN# z+T7%2W-}6VFYM`0!J3?94Q*=7gswHh-ps_^^Z8^?-x^c0tfBT^Q#IdsYR-bLHN)Pl z#N7*fIjaqOv!U&2ee7ppAz;ec#D6Ebphy9OgyWIfT9WT3q(jVbA=^-u(D}DtikM3wsNq?YR&( z>@9_^>@AINO!m~3y=CyN7k*^VT;qv3`-SoCqs_A|i#9iTnb~s0+zWg9i?SABSwou| z%cE^X6if_H} zBYWl=Pt4i(#mu#`o5EXEbphy9Ckw2IfT8PTU_?kVbA=^ z-Y)olDto&U3wyhv?YTQP?Cp=P>>Yq_O!m~3y#w*B7k*^VT;qv3`#tdOqs_A&gf=&M znc2a_+zWg9d$IOpSwou|hoEbXuy-hN_k2Ft)3?UnENiH}*FMcRo|=cDYt68CIC1yF zUe0R6-VtbfTHk!@+Gk&^eYIijNOkd#Lfbd&9gQ{?_Krb&ri)jRWk2&|&s=jd$77rC ze%Lz>Z9MGh`%d;_c|UFDa6G!sA?%&d;d*)a6PQ>?9**l3?*gF|*&r`5r?;Lbx z?_7LivZt=>oriC|@FRQX8c)pGpNelEZJzCXw7JR4%q}42Uf9z=opl<^8rsyj5M678 zy^Dyu=kv*)zBSHZSwro;&TPK%)Vvs7Ylgi`h`Sf|a#kDmE=AkZ`sQ2LK4)R=s||aX zp`Bg)%hC1?dsm>1g}p1$p6TLM7Ry{pm2!=AqHxqTE8_@Q=5gYdIL|69i!Z#*+>dM~T_|^+Q zvS+UG#GL(2`1aA}+3rD`o4l}hFERJRp8hSYn_1S-rpA5fS|jY;PuxA9Pxkb!aVyIj zYVUPh^NpwG1L#^a>^(@_y|99cy20SbG@l?BYLywr|*b6m2Z* zJ%;v77q24U_v~k$?3rs$=JSwou|Z=-9Cu=fse_k2Ft)3?S;ENiH}*UQZ}o|^BXYt69t9&z`= zUe0R6-uq~KTHk!@+UFIleYIij1GKY?{~_AGVeccfv9R|s+B5cdHZJ>_Cwu0alR19U zeD}lNr)cA0Pv3X)D$Dz6Gl$R6bq-xCcLGuL=x&i)&G`)KoQzoE@dUS{??G55lr{&%czS=P{|#=p?D zM%ep^3rzOk@3INCGz?`q3_=E(dOC4LYtes%xr98?u9-5kys04uDmNnGg zYn0|2PtEbrwPx5GpSXKrFK4x3ZvwPEt#7_{?K3LYzS^)hA==r+p9pQ=u-An)7WO7a zd&d6xzGpx4WY1i4GRN-byC3!@K^qTy`o5FVSl&;YIZTSKa|jQUwYco5!=Cw-y~*+Y zRQ9GI7WSq@+jA;x*qaH>&)@9LjAKmp)Rnzi@U0hqWY1jVi8=eJ@$I9{v(1V&H+h-a zY{c9Pd-~I|reRq_n;NsDYmKls2XXg&KH1Z^#&j%esJ++p%{QK!bE0d_us0WR_rhMz zYQx^#XnR`UeCyh02CRLxVQn6?vx`43+P-0LKD4p0H$U1l_Rse{``n-lD|9-ePEbE{+X*%cJ@E zo4plqjLDw5vbQ3>^}>(rnQJ^TXTJo#eYAPDmC)uUFEd-2n0sMQe<{|IENf^}V-<9* z5%yLk?w-#ld-~Q`nq>{O_gbd;##3`ObgdcoRwwRW*vnaM*joc_PwSg+UHdGHwXZg; zt%-Ja@z+AzH|(vAHWv2QL3_sj&cE{pAr|&FMBB43HtcPI=I3wrw!|?ed+N&GR`}KnKeA`8 z@x+|{M)>y8=GnGJo147MY#UzLU*a-cOr3?2fK;2zz_9xa_IJp81u%J@Nfi z_Vywc_Vz~Gb02KjI|R+o-|QWVV@&qcmA%98trvb|&s^h)Is1L_?W4`J9ga3Pd70S} z#M}#e`unr?V_8F+8b_jQjj(qVarb;a+0(bi0W52%z1M-wH=deDqifBucMNg&!d}j5 z!``uIds^Ro>)PiatbMg%?Krfvi+?=YzG3eKw6U;vBHABPd`8EAW+i4A)fqWSro zy^C;+$)38hcQL;8!jJ5kYdkS$e-^%dw0X8m(B>vDGrN?Sdtp!i9M;(^YiLvBGIXsG z_AV#xp3f(H`qns?Wev6WIzi*~`<#!puQse* zjdphNuR+^4>|KjC7WS?~d&d6xzGpx4WY1i4GRNzi?|#_30c||&>HAJDV0k}n=5Qmr S&LQmG)Z((I4twUy-v0opb^8+l diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index b6ac3db..5a5f6be 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -558,7 +558,7 @@ pub enum Instruction { Add(ArithDetails, Arg3

), Setp(SetpData, Arg4Setp

), SetpBool(SetpBoolData, Arg5

), - Not(NotType, Arg2

), + Not(BooleanType, Arg2

), Bra(BraData, Arg1

), Cvt(CvtDetails, Arg2

), Cvta(CvtaDetails, Arg2

), @@ -569,12 +569,12 @@ pub enum Instruction { Call(CallInst

), Abs(AbsDetails, Arg2

), Mad(MulDetails, Arg4

), - Or(OrAndType, Arg3

), + Or(BooleanType, Arg3

), Sub(ArithDetails, Arg3

), Min(MinMaxDetails, Arg3

), Max(MinMaxDetails, Arg3

), Rcp(RcpDetails, Arg2

), - And(OrAndType, Arg3

), + And(BooleanType, Arg3

), Selp(SelpType, Arg4

), Bar(BarDetails, Arg1Bar

), Atom(AtomDetails, Arg3

), @@ -590,6 +590,9 @@ pub enum Instruction { Clz { typ: BitType, arg: Arg2

}, Brev { typ: BitType, arg: Arg2

}, Popc { typ: BitType, arg: Arg2

}, + Xor { typ: BooleanType, arg: Arg3

}, + Bfe { typ: IntType, arg: Arg4

}, + Rem { typ: IntType, arg: Arg3

}, } #[derive(Copy, Clone)] @@ -896,14 +899,6 @@ pub struct SetpBoolData { pub bool_op: SetpBoolPostOp, } -#[derive(PartialEq, Eq, Copy, Clone)] -pub enum NotType { - Pred, - B16, - B32, - B64, -} - pub struct BraData { pub uniform: bool, } @@ -1058,7 +1053,7 @@ pub struct RetData { pub uniform: bool, } -sub_enum!(OrAndType { +sub_enum!(BooleanType { Pred, B16, B32, diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index cd1c642..6c231b2 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -142,6 +142,7 @@ match { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -166,6 +167,7 @@ match { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -179,6 +181,7 @@ match { "sub", "texmode_independent", "texmode_unified", + "xor", } else { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID, @@ -192,6 +195,7 @@ ExtendedID : &'input str = { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -216,6 +220,7 @@ ExtendedID : &'input str = { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -229,6 +234,7 @@ ExtendedID : &'input str = { "sub", "texmode_independent", "texmode_unified", + "xor", ID } @@ -708,6 +714,9 @@ Instruction: ast::Instruction> = { InstClz, InstBrev, InstPopc, + InstXor, + InstRem, + InstBfe, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -874,6 +883,13 @@ IntType : ast::IntType = { ".s64" => ast::IntType::S64, }; +IntType3264: ast::IntType = { + ".u32" => ast::IntType::U32, + ".u64" => ast::IntType::U64, + ".s32" => ast::IntType::S32, + ".s64" => ast::IntType::S64, +} + UIntType: ast::UIntType = { ".u16" => ast::UIntType::U16, ".u32" => ast::UIntType::U32, @@ -979,14 +995,14 @@ SetpTypeNoF32: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not InstNot: ast::Instruction> = { - "not" => ast::Instruction::Not(t, a) + "not" => ast::Instruction::Not(t, a) }; -NotType: ast::NotType = { - ".pred" => ast::NotType::Pred, - ".b16" => ast::NotType::B16, - ".b32" => ast::NotType::B32, - ".b64" => ast::NotType::B64, +BooleanType: ast::BooleanType = { + ".pred" => ast::BooleanType::Pred, + ".b16" => ast::BooleanType::B16, + ".b32" => ast::BooleanType::B32, + ".b64" => ast::BooleanType::B64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at @@ -1294,19 +1310,12 @@ SignedIntType: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or InstOr: ast::Instruction> = { - "or" => ast::Instruction::Or(d, a), + "or" => ast::Instruction::Or(d, a), }; -OrAndType: ast::OrAndType = { - ".pred" => ast::OrAndType::Pred, - ".b16" => ast::OrAndType::B16, - ".b32" => ast::OrAndType::B32, - ".b64" => ast::OrAndType::B64, -} - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and InstAnd: ast::Instruction> = { - "and" => ast::Instruction::And(d, a), + "and" => ast::Instruction::And(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp @@ -1447,7 +1456,7 @@ InstAtom: ast::Instruction> = { }; ast::Instruction::Atom(details,a) }, - "atom" => { + "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1456,7 +1465,7 @@ InstAtom: ast::Instruction> = { }; ast::Instruction::Atom(details,a) }, - "atom" => { + "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1515,12 +1524,12 @@ BitType: ast::BitType = { ".b64" => ast::BitType::B64, } -AtomUIntType: ast::UIntType = { +UIntType3264: ast::UIntType = { ".u32" => ast::UIntType::U32, ".u64" => ast::UIntType::U64, } -AtomSIntType: ast::SIntType = { +SIntType3264: ast::SIntType = { ".s32" => ast::SIntType::S32, ".s64" => ast::SIntType::S64, } @@ -1664,6 +1673,22 @@ InstPopc: ast::Instruction> = { "popc" => ast::Instruction::Popc{ <> } } +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor +InstXor: ast::Instruction> = { + "xor" => ast::Instruction::Xor{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe +InstBfe: ast::Instruction> = { + "bfe" => ast::Instruction::Bfe{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem +InstRem: ast::Instruction> = { + "rem" => ast::Instruction::Rem{ <> } +} + + NegTypeFtz: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, diff --git a/ptx/src/test/spirv_run/bfe.ptx b/ptx/src/test/spirv_run/bfe.ptx new file mode 100644 index 0000000..60ee8a6 --- /dev/null +++ b/ptx/src/test/spirv_run/bfe.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry bfe( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .u32 temp<3>; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.u32 temp0, [in_addr]; + ld.u32 temp1, [in_addr+4]; + ld.u32 temp2, [in_addr+8]; + bfe.u32 temp0, temp0, temp1, temp2; + st.u32 [out_addr], temp0; + ret; +} diff --git a/ptx/src/test/spirv_run/bfe.spvtxt b/ptx/src/test/spirv_run/bfe.spvtxt new file mode 100644 index 0000000..edcf138 --- /dev/null +++ b/ptx/src/test/spirv_run/bfe.spvtxt @@ -0,0 +1,70 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %40 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "bfe" + OpDecorate %34 LinkageAttributes "__notcuda_ptx_impl__bfe_u32" Import + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %43 = OpTypeFunction %uint %uint %uint %uint + %ulong = OpTypeInt 64 0 + %45 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %ulong_8 = OpConstant %ulong 8 + %34 = OpFunction %uint None %43 + %36 = OpFunctionParameter %uint + %37 = OpFunctionParameter %uint + %38 = OpFunctionParameter %uint + OpFunctionEnd + %1 = OpFunction %void None %45 + %9 = OpFunctionParameter %ulong + %10 = OpFunctionParameter %ulong + %33 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + %8 = OpVariable %_ptr_Function_uint Function + OpStore %2 %9 + OpStore %3 %10 + %11 = OpLoad %ulong %2 + OpStore %4 %11 + %12 = OpLoad %ulong %3 + OpStore %5 %12 + %14 = OpLoad %ulong %4 + %29 = OpConvertUToPtr %_ptr_Generic_uint %14 + %13 = OpLoad %uint %29 + OpStore %6 %13 + %16 = OpLoad %ulong %4 + %26 = OpIAdd %ulong %16 %ulong_4 + %30 = OpConvertUToPtr %_ptr_Generic_uint %26 + %15 = OpLoad %uint %30 + OpStore %7 %15 + %18 = OpLoad %ulong %4 + %28 = OpIAdd %ulong %18 %ulong_8 + %31 = OpConvertUToPtr %_ptr_Generic_uint %28 + %17 = OpLoad %uint %31 + OpStore %8 %17 + %20 = OpLoad %uint %6 + %21 = OpLoad %uint %7 + %22 = OpLoad %uint %8 + %19 = OpFunctionCall %uint %34 %20 %21 %22 + OpStore %6 %19 + %23 = OpLoad %ulong %5 + %24 = OpLoad %uint %6 + %32 = OpConvertUToPtr %_ptr_Generic_uint %23 + OpStore %32 %24 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index a7ef75b..5bbe45a 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -116,6 +116,20 @@ test_ptx!( [0b11000111_01011100_10101110_11111011u32], [0b11011111_01110101_00111010_11100011u32] ); +test_ptx!( + xor, + [ + 0b01010010_00011010_01000000_00001101u32, + 0b11100110_10011011_00001100_00100011u32 + ], + [0b10110100100000010100110000101110u32] +); +test_ptx!(rem, [21692i32, 13i32], [8i32]); +test_ptx!( + bfe, + [0b11111000_11000001_00100010_10100000u32, 16u32, 8u32], + [0b11000001u32] +); struct DisplayError { err: T, diff --git a/ptx/src/test/spirv_run/rem.ptx b/ptx/src/test/spirv_run/rem.ptx new file mode 100644 index 0000000..2ac482d --- /dev/null +++ b/ptx/src/test/spirv_run/rem.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry rem( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .s32 temp1; + .reg .s32 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.s32 temp1, [in_addr]; + ld.s32 temp2, [in_addr+4]; + rem.s32 temp1, temp1, temp2; + st.s32 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/rem.spvtxt b/ptx/src/test/spirv_run/rem.spvtxt new file mode 100644 index 0000000..72d0965 --- /dev/null +++ b/ptx/src/test/spirv_run/rem.spvtxt @@ -0,0 +1,55 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %28 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "rem" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %31 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %1 = OpFunction %void None %31 + %8 = OpFunctionParameter %ulong + %9 = OpFunctionParameter %ulong + %26 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + OpStore %2 %8 + OpStore %3 %9 + %10 = OpLoad %ulong %2 + OpStore %4 %10 + %11 = OpLoad %ulong %3 + OpStore %5 %11 + %13 = OpLoad %ulong %4 + %23 = OpConvertUToPtr %_ptr_Generic_uint %13 + %12 = OpLoad %uint %23 + OpStore %6 %12 + %15 = OpLoad %ulong %4 + %22 = OpIAdd %ulong %15 %ulong_4 + %24 = OpConvertUToPtr %_ptr_Generic_uint %22 + %14 = OpLoad %uint %24 + OpStore %7 %14 + %17 = OpLoad %uint %6 + %18 = OpLoad %uint %7 + %16 = OpSMod %uint %17 %18 + OpStore %6 %16 + %19 = OpLoad %ulong %5 + %20 = OpLoad %uint %6 + %25 = OpConvertUToPtr %_ptr_Generic_uint %19 + OpStore %25 %20 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/xor.ptx b/ptx/src/test/spirv_run/xor.ptx new file mode 100644 index 0000000..a28b321 --- /dev/null +++ b/ptx/src/test/spirv_run/xor.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry xor( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .b32 temp1; + .reg .b32 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b32 temp1, [in_addr]; + ld.b32 temp2, [in_addr+4]; + xor.b32 temp1, temp1, temp2; + st.b32 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/xor.spvtxt b/ptx/src/test/spirv_run/xor.spvtxt new file mode 100644 index 0000000..ee09898 --- /dev/null +++ b/ptx/src/test/spirv_run/xor.spvtxt @@ -0,0 +1,55 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %28 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "xor" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %31 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %1 = OpFunction %void None %31 + %8 = OpFunctionParameter %ulong + %9 = OpFunctionParameter %ulong + %26 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + OpStore %2 %8 + OpStore %3 %9 + %10 = OpLoad %ulong %2 + OpStore %4 %10 + %11 = OpLoad %ulong %3 + OpStore %5 %11 + %13 = OpLoad %ulong %4 + %23 = OpConvertUToPtr %_ptr_Generic_uint %13 + %12 = OpLoad %uint %23 + OpStore %6 %12 + %15 = OpLoad %ulong %4 + %22 = OpIAdd %ulong %15 %ulong_4 + %24 = OpConvertUToPtr %_ptr_Generic_uint %22 + %14 = OpLoad %uint %24 + OpStore %7 %14 + %17 = OpLoad %uint %6 + %18 = OpLoad %uint %7 + %16 = OpBitwiseXor %uint %17 %18 + OpStore %6 %16 + %19 = OpLoad %ulong %5 + %20 = OpLoad %uint %6 + %25 = OpConvertUToPtr %_ptr_Generic_uint %19 + OpStore %25 %20 + OpReturn + OpFunctionEnd diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 23a63be..365d1e8 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1289,6 +1289,9 @@ fn extract_globals<'input, 'b>( .. }, ) => global.push(var), + Statement::Instruction(ast::Instruction::Bfe { typ, arg }) => { + local.push(to_ptx_impl_bfe_call(id_def, ptx_impl_imports, typ, arg)); + } Statement::Instruction(ast::Instruction::Atom( d @ @@ -1591,6 +1594,24 @@ fn convert_to_typed_statements( arg: arg.cast(), })) } + ast::Instruction::Xor { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Xor { + typ, + arg: arg.cast(), + })) + } + ast::Instruction::Bfe { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Bfe { + typ, + arg: arg.cast(), + })) + } + ast::Instruction::Rem { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Rem { + typ, + arg: arg.cast(), + })) + } }, Statement::Label(i) => result.push(Statement::Label(i)), Statement::Variable(v) => result.push(Statement::Variable(v)), @@ -1610,6 +1631,7 @@ fn convert_to_typed_statements( Ok(result) } +//TODO: share common code between this and to_ptx_impl_bfe_call fn to_ptx_impl_atomic_call( id_defs: &mut NumericIdResolver, ptx_impl_imports: &mut HashMap, @@ -1705,6 +1727,100 @@ fn to_ptx_impl_atomic_call( }) } +fn to_ptx_impl_bfe_call( + id_defs: &mut NumericIdResolver, + ptx_impl_imports: &mut HashMap, + typ: ast::IntType, + arg: ast::Arg4, +) -> ExpandedStatement { + let prefix = "__notcuda_ptx_impl__"; + let suffix = match typ { + ast::IntType::U32 => "bfe_u32", + ast::IntType::U64 => "bfe_u64", + ast::IntType::S32 => "bfe_s32", + ast::IntType::S64 => "bfe_s64", + _ => unreachable!(), + }; + let fn_name = format!("{}{}", prefix, suffix); + let fn_id = match ptx_impl_imports.entry(fn_name) { + hash_map::Entry::Vacant(entry) => { + let fn_id = id_defs.new_id(None); + let func_decl = ast::MethodDecl::Func::( + vec![ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_id(None), + array_init: Vec::new(), + }], + fn_id, + vec![ + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ], + ); + let spirv_decl = SpirvMethodDecl::new(&func_decl); + let func = Function { + func_decl, + globals: Vec::new(), + body: None, + import_as: Some(entry.key().clone()), + spirv_decl, + }; + entry.insert(Directive::Method(func)); + fn_id + } + hash_map::Entry::Occupied(entry) => match entry.get() { + Directive::Method(Function { + func_decl: ast::MethodDecl::Func(_, name, _), + .. + }) => *name, + _ => unreachable!(), + }, + }; + Statement::Call(ResolvedCall { + uniform: false, + func: fn_id, + ret_params: vec![( + arg.dst, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + )], + param_list: vec![ + ( + arg.src1, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + ), + ( + arg.src2, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ( + arg.src3, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ], + }) +} + fn to_resolved_fn_args( params: Vec, params_decl: &[ast::FnArgumentType], @@ -2803,7 +2919,7 @@ fn emit_function_body_ops( let result_id = Some(a.dst); let operand = a.src; match t { - ast::NotType::Pred => { + ast::BooleanType::Pred => { // HACK ALERT // Temporary workaround until IGC gets its shit together // Currently IGC carries two copies of SPIRV-LLVM translator @@ -2854,7 +2970,7 @@ fn emit_function_body_ops( }, ast::Instruction::Or(t, a) => { let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t)); - if *t == ast::OrAndType::Pred { + if *t == ast::BooleanType::Pred { builder.logical_or(result_type, Some(a.dst), a.src1, a.src2)?; } else { builder.bitwise_or(result_type, Some(a.dst), a.src1, a.src2)?; @@ -2882,7 +2998,7 @@ fn emit_function_body_ops( } ast::Instruction::And(t, a) => { let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t)); - if *t == ast::OrAndType::Pred { + if *t == ast::BooleanType::Pred { builder.logical_and(result_type, Some(a.dst), a.src1, a.src2)?; } else { builder.bitwise_and(result_type, Some(a.dst), a.src1, a.src2)?; @@ -3033,6 +3149,39 @@ fn emit_function_body_ops( let result_type = map.get_or_add_scalar(builder, (*typ).into()); builder.bit_count(result_type, Some(arg.dst), arg.src)?; } + ast::Instruction::Xor { typ, arg } => { + let builder_fn = match typ { + ast::BooleanType::Pred => emit_logical_xor_spirv, + _ => dr::Builder::bitwise_xor, + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?; + } + ast::Instruction::Bfe { typ, arg } => { + let builder_fn = if typ.is_signed() { + dr::Builder::bit_field_s_extract + } else { + dr::Builder::bit_field_u_extract + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn( + builder, + result_type, + Some(arg.dst), + arg.src1, + arg.src2, + arg.src3, + )?; + } + ast::Instruction::Rem { typ, arg } => { + let builder_fn = if typ.is_signed() { + dr::Builder::s_mod + } else { + dr::Builder::u_mod + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?; + } }, Statement::LoadVar(arg, typ) => { let type_id = map.get_or_add(builder, SpirvType::from(typ.clone())); @@ -3079,6 +3228,20 @@ fn emit_function_body_ops( Ok(()) } +// TODO: check what kind of assembly do we emit +fn emit_logical_xor_spirv( + builder: &mut dr::Builder, + result_type: spirv::Word, + result_id: Option, + op1: spirv::Word, + op2: spirv::Word, +) -> Result { + let temp_or = builder.logical_or(result_type, None, op1, op2)?; + let temp_and = builder.logical_and(result_type, None, op1, op2)?; + let temp_neg = builder.logical_not(result_type, None, temp_and)?; + builder.logical_and(result_type, result_id, temp_or, temp_neg) +} + fn emit_sqrt( builder: &mut dr::Builder, map: &mut TypeWordMap, @@ -5039,6 +5202,27 @@ impl ast::Instruction { arg: arg.map_different_types(visitor, &dst_type, &src_type)?, } } + ast::Instruction::Xor { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Xor { + typ, + arg: arg.map_non_shift(visitor, &full_type, false)?, + } + } + ast::Instruction::Bfe { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Bfe { + typ, + arg: arg.map_bfe(visitor, &full_type)?, + } + } + ast::Instruction::Rem { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Rem { + typ, + arg: arg.map_non_shift(visitor, &full_type, false)?, + } + } }) } } @@ -5351,6 +5535,9 @@ impl ast::Instruction { ast::Instruction::Clz { .. } => None, ast::Instruction::Brev { .. } => None, ast::Instruction::Popc { .. } => None, + ast::Instruction::Xor { .. } => None, + ast::Instruction::Bfe { .. } => None, + ast::Instruction::Rem { .. } => None, ast::Instruction::Sub(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Add(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Mul(ast::MulDetails::Float(float_control), _) @@ -6192,6 +6379,52 @@ impl ast::Arg4 { src3, }) } + + fn map_bfe>( + self, + visitor: &mut V, + typ: &ast::Type, + ) -> Result, TranslateError> { + let dst = visitor.id( + ArgumentDescriptor { + op: self.dst, + is_dst: true, + sema: ArgumentSemantics::Default, + }, + Some(typ), + )?; + let src1 = visitor.operand( + ArgumentDescriptor { + op: self.src1, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + typ, + )?; + let u32_type = ast::Type::Scalar(ast::ScalarType::U32); + let src2 = visitor.operand( + ArgumentDescriptor { + op: self.src2, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + &u32_type, + )?; + let src3 = visitor.operand( + ArgumentDescriptor { + op: self.src3, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + &u32_type, + )?; + Ok(ast::Arg4 { + dst, + src1, + src2, + src3, + }) + } } impl ast::Arg4Setp { @@ -6437,13 +6670,13 @@ impl ast::ScalarType { } } -impl ast::NotType { +impl ast::BooleanType { fn to_type(self) -> ast::Type { match self { - ast::NotType::Pred => ast::Type::Scalar(ast::ScalarType::Pred), - ast::NotType::B16 => ast::Type::Scalar(ast::ScalarType::B16), - ast::NotType::B32 => ast::Type::Scalar(ast::ScalarType::B32), - ast::NotType::B64 => ast::Type::Scalar(ast::ScalarType::B64), + ast::BooleanType::Pred => ast::Type::Scalar(ast::ScalarType::Pred), + ast::BooleanType::B16 => ast::Type::Scalar(ast::ScalarType::B16), + ast::BooleanType::B32 => ast::Type::Scalar(ast::ScalarType::B32), + ast::BooleanType::B64 => ast::Type::Scalar(ast::ScalarType::B64), } } }