From 6ef19d65010164a7cc8408663eb189b64f44d26a Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 17 Sep 2021 18:31:12 +0000 Subject: [PATCH] Add early support for more sregs --- ptx/lib/zluda_ptx_impl.bc | Bin 31224 -> 31940 bytes ptx/lib/zluda_ptx_impl.cl | 13 +++++ ptx/src/test/spirv_run/lanemask_lt.ptx | 25 ++++++++++ ptx/src/test/spirv_run/lanemask_lt.spvtxt | 45 +++++++++++++++++ ptx/src/test/spirv_run/mod.rs | 1 + ptx/src/translate.rs | 56 ++++++++-------------- 6 files changed, 103 insertions(+), 37 deletions(-) create mode 100644 ptx/src/test/spirv_run/lanemask_lt.ptx create mode 100644 ptx/src/test/spirv_run/lanemask_lt.spvtxt diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 175f4df8c13942df8c53e9608518b38a9e6eab69..7aa12c831a7188b41031fcbc65c662a774f9a9df 100644 GIT binary patch literal 31940 zcmeI54_I5}y~p2^q)DKVq%CdJlHdu{7PYnkDh&l{Ak|^5Gwtdw&Q1uB3Y9-0KpWWf zo|6PZ9d&V5FM2x%+HLBi8&kaF?$(VzRk5;O)Os&=*nw@e)9TPwFL!o&e{VRy3i)GC zAA`?xpNBk!FMRU8zw`aQ|IRsYLYi;sJ#kF}A@c|!C2__TH{JUCop*iKeRJaAicL)V zJYt9=i@;3>r<@#3kWhUL})n1u;r9bV5T|4e$UmAqC zds#xR0ZmUHjA6@~W%s8X^uo83-cEnQ!Y%i<&G(+#qT%{=j8@YT#}Q8EQJSrC!kC-$ zWW}k{F~S~>BUc>hC`@flYweKPt)_%?+D+D$^wi|opTsempU3TwZC>qBZnw%~Tk;>u zlkG~C+AW4}8{n8+PMxg7s?;b+hxH2hotPin#+g~dZN83>GuvgF)@_%Q;U{J6u6-mf znafLSB?&LM`ij%EZDn>Sz9~_eDjB@~w^wjk?p`I)^KIm{cqnIVC2}b|zg0_!o*^Xh zyUCMZz=N5P5*692^zCMSb)2uM+|%UnH7XT#slH63!d32Zbolm^`|7p6Msg-E>Ag-J z>DQey=#F>lj+3Ml2K^|XbR;i{>~Sgfq$)Tl>F{M1`uL***E1HzP+CpC@r)YF3no@nW z;KA?9)cBf=V2nm(jqu z&<%mxUfoz{d_SLbqEr8xP4^Ddp=f;E=OoHHzt2(bbCye8E-(*=Q#2BAI0~6Qst0?= zIsK7wT|cis-kWs9raLpPKh+ukp+P^$Ck->4lygA)NWr;-Q@9EgdrZE}a-V}{Dl}I~ z(iwy9MVlU)>(sdZOsDQe@Q{~u+^l=Aj~at$?-=yw;BYx4^N{Q#+AGp7Inj}YX67!l z*OKiY$m+G^cIV~x4CHt{S^F)C?b4j?Jk#(%cDH3i+dvLE$ErWEs)$YXu}1ZQRaHgQ zfAyzx^-C_9$8r3;YMREr%xe#PXn(G}fJ!MJk_2e9q=I$S$9tJImt(M$& zFh-;09Q175FXa@y@?YWr8&i{*4sHYad-1@HTAJRqO6ZHhNk3&^8tQt3~&yZmc z7|0@rbA~OZwt)@ZOwQmy_V9%!;!VdqSzrt@9kOgV10`LgvT*N7nu`LaPt5=4A*UUr zX{b5bxjuRF0?sb&jO%@dFH{!4X*SO5sx&`LpT3Ot;Kq>-6aD2{Ix-swlt1~u^moS3 z?UFM8pg)p=#QkUJ4~pXWsVe3I`YY{mas#YA^I;8IU@ zDpu6P&zisw0RnRX)|)z30c(a6o-zi02oQLxetIqIjHh-_tMwNMI~A>1U0bYZ(kONh z%Rgl)gveFJ>I?BdH7i_LvN2H7$AtwO~Hc#2iPcX+BnL!YAGA*M#*2!yRA z3a6Pq>m-VLjuv)6D!k{FfR<)tKrp9g0023;O+A|II3$xc2o= zQih(L>`y-^88s6ECk~>4Cg+GlT3m2iUd}(nlei})`s1O11WH1$!1wi}W&1nhawxzM83%~SxI|*Pa(S(kfx{7r znZQHR8_yXYc)W+Fha(J2;K7W^d;PJuhq0FtYOh2eYVR*U4z)M@czkTIz2}a};c{Z8 zz58}~BtiDxxb3+-Cdl4BzdhbTTVs0bJy23*4Q{<>p3O`RZoPfCCBlWu%+@RZRj9qY z{yWs(CwGS0i7B!J=-`Qk29g2JJ8Vj8QBkC|FzX)S;#2Uurn%Gd2`8RZij-Q4r zx3n0$DC=$Pzp=nS1Wm#+;Slbb(m4xKpv_&uwD8<80k=Si*Z z6c;>kuDQpfGX_tb_iFu8dGN%!Rpsmro;XMTn5Tks<;>ZA)rLVv8Z^6~yjhV|96Y<9 ze?N@LE`1o2{}IOIW>*-KdUfqYu*v*)4XUJIlUcck`+`kwe$Hod1)Kcfe>4hpP)lB! zdD+`z!7cg5`xaeJa7*sEyVauzZplA~G3ovyj7eKT7?Vr(hML^BVu4~|u*u)8c-t%q zUMC;fq$>zES-HPe8WU{tiG{T+6Lbph|EFZtir|*4xbZMA3+nF8d-j_A!7cf;F^tKL zD??4baWss{A3hw$Uvq>HOf9T)QOH;6_5L(nsLRxgha-%Ly?;eYjqO?}01!iqg$e zHUI(z1DE18RSvkrGBbo>DGZq0^;~>)Jgq(5r*!n2@8Dc;egwUdGE`wQ<%vLWP$sI!M8ecSOOQq!h_x~OjcWy zGz6~GryhYrKe+y>Y$7S17SLScF>gH$E9YuxF`9OIQaihB!4`j8%*t=QD6>fRn+^Ld z3)+aLoVGn7zkze*!=M>TPIq`*w=;psZMB%Z18w<%4WzpE+PHU!dTJ*lBQ8IwpYA#7 zb{gjeig^-YR|B>;U^`^U)do92KTs+f3w#dP3UWwyyL=ATw`^#E>a1TiZd6wr)h8!Z z6INA*=CJ;RK{sO1_uG`bZwq8CQQ+?8_I%|}c4AO0(sm&qXs7-gopgYX# zkJ#k@<)@nvQ(F;PJp-ow7E`Mf_B__YLC?Fhk4dw;2I5|-@Hq;7PKT$yT;VFFTX*#x zzGX+hqyCVtK+?OV5;^Q&nKB<%|9hZ*!I`t)}*Nz|}GKVg$^Z}ymm2C};>xzHfz z`*TiNvRmQTfwmW7AyeNT@S~*LY|eIm$4QOqY_ICHRrNPxO5baFutTLnu1-PmWY!fc~(~GLJ#p(~n_e8^ z9w}^MWepV`g5HN@o4{v*Z;!*b?4|YUkBzFc6RMLi&Kg-lZU=QH6|k1pxMeZU^a3`8 zZ@cflS2uNNQpMEM-)*CGB7W-~{Wof0Y z`VMxt-C0*rQ_ZGjtTC=JvZ>|u^^Up?YuDaUQD5FrvZl1AYV9^#nZ2g^W{17n0@J*q z-oCc7a(C6$4rk3SducuEtSP;E-5RLQrl+N6UBm87&svwUGt;zgUB>#d^wPAnbs4s6 zGVSZqOl9d=J4-W5GBVQE8P}&@QBhrAQ&nBzsHwMC*R#~%cPgqJm3G#$)^gQV?DqV; zyq%Rbw)(P~hLTFyTuxE;-rUBL;xsq$zMzz_O8^wMu^e;jkZ}c{KG3CnIZpEWkR~oe z_F|kREh{5Vc6!LT{eg@DhxKUOKm!0JfU<{DQuvr7t^5A?t3UqygMClk`}EKYZC9VI ztWM}_+}7l|sxGm#t`WX|bJhL-dgBl35-;br*-D?iv+jnJtLh$0yu6h>0rTAe?LR-( znF{kr(NySYhrSzt5t}tU;C@H$%5f6G`2T^h-OmN-kxNelyowCvIXZWOkn{K#MH#jwr zm{(zqq6?p52sS>m55Q!Ey+%(COicd(kf79 zjp}a>^49eDmPESM3p=N7XN|HyHX+LG zmk{Oli?Tm!l>N=M-`AUObMyCWwqKOzhcU|Y!yILQbMyD>jhCpmUzE>pbLZc$*S~Y~ zcdq>|T<^lp3tT^2=r#nQ`xCOq8M2PNxH`{hWt8ScJB+;!i`Cy(?o6V1G{butsZ~bQTo9jQ=7m4Ef@{RZ3-1-^a{+nArquQQx z>t}S=m$~y_blY?8`ZK!iId}aT<@WrV*MszLKHz>Vmj5|1?u>Esv1alxD}CX1t69$J z*B8Vk6Pbmyyy|7OH)*(ARbk?89@uM^TBgc=<`V%>Rbk?89@n?Jd&$T~Ye`dS=eY3p&M2;Vw^(S)t=&V1H z<40%xi5x#V<3Doz=&V1H<40%xi5x#V>rdqPbL~&ppUCYKo%TQUbFwLL|7Fy0D+jq} zmL~X_@|WTldX%J3LrT^el-Dnk6+f7#^uFq4;CcFvvcUVvTG&B)uSNH$zv})ga(u2& zJFD?Y|1A6OpJo4@v+Tcfmi<3E%l;poW&d;S59`lY%-^WA|LVvi?MlPogsZBgf~Wvi?MlPolE^M2^qR_V}M`e^`HJyZ!$#%j-|%_|aK^BFB%; z`V%>Rbk?89@uRc;M2;Vw^(S)t=&V1H<40%xi5!2f{ptNz9jQT@j>Zeby4}b6q z$DcY5y?>LX{*1n5k*rtVlkbo$jr-xZI#*9P;SPd@0Y*aEFPiBCSiz$CeXUvY3@p=33`a&)1~y_Qeizo=c3 z&0jgRsK>pLU+qm8k!pU-^3fwC1|C$^65`4HhFgN>rO2$l$P_E2bQ=zcfv<& zEoqn5@Eh8e_IR53oWn~;q<8Zh&o7Ye%nN%sd`H-#`Gi2f97xZCrviL#~1e|XC9!_@o*0^0N6S^Z+nA#(N3>j(`(Eb6%1JF( z?e;UDuGB8`yWICPKVDV1%-ZOFka=uX>oR|n`yu9~RWMuv4?9eP*I^7UlwbRM4;bWMPM`8@rVmd6J^PnCWE7_)*9 z`t$TJ&$lnn-D)IzKjFv}5o-c4zbxqJyDTACva5cZwnk1 zxjJSl4l42-fuka?5I8FG2L+Cbd_dr+$R`Ajid+w$oI&$KMZQ_!sL1yS92NONfukaS zRp6+|KNmPE@~hy&jQT-EUMO%>z8bCx=y6eze_!CJ$nO<6D)K)F92NP5z)_JK;199raZ!

V#$iF3UROBv!qauGo;Hb!t3mg^seE0x|G&n9Q z@*IJqBCirSD)L7Jj*9$0fu}=!ROHN}DUOOfL*S^$?E*(d{x1SYMgFqDQIUTpa8%@1 z!>0&QpQy--1dfXQK7pem9}qYy@=pYgihR}LsrE-jUMO%>LB%*$fukZX z6F4ezm%vexw*iMKLr{?eBHxE=1^7%ndQbgn4g6kI)Ku9}W-D^kyNc+CDHRpj;6s** zoc2na%U)JgT5l`5!(MH7R+JW1RF_65Z7Z#9sBqdvmWJ4NTbp}jRv3qNpc^k%7(I(Bj14WSFt&4`S1x7)J&P)go<$W#&!P&WXWD~x@L_GRoQPXO zE?O8pi;a9?Y>0_`VQgHqFt#)8!HX6~&!P%rJBuoe?Myr6qBhX8sKV%3RAKZisxW#E z%x>ZQQf9xD^GoE?=vs8_3&R7?bYB_!!uYs&X>9Dk6u)Se=v!QAY;JL-vAt>Mi1#h7 zH2M}-8hwi^jlKhYIkoOZ?i_4xQH9a9sKRJkRAKZS=ozt|MHNQRq6(vDQH9ZSpjX6t z7F8HMizuh>&Hb4<9Mb$D5h}025q{F7M|X8 zxE$EZ%8GVoq+h}%{#VzS-iV@)aiJx})xbOgV_aBZzuMLGHWhu0!0pN<9V3v3_!^i; zVBB9)9^z|Ye~7O!V>XI%g?UU{C&hJ#=n9yH=n9yG=n9xap!cShLUB2Wu7Ej+u7Ej+ zu7EiN`ub9G5M2Rt5M2Rt5M2RtpdBYHXyW=obOp>obOp>obOp?TcHAZ9Ai4tPAi4tP zAi4tPKs)Y|au8hsa}Zqta}ZqtbD$kJW7W@Gcd6&Qh&3>aOFBmQ^W9e+&y;(G@TY(G@TW z(G@U=7=kPogT|e4srossiR9x&rot=nB{mwBtls!W=|bz#K$Zz#PO>DA{Q*Y6y25 z7kDDvV4_ddMXFI3zD7k+N%$J@MA#Zm`)*o={zo8dN-FB%SNMNK>}x8*MhO(oTo+O} zTx45meZ_8jm96g1&@*|R(|$)$X=P36oq2p1{j1y9Bw+$W#^e8 z2Ms&enoF(Q0drmJRht>M9M^V%A(xOmRMrxa0RoztQEMJH+xC2a!}B$qi$0u2|2fP{ z_`rL9zwh(;{%+6n{ALDIbxp@_oJPnbLP%Cn^z1uV|0VZ{3k`RNwas2Gh?zvxL4!|1`BZzgJmp!xoHqYArZKu(tC)8MFuvj^T7x2@QbMQQR=-A`653w3Jm7X6S^gaRO4NSwl)ZI9({8;dzDgTxtc%~$ zmfG%aRBF{=~z- z>18bumYDbMB_$$u3i+)z(sG{hQ;47t^?#3okU`L}Uqp=j8y~8{Z=mhDqXsc$iA+Tp4G|*8JQ*VkS%e$Pzxm+ zbz%E-2|IO~eY&tNT|%`m`H(HCp*eZCFribpjF^c0?67p5Rt7B&y5?HxhcLO)p?Oso)?`aMBuw7!pdQ+EVO6^1S};bVCGED|v|VVCG>QL!1FTm@ zCf8Y(k?-}6I}|j%o5@JY?0$HB8+m_fMNZjs7s1znPus?b0 z(JiYHCf5mxg!DDrBeC;h z6S10OwgfN+nRe)wor02PQou+B8S(NqvLLLglI#esTt$wEjsK+S31ATJ71qh(6&>_f znMHJ8Sk(~yQKIdx)6gGi3#5@r=bFX+qg675gxn5ub27}$kVJSfC3D>Z$kfA!?U zj52N4O8*^`1+nV9xuT<$y#gq+%akX9Sylwo%{h6U-zBBBc2k#pCT*fR$p)>YLN6(W zpXJUUoCL-O%)=5!0&}Sto{Dz<;3V)=>DXH7mprv$Osy{<>}WK-tVl1ZkV`gniqA6? zLgaON*~R$hwGuOyEO(YHw$K7aHFP;BPUk9?!BdO`zQa>xa{3hg4l(5tlQV1)krZp` zv&BSGYN3Tq5V=!)7P1(PA00>!Ee|N`6#qdkS=TH5qE520Zc0ndJ#VE8hj0GyyU30= zM_OWb`}JrEVTgsup{XszB-E!3iVGP6ziWvXLrD=ZhV&4{s+TLn zpnwQU($kHc6q2Bqhg*W7fFG2EUV-mR$)@{;`ovH`KtvWmL}Upgx*KK{2?cOC;-@9> z@M{XTsGr;0$kM|R0Yl&+=oL5B1U%xwUaY&lES0;xFaE>bUgzH609Shlj)~!vda1o_ zkJ|iP?0xj$ffRv@y)A$0uA{9nw)LLNT9@wHdi&puk8*9jtq+F5IqcHb)Bn-k-V=X! zxA)yzcYDEN_v1#qQx^=~c4>Efqi7KLxg^p)&lazBb@*Ctrx-f;(w6xls2j+oDc`>* zC5&;+CFjOGp~$rj|0-T*ac!KePd%9YuP_fLD?ONeugHT*lhnho`G9pF4m4LieG#`H5clDbOYJVDjG{Om?MvFu5qe-DK+R_3q=R?D__` zal0bvDfa>S_TEnSk#^H`4Jed6KO%Epb z&u|};(?3mhACo^k=wVF05a>Q8TgvL)C(fM3?qhPr*{9qm&cFV`NZ?}pl8N(*LOEb@ zoj5mcu_>cnC(h}j8ll*A;;fbyH@Qxn_V-exaE`omcF$kdCJ?&J?pN-XBW6=#_VeHtN)QpWG*dvL;Q99 zy{;|!(U&@9l50yo^kkJy;@Xm*doXGJw+EAkG!G_cY<4%fb@mj=R9BM+XZLIUT-V7L zmn+j;O%`mg5>9qC`SR2vM&NP^Zuw<|bhc|t=HJoEid?#T#m3E=8rPP5E!u<0o94Qk z{OG6$laIgP!Q|SIV!f-$fTC3C0#}nIceSyTTui>wGdW?OtI5xj)DpODy>#Lnn>Wzp z+L8zUU8&4)ZOQI^HNrsGmRvl;-Q<>cJ(zrHwFi?o1-hI3UH%kFsEf%P9$MJX2wl4S zx&5~&b6icHd#lP8gc@ zZgTnw4k+Bcs>DH^uLV&elRY za$uv^;rAcev_VWr&?SdH{z>goaN|d-(uecPIW6pgS#(223(wFf&MWr(>=i=B37iF; zm*VT!nbtx90TD11I!!)$AXp})wa1$L{hw0GwQ$po4|;9?{ik(F3~Z0!gI?P&Da-}#f{z?IPLqW(3 zP$t&jqHOI`=1vnNSLrmBj_OqBW=BbFQP2>Pjox*~2Bn6#(ESl*ZuBH)F_&?{ z6Lw@2_TpN5c`dsDwh7MmO22E8o{1J_wb3n%(Txmck3re4R<^RLc7yoi8hXP#dfS}P z=+JD}X{v;9f4&$F`hzv`m@sjlBj|7Wc2kDE*kmislbH2%o2j(U9(wd%*;xYj{L+CM z=?Sf@kCC2%ESM-&Pjh%nO85tACG0x&^eSOL<+LC?S8)&QXe`?a+Xv~hxkU7K*pM>U z-m~&tjf}QH_@mCf8@1|qv+7AfI5n(R4I9MwJ!R8$I1=~ilA%FC%5hM>cAx=ayFJr-!9y^s=vd#lwtjFq&@AoKKe)%B3fwrSR)N zdd0A>F_11DiH__)W1zcEU41jps9_IF)v8uO51vwAm&^Pv@sKXLQ3x9@2^|@Sp!Xr! z3hQqO_bTQeOoo6V{EXl7hrr)k(RvI@L3$hCgg=?4%#^RFv^@Yrm z*hSHcqM4|?(o$2&vc-$nP*a%n1mad+?a&8*xYzcTwLtZoS5t-OX6Y;H^v*|mS}Qf5^}TS zvtnbH#6>TSxjw(Jbp5)*eAD_;V__*n4c?o-&QxGzbc=QK=QH=ErljN+tT&YAtS`$d zFcK2!Z@H_QA&E<@WVqGeuQLR|-PMex`RwABE@AqGAef62wI^L;Xc&4CmT66iT3fugW zu}x5J#v1@OJQ@W$jdO_)zJQd0mV@;k0cctrdsGb z2aMQ+PKR|!dAL3pHVR>yLl>WNj?Wo!C-l;j=`ieN19b4qPNqxG?X$3w@+jx5G}~cB z(J`4}+Ud~1p4V<0>~{IBg$b_g=~MPF$^irYeu@omF(^Chm2IN~N$@_bw6npBqyskT z5_iI6gncnv5=_h*hrm?Rqtn2yU@h$I!SdipDVBZB$i6g5za5r-M`V&)G&^+(jlyJQ zbe~B&2%B%6;;(z@UL346l1{pl_^N{&^9{{(OAaf=&K zZfK=eai_u(?3V@g=vw@snAr>Bd>9yB@f6j*jmP-dUzCsi$pd_wuVRaD<1s$=7v*Dr z6Ycl&=G(;l{h96Os)6X)N{j2GAqf$L}80=k(3 zdo4tCjF32zy@OHgC8D~?ixm1sbyS=FU`n*L$R<+zX`@AY#hw1v_eFwQqukFBXCU$Z zRj|JUb@5tSy@U{WS(*fHBFxj1bfl8L5eFUyhLG_dPaiG5rpL2fR8cDzi?Ki{_JHJ^Xi`23*1#^(piHO}vv z_us_&>D%+)#Pz3d+w*76clucbxE~8>UPY=`TlIB*a?9K{?I(c&ZKSKn*2;Lg0gFlB zV6(vMMb3M$O-v(%aOOmRhkG!2AO-L+Fr%M!xNLk67Ysg)@8Jf^r|~`9Q28{zhYJ9o z#`kc-@M(MxCkUU$_i#e+Y5a-yzx?{(qxNsT%IlBU_&!>HyvFy@`r|dekJcZr@qIM@ zy~g*^`r|dekJcZr@vr*$cfY{?zpQUw{ZF*NbA9{URbGF*#`n?s<2AmI)*r9&eYE~~ zjqjuN$7_5ajeoE4eYE~~jqjuN$7}qF_P_l4;G^~*Sw)_8-hTzbt6(v8=dFG*WMdDr z#QAnI#^vo~L3=-A7%|>|wb6Smx<`Gv`!DbD;pOeC8Xw+nc2(oU+t03Qe0Vwcs>X-6 zXI$0z@OHJU8Xw-ic2(m~v_IWH@jicj)BfLI<@LvVeBZ1;-sAgb{qY{(H|vl0_`Vtc z-sAgb{qY{(H|vl0_*Z@W`(*v}?th~F>H2ozDz87@yP*NzFB{~$M?

>&LM;KAq=<8PgPM-49 z`xLidN0y3S5GdA&Y9}e?i4MY>7Yz)&fANy~9enCPKeqnp>u;mSq4#g1?RN)*kfr`x zQcZp{gC4)Fex7I}+dOkYh3=(!qCcmk1vx)C8hDrqvS<}mEt;C;t7{LJ*QLF%xBcM0 zm7g{qz0g#*fQE4T8~wb}*!b76H#*}(E*jB1lc3qIfFGBNHVRs1##K1Dg9xDvyq2eb zdQv438Rcl9@=YXb4YDw|RZfmQ#M=bKA?PfF^v%{>x zBvSm&RI5H(a@*}WqzOJ(YhXs-8yX^F{fgk{%}_Q*Dvzin-v=ss+bn*&f;9Rg7Eyap zM(=wT@wY){eV4`mh2YxWZc9K*aHIY+OVH`yuHMfr(r2Y3`qLKqL8+qejAd%SRAczs zGHr`2qwk-V8T(~s!w(ikx2(4BN6Rd$ywNb3RlXwc>I-DUKah_Yq%8BjT+y#!BX&*E zWQDP_+oxn?g|l<5Q_cO?vrz}9)@ChaZ}@gztcH259Ty{hMTN}FFKMl;EO(}($6@zai5%C z4dtf5?~ii22!5sye4hSH%i{x|r%L}=mHL< zMLx`NROG6FQ9r22S8yB^`9_YTBHzt%ROIh-92NPG97jbyAKuJF+XoeS2FFp6KgDrW zp3HGn|$5D~nIF5?Ef#ayi2RV+4dJLGTmXLu zhjLWpu^dN5Zsa&B^51YA75TdyM@4>tiu_5AqauHu|M?8YkCufph=S7G$bt1xac+UVysNQo%KI43+m&%0!a{{{xz^a_}92(Hu7?Xd5l>n`E>{H3YZ1&3YY}%3Ydel_ePgOemU^2fI0B4fI0B4 zfH^q(dOSJsu7El4u7El4u7Ekvj^h?Ie*M6^0_MQG0_MQG0_H$FZd^I=u7El4u7El4 zu7EkvjvH4FyenW1yenW1yenW1wBs&W_2c8ld%p8p1G5;{F+88|E_aNJmk;C1gLXH+ z8kh(F8rU8DYhZuS4&>(wj^W&53Y9U=qA5 zU=GgS8&3|rD_{=1D_{=1D_{=Jo*PdNyenW1yenW1yenW1wByFEv%JO { err: T, diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 39bd07e..15dcdd1 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -9,7 +9,9 @@ use rspirv::binary::{Assemble, Disassemble}; static ZLUDA_PTX_IMPL_INTEL: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.spv"); static ZLUDA_PTX_IMPL_AMD: &'static [u8] = include_bytes!("../lib/zluda_ptx_impl.bc"); -static ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl__"; +const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl__"; +const ZLUDA_PTX_PREFIX_SREG_CLOCK: &'static str = "__zluda_ptx_impl__sreg_clock"; +const ZLUDA_PTX_PREFIX_SREG_LANEMASK_LT: &'static str = "__zluda_ptx_impl__sreg_lanemask_lt"; quick_error! { #[derive(Debug)] @@ -1015,25 +1017,6 @@ fn compute_denorm_information<'input>( .collect() } -fn emit_builtins( - builder: &mut dr::Builder, - map: &mut TypeWordMap, - id_defs: &GlobalStringIdResolver, -) { - for (reg, id) in id_defs.special_registers.builtins() { - let result_type = map.get_or_add( - builder, - SpirvType::pointer_to(reg.get_type(), spirv::StorageClass::Input), - ); - builder.variable(result_type, Some(id), spirv::StorageClass::Input, None); - builder.decorate( - id, - spirv::Decoration::BuiltIn, - [dr::Operand::BuiltIn(reg.get_builtin())].iter().cloned(), - ); - } -} - fn emit_function_header<'a>( builder: &mut dr::Builder, map: &mut TypeWordMap, @@ -4815,6 +4798,8 @@ enum PtxSpecialRegister { Ctaid64, Nctaid, Nctaid64, + Clock, + LanemaskLt, } impl PtxSpecialRegister { @@ -4824,6 +4809,8 @@ impl PtxSpecialRegister { "%ntid" => Some(Self::Ntid), "%ctaid" => Some(Self::Ctaid), "%nctaid" => Some(Self::Nctaid), + "%clock" => Some(Self::Clock), + "%lanemask_lt" => Some(Self::LanemaskLt), _ => None, } } @@ -4838,6 +4825,8 @@ impl PtxSpecialRegister { PtxSpecialRegister::Ctaid64 => ast::Type::Vector(ast::ScalarType::U64, 3), PtxSpecialRegister::Nctaid => ast::Type::Vector(ast::ScalarType::U32, 4), PtxSpecialRegister::Nctaid64 => ast::Type::Vector(ast::ScalarType::U64, 3), + PtxSpecialRegister::Clock => ast::Type::Scalar(ast::ScalarType::U32), + PtxSpecialRegister::LanemaskLt => ast::Type::Scalar(ast::ScalarType::U32), } } @@ -4846,7 +4835,9 @@ impl PtxSpecialRegister { PtxSpecialRegister::Tid | PtxSpecialRegister::Ntid | PtxSpecialRegister::Ctaid - | PtxSpecialRegister::Nctaid => ast::ScalarType::U32, + | PtxSpecialRegister::Nctaid + | PtxSpecialRegister::Clock + | PtxSpecialRegister::LanemaskLt => ast::ScalarType::U32, PtxSpecialRegister::Tid64 | PtxSpecialRegister::Ntid64 | PtxSpecialRegister::Ctaid64 @@ -4854,21 +4845,6 @@ impl PtxSpecialRegister { } } - fn get_builtin(self) -> spirv::BuiltIn { - match self { - PtxSpecialRegister::Tid | PtxSpecialRegister::Tid64 => { - spirv::BuiltIn::LocalInvocationId - } - PtxSpecialRegister::Ntid | PtxSpecialRegister::Ntid64 => { - spirv::BuiltIn::EnqueuedWorkgroupSize - } - PtxSpecialRegister::Ctaid | PtxSpecialRegister::Ctaid64 => spirv::BuiltIn::WorkgroupId, - PtxSpecialRegister::Nctaid | PtxSpecialRegister::Nctaid64 => { - spirv::BuiltIn::NumWorkgroups - } - } - } - fn get_opencl_fn_type(self) -> (&'static str, ast::ScalarType) { match self { PtxSpecialRegister::Tid | PtxSpecialRegister::Tid64 => { @@ -4883,6 +4859,10 @@ impl PtxSpecialRegister { PtxSpecialRegister::Nctaid | PtxSpecialRegister::Nctaid64 => { ("_Z14get_num_groupsj", ast::ScalarType::U64) } + PtxSpecialRegister::Clock => (ZLUDA_PTX_PREFIX_SREG_CLOCK, ast::ScalarType::U32), + PtxSpecialRegister::LanemaskLt => { + (ZLUDA_PTX_PREFIX_SREG_LANEMASK_LT, ast::ScalarType::U32) + } } } @@ -4899,7 +4879,9 @@ impl PtxSpecialRegister { PtxSpecialRegister::Tid64 | PtxSpecialRegister::Ntid64 | PtxSpecialRegister::Ctaid64 - | PtxSpecialRegister::Nctaid64 => None, + | PtxSpecialRegister::Nctaid64 + | PtxSpecialRegister::Clock => None, + PtxSpecialRegister::LanemaskLt => None, } } }