From 52a392c294116a6434926443a49970503cdeb250 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 16 May 2024 23:01:07 +0200 Subject: [PATCH] Fix dumb mistake in label codegen and make asm for surfaces non-volatile --- ptx/lib/zluda_ptx_impl.bc | Bin 243120 -> 243120 bytes ptx/lib/zluda_ptx_impl.cpp | 70 ++++++++++++++++++------------------- ptx/src/emit.rs | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 5c073f5738071aea1da99d65669adc05f69db151..f522728c98e45c9fb83e90c7773c2ff2d8d9e987 100644 GIT binary patch delta 4128 zcmdn+nh%8jr`B(?T41r3xub#UuLfh%f(4q)Uh=^U4j8j~T{dsvkYLnMOj@9Do%vLq z?Sh10W-r-*1q+y2x#X-Dtl7-$_0MR5h9#@l{NM#O&CFgmZ5JfmXYQJ1y1;{z)oWwG z0-v4CT^p?zD3~*QeGOiq;K+Jvw#5R5r_85f>=wL;XZHFXu%N+DkhM$DX2G9>%%^S} zFL>a}>Qxi6AZ8--sXcZJHoRr-sxVv7AkOMFEpWk{Q!yI zAmB5zSFrtp2f55%9|9M=kl%bJO?5XT!}JYD7)_?@*)n=fFHmBQf`+wVCuN?{tfz1)?NA1aJ&R)ss`QAURCb{>qc;U>g+F^WNzB0CA>$LXRL zj1#9%@?q?Ngn`L+e_zHrMn(pZ&;CPAu$|_~$TNL^7-Q)4oIuRrH=90T7NgMgbwP|1 zAcjCKNl0T{K3zYA@g#DRo8GU?XfoY9gE4RV(@@3<&`<*<&;03<;f$+MOaLV$Opk7# z5Xo2xa-y9F<0;1JYXTT$S#R3T(S@e;I4{P$=`&&(*I*bm9p>ZD(_`Ei)mY2;)48E( z-`|&U&-D8VjK?58hs0^+^qyo!MTirT!oVj5(`-Z-@JyeU%GiKx_x6M|#vDehf$$4r zxXE<6a!B@+oPIZx@h~IP_JvuDE1_`)jb4K)M#t?Za~NGfUgyhWWM@Lr49k?z#9Rl; zfE?R@7cweiF=lQt*qG^jd5n$QlS>)fpfPQ-U9Ox_4XUATIyhUMuRt?myFnGCMBvL~ zted{Vgi&(4Zw+Gw%!ui6UW}a6?I$rhg7TK+^w>JaBjNTrwiZO?AdXEMp&-4i`jG{2_&g*6@neNxhxDICO0cj?V z>AwpZJEn8DGfszyLIQ33gLcM7sCkXsA9o^obY3?+FV=0h?qw8*I|)<{ZlBP{SPKs5 z35=gX2EYmo&h7S-7)7CKp&oiT8MU0?+-@@!oI*k2TQ@ym72}ub8Z#KDLOftQ{lWxB zp6Tyf7^9{q&IA>m+XZLAL!^Fs-BiY+=^Aqw&p=X4-SqAajK$NR&1IYn(QmSS-8{Tb zU$79D)BTsCJN@G_xXp;T*fE`FC8!huJ0BDaz0eRqc6#F)u+t6KGAcqt1L|~_^^7N= zPT$_Wfsq3$3yu)4=|Y(=dz?`Rsurp{_amb)3$a(cTfL%gYg_A!}JTM8SA%mzhi8H8vydf^t107 zbEX&GVcY^W1r(Z&({n#EE}HIsk8uV>6yXt*?Q!=RT|ohI;WT3b<8+ZnjLRXRVmsaM z9V5?l+pCN*(-R*v?tth7hm6DY023yb>2sbi?nV*qyU18JUEvoa%k+R}jF+IPVtc}K zM4M#$hZl^RP(RsipMc$id*9&k;P$ic85LkAfIYW8_amblR0PRm)8p3*6_LQ@5pOd$olvlvqt$mwq{_A1&J^!GHsn+CB?K4 z;`a9K2c(%CQQWfKUY2Pd$OZT0n3R|p8MZSjGUYQeI&3dgV%mbDa=N`N)3NEAs!UfH z8MgmZW#WUncgysH8ce$x9YF52Vw}ERi>VJ{8@TX;xOKXN5!0IK8yuKerhDixT?8dd zxLeojGJ#72M}3IJ8x5FjKo)N_VCtH_L4~Pg`WqvrJ1B7)5XWS)J-~!X1nx&EGp1d* z9o=h*?C9-`L*#u4%@Nk8@^(Kll<((iw>Zs4OWqPIy$j8$+8Zd2|e%+O+ zjnM(bhB`OKo#_UKb3xg)X1awZ(^QBQxHRF}&hN!!5AuYgK9lYAgc>H%>F572nr*-C z&EyG+6h&XA_l%58+XelZ449BLPUrVxa-V)Gkm)xQ!;sF$cV;1CLYG=ORJc!Gcs*&Ol6wNI9;fgX*R@#?bA)`m@svlPG4Kkw4ae- zIy?<*-`@aAYYy8Tr!#RgPETlNnh7xhl4y{8w6O)Rj||%h`)F%77B`t~*XxDERAV35 z2#{y^nWneYGHF7b(mq{vG8T=d)8|fM0(Fq#o|(SCfoa$D)M-q&86Dakr!#GLoX(st F2LQ+nRyhCw delta 4128 zcmdn+nh%8jr`G?nT2Qf(xub#UuLfh%f+L#DTw8+|$k?)S{V{LgkYLnMOj;mvmznFH z?E;HX=2O!H7I<*8dX-o&5ZK9lO2~M@3P)Bi^N=sl!W%k--xpT&(_(?mYv!&Cb_+O?nZ2q47ZmtOvU2${-!%E*3!z*pu|bIcY@k!S4^4_c6OmYM6T&4MGY%v^<`3%-Q2a_zNT zaOEd+*KYd-9)--Oih~v?Xl_1}rn;MvW%`CAj3(3dY#F_#7br1aVRV|lFpJS{I*$q? z&-Q+0#%M;ygVP0PF`7*O@Pd(Z`h-YE$L*P_j1G*90@Lk080)rcs57=Qq8Jmv!o)Fs zP64Cf^iCy4f$5=|jO!t0*iLUqV&s{A!Hv;$`T=Ps*6A1Q7~7`vX*14&$RpV?eQq&h z#`K9gj5nb6Oiw6fWZV8um$4qzvIX2s9McQRA(rt^7cpSm05J^NG6`NLp6&jIjPh7K z^VW#b2IQG@evI|oSDG+(z|D{IV)UB6!Hn?+rhz#kOnlo#Ef{6t2KxIldQJB`&d4@B zx{)zyx}+7z<=fBsG3GK(kGEl*1r4F?dbW&mP)*1#1O>+_!@3PoEM`QR4KBPKz^JqYQZ>h z`XnF54oDc7Z1?wNtYc&p0Qu}c)CAjUo{T)x_lGftPR|L%41Tle6J{|AOkbY5h)COQZUU%gaOa=X{n42$aZf}NMp=l#2N^{AcmVv zmn(;4Ps!7elmyA1>|+UJVtgV6wRp7xIMX)u?-s2Cfnu88P%W~>ZXIU)%glEBeok}KCjyiAzY~NVVs0y{hWV*jEBj=`ytO^Lk$V51V$5Nj-lM_DGkrn}qbSU~^ST*Jru(%ru7jC+K$?kT z`tL%$$*AQ7=XRT^;1mi8p@3D4U#4r!V4Mo^fbH}P6Bv1>zi(lT znw~flRCI0^oCOb&`ssC38H=WC%waqONilWPyEiZvPk%O-aWX`|$@X>g@H&0LLR?Pw zUyAPZkIUdTBjRGmbe@%3#++MjE6yxAZ$#i-_79;0$xx~Uf8P`ArkWKO4gJg=-K6F#e4lq84Xabw!xP9(HMki=a zg&4D4?l9v-Mn;zH8;&r_Gr^J;DA8``JJKR}w)!FZ05W%`BFjP={O-!Znp4FLIK`q}r4InxX8 zFm8dG0t!vX>A4>n7fpA+$2bEbitvcZ_PG0uuAl(9aGJ4zak|JO#^sPuv7PSsj*(}& z?N!E@>4}dScR+N4L&jlxfC&@J^f^x$ccX~*U1Thqt^g_n1D-Knf~Jb?3C|I2lI^ zz2F<8Hm2tszhm=U%@4-)SUm^#m%(pFA5e;BRAg!gwZu54^ZaFe!RRzy(4Wa}`T}z% z(diDkOcvW4{xRAzGCEG5$irj;X_MK@GI?!vWr_h6WcTEly0+^uG0lM+6~Myu1){ti zROZj0p3TOz2qJ)_AJpFB+&+(;DHv3$-IHS~W1ODB#k7HuWjfeF3%Hp+Fgk9Z$is97 z)AH$WjTkw%zvE-_2WhqwVA2BBJ%u(*?bAO9fo$LY(3WXF(^RNuwob2-V%i6Bd;9hS(oBvh zZrN@x%QO$0Y zW%@x4rrnH=Aop4^PT#J@)CaK*T=+rUI^DsDY0dNv4ooc5J#?5Zf|4cNt?PA}z@>qs zKE&dU223^}i#HlDbxq%(!c;Q-jSm^Mwn?#k50=m=s%og3rM zbOXb=pzK;R-NKV;Dntrgn(%Dr_hPaKdBRbj$#!}|4U_2f^M4r4wqN&V@&pwKioQ(m z85y0n3;Ht|Fd=K4&hN$KKK)c6({Cn$CDZxy811KXM==RaH;`wt-X0vx1S(FZEBZ3E zZ&wIq$^hj|L4T(1=@WJ_vP`eZW%@9kBZ6rorh)N?8Cj>hM>2gvHFbgjlhgKdQA|!C zHwnfty@ndtF@5eqMxN~xJ8D`tV#U69kydwo4TP!7aT(+d)qJV7q|fW-*E3MMa* z5siIJJdD$qRxw>?blTpS$~2X6x==0CY={fnr<>L>Vd^%WzP6reKO@U@cpBKgzX6oi z9Jf18XX0j@p3uxR6Ji1+(IEL~V+&p%8MYJl(bjG(ZZg}h*9(cM#y+qSAkXkKO>e1X u(u6preY)yoEE-Lx&z-`w8`(3{_ct)@nw~n1={BQdyW@1G?T*u#^W^|P>ty2q diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index 9a1b29a..e564810 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -232,23 +232,23 @@ static __device__ void image_store_pck(T value, typename Coordinates::type uint value_dword = transmute(value); if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:1D unorm" : : "v"(value_dword), "v"(coord.x), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x1 dim:1D unorm" : : "v"(value_dword), "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:2D unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x1 dim:2D unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:3D unorm" : : "v"(value_dword), "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x1 dim:3D unorm" : : "v"(value_dword), "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm" : : "v"(value_dword), "v"(coord), "s"(*surface) : "memory"); } else { @@ -260,23 +260,23 @@ static __device__ void image_store_pck(T value, typename Coordinates::type uint2::Native_vec_ value_dword2 = transmute(value); if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:1D unorm" : : "v"(value_dword2), "v"(coord.x), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x3 dim:1D unorm" : : "v"(value_dword2), "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:2D unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x3 dim:2D unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:3D unorm" : : "v"(value_dword2), "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x3 dim:3D unorm" : : "v"(value_dword2), "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm" : : "v"(value_dword2), "v"(coord), "s"(*surface) : "memory"); } else { @@ -288,23 +288,23 @@ static __device__ void image_store_pck(T value, typename Coordinates::type uint4::Native_vec_ value_dword4 = transmute(value); if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:1D unorm" : : "v"(value_dword4), "v"(coord.x), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0xf dim:1D unorm" : : "v"(value_dword4), "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:2D unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0xf dim:2D unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:3D unorm" : : "v"(value_dword4), "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0xf dim:3D unorm" : : "v"(value_dword4), "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_store_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); + asm("image_store_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm" : : "v"(value_dword4), "v"(coord), "s"(*surface) : "memory"); } else { @@ -325,23 +325,23 @@ static __device__ T image_load_pck(typename Coordinates::type coord, surfac uint data; if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x1 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x1 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x1 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else { @@ -354,23 +354,23 @@ static __device__ T image_load_pck(typename Coordinates::type coord, surfac uint2::Native_vec_ data; if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x3 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x3 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x3 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else { @@ -383,23 +383,23 @@ static __device__ T image_load_pck(typename Coordinates::type coord, surfac uint4::Native_vec_ data; if constexpr (geo == ImageGeometry::_1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::_3D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else { @@ -419,23 +419,23 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates(coord)), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A1D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else if constexpr (geo == ImageGeometry::A2D) { - asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + asm("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); } else { diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 981e716..f3e5caf 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -3585,7 +3585,7 @@ fn emit_store_var( fn emit_label(ctx: &mut EmitContext, label: Id) -> Result<(), TranslateError> { let new_block = unsafe { LLVMValueAsBasicBlock(ctx.names.value(label)?) }; - terminate_current_block_if_not_terminated(ctx, None); + terminate_current_block_if_not_terminated(ctx, Some(new_block)); unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) }; Ok(()) }