From afb184e10e56df507eb3a8a3af4f051e395a8dbf Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 10 Sep 2025 01:13:20 +0000 Subject: [PATCH] Make cudart_interface_fn2 behave more in line with CUDA behavior --- zluda/lib/OpenCL.lib | Bin 28824 -> 0 bytes zluda/src/impl/driver.rs | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) delete mode 100644 zluda/lib/OpenCL.lib diff --git a/zluda/lib/OpenCL.lib b/zluda/lib/OpenCL.lib deleted file mode 100644 index 2b766ee858f3f474258b211c632aef8bc5368416..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28824 zcmY$iNi0gvu;bEKKm~?o2A1ZQCYI)gsNx1tu9=0Y351!z#lXPulYzlZjDf+zhJnGd zh=IZC6a-tFFff4eJO&2qc?=9TTu@xXz+eNyZx|SC-Y_uO#z65c1_s+(3=DQ|P`rzQ z!EP4=gS{3MFJNG>U%S1I1ey7@W2+ zFgVLW@gxQYXAu6yz~KCgfx)E!ir+CXxV&Rva7}>XYYYso*BBVwoS=9Q1B2Th1_pO6 zD4xN<;68(a!GjHoYZw?jK==g%gU1U72G0a2zQDlXd4Yk!%LR&eF)(=TVqoyrfZ}-! z4Bqn?7<~AkxQ2nj2ZUcSF!;P;VDOEA;v)X7{WB5cm@MQ7zqDiU$eX7~t^*#g`Zu5-%|@BzZvbCI*J2O$-dlDo{Lufgu@$|1dBl|6yQANrU1` z3=Aok7#LC=pm+@fL+Tm^hBOH%?qFa@1K}?W3~65&7}DdQ_yhw(`UwVx3?nFBz`&5P zfPo>C2Z}2g7&1Zl9s@(>JqCs>7bxDvz>u|zfg#%jiq|nPWUpgj$dQ5KHU@?q5dOu$ zkn@XyAvX((pD{4xK4W0Wi-Y1j3=Da97#Q+>pm+xZL;em1h5{WBj*s{B4e$>NiFa`g zb`J6k2=Nb!_YZLObM|3~kM|4p@xhd5sD~(kNQZcY`gzB@IEFYf#K&jm7Q`p#IF)AR zqy!Y@rxzvWGN5r`YMgWO^HRN2i}F%)7?N|4d2ki^xdoXysR(nNQ3c@Yic%9xQk_cE z(o%~+Mj>;tsR_?4$p|RQFGwva$xJOq(}5<5s^2+3H#adaC9pKLG!?E7MG&`oG=r0K zoWbry){UkF-K@Nl)QS=$Yax8Bs@#h5b3-Z%kR1tD4Hrk%1rm1mK?^|?L2T-SQu9($ zi;_^o9ij_G3Y*@L)QXbQB82-O+F+u1bQrn7LjXmOkqbQaK>`*<))w3}CZMXoX_BW8Hv2t6 zMHr++f}{x~lY{e1i;@vBjVXg_S8!r(K@L*!0^?&<1@b$V0tTuFO%~0B(j>HO7z`J| zQ0bCdmYIxHf(4f*!GuuNh8Cw5xt68oAxaN~2u!7GULmMhb4)HQ%`8fF_wi54N=+^S z#R@JNG@VX~MMat5AcAUu31C+T3Wo5+%o3l>Vvw^?^`J37aAsDy_RR2z~QPSruF z$w(FI%sRXGDhN91N>qQdsn2(E|~}r8XoVo0q_P;8JLM-7<4>&@~~6VNne> z4&f7!Vu%o$THnM1bb}E>m}((rzyruPu>c~1p|U6&IeGXdg85igp~OFu$sipla_G7< z(~A;IQhif%F_Nz@h7_7!KbN4yymUm91RCF{!svPuOESwawIGY4=?F?qOhFG^Br%+- z!KoA;5(ph&aV)wZQGlWWB7~tfCpEDcTYe8pMUz3(864*8R+I`VLZKlA6TnpGn^=HI zcEMr3AU+mVskxveiK-GRhE;WP0jSzR*zTK}3lhSj79RJgh9iVA^@Qf-qPZF(f~GPg zu{axHD42()0;~<5A`xoQ#L;wtibl75NU4of2%`$4>nX}aY6nBZB^*f*w|a0gf~OWF zJzxoJ`XH$ZRSQH6rrIqtFS9sKK1i3BjnU|K&keq`}6h#NP zS&OU~A%LU~QY|{>rFi5QmxN^Ig3BTI)DkS>7`i-NK;aKp4B}#{fx8xBBG^Gls=$>m zSP<+b1Q%URKv8~LW=>{aI+|isab#T;C8>GEnfZBcrFqFEnfZB+DJey%#ZV`^;*ur8 z1h@R6fSkmVwEQAaAI?3s1eamR3X#l(H6ai_K@mYw32pczD@7JUQi~RW2riNua56-g zh2SEofiyZDi;w~uRR}{Z!a#^ZFdsuzaA}fzQGRIwSQZ@F5Y;H+7`npqi?Xq5LzO_% z2QN?%7NH6usRgy)5z62^WEIee1|@E=qhT=*62PVop5EaKK_bX1AsqzpNC1-E=%Sc9 zFcgCYkkmn&(O_Y)Tag8j)s+?${ z=OFW7Dxg&(a(Ts&oP$jiLkGBcLRJhBKvM@PMG(rMd^AAL(OTq{zOp@k4!3tS9IH9R@s3O{@bkj#N57igwK4F()?n7V@tQj;?i zb26(EL2U)+{Ji3lMDVacaB2zE5FAR7%z_Ud1eYY1l!BTX!Ko$af-v==c{%xsDbS%? z(D*Ex0IE7zb%LQDLjtA`wMCqqgUTm;2FwgT16Ietz#wMEz+hv+z))n#z;Mcnfx*O@ zfnlCC0|S>014D@o1H&5|28I}028LU<3=D2|3=F&M7#Otd85kDWGcX7_Ffg<^Ffjab zU|>jbWMH`9$iQIZ#K5q{iGe}RnSo)FGXujfX9k7>7Y2rRE({C_t_%#F=Z2TA)Hu*6ysQ5E5O!H@8U<+Vis0v_UcoD$B5Esb6a4L|2 z!7PY@VObCZgGev~Lt8Kd!#`XbWO5i7+Hx2ee&sMQWaTn2 zJj-QZh|6POxRb}g;FHh5up^&=L8kyb^~u1|V9sE{V98*`V9j8|V9Q{~V9(&d;K<;_ z;LPB{;L6~};LhN|;K|^{;LYH};LG60;Li}i5Xcb35X=z55Xun75Y7<65Xlh55X}(7 z5X%t95YLdnkjRk4kj#+6kjjw8kj{|7kjaq6kj;?8kjs$Akk3%SP{>fkP|Q%mP|8ro zP|i@nP{~lmP|Z-oP|Hv^LVeB5K+aqktQ-QjbfHBkVk!b@k_)B~TC`y(1rN(Zhlb#4 zkpw~GUeJ0QG;M{d1f@pBEk(c#a6OK2JlJyQe6T2JIt4tVglafQ7(R1^O^WE5AZ(hU zeL#Y9JE(fW?R_*~fWL4GVYf|C^y!}e%eL4%)Qt>|tgJot`g6468J==$ITqOjp}GzG*DhNEdj3|OOk z4k3drB0#cO!vxw4h7BndqZ^Cf0fHtaf`iX!?gptRVpJJZ7kG3R-BCmg52Klc+L}d| zBz|ZYn@-el#X3j}(?-HDERq&TuN!656+;$nfC-`n+^s@V3hw1$6(oIJ6xmoDqoc$O zXd)To?gOo-&|OGia1zM~NZkP&jl_^ZHyEOvlp#eVBjMdmbW;fp3?i8U>y2R;LFD)z zl2OpA2Hhm$hv;BtK}!UzEg`rZxGe+EmQZ0xqC*!2CpvUNNXVm$!h;lD0$RhP3q$>k zBn%nJLRwvf@D)S~-Qgfn40nSB(VYzv!*Df75Sybx;+Sp*iK06h=hzd>;oyNGgomKb zh)TGh!SY~VVHp=eG6eU)5t1UXr{O~)ND>gAgNA{y2@*BDgQO82WcZy74MAw?!saZf z3etyKV2&Ye5Cx_SwKhRpn1{LuFF6OTp8{K#1nr@qa?u)!FiDiWh)ocuK5)W-ujxVW zgTRbJ>3yIIqVzjZrJ$uX+;oU3P*J3nT`1z{Yq{XM!L=xoE-)WmH4cA5QYYLf2p6db z0dqH~WQMDPrX7%x&|U*v8Km?D%b@lbu&BVkWD43xfT@S&dNe-B255!`bw%N_@TE#f zB9KA~G)M^PS|EA>@VR4*_CH((*78Re0674Zm{AuGAzJN-S$2dNxOoXT2q6w@;iE`_ zV-eL9un@de4_6IpwZroSw3P`G11m?EfM~1h=l?Jg_9Zz((i;B^zuDE0EgKa5K=?L7+;&LIT=yPR?-yFK7VINg|hg z;HC{!6kM($1R$9MrG*Su2oVRhjgh&aDONBaT-_l|0drAX!VqOR+P9E(6~s*EgN=d3 zEy6<77B5&4qOA+&g2M)(0aCKT+P7d85FvCWC~aCKb*S^;U-LgcpJj z_dY@h>KlXr)ZJhKP$Le*B@jVyqJ%3#atBBn;S3ND$rT_jsv|%m$Zi1f5l(=&7{L>l zuz5<*>>`#{VsZ}N_90jrcgqkg4R$!Z@dXxwxE|CRMCBs21Cb=)lX+k*S zNur7oRER-@fq~%+0|UbYr~oqqNUy;Gs5&#KFar+*1H(Tgbs%vE289Dm3=9l{44fb? z0~f+m>I}jH>415eS3{#7Qd^3}a@{9A+N)$ALG!=Y2{k);_C=|#pkUKz_ zfq{jAfq|JpfPsO570g0d!oc8=oS$2umzgrPh>`LCe~>zmK_DF<8ss)62B=Sr92gkD zuFZgnfc*xtgBd1b0#XCg30Gr^5CMtHg9s=Fna;pq0p)=xCWZh82D0WYsFg~LOe`T= z1{FZ5gbAEVb}%q7K-|i}fGw4zfSitmky42GUrZ{qhG6Sg1fE==5yFkM$;2C9#?8FnB zoJjrySq7h{MzRpWBrboL;>e3?3=9lh81|tILLsaoIj@)+V~<5e1_lOh3|pZiSxB~! zUgqG7OCDS{8oAKSPF`Gg8dGMc363-iD#iJb>;(5`De@h@oXC%E6NMp(&n^LUyC@7n zQ&{s4tsGYex6%>09vp(W3pSEV8+O z*kuMkQ3kc$t;xW^AcbicC>>#M&XJs!;N4HOoC4~vNh8|~pPZn?pQbokt#S+u3^J&; zLB`RFkrNKWX5w-uKEKH#*#;jwLbi(V_zNZsd_| zh0O&aS%zQ|7oPa~912+M0wq(>*cy1631JyhC=+L|DURF$8rxAswihugg=7cmVT!Ny zri9%-@I()i7fDY&IN}l1GFHZJCv4^o$yx-H$dJX8A63w81vf#E1D0gl@c2;`t8MW4 zLWEUF4j|Hxcv6oVvaO(LN6d~n=`o2XOx4ltqdZLU*rtJO8)62Pg0RF_qH3bqhT%1m zLlR#Ntc7YDXrdP>Wss5Q@wCIWG3~-tw2|ybd_7_v6no*5xUj{g6vQL!oH_-xT3?BQ zfk7A9R@Aw0N^Hc_2iHTf5qX*&$wJaY7Eiv{N460(El-VnkE2x%8ox2XZXe~f2AU3-P_2+84SjHBHo3|^H08oz`r>q4@EWPjiradHIv1KS1*N^Hbe=Qts3 z1W&|(mo*_-hF}tzD)6)$oMCoBMnh0mx*_Z#y-dM3`s;#fCukKNl2r&MalXS>qq?Hm z1X@>yFrT`z6W5wWN^CaAF-r(q4&h12X5`&K zNRB`-iA+a0${El&r5DT%um&f3^palU;3zpkmgjYzM5|0E#W1uy@Rts~iIIOc^wbC`+9wn6s+(8{hPWV@gx2_<&n8`DUJ z+6CI=fxLE+68rG=wNg;*!^mPJrx1KC$y6k}FtZrRsRK``n1*B%Mh2s*pVEov81-u^%$r6(Nf^YRmF2oibd%Y;J)eJ`nYB4Y{z%r;1Y0@_$dfla1(`iMms zHX-fVqr^@ec>}aErx?{v_;x@f>qyQeu+t^c=dE=Z7#K=m_Q4BH>ZVwHD|Jg@c0-F$ zSnC;f&?`lL#u2Ze9S~(`_TtzfiDW&3NnAd~*M=#F*$!Vo2i<&$u!Q8?iLakf0k#ja hjT63olLEW&<(Nt&yD+wlQeY>(Hd+ return CUresult::ERROR_INVALID_VALUE, }; - device::primary_context_retain(pctx, hip_dev) + let (_, cu_ctx) = device::get_primary_context(hip_dev)?; + *pctx = cu_ctx; + Ok(()) } unsafe extern "system" fn get_module_from_cubin_ext1( @@ -527,6 +529,8 @@ pub(crate) unsafe fn launch_kernel_ex( #[cfg(test)] mod tests { + use std::i32; + use crate::r#impl::driver::AllocationInfo; use crate::tests::CudaApi; use cuda_macros::test_cuda; @@ -571,4 +575,34 @@ mod tests { } assert_eq!(alloc_info.get_offset_and_info(0x2000 + 8), None); } + + #[test_cuda] + fn primary_context_is_inactive_on_init(api: impl CudaApi) { + api.cuInit(0); + let mut flags = u32::MAX; + let mut active = i32::MAX; + api.cuDevicePrimaryCtxGetState(0, &mut flags, &mut active); + assert_eq!(flags, 0); + assert_eq!(active, 0); + } + + #[test_cuda] + unsafe fn cudart_interface_fn2_creates_inactive_primary_ctx(api: impl CudaApi) { + api.cuInit(0); + let mut table_ptr = std::ptr::null(); + api.cuGetExportTable(&mut table_ptr, &dark_api::cuda::CudartInterface::GUID); + let cuda_rt_iface = dark_api::cuda::CudartInterface::new(table_ptr); + let mut dark_ctx = std::mem::zeroed(); + cuda_rt_iface + .cudart_interface_fn2(&mut dark_ctx, 0) + .unwrap(); + let mut flags = u32::MAX; + let mut active = i32::MAX; + api.cuDevicePrimaryCtxGetState(0, &mut flags, &mut active); + assert_eq!(flags, 0); + assert_eq!(active, 0); + let mut primary_ctx = std::mem::zeroed(); + api.cuDevicePrimaryCtxRetain(&mut primary_ctx, 0); + assert_eq!(dark_ctx.0, primary_ctx.0); + } }