From 7f4b430267a678d47010475493a6ddea31168a16 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 25 Sep 2025 19:06:34 +0000 Subject: [PATCH 1/5] Fix formatting --- ptx/src/test/spirv_run/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index acad0b6..7ac0105 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -352,7 +352,11 @@ test_ptx!( [613065134u32] ); test_ptx!(param_is_addressable, [0xDEAD], [0u64]); -test_ptx!(atomics_128, [0xce16728dead1ceb0u64, 0xe7728e3c390b7fb7], [0xce16728dead1ceb1u64, 0xe7728e3c390b7fb8]); +test_ptx!( + atomics_128, + [0xce16728dead1ceb0u64, 0xe7728e3c390b7fb7], + [0xce16728dead1ceb1u64, 0xe7728e3c390b7fb8] +); test_ptx!(assertfail); // TODO: not yet supported From b5065edc51ed2050877a5fa4d0619091d3dce144 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 25 Sep 2025 20:02:05 +0000 Subject: [PATCH 2/5] Rebuild bitcode --- ptx/lib/zluda_ptx_impl.bc | Bin 24896 -> 25144 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index afc9c2c502e413f473ecd473e5f781656d4a9e4c..fef08533c25992908b3538dc52859f68d213af89 100644 GIT binary patch delta 5772 zcmX?bh;hde#tABn?>DN-aVE1|WqBmkk;Jr+$HlO51>2$-0Wr*h62fe$3Th%(9Yl_` zFw7Q65$^E5y@RvK$VXvOqlOyCT?5At6F85W3$z+Ku4vD!oZ`(Gu6~bVpZb| zg@#569|x^$1+HYtX3m2qo0oBZXJit#-aMZ>oUxvhnVmt6jR6W2nA<|03bXMs`0zkQ z7#!GmR{C(vW=P0_i7*%?cszAJ$HLOU%)qDs5@npg=&WUSNP}U99Ec0T4LlM-6NSte z4_pE9z&Pc=gs$mxI5`;D8JHYEB1{U*^=%=qS^^miY(ac5mNZx*q`8Kvfs+Z$Wnf@9 z(V#fh&r(c)L6Cu^0VK!*a?=H_-3$qnKwL03O7LKfl4N7J%mU^zFfdGDbY6956^{ag z222Fxrk&vuDGUaoFrj(|NrNR-hF;7}2bdZd1Q-|?8bJ14ozdvX#t_U3QwB0CaJGp8 zgAM}+10w?i0|&^go3kVf7z|=ziaX}wYR))*SMrrMGX69p9 z#?8RMP!CoH3idtbY@!T4;$S`l131|K@$*_R%+P}gfvlXHXn>(C#Lxm}99SvHw2u-U z=(et!$z#LApazdvkg~#hNgoLT21&3!sI4BxFRU15n81WUwzj4-u*oy{NW;WH;qPQJ z;{XE(10PHbWSogqfV=>MEKCTb?1BkLGe5&JUYHn2*-9xNm@=^cLFy_@f7mh1FoP)r z+4?rypw@w*1SSLu-lCfielsN8f(scXcwF7a_?40I0w_|z!YKzPXq7a-XKu)32J;~a zNc0EWWp)+^c9;+-LVxn^;IBt>n!ZfKL3W18DC*4lYz!G1ULqS8s?9Db$-u|}&p05* z89hIs&CD?45X=q+kY80~Hyo5?IPeA8eO&99%`BK1X4t{vp8@3Ek7W*Q84L`QCE)r~ z4ou)W!ffQg%rFBk2vRw*(ts_Kf#EV-kRj#3gjZLXO+A zDxU-_K+hmE7a6dLG5DmxodgQ@5)lnv4hC*W8Bou_1ajtIjh;qUhRN`B2=Wk@4l8=L z6piIdPGCHcg5sN9zKxd}7&b5{Fo3eY0?2(^=V5VQObCYiI#>`+o?5$ zQJUdE35vy^!WZ(~VPIHh4EG)=yWil-e!#|HFc&3kV!{PjEf^X&;n@J>O4h?nn)L@5 z8GJI}27t=ZD6vhP49nmt5EMLbbsU5R7&4KSt*R5#;O1a3g^QhNPz?3qK8u#fwdRY> z;^$~U$)(x4289gJya3LBpjg>0W?;#{v;aAgU(M!@lwm;4T3mZs-IkeP|W(ZJxuuiGz`zL-V+Dz^ZH||gWMkYixl-`} zN9FtZj!7_c;K_EDS4&Q}}}z~jWQ;4o9OL<_@*&F^&&FfvMP-k>kT z$oPEnSp!SPDU;a^4Y{5h^hk&}a5OQtOm;FfVthWi)KHai!sOY8`hqQd2iX)2LGjAn z*Cf%xps@M2;Vfp}d2NwXSS))$A}?4cH<=oFPvZZ;07DL21sfz8Hl1aBd)V#+vppsX z$TexmDR%hxfdBUf{(le9(B%K73XTVvJ)bz+VWJ*ZMn)0A)s3jR4Z8DjrQ8fHRkH|P3)66IOm>duR%i#90jK^ z{(RQ(pi!k_k-C$U`X@zsWRwxW^<*}46Z@nGo%1HN&qp(3furFRX1MMr57iGj=^&et z5g-K7oe$N{y>SQoCb%IB90RAYqG^BEsB`8K)R2q-LDrB4Mpl^sLt}<3vzbq_Z!h46 zB8BrD42oO}MYVr_Yd7x+g*y#=CwVy-?=}cs=d}>L*C6tiH=^-=gLp4r z2ji;-shxZ~65lq+e&Bo2_@hCgpI>9s&jyv{{25Ar8q{y_cNqO|(Cidg;l$Oby-VOk z5O1UIYk?n3(v1c?1uZViG@3pVtavQnXgNV>N3d3Xqs>L38_X7s_FIKHTx=R0*NSR9 zscdxalkk|*)989lBEzMx(fzYT2g{5`&ko5IQ)V{$%$Ix-GOICQwWNmV?8cDok{MTL zH-_Jn>|kBl7}+Gf;_3g!=-(>f8YY#8fnhci1A`Frlm|Kt3=E(Wbs;wcxC!XPz`(%B zV9&r10agm3vl$o|61X4&%efgC1Q{3@c5pK=2r)3g8{9B?kgDv>2fY_F)+;hFFdSiI zU;xqo|Ns9lpu)iL6v~IuFmW~}1_n?g5Jtnq<(L@24RaU`66aK5V9|_9!CHFUk~x| z0VYTw!Dx^MkOeoH7#Ktu7#LtQNSs55f#Dq!B*b7eNF1b|pP7L{gn@wpMuWsb7ArF| zFo3$QFd8HdQtw>P%)lVbz`)=Pr9l!P4Pnd-3~CGv3@{ob4$@G=%)kKZw83bQILJYh znIR5>(I9b<`Yp^1;0`H_hKb){W?)bTbqHY$n8bgm1~?5(6Y4Aw4KNy}-iw8SL7jnt z0Y<~b3t1Q#lo%KoU^Gm8G7AF(sMijq>lt7Y8(APB0i!|UAfFv!VE}jWU^GY^qU%9%0h91$gX97j4H5@gkOtKNqe0>z z3tHG17(itcjE0GCfQrLtnD`ww1_n^+2BZIj@*l_`26l)9jD~4YVTU9-7!4D5XJ=s0 zVqjo^(J=8ssCpO;6Q2TA52In?``GIl7*rV;81}J4Xqd!fc8JelG)$bI1L7bU4HGxw zfFv#$4H5@sV>hUJ7!48!c{mfQKC>Rm07-x})Iv4DXplHa!vd%}7!48!sow%s52Hci zAobUw>R~iU9Hjm=R6UHY2T6c5@Nq(X0HZLF((E;xHO2UeB zFN}tX=W;QC2ijmXNF0=?Yq>xku4iC?Fc8^qA{PS#s96f5VH)N^#bGo|d^HyX18BGc zM#IFnb3tlL7!4CY4|OP*2IYU4#0xG+H439a;-Db?1~mvqgTz5T;DgrkFd8N<&kd=x zU^GnJgd5^97!4A)1sUWBH3&w7BtYUp+>o*zM#ID_xgnKG<>bUrcaCq6R?4c)8$xd~ oPQD+;%AJ##mztYcoE@K2GWlGX7=oXkI(c5W*yg_QO`zdc0I-w?+yDRo delta 5592 zcmdmSgz>;3#tABn=QgU!aV85~cMuWgNLA?KbTJh;+VV)MT6`G{3}GS+k_JmIDW)?qEnsY55MW?nXka+e zpeQPpD8k^w2UFz0#`Wf8DctC{_#0BA$0~1~?(_-dm-~{nN804lLQ8q~i9}y52ghBR2bzf>=Sizvc zz{0@5pa8N@G%jHR;{g}AQS~VYCTJZ|0<(!Ws=4w!{8$Z;)5{AtS@gi zH86lxfJDG%WhVwO9!P z7gh{2jKE4jal|7L^f#rRflZ#lM+zngQuohj#sLP7240vL$Z?w_1LOr5WMD!dWoL{z zn)w;jP?V`l`JgK+G5KM~FvA3<6BL_IGYx7T7z)5bVE;=REJ?ch;5S3UC9oKXFiP;) zx{mQHBjW+&;C&U}{GPeNlNqD}gikamPCd_dnVp3Jo;pFnyMuQJy35u|HymVV2u4x% zjn~Goo}u9+vT;|r*d-+y7(XD}+SS9no`)fU39b(0iO!+}Z0QUPlaWo-D(AcLfQKRB z6Uc2~Y?R=!^fII50!9X(M6dve0HyK$d?y%q84`r*Q4K9OV9R7+xD1asQ1)AOgxS=C znPJ8;kX|qb#YnzfkE0aB0T(u~2#An0STgCW1MejUhGmGH3Cf52*;5y=F&IpR`w0|% zhwKuVO&J>MSMq@Lg0WG8$JJj4B_bFMW}+y4V86gbfkBIbg8?MW0rLIC1qN(l3_fXK zeINo9?8YJ*==rExqol@#27%q7!fAGYQfOJ3C{+gg5l{dCd~tk3_clPWgr3+<&t8XIMIsH zY@NxIgoNsC#Wc7%7)(J5!T3aj;#F<#vuMd$D^6?{KL-Q6Kn10OmpTT83}~spO3c8L zfvEwWut0A2=8lwMK+Q|1*08!WGdVCeFw`T8>{#v@j4TI0r6t%QAZK3_Q{qOmb+2wn zF$2p1c&PzOrC$45k1?UzdbOT=A}gxnv?RpOJ>WwtM-@w-Gf6#R6ad#aV0#_dJaPmy zWV9F=Ca{9a(#;2i?O2#PtTsu+Gpc^zTWXM$uz``IsnLkv-5{x93nNF$ z7$34Jad}Y z4+;yK1(}%`91RSPOm31&W$AU^z&P1nT5PhcwDV*QX+g$Elf$HqHb0RTVPl*!`KRIm z#)iqeloS}5H{Vuz!^|2VVZg$$`Hb3BcD|yB03IiX8HbsgC0ZCBY|hs^z{tq4*}y=C zk@4wdUqegAlF99chFnh#dL%>~IGPw!CLc01VthLJtD!2Rz+`D7eZdsIgKUb1ps3^S zYm#VT5ZD}TG>e&cmaFH4!x|lq91XDy43nA6jJ%8Z-#y@iB8RPl4U!C-&N4oAwtLZx z1qI}qG~^UJ{5in?a{~XL188V+y_tgJ&SsB0hix%Y4=W?1h+u1>Lgyxf-p>gKSX28kG56l!L9u5Qyxt2GFOy<*a*`JG6iD$Iwl=t zGcp2%AiDFR+Br9@VBZKgWPxMg6jn6tuN-wwEP@)65g^DK(!j_n6JTh}aAh|0PWEjH z+)$)&o`XS=YoSQjLYa&JgDXsgHk)chtjVG)gAm9Mt>XBU-EZ2aWrbq5?B$$-Kc$9 z;0BXqquxwGiOW)r#)kwW9?LeGe-NAztkGz-QD_IVUVWqORN)uSR*m+(q8v~18y(+? zYfR~CblxW6;nLISdRZcarLWQbg+#}c{zk9=5+_1tH2QT*a){1s44N+Kadl>6=sw8| z*7=R$kEA-D{%wrBtpct&7#J8*c^DY_nHU&^n5PuzFfcHHO4E351_n?S@58_VZp1_| zOy=_8GR$OPU@+i({BMuRkfEQo*_1fxOXAcNAN>R~iU9HhRE3F2oM4HKWv#K0iRz`y{b|NpQ5 z|DQvLfng031A`(1f`(~0%f!F{>V?5*kT}TVhfJV87y|>028n|#W@Khy5C+8ylm>}| z)Qd1PFsLywFu-V#I7qz}sLd?Sz`$U|%)kJmK@uPb1u{b%1fxOXAPq=A3!3=A+D zCO(UqfkBypfdNLt#P>qg!)TcJQ>glS7y~B3!NS0x&cMI`qhaFuEDQ{wek_cJi3hSU zFvu}5Fu-V-cs>gxBw#d19OSVk76t}c1_lNwUC#iL0Qqb_3nWNkG)#Oy)F2oQ5(gP{ zi3JihFd8QQj0F;MFd8HdGM}9l;vg6e66esVXJC+GWnhqJK+qrwkOoVr1{e(z2U+X` zRS%;<;vn_etPlsmXplHaeLYk?j0TB=)XxK%59%|584L{IkN|1e$jSijp2BFDhO?}Y z5Q5P#@wcpyD1gy0ac(w9E`ZS>agg~cY!LMzx*imFAPJC*oY)u`K&1?fhKc7v#bGo| zd=?u61E{Ei(J=9SP;nRy6Mw`8Nu)6PKPdl!EM{eAV9;VH#0PdrT*7FOI4B>pa6r_q@f6^0Y-zwK^mq))xl_xI7t0UsCpO; z5(lY2162>BLE<3w51{H{bUjD{q=A_e;sY2B5(jAz=Y#|)j0TB=)Z0MCVKhuUk&}Tz zn}LA=M#IE=I2jn67#J8}G*rBv;T$Ieg9`%#oQ6s;2ysDDFN}tXYjZJx$K_x&NF0=^ zt+^l`fYC5ql!^C62;`Iy+5C$UqWpXhvfCds^G)zM&7o?Vh(J=8|s6$~i zOney^r1pc+AaPKLZh@Kura}21Bmwfl87@ez4WnV=cex<77L108e**h}fdNLt#Q#Ih wfz$sXm7D-Kq-clHAoaW;i_N(qm5TXf(J*(8r;zrB#^#i;n~a<5Bert_0OX?ORR910 From ab1b73ab0462bfa566e6196b7800a63beb023806 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 25 Sep 2025 22:44:49 +0000 Subject: [PATCH 3/5] Avoid memset in malloc during capturing --- zluda/src/impl/memory.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index 70395ed..7e9a1b8 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -1,16 +1,18 @@ -use std::ptr; - +use crate::r#impl::{context, driver}; use cuda_types::cuda::{CUerror, CUresult, CUresultConsts}; use hip_runtime_sys::*; +use std::{mem, ptr}; -use crate::r#impl::{context, driver}; - -pub(crate) fn alloc_v2(dptr: &mut hipDeviceptr_t, bytesize: usize) -> CUresult { +pub(crate) unsafe fn alloc_v2(dptr: &mut hipDeviceptr_t, bytesize: usize) -> CUresult { let context = context::get_current_context()?; - unsafe { hipMalloc(ptr::from_mut(dptr).cast(), bytesize) }?; + hipMalloc(ptr::from_mut(dptr).cast(), bytesize)?; add_allocation(dptr.0, bytesize, context)?; + let mut status = mem::zeroed(); + hipStreamIsCapturing(hipStream_t(ptr::null_mut()), &mut status)?; // TODO: parametrize for non-Geekbench - unsafe { hipMemsetD8(*dptr, 0, bytesize) }?; + if status != hipStreamCaptureStatus::hipStreamCaptureStatusNone { + hipMemsetD8(*dptr, 0, bytesize)?; + } Ok(()) } @@ -68,6 +70,7 @@ pub(crate) unsafe fn host_alloc( ) -> CUresult { let context = context::get_current_context()?; hipHostMalloc(pp, bytesize, flags)?; + unsafe { hipMemsetD8(hipDeviceptr_t(*pp), 0, bytesize) }?; add_allocation(*pp, bytesize, context)?; Ok(()) } From 1b269da239d4c003a20aae6cfaa28695504b0c70 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 25 Sep 2025 22:49:10 +0000 Subject: [PATCH 4/5] Free slightly more disk space --- .github/workflows/pr_master.yml | 16 ++++++++-------- .github/workflows/push_master.yml | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/pr_master.yml b/.github/workflows/pr_master.yml index 8787c81..b44bc17 100644 --- a/.github/workflows/pr_master.yml +++ b/.github/workflows/pr_master.yml @@ -24,14 +24,14 @@ jobs: name: Build (Linux) runs-on: ubuntu-22.04 steps: - - uses: jlumbroso/free-disk-space@main + - uses: jlumbroso/free-disk-space@v1.3.1 with: - # Removing Android stuff should be enough + # Removing Android, .NET, docker stuff should be enough android: true - dotnet: false + dotnet: true + docker-images: true haskell: false large-packages: false - docker-images: false swap-storage: false - uses: actions/checkout@v4 with: @@ -79,14 +79,14 @@ jobs: outputs: test_package: ${{ steps.upload_artifacts.outputs.artifact-id }} steps: - - uses: jlumbroso/free-disk-space@main + - uses: jlumbroso/free-disk-space@v1.3.1 with: - # Removing Android stuff should be enough + # Removing Android, .NET, docker stuff should be enough android: true - dotnet: false + dotnet: true + docker-images: true haskell: false large-packages: false - docker-images: false swap-storage: false - uses: actions/checkout@v4 with: diff --git a/.github/workflows/push_master.yml b/.github/workflows/push_master.yml index 378fefe..40c39f4 100644 --- a/.github/workflows/push_master.yml +++ b/.github/workflows/push_master.yml @@ -18,14 +18,14 @@ jobs: permissions: contents: write steps: - - uses: jlumbroso/free-disk-space@main + - uses: jlumbroso/free-disk-space@v1.3.1 with: - # Removing Android stuff should be enough + # Removing Android, .NET, docker stuff should be enough android: true - dotnet: false + dotnet: true + docker-images: true haskell: false large-packages: false - docker-images: false swap-storage: false - uses: actions/checkout@v4 # fetch-depth and fetch-tags are required to properly tag pre-release builds @@ -117,14 +117,14 @@ jobs: outputs: test_package: ${{ steps.upload_artifacts.outputs.artifact-id }} steps: - - uses: jlumbroso/free-disk-space@main + - uses: jlumbroso/free-disk-space@v1.3.1 with: - # Removing Android stuff should be enough + # Removing Android, .NET, docker stuff should be enough android: true - dotnet: false + dotnet: true + docker-images: true haskell: false large-packages: false - docker-images: false swap-storage: false - uses: actions/checkout@v4 with: From fa36ae539660e9a29262221fb7a426d3a58ec02b Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 25 Sep 2025 23:07:00 +0000 Subject: [PATCH 5/5] Reverse zeroing in host_alloc --- zluda/src/impl/memory.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index 7e9a1b8..4b33460 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -70,7 +70,6 @@ pub(crate) unsafe fn host_alloc( ) -> CUresult { let context = context::get_current_context()?; hipHostMalloc(pp, bytesize, flags)?; - unsafe { hipMemsetD8(hipDeviceptr_t(*pp), 0, bytesize) }?; add_allocation(*pp, bytesize, context)?; Ok(()) }