From 1f1b770a511e4dd9116167bf1b02aae0df07ee3b Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Sun, 1 Dec 2024 03:36:16 +0000 Subject: [PATCH] Fix more missing stuff in the host code --- ptx/lib/zluda_ptx_impl.bc | Bin 4624 -> 4816 bytes ptx/lib/zluda_ptx_impl.cpp | 8 ++++---- zluda/src/impl/memory.rs | 12 +++++++++++- zluda/src/impl/mod.rs | 2 +- zluda/src/lib.rs | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 6651430dc0372d68f8e8118c1b2f5ed333d7d0a9..4b5a5d8219ac46a8c19cabf67a36dea6f3cbe886 100644 GIT binary patch delta 2528 zcmaJ?Z%iBa8Gg@q_SwdUvki$83_S;j0nMm!!5C?4k3fi<(8%@DX={>l1{=(3CkKK2 zq1AkNZcGqr?H-fR+7BjURaWf-5mMDnn`S31N-7kkU8QA_xNdU|13 z#=Rem`}|8>iVM!n>zlRCRK~;YJ&+CV3iBc(8UdcM1Dpnc+C$C<|3ag_N9U{INmz%B z{DeM6@rsr@T)C{vH^UFXFFeA$4KZbeI+74MN-)ay*!qudlRQ>5!+9xvI!aGv(vr-c z@w2CWX>T+w<=6?>?5_WG(E)??n@-1SsO}cj-`%L&Ty(5X*Z;A*{+8hQwBT4x*FNzA?+I$sM_~5IDBtD%;?S-Xf=W}0SNCFN~ z_vrO1FkGZ_RhlEOZTeM=>3%#U&e66DQj{I{rN^YSSMk%FMyOYw zqeUgkv?StCbvDP2W!Ui?J04(9srQpS8_!gp64)t`ow%XgKB`q z(((05Y*Ui5QtZ7LVB?Z;?1t`dnaU}Nos!w94EyNLQzH9flkk#ewO?%8Y*k~Nns}}a4G{tS?rEa0N2-)-PgScBA({55oOm7-34m-+bV^N#$W2r zNgt!2ZDheo8&Ca0^$tsaX3o&Kf0gMmHh^o60<)4P<_mD}E6$$?Uph2hQlA?69fO`<)0kp_tjLy|(GBz>=;ZoA1 zk0JNJe2f3bm;G;F<5tn>hn&`8!+k|jWW#%vctvC72PxjZ=l4Nwg?U$CyOqU?T6gdJ zuLjL?09VZK@wydL4`=GEPTB8fZ@&g3J9HtxOIu$s9F^}`rmtH~blrL7uN4leS2376 z%unl+g~5(=+dWH5$=CeO3$s`G_srebtsGrvP<~^o#g2b4eM9&= z$+AAPhI|d<)L+r+2i7;cuhm&L&4ugMYPu#RWT<7TxU4&hE713!6+mT*VBVhSGrE$t zh28vT{^>qRv)%E=!C71K_1(O##X2aZre{-j$A;$IV>9;~Z@;3ye@+}R%udht@*lxX ztT@I4R3LgPxm_tuLF>;Qm31FzXXInD@lV>T;`4Ilm)Zw6qO#>@x+Y6ZuD(@z3L_?f zpJJYsu9)Y^X`u1CqimRK6WeeU#XTZt=M%hH5WhquTjNxL=W$FHMa6P_qWGD#p@V!b2@;0^3(ew~yth;=C%_VNHF&A0#{UvI zB1lCw{v+f_T@}^%zaZxTe4~^GrN(3iG|rwhP*F{=BacrpsHiq@BVUQUifa55<#-j< z`0tkaB4YpIj7j6;tt|D=e zJtIbzlo&z6%S>rZ&IjQL5}v^$rkZ+gOn7e0W%3yM$yQqF`7SI00|U`fX<(>vax^?J z+&+Zwwc)lwuRGKgZV$y`r{b+cC*sm*EHd6Yc`_1KnmFDZiibyrj<*h?|HOpyEO(9> oKQSpO>s*WR_+)%wg#0_HWGyzO$+A&hEE11QguYCMvG|>3y_SlI zW`b8U++2d28PjO8#bq(svJ_zpb57afGXI%DV=`T$A;vhDY`W(yedl84+w}dt_j|s6 z_q^}jjpy~{_T@4y{duj7t-pWfhMf0iL6RtSh`VYN!Bk@?1|+8VJtDdcu~W^-c^<+wZ- zbeYXEfI}}})!ULGwCkwmA`9}&NI~79Wyw4>Bkn{?BsOR3eXF!Wb z^$CGNAyAtL+=(Vdz!_tA%EUxw43~wDc-rkC1mfiaAnaJ65$_I6JFV)>zkry2SZY#&^auCcywtDdKbbVgwj3ctJ$g3vIE!Gt-J$_?V$iD%-vOk6Nb}Wc-oz zGB{kpQ;i3I0Ih{O!FA2~^J}%D3>#`dJ51nXZ|p1;0(o4W|v{DGxlM z>JG!StP|fKObfrCr||R4jUACtZ$u#^^B!(nRv>7?_;-HuafZfQEXPzSU$5Rw9N|E{xAx}h8r2bzb zM?PjSwRm<1zo3CAr$__tA%|ZTphycCv0G;3DN_G5i<%Lk0aLqw{^ZmB8oIo zT_S)YP1KU$DN;U^$U~9lIg;?FNO%|7vmrE)eWyqaWY5UhECIZQXl0`}$*IwQl@(LK z->{`V;BRi}+~E)C$(C-hZChhQTYbwG-E7Iw&si)f%~MQg%Raw;AGsTr8}udo58+lj AKL7v# diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index d0ec853..f86a7fd 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -25,16 +25,16 @@ extern "C" return (uint32_t)__ockl_get_local_size(member); } - size_t __ockl_get_global_id(uint32_t) __device__; + size_t __ockl_get_group_id(uint32_t) __device__; uint32_t FUNC(sreg_ctaid)(uint8_t member) { - return (uint32_t)__ockl_get_global_id(member); + return (uint32_t)__ockl_get_group_id(member); } - size_t __ockl_get_global_size(uint32_t) __device__; + size_t __ockl_get_num_groups(uint32_t) __device__; uint32_t FUNC(sreg_nctaid)(uint8_t member) { - return (uint32_t)__ockl_get_global_size(member); + return (uint32_t)__ockl_get_num_groups(member); } uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device)); diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index b23afa9..3843776 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -1,7 +1,9 @@ use hip_runtime_sys::*; pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t { - unsafe { hipMalloc(dptr.cast(), bytesize) } + unsafe { hipMalloc(dptr.cast(), bytesize) }?; + // TODO: parametrize for non-Geekbench + unsafe { hipMemsetD8(*dptr, 0, bytesize) } } pub(crate) fn free_v2(dptr: hipDeviceptr_t) -> hipError_t { @@ -23,3 +25,11 @@ pub(crate) fn copy_hto_d_v2( ) -> hipError_t { unsafe { hipMemcpyHtoD(dst_device, src_host.cast_mut(), byte_count) } } + +pub(crate) fn get_address_range_v2( + pbase: *mut hipDeviceptr_t, + psize: *mut usize, + dptr: hipDeviceptr_t, +) -> hipError_t { + unsafe { hipMemGetAddressRange(pbase, psize, dptr) } +} diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 7b4afc5..766b4a5 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -162,7 +162,7 @@ impl LiveCheck { } fn as_handle(&self) -> T::CudaHandle { - unsafe { mem::transmute_copy(self) } + unsafe { mem::transmute_copy(&self) } } fn wrap(data: T) -> *mut Self { diff --git a/zluda/src/lib.rs b/zluda/src/lib.rs index bda67e1..1568f47 100644 --- a/zluda/src/lib.rs +++ b/zluda/src/lib.rs @@ -71,6 +71,7 @@ cuda_base::cuda_function_declarations!( cuModuleLoadData, cuModuleUnload, cuPointerGetAttribute, + cuMemGetAddressRange_v2, ], implemented_in_function <= [ cuLaunchKernel,