From 778c4efd2b9f10efa5618ef77a1cbe170d734da2 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 24 Sep 2025 04:05:57 +0000 Subject: [PATCH] Various fixes --- llvm_zluda/src/lib.cpp | 20 ++++++++ llvm_zluda/src/lib.rs | 6 +++ ptx/lib/zluda_ptx_impl.bc | Bin 24456 -> 24708 bytes ptx/lib/zluda_ptx_impl.cpp | 13 ++++++ ptx/src/pass/llvm/emit.rs | 92 ++++++++++++++++++++++++++++++------- ptx/src/pass/mod.rs | 5 ++ zluda/src/impl/function.rs | 27 +++++++---- zluda/src/impl/kernel.rs | 4 +- zluda_common/src/lib.rs | 4 +- 9 files changed, 143 insertions(+), 28 deletions(-) diff --git a/llvm_zluda/src/lib.cpp b/llvm_zluda/src/lib.cpp index c8ac2d7..1151330 100644 --- a/llvm_zluda/src/lib.cpp +++ b/llvm_zluda/src/lib.cpp @@ -196,4 +196,24 @@ void LLVMZludaBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, Name); } +void LLVMZludaSetAtomic( + LLVMValueRef AtomicInst, + LLVMAtomicOrdering Ordering, + char * SSID) +{ + auto inst = unwrap(AtomicInst); + if (LoadInst *LI = dyn_cast(inst)) + { + LI->setAtomic(mapFromLLVMOrdering(Ordering), LI->getContext().getOrInsertSyncScopeID(SSID)); + } + else if (StoreInst *SI = dyn_cast(inst)) + { + SI->setAtomic(mapFromLLVMOrdering(Ordering), SI->getContext().getOrInsertSyncScopeID(SSID)); + } + else + { + llvm_unreachable("Invalid instruction type for LLVMZludaSetAtomic"); + } +} + LLVM_C_EXTERN_C_END \ No newline at end of file diff --git a/llvm_zluda/src/lib.rs b/llvm_zluda/src/lib.rs index 18046a5..37b1d97 100644 --- a/llvm_zluda/src/lib.rs +++ b/llvm_zluda/src/lib.rs @@ -78,4 +78,10 @@ extern "C" { scope: *const i8, Name: *const i8, ) -> LLVMValueRef; + + pub fn LLVMZludaSetAtomic( + AtomicInst: LLVMValueRef, + Ordering: LLVMAtomicOrdering, + SSID: *const i8, + ); } diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index bc375c3b14ff5032dff6fc592f683e1b081a93be..1bc9856d3b240aa232f637da122f74c265ebc041 100644 GIT binary patch delta 5570 zcmeC!&)9O1ae@ltjE$;noXIR#SsqDsBrz@IaWQOM!M12dKn!!BgfLsGf||%x2a#hf z46_ANggd-%@8E1Q@=;jSsG-Jj*TC_^1kR)80iryQ8jHGK{z2Ln3;lLJVENrAb(E#y^8AcKJ|h!4h+21|rA*Dy73GJ&}a3=Ah4 z6sP)GiU}|XGO#p&1X)0Cy1=!YAz>1T3&utX9;{K4Yz&uKz+46fh6#+$tM07gQDD%3 ziGbX+Gh8Bt!5|bSRL>x3u%yb+i<#*FQv-tl0|P??$iAyH8Xegff>~k8KxPHbHc?>E zVc=k3WME+60GV}jmSh2gK`cxW$f!r#Pct!rRe(iLG$>v@)9T2|a2eSstzFK{d<@ID z85kJq!OB3vzQ>$Rl)*SikY4MholIsN zVBlcjgNcERGm#3A7hsTu34xSdFyUzCXIRDy69Xw*DdhuG2KGNlU4`inJBAr%Fl8WH z-)0-sIxv*Lgh0Vtbo0S)hJ;&iA)^G3tJ@gAGBREOMG9Cr<-i23lIHi!4VlbfJ|qE& z{$RVz&f>rh69Pr(Pu?B;^=MAhmuWc2&M+B8ojIS4Aw$DUWaC1$*(D_z7#ZLh2jn=T z=LfWz8D<=U*}(wvtBUM~gOUsfz974gYaO$h1vA48J6QZPfV}&$%z-U~fnl-)Tz|@e z30y~*jU1R6X21nODkoMNuw^nZT!srWq#T&=>I$=|2Q$NrV{q*t>p#l%I7%@ba6xhC zqMr`Dmlzn988R@KLsf(F*nRfY1#AokQ{hep#mh1K1ZGo)hL!MS1Iqnhn}wl=YgFN6N2G{&Xri45G0D>gq@Qw2#V-_3SY=`hk;?4G2F$V zJbQyH`vDt+!CaJ}hzXx8C?s6ZdYDP`03(A>23!ZI^otVH;O1z6=NeEUB5K8b7Cp^X ziOu5YXh6v{O1cJx45&#{Yr2?$B?Hp}m{f$>5;+)bcJU(f2!%;dm`RCI-A zbI)L8VL&c#R(%pv;zqOen{G%k151Mx+!HAWCS)CLJ;sD;t7sqhL{=2XnL4jB5kL2U zuYmzvhJ*Ek3g6dEQcoBK7&yQJ;G)DMM?hnb8Y9C5RwjncVZsh9j2)Y&izhIueBfJZ zkd&~7k)x^6h~M2HsbK>nN7F$WpM$nC$2u4U!950+g9{udPm#1|oH_Z5qy^u+fDZ}_ngyAe7#s}@j!c%4N@eVw+#n^%C^UJN zl=S48QqGfqNNF%~P5vpR$p1iK&H+}Idw6r7}c zVW=mV!gr8O(GV2J+D@01ZvvZK~k7v)SX$VOvbp!^+4gBG_7} z(7DN=_w$5Yz7F{xjq*Pf zbx-a!Qxb=2gQ$McUULF$%7e+r%oHUcHo|m*Oo7-w`9HFaP_1BnC)#T!TbQfY7qL(L z!9Hh4do>zb;3znS@#nLKJB}(PN$UR?)!#joLq-_^Tu){*7qL&g&^foEeIA+_3mgrn zFvE2}Sg2mdq=RfmMt~4RcOXDC%x2!nzAb?piYDt@$TDVa_OV5#Yc=l)h1(5$J9#-6?=%P<=d}>L+aPk6H=^-ggZN+G4#t-aQY-m(B)(~o zeZcpk@qL5Be?E;(KN?h~^JggiZcsnL-(mExLGvg73MbA+?NtINf_NHrZwvfjl4>+q zDQIz7y3zEKV8vs(M#~1F9l@IQjW!2`ZZPXN+AkI6aItQ5oGYsFq@vOJpSZ`A?nc*R z5*aSNjqZ;nI#~J}J%327m@=c$r(g0#$jruo*^(Nfvl>H|OJ-c1)fj$GvV(O+W8^2P z6;J$OxK28Mnn1_mMKDFr$V3=GT+3=9?A4B&>A4+8_ZWgWp_4XI=z7#J8b85kH0 zxFGyGZUzQH1_p+HZUzP+1_pSu7A6l;mAN_5YcXTJA_D`%5k>|E5dHuE|NjCi3=B`9 zd>9QAXJcYu05!E>G)!EMi2>aFh0!2!P89|Q9jJO34H5^L?*bKfVPdEU8wit#ff@v( zLE<2TvY;AZG)Np|P!m)fM#IEsLCt~DAaRg6E1>FO^#A|$5Fa04f&>zb25A6UaFdCF zL6m`k0Y-zwIdm8p-Z4Q!3`T>*LF)OL85l$u7#LtQNE~FbGBX1MsDlWjLE<3w&h^X; z48jZy49-v*BmvS8#>~K=#=yV;qe0>z4K>UR44|$Nj0TB=95k63;vg6e5(lZ@!ps2f z_Q7bF_#I{j24zsU3&wy+{D*3Q)6g`b&H~W@qhad3SQr@885kH~G)%mZg@Hkdfq?-= z!^9`EFff4n#!$MR0Vc7L1ric48YB+#*%1~7a7PM8gTz5Td&mL_QWy;rXJmz#1EWFW zAag`mAyEUPA>#E63>vJEpo7sM36KSzP=jDJNE~EP3@ZbJJOcv*j0TB=)K^2*!)TB= zNPQnvJ&Xp4gVe7DISAD412Y&Hz##$Ba1g2iM#D5bfLZ{fLE<3wpII3gWEdD2U^GY^ zWC1@LB!plzOkAH05(OZ-9uyZa312oyE`ZS>agYURPz^8|Bo4Bmg^hs$R5rnAnD_>$ zIE;pg-(h25P+?$TfYJX!`440e13N?lM#D6yutO3ZjE0H3vokPgF)%Q|Xqb2*R6UG_ ziBEy5htV+ceeCrN45|za4Exw2G)&?#JH%%&8Ya%q0dWwFhKU<-KoS>>28n~Ru^Uu9 zj0TB=Je&ztpIHxOfFwW~YM~lnG)NqzVF6Sfj0TB=)Ng^RhtVK$koxOT^)MPF4pRRb zsvbtygCsy2_&6axfYBgvkOn1Ah>u}3NF1cz6{;RagTz7VBRL@<38O*cAoX=naTpC1 zuV+}y$-tn^zyPPA5)9`!85o=x7~nKaT!4##!G(c=0Y<~bL%1LwfYBgvP~y$tVqgFb zy1;0dcqv%Co`C_vKxDrrE(QirCm%+`H1u&nYCjka6JN#!$u%$oLgMuWt8 zK^6yaLn?`Y$rpp&Ip0899f|sz6+>4kE%FsR~`3E~WxUTOLVu9GYRE!ECs21=}J6HI^GK z3bP$zZXNR#n3BjO$m?S0_@a%osoh6oU!z73r<19VzySpwHI@W}#1(524+ZcZVPWJs zlE4%c$s8zgTEd588Kkq0i%#dJ!f(9OmppSjVjEol;92htl z7#JKFQVvY;sx{zZP-BOxVNhUh3sGwEWiT*=i7-eSEV-nZ&cw8Uv4KH=fq|ic;Y5R? zs8pf|gAX4}kpmmgN(au>3<*^*5eB0KkNQ^cK2{b676wKIkTBx}MrWc?KUTm>@{qKcg837&sbuVPYW1ZITR-7hsTq34xTIG3IFI zXHY{?rY_}!uB^o5haJNV6PQj=Y(C92sC8f{01JWrFKMtO>FR^u3<;OOVj#jO!DH(> z#;=Tw2atpJRebY%<_1q@kO~k!(V#f>Jlkb<76y3g1O@L7-W}*JTPxjgkewkIMcFrA z8^d~rhLgy~UFBkzlw@H1fNX155A%8+h6E}*GcZg>Hc_jb@5TcjhJ;Tb zw}G)yg2&R!jFJl&8GI7K0w4mE#`p7`VBlp)5UNKtwA_F#lY!wfJl;UrZ`BcIQx9f_ z8OK0+!59=H`EosuQVa)N*uWwnLegN#q^}OVmlzn9A#x@tAMR&QUBJd*Fct16Q1l(L zOJFu-Xs}qx1JVn|MhPBQe;t&FU@(}8qV$3N0uu!WEd~w-kT3_x_Y)UPHWU=DHx|)A z&pFi^J&mjklO@3>f{2s@6HbY>n&qnn8I(6D1Ob0~- zLmb456CBVz|g$YYL+@!+{bMs~!a}nbS@Z;I zDK?9rg8^PNg5q<5jzJ*xMs#3tXBa-RG`N|5o=WHD)@%}b<3 z*ceMD?^Ha%*f6S)zsE!e)KF z1B^^RST}#vmu6&qJek+fk}+hmyP+Z1V}l+E5eJSYMwiJ=hDMB!CvP=WWtw0y`HrEU zpbOtYHbp~F)NuDTNwhF9Y?d~f#mqa))pNpOjSfeSh8PBh$%jmhyo31P9N>c@hpmDQ zk_?;9GTuFG`+(UF69wd&G~^UJ{93^Oy@3DM0yH$)-b}%9BeTa9XIo6v!^+4gBG_7} z(7DN=_w$5YJ`edXj`Cj~%Dr<0GvMUp1!js8m)L7BfYFZjY6t^DPQGcTC=SvGQ~vIu z9E>|z&|FCzstuz0LVNWNuqhWNdzmXrKx~BR1ZjiVKDpT($wsJFu)ZDb)sy#_tJf#7 zPk6yTdqsN{8d~5eIEC@&vxXatDg{dFzYeOsS}2E%G6J}s%w|qvpKzdaPC@%zG&2@B z8ctz`>%NnyUUdk?jEn#wi0*u-cFy@T*f+opS>PBrg%wTvGe(^wNjeaRWCRGZhBPp; z$^;l1GhCU?ypnxu05=pVoabOrc&Nog1 zjQ%%hb_%R;;%e01C2->tf1_T9pu}OxM&pfw5tn5e&F>4&aF%bhS|GIJvrbl{ZHw>= zW6MVSKO!7g@){lQifgoVHaahr@G$9abUh@I@ujEH{klX)OJAecJBbr6{f&OVBsf@S zGzPUvdK{h67`j$6Iw_hGr%P1|jAt3OWo744{(KZ1P+$X~T3- zg~bIT7#M8085jf^7#MuH85o2Z7#QHq3aC6tRr=~LLG)!EI31SY628n~r(SWMgU}C68vDg7B z0i!`0Ko&$m4T8}iagaf2Q1vhxBo0zv#{}^+jE0F%XJTLwWnf@{(f|L~|NqaS!@#hH ziGe|p0YSqwoMmEQ0CkOEG)Np|@k1t1$B2OeMuWsb7Bez4FbFd+Fu-V#I7qz+GXsMf z0|Nt$28n~zTY(y~;tUK7R?G|xAQ~hAa!?>M#6d6`Bo5M0#LU2;!N9-(qhaE+m>C$9 z85kH~G)#OiR6UG_i9dy^uZJ;U5*#cH4C)LF3@{oduFt~20P0-AXqb2)3j>230|Nt$ zhKc91KtcjWgTz4|YhqzwkY!+CfYS90APJDq=CeS86h_0u_d^YW(I9bi-CawM#D7dvqLn%Xqb2qR6UG_iPx|* zFsL#xFu-V-_(FDw$6z!}{49Gt#DcR>i(nET*dcKVqe0@Je9XcDQ4ga*;vf&Gb3oL? zXplHay){%lj0TB=)W<>9$JIj_APJC$BB%x!4H5@wmz^(&$3VKhh_r2Y(4 zJ&Xp4gVaBOs)y0_APJBLW=@C?U^GY^q(Ph$5~MI1Bo0z<0~LqSF!4lA1_o^g1_l@n z6Yt?Mkb!Uf4TFdC+zmJ3pw!Dx^;D1^Gf1~D+eXplI_ zp^Ld7l`xEkiErV8R6<}n1}Wd4g!%wR!!%rj8U&+Z;?KDtMKz3uiSuzo>IlBcogwa= Vw;`>C{d${khTLG>{5h MethodEmitContext<'a> { data: ast::LdDetails, arguments: ast::LdArgs, ) -> Result<(), TranslateError> { - if data.qualifier != ast::LdStQualifier::Weak { - todo!() - } let builder = self.builder; - let type_ = get_type(self.context, &data.typ)?; - let ptr = self.resolver.value(arguments.src)?; - self.resolver.with_result(arguments.dst, |dst| { - let load = unsafe { LLVMBuildLoad2(builder, type_, ptr, dst) }; - unsafe { LLVMSetAlignment(load, data.typ.layout().align() as u32) }; - load - }); + let needs_cast = !matches!(data.typ, ast::Type::Scalar(_)) + && !matches!(data.qualifier, ast::LdStQualifier::Weak); + let underlying_type = get_type(self.context, &data.typ)?; + let op_type = if needs_cast { + unsafe { LLVMIntTypeInContext(self.context, data.typ.layout().size() as u32 * 8) } + } else { + underlying_type + }; + let src = self.resolver.value(arguments.src)?; + let load = unsafe { LLVMBuildLoad2(builder, op_type, src, LLVM_UNNAMED.as_ptr()) }; + apply_qualifier(load, data.qualifier)?; + unsafe { LLVMSetAlignment(load, data.typ.layout().align() as u32) }; + if needs_cast { + self.resolver.with_result(arguments.dst, |dst| unsafe { + LLVMBuildBitCast(builder, load, underlying_type, dst) + }); + } else { + self.resolver.register(arguments.dst, load); + } Ok(()) } @@ -761,11 +770,21 @@ impl<'a> MethodEmitContext<'a> { arguments: ast::StArgs, ) -> Result<(), TranslateError> { let ptr = self.resolver.value(arguments.src1)?; - let value = self.resolver.value(arguments.src2)?; - if data.qualifier != ast::LdStQualifier::Weak { - todo!() + let needs_cast = !matches!(data.typ, ast::Type::Scalar(_)) + && !matches!(data.qualifier, ast::LdStQualifier::Weak); + let mut value = self.resolver.value(arguments.src2)?; + if needs_cast { + value = unsafe { + LLVMBuildBitCast( + self.builder, + value, + LLVMIntTypeInContext(self.context, data.typ.layout().size() as u32 * 8), + LLVM_UNNAMED.as_ptr(), + ) + }; } let store = unsafe { LLVMBuildStore(self.builder, value, ptr) }; + apply_qualifier(store, data.qualifier)?; unsafe { LLVMSetAlignment(store, data.typ.layout().align() as u32); } @@ -2237,7 +2256,7 @@ impl<'a> MethodEmitContext<'a> { } fn emit_bar_warp(&mut self) -> Result<(), TranslateError> { - self.emit_intrinsic(c"llvm.amdgcn.barrier.warp", None, None, vec![])?; + self.emit_intrinsic(c"llvm.amdgcn.wave.barrier", None, None, vec![])?; Ok(()) } @@ -2966,6 +2985,47 @@ impl<'a> MethodEmitContext<'a> { */ } +fn apply_qualifier( + value: LLVMValueRef, + qualifier: ptx_parser::LdStQualifier, +) -> Result<(), TranslateError> { + match qualifier { + ptx_parser::LdStQualifier::Weak => {} + ptx_parser::LdStQualifier::Volatile => unsafe { + LLVMSetVolatile(value, 1); + // The semantics of volatile operations are equivalent to a relaxed memory operation + // with system-scope but with the following extra implementation-specific constraints... + LLVMZludaSetAtomic( + value, + LLVMAtomicOrdering::LLVMAtomicOrderingMonotonic, + get_scope(ast::MemScope::Sys)?, + ); + }, + ptx_parser::LdStQualifier::Relaxed(mem_scope) => unsafe { + LLVMZludaSetAtomic( + value, + LLVMAtomicOrdering::LLVMAtomicOrderingMonotonic, + get_scope(mem_scope)?, + ); + }, + ptx_parser::LdStQualifier::Acquire(mem_scope) => unsafe { + LLVMZludaSetAtomic( + value, + LLVMAtomicOrdering::LLVMAtomicOrderingAcquire, + get_scope(mem_scope)?, + ); + }, + ptx_parser::LdStQualifier::Release(mem_scope) => unsafe { + LLVMZludaSetAtomic( + value, + LLVMAtomicOrdering::LLVMAtomicOrderingRelease, + get_scope(mem_scope)?, + ); + }, + } + Ok(()) +} + fn get_pointer_type<'ctx>( context: LLVMContextRef, to_space: ast::StateSpace, @@ -2979,7 +3039,7 @@ fn get_scope(scope: ast::MemScope) -> Result<*const i8, TranslateError> { ast::MemScope::Cta => c"workgroup-one-as", ast::MemScope::Gpu => c"agent-one-as", ast::MemScope::Sys => c"one-as", - ast::MemScope::Cluster => todo!(), + ast::MemScope::Cluster => return Err(error_todo()), } .as_ptr()) } @@ -2989,7 +3049,7 @@ fn get_scope_membar(scope: ast::MemScope) -> Result<*const i8, TranslateError> { ast::MemScope::Cta => c"workgroup", ast::MemScope::Gpu => c"agent", ast::MemScope::Sys => c"system", - ast::MemScope::Cluster => todo!(), + ast::MemScope::Cluster => return Err(error_todo()), } .as_ptr()) } diff --git a/ptx/src/pass/mod.rs b/ptx/src/pass/mod.rs index 2e88367..b0614d5 100644 --- a/ptx/src/pass/mod.rs +++ b/ptx/src/pass/mod.rs @@ -139,6 +139,7 @@ enum PtxSpecialRegister { Nctaid, Clock, LanemaskLt, + LanemaskGe, Laneid, } @@ -151,6 +152,7 @@ impl PtxSpecialRegister { Self::Nctaid => "%nctaid", Self::Clock => "%clock", Self::LanemaskLt => "%lanemask_lt", + Self::LanemaskGe => "%lanemask_ge", Self::Laneid => "%laneid", } } @@ -173,6 +175,7 @@ impl PtxSpecialRegister { PtxSpecialRegister::Nctaid => ast::ScalarType::U32, PtxSpecialRegister::Clock => ast::ScalarType::U32, PtxSpecialRegister::LanemaskLt => ast::ScalarType::U32, + PtxSpecialRegister::LanemaskGe => ast::ScalarType::U32, PtxSpecialRegister::Laneid => ast::ScalarType::U32, } } @@ -185,6 +188,7 @@ impl PtxSpecialRegister { | PtxSpecialRegister::Nctaid => Some(ast::ScalarType::U8), PtxSpecialRegister::Clock | PtxSpecialRegister::LanemaskLt + | PtxSpecialRegister::LanemaskGe | PtxSpecialRegister::Laneid => None, } } @@ -197,6 +201,7 @@ impl PtxSpecialRegister { PtxSpecialRegister::Nctaid => "sreg_nctaid", PtxSpecialRegister::Clock => "sreg_clock", PtxSpecialRegister::LanemaskLt => "sreg_lanemask_lt", + PtxSpecialRegister::LanemaskGe => "sreg_lanemask_ge", PtxSpecialRegister::Laneid => "sreg_laneid", } } diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 90afb51..ee1b557 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -1,22 +1,33 @@ +use cuda_types::cuda::CUfunction_attribute; use hip_runtime_sys::*; +use std::mem; pub(crate) fn get_attribute( pi: &mut i32, - cu_attrib: hipFunction_attribute, + cu_attrib: CUfunction_attribute, func: hipFunction_t, ) -> hipError_t { // TODO: implement HIP_FUNC_ATTRIBUTE_PTX_VERSION // TODO: implement HIP_FUNC_ATTRIBUTE_BINARY_VERSION match cu_attrib { - hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION - | hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => { + CUfunction_attribute::CU_FUNC_ATTRIBUTE_PTX_VERSION + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_BINARY_VERSION => { *pi = 120; return Ok(()); } + CUfunction_attribute::CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => { + *pi = 0; + return Ok(()); + } _ => {} } - unsafe { hipFuncGetAttribute(pi, cu_attrib, func) }?; - if cu_attrib == hipFunction_attribute::HIP_FUNC_ATTRIBUTE_NUM_REGS { + unsafe { hipFuncGetAttribute(pi, mem::transmute(cu_attrib), func) }?; + if cu_attrib == CUfunction_attribute::CU_FUNC_ATTRIBUTE_NUM_REGS { *pi = (*pi).max(1); } Ok(()) @@ -55,12 +66,12 @@ pub(crate) fn launch_kernel( pub(crate) unsafe fn set_attribute( func: hipFunction_t, - attribute: hipFunction_attribute, + attribute: CUfunction_attribute, value: i32, ) -> hipError_t { match attribute { - hipFunction_attribute::HIP_FUNC_ATTRIBUTE_PTX_VERSION - | hipFunction_attribute::HIP_FUNC_ATTRIBUTE_BINARY_VERSION => { + CUfunction_attribute::CU_FUNC_ATTRIBUTE_PTX_VERSION + | CUfunction_attribute::CU_FUNC_ATTRIBUTE_BINARY_VERSION => { return hipError_t::ErrorNotSupported; } _ => {} diff --git a/zluda/src/impl/kernel.rs b/zluda/src/impl/kernel.rs index ab45b04..e4c3404 100644 --- a/zluda/src/impl/kernel.rs +++ b/zluda/src/impl/kernel.rs @@ -1,4 +1,4 @@ -use cuda_types::cuda::CUresult; +use cuda_types::cuda::{CUfunction_attribute, CUresult}; use hip_runtime_sys::*; use crate::r#impl::function; @@ -9,7 +9,7 @@ pub(crate) unsafe fn get_function(func: &mut hipFunction_t, kernel: hipFunction_ } pub(crate) unsafe fn set_attribute( - attrib: hipFunction_attribute, + attrib: CUfunction_attribute, val: ::core::ffi::c_int, kernel: hipFunction_t, _dev: hipDevice_t, diff --git a/zluda_common/src/lib.rs b/zluda_common/src/lib.rs index 4f8aef7..4c76ef1 100644 --- a/zluda_common/src/lib.rs +++ b/zluda_common/src/lib.rs @@ -173,12 +173,12 @@ from_cuda_nop!( cublasLtMatmulDescAttributes_t, CUmemAllocationGranularity_flags, CUmemAllocationProp, - CUresult + CUresult, + CUfunction_attribute ); from_cuda_transmute!( CUuuid => hipUUID, CUfunction => hipFunction_t, - CUfunction_attribute => hipFunction_attribute, CUstream => hipStream_t, CUpointer_attribute => hipPointer_attribute, CUdeviceptr_v2 => hipDeviceptr_t,