From ea60c6a249141a958670ee48e772f2ceef43a6ea Mon Sep 17 00:00:00 2001 From: Violet Date: Fri, 12 Sep 2025 21:06:41 +0000 Subject: [PATCH] Fixes --- ptx/src/pass/llvm/emit.rs | 10 +++------- ptx/src/test/ll/fmax.ll | 4 ++-- ptx/src/test/ll/max.ll | 4 ++-- ptx/src/test/ll/min.ll | 4 ++-- ptx/src/test/ll/min_f16.ll | 6 +++--- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ptx/src/pass/llvm/emit.rs b/ptx/src/pass/llvm/emit.rs index 2f39678..6ac676e 100644 --- a/ptx/src/pass/llvm/emit.rs +++ b/ptx/src/pass/llvm/emit.rs @@ -2305,8 +2305,6 @@ impl<'a> MethodEmitContext<'a> { ) }); } else { - let name = self.resolver.get_or_add(arguments.dst); - unsafe { LLVMSetValueName2(min, name.as_ptr().cast(), name.len()) }; self.resolver.register(arguments.dst, min); } Ok(()) @@ -2328,7 +2326,7 @@ impl<'a> MethodEmitContext<'a> { let a = self.resolver.value(arguments.src1)?; let b = self.resolver.value(arguments.src2)?; - let min = self.emit_intrinsic( + let max = self.emit_intrinsic( unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) }, None, Some(&data.type_().into()), @@ -2353,14 +2351,12 @@ impl<'a> MethodEmitContext<'a> { self.builder, is_nan, LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN), - min, + max, dst, ) }); } else { - let name = self.resolver.get_or_add(arguments.dst); - unsafe { LLVMSetValueName2(min, name.as_ptr().cast(), name.len()) }; - self.resolver.register(arguments.dst, min); + self.resolver.register(arguments.dst, max); } Ok(()) } diff --git a/ptx/src/test/ll/fmax.ll b/ptx/src/test/ll/fmax.ll index e7854dc..546ed5d 100644 --- a/ptx/src/test/ll/fmax.ll +++ b/ptx/src/test/ll/fmax.ll @@ -28,8 +28,8 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace store half %"51", ptr addrspace(5) %"43", align 2 %"53" = load half, ptr addrspace(5) %"43", align 2 %"54" = load half, ptr addrspace(5) %"42", align 2 - %"52" = call half @llvm.maxnum.f16(half %"53", half %"54") - store half %"52", ptr addrspace(5) %"44", align 2 + %2 = call half @llvm.maxnum.f16(half %"53", half %"54") + store half %2, ptr addrspace(5) %"44", align 2 %"55" = load i64, ptr addrspace(5) %"41", align 8 %"56" = load half, ptr addrspace(5) %"44", align 2 %"61" = inttoptr i64 %"55" to ptr diff --git a/ptx/src/test/ll/max.ll b/ptx/src/test/ll/max.ll index 88a9b60..f7d6e65 100644 --- a/ptx/src/test/ll/max.ll +++ b/ptx/src/test/ll/max.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smax.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call i32 @llvm.smax.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr diff --git a/ptx/src/test/ll/min.ll b/ptx/src/test/ll/min.ll index f7207b6..1c276ab 100644 --- a/ptx/src/test/ll/min.ll +++ b/ptx/src/test/ll/min.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr diff --git a/ptx/src/test/ll/min_f16.ll b/ptx/src/test/ll/min_f16.ll index cdfc46c..f0c7c4d 100644 --- a/ptx/src/test/ll/min_f16.ll +++ b/ptx/src/test/ll/min_f16.ll @@ -26,8 +26,8 @@ define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrsp store half %"47", ptr addrspace(5) %"41", align 2 %"49" = load half, ptr addrspace(5) %"40", align 2 %"50" = load half, ptr addrspace(5) %"41", align 2 - %"48" = call half @llvm.minnum.f16(half %"49", half %"50") - store half %"48", ptr addrspace(5) %"40", align 2 + %2 = call half @llvm.minnum.f16(half %"49", half %"50") + store half %2, ptr addrspace(5) %"40", align 2 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load half, ptr addrspace(5) %"40", align 2 %"57" = inttoptr i64 %"51" to ptr @@ -40,4 +40,4 @@ define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrsp declare half @llvm.minnum.f16(half, half) #1 attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file