From 5307412337bac585786773540df7ce4c55bfbc74 Mon Sep 17 00:00:00 2001 From: Violet Date: Fri, 12 Sep 2025 18:51:33 +0000 Subject: [PATCH] Disable wave mode IEEE --- ptx/src/pass/llvm/emit.rs | 1 + ptx/src/test/ll/min.ll | 2 +- ptx/src/test/ll/min_f16.ll | 2 +- ptx/src/test/ll/min_nan_f16.ll | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ptx/src/pass/llvm/emit.rs b/ptx/src/pass/llvm/emit.rs index 345d7fd..6da96ed 100644 --- a/ptx/src/pass/llvm/emit.rs +++ b/ptx/src/pass/llvm/emit.rs @@ -149,6 +149,7 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> { llvm_ftz(method.flush_to_zero_f16f64), ); } + self.emit_fn_attribute(fn_, "amdgpu-ieee", "false"); for (i, param) in method.input_arguments.iter().enumerate() { let value = unsafe { LLVMGetParam(fn_, i as u32) }; let name = self.resolver.get_or_add(param.name); diff --git a/ptx/src/test/ll/min.ll b/ptx/src/test/ll/min.ll index 70b678e..f7207b6 100644 --- a/ptx/src/test/ll/min.ll +++ b/ptx/src/test/ll/min.ll @@ -36,5 +36,5 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/min_f16.ll b/ptx/src/test/ll/min_f16.ll index d5e4f92..cdfc46c 100644 --- a/ptx/src/test/ll/min_f16.ll +++ b/ptx/src/test/ll/min_f16.ll @@ -39,5 +39,5 @@ define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare half @llvm.minnum.f16(half, half) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/min_nan_f16.ll b/ptx/src/test/ll/min_nan_f16.ll index 0ffd981..90b4780 100644 --- a/ptx/src/test/ll/min_nan_f16.ll +++ b/ptx/src/test/ll/min_nan_f16.ll @@ -41,5 +41,5 @@ define amdgpu_kernel void @min_nan_f16(ptr addrspace(4) byref(i64) %"36", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare half @llvm.minnum.f16(half, half) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }