diff --git a/ptx/src/pass/llvm/emit.rs b/ptx/src/pass/llvm/emit.rs index 8bcf9e1..c811a53 100644 --- a/ptx/src/pass/llvm/emit.rs +++ b/ptx/src/pass/llvm/emit.rs @@ -149,6 +149,7 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> { llvm_ftz(method.flush_to_zero_f16f64), ); } + self.emit_fn_attribute(fn_, "amdgpu-ieee", "false"); for (i, param) in method.input_arguments.iter().enumerate() { let value = unsafe { LLVMGetParam(fn_, i as u32) }; let name = self.resolver.get_or_add(param.name); @@ -2266,22 +2267,46 @@ impl<'a> MethodEmitContext<'a> { let llvm_prefix = match data { ptx_parser::MinMaxDetails::Signed(..) => "llvm.smin", ptx_parser::MinMaxDetails::Unsigned(..) => "llvm.umin", - ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => { - "llvm.minimum" - } ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.minnum", }; let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_())); let llvm_type = get_scalar_type(self.context, data.type_()); - self.emit_intrinsic( + + let a = self.resolver.value(arguments.src1)?; + let b = self.resolver.value(arguments.src2)?; + + let min = self.emit_intrinsic( unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) }, - Some(arguments.dst), + None, Some(&data.type_().into()), - vec![ - (self.resolver.value(arguments.src1)?, llvm_type), - (self.resolver.value(arguments.src2)?, llvm_type), - ], + vec![(a, llvm_type), (b, llvm_type)], )?; + + if let ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { + nan: true, type_, .. + }) = data + { + let is_nan = unsafe { + LLVMBuildFCmp( + self.builder, + LLVMRealPredicate::LLVMRealUNO, + a, + b, + LLVM_UNNAMED.as_ptr(), + ) + }; + self.resolver.with_result(arguments.dst, |dst| unsafe { + LLVMBuildSelect( + self.builder, + is_nan, + LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN), + min, + dst, + ) + }); + } else { + self.resolver.register(arguments.dst, min); + } Ok(()) } @@ -2293,22 +2318,46 @@ impl<'a> MethodEmitContext<'a> { let llvm_prefix = match data { ptx_parser::MinMaxDetails::Signed(..) => "llvm.smax", ptx_parser::MinMaxDetails::Unsigned(..) => "llvm.umax", - ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => { - "llvm.maximum" - } ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.maxnum", }; let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_())); let llvm_type = get_scalar_type(self.context, data.type_()); - self.emit_intrinsic( + + let a = self.resolver.value(arguments.src1)?; + let b = self.resolver.value(arguments.src2)?; + + let max = self.emit_intrinsic( unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) }, - Some(arguments.dst), + None, Some(&data.type_().into()), - vec![ - (self.resolver.value(arguments.src1)?, llvm_type), - (self.resolver.value(arguments.src2)?, llvm_type), - ], + vec![(a, llvm_type), (b, llvm_type)], )?; + + if let ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { + nan: true, type_, .. + }) = data + { + let is_nan = unsafe { + LLVMBuildFCmp( + self.builder, + LLVMRealPredicate::LLVMRealUNO, + a, + b, + LLVM_UNNAMED.as_ptr(), + ) + }; + self.resolver.with_result(arguments.dst, |dst| unsafe { + LLVMBuildSelect( + self.builder, + is_nan, + LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN), + max, + dst, + ) + }); + } else { + self.resolver.register(arguments.dst, max); + } Ok(()) } diff --git a/ptx/src/test/ll/abs.ll b/ptx/src/test/ll/abs.ll index ce76c4a..dda3cb5 100644 --- a/ptx/src/test/ll/abs.ll +++ b/ptx/src/test/ll/abs.ll @@ -30,5 +30,5 @@ define amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.abs.i32(i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/activemask.ll b/ptx/src/test/ll/activemask.ll index 9378005..267d929 100644 --- a/ptx/src/test/ll/activemask.ll +++ b/ptx/src/test/ll/activemask.ll @@ -20,5 +20,5 @@ define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"32", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add.ll b/ptx/src/test/ll/add.ll index 486c349..6df1984 100644 --- a/ptx/src/test/ll/add.ll +++ b/ptx/src/test/ll/add.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add_non_coherent.ll b/ptx/src/test/ll/add_non_coherent.ll index 479df65..0cc3e92 100644 --- a/ptx/src/test/ll/add_non_coherent.ll +++ b/ptx/src/test/ll/add_non_coherent.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"35", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add_s32_sat.ll b/ptx/src/test/ll/add_s32_sat.ll index ff6e26f..69ff9db 100644 --- a/ptx/src/test/ll/add_s32_sat.ll +++ b/ptx/src/test/ll/add_s32_sat.ll @@ -47,5 +47,5 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.sadd.sat.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/add_tuning.ll b/ptx/src/test/ll/add_tuning.ll index 7119903..51e6d61 100644 --- a/ptx/src/test/ll/add_tuning.ll +++ b/ptx/src/test/ll/add_tuning.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"35", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/and.ll b/ptx/src/test/ll/and.ll index 145458b..48d1ac4 100644 --- a/ptx/src/test/ll/and.ll +++ b/ptx/src/test/ll/and.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/assertfail.ll b/ptx/src/test/ll/assertfail.ll index c168cbd..4399e92 100644 --- a/ptx/src/test/ll/assertfail.ll +++ b/ptx/src/test/ll/assertfail.ll @@ -60,5 +60,5 @@ define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"89", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_add.ll b/ptx/src/test/ll/atom_add.ll index 117cda9..ee87cad 100644 --- a/ptx/src/test/ll/atom_add.ll +++ b/ptx/src/test/ll/atom_add.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"39", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_add_float.ll b/ptx/src/test/ll/atom_add_float.ll index 587689c..e261e78 100644 --- a/ptx/src/test/ll/atom_add_float.ll +++ b/ptx/src/test/ll/atom_add_float.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"39", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_cas.ll b/ptx/src/test/ll/atom_cas.ll index 35dcfc2..78593d9 100644 --- a/ptx/src/test/ll/atom_cas.ll +++ b/ptx/src/test/ll/atom_cas.ll @@ -41,4 +41,4 @@ define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"41", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_inc.ll b/ptx/src/test/ll/atom_inc.ll index 6c35f3f..f63e224 100644 --- a/ptx/src/test/ll/atom_inc.ll +++ b/ptx/src/test/ll/atom_inc.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"41", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/b64tof64.ll b/ptx/src/test/ll/b64tof64.ll index bcc972a..373aaf8 100644 --- a/ptx/src/test/ll/b64tof64.ll +++ b/ptx/src/test/ll/b64tof64.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"34", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bar_red_and_pred.ll b/ptx/src/test/ll/bar_red_and_pred.ll index efebbe3..8a0ab36 100644 --- a/ptx/src/test/ll/bar_red_and_pred.ll +++ b/ptx/src/test/ll/bar_red_and_pred.ll @@ -117,5 +117,5 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"76", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bfe.ll b/ptx/src/test/ll/bfe.ll index b262629..c60c67f 100644 --- a/ptx/src/test/ll/bfe.ll +++ b/ptx/src/test/ll/bfe.ll @@ -42,5 +42,5 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bfi.ll b/ptx/src/test/ll/bfi.ll index 827a637..50cb144 100644 --- a/ptx/src/test/ll/bfi.ll +++ b/ptx/src/test/ll/bfi.ll @@ -49,5 +49,5 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"42", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/block.ll b/ptx/src/test/ll/block.ll index 27465f2..89ab97c 100644 --- a/ptx/src/test/ll/block.ll +++ b/ptx/src/test/ll/block.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"37", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bra.ll b/ptx/src/test/ll/bra.ll index 90b186c..16d4636 100644 --- a/ptx/src/test/ll/bra.ll +++ b/ptx/src/test/ll/bra.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/brev.ll b/ptx/src/test/ll/brev.ll index 21a9dc6..31027a5 100644 --- a/ptx/src/test/ll/brev.ll +++ b/ptx/src/test/ll/brev.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"33", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.bitreverse.i32(i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/call.ll b/ptx/src/test/ll/call.ll index 2b19d1d..26f22a8 100644 --- a/ptx/src/test/ll/call.ll +++ b/ptx/src/test/ll/call.ll @@ -60,5 +60,5 @@ define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"51", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/call_rnd.ll b/ptx/src/test/ll/call_rnd.ll index dcc0458..0772498 100644 --- a/ptx/src/test/ll/call_rnd.ll +++ b/ptx/src/test/ll/call_rnd.ll @@ -150,6 +150,6 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"95", ptr addrs ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/clz.ll b/ptx/src/test/ll/clz.ll index 9cc90ab..56d1fd6 100644 --- a/ptx/src/test/ll/clz.ll +++ b/ptx/src/test/ll/clz.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/const.ll b/ptx/src/test/ll/const.ll index 69d877a..0049b11 100644 --- a/ptx/src/test/ll/const.ll +++ b/ptx/src/test/ll/const.ll @@ -47,4 +47,4 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"49", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/const_ident.ll b/ptx/src/test/ll/const_ident.ll index c927ef8..a3f349e 100644 --- a/ptx/src/test/ll/const_ident.ll +++ b/ptx/src/test/ll/const_ident.ll @@ -1,6 +1,6 @@ @x = addrspace(4) global i64 1 @y = addrspace(4) global [4 x i64] [i64 4, i64 5, i64 6, i64 0] -@constparams = addrspace(4) global [4 x i64] [i64 ptrtoint (ptr addrspace(4) @x to i64), i64 ptrtoint (ptr addrspace(4) @y to i64)] +@constparams = addrspace(4) global [2 x i64] [i64 ptrtoint (ptr addrspace(4) @x to i64), i64 ptrtoint (ptr addrspace(4) @y to i64)] define amdgpu_kernel void @const_ident(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { %"51" = alloca i64, align 8, addrspace(5) @@ -52,4 +52,4 @@ define amdgpu_kernel void @const_ident(ptr addrspace(4) byref(i64) %"49", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/constant_f32.ll b/ptx/src/test/ll/constant_f32.ll index 416d390..2e2cbf1 100644 --- a/ptx/src/test/ll/constant_f32.ll +++ b/ptx/src/test/ll/constant_f32.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"34", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/constant_negative.ll b/ptx/src/test/ll/constant_negative.ll index f835eb0..b369192 100644 --- a/ptx/src/test/ll/constant_negative.ll +++ b/ptx/src/test/ll/constant_negative.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"34", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cos.ll b/ptx/src/test/ll/cos.ll index 258860b..d77fefe 100644 --- a/ptx/src/test/ll/cos.ll +++ b/ptx/src/test/ll/cos.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.cos.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cp_async.ll b/ptx/src/test/ll/cp_async.ll index 757e81c..24da789 100644 --- a/ptx/src/test/ll/cp_async.ll +++ b/ptx/src/test/ll/cp_async.ll @@ -51,4 +51,4 @@ define amdgpu_kernel void @cp_async(ptr addrspace(4) byref(i64) %"51", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_f64_f32.ll b/ptx/src/test/ll/cvt_f64_f32.ll index 149e004..7c8027f 100644 --- a/ptx/src/test/ll/cvt_f64_f32.ll +++ b/ptx/src/test/ll/cvt_f64_f32.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll b/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll index 1e19037..fb3f353 100644 --- a/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll @@ -38,4 +38,4 @@ define amdgpu_kernel void @cvt_rn_bf16x2_f32(ptr addrspace(4) byref(i64) %"37", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll b/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll index 9feec9b..8c647c6 100644 --- a/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll +++ b/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @cvt_rn_f16x2_e4m3x2(ptr addrspace(4) byref(i64) %"34" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll b/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll index 35c7a19..81aeb53 100644 --- a/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll +++ b/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @cvt_rn_f16x2_e5m2x2(ptr addrspace(4) byref(i64) %"34" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll b/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll index 5438f43..6747e22 100644 --- a/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll @@ -36,5 +36,5 @@ define amdgpu_kernel void @cvt_rn_satfinite_e4m3x2_f32(ptr addrspace(4) byref(i6 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll b/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll index 01a465c..8825542 100644 --- a/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll @@ -36,5 +36,5 @@ define amdgpu_kernel void @cvt_rn_satfinite_e5m2x2_f32(ptr addrspace(4) byref(i6 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rni.ll b/ptx/src/test/ll/cvt_rni.ll index fc79b9a..d8bd390 100644 --- a/ptx/src/test/ll/cvt_rni.ll +++ b/ptx/src/test/ll/cvt_rni.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"38", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.roundeven.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rni_u16_f32.ll b/ptx/src/test/ll/cvt_rni_u16_f32.ll index 867488a..14191d0 100644 --- a/ptx/src/test/ll/cvt_rni_u16_f32.ll +++ b/ptx/src/test/ll/cvt_rni_u16_f32.ll @@ -34,5 +34,5 @@ declare float @llvm.roundeven.f32(float) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i16 @llvm.fptoui.sat.i16.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rzi.ll b/ptx/src/test/ll/cvt_rzi.ll index 7fb730b..77a31e7 100644 --- a/ptx/src/test/ll/cvt_rzi.ll +++ b/ptx/src/test/ll/cvt_rzi.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"38", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.trunc.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s16_s8.ll b/ptx/src/test/ll/cvt_s16_s8.ll index fbccbb9..65d9486 100644 --- a/ptx/src/test/ll/cvt_s16_s8.ll +++ b/ptx/src/test/ll/cvt_s16_s8.ll @@ -29,4 +29,4 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"34", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s32_f32.ll b/ptx/src/test/ll/cvt_s32_f32.ll index 608e485..e6c3301 100644 --- a/ptx/src/test/ll/cvt_s32_f32.ll +++ b/ptx/src/test/ll/cvt_s32_f32.ll @@ -51,5 +51,5 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"38", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.ceil.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s64_s32.ll b/ptx/src/test/ll/cvt_s64_s32.ll index b9e402f..f2eba44 100644 --- a/ptx/src/test/ll/cvt_s64_s32.ll +++ b/ptx/src/test/ll/cvt_s64_s32.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_sat_s_u.ll b/ptx/src/test/ll/cvt_sat_s_u.ll index c3cedfb..286d67f 100644 --- a/ptx/src/test/ll/cvt_sat_s_u.ll +++ b/ptx/src/test/ll/cvt_sat_s_u.ll @@ -37,5 +37,5 @@ declare i32 @llvm.smax.i32(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvta.ll b/ptx/src/test/ll/cvta.ll index 05902c1..a50af47 100644 --- a/ptx/src/test/ll/cvta.ll +++ b/ptx/src/test/ll/cvta.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"33", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/div_approx.ll b/ptx/src/test/ll/div_approx.ll index 9015cf5..3efdd9d 100644 --- a/ptx/src/test/ll/div_approx.ll +++ b/ptx/src/test/ll/div_approx.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"36", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/div_ftz.ll b/ptx/src/test/ll/div_ftz.ll index f85b107..b66d894 100644 --- a/ptx/src/test/ll/div_ftz.ll +++ b/ptx/src/test/ll/div_ftz.ll @@ -69,6 +69,6 @@ define amdgpu_kernel void @div_ftz(ptr addrspace(4) byref(i64) %"66", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/div_noftz.ll b/ptx/src/test/ll/div_noftz.ll index 8389f3f..8dd302c 100644 --- a/ptx/src/test/ll/div_noftz.ll +++ b/ptx/src/test/ll/div_noftz.ll @@ -66,6 +66,6 @@ define amdgpu_kernel void @div_noftz(ptr addrspace(4) byref(i64) %"65", ptr addr ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/dp4a.ll b/ptx/src/test/ll/dp4a.ll index 19439b9..58ea314 100644 --- a/ptx/src/test/ll/dp4a.ll +++ b/ptx/src/test/ll/dp4a.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"40", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/ex2.ll b/ptx/src/test/ll/ex2.ll index 1202f1e..c1ebec6 100644 --- a/ptx/src/test/ll/ex2.ll +++ b/ptx/src/test/ll/ex2.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_func.ll b/ptx/src/test/ll/extern_func.ll index a21fb6d..d5f6aa0 100644 --- a/ptx/src/test/ll/extern_func.ll +++ b/ptx/src/test/ll/extern_func.ll @@ -39,5 +39,5 @@ define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"47", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_shared.ll b/ptx/src/test/ll/extern_shared.ll index e92d592..12e426a 100644 --- a/ptx/src/test/ll/extern_shared.ll +++ b/ptx/src/test/ll/extern_shared.ll @@ -29,4 +29,4 @@ define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"34", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_shared_call.ll b/ptx/src/test/ll/extern_shared_call.ll index 28e3989..4c826f5 100644 --- a/ptx/src/test/ll/extern_shared_call.ll +++ b/ptx/src/test/ll/extern_shared_call.ll @@ -51,5 +51,5 @@ define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"44", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/fma.ll b/ptx/src/test/ll/fma.ll index 780017b..254bfce 100644 --- a/ptx/src/test/ll/fma.ll +++ b/ptx/src/test/ll/fma.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.fma.f32(float, float, float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/fma_bf16x2.ll b/ptx/src/test/ll/fma_bf16x2.ll index ff7a638..b3cc521 100644 --- a/ptx/src/test/ll/fma_bf16x2.ll +++ b/ptx/src/test/ll/fma_bf16x2.ll @@ -47,5 +47,5 @@ define amdgpu_kernel void @fma_bf16x2(ptr addrspace(4) byref(i64) %"39", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/fmax.ll b/ptx/src/test/ll/fmax.ll index dbaa4ed..546ed5d 100644 --- a/ptx/src/test/ll/fmax.ll +++ b/ptx/src/test/ll/fmax.ll @@ -28,8 +28,8 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace store half %"51", ptr addrspace(5) %"43", align 2 %"53" = load half, ptr addrspace(5) %"43", align 2 %"54" = load half, ptr addrspace(5) %"42", align 2 - %"52" = call half @llvm.maxnum.f16(half %"53", half %"54") - store half %"52", ptr addrspace(5) %"44", align 2 + %2 = call half @llvm.maxnum.f16(half %"53", half %"54") + store half %2, ptr addrspace(5) %"44", align 2 %"55" = load i64, ptr addrspace(5) %"41", align 8 %"56" = load half, ptr addrspace(5) %"44", align 2 %"61" = inttoptr i64 %"55" to ptr @@ -41,5 +41,5 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare half @llvm.maxnum.f16(half, half) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/global_array.ll b/ptx/src/test/ll/global_array.ll index d0f3cb9..68e3a66 100644 --- a/ptx/src/test/ll/global_array.ll +++ b/ptx/src/test/ll/global_array.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"34", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/global_array_f32.ll b/ptx/src/test/ll/global_array_f32.ll index 201a754..79cf5f8 100644 --- a/ptx/src/test/ll/global_array_f32.ll +++ b/ptx/src/test/ll/global_array_f32.ll @@ -25,4 +25,4 @@ define amdgpu_kernel void @global_array_f32(ptr addrspace(4) byref(i64) %"36", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/lanemask_lt.ll b/ptx/src/test/ll/lanemask_lt.ll index 5c966e4..912752f 100644 --- a/ptx/src/test/ll/lanemask_lt.ll +++ b/ptx/src/test/ll/lanemask_lt.ll @@ -39,5 +39,5 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"39", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st.ll b/ptx/src/test/ll/ld_st.ll index cadcb61..07c0e56 100644 --- a/ptx/src/test/ll/ld_st.ll +++ b/ptx/src/test/ll/ld_st.ll @@ -23,4 +23,4 @@ define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"33", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st_implicit.ll b/ptx/src/test/ll/ld_st_implicit.ll index 9fb584a..00db109 100644 --- a/ptx/src/test/ll/ld_st_implicit.ll +++ b/ptx/src/test/ll/ld_st_implicit.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"34", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st_offset.ll b/ptx/src/test/ll/ld_st_offset.ll index 7b701e8..483e822 100644 --- a/ptx/src/test/ll/ld_st_offset.ll +++ b/ptx/src/test/ll/ld_st_offset.ll @@ -34,4 +34,4 @@ define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"38", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ldmatrix.ll b/ptx/src/test/ll/ldmatrix.ll index f5687ae..3004ca6 100644 --- a/ptx/src/test/ll/ldmatrix.ll +++ b/ptx/src/test/ll/ldmatrix.ll @@ -95,5 +95,5 @@ define amdgpu_kernel void @ldmatrix(ptr addrspace(4) byref(i64) %"55") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ldmatrix_trans.ll b/ptx/src/test/ll/ldmatrix_trans.ll index 2552140..b6350b9 100644 --- a/ptx/src/test/ll/ldmatrix_trans.ll +++ b/ptx/src/test/ll/ldmatrix_trans.ll @@ -160,5 +160,5 @@ define amdgpu_kernel void @ldmatrix_trans(ptr addrspace(4) byref(i64) %"86") #1 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/lg2.ll b/ptx/src/test/ll/lg2.ll index ee6d85b..f918f0c 100644 --- a/ptx/src/test/ll/lg2.ll +++ b/ptx/src/test/ll/lg2.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/local_align.ll b/ptx/src/test/ll/local_align.ll index a2ebd84..c51a99c 100644 --- a/ptx/src/test/ll/local_align.ll +++ b/ptx/src/test/ll/local_align.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mad_s32.ll b/ptx/src/test/ll/mad_s32.ll index 29f80ab..bb15154 100644 --- a/ptx/src/test/ll/mad_s32.ll +++ b/ptx/src/test/ll/mad_s32.ll @@ -42,4 +42,4 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"40", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mad_wide.ll b/ptx/src/test/ll/mad_wide.ll index 8a13a2e..cc0422d 100644 --- a/ptx/src/test/ll/mad_wide.ll +++ b/ptx/src/test/ll/mad_wide.ll @@ -44,4 +44,4 @@ define amdgpu_kernel void @mad_wide(ptr addrspace(4) byref(i64) %"40", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/malformed_label.ll b/ptx/src/test/ll/malformed_label.ll index b088b0e..0383f4c 100644 --- a/ptx/src/test/ll/malformed_label.ll +++ b/ptx/src/test/ll/malformed_label.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"37", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/max.ll b/ptx/src/test/ll/max.ll index 8f10384..f7d6e65 100644 --- a/ptx/src/test/ll/max.ll +++ b/ptx/src/test/ll/max.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smax.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call i32 @llvm.smax.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr @@ -36,5 +36,5 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/membar.ll b/ptx/src/test/ll/membar.ll index 52c7718..7256fd3 100644 --- a/ptx/src/test/ll/membar.ll +++ b/ptx/src/test/ll/membar.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"33", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/min.ll b/ptx/src/test/ll/min.ll index 70b678e..1c276ab 100644 --- a/ptx/src/test/ll/min.ll +++ b/ptx/src/test/ll/min.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr @@ -36,5 +36,5 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/min_f16.ll b/ptx/src/test/ll/min_f16.ll new file mode 100644 index 0000000..f0c7c4d --- /dev/null +++ b/ptx/src/test/ll/min_f16.ll @@ -0,0 +1,43 @@ +define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { + %"38" = alloca i64, align 8, addrspace(5) + %"39" = alloca i64, align 8, addrspace(5) + %"40" = alloca half, align 2, addrspace(5) + %"41" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"35" + +"35": ; preds = %1 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 + %"54" = inttoptr i64 %"45" to ptr + %"53" = load i16, ptr %"54", align 2 + %"44" = bitcast i16 %"53" to half + store half %"44", ptr addrspace(5) %"40", align 2 + %"46" = load i64, ptr addrspace(5) %"38", align 8 + %"55" = inttoptr i64 %"46" to ptr + %"34" = getelementptr inbounds i8, ptr %"55", i64 2 + %"56" = load i16, ptr %"34", align 2 + %"47" = bitcast i16 %"56" to half + store half %"47", ptr addrspace(5) %"41", align 2 + %"49" = load half, ptr addrspace(5) %"40", align 2 + %"50" = load half, ptr addrspace(5) %"41", align 2 + %2 = call half @llvm.minnum.f16(half %"49", half %"50") + store half %2, ptr addrspace(5) %"40", align 2 + %"51" = load i64, ptr addrspace(5) %"39", align 8 + %"52" = load half, ptr addrspace(5) %"40", align 2 + %"57" = inttoptr i64 %"51" to ptr + %"58" = bitcast half %"52" to i16 + store i16 %"58", ptr %"57", align 2 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare half @llvm.minnum.f16(half, half) #1 + +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/min_nan_f16.ll b/ptx/src/test/ll/min_nan_f16.ll new file mode 100644 index 0000000..90b4780 --- /dev/null +++ b/ptx/src/test/ll/min_nan_f16.ll @@ -0,0 +1,45 @@ +define amdgpu_kernel void @min_nan_f16(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { + %"38" = alloca i64, align 8, addrspace(5) + %"39" = alloca i64, align 8, addrspace(5) + %"40" = alloca half, align 2, addrspace(5) + %"41" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"35" + +"35": ; preds = %1 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 + %"54" = inttoptr i64 %"45" to ptr + %"53" = load i16, ptr %"54", align 2 + %"44" = bitcast i16 %"53" to half + store half %"44", ptr addrspace(5) %"40", align 2 + %"46" = load i64, ptr addrspace(5) %"38", align 8 + %"55" = inttoptr i64 %"46" to ptr + %"34" = getelementptr inbounds i8, ptr %"55", i64 2 + %"56" = load i16, ptr %"34", align 2 + %"47" = bitcast i16 %"56" to half + store half %"47", ptr addrspace(5) %"41", align 2 + %"49" = load half, ptr addrspace(5) %"40", align 2 + %"50" = load half, ptr addrspace(5) %"41", align 2 + %2 = call half @llvm.minnum.f16(half %"49", half %"50") + %3 = fcmp uno half %"49", %"50" + %"48" = select i1 %3, half 0xH7E00, half %2 + store half %"48", ptr addrspace(5) %"40", align 2 + %"51" = load i64, ptr addrspace(5) %"39", align 8 + %"52" = load half, ptr addrspace(5) %"40", align 2 + %"57" = inttoptr i64 %"51" to ptr + %"58" = bitcast half %"52" to i16 + store i16 %"58", ptr %"57", align 2 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare half @llvm.minnum.f16(half, half) #1 + +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/mov.ll b/ptx/src/test/ll/mov.ll index 0c39773..3500058 100644 --- a/ptx/src/test/ll/mov.ll +++ b/ptx/src/test/ll/mov.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mov_address.ll b/ptx/src/test/ll/mov_address.ll index da04c37..9ebb3a8 100644 --- a/ptx/src/test/ll/mov_address.ll +++ b/ptx/src/test/ll/mov_address.ll @@ -12,4 +12,4 @@ define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"32", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_hi_s32.ll b/ptx/src/test/ll/mul24_hi_s32.ll index 5b67718..5fd7924 100644 --- a/ptx/src/test/ll/mul24_hi_s32.ll +++ b/ptx/src/test/ll/mul24_hi_s32.ll @@ -42,5 +42,5 @@ declare i32 @llvm.amdgcn.mul.i24(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mulhi.i24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_hi_u32.ll b/ptx/src/test/ll/mul24_hi_u32.ll index 94018d8..efd0a9c 100644 --- a/ptx/src/test/ll/mul24_hi_u32.ll +++ b/ptx/src/test/ll/mul24_hi_u32.ll @@ -38,5 +38,5 @@ declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mulhi.u24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_lo_s32.ll b/ptx/src/test/ll/mul24_lo_s32.ll index bb4137e..734ee5c 100644 --- a/ptx/src/test/ll/mul24_lo_s32.ll +++ b/ptx/src/test/ll/mul24_lo_s32.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"35", ptr a ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mul.i24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_lo_u32.ll b/ptx/src/test/ll/mul24_lo_u32.ll index a2a0e6f..dfd56b1 100644 --- a/ptx/src/test/ll/mul24_lo_u32.ll +++ b/ptx/src/test/ll/mul24_lo_u32.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) %"34", ptr a ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_ftz.ll b/ptx/src/test/ll/mul_ftz.ll index da1ae40..fa169d9 100644 --- a/ptx/src/test/ll/mul_ftz.ll +++ b/ptx/src/test/ll/mul_ftz.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"36", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_hi.ll b/ptx/src/test/ll/mul_hi.ll index f019cfe..8e09715 100644 --- a/ptx/src/test/ll/mul_hi.ll +++ b/ptx/src/test/ll/mul_hi.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"35", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_lo.ll b/ptx/src/test/ll/mul_lo.ll index 5909dc7..818aaa5 100644 --- a/ptx/src/test/ll/mul_lo.ll +++ b/ptx/src/test/ll/mul_lo.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"35", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_non_ftz.ll b/ptx/src/test/ll/mul_non_ftz.ll index 00ad221..9ecd39c 100644 --- a/ptx/src/test/ll/mul_non_ftz.ll +++ b/ptx/src/test/ll/mul_non_ftz.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"36", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_wide.ll b/ptx/src/test/ll/mul_wide.ll index 451e70a..654b204 100644 --- a/ptx/src/test/ll/mul_wide.ll +++ b/ptx/src/test/ll/mul_wide.ll @@ -36,4 +36,4 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"37", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/multiple_return.ll b/ptx/src/test/ll/multiple_return.ll index c265efa..75b1392 100644 --- a/ptx/src/test/ll/multiple_return.ll +++ b/ptx/src/test/ll/multiple_return.ll @@ -66,5 +66,5 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"53", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/nanosleep.ll b/ptx/src/test/ll/nanosleep.ll index 6d75f05..bc59083 100644 --- a/ptx/src/test/ll/nanosleep.ll +++ b/ptx/src/test/ll/nanosleep.ll @@ -11,5 +11,5 @@ define amdgpu_kernel void @nanosleep(ptr addrspace(4) byref(i64) %"31", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/neg.ll b/ptx/src/test/ll/neg.ll index e5c307a..97c0292 100644 --- a/ptx/src/test/ll/neg.ll +++ b/ptx/src/test/ll/neg.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/non_scalar_ptr_offset.ll b/ptx/src/test/ll/non_scalar_ptr_offset.ll index 00e65da..211ce80 100644 --- a/ptx/src/test/ll/non_scalar_ptr_offset.ll +++ b/ptx/src/test/ll/non_scalar_ptr_offset.ll @@ -32,4 +32,4 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/not.ll b/ptx/src/test/ll/not.ll index e8ead6e..879876e 100644 --- a/ptx/src/test/ll/not.ll +++ b/ptx/src/test/ll/not.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ntid.ll b/ptx/src/test/ll/ntid.ll index fa713ac..3a51e7f 100644 --- a/ptx/src/test/ll/ntid.ll +++ b/ptx/src/test/ll/ntid.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/or.ll b/ptx/src/test/ll/or.ll index 9ee2e00..6acc96a 100644 --- a/ptx/src/test/ll/or.ll +++ b/ptx/src/test/ll/or.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/param_is_addressable.ll b/ptx/src/test/ll/param_is_addressable.ll index 6f75d5e..e9e9f8e 100644 --- a/ptx/src/test/ll/param_is_addressable.ll +++ b/ptx/src/test/ll/param_is_addressable.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @param_is_addressable(ptr addrspace(4) byref(i64) %"33 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/popc.ll b/ptx/src/test/ll/popc.ll index f60b5a0..0a89c1e 100644 --- a/ptx/src/test/ll/popc.ll +++ b/ptx/src/test/ll/popc.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"33", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.ctpop.i32(i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/pred_not.ll b/ptx/src/test/ll/pred_not.ll index 40930af..a111c4a 100644 --- a/ptx/src/test/ll/pred_not.ll +++ b/ptx/src/test/ll/pred_not.ll @@ -54,4 +54,4 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"44", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/prmt.ll b/ptx/src/test/ll/prmt.ll index 2b630b7..7753f5c 100644 --- a/ptx/src/test/ll/prmt.ll +++ b/ptx/src/test/ll/prmt.ll @@ -35,4 +35,4 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"36", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rcp.ll b/ptx/src/test/ll/rcp.ll index 4b3cc99..91f453c 100644 --- a/ptx/src/test/ll/rcp.ll +++ b/ptx/src/test/ll/rcp.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/redux_sync_add_u32_partial.ll b/ptx/src/test/ll/redux_sync_add_u32_partial.ll index ab55c15..f17249b 100644 --- a/ptx/src/test/ll/redux_sync_add_u32_partial.ll +++ b/ptx/src/test/ll/redux_sync_add_u32_partial.ll @@ -54,5 +54,5 @@ define amdgpu_kernel void @redux_sync_add_u32_partial(ptr addrspace(4) byref(i64 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/redux_sync_op_s32.ll b/ptx/src/test/ll/redux_sync_op_s32.ll index a84624b..bea6e03 100644 --- a/ptx/src/test/ll/redux_sync_op_s32.ll +++ b/ptx/src/test/ll/redux_sync_op_s32.ll @@ -63,5 +63,5 @@ define amdgpu_kernel void @redux_sync_op_s32(ptr addrspace(4) byref(i64) %"46") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/redux_sync_op_u32.ll b/ptx/src/test/ll/redux_sync_op_u32.ll index 3629939..36c2529 100644 --- a/ptx/src/test/ll/redux_sync_op_u32.ll +++ b/ptx/src/test/ll/redux_sync_op_u32.ll @@ -59,5 +59,5 @@ define amdgpu_kernel void @redux_sync_op_u32(ptr addrspace(4) byref(i64) %"44") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/reg_local.ll b/ptx/src/test/ll/reg_local.ll index 6e011d5..8d6856a 100644 --- a/ptx/src/test/ll/reg_local.ll +++ b/ptx/src/test/ll/reg_local.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"40", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rem.ll b/ptx/src/test/ll/rem.ll index 07da66a..d63b53e 100644 --- a/ptx/src/test/ll/rem.ll +++ b/ptx/src/test/ll/rem.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rsqrt.ll b/ptx/src/test/ll/rsqrt.ll index 727f82b..933b1a0 100644 --- a/ptx/src/test/ll/rsqrt.ll +++ b/ptx/src/test/ll/rsqrt.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"33", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare double @llvm.amdgcn.rsq.f64(double) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/selp.ll b/ptx/src/test/ll/selp.ll index 8c8f1d7..78f6e06 100644 --- a/ptx/src/test/ll/selp.ll +++ b/ptx/src/test/ll/selp.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"37", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/selp_true.ll b/ptx/src/test/ll/selp_true.ll index f50a208..4873cb3 100644 --- a/ptx/src/test/ll/selp_true.ll +++ b/ptx/src/test/ll/selp_true.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"37", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp.ll b/ptx/src/test/ll/setp.ll index d82b3cc..c4835b4 100644 --- a/ptx/src/test/ll/setp.ll +++ b/ptx/src/test/ll/setp.ll @@ -51,4 +51,4 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"44", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_gt.ll b/ptx/src/test/ll/setp_gt.ll index d8eb311..54ffcdc 100644 --- a/ptx/src/test/ll/setp_gt.ll +++ b/ptx/src/test/ll/setp_gt.ll @@ -53,4 +53,4 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"42", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_leu.ll b/ptx/src/test/ll/setp_leu.ll index 4f6dd68..cca35ef 100644 --- a/ptx/src/test/ll/setp_leu.ll +++ b/ptx/src/test/ll/setp_leu.ll @@ -53,4 +53,4 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"42", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_nan.ll b/ptx/src/test/ll/setp_nan.ll index 82f5dbe..341e89c 100644 --- a/ptx/src/test/ll/setp_nan.ll +++ b/ptx/src/test/ll/setp_nan.ll @@ -162,4 +162,4 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"86", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_num.ll b/ptx/src/test/ll/setp_num.ll index e208314..f8b5995 100644 --- a/ptx/src/test/ll/setp_num.ll +++ b/ptx/src/test/ll/setp_num.ll @@ -162,4 +162,4 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"86", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_ptr_32.ll b/ptx/src/test/ll/shared_ptr_32.ll index 0a1602e..bc8e593 100644 --- a/ptx/src/test/ll/shared_ptr_32.ll +++ b/ptx/src/test/ll/shared_ptr_32.ll @@ -37,4 +37,4 @@ define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"38", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_ptr_take_address.ll b/ptx/src/test/ll/shared_ptr_take_address.ll index 20d1feb..87a0fd5 100644 --- a/ptx/src/test/ll/shared_ptr_take_address.ll +++ b/ptx/src/test/ll/shared_ptr_take_address.ll @@ -36,4 +36,4 @@ define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_unify_extern.ll b/ptx/src/test/ll/shared_unify_extern.ll index 271b4b8..968ee98 100644 --- a/ptx/src/test/ll/shared_unify_extern.ll +++ b/ptx/src/test/ll/shared_unify_extern.ll @@ -80,5 +80,5 @@ define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"59" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_unify_local.ll b/ptx/src/test/ll/shared_unify_local.ll index 65bbd91..143c308 100644 --- a/ptx/src/test/ll/shared_unify_local.ll +++ b/ptx/src/test/ll/shared_unify_local.ll @@ -77,5 +77,5 @@ define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"57", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_variable.ll b/ptx/src/test/ll/shared_variable.ll index 013d340..f7d59b5 100644 --- a/ptx/src/test/ll/shared_variable.ll +++ b/ptx/src/test/ll/shared_variable.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"35", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l.ll b/ptx/src/test/ll/shf_l.ll index 3a80428..ad44191 100644 --- a/ptx/src/test/ll/shf_l.ll +++ b/ptx/src/test/ll/shf_l.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_l(ptr addrspace(4) byref(i64) %"40", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l_clamp.ll b/ptx/src/test/ll/shf_l_clamp.ll index 62efed0..2f7c6d1 100644 --- a/ptx/src/test/ll/shf_l_clamp.ll +++ b/ptx/src/test/ll/shf_l_clamp.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_l_clamp(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l_wrap.ll b/ptx/src/test/ll/shf_l_wrap.ll index cbf1f66..b71222c 100644 --- a/ptx/src/test/ll/shf_l_wrap.ll +++ b/ptx/src/test/ll/shf_l_wrap.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @shf_l_wrap(ptr addrspace(4) byref(i64) %"40", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_r.ll b/ptx/src/test/ll/shf_r.ll index 1b794a7..aaf87ea 100644 --- a/ptx/src/test/ll/shf_r.ll +++ b/ptx/src/test/ll/shf_r.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_r(ptr addrspace(4) byref(i64) %"40", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_r_clamp.ll b/ptx/src/test/ll/shf_r_clamp.ll index 4c74049..5d3b886 100644 --- a/ptx/src/test/ll/shf_r_clamp.ll +++ b/ptx/src/test/ll/shf_r_clamp.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_r_clamp(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_r_wrap.ll b/ptx/src/test/ll/shf_r_wrap.ll index 5f3bb6a..371eaf9 100644 --- a/ptx/src/test/ll/shf_r_wrap.ll +++ b/ptx/src/test/ll/shf_r_wrap.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @shf_r_wrap(ptr addrspace(4) byref(i64) %"40", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll index cede751..888abd0 100644 --- a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_bfly_b32_pred(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll index d0b2664..aaf6d3e 100644 --- a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_down_b32_pred(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll index 25afb27..c7661be 100644 --- a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_idx_b32_pred(ptr addrspace(4) byref(i64) %" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_mode_b32.ll b/ptx/src/test/ll/shfl_sync_mode_b32.ll index d90f5fb..633a3dc 100644 --- a/ptx/src/test/ll/shfl_sync_mode_b32.ll +++ b/ptx/src/test/ll/shfl_sync_mode_b32.ll @@ -70,5 +70,5 @@ define amdgpu_kernel void @shfl_sync_mode_b32(ptr addrspace(4) byref(i64) %"51") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll index c61b758..82f688f 100644 --- a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_up_b32_pred(ptr addrspace(4) byref(i64) %"4 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shl.ll b/ptx/src/test/ll/shl.ll index 311c05b..df42cd6 100644 --- a/ptx/src/test/ll/shl.ll +++ b/ptx/src/test/ll/shl.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shr.ll b/ptx/src/test/ll/shr.ll index 8b84067..536b43d 100644 --- a/ptx/src/test/ll/shr.ll +++ b/ptx/src/test/ll/shr.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shr_oob.ll b/ptx/src/test/ll/shr_oob.ll index dac2dd1..45def22 100644 --- a/ptx/src/test/ll/shr_oob.ll +++ b/ptx/src/test/ll/shr_oob.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shr_oob(ptr addrspace(4) byref(i64) %"34", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sign_extend.ll b/ptx/src/test/ll/sign_extend.ll index 9ebc818..2afda88 100644 --- a/ptx/src/test/ll/sign_extend.ll +++ b/ptx/src/test/ll/sign_extend.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"33", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sin.ll b/ptx/src/test/ll/sin.ll index 010a889..dae1aea 100644 --- a/ptx/src/test/ll/sin.ll +++ b/ptx/src/test/ll/sin.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.sin.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/sqrt.ll b/ptx/src/test/ll/sqrt.ll index e58b21a..5fb76ab 100644 --- a/ptx/src/test/ll/sqrt.ll +++ b/ptx/src/test/ll/sqrt.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"33", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sqrt_rn_ftz.ll b/ptx/src/test/ll/sqrt_rn_ftz.ll index d38c59d..b7fc7a6 100644 --- a/ptx/src/test/ll/sqrt_rn_ftz.ll +++ b/ptx/src/test/ll/sqrt_rn_ftz.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @sqrt_rn_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid.ll b/ptx/src/test/ll/stateful_ld_st_ntid.ll index 22dbe50..0c8c203 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid.ll @@ -51,5 +51,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"39" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll index f5e1fc2..caa2b82 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll index 1e2bea6..4b3b0e8 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll @@ -57,5 +57,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_simple.ll b/ptx/src/test/ll/stateful_ld_st_simple.ll index 183aa78..affe0fc 100644 --- a/ptx/src/test/ll/stateful_ld_st_simple.ll +++ b/ptx/src/test/ll/stateful_ld_st_simple.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"3 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_neg_offset.ll b/ptx/src/test/ll/stateful_neg_offset.ll index 36e16cd..48668bf 100644 --- a/ptx/src/test/ll/stateful_neg_offset.ll +++ b/ptx/src/test/ll/stateful_neg_offset.ll @@ -42,4 +42,4 @@ define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"36" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sub.ll b/ptx/src/test/ll/sub.ll index 029efcd..ff991ab 100644 --- a/ptx/src/test/ll/sub.ll +++ b/ptx/src/test/ll/sub.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/tanh.ll b/ptx/src/test/ll/tanh.ll index a946370..b0c8663 100644 --- a/ptx/src/test/ll/tanh.ll +++ b/ptx/src/test/ll/tanh.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @tanh(ptr addrspace(4) byref(i64) %"33", ptr addrspace declare float @__ocml_tanh_f32(float) -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/tid.ll b/ptx/src/test/ll/tid.ll index a057e42..22d1c0f 100644 --- a/ptx/src/test/ll/tid.ll +++ b/ptx/src/test/ll/tid.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @tid(ptr addrspace(4) byref(i64) %"37") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/trap.ll b/ptx/src/test/ll/trap.ll index 97dc102..0639453 100644 --- a/ptx/src/test/ll/trap.ll +++ b/ptx/src/test/ll/trap.ll @@ -12,5 +12,5 @@ define amdgpu_kernel void @trap(ptr addrspace(4) byref(i64) %"30", ptr addrspace ; Function Attrs: cold noreturn nounwind declare void @llvm.trap() #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { cold noreturn nounwind } \ No newline at end of file diff --git a/ptx/src/test/ll/vector.ll b/ptx/src/test/ll/vector.ll index 126ce8d..ddd7f9a 100644 --- a/ptx/src/test/ll/vector.ll +++ b/ptx/src/test/ll/vector.ll @@ -73,5 +73,5 @@ define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"70", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector4.ll b/ptx/src/test/ll/vector4.ll index 4cb7208..837530a 100644 --- a/ptx/src/test/ll/vector4.ll +++ b/ptx/src/test/ll/vector4.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"35", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector_extract.ll b/ptx/src/test/ll/vector_extract.ll index fe73736..387ac00 100644 --- a/ptx/src/test/ll/vector_extract.ll +++ b/ptx/src/test/ll/vector_extract.ll @@ -83,4 +83,4 @@ define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"43", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector_operand.ll b/ptx/src/test/ll/vector_operand.ll index 564d0da..af73ab5 100644 --- a/ptx/src/test/ll/vector_operand.ll +++ b/ptx/src/test/ll/vector_operand.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @vector_operand(ptr addrspace(4) byref(i64) %"36", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_all.ll b/ptx/src/test/ll/vote_all.ll index 175edb8..f5e28fe 100644 --- a/ptx/src/test/ll/vote_all.ll +++ b/ptx/src/test/ll/vote_all.ll @@ -62,5 +62,5 @@ define amdgpu_kernel void @vote_all(ptr addrspace(4) byref(i64) %"51") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_all_sub.ll b/ptx/src/test/ll/vote_all_sub.ll index 5b7007d..a10140b 100644 --- a/ptx/src/test/ll/vote_all_sub.ll +++ b/ptx/src/test/ll/vote_all_sub.ll @@ -61,5 +61,5 @@ define amdgpu_kernel void @vote_all_sub(ptr addrspace(4) byref(i64) %"53") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_any.ll b/ptx/src/test/ll/vote_any.ll index bc24522..8a4895a 100644 --- a/ptx/src/test/ll/vote_any.ll +++ b/ptx/src/test/ll/vote_any.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @vote_any(ptr addrspace(4) byref(i64) %"44") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_ballot.ll b/ptx/src/test/ll/vote_ballot.ll index 350d837..69f5836 100644 --- a/ptx/src/test/ll/vote_ballot.ll +++ b/ptx/src/test/ll/vote_ballot.ll @@ -42,5 +42,5 @@ define amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"41") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/warp_sz.ll b/ptx/src/test/ll/warp_sz.ll index 32b203a..7b6e36b 100644 --- a/ptx/src/test/ll/warp_sz.ll +++ b/ptx/src/test/ll/warp_sz.ll @@ -14,4 +14,4 @@ define amdgpu_kernel void @warp_sz(ptr addrspace(4) byref(i64) %"32", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/xor.ll b/ptx/src/test/ll/xor.ll index ac9a1d1..0f88d82 100644 --- a/ptx/src/test/ll/xor.ll +++ b/ptx/src/test/ll/xor.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/spirv_run/min_f16.ptx b/ptx/src/test/spirv_run/min_f16.ptx new file mode 100644 index 0000000..61d30c6 --- /dev/null +++ b/ptx/src/test/spirv_run/min_f16.ptx @@ -0,0 +1,23 @@ +.version 7.0 +.target sm_80 +.address_size 64 + +.visible .entry min_f16( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .f16 temp1; + .reg .f16 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b16 temp1, [in_addr]; + ld.b16 temp2, [in_addr+2]; + min.f16 temp1, temp1, temp2; + st.b16 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/min_nan_f16.ptx b/ptx/src/test/spirv_run/min_nan_f16.ptx new file mode 100644 index 0000000..3c1ca60 --- /dev/null +++ b/ptx/src/test/spirv_run/min_nan_f16.ptx @@ -0,0 +1,23 @@ +.version 7.0 +.target sm_80 +.address_size 64 + +.visible .entry min_nan_f16( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .f16 temp1; + .reg .f16 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b16 temp1, [in_addr]; + ld.b16 temp2, [in_addr+2]; + min.NaN.f16 temp1, temp1, temp2; + st.b16 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 0a39523..46bdd0b 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -143,6 +143,12 @@ test_ptx!(shr_oob, [-32768i16], [-1i16]); test_ptx!(or, [1u64, 2u64], [3u64]); test_ptx!(sub, [2u64], [1u64]); test_ptx!(min, [555i32, 444i32], [444i32]); +test_ptx!( + min_f16, + [half::f16::NAN, half::f16::from_f64(123.0)], + [half::f16::from_f64(123.0)] +); +test_ptx!(min_nan_f16); test_ptx!(max, [555i32, 444i32], [555i32]); test_ptx!(global_array, [0xDEADu32], [1u32]); test_ptx!(global_array_f32, [0x0], [0f32]);