From e3a99b7ee11a3a78608e9273f5c4491f9c368d6c Mon Sep 17 00:00:00 2001 From: Violet Date: Fri, 12 Sep 2025 16:48:47 -0700 Subject: [PATCH] Fix min.ftz.nan.f16 for ROCm 6.3.4 (#506) This PR fixes compatibility issues with ROCm 6.3.4 by replacing the unavailable llvm.minimum.f16 intrinsic with an equivalent implementation using llvm.minnum.f16 combined with NaN handling logic. The same applies to max operations. This also adds the `"amdgpu-ieee"="false"` attribute to every function, which gives us better codegen for min/max everywhere. --- ptx/src/pass/llvm/emit.rs | 85 +++++++++++++++---- ptx/src/test/ll/abs.ll | 2 +- ptx/src/test/ll/activemask.ll | 4 +- ptx/src/test/ll/add.ll | 2 +- ptx/src/test/ll/add_non_coherent.ll | 2 +- ptx/src/test/ll/add_s32_sat.ll | 2 +- ptx/src/test/ll/add_tuning.ll | 2 +- ptx/src/test/ll/and.ll | 2 +- ptx/src/test/ll/assertfail.ll | 4 +- ptx/src/test/ll/atom_add.ll | 2 +- ptx/src/test/ll/atom_add_float.ll | 2 +- ptx/src/test/ll/atom_cas.ll | 2 +- ptx/src/test/ll/atom_inc.ll | 2 +- ptx/src/test/ll/b64tof64.ll | 2 +- ptx/src/test/ll/bar_red_and_pred.ll | 4 +- ptx/src/test/ll/bfe.ll | 4 +- ptx/src/test/ll/bfi.ll | 4 +- ptx/src/test/ll/block.ll | 2 +- ptx/src/test/ll/bra.ll | 2 +- ptx/src/test/ll/brev.ll | 2 +- ptx/src/test/ll/call.ll | 4 +- ptx/src/test/ll/call_rnd.ll | 4 +- ptx/src/test/ll/clz.ll | 2 +- ptx/src/test/ll/const.ll | 2 +- ptx/src/test/ll/const_ident.ll | 4 +- ptx/src/test/ll/constant_f32.ll | 2 +- ptx/src/test/ll/constant_negative.ll | 2 +- ptx/src/test/ll/cos.ll | 2 +- ptx/src/test/ll/cp_async.ll | 2 +- ptx/src/test/ll/cvt_f64_f32.ll | 2 +- ptx/src/test/ll/cvt_rn_bf16x2_f32.ll | 2 +- ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll | 4 +- ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll | 4 +- .../test/ll/cvt_rn_satfinite_e4m3x2_f32.ll | 4 +- .../test/ll/cvt_rn_satfinite_e5m2x2_f32.ll | 4 +- ptx/src/test/ll/cvt_rni.ll | 2 +- ptx/src/test/ll/cvt_rni_u16_f32.ll | 2 +- ptx/src/test/ll/cvt_rzi.ll | 2 +- ptx/src/test/ll/cvt_s16_s8.ll | 2 +- 
ptx/src/test/ll/cvt_s32_f32.ll | 2 +- ptx/src/test/ll/cvt_s64_s32.ll | 2 +- ptx/src/test/ll/cvt_sat_s_u.ll | 2 +- ptx/src/test/ll/cvta.ll | 2 +- ptx/src/test/ll/div_approx.ll | 2 +- ptx/src/test/ll/div_ftz.ll | 4 +- ptx/src/test/ll/div_noftz.ll | 4 +- ptx/src/test/ll/dp4a.ll | 2 +- ptx/src/test/ll/ex2.ll | 4 +- ptx/src/test/ll/extern_func.ll | 4 +- ptx/src/test/ll/extern_shared.ll | 2 +- ptx/src/test/ll/extern_shared_call.ll | 4 +- ptx/src/test/ll/fma.ll | 2 +- ptx/src/test/ll/fma_bf16x2.ll | 4 +- ptx/src/test/ll/fmax.ll | 6 +- ptx/src/test/ll/global_array.ll | 2 +- ptx/src/test/ll/global_array_f32.ll | 2 +- ptx/src/test/ll/lanemask_lt.ll | 4 +- ptx/src/test/ll/ld_st.ll | 2 +- ptx/src/test/ll/ld_st_implicit.ll | 2 +- ptx/src/test/ll/ld_st_offset.ll | 2 +- ptx/src/test/ll/ldmatrix.ll | 4 +- ptx/src/test/ll/ldmatrix_trans.ll | 4 +- ptx/src/test/ll/lg2.ll | 4 +- ptx/src/test/ll/local_align.ll | 2 +- ptx/src/test/ll/mad_s32.ll | 2 +- ptx/src/test/ll/mad_wide.ll | 2 +- ptx/src/test/ll/malformed_label.ll | 2 +- ptx/src/test/ll/max.ll | 6 +- ptx/src/test/ll/membar.ll | 2 +- ptx/src/test/ll/min.ll | 6 +- ptx/src/test/ll/min_f16.ll | 43 ++++++++++ ptx/src/test/ll/min_nan_f16.ll | 45 ++++++++++ ptx/src/test/ll/mov.ll | 2 +- ptx/src/test/ll/mov_address.ll | 2 +- ptx/src/test/ll/mul24_hi_s32.ll | 2 +- ptx/src/test/ll/mul24_hi_u32.ll | 2 +- ptx/src/test/ll/mul24_lo_s32.ll | 2 +- ptx/src/test/ll/mul24_lo_u32.ll | 2 +- ptx/src/test/ll/mul_ftz.ll | 2 +- ptx/src/test/ll/mul_hi.ll | 2 +- ptx/src/test/ll/mul_lo.ll | 2 +- ptx/src/test/ll/mul_non_ftz.ll | 2 +- ptx/src/test/ll/mul_wide.ll | 2 +- ptx/src/test/ll/multiple_return.ll | 4 +- ptx/src/test/ll/nanosleep.ll | 4 +- ptx/src/test/ll/neg.ll | 2 +- ptx/src/test/ll/non_scalar_ptr_offset.ll | 2 +- ptx/src/test/ll/not.ll | 2 +- ptx/src/test/ll/ntid.ll | 4 +- ptx/src/test/ll/or.ll | 2 +- ptx/src/test/ll/param_is_addressable.ll | 2 +- ptx/src/test/ll/popc.ll | 2 +- ptx/src/test/ll/pred_not.ll | 2 +- ptx/src/test/ll/prmt.ll | 2 +- 
ptx/src/test/ll/rcp.ll | 4 +- ptx/src/test/ll/redux_sync_add_u32_partial.ll | 4 +- ptx/src/test/ll/redux_sync_op_s32.ll | 4 +- ptx/src/test/ll/redux_sync_op_u32.ll | 4 +- ptx/src/test/ll/reg_local.ll | 2 +- ptx/src/test/ll/rem.ll | 2 +- ptx/src/test/ll/rsqrt.ll | 2 +- ptx/src/test/ll/selp.ll | 2 +- ptx/src/test/ll/selp_true.ll | 2 +- ptx/src/test/ll/setp.ll | 2 +- ptx/src/test/ll/setp_gt.ll | 2 +- ptx/src/test/ll/setp_leu.ll | 2 +- ptx/src/test/ll/setp_nan.ll | 2 +- ptx/src/test/ll/setp_num.ll | 2 +- ptx/src/test/ll/shared_ptr_32.ll | 2 +- ptx/src/test/ll/shared_ptr_take_address.ll | 2 +- ptx/src/test/ll/shared_unify_extern.ll | 4 +- ptx/src/test/ll/shared_unify_local.ll | 4 +- ptx/src/test/ll/shared_variable.ll | 2 +- ptx/src/test/ll/shf_l.ll | 2 +- ptx/src/test/ll/shf_l_clamp.ll | 2 +- ptx/src/test/ll/shf_l_wrap.ll | 2 +- ptx/src/test/ll/shf_r.ll | 2 +- ptx/src/test/ll/shf_r_clamp.ll | 2 +- ptx/src/test/ll/shf_r_wrap.ll | 2 +- ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll | 4 +- ptx/src/test/ll/shfl_sync_down_b32_pred.ll | 4 +- ptx/src/test/ll/shfl_sync_idx_b32_pred.ll | 4 +- ptx/src/test/ll/shfl_sync_mode_b32.ll | 4 +- ptx/src/test/ll/shfl_sync_up_b32_pred.ll | 4 +- ptx/src/test/ll/shl.ll | 2 +- ptx/src/test/ll/shr.ll | 2 +- ptx/src/test/ll/shr_oob.ll | 2 +- ptx/src/test/ll/sign_extend.ll | 2 +- ptx/src/test/ll/sin.ll | 2 +- ptx/src/test/ll/sqrt.ll | 4 +- ptx/src/test/ll/sqrt_rn_ftz.ll | 4 +- ptx/src/test/ll/stateful_ld_st_ntid.ll | 4 +- ptx/src/test/ll/stateful_ld_st_ntid_chain.ll | 4 +- ptx/src/test/ll/stateful_ld_st_ntid_sub.ll | 4 +- ptx/src/test/ll/stateful_ld_st_simple.ll | 2 +- ptx/src/test/ll/stateful_neg_offset.ll | 2 +- ptx/src/test/ll/sub.ll | 2 +- ptx/src/test/ll/tanh.ll | 2 +- ptx/src/test/ll/tid.ll | 4 +- ptx/src/test/ll/trap.ll | 2 +- ptx/src/test/ll/vector.ll | 4 +- ptx/src/test/ll/vector4.ll | 2 +- ptx/src/test/ll/vector_extract.ll | 2 +- ptx/src/test/ll/vector_operand.ll | 2 +- ptx/src/test/ll/vote_all.ll | 4 +- ptx/src/test/ll/vote_all_sub.ll | 4 
+- ptx/src/test/ll/vote_any.ll | 4 +- ptx/src/test/ll/vote_ballot.ll | 4 +- ptx/src/test/ll/warp_sz.ll | 2 +- ptx/src/test/ll/xor.ll | 2 +- ptx/src/test/spirv_run/min_f16.ptx | 23 +++++ ptx/src/test/spirv_run/min_nan_f16.ptx | 23 +++++ ptx/src/test/spirv_run/mod.rs | 6 ++ 153 files changed, 407 insertions(+), 218 deletions(-) create mode 100644 ptx/src/test/ll/min_f16.ll create mode 100644 ptx/src/test/ll/min_nan_f16.ll create mode 100644 ptx/src/test/spirv_run/min_f16.ptx create mode 100644 ptx/src/test/spirv_run/min_nan_f16.ptx diff --git a/ptx/src/pass/llvm/emit.rs b/ptx/src/pass/llvm/emit.rs index 8bcf9e1..c811a53 100644 --- a/ptx/src/pass/llvm/emit.rs +++ b/ptx/src/pass/llvm/emit.rs @@ -149,6 +149,7 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> { llvm_ftz(method.flush_to_zero_f16f64), ); } + self.emit_fn_attribute(fn_, "amdgpu-ieee", "false"); for (i, param) in method.input_arguments.iter().enumerate() { let value = unsafe { LLVMGetParam(fn_, i as u32) }; let name = self.resolver.get_or_add(param.name); @@ -2266,22 +2267,46 @@ impl<'a> MethodEmitContext<'a> { let llvm_prefix = match data { ptx_parser::MinMaxDetails::Signed(..) => "llvm.smin", ptx_parser::MinMaxDetails::Unsigned(..) => "llvm.umin", - ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => { - "llvm.minimum" - } ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. 
}) => "llvm.minnum", }; let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_())); let llvm_type = get_scalar_type(self.context, data.type_()); - self.emit_intrinsic( + + let a = self.resolver.value(arguments.src1)?; + let b = self.resolver.value(arguments.src2)?; + + let min = self.emit_intrinsic( unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) }, - Some(arguments.dst), + None, Some(&data.type_().into()), - vec![ - (self.resolver.value(arguments.src1)?, llvm_type), - (self.resolver.value(arguments.src2)?, llvm_type), - ], + vec![(a, llvm_type), (b, llvm_type)], )?; + + if let ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { + nan: true, type_, .. + }) = data + { + let is_nan = unsafe { + LLVMBuildFCmp( + self.builder, + LLVMRealPredicate::LLVMRealUNO, + a, + b, + LLVM_UNNAMED.as_ptr(), + ) + }; + self.resolver.with_result(arguments.dst, |dst| unsafe { + LLVMBuildSelect( + self.builder, + is_nan, + LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN), + min, + dst, + ) + }); + } else { + self.resolver.register(arguments.dst, min); + } Ok(()) } @@ -2293,22 +2318,46 @@ impl<'a> MethodEmitContext<'a> { let llvm_prefix = match data { ptx_parser::MinMaxDetails::Signed(..) => "llvm.smax", ptx_parser::MinMaxDetails::Unsigned(..) => "llvm.umax", - ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => { - "llvm.maximum" - } ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. 
}) => "llvm.maxnum", }; let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_())); let llvm_type = get_scalar_type(self.context, data.type_()); - self.emit_intrinsic( + + let a = self.resolver.value(arguments.src1)?; + let b = self.resolver.value(arguments.src2)?; + + let max = self.emit_intrinsic( unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) }, - Some(arguments.dst), + None, Some(&data.type_().into()), - vec![ - (self.resolver.value(arguments.src1)?, llvm_type), - (self.resolver.value(arguments.src2)?, llvm_type), - ], + vec![(a, llvm_type), (b, llvm_type)], )?; + + if let ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { + nan: true, type_, .. + }) = data + { + let is_nan = unsafe { + LLVMBuildFCmp( + self.builder, + LLVMRealPredicate::LLVMRealUNO, + a, + b, + LLVM_UNNAMED.as_ptr(), + ) + }; + self.resolver.with_result(arguments.dst, |dst| unsafe { + LLVMBuildSelect( + self.builder, + is_nan, + LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN), + max, + dst, + ) + }); + } else { + self.resolver.register(arguments.dst, max); + } Ok(()) } diff --git a/ptx/src/test/ll/abs.ll b/ptx/src/test/ll/abs.ll index ce76c4a..dda3cb5 100644 --- a/ptx/src/test/ll/abs.ll +++ b/ptx/src/test/ll/abs.ll @@ -30,5 +30,5 @@ define amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.abs.i32(i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No 
newline at end of file diff --git a/ptx/src/test/ll/activemask.ll b/ptx/src/test/ll/activemask.ll index 9378005..267d929 100644 --- a/ptx/src/test/ll/activemask.ll +++ b/ptx/src/test/ll/activemask.ll @@ -20,5 +20,5 @@ define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"32", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add.ll b/ptx/src/test/ll/add.ll index 486c349..6df1984 100644 --- a/ptx/src/test/ll/add.ll +++ b/ptx/src/test/ll/add.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add_non_coherent.ll b/ptx/src/test/ll/add_non_coherent.ll index 479df65..0cc3e92 100644 --- 
a/ptx/src/test/ll/add_non_coherent.ll +++ b/ptx/src/test/ll/add_non_coherent.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"35", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/add_s32_sat.ll b/ptx/src/test/ll/add_s32_sat.ll index ff6e26f..69ff9db 100644 --- a/ptx/src/test/ll/add_s32_sat.ll +++ b/ptx/src/test/ll/add_s32_sat.ll @@ -47,5 +47,5 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.sadd.sat.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/add_tuning.ll b/ptx/src/test/ll/add_tuning.ll index 7119903..51e6d61 100644 --- a/ptx/src/test/ll/add_tuning.ll +++ b/ptx/src/test/ll/add_tuning.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"35", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" 
"no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/and.ll b/ptx/src/test/ll/and.ll index 145458b..48d1ac4 100644 --- a/ptx/src/test/ll/and.ll +++ b/ptx/src/test/ll/and.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/assertfail.ll b/ptx/src/test/ll/assertfail.ll index c168cbd..4399e92 100644 --- a/ptx/src/test/ll/assertfail.ll +++ b/ptx/src/test/ll/assertfail.ll @@ -60,5 +60,5 @@ define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"89", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" 
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_add.ll b/ptx/src/test/ll/atom_add.ll index 117cda9..ee87cad 100644 --- a/ptx/src/test/ll/atom_add.ll +++ b/ptx/src/test/ll/atom_add.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"39", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_add_float.ll b/ptx/src/test/ll/atom_add_float.ll index 587689c..e261e78 100644 --- a/ptx/src/test/ll/atom_add_float.ll +++ b/ptx/src/test/ll/atom_add_float.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"39", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_cas.ll b/ptx/src/test/ll/atom_cas.ll index 35dcfc2..78593d9 100644 --- a/ptx/src/test/ll/atom_cas.ll +++ b/ptx/src/test/ll/atom_cas.ll @@ -41,4 +41,4 @@ define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"41", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" 
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/atom_inc.ll b/ptx/src/test/ll/atom_inc.ll index 6c35f3f..f63e224 100644 --- a/ptx/src/test/ll/atom_inc.ll +++ b/ptx/src/test/ll/atom_inc.ll @@ -43,4 +43,4 @@ define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"41", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/b64tof64.ll b/ptx/src/test/ll/b64tof64.ll index bcc972a..373aaf8 100644 --- a/ptx/src/test/ll/b64tof64.ll +++ b/ptx/src/test/ll/b64tof64.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"34", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bar_red_and_pred.ll b/ptx/src/test/ll/bar_red_and_pred.ll index efebbe3..8a0ab36 100644 --- 
a/ptx/src/test/ll/bar_red_and_pred.ll +++ b/ptx/src/test/ll/bar_red_and_pred.ll @@ -117,5 +117,5 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"76", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bfe.ll b/ptx/src/test/ll/bfe.ll index b262629..c60c67f 100644 --- a/ptx/src/test/ll/bfe.ll +++ b/ptx/src/test/ll/bfe.ll @@ -42,5 +42,5 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" 
"denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bfi.ll b/ptx/src/test/ll/bfi.ll index 827a637..50cb144 100644 --- a/ptx/src/test/ll/bfi.ll +++ b/ptx/src/test/ll/bfi.ll @@ -49,5 +49,5 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"42", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/block.ll b/ptx/src/test/ll/block.ll index 27465f2..89ab97c 100644 --- a/ptx/src/test/ll/block.ll +++ b/ptx/src/test/ll/block.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"37", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/bra.ll b/ptx/src/test/ll/bra.ll 
index 90b186c..16d4636 100644 --- a/ptx/src/test/ll/bra.ll +++ b/ptx/src/test/ll/bra.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/brev.ll b/ptx/src/test/ll/brev.ll index 21a9dc6..31027a5 100644 --- a/ptx/src/test/ll/brev.ll +++ b/ptx/src/test/ll/brev.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"33", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.bitreverse.i32(i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/call.ll b/ptx/src/test/ll/call.ll index 2b19d1d..26f22a8 100644 --- a/ptx/src/test/ll/call.ll +++ b/ptx/src/test/ll/call.ll @@ -60,5 +60,5 @@ define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"51", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } 
-attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/call_rnd.ll b/ptx/src/test/ll/call_rnd.ll index dcc0458..0772498 100644 --- a/ptx/src/test/ll/call_rnd.ll +++ b/ptx/src/test/ll/call_rnd.ll @@ -150,6 +150,6 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"95", ptr addrs ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/clz.ll b/ptx/src/test/ll/clz.ll index 9cc90ab..56d1fd6 100644 --- 
a/ptx/src/test/ll/clz.ll +++ b/ptx/src/test/ll/clz.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/const.ll b/ptx/src/test/ll/const.ll index 69d877a..0049b11 100644 --- a/ptx/src/test/ll/const.ll +++ b/ptx/src/test/ll/const.ll @@ -47,4 +47,4 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"49", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/const_ident.ll b/ptx/src/test/ll/const_ident.ll index c927ef8..a3f349e 100644 --- a/ptx/src/test/ll/const_ident.ll +++ b/ptx/src/test/ll/const_ident.ll @@ -1,6 +1,6 @@ @x = addrspace(4) global i64 1 @y = addrspace(4) global [4 x i64] [i64 4, i64 5, i64 6, i64 0] -@constparams = addrspace(4) global [4 x i64] [i64 ptrtoint (ptr addrspace(4) @x to i64), i64 ptrtoint (ptr addrspace(4) @y to i64)] +@constparams = addrspace(4) global [2 x i64] 
[i64 ptrtoint (ptr addrspace(4) @x to i64), i64 ptrtoint (ptr addrspace(4) @y to i64)] define amdgpu_kernel void @const_ident(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #0 { %"51" = alloca i64, align 8, addrspace(5) @@ -52,4 +52,4 @@ define amdgpu_kernel void @const_ident(ptr addrspace(4) byref(i64) %"49", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/constant_f32.ll b/ptx/src/test/ll/constant_f32.ll index 416d390..2e2cbf1 100644 --- a/ptx/src/test/ll/constant_f32.ll +++ b/ptx/src/test/ll/constant_f32.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"34", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/constant_negative.ll b/ptx/src/test/ll/constant_negative.ll index f835eb0..b369192 100644 --- a/ptx/src/test/ll/constant_negative.ll +++ b/ptx/src/test/ll/constant_negative.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"34", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cos.ll b/ptx/src/test/ll/cos.ll index 258860b..d77fefe 100644 --- a/ptx/src/test/ll/cos.ll +++ b/ptx/src/test/ll/cos.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.cos.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cp_async.ll b/ptx/src/test/ll/cp_async.ll index 757e81c..24da789 100644 --- a/ptx/src/test/ll/cp_async.ll +++ b/ptx/src/test/ll/cp_async.ll @@ -51,4 +51,4 @@ define amdgpu_kernel void @cp_async(ptr addrspace(4) byref(i64) %"51", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_f64_f32.ll b/ptx/src/test/ll/cvt_f64_f32.ll index 
149e004..7c8027f 100644 --- a/ptx/src/test/ll/cvt_f64_f32.ll +++ b/ptx/src/test/ll/cvt_f64_f32.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll b/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll index 1e19037..fb3f353 100644 --- a/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_bf16x2_f32.ll @@ -38,4 +38,4 @@ define amdgpu_kernel void @cvt_rn_bf16x2_f32(ptr addrspace(4) byref(i64) %"37", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll b/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll index 9feec9b..8c647c6 100644 --- a/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll +++ b/ptx/src/test/ll/cvt_rn_f16x2_e4m3x2.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @cvt_rn_f16x2_e4m3x2(ptr addrspace(4) byref(i64) %"34" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll b/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll index 35c7a19..81aeb53 100644 --- a/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll +++ b/ptx/src/test/ll/cvt_rn_f16x2_e5m2x2.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @cvt_rn_f16x2_e5m2x2(ptr addrspace(4) byref(i64) %"34" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll b/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll index 5438f43..6747e22 100644 --- a/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_satfinite_e4m3x2_f32.ll @@ -36,5 +36,5 @@ define amdgpu_kernel void @cvt_rn_satfinite_e4m3x2_f32(ptr addrspace(4) byref(i6 ret void } -attributes #0 = { 
"amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll b/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll index 01a465c..8825542 100644 --- a/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll +++ b/ptx/src/test/ll/cvt_rn_satfinite_e5m2x2_f32.ll @@ -36,5 +36,5 @@ define amdgpu_kernel void @cvt_rn_satfinite_e5m2x2_f32(ptr addrspace(4) byref(i6 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rni.ll b/ptx/src/test/ll/cvt_rni.ll index 
fc79b9a..d8bd390 100644 --- a/ptx/src/test/ll/cvt_rni.ll +++ b/ptx/src/test/ll/cvt_rni.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"38", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.roundeven.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rni_u16_f32.ll b/ptx/src/test/ll/cvt_rni_u16_f32.ll index 867488a..14191d0 100644 --- a/ptx/src/test/ll/cvt_rni_u16_f32.ll +++ b/ptx/src/test/ll/cvt_rni_u16_f32.ll @@ -34,5 +34,5 @@ declare float @llvm.roundeven.f32(float) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i16 @llvm.fptoui.sat.i16.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_rzi.ll b/ptx/src/test/ll/cvt_rzi.ll index 7fb730b..77a31e7 100644 --- a/ptx/src/test/ll/cvt_rzi.ll +++ b/ptx/src/test/ll/cvt_rzi.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"38", ptr addrsp ; Function Attrs: 
nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.trunc.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s16_s8.ll b/ptx/src/test/ll/cvt_s16_s8.ll index fbccbb9..65d9486 100644 --- a/ptx/src/test/ll/cvt_s16_s8.ll +++ b/ptx/src/test/ll/cvt_s16_s8.ll @@ -29,4 +29,4 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"34", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s32_f32.ll b/ptx/src/test/ll/cvt_s32_f32.ll index 608e485..e6c3301 100644 --- a/ptx/src/test/ll/cvt_s32_f32.ll +++ b/ptx/src/test/ll/cvt_s32_f32.ll @@ -51,5 +51,5 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"38", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.ceil.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" 
"amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_s64_s32.ll b/ptx/src/test/ll/cvt_s64_s32.ll index b9e402f..f2eba44 100644 --- a/ptx/src/test/ll/cvt_s64_s32.ll +++ b/ptx/src/test/ll/cvt_s64_s32.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/cvt_sat_s_u.ll b/ptx/src/test/ll/cvt_sat_s_u.ll index c3cedfb..286d67f 100644 --- a/ptx/src/test/ll/cvt_sat_s_u.ll +++ b/ptx/src/test/ll/cvt_sat_s_u.ll @@ -37,5 +37,5 @@ declare i32 @llvm.smax.i32(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/cvta.ll b/ptx/src/test/ll/cvta.ll index 
05902c1..a50af47 100644 --- a/ptx/src/test/ll/cvta.ll +++ b/ptx/src/test/ll/cvta.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"33", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/div_approx.ll b/ptx/src/test/ll/div_approx.ll index 9015cf5..3efdd9d 100644 --- a/ptx/src/test/ll/div_approx.ll +++ b/ptx/src/test/ll/div_approx.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"36", ptr add ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/div_ftz.ll b/ptx/src/test/ll/div_ftz.ll index f85b107..b66d894 100644 --- a/ptx/src/test/ll/div_ftz.ll +++ b/ptx/src/test/ll/div_ftz.ll @@ -69,6 +69,6 @@ define amdgpu_kernel void @div_ftz(ptr addrspace(4) byref(i64) %"66", ptr addrsp ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" 
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/div_noftz.ll b/ptx/src/test/ll/div_noftz.ll index 8389f3f..8dd302c 100644 --- a/ptx/src/test/ll/div_noftz.ll +++ b/ptx/src/test/ll/div_noftz.ll @@ -66,6 +66,6 @@ define amdgpu_kernel void @div_noftz(ptr addrspace(4) byref(i64) %"65", ptr addr ; Function Attrs: nocallback nofree nosync nounwind willreturn declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #2 = { nocallback nofree nosync nounwind willreturn } \ No newline at end of file diff --git a/ptx/src/test/ll/dp4a.ll b/ptx/src/test/ll/dp4a.ll index 19439b9..58ea314 100644 --- a/ptx/src/test/ll/dp4a.ll +++ 
b/ptx/src/test/ll/dp4a.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"40", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1 immarg) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/ex2.ll b/ptx/src/test/ll/ex2.ll index 1202f1e..c1ebec6 100644 --- a/ptx/src/test/ll/ex2.ll +++ b/ptx/src/test/ll/ex2.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_func.ll b/ptx/src/test/ll/extern_func.ll index a21fb6d..d5f6aa0 100644 --- 
a/ptx/src/test/ll/extern_func.ll +++ b/ptx/src/test/ll/extern_func.ll @@ -39,5 +39,5 @@ define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"47", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_shared.ll b/ptx/src/test/ll/extern_shared.ll index e92d592..12e426a 100644 --- a/ptx/src/test/ll/extern_shared.ll +++ b/ptx/src/test/ll/extern_shared.ll @@ -29,4 +29,4 @@ define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"34", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/extern_shared_call.ll b/ptx/src/test/ll/extern_shared_call.ll index 28e3989..4c826f5 100644 --- a/ptx/src/test/ll/extern_shared_call.ll +++ b/ptx/src/test/ll/extern_shared_call.ll @@ -51,5 +51,5 @@ define 
amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"44", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/fma.ll b/ptx/src/test/ll/fma.ll index 780017b..254bfce 100644 --- a/ptx/src/test/ll/fma.ll +++ b/ptx/src/test/ll/fma.ll @@ -43,5 +43,5 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"39", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.fma.f32(float, float, float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/fma_bf16x2.ll b/ptx/src/test/ll/fma_bf16x2.ll index ff7a638..b3cc521 100644 --- a/ptx/src/test/ll/fma_bf16x2.ll +++ b/ptx/src/test/ll/fma_bf16x2.ll @@ -47,5 +47,5 @@ 
define amdgpu_kernel void @fma_bf16x2(ptr addrspace(4) byref(i64) %"39", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/fmax.ll b/ptx/src/test/ll/fmax.ll index dbaa4ed..546ed5d 100644 --- a/ptx/src/test/ll/fmax.ll +++ b/ptx/src/test/ll/fmax.ll @@ -28,8 +28,8 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace store half %"51", ptr addrspace(5) %"43", align 2 %"53" = load half, ptr addrspace(5) %"43", align 2 %"54" = load half, ptr addrspace(5) %"42", align 2 - %"52" = call half @llvm.maxnum.f16(half %"53", half %"54") - store half %"52", ptr addrspace(5) %"44", align 2 + %2 = call half @llvm.maxnum.f16(half %"53", half %"54") + store half %2, ptr addrspace(5) %"44", align 2 %"55" = load i64, ptr addrspace(5) %"41", align 8 %"56" = load half, ptr addrspace(5) %"44", align 2 %"61" = inttoptr i64 %"55" to ptr @@ -41,5 +41,5 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare half @llvm.maxnum.f16(half, half) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/global_array.ll b/ptx/src/test/ll/global_array.ll index d0f3cb9..68e3a66 100644 --- a/ptx/src/test/ll/global_array.ll +++ b/ptx/src/test/ll/global_array.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"34", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/global_array_f32.ll b/ptx/src/test/ll/global_array_f32.ll index 201a754..79cf5f8 100644 --- a/ptx/src/test/ll/global_array_f32.ll +++ b/ptx/src/test/ll/global_array_f32.ll @@ -25,4 +25,4 @@ define amdgpu_kernel void @global_array_f32(ptr addrspace(4) byref(i64) %"36", p ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/lanemask_lt.ll b/ptx/src/test/ll/lanemask_lt.ll index 5c966e4..912752f 100644 --- a/ptx/src/test/ll/lanemask_lt.ll +++ 
b/ptx/src/test/ll/lanemask_lt.ll @@ -39,5 +39,5 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"39", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st.ll b/ptx/src/test/ll/ld_st.ll index cadcb61..07c0e56 100644 --- a/ptx/src/test/ll/ld_st.ll +++ b/ptx/src/test/ll/ld_st.ll @@ -23,4 +23,4 @@ define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"33", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st_implicit.ll b/ptx/src/test/ll/ld_st_implicit.ll index 9fb584a..00db109 100644 --- a/ptx/src/test/ll/ld_st_implicit.ll +++ b/ptx/src/test/ll/ld_st_implicit.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"34", ptr ret void } 
-attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ld_st_offset.ll b/ptx/src/test/ll/ld_st_offset.ll index 7b701e8..483e822 100644 --- a/ptx/src/test/ll/ld_st_offset.ll +++ b/ptx/src/test/ll/ld_st_offset.ll @@ -34,4 +34,4 @@ define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"38", ptr a ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ldmatrix.ll b/ptx/src/test/ll/ldmatrix.ll index f5687ae..3004ca6 100644 --- a/ptx/src/test/ll/ldmatrix.ll +++ b/ptx/src/test/ll/ldmatrix.ll @@ -95,5 +95,5 @@ define amdgpu_kernel void @ldmatrix(ptr addrspace(4) byref(i64) %"55") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" 
"no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ldmatrix_trans.ll b/ptx/src/test/ll/ldmatrix_trans.ll index 2552140..b6350b9 100644 --- a/ptx/src/test/ll/ldmatrix_trans.ll +++ b/ptx/src/test/ll/ldmatrix_trans.ll @@ -160,5 +160,5 @@ define amdgpu_kernel void @ldmatrix_trans(ptr addrspace(4) byref(i64) %"86") #1 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/lg2.ll b/ptx/src/test/ll/lg2.ll index ee6d85b..f918f0c 100644 --- a/ptx/src/test/ll/lg2.ll +++ b/ptx/src/test/ll/lg2.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/local_align.ll b/ptx/src/test/ll/local_align.ll index a2ebd84..c51a99c 100644 --- a/ptx/src/test/ll/local_align.ll +++ b/ptx/src/test/ll/local_align.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"34", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mad_s32.ll b/ptx/src/test/ll/mad_s32.ll index 29f80ab..bb15154 100644 --- a/ptx/src/test/ll/mad_s32.ll +++ b/ptx/src/test/ll/mad_s32.ll @@ -42,4 +42,4 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"40", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git 
a/ptx/src/test/ll/mad_wide.ll b/ptx/src/test/ll/mad_wide.ll index 8a13a2e..cc0422d 100644 --- a/ptx/src/test/ll/mad_wide.ll +++ b/ptx/src/test/ll/mad_wide.ll @@ -44,4 +44,4 @@ define amdgpu_kernel void @mad_wide(ptr addrspace(4) byref(i64) %"40", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/malformed_label.ll b/ptx/src/test/ll/malformed_label.ll index b088b0e..0383f4c 100644 --- a/ptx/src/test/ll/malformed_label.ll +++ b/ptx/src/test/ll/malformed_label.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"37", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/max.ll b/ptx/src/test/ll/max.ll index 8f10384..f7d6e65 100644 --- a/ptx/src/test/ll/max.ll +++ b/ptx/src/test/ll/max.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smax.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call 
i32 @llvm.smax.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr @@ -36,5 +36,5 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/membar.ll b/ptx/src/test/ll/membar.ll index 52c7718..7256fd3 100644 --- a/ptx/src/test/ll/membar.ll +++ b/ptx/src/test/ll/membar.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"33", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/min.ll b/ptx/src/test/ll/min.ll index 70b678e..1c276ab 100644 --- a/ptx/src/test/ll/min.ll +++ b/ptx/src/test/ll/min.ll @@ -24,8 +24,8 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( store i32 %"47", ptr addrspace(5) %"41", align 4 %"49" = 
load i32, ptr addrspace(5) %"40", align 4 %"50" = load i32, ptr addrspace(5) %"41", align 4 - %"48" = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") - store i32 %"48", ptr addrspace(5) %"40", align 4 + %2 = call i32 @llvm.smin.i32(i32 %"49", i32 %"50") + store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i64, ptr addrspace(5) %"39", align 8 %"52" = load i32, ptr addrspace(5) %"40", align 4 %"55" = inttoptr i64 %"51" to ptr @@ -36,5 +36,5 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/min_f16.ll b/ptx/src/test/ll/min_f16.ll new file mode 100644 index 0000000..f0c7c4d --- /dev/null +++ b/ptx/src/test/ll/min_f16.ll @@ -0,0 +1,43 @@ +define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { + %"38" = alloca i64, align 8, addrspace(5) + %"39" = alloca i64, align 8, addrspace(5) + %"40" = alloca half, align 2, addrspace(5) + %"41" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"35" + +"35": ; preds = %1 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 + %"54" = inttoptr i64 
%"45" to ptr + %"53" = load i16, ptr %"54", align 2 + %"44" = bitcast i16 %"53" to half + store half %"44", ptr addrspace(5) %"40", align 2 + %"46" = load i64, ptr addrspace(5) %"38", align 8 + %"55" = inttoptr i64 %"46" to ptr + %"34" = getelementptr inbounds i8, ptr %"55", i64 2 + %"56" = load i16, ptr %"34", align 2 + %"47" = bitcast i16 %"56" to half + store half %"47", ptr addrspace(5) %"41", align 2 + %"49" = load half, ptr addrspace(5) %"40", align 2 + %"50" = load half, ptr addrspace(5) %"41", align 2 + %2 = call half @llvm.minnum.f16(half %"49", half %"50") + store half %2, ptr addrspace(5) %"40", align 2 + %"51" = load i64, ptr addrspace(5) %"39", align 8 + %"52" = load half, ptr addrspace(5) %"40", align 2 + %"57" = inttoptr i64 %"51" to ptr + %"58" = bitcast half %"52" to i16 + store i16 %"58", ptr %"57", align 2 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare half @llvm.minnum.f16(half, half) #1 + +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/min_nan_f16.ll b/ptx/src/test/ll/min_nan_f16.ll new file mode 100644 index 0000000..90b4780 --- /dev/null +++ b/ptx/src/test/ll/min_nan_f16.ll @@ -0,0 +1,45 @@ +define amdgpu_kernel void @min_nan_f16(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { + %"38" = alloca i64, align 8, addrspace(5) + %"39" = alloca i64, align 8, addrspace(5) + %"40" = alloca half, align 2, addrspace(5) + %"41" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"35" + +"35": ; preds = %1 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr 
addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 + %"54" = inttoptr i64 %"45" to ptr + %"53" = load i16, ptr %"54", align 2 + %"44" = bitcast i16 %"53" to half + store half %"44", ptr addrspace(5) %"40", align 2 + %"46" = load i64, ptr addrspace(5) %"38", align 8 + %"55" = inttoptr i64 %"46" to ptr + %"34" = getelementptr inbounds i8, ptr %"55", i64 2 + %"56" = load i16, ptr %"34", align 2 + %"47" = bitcast i16 %"56" to half + store half %"47", ptr addrspace(5) %"41", align 2 + %"49" = load half, ptr addrspace(5) %"40", align 2 + %"50" = load half, ptr addrspace(5) %"41", align 2 + %2 = call half @llvm.minnum.f16(half %"49", half %"50") + %3 = fcmp uno half %"49", %"50" + %"48" = select i1 %3, half 0xH7E00, half %2 + store half %"48", ptr addrspace(5) %"40", align 2 + %"51" = load i64, ptr addrspace(5) %"39", align 8 + %"52" = load half, ptr addrspace(5) %"40", align 2 + %"57" = inttoptr i64 %"51" to ptr + %"58" = bitcast half %"52" to i16 + store i16 %"58", ptr %"57", align 2 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare half @llvm.minnum.f16(half, half) #1 + +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/mov.ll b/ptx/src/test/ll/mov.ll index 0c39773..3500058 100644 --- a/ptx/src/test/ll/mov.ll +++ b/ptx/src/test/ll/mov.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file 
+attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mov_address.ll b/ptx/src/test/ll/mov_address.ll index da04c37..9ebb3a8 100644 --- a/ptx/src/test/ll/mov_address.ll +++ b/ptx/src/test/ll/mov_address.ll @@ -12,4 +12,4 @@ define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"32", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_hi_s32.ll b/ptx/src/test/ll/mul24_hi_s32.ll index 5b67718..5fd7924 100644 --- a/ptx/src/test/ll/mul24_hi_s32.ll +++ b/ptx/src/test/ll/mul24_hi_s32.ll @@ -42,5 +42,5 @@ declare i32 @llvm.amdgcn.mul.i24(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mulhi.i24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_hi_u32.ll b/ptx/src/test/ll/mul24_hi_u32.ll index 94018d8..efd0a9c 100644 --- 
a/ptx/src/test/ll/mul24_hi_u32.ll +++ b/ptx/src/test/ll/mul24_hi_u32.ll @@ -38,5 +38,5 @@ declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mulhi.u24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_lo_s32.ll b/ptx/src/test/ll/mul24_lo_s32.ll index bb4137e..734ee5c 100644 --- a/ptx/src/test/ll/mul24_lo_s32.ll +++ b/ptx/src/test/ll/mul24_lo_s32.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"35", ptr a ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mul.i24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul24_lo_u32.ll b/ptx/src/test/ll/mul24_lo_u32.ll index a2a0e6f..dfd56b1 100644 --- a/ptx/src/test/ll/mul24_lo_u32.ll +++ b/ptx/src/test/ll/mul24_lo_u32.ll @@ -31,5 +31,5 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) 
%"34", ptr a ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_ftz.ll b/ptx/src/test/ll/mul_ftz.ll index da1ae40..fa169d9 100644 --- a/ptx/src/test/ll/mul_ftz.ll +++ b/ptx/src/test/ll/mul_ftz.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"36", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_hi.ll b/ptx/src/test/ll/mul_hi.ll index f019cfe..8e09715 100644 --- a/ptx/src/test/ll/mul_hi.ll +++ b/ptx/src/test/ll/mul_hi.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"35", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" 
"denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_lo.ll b/ptx/src/test/ll/mul_lo.ll index 5909dc7..818aaa5 100644 --- a/ptx/src/test/ll/mul_lo.ll +++ b/ptx/src/test/ll/mul_lo.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"35", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_non_ftz.ll b/ptx/src/test/ll/mul_non_ftz.ll index 00ad221..9ecd39c 100644 --- a/ptx/src/test/ll/mul_non_ftz.ll +++ b/ptx/src/test/ll/mul_non_ftz.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"36", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/mul_wide.ll b/ptx/src/test/ll/mul_wide.ll index 451e70a..654b204 100644 --- a/ptx/src/test/ll/mul_wide.ll +++ b/ptx/src/test/ll/mul_wide.ll @@ -36,4 +36,4 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"37", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" 
"no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/multiple_return.ll b/ptx/src/test/ll/multiple_return.ll index c265efa..75b1392 100644 --- a/ptx/src/test/ll/multiple_return.ll +++ b/ptx/src/test/ll/multiple_return.ll @@ -66,5 +66,5 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"53", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/nanosleep.ll b/ptx/src/test/ll/nanosleep.ll index 6d75f05..bc59083 100644 --- a/ptx/src/test/ll/nanosleep.ll +++ b/ptx/src/test/ll/nanosleep.ll @@ -11,5 +11,5 @@ define amdgpu_kernel void @nanosleep(ptr addrspace(4) byref(i64) %"31", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" 
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/neg.ll b/ptx/src/test/ll/neg.ll index e5c307a..97c0292 100644 --- a/ptx/src/test/ll/neg.ll +++ b/ptx/src/test/ll/neg.ll @@ -26,4 +26,4 @@ define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/non_scalar_ptr_offset.ll b/ptx/src/test/ll/non_scalar_ptr_offset.ll index 00e65da..211ce80 100644 --- a/ptx/src/test/ll/non_scalar_ptr_offset.ll +++ b/ptx/src/test/ll/non_scalar_ptr_offset.ll @@ -32,4 +32,4 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" 
"denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/not.ll b/ptx/src/test/ll/not.ll index e8ead6e..879876e 100644 --- a/ptx/src/test/ll/not.ll +++ b/ptx/src/test/ll/not.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/ntid.ll b/ptx/src/test/ll/ntid.ll index fa713ac..3a51e7f 100644 --- a/ptx/src/test/ll/ntid.ll +++ b/ptx/src/test/ll/ntid.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/or.ll b/ptx/src/test/ll/or.ll index 
9ee2e00..6acc96a 100644 --- a/ptx/src/test/ll/or.ll +++ b/ptx/src/test/ll/or.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/param_is_addressable.ll b/ptx/src/test/ll/param_is_addressable.ll index 6f75d5e..e9e9f8e 100644 --- a/ptx/src/test/ll/param_is_addressable.ll +++ b/ptx/src/test/ll/param_is_addressable.ll @@ -31,4 +31,4 @@ define amdgpu_kernel void @param_is_addressable(ptr addrspace(4) byref(i64) %"33 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/popc.ll b/ptx/src/test/ll/popc.ll index f60b5a0..0a89c1e 100644 --- a/ptx/src/test/ll/popc.ll +++ b/ptx/src/test/ll/popc.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"33", ptr addrspace ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.ctpop.i32(i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { 
"amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/pred_not.ll b/ptx/src/test/ll/pred_not.ll index 40930af..a111c4a 100644 --- a/ptx/src/test/ll/pred_not.ll +++ b/ptx/src/test/ll/pred_not.ll @@ -54,4 +54,4 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"44", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/prmt.ll b/ptx/src/test/ll/prmt.ll index 2b630b7..7753f5c 100644 --- a/ptx/src/test/ll/prmt.ll +++ b/ptx/src/test/ll/prmt.ll @@ -35,4 +35,4 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"36", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rcp.ll b/ptx/src/test/ll/rcp.ll index 4b3cc99..91f453c 100644 --- a/ptx/src/test/ll/rcp.ll +++ b/ptx/src/test/ll/rcp.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"33", ptr 
addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/redux_sync_add_u32_partial.ll b/ptx/src/test/ll/redux_sync_add_u32_partial.ll index ab55c15..f17249b 100644 --- a/ptx/src/test/ll/redux_sync_add_u32_partial.ll +++ b/ptx/src/test/ll/redux_sync_add_u32_partial.ll @@ -54,5 +54,5 @@ define amdgpu_kernel void @redux_sync_add_u32_partial(ptr addrspace(4) byref(i64 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git 
a/ptx/src/test/ll/redux_sync_op_s32.ll b/ptx/src/test/ll/redux_sync_op_s32.ll index a84624b..bea6e03 100644 --- a/ptx/src/test/ll/redux_sync_op_s32.ll +++ b/ptx/src/test/ll/redux_sync_op_s32.ll @@ -63,5 +63,5 @@ define amdgpu_kernel void @redux_sync_op_s32(ptr addrspace(4) byref(i64) %"46") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/redux_sync_op_u32.ll b/ptx/src/test/ll/redux_sync_op_u32.ll index 3629939..36c2529 100644 --- a/ptx/src/test/ll/redux_sync_op_u32.ll +++ b/ptx/src/test/ll/redux_sync_op_u32.ll @@ -59,5 +59,5 @@ define amdgpu_kernel void @redux_sync_op_u32(ptr addrspace(4) byref(i64) %"44") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { 
"amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/reg_local.ll b/ptx/src/test/ll/reg_local.ll index 6e011d5..8d6856a 100644 --- a/ptx/src/test/ll/reg_local.ll +++ b/ptx/src/test/ll/reg_local.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"40", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rem.ll b/ptx/src/test/ll/rem.ll index 07da66a..d63b53e 100644 --- a/ptx/src/test/ll/rem.ll +++ b/ptx/src/test/ll/rem.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/rsqrt.ll b/ptx/src/test/ll/rsqrt.ll index 727f82b..933b1a0 100644 --- a/ptx/src/test/ll/rsqrt.ll +++ b/ptx/src/test/ll/rsqrt.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"33", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable 
willreturn memory(none) declare double @llvm.amdgcn.rsq.f64(double) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/selp.ll b/ptx/src/test/ll/selp.ll index 8c8f1d7..78f6e06 100644 --- a/ptx/src/test/ll/selp.ll +++ b/ptx/src/test/ll/selp.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"37", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/selp_true.ll b/ptx/src/test/ll/selp_true.ll index f50a208..4873cb3 100644 --- a/ptx/src/test/ll/selp_true.ll +++ b/ptx/src/test/ll/selp_true.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"37", ptr addr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ 
No newline at end of file diff --git a/ptx/src/test/ll/setp.ll b/ptx/src/test/ll/setp.ll index d82b3cc..c4835b4 100644 --- a/ptx/src/test/ll/setp.ll +++ b/ptx/src/test/ll/setp.ll @@ -51,4 +51,4 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"44", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_gt.ll b/ptx/src/test/ll/setp_gt.ll index d8eb311..54ffcdc 100644 --- a/ptx/src/test/ll/setp_gt.ll +++ b/ptx/src/test/ll/setp_gt.ll @@ -53,4 +53,4 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"42", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_leu.ll b/ptx/src/test/ll/setp_leu.ll index 4f6dd68..cca35ef 100644 --- a/ptx/src/test/ll/setp_leu.ll +++ b/ptx/src/test/ll/setp_leu.ll @@ -53,4 +53,4 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"42", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { 
"amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_nan.ll b/ptx/src/test/ll/setp_nan.ll index 82f5dbe..341e89c 100644 --- a/ptx/src/test/ll/setp_nan.ll +++ b/ptx/src/test/ll/setp_nan.ll @@ -162,4 +162,4 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"86", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/setp_num.ll b/ptx/src/test/ll/setp_num.ll index e208314..f8b5995 100644 --- a/ptx/src/test/ll/setp_num.ll +++ b/ptx/src/test/ll/setp_num.ll @@ -162,4 +162,4 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"86", ptr addrs ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_ptr_32.ll b/ptx/src/test/ll/shared_ptr_32.ll index 0a1602e..bc8e593 100644 --- a/ptx/src/test/ll/shared_ptr_32.ll +++ b/ptx/src/test/ll/shared_ptr_32.ll @@ -37,4 +37,4 @@ define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"38", ptr ret void } -attributes #0 = { 
"amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_ptr_take_address.ll b/ptx/src/test/ll/shared_ptr_take_address.ll index 20d1feb..87a0fd5 100644 --- a/ptx/src/test/ll/shared_ptr_take_address.ll +++ b/ptx/src/test/ll/shared_ptr_take_address.ll @@ -36,4 +36,4 @@ define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_unify_extern.ll b/ptx/src/test/ll/shared_unify_extern.ll index 271b4b8..968ee98 100644 --- a/ptx/src/test/ll/shared_unify_extern.ll +++ b/ptx/src/test/ll/shared_unify_extern.ll @@ -80,5 +80,5 @@ define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"59" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" 
"amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_unify_local.ll b/ptx/src/test/ll/shared_unify_local.ll index 65bbd91..143c308 100644 --- a/ptx/src/test/ll/shared_unify_local.ll +++ b/ptx/src/test/ll/shared_unify_local.ll @@ -77,5 +77,5 @@ define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"57", ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shared_variable.ll b/ptx/src/test/ll/shared_variable.ll index 013d340..f7d59b5 100644 --- a/ptx/src/test/ll/shared_variable.ll +++ b/ptx/src/test/ll/shared_variable.ll @@ -30,4 +30,4 @@ define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"35", pt ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" 
"no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l.ll b/ptx/src/test/ll/shf_l.ll index 3a80428..ad44191 100644 --- a/ptx/src/test/ll/shf_l.ll +++ b/ptx/src/test/ll/shf_l.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_l(ptr addrspace(4) byref(i64) %"40", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l_clamp.ll b/ptx/src/test/ll/shf_l_clamp.ll index 62efed0..2f7c6d1 100644 --- a/ptx/src/test/ll/shf_l_clamp.ll +++ b/ptx/src/test/ll/shf_l_clamp.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_l_clamp(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" 
"no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_l_wrap.ll b/ptx/src/test/ll/shf_l_wrap.ll index cbf1f66..b71222c 100644 --- a/ptx/src/test/ll/shf_l_wrap.ll +++ b/ptx/src/test/ll/shf_l_wrap.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @shf_l_wrap(ptr addrspace(4) byref(i64) %"40", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshl.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_r.ll b/ptx/src/test/ll/shf_r.ll index 1b794a7..aaf87ea 100644 --- a/ptx/src/test/ll/shf_r.ll +++ b/ptx/src/test/ll/shf_r.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_r(ptr addrspace(4) byref(i64) %"40", ptr addrspac ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline 
at end of file diff --git a/ptx/src/test/ll/shf_r_clamp.ll b/ptx/src/test/ll/shf_r_clamp.ll index 4c74049..5d3b886 100644 --- a/ptx/src/test/ll/shf_r_clamp.ll +++ b/ptx/src/test/ll/shf_r_clamp.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @shf_r_clamp(ptr addrspace(4) byref(i64) %"40", ptr ad ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shf_r_wrap.ll b/ptx/src/test/ll/shf_r_wrap.ll index 5f3bb6a..371eaf9 100644 --- a/ptx/src/test/ll/shf_r_wrap.ll +++ b/ptx/src/test/ll/shf_r_wrap.ll @@ -44,5 +44,5 @@ define amdgpu_kernel void @shf_r_wrap(ptr addrspace(4) byref(i64) %"40", ptr add ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.fshr.i32(i32, i32, i32) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll index cede751..888abd0 
100644 --- a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_bfly_b32_pred(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll index d0b2664..aaf6d3e 100644 --- a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_down_b32_pred(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { 
"amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll index 25afb27..c7661be 100644 --- a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_idx_b32_pred(ptr addrspace(4) byref(i64) %" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_mode_b32.ll b/ptx/src/test/ll/shfl_sync_mode_b32.ll index d90f5fb..633a3dc 100644 --- a/ptx/src/test/ll/shfl_sync_mode_b32.ll +++ b/ptx/src/test/ll/shfl_sync_mode_b32.ll @@ -70,5 +70,5 @@ define amdgpu_kernel void @shfl_sync_mode_b32(ptr addrspace(4) byref(i64) %"51") ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" 
"denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll index c61b758..82f688f 100644 --- a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @shfl_sync_up_b32_pred(ptr addrspace(4) byref(i64) %"4 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shl.ll b/ptx/src/test/ll/shl.ll index 311c05b..df42cd6 100644 --- a/ptx/src/test/ll/shl.ll +++ b/ptx/src/test/ll/shl.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = 
{ "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shr.ll b/ptx/src/test/ll/shr.ll index 8b84067..536b43d 100644 --- a/ptx/src/test/ll/shr.ll +++ b/ptx/src/test/ll/shr.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"34", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/shr_oob.ll b/ptx/src/test/ll/shr_oob.ll index dac2dd1..45def22 100644 --- a/ptx/src/test/ll/shr_oob.ll +++ b/ptx/src/test/ll/shr_oob.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @shr_oob(ptr addrspace(4) byref(i64) %"34", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sign_extend.ll b/ptx/src/test/ll/sign_extend.ll index 9ebc818..2afda88 100644 --- 
a/ptx/src/test/ll/sign_extend.ll +++ b/ptx/src/test/ll/sign_extend.ll @@ -24,4 +24,4 @@ define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"33", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sin.ll b/ptx/src/test/ll/sin.ll index 010a889..dae1aea 100644 --- a/ptx/src/test/ll/sin.ll +++ b/ptx/src/test/ll/sin.ll @@ -29,5 +29,5 @@ define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"33", ptr addrspace( ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.sin.f32(float) #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file diff --git a/ptx/src/test/ll/sqrt.ll b/ptx/src/test/ll/sqrt.ll index e58b21a..5fb76ab 100644 --- a/ptx/src/test/ll/sqrt.ll +++ b/ptx/src/test/ll/sqrt.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"33", ptr addrspace ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" 
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sqrt_rn_ftz.ll b/ptx/src/test/ll/sqrt_rn_ftz.ll index d38c59d..b7fc7a6 100644 --- a/ptx/src/test/ll/sqrt_rn_ftz.ll +++ b/ptx/src/test/ll/sqrt_rn_ftz.ll @@ -28,5 +28,5 @@ define amdgpu_kernel void @sqrt_rn_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid.ll b/ptx/src/test/ll/stateful_ld_st_ntid.ll index 22dbe50..0c8c203 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid.ll @@ -51,5 +51,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) 
byref(i64) %"39" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll index f5e1fc2..caa2b82 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll @@ -55,5 +55,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll index 1e2bea6..4b3b0e8 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll @@ -57,5 +57,5 @@ define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) % ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/stateful_ld_st_simple.ll b/ptx/src/test/ll/stateful_ld_st_simple.ll index 183aa78..affe0fc 100644 --- a/ptx/src/test/ll/stateful_ld_st_simple.ll +++ b/ptx/src/test/ll/stateful_ld_st_simple.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"3 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline 
at end of file diff --git a/ptx/src/test/ll/stateful_neg_offset.ll b/ptx/src/test/ll/stateful_neg_offset.ll index 36e16cd..48668bf 100644 --- a/ptx/src/test/ll/stateful_neg_offset.ll +++ b/ptx/src/test/ll/stateful_neg_offset.ll @@ -42,4 +42,4 @@ define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"36" ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/sub.ll b/ptx/src/test/ll/sub.ll index 029efcd..ff991ab 100644 --- a/ptx/src/test/ll/sub.ll +++ b/ptx/src/test/ll/sub.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"35", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/tanh.ll b/ptx/src/test/ll/tanh.ll index a946370..b0c8663 100644 --- a/ptx/src/test/ll/tanh.ll +++ b/ptx/src/test/ll/tanh.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @tanh(ptr addrspace(4) byref(i64) %"33", ptr addrspace declare float @__ocml_tanh_f32(float) -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of 
file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/tid.ll b/ptx/src/test/ll/tid.ll index a057e42..22d1c0f 100644 --- a/ptx/src/test/ll/tid.ll +++ b/ptx/src/test/ll/tid.ll @@ -35,5 +35,5 @@ define amdgpu_kernel void @tid(ptr addrspace(4) byref(i64) %"37") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/trap.ll b/ptx/src/test/ll/trap.ll index 97dc102..0639453 100644 --- a/ptx/src/test/ll/trap.ll +++ b/ptx/src/test/ll/trap.ll @@ -12,5 +12,5 @@ define amdgpu_kernel void @trap(ptr addrspace(4) byref(i64) %"30", ptr addrspace ; Function Attrs: cold noreturn nounwind declare void @llvm.trap() #1 -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" 
"uniform-work-group-size"="true" } attributes #1 = { cold noreturn nounwind } \ No newline at end of file diff --git a/ptx/src/test/ll/vector.ll b/ptx/src/test/ll/vector.ll index 126ce8d..ddd7f9a 100644 --- a/ptx/src/test/ll/vector.ll +++ b/ptx/src/test/ll/vector.ll @@ -73,5 +73,5 @@ define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"70", ptr addrspa ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector4.ll b/ptx/src/test/ll/vector4.ll index 4cb7208..837530a 100644 --- a/ptx/src/test/ll/vector4.ll +++ b/ptx/src/test/ll/vector4.ll @@ -27,4 +27,4 @@ define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"35", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector_extract.ll 
b/ptx/src/test/ll/vector_extract.ll index fe73736..387ac00 100644 --- a/ptx/src/test/ll/vector_extract.ll +++ b/ptx/src/test/ll/vector_extract.ll @@ -83,4 +83,4 @@ define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"43", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vector_operand.ll b/ptx/src/test/ll/vector_operand.ll index 564d0da..af73ab5 100644 --- a/ptx/src/test/ll/vector_operand.ll +++ b/ptx/src/test/ll/vector_operand.ll @@ -28,4 +28,4 @@ define amdgpu_kernel void @vector_operand(ptr addrspace(4) byref(i64) %"36", ptr ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_all.ll b/ptx/src/test/ll/vote_all.ll index 175edb8..f5e28fe 100644 --- a/ptx/src/test/ll/vote_all.ll +++ b/ptx/src/test/ll/vote_all.ll @@ -62,5 +62,5 @@ define amdgpu_kernel void @vote_all(ptr addrspace(4) byref(i64) %"51") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" 
"denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_all_sub.ll b/ptx/src/test/ll/vote_all_sub.ll index 5b7007d..a10140b 100644 --- a/ptx/src/test/ll/vote_all_sub.ll +++ b/ptx/src/test/ll/vote_all_sub.ll @@ -61,5 +61,5 @@ define amdgpu_kernel void @vote_all_sub(ptr addrspace(4) byref(i64) %"53") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_any.ll b/ptx/src/test/ll/vote_any.ll index bc24522..8a4895a 100644 --- a/ptx/src/test/ll/vote_any.ll +++ b/ptx/src/test/ll/vote_any.ll @@ -46,5 +46,5 @@ define amdgpu_kernel void @vote_any(ptr addrspace(4) byref(i64) %"44") #1 { ret void } -attributes #0 = { 
"amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/vote_ballot.ll b/ptx/src/test/ll/vote_ballot.ll index 350d837..69f5836 100644 --- a/ptx/src/test/ll/vote_ballot.ll +++ b/ptx/src/test/ll/vote_ballot.ll @@ -42,5 +42,5 @@ define amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"41") #1 { ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/warp_sz.ll b/ptx/src/test/ll/warp_sz.ll 
index 32b203a..7b6e36b 100644 --- a/ptx/src/test/ll/warp_sz.ll +++ b/ptx/src/test/ll/warp_sz.ll @@ -14,4 +14,4 @@ define amdgpu_kernel void @warp_sz(ptr addrspace(4) byref(i64) %"32", ptr addrsp ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/xor.ll b/ptx/src/test/ll/xor.ll index ac9a1d1..0f88d82 100644 --- a/ptx/src/test/ll/xor.ll +++ b/ptx/src/test/ll/xor.ll @@ -33,4 +33,4 @@ define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"36", ptr addrspace( ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-ieee"="false" "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/spirv_run/min_f16.ptx b/ptx/src/test/spirv_run/min_f16.ptx new file mode 100644 index 0000000..61d30c6 --- /dev/null +++ b/ptx/src/test/spirv_run/min_f16.ptx @@ -0,0 +1,23 @@ +.version 7.0 +.target sm_80 +.address_size 64 + +.visible .entry min_f16( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .f16 temp1; + .reg .f16 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b16 temp1, [in_addr]; + ld.b16 temp2, [in_addr+2]; + min.f16 temp1, temp1, temp2; + st.b16 [out_addr], temp1; + ret; +} diff --git 
a/ptx/src/test/spirv_run/min_nan_f16.ptx b/ptx/src/test/spirv_run/min_nan_f16.ptx new file mode 100644 index 0000000..3c1ca60 --- /dev/null +++ b/ptx/src/test/spirv_run/min_nan_f16.ptx @@ -0,0 +1,23 @@ +.version 7.0 +.target sm_80 +.address_size 64 + +.visible .entry min_nan_f16( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .f16 temp1; + .reg .f16 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b16 temp1, [in_addr]; + ld.b16 temp2, [in_addr+2]; + min.NaN.f16 temp1, temp1, temp2; + st.b16 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 0a39523..46bdd0b 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -143,6 +143,12 @@ test_ptx!(shr_oob, [-32768i16], [-1i16]); test_ptx!(or, [1u64, 2u64], [3u64]); test_ptx!(sub, [2u64], [1u64]); test_ptx!(min, [555i32, 444i32], [444i32]); +test_ptx!( + min_f16, + [half::f16::NAN, half::f16::from_f64(123.0)], + [half::f16::from_f64(123.0)] +); +test_ptx!(min_nan_f16); test_ptx!(max, [555i32, 444i32], [555i32]); test_ptx!(global_array, [0xDEADu32], [1u32]); test_ptx!(global_array_f32, [0x0], [0f32]);