From 80607c07db3d0c4dee6c4628b6fe705f571633f2 Mon Sep 17 00:00:00 2001 From: Violet Date: Tue, 24 Jun 2025 11:53:30 -0700 Subject: [PATCH] Check LLVM IR for `test_ptx!` with no input/output (#394) --- ptx/src/test/ll/assertfail.ll | 64 ++++++++++++++++++++++++++++++++++ ptx/src/test/ll/extern_func.ll | 43 +++++++++++++++++++++++ ptx/src/test/ll/lanemask_lt.ll | 43 +++++++++++++++++++++++ ptx/src/test/spirv_run/mod.rs | 14 ++++++-- 4 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 ptx/src/test/ll/assertfail.ll create mode 100644 ptx/src/test/ll/extern_func.ll create mode 100644 ptx/src/test/ll/lanemask_lt.ll diff --git a/ptx/src/test/ll/assertfail.ll b/ptx/src/test/ll/assertfail.ll new file mode 100644 index 0000000..50d51fe --- /dev/null +++ b/ptx/src/test/ll/assertfail.ll @@ -0,0 +1,64 @@ +declare void @__zluda_ptx_impl___assertfail(i64, i64, i32, i64, i64) #0 + +define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"86", ptr addrspace(4) byref(i64) %"87") #1 { + %"88" = alloca i64, align 8, addrspace(5) + %"89" = alloca i64, align 8, addrspace(5) + %"90" = alloca i64, align 8, addrspace(5) + %"91" = alloca i64, align 8, addrspace(5) + %"94" = alloca i32, align 4, addrspace(5) + %"96" = alloca i64, align 8, addrspace(5) + %"99" = alloca i64, align 8, addrspace(5) + %"102" = alloca i32, align 4, addrspace(5) + %"105" = alloca i64, align 8, addrspace(5) + %"108" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"84" + +"84": ; preds = %1 + %"92" = load i64, ptr addrspace(4) %"86", align 4 + store i64 %"92", ptr addrspace(5) %"88", align 4 + %"93" = load i64, ptr addrspace(4) %"87", align 4 + store i64 %"93", ptr addrspace(5) %"89", align 4 + store i32 0, ptr addrspace(5) %"94", align 4 + %"97" = getelementptr inbounds i8, ptr addrspace(5) %"96", i64 0 + %"98" = load i64, ptr addrspace(5) %"88", align 4 + store i64 %"98", ptr addrspace(5) %"97", align 4 + %"100" = getelementptr inbounds i8, ptr 
addrspace(5) %"99", i64 0 + %"101" = load i64, ptr addrspace(5) %"88", align 4 + store i64 %"101", ptr addrspace(5) %"100", align 4 + %"103" = getelementptr inbounds i8, ptr addrspace(5) %"102", i64 0 + %"104" = load i32, ptr addrspace(5) %"94", align 4 + store i32 %"104", ptr addrspace(5) %"103", align 4 + %"106" = getelementptr inbounds i8, ptr addrspace(5) %"105", i64 0 + %"107" = load i64, ptr addrspace(5) %"88", align 4 + store i64 %"107", ptr addrspace(5) %"106", align 4 + %"109" = getelementptr inbounds i8, ptr addrspace(5) %"108", i64 0 + %"110" = load i64, ptr addrspace(5) %"88", align 4 + store i64 %"110", ptr addrspace(5) %"109", align 4 + %"74" = load i64, ptr addrspace(5) %"96", align 4 + %"75" = load i64, ptr addrspace(5) %"99", align 4 + %"76" = load i32, ptr addrspace(5) %"102", align 4 + %"77" = load i64, ptr addrspace(5) %"105", align 4 + %"78" = load i64, ptr addrspace(5) %"108", align 4 + call void @__zluda_ptx_impl___assertfail(i64 %"74", i64 %"75", i32 %"76", i64 %"77", i64 %"78") + br label %"85" + +"85": ; preds = %"84" + %"112" = load i64, ptr addrspace(5) %"88", align 4 + %"122" = inttoptr i64 %"112" to ptr + %"111" = load i64, ptr %"122", align 4 + store i64 %"111", ptr addrspace(5) %"90", align 4 + %"114" = load i64, ptr addrspace(5) %"90", align 4 + %"113" = add i64 %"114", 1 + store i64 %"113", ptr addrspace(5) %"91", align 4 + %"115" = load i64, ptr addrspace(5) %"89", align 4 + %"116" = load i64, ptr addrspace(5) %"91", align 4 + %"123" = inttoptr i64 %"115" to ptr + store i64 %"116", ptr %"123", align 4 + ret void +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git 
a/ptx/src/test/ll/extern_func.ll b/ptx/src/test/ll/extern_func.ll new file mode 100644 index 0000000..5d11365 --- /dev/null +++ b/ptx/src/test/ll/extern_func.ll @@ -0,0 +1,43 @@ +declare [16 x i8] @foobar(i64) #0 + +define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 { + %"46" = alloca i64, align 8, addrspace(5) + %"47" = alloca i64, align 8, addrspace(5) + %"48" = alloca i64, align 8, addrspace(5) + %"49" = alloca i64, align 8, addrspace(5) + %"54" = alloca i64, align 8, addrspace(5) + %"57" = alloca [16 x i8], align 16, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"41" + +"41": ; preds = %1 + %"50" = load i64, ptr addrspace(4) %"44", align 4 + store i64 %"50", ptr addrspace(5) %"46", align 4 + %"51" = load i64, ptr addrspace(4) %"45", align 4 + store i64 %"51", ptr addrspace(5) %"47", align 4 + %"53" = load i64, ptr addrspace(5) %"46", align 4 + %"61" = inttoptr i64 %"53" to ptr addrspace(1) + %"52" = load i64, ptr addrspace(1) %"61", align 4 + store i64 %"52", ptr addrspace(5) %"48", align 4 + %"55" = getelementptr inbounds i8, ptr addrspace(5) %"54", i64 0 + %"56" = load i64, ptr addrspace(5) %"48", align 4 + store i64 %"56", ptr addrspace(5) %"55", align 4 + %"39" = load i64, ptr addrspace(5) %"54", align 4 + %"40" = call [16 x i8] @foobar(i64 %"39") + br label %"42" + +"42": ; preds = %"41" + store [16 x i8] %"40", ptr addrspace(5) %"57", align 1 + %"58" = load i64, ptr addrspace(5) %"57", align 4 + store i64 %"58", ptr addrspace(5) %"49", align 4 + %"59" = load i64, ptr addrspace(5) %"47", align 4 + %"60" = load i64, ptr addrspace(5) %"49", align 4 + %"64" = inttoptr i64 %"59" to ptr + store i64 %"60", ptr %"64", align 4 + ret void +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-unsafe-fp-atomics"="true"
"denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/ll/lanemask_lt.ll b/ptx/src/test/ll/lanemask_lt.ll new file mode 100644 index 0000000..0efae06 --- /dev/null +++ b/ptx/src/test/ll/lanemask_lt.ll @@ -0,0 +1,43 @@ +declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 + +define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 { + %"38" = alloca i64, align 8, addrspace(5) + %"39" = alloca i64, align 8, addrspace(5) + %"40" = alloca i32, align 4, addrspace(5) + %"41" = alloca i32, align 4, addrspace(5) + %"42" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + br label %"33" + +"33": ; preds = %1 + %"43" = load i64, ptr addrspace(4) %"36", align 4 + store i64 %"43", ptr addrspace(5) %"38", align 4 + %"44" = load i64, ptr addrspace(4) %"37", align 4 + store i64 %"44", ptr addrspace(5) %"39", align 4 + %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"56" = inttoptr i64 %"46" to ptr + %"55" = load i32, ptr %"56", align 4 + store i32 %"55", ptr addrspace(5) %"40", align 4 + %"48" = load i32, ptr addrspace(5) %"40", align 4 + %"57" = add i32 %"48", 1 + store i32 %"57", ptr addrspace(5) %"41", align 4 + %"31" = call i32 @__zluda_ptx_impl_sreg_lanemask_lt() + br label %"34" + +"34": ; preds = %"33" + store i32 %"31", ptr addrspace(5) %"42", align 4 + %"51" = load i32, ptr addrspace(5) %"41", align 4 + %"52" = load i32, ptr addrspace(5) %"42", align 4 + %"60" = add i32 %"51", %"52" + store i32 %"60", ptr addrspace(5) %"41", align 4 + %"53" = load i64, ptr addrspace(5) %"39", align 4 + %"54" = load i32, ptr addrspace(5) %"41", align 4 + %"63" = inttoptr i64 %"53" to ptr + store i32 %"54", ptr %"63", align 4 + ret void +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true"
"uniform-work-group-size"="true" } +attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 424a1b8..8451826 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -46,7 +46,16 @@ macro_rules! test_ptx { } }; - ($fn_name:ident) => {}; + ($fn_name:ident) => { + paste::item! { + #[test] + fn [<$fn_name _llvm>]() -> Result<(), Box<dyn std::error::Error>> { + let ptx = include_str!(concat!(stringify!($fn_name), ".ptx")); + let ll = include_str!(concat!("../ll/", stringify!($fn_name), ".ll")).trim(); + test_llvm_assert(stringify!($fn_name), ptx, &ll) + } + } + }; } test_ptx!(ld_st, [1u64], [1u64]); @@ -241,7 +250,8 @@ test_ptx!( ); test_ptx!(assertfail); -test_ptx!(func_ptr); +// TODO: not yet supported +//test_ptx!(func_ptr); test_ptx!(lanemask_lt); test_ptx!(extern_func);