Check LLVM IR for test_ptx! with no input/output (#394)
Some checks failed
ZLUDA / Build (Linux) (push) Has been cancelled
ZLUDA / Build (Windows) (push) Has been cancelled

This commit is contained in:
Violet 2025-06-24 11:53:30 -07:00 committed by GitHub
commit 80607c07db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 162 additions and 2 deletions

View file

@ -0,0 +1,64 @@
; LLVM IR for the `assertfail` kernel: it calls the external assert-fail helper
; with five arguments and then copies (*input + 1) to the output address.
; NOTE(review): this text appears to be the expected-IR fixture consumed by the
; `test_ptx!(assertfail)` LLVM check; if that check compares text verbatim,
; these comments must be kept out of the fixture -- confirm.
;
; External helper; signature is (i64, i64, i32, i64, i64) -> void.
declare void @__zluda_ptx_impl___assertfail(i64, i64, i32, i64, i64) #0
; Kernel entry point. Both parameters are i64 values passed byref in
; addrspace(4); the body treats them as the input and output addresses.
define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"86", ptr addrspace(4) byref(i64) %"87") #1 {
; Stack slots, all in addrspace(5):
;   %"88" / %"89"  - copies of the two kernel arguments
;   %"90" / %"91"  - loaded value and loaded value + 1
;   %"94"          - i32 constant 0
;   %"96", %"99", %"102", %"105", %"108" - staging slots for the five call arguments
%"88" = alloca i64, align 8, addrspace(5)
%"89" = alloca i64, align 8, addrspace(5)
%"90" = alloca i64, align 8, addrspace(5)
%"91" = alloca i64, align 8, addrspace(5)
%"94" = alloca i32, align 4, addrspace(5)
%"96" = alloca i64, align 8, addrspace(5)
%"99" = alloca i64, align 8, addrspace(5)
%"102" = alloca i32, align 4, addrspace(5)
%"105" = alloca i64, align 8, addrspace(5)
%"108" = alloca i64, align 8, addrspace(5)
br label %1
1: ; preds = %0
br label %"84"
"84": ; preds = %1
; Copy the two kernel arguments into their stack slots. The i64 accesses in
; this function state `align 4`, which is weaker than the slots' `align 8`.
%"92" = load i64, ptr addrspace(4) %"86", align 4
store i64 %"92", ptr addrspace(5) %"88", align 4
%"93" = load i64, ptr addrspace(4) %"87", align 4
store i64 %"93", ptr addrspace(5) %"89", align 4
store i32 0, ptr addrspace(5) %"94", align 4
; Fill the five argument staging slots. Each getelementptr uses offset 0, so it
; addresses the start of its slot. Arguments 1, 2, 4 and 5 all receive the
; first kernel argument (%"88"); argument 3 receives the i32 zero from %"94".
%"97" = getelementptr inbounds i8, ptr addrspace(5) %"96", i64 0
%"98" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"98", ptr addrspace(5) %"97", align 4
%"100" = getelementptr inbounds i8, ptr addrspace(5) %"99", i64 0
%"101" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"101", ptr addrspace(5) %"100", align 4
%"103" = getelementptr inbounds i8, ptr addrspace(5) %"102", i64 0
%"104" = load i32, ptr addrspace(5) %"94", align 4
store i32 %"104", ptr addrspace(5) %"103", align 4
%"106" = getelementptr inbounds i8, ptr addrspace(5) %"105", i64 0
%"107" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"107", ptr addrspace(5) %"106", align 4
%"109" = getelementptr inbounds i8, ptr addrspace(5) %"108", i64 0
%"110" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"110", ptr addrspace(5) %"109", align 4
; Reload the staged values and make the call.
%"74" = load i64, ptr addrspace(5) %"96", align 4
%"75" = load i64, ptr addrspace(5) %"99", align 4
%"76" = load i32, ptr addrspace(5) %"102", align 4
%"77" = load i64, ptr addrspace(5) %"105", align 4
%"78" = load i64, ptr addrspace(5) %"108", align 4
call void @__zluda_ptx_impl___assertfail(i64 %"74", i64 %"75", i32 %"76", i64 %"77", i64 %"78")
br label %"85"
"85": ; preds = %"84"
; Treat the first argument as an address (default address space), load an i64
; from it, add 1, and store the sum through the second argument.
%"112" = load i64, ptr addrspace(5) %"88", align 4
%"122" = inttoptr i64 %"112" to ptr
%"111" = load i64, ptr %"122", align 4
store i64 %"111", ptr addrspace(5) %"90", align 4
%"114" = load i64, ptr addrspace(5) %"90", align 4
%"113" = add i64 %"114", 1
store i64 %"113", ptr addrspace(5) %"91", align 4
%"115" = load i64, ptr addrspace(5) %"89", align 4
%"116" = load i64, ptr addrspace(5) %"91", align 4
%"123" = inttoptr i64 %"115" to ptr
store i64 %"116", ptr %"123", align 4
ret void
}
; Attribute group #0 is attached to the external declaration and #1 to the
; kernel; they differ only in the denormal modes ("dynamic" vs "preserve-sign").
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@ -0,0 +1,43 @@
; LLVM IR for the `extern_func` kernel: it loads an i64 from the input address,
; passes it to the external function @foobar, and writes the first 8 bytes of
; the 16-byte result to the output address.
; NOTE(review): this text appears to be the expected-IR fixture consumed by the
; `test_ptx!(extern_func)` LLVM check; if that check compares text verbatim,
; these comments must be kept out of the fixture -- confirm.
;
; External function; takes one i64 and returns a 16-byte array by value.
declare [16 x i8] @foobar(i64) #0
; Kernel entry point. Both parameters are i64 values passed byref in
; addrspace(4); the body treats them as the input and output addresses.
define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
; Stack slots, all in addrspace(5):
;   %"46" / %"47"  - copies of the two kernel arguments
;   %"48"          - value loaded from the input address
;   %"49"          - i64 read back from the call result
;   %"54"          - staging slot for the call argument
;   %"57"          - 16-byte slot that receives the call result
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5)
%"54" = alloca i64, align 8, addrspace(5)
%"57" = alloca [16 x i8], align 16, addrspace(5)
br label %1
1: ; preds = %0
br label %"41"
"41": ; preds = %1
; Copy the two kernel arguments into their stack slots.
%"50" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"50", ptr addrspace(5) %"46", align 4
%"51" = load i64, ptr addrspace(4) %"45", align 4
store i64 %"51", ptr addrspace(5) %"47", align 4
; Load the input value; here the address is cast to an addrspace(1) pointer,
; whereas the final store below goes through a default-address-space pointer.
%"53" = load i64, ptr addrspace(5) %"46", align 4
%"61" = inttoptr i64 %"53" to ptr addrspace(1)
%"52" = load i64, ptr addrspace(1) %"61", align 4
store i64 %"52", ptr addrspace(5) %"48", align 4
; Stage the argument (getelementptr offset 0 = start of the slot) and call.
%"55" = getelementptr inbounds i8, ptr addrspace(5) %"54", i64 0
%"56" = load i64, ptr addrspace(5) %"48", align 4
store i64 %"56", ptr addrspace(5) %"55", align 4
%"39" = load i64, ptr addrspace(5) %"54", align 4
%"40" = call [16 x i8] @foobar(i64 %"39")
br label %"42"
"42": ; preds = %"41"
; Spill the whole 16-byte result, then reinterpret its first 8 bytes as an i64.
store [16 x i8] %"40", ptr addrspace(5) %"57", align 1
%"58" = load i64, ptr addrspace(5) %"57", align 4
store i64 %"58", ptr addrspace(5) %"49", align 4
; Write that i64 to the output address.
%"59" = load i64, ptr addrspace(5) %"47", align 4
%"60" = load i64, ptr addrspace(5) %"49", align 4
%"64" = inttoptr i64 %"59" to ptr
store i64 %"60", ptr %"64", align 4
ret void
}
; Attribute group #0 is attached to the external declaration and #1 to the
; kernel; they differ only in the denormal modes ("dynamic" vs "preserve-sign").
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@ -0,0 +1,43 @@
; LLVM IR for the `lanemask_lt` kernel: it loads an i32 from the input address,
; adds 1, adds the value returned by the lanemask_lt helper, and stores the sum
; to the output address.
; NOTE(review): this text appears to be the expected-IR fixture consumed by the
; `test_ptx!(lanemask_lt)` LLVM check; if that check compares text verbatim,
; these comments must be kept out of the fixture -- confirm.
;
; External helper; takes no arguments and returns an i32.
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
; Kernel entry point. Both parameters are i64 values passed byref in
; addrspace(4); the body treats them as the input and output addresses.
define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
; Stack slots, all in addrspace(5):
;   %"38" / %"39"  - copies of the two kernel arguments
;   %"40"          - i32 loaded from the input address
;   %"41"          - running result (input + 1, then + helper result)
;   %"42"          - helper return value
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5)
br label %1
1: ; preds = %0
br label %"33"
"33": ; preds = %1
; Copy the two kernel arguments into their stack slots.
%"43" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4
; Load the i32 input through a default-address-space pointer and add 1.
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"56" = inttoptr i64 %"46" to ptr
%"55" = load i32, ptr %"56", align 4
store i32 %"55", ptr addrspace(5) %"40", align 4
%"48" = load i32, ptr addrspace(5) %"40", align 4
%"57" = add i32 %"48", 1
store i32 %"57", ptr addrspace(5) %"41", align 4
; Fetch the helper value; it is stored and consumed in the next basic block.
%"31" = call i32 @__zluda_ptx_impl_sreg_lanemask_lt()
br label %"34"
"34": ; preds = %"33"
store i32 %"31", ptr addrspace(5) %"42", align 4
; result = (input + 1) + helper value, written back into %"41".
%"51" = load i32, ptr addrspace(5) %"41", align 4
%"52" = load i32, ptr addrspace(5) %"42", align 4
%"60" = add i32 %"51", %"52"
store i32 %"60", ptr addrspace(5) %"41", align 4
; Write the i32 result to the output address.
%"53" = load i64, ptr addrspace(5) %"39", align 4
%"54" = load i32, ptr addrspace(5) %"41", align 4
%"63" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"63", align 4
ret void
}
; Attribute group #0 is attached to the external declaration and #1 to the
; kernel; they differ only in the denormal modes ("dynamic" vs "preserve-sign").
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@ -46,7 +46,16 @@ macro_rules! test_ptx {
} }
}; };
($fn_name:ident) => {}; ($fn_name:ident) => {
paste::item! {
#[test]
fn [<$fn_name _llvm>]() -> Result<(), Box<dyn std::error::Error>> {
let ptx = include_str!(concat!(stringify!($fn_name), ".ptx"));
let ll = include_str!(concat!("../ll/", stringify!($fn_name), ".ll")).trim();
test_llvm_assert(stringify!($fn_name), ptx, &ll)
}
}
};
} }
test_ptx!(ld_st, [1u64], [1u64]); test_ptx!(ld_st, [1u64], [1u64]);
@ -241,7 +250,8 @@ test_ptx!(
); );
test_ptx!(assertfail); test_ptx!(assertfail);
test_ptx!(func_ptr); // TODO: not yet supported
//test_ptx!(func_ptr);
test_ptx!(lanemask_lt); test_ptx!(lanemask_lt);
test_ptx!(extern_func); test_ptx!(extern_func);