mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-10-01 13:49:11 +00:00
One more test
This commit is contained in:
parent
432f0bb2ec
commit
0accc421ad
3 changed files with 108 additions and 0 deletions
65
ptx/src/test/ll/vote_all_sub.ll
Normal file
65
ptx/src/test/ll/vote_all_sub.ll
Normal file
|
@ -0,0 +1,65 @@
|
|||
; External helper declarations used by the kernel below; their bodies are not
; part of this file. All three carry attribute group #0.
; NOTE(review): judging by the names only, these presumably back the PTX
; vote.sync.all.pred instruction and the %tid / %laneid special registers -
; confirm against the ZLUDA runtime sources.
declare hidden i1 @__zluda_ptx_impl_vote_sync_all_pred(i1, i32) #0
|
||||
|
||||
declare hidden i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare hidden i32 @__zluda_ptx_impl_sreg_laneid() #0
|
||||
|
||||
; Kernel @vote_all_sub: takes one 64-bit value (%"53", passed byref in
; addrspace(4)), treats it as a base address, and stores one i32 (0 or 1) at
; base + zext(tid) * 4. The stored value is 0 when the lane id equals 0;
; otherwise it is the i1 returned by the vote helper widened to 0/1.
define amdgpu_kernel void @vote_all_sub(ptr addrspace(4) byref(i64) %"53") #1 {
|
||||
; Stack slots: %"54" = lane id, %"55" = tid, %"56" = (lane id == 0),
; %"57" = vote result, %"58" = value to store, %"59" = output address,
; %"70" = byte offset.
%"54" = alloca i32, align 4, addrspace(5)
|
||||
%"55" = alloca i32, align 4, addrspace(5)
|
||||
%"56" = alloca i1, align 1, addrspace(5)
|
||||
%"57" = alloca i1, align 1, addrspace(5)
|
||||
%"58" = alloca i32, align 4, addrspace(5)
|
||||
%"59" = alloca i64, align 8, addrspace(5)
|
||||
%"70" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"48"
|
||||
|
||||
"48": ; preds = %1
|
||||
; Load the 64-bit kernel argument and keep it as the running output address.
%"60" = load i64, ptr addrspace(4) %"53", align 8
|
||||
store i64 %"60", ptr addrspace(5) %"59", align 8
|
||||
%"38" = call i32 @__zluda_ptx_impl_sreg_laneid()
|
||||
br label %"49"
|
||||
|
||||
"49": ; preds = %"48"
|
||||
store i32 %"38", ptr addrspace(5) %"54", align 4
|
||||
; The i8 0 argument presumably selects the x component of tid - confirm
; against the helper's definition.
%"40" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||
br label %"50"
|
||||
|
||||
"50": ; preds = %"49"
|
||||
store i32 %"40", ptr addrspace(5) %"55", align 4
|
||||
%"64" = load i32, ptr addrspace(5) %"54", align 4
|
||||
; %2 is true only when the lane id is 0.
%2 = icmp eq i32 %"64", 0
|
||||
store i1 %2, ptr addrspace(5) %"56", align 1
|
||||
; Default the vote result to false; this is what survives when the vote
; block "19" is skipped.
store i1 false, ptr addrspace(5) %"57", align 1
|
||||
%"66" = load i1, ptr addrspace(5) %"56", align 1
|
||||
; Lane id 0 jumps straight to "10"; every other lane goes through "19".
br i1 %"66", label %"10", label %"19"
|
||||
|
||||
"19": ; preds = %"50"
|
||||
; Vote helper is called with a constant true predicate and an all-ones i32
; (-1) as its second argument; its result overwrites the default in %"57".
%"67" = call i1 @__zluda_ptx_impl_vote_sync_all_pred(i1 true, i32 -1)
|
||||
store i1 %"67", ptr addrspace(5) %"57", align 1
|
||||
br label %"10"
|
||||
|
||||
"10": ; preds = %"19", %"50"
|
||||
; Widen the i1 vote result to an i32 holding 1 or 0.
%"69" = load i1, ptr addrspace(5) %"57", align 1
|
||||
%"68" = select i1 %"69", i32 1, i32 0
|
||||
store i32 %"68", ptr addrspace(5) %"58", align 4
|
||||
; Byte offset = zero-extended tid * 4 (one i32 slot per tid value).
%"72" = load i32, ptr addrspace(5) %"55", align 4
|
||||
%3 = zext i32 %"72" to i64
|
||||
%"71" = mul i64 %3, 4
|
||||
store i64 %"71", ptr addrspace(5) %"70", align 8
|
||||
; Output address = base address + byte offset, written back into %"59".
%"74" = load i64, ptr addrspace(5) %"59", align 8
|
||||
%"75" = load i64, ptr addrspace(5) %"70", align 8
|
||||
%"73" = add i64 %"74", %"75"
|
||||
store i64 %"73", ptr addrspace(5) %"59", align 8
|
||||
%"76" = load i64, ptr addrspace(5) %"59", align 8
|
||||
%"77" = load i32, ptr addrspace(5) %"58", align 4
|
||||
; The integer address is converted to a pointer with no explicit address
; space before the final store.
%"78" = inttoptr i64 %"76" to ptr
|
||||
store i32 %"77", ptr %"78", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Attribute group #0 is attached to the three helper declarations and #1 to
; the @vote_all_sub kernel. The two groups differ only in the denormal-fp-math
; settings: "dynamic" for #0, "preserve-sign" for #1.
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -409,6 +409,14 @@ test_ptx_warp!(
|
|||
1, 1, 1, 1, 1
|
||||
]
|
||||
);
|
||||
// Test case for the `vote_all_sub` kernel with its expected u32 outputs.
// The first expected value is 0 and exactly one later value is also 0; every
// other entry is 1.
// NOTE(review): the two zeros presumably line up with lane 0 of each warp,
// which skips the vote in the kernel - confirm against the `test_ptx_warp!`
// definition and the launch size it uses.
test_ptx_warp!(
|
||||
vote_all_sub,
|
||||
[
|
||||
0u32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1
|
||||
]
|
||||
);
|
||||
test_ptx_warp!(
|
||||
vote_any,
|
||||
[
|
||||
|
|
35
ptx/src/test/spirv_run/vote_all_sub.ptx
Normal file
35
ptx/src/test/spirv_run/vote_all_sub.ptx
Normal file
|
@ -0,0 +1,35 @@
|
|||
.version 7.0
|
||||
.target sm_70
|
||||
.address_size 64
|
||||
|
||||
// Kernel vote_all_sub: each thread writes one u32 (0 or 1) to
// output[tid.x]. Threads whose lane id is 0 branch around the vote and write
// 0; the remaining threads execute vote.sync.all.pred with a constant true
// predicate and a full 0xFFFFFFFF member mask, and write its result.
.visible .entry vote_all_sub(
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u32 laneid;
|
||||
.reg .u32 tid;
|
||||
.reg .pred first_lane;
|
||||
.reg .pred result_pred;
|
||||
.reg .u32 result;
|
||||
.reg .u64 out_ptr;
|
||||
|
||||
ld.param.u64 out_ptr, [output];
|
||||
|
||||
mov.u32 laneid, %laneid;
|
||||
mov.u32 tid, %tid.x;
|
||||
// first_lane is true only for the thread whose lane id is 0.
setp.eq.u32 first_lane, laneid, 0;
|
||||
|
||||
// Default result for the thread that skips the vote below.
mov.pred result_pred, 0;
|
||||
|
||||
// Lane 0 jumps over the vote, so the vote runs with only a subset of the
// lanes named in the member mask.
@first_lane bra EXIT;
|
||||
// IMPORTANT: it is legal for membermask to be bigger than the execution mask
|
||||
vote.sync.all.pred result_pred, 1, 0xFFFFFFFF;
|
||||
EXIT:
|
||||
// result = result_pred ? 1 : 0
selp.u32 result, 1, 0, result_pred;
|
||||
|
||||
.reg .u64 out_offset;
|
||||
// 64-bit byte offset of this thread's slot: tid * 4 (one u32 per thread).
mul.wide.u32 out_offset, tid, 4;
|
||||
add.u64 out_ptr, out_ptr, out_offset;
|
||||
st.u32 [out_ptr], result;
|
||||
ret;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue