Fix buggy carry flags when mixing subc/sub.cc with addc/add.cc (#197)

Andrzej Janik, 2024-04-05 23:26:08 +02:00 (committed by GitHub)
commit 0d9ace2475
139 changed files with 4208 additions and 4464 deletions
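
The fix is easiest to see in the addc_cc test below. As a minimal, illustrative sketch (not taken verbatim from the commit; the function name and parameters are hypothetical), the emitted LLVM IR models PTX add.cc/addc.cc by widening the carry-in to i32, splitting the addition into two llvm.uadd.with.overflow calls, and combining the two overflow bits into the new carry flag:

; Sketch only: mirrors the uadd.with.overflow/xor pattern in the addc_cc test below.
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

define { i32, i1 } @add_with_carry(i32 %a, i32 %b, i1 %carry_in) {
  %cin = zext i1 %carry_in to i32
  ; first partial sum: a + b, with its overflow bit
  %s1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %v1 = extractvalue { i32, i1 } %s1, 0
  %o1 = extractvalue { i32, i1 } %s1, 1
  ; second partial sum: fold in the carry-in
  %s2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %v1, i32 %cin)
  %v2 = extractvalue { i32, i1 } %s2, 0
  %o2 = extractvalue { i32, i1 } %s2, 1
  ; at most one of the two partial adds can wrap, so xor equals or here
  %carry_out = xor i1 %o1, %o2
  %r0 = insertvalue { i32, i1 } undef, i32 %v2, 0
  %r1 = insertvalue { i32, i1 } %r0, i1 %carry_out, 1
  ret { i32, i1 } %r1
}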

View file

@@ -1,44 +1,42 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"38":
define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"37":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"13" to ptr
%"30" = load i32, ptr %"31", align 4
store i32 %"30", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"15" to ptr
%"40" = getelementptr inbounds i8, ptr %"32", i64 4
%"33" = load i32, ptr %"40", align 4
store i32 %"33", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"16" = call i32 @llvm.abs.i32(i32 %"17", i1 false)
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
%"18" = call i32 @llvm.abs.i32(i32 %"19", i1 false)
store i32 %"18", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"34" = inttoptr i64 %"20" to ptr
store i32 %"21", ptr %"34", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"7", align 4
%"36" = inttoptr i64 %"22" to ptr
%"42" = getelementptr inbounds i8, ptr %"36", i64 4
store i32 %"23", ptr %"42", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"12" to ptr
%"29" = load i32, ptr %"30", align 4
store i32 %"29", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
%"32" = load i32, ptr %"39", align 4
store i32 %"32", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false)
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"17" = call i32 @llvm.abs.i32(i32 %"18", i1 false)
store i32 %"17", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"33" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"33", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"35" = inttoptr i64 %"21" to ptr
%"41" = getelementptr inbounds i8, ptr %"35", i64 4
store i32 %"22", ptr %"41", align 4
ret void
}

View file

@@ -3,22 +3,20 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__activemask() #0
define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #1 {
"16":
define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 {
"15":
%"6" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"6", align 1
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"13", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = call i32 @__zluda_ptx_impl__activemask()
store i32 %"9", ptr addrspace(5) %"5", align 4
%"10" = load i64, ptr addrspace(5) %"4", align 8
%"11" = load i32, ptr addrspace(5) %"5", align 4
%"14" = inttoptr i64 %"10" to ptr
store i32 %"11", ptr %"14", align 4
%"7" = load i64, ptr addrspace(4) %"12", align 8
store i64 %"7", ptr addrspace(5) %"4", align 8
%"8" = call i32 @__zluda_ptx_impl__activemask()
store i32 %"8", ptr addrspace(5) %"5", align 4
%"9" = load i64, ptr addrspace(5) %"4", align 8
%"10" = load i32, ptr addrspace(5) %"5", align 4
%"13" = inttoptr i64 %"9" to ptr
store i32 %"10", ptr %"13", align 4
ret void
}

View file

@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = add i64 %"15", 1
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = add i64 %"14", 1
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}

View file

@@ -3,34 +3,32 @@ target triple = "amdgcn-amd-amdhsa"
@PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4
define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 {
"25":
define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 {
"24":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"21", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"23" = inttoptr i64 %"14" to ptr
%"13" = load float, ptr %"23", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"15" = load float, ptr addrspace(1) @PI, align 4
store float %"15", ptr addrspace(5) %"8", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
%"18" = load float, ptr addrspace(5) %"8", align 4
%"16" = fadd float %"17", %"18"
store float %"16", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"6", align 8
%"20" = load float, ptr addrspace(5) %"7", align 4
%"24" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"24", align 4
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"22" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"22", align 4
store float %"12", ptr addrspace(5) %"7", align 4
%"14" = load float, ptr addrspace(1) @PI, align 4
store float %"14", ptr addrspace(5) %"8", align 4
%"16" = load float, ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"8", align 4
%"15" = fadd float %"16", %"17"
store float %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"6", align 8
%"19" = load float, ptr addrspace(5) %"7", align 4
%"23" = inttoptr i64 %"18" to ptr
store float %"19", ptr %"23", align 4
ret void
}

View file

@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i64, ptr addrspace(1) %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = add i64 %"15", 1
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr addrspace(1)
store i64 %"17", ptr addrspace(1) %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load i64, ptr addrspace(1) %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = add i64 %"14", 1
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr addrspace(1)
store i64 %"16", ptr addrspace(1) %"21", align 8
ret void
}

View file

@@ -1,47 +1,45 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"39":
define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
"38":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"32" = ptrtoint ptr addrspace(4) %"27" to i64
%"31" = ptrtoint ptr addrspace(4) %"26" to i64
%0 = alloca i64, align 8, addrspace(5)
store i64 %"32", ptr addrspace(5) %0, align 8
%"31" = load i64, ptr addrspace(5) %0, align 8
store i64 %"31", ptr addrspace(5) %"4", align 8
%"34" = ptrtoint ptr addrspace(4) %"28" to i64
store i64 %"31", ptr addrspace(5) %0, align 8
%"30" = load i64, ptr addrspace(5) %0, align 8
store i64 %"30", ptr addrspace(5) %"4", align 8
%"33" = ptrtoint ptr addrspace(4) %"27" to i64
%1 = alloca i64, align 8, addrspace(5)
store i64 %"34", ptr addrspace(5) %1, align 8
%"33" = load i64, ptr addrspace(5) %1, align 8
store i64 %"33", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"35" = inttoptr i64 %"13" to ptr addrspace(4)
%"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
%"12" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"36" = inttoptr i64 %"15" to ptr addrspace(4)
%"43" = getelementptr inbounds i8, ptr addrspace(4) %"36", i64 0
%"14" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"14", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"17" to ptr
%"16" = load i64, ptr %"37", align 8
store i64 %"16", ptr addrspace(5) %"6", align 8
%"19" = load i64, ptr addrspace(5) %"6", align 8
%"18" = add i64 %"19", 1
store i64 %"18", ptr addrspace(5) %"7", align 8
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i64, ptr addrspace(5) %"7", align 8
%"38" = inttoptr i64 %"20" to ptr
store i64 %"21", ptr %"38", align 8
store i64 %"33", ptr addrspace(5) %1, align 8
%"32" = load i64, ptr addrspace(5) %1, align 8
store i64 %"32", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"12" to ptr addrspace(4)
%"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0
%"11" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"35" = inttoptr i64 %"14" to ptr addrspace(4)
%"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
%"13" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"36" = inttoptr i64 %"16" to ptr
%"15" = load i64, ptr %"36", align 8
store i64 %"15", ptr addrspace(5) %"6", align 8
%"18" = load i64, ptr addrspace(5) %"6", align 8
%"17" = add i64 %"18", 1
store i64 %"17", ptr addrspace(5) %"7", align 8
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"7", align 8
%"37" = inttoptr i64 %"19" to ptr
store i64 %"20", ptr %"37", align 8
ret void
}

View file

@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = add i64 %"15", 1
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = add i64 %"14", 1
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}

View file

@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 {
"69":
define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 {
"68":
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -16,70 +14,70 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"54",
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
%"14" = load i64, ptr addrspace(4) %"53", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"54", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"55", align 8
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"18" to ptr
%"56" = load i32, ptr %"57", align 4
store i32 %"56", ptr addrspace(5) %"9", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"58" = inttoptr i64 %"20" to ptr
%"71" = getelementptr inbounds i8, ptr %"58", i64 4
%"59" = load i32, ptr %"71", align 4
store i32 %"59", ptr addrspace(5) %"10", align 4
%"22" = load i64, ptr addrspace(5) %"4", align 8
%"60" = inttoptr i64 %"22" to ptr
%"73" = getelementptr inbounds i8, ptr %"60", i64 8
%"21" = load i32, ptr %"73", align 4
store i32 %"21", ptr addrspace(5) %"11", align 4
%"24" = load i64, ptr addrspace(5) %"4", align 8
%"61" = inttoptr i64 %"24" to ptr
%"75" = getelementptr inbounds i8, ptr %"61", i64 12
%"23" = load i32, ptr %"75", align 4
store i32 %"23", ptr addrspace(5) %"12", align 4
%"27" = load i32, ptr addrspace(5) %"9", align 4
%"28" = load i32, ptr addrspace(5) %"10", align 4
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"27", i32 %"28")
%"25" = extractvalue { i32, i1 } %0, 0
%"26" = extractvalue { i32, i1 } %0, 1
store i32 %"25", ptr addrspace(5) %"6", align 4
store i1 %"26", ptr addrspace(5) %"13", align 1
%"31" = load i1, ptr addrspace(5) %"13", align 1
%"32" = load i32, ptr addrspace(5) %"6", align 4
%"33" = load i32, ptr addrspace(5) %"11", align 4
%1 = zext i1 %"31" to i32
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"32", i32 %"33")
store i64 %"15", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"56" = inttoptr i64 %"17" to ptr
%"55" = load i32, ptr %"56", align 4
store i32 %"55", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"19" to ptr
%"70" = getelementptr inbounds i8, ptr %"57", i64 4
%"58" = load i32, ptr %"70", align 4
store i32 %"58", ptr addrspace(5) %"10", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"59" = inttoptr i64 %"21" to ptr
%"72" = getelementptr inbounds i8, ptr %"59", i64 8
%"20" = load i32, ptr %"72", align 4
store i32 %"20", ptr addrspace(5) %"11", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"60" = inttoptr i64 %"23" to ptr
%"74" = getelementptr inbounds i8, ptr %"60", i64 12
%"22" = load i32, ptr %"74", align 4
store i32 %"22", ptr addrspace(5) %"12", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"27" = load i32, ptr addrspace(5) %"10", align 4
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27")
%"24" = extractvalue { i32, i1 } %0, 0
%"25" = extractvalue { i32, i1 } %0, 1
store i32 %"24", ptr addrspace(5) %"6", align 4
store i1 %"25", ptr addrspace(5) %"13", align 1
%"30" = load i1, ptr addrspace(5) %"13", align 1
%"31" = load i32, ptr addrspace(5) %"6", align 4
%"32" = load i32, ptr addrspace(5) %"11", align 4
%1 = zext i1 %"30" to i32
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32")
%3 = extractvalue { i32, i1 } %2, 0
%4 = extractvalue { i32, i1 } %2, 1
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
%"29" = extractvalue { i32, i1 } %5, 0
%"28" = extractvalue { i32, i1 } %5, 0
%6 = extractvalue { i32, i1 } %5, 1
%"30" = xor i1 %4, %6
store i32 %"29", ptr addrspace(5) %"7", align 4
store i1 %"30", ptr addrspace(5) %"13", align 1
%"35" = load i1, ptr addrspace(5) %"13", align 1
%"36" = load i32, ptr addrspace(5) %"7", align 4
%"37" = load i32, ptr addrspace(5) %"12", align 4
%7 = zext i1 %"35" to i32
%8 = add i32 %"36", %"37"
%"34" = add i32 %8, %7
store i32 %"34", ptr addrspace(5) %"8", align 4
%"38" = load i64, ptr addrspace(5) %"5", align 8
%"39" = load i32, ptr addrspace(5) %"6", align 4
%"66" = inttoptr i64 %"38" to ptr
store i32 %"39", ptr %"66", align 4
%"40" = load i64, ptr addrspace(5) %"5", align 8
%"41" = load i32, ptr addrspace(5) %"7", align 4
%"67" = inttoptr i64 %"40" to ptr
%"77" = getelementptr inbounds i8, ptr %"67", i64 4
store i32 %"41", ptr %"77", align 4
%"42" = load i64, ptr addrspace(5) %"5", align 8
%"43" = load i32, ptr addrspace(5) %"8", align 4
%"68" = inttoptr i64 %"42" to ptr
%"79" = getelementptr inbounds i8, ptr %"68", i64 8
store i32 %"43", ptr %"79", align 4
%"29" = xor i1 %4, %6
store i32 %"28", ptr addrspace(5) %"7", align 4
store i1 %"29", ptr addrspace(5) %"13", align 1
%"34" = load i1, ptr addrspace(5) %"13", align 1
%"35" = load i32, ptr addrspace(5) %"7", align 4
%"36" = load i32, ptr addrspace(5) %"12", align 4
%7 = zext i1 %"34" to i32
%8 = add i32 %"35", %"36"
%"33" = add i32 %8, %7
store i32 %"33", ptr addrspace(5) %"8", align 4
%"37" = load i64, ptr addrspace(5) %"5", align 8
%"38" = load i32, ptr addrspace(5) %"6", align 4
%"65" = inttoptr i64 %"37" to ptr
store i32 %"38", ptr %"65", align 4
%"39" = load i64, ptr addrspace(5) %"5", align 8
%"40" = load i32, ptr addrspace(5) %"7", align 4
%"66" = inttoptr i64 %"39" to ptr
%"76" = getelementptr inbounds i8, ptr %"66", i64 4
store i32 %"40", ptr %"76", align 4
%"41" = load i64, ptr addrspace(5) %"5", align 8
%"42" = load i32, ptr addrspace(5) %"8", align 4
%"67" = inttoptr i64 %"41" to ptr
%"78" = getelementptr inbounds i8, ptr %"67", i64 8
store i32 %"42", ptr %"78", align 4
ret void
}
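
For contrast with the flag-producing form above, the tail of this test (the %7/%8 plain adds) appears to correspond to addc without the .cc suffix: the carry is consumed but no new carry-out is produced, so no overflow intrinsic is needed. A standalone sketch of that shape, with hypothetical names:

; Sketch only: carry-in is consumed, no carry-out is produced.
define i32 @add_with_carry_in(i32 %a, i32 %b, i1 %carry_in) {
  %cin = zext i1 %carry_in to i32
  %partial = add i32 %a, %b
  %sum = add i32 %partial, %cin
  ret i32 %sum
}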

View file

@@ -1,63 +1,61 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
"51":
define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
"50":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
%"42" = extractvalue { i32, i1 } %0, 0
%"13" = extractvalue { i32, i1 } %0, 1
store i32 %"42", ptr addrspace(5) %"6", align 4
store i1 %"13", ptr addrspace(5) %"9", align 1
%"16" = load i1, ptr addrspace(5) %"9", align 1
%1 = zext i1 %"16" to i32
%"41" = extractvalue { i32, i1 } %0, 0
%"12" = extractvalue { i32, i1 } %0, 1
store i32 %"41", ptr addrspace(5) %"6", align 4
store i1 %"12", ptr addrspace(5) %"9", align 1
%"15" = load i1, ptr addrspace(5) %"9", align 1
%1 = zext i1 %"15" to i32
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4)
%3 = extractvalue { i32, i1 } %2, 0
%4 = extractvalue { i32, i1 } %2, 1
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
%"43" = extractvalue { i32, i1 } %5, 0
%"42" = extractvalue { i32, i1 } %5, 0
%6 = extractvalue { i32, i1 } %5, 1
%"15" = xor i1 %4, %6
store i32 %"43", ptr addrspace(5) %"6", align 4
store i1 %"15", ptr addrspace(5) %"9", align 1
%"18" = load i1, ptr addrspace(5) %"9", align 1
%7 = zext i1 %"18" to i32
%"44" = add i32 0, %7
store i32 %"44", ptr addrspace(5) %"7", align 4
%"21" = load i1, ptr addrspace(5) %"9", align 1
%8 = zext i1 %"21" to i32
%"14" = xor i1 %4, %6
store i32 %"42", ptr addrspace(5) %"6", align 4
store i1 %"14", ptr addrspace(5) %"9", align 1
%"17" = load i1, ptr addrspace(5) %"9", align 1
%7 = zext i1 %"17" to i32
%"43" = add i32 0, %7
store i32 %"43", ptr addrspace(5) %"7", align 4
%"20" = load i1, ptr addrspace(5) %"9", align 1
%8 = zext i1 %"20" to i32
%9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
%10 = extractvalue { i32, i1 } %9, 0
%11 = extractvalue { i32, i1 } %9, 1
%12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8)
%"45" = extractvalue { i32, i1 } %12, 0
%"44" = extractvalue { i32, i1 } %12, 0
%13 = extractvalue { i32, i1 } %12, 1
%"20" = xor i1 %11, %13
store i32 %"45", ptr addrspace(5) %"6", align 4
store i1 %"20", ptr addrspace(5) %"9", align 1
%"23" = load i1, ptr addrspace(5) %"9", align 1
%14 = zext i1 %"23" to i32
%"46" = add i32 0, %14
store i32 %"46", ptr addrspace(5) %"8", align 4
%"24" = load i64, ptr addrspace(5) %"5", align 8
%"25" = load i32, ptr addrspace(5) %"7", align 4
%"47" = inttoptr i64 %"24" to ptr
store i32 %"25", ptr %"47", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load i32, ptr addrspace(5) %"8", align 4
%"49" = inttoptr i64 %"26" to ptr
%"53" = getelementptr inbounds i8, ptr %"49", i64 4
store i32 %"27", ptr %"53", align 4
%"19" = xor i1 %11, %13
store i32 %"44", ptr addrspace(5) %"6", align 4
store i1 %"19", ptr addrspace(5) %"9", align 1
%"22" = load i1, ptr addrspace(5) %"9", align 1
%14 = zext i1 %"22" to i32
%"45" = add i32 0, %14
store i32 %"45", ptr addrspace(5) %"8", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"7", align 4
%"46" = inttoptr i64 %"23" to ptr
store i32 %"24", ptr %"46", align 4
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"8", align 4
%"48" = inttoptr i64 %"25" to ptr
%"52" = getelementptr inbounds i8, ptr %"48", i64 4
store i32 %"26", ptr %"52", align 4
ret void
}

View file

@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 {
"59":
define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
"58":
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
%"23" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"23", align 1
%"7" = alloca i1, align 1, addrspace(5)
%"8" = alloca double, align 8, addrspace(5)
%"9" = alloca double, align 8, addrspace(5)
@@ -14,47 +12,47 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr
%"11" = alloca i64, align 8, addrspace(5)
%"12" = alloca i64, align 8, addrspace(5)
%"13" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5)
%"49" = alloca [4 x i32], align 16, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
%"48" = alloca [4 x i32], align 16, addrspace(5)
%"50" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"50", ptr addrspace(5) %"10", align 8
%"51" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"51", ptr addrspace(5) %"10", align 8
store i64 %"51", ptr addrspace(5) %"11", align 8
%"52" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"52", ptr addrspace(5) %"11", align 8
store i64 %"52", ptr addrspace(5) %"12", align 8
%"53" = load i64, ptr addrspace(4) %"45", align 8
store i64 %"53", ptr addrspace(5) %"12", align 8
%"54" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"54", ptr addrspace(5) %"13", align 8
%"29" = load i64, ptr addrspace(5) %"12", align 8
%"30" = load i64, ptr addrspace(5) %"13", align 8
%"28" = icmp sge i64 %"29", %"30"
store i1 %"28", ptr addrspace(5) %"7", align 1
%"31" = load i1, ptr addrspace(5) %"7", align 1
br i1 %"31", label %"6", label %"18"
store i64 %"53", ptr addrspace(5) %"13", align 8
%"28" = load i64, ptr addrspace(5) %"12", align 8
%"29" = load i64, ptr addrspace(5) %"13", align 8
%"27" = icmp sge i64 %"28", %"29"
store i1 %"27", ptr addrspace(5) %"7", align 1
%"30" = load i1, ptr addrspace(5) %"7", align 1
br i1 %"30", label %"6", label %"18"
"18": ; preds = %"59"
"18": ; preds = %"58"
%"31" = load i64, ptr addrspace(5) %"11", align 8
%"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0
store i64 %"31", ptr addrspace(5) %"60", align 8
%"32" = load i64, ptr addrspace(5) %"11", align 8
%"61" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
store i64 %"32", ptr addrspace(5) %"61", align 8
%"33" = load i64, ptr addrspace(5) %"11", align 8
%0 = inttoptr i64 %"33" to ptr
%0 = inttoptr i64 %"32" to ptr
%"21" = call [4 x i32] %0()
store [4 x i32] %"21", ptr addrspace(5) %"49", align 4
%"63" = getelementptr inbounds i8, ptr addrspace(5) %"49", i64 0
%"19" = load <2 x double>, ptr addrspace(5) %"63", align 16
%"34" = extractelement <2 x double> %"19", i32 0
%"35" = extractelement <2 x double> %"19", i32 1
store double %"34", ptr addrspace(5) %"8", align 8
store double %"35", ptr addrspace(5) %"9", align 8
%"36" = load double, ptr addrspace(5) %"8", align 8
%"37" = load double, ptr addrspace(5) %"9", align 8
%1 = insertelement <2 x double> undef, double %"36", i32 0
%"20" = insertelement <2 x double> %1, double %"37", i32 1
%"38" = load i64, ptr addrspace(5) %"10", align 8
%"58" = inttoptr i64 %"38" to ptr addrspace(1)
store <2 x double> %"20", ptr addrspace(1) %"58", align 16
store [4 x i32] %"21", ptr addrspace(5) %"48", align 4
%"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0
%"19" = load <2 x double>, ptr addrspace(5) %"62", align 16
%"33" = extractelement <2 x double> %"19", i32 0
%"34" = extractelement <2 x double> %"19", i32 1
store double %"33", ptr addrspace(5) %"8", align 8
store double %"34", ptr addrspace(5) %"9", align 8
%"35" = load double, ptr addrspace(5) %"8", align 8
%"36" = load double, ptr addrspace(5) %"9", align 8
%1 = insertelement <2 x double> undef, double %"35", i32 0
%"20" = insertelement <2 x double> %1, double %"36", i32 1
%"37" = load i64, ptr addrspace(5) %"10", align 8
%"57" = inttoptr i64 %"37" to ptr addrspace(1)
store <2 x double> %"20", ptr addrspace(1) %"57", align 16
br label %"6"
"6": ; preds = %"18", %"59"
"6": ; preds = %"18", %"58"
ret void
}

View file

@@ -7,12 +7,10 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"58", ptr addrspace(4) byref(i64) %"59") #1 {
"74":
define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 {
"73":
%"33" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"33", align 1
%"34" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"34", align 1
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
@@ -20,63 +18,63 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
%"18" = alloca i1, align 1, addrspace(5)
%"19" = alloca i64, align 8, addrspace(5)
%"20" = alloca i32, align 4, addrspace(5)
%"59" = alloca i64, align 8, addrspace(5)
%"60" = alloca i64, align 8, addrspace(5)
%"61" = alloca i64, align 8, addrspace(5)
%"62" = alloca i32, align 4, addrspace(5)
%"61" = alloca i32, align 4, addrspace(5)
%"62" = alloca i64, align 8, addrspace(5)
%"63" = alloca i64, align 8, addrspace(5)
%"64" = alloca i64, align 8, addrspace(5)
%"34" = load i64, ptr addrspace(4) %"57", align 8
store i64 %"34", ptr addrspace(5) %"14", align 8
%"35" = load i64, ptr addrspace(4) %"58", align 8
store i64 %"35", ptr addrspace(5) %"14", align 8
%"36" = load i64, ptr addrspace(4) %"59", align 8
store i64 %"36", ptr addrspace(5) %"15", align 8
%"38" = load i64, ptr addrspace(5) %"14", align 8
%"66" = inttoptr i64 %"38" to ptr
%"37" = load i64, ptr %"66", align 8
store i64 %"37", ptr addrspace(5) %"16", align 8
%"40" = load i64, ptr addrspace(5) %"16", align 8
%"39" = icmp uge i64 %"40", 1
store i1 %"39", ptr addrspace(5) %"18", align 1
%"41" = load i1, ptr addrspace(5) %"18", align 1
br i1 %"41", label %"13", label %"27"
store i64 %"35", ptr addrspace(5) %"15", align 8
%"37" = load i64, ptr addrspace(5) %"14", align 8
%"65" = inttoptr i64 %"37" to ptr
%"36" = load i64, ptr %"65", align 8
store i64 %"36", ptr addrspace(5) %"16", align 8
%"39" = load i64, ptr addrspace(5) %"16", align 8
%"38" = icmp uge i64 %"39", 1
store i1 %"38", ptr addrspace(5) %"18", align 1
%"40" = load i1, ptr addrspace(5) %"18", align 1
br i1 %"40", label %"13", label %"27"
"27": ; preds = %"74"
"27": ; preds = %"73"
%0 = alloca i64, align 8, addrspace(5)
store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8
%"67" = load i64, ptr addrspace(5) %0, align 8
store i64 %"67", ptr addrspace(5) %"19", align 8
%"43" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"43", ptr addrspace(5) %"60", align 8
%"66" = load i64, ptr addrspace(5) %0, align 8
store i64 %"66", ptr addrspace(5) %"19", align 8
%"42" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"42", ptr addrspace(5) %"59", align 8
%1 = alloca i64, align 8, addrspace(5)
store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8
%"69" = load i64, ptr addrspace(5) %1, align 8
store i64 %"69", ptr addrspace(5) %"19", align 8
%"45" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"45", ptr addrspace(5) %"61", align 8
store i32 1, ptr addrspace(5) %"62", align 4
%"68" = load i64, ptr addrspace(5) %1, align 8
store i64 %"68", ptr addrspace(5) %"19", align 8
%"44" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"44", ptr addrspace(5) %"60", align 8
store i32 1, ptr addrspace(5) %"61", align 4
%2 = alloca i64, align 8, addrspace(5)
store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8
%"71" = load i64, ptr addrspace(5) %2, align 8
store i64 %"71", ptr addrspace(5) %"19", align 8
%"47" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"47", ptr addrspace(5) %"63", align 8
%"76" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
store i64 1, ptr addrspace(5) %"76", align 8
%"28" = load i64, ptr addrspace(5) %"60", align 8
%"29" = load i64, ptr addrspace(5) %"61", align 8
%"30" = load i32, ptr addrspace(5) %"62", align 4
%"31" = load i64, ptr addrspace(5) %"63", align 8
%"32" = load i64, ptr addrspace(5) %"64", align 8
%"70" = load i64, ptr addrspace(5) %2, align 8
store i64 %"70", ptr addrspace(5) %"19", align 8
%"46" = load i64, ptr addrspace(5) %"19", align 8
store i64 %"46", ptr addrspace(5) %"62", align 8
%"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0
store i64 1, ptr addrspace(5) %"75", align 8
%"28" = load i64, ptr addrspace(5) %"59", align 8
%"29" = load i64, ptr addrspace(5) %"60", align 8
%"30" = load i32, ptr addrspace(5) %"61", align 4
%"31" = load i64, ptr addrspace(5) %"62", align 8
%"32" = load i64, ptr addrspace(5) %"63", align 8
call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32")
br label %"13"
"13": ; preds = %"27", %"74"
%"49" = load i64, ptr addrspace(5) %"16", align 8
%"48" = add i64 %"49", 1
store i64 %"48", ptr addrspace(5) %"17", align 8
%"50" = load i64, ptr addrspace(5) %"15", align 8
%"51" = load i64, ptr addrspace(5) %"17", align 8
%"73" = inttoptr i64 %"50" to ptr
store i64 %"51", ptr %"73", align 8
"13": ; preds = %"27", %"73"
%"48" = load i64, ptr addrspace(5) %"16", align 8
%"47" = add i64 %"48", 1
store i64 %"47", ptr addrspace(5) %"17", align 8
%"49" = load i64, ptr addrspace(5) %"15", align 8
%"50" = load i64, ptr addrspace(5) %"17", align 8
%"72" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"72", align 8
ret void
}

View file

@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"31":
define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"30":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"25", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"33" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load i32, ptr %"33", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"27" = and i32 %"17", %"18"
store i32 %"27", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"30" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"30", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"24", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"32" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load i32, ptr %"32", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"26" = and i32 %"16", %"17"
store i32 %"26", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"29" = inttoptr i64 %"18" to ptr
store i32 %"19", ptr %"29", align 4
ret void
}

View file

@@ -3,62 +3,60 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"63", ptr addrspace(4) byref(i64) %"64") #1 {
"82":
define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 {
"81":
%"35" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"35", align 1
%"36" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"36", align 1
%"15" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
%"18" = alloca i64, align 8, addrspace(5)
%"19" = alloca i32, align 4, addrspace(5)
%"65" = alloca i64, align 8, addrspace(5)
%"67" = alloca i64, align 8, addrspace(5)
%"69" = alloca i32, align 4, addrspace(5)
%"71" = alloca i64, align 8, addrspace(5)
%"73" = alloca i64, align 8, addrspace(5)
%"64" = alloca i64, align 8, addrspace(5)
%"66" = alloca i64, align 8, addrspace(5)
%"68" = alloca i32, align 4, addrspace(5)
%"70" = alloca i64, align 8, addrspace(5)
%"72" = alloca i64, align 8, addrspace(5)
%"36" = load i64, ptr addrspace(4) %"62", align 8
store i64 %"36", ptr addrspace(5) %"15", align 8
%"37" = load i64, ptr addrspace(4) %"63", align 8
store i64 %"37", ptr addrspace(5) %"15", align 8
%"38" = load i64, ptr addrspace(4) %"64", align 8
store i64 %"38", ptr addrspace(5) %"16", align 8
store i64 %"37", ptr addrspace(5) %"16", align 8
%0 = alloca i32, align 4, addrspace(5)
store i32 0, ptr addrspace(5) %0, align 4
%"75" = load i32, ptr addrspace(5) %0, align 4
store i32 %"75", ptr addrspace(5) %"19", align 4
%"74" = load i32, ptr addrspace(5) %0, align 4
store i32 %"74", ptr addrspace(5) %"19", align 4
%"39" = load i64, ptr addrspace(5) %"15", align 8
%"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
store i64 %"39", ptr addrspace(5) %"83", align 8
%"40" = load i64, ptr addrspace(5) %"15", align 8
%"84" = getelementptr inbounds i8, ptr addrspace(5) %"65", i64 0
store i64 %"40", ptr addrspace(5) %"84", align 8
%"41" = load i64, ptr addrspace(5) %"15", align 8
%"86" = getelementptr inbounds i8, ptr addrspace(5) %"67", i64 0
store i64 %"41", ptr addrspace(5) %"86", align 8
%"42" = load i32, ptr addrspace(5) %"19", align 4
%"88" = getelementptr inbounds i8, ptr addrspace(5) %"69", i64 0
store i32 %"42", ptr addrspace(5) %"88", align 4
%"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0
store i64 %"40", ptr addrspace(5) %"85", align 8
%"41" = load i32, ptr addrspace(5) %"19", align 4
%"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0
store i32 %"41", ptr addrspace(5) %"87", align 4
%"42" = load i64, ptr addrspace(5) %"15", align 8
%"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0
store i64 %"42", ptr addrspace(5) %"89", align 8
%"43" = load i64, ptr addrspace(5) %"15", align 8
%"90" = getelementptr inbounds i8, ptr addrspace(5) %"71", i64 0
store i64 %"43", ptr addrspace(5) %"90", align 8
%"44" = load i64, ptr addrspace(5) %"15", align 8
%"92" = getelementptr inbounds i8, ptr addrspace(5) %"73", i64 0
store i64 %"44", ptr addrspace(5) %"92", align 8
%"30" = load i64, ptr addrspace(5) %"65", align 8
%"31" = load i64, ptr addrspace(5) %"67", align 8
%"32" = load i32, ptr addrspace(5) %"69", align 4
%"33" = load i64, ptr addrspace(5) %"71", align 8
%"34" = load i64, ptr addrspace(5) %"73", align 8
%"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0
store i64 %"43", ptr addrspace(5) %"91", align 8
%"30" = load i64, ptr addrspace(5) %"64", align 8
%"31" = load i64, ptr addrspace(5) %"66", align 8
%"32" = load i32, ptr addrspace(5) %"68", align 4
%"33" = load i64, ptr addrspace(5) %"70", align 8
%"34" = load i64, ptr addrspace(5) %"72", align 8
call void @__zluda_ptx_impl____assertfail(i64 %"30", i64 %"31", i32 %"32", i64 %"33", i64 %"34")
%"46" = load i64, ptr addrspace(5) %"15", align 8
%"80" = inttoptr i64 %"46" to ptr
%"45" = load i64, ptr %"80", align 8
store i64 %"45", ptr addrspace(5) %"17", align 8
%"48" = load i64, ptr addrspace(5) %"17", align 8
%"47" = add i64 %"48", 1
store i64 %"47", ptr addrspace(5) %"18", align 8
%"49" = load i64, ptr addrspace(5) %"16", align 8
%"50" = load i64, ptr addrspace(5) %"18", align 8
%"81" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"81", align 8
%"45" = load i64, ptr addrspace(5) %"15", align 8
%"79" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"79", align 8
store i64 %"44", ptr addrspace(5) %"17", align 8
%"47" = load i64, ptr addrspace(5) %"17", align 8
%"46" = add i64 %"47", 1
store i64 %"46", ptr addrspace(5) %"18", align 8
%"48" = load i64, ptr addrspace(5) %"16", align 8
%"49" = load i64, ptr addrspace(5) %"18", align 8
%"80" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"80", align 8
ret void
}

View file

@@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
"38":
define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"37":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"31", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"32" = inttoptr i64 %"16" to ptr
%"40" = getelementptr inbounds i8, ptr %"32", i64 4
%"15" = load i32, ptr %"40", align 4
store i32 %"15", ptr addrspace(5) %"8", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
store i32 %"17", ptr addrspace(3) @"4", align 4
%"19" = load i32, ptr addrspace(5) %"8", align 4
%"18" = atomicrmw add ptr addrspace(3) @"4", i32 %"19" syncscope("agent-one-as") monotonic, align 4
store i32 %"18", ptr addrspace(5) %"7", align 4
%"20" = load i32, ptr addrspace(3) @"4", align 4
store i32 %"20", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"6", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"36" = inttoptr i64 %"21" to ptr
store i32 %"22", ptr %"36", align 4
%"23" = load i64, ptr addrspace(5) %"6", align 8
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"37" = inttoptr i64 %"23" to ptr
%"42" = getelementptr inbounds i8, ptr %"37", i64 4
store i32 %"24", ptr %"42", align 4
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"30", align 4
store i32 %"12", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"15" to ptr
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load i32, ptr %"39", align 4
store i32 %"14", ptr addrspace(5) %"8", align 4
%"16" = load i32, ptr addrspace(5) %"7", align 4
store i32 %"16", ptr addrspace(3) @"4", align 4
%"18" = load i32, ptr addrspace(5) %"8", align 4
%"17" = atomicrmw add ptr addrspace(3) @"4", i32 %"18" syncscope("agent-one-as") monotonic, align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"19" = load i32, ptr addrspace(3) @"4", align 4
store i32 %"19", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"35" = inttoptr i64 %"20" to ptr
store i32 %"21", ptr %"35", align 4
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"36" = inttoptr i64 %"22" to ptr
%"41" = getelementptr inbounds i8, ptr %"36", i64 4
store i32 %"23", ptr %"41", align 4
ret void
}

View file

@@ -3,46 +3,44 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"38":
define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
"37":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca half, align 2, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"29" = inttoptr i64 %"13" to ptr
%"40" = getelementptr inbounds i8, ptr %"29", i64 2
%"30" = load i16, ptr %"40", align 2
%"12" = bitcast i16 %"30" to half
store half %"12", ptr addrspace(5) %"7", align 2
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load half, ptr addrspace(5) %"7", align 2
%"31" = inttoptr i64 %"15" to ptr
%"14" = atomicrmw fadd ptr %"31", half %"16" syncscope("agent-one-as") monotonic, align 2
store half %"14", ptr addrspace(5) %"7", align 2
%"17" = load i64, ptr addrspace(5) %"6", align 8
%"18" = load half, ptr addrspace(5) %"7", align 2
%"32" = inttoptr i64 %"17" to ptr
%"33" = bitcast half %"18" to i16
store i16 %"33", ptr %"32", align 2
%"20" = load i64, ptr addrspace(5) %"5", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"28" = inttoptr i64 %"12" to ptr
%"39" = getelementptr inbounds i8, ptr %"28", i64 2
%"29" = load i16, ptr %"39", align 2
%"11" = bitcast i16 %"29" to half
store half %"11", ptr addrspace(5) %"7", align 2
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load half, ptr addrspace(5) %"7", align 2
%"30" = inttoptr i64 %"14" to ptr
%"13" = atomicrmw fadd ptr %"30", half %"15" syncscope("agent-one-as") monotonic, align 2
store half %"13", ptr addrspace(5) %"7", align 2
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load half, ptr addrspace(5) %"7", align 2
%"31" = inttoptr i64 %"16" to ptr
%"32" = bitcast half %"17" to i16
store i16 %"32", ptr %"31", align 2
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"34" = inttoptr i64 %"19" to ptr
%"33" = load i16, ptr %"34", align 2
%"18" = bitcast i16 %"33" to half
store half %"18", ptr addrspace(5) %"7", align 2
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load half, ptr addrspace(5) %"7", align 2
%"35" = inttoptr i64 %"20" to ptr
%"34" = load i16, ptr %"35", align 2
%"19" = bitcast i16 %"34" to half
store half %"19", ptr addrspace(5) %"7", align 2
%"21" = load i64, ptr addrspace(5) %"6", align 8
%"22" = load half, ptr addrspace(5) %"7", align 2
%"36" = inttoptr i64 %"21" to ptr
%"42" = getelementptr inbounds i8, ptr %"36", i64 2
%"37" = bitcast half %"22" to i16
store i16 %"37", ptr %"42", align 2
%"41" = getelementptr inbounds i8, ptr %"35", i64 2
%"36" = bitcast half %"21" to i16
store i16 %"36", ptr %"41", align 2
ret void
}

View file

@@ -3,45 +3,43 @@ target triple = "amdgcn-amd-amdhsa"
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
"38":
define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"37":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"14" to ptr
%"13" = load float, ptr %"31", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"32" = inttoptr i64 %"16" to ptr
%"40" = getelementptr inbounds i8, ptr %"32", i64 4
%"15" = load float, ptr %"40", align 4
store float %"15", ptr addrspace(5) %"8", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
store float %"17", ptr addrspace(3) @"4", align 4
%"19" = load float, ptr addrspace(5) %"8", align 4
%"18" = atomicrmw fadd ptr addrspace(3) @"4", float %"19" syncscope("agent-one-as") monotonic, align 4
store float %"18", ptr addrspace(5) %"7", align 4
%"20" = load float, ptr addrspace(3) @"4", align 4
store float %"20", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"6", align 8
%"22" = load float, ptr addrspace(5) %"7", align 4
%"36" = inttoptr i64 %"21" to ptr
store float %"22", ptr %"36", align 4
%"23" = load i64, ptr addrspace(5) %"6", align 8
%"24" = load float, ptr addrspace(5) %"8", align 4
%"37" = inttoptr i64 %"23" to ptr
%"42" = getelementptr inbounds i8, ptr %"37", i64 4
store float %"24", ptr %"42", align 4
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"30", align 4
store float %"12", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"31" = inttoptr i64 %"15" to ptr
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load float, ptr %"39", align 4
store float %"14", ptr addrspace(5) %"8", align 4
%"16" = load float, ptr addrspace(5) %"7", align 4
store float %"16", ptr addrspace(3) @"4", align 4
%"18" = load float, ptr addrspace(5) %"8", align 4
%"17" = atomicrmw fadd ptr addrspace(3) @"4", float %"18" syncscope("agent-one-as") monotonic, align 4
store float %"17", ptr addrspace(5) %"7", align 4
%"19" = load float, ptr addrspace(3) @"4", align 4
store float %"19", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load float, ptr addrspace(5) %"7", align 4
%"35" = inttoptr i64 %"20" to ptr
store float %"21", ptr %"35", align 4
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load float, ptr addrspace(5) %"8", align 4
%"36" = inttoptr i64 %"22" to ptr
%"41" = getelementptr inbounds i8, ptr %"36", i64 4
store float %"23", ptr %"41", align 4
ret void
}

View file

@@ -1,45 +1,43 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
"39":
define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
"38":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"32", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"33" = inttoptr i64 %"15" to ptr
%"41" = getelementptr inbounds i8, ptr %"33", i64 4
%0 = cmpxchg ptr %"41", i32 %"16", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
%"34" = extractvalue { i32, i1 } %0, 0
store i32 %"34", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"31", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"32" = inttoptr i64 %"14" to ptr
%"40" = getelementptr inbounds i8, ptr %"32", i64 4
%0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
%"33" = extractvalue { i32, i1 } %0, 0
store i32 %"33", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"35" = inttoptr i64 %"17" to ptr
%"42" = getelementptr inbounds i8, ptr %"35", i64 4
%"16" = load i32, ptr %"42", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"36" = inttoptr i64 %"18" to ptr
%"43" = getelementptr inbounds i8, ptr %"36", i64 4
%"17" = load i32, ptr %"43", align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"37" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"37", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"38" = inttoptr i64 %"21" to ptr
%"45" = getelementptr inbounds i8, ptr %"38", i64 4
store i32 %"22", ptr %"45", align 4
store i32 %"19", ptr %"36", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"37" = inttoptr i64 %"20" to ptr
%"44" = getelementptr inbounds i8, ptr %"37", i64 4
store i32 %"21", ptr %"44", align 4
ret void
}


@ -5,47 +5,45 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0
declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0
define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #1 {
"39":
define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 {
"38":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"14" to ptr
%"13" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"33", i32 101)
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"16" to ptr addrspace(1)
%"15" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"34", i32 101)
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"13" to ptr
%"12" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr %"32", i32 101)
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"15" to ptr addrspace(1)
%"14" = call i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1) %"33", i32 101)
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"17" to ptr
%"16" = load i32, ptr %"34", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"35" = inttoptr i64 %"18" to ptr
%"17" = load i32, ptr %"35", align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"36" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"36", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"37" = inttoptr i64 %"21" to ptr
%"49" = getelementptr inbounds i8, ptr %"37", i64 4
store i32 %"22", ptr %"49", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"38" = inttoptr i64 %"23" to ptr
%"51" = getelementptr inbounds i8, ptr %"38", i64 8
store i32 %"24", ptr %"51", align 4
store i32 %"19", ptr %"35", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"36" = inttoptr i64 %"20" to ptr
%"48" = getelementptr inbounds i8, ptr %"36", i64 4
store i32 %"21", ptr %"48", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"37" = inttoptr i64 %"22" to ptr
%"50" = getelementptr inbounds i8, ptr %"37", i64 8
store i32 %"23", ptr %"50", align 4
ret void
}


@ -1,27 +1,25 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
"19":
define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
"18":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"16" = inttoptr i64 %"11" to ptr
%"10" = load atomic i32, ptr %"16" syncscope("agent-one-as") acquire, align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"17" = inttoptr i64 %"12" to ptr
%"11" = load atomic i32, ptr %"17" syncscope("agent-one-as") acquire, align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"18" = inttoptr i64 %"13" to ptr
store atomic i32 %"14", ptr %"18" syncscope("agent-one-as") release, align 4
store atomic i32 %"13", ptr %"17" syncscope("agent-one-as") release, align 4
ret void
}


@ -1,36 +1,34 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 {
"24":
define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"21", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"22" = inttoptr i64 %"14" to ptr
%0 = load atomic i128, ptr %"22" syncscope("agent-one-as") acquire, align 16
store i64 %"12", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16
%"8" = bitcast i128 %0 to <2 x i64>
%"15" = extractelement <2 x i64> %"8", i32 0
%"16" = extractelement <2 x i64> %"8", i32 1
store i64 %"15", ptr addrspace(5) %"6", align 8
store i64 %"16", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(5) %"6", align 8
%"18" = load i64, ptr addrspace(5) %"7", align 8
%1 = insertelement <2 x i64> undef, i64 %"17", i32 0
%"9" = insertelement <2 x i64> %1, i64 %"18", i32 1
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"23" = inttoptr i64 %"19" to ptr
%"14" = extractelement <2 x i64> %"8", i32 0
%"15" = extractelement <2 x i64> %"8", i32 1
store i64 %"14", ptr addrspace(5) %"6", align 8
store i64 %"15", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%1 = insertelement <2 x i64> undef, i64 %"16", i32 0
%"9" = insertelement <2 x i64> %1, i64 %"17", i32 1
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"22" = inttoptr i64 %"18" to ptr
%2 = bitcast <2 x i64> %"9" to i128
store atomic i128 %2, ptr %"23" syncscope("agent-one-as") release, align 16
store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16
ret void
}


@ -1,38 +1,36 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"31":
define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"30":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"24", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"25" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"25", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"26", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"27" = inttoptr i64 %"17" to ptr
%"33" = getelementptr inbounds i8, ptr %"27", i64 4
%"16" = load i32, ptr %"33", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"7", align 4
%"29" = inttoptr i64 %"19" to ptr
%"28" = atomicrmw umax ptr %"29", i32 %"20" syncscope("agent-one-as") monotonic, align 4
store i32 %"28", ptr addrspace(5) %"6", align 4
store i32 %"14", ptr %"25", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"16" to ptr
%"32" = getelementptr inbounds i8, ptr %"26", i64 4
%"15" = load i32, ptr %"32", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"7", align 4
%"28" = inttoptr i64 %"18" to ptr
%"27" = atomicrmw umax ptr %"28", i32 %"19" syncscope("agent-one-as") monotonic, align 4
store i32 %"27", ptr addrspace(5) %"6", align 4
ret void
}


@ -1,34 +1,32 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca double, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"10" = load double, ptr addrspace(4) %"18", align 8
store double %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load double, ptr addrspace(5) %"4", align 8
%"21" = bitcast double %"13" to i64
%"9" = load double, ptr addrspace(4) %"17", align 8
store double %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load double, ptr addrspace(5) %"4", align 8
%"20" = bitcast double %"12" to i64
%0 = alloca i64, align 8, addrspace(5)
store i64 %"21", ptr addrspace(5) %0, align 8
%"12" = load i64, ptr addrspace(5) %0, align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
store i64 %"20", ptr addrspace(5) %0, align 8
%"11" = load i64, ptr addrspace(5) %0, align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"21" = inttoptr i64 %"14" to ptr
%"13" = load i64, ptr %"21", align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"15" to ptr
%"14" = load i64, ptr %"22", align 8
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"23" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"23", align 8
store i64 %"16", ptr %"22", align 8
ret void
}

View file

@ -4,11 +4,9 @@ target triple = "amdgcn-amd-amdhsa"
declare void @__zluda_ptx_impl__barrier_sync(i32) #0
define protected amdgpu_kernel void @barrier() #1 {
"5":
"4":
%"2" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"2", align 1
%"3" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"3", align 1
call void @__zluda_ptx_impl__barrier_sync(i32 0)
ret void
}


@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0
define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 {
"35":
define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
"34":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"31", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"16" to ptr
%"42" = getelementptr inbounds i8, ptr %"32", i64 4
%"15" = load i32, ptr %"42", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"18" to ptr
%"44" = getelementptr inbounds i8, ptr %"33", i64 8
%"17" = load i32, ptr %"44", align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"22" = load i32, ptr addrspace(5) %"8", align 4
%"19" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"20", i32 %"21", i32 %"22")
store i32 %"19", ptr addrspace(5) %"6", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"6", align 4
%"34" = inttoptr i64 %"23" to ptr
store i32 %"24", ptr %"34", align 4
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"30", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
%"41" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load i32, ptr %"41", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
%"43" = getelementptr inbounds i8, ptr %"32", i64 8
%"16" = load i32, ptr %"43", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
%"21" = load i32, ptr addrspace(5) %"8", align 4
%"18" = call i32 @__zluda_ptx_impl__bfe_u32(i32 %"19", i32 %"20", i32 %"21")
store i32 %"18", ptr addrspace(5) %"6", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"33" = inttoptr i64 %"22" to ptr
store i32 %"23", ptr %"33", align 4
ret void
}


@ -3,51 +3,49 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0
define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
"45":
define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
"44":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"15" to ptr
%"14" = load i32, ptr %"37", align 4
store i32 %"14", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"38" = inttoptr i64 %"17" to ptr
%"53" = getelementptr inbounds i8, ptr %"38", i64 4
%"16" = load i32, ptr %"53", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"19" to ptr
%"55" = getelementptr inbounds i8, ptr %"39", i64 8
%"18" = load i32, ptr %"55", align 4
store i32 %"18", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"40" = inttoptr i64 %"21" to ptr
%"57" = getelementptr inbounds i8, ptr %"40", i64 12
%"20" = load i32, ptr %"57", align 4
store i32 %"20", ptr addrspace(5) %"9", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
%"25" = load i32, ptr addrspace(5) %"8", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"41" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"23", i32 %"24", i32 %"25", i32 %"26")
store i32 %"41", ptr addrspace(5) %"6", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"6", align 4
%"44" = inttoptr i64 %"27" to ptr
store i32 %"28", ptr %"44", align 4
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"36" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"36", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"16" to ptr
%"52" = getelementptr inbounds i8, ptr %"37", i64 4
%"15" = load i32, ptr %"52", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"38" = inttoptr i64 %"18" to ptr
%"54" = getelementptr inbounds i8, ptr %"38", i64 8
%"17" = load i32, ptr %"54", align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"20" to ptr
%"56" = getelementptr inbounds i8, ptr %"39", i64 12
%"19" = load i32, ptr %"56", align 4
store i32 %"19", ptr addrspace(5) %"9", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"23" = load i32, ptr addrspace(5) %"7", align 4
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"25" = load i32, ptr addrspace(5) %"9", align 4
%"40" = call i32 @__zluda_ptx_impl__bfi_b32(i32 %"22", i32 %"23", i32 %"24", i32 %"25")
store i32 %"40", ptr addrspace(5) %"6", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load i32, ptr addrspace(5) %"6", align 4
%"43" = inttoptr i64 %"26" to ptr
store i32 %"27", ptr %"43", align 4
ret void
}


@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
"53":
define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
"52":
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@ -15,56 +13,56 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"42", pt
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"15", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"17" to ptr
%"16" = load i32, ptr %"44", align 4
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"19" to ptr
%"55" = getelementptr inbounds i8, ptr %"45", i64 4
%"18" = load i32, ptr %"55", align 4
store i32 %"18", ptr addrspace(5) %"7", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"46" = inttoptr i64 %"21" to ptr
%"57" = getelementptr inbounds i8, ptr %"46", i64 8
%"20" = load i32, ptr %"57", align 4
store i32 %"20", ptr addrspace(5) %"8", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%0 = icmp eq i32 %"23", 0
%1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true)
store i64 %"14", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"43" = inttoptr i64 %"16" to ptr
%"15" = load i32, ptr %"43", align 4
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"18" to ptr
%"54" = getelementptr inbounds i8, ptr %"44", i64 4
%"17" = load i32, ptr %"54", align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"20" to ptr
%"56" = getelementptr inbounds i8, ptr %"45", i64 8
%"19" = load i32, ptr %"56", align 4
store i32 %"19", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%0 = icmp eq i32 %"22", 0
%1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
%2 = sub i32 31, %1
%"47" = select i1 %0, i32 -1, i32 %2
store i32 %"47", ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"7", align 4
%3 = icmp eq i32 %"25", 0
%4 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true)
%"46" = select i1 %0, i32 -1, i32 %2
store i32 %"46", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
%3 = icmp eq i32 %"24", 0
%4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
%5 = sub i32 31, %4
%"48" = select i1 %3, i32 -1, i32 %5
store i32 %"48", ptr addrspace(5) %"10", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
%6 = icmp eq i32 %"27", 0
%7 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true)
%"47" = select i1 %3, i32 -1, i32 %5
store i32 %"47", ptr addrspace(5) %"10", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
%6 = icmp eq i32 %"26", 0
%7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
%8 = sub i32 31, %7
%"49" = select i1 %6, i32 -1, i32 %8
store i32 %"49", ptr addrspace(5) %"11", align 4
%"28" = load i64, ptr addrspace(5) %"5", align 8
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"50" = inttoptr i64 %"28" to ptr
store i32 %"29", ptr %"50", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"10", align 4
%"51" = inttoptr i64 %"30" to ptr
%"59" = getelementptr inbounds i8, ptr %"51", i64 4
store i32 %"31", ptr %"59", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load i32, ptr addrspace(5) %"11", align 4
%"52" = inttoptr i64 %"32" to ptr
%"61" = getelementptr inbounds i8, ptr %"52", i64 8
store i32 %"33", ptr %"61", align 4
%"48" = select i1 %6, i32 -1, i32 %8
store i32 %"48", ptr addrspace(5) %"11", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"9", align 4
%"49" = inttoptr i64 %"27" to ptr
store i32 %"28", ptr %"49", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"50" = inttoptr i64 %"29" to ptr
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
store i32 %"30", ptr %"58", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"11", align 4
%"51" = inttoptr i64 %"31" to ptr
%"60" = getelementptr inbounds i8, ptr %"51", i64 8
store i32 %"32", ptr %"60", align 4
ret void
}


@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
"53":
define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
"52":
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@ -15,53 +13,53 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"15", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"17" to ptr
%"16" = load i32, ptr %"44", align 4
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"19" to ptr
%"55" = getelementptr inbounds i8, ptr %"45", i64 4
%"18" = load i32, ptr %"55", align 4
store i32 %"18", ptr addrspace(5) %"7", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"46" = inttoptr i64 %"21" to ptr
%"57" = getelementptr inbounds i8, ptr %"46", i64 8
%"20" = load i32, ptr %"57", align 4
store i32 %"20", ptr addrspace(5) %"8", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%0 = icmp eq i32 %"23", 0
%1 = call i32 @llvm.ctlz.i32(i32 %"23", i1 true)
%"47" = select i1 %0, i32 -1, i32 %1
store i32 %"47", ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"7", align 4
%2 = icmp eq i32 %"25", 0
%3 = call i32 @llvm.ctlz.i32(i32 %"25", i1 true)
%"48" = select i1 %2, i32 -1, i32 %3
store i32 %"48", ptr addrspace(5) %"10", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
%4 = icmp eq i32 %"27", 0
%5 = call i32 @llvm.ctlz.i32(i32 %"27", i1 true)
%"49" = select i1 %4, i32 -1, i32 %5
store i32 %"49", ptr addrspace(5) %"11", align 4
%"28" = load i64, ptr addrspace(5) %"5", align 8
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"50" = inttoptr i64 %"28" to ptr
store i32 %"29", ptr %"50", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"10", align 4
%"51" = inttoptr i64 %"30" to ptr
%"59" = getelementptr inbounds i8, ptr %"51", i64 4
store i32 %"31", ptr %"59", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load i32, ptr addrspace(5) %"11", align 4
%"52" = inttoptr i64 %"32" to ptr
%"61" = getelementptr inbounds i8, ptr %"52", i64 8
store i32 %"33", ptr %"61", align 4
store i64 %"14", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"43" = inttoptr i64 %"16" to ptr
%"15" = load i32, ptr %"43", align 4
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"18" to ptr
%"54" = getelementptr inbounds i8, ptr %"44", i64 4
%"17" = load i32, ptr %"54", align 4
store i32 %"17", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"20" to ptr
%"56" = getelementptr inbounds i8, ptr %"45", i64 8
%"19" = load i32, ptr %"56", align 4
store i32 %"19", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%0 = icmp eq i32 %"22", 0
%1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
%"46" = select i1 %0, i32 -1, i32 %1
store i32 %"46", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
%2 = icmp eq i32 %"24", 0
%3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
%"47" = select i1 %2, i32 -1, i32 %3
store i32 %"47", ptr addrspace(5) %"10", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
%4 = icmp eq i32 %"26", 0
%5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
%"48" = select i1 %4, i32 -1, i32 %5
store i32 %"48", ptr addrspace(5) %"11", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"9", align 4
%"49" = inttoptr i64 %"27" to ptr
store i32 %"28", ptr %"49", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"50" = inttoptr i64 %"29" to ptr
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
store i32 %"30", ptr %"58", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"11", align 4
%"51" = inttoptr i64 %"31" to ptr
%"60" = getelementptr inbounds i8, ptr %"51", i64 8
store i32 %"32", ptr %"60", align 4
ret void
}


@ -1,35 +1,33 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"27":
define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"26":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"13" = load i64, ptr %"25", align 8
store i64 %"13", ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"15" = add i64 %"16", 1
store i64 %"15", ptr addrspace(5) %"7", align 8
%"18" = load i64, ptr addrspace(5) %"8", align 8
%"17" = add i64 %"18", 1
store i64 %"17", ptr addrspace(5) %"8", align 8
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"7", align 8
%"26" = inttoptr i64 %"19" to ptr
store i64 %"20", ptr %"26", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"24", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = add i64 %"15", 1
store i64 %"14", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(5) %"8", align 8
%"16" = add i64 %"17", 1
store i64 %"16", ptr addrspace(5) %"8", align 8
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"7", align 8
%"25" = inttoptr i64 %"18" to ptr
store i64 %"19", ptr %"25", align 8
ret void
}


@ -1,43 +1,41 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
"29":
define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
"28":
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"12" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"14" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"14", ptr addrspace(5) %"8", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"27" = inttoptr i64 %"16" to ptr
%"15" = load i64, ptr %"27", align 8
store i64 %"15", ptr addrspace(5) %"9", align 8
store i64 %"13", ptr addrspace(5) %"8", align 8
%"15" = load i64, ptr addrspace(5) %"7", align 8
%"26" = inttoptr i64 %"15" to ptr
%"14" = load i64, ptr %"26", align 8
store i64 %"14", ptr addrspace(5) %"9", align 8
br label %"4"
"4": ; preds = %"29"
%"18" = load i64, ptr addrspace(5) %"9", align 8
%"17" = add i64 %"18", 1
store i64 %"17", ptr addrspace(5) %"10", align 8
"4": ; preds = %"28"
%"17" = load i64, ptr addrspace(5) %"9", align 8
%"16" = add i64 %"17", 1
store i64 %"16", ptr addrspace(5) %"10", align 8
br label %"6"
0: ; No predecessors!
%"20" = load i64, ptr addrspace(5) %"9", align 8
%"19" = add i64 %"20", 2
store i64 %"19", ptr addrspace(5) %"10", align 8
%"19" = load i64, ptr addrspace(5) %"9", align 8
%"18" = add i64 %"19", 2
store i64 %"18", ptr addrspace(5) %"10", align 8
br label %"6"
"6": ; preds = %0, %"4"
%"21" = load i64, ptr addrspace(5) %"8", align 8
%"22" = load i64, ptr addrspace(5) %"10", align 8
%"28" = inttoptr i64 %"21" to ptr
store i64 %"22", ptr %"28", align 8
%"20" = load i64, ptr addrspace(5) %"8", align 8
%"21" = load i64, ptr addrspace(5) %"10", align 8
%"27" = inttoptr i64 %"20" to ptr
store i64 %"21", ptr %"27", align 8
ret void
}


@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"19", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"13" = call i32 @llvm.bitreverse.i32(i32 %"14")
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load i32, ptr %"18", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"12" = call i32 @llvm.bitreverse.i32(i32 %"13")
store i32 %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"19", align 4
ret void
}


@ -1,63 +1,59 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define private i64 @incr(i64 %"31") #0 {
"51":
define private i64 @incr(i64 %"29") #0 {
"49":
%"18" = alloca i64, align 8, addrspace(5)
%"17" = alloca i64, align 8, addrspace(5)
%"21" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"21", align 1
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
store i64 %"31", ptr addrspace(5) %"18", align 8
%"32" = load i64, ptr addrspace(5) %"18", align 8
store i64 %"32", ptr addrspace(5) %"45", align 8
%"33" = load i64, ptr addrspace(5) %"45", align 8
store i64 %"33", ptr addrspace(5) %"14", align 8
%"35" = load i64, ptr addrspace(5) %"14", align 8
%"34" = add i64 %"35", 1
store i64 %"34", ptr addrspace(5) %"14", align 8
%"36" = load i64, ptr addrspace(5) %"14", align 8
store i64 %"36", ptr addrspace(5) %"44", align 8
%"37" = load i64, ptr addrspace(5) %"44", align 8
store i64 %"37", ptr addrspace(5) %"17", align 8
%"38" = load i64, ptr addrspace(5) %"17", align 8
ret i64 %"38"
}
define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
"50":
%"19" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"19", align 1
%"20" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"20", align 1
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
store i64 %"29", ptr addrspace(5) %"18", align 8
%"30" = load i64, ptr addrspace(5) %"18", align 8
store i64 %"30", ptr addrspace(5) %"43", align 8
%"31" = load i64, ptr addrspace(5) %"43", align 8
store i64 %"31", ptr addrspace(5) %"14", align 8
%"33" = load i64, ptr addrspace(5) %"14", align 8
%"32" = add i64 %"33", 1
store i64 %"32", ptr addrspace(5) %"14", align 8
%"34" = load i64, ptr addrspace(5) %"14", align 8
store i64 %"34", ptr addrspace(5) %"42", align 8
%"35" = load i64, ptr addrspace(5) %"42", align 8
store i64 %"35", ptr addrspace(5) %"17", align 8
%"36" = load i64, ptr addrspace(5) %"17", align 8
ret i64 %"36"
}
define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
"48":
%"19" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"19", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"23" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"23", ptr addrspace(5) %"7", align 8
%"24" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"24", ptr addrspace(5) %"8", align 8
%"26" = load i64, ptr addrspace(5) %"7", align 8
%"46" = inttoptr i64 %"26" to ptr addrspace(1)
%"25" = load i64, ptr addrspace(1) %"46", align 8
store i64 %"25", ptr addrspace(5) %"9", align 8
%"27" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"27", ptr addrspace(5) %"42", align 8
%"15" = load i64, ptr addrspace(5) %"42", align 8
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
%"21" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"21", ptr addrspace(5) %"7", align 8
%"22" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"22", ptr addrspace(5) %"8", align 8
%"24" = load i64, ptr addrspace(5) %"7", align 8
%"44" = inttoptr i64 %"24" to ptr addrspace(1)
%"23" = load i64, ptr addrspace(1) %"44", align 8
store i64 %"23", ptr addrspace(5) %"9", align 8
%"25" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"25", ptr addrspace(5) %"40", align 8
%"15" = load i64, ptr addrspace(5) %"40", align 8
%"16" = call i64 @incr(i64 %"15")
store i64 %"16", ptr addrspace(5) %"43", align 8
%"28" = load i64, ptr addrspace(5) %"43", align 8
store i64 %"28", ptr addrspace(5) %"9", align 8
%"29" = load i64, ptr addrspace(5) %"8", align 8
%"30" = load i64, ptr addrspace(5) %"9", align 8
%"49" = inttoptr i64 %"29" to ptr addrspace(1)
store i64 %"30", ptr addrspace(1) %"49", align 8
store i64 %"16", ptr addrspace(5) %"41", align 8
%"26" = load i64, ptr addrspace(5) %"41", align 8
store i64 %"26", ptr addrspace(5) %"9", align 8
%"27" = load i64, ptr addrspace(5) %"8", align 8
%"28" = load i64, ptr addrspace(5) %"9", align 8
%"47" = inttoptr i64 %"27" to ptr addrspace(1)
store i64 %"28", ptr addrspace(1) %"47", align 8
ret void
}


@ -1,68 +1,64 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define private [2 x i32] @incr(i64 %"23") #0 {
"58":
define private [2 x i32] @incr(i64 %"21") #0 {
"56":
%"16" = alloca i64, align 8, addrspace(5)
%"15" = alloca [2 x i32], align 4, addrspace(5)
%"19" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"19", align 1
%"20" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"20", align 1
%"44" = alloca [2 x i32], align 4, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
%"42" = alloca [2 x i32], align 4, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"4" = alloca i64, align 8, addrspace(5)
store i64 %"23", ptr addrspace(5) %"16", align 8
%"24" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"24", ptr addrspace(5) %"45", align 8
%"25" = load i64, ptr addrspace(5) %"45", align 8
store i64 %"25", ptr addrspace(5) %"4", align 8
%"27" = load i64, ptr addrspace(5) %"4", align 8
%"26" = add i64 %"27", 1
store i64 %"26", ptr addrspace(5) %"4", align 8
%"28" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"28", ptr addrspace(5) %"44", align 8
%"29" = load [2 x i32], ptr addrspace(5) %"44", align 4
store [2 x i32] %"29", ptr addrspace(5) %"15", align 4
%"30" = load [2 x i32], ptr addrspace(5) %"15", align 4
ret [2 x i32] %"30"
store i64 %"21", ptr addrspace(5) %"16", align 8
%"22" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"22", ptr addrspace(5) %"43", align 8
%"23" = load i64, ptr addrspace(5) %"43", align 8
store i64 %"23", ptr addrspace(5) %"4", align 8
%"25" = load i64, ptr addrspace(5) %"4", align 8
%"24" = add i64 %"25", 1
store i64 %"24", ptr addrspace(5) %"4", align 8
%"26" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"26", ptr addrspace(5) %"42", align 8
%"27" = load [2 x i32], ptr addrspace(5) %"42", align 4
store [2 x i32] %"27", ptr addrspace(5) %"15", align 4
%"28" = load [2 x i32], ptr addrspace(5) %"15", align 4
ret [2 x i32] %"28"
}
define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
"59":
%"21" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"21", align 1
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
"57":
%"20" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"20", align 1
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca [2 x i32], align 4, addrspace(5)
%"31" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"31", ptr addrspace(5) %"8", align 8
%"32" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"32", ptr addrspace(5) %"9", align 8
%"34" = load i64, ptr addrspace(5) %"8", align 8
%"52" = inttoptr i64 %"34" to ptr addrspace(1)
%"33" = load i64, ptr addrspace(1) %"52", align 8
store i64 %"33", ptr addrspace(5) %"10", align 8
%"35" = load i64, ptr addrspace(5) %"10", align 8
store i64 %"35", ptr addrspace(5) %"48", align 8
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca [2 x i32], align 4, addrspace(5)
%"29" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"29", ptr addrspace(5) %"8", align 8
%"30" = load i64, ptr addrspace(4) %"45", align 8
store i64 %"30", ptr addrspace(5) %"9", align 8
%"32" = load i64, ptr addrspace(5) %"8", align 8
%"50" = inttoptr i64 %"32" to ptr addrspace(1)
%"31" = load i64, ptr addrspace(1) %"50", align 8
store i64 %"31", ptr addrspace(5) %"10", align 8
%"33" = load i64, ptr addrspace(5) %"10", align 8
store i64 %"33", ptr addrspace(5) %"46", align 8
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8
%"17" = load i64, ptr addrspace(5) %"48", align 8
%"37" = load i64, ptr addrspace(5) %"11", align 8
%0 = inttoptr i64 %"37" to ptr
%"17" = load i64, ptr addrspace(5) %"46", align 8
%"35" = load i64, ptr addrspace(5) %"11", align 8
%0 = inttoptr i64 %"35" to ptr
%"18" = call [2 x i32] %0(i64 %"17")
store [2 x i32] %"18", ptr addrspace(5) %"49", align 4
%"61" = getelementptr inbounds i8, ptr addrspace(5) %"49", i64 0
%"38" = load i64, ptr addrspace(5) %"61", align 8
store i64 %"38", ptr addrspace(5) %"10", align 8
%"39" = load i64, ptr addrspace(5) %"9", align 8
%"40" = load i64, ptr addrspace(5) %"10", align 8
%"57" = inttoptr i64 %"39" to ptr addrspace(1)
store i64 %"40", ptr addrspace(1) %"57", align 8
store [2 x i32] %"18", ptr addrspace(5) %"47", align 4
%"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
%"36" = load i64, ptr addrspace(5) %"59", align 8
store i64 %"36", ptr addrspace(5) %"10", align 8
%"37" = load i64, ptr addrspace(5) %"9", align 8
%"38" = load i64, ptr addrspace(5) %"10", align 8
%"55" = inttoptr i64 %"37" to ptr addrspace(1)
store i64 %"38", ptr addrspace(1) %"55", align 8
ret void
}


@ -3,43 +3,39 @@ target triple = "amdgcn-amd-amdhsa"
%struct.i64i32 = type { i64, i32 }
define private %struct.i64i32 @"1"(i32 %"41", i32 %"42") #0 {
"64":
define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 {
"62":
%"18" = alloca i32, align 4, addrspace(5)
%"19" = alloca i32, align 4, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
%"17" = alloca i32, align 4, addrspace(5)
%"23" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"23", align 1
%"24" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"24", align 1
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
%"20" = alloca i32, align 4, addrspace(5)
store i32 %"41", ptr addrspace(5) %"18", align 4
store i32 %"42", ptr addrspace(5) %"19", align 4
%"44" = load i32, ptr addrspace(5) %"18", align 4
%"45" = load i32, ptr addrspace(5) %"19", align 4
%"43" = add i32 %"44", %"45"
store i32 %"43", ptr addrspace(5) %"20", align 4
%"47" = load i32, ptr addrspace(5) %"20", align 4
%"46" = zext i32 %"47" to i64
store i64 %"46", ptr addrspace(5) %"16", align 8
%"49" = load i32, ptr addrspace(5) %"18", align 4
%"50" = load i32, ptr addrspace(5) %"19", align 4
%"48" = mul i32 %"49", %"50"
store i32 %"48", ptr addrspace(5) %"17", align 4
%"51" = load i64, ptr addrspace(5) %"16", align 8
%"52" = load i32, ptr addrspace(5) %"17", align 4
%0 = insertvalue %struct.i64i32 undef, i64 %"51", 0
%1 = insertvalue %struct.i64i32 %0, i32 %"52", 1
store i32 %"39", ptr addrspace(5) %"18", align 4
store i32 %"40", ptr addrspace(5) %"19", align 4
%"42" = load i32, ptr addrspace(5) %"18", align 4
%"43" = load i32, ptr addrspace(5) %"19", align 4
%"41" = add i32 %"42", %"43"
store i32 %"41", ptr addrspace(5) %"20", align 4
%"45" = load i32, ptr addrspace(5) %"20", align 4
%"44" = zext i32 %"45" to i64
store i64 %"44", ptr addrspace(5) %"16", align 8
%"47" = load i32, ptr addrspace(5) %"18", align 4
%"48" = load i32, ptr addrspace(5) %"19", align 4
%"46" = mul i32 %"47", %"48"
store i32 %"46", ptr addrspace(5) %"17", align 4
%"49" = load i64, ptr addrspace(5) %"16", align 8
%"50" = load i32, ptr addrspace(5) %"17", align 4
%0 = insertvalue %struct.i64i32 undef, i64 %"49", 0
%1 = insertvalue %struct.i64i32 %0, i32 %"50", 1
ret %struct.i64i32 %1
}
define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 {
"63":
define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 {
"61":
%"21" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"21", align 1
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
@ -47,38 +43,38 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
%"13" = alloca i64, align 8, addrspace(5)
%"14" = alloca i64, align 8, addrspace(5)
%"15" = alloca i32, align 4, addrspace(5)
%"25" = load i64, ptr addrspace(4) %"57", align 8
store i64 %"25", ptr addrspace(5) %"9", align 8
%"26" = load i64, ptr addrspace(4) %"58", align 8
store i64 %"26", ptr addrspace(5) %"10", align 8
%"23" = load i64, ptr addrspace(4) %"55", align 8
store i64 %"23", ptr addrspace(5) %"9", align 8
%"24" = load i64, ptr addrspace(4) %"56", align 8
store i64 %"24", ptr addrspace(5) %"10", align 8
%"26" = load i64, ptr addrspace(5) %"9", align 8
%"57" = inttoptr i64 %"26" to ptr addrspace(1)
%"25" = load i32, ptr addrspace(1) %"57", align 4
store i32 %"25", ptr addrspace(5) %"11", align 4
%"28" = load i64, ptr addrspace(5) %"9", align 8
%"59" = inttoptr i64 %"28" to ptr addrspace(1)
%"27" = load i32, ptr addrspace(1) %"59", align 4
store i32 %"27", ptr addrspace(5) %"11", align 4
%"30" = load i64, ptr addrspace(5) %"9", align 8
%"60" = inttoptr i64 %"30" to ptr addrspace(1)
%"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 4
%"29" = load i32, ptr addrspace(1) %"66", align 4
store i32 %"29", ptr addrspace(5) %"12", align 4
%"33" = load i32, ptr addrspace(5) %"11", align 4
%"34" = load i32, ptr addrspace(5) %"12", align 4
%0 = call %struct.i64i32 @"1"(i32 %"33", i32 %"34")
%"31" = extractvalue %struct.i64i32 %0, 0
%"32" = extractvalue %struct.i64i32 %0, 1
store i64 %"31", ptr addrspace(5) %"13", align 8
store i32 %"32", ptr addrspace(5) %"15", align 4
%"36" = load i32, ptr addrspace(5) %"15", align 4
%"35" = zext i32 %"36" to i64
store i64 %"35", ptr addrspace(5) %"14", align 8
%"58" = inttoptr i64 %"28" to ptr addrspace(1)
%"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
%"27" = load i32, ptr addrspace(1) %"64", align 4
store i32 %"27", ptr addrspace(5) %"12", align 4
%"31" = load i32, ptr addrspace(5) %"11", align 4
%"32" = load i32, ptr addrspace(5) %"12", align 4
%0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32")
%"29" = extractvalue %struct.i64i32 %0, 0
%"30" = extractvalue %struct.i64i32 %0, 1
store i64 %"29", ptr addrspace(5) %"13", align 8
store i32 %"30", ptr addrspace(5) %"15", align 4
%"34" = load i32, ptr addrspace(5) %"15", align 4
%"33" = zext i32 %"34" to i64
store i64 %"33", ptr addrspace(5) %"14", align 8
%"35" = load i64, ptr addrspace(5) %"10", align 8
%"36" = load i64, ptr addrspace(5) %"13", align 8
%"59" = inttoptr i64 %"35" to ptr addrspace(1)
store i64 %"36", ptr addrspace(1) %"59", align 8
%"37" = load i64, ptr addrspace(5) %"10", align 8
%"38" = load i64, ptr addrspace(5) %"13", align 8
%"61" = inttoptr i64 %"37" to ptr addrspace(1)
store i64 %"38", ptr addrspace(1) %"61", align 8
%"39" = load i64, ptr addrspace(5) %"10", align 8
%"40" = load i64, ptr addrspace(5) %"14", align 8
%"62" = inttoptr i64 %"39" to ptr addrspace(1)
%"68" = getelementptr inbounds i8, ptr addrspace(1) %"62", i64 8
store i64 %"40", ptr addrspace(1) %"68", align 8
%"38" = load i64, ptr addrspace(5) %"14", align 8
%"60" = inttoptr i64 %"37" to ptr addrspace(1)
%"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8
store i64 %"38", ptr addrspace(1) %"66", align 8
ret void
}

View file

@ -1,67 +1,63 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define private i64 @incr(i64 %"35") #0 {
"56":
define private i64 @incr(i64 %"33") #0 {
"54":
%"20" = alloca i64, align 8, addrspace(5)
%"19" = alloca i64, align 8, addrspace(5)
%"23" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"23", align 1
%"24" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"24", align 1
%"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
store i64 %"35", ptr addrspace(5) %"20", align 8
%"36" = load i64, ptr addrspace(5) %"20", align 8
store i64 %"36", ptr addrspace(5) %"49", align 8
%"37" = load i64, ptr addrspace(5) %"49", align 8
store i64 %"37", ptr addrspace(5) %"16", align 8
%"39" = load i64, ptr addrspace(5) %"16", align 8
%"38" = add i64 %"39", 1
store i64 %"38", ptr addrspace(5) %"16", align 8
%"40" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"40", ptr addrspace(5) %"48", align 8
%"41" = load i64, ptr addrspace(5) %"48", align 8
store i64 %"41", ptr addrspace(5) %"19", align 8
%"42" = load i64, ptr addrspace(5) %"19", align 8
ret i64 %"42"
}
define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
"55":
%"21" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"21", align 1
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5)
%"16" = alloca i64, align 8, addrspace(5)
store i64 %"33", ptr addrspace(5) %"20", align 8
%"34" = load i64, ptr addrspace(5) %"20", align 8
store i64 %"34", ptr addrspace(5) %"47", align 8
%"35" = load i64, ptr addrspace(5) %"47", align 8
store i64 %"35", ptr addrspace(5) %"16", align 8
%"37" = load i64, ptr addrspace(5) %"16", align 8
%"36" = add i64 %"37", 1
store i64 %"36", ptr addrspace(5) %"16", align 8
%"38" = load i64, ptr addrspace(5) %"16", align 8
store i64 %"38", ptr addrspace(5) %"46", align 8
%"39" = load i64, ptr addrspace(5) %"46", align 8
store i64 %"39", ptr addrspace(5) %"19", align 8
%"40" = load i64, ptr addrspace(5) %"19", align 8
ret i64 %"40"
}
define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
"53":
%"21" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"21", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5)
%"25" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"25", ptr addrspace(5) %"7", align 8
%"26" = load i64, ptr addrspace(4) %"45", align 8
store i64 %"26", ptr addrspace(5) %"8", align 8
%"28" = load i64, ptr addrspace(5) %"7", align 8
%"50" = inttoptr i64 %"28" to ptr addrspace(1)
%"27" = load i64, ptr addrspace(1) %"50", align 8
store i64 %"27", ptr addrspace(5) %"9", align 8
%"29" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"29", ptr addrspace(5) %"46", align 8
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
%"23" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"23", ptr addrspace(5) %"7", align 8
%"24" = load i64, ptr addrspace(4) %"43", align 8
store i64 %"24", ptr addrspace(5) %"8", align 8
%"26" = load i64, ptr addrspace(5) %"7", align 8
%"48" = inttoptr i64 %"26" to ptr addrspace(1)
%"25" = load i64, ptr addrspace(1) %"48", align 8
store i64 %"25", ptr addrspace(5) %"9", align 8
%"27" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"27", ptr addrspace(5) %"44", align 8
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8
%"17" = load i64, ptr addrspace(5) %"46", align 8
%"31" = load i64, ptr addrspace(5) %"10", align 8
%0 = inttoptr i64 %"31" to ptr
%"17" = load i64, ptr addrspace(5) %"44", align 8
%"29" = load i64, ptr addrspace(5) %"10", align 8
%0 = inttoptr i64 %"29" to ptr
%"18" = call i64 %0(i64 %"17")
store i64 %"18", ptr addrspace(5) %"47", align 8
%"32" = load i64, ptr addrspace(5) %"47", align 8
store i64 %"32", ptr addrspace(5) %"9", align 8
%"33" = load i64, ptr addrspace(5) %"8", align 8
%"34" = load i64, ptr addrspace(5) %"9", align 8
%"54" = inttoptr i64 %"33" to ptr addrspace(1)
store i64 %"34", ptr addrspace(1) %"54", align 8
store i64 %"18", ptr addrspace(5) %"45", align 8
%"30" = load i64, ptr addrspace(5) %"45", align 8
store i64 %"30", ptr addrspace(5) %"9", align 8
%"31" = load i64, ptr addrspace(5) %"8", align 8
%"32" = load i64, ptr addrspace(5) %"9", align 8
%"52" = inttoptr i64 %"31" to ptr addrspace(1)
store i64 %"32", ptr addrspace(1) %"52", align 8
ret void
}

View file

@ -1,51 +0,0 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @carry_mixed(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
"44":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
%"36" = extractvalue { i32, i1 } %0, 0
%"13" = extractvalue { i32, i1 } %0, 1
store i32 %"36", ptr addrspace(5) %"6", align 4
store i1 %"13", ptr addrspace(5) %"10", align 1
%"15" = load i1, ptr addrspace(5) %"10", align 1
%1 = zext i1 %"15" to i32
%"37" = sub i32 2, %1
store i32 %"37", ptr addrspace(5) %"7", align 4
%2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
%"38" = extractvalue { i32, i1 } %2, 0
%"17" = extractvalue { i32, i1 } %2, 1
store i32 %"38", ptr addrspace(5) %"6", align 4
store i1 %"17", ptr addrspace(5) %"10", align 1
%"19" = load i1, ptr addrspace(5) %"9", align 1
%3 = zext i1 %"19" to i32
%"39" = add i32 1, %3
store i32 %"39", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"40" = inttoptr i64 %"20" to ptr
store i32 %"21", ptr %"40", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"42" = inttoptr i64 %"22" to ptr
%"46" = getelementptr inbounds i8, ptr %"42", i64 4
store i32 %"23", ptr %"46", align 4
ret void
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }

View file

@ -1,32 +0,0 @@
.version 6.5
.target sm_30
.address_size 64
.visible .entry carry_mixed(
.param .u64 input,
.param .u64 output
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .b32 unused;
.reg .b32 carry_out_1;
.reg .b32 carry_out_2;
ld.param.u64 out_addr, [output];
// set carry with sub
sub.cc.s32 unused, 0, 1;
// write carry with sub
subc.s32 carry_out_1, 2, 0;
// set carry with sub
sub.cc.s32 unused, 0, 1;
// fail writing carry with add
addc.s32 carry_out_2, 1, 0;
st.s32 [out_addr], carry_out_1;
st.s32 [out_addr+4], carry_out_2;
ret;
}

View file

@ -0,0 +1,257 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 {
"268":
%"22" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"22", align 1
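; %"22" models the single PTX carry flag (CC.CF): every *.cc instruction stores into it and every addc/subc loads it back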
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
%"13" = alloca i32, align 4, addrspace(5)
%"14" = alloca i32, align 4, addrspace(5)
%"15" = alloca i32, align 4, addrspace(5)
%"16" = alloca i32, align 4, addrspace(5)
%"17" = alloca i32, align 4, addrspace(5)
%"18" = alloca i32, align 4, addrspace(5)
%"19" = alloca i32, align 4, addrspace(5)
%"20" = alloca i32, align 4, addrspace(5)
%"21" = alloca i32, align 4, addrspace(5)
%"37" = load i64, ptr addrspace(4) %"209", align 8
store i64 %"37", ptr addrspace(5) %"5", align 8
%0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
%"210" = extractvalue { i32, i1 } %0, 0
%"23" = extractvalue { i32, i1 } %0, 1
store i32 %"210", ptr addrspace(5) %"6", align 4
%"39" = xor i1 %"23", true
store i1 %"39", ptr addrspace(5) %"22", align 1
%"41" = load i1, ptr addrspace(5) %"22", align 1
%1 = zext i1 %"41" to i32
%"211" = add i32 0, %1
store i32 %"211", ptr addrspace(5) %"6", align 4
%"42" = load i1, ptr addrspace(5) %"22", align 1
%"24" = xor i1 %"42", true
%2 = zext i1 %"24" to i32
%"212" = sub i32 0, %2
store i32 %"212", ptr addrspace(5) %"7", align 4
%3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
%"213" = extractvalue { i32, i1 } %3, 0
%"25" = extractvalue { i32, i1 } %3, 1
store i32 %"213", ptr addrspace(5) %"8", align 4
%"45" = xor i1 %"25", true
store i1 %"45", ptr addrspace(5) %"22", align 1
%"47" = load i1, ptr addrspace(5) %"22", align 1
%4 = zext i1 %"47" to i32
%"214" = add i32 0, %4
store i32 %"214", ptr addrspace(5) %"8", align 4
%"48" = load i1, ptr addrspace(5) %"22", align 1
%"26" = xor i1 %"48", true
%5 = zext i1 %"26" to i32
%"215" = sub i32 0, %5
store i32 %"215", ptr addrspace(5) %"9", align 4
%6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
%"216" = extractvalue { i32, i1 } %6, 0
%"51" = extractvalue { i32, i1 } %6, 1
store i32 %"216", ptr addrspace(5) %"10", align 4
store i1 %"51", ptr addrspace(5) %"22", align 1
%"53" = load i1, ptr addrspace(5) %"22", align 1
%7 = zext i1 %"53" to i32
%"217" = add i32 0, %7
store i32 %"217", ptr addrspace(5) %"10", align 4
%"54" = load i1, ptr addrspace(5) %"22", align 1
%"27" = xor i1 %"54", true
%8 = zext i1 %"27" to i32
%"218" = sub i32 0, %8
store i32 %"218", ptr addrspace(5) %"11", align 4
%9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
%"219" = extractvalue { i32, i1 } %9, 0
%"57" = extractvalue { i32, i1 } %9, 1
store i32 %"219", ptr addrspace(5) %"12", align 4
store i1 %"57", ptr addrspace(5) %"22", align 1
%"59" = load i1, ptr addrspace(5) %"22", align 1
%10 = zext i1 %"59" to i32
%"220" = add i32 0, %10
store i32 %"220", ptr addrspace(5) %"12", align 4
%"60" = load i1, ptr addrspace(5) %"22", align 1
%"28" = xor i1 %"60", true
%11 = zext i1 %"28" to i32
%"221" = sub i32 0, %11
store i32 %"221", ptr addrspace(5) %"13", align 4
%12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
%"222" = extractvalue { i32, i1 } %12, 0
%"63" = extractvalue { i32, i1 } %12, 1
store i32 %"222", ptr addrspace(5) %"14", align 4
store i1 %"63", ptr addrspace(5) %"22", align 1
%"65" = load i1, ptr addrspace(5) %"22", align 1
%13 = zext i1 %"65" to i32
%"223" = add i32 0, %13
store i32 %"223", ptr addrspace(5) %"14", align 4
%"66" = load i1, ptr addrspace(5) %"22", align 1
%"29" = xor i1 %"66", true
%14 = zext i1 %"29" to i32
%"224" = sub i32 0, %14
store i32 %"224", ptr addrspace(5) %"15", align 4
%15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
%"225" = extractvalue { i32, i1 } %15, 0
%"69" = extractvalue { i32, i1 } %15, 1
store i32 %"225", ptr addrspace(5) %"16", align 4
store i1 %"69", ptr addrspace(5) %"22", align 1
%"71" = load i1, ptr addrspace(5) %"22", align 1
%16 = zext i1 %"71" to i32
%"226" = add i32 0, %16
store i32 %"226", ptr addrspace(5) %"16", align 4
%"72" = load i1, ptr addrspace(5) %"22", align 1
%"30" = xor i1 %"72", true
%17 = zext i1 %"30" to i32
%"227" = sub i32 0, %17
store i32 %"227", ptr addrspace(5) %"17", align 4
%18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
%"228" = extractvalue { i32, i1 } %18, 0
%"75" = extractvalue { i32, i1 } %18, 1
store i32 %"228", ptr addrspace(5) %"18", align 4
store i1 %"75", ptr addrspace(5) %"22", align 1
%"76" = load i1, ptr addrspace(5) %"22", align 1
%"31" = xor i1 %"76", true
%19 = zext i1 %"31" to i32
%20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
%21 = extractvalue { i32, i1 } %20, 0
%22 = extractvalue { i32, i1 } %20, 1
%23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19)
%"229" = extractvalue { i32, i1 } %23, 0
%24 = extractvalue { i32, i1 } %23, 1
%"32" = xor i1 %22, %24
store i32 %"229", ptr addrspace(5) %"18", align 4
%"78" = xor i1 %"32", true
store i1 %"78", ptr addrspace(5) %"22", align 1
%"80" = load i1, ptr addrspace(5) %"22", align 1
%25 = zext i1 %"80" to i32
%"230" = add i32 0, %25
store i32 %"230", ptr addrspace(5) %"18", align 4
%"81" = load i1, ptr addrspace(5) %"22", align 1
%"33" = xor i1 %"81", true
%26 = zext i1 %"33" to i32
%"231" = sub i32 0, %26
store i32 %"231", ptr addrspace(5) %"19", align 4
%27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
%"232" = extractvalue { i32, i1 } %27, 0
%"84" = extractvalue { i32, i1 } %27, 1
store i32 %"232", ptr addrspace(5) %"20", align 4
store i1 %"84", ptr addrspace(5) %"22", align 1
%"85" = load i1, ptr addrspace(5) %"22", align 1
%"34" = xor i1 %"85", true
%28 = zext i1 %"34" to i32
%29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
%30 = extractvalue { i32, i1 } %29, 0
%31 = extractvalue { i32, i1 } %29, 1
%32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28)
%"233" = extractvalue { i32, i1 } %32, 0
%33 = extractvalue { i32, i1 } %32, 1
%"35" = xor i1 %31, %33
store i32 %"233", ptr addrspace(5) %"20", align 4
%"87" = xor i1 %"35", true
store i1 %"87", ptr addrspace(5) %"22", align 1
%"89" = load i1, ptr addrspace(5) %"22", align 1
%34 = zext i1 %"89" to i32
%"234" = add i32 0, %34
store i32 %"234", ptr addrspace(5) %"20", align 4
%"90" = load i1, ptr addrspace(5) %"22", align 1
%"36" = xor i1 %"90", true
%35 = zext i1 %"36" to i32
%"235" = sub i32 0, %35
store i32 %"235", ptr addrspace(5) %"21", align 4
%"92" = load i64, ptr addrspace(5) %"5", align 8
%"93" = load i32, ptr addrspace(5) %"6", align 4
%"236" = inttoptr i64 %"92" to ptr
store i32 %"93", ptr %"236", align 4
%"94" = load i64, ptr addrspace(5) %"5", align 8
%"95" = load i32, ptr addrspace(5) %"8", align 4
%"238" = inttoptr i64 %"94" to ptr
%"270" = getelementptr inbounds i8, ptr %"238", i64 4
store i32 %"95", ptr %"270", align 4
%"96" = load i64, ptr addrspace(5) %"5", align 8
%"97" = load i32, ptr addrspace(5) %"10", align 4
%"240" = inttoptr i64 %"96" to ptr
%"272" = getelementptr inbounds i8, ptr %"240", i64 8
store i32 %"97", ptr %"272", align 4
%"98" = load i64, ptr addrspace(5) %"5", align 8
%"99" = load i32, ptr addrspace(5) %"12", align 4
%"242" = inttoptr i64 %"98" to ptr
%"274" = getelementptr inbounds i8, ptr %"242", i64 12
store i32 %"99", ptr %"274", align 4
%"100" = load i64, ptr addrspace(5) %"5", align 8
%"101" = load i32, ptr addrspace(5) %"14", align 4
%"244" = inttoptr i64 %"100" to ptr
%"276" = getelementptr inbounds i8, ptr %"244", i64 16
store i32 %"101", ptr %"276", align 4
%"102" = load i64, ptr addrspace(5) %"5", align 8
%"103" = load i32, ptr addrspace(5) %"16", align 4
%"246" = inttoptr i64 %"102" to ptr
%"278" = getelementptr inbounds i8, ptr %"246", i64 20
store i32 %"103", ptr %"278", align 4
%"104" = load i64, ptr addrspace(5) %"5", align 8
%"105" = load i32, ptr addrspace(5) %"18", align 4
%"248" = inttoptr i64 %"104" to ptr
%"280" = getelementptr inbounds i8, ptr %"248", i64 24
store i32 %"105", ptr %"280", align 4
%"106" = load i64, ptr addrspace(5) %"5", align 8
%"107" = load i32, ptr addrspace(5) %"20", align 4
%"250" = inttoptr i64 %"106" to ptr
%"282" = getelementptr inbounds i8, ptr %"250", i64 28
store i32 %"107", ptr %"282", align 4
%"108" = load i64, ptr addrspace(5) %"5", align 8
%"109" = load i32, ptr addrspace(5) %"7", align 4
%"252" = inttoptr i64 %"108" to ptr
%"284" = getelementptr inbounds i8, ptr %"252", i64 32
store i32 %"109", ptr %"284", align 4
%"110" = load i64, ptr addrspace(5) %"5", align 8
%"111" = load i32, ptr addrspace(5) %"9", align 4
%"254" = inttoptr i64 %"110" to ptr
%"286" = getelementptr inbounds i8, ptr %"254", i64 36
store i32 %"111", ptr %"286", align 4
%"112" = load i64, ptr addrspace(5) %"5", align 8
%"113" = load i32, ptr addrspace(5) %"11", align 4
%"256" = inttoptr i64 %"112" to ptr
%"288" = getelementptr inbounds i8, ptr %"256", i64 40
store i32 %"113", ptr %"288", align 4
%"114" = load i64, ptr addrspace(5) %"5", align 8
%"115" = load i32, ptr addrspace(5) %"13", align 4
%"258" = inttoptr i64 %"114" to ptr
%"290" = getelementptr inbounds i8, ptr %"258", i64 44
store i32 %"115", ptr %"290", align 4
%"116" = load i64, ptr addrspace(5) %"5", align 8
%"117" = load i32, ptr addrspace(5) %"15", align 4
%"260" = inttoptr i64 %"116" to ptr
%"292" = getelementptr inbounds i8, ptr %"260", i64 48
store i32 %"117", ptr %"292", align 4
%"118" = load i64, ptr addrspace(5) %"5", align 8
%"119" = load i32, ptr addrspace(5) %"17", align 4
%"262" = inttoptr i64 %"118" to ptr
%"294" = getelementptr inbounds i8, ptr %"262", i64 52
store i32 %"119", ptr %"294", align 4
%"120" = load i64, ptr addrspace(5) %"5", align 8
%"121" = load i32, ptr addrspace(5) %"19", align 4
%"264" = inttoptr i64 %"120" to ptr
%"296" = getelementptr inbounds i8, ptr %"264", i64 56
store i32 %"121", ptr %"296", align 4
%"122" = load i64, ptr addrspace(5) %"5", align 8
%"123" = load i32, ptr addrspace(5) %"21", align 4
%"266" = inttoptr i64 %"122" to ptr
%"298" = getelementptr inbounds i8, ptr %"266", i64 60
store i32 %"123", ptr %"298", align 4
ret void
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }

View file

@ -0,0 +1,84 @@
.version 6.5
.target sm_30
.address_size 64
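// Sets the carry flag with every flag-writing instruction (sub.cc, add.cc, mad.lo.cc, subc.cc)
// and reads it back with both addc and subc.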
.visible .entry carry_set_all(
.param .u64 input,
.param .u64 output
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .b32 carry1_add;
.reg .b32 carry1_sub;
.reg .b32 carry2_add;
.reg .b32 carry2_sub;
.reg .b32 carry3_add;
.reg .b32 carry3_sub;
.reg .b32 carry4_add;
.reg .b32 carry4_sub;
.reg .b32 carry5_add;
.reg .b32 carry5_sub;
.reg .b32 carry6_add;
.reg .b32 carry6_sub;
.reg .b32 carry7_add;
.reg .b32 carry7_sub;
.reg .b32 carry8_add;
.reg .b32 carry8_sub;
ld.param.u64 out_addr, [output];
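// set CC.CF with sub.cc (0 - 0, no borrow), then read it back through addc and subc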
sub.cc.u32 carry1_add, 0, 0;
addc.u32 carry1_add, 0, 0;
subc.u32 carry1_sub, 0, 0;
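// set CC.CF with sub.cc (0 - 1, borrows), then read it back through addc and subc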
sub.cc.u32 carry2_add, 0, 1;
addc.u32 carry2_add, 0, 0;
subc.u32 carry2_sub, 0, 0;
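// set CC.CF with add.cc (0 + 0, no carry), then read it back through addc and subc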
add.cc.u32 carry3_add, 0, 0;
addc.u32 carry3_add, 0, 0;
subc.u32 carry3_sub, 0, 0;
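// set CC.CF with add.cc (0xffffffff + 0xffffffff, carries), then read it back through addc and subc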
add.cc.u32 carry4_add, 4294967295, 4294967295;
addc.u32 carry4_add, 0, 0;
subc.u32 carry4_sub, 0, 0;
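// set CC.CF with mad.lo.cc (0 * 0 + 0, no carry), then read it back through addc and subc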
mad.lo.cc.u32 carry5_add, 0, 0, 0;
addc.u32 carry5_add, 0, 0;
subc.u32 carry5_sub, 0, 0;
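// set CC.CF with mad.lo.cc (1 * 0xffffffff + 0xffffffff, carries), then read it back through addc and subc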
mad.lo.cc.u32 carry6_add, 1, 4294967295, 4294967295;
addc.u32 carry6_add, 0, 0;
subc.u32 carry6_sub, 0, 0;
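// set CC.CF with add.cc, then update it with subc.cc (0 - 0 with borrow-in) before reading it back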
add.cc.u32 carry7_add, 0, 0;
subc.cc.u32 carry7_add, 0, 0;
addc.u32 carry7_add, 0, 0;
subc.u32 carry7_sub, 0, 0;
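// set CC.CF with add.cc, then update it with subc.cc (0 - 1, borrows) before reading it back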
add.cc.u32 carry8_add, 0, 0;
subc.cc.u32 carry8_add, 0, 1;
addc.u32 carry8_add, 0, 0;
subc.u32 carry8_sub, 0, 0;
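// addc results land at [out_addr+0..28], subc results at [out_addr+32..60]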
st.u32 [out_addr], carry1_add;
st.u32 [out_addr+4], carry2_add;
st.u32 [out_addr+8], carry3_add;
st.u32 [out_addr+12], carry4_add;
st.u32 [out_addr+16], carry5_add;
st.u32 [out_addr+20], carry6_add;
st.u32 [out_addr+24], carry7_add;
st.u32 [out_addr+28], carry8_add;
st.u32 [out_addr+32], carry1_sub;
st.u32 [out_addr+36], carry2_sub;
st.u32 [out_addr+40], carry3_sub;
st.u32 [out_addr+44], carry4_sub;
st.u32 [out_addr+48], carry5_sub;
st.u32 [out_addr+52], carry6_sub;
st.u32 [out_addr+56], carry7_sub;
st.u32 [out_addr+60], carry8_sub;
ret;
}

View file

@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"19", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%0 = call i32 @llvm.ctlz.i32(i32 %"14", i1 false)
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load i32, ptr %"18", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
%0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false)
store i32 %0, ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"20", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"19", align 4
ret void
}

View file

@ -3,49 +3,47 @@ target triple = "amdgcn-amd-amdhsa"
@constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8
define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
"53":
define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
"52":
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i16, align 2, addrspace(5)
%"8" = alloca i16, align 2, addrspace(5)
%"9" = alloca i16, align 2, addrspace(5)
%"10" = alloca i16, align 2, addrspace(5)
%"12" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"14", ptr addrspace(5) %"6", align 8
%"15" = load i16, ptr addrspace(4) @constparams, align 2
store i16 %"15", ptr addrspace(5) %"7", align 2
%"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
store i16 %"16", ptr addrspace(5) %"8", align 2
%"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
store i16 %"17", ptr addrspace(5) %"9", align 2
%"18" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
store i16 %"18", ptr addrspace(5) %"10", align 2
%"19" = load i64, ptr addrspace(5) %"6", align 8
%"20" = load i16, ptr addrspace(5) %"7", align 2
%"45" = inttoptr i64 %"19" to ptr
store i16 %"20", ptr %"45", align 2
%"21" = load i64, ptr addrspace(5) %"6", align 8
%"22" = load i16, ptr addrspace(5) %"8", align 2
%"47" = inttoptr i64 %"21" to ptr
%"61" = getelementptr inbounds i8, ptr %"47", i64 2
store i16 %"22", ptr %"61", align 2
%"23" = load i64, ptr addrspace(5) %"6", align 8
%"24" = load i16, ptr addrspace(5) %"9", align 2
%"49" = inttoptr i64 %"23" to ptr
%"63" = getelementptr inbounds i8, ptr %"49", i64 4
store i16 %"24", ptr %"63", align 2
%"25" = load i64, ptr addrspace(5) %"6", align 8
%"26" = load i16, ptr addrspace(5) %"10", align 2
%"51" = inttoptr i64 %"25" to ptr
%"65" = getelementptr inbounds i8, ptr %"51", i64 6
store i16 %"26", ptr %"65", align 2
store i64 %"13", ptr addrspace(5) %"6", align 8
%"14" = load i16, ptr addrspace(4) @constparams, align 2
store i16 %"14", ptr addrspace(5) %"7", align 2
%"15" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
store i16 %"15", ptr addrspace(5) %"8", align 2
%"16" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
store i16 %"16", ptr addrspace(5) %"9", align 2
%"17" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
store i16 %"17", ptr addrspace(5) %"10", align 2
%"18" = load i64, ptr addrspace(5) %"6", align 8
%"19" = load i16, ptr addrspace(5) %"7", align 2
%"44" = inttoptr i64 %"18" to ptr
store i16 %"19", ptr %"44", align 2
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"21" = load i16, ptr addrspace(5) %"8", align 2
%"46" = inttoptr i64 %"20" to ptr
%"60" = getelementptr inbounds i8, ptr %"46", i64 2
store i16 %"21", ptr %"60", align 2
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i16, ptr addrspace(5) %"9", align 2
%"48" = inttoptr i64 %"22" to ptr
%"62" = getelementptr inbounds i8, ptr %"48", i64 4
store i16 %"23", ptr %"62", align 2
%"24" = load i64, ptr addrspace(5) %"6", align 8
%"25" = load i16, ptr addrspace(5) %"10", align 2
%"50" = inttoptr i64 %"24" to ptr
%"64" = getelementptr inbounds i8, ptr %"50", i64 6
store i16 %"25", ptr %"64", align 2
ret void
}

View file

@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"20", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = fmul float %"14", 5.000000e-01
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"21" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"21", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"11" to ptr
%"10" = load float, ptr %"19", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = fmul float %"13", 5.000000e-01
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"14" to ptr
store float %"15", ptr %"20", align 4
ret void
}

View file

@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"20", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"13" = mul i32 %"14", -1
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"21" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"21", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"11" to ptr
%"10" = load i32, ptr %"19", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"12" = mul i32 %"13", -1
store i32 %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"20", align 4
ret void
}

View file

@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"19", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = call afn float @llvm.cos.f32(float %"14")
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load float, ptr %"18", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = call afn float @llvm.cos.f32(float %"13")
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store float %"15", ptr %"19", align 4
ret void
}

View file

@ -3,69 +3,67 @@ target triple = "amdgcn-amd-amdhsa"
declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0
define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
"57":
define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
"56":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"49" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load float, ptr addrspace(1) %"49", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"14")
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"50" = inttoptr i64 %"15" to ptr addrspace(1)
store float %"16", ptr addrspace(1) %"50", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"51" = inttoptr i64 %"18" to ptr addrspace(1)
%"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
%"17" = load float, ptr addrspace(1) %"62", align 4
store float %"17", ptr addrspace(5) %"6", align 4
%"20" = load float, ptr addrspace(5) %"6", align 4
%"19" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"20")
store float %"19", ptr addrspace(5) %"6", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"6", align 4
%"52" = inttoptr i64 %"21" to ptr addrspace(1)
%"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 4
store float %"22", ptr addrspace(1) %"64", align 4
%"24" = load i64, ptr addrspace(5) %"4", align 8
%"53" = inttoptr i64 %"24" to ptr addrspace(1)
%"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
%"23" = load float, ptr addrspace(1) %"66", align 4
store float %"23", ptr addrspace(5) %"6", align 4
%"26" = load float, ptr addrspace(5) %"6", align 4
%"25" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"26")
store float %"25", ptr addrspace(5) %"6", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load float, ptr addrspace(5) %"6", align 4
%"54" = inttoptr i64 %"27" to ptr addrspace(1)
%"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8
store float %"28", ptr addrspace(1) %"68", align 4
%"30" = load i64, ptr addrspace(5) %"4", align 8
%"55" = inttoptr i64 %"30" to ptr addrspace(1)
%"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
%"29" = load float, ptr addrspace(1) %"70", align 4
store float %"29", ptr addrspace(5) %"6", align 4
%"32" = load float, ptr addrspace(5) %"6", align 4
%"31" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"32")
store float %"31", ptr addrspace(5) %"6", align 4
%"33" = load i64, ptr addrspace(5) %"5", align 8
%"34" = load float, ptr addrspace(5) %"6", align 4
%"56" = inttoptr i64 %"33" to ptr addrspace(1)
%"72" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 12
store float %"34", ptr addrspace(1) %"72", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"48" = inttoptr i64 %"11" to ptr addrspace(1)
%"10" = load float, ptr addrspace(1) %"48", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"13")
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"49" = inttoptr i64 %"14" to ptr addrspace(1)
store float %"15", ptr addrspace(1) %"49", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"50" = inttoptr i64 %"17" to ptr addrspace(1)
%"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4
%"16" = load float, ptr addrspace(1) %"61", align 4
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19")
store float %"18", ptr addrspace(5) %"6", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"51" = inttoptr i64 %"20" to ptr addrspace(1)
%"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
store float %"21", ptr addrspace(1) %"63", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"23" to ptr addrspace(1)
%"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8
%"22" = load float, ptr addrspace(1) %"65", align 4
store float %"22", ptr addrspace(5) %"6", align 4
%"25" = load float, ptr addrspace(5) %"6", align 4
%"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25")
store float %"24", ptr addrspace(5) %"6", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load float, ptr addrspace(5) %"6", align 4
%"53" = inttoptr i64 %"26" to ptr addrspace(1)
%"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
store float %"27", ptr addrspace(1) %"67", align 4
%"29" = load i64, ptr addrspace(5) %"4", align 8
%"54" = inttoptr i64 %"29" to ptr addrspace(1)
%"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12
%"28" = load float, ptr addrspace(1) %"69", align 4
store float %"28", ptr addrspace(5) %"6", align 4
%"31" = load float, ptr addrspace(5) %"6", align 4
%"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31")
store float %"30", ptr addrspace(5) %"6", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load float, ptr addrspace(5) %"6", align 4
%"55" = inttoptr i64 %"32" to ptr addrspace(1)
%"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
store float %"33", ptr addrspace(1) %"71", align 4
ret void
}

View file

@ -1,32 +1,30 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"23":
define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca half, align 2, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr addrspace(1)
%"20" = load i16, ptr addrspace(1) %"21", align 2
%"12" = bitcast i16 %"20" to half
store half %"12", ptr addrspace(5) %"6", align 2
%"15" = load half, ptr addrspace(5) %"6", align 2
%"14" = fpext half %"15" to float
store float %"14", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load float, ptr addrspace(5) %"7", align 4
%"22" = inttoptr i64 %"16" to ptr
store float %"17", ptr %"22", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr addrspace(1)
%"19" = load i16, ptr addrspace(1) %"20", align 2
%"11" = bitcast i16 %"19" to half
store half %"11", ptr addrspace(5) %"6", align 2
%"14" = load half, ptr addrspace(5) %"6", align 2
%"13" = fpext half %"14" to float
store float %"13", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"7", align 4
%"21" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"21", align 4
ret void
}

View file

@ -9,80 +9,78 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0
declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0
define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #1 {
"76":
define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 {
"75":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"49", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"50", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"51", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"53" = inttoptr i64 %"15" to ptr
%"52" = load i32, ptr %"53", align 4
store i32 %"52", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"54" = inttoptr i64 %"17" to ptr
%"90" = getelementptr inbounds i8, ptr %"54", i64 4
%"55" = load i32, ptr %"90", align 4
store i32 %"55", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"56" = inttoptr i64 %"19" to ptr
%"92" = getelementptr inbounds i8, ptr %"56", i64 8
%"57" = load i32, ptr %"92", align 4
store i32 %"57", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"58" = inttoptr i64 %"21" to ptr
%"94" = getelementptr inbounds i8, ptr %"58", i64 12
%"59" = load i32, ptr %"94", align 4
store i32 %"59", ptr addrspace(5) %"9", align 4
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"60" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"23")
%"22" = bitcast float %"60" to i32
store i32 %"22", ptr addrspace(5) %"6", align 4
%"25" = load i32, ptr addrspace(5) %"7", align 4
%"62" = call float @__zluda_ptx_impl__cvt_rz_f32_s32(i32 %"25")
%"24" = bitcast float %"62" to i32
store i32 %"24", ptr addrspace(5) %"7", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
%"64" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"27")
%"26" = bitcast float %"64" to i32
store i32 %"26", ptr addrspace(5) %"8", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"66" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"29")
%"28" = bitcast float %"66" to i32
store i32 %"28", ptr addrspace(5) %"9", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"6", align 4
%"68" = inttoptr i64 %"30" to ptr addrspace(1)
%"69" = bitcast i32 %"31" to float
store float %"69", ptr addrspace(1) %"68", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load i32, ptr addrspace(5) %"7", align 4
%"70" = inttoptr i64 %"32" to ptr addrspace(1)
%"96" = getelementptr inbounds i8, ptr addrspace(1) %"70", i64 4
%"71" = bitcast i32 %"33" to float
store float %"71", ptr addrspace(1) %"96", align 4
%"34" = load i64, ptr addrspace(5) %"5", align 8
%"35" = load i32, ptr addrspace(5) %"8", align 4
%"72" = inttoptr i64 %"34" to ptr addrspace(1)
%"98" = getelementptr inbounds i8, ptr addrspace(1) %"72", i64 8
%"73" = bitcast i32 %"35" to float
store float %"73", ptr addrspace(1) %"98", align 4
%"36" = load i64, ptr addrspace(5) %"5", align 8
%"37" = load i32, ptr addrspace(5) %"9", align 4
%"74" = inttoptr i64 %"36" to ptr addrspace(1)
%"100" = getelementptr inbounds i8, ptr addrspace(1) %"74", i64 12
%"75" = bitcast i32 %"37" to float
store float %"75", ptr addrspace(1) %"100", align 4
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"14" to ptr
%"51" = load i32, ptr %"52", align 4
store i32 %"51", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"53" = inttoptr i64 %"16" to ptr
%"89" = getelementptr inbounds i8, ptr %"53", i64 4
%"54" = load i32, ptr %"89", align 4
store i32 %"54", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"55" = inttoptr i64 %"18" to ptr
%"91" = getelementptr inbounds i8, ptr %"55", i64 8
%"56" = load i32, ptr %"91", align 4
store i32 %"56", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"20" to ptr
%"93" = getelementptr inbounds i8, ptr %"57", i64 12
%"58" = load i32, ptr %"93", align 4
store i32 %"58", ptr addrspace(5) %"9", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22")
%"21" = bitcast float %"59" to i32
store i32 %"21", ptr addrspace(5) %"6", align 4
%"24" = load i32, ptr addrspace(5) %"7", align 4
%"61" = call float @__zluda_ptx_impl__cvt_rz_f32_s32(i32 %"24")
%"23" = bitcast float %"61" to i32
store i32 %"23", ptr addrspace(5) %"7", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
%"63" = call float @__zluda_ptx_impl__cvt_rm_f32_s32(i32 %"26")
%"25" = bitcast float %"63" to i32
store i32 %"25", ptr addrspace(5) %"8", align 4
%"28" = load i32, ptr addrspace(5) %"9", align 4
%"65" = call float @__zluda_ptx_impl__cvt_rp_f32_s32(i32 %"28")
%"27" = bitcast float %"65" to i32
store i32 %"27", ptr addrspace(5) %"9", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"6", align 4
%"67" = inttoptr i64 %"29" to ptr addrspace(1)
%"68" = bitcast i32 %"30" to float
store float %"68", ptr addrspace(1) %"67", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"7", align 4
%"69" = inttoptr i64 %"31" to ptr addrspace(1)
%"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
%"70" = bitcast i32 %"32" to float
store float %"70", ptr addrspace(1) %"95", align 4
%"33" = load i64, ptr addrspace(5) %"5", align 8
%"34" = load i32, ptr addrspace(5) %"8", align 4
%"71" = inttoptr i64 %"33" to ptr addrspace(1)
%"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8
%"72" = bitcast i32 %"34" to float
store float %"72", ptr addrspace(1) %"97", align 4
%"35" = load i64, ptr addrspace(5) %"5", align 8
%"36" = load i32, ptr addrspace(5) %"9", align 4
%"73" = inttoptr i64 %"35" to ptr addrspace(1)
%"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12
%"74" = bitcast i32 %"36" to float
store float %"74", ptr addrspace(1) %"99", align 4
ret void
}

View file

@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca double, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load float, ptr addrspace(1) %"20", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load float, ptr addrspace(5) %"6", align 4
%"14" = fpext float %"15" to double
store double %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load double, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"16" to ptr
store double %"17", ptr %"21", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load float, ptr addrspace(1) %"19", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = fpext float %"14" to double
store double %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load double, ptr addrspace(5) %"7", align 8
%"20" = inttoptr i64 %"15" to ptr
store double %"16", ptr %"20", align 8
ret void
}

View file

@ -1,44 +1,42 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"34":
define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"33":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"30", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
%"36" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load float, ptr %"36", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"6", align 4
%"16" = call float @llvm.rint.f32(float %"17")
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"7", align 4
%"18" = call float @llvm.rint.f32(float %"19")
store float %"18", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"32" = inttoptr i64 %"20" to ptr
store float %"21", ptr %"32", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load float, ptr addrspace(5) %"7", align 4
%"33" = inttoptr i64 %"22" to ptr
%"38" = getelementptr inbounds i8, ptr %"33", i64 4
store float %"23", ptr %"38", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"29" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"29", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"14" to ptr
%"35" = getelementptr inbounds i8, ptr %"30", i64 4
%"13" = load float, ptr %"35", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"15" = call float @llvm.rint.f32(float %"16")
store float %"15", ptr addrspace(5) %"6", align 4
%"18" = load float, ptr addrspace(5) %"7", align 4
%"17" = call float @llvm.rint.f32(float %"18")
store float %"17", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load float, ptr addrspace(5) %"6", align 4
%"31" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"31", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"21" to ptr
%"37" = getelementptr inbounds i8, ptr %"32", i64 4
store float %"22", ptr %"37", align 4
ret void
}

View file

@ -1,44 +1,42 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"34":
define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"33":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"30", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
%"36" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load float, ptr %"36", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"6", align 4
%"16" = call float @llvm.trunc.f32(float %"17")
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"7", align 4
%"18" = call float @llvm.trunc.f32(float %"19")
store float %"18", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"32" = inttoptr i64 %"20" to ptr
store float %"21", ptr %"32", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load float, ptr addrspace(5) %"7", align 4
%"33" = inttoptr i64 %"22" to ptr
%"38" = getelementptr inbounds i8, ptr %"33", i64 4
store float %"23", ptr %"38", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"29" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"29", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"14" to ptr
%"35" = getelementptr inbounds i8, ptr %"30", i64 4
%"13" = load float, ptr %"35", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"15" = call float @llvm.trunc.f32(float %"16")
store float %"15", ptr addrspace(5) %"6", align 4
%"18" = load float, ptr addrspace(5) %"7", align 4
%"17" = call float @llvm.trunc.f32(float %"18")
store float %"17", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load float, ptr addrspace(5) %"6", align 4
%"31" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"31", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"21" to ptr
%"37" = getelementptr inbounds i8, ptr %"32", i64 4
store float %"22", ptr %"37", align 4
ret void
}


@ -1,33 +1,31 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i32, ptr addrspace(1) %"20", align 4
store i32 %"12", ptr addrspace(5) %"7", align 4
%"15" = load i32, ptr addrspace(5) %"7", align 4
%"26" = trunc i32 %"15" to i8
%"21" = sext i8 %"26" to i16
%"14" = sext i16 %"21" to i32
store i32 %"14", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"23" = inttoptr i64 %"16" to ptr
store i32 %"17", ptr %"23", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load i32, ptr addrspace(1) %"19", align 4
store i32 %"11", ptr addrspace(5) %"7", align 4
%"14" = load i32, ptr addrspace(5) %"7", align 4
%"25" = trunc i32 %"14" to i8
%"20" = sext i8 %"25" to i16
%"13" = sext i16 %"20" to i32
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"22" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"22", align 4
ret void
}


@ -3,48 +3,46 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0
define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
"42":
define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
"41":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"13" to ptr
%"30" = load float, ptr %"31", align 4
%"12" = bitcast float %"30" to i32
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"15" to ptr
%"47" = getelementptr inbounds i8, ptr %"32", i64 4
%"33" = load float, ptr %"47", align 4
%"14" = bitcast float %"33" to i32
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"35" = bitcast i32 %"17" to float
%"34" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"35")
store i32 %"34", ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
%"37" = bitcast i32 %"19" to float
%"36" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"37")
store i32 %"36", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"38" = inttoptr i64 %"20" to ptr addrspace(1)
store i32 %"21", ptr addrspace(1) %"38", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"7", align 4
%"40" = inttoptr i64 %"22" to ptr addrspace(1)
%"49" = getelementptr inbounds i8, ptr addrspace(1) %"40", i64 4
store i32 %"23", ptr addrspace(1) %"49", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"12" to ptr
%"29" = load float, ptr %"30", align 4
%"11" = bitcast float %"29" to i32
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
%"46" = getelementptr inbounds i8, ptr %"31", i64 4
%"32" = load float, ptr %"46", align 4
%"13" = bitcast float %"32" to i32
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"34" = bitcast i32 %"16" to float
%"33" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"34")
store i32 %"33", ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"36" = bitcast i32 %"18" to float
%"35" = call i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float %"36")
store i32 %"35", ptr addrspace(5) %"7", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"37" = inttoptr i64 %"19" to ptr addrspace(1)
store i32 %"20", ptr addrspace(1) %"37", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"39" = inttoptr i64 %"21" to ptr addrspace(1)
%"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4
store i32 %"22", ptr addrspace(1) %"48", align 4
ret void
}


@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"20" = load i32, ptr %"21", align 4
store i32 %"20", ptr addrspace(5) %"6", align 4
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"14" = sext i32 %"15" to i64
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"19" = load i32, ptr %"20", align 4
store i32 %"19", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"13" = sext i32 %"14" to i64
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}


@ -1,50 +1,48 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"35":
define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
"34":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"29" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"29", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%0 = call i32 @llvm.smax.i32(i32 %"16", i32 0)
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"28" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"28", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i32, ptr addrspace(5) %"6", align 4
%0 = call i32 @llvm.smax.i32(i32 %"15", i32 0)
%1 = alloca i32, align 4, addrspace(5)
store i32 %0, ptr addrspace(5) %1, align 4
%"15" = load i32, ptr addrspace(5) %1, align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"14" = load i32, ptr addrspace(5) %1, align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
%2 = alloca i32, align 4, addrspace(5)
store i32 %"18", ptr addrspace(5) %2, align 4
%"30" = load i32, ptr addrspace(5) %2, align 4
store i32 %"30", ptr addrspace(5) %"7", align 4
%"20" = load i32, ptr addrspace(5) %"6", align 4
store i32 %"17", ptr addrspace(5) %2, align 4
%"29" = load i32, ptr addrspace(5) %2, align 4
store i32 %"29", ptr addrspace(5) %"7", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%3 = alloca i32, align 4, addrspace(5)
store i32 %"20", ptr addrspace(5) %3, align 4
%"31" = load i32, ptr addrspace(5) %3, align 4
store i32 %"31", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"21" to ptr
store i32 %"22", ptr %"32", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"34" = inttoptr i64 %"23" to ptr
%"37" = getelementptr inbounds i8, ptr %"34", i64 4
store i32 %"24", ptr %"37", align 4
store i32 %"19", ptr addrspace(5) %3, align 4
%"30" = load i32, ptr addrspace(5) %3, align 4
store i32 %"30", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"31" = inttoptr i64 %"20" to ptr
store i32 %"21", ptr %"31", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"33" = inttoptr i64 %"22" to ptr
%"36" = getelementptr inbounds i8, ptr %"33", i64 4
store i32 %"23", ptr %"36", align 4
ret void
}


@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i16, align 2, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i16, ptr addrspace(1) %"20", align 2
store i16 %"12", ptr addrspace(5) %"6", align 2
%"15" = load i16, ptr addrspace(5) %"6", align 2
%"21" = sext i16 %"15" to i32
store i32 %"21", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"23" = inttoptr i64 %"16" to ptr
store i32 %"17", ptr %"23", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load i16, ptr addrspace(1) %"19", align 2
store i16 %"11", ptr addrspace(5) %"6", align 2
%"14" = load i16, ptr addrspace(5) %"6", align 2
%"20" = sext i16 %"14" to i32
store i32 %"20", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"7", align 4
%"22" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"22", align 4
ret void
}


@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"27":
define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"26":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%0 = inttoptr i64 %"12" to ptr
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%0 = inttoptr i64 %"11" to ptr
%1 = addrspacecast ptr %0 to ptr addrspace(1)
%"21" = ptrtoint ptr addrspace(1) %1 to i64
store i64 %"21", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%2 = inttoptr i64 %"14" to ptr
%"20" = ptrtoint ptr addrspace(1) %1 to i64
store i64 %"20", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%2 = inttoptr i64 %"13" to ptr
%3 = addrspacecast ptr %2 to ptr addrspace(1)
%"23" = ptrtoint ptr addrspace(1) %3 to i64
store i64 %"23", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"22" = ptrtoint ptr addrspace(1) %3 to i64
store i64 %"22", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"15" to ptr addrspace(1)
%"14" = load float, ptr addrspace(1) %"24", align 4
store float %"14", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load float, ptr addrspace(5) %"6", align 4
%"25" = inttoptr i64 %"16" to ptr addrspace(1)
%"15" = load float, ptr addrspace(1) %"25", align 4
store float %"15", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"5", align 8
%"18" = load float, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"17" to ptr addrspace(1)
store float %"18", ptr addrspace(1) %"26", align 4
store float %"17", ptr addrspace(1) %"25", align 4
ret void
}


@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"28":
define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"27":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"25", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load float, ptr %"30", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"6", align 4
%"18" = load float, ptr addrspace(5) %"7", align 4
%"16" = fdiv arcp afn float %"17", %"18"
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load float, ptr addrspace(5) %"6", align 4
%"27" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"27", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"24", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load float, ptr %"29", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
%"15" = fdiv arcp afn float %"16", %"17"
store float %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load float, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"18" to ptr
store float %"19", ptr %"26", align 4
ret void
}


@ -3,44 +3,42 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0
define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 {
"39":
define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
"38":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"31", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"16" to ptr
%"46" = getelementptr inbounds i8, ptr %"32", i64 4
%"15" = load i32, ptr %"46", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"18" to ptr
%"48" = getelementptr inbounds i8, ptr %"33", i64 8
%"17" = load i32, ptr %"48", align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"22" = load i32, ptr addrspace(5) %"8", align 4
%"34" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"20", i32 %"21", i32 %"22")
store i32 %"34", ptr addrspace(5) %"6", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"6", align 4
%"38" = inttoptr i64 %"23" to ptr
store i32 %"24", ptr %"38", align 4
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"30", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
%"45" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load i32, ptr %"45", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
%"47" = getelementptr inbounds i8, ptr %"32", i64 8
%"16" = load i32, ptr %"47", align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
%"21" = load i32, ptr addrspace(5) %"8", align 4
%"33" = call i32 @__zluda_ptx_impl__dp4a_s32_s32(i32 %"19", i32 %"20", i32 %"21")
store i32 %"33", ptr addrspace(5) %"6", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load i32, ptr addrspace(5) %"6", align 4
%"37" = inttoptr i64 %"22" to ptr
store i32 %"23", ptr %"37", align 4
ret void
}


@ -1,69 +1,67 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 {
"57":
define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
"56":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"49" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"49", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = call afn float @llvm.exp2.f32(float %"14")
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"50" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"50", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"51" = inttoptr i64 %"18" to ptr
%"59" = getelementptr inbounds i8, ptr %"51", i64 4
%"17" = load float, ptr %"59", align 4
store float %"17", ptr addrspace(5) %"6", align 4
%"20" = load float, ptr addrspace(5) %"6", align 4
%"19" = call afn float @llvm.exp2.f32(float %"20")
store float %"19", ptr addrspace(5) %"6", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load float, ptr addrspace(5) %"6", align 4
%"52" = inttoptr i64 %"21" to ptr
%"61" = getelementptr inbounds i8, ptr %"52", i64 4
store float %"22", ptr %"61", align 4
%"24" = load i64, ptr addrspace(5) %"4", align 8
%"53" = inttoptr i64 %"24" to ptr
%"63" = getelementptr inbounds i8, ptr %"53", i64 8
%"23" = load float, ptr %"63", align 4
store float %"23", ptr addrspace(5) %"6", align 4
%"26" = load float, ptr addrspace(5) %"6", align 4
%"25" = call afn float @llvm.exp2.f32(float %"26")
store float %"25", ptr addrspace(5) %"6", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load float, ptr addrspace(5) %"6", align 4
%"54" = inttoptr i64 %"27" to ptr
%"65" = getelementptr inbounds i8, ptr %"54", i64 8
store float %"28", ptr %"65", align 4
%"30" = load i64, ptr addrspace(5) %"4", align 8
%"55" = inttoptr i64 %"30" to ptr
%"67" = getelementptr inbounds i8, ptr %"55", i64 12
%"29" = load float, ptr %"67", align 4
store float %"29", ptr addrspace(5) %"6", align 4
%"32" = load float, ptr addrspace(5) %"6", align 4
%"31" = call afn float @llvm.exp2.f32(float %"32")
store float %"31", ptr addrspace(5) %"6", align 4
%"33" = load i64, ptr addrspace(5) %"5", align 8
%"34" = load float, ptr addrspace(5) %"6", align 4
%"56" = inttoptr i64 %"33" to ptr
%"69" = getelementptr inbounds i8, ptr %"56", i64 12
store float %"34", ptr %"69", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"48" = inttoptr i64 %"11" to ptr
%"10" = load float, ptr %"48", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = call afn float @llvm.exp2.f32(float %"13")
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"49" = inttoptr i64 %"14" to ptr
store float %"15", ptr %"49", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"50" = inttoptr i64 %"17" to ptr
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
%"16" = load float, ptr %"58", align 4
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"18" = call afn float @llvm.exp2.f32(float %"19")
store float %"18", ptr addrspace(5) %"6", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load float, ptr addrspace(5) %"6", align 4
%"51" = inttoptr i64 %"20" to ptr
%"60" = getelementptr inbounds i8, ptr %"51", i64 4
store float %"21", ptr %"60", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"52" = inttoptr i64 %"23" to ptr
%"62" = getelementptr inbounds i8, ptr %"52", i64 8
%"22" = load float, ptr %"62", align 4
store float %"22", ptr addrspace(5) %"6", align 4
%"25" = load float, ptr addrspace(5) %"6", align 4
%"24" = call afn float @llvm.exp2.f32(float %"25")
store float %"24", ptr addrspace(5) %"6", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load float, ptr addrspace(5) %"6", align 4
%"53" = inttoptr i64 %"26" to ptr
%"64" = getelementptr inbounds i8, ptr %"53", i64 8
store float %"27", ptr %"64", align 4
%"29" = load i64, ptr addrspace(5) %"4", align 8
%"54" = inttoptr i64 %"29" to ptr
%"66" = getelementptr inbounds i8, ptr %"54", i64 12
%"28" = load float, ptr %"66", align 4
store float %"28", ptr addrspace(5) %"6", align 4
%"31" = load float, ptr addrspace(5) %"6", align 4
%"30" = call afn float @llvm.exp2.f32(float %"31")
store float %"30", ptr addrspace(5) %"6", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load float, ptr addrspace(5) %"6", align 4
%"55" = inttoptr i64 %"32" to ptr
%"68" = getelementptr inbounds i8, ptr %"55", i64 12
store float %"33", ptr %"68", align 4
ret void
}


@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa"
@shared_mem = external hidden addrspace(3) global [0 x i32]
define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i64, ptr addrspace(1) %"20", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"14" = load i64, ptr addrspace(5) %"7", align 8
store i64 %"14", ptr addrspace(3) @shared_mem, align 8
%"15" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"15", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"23" = inttoptr i64 %"16" to ptr addrspace(1)
store i64 %"17", ptr addrspace(1) %"23", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"11" = load i64, ptr addrspace(1) %"19", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(5) %"7", align 8
store i64 %"13", ptr addrspace(3) @shared_mem, align 8
%"14" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"14", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"15" to ptr addrspace(1)
store i64 %"16", ptr addrspace(1) %"22", align 8
ret void
}


@ -3,49 +3,45 @@ target triple = "amdgcn-amd-amdhsa"
@shared_mem = external hidden addrspace(3) global [0 x i32], align 4
define private void @"2"(ptr addrspace(3) %"37") #0 {
"35":
define private void @"2"(ptr addrspace(3) %"35") #0 {
"33":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"3" = alloca i64, align 8, addrspace(5)
%"14" = load i64, ptr addrspace(3) %"37", align 8
store i64 %"14", ptr addrspace(5) %"3", align 8
%"16" = load i64, ptr addrspace(5) %"3", align 8
%"15" = add i64 %"16", 2
store i64 %"15", ptr addrspace(5) %"3", align 8
%"17" = load i64, ptr addrspace(5) %"3", align 8
store i64 %"17", ptr addrspace(3) %"37", align 8
%"12" = load i64, ptr addrspace(3) %"35", align 8
store i64 %"12", ptr addrspace(5) %"3", align 8
%"14" = load i64, ptr addrspace(5) %"3", align 8
%"13" = add i64 %"14", 2
store i64 %"13", ptr addrspace(5) %"3", align 8
%"15" = load i64, ptr addrspace(5) %"3", align 8
store i64 %"15", ptr addrspace(3) %"35", align 8
ret void
}
define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
"36":
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
"34":
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"18" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"18", ptr addrspace(5) %"7", align 8
%"19" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"19", ptr addrspace(5) %"8", align 8
%"21" = load i64, ptr addrspace(5) %"7", align 8
%"31" = inttoptr i64 %"21" to ptr addrspace(1)
%"20" = load i64, ptr addrspace(1) %"31", align 8
store i64 %"20", ptr addrspace(5) %"9", align 8
%"22" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"22", ptr addrspace(3) @shared_mem, align 8
%"16" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"16", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"17", ptr addrspace(5) %"8", align 8
%"19" = load i64, ptr addrspace(5) %"7", align 8
%"29" = inttoptr i64 %"19" to ptr addrspace(1)
%"18" = load i64, ptr addrspace(1) %"29", align 8
store i64 %"18", ptr addrspace(5) %"9", align 8
%"20" = load i64, ptr addrspace(5) %"9", align 8
store i64 %"20", ptr addrspace(3) @shared_mem, align 8
call void @"2"(ptr addrspace(3) @shared_mem)
%"23" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"23", ptr addrspace(5) %"9", align 8
%"24" = load i64, ptr addrspace(5) %"8", align 8
%"25" = load i64, ptr addrspace(5) %"9", align 8
%"34" = inttoptr i64 %"24" to ptr addrspace(1)
store i64 %"25", ptr addrspace(1) %"34", align 8
%"21" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"21", ptr addrspace(5) %"9", align 8
%"22" = load i64, ptr addrspace(5) %"8", align 8
%"23" = load i64, ptr addrspace(5) %"9", align 8
%"32" = inttoptr i64 %"22" to ptr addrspace(1)
store i64 %"23", ptr addrspace(1) %"32", align 8
ret void
}


@ -1,44 +1,42 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
"35":
define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
"34":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"8" = alloca float, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"14" to ptr
%"13" = load float, ptr %"31", align 4
store float %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"16" to ptr
%"37" = getelementptr inbounds i8, ptr %"32", i64 4
%"15" = load float, ptr %"37", align 4
store float %"15", ptr addrspace(5) %"7", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"18" to ptr
%"39" = getelementptr inbounds i8, ptr %"33", i64 8
%"17" = load float, ptr %"39", align 4
store float %"17", ptr addrspace(5) %"8", align 4
%"20" = load float, ptr addrspace(5) %"6", align 4
%"21" = load float, ptr addrspace(5) %"7", align 4
%"22" = load float, ptr addrspace(5) %"8", align 4
%"19" = call float @llvm.fma.f32(float %"20", float %"21", float %"22")
store float %"19", ptr addrspace(5) %"6", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load float, ptr addrspace(5) %"6", align 4
%"34" = inttoptr i64 %"23" to ptr
store float %"24", ptr %"34", align 4
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"30", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"15" to ptr
%"36" = getelementptr inbounds i8, ptr %"31", i64 4
%"14" = load float, ptr %"36", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"32" = inttoptr i64 %"17" to ptr
%"38" = getelementptr inbounds i8, ptr %"32", i64 8
%"16" = load float, ptr %"38", align 4
store float %"16", ptr addrspace(5) %"8", align 4
%"19" = load float, ptr addrspace(5) %"6", align 4
%"20" = load float, ptr addrspace(5) %"7", align 4
%"21" = load float, ptr addrspace(5) %"8", align 4
%"18" = call float @llvm.fma.f32(float %"19", float %"20", float %"21")
store float %"18", ptr addrspace(5) %"6", align 4
%"22" = load i64, ptr addrspace(5) %"5", align 8
%"23" = load float, ptr addrspace(5) %"6", align 4
%"33" = inttoptr i64 %"22" to ptr
store float %"23", ptr %"33", align 4
ret void
}


@ -1,56 +1,52 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define private float @"1"(float %"17", float %"18") #0 {
"40":
define private float @"1"(float %"15", float %"16") #0 {
"38":
%"3" = alloca float, align 4, addrspace(5)
%"4" = alloca float, align 4, addrspace(5)
%"2" = alloca float, align 4, addrspace(5)
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
store float %"17", ptr addrspace(5) %"3", align 4
store float %"18", ptr addrspace(5) %"4", align 4
%"20" = load float, ptr addrspace(5) %"3", align 4
%"21" = load float, ptr addrspace(5) %"4", align 4
%"19" = fadd float %"20", %"21"
store float %"19", ptr addrspace(5) %"2", align 4
%"22" = load float, ptr addrspace(5) %"2", align 4
ret float %"22"
store float %"15", ptr addrspace(5) %"3", align 4
store float %"16", ptr addrspace(5) %"4", align 4
%"18" = load float, ptr addrspace(5) %"3", align 4
%"19" = load float, ptr addrspace(5) %"4", align 4
%"17" = fadd float %"18", %"19"
store float %"17", ptr addrspace(5) %"2", align 4
%"20" = load float, ptr addrspace(5) %"2", align 4
ret float %"20"
}
define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
"41":
%"15" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"15", align 1
%"16" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"16", align 1
define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
"39":
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i64, align 8, addrspace(5)
%"10" = alloca i64, align 8, addrspace(5)
%"11" = alloca i64, align 8, addrspace(5)
%"12" = alloca i64, align 8, addrspace(5)
%"23" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"23", ptr addrspace(5) %"8", align 8
%"24" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"24", ptr addrspace(5) %"9", align 8
%"26" = load i64, ptr addrspace(5) %"8", align 8
%"38" = inttoptr i64 %"26" to ptr
%"25" = load i64, ptr %"38", align 8
store i64 %"25", ptr addrspace(5) %"10", align 8
%"28" = load i64, ptr addrspace(5) %"10", align 8
%"27" = add i64 %"28", 1
store i64 %"27", ptr addrspace(5) %"11", align 8
%"21" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"21", ptr addrspace(5) %"8", align 8
%"22" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"22", ptr addrspace(5) %"9", align 8
%"24" = load i64, ptr addrspace(5) %"8", align 8
%"36" = inttoptr i64 %"24" to ptr
%"23" = load i64, ptr %"36", align 8
store i64 %"23", ptr addrspace(5) %"10", align 8
%"26" = load i64, ptr addrspace(5) %"10", align 8
%"25" = add i64 %"26", 1
store i64 %"25", ptr addrspace(5) %"11", align 8
store i64 ptrtoint (ptr @"1" to i64), ptr addrspace(5) %"12", align 8
%"31" = load i64, ptr addrspace(5) %"11", align 8
%"32" = load i64, ptr addrspace(5) %"12", align 8
%"30" = add i64 %"31", %"32"
store i64 %"30", ptr addrspace(5) %"11", align 8
%"33" = load i64, ptr addrspace(5) %"9", align 8
%"34" = load i64, ptr addrspace(5) %"11", align 8
%"39" = inttoptr i64 %"33" to ptr
store i64 %"34", ptr %"39", align 8
%"29" = load i64, ptr addrspace(5) %"11", align 8
%"30" = load i64, ptr addrspace(5) %"12", align 8
%"28" = add i64 %"29", %"30"
store i64 %"28", ptr addrspace(5) %"11", align 8
%"31" = load i64, ptr addrspace(5) %"9", align 8
%"32" = load i64, ptr addrspace(5) %"11", align 8
%"37" = inttoptr i64 %"31" to ptr
store i64 %"32", ptr %"37", align 8
ret void
}


@ -4,66 +4,64 @@ target triple = "amdgcn-amd-amdhsa"
@foo = protected addrspace(1) externally_initialized global [4 x i32] [i32 2, i32 3, i32 5, i32 7]
@bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)]
define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 {
"58":
define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
"57":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"12" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"11" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
%0 = alloca i32, align 4, addrspace(5)
store i32 1, ptr addrspace(5) %0, align 4
%"13" = load i32, ptr addrspace(5) %0, align 4
store i32 %"13", ptr addrspace(5) %"8", align 4
%"14" = load i64, ptr addrspace(1) @bar, align 8
store i64 %"14", ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"50" = inttoptr i64 %"16" to ptr
%"15" = load i32, ptr %"50", align 4
store i32 %"15", ptr addrspace(5) %"9", align 4
%"18" = load i32, ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"9", align 4
%"17" = mul i32 %"18", %"19"
store i32 %"17", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8
store i64 %"20", ptr addrspace(5) %"6", align 8
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"52" = inttoptr i64 %"22" to ptr
%"21" = load i32, ptr %"52", align 4
store i32 %"21", ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"25" = load i32, ptr addrspace(5) %"9", align 4
%"23" = mul i32 %"24", %"25"
store i32 %"23", ptr addrspace(5) %"8", align 4
%"26" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8
store i64 %"26", ptr addrspace(5) %"6", align 8
%"28" = load i64, ptr addrspace(5) %"6", align 8
%"54" = inttoptr i64 %"28" to ptr
%"27" = load i32, ptr %"54", align 4
store i32 %"27", ptr addrspace(5) %"9", align 4
%"30" = load i32, ptr addrspace(5) %"8", align 4
%"31" = load i32, ptr addrspace(5) %"9", align 4
%"29" = mul i32 %"30", %"31"
store i32 %"29", ptr addrspace(5) %"8", align 4
%"32" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8
store i64 %"32", ptr addrspace(5) %"6", align 8
%"34" = load i64, ptr addrspace(5) %"6", align 8
%"56" = inttoptr i64 %"34" to ptr
%"33" = load i32, ptr %"56", align 4
store i32 %"33", ptr addrspace(5) %"9", align 4
%"36" = load i32, ptr addrspace(5) %"8", align 4
%"37" = load i32, ptr addrspace(5) %"9", align 4
%"35" = mul i32 %"36", %"37"
store i32 %"35", ptr addrspace(5) %"8", align 4
%"38" = load i64, ptr addrspace(5) %"7", align 8
%"39" = load i32, ptr addrspace(5) %"8", align 4
%"57" = inttoptr i64 %"38" to ptr
store i32 %"39", ptr %"57", align 4
%"12" = load i32, ptr addrspace(5) %0, align 4
store i32 %"12", ptr addrspace(5) %"8", align 4
%"13" = load i64, ptr addrspace(1) @bar, align 8
store i64 %"13", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"49" = inttoptr i64 %"15" to ptr
%"14" = load i32, ptr %"49", align 4
store i32 %"14", ptr addrspace(5) %"9", align 4
%"17" = load i32, ptr addrspace(5) %"8", align 4
%"18" = load i32, ptr addrspace(5) %"9", align 4
%"16" = mul i32 %"17", %"18"
store i32 %"16", ptr addrspace(5) %"8", align 4
%"19" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 8), align 8
store i64 %"19", ptr addrspace(5) %"6", align 8
%"21" = load i64, ptr addrspace(5) %"6", align 8
%"51" = inttoptr i64 %"21" to ptr
%"20" = load i32, ptr %"51", align 4
store i32 %"20", ptr addrspace(5) %"9", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"24" = load i32, ptr addrspace(5) %"9", align 4
%"22" = mul i32 %"23", %"24"
store i32 %"22", ptr addrspace(5) %"8", align 4
%"25" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 16), align 8
store i64 %"25", ptr addrspace(5) %"6", align 8
%"27" = load i64, ptr addrspace(5) %"6", align 8
%"53" = inttoptr i64 %"27" to ptr
%"26" = load i32, ptr %"53", align 4
store i32 %"26", ptr addrspace(5) %"9", align 4
%"29" = load i32, ptr addrspace(5) %"8", align 4
%"30" = load i32, ptr addrspace(5) %"9", align 4
%"28" = mul i32 %"29", %"30"
store i32 %"28", ptr addrspace(5) %"8", align 4
%"31" = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @bar, i64 24), align 8
store i64 %"31", ptr addrspace(5) %"6", align 8
%"33" = load i64, ptr addrspace(5) %"6", align 8
%"55" = inttoptr i64 %"33" to ptr
%"32" = load i32, ptr %"55", align 4
store i32 %"32", ptr addrspace(5) %"9", align 4
%"35" = load i32, ptr addrspace(5) %"8", align 4
%"36" = load i32, ptr addrspace(5) %"9", align 4
%"34" = mul i32 %"35", %"36"
store i32 %"34", ptr addrspace(5) %"8", align 4
%"37" = load i64, ptr addrspace(5) %"7", align 8
%"38" = load i32, ptr addrspace(5) %"8", align 4
%"56" = inttoptr i64 %"37" to ptr
store i32 %"38", ptr %"56", align 4
ret void
}


@ -4,29 +4,27 @@ target triple = "amdgcn-amd-amdhsa"
@asdas = protected addrspace(1) externally_initialized global [4 x [2 x i32]] [[2 x i32] [i32 -1, i32 2], [2 x i32] [i32 3, i32 0], [2 x i32] zeroinitializer, [2 x i32] zeroinitializer]
@foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer]
define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"22":
define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"21":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%0 = alloca i64, align 8, addrspace(5)
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8
%"11" = load i64, ptr addrspace(5) %0, align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"10" = load i64, ptr addrspace(5) %0, align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"11" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(5) %"6", align 8
%"19" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i32, ptr addrspace(1) %"19", align 4
store i32 %"12", ptr addrspace(5) %"8", align 4
%"14" = load i64, ptr addrspace(5) %"7", align 8
%"15" = load i32, ptr addrspace(5) %"8", align 4
%"20" = inttoptr i64 %"14" to ptr addrspace(1)
%"13" = load i32, ptr addrspace(1) %"20", align 4
store i32 %"13", ptr addrspace(5) %"8", align 4
%"15" = load i64, ptr addrspace(5) %"7", align 8
%"16" = load i32, ptr addrspace(5) %"8", align 4
%"21" = inttoptr i64 %"15" to ptr addrspace(1)
store i32 %"16", ptr addrspace(1) %"21", align 4
store i32 %"15", ptr addrspace(1) %"20", align 4
ret void
}


@ -3,41 +3,39 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0
define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
"40":
define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
"39":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"14" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"28", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"29", align 8
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"31" = inttoptr i64 %"18" to ptr
%"30" = load i32, ptr %"31", align 4
store i32 %"30", ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"32" = add i32 %"20", 1
store i32 %"32", ptr addrspace(5) %"7", align 4
%"12" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt()
store i64 %"15", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"30" = inttoptr i64 %"17" to ptr
%"29" = load i32, ptr %"30", align 4
store i32 %"29", ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"31" = add i32 %"19", 1
store i32 %"31", ptr addrspace(5) %"7", align 4
%"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt()
%0 = alloca i32, align 4, addrspace(5)
store i32 %"12", ptr addrspace(5) %0, align 4
%"34" = load i32, ptr addrspace(5) %0, align 4
store i32 %"34", ptr addrspace(5) %"8", align 4
%"23" = load i32, ptr addrspace(5) %"7", align 4
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"35" = add i32 %"23", %"24"
store i32 %"35", ptr addrspace(5) %"7", align 4
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"7", align 4
%"38" = inttoptr i64 %"25" to ptr
store i32 %"26", ptr %"38", align 4
store i32 %"11", ptr addrspace(5) %0, align 4
%"33" = load i32, ptr addrspace(5) %0, align 4
store i32 %"33", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"34" = add i32 %"22", %"23"
store i32 %"34", ptr addrspace(5) %"7", align 4
%"24" = load i64, ptr addrspace(5) %"5", align 8
%"25" = load i32, ptr addrspace(5) %"7", align 4
%"37" = inttoptr i64 %"24" to ptr
store i32 %"25", ptr %"37", align 4
ret void
}


@ -1,27 +1,25 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
"19":
define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
"18":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"16" = inttoptr i64 %"11" to ptr
%"10" = load i64, ptr %"16", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"6", align 8
%"17" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"17", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"18" = inttoptr i64 %"13" to ptr
store i64 %"14", ptr %"18", align 8
store i64 %"13", ptr %"17", align 8
ret void
}


@ -1,35 +1,33 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"22":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%0 = alloca i64, align 8, addrspace(5)
store i64 81985529216486895, ptr addrspace(5) %0, align 8
%"11" = load i64, ptr addrspace(5) %0, align 8
%"10" = load i64, ptr addrspace(5) %0, align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
%"18" = load float, ptr addrspace(1) %"19", align 4
%"23" = bitcast float %"18" to i32
%"11" = zext i32 %"23" to i64
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
%"19" = load float, ptr addrspace(1) %"20", align 4
%"24" = bitcast float %"19" to i32
%"12" = zext i32 %"24" to i64
store i64 %"12", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"21" = inttoptr i64 %"14" to ptr addrspace(1)
%"26" = trunc i64 %"15" to i32
%"22" = bitcast i32 %"26" to float
store float %"22", ptr addrspace(1) %"21", align 4
%"25" = trunc i64 %"14" to i32
%"21" = bitcast i32 %"25" to float
store float %"21", ptr addrspace(1) %"20", align 4
ret void
}


@@ -1,38 +1,36 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
"30":
define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"29":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"26", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"25", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"14" to ptr
%"31" = getelementptr inbounds i8, ptr %"26", i64 4
%"13" = load i32, ptr %"31", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"7", align 4
%"27" = inttoptr i64 %"15" to ptr
%"32" = getelementptr inbounds i8, ptr %"27", i64 4
%"14" = load i32, ptr %"32", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"28" = inttoptr i64 %"16" to ptr
store i32 %"17", ptr %"28", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"29" = inttoptr i64 %"18" to ptr
%"34" = getelementptr inbounds i8, ptr %"29", i64 4
store i32 %"19", ptr %"34", align 4
store i32 %"16", ptr %"27", align 4
%"17" = load i64, ptr addrspace(5) %"5", align 8
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"28" = inttoptr i64 %"17" to ptr
%"33" = getelementptr inbounds i8, ptr %"28", i64 4
store i32 %"18", ptr %"33", align 4
ret void
}


@@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"19", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = call afn float @llvm.log2.f32(float %"14")
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load float, ptr %"18", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = call afn float @llvm.log2.f32(float %"13")
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store float %"15", ptr %"19", align 4
ret void
}


@@ -1,28 +1,26 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
"19":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca [8 x i8], align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"15", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"17" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"17", align 8
store i64 %"11", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"7", align 8
%"18" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"18", align 8
store i64 %"12", ptr addrspace(5) %"7", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"7", align 8
%"19" = inttoptr i64 %"14" to ptr
store i64 %"15", ptr %"19", align 8
store i64 %"14", ptr %"18", align 8
ret void
}


@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61", ptr addrspace(4) byref(i64) %"62") #0 {
"78":
define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 {
"77":
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
%"15" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"15", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -17,69 +15,69 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"61"
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i32, align 4, addrspace(5)
%"13" = alloca i32, align 4, addrspace(5)
%"15" = load i64, ptr addrspace(4) %"60", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"61", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"62", align 8
store i64 %"17", ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"64" = inttoptr i64 %"19" to ptr
%"63" = load i32, ptr %"64", align 4
store i32 %"63", ptr addrspace(5) %"8", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"65" = inttoptr i64 %"21" to ptr
%"80" = getelementptr inbounds i8, ptr %"65", i64 4
%"66" = load i32, ptr %"80", align 4
store i32 %"66", ptr addrspace(5) %"9", align 4
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"67" = inttoptr i64 %"23" to ptr
%"82" = getelementptr inbounds i8, ptr %"67", i64 8
%"22" = load i32, ptr %"82", align 4
store i32 %"22", ptr addrspace(5) %"10", align 4
%"26" = load i32, ptr addrspace(5) %"8", align 4
%"27" = load i32, ptr addrspace(5) %"9", align 4
%"28" = load i32, ptr addrspace(5) %"10", align 4
%0 = sext i32 %"26" to i64
%1 = sext i32 %"27" to i64
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"63" = inttoptr i64 %"18" to ptr
%"62" = load i32, ptr %"63", align 4
store i32 %"62", ptr addrspace(5) %"8", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"64" = inttoptr i64 %"20" to ptr
%"79" = getelementptr inbounds i8, ptr %"64", i64 4
%"65" = load i32, ptr %"79", align 4
store i32 %"65", ptr addrspace(5) %"9", align 4
%"22" = load i64, ptr addrspace(5) %"4", align 8
%"66" = inttoptr i64 %"22" to ptr
%"81" = getelementptr inbounds i8, ptr %"66", i64 8
%"21" = load i32, ptr %"81", align 4
store i32 %"21", ptr addrspace(5) %"10", align 4
%"25" = load i32, ptr addrspace(5) %"8", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"27" = load i32, ptr addrspace(5) %"10", align 4
%0 = sext i32 %"25" to i64
%1 = sext i32 %"26" to i64
%2 = mul nsw i64 %0, %1
%3 = lshr i64 %2, 32
%4 = trunc i64 %3 to i32
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"28")
%"24" = extractvalue { i32, i1 } %5, 0
%"25" = extractvalue { i32, i1 } %5, 1
store i32 %"24", ptr addrspace(5) %"7", align 4
store i1 %"25", ptr addrspace(5) %"14", align 1
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27")
%"23" = extractvalue { i32, i1 } %5, 0
%"24" = extractvalue { i32, i1 } %5, 1
store i32 %"23", ptr addrspace(5) %"7", align 4
store i1 %"24", ptr addrspace(5) %"14", align 1
%6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2)
%"29" = extractvalue { i32, i1 } %6, 0
%"30" = extractvalue { i32, i1 } %6, 1
store i32 %"29", ptr addrspace(5) %"6", align 4
store i1 %"30", ptr addrspace(5) %"14", align 1
%"32" = load i1, ptr addrspace(5) %"14", align 1
%7 = zext i1 %"32" to i32
%"71" = add i32 0, %7
store i32 %"71", ptr addrspace(5) %"12", align 4
%"28" = extractvalue { i32, i1 } %6, 0
%"29" = extractvalue { i32, i1 } %6, 1
store i32 %"28", ptr addrspace(5) %"6", align 4
store i1 %"29", ptr addrspace(5) %"14", align 1
%"31" = load i1, ptr addrspace(5) %"14", align 1
%7 = zext i1 %"31" to i32
%"70" = add i32 0, %7
store i32 %"70", ptr addrspace(5) %"12", align 4
%8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
%"33" = extractvalue { i32, i1 } %8, 0
%"34" = extractvalue { i32, i1 } %8, 1
store i32 %"33", ptr addrspace(5) %"6", align 4
store i1 %"34", ptr addrspace(5) %"14", align 1
%"36" = load i1, ptr addrspace(5) %"14", align 1
%9 = zext i1 %"36" to i32
%"72" = add i32 0, %9
store i32 %"72", ptr addrspace(5) %"13", align 4
%"37" = load i64, ptr addrspace(5) %"5", align 8
%"38" = load i32, ptr addrspace(5) %"7", align 4
%"73" = inttoptr i64 %"37" to ptr
store i32 %"38", ptr %"73", align 4
%"39" = load i64, ptr addrspace(5) %"5", align 8
%"40" = load i32, ptr addrspace(5) %"12", align 4
%"74" = inttoptr i64 %"39" to ptr
%"84" = getelementptr inbounds i8, ptr %"74", i64 4
store i32 %"40", ptr %"84", align 4
%"41" = load i64, ptr addrspace(5) %"5", align 8
%"42" = load i32, ptr addrspace(5) %"13", align 4
%"76" = inttoptr i64 %"41" to ptr
%"86" = getelementptr inbounds i8, ptr %"76", i64 8
store i32 %"42", ptr %"86", align 4
%"32" = extractvalue { i32, i1 } %8, 0
%"33" = extractvalue { i32, i1 } %8, 1
store i32 %"32", ptr addrspace(5) %"6", align 4
store i1 %"33", ptr addrspace(5) %"14", align 1
%"35" = load i1, ptr addrspace(5) %"14", align 1
%9 = zext i1 %"35" to i32
%"71" = add i32 0, %9
store i32 %"71", ptr addrspace(5) %"13", align 4
%"36" = load i64, ptr addrspace(5) %"5", align 8
%"37" = load i32, ptr addrspace(5) %"7", align 4
%"72" = inttoptr i64 %"36" to ptr
store i32 %"37", ptr %"72", align 4
%"38" = load i64, ptr addrspace(5) %"5", align 8
%"39" = load i32, ptr addrspace(5) %"12", align 4
%"73" = inttoptr i64 %"38" to ptr
%"83" = getelementptr inbounds i8, ptr %"73", i64 4
store i32 %"39", ptr %"83", align 4
%"40" = load i64, ptr addrspace(5) %"5", align 8
%"41" = load i32, ptr addrspace(5) %"13", align 4
%"75" = inttoptr i64 %"40" to ptr
%"85" = getelementptr inbounds i8, ptr %"75", i64 8
store i32 %"41", ptr %"85", align 4
ret void
}


@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 {
"76":
define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 {
"75":
%"13" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"13", align 1
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -16,67 +14,67 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"53",
%"10" = alloca i32, align 4, addrspace(5)
%"11" = alloca i32, align 4, addrspace(5)
%"12" = alloca i64, align 8, addrspace(5)
%"14" = load i64, ptr addrspace(4) %"52", align 8
store i64 %"14", ptr addrspace(5) %"4", align 8
%"15" = load i64, ptr addrspace(4) %"53", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"54", align 8
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"56" = inttoptr i64 %"18" to ptr
%"55" = load i32, ptr %"56", align 4
store i32 %"55", ptr addrspace(5) %"9", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"57" = inttoptr i64 %"20" to ptr
%"78" = getelementptr inbounds i8, ptr %"57", i64 4
%"58" = load i32, ptr %"78", align 4
store i32 %"58", ptr addrspace(5) %"10", align 4
%"22" = load i64, ptr addrspace(5) %"4", align 8
%"59" = inttoptr i64 %"22" to ptr
%"80" = getelementptr inbounds i8, ptr %"59", i64 8
%"21" = load i64, ptr %"80", align 8
store i64 %"21", ptr addrspace(5) %"12", align 8
%"24" = load i64, ptr addrspace(5) %"4", align 8
%"60" = inttoptr i64 %"24" to ptr
%"82" = getelementptr inbounds i8, ptr %"60", i64 16
%"61" = load i32, ptr %"82", align 4
store i32 %"61", ptr addrspace(5) %"11", align 4
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"27" = load i32, ptr addrspace(5) %"10", align 4
%"28" = load i32, ptr addrspace(5) %"11", align 4
%0 = mul i32 %"26", %"27"
%"25" = add i32 %0, %"28"
store i32 %"25", ptr addrspace(5) %"6", align 4
%"30" = load i32, ptr addrspace(5) %"9", align 4
%"31" = load i32, ptr addrspace(5) %"10", align 4
%"32" = load i32, ptr addrspace(5) %"11", align 4
%1 = sext i32 %"30" to i64
%2 = sext i32 %"31" to i64
store i64 %"15", ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"55" = inttoptr i64 %"17" to ptr
%"54" = load i32, ptr %"55", align 4
store i32 %"54", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"56" = inttoptr i64 %"19" to ptr
%"77" = getelementptr inbounds i8, ptr %"56", i64 4
%"57" = load i32, ptr %"77", align 4
store i32 %"57", ptr addrspace(5) %"10", align 4
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"58" = inttoptr i64 %"21" to ptr
%"79" = getelementptr inbounds i8, ptr %"58", i64 8
%"20" = load i64, ptr %"79", align 8
store i64 %"20", ptr addrspace(5) %"12", align 8
%"23" = load i64, ptr addrspace(5) %"4", align 8
%"59" = inttoptr i64 %"23" to ptr
%"81" = getelementptr inbounds i8, ptr %"59", i64 16
%"60" = load i32, ptr %"81", align 4
store i32 %"60", ptr addrspace(5) %"11", align 4
%"25" = load i32, ptr addrspace(5) %"9", align 4
%"26" = load i32, ptr addrspace(5) %"10", align 4
%"27" = load i32, ptr addrspace(5) %"11", align 4
%0 = mul i32 %"25", %"26"
%"24" = add i32 %0, %"27"
store i32 %"24", ptr addrspace(5) %"6", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
%"30" = load i32, ptr addrspace(5) %"10", align 4
%"31" = load i32, ptr addrspace(5) %"11", align 4
%1 = sext i32 %"29" to i64
%2 = sext i32 %"30" to i64
%3 = mul nsw i64 %1, %2
%4 = lshr i64 %3, 32
%5 = trunc i64 %4 to i32
%"29" = add i32 %5, %"32"
store i32 %"29", ptr addrspace(5) %"7", align 4
%"34" = load i32, ptr addrspace(5) %"9", align 4
%"35" = load i32, ptr addrspace(5) %"10", align 4
%"36" = load i64, ptr addrspace(5) %"12", align 8
%6 = sext i32 %"34" to i64
%7 = sext i32 %"35" to i64
%"28" = add i32 %5, %"31"
store i32 %"28", ptr addrspace(5) %"7", align 4
%"33" = load i32, ptr addrspace(5) %"9", align 4
%"34" = load i32, ptr addrspace(5) %"10", align 4
%"35" = load i64, ptr addrspace(5) %"12", align 8
%6 = sext i32 %"33" to i64
%7 = sext i32 %"34" to i64
%8 = mul nsw i64 %6, %7
%"68" = add i64 %8, %"36"
store i64 %"68", ptr addrspace(5) %"8", align 8
%"37" = load i64, ptr addrspace(5) %"5", align 8
%"38" = load i32, ptr addrspace(5) %"6", align 4
%"72" = inttoptr i64 %"37" to ptr
store i32 %"38", ptr %"72", align 4
%"39" = load i64, ptr addrspace(5) %"5", align 8
%"40" = load i32, ptr addrspace(5) %"7", align 4
%"73" = inttoptr i64 %"39" to ptr
%"84" = getelementptr inbounds i8, ptr %"73", i64 8
store i32 %"40", ptr %"84", align 4
%"41" = load i64, ptr addrspace(5) %"5", align 8
%"42" = load i64, ptr addrspace(5) %"8", align 8
%"74" = inttoptr i64 %"41" to ptr
%"86" = getelementptr inbounds i8, ptr %"74", i64 16
store i64 %"42", ptr %"86", align 8
%"67" = add i64 %8, %"35"
store i64 %"67", ptr addrspace(5) %"8", align 8
%"36" = load i64, ptr addrspace(5) %"5", align 8
%"37" = load i32, ptr addrspace(5) %"6", align 4
%"71" = inttoptr i64 %"36" to ptr
store i32 %"37", ptr %"71", align 4
%"38" = load i64, ptr addrspace(5) %"5", align 8
%"39" = load i32, ptr addrspace(5) %"7", align 4
%"72" = inttoptr i64 %"38" to ptr
%"83" = getelementptr inbounds i8, ptr %"72", i64 8
store i32 %"39", ptr %"83", align 4
%"40" = load i64, ptr addrspace(5) %"5", align 8
%"41" = load i64, ptr addrspace(5) %"8", align 8
%"73" = inttoptr i64 %"40" to ptr
%"85" = getelementptr inbounds i8, ptr %"73", i64 16
store i64 %"41", ptr %"85", align 8
ret void
}


@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
"55":
define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
"54":
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
@@ -14,54 +12,54 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"41",
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"12" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"13", ptr addrspace(5) %"4", align 8
%"14" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"14", ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"16" to ptr
%"43" = load i32, ptr %"44", align 4
store i32 %"43", ptr addrspace(5) %"8", align 4
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"45" = inttoptr i64 %"18" to ptr
%"57" = getelementptr inbounds i8, ptr %"45", i64 4
%"46" = load i32, ptr %"57", align 4
store i32 %"46", ptr addrspace(5) %"9", align 4
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"47" = inttoptr i64 %"20" to ptr
%"59" = getelementptr inbounds i8, ptr %"47", i64 8
%"19" = load i32, ptr %"59", align 4
store i32 %"19", ptr addrspace(5) %"10", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
%"24" = load i32, ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"10", align 4
%0 = mul i32 %"23", %"24"
%1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"25")
%"21" = extractvalue { i32, i1 } %1, 0
%"22" = extractvalue { i32, i1 } %1, 1
store i32 %"21", ptr addrspace(5) %"6", align 4
store i1 %"22", ptr addrspace(5) %"11", align 1
%"27" = load i1, ptr addrspace(5) %"11", align 1
%"28" = load i32, ptr addrspace(5) %"8", align 4
%"29" = load i32, ptr addrspace(5) %"9", align 4
%2 = sext i32 %"28" to i64
%3 = sext i32 %"29" to i64
store i64 %"13", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"43" = inttoptr i64 %"15" to ptr
%"42" = load i32, ptr %"43", align 4
store i32 %"42", ptr addrspace(5) %"8", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"44" = inttoptr i64 %"17" to ptr
%"56" = getelementptr inbounds i8, ptr %"44", i64 4
%"45" = load i32, ptr %"56", align 4
store i32 %"45", ptr addrspace(5) %"9", align 4
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"46" = inttoptr i64 %"19" to ptr
%"58" = getelementptr inbounds i8, ptr %"46", i64 8
%"18" = load i32, ptr %"58", align 4
store i32 %"18", ptr addrspace(5) %"10", align 4
%"22" = load i32, ptr addrspace(5) %"8", align 4
%"23" = load i32, ptr addrspace(5) %"9", align 4
%"24" = load i32, ptr addrspace(5) %"10", align 4
%0 = mul i32 %"22", %"23"
%1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24")
%"20" = extractvalue { i32, i1 } %1, 0
%"21" = extractvalue { i32, i1 } %1, 1
store i32 %"20", ptr addrspace(5) %"6", align 4
store i1 %"21", ptr addrspace(5) %"11", align 1
%"26" = load i1, ptr addrspace(5) %"11", align 1
%"27" = load i32, ptr addrspace(5) %"8", align 4
%"28" = load i32, ptr addrspace(5) %"9", align 4
%2 = sext i32 %"27" to i64
%3 = sext i32 %"28" to i64
%4 = mul nsw i64 %2, %3
%5 = lshr i64 %4, 32
%6 = trunc i64 %5 to i32
%7 = zext i1 %"27" to i32
%7 = zext i1 %"26" to i32
%8 = add i32 %6, 3
%"26" = add i32 %8, %7
store i32 %"26", ptr addrspace(5) %"7", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i32, ptr addrspace(5) %"6", align 4
%"53" = inttoptr i64 %"30" to ptr
store i32 %"31", ptr %"53", align 4
%"32" = load i64, ptr addrspace(5) %"5", align 8
%"33" = load i32, ptr addrspace(5) %"7", align 4
%"54" = inttoptr i64 %"32" to ptr
%"61" = getelementptr inbounds i8, ptr %"54", i64 4
store i32 %"33", ptr %"61", align 4
%"25" = add i32 %8, %7
store i32 %"25", ptr addrspace(5) %"7", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"6", align 4
%"52" = inttoptr i64 %"29" to ptr
store i32 %"30", ptr %"52", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"7", align 4
%"53" = inttoptr i64 %"31" to ptr
%"60" = getelementptr inbounds i8, ptr %"53", i64 4
store i32 %"32", ptr %"60", align 4
ret void
}


@@ -1,73 +0,0 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @madc_cc2(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 {
"66":
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"12" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"12", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"10" = alloca i32, align 4, addrspace(5)
%"13" = load i64, ptr addrspace(4) %"53", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
%"14" = extractvalue { i32, i1 } %0, 0
%"15" = extractvalue { i32, i1 } %0, 1
store i32 %"14", ptr addrspace(5) %"6", align 4
store i1 %"15", ptr addrspace(5) %"11", align 1
%"18" = load i1, ptr addrspace(5) %"11", align 1
%1 = zext i1 %"18" to i32
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
%3 = extractvalue { i32, i1 } %2, 0
%4 = extractvalue { i32, i1 } %2, 1
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
%"54" = extractvalue { i32, i1 } %5, 0
%6 = extractvalue { i32, i1 } %5, 1
%"17" = xor i1 %4, %6
store i32 %"54", ptr addrspace(5) %"7", align 4
store i1 %"17", ptr addrspace(5) %"11", align 1
%"20" = load i1, ptr addrspace(5) %"11", align 1
%7 = zext i1 %"20" to i32
%"55" = add i32 0, %7
store i32 %"55", ptr addrspace(5) %"8", align 4
%"22" = load i1, ptr addrspace(5) %"11", align 1
%8 = zext i1 %"22" to i32
%"56" = add i32 0, %8
store i32 %"56", ptr addrspace(5) %"9", align 4
%"24" = load i1, ptr addrspace(5) %"12", align 1
%9 = zext i1 %"24" to i32
%"57" = sub i32 2, %9
store i32 %"57", ptr addrspace(5) %"10", align 4
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"7", align 4
%"58" = inttoptr i64 %"25" to ptr
store i32 %"26", ptr %"58", align 4
%"27" = load i64, ptr addrspace(5) %"5", align 8
%"28" = load i32, ptr addrspace(5) %"8", align 4
%"60" = inttoptr i64 %"27" to ptr
%"68" = getelementptr inbounds i8, ptr %"60", i64 4
store i32 %"28", ptr %"68", align 4
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load i32, ptr addrspace(5) %"9", align 4
%"62" = inttoptr i64 %"29" to ptr
%"70" = getelementptr inbounds i8, ptr %"62", i64 8
store i32 %"30", ptr %"70", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i32, ptr addrspace(5) %"10", align 4
%"64" = inttoptr i64 %"31" to ptr
%"72" = getelementptr inbounds i8, ptr %"64", i64 12
store i32 %"32", ptr %"72", align 4
ret void
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }


@@ -1,38 +0,0 @@
.version 6.5
.target sm_30
.address_size 64
.visible .entry madc_cc2(
.param .u64 input,
.param .u64 output
)
{
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .u32 unused;
.reg .b32 result_1;
.reg .b32 carry_out_1_1;
.reg .b32 carry_out_1_2;
.reg .b32 carry_out_1_3;
ld.param.u64 out_addr, [output];
// set carry=1
mad.lo.cc.u32 unused, 0, 0, 4294967295;
// overflow addition
madc.lo.cc.u32 result_1, 1, 1, 4294967295;
// write carry
madc.lo.u32 carry_out_1_1, 0, 0, 0;
// overflow is also detected by addc
addc.u32 carry_out_1_2, 0, 0;
// but not subc
subc.u32 carry_out_1_3, 2, 0;
st.s32 [out_addr], result_1;
st.s32 [out_addr+4], carry_out_1_1;
st.s32 [out_addr+8], carry_out_1_2;
st.s32 [out_addr+12], carry_out_1_3;
ret;
}
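The two deleted files above are the old madc_cc2 test: the PTX walks the carry flag through mad.lo.cc, madc, addc and subc, and the generated IR emulates add-with-carry by chaining two llvm.uadd.with.overflow.i32 calls and folding their overflow bits into the stored carry (the two partial adds can never both wrap, so the xor used there behaves like an or). The sketch below is not from the repository; it is a minimal, hypothetical restatement of that lowering for one 32-bit add with carry-in and carry-out, with an invented function name.

; Hypothetical helper (illustration only): d = a + b + carry_in, returning
; the 32-bit sum together with the carry-out, in the same style as the
; lowering shown in the deleted IR above.
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

define { i32, i1 } @addc_sketch(i32 %a, i32 %b, i1 %carry_in) {
  %cin = zext i1 %carry_in to i32
  ; first partial add: a + b
  %r1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %sum1 = extractvalue { i32, i1 } %r1, 0
  %ovf1 = extractvalue { i32, i1 } %r1, 1
  ; second partial add: (a + b) + carry_in
  %r2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %sum1, i32 %cin)
  %sum2 = extractvalue { i32, i1 } %r2, 0
  %ovf2 = extractvalue { i32, i1 } %r2, 1
  ; at most one of the two partial adds can wrap, so xor is equivalent to or
  %carry = xor i1 %ovf1, %ovf2
  %ret0 = insertvalue { i32, i1 } undef, i32 %sum2, 0
  %ret1 = insertvalue { i32, i1 } %ret0, i1 %carry, 1
  ret { i32, i1 } %ret1
}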


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"28":
define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"27":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"25", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load i32, ptr %"30", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"16" = call i32 @llvm.smax.i32(i32 %"17", i32 %"18")
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"27" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"27", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"24", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load i32, ptr %"29", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"15" = call i32 @llvm.smax.i32(i32 %"16", i32 %"17")
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"18" to ptr
store i32 %"19", ptr %"26", align 4
ret void
}


@@ -1,28 +1,26 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
"20":
define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
"19":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"14", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"15", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"12" to ptr
%"17" = load i32, ptr %"18", align 4
store i32 %"17", ptr addrspace(5) %"6", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"17" = inttoptr i64 %"11" to ptr
%"16" = load i32, ptr %"17", align 4
store i32 %"16", ptr addrspace(5) %"6", align 4
fence seq_cst
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"13" to ptr
store i32 %"14", ptr %"19", align 4
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"18" = inttoptr i64 %"12" to ptr
store i32 %"13", ptr %"18", align 4
ret void
}


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"28":
define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"27":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"25", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load i32, ptr %"30", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"16" = call i32 @llvm.smin.i32(i32 %"17", i32 %"18")
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"27" = inttoptr i64 %"19" to ptr
store i32 %"20", ptr %"27", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"24", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load i32, ptr %"29", align 4
store i32 %"13", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"15" = call i32 @llvm.smin.i32(i32 %"16", i32 %"17")
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"18" to ptr
store i32 %"19", ptr %"26", align 4
ret void
}


@@ -271,7 +271,11 @@ test_ptx!(const, [0u16], [10u16, 20, 30, 40]);
test_ptx!(cvt_s16_s8, [0x139231C2u32], [0xFFFFFFC2u32]);
test_ptx!(cvt_f64_f32, [0.125f32], [0.125f64]);
test_ptx!(cvt_f32_f16, [0xa1u16], [0x37210000u32]);
test_ptx!(prmt, [0x70c507d6u32, 0x6fbd4b5cu32], [0x6fbdd65cu32, 0x6FFFD600]);
test_ptx!(
prmt,
[0x70c507d6u32, 0x6fbd4b5cu32],
[0x6fbdd65cu32, 0x6FFFD600]
);
test_ptx!(
prmt_non_immediate,
[0x70c507d6u32, 0x6fbd4b5cu32],
@@ -289,8 +293,11 @@ test_ptx!(
[65521u32, 2147549199, 0x1000],
[2147487519u32, 4294934539]
);
test_ptx!(madc_cc2, [0xDEADu32], [0u32, 1, 1, 2]);
test_ptx!(mad_hi_cc, [0x26223377u32, 0x70777766u32, 0x60666633u32], [0x71272866u32, 0u32, 1u32]); // Multi-tap :)
test_ptx!(
mad_hi_cc,
[0x26223377u32, 0x70777766u32, 0x60666633u32],
[0x71272866u32, 0u32, 1u32]
); // Multi-tap :)
test_ptx!(mov_vector_cast, [0x200000001u64], [2u32, 1u32]);
test_ptx!(
cvt_clamp,
@@ -323,11 +330,13 @@ test_ptx!(
],
[4294967295u32, 0, 2]
);
test_ptx!(carry_mixed, [0xDEADu32], [1u32, 1u32]);
test_ptx!(
subc_cc2,
carry_set_all,
[0xDEADu32],
[0u32, 1, 0, 4294967295, 1, 4294967295, 1]
[
1u32, 0, 0, 1, 0, 1, 0, 0, 0u32, 4294967295, 4294967295, 0, 4294967295, 0, 4294967295,
4294967295
]
);
test_ptx!(vshr, [0x6f3650f4u32, 22, 0xc62d4586], [0xC62D4742u32]);
test_ptx!(bfind, [0u32, 1u32, 0x64eb0414], [u32::MAX, 0, 30]);
@@ -337,7 +346,11 @@ test_ptx!(
[f16::from_f32(2.0), f16::from_f32(3.0)],
[f16::from_f32(2.0), f16::from_f32(5.0)]
);
test_ptx!(set_f16x2, [0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC], [0xffffu32, 0x3C000000]);
test_ptx!(
set_f16x2,
[0xc1690e6eu32, 0x13739444u32, 0x424834CC, 0x4248B4CC],
[0xffffu32, 0x3C000000]
);
test_ptx!(
dp4a,
[0xde3032f5u32, 0x2474fe15, 0xf51d8d6c],


@@ -1,33 +1,31 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"20", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"19", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%0 = alloca i64, align 8, addrspace(5)
store i64 %"15", ptr addrspace(5) %0, align 8
%"14" = load i64, ptr addrspace(5) %0, align 8
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"21", align 8
store i64 %"14", ptr addrspace(5) %0, align 8
%"13" = load i64, ptr addrspace(5) %0, align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"20" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"20", align 8
ret void
}


@@ -1,19 +1,17 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"9", ptr addrspace(4) byref(i64) %"10") #0 {
"12":
define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 {
"11":
%"6" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"6", align 1
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"4" = alloca [8 x i8], align 1, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"11" = ptrtoint ptr addrspace(5) %"4" to i64
%"10" = ptrtoint ptr addrspace(5) %"4" to i64
%0 = alloca i64, align 8, addrspace(5)
store i64 %"11", ptr addrspace(5) %0, align 8
%"8" = load i64, ptr addrspace(5) %0, align 8
store i64 %"8", ptr addrspace(5) %"5", align 8
store i64 %"10", ptr addrspace(5) %0, align 8
%"7" = load i64, ptr addrspace(5) %0, align 8
store i64 %"7", ptr addrspace(5) %"5", align 8
ret void
}


@@ -1,12 +1,10 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
"50":
define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
"49":
%"15" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"15", align 1
%"16" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"16", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
@@ -16,51 +14,51 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
%"10" = alloca half, align 2, addrspace(5)
%"11" = alloca half, align 2, addrspace(5)
%"12" = alloca half, align 2, addrspace(5)
%"16" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"17", ptr addrspace(5) %"4", align 8
%"18" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"18", ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"37" = inttoptr i64 %"20" to ptr
%"19" = load i64, ptr %"37", align 8
store i64 %"19", ptr addrspace(5) %"6", align 8
%"21" = load i64, ptr addrspace(5) %"6", align 8
store i64 %"17", ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"36" = inttoptr i64 %"19" to ptr
%"18" = load i64, ptr %"36", align 8
store i64 %"18", ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"6", align 8
%0 = alloca i64, align 8, addrspace(5)
store i64 %"21", ptr addrspace(5) %0, align 8
store i64 %"20", ptr addrspace(5) %0, align 8
%"13" = load i64, ptr addrspace(5) %0, align 8
%"39" = bitcast i64 %"13" to <2 x i32>
%"40" = extractelement <2 x i32> %"39", i32 0
%"41" = extractelement <2 x i32> %"39", i32 1
%"38" = bitcast i64 %"13" to <2 x i32>
%"39" = extractelement <2 x i32> %"38", i32 0
%"40" = extractelement <2 x i32> %"38", i32 1
%"21" = bitcast i32 %"39" to float
%"22" = bitcast i32 %"40" to float
%"23" = bitcast i32 %"41" to float
store float %"22", ptr addrspace(5) %"7", align 4
store float %"23", ptr addrspace(5) %"8", align 4
%"24" = load i64, ptr addrspace(5) %"6", align 8
store float %"21", ptr addrspace(5) %"7", align 4
store float %"22", ptr addrspace(5) %"8", align 4
%"23" = load i64, ptr addrspace(5) %"6", align 8
%1 = alloca i64, align 8, addrspace(5)
store i64 %"24", ptr addrspace(5) %1, align 8
store i64 %"23", ptr addrspace(5) %1, align 8
%"14" = load i64, ptr addrspace(5) %1, align 8
%"43" = bitcast i64 %"14" to <4 x i16>
%"44" = extractelement <4 x i16> %"43", i32 0
%"45" = extractelement <4 x i16> %"43", i32 1
%"46" = extractelement <4 x i16> %"43", i32 2
%"47" = extractelement <4 x i16> %"43", i32 3
%"42" = bitcast i64 %"14" to <4 x i16>
%"43" = extractelement <4 x i16> %"42", i32 0
%"44" = extractelement <4 x i16> %"42", i32 1
%"45" = extractelement <4 x i16> %"42", i32 2
%"46" = extractelement <4 x i16> %"42", i32 3
%"24" = bitcast i16 %"43" to half
%"25" = bitcast i16 %"44" to half
%"26" = bitcast i16 %"45" to half
%"27" = bitcast i16 %"46" to half
%"28" = bitcast i16 %"47" to half
store half %"25", ptr addrspace(5) %"9", align 2
store half %"26", ptr addrspace(5) %"10", align 2
store half %"27", ptr addrspace(5) %"11", align 2
store half %"28", ptr addrspace(5) %"12", align 2
%"29" = load i64, ptr addrspace(5) %"5", align 8
%"30" = load float, ptr addrspace(5) %"8", align 4
%"48" = inttoptr i64 %"29" to ptr
store float %"30", ptr %"48", align 4
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load float, ptr addrspace(5) %"7", align 4
%"49" = inttoptr i64 %"31" to ptr
%"52" = getelementptr inbounds i8, ptr %"49", i64 4
store float %"32", ptr %"52", align 4
store half %"24", ptr addrspace(5) %"9", align 2
store half %"25", ptr addrspace(5) %"10", align 2
store half %"26", ptr addrspace(5) %"11", align 2
store half %"27", ptr addrspace(5) %"12", align 2
%"28" = load i64, ptr addrspace(5) %"5", align 8
%"29" = load float, ptr addrspace(5) %"8", align 4
%"47" = inttoptr i64 %"28" to ptr
store float %"29", ptr %"47", align 4
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load float, ptr addrspace(5) %"7", align 4
%"48" = inttoptr i64 %"30" to ptr
%"51" = getelementptr inbounds i8, ptr %"48", i64 4
store float %"31", ptr %"51", align 4
ret void
}


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"28":
define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"27":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"25", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load float, ptr %"30", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"6", align 4
%"18" = load float, ptr addrspace(5) %"7", align 4
%"16" = fmul float %"17", %"18"
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load float, ptr addrspace(5) %"6", align 4
%"27" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"27", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"24", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load float, ptr %"29", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
%"15" = fmul float %"16", %"17"
store float %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load float, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"18" to ptr
store float %"19", ptr %"26", align 4
ret void
}


@@ -3,31 +3,29 @@ target triple = "amdgcn-amd-amdhsa"
declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0
define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #1 {
"23":
define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"15", i64 2)
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = call i64 @__zluda_ptx_impl__mul_hi_u64(i64 %"14", i64 2)
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}


@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = mul i64 %"15", 2
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = mul i64 %"14", 2
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"28":
define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"27":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"7" = alloca float, align 4, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load float, ptr %"25", align 4
store float %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
%"14" = load float, ptr %"30", align 4
store float %"14", ptr addrspace(5) %"7", align 4
%"17" = load float, ptr addrspace(5) %"6", align 4
%"18" = load float, ptr addrspace(5) %"7", align 4
%"16" = fmul float %"17", %"18"
store float %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load float, ptr addrspace(5) %"6", align 4
%"27" = inttoptr i64 %"19" to ptr
store float %"20", ptr %"27", align 4
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"24", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
%"13" = load float, ptr %"29", align 4
store float %"13", ptr addrspace(5) %"7", align 4
%"16" = load float, ptr addrspace(5) %"6", align 4
%"17" = load float, ptr addrspace(5) %"7", align 4
%"15" = fmul float %"16", %"17"
store float %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load float, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"18" to ptr
store float %"19", ptr %"26", align 4
ret void
}


@@ -1,40 +1,38 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
"30":
define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"29":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"14" to ptr addrspace(1)
%"13" = load i32, ptr addrspace(1) %"26", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"27" = inttoptr i64 %"16" to ptr addrspace(1)
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"27", i64 4
%"15" = load i32, ptr addrspace(1) %"32", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
%0 = sext i32 %"18" to i64
%1 = sext i32 %"19" to i64
%"17" = mul nsw i64 %0, %1
store i64 %"17", ptr addrspace(5) %"8", align 8
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i64, ptr addrspace(5) %"8", align 8
%"28" = inttoptr i64 %"20" to ptr
store i64 %"21", ptr %"28", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr addrspace(1)
%"12" = load i32, ptr addrspace(1) %"25", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr addrspace(1)
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4
%"14" = load i32, ptr addrspace(1) %"31", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%0 = sext i32 %"17" to i64
%1 = sext i32 %"18" to i64
%"16" = mul nsw i64 %0, %1
store i64 %"16", ptr addrspace(5) %"8", align 8
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"8", align 8
%"27" = inttoptr i64 %"19" to ptr
store i64 %"20", ptr %"27", align 8
ret void
}


@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
"23":
define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"22":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"20", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"21" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"21", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"14" = add i64 %"15", 1
store i64 %"14", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"22", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"20", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"13" = add i64 %"14", 1
store i64 %"13", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"21" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"21", align 8
ret void
}


@@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"19", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"13" = sub i32 0, %"14"
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load i32, ptr %"18", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"12" = sub i32 0, %"13"
store i32 %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"19", align 4
ret void
}


@@ -1,36 +1,34 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"27":
define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"26":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr addrspace(1)
%"29" = getelementptr inbounds i8, ptr addrspace(1) %"25", i64 8
%"8" = load <2 x i32>, ptr addrspace(1) %"29", align 8
%"14" = extractelement <2 x i32> %"8", i32 0
%"15" = extractelement <2 x i32> %"8", i32 1
store i32 %"14", ptr addrspace(5) %"6", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"17" = load i32, ptr addrspace(5) %"6", align 4
%"18" = load i32, ptr addrspace(5) %"7", align 4
%"16" = add i32 %"17", %"18"
store i32 %"16", ptr addrspace(5) %"6", align 4
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i32, ptr addrspace(5) %"6", align 4
%"26" = inttoptr i64 %"19" to ptr addrspace(1)
store i32 %"20", ptr addrspace(1) %"26", align 4
store i64 %"11", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr addrspace(1)
%"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8
%"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8
%"13" = extractelement <2 x i32> %"8", i32 0
%"14" = extractelement <2 x i32> %"8", i32 1
store i32 %"13", ptr addrspace(5) %"6", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"17" = load i32, ptr addrspace(5) %"7", align 4
%"15" = add i32 %"16", %"17"
store i32 %"15", ptr addrspace(5) %"6", align 4
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"25" = inttoptr i64 %"18" to ptr addrspace(1)
store i32 %"19", ptr addrspace(1) %"25", align 4
ret void
}


@@ -1,31 +1,29 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
"24":
define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"23":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"19", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"20" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"20", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"21" = xor i64 %"15", -1
store i64 %"21", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"23" = inttoptr i64 %"16" to ptr
store i64 %"17", ptr %"23", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"19", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"6", align 8
%"20" = xor i64 %"14", -1
store i64 %"20", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"22" = inttoptr i64 %"15" to ptr
store i64 %"16", ptr %"22", align 8
ret void
}


@@ -3,37 +3,35 @@ target triple = "amdgcn-amd-amdhsa"
declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0
define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #1 {
"30":
define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 {
"29":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"15" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"17", ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"28" = inttoptr i64 %"19" to ptr
%"18" = load i32, ptr %"28", align 4
store i32 %"18", ptr addrspace(5) %"6", align 4
%"12" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0)
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"27" = inttoptr i64 %"18" to ptr
%"17" = load i32, ptr %"27", align 4
store i32 %"17", ptr addrspace(5) %"6", align 4
%"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0)
%0 = alloca i32, align 4, addrspace(5)
store i32 %"12", ptr addrspace(5) %0, align 4
%"20" = load i32, ptr addrspace(5) %0, align 4
store i32 %"20", ptr addrspace(5) %"7", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"23" = load i32, ptr addrspace(5) %"7", align 4
%"21" = add i32 %"22", %"23"
store i32 %"21", ptr addrspace(5) %"6", align 4
%"24" = load i64, ptr addrspace(5) %"5", align 8
%"25" = load i32, ptr addrspace(5) %"6", align 4
%"29" = inttoptr i64 %"24" to ptr
store i32 %"25", ptr %"29", align 4
store i32 %"11", ptr addrspace(5) %0, align 4
%"19" = load i32, ptr addrspace(5) %0, align 4
store i32 %"19", ptr addrspace(5) %"7", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"20" = add i32 %"21", %"22"
store i32 %"20", ptr addrspace(5) %"6", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"6", align 4
%"28" = inttoptr i64 %"23" to ptr
store i32 %"24", ptr %"28", align 4
ret void
}


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"31":
define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"30":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"13" to ptr
%"12" = load i64, ptr %"25", align 8
store i64 %"12", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"15" to ptr
%"33" = getelementptr inbounds i8, ptr %"26", i64 8
%"14" = load i64, ptr %"33", align 8
store i64 %"14", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(5) %"6", align 8
%"18" = load i64, ptr addrspace(5) %"7", align 8
%"27" = or i64 %"17", %"18"
store i64 %"27", ptr addrspace(5) %"6", align 8
%"19" = load i64, ptr addrspace(5) %"5", align 8
%"20" = load i64, ptr addrspace(5) %"6", align 8
%"30" = inttoptr i64 %"19" to ptr
store i64 %"20", ptr %"30", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"24" = inttoptr i64 %"12" to ptr
%"11" = load i64, ptr %"24", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"14" to ptr
%"32" = getelementptr inbounds i8, ptr %"25", i64 8
%"13" = load i64, ptr %"32", align 8
store i64 %"13", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"26" = or i64 %"16", %"17"
store i64 %"26", ptr addrspace(5) %"6", align 8
%"18" = load i64, ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"6", align 8
%"29" = inttoptr i64 %"18" to ptr
store i64 %"19", ptr %"29", align 8
ret void
}


@@ -1,39 +1,37 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
"29":
define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 {
"28":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"25" = ptrtoint ptr addrspace(4) %"22" to i64
%"24" = ptrtoint ptr addrspace(4) %"21" to i64
%0 = alloca i64, align 8, addrspace(5)
store i64 %"25", ptr addrspace(5) %0, align 8
%"24" = load i64, ptr addrspace(5) %0, align 8
store i64 %"24", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"26" = inttoptr i64 %"13" to ptr addrspace(4)
%"12" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"14", ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"5", align 8
%"27" = inttoptr i64 %"16" to ptr
%"15" = load i64, ptr %"27", align 8
store i64 %"15", ptr addrspace(5) %"7", align 8
%"18" = load i64, ptr addrspace(5) %"7", align 8
%"17" = add i64 %"18", 1
store i64 %"17", ptr addrspace(5) %"8", align 8
%"19" = load i64, ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"8", align 8
%"28" = inttoptr i64 %"19" to ptr
store i64 %"20", ptr %"28", align 8
store i64 %"24", ptr addrspace(5) %0, align 8
%"23" = load i64, ptr addrspace(5) %0, align 8
store i64 %"23", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"25" = inttoptr i64 %"12" to ptr addrspace(4)
%"11" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(4) %"22", align 8
store i64 %"13", ptr addrspace(5) %"6", align 8
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"26" = inttoptr i64 %"15" to ptr
%"14" = load i64, ptr %"26", align 8
store i64 %"14", ptr addrspace(5) %"7", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"16" = add i64 %"17", 1
store i64 %"16", ptr addrspace(5) %"8", align 8
%"18" = load i64, ptr addrspace(5) %"6", align 8
%"19" = load i64, ptr addrspace(5) %"8", align 8
%"27" = inttoptr i64 %"18" to ptr
store i64 %"19", ptr %"27", align 8
ret void
}


@@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load i32, ptr %"19", align 4
store i32 %"11", ptr addrspace(5) %"6", align 4
%"14" = load i32, ptr addrspace(5) %"6", align 4
%"13" = call i32 @llvm.ctpop.i32(i32 %"14")
store i32 %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load i32, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store i32 %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load i32, ptr %"18", align 4
store i32 %"10", ptr addrspace(5) %"6", align 4
%"13" = load i32, ptr addrspace(5) %"6", align 4
%"12" = call i32 @llvm.ctpop.i32(i32 %"13")
store i32 %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load i32, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store i32 %"15", ptr %"19", align 4
ret void
}


@@ -1,64 +1,62 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
"42":
define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
"41":
%"14" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"14", align 1
%"15" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"15", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"8" = alloca i64, align 8, addrspace(5)
%"9" = alloca i1, align 1, addrspace(5)
%"15" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"15", ptr addrspace(5) %"4", align 8
%"16" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"16", ptr addrspace(5) %"4", align 8
%"17" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"17", ptr addrspace(5) %"5", align 8
%"19" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"19" to ptr
%"18" = load i64, ptr %"39", align 8
store i64 %"18", ptr addrspace(5) %"6", align 8
%"21" = load i64, ptr addrspace(5) %"4", align 8
%"40" = inttoptr i64 %"21" to ptr
%"44" = getelementptr inbounds i8, ptr %"40", i64 8
%"20" = load i64, ptr %"44", align 8
store i64 %"20", ptr addrspace(5) %"7", align 8
%"23" = load i64, ptr addrspace(5) %"6", align 8
%"24" = load i64, ptr addrspace(5) %"7", align 8
%"22" = icmp ult i64 %"23", %"24"
store i1 %"22", ptr addrspace(5) %"9", align 1
store i64 %"16", ptr addrspace(5) %"5", align 8
%"18" = load i64, ptr addrspace(5) %"4", align 8
%"38" = inttoptr i64 %"18" to ptr
%"17" = load i64, ptr %"38", align 8
store i64 %"17", ptr addrspace(5) %"6", align 8
%"20" = load i64, ptr addrspace(5) %"4", align 8
%"39" = inttoptr i64 %"20" to ptr
%"43" = getelementptr inbounds i8, ptr %"39", i64 8
%"19" = load i64, ptr %"43", align 8
store i64 %"19", ptr addrspace(5) %"7", align 8
%"22" = load i64, ptr addrspace(5) %"6", align 8
%"23" = load i64, ptr addrspace(5) %"7", align 8
%"21" = icmp ult i64 %"22", %"23"
store i1 %"21", ptr addrspace(5) %"9", align 1
%"25" = load i1, ptr addrspace(5) %"9", align 1
%"24" = xor i1 %"25", true
store i1 %"24", ptr addrspace(5) %"9", align 1
%"26" = load i1, ptr addrspace(5) %"9", align 1
%"25" = xor i1 %"26", true
store i1 %"25", ptr addrspace(5) %"9", align 1
%"27" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"27", label %"10", label %"11"
br i1 %"26", label %"10", label %"11"
"10": ; preds = %"42"
"10": ; preds = %"41"
%0 = alloca i64, align 8, addrspace(5)
store i64 1, ptr addrspace(5) %0, align 8
%"28" = load i64, ptr addrspace(5) %0, align 8
store i64 %"28", ptr addrspace(5) %"8", align 8
%"27" = load i64, ptr addrspace(5) %0, align 8
store i64 %"27", ptr addrspace(5) %"8", align 8
br label %"11"
"11": ; preds = %"10", %"42"
%"29" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"29", label %"13", label %"12"
"11": ; preds = %"10", %"41"
%"28" = load i1, ptr addrspace(5) %"9", align 1
br i1 %"28", label %"13", label %"12"
"12": ; preds = %"11"
%1 = alloca i64, align 8, addrspace(5)
store i64 2, ptr addrspace(5) %1, align 8
%"30" = load i64, ptr addrspace(5) %1, align 8
store i64 %"30", ptr addrspace(5) %"8", align 8
%"29" = load i64, ptr addrspace(5) %1, align 8
store i64 %"29", ptr addrspace(5) %"8", align 8
br label %"13"
"13": ; preds = %"12", %"11"
%"31" = load i64, ptr addrspace(5) %"5", align 8
%"32" = load i64, ptr addrspace(5) %"8", align 8
%"41" = inttoptr i64 %"31" to ptr
store i64 %"32", ptr %"41", align 8
%"30" = load i64, ptr addrspace(5) %"5", align 8
%"31" = load i64, ptr addrspace(5) %"8", align 8
%"40" = inttoptr i64 %"30" to ptr
store i64 %"31", ptr %"40", align 8
ret void
}


@@ -1,42 +1,40 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
"44":
define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
"43":
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"11" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"11", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"9" = alloca i32, align 4, addrspace(5)
%"11" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"12", ptr addrspace(5) %"4", align 8
%"13" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"13", ptr addrspace(5) %"5", align 8
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"15" to ptr
%"14" = load i32, ptr %"34", align 4
store i32 %"14", ptr addrspace(5) %"6", align 4
%"17" = load i64, ptr addrspace(5) %"4", align 8
%"35" = inttoptr i64 %"17" to ptr
%"46" = getelementptr inbounds i8, ptr %"35", i64 4
%"16" = load i32, ptr %"46", align 4
store i32 %"16", ptr addrspace(5) %"7", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
%0 = bitcast i32 %"19" to <4 x i8>
%1 = bitcast i32 %"20" to <4 x i8>
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"33" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"33", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"34" = inttoptr i64 %"16" to ptr
%"45" = getelementptr inbounds i8, ptr %"34", i64 4
%"15" = load i32, ptr %"45", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
%0 = bitcast i32 %"18" to <4 x i8>
%1 = bitcast i32 %"19" to <4 x i8>
%2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
%"36" = bitcast <4 x i8> %2 to i32
store i32 %"36", ptr addrspace(5) %"8", align 4
%"22" = load i32, ptr addrspace(5) %"6", align 4
%"23" = load i32, ptr addrspace(5) %"7", align 4
%3 = bitcast i32 %"22" to <4 x i8>
%4 = bitcast i32 %"23" to <4 x i8>
%"35" = bitcast <4 x i8> %2 to i32
store i32 %"35", ptr addrspace(5) %"8", align 4
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%3 = bitcast i32 %"21" to <4 x i8>
%4 = bitcast i32 %"22" to <4 x i8>
%5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
%6 = extractelement <4 x i8> %5, i32 0
%7 = ashr i8 %6, 7
@@ -44,17 +42,17 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"32", ptr
%9 = extractelement <4 x i8> %8, i32 2
%10 = ashr i8 %9, 7
%11 = insertelement <4 x i8> %8, i8 %10, i32 2
%"39" = bitcast <4 x i8> %11 to i32
store i32 %"39", ptr addrspace(5) %"9", align 4
%"24" = load i64, ptr addrspace(5) %"5", align 8
%"25" = load i32, ptr addrspace(5) %"8", align 4
%"42" = inttoptr i64 %"24" to ptr
store i32 %"25", ptr %"42", align 4
%"26" = load i64, ptr addrspace(5) %"5", align 8
%"27" = load i32, ptr addrspace(5) %"9", align 4
%"43" = inttoptr i64 %"26" to ptr
%"48" = getelementptr inbounds i8, ptr %"43", i64 4
store i32 %"27", ptr %"48", align 4
%"38" = bitcast <4 x i8> %11 to i32
store i32 %"38", ptr addrspace(5) %"9", align 4
%"23" = load i64, ptr addrspace(5) %"5", align 8
%"24" = load i32, ptr addrspace(5) %"8", align 4
%"41" = inttoptr i64 %"23" to ptr
store i32 %"24", ptr %"41", align 4
%"25" = load i64, ptr addrspace(5) %"5", align 8
%"26" = load i32, ptr addrspace(5) %"9", align 4
%"42" = inttoptr i64 %"25" to ptr
%"47" = getelementptr inbounds i8, ptr %"42", i64 4
store i32 %"26", ptr %"47", align 4
ret void
}


@@ -1,45 +1,43 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
"34":
define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
"33":
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"10" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"10", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i32, align 4, addrspace(5)
%"7" = alloca i32, align 4, addrspace(5)
%"8" = alloca i32, align 4, addrspace(5)
%"10" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"10", ptr addrspace(5) %"4", align 8
%"11" = load i64, ptr addrspace(4) %"26", align 8
store i64 %"11", ptr addrspace(5) %"4", align 8
%"12" = load i64, ptr addrspace(4) %"27", align 8
store i64 %"12", ptr addrspace(5) %"5", align 8
%"14" = load i64, ptr addrspace(5) %"4", align 8
%"28" = inttoptr i64 %"14" to ptr
%"13" = load i32, ptr %"28", align 4
store i32 %"13", ptr addrspace(5) %"6", align 4
%"16" = load i64, ptr addrspace(5) %"4", align 8
%"29" = inttoptr i64 %"16" to ptr
%"36" = getelementptr inbounds i8, ptr %"29", i64 4
%"15" = load i32, ptr %"36", align 4
store i32 %"15", ptr addrspace(5) %"7", align 4
store i64 %"11", ptr addrspace(5) %"5", align 8
%"13" = load i64, ptr addrspace(5) %"4", align 8
%"27" = inttoptr i64 %"13" to ptr
%"12" = load i32, ptr %"27", align 4
store i32 %"12", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"4", align 8
%"28" = inttoptr i64 %"15" to ptr
%"35" = getelementptr inbounds i8, ptr %"28", i64 4
%"14" = load i32, ptr %"35", align 4
store i32 %"14", ptr addrspace(5) %"7", align 4
%0 = alloca i32, align 4, addrspace(5)
store i32 64, ptr addrspace(5) %0, align 4
%"17" = load i32, ptr addrspace(5) %0, align 4
store i32 %"17", ptr addrspace(5) %"8", align 4
%"19" = load i32, ptr addrspace(5) %"6", align 4
%"20" = load i32, ptr addrspace(5) %"7", align 4
%1 = bitcast i32 %"19" to <4 x i8>
%2 = bitcast i32 %"20" to <4 x i8>
%"16" = load i32, ptr addrspace(5) %0, align 4
store i32 %"16", ptr addrspace(5) %"8", align 4
%"18" = load i32, ptr addrspace(5) %"6", align 4
%"19" = load i32, ptr addrspace(5) %"7", align 4
%1 = bitcast i32 %"18" to <4 x i8>
%2 = bitcast i32 %"19" to <4 x i8>
%3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
%"30" = bitcast <4 x i8> %3 to i32
store i32 %"30", ptr addrspace(5) %"7", align 4
%"21" = load i64, ptr addrspace(5) %"5", align 8
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"33" = inttoptr i64 %"21" to ptr
store i32 %"22", ptr %"33", align 4
%"29" = bitcast <4 x i8> %3 to i32
store i32 %"29", ptr addrspace(5) %"7", align 4
%"20" = load i64, ptr addrspace(5) %"5", align 8
%"21" = load i32, ptr addrspace(5) %"7", align 4
%"32" = inttoptr i64 %"20" to ptr
store i32 %"21", ptr %"32", align 4
ret void
}


@@ -1,30 +1,28 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
"21":
define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
"20":
%"7" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"7", align 1
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"4" = alloca i64, align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca float, align 4, addrspace(5)
%"8" = load i64, ptr addrspace(4) %"16", align 8
store i64 %"8", ptr addrspace(5) %"4", align 8
%"9" = load i64, ptr addrspace(4) %"17", align 8
store i64 %"9", ptr addrspace(5) %"4", align 8
%"10" = load i64, ptr addrspace(4) %"18", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"12" = load i64, ptr addrspace(5) %"4", align 8
%"19" = inttoptr i64 %"12" to ptr
%"11" = load float, ptr %"19", align 4
store float %"11", ptr addrspace(5) %"6", align 4
%"14" = load float, ptr addrspace(5) %"6", align 4
%"13" = fdiv arcp afn float 1.000000e+00, %"14"
store float %"13", ptr addrspace(5) %"6", align 4
%"15" = load i64, ptr addrspace(5) %"5", align 8
%"16" = load float, ptr addrspace(5) %"6", align 4
%"20" = inttoptr i64 %"15" to ptr
store float %"16", ptr %"20", align 4
store i64 %"9", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(5) %"4", align 8
%"18" = inttoptr i64 %"11" to ptr
%"10" = load float, ptr %"18", align 4
store float %"10", ptr addrspace(5) %"6", align 4
%"13" = load float, ptr addrspace(5) %"6", align 4
%"12" = fdiv arcp afn float 1.000000e+00, %"13"
store float %"12", ptr addrspace(5) %"6", align 4
%"14" = load i64, ptr addrspace(5) %"5", align 8
%"15" = load float, ptr addrspace(5) %"6", align 4
%"19" = inttoptr i64 %"14" to ptr
store float %"15", ptr %"19", align 4
ret void
}


@@ -1,37 +1,35 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
"34":
define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
"33":
%"8" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"8", align 1
%"9" = alloca i1, align 1, addrspace(5)
store i1 false, ptr addrspace(5) %"9", align 1
%"4" = alloca [8 x i8], align 8, addrspace(5)
%"5" = alloca i64, align 8, addrspace(5)
%"6" = alloca i64, align 8, addrspace(5)
%"7" = alloca i64, align 8, addrspace(5)
%"9" = load i64, ptr addrspace(4) %"23", align 8
store i64 %"9", ptr addrspace(5) %"5", align 8
%"10" = load i64, ptr addrspace(4) %"24", align 8
store i64 %"10", ptr addrspace(5) %"5", align 8
%"11" = load i64, ptr addrspace(4) %"25", align 8
store i64 %"11", ptr addrspace(5) %"6", align 8
%"13" = load i64, ptr addrspace(5) %"5", align 8
%"27" = inttoptr i64 %"13" to ptr addrspace(1)
%"26" = load i64, ptr addrspace(1) %"27", align 8
store i64 %"26", ptr addrspace(5) %"7", align 8
%"14" = load i64, ptr addrspace(5) %"7", align 8
%"19" = add i64 %"14", 1
%"28" = addrspacecast ptr addrspace(5) %"4" to ptr
store i64 %"19", ptr %"28", align 8
%"30" = addrspacecast ptr addrspace(5) %"4" to ptr
%"38" = getelementptr inbounds i8, ptr %"30", i64 0
%"31" = load i64, ptr %"38", align 8
store i64 %"31", ptr addrspace(5) %"7", align 8
%"16" = load i64, ptr addrspace(5) %"6", align 8
%"17" = load i64, ptr addrspace(5) %"7", align 8
%"32" = inttoptr i64 %"16" to ptr addrspace(1)
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"32", i64 0
store i64 %"17", ptr addrspace(1) %"40", align 8
store i64 %"10", ptr addrspace(5) %"6", align 8
%"12" = load i64, ptr addrspace(5) %"5", align 8
%"26" = inttoptr i64 %"12" to ptr addrspace(1)
%"25" = load i64, ptr addrspace(1) %"26", align 8
store i64 %"25", ptr addrspace(5) %"7", align 8
%"13" = load i64, ptr addrspace(5) %"7", align 8
%"18" = add i64 %"13", 1
%"27" = addrspacecast ptr addrspace(5) %"4" to ptr
store i64 %"18", ptr %"27", align 8
%"29" = addrspacecast ptr addrspace(5) %"4" to ptr
%"37" = getelementptr inbounds i8, ptr %"29", i64 0
%"30" = load i64, ptr %"37", align 8
store i64 %"30", ptr addrspace(5) %"7", align 8
%"15" = load i64, ptr addrspace(5) %"6", align 8
%"16" = load i64, ptr addrspace(5) %"7", align 8
%"31" = inttoptr i64 %"15" to ptr addrspace(1)
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0
store i64 %"16", ptr addrspace(1) %"39", align 8
ret void
}
