mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Emit slightly cleaner LLVM bitcode
This commit is contained in:
parent
bdc652f9eb
commit
abb0ec0414
133 changed files with 1543 additions and 1341 deletions
|
@ -7,7 +7,7 @@ use std::ffi::CStr;
|
|||
use std::fmt::Display;
|
||||
use std::io::Write;
|
||||
use std::ptr::null_mut;
|
||||
use std::{convert, iter, mem, ptr};
|
||||
use std::{iter, mem, ptr};
|
||||
use zluda_llvm::core::*;
|
||||
use zluda_llvm::prelude::*;
|
||||
use zluda_llvm::zluda::*;
|
||||
|
@ -157,7 +157,7 @@ impl NamedIdGenerator {
|
|||
if let Some(id) = id {
|
||||
self.register_result(id, func)
|
||||
} else {
|
||||
func(b"\0".as_ptr() as _)
|
||||
func(LLVM_UNNAMED)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -505,10 +505,12 @@ fn emit_function_variable(
|
|||
) -> Result<(), TranslateError> {
|
||||
let builder = ctx.builder.get();
|
||||
let llvm_type = get_llvm_type(ctx, &variable.type_)?;
|
||||
let addr_space = get_llvm_address_space(&ctx.constants, variable.state_space)?;
|
||||
let value = ctx.names.register_result(variable.name, |name| unsafe {
|
||||
LLVMZludaBuildAlloca(builder, llvm_type, addr_space, name)
|
||||
});
|
||||
let value = emit_alloca(
|
||||
ctx,
|
||||
llvm_type,
|
||||
get_llvm_address_space(&ctx.constants, variable.state_space)?,
|
||||
Some(variable.name),
|
||||
);
|
||||
match variable.initializer {
|
||||
None => {}
|
||||
Some(init) => {
|
||||
|
@ -531,12 +533,27 @@ fn emit_method<'a, 'input>(
|
|||
let llvm_method = emit_method_declaration(ctx, &method)?;
|
||||
emit_linkage_for_method(&method, is_kernel, llvm_method);
|
||||
emit_tuning(ctx, llvm_method, &method.tuning);
|
||||
for statement in method.body.iter().flat_map(convert::identity) {
|
||||
let statements = match method.body {
|
||||
Some(statements) => statements,
|
||||
None => return Ok(()),
|
||||
};
|
||||
// Initial BB that holds all the variable declarations
|
||||
let bb_with_variables =
|
||||
unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) };
|
||||
// Rest of the code
|
||||
let starting_bb =
|
||||
unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) };
|
||||
unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), starting_bb) };
|
||||
for statement in statements.iter() {
|
||||
register_basic_blocks(ctx, llvm_method, statement);
|
||||
}
|
||||
for statement in method.body.into_iter().flatten() {
|
||||
for statement in statements.into_iter() {
|
||||
emit_statement(ctx, is_kernel, statement)?;
|
||||
}
|
||||
// happens if there is a post-ret trailing label
|
||||
terminate_current_block_if_needed(ctx, None);
|
||||
unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), bb_with_variables) };
|
||||
unsafe { LLVMBuildBr(ctx.builder.get(), starting_bb) };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -604,7 +621,6 @@ fn emit_statement(
|
|||
is_kernel: bool,
|
||||
statement: crate::translate::ExpandedStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
start_synthetic_basic_block_if_needed(ctx, &statement);
|
||||
Ok(match statement {
|
||||
crate::translate::Statement::Label(label) => emit_label(ctx, label)?,
|
||||
crate::translate::Statement::Variable(var) => emit_function_variable(ctx, var)?,
|
||||
|
@ -749,27 +765,6 @@ fn emit_ret_value(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn start_synthetic_basic_block_if_needed(
|
||||
ctx: &mut EmitContext,
|
||||
statement: &crate::translate::ExpandedStatement,
|
||||
) {
|
||||
let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) };
|
||||
if current_block == ptr::null_mut() {
|
||||
return;
|
||||
}
|
||||
let terminator = unsafe { LLVMGetBasicBlockTerminator(current_block) };
|
||||
if terminator == ptr::null_mut() {
|
||||
return;
|
||||
}
|
||||
if let crate::translate::Statement::Label(..) = statement {
|
||||
return;
|
||||
}
|
||||
let new_block =
|
||||
unsafe { LLVMCreateBasicBlockInContext(ctx.context.get(), b"\0".as_ptr() as _) };
|
||||
unsafe { LLVMInsertExistingBasicBlockAfterInsertBlock(ctx.builder.get(), new_block) };
|
||||
unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) };
|
||||
}
|
||||
|
||||
fn emit_ptr_access(
|
||||
ctx: &mut EmitContext,
|
||||
ptr_access: &crate::translate::PtrAccess<crate::translate::ExpandedArgParams>,
|
||||
|
@ -1073,7 +1068,7 @@ fn emit_value_copy(
|
|||
) -> Result<(), TranslateError> {
|
||||
let builder = ctx.builder.get();
|
||||
let type_ = get_llvm_type(ctx, type_)?;
|
||||
let temp_value = unsafe { LLVMBuildAlloca(builder, type_, LLVM_UNNAMED) };
|
||||
let temp_value = emit_alloca(ctx, type_, ctx.constants.private_space, None);
|
||||
unsafe { LLVMBuildStore(builder, src, temp_value) };
|
||||
ctx.names.register_result(dst, |dst| unsafe {
|
||||
LLVMBuildLoad2(builder, type_, temp_value, dst)
|
||||
|
@ -1081,6 +1076,28 @@ fn emit_value_copy(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html):
|
||||
// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca
|
||||
// instructions that are in the entry basic block. Given SSA is the canonical form expected by much
|
||||
// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to
|
||||
// be less effective than it could be."
|
||||
fn emit_alloca(
|
||||
ctx: &mut EmitContext,
|
||||
type_: LLVMTypeRef,
|
||||
addr_space: u32,
|
||||
name: Option<Id>,
|
||||
) -> LLVMValueRef {
|
||||
let builder = ctx.builder.get();
|
||||
let current_bb = unsafe { LLVMGetInsertBlock(builder) };
|
||||
let variables_bb = unsafe { LLVMGetFirstBasicBlock(LLVMGetBasicBlockParent(current_bb)) };
|
||||
unsafe { LLVMPositionBuilderAtEnd(builder, variables_bb) };
|
||||
let result = ctx.names.register_result_option(name, |name| unsafe {
|
||||
LLVMZludaBuildAlloca(builder, type_, addr_space, name)
|
||||
});
|
||||
unsafe { LLVMPositionBuilderAtEnd(builder, current_bb) };
|
||||
result
|
||||
}
|
||||
|
||||
fn emit_instruction(
|
||||
ctx: &mut EmitContext,
|
||||
is_kernel: bool,
|
||||
|
@ -3494,12 +3511,12 @@ fn emit_store_var(
|
|||
|
||||
fn emit_label(ctx: &mut EmitContext, label: Id) -> Result<(), TranslateError> {
|
||||
let new_block = unsafe { LLVMValueAsBasicBlock(ctx.names.value(label)?) };
|
||||
terminate_current_block_if_needed(ctx, new_block);
|
||||
terminate_current_block_if_needed(ctx, Some(new_block));
|
||||
unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasicBlockRef) {
|
||||
fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: Option<LLVMBasicBlockRef>) {
|
||||
let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) };
|
||||
if current_block == ptr::null_mut() {
|
||||
return;
|
||||
|
@ -3508,7 +3525,10 @@ fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasic
|
|||
if terminator != ptr::null_mut() {
|
||||
return;
|
||||
}
|
||||
unsafe { LLVMBuildBr(ctx.builder.get(), new_block) };
|
||||
match new_block {
|
||||
Some(new_block) => unsafe { LLVMBuildBr(ctx.builder.get(), new_block) },
|
||||
None => unsafe { LLVMBuildUnreachable(ctx.builder.get()) },
|
||||
};
|
||||
}
|
||||
|
||||
fn emit_method_declaration<'input>(
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
|
||||
"37":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr
|
|||
store i32 %"29", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"31" = inttoptr i64 %"14" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"32" = load i32, ptr %"39", align 4
|
||||
%"38" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"32" = load i32, ptr %"38", align 4
|
||||
store i32 %"32", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false)
|
||||
|
@ -35,8 +37,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr
|
|||
%"21" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"35" = inttoptr i64 %"21" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"35", i64 4
|
||||
store i32 %"22", ptr %"41", align 4
|
||||
%"40" = getelementptr inbounds i8, ptr %"35", i64 4
|
||||
store i32 %"22", ptr %"40", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -4,11 +4,13 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__activemask() #0
|
||||
|
||||
define protected amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 {
|
||||
"15":
|
||||
%"6" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"7" = load i64, ptr addrspace(4) %"12", align 8
|
||||
store i64 %"7", ptr addrspace(5) %"4", align 8
|
||||
%"8" = call i32 @__zluda_ptx_impl__activemask()
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4
|
||||
|
||||
define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 {
|
||||
"24":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
%"8" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"20", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"5", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"21", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -2,32 +2,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
|
||||
"38":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
%2 = alloca i64, align 8, addrspace(5)
|
||||
br label %3
|
||||
|
||||
3: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"31" = ptrtoint ptr addrspace(4) %"26" to i64
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"31", ptr addrspace(5) %0, align 8
|
||||
%"30" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"31", ptr addrspace(5) %1, align 8
|
||||
%"30" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"30", ptr addrspace(5) %"4", align 8
|
||||
%"33" = ptrtoint ptr addrspace(4) %"27" to i64
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"33", ptr addrspace(5) %1, align 8
|
||||
%"32" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"33", ptr addrspace(5) %2, align 8
|
||||
%"32" = load i64, ptr addrspace(5) %2, align 8
|
||||
store i64 %"32", ptr addrspace(5) %"5", align 8
|
||||
%"12" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"34" = inttoptr i64 %"12" to ptr addrspace(4)
|
||||
%"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0
|
||||
%"11" = load i64, ptr addrspace(4) %"40", align 8
|
||||
%"39" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0
|
||||
%"11" = load i64, ptr addrspace(4) %"39", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"4", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"35" = inttoptr i64 %"14" to ptr addrspace(4)
|
||||
%"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
|
||||
%"13" = load i64, ptr addrspace(4) %"42", align 8
|
||||
%"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0
|
||||
%"13" = load i64, ptr addrspace(4) %"41", align 8
|
||||
store i64 %"13", ptr addrspace(5) %"5", align 8
|
||||
%"16" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"36" = inttoptr i64 %"16" to ptr
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 {
|
||||
"68":
|
||||
%"13" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -14,6 +12,10 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
|
|||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
%"12" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
%"14" = load i64, ptr addrspace(4) %"53", align 8
|
||||
store i64 %"14", ptr addrspace(5) %"4", align 8
|
||||
%"15" = load i64, ptr addrspace(4) %"54", align 8
|
||||
|
@ -24,45 +26,45 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
|
|||
store i32 %"55", ptr addrspace(5) %"9", align 4
|
||||
%"19" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"57" = inttoptr i64 %"19" to ptr
|
||||
%"70" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"58" = load i32, ptr %"70", align 4
|
||||
%"69" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"58" = load i32, ptr %"69", align 4
|
||||
store i32 %"58", ptr addrspace(5) %"10", align 4
|
||||
%"21" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"59" = inttoptr i64 %"21" to ptr
|
||||
%"72" = getelementptr inbounds i8, ptr %"59", i64 8
|
||||
%"20" = load i32, ptr %"72", align 4
|
||||
%"71" = getelementptr inbounds i8, ptr %"59", i64 8
|
||||
%"20" = load i32, ptr %"71", align 4
|
||||
store i32 %"20", ptr addrspace(5) %"11", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"60" = inttoptr i64 %"23" to ptr
|
||||
%"74" = getelementptr inbounds i8, ptr %"60", i64 12
|
||||
%"22" = load i32, ptr %"74", align 4
|
||||
%"73" = getelementptr inbounds i8, ptr %"60", i64 12
|
||||
%"22" = load i32, ptr %"73", align 4
|
||||
store i32 %"22", ptr addrspace(5) %"12", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"27" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27")
|
||||
%"24" = extractvalue { i32, i1 } %0, 0
|
||||
%"25" = extractvalue { i32, i1 } %0, 1
|
||||
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27")
|
||||
%"24" = extractvalue { i32, i1 } %2, 0
|
||||
%"25" = extractvalue { i32, i1 } %2, 1
|
||||
store i32 %"24", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"25", ptr addrspace(5) %"13", align 1
|
||||
%"30" = load i1, ptr addrspace(5) %"13", align 1
|
||||
%"31" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"32" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%1 = zext i1 %"30" to i32
|
||||
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32")
|
||||
%3 = extractvalue { i32, i1 } %2, 0
|
||||
%4 = extractvalue { i32, i1 } %2, 1
|
||||
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
|
||||
%"28" = extractvalue { i32, i1 } %5, 0
|
||||
%6 = extractvalue { i32, i1 } %5, 1
|
||||
%"29" = xor i1 %4, %6
|
||||
%3 = zext i1 %"30" to i32
|
||||
%4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32")
|
||||
%5 = extractvalue { i32, i1 } %4, 0
|
||||
%6 = extractvalue { i32, i1 } %4, 1
|
||||
%7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3)
|
||||
%"28" = extractvalue { i32, i1 } %7, 0
|
||||
%8 = extractvalue { i32, i1 } %7, 1
|
||||
%"29" = xor i1 %6, %8
|
||||
store i32 %"28", ptr addrspace(5) %"7", align 4
|
||||
store i1 %"29", ptr addrspace(5) %"13", align 1
|
||||
%"34" = load i1, ptr addrspace(5) %"13", align 1
|
||||
%"35" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"36" = load i32, ptr addrspace(5) %"12", align 4
|
||||
%7 = zext i1 %"34" to i32
|
||||
%8 = add i32 %"35", %"36"
|
||||
%"33" = add i32 %8, %7
|
||||
%9 = zext i1 %"34" to i32
|
||||
%10 = add i32 %"35", %"36"
|
||||
%"33" = add i32 %10, %9
|
||||
store i32 %"33", ptr addrspace(5) %"8", align 4
|
||||
%"37" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"38" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -71,13 +73,13 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53",
|
|||
%"39" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"40" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"66" = inttoptr i64 %"39" to ptr
|
||||
%"76" = getelementptr inbounds i8, ptr %"66", i64 4
|
||||
store i32 %"40", ptr %"76", align 4
|
||||
%"75" = getelementptr inbounds i8, ptr %"66", i64 4
|
||||
store i32 %"40", ptr %"75", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"42" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"67" = inttoptr i64 %"41" to ptr
|
||||
%"78" = getelementptr inbounds i8, ptr %"67", i64 8
|
||||
store i32 %"42", ptr %"78", align 4
|
||||
%"77" = getelementptr inbounds i8, ptr %"67", i64 8
|
||||
store i32 %"42", ptr %"77", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,50 +2,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
||||
"50":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"40", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"5", align 8
|
||||
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"41" = extractvalue { i32, i1 } %0, 0
|
||||
%"12" = extractvalue { i32, i1 } %0, 1
|
||||
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"41" = extractvalue { i32, i1 } %2, 0
|
||||
%"12" = extractvalue { i32, i1 } %2, 1
|
||||
store i32 %"41", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"12", ptr addrspace(5) %"9", align 1
|
||||
%"15" = load i1, ptr addrspace(5) %"9", align 1
|
||||
%1 = zext i1 %"15" to i32
|
||||
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4)
|
||||
%3 = extractvalue { i32, i1 } %2, 0
|
||||
%4 = extractvalue { i32, i1 } %2, 1
|
||||
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1)
|
||||
%"42" = extractvalue { i32, i1 } %5, 0
|
||||
%6 = extractvalue { i32, i1 } %5, 1
|
||||
%"14" = xor i1 %4, %6
|
||||
%3 = zext i1 %"15" to i32
|
||||
%4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4)
|
||||
%5 = extractvalue { i32, i1 } %4, 0
|
||||
%6 = extractvalue { i32, i1 } %4, 1
|
||||
%7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3)
|
||||
%"42" = extractvalue { i32, i1 } %7, 0
|
||||
%8 = extractvalue { i32, i1 } %7, 1
|
||||
%"14" = xor i1 %6, %8
|
||||
store i32 %"42", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"14", ptr addrspace(5) %"9", align 1
|
||||
%"17" = load i1, ptr addrspace(5) %"9", align 1
|
||||
%7 = zext i1 %"17" to i32
|
||||
%"43" = add i32 0, %7
|
||||
%9 = zext i1 %"17" to i32
|
||||
%"43" = add i32 0, %9
|
||||
store i32 %"43", ptr addrspace(5) %"7", align 4
|
||||
%"20" = load i1, ptr addrspace(5) %"9", align 1
|
||||
%8 = zext i1 %"20" to i32
|
||||
%9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
|
||||
%10 = extractvalue { i32, i1 } %9, 0
|
||||
%11 = extractvalue { i32, i1 } %9, 1
|
||||
%12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8)
|
||||
%"44" = extractvalue { i32, i1 } %12, 0
|
||||
%13 = extractvalue { i32, i1 } %12, 1
|
||||
%"19" = xor i1 %11, %13
|
||||
%10 = zext i1 %"20" to i32
|
||||
%11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1)
|
||||
%12 = extractvalue { i32, i1 } %11, 0
|
||||
%13 = extractvalue { i32, i1 } %11, 1
|
||||
%14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %12, i32 %10)
|
||||
%"44" = extractvalue { i32, i1 } %14, 0
|
||||
%15 = extractvalue { i32, i1 } %14, 1
|
||||
%"19" = xor i1 %13, %15
|
||||
store i32 %"44", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"19", ptr addrspace(5) %"9", align 1
|
||||
%"22" = load i1, ptr addrspace(5) %"9", align 1
|
||||
%14 = zext i1 %"22" to i32
|
||||
%"45" = add i32 0, %14
|
||||
%16 = zext i1 %"22" to i32
|
||||
%"45" = add i32 0, %16
|
||||
store i32 %"45", ptr addrspace(5) %"8", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"24" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
@ -54,8 +56,8 @@ define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39",
|
|||
%"25" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"26" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"48" = inttoptr i64 %"25" to ptr
|
||||
%"52" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store i32 %"26", ptr %"52", align 4
|
||||
%"51" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store i32 %"26", ptr %"51", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
|
||||
"58":
|
||||
%"22" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
%"8" = alloca double, align 8, addrspace(5)
|
||||
%"9" = alloca double, align 8, addrspace(5)
|
||||
|
@ -14,6 +12,10 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr
|
|||
%"13" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca [4 x i32], align 16, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"50" = load i64, ptr addrspace(4) %"42", align 8
|
||||
store i64 %"50", ptr addrspace(5) %"10", align 8
|
||||
%"51" = load i64, ptr addrspace(4) %"43", align 8
|
||||
|
@ -29,30 +31,30 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr
|
|||
%"30" = load i1, ptr addrspace(5) %"7", align 1
|
||||
br i1 %"30", label %"6", label %"18"
|
||||
|
||||
"18": ; preds = %"58"
|
||||
"18": ; preds = %1
|
||||
%"31" = load i64, ptr addrspace(5) %"11", align 8
|
||||
%"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0
|
||||
store i64 %"31", ptr addrspace(5) %"60", align 8
|
||||
%"59" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0
|
||||
store i64 %"31", ptr addrspace(5) %"59", align 8
|
||||
%"32" = load i64, ptr addrspace(5) %"11", align 8
|
||||
%0 = inttoptr i64 %"32" to ptr
|
||||
%"21" = call [4 x i32] %0()
|
||||
%2 = inttoptr i64 %"32" to ptr
|
||||
%"21" = call [4 x i32] %2()
|
||||
store [4 x i32] %"21", ptr addrspace(5) %"48", align 4
|
||||
%"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0
|
||||
%"19" = load <2 x double>, ptr addrspace(5) %"62", align 16
|
||||
%"61" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0
|
||||
%"19" = load <2 x double>, ptr addrspace(5) %"61", align 16
|
||||
%"33" = extractelement <2 x double> %"19", i32 0
|
||||
%"34" = extractelement <2 x double> %"19", i32 1
|
||||
store double %"33", ptr addrspace(5) %"8", align 8
|
||||
store double %"34", ptr addrspace(5) %"9", align 8
|
||||
%"35" = load double, ptr addrspace(5) %"8", align 8
|
||||
%"36" = load double, ptr addrspace(5) %"9", align 8
|
||||
%1 = insertelement <2 x double> undef, double %"35", i32 0
|
||||
%"20" = insertelement <2 x double> %1, double %"36", i32 1
|
||||
%3 = insertelement <2 x double> undef, double %"35", i32 0
|
||||
%"20" = insertelement <2 x double> %3, double %"36", i32 1
|
||||
%"37" = load i64, ptr addrspace(5) %"10", align 8
|
||||
%"57" = inttoptr i64 %"37" to ptr addrspace(1)
|
||||
store <2 x double> %"20", ptr addrspace(1) %"57", align 16
|
||||
br label %"6"
|
||||
|
||||
"6": ; preds = %"18", %"58"
|
||||
"6": ; preds = %"18", %1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -8,9 +8,7 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
|
||||
|
||||
define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 {
|
||||
"73":
|
||||
%"33" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"33", align 1
|
||||
%"14" = alloca i64, align 8, addrspace(5)
|
||||
%"15" = alloca i64, align 8, addrspace(5)
|
||||
%"16" = alloca i64, align 8, addrspace(5)
|
||||
|
@ -19,10 +17,17 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
|
|||
%"19" = alloca i64, align 8, addrspace(5)
|
||||
%"20" = alloca i32, align 4, addrspace(5)
|
||||
%"59" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
%"60" = alloca i64, align 8, addrspace(5)
|
||||
%2 = alloca i64, align 8, addrspace(5)
|
||||
%"61" = alloca i32, align 4, addrspace(5)
|
||||
%"62" = alloca i64, align 8, addrspace(5)
|
||||
%3 = alloca i64, align 8, addrspace(5)
|
||||
%"63" = alloca i64, align 8, addrspace(5)
|
||||
br label %4
|
||||
|
||||
4: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"33", align 1
|
||||
%"34" = load i64, ptr addrspace(4) %"57", align 8
|
||||
store i64 %"34", ptr addrspace(5) %"14", align 8
|
||||
%"35" = load i64, ptr addrspace(4) %"58", align 8
|
||||
|
@ -37,28 +42,25 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
|
|||
%"40" = load i1, ptr addrspace(5) %"18", align 1
|
||||
br i1 %"40", label %"13", label %"27"
|
||||
|
||||
"27": ; preds = %"73"
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8
|
||||
%"66" = load i64, ptr addrspace(5) %0, align 8
|
||||
"27": ; preds = %4
|
||||
store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %1, align 8
|
||||
%"66" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"66", ptr addrspace(5) %"19", align 8
|
||||
%"42" = load i64, ptr addrspace(5) %"19", align 8
|
||||
store i64 %"42", ptr addrspace(5) %"59", align 8
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8
|
||||
%"68" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %2, align 8
|
||||
%"68" = load i64, ptr addrspace(5) %2, align 8
|
||||
store i64 %"68", ptr addrspace(5) %"19", align 8
|
||||
%"44" = load i64, ptr addrspace(5) %"19", align 8
|
||||
store i64 %"44", ptr addrspace(5) %"60", align 8
|
||||
store i32 1, ptr addrspace(5) %"61", align 4
|
||||
%2 = alloca i64, align 8, addrspace(5)
|
||||
store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8
|
||||
%"70" = load i64, ptr addrspace(5) %2, align 8
|
||||
store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %3, align 8
|
||||
%"70" = load i64, ptr addrspace(5) %3, align 8
|
||||
store i64 %"70", ptr addrspace(5) %"19", align 8
|
||||
%"46" = load i64, ptr addrspace(5) %"19", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"62", align 8
|
||||
%"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0
|
||||
store i64 1, ptr addrspace(5) %"75", align 8
|
||||
%"74" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0
|
||||
store i64 1, ptr addrspace(5) %"74", align 8
|
||||
%"28" = load i64, ptr addrspace(5) %"59", align 8
|
||||
%"29" = load i64, ptr addrspace(5) %"60", align 8
|
||||
%"30" = load i32, ptr addrspace(5) %"61", align 4
|
||||
|
@ -67,7 +69,7 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64)
|
|||
call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32")
|
||||
br label %"13"
|
||||
|
||||
"13": ; preds = %"27", %"73"
|
||||
"13": ; preds = %"27", %4
|
||||
%"48" = load i64, ptr addrspace(5) %"16", align 8
|
||||
%"47" = add i64 %"48", 1
|
||||
store i64 %"47", ptr addrspace(5) %"17", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"30":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"32", align 4
|
||||
%"31" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"31", align 4
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -4,42 +4,44 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0
|
||||
|
||||
define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 {
|
||||
"81":
|
||||
%"35" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"35", align 1
|
||||
%"15" = alloca i64, align 8, addrspace(5)
|
||||
%"16" = alloca i64, align 8, addrspace(5)
|
||||
%"17" = alloca i64, align 8, addrspace(5)
|
||||
%"18" = alloca i64, align 8, addrspace(5)
|
||||
%"19" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
%"66" = alloca i64, align 8, addrspace(5)
|
||||
%"68" = alloca i32, align 4, addrspace(5)
|
||||
%"70" = alloca i64, align 8, addrspace(5)
|
||||
%"72" = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"35", align 1
|
||||
%"36" = load i64, ptr addrspace(4) %"62", align 8
|
||||
store i64 %"36", ptr addrspace(5) %"15", align 8
|
||||
%"37" = load i64, ptr addrspace(4) %"63", align 8
|
||||
store i64 %"37", ptr addrspace(5) %"16", align 8
|
||||
%0 = alloca i32, align 4, addrspace(5)
|
||||
store i32 0, ptr addrspace(5) %0, align 4
|
||||
%"74" = load i32, ptr addrspace(5) %0, align 4
|
||||
store i32 0, ptr addrspace(5) %1, align 4
|
||||
%"74" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"74", ptr addrspace(5) %"19", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"15", align 8
|
||||
%"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
|
||||
store i64 %"39", ptr addrspace(5) %"83", align 8
|
||||
%"82" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0
|
||||
store i64 %"39", ptr addrspace(5) %"82", align 8
|
||||
%"40" = load i64, ptr addrspace(5) %"15", align 8
|
||||
%"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0
|
||||
store i64 %"40", ptr addrspace(5) %"85", align 8
|
||||
%"84" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0
|
||||
store i64 %"40", ptr addrspace(5) %"84", align 8
|
||||
%"41" = load i32, ptr addrspace(5) %"19", align 4
|
||||
%"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0
|
||||
store i32 %"41", ptr addrspace(5) %"87", align 4
|
||||
%"86" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0
|
||||
store i32 %"41", ptr addrspace(5) %"86", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"15", align 8
|
||||
%"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0
|
||||
store i64 %"42", ptr addrspace(5) %"89", align 8
|
||||
%"88" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0
|
||||
store i64 %"42", ptr addrspace(5) %"88", align 8
|
||||
%"43" = load i64, ptr addrspace(5) %"15", align 8
|
||||
%"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0
|
||||
store i64 %"43", ptr addrspace(5) %"91", align 8
|
||||
%"90" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0
|
||||
store i64 %"43", ptr addrspace(5) %"90", align 8
|
||||
%"30" = load i64, ptr addrspace(5) %"64", align 8
|
||||
%"31" = load i64, ptr addrspace(5) %"66", align 8
|
||||
%"32" = load i32, ptr addrspace(5) %"68", align 4
|
||||
|
|
|
@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
|
||||
|
||||
define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
|
||||
"37":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"5", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"29", align 8
|
||||
|
@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28",
|
|||
store i32 %"12", ptr addrspace(5) %"7", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"39", align 4
|
||||
%"38" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"38", align 4
|
||||
store i32 %"14", ptr addrspace(5) %"8", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"7", align 4
|
||||
store i32 %"16", ptr addrspace(3) @"4", align 4
|
||||
|
@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28",
|
|||
%"22" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"36" = inttoptr i64 %"22" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store i32 %"23", ptr %"41", align 4
|
||||
%"40" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store i32 %"23", ptr %"40", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -4,20 +4,22 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
|
||||
|
||||
define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
|
||||
"37":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca half, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"26", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"6", align 8
|
||||
%"12" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"28" = inttoptr i64 %"12" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"28", i64 2
|
||||
%"29" = load i16, ptr %"39", align 2
|
||||
%"38" = getelementptr inbounds i8, ptr %"28", i64 2
|
||||
%"29" = load i16, ptr %"38", align 2
|
||||
%"11" = bitcast i16 %"29" to half
|
||||
store half %"11", ptr addrspace(5) %"7", align 2
|
||||
%"14" = load i64, ptr addrspace(5) %"5", align 8
|
||||
|
@ -38,9 +40,9 @@ define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"
|
|||
%"20" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"21" = load half, ptr addrspace(5) %"7", align 2
|
||||
%"35" = inttoptr i64 %"20" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"35", i64 2
|
||||
%"40" = getelementptr inbounds i8, ptr %"35", i64 2
|
||||
%"36" = bitcast half %"21" to i16
|
||||
store i16 %"36", ptr %"41", align 2
|
||||
store i16 %"36", ptr %"40", align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@"4" = private addrspace(3) global [1024 x i8] undef, align 4
|
||||
|
||||
define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
|
||||
"37":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
%"8" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"5", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"29", align 8
|
||||
|
@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64)
|
|||
store float %"12", ptr addrspace(5) %"7", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load float, ptr %"39", align 4
|
||||
%"38" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load float, ptr %"38", align 4
|
||||
store float %"14", ptr addrspace(5) %"8", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"7", align 4
|
||||
store float %"16", ptr addrspace(3) @"4", align 4
|
||||
|
@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64)
|
|||
%"22" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"23" = load float, ptr addrspace(5) %"8", align 4
|
||||
%"36" = inttoptr i64 %"22" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store float %"23", ptr %"41", align 4
|
||||
%"40" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store float %"23", ptr %"40", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
|
||||
"38":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"29", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"30", align 8
|
||||
|
@ -20,14 +22,14 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29",
|
|||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"15" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"32" = inttoptr i64 %"14" to ptr
|
||||
%"40" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
%0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||
%"33" = extractvalue { i32, i1 } %0, 0
|
||||
%"39" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
%2 = cmpxchg ptr %"39", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||
%"33" = extractvalue { i32, i1 } %2, 0
|
||||
store i32 %"33", ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"35" = inttoptr i64 %"17" to ptr
|
||||
%"42" = getelementptr inbounds i8, ptr %"35", i64 4
|
||||
%"16" = load i32, ptr %"42", align 4
|
||||
%"41" = getelementptr inbounds i8, ptr %"35", i64 4
|
||||
%"16" = load i32, ptr %"41", align 4
|
||||
store i32 %"16", ptr addrspace(5) %"7", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"19" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -36,8 +38,8 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29",
|
|||
%"20" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"37" = inttoptr i64 %"20" to ptr
|
||||
%"44" = getelementptr inbounds i8, ptr %"37", i64 4
|
||||
store i32 %"21", ptr %"44", align 4
|
||||
%"43" = getelementptr inbounds i8, ptr %"37", i64 4
|
||||
store i32 %"21", ptr %"43", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -6,14 +6,16 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0
|
|||
declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 {
|
||||
"38":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"30", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"31", align 8
|
||||
|
@ -37,13 +39,13 @@ define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30",
|
|||
%"20" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"36" = inttoptr i64 %"20" to ptr
|
||||
%"48" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store i32 %"21", ptr %"48", align 4
|
||||
%"47" = getelementptr inbounds i8, ptr %"36", i64 4
|
||||
store i32 %"21", ptr %"47", align 4
|
||||
%"22" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"37" = inttoptr i64 %"22" to ptr
|
||||
%"50" = getelementptr inbounds i8, ptr %"37", i64 8
|
||||
store i32 %"23", ptr %"50", align 4
|
||||
%"49" = getelementptr inbounds i8, ptr %"37", i64 8
|
||||
store i32 %"23", ptr %"49", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
|
||||
"18":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"14", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"15", align 8
|
||||
|
|
|
@ -2,33 +2,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 {
|
||||
"23":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"11" = load i64, ptr addrspace(4) %"19", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"4", align 8
|
||||
%"12" = load i64, ptr addrspace(4) %"20", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"5", align 8
|
||||
%"13" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"21" = inttoptr i64 %"13" to ptr
|
||||
%0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16
|
||||
%"8" = bitcast i128 %0 to <2 x i64>
|
||||
%2 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16
|
||||
%"8" = bitcast i128 %2 to <2 x i64>
|
||||
%"14" = extractelement <2 x i64> %"8", i32 0
|
||||
%"15" = extractelement <2 x i64> %"8", i32 1
|
||||
store i64 %"14", ptr addrspace(5) %"6", align 8
|
||||
store i64 %"15", ptr addrspace(5) %"7", align 8
|
||||
%"16" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"17" = load i64, ptr addrspace(5) %"7", align 8
|
||||
%1 = insertelement <2 x i64> undef, i64 %"16", i32 0
|
||||
%"9" = insertelement <2 x i64> %1, i64 %"17", i32 1
|
||||
%3 = insertelement <2 x i64> undef, i64 %"16", i32 0
|
||||
%"9" = insertelement <2 x i64> %3, i64 %"17", i32 1
|
||||
%"18" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"22" = inttoptr i64 %"18" to ptr
|
||||
%2 = bitcast <2 x i64> %"9" to i128
|
||||
store atomic i128 %2, ptr %"22" syncscope("agent-one-as") release, align 16
|
||||
%4 = bitcast <2 x i64> %"9" to i128
|
||||
store atomic i128 %4, ptr %"22" syncscope("agent-one-as") release, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"30":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -23,8 +25,8 @@ define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"
|
|||
store i32 %"14", ptr %"25", align 4
|
||||
%"16" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"26" = inttoptr i64 %"16" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"26", i64 4
|
||||
%"15" = load i32, ptr %"32", align 4
|
||||
%"31" = getelementptr inbounds i8, ptr %"26", i64 4
|
||||
%"15" = load i32, ptr %"31", align 4
|
||||
store i32 %"15", ptr addrspace(5) %"7", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"19" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,22 +2,24 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca double, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load double, ptr addrspace(4) %"17", align 8
|
||||
store double %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"6", align 8
|
||||
%"12" = load double, ptr addrspace(5) %"4", align 8
|
||||
%"20" = bitcast double %"12" to i64
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"20", ptr addrspace(5) %0, align 8
|
||||
%"11" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"20", ptr addrspace(5) %1, align 8
|
||||
%"11" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"11", ptr addrspace(5) %"5", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = inttoptr i64 %"14" to ptr
|
||||
|
|
|
@ -4,8 +4,10 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare void @__zluda_ptx_impl__barrier_sync(i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @barrier() #1 {
|
||||
"4":
|
||||
%"2" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"2", align 1
|
||||
call void @__zluda_ptx_impl__barrier_sync(i32 0)
|
||||
ret void
|
||||
|
|
|
@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
|
||||
"34":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"29", align 8
|
||||
|
@ -22,13 +24,13 @@ define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr
|
|||
store i32 %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"41", align 4
|
||||
%"40" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"40", align 4
|
||||
store i32 %"14", ptr addrspace(5) %"7", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"32" = inttoptr i64 %"17" to ptr
|
||||
%"43" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load i32, ptr %"43", align 4
|
||||
%"42" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load i32, ptr %"42", align 4
|
||||
store i32 %"16", ptr addrspace(5) %"8", align 4
|
||||
%"19" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"20" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
|
||||
"44":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"11" = load i64, ptr addrspace(4) %"34", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"4", align 8
|
||||
%"12" = load i64, ptr addrspace(4) %"35", align 8
|
||||
|
@ -23,18 +25,18 @@ define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr
|
|||
store i32 %"13", ptr addrspace(5) %"6", align 4
|
||||
%"16" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"37" = inttoptr i64 %"16" to ptr
|
||||
%"52" = getelementptr inbounds i8, ptr %"37", i64 4
|
||||
%"15" = load i32, ptr %"52", align 4
|
||||
%"51" = getelementptr inbounds i8, ptr %"37", i64 4
|
||||
%"15" = load i32, ptr %"51", align 4
|
||||
store i32 %"15", ptr addrspace(5) %"7", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"38" = inttoptr i64 %"18" to ptr
|
||||
%"54" = getelementptr inbounds i8, ptr %"38", i64 8
|
||||
%"17" = load i32, ptr %"54", align 4
|
||||
%"53" = getelementptr inbounds i8, ptr %"38", i64 8
|
||||
%"17" = load i32, ptr %"53", align 4
|
||||
store i32 %"17", ptr addrspace(5) %"8", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"39" = inttoptr i64 %"20" to ptr
|
||||
%"56" = getelementptr inbounds i8, ptr %"39", i64 12
|
||||
%"19" = load i32, ptr %"56", align 4
|
||||
%"55" = getelementptr inbounds i8, ptr %"39", i64 12
|
||||
%"19" = load i32, ptr %"55", align 4
|
||||
store i32 %"19", ptr addrspace(5) %"9", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"23" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
||||
"52":
|
||||
%"12" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"12", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
|
|||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"12", align 1
|
||||
%"13" = load i64, ptr addrspace(4) %"41", align 8
|
||||
store i64 %"13", ptr addrspace(5) %"4", align 8
|
||||
%"14" = load i64, ptr addrspace(4) %"42", align 8
|
||||
|
@ -23,31 +25,31 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
|
|||
store i32 %"15", ptr addrspace(5) %"6", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"44" = inttoptr i64 %"18" to ptr
|
||||
%"54" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"17" = load i32, ptr %"54", align 4
|
||||
%"53" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"17" = load i32, ptr %"53", align 4
|
||||
store i32 %"17", ptr addrspace(5) %"7", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"45" = inttoptr i64 %"20" to ptr
|
||||
%"56" = getelementptr inbounds i8, ptr %"45", i64 8
|
||||
%"19" = load i32, ptr %"56", align 4
|
||||
%"55" = getelementptr inbounds i8, ptr %"45", i64 8
|
||||
%"19" = load i32, ptr %"55", align 4
|
||||
store i32 %"19", ptr addrspace(5) %"8", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%0 = icmp eq i32 %"22", 0
|
||||
%1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
|
||||
%2 = sub i32 31, %1
|
||||
%"46" = select i1 %0, i32 -1, i32 %2
|
||||
%2 = icmp eq i32 %"22", 0
|
||||
%3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
|
||||
%4 = sub i32 31, %3
|
||||
%"46" = select i1 %2, i32 -1, i32 %4
|
||||
store i32 %"46", ptr addrspace(5) %"9", align 4
|
||||
%"24" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%3 = icmp eq i32 %"24", 0
|
||||
%4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
|
||||
%5 = sub i32 31, %4
|
||||
%"47" = select i1 %3, i32 -1, i32 %5
|
||||
%5 = icmp eq i32 %"24", 0
|
||||
%6 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
|
||||
%7 = sub i32 31, %6
|
||||
%"47" = select i1 %5, i32 -1, i32 %7
|
||||
store i32 %"47", ptr addrspace(5) %"10", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%6 = icmp eq i32 %"26", 0
|
||||
%7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
|
||||
%8 = sub i32 31, %7
|
||||
%"48" = select i1 %6, i32 -1, i32 %8
|
||||
%8 = icmp eq i32 %"26", 0
|
||||
%9 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
|
||||
%10 = sub i32 31, %9
|
||||
%"48" = select i1 %8, i32 -1, i32 %10
|
||||
store i32 %"48", ptr addrspace(5) %"11", align 4
|
||||
%"27" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"28" = load i32, ptr addrspace(5) %"9", align 4
|
||||
|
@ -56,13 +58,13 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt
|
|||
%"29" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"30" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"50" = inttoptr i64 %"29" to ptr
|
||||
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
store i32 %"30", ptr %"58", align 4
|
||||
%"57" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
store i32 %"30", ptr %"57", align 4
|
||||
%"31" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"32" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%"51" = inttoptr i64 %"31" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"51", i64 8
|
||||
store i32 %"32", ptr %"60", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr %"51", i64 8
|
||||
store i32 %"32", ptr %"59", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
||||
"52":
|
||||
%"12" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"12", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
|
|||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"12", align 1
|
||||
%"13" = load i64, ptr addrspace(4) %"41", align 8
|
||||
store i64 %"13", ptr addrspace(5) %"4", align 8
|
||||
%"14" = load i64, ptr addrspace(4) %"42", align 8
|
||||
|
@ -23,28 +25,28 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
|
|||
store i32 %"15", ptr addrspace(5) %"6", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"44" = inttoptr i64 %"18" to ptr
|
||||
%"54" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"17" = load i32, ptr %"54", align 4
|
||||
%"53" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"17" = load i32, ptr %"53", align 4
|
||||
store i32 %"17", ptr addrspace(5) %"7", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"45" = inttoptr i64 %"20" to ptr
|
||||
%"56" = getelementptr inbounds i8, ptr %"45", i64 8
|
||||
%"19" = load i32, ptr %"56", align 4
|
||||
%"55" = getelementptr inbounds i8, ptr %"45", i64 8
|
||||
%"19" = load i32, ptr %"55", align 4
|
||||
store i32 %"19", ptr addrspace(5) %"8", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%0 = icmp eq i32 %"22", 0
|
||||
%1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
|
||||
%"46" = select i1 %0, i32 -1, i32 %1
|
||||
%2 = icmp eq i32 %"22", 0
|
||||
%3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true)
|
||||
%"46" = select i1 %2, i32 -1, i32 %3
|
||||
store i32 %"46", ptr addrspace(5) %"9", align 4
|
||||
%"24" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%2 = icmp eq i32 %"24", 0
|
||||
%3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
|
||||
%"47" = select i1 %2, i32 -1, i32 %3
|
||||
%4 = icmp eq i32 %"24", 0
|
||||
%5 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true)
|
||||
%"47" = select i1 %4, i32 -1, i32 %5
|
||||
store i32 %"47", ptr addrspace(5) %"10", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%4 = icmp eq i32 %"26", 0
|
||||
%5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
|
||||
%"48" = select i1 %4, i32 -1, i32 %5
|
||||
%6 = icmp eq i32 %"26", 0
|
||||
%7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true)
|
||||
%"48" = select i1 %6, i32 -1, i32 %7
|
||||
store i32 %"48", ptr addrspace(5) %"11", align 4
|
||||
%"27" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"28" = load i32, ptr addrspace(5) %"9", align 4
|
||||
|
@ -53,13 +55,13 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64)
|
|||
%"29" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"30" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"50" = inttoptr i64 %"29" to ptr
|
||||
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
store i32 %"30", ptr %"58", align 4
|
||||
%"57" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
store i32 %"30", ptr %"57", align 4
|
||||
%"31" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"32" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%"51" = inttoptr i64 %"31" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"51", i64 8
|
||||
store i32 %"32", ptr %"60", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr %"51", i64 8
|
||||
store i32 %"32", ptr %"59", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"26":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 {
|
||||
"28":
|
||||
%"11" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"10" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"12" = load i64, ptr addrspace(4) %"24", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"7", align 8
|
||||
%"13" = load i64, ptr addrspace(4) %"25", align 8
|
||||
|
@ -19,19 +21,19 @@ define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr
|
|||
store i64 %"14", ptr addrspace(5) %"9", align 8
|
||||
br label %"4"
|
||||
|
||||
"4": ; preds = %"28"
|
||||
"4": ; preds = %1
|
||||
%"17" = load i64, ptr addrspace(5) %"9", align 8
|
||||
%"16" = add i64 %"17", 1
|
||||
store i64 %"16", ptr addrspace(5) %"10", align 8
|
||||
br label %"6"
|
||||
|
||||
0: ; No predecessors!
|
||||
"5": ; No predecessors!
|
||||
%"19" = load i64, ptr addrspace(5) %"9", align 8
|
||||
%"18" = add i64 %"19", 2
|
||||
store i64 %"18", ptr addrspace(5) %"10", align 8
|
||||
br label %"6"
|
||||
|
||||
"6": ; preds = %0, %"4"
|
||||
"6": ; preds = %"5", %"4"
|
||||
%"20" = load i64, ptr addrspace(5) %"8", align 8
|
||||
%"21" = load i64, ptr addrspace(5) %"10", align 8
|
||||
%"27" = inttoptr i64 %"20" to ptr
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define private i64 @incr(i64 %"29") #0 {
|
||||
"49":
|
||||
%"18" = alloca i64, align 8, addrspace(5)
|
||||
%"17" = alloca i64, align 8, addrspace(5)
|
||||
%"20" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"20", align 1
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"14" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i64 %"29", ptr addrspace(5) %"18", align 8
|
||||
store i1 false, ptr addrspace(5) %"20", align 1
|
||||
%"30" = load i64, ptr addrspace(5) %"18", align 8
|
||||
store i64 %"30", ptr addrspace(5) %"43", align 8
|
||||
%"31" = load i64, ptr addrspace(5) %"43", align 8
|
||||
|
@ -27,14 +29,16 @@ define private i64 @incr(i64 %"29") #0 {
|
|||
}
|
||||
|
||||
define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
"48":
|
||||
%"19" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"19", align 1
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"19", align 1
|
||||
%"21" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"21", ptr addrspace(5) %"7", align 8
|
||||
%"22" = load i64, ptr addrspace(4) %"39", align 8
|
||||
|
|
|
@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define private [2 x i32] @incr(i64 %"21") #0 {
|
||||
"56":
|
||||
%"16" = alloca i64, align 8, addrspace(5)
|
||||
%"15" = alloca [2 x i32], align 4, addrspace(5)
|
||||
%"19" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"19", align 1
|
||||
%"42" = alloca [2 x i32], align 4, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i64 %"21", ptr addrspace(5) %"16", align 8
|
||||
store i1 false, ptr addrspace(5) %"19", align 1
|
||||
%"22" = load i64, ptr addrspace(5) %"16", align 8
|
||||
store i64 %"22", ptr addrspace(5) %"43", align 8
|
||||
%"23" = load i64, ptr addrspace(5) %"43", align 8
|
||||
|
@ -27,15 +29,17 @@ define private [2 x i32] @incr(i64 %"21") #0 {
|
|||
}
|
||||
|
||||
define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
|
||||
"57":
|
||||
%"20" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"20", align 1
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"10" = alloca i64, align 8, addrspace(5)
|
||||
%"11" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca [2 x i32], align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"20", align 1
|
||||
%"29" = load i64, ptr addrspace(4) %"44", align 8
|
||||
store i64 %"29", ptr addrspace(5) %"8", align 8
|
||||
%"30" = load i64, ptr addrspace(4) %"45", align 8
|
||||
|
@ -49,11 +53,11 @@ define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44",
|
|||
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8
|
||||
%"17" = load i64, ptr addrspace(5) %"46", align 8
|
||||
%"35" = load i64, ptr addrspace(5) %"11", align 8
|
||||
%0 = inttoptr i64 %"35" to ptr
|
||||
%"18" = call [2 x i32] %0(i64 %"17")
|
||||
%2 = inttoptr i64 %"35" to ptr
|
||||
%"18" = call [2 x i32] %2(i64 %"17")
|
||||
store [2 x i32] %"18", ptr addrspace(5) %"47", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
|
||||
%"36" = load i64, ptr addrspace(5) %"59", align 8
|
||||
%"57" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0
|
||||
%"36" = load i64, ptr addrspace(5) %"57", align 8
|
||||
store i64 %"36", ptr addrspace(5) %"10", align 8
|
||||
%"37" = load i64, ptr addrspace(5) %"9", align 8
|
||||
%"38" = load i64, ptr addrspace(5) %"10", align 8
|
||||
|
|
|
@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
%struct.i64i32 = type { i64, i32 }
|
||||
|
||||
define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 {
|
||||
"62":
|
||||
%"18" = alloca i32, align 4, addrspace(5)
|
||||
%"19" = alloca i32, align 4, addrspace(5)
|
||||
%"16" = alloca i64, align 8, addrspace(5)
|
||||
%"17" = alloca i32, align 4, addrspace(5)
|
||||
%"22" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"20" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i32 %"39", ptr addrspace(5) %"18", align 4
|
||||
store i32 %"40", ptr addrspace(5) %"19", align 4
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"42" = load i32, ptr addrspace(5) %"18", align 4
|
||||
%"43" = load i32, ptr addrspace(5) %"19", align 4
|
||||
%"41" = add i32 %"42", %"43"
|
||||
|
@ -27,15 +29,13 @@ define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 {
|
|||
store i32 %"46", ptr addrspace(5) %"17", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"16", align 8
|
||||
%"50" = load i32, ptr addrspace(5) %"17", align 4
|
||||
%0 = insertvalue %struct.i64i32 undef, i64 %"49", 0
|
||||
%1 = insertvalue %struct.i64i32 %0, i32 %"50", 1
|
||||
ret %struct.i64i32 %1
|
||||
%2 = insertvalue %struct.i64i32 undef, i64 %"49", 0
|
||||
%3 = insertvalue %struct.i64i32 %2, i32 %"50", 1
|
||||
ret %struct.i64i32 %3
|
||||
}
|
||||
|
||||
define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 {
|
||||
"61":
|
||||
%"21" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"21", align 1
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"10" = alloca i64, align 8, addrspace(5)
|
||||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -43,6 +43,10 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
|
|||
%"13" = alloca i64, align 8, addrspace(5)
|
||||
%"14" = alloca i64, align 8, addrspace(5)
|
||||
%"15" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"21", align 1
|
||||
%"23" = load i64, ptr addrspace(4) %"55", align 8
|
||||
store i64 %"23", ptr addrspace(5) %"9", align 8
|
||||
%"24" = load i64, ptr addrspace(4) %"56", align 8
|
||||
|
@ -53,14 +57,14 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
|
|||
store i32 %"25", ptr addrspace(5) %"11", align 4
|
||||
%"28" = load i64, ptr addrspace(5) %"9", align 8
|
||||
%"58" = inttoptr i64 %"28" to ptr addrspace(1)
|
||||
%"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
|
||||
%"27" = load i32, ptr addrspace(1) %"64", align 4
|
||||
%"62" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
|
||||
%"27" = load i32, ptr addrspace(1) %"62", align 4
|
||||
store i32 %"27", ptr addrspace(5) %"12", align 4
|
||||
%"31" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%"32" = load i32, ptr addrspace(5) %"12", align 4
|
||||
%0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32")
|
||||
%"29" = extractvalue %struct.i64i32 %0, 0
|
||||
%"30" = extractvalue %struct.i64i32 %0, 1
|
||||
%2 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32")
|
||||
%"29" = extractvalue %struct.i64i32 %2, 0
|
||||
%"30" = extractvalue %struct.i64i32 %2, 1
|
||||
store i64 %"29", ptr addrspace(5) %"13", align 8
|
||||
store i32 %"30", ptr addrspace(5) %"15", align 4
|
||||
%"34" = load i32, ptr addrspace(5) %"15", align 4
|
||||
|
@ -73,8 +77,8 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6
|
|||
%"37" = load i64, ptr addrspace(5) %"10", align 8
|
||||
%"38" = load i64, ptr addrspace(5) %"14", align 8
|
||||
%"60" = inttoptr i64 %"37" to ptr addrspace(1)
|
||||
%"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8
|
||||
store i64 %"38", ptr addrspace(1) %"66", align 8
|
||||
%"64" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8
|
||||
store i64 %"38", ptr addrspace(1) %"64", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define private i64 @incr(i64 %"33") #0 {
|
||||
"54":
|
||||
%"20" = alloca i64, align 8, addrspace(5)
|
||||
%"19" = alloca i64, align 8, addrspace(5)
|
||||
%"22" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"16" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i64 %"33", ptr addrspace(5) %"20", align 8
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"34" = load i64, ptr addrspace(5) %"20", align 8
|
||||
store i64 %"34", ptr addrspace(5) %"47", align 8
|
||||
%"35" = load i64, ptr addrspace(5) %"47", align 8
|
||||
|
@ -27,15 +29,17 @@ define private i64 @incr(i64 %"33") #0 {
|
|||
}
|
||||
|
||||
define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
|
||||
"53":
|
||||
%"21" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"21", align 1
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"10" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"21", align 1
|
||||
%"23" = load i64, ptr addrspace(4) %"42", align 8
|
||||
store i64 %"23", ptr addrspace(5) %"7", align 8
|
||||
%"24" = load i64, ptr addrspace(4) %"43", align 8
|
||||
|
@ -49,8 +53,8 @@ define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %
|
|||
store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8
|
||||
%"17" = load i64, ptr addrspace(5) %"44", align 8
|
||||
%"29" = load i64, ptr addrspace(5) %"10", align 8
|
||||
%0 = inttoptr i64 %"29" to ptr
|
||||
%"18" = call i64 %0(i64 %"17")
|
||||
%2 = inttoptr i64 %"29" to ptr
|
||||
%"18" = call i64 %2(i64 %"17")
|
||||
store i64 %"18", ptr addrspace(5) %"45", align 8
|
||||
%"30" = load i64, ptr addrspace(5) %"45", align 8
|
||||
store i64 %"30", ptr addrspace(5) %"9", align 8
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 {
|
||||
"268":
|
||||
%"22" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -23,147 +21,151 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %
|
|||
%"19" = alloca i32, align 4, addrspace(5)
|
||||
%"20" = alloca i32, align 4, addrspace(5)
|
||||
%"21" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"22", align 1
|
||||
%"37" = load i64, ptr addrspace(4) %"209", align 8
|
||||
store i64 %"37", ptr addrspace(5) %"5", align 8
|
||||
%0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
|
||||
%"210" = extractvalue { i32, i1 } %0, 0
|
||||
%"23" = extractvalue { i32, i1 } %0, 1
|
||||
%2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
|
||||
%"210" = extractvalue { i32, i1 } %2, 0
|
||||
%"23" = extractvalue { i32, i1 } %2, 1
|
||||
store i32 %"210", ptr addrspace(5) %"6", align 4
|
||||
%"39" = xor i1 %"23", true
|
||||
store i1 %"39", ptr addrspace(5) %"22", align 1
|
||||
%"41" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%1 = zext i1 %"41" to i32
|
||||
%"211" = add i32 0, %1
|
||||
%3 = zext i1 %"41" to i32
|
||||
%"211" = add i32 0, %3
|
||||
store i32 %"211", ptr addrspace(5) %"6", align 4
|
||||
%"42" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"24" = xor i1 %"42", true
|
||||
%2 = zext i1 %"24" to i32
|
||||
%"212" = sub i32 0, %2
|
||||
%4 = zext i1 %"24" to i32
|
||||
%"212" = sub i32 0, %4
|
||||
store i32 %"212", ptr addrspace(5) %"7", align 4
|
||||
%3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
|
||||
%"213" = extractvalue { i32, i1 } %3, 0
|
||||
%"25" = extractvalue { i32, i1 } %3, 1
|
||||
%5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
|
||||
%"213" = extractvalue { i32, i1 } %5, 0
|
||||
%"25" = extractvalue { i32, i1 } %5, 1
|
||||
store i32 %"213", ptr addrspace(5) %"8", align 4
|
||||
%"45" = xor i1 %"25", true
|
||||
store i1 %"45", ptr addrspace(5) %"22", align 1
|
||||
%"47" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%4 = zext i1 %"47" to i32
|
||||
%"214" = add i32 0, %4
|
||||
%6 = zext i1 %"47" to i32
|
||||
%"214" = add i32 0, %6
|
||||
store i32 %"214", ptr addrspace(5) %"8", align 4
|
||||
%"48" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"26" = xor i1 %"48", true
|
||||
%5 = zext i1 %"26" to i32
|
||||
%"215" = sub i32 0, %5
|
||||
%7 = zext i1 %"26" to i32
|
||||
%"215" = sub i32 0, %7
|
||||
store i32 %"215", ptr addrspace(5) %"9", align 4
|
||||
%6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"216" = extractvalue { i32, i1 } %6, 0
|
||||
%"51" = extractvalue { i32, i1 } %6, 1
|
||||
%8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"216" = extractvalue { i32, i1 } %8, 0
|
||||
%"51" = extractvalue { i32, i1 } %8, 1
|
||||
store i32 %"216", ptr addrspace(5) %"10", align 4
|
||||
store i1 %"51", ptr addrspace(5) %"22", align 1
|
||||
%"53" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%7 = zext i1 %"53" to i32
|
||||
%"217" = add i32 0, %7
|
||||
%9 = zext i1 %"53" to i32
|
||||
%"217" = add i32 0, %9
|
||||
store i32 %"217", ptr addrspace(5) %"10", align 4
|
||||
%"54" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"27" = xor i1 %"54", true
|
||||
%8 = zext i1 %"27" to i32
|
||||
%"218" = sub i32 0, %8
|
||||
%10 = zext i1 %"27" to i32
|
||||
%"218" = sub i32 0, %10
|
||||
store i32 %"218", ptr addrspace(5) %"11", align 4
|
||||
%9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"219" = extractvalue { i32, i1 } %9, 0
|
||||
%"57" = extractvalue { i32, i1 } %9, 1
|
||||
%11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"219" = extractvalue { i32, i1 } %11, 0
|
||||
%"57" = extractvalue { i32, i1 } %11, 1
|
||||
store i32 %"219", ptr addrspace(5) %"12", align 4
|
||||
store i1 %"57", ptr addrspace(5) %"22", align 1
|
||||
%"59" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%10 = zext i1 %"59" to i32
|
||||
%"220" = add i32 0, %10
|
||||
%12 = zext i1 %"59" to i32
|
||||
%"220" = add i32 0, %12
|
||||
store i32 %"220", ptr addrspace(5) %"12", align 4
|
||||
%"60" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"28" = xor i1 %"60", true
|
||||
%11 = zext i1 %"28" to i32
|
||||
%"221" = sub i32 0, %11
|
||||
%13 = zext i1 %"28" to i32
|
||||
%"221" = sub i32 0, %13
|
||||
store i32 %"221", ptr addrspace(5) %"13", align 4
|
||||
%12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"222" = extractvalue { i32, i1 } %12, 0
|
||||
%"63" = extractvalue { i32, i1 } %12, 1
|
||||
%14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"222" = extractvalue { i32, i1 } %14, 0
|
||||
%"63" = extractvalue { i32, i1 } %14, 1
|
||||
store i32 %"222", ptr addrspace(5) %"14", align 4
|
||||
store i1 %"63", ptr addrspace(5) %"22", align 1
|
||||
%"65" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%13 = zext i1 %"65" to i32
|
||||
%"223" = add i32 0, %13
|
||||
%15 = zext i1 %"65" to i32
|
||||
%"223" = add i32 0, %15
|
||||
store i32 %"223", ptr addrspace(5) %"14", align 4
|
||||
%"66" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"29" = xor i1 %"66", true
|
||||
%14 = zext i1 %"29" to i32
|
||||
%"224" = sub i32 0, %14
|
||||
%16 = zext i1 %"29" to i32
|
||||
%"224" = sub i32 0, %16
|
||||
store i32 %"224", ptr addrspace(5) %"15", align 4
|
||||
%15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"225" = extractvalue { i32, i1 } %15, 0
|
||||
%"69" = extractvalue { i32, i1 } %15, 1
|
||||
%17 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1)
|
||||
%"225" = extractvalue { i32, i1 } %17, 0
|
||||
%"69" = extractvalue { i32, i1 } %17, 1
|
||||
store i32 %"225", ptr addrspace(5) %"16", align 4
|
||||
store i1 %"69", ptr addrspace(5) %"22", align 1
|
||||
%"71" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%16 = zext i1 %"71" to i32
|
||||
%"226" = add i32 0, %16
|
||||
%18 = zext i1 %"71" to i32
|
||||
%"226" = add i32 0, %18
|
||||
store i32 %"226", ptr addrspace(5) %"16", align 4
|
||||
%"72" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"30" = xor i1 %"72", true
|
||||
%17 = zext i1 %"30" to i32
|
||||
%"227" = sub i32 0, %17
|
||||
%19 = zext i1 %"30" to i32
|
||||
%"227" = sub i32 0, %19
|
||||
store i32 %"227", ptr addrspace(5) %"17", align 4
|
||||
%18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"228" = extractvalue { i32, i1 } %18, 0
|
||||
%"75" = extractvalue { i32, i1 } %18, 1
|
||||
%20 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"228" = extractvalue { i32, i1 } %20, 0
|
||||
%"75" = extractvalue { i32, i1 } %20, 1
|
||||
store i32 %"228", ptr addrspace(5) %"18", align 4
|
||||
store i1 %"75", ptr addrspace(5) %"22", align 1
|
||||
%"76" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"31" = xor i1 %"76", true
|
||||
%19 = zext i1 %"31" to i32
|
||||
%20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
|
||||
%21 = extractvalue { i32, i1 } %20, 0
|
||||
%22 = extractvalue { i32, i1 } %20, 1
|
||||
%23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19)
|
||||
%"229" = extractvalue { i32, i1 } %23, 0
|
||||
%24 = extractvalue { i32, i1 } %23, 1
|
||||
%"32" = xor i1 %22, %24
|
||||
%21 = zext i1 %"31" to i32
|
||||
%22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0)
|
||||
%23 = extractvalue { i32, i1 } %22, 0
|
||||
%24 = extractvalue { i32, i1 } %22, 1
|
||||
%25 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %23, i32 %21)
|
||||
%"229" = extractvalue { i32, i1 } %25, 0
|
||||
%26 = extractvalue { i32, i1 } %25, 1
|
||||
%"32" = xor i1 %24, %26
|
||||
store i32 %"229", ptr addrspace(5) %"18", align 4
|
||||
%"78" = xor i1 %"32", true
|
||||
store i1 %"78", ptr addrspace(5) %"22", align 1
|
||||
%"80" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%25 = zext i1 %"80" to i32
|
||||
%"230" = add i32 0, %25
|
||||
%27 = zext i1 %"80" to i32
|
||||
%"230" = add i32 0, %27
|
||||
store i32 %"230", ptr addrspace(5) %"18", align 4
|
||||
%"81" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"33" = xor i1 %"81", true
|
||||
%26 = zext i1 %"33" to i32
|
||||
%"231" = sub i32 0, %26
|
||||
%28 = zext i1 %"33" to i32
|
||||
%"231" = sub i32 0, %28
|
||||
store i32 %"231", ptr addrspace(5) %"19", align 4
|
||||
%27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"232" = extractvalue { i32, i1 } %27, 0
|
||||
%"84" = extractvalue { i32, i1 } %27, 1
|
||||
%29 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
|
||||
%"232" = extractvalue { i32, i1 } %29, 0
|
||||
%"84" = extractvalue { i32, i1 } %29, 1
|
||||
store i32 %"232", ptr addrspace(5) %"20", align 4
|
||||
store i1 %"84", ptr addrspace(5) %"22", align 1
|
||||
%"85" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"34" = xor i1 %"85", true
|
||||
%28 = zext i1 %"34" to i32
|
||||
%29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
|
||||
%30 = extractvalue { i32, i1 } %29, 0
|
||||
%31 = extractvalue { i32, i1 } %29, 1
|
||||
%32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28)
|
||||
%"233" = extractvalue { i32, i1 } %32, 0
|
||||
%33 = extractvalue { i32, i1 } %32, 1
|
||||
%"35" = xor i1 %31, %33
|
||||
%30 = zext i1 %"34" to i32
|
||||
%31 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1)
|
||||
%32 = extractvalue { i32, i1 } %31, 0
|
||||
%33 = extractvalue { i32, i1 } %31, 1
|
||||
%34 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %32, i32 %30)
|
||||
%"233" = extractvalue { i32, i1 } %34, 0
|
||||
%35 = extractvalue { i32, i1 } %34, 1
|
||||
%"35" = xor i1 %33, %35
|
||||
store i32 %"233", ptr addrspace(5) %"20", align 4
|
||||
%"87" = xor i1 %"35", true
|
||||
store i1 %"87", ptr addrspace(5) %"22", align 1
|
||||
%"89" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%34 = zext i1 %"89" to i32
|
||||
%"234" = add i32 0, %34
|
||||
%36 = zext i1 %"89" to i32
|
||||
%"234" = add i32 0, %36
|
||||
store i32 %"234", ptr addrspace(5) %"20", align 4
|
||||
%"90" = load i1, ptr addrspace(5) %"22", align 1
|
||||
%"36" = xor i1 %"90", true
|
||||
%35 = zext i1 %"36" to i32
|
||||
%"235" = sub i32 0, %35
|
||||
%37 = zext i1 %"36" to i32
|
||||
%"235" = sub i32 0, %37
|
||||
store i32 %"235", ptr addrspace(5) %"21", align 4
|
||||
%"92" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"93" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -172,78 +174,78 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %
|
|||
%"94" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"95" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"238" = inttoptr i64 %"94" to ptr
|
||||
%"270" = getelementptr inbounds i8, ptr %"238", i64 4
|
||||
store i32 %"95", ptr %"270", align 4
|
||||
%"269" = getelementptr inbounds i8, ptr %"238", i64 4
|
||||
store i32 %"95", ptr %"269", align 4
|
||||
%"96" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"97" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"240" = inttoptr i64 %"96" to ptr
|
||||
%"272" = getelementptr inbounds i8, ptr %"240", i64 8
|
||||
store i32 %"97", ptr %"272", align 4
|
||||
%"271" = getelementptr inbounds i8, ptr %"240", i64 8
|
||||
store i32 %"97", ptr %"271", align 4
|
||||
%"98" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"99" = load i32, ptr addrspace(5) %"12", align 4
|
||||
%"242" = inttoptr i64 %"98" to ptr
|
||||
%"274" = getelementptr inbounds i8, ptr %"242", i64 12
|
||||
store i32 %"99", ptr %"274", align 4
|
||||
%"273" = getelementptr inbounds i8, ptr %"242", i64 12
|
||||
store i32 %"99", ptr %"273", align 4
|
||||
%"100" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"101" = load i32, ptr addrspace(5) %"14", align 4
|
||||
%"244" = inttoptr i64 %"100" to ptr
|
||||
%"276" = getelementptr inbounds i8, ptr %"244", i64 16
|
||||
store i32 %"101", ptr %"276", align 4
|
||||
%"275" = getelementptr inbounds i8, ptr %"244", i64 16
|
||||
store i32 %"101", ptr %"275", align 4
|
||||
%"102" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"103" = load i32, ptr addrspace(5) %"16", align 4
|
||||
%"246" = inttoptr i64 %"102" to ptr
|
||||
%"278" = getelementptr inbounds i8, ptr %"246", i64 20
|
||||
store i32 %"103", ptr %"278", align 4
|
||||
%"277" = getelementptr inbounds i8, ptr %"246", i64 20
|
||||
store i32 %"103", ptr %"277", align 4
|
||||
%"104" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"105" = load i32, ptr addrspace(5) %"18", align 4
|
||||
%"248" = inttoptr i64 %"104" to ptr
|
||||
%"280" = getelementptr inbounds i8, ptr %"248", i64 24
|
||||
store i32 %"105", ptr %"280", align 4
|
||||
%"279" = getelementptr inbounds i8, ptr %"248", i64 24
|
||||
store i32 %"105", ptr %"279", align 4
|
||||
%"106" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"107" = load i32, ptr addrspace(5) %"20", align 4
|
||||
%"250" = inttoptr i64 %"106" to ptr
|
||||
%"282" = getelementptr inbounds i8, ptr %"250", i64 28
|
||||
store i32 %"107", ptr %"282", align 4
|
||||
%"281" = getelementptr inbounds i8, ptr %"250", i64 28
|
||||
store i32 %"107", ptr %"281", align 4
|
||||
%"108" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"109" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"252" = inttoptr i64 %"108" to ptr
|
||||
%"284" = getelementptr inbounds i8, ptr %"252", i64 32
|
||||
store i32 %"109", ptr %"284", align 4
|
||||
%"283" = getelementptr inbounds i8, ptr %"252", i64 32
|
||||
store i32 %"109", ptr %"283", align 4
|
||||
%"110" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"111" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"254" = inttoptr i64 %"110" to ptr
|
||||
%"286" = getelementptr inbounds i8, ptr %"254", i64 36
|
||||
store i32 %"111", ptr %"286", align 4
|
||||
%"285" = getelementptr inbounds i8, ptr %"254", i64 36
|
||||
store i32 %"111", ptr %"285", align 4
|
||||
%"112" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"113" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%"256" = inttoptr i64 %"112" to ptr
|
||||
%"288" = getelementptr inbounds i8, ptr %"256", i64 40
|
||||
store i32 %"113", ptr %"288", align 4
|
||||
%"287" = getelementptr inbounds i8, ptr %"256", i64 40
|
||||
store i32 %"113", ptr %"287", align 4
|
||||
%"114" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"115" = load i32, ptr addrspace(5) %"13", align 4
|
||||
%"258" = inttoptr i64 %"114" to ptr
|
||||
%"290" = getelementptr inbounds i8, ptr %"258", i64 44
|
||||
store i32 %"115", ptr %"290", align 4
|
||||
%"289" = getelementptr inbounds i8, ptr %"258", i64 44
|
||||
store i32 %"115", ptr %"289", align 4
|
||||
%"116" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"117" = load i32, ptr addrspace(5) %"15", align 4
|
||||
%"260" = inttoptr i64 %"116" to ptr
|
||||
%"292" = getelementptr inbounds i8, ptr %"260", i64 48
|
||||
store i32 %"117", ptr %"292", align 4
|
||||
%"291" = getelementptr inbounds i8, ptr %"260", i64 48
|
||||
store i32 %"117", ptr %"291", align 4
|
||||
%"118" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"119" = load i32, ptr addrspace(5) %"17", align 4
|
||||
%"262" = inttoptr i64 %"118" to ptr
|
||||
%"294" = getelementptr inbounds i8, ptr %"262", i64 52
|
||||
store i32 %"119", ptr %"294", align 4
|
||||
%"293" = getelementptr inbounds i8, ptr %"262", i64 52
|
||||
store i32 %"119", ptr %"293", align 4
|
||||
%"120" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"121" = load i32, ptr addrspace(5) %"19", align 4
|
||||
%"264" = inttoptr i64 %"120" to ptr
|
||||
%"296" = getelementptr inbounds i8, ptr %"264", i64 56
|
||||
store i32 %"121", ptr %"296", align 4
|
||||
%"295" = getelementptr inbounds i8, ptr %"264", i64 56
|
||||
store i32 %"121", ptr %"295", align 4
|
||||
%"122" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"123" = load i32, ptr addrspace(5) %"21", align 4
|
||||
%"266" = inttoptr i64 %"122" to ptr
|
||||
%"298" = getelementptr inbounds i8, ptr %"266", i64 60
|
||||
store i32 %"123", ptr %"298", align 4
|
||||
%"297" = getelementptr inbounds i8, ptr %"266", i64 60
|
||||
store i32 %"123", ptr %"297", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
@ -17,8 +19,8 @@ define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr
|
|||
%"10" = load i32, ptr %"18", align 4
|
||||
store i32 %"10", ptr addrspace(5) %"6", align 4
|
||||
%"13" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false)
|
||||
store i32 %0, ptr addrspace(5) %"6", align 4
|
||||
%2 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false)
|
||||
store i32 %2, ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"15" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"19" = inttoptr i64 %"14" to ptr
|
||||
|
|
|
@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8
|
||||
|
||||
define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
"52":
|
||||
%"11" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i16, align 2, addrspace(5)
|
||||
%"8" = alloca i16, align 2, addrspace(5)
|
||||
%"9" = alloca i16, align 2, addrspace(5)
|
||||
%"10" = alloca i16, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"12" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"5", align 8
|
||||
%"13" = load i64, ptr addrspace(4) %"39", align 8
|
||||
|
@ -32,18 +34,18 @@ define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", pt
|
|||
%"20" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"21" = load i16, ptr addrspace(5) %"8", align 2
|
||||
%"46" = inttoptr i64 %"20" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"46", i64 2
|
||||
store i16 %"21", ptr %"60", align 2
|
||||
%"59" = getelementptr inbounds i8, ptr %"46", i64 2
|
||||
store i16 %"21", ptr %"59", align 2
|
||||
%"22" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"23" = load i16, ptr addrspace(5) %"9", align 2
|
||||
%"48" = inttoptr i64 %"22" to ptr
|
||||
%"62" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store i16 %"23", ptr %"62", align 2
|
||||
%"61" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store i16 %"23", ptr %"61", align 2
|
||||
%"24" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"25" = load i16, ptr addrspace(5) %"10", align 2
|
||||
%"50" = inttoptr i64 %"24" to ptr
|
||||
%"64" = getelementptr inbounds i8, ptr %"50", i64 6
|
||||
store i16 %"25", ptr %"64", align 2
|
||||
%"63" = getelementptr inbounds i8, ptr %"50", i64 6
|
||||
store i16 %"25", ptr %"63", align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"21":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"21":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0
|
||||
|
||||
define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
|
||||
"56":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"46", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"47", align 8
|
||||
|
@ -27,8 +29,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
|
|||
store float %"15", ptr addrspace(1) %"49", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"50" = inttoptr i64 %"17" to ptr addrspace(1)
|
||||
%"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4
|
||||
%"16" = load float, ptr addrspace(1) %"61", align 4
|
||||
%"60" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4
|
||||
%"16" = load float, ptr addrspace(1) %"60", align 4
|
||||
store float %"16", ptr addrspace(5) %"6", align 4
|
||||
%"19" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19")
|
||||
|
@ -36,12 +38,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
|
|||
%"20" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"51" = inttoptr i64 %"20" to ptr addrspace(1)
|
||||
%"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
|
||||
store float %"21", ptr addrspace(1) %"63", align 4
|
||||
%"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4
|
||||
store float %"21", ptr addrspace(1) %"62", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"52" = inttoptr i64 %"23" to ptr addrspace(1)
|
||||
%"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8
|
||||
%"22" = load float, ptr addrspace(1) %"65", align 4
|
||||
%"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8
|
||||
%"22" = load float, ptr addrspace(1) %"64", align 4
|
||||
store float %"22", ptr addrspace(5) %"6", align 4
|
||||
%"25" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25")
|
||||
|
@ -49,12 +51,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
|
|||
%"26" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"27" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"53" = inttoptr i64 %"26" to ptr addrspace(1)
|
||||
%"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
|
||||
store float %"27", ptr addrspace(1) %"67", align 4
|
||||
%"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8
|
||||
store float %"27", ptr addrspace(1) %"66", align 4
|
||||
%"29" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"54" = inttoptr i64 %"29" to ptr addrspace(1)
|
||||
%"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12
|
||||
%"28" = load float, ptr addrspace(1) %"69", align 4
|
||||
%"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12
|
||||
%"28" = load float, ptr addrspace(1) %"68", align 4
|
||||
store float %"28", ptr addrspace(5) %"6", align 4
|
||||
%"31" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31")
|
||||
|
@ -62,8 +64,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46"
|
|||
%"32" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"33" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"55" = inttoptr i64 %"32" to ptr addrspace(1)
|
||||
%"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
|
||||
store float %"33", ptr addrspace(1) %"71", align 4
|
||||
%"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12
|
||||
store float %"33", ptr addrspace(1) %"70", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca half, align 2, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -10,15 +10,17 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0
|
|||
declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 {
|
||||
"75":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"11" = load i64, ptr addrspace(4) %"49", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"4", align 8
|
||||
%"12" = load i64, ptr addrspace(4) %"50", align 8
|
||||
|
@ -29,18 +31,18 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4
|
|||
store i32 %"51", ptr addrspace(5) %"6", align 4
|
||||
%"16" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"53" = inttoptr i64 %"16" to ptr
|
||||
%"89" = getelementptr inbounds i8, ptr %"53", i64 4
|
||||
%"54" = load i32, ptr %"89", align 4
|
||||
%"88" = getelementptr inbounds i8, ptr %"53", i64 4
|
||||
%"54" = load i32, ptr %"88", align 4
|
||||
store i32 %"54", ptr addrspace(5) %"7", align 4
|
||||
%"18" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"55" = inttoptr i64 %"18" to ptr
|
||||
%"91" = getelementptr inbounds i8, ptr %"55", i64 8
|
||||
%"56" = load i32, ptr %"91", align 4
|
||||
%"90" = getelementptr inbounds i8, ptr %"55", i64 8
|
||||
%"56" = load i32, ptr %"90", align 4
|
||||
store i32 %"56", ptr addrspace(5) %"8", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"57" = inttoptr i64 %"20" to ptr
|
||||
%"93" = getelementptr inbounds i8, ptr %"57", i64 12
|
||||
%"58" = load i32, ptr %"93", align 4
|
||||
%"92" = getelementptr inbounds i8, ptr %"57", i64 12
|
||||
%"58" = load i32, ptr %"92", align 4
|
||||
store i32 %"58", ptr addrspace(5) %"9", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22")
|
||||
|
@ -66,21 +68,21 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4
|
|||
%"31" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"32" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"69" = inttoptr i64 %"31" to ptr addrspace(1)
|
||||
%"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
|
||||
%"94" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
|
||||
%"70" = bitcast i32 %"32" to float
|
||||
store float %"70", ptr addrspace(1) %"95", align 4
|
||||
store float %"70", ptr addrspace(1) %"94", align 4
|
||||
%"33" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"34" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"71" = inttoptr i64 %"33" to ptr addrspace(1)
|
||||
%"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8
|
||||
%"96" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8
|
||||
%"72" = bitcast i32 %"34" to float
|
||||
store float %"72", ptr addrspace(1) %"97", align 4
|
||||
store float %"72", ptr addrspace(1) %"96", align 4
|
||||
%"35" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"36" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"73" = inttoptr i64 %"35" to ptr addrspace(1)
|
||||
%"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12
|
||||
%"98" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12
|
||||
%"74" = bitcast i32 %"36" to float
|
||||
store float %"74", ptr addrspace(1) %"99", align 4
|
||||
store float %"74", ptr addrspace(1) %"98", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"21":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca double, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
|
||||
"33":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27",
|
|||
store float %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"30" = inttoptr i64 %"14" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"30", i64 4
|
||||
%"13" = load float, ptr %"35", align 4
|
||||
%"34" = getelementptr inbounds i8, ptr %"30", i64 4
|
||||
%"13" = load float, ptr %"34", align 4
|
||||
store float %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"15" = call float @llvm.rint.f32(float %"16")
|
||||
|
@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27",
|
|||
%"21" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"22" = load float, ptr addrspace(5) %"7", align 4
|
||||
%"32" = inttoptr i64 %"21" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
store float %"22", ptr %"37", align 4
|
||||
%"36" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
store float %"22", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 {
|
||||
"33":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27",
|
|||
store float %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"30" = inttoptr i64 %"14" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"30", i64 4
|
||||
%"13" = load float, ptr %"35", align 4
|
||||
%"34" = getelementptr inbounds i8, ptr %"30", i64 4
|
||||
%"13" = load float, ptr %"34", align 4
|
||||
store float %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"15" = call float @llvm.trunc.f32(float %"16")
|
||||
|
@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27",
|
|||
%"21" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"22" = load float, ptr addrspace(5) %"7", align 4
|
||||
%"32" = inttoptr i64 %"21" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
store float %"22", ptr %"37", align 4
|
||||
%"36" = getelementptr inbounds i8, ptr %"32", i64 4
|
||||
store float %"22", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
@ -18,8 +20,8 @@ define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17
|
|||
%"11" = load i32, ptr addrspace(1) %"19", align 4
|
||||
store i32 %"11", ptr addrspace(5) %"7", align 4
|
||||
%"14" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"25" = trunc i32 %"14" to i8
|
||||
%"20" = sext i8 %"25" to i16
|
||||
%"24" = trunc i32 %"14" to i8
|
||||
%"20" = sext i8 %"24" to i16
|
||||
%"13" = sext i16 %"20" to i32
|
||||
store i32 %"13", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"5", align 8
|
||||
|
|
|
@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0
|
||||
|
||||
define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
|
||||
"41":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
|
@ -22,8 +24,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"31" = inttoptr i64 %"14" to ptr
|
||||
%"46" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"32" = load float, ptr %"46", align 4
|
||||
%"45" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"32" = load float, ptr %"45", align 4
|
||||
%"13" = bitcast float %"32" to i32
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2
|
|||
%"21" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"39" = inttoptr i64 %"21" to ptr addrspace(1)
|
||||
%"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4
|
||||
store i32 %"22", ptr addrspace(1) %"48", align 4
|
||||
%"47" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4
|
||||
store i32 %"22", ptr addrspace(1) %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -2,14 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 {
|
||||
"34":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
%2 = alloca i32, align 4, addrspace(5)
|
||||
%3 = alloca i32, align 4, addrspace(5)
|
||||
br label %4
|
||||
|
||||
4: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"26", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"27", align 8
|
||||
|
@ -19,18 +24,15 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2
|
|||
%"12" = load i32, ptr %"28", align 4
|
||||
store i32 %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%0 = call i32 @llvm.smax.i32(i32 %"15", i32 0)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
store i32 %0, ptr addrspace(5) %1, align 4
|
||||
%5 = call i32 @llvm.smax.i32(i32 %"15", i32 0)
|
||||
store i32 %5, ptr addrspace(5) %1, align 4
|
||||
%"14" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"14", ptr addrspace(5) %"7", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%2 = alloca i32, align 4, addrspace(5)
|
||||
store i32 %"17", ptr addrspace(5) %2, align 4
|
||||
%"29" = load i32, ptr addrspace(5) %2, align 4
|
||||
store i32 %"29", ptr addrspace(5) %"7", align 4
|
||||
%"19" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%3 = alloca i32, align 4, addrspace(5)
|
||||
store i32 %"19", ptr addrspace(5) %3, align 4
|
||||
%"30" = load i32, ptr addrspace(5) %3, align 4
|
||||
store i32 %"30", ptr addrspace(5) %"8", align 4
|
||||
|
@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2
|
|||
%"22" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"33" = inttoptr i64 %"22" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"33", i64 4
|
||||
store i32 %"23", ptr %"36", align 4
|
||||
%"35" = getelementptr inbounds i8, ptr %"33", i64 4
|
||||
store i32 %"23", ptr %"35", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i16, align 2, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -2,25 +2,27 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"26":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"19", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%"11" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%0 = inttoptr i64 %"11" to ptr
|
||||
%1 = addrspacecast ptr %0 to ptr addrspace(1)
|
||||
%"20" = ptrtoint ptr addrspace(1) %1 to i64
|
||||
%2 = inttoptr i64 %"11" to ptr
|
||||
%3 = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
%"20" = ptrtoint ptr addrspace(1) %3 to i64
|
||||
store i64 %"20", ptr addrspace(5) %"4", align 8
|
||||
%"13" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%2 = inttoptr i64 %"13" to ptr
|
||||
%3 = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
%"22" = ptrtoint ptr addrspace(1) %3 to i64
|
||||
%4 = inttoptr i64 %"13" to ptr
|
||||
%5 = addrspacecast ptr %4 to ptr addrspace(1)
|
||||
%"22" = ptrtoint ptr addrspace(1) %5 to i64
|
||||
store i64 %"22", ptr addrspace(5) %"5", align 8
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"24" = inttoptr i64 %"15" to ptr addrspace(1)
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22
|
|||
store float %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"28", align 4
|
||||
store float %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load float, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0
|
||||
|
||||
define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
|
||||
"38":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"29", align 8
|
||||
|
@ -22,13 +24,13 @@ define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr
|
|||
store i32 %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr
|
||||
%"45" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"45", align 4
|
||||
%"44" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load i32, ptr %"44", align 4
|
||||
store i32 %"14", ptr addrspace(5) %"7", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"32" = inttoptr i64 %"17" to ptr
|
||||
%"47" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load i32, ptr %"47", align 4
|
||||
%"46" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load i32, ptr %"46", align 4
|
||||
store i32 %"16", ptr addrspace(5) %"8", align 4
|
||||
%"19" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"20" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
|
||||
"56":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"46", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"47", align 8
|
||||
|
@ -25,8 +27,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
|
|||
store float %"15", ptr %"49", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"50" = inttoptr i64 %"17" to ptr
|
||||
%"58" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
%"16" = load float, ptr %"58", align 4
|
||||
%"57" = getelementptr inbounds i8, ptr %"50", i64 4
|
||||
%"16" = load float, ptr %"57", align 4
|
||||
store float %"16", ptr addrspace(5) %"6", align 4
|
||||
%"19" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"18" = call afn float @llvm.exp2.f32(float %"19")
|
||||
|
@ -34,12 +36,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
|
|||
%"20" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"51" = inttoptr i64 %"20" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
store float %"21", ptr %"60", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
store float %"21", ptr %"59", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"52" = inttoptr i64 %"23" to ptr
|
||||
%"62" = getelementptr inbounds i8, ptr %"52", i64 8
|
||||
%"22" = load float, ptr %"62", align 4
|
||||
%"61" = getelementptr inbounds i8, ptr %"52", i64 8
|
||||
%"22" = load float, ptr %"61", align 4
|
||||
store float %"22", ptr addrspace(5) %"6", align 4
|
||||
%"25" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"24" = call afn float @llvm.exp2.f32(float %"25")
|
||||
|
@ -47,12 +49,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
|
|||
%"26" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"27" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"53" = inttoptr i64 %"26" to ptr
|
||||
%"64" = getelementptr inbounds i8, ptr %"53", i64 8
|
||||
store float %"27", ptr %"64", align 4
|
||||
%"63" = getelementptr inbounds i8, ptr %"53", i64 8
|
||||
store float %"27", ptr %"63", align 4
|
||||
%"29" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"54" = inttoptr i64 %"29" to ptr
|
||||
%"66" = getelementptr inbounds i8, ptr %"54", i64 12
|
||||
%"28" = load float, ptr %"66", align 4
|
||||
%"65" = getelementptr inbounds i8, ptr %"54", i64 12
|
||||
%"28" = load float, ptr %"65", align 4
|
||||
store float %"28", ptr addrspace(5) %"6", align 4
|
||||
%"31" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"30" = call afn float @llvm.exp2.f32(float %"31")
|
||||
|
@ -60,8 +62,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr
|
|||
%"32" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"33" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"55" = inttoptr i64 %"32" to ptr
|
||||
%"68" = getelementptr inbounds i8, ptr %"55", i64 12
|
||||
store float %"33", ptr %"68", align 4
|
||||
%"67" = getelementptr inbounds i8, ptr %"55", i64 12
|
||||
store float %"33", ptr %"67", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@shared_mem = external hidden addrspace(3) global [0 x i32]
|
||||
|
||||
define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -3,28 +3,32 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
|
||||
@shared_mem = external hidden addrspace(3) global [0 x i32], align 4
|
||||
|
||||
define private void @"2"(ptr addrspace(3) %"35") #0 {
|
||||
"33":
|
||||
define private void @"2"(ptr addrspace(3) %"33") #0 {
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"3" = alloca i64, align 8, addrspace(5)
|
||||
%"12" = load i64, ptr addrspace(3) %"35", align 8
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"12" = load i64, ptr addrspace(3) %"33", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"3", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"3", align 8
|
||||
%"13" = add i64 %"14", 2
|
||||
store i64 %"13", ptr addrspace(5) %"3", align 8
|
||||
%"15" = load i64, ptr addrspace(5) %"3", align 8
|
||||
store i64 %"15", ptr addrspace(3) %"35", align 8
|
||||
store i64 %"15", ptr addrspace(3) %"33", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
|
||||
"34":
|
||||
%"11" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"16" = load i64, ptr addrspace(4) %"25", align 8
|
||||
store i64 %"16", ptr addrspace(5) %"7", align 8
|
||||
%"17" = load i64, ptr addrspace(4) %"26", align 8
|
||||
|
|
|
@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 {
|
||||
"34":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
%"8" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"28", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"29", align 8
|
||||
|
@ -20,13 +22,13 @@ define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr
|
|||
store float %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load float, ptr %"36", align 4
|
||||
%"35" = getelementptr inbounds i8, ptr %"31", i64 4
|
||||
%"14" = load float, ptr %"35", align 4
|
||||
store float %"14", ptr addrspace(5) %"7", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"32" = inttoptr i64 %"17" to ptr
|
||||
%"38" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load float, ptr %"38", align 4
|
||||
%"37" = getelementptr inbounds i8, ptr %"32", i64 8
|
||||
%"16" = load float, ptr %"37", align 4
|
||||
store float %"16", ptr addrspace(5) %"8", align 4
|
||||
%"19" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"20" = load float, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define private float @"1"(float %"15", float %"16") #0 {
|
||||
"38":
|
||||
%"3" = alloca float, align 4, addrspace(5)
|
||||
%"4" = alloca float, align 4, addrspace(5)
|
||||
%"2" = alloca float, align 4, addrspace(5)
|
||||
%"13" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store float %"15", ptr addrspace(5) %"3", align 4
|
||||
store float %"16", ptr addrspace(5) %"4", align 4
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
%"18" = load float, ptr addrspace(5) %"3", align 4
|
||||
%"19" = load float, ptr addrspace(5) %"4", align 4
|
||||
%"17" = fadd float %"18", %"19"
|
||||
|
@ -19,14 +21,16 @@ define private float @"1"(float %"15", float %"16") #0 {
|
|||
}
|
||||
|
||||
define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
"39":
|
||||
%"14" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i64, align 8, addrspace(5)
|
||||
%"10" = alloca i64, align 8, addrspace(5)
|
||||
%"11" = alloca i64, align 8, addrspace(5)
|
||||
%"12" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"21" = load i64, ptr addrspace(4) %"34", align 8
|
||||
store i64 %"21", ptr addrspace(5) %"8", align 8
|
||||
%"22" = load i64, ptr addrspace(4) %"35", align 8
|
||||
|
|
|
@ -5,18 +5,20 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)]
|
||||
|
||||
define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
|
||||
"57":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"11" = load i64, ptr addrspace(4) %"47", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"7", align 8
|
||||
%0 = alloca i32, align 4, addrspace(5)
|
||||
store i32 1, ptr addrspace(5) %0, align 4
|
||||
%"12" = load i32, ptr addrspace(5) %0, align 4
|
||||
store i32 1, ptr addrspace(5) %1, align 4
|
||||
%"12" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"12", ptr addrspace(5) %"8", align 4
|
||||
%"13" = load i64, ptr addrspace(1) @bar, align 8
|
||||
store i64 %"13", ptr addrspace(5) %"6", align 8
|
||||
|
|
|
@ -5,15 +5,17 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
@foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer]
|
||||
|
||||
define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"21":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8
|
||||
%"10" = load i64, ptr addrspace(5) %0, align 8
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %1, align 8
|
||||
%"10" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"10", ptr addrspace(5) %"6", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"7", align 8
|
||||
|
|
|
@ -4,14 +4,17 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0
|
||||
|
||||
define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 {
|
||||
"39":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"14" = load i64, ptr addrspace(4) %"27", align 8
|
||||
store i64 %"14", ptr addrspace(5) %"4", align 8
|
||||
%"15" = load i64, ptr addrspace(4) %"28", align 8
|
||||
|
@ -24,9 +27,8 @@ define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"2
|
|||
%"31" = add i32 %"19", 1
|
||||
store i32 %"31", ptr addrspace(5) %"7", align 4
|
||||
%"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt()
|
||||
%0 = alloca i32, align 4, addrspace(5)
|
||||
store i32 %"11", ptr addrspace(5) %0, align 4
|
||||
%"33" = load i32, ptr addrspace(5) %0, align 4
|
||||
store i32 %"11", ptr addrspace(5) %1, align 4
|
||||
%"33" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"33", ptr addrspace(5) %"8", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
|
||||
"18":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"14", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"15", align 8
|
||||
|
|
|
@ -2,31 +2,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"22":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 81985529216486895, ptr addrspace(5) %0, align 8
|
||||
%"10" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 81985529216486895, ptr addrspace(5) %1, align 8
|
||||
%"10" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"10", ptr addrspace(5) %"6", align 8
|
||||
%"12" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"19" = inttoptr i64 %"12" to ptr addrspace(1)
|
||||
%"18" = load float, ptr addrspace(1) %"19", align 4
|
||||
%"23" = bitcast float %"18" to i32
|
||||
%"11" = zext i32 %"23" to i64
|
||||
%"22" = bitcast float %"18" to i32
|
||||
%"11" = zext i32 %"22" to i64
|
||||
store i64 %"11", ptr addrspace(5) %"6", align 8
|
||||
%"13" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"20" = inttoptr i64 %"13" to ptr addrspace(1)
|
||||
%"25" = trunc i64 %"14" to i32
|
||||
%"21" = bitcast i32 %"25" to float
|
||||
%"24" = trunc i64 %"14" to i32
|
||||
%"21" = bitcast i32 %"24" to float
|
||||
store float %"21", ptr addrspace(1) %"20", align 4
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
|
||||
"29":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"23", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"24", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"26" = inttoptr i64 %"14" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"26", i64 4
|
||||
%"13" = load i32, ptr %"31", align 4
|
||||
%"30" = getelementptr inbounds i8, ptr %"26", i64 4
|
||||
%"13" = load i32, ptr %"30", align 4
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"16" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
@ -29,8 +31,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"
|
|||
%"17" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"18" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"28" = inttoptr i64 %"17" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"28", i64 4
|
||||
store i32 %"18", ptr %"33", align 4
|
||||
%"32" = getelementptr inbounds i8, ptr %"28", i64 4
|
||||
store i32 %"18", ptr %"32", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 {
|
||||
"19":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca [8 x i8], align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"15", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"16", align 8
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 {
|
||||
"77":
|
||||
%"14" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -15,6 +13,10 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
|
|||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
%"12" = alloca i32, align 4, addrspace(5)
|
||||
%"13" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"15" = load i64, ptr addrspace(4) %"60", align 8
|
||||
store i64 %"15", ptr addrspace(5) %"4", align 8
|
||||
%"16" = load i64, ptr addrspace(4) %"61", align 8
|
||||
|
@ -25,44 +27,44 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
|
|||
store i32 %"62", ptr addrspace(5) %"8", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"64" = inttoptr i64 %"20" to ptr
|
||||
%"79" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"65" = load i32, ptr %"79", align 4
|
||||
%"78" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"65" = load i32, ptr %"78", align 4
|
||||
store i32 %"65", ptr addrspace(5) %"9", align 4
|
||||
%"22" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"66" = inttoptr i64 %"22" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"66", i64 8
|
||||
%"21" = load i32, ptr %"81", align 4
|
||||
%"80" = getelementptr inbounds i8, ptr %"66", i64 8
|
||||
%"21" = load i32, ptr %"80", align 4
|
||||
store i32 %"21", ptr addrspace(5) %"10", align 4
|
||||
%"25" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"27" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%0 = sext i32 %"25" to i64
|
||||
%1 = sext i32 %"26" to i64
|
||||
%2 = mul nsw i64 %0, %1
|
||||
%3 = lshr i64 %2, 32
|
||||
%4 = trunc i64 %3 to i32
|
||||
%5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27")
|
||||
%"23" = extractvalue { i32, i1 } %5, 0
|
||||
%"24" = extractvalue { i32, i1 } %5, 1
|
||||
%2 = sext i32 %"25" to i64
|
||||
%3 = sext i32 %"26" to i64
|
||||
%4 = mul nsw i64 %2, %3
|
||||
%5 = lshr i64 %4, 32
|
||||
%6 = trunc i64 %5 to i32
|
||||
%7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %6, i32 %"27")
|
||||
%"23" = extractvalue { i32, i1 } %7, 0
|
||||
%"24" = extractvalue { i32, i1 } %7, 1
|
||||
store i32 %"23", ptr addrspace(5) %"7", align 4
|
||||
store i1 %"24", ptr addrspace(5) %"14", align 1
|
||||
%6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2)
|
||||
%"28" = extractvalue { i32, i1 } %6, 0
|
||||
%"29" = extractvalue { i32, i1 } %6, 1
|
||||
%8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2)
|
||||
%"28" = extractvalue { i32, i1 } %8, 0
|
||||
%"29" = extractvalue { i32, i1 } %8, 1
|
||||
store i32 %"28", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"29", ptr addrspace(5) %"14", align 1
|
||||
%"31" = load i1, ptr addrspace(5) %"14", align 1
|
||||
%7 = zext i1 %"31" to i32
|
||||
%"70" = add i32 0, %7
|
||||
%9 = zext i1 %"31" to i32
|
||||
%"70" = add i32 0, %9
|
||||
store i32 %"70", ptr addrspace(5) %"12", align 4
|
||||
%8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
|
||||
%"32" = extractvalue { i32, i1 } %8, 0
|
||||
%"33" = extractvalue { i32, i1 } %8, 1
|
||||
%10 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1)
|
||||
%"32" = extractvalue { i32, i1 } %10, 0
|
||||
%"33" = extractvalue { i32, i1 } %10, 1
|
||||
store i32 %"32", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"33", ptr addrspace(5) %"14", align 1
|
||||
%"35" = load i1, ptr addrspace(5) %"14", align 1
|
||||
%9 = zext i1 %"35" to i32
|
||||
%"71" = add i32 0, %9
|
||||
%11 = zext i1 %"35" to i32
|
||||
%"71" = add i32 0, %11
|
||||
store i32 %"71", ptr addrspace(5) %"13", align 4
|
||||
%"36" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"37" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
@ -71,13 +73,13 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60"
|
|||
%"38" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"39" = load i32, ptr addrspace(5) %"12", align 4
|
||||
%"73" = inttoptr i64 %"38" to ptr
|
||||
%"83" = getelementptr inbounds i8, ptr %"73", i64 4
|
||||
store i32 %"39", ptr %"83", align 4
|
||||
%"82" = getelementptr inbounds i8, ptr %"73", i64 4
|
||||
store i32 %"39", ptr %"82", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"41" = load i32, ptr addrspace(5) %"13", align 4
|
||||
%"75" = inttoptr i64 %"40" to ptr
|
||||
%"85" = getelementptr inbounds i8, ptr %"75", i64 8
|
||||
store i32 %"41", ptr %"85", align 4
|
||||
%"84" = getelementptr inbounds i8, ptr %"75", i64 8
|
||||
store i32 %"41", ptr %"84", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 {
|
||||
"75":
|
||||
%"13" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -14,6 +12,10 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
|
|||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
%"11" = alloca i32, align 4, addrspace(5)
|
||||
%"12" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"13", align 1
|
||||
%"14" = load i64, ptr addrspace(4) %"52", align 8
|
||||
store i64 %"14", ptr addrspace(5) %"4", align 8
|
||||
%"15" = load i64, ptr addrspace(4) %"53", align 8
|
||||
|
@ -24,42 +26,42 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
|
|||
store i32 %"54", ptr addrspace(5) %"9", align 4
|
||||
%"19" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"56" = inttoptr i64 %"19" to ptr
|
||||
%"77" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||
%"57" = load i32, ptr %"77", align 4
|
||||
%"76" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||
%"57" = load i32, ptr %"76", align 4
|
||||
store i32 %"57", ptr addrspace(5) %"10", align 4
|
||||
%"21" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"58" = inttoptr i64 %"21" to ptr
|
||||
%"79" = getelementptr inbounds i8, ptr %"58", i64 8
|
||||
%"20" = load i64, ptr %"79", align 8
|
||||
%"78" = getelementptr inbounds i8, ptr %"58", i64 8
|
||||
%"20" = load i64, ptr %"78", align 8
|
||||
store i64 %"20", ptr addrspace(5) %"12", align 8
|
||||
%"23" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"59" = inttoptr i64 %"23" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"59", i64 16
|
||||
%"60" = load i32, ptr %"81", align 4
|
||||
%"80" = getelementptr inbounds i8, ptr %"59", i64 16
|
||||
%"60" = load i32, ptr %"80", align 4
|
||||
store i32 %"60", ptr addrspace(5) %"11", align 4
|
||||
%"25" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"27" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%0 = mul i32 %"25", %"26"
|
||||
%"24" = add i32 %0, %"27"
|
||||
%2 = mul i32 %"25", %"26"
|
||||
%"24" = add i32 %2, %"27"
|
||||
store i32 %"24", ptr addrspace(5) %"6", align 4
|
||||
%"29" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"30" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"31" = load i32, ptr addrspace(5) %"11", align 4
|
||||
%1 = sext i32 %"29" to i64
|
||||
%2 = sext i32 %"30" to i64
|
||||
%3 = mul nsw i64 %1, %2
|
||||
%4 = lshr i64 %3, 32
|
||||
%5 = trunc i64 %4 to i32
|
||||
%"28" = add i32 %5, %"31"
|
||||
%3 = sext i32 %"29" to i64
|
||||
%4 = sext i32 %"30" to i64
|
||||
%5 = mul nsw i64 %3, %4
|
||||
%6 = lshr i64 %5, 32
|
||||
%7 = trunc i64 %6 to i32
|
||||
%"28" = add i32 %7, %"31"
|
||||
store i32 %"28", ptr addrspace(5) %"7", align 4
|
||||
%"33" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"34" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"35" = load i64, ptr addrspace(5) %"12", align 8
|
||||
%6 = sext i32 %"33" to i64
|
||||
%7 = sext i32 %"34" to i64
|
||||
%8 = mul nsw i64 %6, %7
|
||||
%"67" = add i64 %8, %"35"
|
||||
%8 = sext i32 %"33" to i64
|
||||
%9 = sext i32 %"34" to i64
|
||||
%10 = mul nsw i64 %8, %9
|
||||
%"67" = add i64 %10, %"35"
|
||||
store i64 %"67", ptr addrspace(5) %"8", align 8
|
||||
%"36" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"37" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -68,13 +70,13 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52",
|
|||
%"38" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"39" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"72" = inttoptr i64 %"38" to ptr
|
||||
%"83" = getelementptr inbounds i8, ptr %"72", i64 8
|
||||
store i32 %"39", ptr %"83", align 4
|
||||
%"82" = getelementptr inbounds i8, ptr %"72", i64 8
|
||||
store i32 %"39", ptr %"82", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"41" = load i64, ptr addrspace(5) %"8", align 8
|
||||
%"73" = inttoptr i64 %"40" to ptr
|
||||
%"85" = getelementptr inbounds i8, ptr %"73", i64 16
|
||||
store i64 %"41", ptr %"85", align 8
|
||||
%"84" = getelementptr inbounds i8, ptr %"73", i64 16
|
||||
store i64 %"41", ptr %"84", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
||||
"54":
|
||||
%"11" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -12,6 +10,10 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
|
|||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"12" = load i64, ptr addrspace(4) %"40", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"4", align 8
|
||||
%"13" = load i64, ptr addrspace(4) %"41", align 8
|
||||
|
@ -22,34 +24,34 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
|
|||
store i32 %"42", ptr addrspace(5) %"8", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"44" = inttoptr i64 %"17" to ptr
|
||||
%"56" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"45" = load i32, ptr %"56", align 4
|
||||
%"55" = getelementptr inbounds i8, ptr %"44", i64 4
|
||||
%"45" = load i32, ptr %"55", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"9", align 4
|
||||
%"19" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"46" = inttoptr i64 %"19" to ptr
|
||||
%"58" = getelementptr inbounds i8, ptr %"46", i64 8
|
||||
%"18" = load i32, ptr %"58", align 4
|
||||
%"57" = getelementptr inbounds i8, ptr %"46", i64 8
|
||||
%"18" = load i32, ptr %"57", align 4
|
||||
store i32 %"18", ptr addrspace(5) %"10", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"23" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"24" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%0 = mul i32 %"22", %"23"
|
||||
%1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24")
|
||||
%"20" = extractvalue { i32, i1 } %1, 0
|
||||
%"21" = extractvalue { i32, i1 } %1, 1
|
||||
%2 = mul i32 %"22", %"23"
|
||||
%3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %2, i32 %"24")
|
||||
%"20" = extractvalue { i32, i1 } %3, 0
|
||||
%"21" = extractvalue { i32, i1 } %3, 1
|
||||
store i32 %"20", ptr addrspace(5) %"6", align 4
|
||||
store i1 %"21", ptr addrspace(5) %"11", align 1
|
||||
%"26" = load i1, ptr addrspace(5) %"11", align 1
|
||||
%"27" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%"28" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%2 = sext i32 %"27" to i64
|
||||
%3 = sext i32 %"28" to i64
|
||||
%4 = mul nsw i64 %2, %3
|
||||
%5 = lshr i64 %4, 32
|
||||
%6 = trunc i64 %5 to i32
|
||||
%7 = zext i1 %"26" to i32
|
||||
%8 = add i32 %6, 3
|
||||
%"25" = add i32 %8, %7
|
||||
%4 = sext i32 %"27" to i64
|
||||
%5 = sext i32 %"28" to i64
|
||||
%6 = mul nsw i64 %4, %5
|
||||
%7 = lshr i64 %6, 32
|
||||
%8 = trunc i64 %7 to i32
|
||||
%9 = zext i1 %"26" to i32
|
||||
%10 = add i32 %8, 3
|
||||
%"25" = add i32 %10, %9
|
||||
store i32 %"25", ptr addrspace(5) %"7", align 4
|
||||
%"29" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"30" = load i32, ptr addrspace(5) %"6", align 4
|
||||
|
@ -58,8 +60,8 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40",
|
|||
%"31" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"32" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"53" = inttoptr i64 %"31" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"53", i64 4
|
||||
store i32 %"32", ptr %"60", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr %"53", i64 4
|
||||
store i32 %"32", ptr %"59", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"28", align 4
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 {
|
||||
"19":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"14", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"15", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"28", align 4
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"21":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
@ -18,9 +21,8 @@ define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr
|
|||
%"11" = load i64, ptr %"19", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"6", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"14", ptr addrspace(5) %0, align 8
|
||||
%"13" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"14", ptr addrspace(5) %1, align 8
|
||||
%"13" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"13", ptr addrspace(5) %"7", align 8
|
||||
%"15" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"16" = load i64, ptr addrspace(5) %"7", align 8
|
||||
|
|
|
@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 {
|
||||
"11":
|
||||
%"6" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"4" = alloca [8 x i8], align 1, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"10" = ptrtoint ptr addrspace(5) %"4" to i64
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"10", ptr addrspace(5) %0, align 8
|
||||
%"7" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"10", ptr addrspace(5) %1, align 8
|
||||
%"7" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"7", ptr addrspace(5) %"5", align 8
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
"49":
|
||||
%"15" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"15", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
|
@ -14,6 +12,12 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
|
|||
%"10" = alloca half, align 2, addrspace(5)
|
||||
%"11" = alloca half, align 2, addrspace(5)
|
||||
%"12" = alloca half, align 2, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
%2 = alloca i64, align 8, addrspace(5)
|
||||
br label %3
|
||||
|
||||
3: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"15", align 1
|
||||
%"16" = load i64, ptr addrspace(4) %"34", align 8
|
||||
store i64 %"16", ptr addrspace(5) %"4", align 8
|
||||
%"17" = load i64, ptr addrspace(4) %"35", align 8
|
||||
|
@ -23,9 +27,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
|
|||
%"18" = load i64, ptr %"36", align 8
|
||||
store i64 %"18", ptr addrspace(5) %"6", align 8
|
||||
%"20" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"20", ptr addrspace(5) %0, align 8
|
||||
%"13" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"20", ptr addrspace(5) %1, align 8
|
||||
%"13" = load i64, ptr addrspace(5) %1, align 8
|
||||
%"38" = bitcast i64 %"13" to <2 x i32>
|
||||
%"39" = extractelement <2 x i32> %"38", i32 0
|
||||
%"40" = extractelement <2 x i32> %"38", i32 1
|
||||
|
@ -34,9 +37,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
|
|||
store float %"21", ptr addrspace(5) %"7", align 4
|
||||
store float %"22", ptr addrspace(5) %"8", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"23", ptr addrspace(5) %1, align 8
|
||||
%"14" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"23", ptr addrspace(5) %2, align 8
|
||||
%"14" = load i64, ptr addrspace(5) %2, align 8
|
||||
%"42" = bitcast i64 %"14" to <4 x i16>
|
||||
%"43" = extractelement <4 x i16> %"42", i32 0
|
||||
%"44" = extractelement <4 x i16> %"42", i32 1
|
||||
|
@ -57,8 +59,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64)
|
|||
%"30" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"31" = load float, ptr addrspace(5) %"7", align 4
|
||||
%"48" = inttoptr i64 %"30" to ptr
|
||||
%"51" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store float %"31", ptr %"51", align 4
|
||||
%"50" = getelementptr inbounds i8, ptr %"48", i64 4
|
||||
store float %"31", ptr %"50", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22",
|
|||
store float %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"28", align 4
|
||||
store float %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load float, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0
|
||||
|
||||
define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
%"7" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"2
|
|||
store float %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load float, ptr %"28", align 4
|
||||
store float %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load float, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load float, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
|
||||
"29":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"24", align 8
|
||||
|
@ -20,14 +22,14 @@ define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23",
|
|||
store i32 %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"26" = inttoptr i64 %"15" to ptr addrspace(1)
|
||||
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4
|
||||
%"14" = load i32, ptr addrspace(1) %"31", align 4
|
||||
%"30" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4
|
||||
%"14" = load i32, ptr addrspace(1) %"30", align 4
|
||||
store i32 %"14", ptr addrspace(5) %"7", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"18" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%0 = sext i32 %"17" to i64
|
||||
%1 = sext i32 %"18" to i64
|
||||
%"16" = mul nsw i64 %0, %1
|
||||
%2 = sext i32 %"17" to i64
|
||||
%3 = sext i32 %"18" to i64
|
||||
%"16" = mul nsw i64 %2, %3
|
||||
store i64 %"16", ptr addrspace(5) %"8", align 8
|
||||
%"19" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"20" = load i64, ptr addrspace(5) %"8", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 {
|
||||
"22":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"18", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"19", align 8
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -2,21 +2,23 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"26":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"23", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"5", align 8
|
||||
%"12" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"24" = inttoptr i64 %"12" to ptr addrspace(1)
|
||||
%"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8
|
||||
%"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8
|
||||
%"27" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8
|
||||
%"8" = load <2 x i32>, ptr addrspace(1) %"27", align 8
|
||||
%"13" = extractelement <2 x i32> %"8", i32 0
|
||||
%"14" = extractelement <2 x i32> %"8", i32 1
|
||||
store i32 %"13", ptr addrspace(5) %"6", align 4
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 {
|
||||
"23":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"18", align 8
|
||||
|
|
|
@ -4,13 +4,16 @@ target triple = "amdgcn-amd-amdhsa"
|
|||
declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0
|
||||
|
||||
define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 {
|
||||
"29":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"15" = load i64, ptr addrspace(4) %"25", align 8
|
||||
store i64 %"15", ptr addrspace(5) %"4", align 8
|
||||
%"16" = load i64, ptr addrspace(4) %"26", align 8
|
||||
|
@ -20,9 +23,8 @@ define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr
|
|||
%"17" = load i32, ptr %"27", align 4
|
||||
store i32 %"17", ptr addrspace(5) %"6", align 4
|
||||
%"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0)
|
||||
%0 = alloca i32, align 4, addrspace(5)
|
||||
store i32 %"11", ptr addrspace(5) %0, align 4
|
||||
%"19" = load i32, ptr addrspace(5) %0, align 4
|
||||
store i32 %"11", ptr addrspace(5) %1, align 4
|
||||
%"19" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"19", ptr addrspace(5) %"7", align 4
|
||||
%"21" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"30":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr a
|
|||
store i64 %"11", ptr addrspace(5) %"6", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"25", i64 8
|
||||
%"13" = load i64, ptr %"32", align 8
|
||||
%"31" = getelementptr inbounds i8, ptr %"25", i64 8
|
||||
%"13" = load i64, ptr %"31", align 8
|
||||
store i64 %"13", ptr addrspace(5) %"7", align 8
|
||||
%"16" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"17" = load i64, ptr addrspace(5) %"7", align 8
|
||||
|
|
|
@ -2,18 +2,20 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 {
|
||||
"28":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"24" = ptrtoint ptr addrspace(4) %"21" to i64
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 %"24", ptr addrspace(5) %0, align 8
|
||||
%"23" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 %"24", ptr addrspace(5) %1, align 8
|
||||
%"23" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"23", ptr addrspace(5) %"4", align 8
|
||||
%"12" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"12" to ptr addrspace(4)
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
"41":
|
||||
%"14" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
%"8" = alloca i64, align 8, addrspace(5)
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
%2 = alloca i64, align 8, addrspace(5)
|
||||
br label %3
|
||||
|
||||
3: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"14", align 1
|
||||
%"15" = load i64, ptr addrspace(4) %"36", align 8
|
||||
store i64 %"15", ptr addrspace(5) %"4", align 8
|
||||
%"16" = load i64, ptr addrspace(4) %"37", align 8
|
||||
|
@ -21,8 +25,8 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36",
|
|||
store i64 %"17", ptr addrspace(5) %"6", align 8
|
||||
%"20" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"39" = inttoptr i64 %"20" to ptr
|
||||
%"43" = getelementptr inbounds i8, ptr %"39", i64 8
|
||||
%"19" = load i64, ptr %"43", align 8
|
||||
%"42" = getelementptr inbounds i8, ptr %"39", i64 8
|
||||
%"19" = load i64, ptr %"42", align 8
|
||||
store i64 %"19", ptr addrspace(5) %"7", align 8
|
||||
%"22" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"23" = load i64, ptr addrspace(5) %"7", align 8
|
||||
|
@ -34,21 +38,19 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36",
|
|||
%"26" = load i1, ptr addrspace(5) %"9", align 1
|
||||
br i1 %"26", label %"10", label %"11"
|
||||
|
||||
"10": ; preds = %"41"
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 1, ptr addrspace(5) %0, align 8
|
||||
%"27" = load i64, ptr addrspace(5) %0, align 8
|
||||
"10": ; preds = %3
|
||||
store i64 1, ptr addrspace(5) %1, align 8
|
||||
%"27" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"27", ptr addrspace(5) %"8", align 8
|
||||
br label %"11"
|
||||
|
||||
"11": ; preds = %"10", %"41"
|
||||
"11": ; preds = %"10", %3
|
||||
%"28" = load i1, ptr addrspace(5) %"9", align 1
|
||||
br i1 %"28", label %"13", label %"12"
|
||||
|
||||
"12": ; preds = %"11"
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
store i64 2, ptr addrspace(5) %1, align 8
|
||||
%"29" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 2, ptr addrspace(5) %2, align 8
|
||||
%"29" = load i64, ptr addrspace(5) %2, align 8
|
||||
store i64 %"29", ptr addrspace(5) %"8", align 8
|
||||
br label %"13"
|
||||
|
||||
|
|
|
@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
"43":
|
||||
%"10" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"10", align 1
|
||||
%"11" = load i64, ptr addrspace(4) %"31", align 8
|
||||
store i64 %"11", ptr addrspace(5) %"4", align 8
|
||||
%"12" = load i64, ptr addrspace(4) %"32", align 8
|
||||
|
@ -21,28 +23,28 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr
|
|||
store i32 %"13", ptr addrspace(5) %"6", align 4
|
||||
%"16" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"34" = inttoptr i64 %"16" to ptr
|
||||
%"45" = getelementptr inbounds i8, ptr %"34", i64 4
|
||||
%"15" = load i32, ptr %"45", align 4
|
||||
%"44" = getelementptr inbounds i8, ptr %"34", i64 4
|
||||
%"15" = load i32, ptr %"44", align 4
|
||||
store i32 %"15", ptr addrspace(5) %"7", align 4
|
||||
%"18" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"19" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%0 = bitcast i32 %"18" to <4 x i8>
|
||||
%1 = bitcast i32 %"19" to <4 x i8>
|
||||
%2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
%"35" = bitcast <4 x i8> %2 to i32
|
||||
%2 = bitcast i32 %"18" to <4 x i8>
|
||||
%3 = bitcast i32 %"19" to <4 x i8>
|
||||
%4 = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
%"35" = bitcast <4 x i8> %4 to i32
|
||||
store i32 %"35", ptr addrspace(5) %"8", align 4
|
||||
%"21" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%3 = bitcast i32 %"21" to <4 x i8>
|
||||
%4 = bitcast i32 %"22" to <4 x i8>
|
||||
%5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
%6 = extractelement <4 x i8> %5, i32 0
|
||||
%7 = ashr i8 %6, 7
|
||||
%8 = insertelement <4 x i8> %5, i8 %7, i32 0
|
||||
%9 = extractelement <4 x i8> %8, i32 2
|
||||
%10 = ashr i8 %9, 7
|
||||
%11 = insertelement <4 x i8> %8, i8 %10, i32 2
|
||||
%"38" = bitcast <4 x i8> %11 to i32
|
||||
%5 = bitcast i32 %"21" to <4 x i8>
|
||||
%6 = bitcast i32 %"22" to <4 x i8>
|
||||
%7 = shufflevector <4 x i8> %5, <4 x i8> %6, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
%8 = extractelement <4 x i8> %7, i32 0
|
||||
%9 = ashr i8 %8, 7
|
||||
%10 = insertelement <4 x i8> %7, i8 %9, i32 0
|
||||
%11 = extractelement <4 x i8> %10, i32 2
|
||||
%12 = ashr i8 %11, 7
|
||||
%13 = insertelement <4 x i8> %10, i8 %12, i32 2
|
||||
%"38" = bitcast <4 x i8> %13 to i32
|
||||
store i32 %"38", ptr addrspace(5) %"9", align 4
|
||||
%"23" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"24" = load i32, ptr addrspace(5) %"8", align 4
|
||||
|
@ -51,8 +53,8 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr
|
|||
%"25" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"26" = load i32, ptr addrspace(5) %"9", align 4
|
||||
%"42" = inttoptr i64 %"25" to ptr
|
||||
%"47" = getelementptr inbounds i8, ptr %"42", i64 4
|
||||
store i32 %"26", ptr %"47", align 4
|
||||
%"46" = getelementptr inbounds i8, ptr %"42", i64 4
|
||||
store i32 %"26", ptr %"46", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,14 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 {
|
||||
"33":
|
||||
%"9" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%1 = alloca i32, align 4, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"9", align 1
|
||||
%"10" = load i64, ptr addrspace(4) %"25", align 8
|
||||
store i64 %"10", ptr addrspace(5) %"4", align 8
|
||||
%"11" = load i64, ptr addrspace(4) %"26", align 8
|
||||
|
@ -20,19 +23,18 @@ define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i
|
|||
store i32 %"12", ptr addrspace(5) %"6", align 4
|
||||
%"15" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"28" = inttoptr i64 %"15" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"28", i64 4
|
||||
%"14" = load i32, ptr %"35", align 4
|
||||
%"34" = getelementptr inbounds i8, ptr %"28", i64 4
|
||||
%"14" = load i32, ptr %"34", align 4
|
||||
store i32 %"14", ptr addrspace(5) %"7", align 4
|
||||
%0 = alloca i32, align 4, addrspace(5)
|
||||
store i32 64, ptr addrspace(5) %0, align 4
|
||||
%"16" = load i32, ptr addrspace(5) %0, align 4
|
||||
store i32 64, ptr addrspace(5) %1, align 4
|
||||
%"16" = load i32, ptr addrspace(5) %1, align 4
|
||||
store i32 %"16", ptr addrspace(5) %"8", align 4
|
||||
%"18" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"19" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%1 = bitcast i32 %"18" to <4 x i8>
|
||||
%2 = bitcast i32 %"19" to <4 x i8>
|
||||
%3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
|
||||
%"29" = bitcast <4 x i8> %3 to i32
|
||||
%3 = bitcast i32 %"18" to <4 x i8>
|
||||
%4 = bitcast i32 %"19" to <4 x i8>
|
||||
%5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
|
||||
%"29" = bitcast <4 x i8> %5 to i32
|
||||
store i32 %"29", ptr addrspace(5) %"7", align 4
|
||||
%"20" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"21" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
|
||||
"33":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca [8 x i8], align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i64, align 8, addrspace(5)
|
||||
%"7" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"23", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"5", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"24", align 8
|
||||
|
@ -22,14 +24,14 @@ define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23"
|
|||
%"27" = addrspacecast ptr addrspace(5) %"4" to ptr
|
||||
store i64 %"18", ptr %"27", align 8
|
||||
%"29" = addrspacecast ptr addrspace(5) %"4" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"29", i64 0
|
||||
%"30" = load i64, ptr %"37", align 8
|
||||
%"36" = getelementptr inbounds i8, ptr %"29", i64 0
|
||||
%"30" = load i64, ptr %"36", align 8
|
||||
store i64 %"30", ptr addrspace(5) %"7", align 8
|
||||
%"15" = load i64, ptr addrspace(5) %"6", align 8
|
||||
%"16" = load i64, ptr addrspace(5) %"7", align 8
|
||||
%"31" = inttoptr i64 %"15" to ptr addrspace(1)
|
||||
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0
|
||||
store i64 %"16", ptr addrspace(1) %"39", align 8
|
||||
%"38" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0
|
||||
store i64 %"16", ptr addrspace(1) %"38", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 {
|
||||
"27":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
%"7" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"22", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"23", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr
|
|||
store i32 %"11", ptr addrspace(5) %"6", align 4
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"25" = inttoptr i64 %"14" to ptr
|
||||
%"29" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"29", align 4
|
||||
%"28" = getelementptr inbounds i8, ptr %"25", i64 4
|
||||
%"13" = load i32, ptr %"28", align 4
|
||||
store i32 %"13", ptr addrspace(5) %"7", align 4
|
||||
%"16" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i32, ptr addrspace(5) %"7", align 4
|
||||
|
|
|
@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 {
|
||||
"20":
|
||||
%"7" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca double, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"7", align 1
|
||||
%"8" = load i64, ptr addrspace(4) %"16", align 8
|
||||
store i64 %"8", ptr addrspace(5) %"4", align 8
|
||||
%"9" = load i64, ptr addrspace(4) %"17", align 8
|
||||
|
@ -17,8 +19,8 @@ define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", pt
|
|||
%"10" = load double, ptr %"18", align 8
|
||||
store double %"10", ptr addrspace(5) %"6", align 8
|
||||
%"13" = load double, ptr addrspace(5) %"6", align 8
|
||||
%0 = call afn double @llvm.sqrt.f64(double %"13")
|
||||
%"12" = fdiv arcp afn double 1.000000e+00, %0
|
||||
%2 = call afn double @llvm.sqrt.f64(double %"13")
|
||||
%"12" = fdiv arcp afn double 1.000000e+00, %2
|
||||
store double %"12", ptr addrspace(5) %"6", align 8
|
||||
%"14" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"15" = load double, ptr addrspace(5) %"6", align 8
|
||||
|
|
|
@ -2,16 +2,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 {
|
||||
"15":
|
||||
%"6" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%1 = alloca i64, align 8, addrspace(5)
|
||||
br label %2
|
||||
|
||||
2: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"6", align 1
|
||||
%"7" = load i64, ptr addrspace(4) %"13", align 8
|
||||
store i64 %"7", ptr addrspace(5) %"4", align 8
|
||||
%0 = alloca i64, align 8, addrspace(5)
|
||||
store i64 -9223372036854775808, ptr addrspace(5) %0, align 8
|
||||
%"8" = load i64, ptr addrspace(5) %0, align 8
|
||||
store i64 -9223372036854775808, ptr addrspace(5) %1, align 8
|
||||
%"8" = load i64, ptr addrspace(5) %1, align 8
|
||||
store i64 %"8", ptr addrspace(5) %"5", align 8
|
||||
%"9" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(5) %"5", align 8
|
||||
|
|
|
@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
"56":
|
||||
%"11" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i32, align 4, addrspace(5)
|
||||
|
@ -12,6 +10,10 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
|
|||
%"8" = alloca i32, align 4, addrspace(5)
|
||||
%"9" = alloca i32, align 4, addrspace(5)
|
||||
%"10" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"11", align 1
|
||||
%"12" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"12", ptr addrspace(5) %"4", align 8
|
||||
%"13" = load i64, ptr addrspace(4) %"39", align 8
|
||||
|
@ -22,31 +24,31 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
|
|||
store i32 %"40", ptr addrspace(5) %"6", align 4
|
||||
%"17" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"42" = inttoptr i64 %"17" to ptr
|
||||
%"58" = getelementptr inbounds i8, ptr %"42", i64 4
|
||||
%"43" = load i32, ptr %"58", align 4
|
||||
%"57" = getelementptr inbounds i8, ptr %"42", i64 4
|
||||
%"43" = load i32, ptr %"57", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"7", align 4
|
||||
%"19" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"44" = inttoptr i64 %"19" to ptr
|
||||
%"60" = getelementptr inbounds i8, ptr %"44", i64 8
|
||||
%"45" = load i32, ptr %"60", align 4
|
||||
%"59" = getelementptr inbounds i8, ptr %"44", i64 8
|
||||
%"45" = load i32, ptr %"59", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"8", align 4
|
||||
%"21" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%0 = icmp ugt i32 %"21", %"22"
|
||||
%1 = sub i32 %"21", %"22"
|
||||
%2 = sub i32 %"22", %"21"
|
||||
%3 = select i1 %0, i32 %1, i32 %2
|
||||
%"46" = add i32 %"23", %3
|
||||
%2 = icmp ugt i32 %"21", %"22"
|
||||
%3 = sub i32 %"21", %"22"
|
||||
%4 = sub i32 %"22", %"21"
|
||||
%5 = select i1 %2, i32 %3, i32 %4
|
||||
%"46" = add i32 %"23", %5
|
||||
store i32 %"46", ptr addrspace(5) %"9", align 4
|
||||
%"25" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"27" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%4 = icmp sgt i32 %"25", %"26"
|
||||
%5 = sub i32 %"25", %"26"
|
||||
%6 = sub i32 %"26", %"25"
|
||||
%7 = select i1 %4, i32 %5, i32 %6
|
||||
%"50" = add i32 %"27", %7
|
||||
%6 = icmp sgt i32 %"25", %"26"
|
||||
%7 = sub i32 %"25", %"26"
|
||||
%8 = sub i32 %"26", %"25"
|
||||
%9 = select i1 %6, i32 %7, i32 %8
|
||||
%"50" = add i32 %"27", %9
|
||||
store i32 %"50", ptr addrspace(5) %"10", align 4
|
||||
%"28" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"29" = load i32, ptr addrspace(5) %"9", align 4
|
||||
|
@ -55,8 +57,8 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
|
|||
%"30" = load i64, ptr addrspace(5) %"5", align 8
|
||||
%"31" = load i32, ptr addrspace(5) %"10", align 4
|
||||
%"55" = inttoptr i64 %"30" to ptr
|
||||
%"62" = getelementptr inbounds i8, ptr %"55", i64 4
|
||||
store i32 %"31", ptr %"62", align 4
|
||||
%"61" = getelementptr inbounds i8, ptr %"55", i64 4
|
||||
store i32 %"31", ptr %"61", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
|
|||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 {
|
||||
"28":
|
||||
%"8" = alloca i1, align 1, addrspace(5)
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"4" = alloca i64, align 8, addrspace(5)
|
||||
%"5" = alloca i64, align 8, addrspace(5)
|
||||
%"6" = alloca i16, align 2, addrspace(5)
|
||||
%"7" = alloca i16, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
store i1 false, ptr addrspace(5) %"8", align 1
|
||||
%"9" = load i64, ptr addrspace(4) %"23", align 8
|
||||
store i64 %"9", ptr addrspace(5) %"4", align 8
|
||||
%"10" = load i64, ptr addrspace(4) %"24", align 8
|
||||
|
@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr
|
|||
store i16 %"11", ptr addrspace(5) %"6", align 2
|
||||
%"14" = load i64, ptr addrspace(5) %"4", align 8
|
||||
%"26" = inttoptr i64 %"14" to ptr
|
||||
%"30" = getelementptr inbounds i8, ptr %"26", i64 2
|
||||
%"13" = load i16, ptr %"30", align 2
|
||||
%"29" = getelementptr inbounds i8, ptr %"26", i64 2
|
||||
%"13" = load i16, ptr %"29", align 2
|
||||
store i16 %"13", ptr addrspace(5) %"7", align 2
|
||||
%"16" = load i16, ptr addrspace(5) %"6", align 2
|
||||
%"17" = load i16, ptr addrspace(5) %"7", align 2
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue