mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Tweak LLVM IR to actually get v_sad_u16/u32
This commit is contained in:
parent
fca92e31e4
commit
d6017e4509
2 changed files with 9 additions and 9 deletions
|
@ -1174,7 +1174,7 @@ fn emit_inst_sad(
|
|||
&SetpData {
|
||||
typ: type_,
|
||||
flush_to_zero: None,
|
||||
cmp_op: ast::SetpCompareOp::Less,
|
||||
cmp_op: ast::SetpCompareOp::Greater,
|
||||
},
|
||||
None,
|
||||
arg.src1,
|
||||
|
@ -1182,9 +1182,9 @@ fn emit_inst_sad(
|
|||
)?;
|
||||
let a = ctx.names.value(arg.src1)?;
|
||||
let b = ctx.names.value(arg.src2)?;
|
||||
let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
|
||||
let a_minus_b = unsafe { LLVMBuildSub(builder, a, b, LLVM_UNNAMED) };
|
||||
let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, b_minus_a, a_minus_b, LLVM_UNNAMED) };
|
||||
let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
|
||||
let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, a_minus_b, b_minus_a, LLVM_UNNAMED) };
|
||||
let src3 = ctx.names.value(arg.src3)?;
|
||||
ctx.names.register_result(arg.dst, |dst_name| unsafe {
|
||||
LLVMBuildAdd(builder, src3, a_or_b, dst_name)
|
||||
|
|
|
@ -33,18 +33,18 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
|
|||
%"21" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"22" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"23" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%0 = icmp ult i32 %"21", %"22"
|
||||
%1 = sub i32 %"22", %"21"
|
||||
%2 = sub i32 %"21", %"22"
|
||||
%0 = icmp ugt i32 %"21", %"22"
|
||||
%1 = sub i32 %"21", %"22"
|
||||
%2 = sub i32 %"22", %"21"
|
||||
%3 = select i1 %0, i32 %1, i32 %2
|
||||
%"46" = add i32 %"23", %3
|
||||
store i32 %"46", ptr addrspace(5) %"9", align 4
|
||||
%"25" = load i32, ptr addrspace(5) %"6", align 4
|
||||
%"26" = load i32, ptr addrspace(5) %"7", align 4
|
||||
%"27" = load i32, ptr addrspace(5) %"8", align 4
|
||||
%4 = icmp slt i32 %"25", %"26"
|
||||
%5 = sub i32 %"26", %"25"
|
||||
%6 = sub i32 %"25", %"26"
|
||||
%4 = icmp sgt i32 %"25", %"26"
|
||||
%5 = sub i32 %"25", %"26"
|
||||
%6 = sub i32 %"26", %"25"
|
||||
%7 = select i1 %4, i32 %5, i32 %6
|
||||
%"50" = add i32 %"27", %7
|
||||
store i32 %"50", ptr addrspace(5) %"10", align 4
|
||||
|
|
Loading…
Add table
Reference in a new issue