Tweak LLVM IR to actually get v_sad_u16/u32

This commit is contained in:
Andrzej Janik 2024-04-05 23:23:01 +00:00
parent fca92e31e4
commit d6017e4509
2 changed files with 9 additions and 9 deletions

View file

@@ -1174,7 +1174,7 @@ fn emit_inst_sad(
&SetpData {
typ: type_,
flush_to_zero: None,
-cmp_op: ast::SetpCompareOp::Less,
+cmp_op: ast::SetpCompareOp::Greater,
},
None,
arg.src1,
@@ -1182,9 +1182,9 @@ fn emit_inst_sad(
)?;
let a = ctx.names.value(arg.src1)?;
let b = ctx.names.value(arg.src2)?;
-let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
 let a_minus_b = unsafe { LLVMBuildSub(builder, a, b, LLVM_UNNAMED) };
-let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, b_minus_a, a_minus_b, LLVM_UNNAMED) };
+let b_minus_a = unsafe { LLVMBuildSub(builder, b, a, LLVM_UNNAMED) };
+let a_or_b = unsafe { LLVMBuildSelect(builder, less_than, a_minus_b, b_minus_a, LLVM_UNNAMED) };
let src3 = ctx.names.value(arg.src3)?;
ctx.names.register_result(arg.dst, |dst_name| unsafe {
LLVMBuildAdd(builder, src3, a_or_b, dst_name)

View file

@@ -33,18 +33,18 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr
%"21" = load i32, ptr addrspace(5) %"6", align 4
%"22" = load i32, ptr addrspace(5) %"7", align 4
%"23" = load i32, ptr addrspace(5) %"8", align 4
-%0 = icmp ult i32 %"21", %"22"
-%1 = sub i32 %"22", %"21"
-%2 = sub i32 %"21", %"22"
+%0 = icmp ugt i32 %"21", %"22"
+%1 = sub i32 %"21", %"22"
+%2 = sub i32 %"22", %"21"
%3 = select i1 %0, i32 %1, i32 %2
%"46" = add i32 %"23", %3
store i32 %"46", ptr addrspace(5) %"9", align 4
%"25" = load i32, ptr addrspace(5) %"6", align 4
%"26" = load i32, ptr addrspace(5) %"7", align 4
%"27" = load i32, ptr addrspace(5) %"8", align 4
-%4 = icmp slt i32 %"25", %"26"
-%5 = sub i32 %"26", %"25"
-%6 = sub i32 %"25", %"26"
+%4 = icmp sgt i32 %"25", %"26"
+%5 = sub i32 %"25", %"26"
+%6 = sub i32 %"26", %"25"
%7 = select i1 %4, i32 %5, i32 %6
%"50" = add i32 %"27", %7
store i32 %"50", ptr addrspace(5) %"10", align 4