diff --git a/ptx/src/pass/emit_llvm.rs b/ptx/src/pass/emit_llvm.rs index 6f3f381..0f432ca 100644 --- a/ptx/src/pass/emit_llvm.rs +++ b/ptx/src/pass/emit_llvm.rs @@ -2289,7 +2289,7 @@ impl<'a> MethodEmitContext<'a> { }; let res_lo = self.emit_intrinsic( name_lo, - Some(arguments.dst), + if data.control == Mul24Control::Lo { Some(arguments.dst) } else { None }, Some(&ast::Type::Scalar(data.type_)), vec![ (src1, get_scalar_type(self.context, data.type_)), @@ -2317,14 +2317,14 @@ impl<'a> MethodEmitContext<'a> { )?; let shift_number = unsafe { LLVMConstInt(LLVMInt32TypeInContext(self.context), 16, 0) }; let res_lo_shr = unsafe { - LLVMBuildLShr(self.builder, res_lo, shift_number, c"res_lo_shr".as_ptr()) + LLVMBuildLShr(self.builder, res_lo, shift_number, LLVM_UNNAMED.as_ptr()) }; let res_hi_shl = - unsafe { LLVMBuildShl(self.builder, res_hi, shift_number, c"res_hi_shl".as_ptr()) }; + unsafe { LLVMBuildShl(self.builder, res_hi, shift_number, LLVM_UNNAMED.as_ptr()) }; self.resolver .with_result(arguments.dst, |dst: *const i8| unsafe { - LLVMBuildAdd(self.builder, res_lo_shr, res_hi_shl, dst) + LLVMBuildOr(self.builder, res_lo_shr, res_hi_shl, dst) }); } Ok(()) diff --git a/ptx/src/test/ll/mul24_hi_s32.ll b/ptx/src/test/ll/mul24_hi_s32.ll index 8cbfaaa..20e32ed 100644 --- a/ptx/src/test/ll/mul24_hi_s32.ll +++ b/ptx/src/test/ll/mul24_hi_s32.ll @@ -23,12 +23,12 @@ define amdgpu_kernel void @mul24_hi_s32(ptr addrspace(4) byref(i64) %"32", ptr a store i32 %"43", ptr addrspace(5) %"37", align 4 %"46" = load i32, ptr addrspace(5) %"37", align 4 %"47" = load i32, ptr addrspace(5) %"36", align 4 - %"45" = call i32 @llvm.amdgcn.mul.i24(i32 %"46", i32 %"47") - %2 = call i32 @llvm.amdgcn.mulhi.i24(i32 %"46", i32 %"47") - %res_lo_shr = lshr i32 %"45", 16 - %res_hi_shl = shl i32 %2, 16 - %"451" = add i32 %res_lo_shr, %res_hi_shl - store i32 %"451", ptr addrspace(5) %"38", align 4 + %2 = call i32 @llvm.amdgcn.mul.i24(i32 %"46", i32 %"47") + %3 = call i32 @llvm.amdgcn.mulhi.i24(i32 %"46", i32 %"47") + %4 = lshr i32 %2, 16 + %5 = shl i32 %3, 16 + %"45" = or i32 %4, %5 + store i32 %"45", ptr addrspace(5) %"38", align 4 %"48" = load i64, ptr addrspace(5) %"35", align 4 %"49" = load i32, ptr addrspace(5) %"38", align 4 %"51" = inttoptr i64 %"48" to ptr diff --git a/ptx/src/test/ll/mul24_hi_u32.ll b/ptx/src/test/ll/mul24_hi_u32.ll index 344dda2..427adb6 100644 --- a/ptx/src/test/ll/mul24_hi_u32.ll +++ b/ptx/src/test/ll/mul24_hi_u32.ll @@ -19,12 +19,12 @@ define amdgpu_kernel void @mul24_hi_u32(ptr addrspace(4) byref(i64) %"31", ptr a store i32 %"39", ptr addrspace(5) %"35", align 4 %"42" = load i32, ptr addrspace(5) %"35", align 4 %"43" = load i32, ptr addrspace(5) %"35", align 4 - %"41" = call i32 @llvm.amdgcn.mul.u24(i32 %"42", i32 %"43") - %2 = call i32 @llvm.amdgcn.mulhi.u24(i32 %"42", i32 %"43") - %res_lo_shr = lshr i32 %"41", 16 - %res_hi_shl = shl i32 %2, 16 - %"411" = add i32 %res_lo_shr, %res_hi_shl - store i32 %"411", ptr addrspace(5) %"36", align 4 + %2 = call i32 @llvm.amdgcn.mul.u24(i32 %"42", i32 %"43") + %3 = call i32 @llvm.amdgcn.mulhi.u24(i32 %"42", i32 %"43") + %4 = lshr i32 %2, 16 + %5 = shl i32 %3, 16 + %"41" = or i32 %4, %5 + store i32 %"41", ptr addrspace(5) %"36", align 4 %"44" = load i64, ptr addrspace(5) %"34", align 4 %"45" = load i32, ptr addrspace(5) %"36", align 4 %"47" = inttoptr i64 %"44" to ptr