mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-09-27 19:59:08 +00:00
Fixes
This commit is contained in:
parent
85e8e61f2f
commit
ea60c6a249
5 changed files with 12 additions and 16 deletions
|
@ -2305,8 +2305,6 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
let name = self.resolver.get_or_add(arguments.dst);
|
|
||||||
unsafe { LLVMSetValueName2(min, name.as_ptr().cast(), name.len()) };
|
|
||||||
self.resolver.register(arguments.dst, min);
|
self.resolver.register(arguments.dst, min);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -2328,7 +2326,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
let a = self.resolver.value(arguments.src1)?;
|
let a = self.resolver.value(arguments.src1)?;
|
||||||
let b = self.resolver.value(arguments.src2)?;
|
let b = self.resolver.value(arguments.src2)?;
|
||||||
|
|
||||||
let min = self.emit_intrinsic(
|
let max = self.emit_intrinsic(
|
||||||
unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) },
|
unsafe { CStr::from_bytes_with_nul_unchecked(intrinsic.as_bytes()) },
|
||||||
None,
|
None,
|
||||||
Some(&data.type_().into()),
|
Some(&data.type_().into()),
|
||||||
|
@ -2353,14 +2351,12 @@ impl<'a> MethodEmitContext<'a> {
|
||||||
self.builder,
|
self.builder,
|
||||||
is_nan,
|
is_nan,
|
||||||
LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN),
|
LLVMConstReal(get_scalar_type(self.context, type_), f64::NAN),
|
||||||
min,
|
max,
|
||||||
dst,
|
dst,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
let name = self.resolver.get_or_add(arguments.dst);
|
self.resolver.register(arguments.dst, max);
|
||||||
unsafe { LLVMSetValueName2(min, name.as_ptr().cast(), name.len()) };
|
|
||||||
self.resolver.register(arguments.dst, min);
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,8 +28,8 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"38", ptr addrspace
|
||||||
store half %"51", ptr addrspace(5) %"43", align 2
|
store half %"51", ptr addrspace(5) %"43", align 2
|
||||||
%"53" = load half, ptr addrspace(5) %"43", align 2
|
%"53" = load half, ptr addrspace(5) %"43", align 2
|
||||||
%"54" = load half, ptr addrspace(5) %"42", align 2
|
%"54" = load half, ptr addrspace(5) %"42", align 2
|
||||||
%"52" = call half @llvm.maxnum.f16(half %"53", half %"54")
|
%2 = call half @llvm.maxnum.f16(half %"53", half %"54")
|
||||||
store half %"52", ptr addrspace(5) %"44", align 2
|
store half %2, ptr addrspace(5) %"44", align 2
|
||||||
%"55" = load i64, ptr addrspace(5) %"41", align 8
|
%"55" = load i64, ptr addrspace(5) %"41", align 8
|
||||||
%"56" = load half, ptr addrspace(5) %"44", align 2
|
%"56" = load half, ptr addrspace(5) %"44", align 2
|
||||||
%"61" = inttoptr i64 %"55" to ptr
|
%"61" = inttoptr i64 %"55" to ptr
|
||||||
|
|
|
@ -24,8 +24,8 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
|
||||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i32, ptr addrspace(5) %"40", align 4
|
%"49" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
||||||
%"48" = call i32 @llvm.smax.i32(i32 %"49", i32 %"50")
|
%2 = call i32 @llvm.smax.i32(i32 %"49", i32 %"50")
|
||||||
store i32 %"48", ptr addrspace(5) %"40", align 4
|
store i32 %2, ptr addrspace(5) %"40", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||||
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"51" to ptr
|
%"55" = inttoptr i64 %"51" to ptr
|
||||||
|
|
|
@ -24,8 +24,8 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
|
||||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i32, ptr addrspace(5) %"40", align 4
|
%"49" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
||||||
%"48" = call i32 @llvm.smin.i32(i32 %"49", i32 %"50")
|
%2 = call i32 @llvm.smin.i32(i32 %"49", i32 %"50")
|
||||||
store i32 %"48", ptr addrspace(5) %"40", align 4
|
store i32 %2, ptr addrspace(5) %"40", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||||
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"51" to ptr
|
%"55" = inttoptr i64 %"51" to ptr
|
||||||
|
|
|
@ -26,8 +26,8 @@ define amdgpu_kernel void @min_f16(ptr addrspace(4) byref(i64) %"36", ptr addrsp
|
||||||
store half %"47", ptr addrspace(5) %"41", align 2
|
store half %"47", ptr addrspace(5) %"41", align 2
|
||||||
%"49" = load half, ptr addrspace(5) %"40", align 2
|
%"49" = load half, ptr addrspace(5) %"40", align 2
|
||||||
%"50" = load half, ptr addrspace(5) %"41", align 2
|
%"50" = load half, ptr addrspace(5) %"41", align 2
|
||||||
%"48" = call half @llvm.minnum.f16(half %"49", half %"50")
|
%2 = call half @llvm.minnum.f16(half %"49", half %"50")
|
||||||
store half %"48", ptr addrspace(5) %"40", align 2
|
store half %2, ptr addrspace(5) %"40", align 2
|
||||||
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||||
%"52" = load half, ptr addrspace(5) %"40", align 2
|
%"52" = load half, ptr addrspace(5) %"40", align 2
|
||||||
%"57" = inttoptr i64 %"51" to ptr
|
%"57" = inttoptr i64 %"51" to ptr
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue