mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-06 00:00:13 +00:00
Improve AMD compatibility
This commit is contained in:
parent
043172bd9b
commit
e2432d0df1
2 changed files with 14 additions and 32 deletions
|
@ -27,6 +27,7 @@ impl HasLivenessCookie for FunctionData {
|
||||||
|
|
||||||
pub struct FunctionData {
|
pub struct FunctionData {
|
||||||
pub base: ocl_core::Kernel,
|
pub base: ocl_core::Kernel,
|
||||||
|
pub device: ocl_core::DeviceId,
|
||||||
pub arg_size: Vec<(usize, bool)>,
|
pub arg_size: Vec<(usize, bool)>,
|
||||||
pub use_shared_mem: bool,
|
pub use_shared_mem: bool,
|
||||||
pub legacy_args: LegacyArguments,
|
pub legacy_args: LegacyArguments,
|
||||||
|
@ -215,9 +216,9 @@ pub(crate) fn get_attribute(
|
||||||
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
||||||
let max_threads = GlobalState::lock_function(func, |func| {
|
let max_threads = GlobalState::lock_function(func, |func| {
|
||||||
if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) =
|
if let ocl_core::KernelWorkGroupInfoResult::WorkGroupSize(size) =
|
||||||
ocl_core::get_kernel_work_group_info::<()>(
|
ocl_core::get_kernel_work_group_info(
|
||||||
&func.base,
|
&func.base,
|
||||||
(),
|
&func.device,
|
||||||
ocl_core::KernelWorkGroupInfo::WorkGroupSize,
|
ocl_core::KernelWorkGroupInfo::WorkGroupSize,
|
||||||
)?
|
)?
|
||||||
{
|
{
|
||||||
|
|
|
@ -290,7 +290,7 @@ impl SpirvModule {
|
||||||
let binary = binary_prog.as_ptr();
|
let binary = binary_prog.as_ptr();
|
||||||
let mut binary_status = 0;
|
let mut binary_status = 0;
|
||||||
let mut errcode_ret = 0;
|
let mut errcode_ret = 0;
|
||||||
let program = unsafe {
|
let raw_program = unsafe {
|
||||||
ocl_core::ffi::clCreateProgramWithBinary(
|
ocl_core::ffi::clCreateProgramWithBinary(
|
||||||
ctx.as_ptr(),
|
ctx.as_ptr(),
|
||||||
1,
|
1,
|
||||||
|
@ -303,7 +303,15 @@ impl SpirvModule {
|
||||||
};
|
};
|
||||||
assert_eq!(binary_status, 0, "clCreateProgramWithBinary");
|
assert_eq!(binary_status, 0, "clCreateProgramWithBinary");
|
||||||
assert_eq!(errcode_ret, 0, "clCreateProgramWithBinary");
|
assert_eq!(errcode_ret, 0, "clCreateProgramWithBinary");
|
||||||
unsafe { ocl_core::Program::from_raw_create_ptr(program) }
|
let ocl_program = unsafe { ocl_core::Program::from_raw_create_ptr(raw_program) };
|
||||||
|
ocl_core::build_program(
|
||||||
|
&ocl_program,
|
||||||
|
Some(&[dev]),
|
||||||
|
&CString::new("").unwrap(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
ocl_program
|
||||||
} else {
|
} else {
|
||||||
Self::compile_amd("gfx1011:xnack-", byte_il, self.should_link_ptx_impl).unwrap();
|
Self::compile_amd("gfx1011:xnack-", byte_il, self.should_link_ptx_impl).unwrap();
|
||||||
Self::compile_intel(
|
Self::compile_intel(
|
||||||
|
@ -359,36 +367,9 @@ pub fn get_function(
|
||||||
&compiled_module.base,
|
&compiled_module.base,
|
||||||
&entry.key().as_c_str().to_string_lossy(),
|
&entry.key().as_c_str().to_string_lossy(),
|
||||||
)?;
|
)?;
|
||||||
let true_b: ocl_core::ffi::cl_bool = 1;
|
|
||||||
let err = unsafe {
|
|
||||||
ocl_core::ffi::clSetKernelExecInfo(
|
|
||||||
kernel.as_ptr(),
|
|
||||||
CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
|
|
||||||
mem::size_of::<ocl_core::ffi::cl_bool>(),
|
|
||||||
&true_b as *const _ as *const _,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
assert_eq!(err, 0);
|
|
||||||
let err = unsafe {
|
|
||||||
ocl_core::ffi::clSetKernelExecInfo(
|
|
||||||
kernel.as_ptr(),
|
|
||||||
CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
|
|
||||||
mem::size_of::<ocl_core::ffi::cl_bool>(),
|
|
||||||
&true_b as *const _ as *const _,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
assert_eq!(err, 0);
|
|
||||||
let err = unsafe {
|
|
||||||
ocl_core::ffi::clSetKernelExecInfo(
|
|
||||||
kernel.as_ptr(),
|
|
||||||
CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
|
|
||||||
mem::size_of::<ocl_core::ffi::cl_bool>(),
|
|
||||||
&true_b as *const _ as *const _,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
assert_eq!(err, 0);
|
|
||||||
entry.insert(Box::new(Function::new(FunctionData {
|
entry.insert(Box::new(Function::new(FunctionData {
|
||||||
base: kernel,
|
base: kernel,
|
||||||
|
device: device.ocl_base.clone(),
|
||||||
arg_size: kernel_info.arguments_sizes.clone(),
|
arg_size: kernel_info.arguments_sizes.clone(),
|
||||||
use_shared_mem: kernel_info.uses_shared_mem,
|
use_shared_mem: kernel_info.uses_shared_mem,
|
||||||
legacy_args: LegacyArguments::new(),
|
legacy_args: LegacyArguments::new(),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue