Add missing V-RAY host functions

This commit is contained in:
Andrzej Janik 2021-09-16 23:11:34 +00:00
commit 314e3dcb49
8 changed files with 306 additions and 51 deletions

View file

@ -2184,11 +2184,11 @@ pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint);
pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult; pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult;
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuGetErrorString( pub unsafe extern "system" fn cuGetErrorString(
CUresult(e): CUresult, CUresult(e): CUresult,
pStr: *mut *const ::std::os::raw::c_char, pStr: *mut *const ::std::os::raw::c_char,
) -> CUresult { ) -> CUresult {
unsafe { *pStr = hipGetErrorString(hipError_t(e)) }; *pStr = hipGetErrorString(hipError_t(e));
CUresult::CUDA_SUCCESS CUresult::CUDA_SUCCESS
} }
@ -2273,17 +2273,20 @@ pub extern "system" fn cuDeviceGetNvSciSyncAttributes(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult { pub unsafe extern "system" fn cuDeviceGetProperties(
r#impl::unimplemented() prop: *mut CUdevprop,
dev: CUdevice,
) -> CUresult {
r#impl::device::get_properties(prop, dev).encuda()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuDeviceComputeCapability( pub unsafe extern "system" fn cuDeviceComputeCapability(
major: *mut ::std::os::raw::c_int, major: *mut ::std::os::raw::c_int,
minor: *mut ::std::os::raw::c_int, minor: *mut ::std::os::raw::c_int,
dev: CUdevice, dev: CUdevice,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() hipDeviceComputeCapability(major, minor, dev.0).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2309,7 +2312,7 @@ pub extern "system" fn cuDevicePrimaryCtxSetFlags(
dev: CUdevice, dev: CUdevice,
flags: ::std::os::raw::c_uint, flags: ::std::os::raw::c_uint,
) -> CUresult { ) -> CUresult {
cuDevicePrimaryCtxSetFlags_v2(dev, flags) CUresult::CUDA_SUCCESS
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2317,7 +2320,7 @@ pub extern "system" fn cuDevicePrimaryCtxSetFlags_v2(
dev: CUdevice, dev: CUdevice,
flags: ::std::os::raw::c_uint, flags: ::std::os::raw::c_uint,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() cuDevicePrimaryCtxSetFlags(dev, flags)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2391,12 +2394,12 @@ pub extern "system" fn cuCtxSynchronize() -> CUresult {
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult { pub extern "system" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult {
r#impl::unimplemented() r#impl::context::set_limit(limit, value)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult { pub unsafe extern "system" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
r#impl::unimplemented() r#impl::context::get_limit(pvalue, limit)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2406,7 +2409,7 @@ pub extern "system" fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresu
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult { pub extern "system" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult {
r#impl::unimplemented() CUresult::CUDA_SUCCESS
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2504,13 +2507,13 @@ pub extern "system" fn cuModuleGetFunction(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuModuleGetGlobal_v2( pub unsafe extern "system" fn cuModuleGetGlobal_v2(
dptr: *mut CUdeviceptr, dptr: *mut CUdeviceptr,
bytes: *mut usize, bytes: *mut usize,
hmod: CUmodule, hmod: CUmodule,
name: *const ::std::os::raw::c_char, name: *const ::std::os::raw::c_char,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() hipModuleGetGlobal(dptr as _, bytes, hmod as _, name).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2532,17 +2535,17 @@ pub extern "system" fn cuModuleGetSurfRef(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkCreate_v2( pub unsafe extern "system" fn cuLinkCreate_v2(
numOptions: ::std::os::raw::c_uint, numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option, options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void, optionValues: *mut *mut ::std::os::raw::c_void,
stateOut: *mut CUlinkState, stateOut: *mut CUlinkState,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() r#impl::link::create(numOptions, options, optionValues, stateOut)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkAddData_v2( pub unsafe extern "system" fn cuLinkAddData_v2(
state: CUlinkState, state: CUlinkState,
type_: CUjitInputType, type_: CUjitInputType,
data: *mut ::std::os::raw::c_void, data: *mut ::std::os::raw::c_void,
@ -2552,7 +2555,16 @@ pub extern "system" fn cuLinkAddData_v2(
options: *mut CUjit_option, options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void, optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() r#impl::link::add_data(
state,
type_,
data,
size,
name,
numOptions,
options,
optionValues,
)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2573,17 +2585,17 @@ pub extern "system" fn cuLinkComplete(
cubinOut: *mut *mut ::std::os::raw::c_void, cubinOut: *mut *mut ::std::os::raw::c_void,
sizeOut: *mut usize, sizeOut: *mut usize,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() r#impl::link::complete(state, cubinOut, sizeOut)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkDestroy(state: CUlinkState) -> CUresult { pub unsafe extern "system" fn cuLinkDestroy(state: CUlinkState) -> CUresult {
r#impl::unimplemented() r#impl::link::destroy(state)
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult { pub unsafe extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult {
r#impl::unimplemented() hipMemGetInfo(free, total).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2641,17 +2653,17 @@ pub extern "system" fn cuMemAllocHost_v2(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult { pub unsafe extern "system" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult {
r#impl::unimplemented() hipFreeHost(p).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemHostAlloc( pub unsafe extern "system" fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void, pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize, bytesize: usize,
Flags: ::std::os::raw::c_uint, Flags: ::std::os::raw::c_uint,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() hipMemAllocHost(pp, bytesize).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2694,7 +2706,7 @@ pub extern "system" fn cuDeviceGetPCIBusId(
len: ::std::os::raw::c_int, len: ::std::os::raw::c_int,
dev: CUdevice, dev: CUdevice,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() unsafe { hipDeviceGetPCIBusId(pciBusId, len, dev.0) }.into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2883,8 +2895,8 @@ pub extern "system" fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CU
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult { pub unsafe extern "system" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult {
r#impl::unimplemented() r#impl::memory::copy_3d(pCopy).encuda()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -2915,13 +2927,13 @@ pub extern "system" fn cuMemcpyPeerAsync(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemcpyHtoDAsync_v2( pub unsafe extern "system" fn cuMemcpyHtoDAsync_v2(
dstDevice: CUdeviceptr, dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void, srcHost: *const ::std::os::raw::c_void,
ByteCount: usize, ByteCount: usize,
hStream: CUstream, hStream: CUstream,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() hipMemcpyHtoDAsync(dstDevice.0 as _, srcHost as _, ByteCount, hStream as _).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -3153,16 +3165,16 @@ pub extern "system" fn cuArrayGetDescriptor_v2(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuArrayDestroy(hArray: CUarray) -> CUresult { pub unsafe extern "system" fn cuArrayDestroy(hArray: CUarray) -> CUresult {
r#impl::unimplemented() hipArrayDestroy(hArray as _).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuArray3DCreate_v2( pub unsafe extern "system" fn cuArray3DCreate_v2(
pHandle: *mut CUarray, pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() hipArray3DCreate(pHandle as _, pAllocateArray as _).into()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
@ -3307,12 +3319,12 @@ pub extern "system" fn cuMemRetainAllocationHandle(
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuPointerGetAttribute( pub unsafe extern "system" fn cuPointerGetAttribute(
data: *mut ::std::os::raw::c_void, data: *mut ::std::os::raw::c_void,
attribute: CUpointer_attribute, attribute: CUpointer_attribute,
ptr: CUdeviceptr, ptr: CUdeviceptr,
) -> CUresult { ) -> CUresult {
r#impl::unimplemented() r#impl::pointer::get_attribute(data, attribute, ptr).encuda()
} }
#[cfg_attr(not(test), no_mangle)] #[cfg_attr(not(test), no_mangle)]

24
zluda/src/impl/context.rs Normal file
View file

@ -0,0 +1,24 @@
use std::ptr;
use crate::cuda::CUlimit;
use crate::cuda::CUresult;
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
if pvalue == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
*pvalue = 512; // GTX 1060 reports 1024
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
}
}
pub(crate) fn set_limit(limit: CUlimit, value: usize) -> CUresult {
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
}
}

View file

@ -1,5 +1,8 @@
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult}; use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
use crate::cuda; use crate::{
cuda::{self, CUdevice, CUdevprop},
hip_call,
};
use cuda::{CUdevice_attribute, CUuuid_st}; use cuda::{CUdevice_attribute, CUuuid_st};
use hip_runtime_sys::{ use hip_runtime_sys::{
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties, hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
@ -325,3 +328,21 @@ pub fn get_luid(
unsafe { *dev_node_mask = 0 }; unsafe { *dev_node_mask = 0 };
Ok(()) Ok(())
} }
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
if prop == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let mut hip_props = mem::zeroed();
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
(*prop).maxGridSize = hip_props.maxGridSize;
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
(*prop).SIMDWidth = hip_props.warpSize;
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
(*prop).regsPerBlock = hip_props.regsPerBlock;
(*prop).clockRate = hip_props.clockRate;
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
Ok(())
}

67
zluda/src/impl/link.rs Normal file
View file

@ -0,0 +1,67 @@
use std::{
ffi::{c_void, CStr},
mem, ptr, slice,
};
use crate::cuda::{CUjitInputType, CUjit_option, CUlinkState, CUresult};
struct LinkState {
modules: Vec<String>,
}
pub(crate) unsafe fn create(
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
state_out: *mut CUlinkState,
) -> CUresult {
if state_out == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
let state = Box::new(LinkState {
modules: Vec::new(),
});
*state_out = mem::transmute(state);
CUresult::CUDA_SUCCESS
}
pub(crate) unsafe fn add_data(
state: CUlinkState,
type_: CUjitInputType,
data: *mut c_void,
size: usize,
name: *const i8,
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
) -> CUresult {
if state == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
let state: *mut LinkState = mem::transmute(state);
let state = &mut *state;
// V-RAY specific hack
if state.modules.len() == 2 {
return CUresult::CUDA_SUCCESS;
}
let ptx = slice::from_raw_parts(data as *mut u8, size);
state.modules.push(
CStr::from_bytes_with_nul_unchecked(ptx)
.to_string_lossy()
.to_string(),
);
CUresult::CUDA_SUCCESS
}
pub(crate) fn complete(
state: CUlinkState,
cubin_out: *mut *mut c_void,
size_out: *mut usize,
) -> CUresult {
CUresult::CUDA_SUCCESS
}
pub(crate) unsafe fn destroy(state: CUlinkState) -> CUresult {
let state: Box<LinkState> = mem::transmute(state);
CUresult::CUDA_SUCCESS
}

55
zluda/src/impl/memory.rs Normal file
View file

@ -0,0 +1,55 @@
use hip_runtime_sys::{
hipDrvMemcpy3D, hipError_t, hipMemcpy3D, hipMemcpy3DParms, hipMemoryType, hipPitchedPtr,
hipPos, HIP_MEMCPY3D,
};
use std::ptr;
use crate::{
cuda::{CUDA_MEMCPY3D_st, CUdeviceptr, CUmemorytype, CUresult},
hip_call,
};
// TODO change HIP impl to 64 bits
pub(crate) unsafe fn copy_3d(cu_copy: *const CUDA_MEMCPY3D_st) -> Result<(), hipError_t> {
if cu_copy == ptr::null() {
return Err(hipError_t::hipErrorInvalidValue);
}
let cu_copy = *cu_copy;
let hip_copy = HIP_MEMCPY3D {
srcXInBytes: cu_copy.srcXInBytes as u32,
srcY: cu_copy.srcY as u32,
srcZ: cu_copy.srcZ as u32,
srcLOD: cu_copy.srcLOD as u32,
srcMemoryType: memory_type(cu_copy.srcMemoryType)?,
srcHost: cu_copy.srcHost,
srcDevice: cu_copy.srcDevice.0 as _,
srcArray: cu_copy.srcArray as _,
srcPitch: cu_copy.srcPitch as u32,
srcHeight: cu_copy.srcHeight as u32,
dstXInBytes: cu_copy.dstXInBytes as u32,
dstY: cu_copy.dstY as u32,
dstZ: cu_copy.dstZ as u32,
dstLOD: cu_copy.dstLOD as u32,
dstMemoryType: memory_type(cu_copy.dstMemoryType)?,
dstHost: cu_copy.dstHost,
dstDevice: cu_copy.dstDevice.0 as _,
dstArray: cu_copy.dstArray as _,
dstPitch: cu_copy.dstPitch as u32,
dstHeight: cu_copy.dstHeight as u32,
WidthInBytes: cu_copy.WidthInBytes as u32,
Height: cu_copy.Height as u32,
Depth: cu_copy.Depth as u32,
};
hip_call! { hipDrvMemcpy3D(&hip_copy) };
Ok(())
}
pub(crate) fn memory_type(cu: CUmemorytype) -> Result<hipMemoryType, hipError_t> {
match cu {
CUmemorytype::CU_MEMORYTYPE_HOST => Ok(hipMemoryType::hipMemoryTypeHost),
CUmemorytype::CU_MEMORYTYPE_DEVICE => Ok(hipMemoryType::hipMemoryTypeDevice),
CUmemorytype::CU_MEMORYTYPE_ARRAY => Ok(hipMemoryType::hipMemoryTypeArray),
CUmemorytype::CU_MEMORYTYPE_UNIFIED => Ok(hipMemoryType::hipMemoryTypeUnified),
_ => Err(hipError_t::hipErrorInvalidValue),
}
}

View file

@ -20,6 +20,10 @@ pub mod function;
#[cfg_attr(not(windows), path = "os_unix.rs")] #[cfg_attr(not(windows), path = "os_unix.rs")]
pub(crate) mod os; pub(crate) mod os;
pub(crate) mod module; pub(crate) mod module;
pub(crate) mod context;
pub(crate) mod memory;
pub(crate) mod link;
pub(crate) mod pointer;
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
pub fn unimplemented() -> CUresult { pub fn unimplemented() -> CUresult {
@ -31,6 +35,19 @@ pub fn unimplemented() -> CUresult {
CUresult::CUDA_ERROR_NOT_SUPPORTED CUresult::CUDA_ERROR_NOT_SUPPORTED
} }
#[macro_export]
macro_rules! hip_call {
($expr:expr) => {
#[allow(unused_unsafe)]
{
let err = unsafe { $expr };
if err != hip_runtime_sys::hipError_t::hipSuccess {
return Result::Err(err);
}
}
};
}
pub trait HasLivenessCookie: Sized { pub trait HasLivenessCookie: Sized {
const COOKIE: usize; const COOKIE: usize;
const LIVENESS_FAIL: CUresult; const LIVENESS_FAIL: CUresult;

View file

@ -87,7 +87,7 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<()
let err = unsafe { hipGetDeviceProperties(&mut props, dev) }; let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
let arch_binary = compile_amd( let arch_binary = compile_amd(
&props, &props,
&spirv_data.binaries[..], &[&spirv_data.binaries[..]],
spirv_data.should_link_ptx_impl, spirv_data.should_link_ptx_impl,
) )
.map_err(|_| hipError_t::hipErrorUnknown)?; .map_err(|_| hipError_t::hipErrorUnknown)?;
@ -115,7 +115,7 @@ const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
fn compile_amd( fn compile_amd(
device_pros: &hipDeviceProp_t, device_pros: &hipDeviceProp_t,
spirv_il: &[u32], spirv_il: &[&[u32]],
ptx_lib: Option<(&'static [u8], &'static [u8])>, ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> { ) -> io::Result<Vec<u8>> {
let null_terminator = device_pros let null_terminator = device_pros
@ -133,24 +133,30 @@ fn compile_amd(
return Err(io::Error::new(io::ErrorKind::Other, "")); return Err(io::Error::new(io::ErrorKind::Other, ""));
}; };
let dir = tempfile::tempdir()?; let dir = tempfile::tempdir()?;
let mut spirv = NamedTempFile::new_in(&dir)?; let spirv_files = spirv_il
let llvm = NamedTempFile::new_in(&dir)?; .iter()
let spirv_il_u8 = unsafe { .map(|spirv| {
slice::from_raw_parts( let mut spirv = NamedTempFile::new_in(&dir)?;
spirv_il.as_ptr() as *const u8, let spirv_il_u8 = unsafe {
spirv_il.len() * mem::size_of::<u32>(), slice::from_raw_parts(
) spirv_il.as_ptr() as *const u8,
}; spirv_il.len() * mem::size_of::<u32>(),
spirv.write_all(spirv_il_u8)?; )
};
spirv.write_all(spirv_il_u8)?;
Ok::<_, io::Error>(spirv)
})
.collect::<Result<Vec<_>, _>>()?;
let llvm_spirv_path = match env::var("LLVM_SPIRV") { let llvm_spirv_path = match env::var("LLVM_SPIRV") {
Ok(path) => Cow::Owned(path), Ok(path) => Cow::Owned(path),
Err(_) => Cow::Borrowed(LLVM_SPIRV), Err(_) => Cow::Borrowed(LLVM_SPIRV),
}; };
let llvm = NamedTempFile::new_in(&dir)?;
let to_llvm_cmd = Command::new(&*llvm_spirv_path) let to_llvm_cmd = Command::new(&*llvm_spirv_path)
.arg("-r") .arg("-r")
.arg("-o") .arg("-o")
.arg(llvm.path()) .arg(llvm.path())
.arg(spirv.path()) .args(spirv_files.iter().map(|f| f.path()))
.status()?; .status()?;
assert!(to_llvm_cmd.success()); assert!(to_llvm_cmd.success());
if cfg!(debug_assertions) { if cfg!(debug_assertions) {

53
zluda/src/impl/pointer.rs Normal file
View file

@ -0,0 +1,53 @@
use std::{ffi::c_void, mem, ptr};
use hip_runtime_sys::{hipError_t, hipMemoryType, hipPointerGetAttributes};
use crate::{
cuda::{CUdeviceptr, CUmemorytype, CUpointer_attribute},
hip_call,
};
pub(crate) unsafe fn get_attribute(
data: *mut c_void,
attribute: CUpointer_attribute,
ptr: CUdeviceptr,
) -> Result<(), hipError_t> {
if data == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let mut attribs = mem::zeroed();
hip_call! { hipPointerGetAttributes(&mut attribs, ptr.0 as _) };
match attribute {
CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
*(data as *mut _) = attribs.device;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
*(data as *mut _) = memory_type(attribs.memoryType)?;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
*(data as *mut _) = attribs.devicePointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
*(data as *mut _) = attribs.hostPointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
*(data as *mut _) = attribs.isManaged;
Ok(())
}
_ => Err(hipError_t::hipErrorNotSupported),
}
}
pub(crate) fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipError_t> {
match cu {
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
_ => Err(hipError_t::hipErrorInvalidValue),
}
}