mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-09 17:49:48 +00:00
HIP conversion part #1
This commit is contained in:
parent
4ae7feb93a
commit
5ec18f14a1
5 changed files with 298 additions and 130 deletions
|
@ -2,6 +2,6 @@ use std::env::VarError;
|
||||||
|
|
||||||
fn main() -> Result<(), VarError> {
|
fn main() -> Result<(), VarError> {
|
||||||
println!("cargo:rustc-link-lib=dylib=amdhip64");
|
println!("cargo:rustc-link-lib=dylib=amdhip64");
|
||||||
println!("cargo:rustc-link-search=/opt/rocm/lib/");
|
println!("cargo:rustc-link-search=native=/opt/rocm/lib/");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ lazy_static = "1.4"
|
||||||
num_enum = "0.4"
|
num_enum = "0.4"
|
||||||
lz4-sys = "1.9"
|
lz4-sys = "1.9"
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
paste = "1.0"
|
||||||
|
|
||||||
[dependencies.ocl-core]
|
[dependencies.ocl-core]
|
||||||
version = "0.11"
|
version = "0.11"
|
||||||
|
@ -24,4 +25,3 @@ winapi = { version = "0.3", features = ["heapapi", "std"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
cuda-driver-sys = "0.3.0"
|
cuda-driver-sys = "0.3.0"
|
||||||
paste = "1.0"
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
use hip_runtime_sys::*;
|
||||||
|
|
||||||
use super::r#impl;
|
use super::r#impl;
|
||||||
use super::r#impl::{Decuda, Encuda};
|
use super::r#impl::{Decuda, Encuda};
|
||||||
|
|
||||||
|
@ -2183,10 +2185,11 @@ pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult;
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuGetErrorString(
|
pub extern "system" fn cuGetErrorString(
|
||||||
error: CUresult,
|
CUresult(e): CUresult,
|
||||||
pStr: *mut *const ::std::os::raw::c_char,
|
pStr: *mut *const ::std::os::raw::c_char,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
r#impl::get_error_string(error, pStr).encuda()
|
unsafe { *pStr = hipGetErrorString(hipError_t(e)) };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -2199,13 +2202,12 @@ pub extern "system" fn cuGetErrorName(
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult {
|
pub extern "system" fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult {
|
||||||
r#impl::init().encuda()
|
unsafe { hipInit(Flags).into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult {
|
pub extern "system" fn cuDriverGetVersion(driverVersion: *mut ::std::os::raw::c_int) -> CUresult {
|
||||||
unsafe { *driverVersion = r#impl::driver_get_version() };
|
unsafe { hipDriverGetVersion(driverVersion).into() }
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -2213,21 +2215,21 @@ pub extern "system" fn cuDeviceGet(
|
||||||
device: *mut CUdevice,
|
device: *mut CUdevice,
|
||||||
ordinal: ::std::os::raw::c_int,
|
ordinal: ::std::os::raw::c_int,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
r#impl::device::get(device.decuda(), ordinal).encuda()
|
unsafe { hipDeviceGet(device as _, ordinal).into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult {
|
pub extern "system" fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult {
|
||||||
r#impl::device::get_count(count).encuda()
|
unsafe { hipGetDeviceCount(count).into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDeviceGetName(
|
pub extern "system" fn cuDeviceGetName(
|
||||||
name: *mut ::std::os::raw::c_char,
|
name: *mut ::std::os::raw::c_char,
|
||||||
len: ::std::os::raw::c_int,
|
len: ::std::os::raw::c_int,
|
||||||
dev: CUdevice,
|
CUdevice(dev): CUdevice,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
r#impl::device::get_name(name, len, dev.decuda()).encuda()
|
unsafe { hipDeviceGetName(name, len, dev).into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -2245,17 +2247,17 @@ pub extern "system" fn cuDeviceGetLuid(
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult {
|
pub extern "system" fn cuDeviceTotalMem_v2(bytes: *mut usize, CUdevice(dev): CUdevice) -> CUresult {
|
||||||
r#impl::device::total_mem_v2(bytes, dev.decuda()).encuda()
|
unsafe { hipDeviceTotalMem(bytes, dev).into() }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuDeviceGetAttribute(
|
pub extern "system" fn cuDeviceGetAttribute(
|
||||||
pi: *mut ::std::os::raw::c_int,
|
pi: *mut ::std::os::raw::c_int,
|
||||||
attrib: CUdevice_attribute,
|
attrib: CUdevice_attribute,
|
||||||
dev: CUdevice,
|
CUdevice(dev): CUdevice,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
r#impl::device::get_attribute(pi, attrib, dev.decuda()).encuda()
|
r#impl::device::get_attribute(pi, attrib, dev).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
|
|
@ -1,7 +1,11 @@
|
||||||
use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState};
|
use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState};
|
||||||
use crate::cuda;
|
use crate::cuda;
|
||||||
use cuda::{CUdevice_attribute, CUuuid_st};
|
use cuda::{CUdevice_attribute, CUuuid_st};
|
||||||
|
use hip_runtime_sys::{
|
||||||
|
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
|
||||||
|
};
|
||||||
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
||||||
|
use paste::paste;
|
||||||
use std::{
|
use std::{
|
||||||
cmp,
|
cmp,
|
||||||
collections::HashSet,
|
collections::HashSet,
|
||||||
|
@ -133,138 +137,290 @@ pub fn total_mem_v2(bytes: *mut usize, dev_idx: Index) -> Result<(), CUresult> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CUdevice_attribute {
|
#[allow(warnings)]
|
||||||
fn get_static_value(self) -> Option<i32> {
|
trait hipDeviceAttribute_t_ext {
|
||||||
match self {
|
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP => Some(1),
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT => Some(1),
|
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
|
||||||
// TODO: go back to this once we have more functionality implemented
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth;
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => Some(8),
|
const hipDeviceAttributeMaximumTexture2DHeight: hipDeviceAttribute_t =
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => Some(0),
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight;
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY => Some(1),
|
const hipDeviceAttributeMaximumTexture3DWidth: hipDeviceAttribute_t =
|
||||||
_ => None,
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth;
|
||||||
|
const hipDeviceAttributeMaximumTexture3DHeight: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight;
|
||||||
|
const hipDeviceAttributeMaximumTexture3DDepth: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth;
|
||||||
|
const hipDeviceAttributeGlobalMemoryBusWidth: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMemoryBusWidth;
|
||||||
|
const hipDeviceAttributeMaxThreadsPerMultiprocessor: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
|
||||||
|
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {}
|
||||||
|
|
||||||
|
macro_rules! remap_attribute {
|
||||||
|
($attrib:expr => $([ $($word:expr)* ]),*,) => {
|
||||||
|
match $attrib {
|
||||||
|
$(
|
||||||
|
paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||||
|
paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
_ => return hipError_t::hipErrorInvalidValue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_attribute(
|
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t {
|
||||||
pi: *mut i32,
|
|
||||||
attrib: CUdevice_attribute,
|
|
||||||
dev_idx: Index,
|
|
||||||
) -> Result<(), CUresult> {
|
|
||||||
if pi == ptr::null_mut() {
|
if pi == ptr::null_mut() {
|
||||||
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
return hipError_t::hipErrorInvalidValue;
|
||||||
}
|
}
|
||||||
if let Some(value) = attrib.get_static_value() {
|
//let mut props = unsafe { mem::zeroed() };
|
||||||
unsafe { *pi = value };
|
let hip_attrib = match attrib {
|
||||||
return Ok(());
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
||||||
}
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
||||||
let value = match attrib {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
|
||||||
GlobalState::lock_device(dev_idx, |dev| if dev.is_amd { 0i32 } else { 1i32 })?
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => {
|
||||||
|
unsafe { *pi = 1 };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => 1,
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER
|
||||||
// Streaming Multiprocessor corresponds roughly to a sub-slice (thread group can't cross either)
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT => {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
|
||||||
let props =
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
|
||||||
ocl_core::get_device_info(dev.ocl_base, ocl_core::DeviceInfo::MaxComputeUnits)?;
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
|
||||||
if let ocl_core::DeviceInfoResult::MaxComputeUnits(count) = props {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
|
||||||
Ok(count as i32)
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
|
||||||
} else {
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
|
||||||
}
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
|
||||||
})??
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => {
|
||||||
|
unsafe { *pi = 0 };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
}
|
}
|
||||||
// I honestly don't know how to answer this query
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR => {
|
unsafe { *pi = 8 };
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
return hipError_t::hipSuccess;
|
||||||
if !dev.is_amd {
|
|
||||||
7 // correct for GEN9
|
|
||||||
} else {
|
|
||||||
4i32 * 32 // probably correct for RDNA
|
|
||||||
}
|
|
||||||
})?
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
unsafe { *pi = 0 };
|
||||||
let props = ocl_core::get_device_info(
|
return hipError_t::hipSuccess;
|
||||||
dev.ocl_base,
|
|
||||||
ocl_core::DeviceInfo::MaxWorkGroupSize,
|
|
||||||
)?;
|
|
||||||
if let ocl_core::DeviceInfoResult::MaxWorkGroupSize(size) = props {
|
|
||||||
Ok(size as i32)
|
|
||||||
} else {
|
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
|
||||||
}
|
|
||||||
})??
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X => {
|
// we assume that arrayed textures have the same limits
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => {
|
||||||
let props = ocl_core::get_device_info(
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||||
dev.ocl_base,
|
|
||||||
ocl_core::DeviceInfo::MaxWorkItemSizes,
|
|
||||||
)?;
|
|
||||||
if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props {
|
|
||||||
Ok(sizes)
|
|
||||||
} else {
|
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
|
||||||
}
|
|
||||||
})??[0] as i32
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => {
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||||
let props = ocl_core::get_device_info(
|
|
||||||
dev.ocl_base,
|
|
||||||
ocl_core::DeviceInfo::MaxWorkItemSizes,
|
|
||||||
)?;
|
|
||||||
if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props {
|
|
||||||
Ok(sizes)
|
|
||||||
} else {
|
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
|
||||||
}
|
|
||||||
})??[1] as i32
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||||
let props = ocl_core::get_device_info(
|
|
||||||
dev.ocl_base,
|
|
||||||
ocl_core::DeviceInfo::MaxWorkItemSizes,
|
|
||||||
)?;
|
|
||||||
if let ocl_core::DeviceInfoResult::MaxWorkItemSizes(sizes) = props {
|
|
||||||
Ok(sizes)
|
|
||||||
} else {
|
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
|
||||||
}
|
|
||||||
})??[2] as i32
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
|
||||||
GlobalState::lock_device(dev_idx, |dev| {
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||||
let props =
|
|
||||||
ocl_core::get_device_info(dev.ocl_base, ocl_core::DeviceInfo::LocalMemSize)?;
|
|
||||||
if let ocl_core::DeviceInfoResult::LocalMemSize(size) = props {
|
|
||||||
Ok(size)
|
|
||||||
} else {
|
|
||||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
|
||||||
}
|
|
||||||
})?? as i32
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => 32,
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
|
||||||
_ => {
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||||
// TODO: support more attributes for CUDA runtime
|
|
||||||
/*
|
|
||||||
return Err(l0::Error(
|
|
||||||
l0::sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE,
|
|
||||||
))
|
|
||||||
*/
|
|
||||||
0
|
|
||||||
}
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||||
|
}
|
||||||
|
// we treat surfaces the same as textures
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||||
|
}
|
||||||
|
// Totally made up
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
|
||||||
|
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
|
||||||
|
unsafe { *pi = u16::MAX as i32 };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
|
}
|
||||||
|
// linear sizes
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
|
||||||
|
let mut prop = unsafe { mem::zeroed() };
|
||||||
|
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||||
|
if err != hipError_t::hipSuccess {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
unsafe { *pi = prop.maxTexture1DLinear };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
|
||||||
|
let mut prop = unsafe { mem::zeroed() };
|
||||||
|
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||||
|
if err != hipError_t::hipSuccess {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
unsafe { *pi = prop.pciDomainID };
|
||||||
|
return hipError_t::hipSuccess;
|
||||||
|
}
|
||||||
|
attrib => remap_attribute! {
|
||||||
|
attrib =>
|
||||||
|
[MAX THREADS PER BLOCK],
|
||||||
|
[MAX BLOCK DIM X],
|
||||||
|
[MAX BLOCK DIM Y],
|
||||||
|
[MAX BLOCK DIM Z],
|
||||||
|
[MAX GRID DIM X],
|
||||||
|
[MAX GRID DIM Y],
|
||||||
|
[MAX GRID DIM Z],
|
||||||
|
[MAX SHARED MEMORY PER BLOCK],
|
||||||
|
[TOTAL CONSTANT MEMORY],
|
||||||
|
[WARP SIZE],
|
||||||
|
[MAX PITCH],
|
||||||
|
[MAX REGISTERS PER BLOCK],
|
||||||
|
[CLOCK RATE],
|
||||||
|
[TEXTURE ALIGNMENT],
|
||||||
|
//[GPU OVERLAP],
|
||||||
|
[MULTIPROCESSOR COUNT],
|
||||||
|
[KERNEL EXEC TIMEOUT],
|
||||||
|
[INTEGRATED],
|
||||||
|
[CAN MAP HOST MEMORY],
|
||||||
|
[COMPUTE MODE],
|
||||||
|
[MAXIMUM TEXTURE1D WIDTH],
|
||||||
|
[MAXIMUM TEXTURE2D WIDTH],
|
||||||
|
[MAXIMUM TEXTURE2D HEIGHT],
|
||||||
|
[MAXIMUM TEXTURE3D WIDTH],
|
||||||
|
[MAXIMUM TEXTURE3D HEIGHT],
|
||||||
|
[MAXIMUM TEXTURE3D DEPTH],
|
||||||
|
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
|
||||||
|
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
|
||||||
|
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||||
|
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||||
|
//[SURFACE ALIGNMENT],
|
||||||
|
[CONCURRENT KERNELS],
|
||||||
|
[ECC ENABLED],
|
||||||
|
[PCI BUS ID],
|
||||||
|
[PCI DEVICE ID],
|
||||||
|
//[TCC DRIVER],
|
||||||
|
[MEMORY CLOCK RATE],
|
||||||
|
[GLOBAL MEMORY BUS WIDTH],
|
||||||
|
[L2 CACHE SIZE],
|
||||||
|
[MAX THREADS PER MULTIPROCESSOR],
|
||||||
|
[ASYNC ENGINE COUNT],
|
||||||
|
//[UNIFIED ADDRESSING],
|
||||||
|
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||||
|
//[CAN TEX2D GATHER],
|
||||||
|
//[MAXIMUM TEXTURE2D GATHER WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
|
||||||
|
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||||
|
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||||
|
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||||
|
//[PCI DOMAIN ID],
|
||||||
|
[TEXTURE PITCH ALIGNMENT],
|
||||||
|
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||||
|
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||||
|
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
|
||||||
|
//[MAXIMUM SURFACE1D WIDTH],
|
||||||
|
//[MAXIMUM SURFACE2D WIDTH],
|
||||||
|
//[MAXIMUM SURFACE2D HEIGHT],
|
||||||
|
//[MAXIMUM SURFACE3D WIDTH],
|
||||||
|
//[MAXIMUM SURFACE3D HEIGHT],
|
||||||
|
//[MAXIMUM SURFACE3D DEPTH],
|
||||||
|
//[MAXIMUM SURFACE1D LAYERED WIDTH],
|
||||||
|
//[MAXIMUM SURFACE1D LAYERED LAYERS],
|
||||||
|
//[MAXIMUM SURFACE2D LAYERED WIDTH],
|
||||||
|
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
|
||||||
|
//[MAXIMUM SURFACE2D LAYERED LAYERS],
|
||||||
|
//[MAXIMUM SURFACECUBEMAP WIDTH],
|
||||||
|
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
|
||||||
|
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
|
||||||
|
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
|
||||||
|
//[MAXIMUM TEXTURE2D LINEAR PITCH],
|
||||||
|
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
|
||||||
|
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
|
||||||
|
//[COMPUTE CAPABILITY MAJOR],
|
||||||
|
//[COMPUTE CAPABILITY MINOR],
|
||||||
|
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||||
|
//[STREAM PRIORITIES SUPPORTED],
|
||||||
|
//[GLOBAL L1 CACHE SUPPORTED],
|
||||||
|
//[LOCAL L1 CACHE SUPPORTED],
|
||||||
|
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||||
|
//[MAX REGISTERS PER MULTIPROCESSOR],
|
||||||
|
[MANAGED MEMORY],
|
||||||
|
//[MULTI GPU BOARD],
|
||||||
|
//[MULTI GPU BOARD GROUP ID],
|
||||||
|
//[HOST NATIVE ATOMIC SUPPORTED],
|
||||||
|
//[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||||
|
[PAGEABLE MEMORY ACCESS],
|
||||||
|
[CONCURRENT MANAGED ACCESS],
|
||||||
|
//[COMPUTE PREEMPTION SUPPORTED],
|
||||||
|
//[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||||
|
//[CAN USE STREAM MEM OPS],
|
||||||
|
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||||
|
//[CAN USE STREAM WAIT VALUE NOR],
|
||||||
|
[COOPERATIVE LAUNCH],
|
||||||
|
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||||
|
//[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||||
|
//[CAN FLUSH REMOTE WRITES],
|
||||||
|
//[HOST REGISTER SUPPORTED],
|
||||||
|
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||||
|
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||||
|
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||||
|
//[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||||
|
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||||
|
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||||
|
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||||
|
//[MAX BLOCKS PER MULTIPROCESSOR],
|
||||||
|
//[GENERIC COMPRESSION SUPPORTED],
|
||||||
|
//[MAX PERSISTING L2 CACHE SIZE],
|
||||||
|
//[MAX ACCESS POLICY WINDOW SIZE],
|
||||||
|
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
|
||||||
|
//[RESERVED SHARED MEMORY PER BLOCK],
|
||||||
|
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||||
|
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||||
|
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||||
|
//[MEMORY POOLS SUPPORTED],
|
||||||
|
},
|
||||||
};
|
};
|
||||||
unsafe { *pi = value };
|
unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) }
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_uuid(uuid: *mut CUuuid_st, dev_idx: Index) -> Result<(), CUresult> {
|
pub fn get_uuid(uuid: *mut CUuuid_st, _: Index) -> Result<(), CUresult> {
|
||||||
unsafe {
|
unsafe {
|
||||||
*uuid = CUuuid_st {
|
*uuid = CUuuid_st {
|
||||||
bytes: mem::zeroed(),
|
bytes: mem::zeroed(),
|
||||||
|
|
|
@ -148,6 +148,16 @@ impl From<ocl_core::Error> for CUresult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<hip_runtime_sys::hipError_t> for CUresult {
|
||||||
|
fn from(result: hip_runtime_sys::hipError_t) -> Self {
|
||||||
|
match result {
|
||||||
|
hip_runtime_sys::hipError_t::hipErrorRuntimeMemory
|
||||||
|
| hip_runtime_sys::hipError_t::hipErrorRuntimeOther => CUresult::CUDA_ERROR_UNKNOWN,
|
||||||
|
hip_runtime_sys::hipError_t(e) => CUresult(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub trait Encuda {
|
pub trait Encuda {
|
||||||
type To: Sized;
|
type To: Sized;
|
||||||
fn encuda(self: Self) -> Self::To;
|
fn encuda(self: Self) -> Self::To;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue