mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-14 21:21:28 +00:00
Implement device host functions
This commit is contained in:
parent
6c2a8576c2
commit
94e8e13425
3 changed files with 335 additions and 313 deletions
|
@ -1,29 +1,25 @@
|
||||||
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
|
use cuda_types::*;
|
||||||
use crate::{
|
use hip_runtime_sys::*;
|
||||||
cuda::{self, CUdevice, CUdevprop},
|
use std::{mem, ptr};
|
||||||
hip_call,
|
|
||||||
};
|
|
||||||
use cuda::{CUdevice_attribute, CUuuid_st};
|
|
||||||
use hip_runtime_sys::{
|
|
||||||
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
|
|
||||||
};
|
|
||||||
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
|
||||||
use paste::paste;
|
|
||||||
use std::{
|
|
||||||
cmp,
|
|
||||||
collections::HashSet,
|
|
||||||
ffi::c_void,
|
|
||||||
mem,
|
|
||||||
os::raw::{c_char, c_int, c_uint},
|
|
||||||
ptr,
|
|
||||||
sync::atomic::{AtomicU32, Ordering},
|
|
||||||
};
|
|
||||||
|
|
||||||
const PROJECT_URL_SUFFIX_SHORT: &'static str = " [ZLUDA]";
|
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||||
const PROJECT_URL_SUFFIX_LONG: &'static str = " [github.com/vosen/ZLUDA]";
|
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||||
|
pub const COMPUTE_CAPABILITY_MINOR: i32 = 8;
|
||||||
|
|
||||||
|
pub(crate) fn compute_capability(major: &mut i32, minor: &mut i32, _dev: hipDevice_t) -> CUresult {
|
||||||
|
*major = COMPUTE_CAPABILITY_MAJOR;
|
||||||
|
*minor = COMPUTE_CAPABILITY_MINOR;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get(device: *mut hipDevice_t, ordinal: i32) -> hipError_t {
|
||||||
|
unsafe { hipDeviceGet(device, ordinal) }
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(warnings)]
|
#[allow(warnings)]
|
||||||
trait hipDeviceAttribute_t_ext {
|
trait DeviceAttributeNames {
|
||||||
|
const hipDeviceAttributeGpuOverlap: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeDeviceOverlap;
|
||||||
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
|
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
|
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
|
||||||
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
|
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
|
||||||
|
@ -42,148 +38,56 @@ trait hipDeviceAttribute_t_ext {
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
|
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
|
||||||
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
|
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
|
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
|
||||||
|
const hipDeviceAttributePciDomainId: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributePciDomainID;
|
||||||
|
const hipDeviceAttributeMultiGpuBoard: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeIsMultiGpuBoard;
|
||||||
|
const hipDeviceAttributeMultiGpuBoardGroupId: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeMultiGpuBoardGroupID;
|
||||||
|
const hipDeviceAttributeMaxSharedMemoryPerBlockOptin: hipDeviceAttribute_t =
|
||||||
|
hipDeviceAttribute_t::hipDeviceAttributeSharedMemPerBlockOptin;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {}
|
impl DeviceAttributeNames for hipDeviceAttribute_t {}
|
||||||
|
|
||||||
macro_rules! remap_attribute {
|
macro_rules! remap_attribute {
|
||||||
($attrib:expr => $([ $($word:expr)* ]),*,) => {
|
($attrib:expr => $([ $($word:expr)* ]),*,) => {
|
||||||
match $attrib {
|
match $attrib {
|
||||||
$(
|
$(
|
||||||
paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||||
paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||||
}
|
}
|
||||||
)*
|
)*
|
||||||
_ => return hipError_t::hipErrorInvalidValue
|
_ => return Err(hipErrorCode_t::hipErrorNotSupported)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t {
|
pub(crate) fn get_attribute(
|
||||||
if pi == ptr::null_mut() {
|
pi: &mut i32,
|
||||||
return hipError_t::hipErrorInvalidValue;
|
attrib: CUdevice_attribute,
|
||||||
|
dev_idx: hipDevice_t,
|
||||||
|
) -> hipError_t {
|
||||||
|
match attrib {
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
|
||||||
|
*pi = 32;
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
//let mut props = unsafe { mem::zeroed() };
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER => {
|
||||||
let hip_attrib = match attrib {
|
*pi = 0;
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
|
return Ok(());
|
||||||
unsafe { *pi = 1 };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => {
|
|
||||||
unsafe { *pi = 1 };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => {
|
|
||||||
unsafe { *pi = 0 };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
||||||
unsafe { *pi = 8 };
|
*pi = COMPUTE_CAPABILITY_MAJOR;
|
||||||
return hipError_t::hipSuccess;
|
return Ok(());
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
|
||||||
unsafe { *pi = 0 };
|
*pi = COMPUTE_CAPABILITY_MINOR;
|
||||||
return hipError_t::hipSuccess;
|
return Ok(());
|
||||||
}
|
}
|
||||||
// we assume that arrayed texts have the same limits
|
_ => {}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
|
||||||
}
|
}
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => {
|
let attrib = remap_attribute! {
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
|
||||||
}
|
|
||||||
// we treat surface the same as texture
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
|
|
||||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
|
||||||
}
|
|
||||||
// Totally made up
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
|
|
||||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
|
|
||||||
unsafe { *pi = u16::MAX as i32 };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
|
||||||
// linear sizes
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
|
|
||||||
let mut prop = unsafe { mem::zeroed() };
|
|
||||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
unsafe { *pi = prop.maxTexture1DLinear };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
|
||||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
|
|
||||||
let mut prop = unsafe { mem::zeroed() };
|
|
||||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
unsafe { *pi = prop.pciDomainID };
|
|
||||||
return hipError_t::hipSuccess;
|
|
||||||
}
|
|
||||||
attrib => remap_attribute! {
|
|
||||||
attrib =>
|
attrib =>
|
||||||
[MAX THREADS PER BLOCK],
|
[MAX THREADS PER BLOCK],
|
||||||
[MAX BLOCK DIM X],
|
[MAX BLOCK DIM X],
|
||||||
|
@ -194,12 +98,12 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
[MAX GRID DIM Z],
|
[MAX GRID DIM Z],
|
||||||
[MAX SHARED MEMORY PER BLOCK],
|
[MAX SHARED MEMORY PER BLOCK],
|
||||||
[TOTAL CONSTANT MEMORY],
|
[TOTAL CONSTANT MEMORY],
|
||||||
[WARP SIZE],
|
//[WARP SIZE],
|
||||||
[MAX PITCH],
|
[MAX PITCH],
|
||||||
[MAX REGISTERS PER BLOCK],
|
[MAX REGISTERS PER BLOCK],
|
||||||
[CLOCK RATE],
|
[CLOCK RATE],
|
||||||
[TEXTURE ALIGNMENT],
|
[TEXTURE ALIGNMENT],
|
||||||
//[GPU OVERLAP],
|
[GPU OVERLAP],
|
||||||
[MULTIPROCESSOR COUNT],
|
[MULTIPROCESSOR COUNT],
|
||||||
[KERNEL EXEC TIMEOUT],
|
[KERNEL EXEC TIMEOUT],
|
||||||
[INTEGRATED],
|
[INTEGRATED],
|
||||||
|
@ -217,7 +121,7 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||||
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||||
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||||
//[SURFACE ALIGNMENT],
|
[SURFACE ALIGNMENT],
|
||||||
[CONCURRENT KERNELS],
|
[CONCURRENT KERNELS],
|
||||||
[ECC ENABLED],
|
[ECC ENABLED],
|
||||||
[PCI BUS ID],
|
[PCI BUS ID],
|
||||||
|
@ -228,7 +132,7 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
[L2 CACHE SIZE],
|
[L2 CACHE SIZE],
|
||||||
[MAX THREADS PER MULTIPROCESSOR],
|
[MAX THREADS PER MULTIPROCESSOR],
|
||||||
[ASYNC ENGINE COUNT],
|
[ASYNC ENGINE COUNT],
|
||||||
//[UNIFIED ADDRESSING],
|
[UNIFIED ADDRESSING],
|
||||||
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||||
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||||
//[CAN TEX2D GATHER],
|
//[CAN TEX2D GATHER],
|
||||||
|
@ -237,7 +141,7 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||||
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||||
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||||
//[PCI DOMAIN ID],
|
[PCI DOMAIN ID],
|
||||||
[TEXTURE PITCH ALIGNMENT],
|
[TEXTURE PITCH ALIGNMENT],
|
||||||
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||||
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||||
|
@ -265,32 +169,30 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
//[COMPUTE CAPABILITY MAJOR],
|
//[COMPUTE CAPABILITY MAJOR],
|
||||||
//[COMPUTE CAPABILITY MINOR],
|
//[COMPUTE CAPABILITY MINOR],
|
||||||
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||||
//[STREAM PRIORITIES SUPPORTED],
|
[STREAM PRIORITIES SUPPORTED],
|
||||||
//[GLOBAL L1 CACHE SUPPORTED],
|
[GLOBAL L1 CACHE SUPPORTED],
|
||||||
//[LOCAL L1 CACHE SUPPORTED],
|
[LOCAL L1 CACHE SUPPORTED],
|
||||||
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||||
//[MAX REGISTERS PER MULTIPROCESSOR],
|
[MAX REGISTERS PER MULTIPROCESSOR],
|
||||||
[MANAGED MEMORY],
|
[MANAGED MEMORY],
|
||||||
//[MULTI GPU BOARD],
|
[MULTI GPU BOARD],
|
||||||
//[MULTI GPU BOARD GROUP ID],
|
[MULTI GPU BOARD GROUP ID],
|
||||||
//[HOST NATIVE ATOMIC SUPPORTED],
|
[HOST NATIVE ATOMIC SUPPORTED],
|
||||||
//[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||||
[PAGEABLE MEMORY ACCESS],
|
[PAGEABLE MEMORY ACCESS],
|
||||||
[CONCURRENT MANAGED ACCESS],
|
[CONCURRENT MANAGED ACCESS],
|
||||||
//[COMPUTE PREEMPTION SUPPORTED],
|
[COMPUTE PREEMPTION SUPPORTED],
|
||||||
//[CAN USE HOST POINTER FOR REGISTERED MEM],
|
[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||||
//[CAN USE STREAM MEM OPS],
|
//[CAN USE STREAM MEM OPS],
|
||||||
//[CAN USE 64 BIT STREAM MEM OPS],
|
|
||||||
//[CAN USE STREAM WAIT VALUE NOR],
|
|
||||||
[COOPERATIVE LAUNCH],
|
[COOPERATIVE LAUNCH],
|
||||||
[COOPERATIVE MULTI DEVICE LAUNCH],
|
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||||
//[MAX SHARED MEMORY PER BLOCK OPTIN],
|
[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||||
//[CAN FLUSH REMOTE WRITES],
|
//[CAN FLUSH REMOTE WRITES],
|
||||||
//[HOST REGISTER SUPPORTED],
|
[HOST REGISTER SUPPORTED],
|
||||||
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||||
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||||
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||||
//[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||||
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||||
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||||
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||||
|
@ -303,46 +205,101 @@ pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -
|
||||||
//[SPARSE CUDA ARRAY SUPPORTED],
|
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||||
//[READ ONLY HOST REGISTER SUPPORTED],
|
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||||
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||||
//[MEMORY POOLS SUPPORTED],
|
[MEMORY POOLS SUPPORTED],
|
||||||
},
|
//[GPU DIRECT RDMA SUPPORTED],
|
||||||
|
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS],
|
||||||
|
//[GPU DIRECT RDMA WRITES ORDERING],
|
||||||
|
//[MEMPOOL SUPPORTED HANDLE TYPES],
|
||||||
|
//[CLUSTER LAUNCH],
|
||||||
|
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED],
|
||||||
|
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||||
|
//[CAN USE STREAM WAIT VALUE NOR],
|
||||||
|
//[DMA BUF SUPPORTED],
|
||||||
|
//[IPC EVENT SUPPORTED],
|
||||||
|
//[MEM SYNC DOMAIN COUNT],
|
||||||
|
//[TENSOR MAP ACCESS SUPPORTED],
|
||||||
|
//[HANDLE TYPE FABRIC SUPPORTED],
|
||||||
|
//[UNIFIED FUNCTION POINTERS],
|
||||||
|
//[NUMA CONFIG],
|
||||||
|
//[NUMA ID],
|
||||||
|
//[MULTICAST SUPPORTED],
|
||||||
|
//[MPS ENABLED],
|
||||||
|
//[HOST NUMA ID],
|
||||||
};
|
};
|
||||||
unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) }
|
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_uuid(uuid: *mut CUuuid_st, _dev_idx: c_int) -> Result<(), CUresult> {
|
pub(crate) fn get_uuid(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||||
|
unsafe { hipDeviceGetUuid(uuid, device) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_uuid_v2(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||||
|
get_uuid(uuid, device)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_luid(
|
||||||
|
luid: *mut ::core::ffi::c_char,
|
||||||
|
device_node_mask: &mut ::core::ffi::c_uint,
|
||||||
|
dev: hipDevice_t,
|
||||||
|
) -> hipError_t {
|
||||||
|
let luid = unsafe {
|
||||||
|
luid.cast::<[i8; 8]>()
|
||||||
|
.as_mut()
|
||||||
|
.ok_or(hipErrorCode_t::hipErrorInvalidValue)
|
||||||
|
}?;
|
||||||
|
let mut properties = unsafe { mem::zeroed() };
|
||||||
|
unsafe { hipGetDevicePropertiesR0600(&mut properties, dev) }?;
|
||||||
|
*luid = properties.luid;
|
||||||
|
*device_node_mask = properties.luidDeviceNodeMask;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_name(
|
||||||
|
name: *mut ::core::ffi::c_char,
|
||||||
|
len: ::core::ffi::c_int,
|
||||||
|
dev: hipDevice_t,
|
||||||
|
) -> cuda_types::CUresult {
|
||||||
|
unsafe { hipDeviceGetName(name, len, dev) }?;
|
||||||
|
let len = len as usize;
|
||||||
|
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
|
||||||
|
let first_zero = buffer.iter().position(|c| *c == 0);
|
||||||
|
let first_zero = if let Some(x) = first_zero {
|
||||||
|
x
|
||||||
|
} else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
if (first_zero + PROJECT_SUFFIX.len()) > len {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
unsafe {
|
unsafe {
|
||||||
*uuid = CUuuid_st {
|
ptr::copy_nonoverlapping(
|
||||||
bytes: mem::zeroed(),
|
PROJECT_SUFFIX.as_ptr() as _,
|
||||||
}
|
name.add(first_zero),
|
||||||
|
PROJECT_SUFFIX.len(),
|
||||||
|
)
|
||||||
};
|
};
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: add support if Level 0 exposes it
|
pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
|
||||||
pub fn get_luid(
|
unsafe { hipDeviceTotalMem(bytes, dev) }
|
||||||
luid: *mut c_char,
|
}
|
||||||
dev_node_mask: *mut c_uint,
|
|
||||||
_dev_idx: c_int,
|
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
|
||||||
) -> Result<(), CUresult> {
|
let mut hip_props = unsafe { mem::zeroed() };
|
||||||
unsafe { ptr::write_bytes(luid, 0u8, 8) };
|
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
|
||||||
unsafe { *dev_node_mask = 0 };
|
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||||
|
prop.maxThreadsDim = hip_props.maxThreadsDim;
|
||||||
|
prop.maxGridSize = hip_props.maxGridSize;
|
||||||
|
prop.totalConstantMemory = clamp_usize(hip_props.totalConstMem);
|
||||||
|
prop.SIMDWidth = 32;
|
||||||
|
prop.memPitch = clamp_usize(hip_props.memPitch);
|
||||||
|
prop.regsPerBlock = hip_props.regsPerBlock;
|
||||||
|
prop.clockRate = hip_props.clockRate;
|
||||||
|
prop.textureAlign = clamp_usize(hip_props.textureAlignment);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
|
fn clamp_usize(x: usize) -> i32 {
|
||||||
if prop == ptr::null_mut() {
|
usize::min(x, i32::MAX as usize) as i32
|
||||||
return Err(hipError_t::hipErrorInvalidValue);
|
|
||||||
}
|
|
||||||
let mut hip_props = mem::zeroed();
|
|
||||||
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
|
|
||||||
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
|
||||||
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
|
|
||||||
(*prop).maxGridSize = hip_props.maxGridSize;
|
|
||||||
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
|
|
||||||
(*prop).SIMDWidth = hip_props.warpSize;
|
|
||||||
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
|
|
||||||
(*prop).regsPerBlock = hip_props.regsPerBlock;
|
|
||||||
(*prop).clockRate = hip_props.clockRate;
|
|
||||||
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
use cuda_types::*;
|
use cuda_types::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
|
|
||||||
|
pub(super) mod device;
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
pub(crate) fn unimplemented() -> CUresult {
|
pub(crate) fn unimplemented() -> CUresult {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
|
@ -11,16 +13,70 @@ pub(crate) fn unimplemented() -> CUresult {
|
||||||
CUresult::ERROR_NOT_SUPPORTED
|
CUresult::ERROR_NOT_SUPPORTED
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) trait FromCuda<T>: Sized {
|
pub(crate) trait FromCuda<'a, T>: Sized {
|
||||||
fn from_cuda(t: T) -> Result<Self, CUerror>;
|
fn from_cuda(t: &'a T) -> Result<Self, CUerror>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FromCuda<u32> for u32 {
|
macro_rules! from_cuda_noop {
|
||||||
fn from_cuda(x: u32) -> Result<Self, CUerror> {
|
($($type_:ty),*) => {
|
||||||
Ok(x)
|
$(
|
||||||
|
impl<'a> FromCuda<'a, $type_> for $type_ {
|
||||||
|
fn from_cuda(x: &'a $type_) -> Result<Self, CUerror> {
|
||||||
|
Ok(*x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> FromCuda<'a, *mut $type_> for &'a mut $type_ {
|
||||||
|
fn from_cuda(x: &'a *mut $type_) -> Result<Self, CUerror> {
|
||||||
|
match unsafe { x.as_mut() } {
|
||||||
|
Some(x) => Ok(x),
|
||||||
|
None => Err(CUerror::INVALID_VALUE),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! from_cuda_transmute {
|
||||||
|
($($from:ty => $to:ty),*) => {
|
||||||
|
$(
|
||||||
|
impl<'a> FromCuda<'a, $from> for $to {
|
||||||
|
fn from_cuda(x: &'a $from) -> Result<Self, CUerror> {
|
||||||
|
Ok(unsafe { std::mem::transmute(*x) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FromCuda<'a, *mut $from> for &'a mut $to {
|
||||||
|
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
|
||||||
|
match unsafe { x.cast::<$to>().as_mut() } {
|
||||||
|
Some(x) => Ok(x),
|
||||||
|
None => Err(CUerror::INVALID_VALUE),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FromCuda<'a, *mut $from> for * mut $to {
|
||||||
|
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
|
||||||
|
Ok(x.cast::<$to>())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
from_cuda_noop!(
|
||||||
|
*mut i8,
|
||||||
|
*mut usize,
|
||||||
|
i32,
|
||||||
|
u32,
|
||||||
|
cuda_types::CUdevprop, CUdevice_attribute
|
||||||
|
);
|
||||||
|
from_cuda_transmute!(
|
||||||
|
CUdevice => hipDevice_t,
|
||||||
|
CUuuid => hipUUID
|
||||||
|
);
|
||||||
|
|
||||||
pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t {
|
pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t {
|
||||||
unsafe { hipInit(flags) }
|
unsafe { hipInit(flags) }
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ macro_rules! implemented {
|
||||||
#[allow(improper_ctypes)]
|
#[allow(improper_ctypes)]
|
||||||
#[allow(improper_ctypes_definitions)]
|
#[allow(improper_ctypes_definitions)]
|
||||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||||
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda($arg_id)?),*)?;
|
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
)*
|
)*
|
||||||
|
@ -32,6 +32,15 @@ use cuda_base::cuda_function_declarations;
|
||||||
cuda_function_declarations!(
|
cuda_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented <= [
|
||||||
cuInit
|
cuDeviceComputeCapability,
|
||||||
|
cuDeviceGet,
|
||||||
|
cuDeviceGetAttribute,
|
||||||
|
cuDeviceGetLuid,
|
||||||
|
cuDeviceGetName,
|
||||||
|
cuDeviceGetProperties,
|
||||||
|
cuDeviceGetUuid,
|
||||||
|
cuDeviceGetUuid_v2,
|
||||||
|
cuDeviceTotalMem_v2,
|
||||||
|
cuInit,
|
||||||
]
|
]
|
||||||
);
|
);
|
Loading…
Add table
Add a link
Reference in a new issue