mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Implement device host functions
This commit is contained in:
parent
6c2a8576c2
commit
94e8e13425
3 changed files with 335 additions and 313 deletions
|
@ -1,29 +1,25 @@
|
|||
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
|
||||
use crate::{
|
||||
cuda::{self, CUdevice, CUdevprop},
|
||||
hip_call,
|
||||
};
|
||||
use cuda::{CUdevice_attribute, CUuuid_st};
|
||||
use hip_runtime_sys::{
|
||||
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
|
||||
};
|
||||
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
||||
use paste::paste;
|
||||
use std::{
|
||||
cmp,
|
||||
collections::HashSet,
|
||||
ffi::c_void,
|
||||
mem,
|
||||
os::raw::{c_char, c_int, c_uint},
|
||||
ptr,
|
||||
sync::atomic::{AtomicU32, Ordering},
|
||||
};
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
|
||||
const PROJECT_URL_SUFFIX_SHORT: &'static str = " [ZLUDA]";
|
||||
const PROJECT_URL_SUFFIX_LONG: &'static str = " [github.com/vosen/ZLUDA]";
|
||||
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||
pub const COMPUTE_CAPABILITY_MINOR: i32 = 8;
|
||||
|
||||
pub(crate) fn compute_capability(major: &mut i32, minor: &mut i32, _dev: hipDevice_t) -> CUresult {
|
||||
*major = COMPUTE_CAPABILITY_MAJOR;
|
||||
*minor = COMPUTE_CAPABILITY_MINOR;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get(device: *mut hipDevice_t, ordinal: i32) -> hipError_t {
|
||||
unsafe { hipDeviceGet(device, ordinal) }
|
||||
}
|
||||
|
||||
#[allow(warnings)]
|
||||
trait hipDeviceAttribute_t_ext {
|
||||
trait DeviceAttributeNames {
|
||||
const hipDeviceAttributeGpuOverlap: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeDeviceOverlap;
|
||||
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
|
||||
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
|
||||
|
@ -42,307 +38,268 @@ trait hipDeviceAttribute_t_ext {
|
|||
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
|
||||
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
|
||||
const hipDeviceAttributePciDomainId: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributePciDomainID;
|
||||
const hipDeviceAttributeMultiGpuBoard: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeIsMultiGpuBoard;
|
||||
const hipDeviceAttributeMultiGpuBoardGroupId: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMultiGpuBoardGroupID;
|
||||
const hipDeviceAttributeMaxSharedMemoryPerBlockOptin: hipDeviceAttribute_t =
|
||||
hipDeviceAttribute_t::hipDeviceAttributeSharedMemPerBlockOptin;
|
||||
}
|
||||
|
||||
impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {}
|
||||
impl DeviceAttributeNames for hipDeviceAttribute_t {}
|
||||
|
||||
macro_rules! remap_attribute {
|
||||
($attrib:expr => $([ $($word:expr)* ]),*,) => {
|
||||
match $attrib {
|
||||
$(
|
||||
paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||
paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
|
||||
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
|
||||
}
|
||||
)*
|
||||
_ => return hipError_t::hipErrorInvalidValue
|
||||
_ => return Err(hipErrorCode_t::hipErrorNotSupported)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t {
|
||||
if pi == ptr::null_mut() {
|
||||
return hipError_t::hipErrorInvalidValue;
|
||||
}
|
||||
//let mut props = unsafe { mem::zeroed() };
|
||||
let hip_attrib = match attrib {
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
|
||||
unsafe { *pi = 1 };
|
||||
return hipError_t::hipSuccess;
|
||||
pub(crate) fn get_attribute(
|
||||
pi: &mut i32,
|
||||
attrib: CUdevice_attribute,
|
||||
dev_idx: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
match attrib {
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
|
||||
*pi = 32;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => {
|
||||
unsafe { *pi = 1 };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => {
|
||||
unsafe { *pi = 0 };
|
||||
return hipError_t::hipSuccess;
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER => {
|
||||
*pi = 0;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
||||
unsafe { *pi = 8 };
|
||||
return hipError_t::hipSuccess;
|
||||
*pi = COMPUTE_CAPABILITY_MAJOR;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
|
||||
unsafe { *pi = 0 };
|
||||
return hipError_t::hipSuccess;
|
||||
*pi = COMPUTE_CAPABILITY_MINOR;
|
||||
return Ok(());
|
||||
}
|
||||
// we assume that arrayed textures have the same limits
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
// we treat surface the same as texture
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
|
||||
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
|
||||
}
|
||||
// Totally made up
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
|
||||
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
|
||||
unsafe { *pi = u16::MAX as i32 };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
// linear sizes
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
|
||||
let mut prop = unsafe { mem::zeroed() };
|
||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return err;
|
||||
}
|
||||
unsafe { *pi = prop.maxTexture1DLinear };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
|
||||
let mut prop = unsafe { mem::zeroed() };
|
||||
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
|
||||
if err != hipError_t::hipSuccess {
|
||||
return err;
|
||||
}
|
||||
unsafe { *pi = prop.pciDomainID };
|
||||
return hipError_t::hipSuccess;
|
||||
}
|
||||
attrib => remap_attribute! {
|
||||
attrib =>
|
||||
[MAX THREADS PER BLOCK],
|
||||
[MAX BLOCK DIM X],
|
||||
[MAX BLOCK DIM Y],
|
||||
[MAX BLOCK DIM Z],
|
||||
[MAX GRID DIM X],
|
||||
[MAX GRID DIM Y],
|
||||
[MAX GRID DIM Z],
|
||||
[MAX SHARED MEMORY PER BLOCK],
|
||||
[TOTAL CONSTANT MEMORY],
|
||||
[WARP SIZE],
|
||||
[MAX PITCH],
|
||||
[MAX REGISTERS PER BLOCK],
|
||||
[CLOCK RATE],
|
||||
[TEXTURE ALIGNMENT],
|
||||
//[GPU OVERLAP],
|
||||
[MULTIPROCESSOR COUNT],
|
||||
[KERNEL EXEC TIMEOUT],
|
||||
[INTEGRATED],
|
||||
[CAN MAP HOST MEMORY],
|
||||
[COMPUTE MODE],
|
||||
[MAXIMUM TEXTURE1D WIDTH],
|
||||
[MAXIMUM TEXTURE2D WIDTH],
|
||||
[MAXIMUM TEXTURE2D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D WIDTH],
|
||||
[MAXIMUM TEXTURE3D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D DEPTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||
//[SURFACE ALIGNMENT],
|
||||
[CONCURRENT KERNELS],
|
||||
[ECC ENABLED],
|
||||
[PCI BUS ID],
|
||||
[PCI DEVICE ID],
|
||||
//[TCC DRIVER],
|
||||
[MEMORY CLOCK RATE],
|
||||
[GLOBAL MEMORY BUS WIDTH],
|
||||
[L2 CACHE SIZE],
|
||||
[MAX THREADS PER MULTIPROCESSOR],
|
||||
[ASYNC ENGINE COUNT],
|
||||
//[UNIFIED ADDRESSING],
|
||||
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||
//[CAN TEX2D GATHER],
|
||||
//[MAXIMUM TEXTURE2D GATHER WIDTH],
|
||||
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
|
||||
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||
//[PCI DOMAIN ID],
|
||||
[TEXTURE PITCH ALIGNMENT],
|
||||
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE1D WIDTH],
|
||||
//[MAXIMUM SURFACE2D WIDTH],
|
||||
//[MAXIMUM SURFACE2D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D WIDTH],
|
||||
//[MAXIMUM SURFACE3D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D DEPTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE2D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM SURFACE2D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACECUBEMAP WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LINEAR PITCH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
|
||||
//[COMPUTE CAPABILITY MAJOR],
|
||||
//[COMPUTE CAPABILITY MINOR],
|
||||
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||
//[STREAM PRIORITIES SUPPORTED],
|
||||
//[GLOBAL L1 CACHE SUPPORTED],
|
||||
//[LOCAL L1 CACHE SUPPORTED],
|
||||
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||
//[MAX REGISTERS PER MULTIPROCESSOR],
|
||||
[MANAGED MEMORY],
|
||||
//[MULTI GPU BOARD],
|
||||
//[MULTI GPU BOARD GROUP ID],
|
||||
//[HOST NATIVE ATOMIC SUPPORTED],
|
||||
//[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||
[PAGEABLE MEMORY ACCESS],
|
||||
[CONCURRENT MANAGED ACCESS],
|
||||
//[COMPUTE PREEMPTION SUPPORTED],
|
||||
//[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||
//[CAN USE STREAM MEM OPS],
|
||||
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||
//[CAN USE STREAM WAIT VALUE NOR],
|
||||
[COOPERATIVE LAUNCH],
|
||||
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||
//[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||
//[CAN FLUSH REMOTE WRITES],
|
||||
//[HOST REGISTER SUPPORTED],
|
||||
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||
//[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||
//[MAX BLOCKS PER MULTIPROCESSOR],
|
||||
//[GENERIC COMPRESSION SUPPORTED],
|
||||
//[MAX PERSISTING L2 CACHE SIZE],
|
||||
//[MAX ACCESS POLICY WINDOW SIZE],
|
||||
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
|
||||
//[RESERVED SHARED MEMORY PER BLOCK],
|
||||
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||
//[MEMORY POOLS SUPPORTED],
|
||||
},
|
||||
};
|
||||
unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) }
|
||||
}
|
||||
|
||||
pub fn get_uuid(uuid: *mut CUuuid_st, _dev_idx: c_int) -> Result<(), CUresult> {
|
||||
unsafe {
|
||||
*uuid = CUuuid_st {
|
||||
bytes: mem::zeroed(),
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: add support if Level 0 exposes it
|
||||
pub fn get_luid(
|
||||
luid: *mut c_char,
|
||||
dev_node_mask: *mut c_uint,
|
||||
_dev_idx: c_int,
|
||||
) -> Result<(), CUresult> {
|
||||
unsafe { ptr::write_bytes(luid, 0u8, 8) };
|
||||
unsafe { *dev_node_mask = 0 };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
|
||||
if prop == ptr::null_mut() {
|
||||
return Err(hipError_t::hipErrorInvalidValue);
|
||||
_ => {}
|
||||
}
|
||||
let mut hip_props = mem::zeroed();
|
||||
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
|
||||
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
|
||||
(*prop).maxGridSize = hip_props.maxGridSize;
|
||||
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
|
||||
(*prop).SIMDWidth = hip_props.warpSize;
|
||||
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
|
||||
(*prop).regsPerBlock = hip_props.regsPerBlock;
|
||||
(*prop).clockRate = hip_props.clockRate;
|
||||
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
|
||||
let attrib = remap_attribute! {
|
||||
attrib =>
|
||||
[MAX THREADS PER BLOCK],
|
||||
[MAX BLOCK DIM X],
|
||||
[MAX BLOCK DIM Y],
|
||||
[MAX BLOCK DIM Z],
|
||||
[MAX GRID DIM X],
|
||||
[MAX GRID DIM Y],
|
||||
[MAX GRID DIM Z],
|
||||
[MAX SHARED MEMORY PER BLOCK],
|
||||
[TOTAL CONSTANT MEMORY],
|
||||
//[WARP SIZE],
|
||||
[MAX PITCH],
|
||||
[MAX REGISTERS PER BLOCK],
|
||||
[CLOCK RATE],
|
||||
[TEXTURE ALIGNMENT],
|
||||
[GPU OVERLAP],
|
||||
[MULTIPROCESSOR COUNT],
|
||||
[KERNEL EXEC TIMEOUT],
|
||||
[INTEGRATED],
|
||||
[CAN MAP HOST MEMORY],
|
||||
[COMPUTE MODE],
|
||||
[MAXIMUM TEXTURE1D WIDTH],
|
||||
[MAXIMUM TEXTURE2D WIDTH],
|
||||
[MAXIMUM TEXTURE2D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D WIDTH],
|
||||
[MAXIMUM TEXTURE3D HEIGHT],
|
||||
[MAXIMUM TEXTURE3D DEPTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
|
||||
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
|
||||
[SURFACE ALIGNMENT],
|
||||
[CONCURRENT KERNELS],
|
||||
[ECC ENABLED],
|
||||
[PCI BUS ID],
|
||||
[PCI DEVICE ID],
|
||||
//[TCC DRIVER],
|
||||
[MEMORY CLOCK RATE],
|
||||
[GLOBAL MEMORY BUS WIDTH],
|
||||
[L2 CACHE SIZE],
|
||||
[MAX THREADS PER MULTIPROCESSOR],
|
||||
[ASYNC ENGINE COUNT],
|
||||
[UNIFIED ADDRESSING],
|
||||
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
|
||||
//[CAN TEX2D GATHER],
|
||||
//[MAXIMUM TEXTURE2D GATHER WIDTH],
|
||||
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
|
||||
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
|
||||
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
|
||||
[PCI DOMAIN ID],
|
||||
[TEXTURE PITCH ALIGNMENT],
|
||||
//[MAXIMUM TEXTURECUBEMAP WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE1D WIDTH],
|
||||
//[MAXIMUM SURFACE2D WIDTH],
|
||||
//[MAXIMUM SURFACE2D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D WIDTH],
|
||||
//[MAXIMUM SURFACE3D HEIGHT],
|
||||
//[MAXIMUM SURFACE3D DEPTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE1D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACE2D LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
|
||||
//[MAXIMUM SURFACE2D LAYERED LAYERS],
|
||||
//[MAXIMUM SURFACECUBEMAP WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
|
||||
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
|
||||
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
|
||||
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
|
||||
//[MAXIMUM TEXTURE2D LINEAR PITCH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
|
||||
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
|
||||
//[COMPUTE CAPABILITY MAJOR],
|
||||
//[COMPUTE CAPABILITY MINOR],
|
||||
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
|
||||
[STREAM PRIORITIES SUPPORTED],
|
||||
[GLOBAL L1 CACHE SUPPORTED],
|
||||
[LOCAL L1 CACHE SUPPORTED],
|
||||
[MAX SHARED MEMORY PER MULTIPROCESSOR],
|
||||
[MAX REGISTERS PER MULTIPROCESSOR],
|
||||
[MANAGED MEMORY],
|
||||
[MULTI GPU BOARD],
|
||||
[MULTI GPU BOARD GROUP ID],
|
||||
[HOST NATIVE ATOMIC SUPPORTED],
|
||||
[SINGLE TO DOUBLE PRECISION PERF RATIO],
|
||||
[PAGEABLE MEMORY ACCESS],
|
||||
[CONCURRENT MANAGED ACCESS],
|
||||
[COMPUTE PREEMPTION SUPPORTED],
|
||||
[CAN USE HOST POINTER FOR REGISTERED MEM],
|
||||
//[CAN USE STREAM MEM OPS],
|
||||
[COOPERATIVE LAUNCH],
|
||||
[COOPERATIVE MULTI DEVICE LAUNCH],
|
||||
[MAX SHARED MEMORY PER BLOCK OPTIN],
|
||||
//[CAN FLUSH REMOTE WRITES],
|
||||
[HOST REGISTER SUPPORTED],
|
||||
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
|
||||
[DIRECT MANAGED MEM ACCESS FROM HOST],
|
||||
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
|
||||
[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
|
||||
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
|
||||
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
|
||||
//[MAX BLOCKS PER MULTIPROCESSOR],
|
||||
//[GENERIC COMPRESSION SUPPORTED],
|
||||
//[MAX PERSISTING L2 CACHE SIZE],
|
||||
//[MAX ACCESS POLICY WINDOW SIZE],
|
||||
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
|
||||
//[RESERVED SHARED MEMORY PER BLOCK],
|
||||
//[SPARSE CUDA ARRAY SUPPORTED],
|
||||
//[READ ONLY HOST REGISTER SUPPORTED],
|
||||
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
|
||||
[MEMORY POOLS SUPPORTED],
|
||||
//[GPU DIRECT RDMA SUPPORTED],
|
||||
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS],
|
||||
//[GPU DIRECT RDMA WRITES ORDERING],
|
||||
//[MEMPOOL SUPPORTED HANDLE TYPES],
|
||||
//[CLUSTER LAUNCH],
|
||||
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED],
|
||||
//[CAN USE 64 BIT STREAM MEM OPS],
|
||||
//[CAN USE STREAM WAIT VALUE NOR],
|
||||
//[DMA BUF SUPPORTED],
|
||||
//[IPC EVENT SUPPORTED],
|
||||
//[MEM SYNC DOMAIN COUNT],
|
||||
//[TENSOR MAP ACCESS SUPPORTED],
|
||||
//[HANDLE TYPE FABRIC SUPPORTED],
|
||||
//[UNIFIED FUNCTION POINTERS],
|
||||
//[NUMA CONFIG],
|
||||
//[NUMA ID],
|
||||
//[MULTICAST SUPPORTED],
|
||||
//[MPS ENABLED],
|
||||
//[HOST NUMA ID],
|
||||
};
|
||||
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_uuid(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||
unsafe { hipDeviceGetUuid(uuid, device) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_uuid_v2(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
|
||||
get_uuid(uuid, device)
|
||||
}
|
||||
|
||||
pub(crate) fn get_luid(
|
||||
luid: *mut ::core::ffi::c_char,
|
||||
device_node_mask: &mut ::core::ffi::c_uint,
|
||||
dev: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
let luid = unsafe {
|
||||
luid.cast::<[i8; 8]>()
|
||||
.as_mut()
|
||||
.ok_or(hipErrorCode_t::hipErrorInvalidValue)
|
||||
}?;
|
||||
let mut properties = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut properties, dev) }?;
|
||||
*luid = properties.luid;
|
||||
*device_node_mask = properties.luidDeviceNodeMask;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_name(
|
||||
name: *mut ::core::ffi::c_char,
|
||||
len: ::core::ffi::c_int,
|
||||
dev: hipDevice_t,
|
||||
) -> cuda_types::CUresult {
|
||||
unsafe { hipDeviceGetName(name, len, dev) }?;
|
||||
let len = len as usize;
|
||||
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
|
||||
let first_zero = buffer.iter().position(|c| *c == 0);
|
||||
let first_zero = if let Some(x) = first_zero {
|
||||
x
|
||||
} else {
|
||||
return Ok(());
|
||||
};
|
||||
if (first_zero + PROJECT_SUFFIX.len()) > len {
|
||||
return Ok(());
|
||||
}
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
PROJECT_SUFFIX.as_ptr() as _,
|
||||
name.add(first_zero),
|
||||
PROJECT_SUFFIX.len(),
|
||||
)
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
|
||||
unsafe { hipDeviceTotalMem(bytes, dev) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
|
||||
let mut hip_props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
|
||||
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||
prop.maxThreadsDim = hip_props.maxThreadsDim;
|
||||
prop.maxGridSize = hip_props.maxGridSize;
|
||||
prop.totalConstantMemory = clamp_usize(hip_props.totalConstMem);
|
||||
prop.SIMDWidth = 32;
|
||||
prop.memPitch = clamp_usize(hip_props.memPitch);
|
||||
prop.regsPerBlock = hip_props.regsPerBlock;
|
||||
prop.clockRate = hip_props.clockRate;
|
||||
prop.textureAlign = clamp_usize(hip_props.textureAlignment);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Narrows a `usize` to `i32`, saturating at `i32::MAX` instead of wrapping.
fn clamp_usize(x: usize) -> i32 {
    if x > i32::MAX as usize {
        i32::MAX
    } else {
        x as i32
    }
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
|
||||
pub(super) mod device;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> CUresult {
|
||||
unimplemented!()
|
||||
|
@ -11,16 +13,70 @@ pub(crate) fn unimplemented() -> CUresult {
|
|||
CUresult::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) trait FromCuda<T>: Sized {
|
||||
fn from_cuda(t: T) -> Result<Self, CUerror>;
|
||||
pub(crate) trait FromCuda<'a, T>: Sized {
|
||||
fn from_cuda(t: &'a T) -> Result<Self, CUerror>;
|
||||
}
|
||||
|
||||
impl FromCuda<u32> for u32 {
|
||||
fn from_cuda(x: u32) -> Result<Self, CUerror> {
|
||||
Ok(x)
|
||||
}
|
||||
/// Generates `FromCuda` impls for types that need no conversion.
///
/// For every listed type `T` two impls are produced:
/// * `T` from `T` - the value is copied out of the reference unchanged;
/// * `&mut T` from `*mut T` - the pointer is turned into a mutable
///   reference, and a null pointer is reported as `CUerror::INVALID_VALUE`.
macro_rules! from_cuda_noop {
    ($($type_:ty),*) => {
        $(
            impl<'a> FromCuda<'a, $type_> for $type_ {
                fn from_cuda(value: &'a $type_) -> Result<Self, CUerror> {
                    Ok(*value)
                }
            }

            impl<'a> FromCuda<'a, *mut $type_> for &'a mut $type_ {
                fn from_cuda(raw: &'a *mut $type_) -> Result<Self, CUerror> {
                    // Only null is rejected here; any other pointer is
                    // trusted to reference a valid, writable value.
                    unsafe { raw.as_mut() }.ok_or(CUerror::INVALID_VALUE)
                }
            }
        )*
    };
}
|
||||
|
||||
/// Generates `FromCuda` impls for pairs of types that are converted by
/// reinterpreting their bits.
///
/// For every `From => To` pair three impls are produced:
/// * `To` from `From` - the value is copied and `transmute`d;
/// * `&mut To` from `*mut From` - the pointer is cast and turned into a
///   mutable reference, with null reported as `CUerror::INVALID_VALUE`;
/// * `*mut To` from `*mut From` - a plain pointer cast with no null check.
///
/// NOTE(review): nothing here verifies that the two types share a layout
/// beyond what `transmute` itself enforces (equal size) — the pairs passed
/// to this macro must be chosen accordingly.
macro_rules! from_cuda_transmute {
    ($($from:ty => $to:ty),*) => {
        $(
            impl<'a> FromCuda<'a, $from> for $to {
                fn from_cuda(value: &'a $from) -> Result<Self, CUerror> {
                    Ok(unsafe { std::mem::transmute(*value) })
                }
            }

            impl<'a> FromCuda<'a, *mut $from> for &'a mut $to {
                fn from_cuda(raw: &'a *mut $from) -> Result<Self, CUerror> {
                    // Only null is rejected here; any other pointer is
                    // trusted to reference a valid, writable value.
                    unsafe { raw.cast::<$to>().as_mut() }.ok_or(CUerror::INVALID_VALUE)
                }
            }

            impl<'a> FromCuda<'a, *mut $from> for * mut $to {
                fn from_cuda(raw: &'a *mut $from) -> Result<Self, CUerror> {
                    Ok((*raw).cast::<$to>())
                }
            }
        )*
    };
}
|
||||
|
||||
from_cuda_noop!(
|
||||
*mut i8,
|
||||
*mut usize,
|
||||
i32,
|
||||
u32,
|
||||
cuda_types::CUdevprop, CUdevice_attribute
|
||||
);
|
||||
from_cuda_transmute!(
|
||||
CUdevice => hipDevice_t,
|
||||
CUuuid => hipUUID
|
||||
);
|
||||
|
||||
pub(crate) fn init(flags: ::core::ffi::c_uint) -> hipError_t {
|
||||
unsafe { hipInit(flags) }
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ macro_rules! implemented {
|
|||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda($arg_id)?),*)?;
|
||||
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
|
||||
Ok(())
|
||||
}
|
||||
)*
|
||||
|
@ -32,6 +32,15 @@ use cuda_base::cuda_function_declarations;
|
|||
cuda_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cuInit
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
cuDeviceGetLuid,
|
||||
cuDeviceGetName,
|
||||
cuDeviceGetProperties,
|
||||
cuDeviceGetUuid,
|
||||
cuDeviceGetUuid_v2,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuInit,
|
||||
]
|
||||
);
|
Loading…
Add table
Reference in a new issue