Handle new attributes in cuDeviceGetAttribute (#383)

This commit is contained in:
Violet 2025-06-16 13:20:04 -07:00 committed by GitHub
commit 9c5f1ed9fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -53,14 +53,19 @@ trait DeviceAttributeNames {
impl DeviceAttributeNames for hipDeviceAttribute_t {} impl DeviceAttributeNames for hipDeviceAttribute_t {}
// remap_attribute!: expands to a `match` on `$attrib` that maps a
// `CUdevice_attribute` variant to a `hipDeviceAttribute_t` variant.
//
// Input shape of the revised form (the second pattern on the matcher line):
//   * first braced list  - bracketed word lists such as `[MAX PITCH]`; the words
//     are joined with `_` and upper-cased after `CU_DEVICE_ATTRIBUTE` to build
//     the CUDA variant name, and camel-cased after `hipDeviceAttribute` to build
//     the HIP variant name.
//   * second braced list - `EXACT_CUDA_SUFFIX => HipSuffix` pairs that are pasted
//     verbatim, for attributes whose HIP spelling cannot be derived by
//     camel-casing the CUDA words.
//
// The earlier form (the first pattern on the matcher line) accepted only the
// bracketed word lists.
macro_rules! remap_attribute { macro_rules! remap_attribute {
($attrib:expr => $([ $($word:expr)* ]),*,) => { ($attrib:expr => { $([ $($word:expr)* ]),*, }, { $( $exactWord:expr => $hipWord:expr ),*, }) => {
match $attrib { match $attrib {
// One arm per bracketed word list: names are derived by case conversion.
$( $(
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => { paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] } paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
} }
)* )*
// Earlier form: the fallback arm returned `NotSupported` from the enclosing
// function. Revised form: a second repetition follows, one arm per explicit
// `exact => hip` pair, with both identifiers pasted without case conversion.
_ => return Err(hipErrorCode_t::NotSupported) $(
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE_ $exactWord >] } => {
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $hipWord >] }
}
)*
// Revised fallback: any attribute not listed in either group makes the
// enclosing function return `Err(hipErrorCode_t::InvalidValue)`.
_ => return Err(hipErrorCode_t::InvalidValue)
} }
} }
} }
@ -204,146 +209,177 @@ pub(crate) fn get_attribute(
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => { CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DMipmap) return get_device_prop(pi, dev_idx, |props| props.maxTexture1DMipmap)
} }
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE => {
return get_device_prop(pi, dev_idx, |props| props.persistingL2CacheMaxSize)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO => {
return get_device_prop(pi, dev_idx, |props| props.singleToDoublePrecisionPerfRatio)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE => {
return get_device_prop(pi, dev_idx, |props| props.accessPolicyMaxWindowSize)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.sparseHipArraySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.hostRegisterReadOnlySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.timelineSemaphoreInteropSupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.gpuDirectRDMASupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS => {
return get_device_prop(pi, dev_idx, |props| {
props.gpuDirectRDMAFlushWritesOptions as i32
})
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING => {
return get_device_prop(pi, dev_idx, |props| props.gpuDirectRDMAWritesOrdering)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH => {
return get_device_prop(pi, dev_idx, |props| props.clusterLaunch)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.deferredMappingHipArraySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.ipcEventSupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS => {
return get_device_prop(pi, dev_idx, |props| props.unifiedFunctionPointers)
}
_ => {} _ => {}
} }
let attrib = remap_attribute! { let attrib = remap_attribute! {
attrib => attrib => {
[MAX THREADS PER BLOCK], [MAX THREADS PER BLOCK],
[MAX BLOCK DIM X], [MAX BLOCK DIM X],
[MAX BLOCK DIM Y], [MAX BLOCK DIM Y],
[MAX BLOCK DIM Z], [MAX BLOCK DIM Z],
[MAX GRID DIM X], [MAX GRID DIM X],
[MAX GRID DIM Y], [MAX GRID DIM Y],
[MAX GRID DIM Z], [MAX GRID DIM Z],
[MAX SHARED MEMORY PER BLOCK], [MAX SHARED MEMORY PER BLOCK],
[TOTAL CONSTANT MEMORY], [TOTAL CONSTANT MEMORY],
//[WARP SIZE], //[WARP SIZE],
[MAX PITCH], [MAX PITCH],
[MAX REGISTERS PER BLOCK], [MAX REGISTERS PER BLOCK],
[CLOCK RATE], [CLOCK RATE],
[TEXTURE ALIGNMENT], [TEXTURE ALIGNMENT],
[GPU OVERLAP], [GPU OVERLAP],
[MULTIPROCESSOR COUNT], [MULTIPROCESSOR COUNT],
[KERNEL EXEC TIMEOUT], [KERNEL EXEC TIMEOUT],
[INTEGRATED], [INTEGRATED],
[CAN MAP HOST MEMORY], [CAN MAP HOST MEMORY],
[COMPUTE MODE], [COMPUTE MODE],
[MAXIMUM TEXTURE1D WIDTH], [MAXIMUM TEXTURE1D WIDTH],
[MAXIMUM TEXTURE2D WIDTH], [MAXIMUM TEXTURE2D WIDTH],
[MAXIMUM TEXTURE2D HEIGHT], [MAXIMUM TEXTURE2D HEIGHT],
[MAXIMUM TEXTURE3D WIDTH], [MAXIMUM TEXTURE3D WIDTH],
[MAXIMUM TEXTURE3D HEIGHT], [MAXIMUM TEXTURE3D HEIGHT],
[MAXIMUM TEXTURE3D DEPTH], [MAXIMUM TEXTURE3D DEPTH],
//[MAXIMUM TEXTURE2D LAYERED WIDTH], //[MAXIMUM TEXTURE2D LAYERED WIDTH],
//[MAXIMUM TEXTURE2D LAYERED HEIGHT], //[MAXIMUM TEXTURE2D LAYERED HEIGHT],
//[MAXIMUM TEXTURE2D LAYERED LAYERS], //[MAXIMUM TEXTURE2D LAYERED LAYERS],
//[MAXIMUM TEXTURE2D ARRAY WIDTH], //[MAXIMUM TEXTURE2D ARRAY WIDTH],
//[MAXIMUM TEXTURE2D ARRAY HEIGHT], //[MAXIMUM TEXTURE2D ARRAY HEIGHT],
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES], //[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
[SURFACE ALIGNMENT], [SURFACE ALIGNMENT],
[CONCURRENT KERNELS], [CONCURRENT KERNELS],
[ECC ENABLED], [ECC ENABLED],
[PCI BUS ID], [PCI BUS ID],
[PCI DEVICE ID], [PCI DEVICE ID],
//[TCC DRIVER], //[TCC DRIVER],
[MEMORY CLOCK RATE], [MEMORY CLOCK RATE],
[GLOBAL MEMORY BUS WIDTH], [GLOBAL MEMORY BUS WIDTH],
[L2 CACHE SIZE], [L2 CACHE SIZE],
[MAX THREADS PER MULTIPROCESSOR], [MAX THREADS PER MULTIPROCESSOR],
[ASYNC ENGINE COUNT], [ASYNC ENGINE COUNT],
[UNIFIED ADDRESSING], [UNIFIED ADDRESSING],
//[MAXIMUM TEXTURE1D LAYERED WIDTH], //[MAXIMUM TEXTURE1D LAYERED WIDTH],
//[MAXIMUM TEXTURE1D LAYERED LAYERS], //[MAXIMUM TEXTURE1D LAYERED LAYERS],
//[CAN TEX2D GATHER], //[CAN TEX2D GATHER],
//[MAXIMUM TEXTURE2D GATHER WIDTH], //[MAXIMUM TEXTURE2D GATHER WIDTH],
//[MAXIMUM TEXTURE2D GATHER HEIGHT], //[MAXIMUM TEXTURE2D GATHER HEIGHT],
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE], //[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE], //[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE], //[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
[PCI DOMAIN ID], [PCI DOMAIN ID],
[TEXTURE PITCH ALIGNMENT], [TEXTURE PITCH ALIGNMENT],
//[MAXIMUM TEXTURECUBEMAP WIDTH], //[MAXIMUM TEXTURECUBEMAP WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH], //[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS], //[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
//[MAXIMUM SURFACE1D WIDTH], //[MAXIMUM SURFACE1D WIDTH],
//[MAXIMUM SURFACE2D WIDTH], //[MAXIMUM SURFACE2D WIDTH],
//[MAXIMUM SURFACE2D HEIGHT], //[MAXIMUM SURFACE2D HEIGHT],
//[MAXIMUM SURFACE3D WIDTH], //[MAXIMUM SURFACE3D WIDTH],
//[MAXIMUM SURFACE3D HEIGHT], //[MAXIMUM SURFACE3D HEIGHT],
//[MAXIMUM SURFACE3D DEPTH], //[MAXIMUM SURFACE3D DEPTH],
//[MAXIMUM SURFACE1D LAYERED WIDTH], //[MAXIMUM SURFACE1D LAYERED WIDTH],
//[MAXIMUM SURFACE1D LAYERED LAYERS], //[MAXIMUM SURFACE1D LAYERED LAYERS],
//[MAXIMUM SURFACE2D LAYERED WIDTH], //[MAXIMUM SURFACE2D LAYERED WIDTH],
//[MAXIMUM SURFACE2D LAYERED HEIGHT], //[MAXIMUM SURFACE2D LAYERED HEIGHT],
//[MAXIMUM SURFACE2D LAYERED LAYERS], //[MAXIMUM SURFACE2D LAYERED LAYERS],
//[MAXIMUM SURFACECUBEMAP WIDTH], //[MAXIMUM SURFACECUBEMAP WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH], //[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS], //[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
//[MAXIMUM TEXTURE1D LINEAR WIDTH], //[MAXIMUM TEXTURE1D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR WIDTH], //[MAXIMUM TEXTURE2D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR HEIGHT], //[MAXIMUM TEXTURE2D LINEAR HEIGHT],
//[MAXIMUM TEXTURE2D LINEAR PITCH], //[MAXIMUM TEXTURE2D LINEAR PITCH],
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH], //[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT], //[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
//[COMPUTE CAPABILITY MAJOR], //[COMPUTE CAPABILITY MAJOR],
//[COMPUTE CAPABILITY MINOR], //[COMPUTE CAPABILITY MINOR],
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH], //[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
[STREAM PRIORITIES SUPPORTED], [STREAM PRIORITIES SUPPORTED],
[GLOBAL L1 CACHE SUPPORTED], [GLOBAL L1 CACHE SUPPORTED],
[LOCAL L1 CACHE SUPPORTED], [LOCAL L1 CACHE SUPPORTED],
[MAX SHARED MEMORY PER MULTIPROCESSOR], [MAX SHARED MEMORY PER MULTIPROCESSOR],
[MAX REGISTERS PER MULTIPROCESSOR], [MAX REGISTERS PER MULTIPROCESSOR],
[MANAGED MEMORY], [MANAGED MEMORY],
[MULTI GPU BOARD], [MULTI GPU BOARD],
[MULTI GPU BOARD GROUP ID], [MULTI GPU BOARD GROUP ID],
[HOST NATIVE ATOMIC SUPPORTED], [HOST NATIVE ATOMIC SUPPORTED],
[SINGLE TO DOUBLE PRECISION PERF RATIO], // [SINGLE TO DOUBLE PRECISION PERF RATIO], // not supported by hipDeviceGetAttribute
[PAGEABLE MEMORY ACCESS], [PAGEABLE MEMORY ACCESS],
[CONCURRENT MANAGED ACCESS], [CONCURRENT MANAGED ACCESS],
[COMPUTE PREEMPTION SUPPORTED], [COMPUTE PREEMPTION SUPPORTED],
[CAN USE HOST POINTER FOR REGISTERED MEM], [CAN USE HOST POINTER FOR REGISTERED MEM],
//[CAN USE STREAM MEM OPS], //[CAN USE STREAM MEM OPS],
[COOPERATIVE LAUNCH], [COOPERATIVE LAUNCH],
[COOPERATIVE MULTI DEVICE LAUNCH], [COOPERATIVE MULTI DEVICE LAUNCH],
[MAX SHARED MEMORY PER BLOCK OPTIN], [MAX SHARED MEMORY PER BLOCK OPTIN],
//[CAN FLUSH REMOTE WRITES], //[CAN FLUSH REMOTE WRITES],
[HOST REGISTER SUPPORTED], [HOST REGISTER SUPPORTED],
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES], [PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
[DIRECT MANAGED MEM ACCESS FROM HOST], [DIRECT MANAGED MEM ACCESS FROM HOST],
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED], //[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
[VIRTUAL MEMORY MANAGEMENT SUPPORTED], [VIRTUAL MEMORY MANAGEMENT SUPPORTED],
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED], //[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
//[HANDLE TYPE WIN32 HANDLE SUPPORTED], //[HANDLE TYPE WIN32 HANDLE SUPPORTED],
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED], //[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
//[MAX BLOCKS PER MULTIPROCESSOR], //[GENERIC COMPRESSION SUPPORTED],
//[GENERIC COMPRESSION SUPPORTED], //[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
//[MAX PERSISTING L2 CACHE SIZE], [MEMORY POOLS SUPPORTED],
//[MAX ACCESS POLICY WINDOW SIZE], //[CAN USE 64 BIT STREAM MEM OPS],
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED], //[CAN USE STREAM WAIT VALUE NOR],
//[RESERVED SHARED MEMORY PER BLOCK], //[DMA BUF SUPPORTED],
//[SPARSE CUDA ARRAY SUPPORTED], //[MEM SYNC DOMAIN COUNT],
//[READ ONLY HOST REGISTER SUPPORTED], //[TENSOR MAP ACCESS SUPPORTED],
//[TIMELINE SEMAPHORE INTEROP SUPPORTED], //[HANDLE TYPE FABRIC SUPPORTED],
[MEMORY POOLS SUPPORTED], //[NUMA CONFIG],
//[GPU DIRECT RDMA SUPPORTED], //[NUMA ID],
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS], //[MULTICAST SUPPORTED],
//[GPU DIRECT RDMA WRITES ORDERING], //[MPS ENABLED],
//[MEMPOOL SUPPORTED HANDLE TYPES], //[HOST NUMA ID],
//[CLUSTER LAUNCH], }, {
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED], MAX_BLOCKS_PER_MULTIPROCESSOR => MaxBlocksPerMultiProcessor,
//[CAN USE 64 BIT STREAM MEM OPS], RESERVED_SHARED_MEMORY_PER_BLOCK => ReservedSharedMemPerBlock,
//[CAN USE STREAM WAIT VALUE NOR], MEMPOOL_SUPPORTED_HANDLE_TYPES => MemoryPoolSupportedHandleTypes,
//[DMA BUF SUPPORTED], }
//[IPC EVENT SUPPORTED],
//[MEM SYNC DOMAIN COUNT],
//[TENSOR MAP ACCESS SUPPORTED],
//[HANDLE TYPE FABRIC SUPPORTED],
//[UNIFIED FUNCTION POINTERS],
//[NUMA CONFIG],
//[NUMA ID],
//[MULTICAST SUPPORTED],
//[MPS ENABLED],
//[HOST NUMA ID],
}; };
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) } unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
} }