Handle new attributes in cuDeviceGetAttribute (#383)

This commit is contained in:
Violet 2025-06-16 13:20:04 -07:00 committed by GitHub
commit 9c5f1ed9fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -53,14 +53,19 @@ trait DeviceAttributeNames {
impl DeviceAttributeNames for hipDeviceAttribute_t {}
macro_rules! remap_attribute {
($attrib:expr => $([ $($word:expr)* ]),*,) => {
($attrib:expr => { $([ $($word:expr)* ]),*, }, { $( $exactWord:expr => $hipWord:expr ),*, }) => {
match $attrib {
$(
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
}
)*
_ => return Err(hipErrorCode_t::NotSupported)
$(
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE_ $exactWord >] } => {
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $hipWord >] }
}
)*
_ => return Err(hipErrorCode_t::InvalidValue)
}
}
}
@@ -204,10 +209,51 @@ pub(crate) fn get_attribute(
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DMipmap)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE => {
return get_device_prop(pi, dev_idx, |props| props.persistingL2CacheMaxSize)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO => {
return get_device_prop(pi, dev_idx, |props| props.singleToDoublePrecisionPerfRatio)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE => {
return get_device_prop(pi, dev_idx, |props| props.accessPolicyMaxWindowSize)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.sparseHipArraySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.hostRegisterReadOnlySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.timelineSemaphoreInteropSupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.gpuDirectRDMASupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS => {
return get_device_prop(pi, dev_idx, |props| {
props.gpuDirectRDMAFlushWritesOptions as i32
})
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING => {
return get_device_prop(pi, dev_idx, |props| props.gpuDirectRDMAWritesOrdering)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH => {
return get_device_prop(pi, dev_idx, |props| props.clusterLaunch)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.deferredMappingHipArraySupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED => {
return get_device_prop(pi, dev_idx, |props| props.ipcEventSupported)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS => {
return get_device_prop(pi, dev_idx, |props| props.unifiedFunctionPointers)
}
_ => {}
}
let attrib = remap_attribute! {
attrib =>
attrib => {
[MAX THREADS PER BLOCK],
[MAX BLOCK DIM X],
[MAX BLOCK DIM Y],
@@ -297,7 +343,7 @@ pub(crate) fn get_attribute(
[MULTI GPU BOARD],
[MULTI GPU BOARD GROUP ID],
[HOST NATIVE ATOMIC SUPPORTED],
[SINGLE TO DOUBLE PRECISION PERF RATIO],
// [SINGLE TO DOUBLE PRECISION PERF RATIO], // not supported by hipDeviceGetAttribute
[PAGEABLE MEMORY ACCESS],
[CONCURRENT MANAGED ACCESS],
[COMPUTE PREEMPTION SUPPORTED],
@@ -315,35 +361,25 @@ pub(crate) fn get_attribute(
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
//[MAX BLOCKS PER MULTIPROCESSOR],
//[GENERIC COMPRESSION SUPPORTED],
//[MAX PERSISTING L2 CACHE SIZE],
//[MAX ACCESS POLICY WINDOW SIZE],
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
//[RESERVED SHARED MEMORY PER BLOCK],
//[SPARSE CUDA ARRAY SUPPORTED],
//[READ ONLY HOST REGISTER SUPPORTED],
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
[MEMORY POOLS SUPPORTED],
//[GPU DIRECT RDMA SUPPORTED],
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS],
//[GPU DIRECT RDMA WRITES ORDERING],
//[MEMPOOL SUPPORTED HANDLE TYPES],
//[CLUSTER LAUNCH],
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED],
//[CAN USE 64 BIT STREAM MEM OPS],
//[CAN USE STREAM WAIT VALUE NOR],
//[DMA BUF SUPPORTED],
//[IPC EVENT SUPPORTED],
//[MEM SYNC DOMAIN COUNT],
//[TENSOR MAP ACCESS SUPPORTED],
//[HANDLE TYPE FABRIC SUPPORTED],
//[UNIFIED FUNCTION POINTERS],
//[NUMA CONFIG],
//[NUMA ID],
//[MULTICAST SUPPORTED],
//[MPS ENABLED],
//[HOST NUMA ID],
}, {
MAX_BLOCKS_PER_MULTIPROCESSOR => MaxBlocksPerMultiProcessor,
RESERVED_SHARED_MEMORY_PER_BLOCK => ReservedSharedMemPerBlock,
MEMPOOL_SUPPORTED_HANDLE_TYPES => MemoryPoolSupportedHandleTypes,
}
};
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
}