diff --git a/notcuda/src/cu.rs b/notcuda/src/cu.rs index 84f7efd..75eca4f 100644 --- a/notcuda/src/cu.rs +++ b/notcuda/src/cu.rs @@ -1,4 +1,5 @@ use num_enum::TryFromPrimitive; +use std::convert::TryFrom; use std::os::raw::c_int; #[repr(C)] @@ -82,118 +83,55 @@ pub enum Result { ERROR_UNKNOWN = 999, } -#[repr(i32)] -#[derive(Copy, Clone, TryFromPrimitive)] -#[allow(non_camel_case_types)] pub enum DeviceAttribute { - MAX_THREADS_PER_BLOCK = 1, - MAX_BLOCK_DIM_X = 2, - MAX_BLOCK_DIM_Y = 3, - MAX_BLOCK_DIM_Z = 4, - MAX_GRID_DIM_X = 5, - MAX_GRID_DIM_Y = 6, - MAX_GRID_DIM_Z = 7, - MAX_SHARED_MEMORY_PER_BLOCK = 8, - TOTAL_CONSTANT_MEMORY = 9, - WARP_SIZE = 10, - MAX_PITCH = 11, - MAX_REGISTERS_PER_BLOCK = 12, - CLOCK_RATE = 13, - TEXTURE_ALIGNMENT = 14, + Static(DeviceStaticAttribute), + Dynamic(DeviceDynamicAttribute) +} + +impl DeviceAttribute { + pub fn try_new(e: u8) -> Option { + DeviceStaticAttribute::try_from(e).map(DeviceAttribute::Static) + .or_else(|_| DeviceGeneralAttribute::try_from(e).map(DeviceDynamicAttribute::General).map(DeviceAttribute::Dynamic)) + .or_else(|_| DeviceTextureAttribute::try_from(e).map(DeviceDynamicAttribute::Texture).map(DeviceAttribute::Dynamic)) + .ok() + } +} + +#[repr(u8)] +#[derive(TryFromPrimitive)] +#[allow(non_camel_case_types)] +pub enum DeviceStaticAttribute { GPU_OVERLAP = 15, - MULTIPROCESSOR_COUNT = 16, KERNEL_EXEC_TIMEOUT = 17, INTEGRATED = 18, - CAN_MAP_HOST_MEMORY = 19, - COMPUTE_MODE = 20, - MAXIMUM_TEXTURE1D_WIDTH = 21, - MAXIMUM_TEXTURE2D_WIDTH = 22, - MAXIMUM_TEXTURE2D_HEIGHT = 23, - MAXIMUM_TEXTURE3D_WIDTH = 24, - MAXIMUM_TEXTURE3D_HEIGHT = 25, - MAXIMUM_TEXTURE3D_DEPTH = 26, - MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, - MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, - MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, - SURFACE_ALIGNMENT = 30, - CONCURRENT_KERNELS = 31, - ECC_ENABLED = 32, - PCI_BUS_ID = 33, - PCI_DEVICE_ID = 34, - TCC_DRIVER = 35, - MEMORY_CLOCK_RATE = 36, - GLOBAL_MEMORY_BUS_WIDTH = 37, - L2_CACHE_SIZE = 38, - MAX_THREADS_PER_MULTIPROCESSOR = 39, - ASYNC_ENGINE_COUNT = 40, - UNIFIED_ADDRESSING = 41, - MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, - MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, - CAN_TEX2D_GATHER = 44, - MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, - MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, - MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, - MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, - MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, - PCI_DOMAIN_ID = 50, - TEXTURE_PITCH_ALIGNMENT = 51, - MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, - MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, - MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, - MAXIMUM_SURFACE1D_WIDTH = 55, - MAXIMUM_SURFACE2D_WIDTH = 56, - MAXIMUM_SURFACE2D_HEIGHT = 57, - MAXIMUM_SURFACE3D_WIDTH = 58, - MAXIMUM_SURFACE3D_HEIGHT = 59, - MAXIMUM_SURFACE3D_DEPTH = 60, - MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, - MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, - MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, - MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, - MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, - MAXIMUM_SURFACECUBEMAP_WIDTH = 66, - MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, - MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, - MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, - MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, - MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, - MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, - MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, - MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, COMPUTE_CAPABILITY_MAJOR = 75, COMPUTE_CAPABILITY_MINOR = 76, - MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, - STREAM_PRIORITIES_SUPPORTED = 78, - GLOBAL_L1_CACHE_SUPPORTED = 79, - LOCAL_L1_CACHE_SUPPORTED = 80, - MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, - MAX_REGISTERS_PER_MULTIPROCESSOR = 82, - MANAGED_MEMORY = 83, - MULTI_GPU_BOARD = 84, - MULTI_GPU_BOARD_GROUP_ID = 85, - HOST_NATIVE_ATOMIC_SUPPORTED = 86, - SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, - PAGEABLE_MEMORY_ACCESS = 88, - CONCURRENT_MANAGED_ACCESS = 89, - COMPUTE_PREEMPTION_SUPPORTED = 90, - CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, - CAN_USE_STREAM_MEM_OPS = 92, - CAN_USE_64_BIT_STREAM_MEM_OPS = 93, - CAN_USE_STREAM_WAIT_VALUE_NOR = 94, - COOPERATIVE_LAUNCH = 95, - COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, - MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, - CAN_FLUSH_REMOTE_WRITES = 98, - HOST_REGISTER_SUPPORTED = 99, - PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, - DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, - VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102, - HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, - HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, - HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, - MAX = 106, } +pub enum DeviceDynamicAttribute { + General(DeviceGeneralAttribute), + Texture(DeviceTextureAttribute) +} + + +#[repr(u8)] +#[derive(TryFromPrimitive)] +#[allow(non_camel_case_types)] +pub enum DeviceGeneralAttribute { + MULTIPROCESSOR_COUNT = 16, + CAN_MAP_HOST_MEMORY = 19, + ASYNC_ENGINE_COUNT = 40, +} + + +#[repr(u8)] +#[derive(TryFromPrimitive)] +#[allow(non_camel_case_types)] +pub enum DeviceTextureAttribute { + MAXIMUM_TEXTURE1D_WIDTH = 21 +} + + impl Result { pub fn from_l0(result: l0::ze_result_t) -> Result { match result { @@ -215,4 +153,7 @@ pub struct Uuid { } #[repr(transparent)] -pub struct Device(pub c_int); \ No newline at end of file +pub struct Device(pub c_int); + +#[repr(transparent)] +pub struct DevicePtr(c_int); \ No newline at end of file diff --git a/notcuda/src/export_table.rs b/notcuda/src/export_table.rs index d9abeef..c1fe5bc 100644 --- a/notcuda/src/export_table.rs +++ b/notcuda/src/export_table.rs @@ -9,14 +9,23 @@ pub unsafe extern "C" fn cuGetExportTable( table: *mut *const std::os::raw::c_void, id: *const cu::Uuid, ) -> cu::Result { - if *id == CU_ETID_ToolsRuntimeCallbackHooks { + if table == ptr::null_mut() || id == ptr::null_mut() { + cu::Result::ERROR_INVALID_VALUE + } else if *id == CU_ETID_ToolsRuntimeCallbackHooks { *table = TABLE0.as_ptr() as *const _; - return cu::Result::SUCCESS; + cu::Result::SUCCESS } else if *id == CU_ETID_CudartInterface { *table = TABLE1.as_ptr() as *const _; - return cu::Result::SUCCESS; + cu::Result::SUCCESS + } else if *id == CU_ETID_ToolsTls { + *table = 1 as _; + cu::Result::SUCCESS + } else if *id == CU_ETID_ContextLocalStorageInterface_v0301 { + *table = ContextLocalStorageInterface_v0301_VTABLE.as_ptr() as *const _; + cu::Result::SUCCESS + } else { + cu::Result::ERROR_NOT_SUPPORTED } - cu::Result::ERROR_NOT_SUPPORTED } const CU_ETID_ToolsRuntimeCallbackHooks: cu::Uuid = cu::Uuid { @@ -26,24 +35,24 @@ const CU_ETID_ToolsRuntimeCallbackHooks: cu::Uuid = cu::Uuid { ], }; #[repr(C)] -union PtrOrLength { +union VTableEntry { ptr: *const (), length: usize, } -unsafe impl Sync for PtrOrLength {} +unsafe impl Sync for VTableEntry {} const TABLE0_LEN: usize = 7; -static TABLE0: [PtrOrLength; TABLE0_LEN] = [ - PtrOrLength { - length: mem::size_of::<[PtrOrLength; TABLE0_LEN]>(), +static TABLE0: [VTableEntry; TABLE0_LEN] = [ + VTableEntry { + length: mem::size_of::<[VTableEntry; TABLE0_LEN]>(), }, - PtrOrLength { ptr: ptr::null() }, - PtrOrLength { + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: table0_fn1 as *const (), }, - PtrOrLength { ptr: ptr::null() }, - PtrOrLength { ptr: ptr::null() }, - PtrOrLength { ptr: ptr::null() }, - PtrOrLength { + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: table0_fn5 as *const (), }, ]; @@ -69,17 +78,59 @@ const CU_ETID_CudartInterface: cu::Uuid = cu::Uuid { ], }; -const TABLE1_LEN: usize = 3; -static TABLE1: [PtrOrLength; TABLE1_LEN] = [ - PtrOrLength { - length: mem::size_of::<[PtrOrLength; TABLE1_LEN]>(), +const TABLE1_LEN: usize = 10; +static TABLE1: [VTableEntry; TABLE1_LEN] = [ + VTableEntry { + length: mem::size_of::<[VTableEntry; TABLE1_LEN]>(), }, - PtrOrLength { ptr: ptr::null() }, - PtrOrLength { + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: table1_fn1 as *const (), }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { + ptr: table1_fn6 as *const (), + }, + VTableEntry { ptr: ptr::null() }, + VTableEntry { ptr: ptr::null() }, ]; unsafe extern "C" fn table1_fn1(_: *mut c_ulong, _: c_int) -> c_int { 0 +} + +unsafe extern "C" fn table1_fn6(_: u64) { } + +const CU_ETID_ToolsTls: cu::Uuid = cu::Uuid { + x: [0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec, 0xdc], +}; + + +const CU_ETID_ContextLocalStorageInterface_v0301: cu::Uuid = cu::Uuid { + x: [0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95, 0x93], +}; + +// the table is much bigger and start earlier +static ContextLocalStorageInterface_v0301_VTABLE: [VTableEntry; 4] = [ + VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn0 as *const () }, + VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn1 as *const () }, + VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn2 as *const () }, + VTableEntry { ptr: ptr::null() }, +]; + +// some kind of ctor +unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn0(ms: *mut usize, _: *mut (), _: *mut (), _: *mut ()) -> u32 { + 0 +} + +// some kind of dtor +unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn1(ms: *mut usize, _: *mut ()) -> u32 { + 0 +} + +unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn2(_: *mut *mut (), _: *mut (), _: *mut ()) -> u32 { + 0 } \ No newline at end of file diff --git a/notcuda/src/lib.rs b/notcuda/src/lib.rs index 22fff81..81f2b92 100644 --- a/notcuda/src/lib.rs +++ b/notcuda/src/lib.rs @@ -153,18 +153,35 @@ pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) -> #[no_mangle] pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: c_int, dev_idx: cu::Device) -> cu::Result { + if pi == ptr::null_mut() { return cu::Result::ERROR_INVALID_VALUE; } - let attrib = match cu::DeviceAttribute::try_from(attrib) { - Ok(attrib) => attrib, + let attrib = match u8::try_from(attrib) { + Ok(a) => a, Err(_) => return cu::Result::ERROR_INVALID_VALUE }; - match ze::Device::try_get_attribute(attrib) { - Some(attrib) => { - unsafe { *pi = attrib }; + match cu::DeviceAttribute::try_new(attrib) { + Some(cu::DeviceAttribute::Static(a)) => { + unsafe { *pi = ze::Device::get_attribute_static(a) }; cu::Result::SUCCESS }, - None => Driver::call_device(dev_idx, |dev| dev.get_attribute(pi, attrib)), + Some(cu::DeviceAttribute::Dynamic(a)) => Driver::call_device(dev_idx, |dev| dev.get_attribute(pi, a)), + // TODO: add support for more properties + None => cu::Result::SUCCESS } +} + +#[no_mangle] +pub extern "C" fn cuDeviceGetUuid(uuid: *mut cu::Uuid, dev_idx: cu::Device) -> cu::Result { + if uuid == ptr::null_mut() { + return cu::Result::ERROR_INVALID_VALUE; + } + Driver::call_device(dev_idx, |dev| dev.get_uuid(uuid)) +} + + +#[no_mangle] +pub extern "C" fn cuMemAlloc_v2(dptr: *mut cu::DevicePtr, bytesize: usize) -> cu::Result { + unimplemented!() } \ No newline at end of file diff --git a/notcuda/src/ze.rs b/notcuda/src/ze.rs index 74821b5..e1fc804 100644 --- a/notcuda/src/ze.rs +++ b/notcuda/src/ze.rs @@ -25,6 +25,13 @@ macro_rules! l0_check { }; } +fn either(r: Result) -> T { + match r { + Ok(x) => x, + Err(x) => x + } +} + pub trait Versioned : Sized { type Version; @@ -60,6 +67,16 @@ impl Versioned for ze_device_properties_t { } } +impl Versioned for ze_device_image_properties_t { + type Version = ze_device_image_properties_version_t; + fn current() -> Self::Version { + ze_device_image_properties_version_t::ZE_DEVICE_IMAGE_PROPERTIES_VERSION_CURRENT + } + fn version(&mut self) -> &mut Self::Version { + &mut self.version + } +} + #[derive(Clone, Copy)] #[repr(transparent)] // required so a Vec can be safely transmutted to Vec pub struct Device(pub ze_device_handle_t); @@ -76,6 +93,12 @@ impl Device { Ok(props) } + fn get_device_image_properties(self) -> Result, ze_result_t> { + let mut props = Box::new(l0::ze_device_image_properties_t::new()); + l0_check_err! { l0::zeDeviceGetImageProperties(self.0, props.as_mut()) }; + Ok(props) + } + pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t { let props = match self.get_device_properties() { Ok(props) => props, @@ -105,40 +128,48 @@ impl Device { l0::ze_result_t::ZE_RESULT_SUCCESS } - pub fn try_get_attribute(attr: cu::DeviceAttribute) -> Option { + pub fn get_attribute_static(attr: cu::DeviceStaticAttribute) -> c_int { match attr { - cu::DeviceAttribute::COMPUTE_CAPABILITY_MAJOR => Some(c_int::max_value()), - cu::DeviceAttribute::COMPUTE_CAPABILITY_MINOR => Some(c_int::max_value()), - cu::DeviceAttribute::GPU_OVERLAP => Some(1), - cu::DeviceAttribute::KERNEL_EXEC_TIMEOUT => Some(0), - _ => None + cu::DeviceStaticAttribute::GPU_OVERLAP => 1, + cu::DeviceStaticAttribute::KERNEL_EXEC_TIMEOUT => 0, + cu::DeviceStaticAttribute::INTEGRATED => 1, + cu::DeviceStaticAttribute::COMPUTE_CAPABILITY_MAJOR => c_int::max_value(), + cu::DeviceStaticAttribute::COMPUTE_CAPABILITY_MINOR => c_int::max_value(), } } - fn map_cuda_attribute(attr: cu::DeviceAttribute, props: &ze_device_properties_t) -> Option { + fn get_attribute_general(attr: cu::DeviceGeneralAttribute, props: &l0::ze_device_properties_t) -> c_int { match attr { - cu::DeviceAttribute::ASYNC_ENGINE_COUNT => Some(props.numAsyncCopyEngines as i32), - cu::DeviceAttribute::MULTIPROCESSOR_COUNT => Some((props.numSlicesPerTile * props.numSubslicesPerSlice) as i32), - cu::DeviceAttribute::KERNEL_EXEC_TIMEOUT => Some(0), - // FIXME - cu::DeviceAttribute::INTEGRATED => Some(1), - cu::DeviceAttribute::CAN_MAP_HOST_MEMORY => Some(props.unifiedMemorySupported as i32), - _ => None + cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => props.unifiedMemorySupported as i32, + cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.numAsyncCopyEngines as i32, + cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => (props.numSlicesPerTile * props.numSubslicesPerSlice) as i32, } } - pub fn get_attribute(self, pi: *mut c_int, attr: cu::DeviceAttribute) -> l0::ze_result_t { - match self.get_device_properties() { - Ok(props) => { - match Device::map_cuda_attribute(attr, &props) { - Some(cuda_value) => { - unsafe { *pi = cuda_value }; - l0::ze_result_t::ZE_RESULT_SUCCESS - }, - None => l0::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE - } + fn get_attribute_texture(attr: cu::DeviceTextureAttribute, props: &l0::ze_device_image_properties_t) -> c_int { + match attr { + cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int, + } + } + + pub fn get_attribute(self, pi: *mut c_int, attr: cu::DeviceDynamicAttribute) -> l0::ze_result_t { + let value_or_err = match attr { + cu::DeviceDynamicAttribute::General(a) => self.get_device_properties().map(|p| Device::get_attribute_general(a, &p)), + cu::DeviceDynamicAttribute::Texture(a) => self.get_device_image_properties().map(|p| Device::get_attribute_texture(a, &p)), + }; + match value_or_err { + Ok(value) => { + unsafe { *pi = value }; + l0::ze_result_t::ZE_RESULT_SUCCESS } - Err(err) => err + Err(e) => e } } + + pub fn get_uuid(self, uuid: *mut cu::Uuid) -> l0::ze_result_t { + either(self.get_device_properties().map(|prop| { + unsafe { *uuid = cu::Uuid{ x: prop.uuid.id } }; + l0::ze_result_t::ZE_RESULT_SUCCESS + })) + } } \ No newline at end of file