diff --git a/notcuda/Cargo.toml b/notcuda/Cargo.toml index d751806..7f24f33 100644 --- a/notcuda/Cargo.toml +++ b/notcuda/Cargo.toml @@ -10,4 +10,5 @@ crate-type = ["cdylib"] [dependencies] level_zero-sys = { path = "../level_zero-sys" } -lazy_static = "1.4" \ No newline at end of file +lazy_static = "1.4" +num_enum = "0.4" \ No newline at end of file diff --git a/notcuda/src/cu.rs b/notcuda/src/cu.rs index 2cf152e..84f7efd 100644 --- a/notcuda/src/cu.rs +++ b/notcuda/src/cu.rs @@ -1,3 +1,4 @@ +use num_enum::TryFromPrimitive; use std::os::raw::c_int; #[repr(C)] @@ -81,7 +82,8 @@ pub enum Result { ERROR_UNKNOWN = 999, } -#[repr(C)] +#[repr(i32)] +#[derive(Copy, Clone, TryFromPrimitive)] #[allow(non_camel_case_types)] pub enum DeviceAttribute { MAX_THREADS_PER_BLOCK = 1, @@ -200,6 +202,7 @@ impl Result { l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE, l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE, l0::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY, + l0::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED, _ => Result::ERROR_UNKNOWN } } diff --git a/notcuda/src/lib.rs b/notcuda/src/lib.rs index 6d9b884..22fff81 100644 --- a/notcuda/src/lib.rs +++ b/notcuda/src/lib.rs @@ -2,14 +2,12 @@ extern crate level_zero_sys as l0; #[macro_use] extern crate lazy_static; +use std::convert::TryFrom; use std::sync::Mutex; use std::ptr; use std::os::raw::{c_char, c_int, c_uint}; -mod cu; -mod export_table; -mod ze; - +#[macro_use] macro_rules! l0_check_err { ($exp:expr) => { { @@ -21,6 +19,10 @@ macro_rules! l0_check_err { }; } +mod cu; +mod export_table; +mod ze; + lazy_static! { pub static ref GLOBAL_STATE: Mutex> = Mutex::new(None); } @@ -149,13 +151,20 @@ pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) -> Driver::call_device(dev_idx, |dev| dev.total_mem(bytes)) } -/* #[no_mangle] -pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: cu::DeviceAttribute, dev: cu::Device) -> cu::Result { - let cu::Device(dev_idx) = dev; - if pi == ptr::null_mut() || dev_idx < 0 { +pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: c_int, dev_idx: cu::Device) -> cu::Result { + if pi == ptr::null_mut() { return cu::Result::ERROR_INVALID_VALUE; } - Driver::call(|driver| driver.device_get_attribute(bytes, dev)) -} -*/ \ No newline at end of file + let attrib = match cu::DeviceAttribute::try_from(attrib) { + Ok(attrib) => attrib, + Err(_) => return cu::Result::ERROR_INVALID_VALUE + }; + match ze::Device::try_get_attribute(attrib) { + Some(attrib) => { + unsafe { *pi = attrib }; + cu::Result::SUCCESS + }, + None => Driver::call_device(dev_idx, |dev| dev.get_attribute(pi, attrib)), + } +} \ No newline at end of file diff --git a/notcuda/src/ze.rs b/notcuda/src/ze.rs index 1da9a3e..74821b5 100644 --- a/notcuda/src/ze.rs +++ b/notcuda/src/ze.rs @@ -1,4 +1,5 @@ use level_zero_sys::*; +use super::cu; use std::cmp; use std::mem; @@ -69,9 +70,17 @@ impl Device { unsafe { mem::transmute(v) } } - pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t { + fn get_device_properties(self) -> Result, ze_result_t> { let mut props = Box::new(l0::ze_device_properties_t::new()); - l0_check! { l0::zeDeviceGetProperties(self.0, props.as_mut()) }; + l0_check_err! { l0::zeDeviceGetProperties(self.0, props.as_mut()) }; + Ok(props) + } + + pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t { + let props = match self.get_device_properties() { + Ok(props) => props, + Err(res) => return res + }; let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0); let dst_null_pos = cmp::min((len - 1) as usize, null_pos); unsafe { *(name.add(dst_null_pos)) = 0 }; @@ -95,4 +104,41 @@ impl Device { unsafe { *bytes = max_mem as usize }; l0::ze_result_t::ZE_RESULT_SUCCESS } + + pub fn try_get_attribute(attr: cu::DeviceAttribute) -> Option { + match attr { + cu::DeviceAttribute::COMPUTE_CAPABILITY_MAJOR => Some(c_int::max_value()), + cu::DeviceAttribute::COMPUTE_CAPABILITY_MINOR => Some(c_int::max_value()), + cu::DeviceAttribute::GPU_OVERLAP => Some(1), + cu::DeviceAttribute::KERNEL_EXEC_TIMEOUT => Some(0), + _ => None + } + } + + fn map_cuda_attribute(attr: cu::DeviceAttribute, props: &ze_device_properties_t) -> Option { + match attr { + cu::DeviceAttribute::ASYNC_ENGINE_COUNT => Some(props.numAsyncCopyEngines as i32), + cu::DeviceAttribute::MULTIPROCESSOR_COUNT => Some((props.numSlicesPerTile * props.numSubslicesPerSlice) as i32), + cu::DeviceAttribute::KERNEL_EXEC_TIMEOUT => Some(0), + // FIXME + cu::DeviceAttribute::INTEGRATED => Some(1), + cu::DeviceAttribute::CAN_MAP_HOST_MEMORY => Some(props.unifiedMemorySupported as i32), + _ => None + } + } + + pub fn get_attribute(self, pi: *mut c_int, attr: cu::DeviceAttribute) -> l0::ze_result_t { + match self.get_device_properties() { + Ok(props) => { + match Device::map_cuda_attribute(attr, &props) { + Some(cuda_value) => { + unsafe { *pi = cuda_value }; + l0::ze_result_t::ZE_RESULT_SUCCESS + }, + None => l0::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE + } + } + Err(err) => err + } + } } \ No newline at end of file