Update level0 to 1.0 and unpack fatbins

This commit is contained in:
Andrzej Janik 2020-08-28 22:37:16 +02:00
parent d47cd1e133
commit 34dc149be1
17 changed files with 8112 additions and 4753 deletions

View file

@ -54,3 +54,24 @@ PTX grammar
-----------
* PTX grammar rules are atrocious, keywords can be freely reused as ids without escaping
* Modifiers can be applied to instructions in any arbitrary order. We don't support it and hope we will never have to
Rust debugging
--------------
* Nothing works 100% well on vscode/Windows:
* MSVC/lldb - always garbage (simple enums are fubar)
* MSVC/cppvsdbg - sometimes garbage (nested enums are fubar)
* GNU/lldb - mostly fine, but can't follow child processes
* GNU/gdb - always garbage (I don't have the patience to manually QA rust-gdb on Windows) and doesn't quite understand file paths for breakpoints
* Neither on vscode/Linux:
* lldb - mostly fine, but can't follow child processes
* gdb - visualizes variables somewhat awkwardly (shows all possible variants of an enum)
* CLion could be the solution, but intellij-rust can't load this project
CUDA <-> L0
-----------
* device ~= device
* stream ~= command queue
* context ~= context (1.0+)
* graph ~= command list
* module ~= module

View file

@ -1,6 +1,6 @@
[package]
name = "level_zero-sys"
version = "0.91.0"
version = "1.0.4"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
links = "ze_loader"

View file

@ -1 +1,4 @@
bindgen --size_t-is-usize --default-enum-style=rust --whitelist-function ze.* /usr/include/level_zero/zex_api.h -o zex_api.rs -- -x c++ && sed -i 's/pub enum _ze_result_t/#[must_use]\npub enum _ze_result_t/g' zex_api.rs
sed 's/^typedef uint32_t ze_.*flags_t;$//g' /usr/local/include/level_zero/ze_api.h > ze_api.h
sed -i -r 's/ze_(.*)_flag_t/ze_\1_flags_t/g' ze_api.h
bindgen --size_t-is-usize --default-enum-style=newtype --bitfield-enum ".*flags_t" --whitelist-function "ze.*" ze_api.h -o ze_api.rs
sed -i 's/pub struct _ze_result_t/#[must_use]\npub struct _ze_result_t/g' ze_api.rs

View file

@ -1,7 +1,18 @@
use env::VarError;
use std::{env, path::PathBuf};
fn main() {
fn main() -> Result<(), VarError> {
println!("cargo:rustc-link-lib=dylib=ze_loader");
// TODO: make this windows-only
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
if env::var("CARGO_CFG_WINDOWS").is_ok() {
let env = env::var("CARGO_CFG_TARGET_ENV")?;
if env == "gnu" {
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
} else {
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
path.push("src");
println!("cargo:rustc-link-search=native={}", path.display());
};
}
println!("cargo:rerun-if-changed=build.rs");
}
Ok(())
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -6,13 +6,14 @@ use std::{
mem, ptr,
};
pub type Result<T> = std::result::Result<T, Error>;
macro_rules! check {
($expr:expr) => {
let err = unsafe { $expr };
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
return Result::Err(Error::err(err));
#[allow(unused_unsafe)]
{
let err = unsafe { $expr };
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
return Result::Err(Error(err));
}
}
};
}
@ -26,173 +27,17 @@ macro_rules! check_panic {
};
}
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Error {
NotReady = 1,
DeviceLost = 1879048193,
OutOfHostMemory = 1879048194,
OutOfDeviceMemory = 1879048195,
ModuleBuildFailure = 1879048196,
InsufficientPermissions = 1879113728,
NotAvailable = 1879113729,
Uninitialized = 2013265921,
UnsupportedVersion = 2013265922,
UnsupportedFeature = 2013265923,
InvalidArgument = 2013265924,
InvalidNullHandle = 2013265925,
HandleObjectInUse = 2013265926,
InvalidNullPointer = 2013265927,
InvalidSize = 2013265928,
UnsupportedSize = 2013265929,
UnsupportedAlignment = 2013265930,
InvalidSynchronizationObject = 2013265931,
InvalidEnumeration = 2013265932,
UnsupportedEnumeration = 2013265933,
UnsupportedImageFormat = 2013265934,
InvalidNativeBinary = 2013265935,
InvalidGlobalName = 2013265936,
InvalidKernelName = 2013265937,
InvalidFunctionName = 2013265938,
InvalidGroupSizeDimension = 2013265939,
InvalidGlobalWidthDimension = 2013265940,
InvalidKernelArgumentIndex = 2013265941,
InvalidKernelArgumentSize = 2013265942,
InvalidKernelAttributeValue = 2013265943,
InvalidCommandListType = 2013265944,
OverlappingRegions = 2013265945,
Unknown = 2147483647,
}
pub struct Error(pub sys::ze_result_t);
impl Error {
pub fn new<T>(r: sys::ze_result_t, t: T) -> Result<T> {
Error::new_with(r, || t)
}
pub fn new_with<T, F: FnOnce() -> T>(r: sys::ze_result_t, f: F) -> Result<T> {
match r {
sys::ze_result_t::ZE_RESULT_SUCCESS => Ok(f()),
sys::ze_result_t::ZE_RESULT_NOT_READY => Err(Error::NotReady),
sys::ze_result_t::ZE_RESULT_ERROR_DEVICE_LOST => Err(Error::DeviceLost),
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory),
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY => Err(Error::OutOfDeviceMemory),
sys::ze_result_t::ZE_RESULT_ERROR_MODULE_BUILD_FAILURE => {
Err(Error::ModuleBuildFailure)
}
sys::ze_result_t::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS => {
Err(Error::InsufficientPermissions)
}
sys::ze_result_t::ZE_RESULT_ERROR_NOT_AVAILABLE => Err(Error::NotAvailable),
sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Err(Error::Uninitialized),
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_VERSION => Err(Error::UnsupportedVersion),
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Err(Error::UnsupportedFeature),
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Err(Error::InvalidArgument),
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_HANDLE => Err(Error::InvalidNullHandle),
sys::ze_result_t::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE => Err(Error::HandleObjectInUse),
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_POINTER => {
Err(Error::InvalidNullPointer)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SIZE => Err(Error::InvalidSize),
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_SIZE => Err(Error::UnsupportedSize),
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT => {
Err(Error::UnsupportedAlignment)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT => {
Err(Error::InvalidSynchronizationObject)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Err(Error::InvalidEnumeration),
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION => {
Err(Error::UnsupportedEnumeration)
}
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT => {
Err(Error::UnsupportedImageFormat)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NATIVE_BINARY => {
Err(Error::InvalidNativeBinary)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_NAME => Err(Error::InvalidGlobalName),
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_NAME => Err(Error::InvalidKernelName),
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_FUNCTION_NAME => {
Err(Error::InvalidFunctionName)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION => {
Err(Error::InvalidGroupSizeDimension)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION => {
Err(Error::InvalidGlobalWidthDimension)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX => {
Err(Error::InvalidKernelArgumentIndex)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE => {
Err(Error::InvalidKernelArgumentSize)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE => {
Err(Error::InvalidKernelAttributeValue)
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE => {
Err(Error::InvalidCommandListType)
}
sys::ze_result_t::ZE_RESULT_ERROR_OVERLAPPING_REGIONS => Err(Error::OverlappingRegions),
sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN => Err(Error::Unknown),
}
}
fn err(r: sys::ze_result_t) -> Self {
match r {
sys::ze_result_t::ZE_RESULT_SUCCESS => unreachable!(),
sys::ze_result_t::ZE_RESULT_NOT_READY => Error::NotReady,
sys::ze_result_t::ZE_RESULT_ERROR_DEVICE_LOST => Error::DeviceLost,
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Error::OutOfHostMemory,
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY => Error::OutOfDeviceMemory,
sys::ze_result_t::ZE_RESULT_ERROR_MODULE_BUILD_FAILURE => Error::ModuleBuildFailure,
sys::ze_result_t::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS => {
Error::InsufficientPermissions
}
sys::ze_result_t::ZE_RESULT_ERROR_NOT_AVAILABLE => Error::NotAvailable,
sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Error::Uninitialized,
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_VERSION => Error::UnsupportedVersion,
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Error::UnsupportedFeature,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Error::InvalidArgument,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_HANDLE => Error::InvalidNullHandle,
sys::ze_result_t::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE => Error::HandleObjectInUse,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_POINTER => Error::InvalidNullPointer,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SIZE => Error::InvalidSize,
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_SIZE => Error::UnsupportedSize,
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT => Error::UnsupportedAlignment,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT => {
Error::InvalidSynchronizationObject
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Error::InvalidEnumeration,
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION => {
Error::UnsupportedEnumeration
}
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT => {
Error::UnsupportedImageFormat
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NATIVE_BINARY => Error::InvalidNativeBinary,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_NAME => Error::InvalidGlobalName,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_NAME => Error::InvalidKernelName,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_FUNCTION_NAME => Error::InvalidFunctionName,
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION => {
Error::InvalidGroupSizeDimension
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION => {
Error::InvalidGlobalWidthDimension
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX => {
Error::InvalidKernelArgumentIndex
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE => {
Error::InvalidKernelArgumentSize
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE => {
Error::InvalidKernelAttributeValue
}
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE => {
Error::InvalidCommandListType
}
sys::ze_result_t::ZE_RESULT_ERROR_OVERLAPPING_REGIONS => Error::OverlappingRegions,
sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN => Error::Unknown,
fn new<T>(res: sys::ze_result_t, default: T) -> Result<T> {
if res == sys::ze_result_t::ZE_RESULT_SUCCESS {
Ok(default)
} else {
Err(Self(res))
}
}
}
@ -207,7 +52,7 @@ impl std::error::Error for Error {}
pub fn init() -> Result<()> {
Error::new(
unsafe { sys::zeInit(sys::ze_init_flag_t::ZE_INIT_FLAG_NONE) },
unsafe { sys::zeInit(sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY) },
(),
)
}
@ -246,6 +91,9 @@ impl Driver {
&mut len,
result.as_mut_ptr() as *mut _
));
if (len as usize) < result.len() {
result.truncate(len as usize);
}
Ok(result)
}
}
@ -260,6 +108,80 @@ impl Device {
pub unsafe fn from_ffi(x: sys::ze_device_handle_t) -> Self {
Self(x)
}
/// Queries this device's properties through `zeDeviceGetProperties`.
///
/// Returns the filled-in properties struct on the heap, or the Level Zero
/// error reported by the call.
pub fn get_properties(&self) -> Result<Box<sys::ze_device_properties_t>> {
    // Start from an all-zero struct so `pNext` is null and every output
    // field has a defined value before the driver writes to it.
    let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_properties_t>() });
    // Level Zero 1.0 structures are self-describing: the caller must tag the
    // struct with its type before handing it to the driver, exactly like the
    // descriptor structs elsewhere in this file do.
    props.stype = sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    check! { sys::zeDeviceGetProperties(self.0, props.as_mut()) };
    Ok(props)
}
/// Queries this device's image properties through
/// `zeDeviceGetImageProperties`.
///
/// Returns the filled-in properties struct on the heap, or the Level Zero
/// error reported by the call.
pub fn get_image_properties(&self) -> Result<Box<sys::ze_device_image_properties_t>> {
    // Zero-initialise so `pNext` is null and all output fields are defined.
    let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_image_properties_t>() });
    // The 1.0 API requires the caller to set the structure type tag; a zeroed
    // tag is not a valid `ze_structure_type_t` for this struct.
    props.stype = sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES;
    check! { sys::zeDeviceGetImageProperties(self.0, props.as_mut()) };
    Ok(props)
}
/// Queries the memory properties of this device through
/// `zeDeviceGetMemoryProperties`.
///
/// The call is made twice: first with a null output pointer to learn how
/// many entries exist, then with a buffer of that size to fetch them.
/// Returns an empty vector when the driver reports zero entries, or the
/// Level Zero error reported by either call.
pub fn get_memory_properties(&self) -> Result<Vec<sys::ze_device_memory_properties_t>> {
    let mut count = 0u32;
    check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
    if count == 0 {
        return Ok(Vec::new());
    }
    // Every element handed to the driver must carry its structure type tag
    // (Level Zero 1.0 requirement); `pNext` stays null from the zeroing.
    let mut template = unsafe { mem::zeroed::<sys::ze_device_memory_properties_t>() };
    template.stype = sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
    let mut props = vec![template; count as usize];
    check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
    // `count` is an in/out parameter: if the second call wrote fewer entries
    // than were allocated, drop the unfilled tail instead of returning
    // placeholder elements (a no-op when the count is unchanged).
    props.truncate(count as usize);
    Ok(props)
}
/// Allocates device memory on this device inside `ctx` by calling
/// `zeMemAllocDevice`.
///
/// `size` and `alignment` are forwarded to the driver unchanged; the
/// descriptor uses zero flags and ordinal 0. Returns the raw pointer written
/// by the driver, or the Level Zero error reported by the call.
///
/// # Safety
///
/// NOTE(review): the visible code does not state the caller contract;
/// presumably the caller is responsible for the lifetime of the returned raw
/// allocation - confirm.
pub unsafe fn mem_alloc_device(
    &mut self,
    ctx: &mut Context,
    size: usize,
    alignment: usize,
) -> Result<*mut c_void> {
    let mut allocation = ptr::null_mut();
    let alloc_desc = sys::ze_device_mem_alloc_desc_t {
        stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
        pNext: ptr::null(),
        flags: sys::ze_device_mem_alloc_flags_t(0),
        ordinal: 0,
    };
    // TODO: check current context for the device
    check! {
        sys::zeMemAllocDevice(ctx.0, &alloc_desc, size, alignment, self.0, &mut allocation)
    };
    Ok(allocation)
}
}
/// Wrapper around a raw Level Zero context handle.
///
/// `repr(transparent)` gives it the same layout as the handle it wraps.
#[repr(transparent)]
pub struct Context(sys::ze_context_handle_t);

impl Context {
    /// Returns the wrapped raw context handle.
    pub unsafe fn as_ffi(&self) -> sys::ze_context_handle_t {
        self.0
    }

    /// Wraps an existing raw context handle without any validation.
    pub unsafe fn from_ffi(x: sys::ze_context_handle_t) -> Self {
        Context(x)
    }

    /// Creates a new context on `drv` via `zeContextCreate`, using a
    /// descriptor with zero flags and no extension chain.
    ///
    /// Returns the Level Zero error reported by the call on failure.
    pub fn new(drv: &Driver) -> Result<Self> {
        let mut handle = ptr::null_mut();
        let desc = sys::ze_context_desc_t {
            stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_CONTEXT_DESC,
            pNext: ptr::null(),
            flags: sys::ze_context_flags_t(0),
        };
        check!(sys::zeContextCreate(drv.0, &desc, &mut handle));
        Ok(Self(handle))
    }
}
#[repr(transparent)]
@ -273,16 +195,23 @@ impl CommandQueue {
Self(x)
}
pub fn new(d: &Device) -> Result<Self> {
pub fn new(ctx: &mut Context, d: &Device) -> Result<Self> {
let que_desc = sys::ze_command_queue_desc_t {
version: sys::ze_command_queue_desc_version_t::ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
flags: sys::ze_command_queue_flag_t::ZE_COMMAND_QUEUE_FLAG_NONE,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
pNext: ptr::null(),
ordinal: 0,
index: 0,
flags: sys::ze_command_queue_flags_t(0),
mode: sys::ze_command_queue_mode_t::ZE_COMMAND_QUEUE_MODE_DEFAULT,
priority: sys::ze_command_queue_priority_t::ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
ordinal: 0,
};
let mut result = ptr::null_mut();
check!(sys::zeCommandQueueCreate(d.0, &que_desc, &mut result));
check!(sys::zeCommandQueueCreate(
ctx.0,
d.0,
&que_desc,
&mut result
));
Ok(CommandQueue(result))
}
@ -318,17 +247,29 @@ impl Module {
Self(x)
}
pub fn new_spirv(d: &Device, bin: &[u8], opts: Option<&CStr>) -> Result<Self> {
Module::new(true, d, bin, opts)
pub fn new_spirv(
ctx: &mut Context,
d: &Device,
bin: &[u8],
opts: Option<&CStr>,
) -> Result<Self> {
Module::new(ctx, true, d, bin, opts)
}
pub fn new_native(d: &Device, bin: &[u8]) -> Result<Self> {
Module::new(false, d, bin, None)
pub fn new_native(ctx: &mut Context, d: &Device, bin: &[u8]) -> Result<Self> {
Module::new(ctx, false, d, bin, None)
}
fn new(spirv: bool, d: &Device, bin: &[u8], opts: Option<&CStr>) -> Result<Self> {
fn new(
ctx: &mut Context,
spirv: bool,
d: &Device,
bin: &[u8],
opts: Option<&CStr>,
) -> Result<Self> {
let desc = sys::ze_module_desc_t {
version: sys::ze_module_desc_version_t::ZE_MODULE_DESC_VERSION_CURRENT,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC,
pNext: ptr::null(),
format: if spirv {
sys::ze_module_format_t::ZE_MODULE_FORMAT_IL_SPIRV
} else {
@ -341,6 +282,7 @@ impl Module {
};
let mut result: sys::ze_module_handle_t = ptr::null_mut();
check!(sys::zeModuleCreate(
ctx.0,
d.0,
&desc,
&mut result,
@ -371,7 +313,7 @@ impl SafeRepr for f64 {}
pub struct DeviceBuffer<T: SafeRepr> {
ptr: *mut c_void,
driver: sys::ze_driver_handle_t,
ctx: sys::ze_context_handle_t,
len: usize,
marker: PhantomData<T>,
}
@ -380,33 +322,33 @@ impl<T: SafeRepr> DeviceBuffer<T> {
pub unsafe fn as_ffi(&self) -> *mut c_void {
self.ptr
}
pub unsafe fn from_ffi(driver: sys::ze_driver_handle_t, ptr: *mut c_void, len: usize) -> Self {
pub unsafe fn from_ffi(ctx: sys::ze_context_handle_t, ptr: *mut c_void, len: usize) -> Self {
let marker = PhantomData::<T>;
Self {
ptr,
driver,
ctx,
len,
marker,
}
}
pub fn new(drv: &Driver, dev: &Device, len: usize) -> Result<Self> {
pub fn new(ctx: &mut Context, dev: &Device, len: usize) -> Result<Self> {
let desc = sys::_ze_device_mem_alloc_desc_t {
version:
sys::ze_device_mem_alloc_desc_version_t::ZE_DEVICE_MEM_ALLOC_DESC_VERSION_CURRENT,
flags: sys::ze_device_mem_alloc_flag_t::ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
pNext: ptr::null(),
flags: sys::ze_device_mem_alloc_flags_t(0),
ordinal: 0,
};
let mut result = ptr::null_mut();
check!(sys::zeDriverAllocDeviceMem(
drv.0,
check!(sys::zeMemAllocDevice(
ctx.0,
&desc,
len * mem::size_of::<T>(),
mem::align_of::<T>(),
dev.0,
&mut result
));
Ok(unsafe { Self::from_ffi(drv.0, result, len) })
Ok(unsafe { Self::from_ffi(ctx.0, result, len) })
}
pub fn len(&self) -> usize {
@ -417,7 +359,7 @@ impl<T: SafeRepr> DeviceBuffer<T> {
impl<T: SafeRepr> Drop for DeviceBuffer<T> {
#[allow(unused_must_use)]
fn drop(&mut self) {
check_panic! { sys::zeDriverFreeMem(self.driver, self.ptr) };
check_panic! { sys::zeMemFree(self.ctx, self.ptr) };
}
}
@ -431,13 +373,15 @@ impl<'a> CommandList<'a> {
Self(x, PhantomData)
}
pub fn new(dev: &Device) -> Result<Self> {
pub fn new(ctx: &mut Context, dev: &Device) -> Result<Self> {
let desc = sys::ze_command_list_desc_t {
version: sys::ze_command_list_desc_version_t::ZE_COMMAND_LIST_DESC_VERSION_CURRENT,
flags: sys::ze_command_list_flag_t::ZE_COMMAND_LIST_FLAG_NONE,
stype: sys::_ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
commandQueueGroupOrdinal: 0,
pNext: ptr::null(),
flags: sys::ze_command_list_flags_t(0),
};
let mut result: sys::ze_command_list_handle_t = ptr::null_mut();
check!(sys::zeCommandListCreate(dev.0, &desc, &mut result));
check!(sys::zeCommandListCreate(ctx.0, dev.0, &desc, &mut result));
Ok(Self(result, PhantomData))
}
@ -449,19 +393,34 @@ impl<'a> CommandList<'a> {
&mut self,
dst: Dst,
src: Src,
length: Option<usize>,
signal: Option<&Event<'a>>,
signal: Option<&mut Event<'a>>,
wait: &mut [Event<'a>],
) -> Result<()> {
let dst = dst.into();
let src = src.into();
let elements = length.unwrap_or(std::cmp::max(dst.len(), src.len()));
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let elements = std::cmp::min(dst.len(), src.len());
let length = elements * mem::size_of::<T>();
unsafe { self.append_memory_copy_unsafe(dst.get(), src.get(), length, signal, wait) }
}
pub unsafe fn append_memory_copy_unsafe(
&mut self,
dst: *mut c_void,
src: *const c_void,
length: usize,
signal: Option<&mut Event<'a>>,
wait: &mut [Event<'a>],
) -> Result<()> {
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let (wait_len, wait_ptr) = Event::raw_slice(wait);
check!(sys::zeCommandListAppendMemoryCopy(
self.0,
dst.get(),
src.get(),
elements * std::mem::size_of::<T>(),
event,
dst,
src,
length,
signal_event,
wait_len,
wait_ptr
));
Ok(())
}
@ -469,19 +428,23 @@ impl<'a> CommandList<'a> {
pub fn append_memory_fill<T>(
&mut self,
dst: BufferPtrMut<'a, T>,
pattern: T,
signal: Option<&Event<'a>>,
pattern: u8,
signal: Option<&mut Event<'a>>,
wait: &mut [Event<'a>],
) -> Result<()> {
let raw_pattern = &pattern as *const T as *const _;
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let raw_pattern = &pattern as *const u8 as *const _;
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
let byte_len = dst.len() * mem::size_of::<T>();
check!(sys::zeCommandListAppendMemoryFill(
self.0,
dst.get(),
raw_pattern,
mem::size_of::<T>(),
mem::size_of::<u8>(),
byte_len,
event,
signal_event,
wait_len,
wait_ptr
));
Ok(())
}
@ -490,23 +453,23 @@ impl<'a> CommandList<'a> {
&mut self,
kernel: &'a Kernel,
group_count: &[u32; 3],
signal: Option<&Event<'a>>,
wait: &[&Event<'a>],
signal: Option<&mut Event<'a>>,
wait: &mut [Event<'a>],
) -> Result<()> {
let gr_count = sys::ze_group_count_t {
groupCountX: group_count[0],
groupCountY: group_count[1],
groupCountZ: group_count[2],
};
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let mut wait_ptrs = wait.iter().map(|e| e.0).collect::<Vec<_>>();
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
check!(sys::zeCommandListAppendLaunchKernel(
self.0,
kernel.0,
&gr_count,
event,
wait.len() as u32,
wait_ptrs.as_mut_ptr(),
signal_event,
wait_len,
wait_ptr,
));
Ok(())
}
@ -528,8 +491,9 @@ pub struct FenceGuard<'a>(
impl<'a> FenceGuard<'a> {
fn new(q: &'a CommandQueue, cmd_list: sys::ze_command_list_handle_t) -> Result<Self> {
let desc = sys::_ze_fence_desc_t {
version: sys::ze_fence_desc_version_t::ZE_FENCE_DESC_VERSION_CURRENT,
flags: sys::ze_fence_flag_t::ZE_FENCE_FLAG_NONE,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_FENCE_DESC,
pNext: ptr::null(),
flags: sys::ze_fence_flags_t(0),
};
let mut result = ptr::null_mut();
check!(sys::zeFenceCreate(q.0, &desc, &mut result));
@ -540,7 +504,7 @@ impl<'a> FenceGuard<'a> {
impl<'a> Drop for FenceGuard<'a> {
#[allow(unused_must_use)]
fn drop(&mut self) {
check_panic! { sys::zeFenceHostSynchronize(self.0, u32::max_value()) };
check_panic! { sys::zeFenceHostSynchronize(self.0, u64::max_value()) };
check_panic! { sys::zeFenceDestroy(self.0) };
check_panic! { sys::zeCommandListDestroy(self.1) };
}
@ -638,10 +602,11 @@ impl<'a> EventPool<'a> {
pub unsafe fn from_ffi(x: sys::ze_event_pool_handle_t) -> Self {
Self(x, PhantomData)
}
pub fn new(driver: &Driver, count: u32, dev: Option<&[&'a Device]>) -> Result<Self> {
pub fn new(ctx: &mut Context, count: u32, dev: Option<&[&'a Device]>) -> Result<Self> {
let desc = sys::ze_event_pool_desc_t {
version: sys::ze_event_pool_desc_version_t::ZE_EVENT_POOL_DESC_VERSION_CURRENT,
flags: sys::ze_event_pool_flag_t::ZE_EVENT_POOL_FLAG_DEFAULT,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
pNext: ptr::null(),
flags: sys::ze_event_pool_flags_t(0),
count: count,
};
let mut dev = dev.map(|d| d.iter().map(|d| d.0).collect::<Vec<_>>());
@ -649,7 +614,7 @@ impl<'a> EventPool<'a> {
let dev_ptr = dev.as_mut().map_or(ptr::null_mut(), |d| d.as_mut_ptr());
let mut result = ptr::null_mut();
check!(sys::zeEventPoolCreate(
driver.0,
ctx.0,
&desc,
dev_len,
dev_ptr,
@ -679,15 +644,25 @@ impl<'a> Event<'a> {
pub fn new(pool: &'a EventPool, index: u32) -> Result<Self> {
let desc = sys::ze_event_desc_t {
version: sys::ze_event_desc_version_t::ZE_EVENT_DESC_VERSION_CURRENT,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_DESC,
pNext: ptr::null(),
index: index,
signal: sys::ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_NONE,
wait: sys::ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_NONE,
signal: sys::ze_event_scope_flags_t(0),
wait: sys::ze_event_scope_flags_t(0),
};
let mut result = ptr::null_mut();
check!(sys::zeEventCreate(pool.0, &desc, &mut result));
Ok(Self(result, PhantomData))
}
/// Converts a slice of events into the `(count, pointer)` pair that the
/// Level Zero wait-list parameters take.
///
/// An empty slice yields a null pointer; otherwise the pointer is the start
/// of the slice reinterpreted as an array of raw event handles. The length
/// is narrowed to `u32` with a plain cast.
unsafe fn raw_slice(e: &mut [Event]) -> (u32, *mut sys::ze_event_handle_t) {
    let count = e.len() as u32;
    let handles = if e.is_empty() {
        ptr::null_mut()
    } else {
        // The cast treats each `Event` as if it were a bare handle.
        e.as_mut_ptr() as *mut sys::ze_event_handle_t
    };
    (count, handles)
}
}
impl<'a> Drop for Event<'a> {
@ -710,8 +685,9 @@ impl<'a> Kernel<'a> {
pub fn new_resident(module: &'a Module, name: &CStr) -> Result<Self> {
let desc = sys::ze_kernel_desc_t {
version: sys::ze_kernel_desc_version_t::ZE_KERNEL_DESC_VERSION_CURRENT,
flags: sys::ze_kernel_flag_t::ZE_KERNEL_FLAG_FORCE_RESIDENCY,
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_KERNEL_DESC,
pNext: ptr::null(),
flags: sys::ze_kernel_flags_t::ZE_KERNEL_FLAG_FORCE_RESIDENCY,
pKernelName: name.as_ptr() as *const _,
};
let mut result = ptr::null_mut();
@ -719,18 +695,11 @@ impl<'a> Kernel<'a> {
Ok(Self(result, PhantomData))
}
pub fn set_attribute_bool(
pub fn set_indirect_access(
&mut self,
attr: sys::ze_kernel_attribute_t,
value: bool,
flags: sys::ze_kernel_indirect_access_flags_t,
) -> Result<()> {
let ze_bool: sys::ze_bool_t = if value { 1 } else { 0 };
check!(sys::zeKernelSetAttribute(
self.0,
attr,
mem::size_of::<sys::ze_bool_t>() as u32,
&ze_bool as *const _ as *const _
));
check!(sys::zeKernelSetIndirectAccess(self.0, flags));
Ok(())
}
@ -771,3 +740,16 @@ impl<'a> Drop for Kernel<'a> {
check_panic! { sys::zeKernelDestroy(self.0) };
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Event::raw_slice` casts a pointer to `Event` into a pointer to
    /// `ze_event_handle_t`, so the two types must at least have equal size.
    #[test]
    fn event_has_correct_layout() {
        let wrapper_size = mem::size_of::<Event>();
        let handle_size = mem::size_of::<sys::ze_event_handle_t>();
        assert_eq!(wrapper_size, handle_size);
    }
}

View file

@ -9,6 +9,9 @@ name = "nvcuda"
crate-type = ["cdylib"]
[dependencies]
ptx = { path = "../ptx" }
level_zero = { path = "../level_zero" }
level_zero-sys = { path = "../level_zero-sys" }
lazy_static = "1.4"
num_enum = "0.4"
num_enum = "0.4"
lz4 = "1.23"

View file

@ -1,9 +1,10 @@
use num_enum::TryFromPrimitive;
use std::convert::TryFrom;
use std::os::raw::c_int;
use std::ptr;
use std::{mem, ptr};
use crate::ze;
#[repr(C)]
#[repr(u32)]
#[allow(non_camel_case_types)]
pub enum Result {
SUCCESS = 0,
@ -134,14 +135,14 @@ pub enum DeviceTextureAttribute {
impl Result {
pub fn from_l0(result: l0::ze_result_t) -> Result {
pub fn from_l0(result: l0_sys::ze_result_t) -> Result {
match result {
l0::ze_result_t::ZE_RESULT_SUCCESS => Result::SUCCESS,
l0::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Result::ERROR_NOT_INITIALIZED,
l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE,
l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE,
l0::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY,
l0::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED,
l0_sys::ze_result_t::ZE_RESULT_SUCCESS => Result::SUCCESS,
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Result::ERROR_NOT_INITIALIZED,
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE,
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE,
l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY,
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED,
_ => Result::ERROR_UNKNOWN
}
}
@ -157,11 +158,11 @@ pub struct Uuid {
pub struct Device(pub c_int);
#[repr(transparent)]
pub struct DevicePtr(usize);
pub struct DevicePtr(pub usize);
#[repr(transparent)]
#[derive(Clone, PartialEq)]
pub struct Context(*mut ());
#[derive(Clone, Copy, PartialEq)]
pub struct Context(pub *mut ze::Context);
impl Context {
pub fn null() -> Context {
Context(ptr::null_mut())
@ -169,7 +170,21 @@ impl Context {
}
#[repr(transparent)]
pub struct Module(*mut ());
#[derive(Clone, Copy, PartialEq)]
pub struct Module(*mut ze::Module);
impl Module {
pub fn null() -> Module {
Module(ptr::null_mut())
}
pub fn new(inner: ze::Module) -> Module {
let mut boxed = Box::new(inner);
let result = Module(boxed.as_mut());
mem::forget(boxed);
result
}
}
#[repr(transparent)]
pub struct Function(*mut ());

1
notcuda/src/cuda/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod rt;

2
notcuda/src/cuda/rt.rs Normal file
View file

@ -0,0 +1,2 @@
// Enum with no variants: no value of this type can ever be constructed.
// NOTE(review): presumably an opaque placeholder for CUDA runtime context
// state - confirm the intended use.
pub enum ContextState {}
// Enum with no variants: no value of this type can ever be constructed.
// NOTE(review): presumably an opaque placeholder for the runtime's context
// state manager - confirm the intended use.
pub enum ContextStateManager {}

View file

@ -1,8 +1,9 @@
use super::cu;
use crate::cu;
use crate::{cuda, ze};
use std::mem;
use std::ptr;
use std::os::raw::{c_int, c_ulong};
use std::os::raw::{c_int, c_uint, c_ulong, c_ushort};
use std::{ffi::c_void, ptr, slice};
#[no_mangle]
pub unsafe extern "C" fn cuGetExportTable(
@ -11,24 +12,24 @@ pub unsafe extern "C" fn cuGetExportTable(
) -> cu::Result {
if table == ptr::null_mut() || id == ptr::null_mut() {
cu::Result::ERROR_INVALID_VALUE
} else if *id == CU_ETID_ToolsRuntimeCallbackHooks {
*table = TABLE0.as_ptr() as *const _;
} else if *id == TOOLS_RUNTIME_CALLBACK_HOOKS_GUID {
*table = TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _;
cu::Result::SUCCESS
} else if *id == CU_ETID_CudartInterface {
*table = TABLE1.as_ptr() as *const _;
} else if *id == CUDART_INTERFACE_GUID {
*table = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
cu::Result::SUCCESS
} else if *id == CU_ETID_ToolsTls {
} else if *id == TOOLS_TLS_GUID {
*table = 1 as _;
cu::Result::SUCCESS
} else if *id == CU_ETID_ContextLocalStorageInterface_v0301 {
*table = ContextLocalStorageInterface_v0301_VTABLE.as_ptr() as *const _;
} else if *id == CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID {
*table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _;
cu::Result::SUCCESS
} else {
cu::Result::ERROR_NOT_SUPPORTED
}
}
const CU_ETID_ToolsRuntimeCallbackHooks: cu::Uuid = cu::Uuid {
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: cu::Uuid = cu::Uuid {
x: [
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
0x66,
@ -40,97 +41,254 @@ union VTableEntry {
length: usize,
}
unsafe impl Sync for VTableEntry {}
const TABLE0_LEN: usize = 7;
static TABLE0: [VTableEntry; TABLE0_LEN] = [
const TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH: usize = 7;
static TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE: [VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH] = [
VTableEntry {
length: mem::size_of::<[VTableEntry; TABLE0_LEN]>(),
length: mem::size_of::<[VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH]>(),
},
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: table0_fn1 as *const (),
ptr: runtime_callback_hooks_fn1 as *const (),
},
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: table0_fn5 as *const (),
ptr: runtime_callback_hooks_fn5 as *const (),
},
];
static mut TABLE0_FN1_SPACE: [u8; 512] = [0; 512];
static mut TABLE0_FN5_SPACE: [u8; 2] = [0; 2];
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE: [u8; 512] = [0; 512];
unsafe extern "C" fn table0_fn1(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
*ptr = TABLE0_FN1_SPACE.as_mut_ptr();
*size = TABLE0_FN1_SPACE.len();
return TABLE0_FN1_SPACE.as_mut_ptr();
/// Export-table entry that hands the caller a static 512-byte scratch
/// buffer: its address is written to `*ptr`, its length to `*size`, and the
/// address is also returned.
///
/// Both out-pointers are written unconditionally, so they must be valid.
/// NOTE(review): what the caller does with this buffer is not visible here.
unsafe extern "C" fn runtime_callback_hooks_fn1(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
    let scratch = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.as_mut_ptr();
    *ptr = scratch;
    *size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.len();
    scratch
}
unsafe extern "C" fn table0_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
*ptr = TABLE0_FN5_SPACE.as_mut_ptr();
*size = TABLE0_FN5_SPACE.len();
return TABLE0_FN5_SPACE.as_mut_ptr();
// Two-byte static scratch area handed out by `runtime_callback_hooks_fn5`.
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];

/// Export-table entry that hands the caller the static two-byte scratch
/// buffer: its address is written to `*ptr`, its length to `*size`, and the
/// address is also returned.
///
/// Both out-pointers are written unconditionally, so they must be valid.
unsafe extern "C" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
    let scratch = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
    *ptr = scratch;
    *size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
    scratch
}
const CU_ETID_CudartInterface: cu::Uuid = cu::Uuid {
const CUDART_INTERFACE_GUID: cu::Uuid = cu::Uuid {
x: [
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
0xf9
0xf9,
],
};
const TABLE1_LEN: usize = 10;
static TABLE1: [VTableEntry; TABLE1_LEN] = [
const CUDART_INTERFACE_LENGTH: usize = 10;
static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry {
length: mem::size_of::<[VTableEntry; TABLE1_LEN]>(),
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
},
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: table1_fn1 as *const (),
ptr: cudart_interface_fn1 as *const (),
},
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: table1_fn6 as *const (),
ptr: get_module_from_cubin as *const (),
},
VTableEntry {
ptr: cudart_interface_fn6 as *const (),
},
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
];
unsafe extern "C" fn table1_fn1(_: *mut c_ulong, _: c_int) -> c_int {
unsafe extern "C" fn cudart_interface_fn1(_: *mut c_ulong, _: c_int) -> c_int {
0
}
unsafe extern "C" fn table1_fn6(_: u64) { }
/*
fat_cubin:
typedef struct {
int magic;
int version;
const unsigned long long* data;
void *filename_or_fatbins; /* version 1: offline filename,
* version 2: array of prelinked fatbins */
} __fatBinC_Wrapper_t;
const CU_ETID_ToolsTls: cu::Uuid = cu::Uuid {
x: [0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec, 0xdc],
data start with this header:
#define FATBIN_MAGIC 0xBA55ED50U
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
#define FATBIN_VERSION 0x0001U
struct fatbinary_ALIGN_(8) fatBinaryHeader
{
unsigned int magic; // FATBIN_MAGIC
unsigned short version; // FATBIN_VERSION
unsigned short headerSize;
unsigned long long int fatSize; // size of the entire fat binary excluding this header
};
there's binary data after header
const CU_ETID_ContextLocalStorageInterface_v0301: cu::Uuid = cu::Uuid {
x: [0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95, 0x93],
*/
const FATBINC_MAGIC: c_uint = 0x466243B1;
const FATBINC_VERSION: c_uint = 0x1;
/// Rust mirror of the C `__fatBinC_Wrapper_t` struct quoted in the comment block
/// earlier in this file; a pointer to it is received by `get_module_from_cubin`.
#[repr(C)]
struct FatbincWrapper {
    // Checked against FATBINC_MAGIC before the wrapper is used.
    magic: c_uint,
    // Checked against FATBINC_VERSION before the wrapper is used.
    version: c_uint,
    // Points at the fat binary header; the file entries start `header_size` bytes after it.
    data: *const FatbinHeader,
    // Per the quoted C declaration: offline filename (version 1) or array of
    // prelinked fatbins (version 2). Not read by the code in this file section.
    filename_or_fatbins: *const c_void,
}
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
const FATBIN_VERSION: c_ushort = 0x01;
/// Header at the start of the fat binary blob that `FatbincWrapper::data` points to.
///
/// Mirrors the C `fatBinaryHeader` declaration quoted in the comment block earlier
/// in this file: 4-byte magic, 2-byte version, 2-byte header size and an 8-byte
/// payload size, aligned to 8 bytes.
#[repr(C, align(8))]
struct FatbinHeader {
    magic: c_uint,
    version: c_ushort,
    header_size: c_ushort,
    // The C side declares this as `unsigned long long`. `c_ulong` is only 32 bits
    // wide on Windows (LLP64), so a fixed-width `u64` keeps the layout identical
    // on every target.
    files_size: u64, // excluding frame header, size of all blocks framed by this frame
}
const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
// assembly file header is a bit different, but we don't care
/// Header preceding every file entry inside a fat binary.
///
/// `get_ptx_files` walks consecutive entries using `header_size` and
/// `padded_payload_size`; `get_module_from_cubin` reads the payload that follows.
// NOTE(review): the `c_ulong` fields are 32 bits wide on Windows targets; confirm
// whether the on-disk layout expects 64-bit fields there.
#[repr(C)]
struct FatbinFileHeader {
    // Compared against FATBIN_FILE_HEADER_KIND_PTX to pick out PTX entries.
    kind: c_ushort,
    // Compared against FATBIN_FILE_HEADER_VERSION_CURRENT.
    version: c_ushort,
    // Byte offset from the start of this header to the payload.
    header_size: c_uint,
    // Added to `header_size` to step to the next entry.
    padded_payload_size: c_uint,
    unknown0: c_uint, // check if it's written into separately
    // Length in bytes of the compressed payload handed to lz4.
    payload_size: c_uint,
    unknown1: c_uint,
    unknown2: c_uint,
    // Used to order PTX entries; the highest value is tried first.
    sm_version: c_uint,
    bit_width: c_uint,
    unknown3: c_uint,
    unknown4: c_ulong,
    unknown5: c_ulong,
    // Passed to lz4 as the expected size of the decompressed payload.
    uncompressed_payload: c_ulong,
}
unsafe extern "C" fn get_module_from_cubin(
result: *mut cu::Module,
fatbinc_wrapper: *const FatbincWrapper,
_: *mut c_void,
_: *mut c_void,
) -> cu::Result {
if result == ptr::null_mut() || (*fatbinc_wrapper).magic != FATBINC_MAGIC || (*fatbinc_wrapper).version != FATBINC_VERSION {
return cu::Result::ERROR_INVALID_VALUE;
}
let fatbin_header = (*fatbinc_wrapper).data;
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
return cu::Result::ERROR_INVALID_VALUE;
}
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
let end = file.add((*fatbin_header).files_size as usize);
let mut ptx_files = get_ptx_files(file, end);
ptx_files.sort_unstable_by_key(|f| c_uint::max_value() - (**f).sm_version);
for file in ptx_files {
let slice = slice::from_raw_parts(
(file as *const u8).add((*file).header_size as usize),
(*file).payload_size as usize,
);
let kernel_text =
lz4::block::decompress(slice, Some((*file).uncompressed_payload as i32)).unwrap();
let module = ze::Module {
ptx_text: kernel_text,
};
*result = cu::Module::new(module);
return cu::Result::SUCCESS
}
cu::Result::ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
while index < end {
let file = index as *const FatbinFileHeader;
if (*file).kind == FATBIN_FILE_HEADER_KIND_PTX
&& (*file).version == FATBIN_FILE_HEADER_VERSION_CURRENT
{
result.push(file)
}
index = index.add((*file).header_size as usize + (*file).padded_payload_size as usize);
}
result
}
unsafe extern "C" fn cudart_interface_fn6(_: u64) {}
const TOOLS_TLS_GUID: cu::Uuid = cu::Uuid {
x: [
0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
0xdc,
],
};
// the table is much bigger and start earlier
static ContextLocalStorageInterface_v0301_VTABLE: [VTableEntry; 4] = [
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn0 as *const () },
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn1 as *const () },
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn2 as *const () },
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: cu::Uuid = cu::Uuid {
x: [
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
0x93,
],
};
// the table is much bigger and starts earlier
static CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE: [VTableEntry; 4] = [
VTableEntry {
ptr: context_local_storage_ctor as *const (),
},
VTableEntry {
ptr: context_local_storage_dtor as *const (),
},
VTableEntry {
ptr: context_local_storage_get_state as *const (),
},
VTableEntry { ptr: ptr::null() },
];
// some kind of ctor
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn0(ms: *mut usize, _: *mut (), _: *mut (), _: *mut ()) -> u32 {
0
unsafe extern "C" fn context_local_storage_ctor(
cu_ctx: cu::Context, // always zero
mgr: *mut cuda::rt::ContextStateManager,
ctx_state: *mut cuda::rt::ContextState,
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
dtor_cb: extern "C" fn(
cu::Context,
*mut cuda::rt::ContextStateManager,
*mut cuda::rt::ContextState,
),
) -> cu::Result {
if cu_ctx.0 == ptr::null_mut() {
return cu::Result::ERROR_NOT_SUPPORTED;
}
(*cu_ctx.0).cuda_manager = mgr;
(*cu_ctx.0).cuda_state = ctx_state;
(*cu_ctx.0).cuda_dtor_cb = dtor_cb;
cu::Result::SUCCESS
}
// some kind of dtor
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn1(ms: *mut usize, _: *mut ()) -> u32 {
unsafe extern "C" fn context_local_storage_dtor(_: *mut usize, _: *mut ()) -> u32 {
0
}
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn2(_: *mut *mut (), _: *mut (), _: *mut ()) -> u32 {
0
}
/// Retrieves the runtime state previously stored on `cu_ctx`.
///
/// Writes the context's `cuda_state` pointer to `*ctx_state`.
/// Returns `ERROR_INVALID_VALUE` when the out-pointer is null and
/// `ERROR_INVALID_CONTEXT` when `cu_ctx` is the null context.
///
/// # Safety
/// A non-null `ctx_state` must be valid for writes and a non-null `cu_ctx` must
/// reference a live context.
unsafe extern "C" fn context_local_storage_get_state(
    ctx_state: *mut *mut cuda::rt::ContextState,
    cu_ctx: cu::Context,
    _: *mut cuda::rt::ContextStateManager,
) -> cu::Result {
    // Guard the out-pointer the same way the exported cu* entry points do.
    if ctx_state.is_null() {
        return cu::Result::ERROR_INVALID_VALUE;
    }
    if cu_ctx == cu::Context::null() {
        return cu::Result::ERROR_INVALID_CONTEXT;
    }
    *ctx_state = (*cu_ctx.0).cuda_state;
    cu::Result::SUCCESS
}

View file

@ -1,28 +1,17 @@
extern crate level_zero_sys as l0;
extern crate level_zero as l0;
extern crate level_zero_sys as l0_sys;
#[macro_use]
extern crate lazy_static;
extern crate lz4;
use std::cell::RefCell;
use std::convert::TryFrom;
use std::os::raw::{c_char, c_int, c_uint};
use std::ptr;
use std::cell::RefCell;
use std::sync::Mutex;
use ze::Versioned;
#[macro_use]
macro_rules! l0_check_err {
($exp:expr) => {
{
let result = unsafe{ $exp };
if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
return Err(result);
}
}
};
}
use std::{ffi::c_void, sync::Mutex};
mod cu;
mod cuda;
mod export_table;
mod ze;
@ -31,32 +20,32 @@ thread_local! {
}
lazy_static! {
static ref GLOBAL_STATE: Mutex<Option<Driver>> = Mutex::new(None);
static ref GLOBAL_STATE: Mutex<Option<Context>> = Mutex::new(None);
}
struct Driver {
base: l0::ze_driver_handle_t,
devices: Vec::<ze::Device>
struct Context {
base: l0::Context,
devices: Vec<ze::Device>,
}
unsafe impl Send for Driver {}
unsafe impl Sync for Driver {}
unsafe impl Send for Context {}
unsafe impl Sync for Context {}
impl Driver {
fn new() -> Result<Driver, l0::ze_result_t> {
let mut driver_count = 1;
let mut handle = ptr::null_mut();
l0_check_err!{ l0::zeDriverGet(&mut driver_count, &mut handle) };
let mut count = 0;
l0_check_err! { l0::zeDeviceGet(handle, &mut count, ptr::null_mut()) }
let mut devices = vec![ptr::null_mut(); count as usize];
l0_check_err! { l0::zeDeviceGet(handle, &mut count, devices.as_mut_ptr()) }
if (count as usize) < devices.len() {
devices.truncate(count as usize);
}
Ok(Driver{ base: handle, devices: ze::Device::new_vec(devices) })
impl Context {
/// Creates the global state: a Level Zero context on the first driver plus a
/// wrapped `ze::Device` for each of that driver's devices.
///
/// Propagates the first Level Zero error encountered.
fn new() -> l0::Result<Context> {
    let drivers = l0::Driver::get()?;
    let first_driver = &drivers[0];
    let raw_devices = first_driver.devices()?;
    let mut base = l0::Context::new(first_driver)?;
    let mut devices = Vec::new();
    for raw_device in raw_devices {
        devices.push(ze::Device::new(&mut base, raw_device)?);
    }
    Ok(Context { base, devices })
}
fn call<F: FnOnce(&mut Driver) -> l0::ze_result_t>(f: F) -> cu::Result {
fn call<F: FnOnce(&mut Context) -> l0_sys::ze_result_t>(f: F) -> cu::Result {
let mut lock = GLOBAL_STATE.try_lock();
if let Ok(ref mut mutex) = lock {
match **mutex {
@ -70,30 +59,43 @@ impl Driver {
}
}
fn call_device<F: FnOnce(&mut ze::Device) -> l0::ze_result_t>(cu::Device(dev): cu::Device, f: F) -> cu::Result {
/// Adapter over `Context::call` for closures that return `l0::Result<()>`.
///
/// `Ok(())` is translated to `ZE_RESULT_SUCCESS`; an error is unwrapped to its
/// underlying result code.
fn call2<F: FnOnce(&mut Context) -> l0::Result<()>>(f: F) -> cu::Result {
    Context::call(|ctx| {
        if let Err(err) = f(ctx) {
            return err.0;
        }
        l0::sys::ze_result_t::ZE_RESULT_SUCCESS
    })
}
fn call_device<F: FnOnce(&mut ze::Device) -> l0::Result<()>>(
cu::Device(dev): cu::Device,
f: F,
) -> cu::Result {
if dev < 0 {
return cu::Result::ERROR_INVALID_VALUE;
}
let dev = dev as usize;
Driver::call(|driver| {
Context::call(|driver| {
if dev >= driver.devices.len() {
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
match f(&mut driver.devices[dev]) {
Ok(()) => l0::sys::ze_result_t::ZE_RESULT_SUCCESS,
Err(err) => err.0,
}
f(&mut driver.devices[dev])
})
}
fn device_get_count(&self, count: *mut i32) -> l0::ze_result_t {
fn device_get_count(&self, count: *mut i32) -> l0_sys::ze_result_t {
unsafe { *count = self.devices.len() as i32 };
l0::ze_result_t::ZE_RESULT_SUCCESS
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
}
fn device_get(&self, device: *mut cu::Device, ordinal: c_int) -> l0::ze_result_t {
fn device_get(&self, device: *mut cu::Device, ordinal: c_int) -> l0_sys::ze_result_t {
if (ordinal as usize) >= self.devices.len() {
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
unsafe { *device = cu::Device(ordinal) };
l0::ze_result_t::ZE_RESULT_SUCCESS
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
}
}
@ -108,16 +110,16 @@ pub unsafe extern "C" fn cuDriverGetVersion(version: *mut c_int) -> cu::Result {
#[no_mangle]
pub unsafe extern "C" fn cuInit(_: c_uint) -> cu::Result {
let l0_init = l0::zeInit(l0::ze_init_flag_t::ZE_INIT_FLAG_GPU_ONLY);
if l0_init != l0::ze_result_t::ZE_RESULT_SUCCESS {
return cu::Result::from_l0(l0_init);
let l0_sys_init = l0_sys::zeInit(l0_sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY);
if l0_sys_init != l0_sys::ze_result_t::ZE_RESULT_SUCCESS {
return cu::Result::from_l0(l0_sys_init);
}
let mut lock = GLOBAL_STATE.try_lock();
if let Ok(ref mut mutex) = lock {
if let None = **mutex {
match Driver::new() {
match Context::new() {
Ok(state) => **mutex = Some(state),
Err(err) => return cu::Result::from_l0(err)
Err(err) => return cu::Result::from_l0(err.0),
}
}
} else {
@ -131,7 +133,7 @@ pub extern "C" fn cuDeviceGetCount(count: *mut c_int) -> cu::Result {
if count == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call(|driver| driver.device_get_count(count))
Context::call(|driver| driver.device_get_count(count))
}
#[no_mangle]
@ -139,15 +141,19 @@ pub extern "C" fn cuDeviceGet(device: *mut cu::Device, ordinal: c_int) -> cu::Re
if ordinal < 0 || device == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call(|driver| driver.device_get(device, ordinal))
Context::call(|driver| driver.device_get(device, ordinal))
}
#[no_mangle]
pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev_idx: cu::Device) -> cu::Result {
pub extern "C" fn cuDeviceGetName(
name: *mut c_char,
len: c_int,
dev_idx: cu::Device,
) -> cu::Result {
if name == ptr::null_mut() || len <= 0 {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call_device(dev_idx, |dev| dev.get_name(name, len))
Context::call_device(dev_idx, |dev| dev.get_name(name, len))
}
#[no_mangle]
@ -155,27 +161,32 @@ pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) ->
if bytes == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call_device(dev_idx, |dev| dev.total_mem(bytes))
Context::call_device(dev_idx, |dev| dev.total_mem(bytes))
}
#[no_mangle]
pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: c_int, dev_idx: cu::Device) -> cu::Result {
pub extern "C" fn cuDeviceGetAttribute(
pi: *mut c_int,
attrib: c_int,
dev_idx: cu::Device,
) -> cu::Result {
if pi == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
let attrib = match u8::try_from(attrib) {
Ok(a) => a,
Err(_) => return cu::Result::ERROR_INVALID_VALUE
Err(_) => return cu::Result::ERROR_INVALID_VALUE,
};
match cu::DeviceAttribute::try_new(attrib) {
Some(cu::DeviceAttribute::Static(a)) => {
unsafe { *pi = ze::Device::get_attribute_static(a) };
cu::Result::SUCCESS
},
Some(cu::DeviceAttribute::Dynamic(a)) => Driver::call_device(dev_idx, |dev| dev.get_attribute(pi, a)),
}
Some(cu::DeviceAttribute::Dynamic(a)) => {
Context::call_device(dev_idx, |dev| dev.get_attribute(pi, a))
}
// TODO: add support for more properties
None => cu::Result::SUCCESS
None => cu::Result::SUCCESS,
}
}
@ -184,16 +195,14 @@ pub extern "C" fn cuDeviceGetUuid(uuid: *mut cu::Uuid, dev_idx: cu::Device) -> c
if uuid == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call_device(dev_idx, |dev| dev.get_uuid(uuid))
Context::call_device(dev_idx, |dev| dev.get_uuid(uuid))
}
#[no_mangle]
pub extern "C" fn cuCtxGetCurrent(pctx: *mut cu::Context) -> cu::Result {
let ctx = CONTEXT_STACK.with(|stack| {
match stack.borrow().last() {
Some(ctx) => ctx.clone(),
None => cu::Context::null()
}
let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() {
Some(ctx) => ctx.clone(),
None => cu::Context::null(),
});
unsafe { *pctx = ctx };
cu::Result::SUCCESS
@ -206,7 +215,7 @@ pub extern "C" fn cuCtxSetCurrent(ctx: cu::Context) -> cu::Result {
stack.pop();
if ctx != cu::Context::null() {
stack.push(ctx);
}
}
});
cu::Result::SUCCESS
}
@ -216,42 +225,67 @@ pub extern "C" fn cuMemAlloc_v2(dptr: *mut cu::DevicePtr, bytesize: usize) -> cu
if dptr == ptr::null_mut() || bytesize == 0 {
return cu::Result::ERROR_INVALID_VALUE;
}
Driver::call(|drv| {
let mut descr = l0::ze_device_mem_alloc_desc_t::new();
descr.flags = l0::ze_device_mem_alloc_flag_t::ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT;
descr.ordinal = 0;
// TODO: check current context for the device
unsafe { l0::zeDriverAllocDeviceMem(drv.base, &descr, bytesize, 0, drv.devices[0].0, dptr as *mut _) }
Context::call2(|drv| {
unsafe {
*dptr = cu::DevicePtr(drv.devices[0].base.mem_alloc_device(
&mut drv.base,
bytesize,
0,
)? as usize)
};
Ok(())
})
}
#[no_mangle]
pub extern "C" fn cuCtxDestroy_v2(ctx: cu::Context) -> cu::Result
{
pub extern "C" fn cuCtxDestroy_v2(ctx: cu::Context) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuMemcpyDtoH_v2(dstHost: *mut (), srcDevice: cu::DevicePtr, byte_count: usize) -> cu::Result
{
pub extern "C" fn cuMemcpyDtoH_v2(
dst_host: *mut c_void,
src_device: cu::DevicePtr,
byte_count: usize,
) -> cu::Result {
if dst_host == ptr::null_mut() || src_device.0 == 0 || byte_count == 0 {
return cu::Result::ERROR_INVALID_VALUE;
}
// TODO: choose the right device from device ptr
Context::call(|drv| {
let cu_dev = &mut drv.devices[0];
// Safe, because there's no Drop impl for device
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
let queue = &mut cu_dev.default_queue;
let result = unsafe {
memcpy_impl(
&mut drv.base,
dst_host,
src_device.0 as *mut _,
byte_count,
&dev,
queue,
)
};
match result {
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
Err(e) => e.0,
}
})
}
#[no_mangle]
pub extern "C" fn cuMemFree_v2(srcDevice: cu::DevicePtr) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuMemFree_v2(srcDevice: cu::DevicePtr) -> cu::Result
{
pub extern "C" fn cuModuleLoad(module: *mut cu::Module, fname: *const c_char) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuModuleLoad(module: *mut cu::Module, fname: *const c_char) -> cu::Result
{
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuGetErrorString(error: cu::Result, pStr: *mut *const c_char) -> cu::Result
{
pub extern "C" fn cuGetErrorString(error: cu::Result, pStr: *mut *const c_char) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
@ -267,43 +301,97 @@ pub extern "C" fn cuLaunchKernel(
sharedMemBytes: c_uint,
hStream: cu::Stream,
kernelParams: *mut *mut (),
extra: *mut *mut ())
-> cu::Result
{
extra: *mut *mut (),
) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
#[allow(non_snake_case)]
pub extern "C" fn cuModuleLoadDataEx(
module: *mut cu::Module,
image: *const (),
numOptions: c_uint,
options: *mut cu::JitOption,
optionValues: *mut *mut ()) -> cu::Result
{
optionValues: *mut *mut (),
) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuMemcpyHtoD_v2(
dstDevice: cu::DevicePtr,
srcHost: *const (),
ByteCount: usize) -> cu::Result
{
cu::Result::ERROR_NOT_SUPPORTED
dst_device: cu::DevicePtr,
src_host: *const c_void,
byte_count: usize,
) -> cu::Result {
if dst_device.0 == 0 || src_host == ptr::null_mut() || byte_count == 0 {
return cu::Result::ERROR_INVALID_VALUE;
}
// TODO: choose the right device from device ptr
Context::call(|drv| {
let cu_dev = &mut drv.devices[0];
// Safe, because there's no Drop impl for device
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
let queue = &mut cu_dev.default_queue;
let result = unsafe {
memcpy_impl(
&mut drv.base,
dst_device.0 as *mut _,
src_host,
byte_count,
&dev,
queue,
)
};
match result {
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
Err(e) => e.0,
}
})
}
/// Copies `bytes_count` bytes from `src` to `dst` by recording a single memory
/// copy into a fresh command list and executing it on `queue`.
///
/// # Safety
/// `dst` and `src` are passed unchecked to `append_memory_copy_unsafe`; the caller
/// must ensure both reference allocations of at least `bytes_count` bytes.
unsafe fn memcpy_impl(
    ctx: &mut l0::Context,
    dst: *mut c_void,
    src: *const c_void,
    bytes_count: usize,
    dev: &l0::Device,
    queue: &mut l0::CommandQueue,
) -> l0::Result<()> {
    // `dev` is already a reference; pass it through without re-borrowing.
    let mut cmd_list = l0::CommandList::new(ctx, dev)?;
    cmd_list.append_memory_copy_unsafe(dst, src, bytes_count, None, &mut [])?;
    queue.execute(cmd_list)?;
    Ok(())
}
#[no_mangle]
pub extern "C" fn cuCtxCreate_v2(pctx: *mut cu::Context, flags: c_uint, dev: cu::Device) -> cu::Result
{
cu::Result::SUCCESS
pub extern "C" fn cuCtxCreate_v2(
pctx: *mut cu::Context,
flags: c_uint,
dev: cu::Device,
) -> cu::Result {
if pctx == ptr::null_mut() {
return cu::Result::ERROR_INVALID_VALUE;
}
cu::Result::ERROR_NOT_SUPPORTED
}
#[no_mangle]
pub extern "C" fn cuModuleGetFunction(
hfunc: *mut cu::Function,
hmod: cu::Module,
name: *const c_char) -> cu::Result
{
name: *const c_char,
) -> cu::Result {
cu::Result::ERROR_NOT_SUPPORTED
}
}
#[no_mangle]
pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut cu::Context, dev: cu::Device) -> cu::Result {
cu::Result::SUCCESS
}
#[no_mangle]
/// Reports the device of the current context; this always writes device ordinal 0.
///
/// Returns `ERROR_INVALID_VALUE` when `dev` is null, matching the out-pointer
/// checks performed by the other exported entry points.
pub extern "C" fn cuCtxGetDevice(dev: *mut cu::Device) -> cu::Result {
    if dev.is_null() {
        return cu::Result::ERROR_INVALID_VALUE;
    }
    // SAFETY: `dev` was checked for null above; the caller guarantees it is writable.
    unsafe { *dev = cu::Device(0) };
    cu::Result::SUCCESS
}

View file

@ -1,141 +1,73 @@
use level_zero_sys::*;
use super::cu;
use crate::cuda;
use std::cmp;
use std::mem;
use std::os::raw::{c_char, c_int};
use std::ptr;
macro_rules! assert_size_eq {
($x:ty, $($xs:ty),+ $(,)?) => {
const _: fn() = || {
$(let _ = ::std::mem::transmute::<$x, $xs>;)+
};
};
pub struct Device {
pub base: l0::Device,
pub default_queue: l0::CommandQueue,
properties: Option<Box<l0::sys::ze_device_properties_t>>,
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
}
macro_rules! l0_check {
($exp:expr) => {
{
let result = unsafe{ $exp };
if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
return result;
}
}
};
}
fn either<T>(r: Result<T, T>) -> T {
match r {
Ok(x) => x,
Err(x) => x
}
}
pub trait Versioned : Sized {
type Version;
fn new() -> Self {
let mut result = unsafe { std::mem::zeroed::<Self>() };
let ver = result.version();
*ver = Self::current();
return result;
}
fn current() -> Self::Version;
fn version(&mut self) -> &mut Self::Version;
}
impl Versioned for ze_device_memory_properties_t {
type Version = ze_device_memory_properties_version_t;
fn current() -> Self::Version {
ze_device_memory_properties_version_t::ZE_DEVICE_MEMORY_PROPERTIES_VERSION_CURRENT
}
fn version(&mut self) -> &mut Self::Version {
&mut self.version
}
}
impl Versioned for ze_device_properties_t {
type Version = ze_device_properties_version_t;
fn current() -> Self::Version {
ze_device_properties_version_t::ZE_DEVICE_PROPERTIES_VERSION_CURRENT
}
fn version(&mut self) -> &mut Self::Version {
&mut self.version
}
}
impl Versioned for ze_device_image_properties_t {
type Version = ze_device_image_properties_version_t;
fn current() -> Self::Version {
ze_device_image_properties_version_t::ZE_DEVICE_IMAGE_PROPERTIES_VERSION_CURRENT
}
fn version(&mut self) -> &mut Self::Version {
&mut self.version
}
}
impl Versioned for ze_device_mem_alloc_desc_t {
type Version = ze_device_mem_alloc_desc_version_t;
fn current() -> Self::Version {
ze_device_mem_alloc_desc_version_t::ZE_DEVICE_MEM_ALLOC_DESC_VERSION_CURRENT
}
fn version(&mut self) -> &mut Self::Version {
&mut self.version
}
}
#[derive(Clone, Copy)]
#[repr(transparent)] // required so a Vec<ze_device_handle_t> can be safely transmutted to Vec<Device>
pub struct Device(pub ze_device_handle_t);
impl Device {
pub fn new_vec(v: Vec<ze_device_handle_t>) -> Vec<Device> {
assert_size_eq!(Device, ze_device_handle_t);
unsafe { mem::transmute(v) }
/// Wraps a Level Zero device together with a freshly created default command queue.
///
/// The property caches start out empty and are filled on first use.
pub fn new(ctx: &mut l0::Context, d: l0::Device) -> l0::Result<Self> {
    let default_queue = l0::CommandQueue::new(ctx, &d)?;
    let device = Self {
        base: d,
        default_queue,
        properties: None,
        image_properties: None,
        memory_properties: None,
    };
    Ok(device)
}
fn get_device_properties(self) -> Result<Box<ze_device_properties_t>, ze_result_t> {
let mut props = Box::new(l0::ze_device_properties_t::new());
l0_check_err! { l0::zeDeviceGetProperties(self.0, props.as_mut()) };
Ok(props)
/// Returns the device properties, querying the device on the first call and
/// serving the cached copy afterwards.
fn get_properties(&mut self) -> l0::Result<&l0::sys::ze_device_properties_t> {
    let fetched = match self.properties {
        Some(ref cached) => return Ok(cached),
        None => self.base.get_properties()?,
    };
    Ok(self.properties.get_or_insert(fetched))
}
fn get_device_image_properties(self) -> Result<Box<ze_device_image_properties_t>, ze_result_t> {
let mut props = Box::new(l0::ze_device_image_properties_t::new());
l0_check_err! { l0::zeDeviceGetImageProperties(self.0, props.as_mut()) };
Ok(props)
/// Returns the device image properties, querying the device on the first call
/// and serving the cached copy afterwards.
fn get_image_properties(&mut self) -> l0::Result<&l0::sys::ze_device_image_properties_t> {
    let fetched = match self.image_properties {
        Some(ref cached) => return Ok(cached),
        None => self.base.get_image_properties()?,
    };
    Ok(self.image_properties.get_or_insert(fetched))
}
pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t {
let props = match self.get_device_properties() {
Ok(props) => props,
Err(res) => return res
};
/// Returns the device memory properties, querying the device on the first call
/// and serving the cached list afterwards.
fn get_memory_properties(&mut self) -> l0::Result<&[l0::sys::ze_device_memory_properties_t]> {
    let fetched = match self.memory_properties {
        Some(ref cached) => return Ok(cached),
        None => self.base.get_memory_properties()?,
    };
    Ok(self.memory_properties.get_or_insert(fetched))
}
pub fn get_name(&mut self, name: *mut c_char, len: c_int) -> l0::Result<()> {
let props = self.get_properties()?;
let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0);
let dst_null_pos = cmp::min((len - 1) as usize, null_pos);
unsafe { *(name.add(dst_null_pos)) = 0 };
unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) };
l0::ze_result_t::ZE_RESULT_SUCCESS
Ok(())
}
pub fn total_mem(self, bytes: *mut usize) -> l0::ze_result_t {
let mut count = 0;
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
if count == 0 {
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
}
let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize];
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
let iter_count = cmp::min(count as usize, props.len());
if iter_count == 0 {
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
}
let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap();
pub fn total_mem(&mut self, bytes: *mut usize) -> l0::Result<()> {
let props = self.get_memory_properties()?;
let max_mem = props.iter().map(|p| p.totalSize).max().unwrap();
unsafe { *bytes = max_mem as usize };
l0::ze_result_t::ZE_RESULT_SUCCESS
Ok(())
}
pub fn get_attribute_static(attr: cu::DeviceStaticAttribute) -> c_int {
@ -148,38 +80,61 @@ impl Device {
}
}
fn get_attribute_general(attr: cu::DeviceGeneralAttribute, props: &l0::ze_device_properties_t) -> c_int {
fn get_attribute_general(
attr: cu::DeviceGeneralAttribute,
props: &l0_sys::ze_device_properties_t,
) -> c_int {
match attr {
cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => props.unifiedMemorySupported as i32,
cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.numAsyncCopyEngines as i32,
cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => (props.numSlicesPerTile * props.numSubslicesPerSlice) as i32,
}
}
fn get_attribute_texture(attr: cu::DeviceTextureAttribute, props: &l0::ze_device_image_properties_t) -> c_int {
match attr {
cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int,
}
}
pub fn get_attribute(self, pi: *mut c_int, attr: cu::DeviceDynamicAttribute) -> l0::ze_result_t {
let value_or_err = match attr {
cu::DeviceDynamicAttribute::General(a) => self.get_device_properties().map(|p| Device::get_attribute_general(a, &p)),
cu::DeviceDynamicAttribute::Texture(a) => self.get_device_image_properties().map(|p| Device::get_attribute_texture(a, &p)),
};
match value_or_err {
Ok(value) => {
unsafe { *pi = value };
l0::ze_result_t::ZE_RESULT_SUCCESS
cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => 1,
cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.maxHardwareContexts as i32,
cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => {
(props.numSlices * props.numSubslicesPerSlice * props.numEUsPerSubslice) as i32
}
Err(e) => e
}
}
pub fn get_uuid(self, uuid: *mut cu::Uuid) -> l0::ze_result_t {
either(self.get_device_properties().map(|prop| {
unsafe { *uuid = cu::Uuid{ x: prop.uuid.id } };
l0::ze_result_t::ZE_RESULT_SUCCESS
}))
fn get_attribute_texture(
attr: cu::DeviceTextureAttribute,
props: &l0_sys::ze_device_image_properties_t,
) -> c_int {
match attr {
cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => {
cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int
}
}
}
}
/// Resolves a dynamic device attribute and writes its value to `*pi`.
///
/// General attributes are derived from the device properties, texture attributes
/// from the image properties. Nothing is written when the property lookup fails.
pub fn get_attribute(
    &mut self,
    pi: *mut c_int,
    attr: cu::DeviceDynamicAttribute,
) -> l0::Result<()> {
    let value = match attr {
        cu::DeviceDynamicAttribute::General(kind) => {
            let props = self.get_properties()?;
            Device::get_attribute_general(kind, props)
        }
        cu::DeviceDynamicAttribute::Texture(kind) => {
            let props = self.get_image_properties()?;
            Device::get_attribute_texture(kind, props)
        }
    };
    unsafe { *pi = value };
    Ok(())
}
/// Writes the device UUID, taken from the device properties, to `*uuid`.
pub fn get_uuid(&mut self, uuid: *mut cu::Uuid) -> l0::Result<()> {
    let id = self.get_properties()?.uuid.id;
    unsafe { *uuid = cu::Uuid { x: id } };
    Ok(())
}
}
pub struct Context {
pub cuda_manager: *mut cuda::rt::ContextStateManager,
pub cuda_state: *mut cuda::rt::ContextState,
pub cuda_dtor_cb:
extern "C" fn(cu::Context, *mut cuda::rt::ContextStateManager, *mut cuda::rt::ContextState),
}
pub struct Module {
pub ptx_text: Vec<u8>,
}

View file

@ -100,34 +100,37 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
)
};
let mut result = vec![0u8.into(); output.len()];
let mut drivers = ze::Driver::get()?;
let drv = drivers.drain(0..1).next().unwrap();
let mut devices = drv.devices()?;
let dev = devices.drain(0..1).next().unwrap();
let queue = ze::CommandQueue::new(&dev)?;
let module = ze::Module::new_spirv(&dev, byte_il, None)?;
let mut kernel = ze::Kernel::new_resident(&module, name)?;
kernel.set_attribute_bool(
ze::sys::ze_kernel_attribute_t::ZE_KERNEL_ATTR_INDIRECT_DEVICE_ACCESS,
true,
)?;
let mut inp_b = ze::DeviceBuffer::<T>::new(&drv, &dev, input.len())?;
let mut out_b = ze::DeviceBuffer::<T>::new(&drv, &dev, output.len())?;
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
let event_pool = ze::EventPool::new(&drv, 3, Some(&[&dev]))?;
let ev0 = ze::Event::new(&event_pool, 0)?;
let ev1 = ze::Event::new(&event_pool, 1)?;
let ev2 = ze::Event::new(&event_pool, 2)?;
let mut cmd_list = ze::CommandList::new(&dev)?;
let out_b_ptr_mut: ze::BufferPtrMut<T> = (&mut out_b).into();
cmd_list.append_memory_copy(inp_b_ptr_mut, input, None, Some(&ev0))?;
cmd_list.append_memory_fill(out_b_ptr_mut, 0u8.into(), Some(&ev1))?;
kernel.set_group_size(1, 1, 1)?;
kernel.set_arg_buffer(0, inp_b_ptr_mut)?;
kernel.set_arg_buffer(1, out_b_ptr_mut)?;
cmd_list.append_launch_kernel(&kernel, &[1, 1, 1], Some(&ev2), &[&ev0, &ev1])?;
cmd_list.append_memory_copy(result.as_mut_slice(), out_b_ptr_mut, None, Some(&ev2))?;
queue.execute(cmd_list)?;
{
let mut drivers = ze::Driver::get()?;
let drv = drivers.drain(0..1).next().unwrap();
let mut ctx = ze::Context::new(&drv)?;
let mut devices = drv.devices()?;
let dev = devices.drain(0..1).next().unwrap();
let queue = ze::CommandQueue::new(&mut ctx, &dev)?;
let module = ze::Module::new_spirv(&mut ctx, &dev, byte_il, None)?;
let mut kernel = ze::Kernel::new_resident(&module, name)?;
kernel.set_indirect_access(
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
)?;
let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, input.len())?;
let mut out_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, output.len())?;
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
let event_pool = ze::EventPool::new(&mut ctx, 3, Some(&[&dev]))?;
let ev0 = ze::Event::new(&event_pool, 0)?;
let ev1 = ze::Event::new(&event_pool, 1)?;
let mut ev2 = ze::Event::new(&event_pool, 2)?;
let mut cmd_list = ze::CommandList::new(&mut ctx, &dev)?;
let out_b_ptr_mut: ze::BufferPtrMut<T> = (&mut out_b).into();
let mut init_evs = [ev0, ev1];
cmd_list.append_memory_copy(inp_b_ptr_mut, input, Some(&mut init_evs[0]), &mut [])?;
cmd_list.append_memory_fill(out_b_ptr_mut, 0, Some(&mut init_evs[1]), &mut [])?;
kernel.set_group_size(1, 1, 1)?;
kernel.set_arg_buffer(0, inp_b_ptr_mut)?;
kernel.set_arg_buffer(1, out_b_ptr_mut)?;
cmd_list.append_launch_kernel(&kernel, &[1, 1, 1], Some(&mut ev2), &mut init_evs)?;
cmd_list.append_memory_copy(result.as_mut_slice(), out_b_ptr_mut, None, &mut [ev2])?;
queue.execute(cmd_list)?;
}
Ok(result)
}

View file

@ -121,19 +121,10 @@ impl TypeWordMap {
})
}
fn get_or_add_extended(
&mut self,
b: &mut dr::Builder,
t: ast::ExtendedScalarType,
) -> spirv::Word {
let key: SpirvScalarKey = t.into();
self.get_or_add_spirv_scalar(b, key)
}
fn get_or_add(&mut self, b: &mut dr::Builder, t: SpirvType) -> spirv::Word {
match t {
SpirvType::Base(key) => self.get_or_add_spirv_scalar(b, key),
SpirvType::Pointer(typ, mut storage) => {
SpirvType::Pointer(typ, storage) => {
let base = self.get_or_add_spirv_scalar(b, typ);
*self
.complex
@ -728,7 +719,7 @@ fn emit_function_body_ops(
builder.shift_left_logical(result_type, Some(a.dst), a.src1, a.src2)?;
}
ast::Instruction::Cvt(dets, arg) => {
emit_cvt(builder, map, opencl, dets, arg)?;
emit_cvt(builder, map, dets, arg)?;
}
ast::Instruction::Cvta(_, arg) => {
// This would be only meaningful if const/slm/global pointers
@ -754,7 +745,6 @@ fn emit_function_body_ops(
fn emit_cvt(
builder: &mut dr::Builder,
map: &mut TypeWordMap,
opencl: spirv::Word,
dets: &ast::CvtDetails,
arg: &ast::Arg2<ExpandedArgParams>,
) -> Result<(), dr::Error> {
@ -841,7 +831,6 @@ fn emit_cvt(
builder.bitcast(result_type, Some(arg.dst), src)?;
}
}
_ => todo!(),
}
Ok(())
}

View file

@ -18,6 +18,10 @@ fn main() -> Result<(), VarError> {
"cargo:rustc-link-search=native={}/bin",
spirv_tools_dir.display()
);
println!(
"cargo:rustc-link-search=native={}/lib",
spirv_tools_dir.display()
);
// dynamic linking to avoid linking to C++ runtime
println!("cargo:rustc-link-lib=dylib=SPIRV-Tools-shared");
Ok(())