mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-19 16:04:44 +00:00
Update level0 to 1.0 and unpack fatbins
This commit is contained in:
parent
d47cd1e133
commit
34dc149be1
17 changed files with 8112 additions and 4753 deletions
21
doc/NOTES.md
21
doc/NOTES.md
|
@ -54,3 +54,24 @@ PTX grammar
|
|||
-----------
|
||||
* PTX grammar rules are atrocious, keywords can be freely reused as ids without escaping
|
||||
* Modifiers can be applied to instructions in any arbitrary order. We don't support it and hope we will never have to
|
||||
|
||||
|
||||
Rust debugging
|
||||
--------------
|
||||
* Nothing works 100% well on vscode/Windows:
|
||||
* MSVC/lldb - always garbage (simple enums are fubar)
|
||||
* MSVC/cppvsdbg - sometimes garbage (nested enums are fubar)
|
||||
* GNU/lldb - mostly fine, but can't follow child processes
|
||||
* GNU/gdb - always garbage (I don't have the patience to manually QA rust-gdb on Windows) and doesn't quite understand file paths for break points
|
||||
* Neither on vscode/Linux:
|
||||
* lldb - mostly fine, but can't follow child processes
|
||||
* gdb - visualizes variables somewhat awkwardly (shows all possible variants of an enum)
|
||||
* CLion could be the solution, but intellij-rust can't load this project
|
||||
|
||||
CUDA <-> L0
|
||||
-----------
|
||||
* device ~= device
|
||||
* stream ~= command queue
|
||||
* context ~= context (1.0+)
|
||||
* graph ~= command list
|
||||
* module ~= module
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "level_zero-sys"
|
||||
version = "0.91.0"
|
||||
version = "1.0.4"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
links = "ze_loader"
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
bindgen --size_t-is-usize --default-enum-style=rust --whitelist-function ze.* /usr/include/level_zero/zex_api.h -o zex_api.rs -- -x c++ && sed -i 's/pub enum _ze_result_t/#[must_use]\npub enum _ze_result_t/g' zex_api.rs
|
||||
sed 's/^typedef uint32_t ze_.*flags_t;$//g' /usr/local/include/level_zero/ze_api.h > ze_api.h
|
||||
sed -i -r 's/ze_(.*)_flag_t/ze_\1_flags_t/g' ze_api.h
|
||||
bindgen --size_t-is-usize --default-enum-style=newtype --bitfield-enum ".*flags_t" --whitelist-function "ze.*" ze_api.h -o ze_api.rs
|
||||
sed -i 's/pub struct _ze_result_t/#[must_use]\npub struct _ze_result_t/g' ze_api.rs
|
|
@ -1,7 +1,18 @@
|
|||
use env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
fn main() {
|
||||
fn main() -> Result<(), VarError> {
|
||||
println!("cargo:rustc-link-lib=dylib=ze_loader");
|
||||
// TODO: make this windows-only
|
||||
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
|
||||
if env::var("CARGO_CFG_WINDOWS").is_ok() {
|
||||
let env = env::var("CARGO_CFG_TARGET_ENV")?;
|
||||
if env == "gnu" {
|
||||
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
|
||||
} else {
|
||||
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
|
||||
path.push("src");
|
||||
println!("cargo:rustc-link-search=native={}", path.display());
|
||||
};
|
||||
}
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -6,13 +6,14 @@ use std::{
|
|||
mem, ptr,
|
||||
};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
macro_rules! check {
|
||||
($expr:expr) => {
|
||||
let err = unsafe { $expr };
|
||||
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return Result::Err(Error::err(err));
|
||||
#[allow(unused_unsafe)]
|
||||
{
|
||||
let err = unsafe { $expr };
|
||||
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return Result::Err(Error(err));
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -26,173 +27,17 @@ macro_rules! check_panic {
|
|||
};
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Error {
|
||||
NotReady = 1,
|
||||
DeviceLost = 1879048193,
|
||||
OutOfHostMemory = 1879048194,
|
||||
OutOfDeviceMemory = 1879048195,
|
||||
ModuleBuildFailure = 1879048196,
|
||||
InsufficientPermissions = 1879113728,
|
||||
NotAvailable = 1879113729,
|
||||
Uninitialized = 2013265921,
|
||||
UnsupportedVersion = 2013265922,
|
||||
UnsupportedFeature = 2013265923,
|
||||
InvalidArgument = 2013265924,
|
||||
InvalidNullHandle = 2013265925,
|
||||
HandleObjectInUse = 2013265926,
|
||||
InvalidNullPointer = 2013265927,
|
||||
InvalidSize = 2013265928,
|
||||
UnsupportedSize = 2013265929,
|
||||
UnsupportedAlignment = 2013265930,
|
||||
InvalidSynchronizationObject = 2013265931,
|
||||
InvalidEnumeration = 2013265932,
|
||||
UnsupportedEnumeration = 2013265933,
|
||||
UnsupportedImageFormat = 2013265934,
|
||||
InvalidNativeBinary = 2013265935,
|
||||
InvalidGlobalName = 2013265936,
|
||||
InvalidKernelName = 2013265937,
|
||||
InvalidFunctionName = 2013265938,
|
||||
InvalidGroupSizeDimension = 2013265939,
|
||||
InvalidGlobalWidthDimension = 2013265940,
|
||||
InvalidKernelArgumentIndex = 2013265941,
|
||||
InvalidKernelArgumentSize = 2013265942,
|
||||
InvalidKernelAttributeValue = 2013265943,
|
||||
InvalidCommandListType = 2013265944,
|
||||
OverlappingRegions = 2013265945,
|
||||
Unknown = 2147483647,
|
||||
}
|
||||
pub struct Error(pub sys::ze_result_t);
|
||||
|
||||
impl Error {
|
||||
pub fn new<T>(r: sys::ze_result_t, t: T) -> Result<T> {
|
||||
Error::new_with(r, || t)
|
||||
}
|
||||
|
||||
pub fn new_with<T, F: FnOnce() -> T>(r: sys::ze_result_t, f: F) -> Result<T> {
|
||||
match r {
|
||||
sys::ze_result_t::ZE_RESULT_SUCCESS => Ok(f()),
|
||||
sys::ze_result_t::ZE_RESULT_NOT_READY => Err(Error::NotReady),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_DEVICE_LOST => Err(Error::DeviceLost),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY => Err(Error::OutOfDeviceMemory),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_MODULE_BUILD_FAILURE => {
|
||||
Err(Error::ModuleBuildFailure)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS => {
|
||||
Err(Error::InsufficientPermissions)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_NOT_AVAILABLE => Err(Error::NotAvailable),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Err(Error::Uninitialized),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_VERSION => Err(Error::UnsupportedVersion),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Err(Error::UnsupportedFeature),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Err(Error::InvalidArgument),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_HANDLE => Err(Error::InvalidNullHandle),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE => Err(Error::HandleObjectInUse),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_POINTER => {
|
||||
Err(Error::InvalidNullPointer)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SIZE => Err(Error::InvalidSize),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_SIZE => Err(Error::UnsupportedSize),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT => {
|
||||
Err(Error::UnsupportedAlignment)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT => {
|
||||
Err(Error::InvalidSynchronizationObject)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Err(Error::InvalidEnumeration),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION => {
|
||||
Err(Error::UnsupportedEnumeration)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT => {
|
||||
Err(Error::UnsupportedImageFormat)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NATIVE_BINARY => {
|
||||
Err(Error::InvalidNativeBinary)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_NAME => Err(Error::InvalidGlobalName),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_NAME => Err(Error::InvalidKernelName),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_FUNCTION_NAME => {
|
||||
Err(Error::InvalidFunctionName)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION => {
|
||||
Err(Error::InvalidGroupSizeDimension)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION => {
|
||||
Err(Error::InvalidGlobalWidthDimension)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX => {
|
||||
Err(Error::InvalidKernelArgumentIndex)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE => {
|
||||
Err(Error::InvalidKernelArgumentSize)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE => {
|
||||
Err(Error::InvalidKernelAttributeValue)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE => {
|
||||
Err(Error::InvalidCommandListType)
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OVERLAPPING_REGIONS => Err(Error::OverlappingRegions),
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN => Err(Error::Unknown),
|
||||
}
|
||||
}
|
||||
|
||||
fn err(r: sys::ze_result_t) -> Self {
|
||||
match r {
|
||||
sys::ze_result_t::ZE_RESULT_SUCCESS => unreachable!(),
|
||||
sys::ze_result_t::ZE_RESULT_NOT_READY => Error::NotReady,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_DEVICE_LOST => Error::DeviceLost,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Error::OutOfHostMemory,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY => Error::OutOfDeviceMemory,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_MODULE_BUILD_FAILURE => Error::ModuleBuildFailure,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS => {
|
||||
Error::InsufficientPermissions
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_NOT_AVAILABLE => Error::NotAvailable,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Error::Uninitialized,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_VERSION => Error::UnsupportedVersion,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Error::UnsupportedFeature,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Error::InvalidArgument,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_HANDLE => Error::InvalidNullHandle,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE => Error::HandleObjectInUse,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NULL_POINTER => Error::InvalidNullPointer,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SIZE => Error::InvalidSize,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_SIZE => Error::UnsupportedSize,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT => Error::UnsupportedAlignment,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT => {
|
||||
Error::InvalidSynchronizationObject
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Error::InvalidEnumeration,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION => {
|
||||
Error::UnsupportedEnumeration
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT => {
|
||||
Error::UnsupportedImageFormat
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_NATIVE_BINARY => Error::InvalidNativeBinary,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_NAME => Error::InvalidGlobalName,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_NAME => Error::InvalidKernelName,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_FUNCTION_NAME => Error::InvalidFunctionName,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION => {
|
||||
Error::InvalidGroupSizeDimension
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION => {
|
||||
Error::InvalidGlobalWidthDimension
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX => {
|
||||
Error::InvalidKernelArgumentIndex
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE => {
|
||||
Error::InvalidKernelArgumentSize
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE => {
|
||||
Error::InvalidKernelAttributeValue
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE => {
|
||||
Error::InvalidCommandListType
|
||||
}
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_OVERLAPPING_REGIONS => Error::OverlappingRegions,
|
||||
sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN => Error::Unknown,
|
||||
fn new<T>(res: sys::ze_result_t, default: T) -> Result<T> {
|
||||
if res == sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
Ok(default)
|
||||
} else {
|
||||
Err(Self(res))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -207,7 +52,7 @@ impl std::error::Error for Error {}
|
|||
|
||||
pub fn init() -> Result<()> {
|
||||
Error::new(
|
||||
unsafe { sys::zeInit(sys::ze_init_flag_t::ZE_INIT_FLAG_NONE) },
|
||||
unsafe { sys::zeInit(sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY) },
|
||||
(),
|
||||
)
|
||||
}
|
||||
|
@ -246,6 +91,9 @@ impl Driver {
|
|||
&mut len,
|
||||
result.as_mut_ptr() as *mut _
|
||||
));
|
||||
if (len as usize) < result.len() {
|
||||
result.truncate(len as usize);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
@ -260,6 +108,80 @@ impl Device {
|
|||
pub unsafe fn from_ffi(x: sys::ze_device_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn get_properties(&self) -> Result<Box<sys::ze_device_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_properties_t>() });
|
||||
check! { sys::zeDeviceGetProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub fn get_image_properties(&self) -> Result<Box<sys::ze_device_image_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_image_properties_t>() });
|
||||
check! { sys::zeDeviceGetImageProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub fn get_memory_properties(&self) -> Result<Vec<sys::ze_device_memory_properties_t>> {
|
||||
let mut count = 0u32;
|
||||
check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
|
||||
if count == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut props =
|
||||
vec![unsafe { mem::zeroed::<sys::ze_device_memory_properties_t>() }; count as usize];
|
||||
check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub unsafe fn mem_alloc_device(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
size: usize,
|
||||
alignment: usize,
|
||||
) -> Result<*mut c_void> {
|
||||
let descr = sys::ze_device_mem_alloc_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_device_mem_alloc_flags_t(0),
|
||||
ordinal: 0,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
// TODO: check current context for the device
|
||||
check! {
|
||||
sys::zeMemAllocDevice(
|
||||
ctx.0,
|
||||
&descr,
|
||||
size,
|
||||
alignment,
|
||||
self.0,
|
||||
&mut result,
|
||||
)
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Context(sys::ze_context_handle_t);
|
||||
|
||||
impl Context {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_context_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_context_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn new(drv: &Driver) -> Result<Self> {
|
||||
let ctx_desc = sys::ze_context_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_CONTEXT_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_context_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeContextCreate(drv.0, &ctx_desc, &mut result));
|
||||
Ok(Context(result))
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
|
@ -273,16 +195,23 @@ impl CommandQueue {
|
|||
Self(x)
|
||||
}
|
||||
|
||||
pub fn new(d: &Device) -> Result<Self> {
|
||||
pub fn new(ctx: &mut Context, d: &Device) -> Result<Self> {
|
||||
let que_desc = sys::ze_command_queue_desc_t {
|
||||
version: sys::ze_command_queue_desc_version_t::ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_command_queue_flag_t::ZE_COMMAND_QUEUE_FLAG_NONE,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
|
||||
pNext: ptr::null(),
|
||||
ordinal: 0,
|
||||
index: 0,
|
||||
flags: sys::ze_command_queue_flags_t(0),
|
||||
mode: sys::ze_command_queue_mode_t::ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
||||
priority: sys::ze_command_queue_priority_t::ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
||||
ordinal: 0,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeCommandQueueCreate(d.0, &que_desc, &mut result));
|
||||
check!(sys::zeCommandQueueCreate(
|
||||
ctx.0,
|
||||
d.0,
|
||||
&que_desc,
|
||||
&mut result
|
||||
));
|
||||
Ok(CommandQueue(result))
|
||||
}
|
||||
|
||||
|
@ -318,17 +247,29 @@ impl Module {
|
|||
Self(x)
|
||||
}
|
||||
|
||||
pub fn new_spirv(d: &Device, bin: &[u8], opts: Option<&CStr>) -> Result<Self> {
|
||||
Module::new(true, d, bin, opts)
|
||||
pub fn new_spirv(
|
||||
ctx: &mut Context,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> Result<Self> {
|
||||
Module::new(ctx, true, d, bin, opts)
|
||||
}
|
||||
|
||||
pub fn new_native(d: &Device, bin: &[u8]) -> Result<Self> {
|
||||
Module::new(false, d, bin, None)
|
||||
pub fn new_native(ctx: &mut Context, d: &Device, bin: &[u8]) -> Result<Self> {
|
||||
Module::new(ctx, false, d, bin, None)
|
||||
}
|
||||
|
||||
fn new(spirv: bool, d: &Device, bin: &[u8], opts: Option<&CStr>) -> Result<Self> {
|
||||
fn new(
|
||||
ctx: &mut Context,
|
||||
spirv: bool,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> Result<Self> {
|
||||
let desc = sys::ze_module_desc_t {
|
||||
version: sys::ze_module_desc_version_t::ZE_MODULE_DESC_VERSION_CURRENT,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC,
|
||||
pNext: ptr::null(),
|
||||
format: if spirv {
|
||||
sys::ze_module_format_t::ZE_MODULE_FORMAT_IL_SPIRV
|
||||
} else {
|
||||
|
@ -341,6 +282,7 @@ impl Module {
|
|||
};
|
||||
let mut result: sys::ze_module_handle_t = ptr::null_mut();
|
||||
check!(sys::zeModuleCreate(
|
||||
ctx.0,
|
||||
d.0,
|
||||
&desc,
|
||||
&mut result,
|
||||
|
@ -371,7 +313,7 @@ impl SafeRepr for f64 {}
|
|||
|
||||
pub struct DeviceBuffer<T: SafeRepr> {
|
||||
ptr: *mut c_void,
|
||||
driver: sys::ze_driver_handle_t,
|
||||
ctx: sys::ze_context_handle_t,
|
||||
len: usize,
|
||||
marker: PhantomData<T>,
|
||||
}
|
||||
|
@ -380,33 +322,33 @@ impl<T: SafeRepr> DeviceBuffer<T> {
|
|||
pub unsafe fn as_ffi(&self) -> *mut c_void {
|
||||
self.ptr
|
||||
}
|
||||
pub unsafe fn from_ffi(driver: sys::ze_driver_handle_t, ptr: *mut c_void, len: usize) -> Self {
|
||||
pub unsafe fn from_ffi(ctx: sys::ze_context_handle_t, ptr: *mut c_void, len: usize) -> Self {
|
||||
let marker = PhantomData::<T>;
|
||||
Self {
|
||||
ptr,
|
||||
driver,
|
||||
ctx,
|
||||
len,
|
||||
marker,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(drv: &Driver, dev: &Device, len: usize) -> Result<Self> {
|
||||
pub fn new(ctx: &mut Context, dev: &Device, len: usize) -> Result<Self> {
|
||||
let desc = sys::_ze_device_mem_alloc_desc_t {
|
||||
version:
|
||||
sys::ze_device_mem_alloc_desc_version_t::ZE_DEVICE_MEM_ALLOC_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_device_mem_alloc_flag_t::ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_device_mem_alloc_flags_t(0),
|
||||
ordinal: 0,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeDriverAllocDeviceMem(
|
||||
drv.0,
|
||||
check!(sys::zeMemAllocDevice(
|
||||
ctx.0,
|
||||
&desc,
|
||||
len * mem::size_of::<T>(),
|
||||
mem::align_of::<T>(),
|
||||
dev.0,
|
||||
&mut result
|
||||
));
|
||||
Ok(unsafe { Self::from_ffi(drv.0, result, len) })
|
||||
Ok(unsafe { Self::from_ffi(ctx.0, result, len) })
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
|
@ -417,7 +359,7 @@ impl<T: SafeRepr> DeviceBuffer<T> {
|
|||
impl<T: SafeRepr> Drop for DeviceBuffer<T> {
|
||||
#[allow(unused_must_use)]
|
||||
fn drop(&mut self) {
|
||||
check_panic! { sys::zeDriverFreeMem(self.driver, self.ptr) };
|
||||
check_panic! { sys::zeMemFree(self.ctx, self.ptr) };
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -431,13 +373,15 @@ impl<'a> CommandList<'a> {
|
|||
Self(x, PhantomData)
|
||||
}
|
||||
|
||||
pub fn new(dev: &Device) -> Result<Self> {
|
||||
pub fn new(ctx: &mut Context, dev: &Device) -> Result<Self> {
|
||||
let desc = sys::ze_command_list_desc_t {
|
||||
version: sys::ze_command_list_desc_version_t::ZE_COMMAND_LIST_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_command_list_flag_t::ZE_COMMAND_LIST_FLAG_NONE,
|
||||
stype: sys::_ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
|
||||
commandQueueGroupOrdinal: 0,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_command_list_flags_t(0),
|
||||
};
|
||||
let mut result: sys::ze_command_list_handle_t = ptr::null_mut();
|
||||
check!(sys::zeCommandListCreate(dev.0, &desc, &mut result));
|
||||
check!(sys::zeCommandListCreate(ctx.0, dev.0, &desc, &mut result));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
|
@ -449,19 +393,34 @@ impl<'a> CommandList<'a> {
|
|||
&mut self,
|
||||
dst: Dst,
|
||||
src: Src,
|
||||
length: Option<usize>,
|
||||
signal: Option<&Event<'a>>,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let dst = dst.into();
|
||||
let src = src.into();
|
||||
let elements = length.unwrap_or(std::cmp::max(dst.len(), src.len()));
|
||||
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let elements = std::cmp::min(dst.len(), src.len());
|
||||
let length = elements * mem::size_of::<T>();
|
||||
unsafe { self.append_memory_copy_unsafe(dst.get(), src.get(), length, signal, wait) }
|
||||
}
|
||||
|
||||
pub unsafe fn append_memory_copy_unsafe(
|
||||
&mut self,
|
||||
dst: *mut c_void,
|
||||
src: *const c_void,
|
||||
length: usize,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = Event::raw_slice(wait);
|
||||
check!(sys::zeCommandListAppendMemoryCopy(
|
||||
self.0,
|
||||
dst.get(),
|
||||
src.get(),
|
||||
elements * std::mem::size_of::<T>(),
|
||||
event,
|
||||
dst,
|
||||
src,
|
||||
length,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
@ -469,19 +428,23 @@ impl<'a> CommandList<'a> {
|
|||
pub fn append_memory_fill<T>(
|
||||
&mut self,
|
||||
dst: BufferPtrMut<'a, T>,
|
||||
pattern: T,
|
||||
signal: Option<&Event<'a>>,
|
||||
pattern: u8,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let raw_pattern = &pattern as *const T as *const _;
|
||||
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let raw_pattern = &pattern as *const u8 as *const _;
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
|
||||
let byte_len = dst.len() * mem::size_of::<T>();
|
||||
check!(sys::zeCommandListAppendMemoryFill(
|
||||
self.0,
|
||||
dst.get(),
|
||||
raw_pattern,
|
||||
mem::size_of::<T>(),
|
||||
mem::size_of::<u8>(),
|
||||
byte_len,
|
||||
event,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
@ -490,23 +453,23 @@ impl<'a> CommandList<'a> {
|
|||
&mut self,
|
||||
kernel: &'a Kernel,
|
||||
group_count: &[u32; 3],
|
||||
signal: Option<&Event<'a>>,
|
||||
wait: &[&Event<'a>],
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let gr_count = sys::ze_group_count_t {
|
||||
groupCountX: group_count[0],
|
||||
groupCountY: group_count[1],
|
||||
groupCountZ: group_count[2],
|
||||
};
|
||||
let event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let mut wait_ptrs = wait.iter().map(|e| e.0).collect::<Vec<_>>();
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
|
||||
check!(sys::zeCommandListAppendLaunchKernel(
|
||||
self.0,
|
||||
kernel.0,
|
||||
&gr_count,
|
||||
event,
|
||||
wait.len() as u32,
|
||||
wait_ptrs.as_mut_ptr(),
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr,
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
@ -528,8 +491,9 @@ pub struct FenceGuard<'a>(
|
|||
impl<'a> FenceGuard<'a> {
|
||||
fn new(q: &'a CommandQueue, cmd_list: sys::ze_command_list_handle_t) -> Result<Self> {
|
||||
let desc = sys::_ze_fence_desc_t {
|
||||
version: sys::ze_fence_desc_version_t::ZE_FENCE_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_fence_flag_t::ZE_FENCE_FLAG_NONE,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_FENCE_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_fence_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeFenceCreate(q.0, &desc, &mut result));
|
||||
|
@ -540,7 +504,7 @@ impl<'a> FenceGuard<'a> {
|
|||
impl<'a> Drop for FenceGuard<'a> {
|
||||
#[allow(unused_must_use)]
|
||||
fn drop(&mut self) {
|
||||
check_panic! { sys::zeFenceHostSynchronize(self.0, u32::max_value()) };
|
||||
check_panic! { sys::zeFenceHostSynchronize(self.0, u64::max_value()) };
|
||||
check_panic! { sys::zeFenceDestroy(self.0) };
|
||||
check_panic! { sys::zeCommandListDestroy(self.1) };
|
||||
}
|
||||
|
@ -638,10 +602,11 @@ impl<'a> EventPool<'a> {
|
|||
pub unsafe fn from_ffi(x: sys::ze_event_pool_handle_t) -> Self {
|
||||
Self(x, PhantomData)
|
||||
}
|
||||
pub fn new(driver: &Driver, count: u32, dev: Option<&[&'a Device]>) -> Result<Self> {
|
||||
pub fn new(ctx: &mut Context, count: u32, dev: Option<&[&'a Device]>) -> Result<Self> {
|
||||
let desc = sys::ze_event_pool_desc_t {
|
||||
version: sys::ze_event_pool_desc_version_t::ZE_EVENT_POOL_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_event_pool_flag_t::ZE_EVENT_POOL_FLAG_DEFAULT,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_event_pool_flags_t(0),
|
||||
count: count,
|
||||
};
|
||||
let mut dev = dev.map(|d| d.iter().map(|d| d.0).collect::<Vec<_>>());
|
||||
|
@ -649,7 +614,7 @@ impl<'a> EventPool<'a> {
|
|||
let dev_ptr = dev.as_mut().map_or(ptr::null_mut(), |d| d.as_mut_ptr());
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeEventPoolCreate(
|
||||
driver.0,
|
||||
ctx.0,
|
||||
&desc,
|
||||
dev_len,
|
||||
dev_ptr,
|
||||
|
@ -679,15 +644,25 @@ impl<'a> Event<'a> {
|
|||
|
||||
pub fn new(pool: &'a EventPool, index: u32) -> Result<Self> {
|
||||
let desc = sys::ze_event_desc_t {
|
||||
version: sys::ze_event_desc_version_t::ZE_EVENT_DESC_VERSION_CURRENT,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_DESC,
|
||||
pNext: ptr::null(),
|
||||
index: index,
|
||||
signal: sys::ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_NONE,
|
||||
wait: sys::ze_event_scope_flag_t::ZE_EVENT_SCOPE_FLAG_NONE,
|
||||
signal: sys::ze_event_scope_flags_t(0),
|
||||
wait: sys::ze_event_scope_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeEventCreate(pool.0, &desc, &mut result));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
unsafe fn raw_slice(e: &mut [Event]) -> (u32, *mut sys::ze_event_handle_t) {
|
||||
let ptr = if e.len() == 0 {
|
||||
ptr::null_mut()
|
||||
} else {
|
||||
e.as_mut_ptr()
|
||||
};
|
||||
(e.len() as u32, ptr as *mut sys::ze_event_handle_t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for Event<'a> {
|
||||
|
@ -710,8 +685,9 @@ impl<'a> Kernel<'a> {
|
|||
|
||||
pub fn new_resident(module: &'a Module, name: &CStr) -> Result<Self> {
|
||||
let desc = sys::ze_kernel_desc_t {
|
||||
version: sys::ze_kernel_desc_version_t::ZE_KERNEL_DESC_VERSION_CURRENT,
|
||||
flags: sys::ze_kernel_flag_t::ZE_KERNEL_FLAG_FORCE_RESIDENCY,
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_KERNEL_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_kernel_flags_t::ZE_KERNEL_FLAG_FORCE_RESIDENCY,
|
||||
pKernelName: name.as_ptr() as *const _,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
|
@ -719,18 +695,11 @@ impl<'a> Kernel<'a> {
|
|||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
pub fn set_attribute_bool(
|
||||
pub fn set_indirect_access(
|
||||
&mut self,
|
||||
attr: sys::ze_kernel_attribute_t,
|
||||
value: bool,
|
||||
flags: sys::ze_kernel_indirect_access_flags_t,
|
||||
) -> Result<()> {
|
||||
let ze_bool: sys::ze_bool_t = if value { 1 } else { 0 };
|
||||
check!(sys::zeKernelSetAttribute(
|
||||
self.0,
|
||||
attr,
|
||||
mem::size_of::<sys::ze_bool_t>() as u32,
|
||||
&ze_bool as *const _ as *const _
|
||||
));
|
||||
check!(sys::zeKernelSetIndirectAccess(self.0, flags));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -771,3 +740,16 @@ impl<'a> Drop for Kernel<'a> {
|
|||
check_panic! { sys::zeKernelDestroy(self.0) };
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `Event` slices are reinterpreted as slices of raw event handles, so
    /// the wrapper must be exactly as large as the handle it wraps.
    #[test]
    fn event_has_correct_layout() {
        let wrapper_size = mem::size_of::<Event>();
        let handle_size = mem::size_of::<sys::ze_event_handle_t>();
        assert_eq!(wrapper_size, handle_size);
    }
}
|
||||
|
|
|
@ -9,6 +9,9 @@ name = "nvcuda"
|
|||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
ptx = { path = "../ptx" }
|
||||
level_zero = { path = "../level_zero" }
|
||||
level_zero-sys = { path = "../level_zero-sys" }
|
||||
lazy_static = "1.4"
|
||||
num_enum = "0.4"
|
||||
num_enum = "0.4"
|
||||
lz4 = "1.23"
|
|
@ -1,9 +1,10 @@
|
|||
use num_enum::TryFromPrimitive;
|
||||
use std::convert::TryFrom;
|
||||
use std::os::raw::c_int;
|
||||
use std::ptr;
|
||||
use std::{mem, ptr};
|
||||
use crate::ze;
|
||||
|
||||
#[repr(C)]
|
||||
#[repr(u32)]
|
||||
#[allow(non_camel_case_types)]
|
||||
pub enum Result {
|
||||
SUCCESS = 0,
|
||||
|
@ -134,14 +135,14 @@ pub enum DeviceTextureAttribute {
|
|||
|
||||
|
||||
impl Result {
|
||||
pub fn from_l0(result: l0::ze_result_t) -> Result {
|
||||
pub fn from_l0(result: l0_sys::ze_result_t) -> Result {
|
||||
match result {
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS => Result::SUCCESS,
|
||||
l0::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Result::ERROR_NOT_INITIALIZED,
|
||||
l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE,
|
||||
l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE,
|
||||
l0::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY,
|
||||
l0::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED,
|
||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS => Result::SUCCESS,
|
||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Result::ERROR_NOT_INITIALIZED,
|
||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE,
|
||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE,
|
||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY,
|
||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED,
|
||||
_ => Result::ERROR_UNKNOWN
|
||||
}
|
||||
}
|
||||
|
@ -157,11 +158,11 @@ pub struct Uuid {
|
|||
pub struct Device(pub c_int);
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct DevicePtr(usize);
|
||||
pub struct DevicePtr(pub usize);
|
||||
|
||||
#[repr(transparent)]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct Context(*mut ());
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub struct Context(pub *mut ze::Context);
|
||||
impl Context {
|
||||
pub fn null() -> Context {
|
||||
Context(ptr::null_mut())
|
||||
|
@ -169,7 +170,21 @@ impl Context {
|
|||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Module(*mut ());
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub struct Module(*mut ze::Module);
|
||||
|
||||
impl Module {
|
||||
pub fn null() -> Module {
|
||||
Module(ptr::null_mut())
|
||||
}
|
||||
|
||||
pub fn new(inner: ze::Module) -> Module {
|
||||
let mut boxed = Box::new(inner);
|
||||
let result = Module(boxed.as_mut());
|
||||
mem::forget(boxed);
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Function(*mut ());
|
||||
|
|
1
notcuda/src/cuda/mod.rs
Normal file
1
notcuda/src/cuda/mod.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod rt;
|
2
notcuda/src/cuda/rt.rs
Normal file
2
notcuda/src/cuda/rt.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
/// Uninhabited marker type: it has no variants, so no value of it can ever be constructed.
/// NOTE(review): presumably stands for per-context state owned by the CUDA runtime and is
/// only meant to be used behind raw pointers — confirm against the callers.
pub enum ContextState {}
|
||||
/// Uninhabited marker type: it has no variants, so no value of it can ever be constructed.
/// NOTE(review): presumably names the CUDA runtime object that manages context state and is
/// only meant to be used behind raw pointers — confirm against the callers.
pub enum ContextStateManager {}
|
|
@ -1,8 +1,9 @@
|
|||
use super::cu;
|
||||
use crate::cu;
|
||||
use crate::{cuda, ze};
|
||||
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
use std::os::raw::{c_int, c_ulong};
|
||||
use std::os::raw::{c_int, c_uint, c_ulong, c_ushort};
|
||||
use std::{ffi::c_void, ptr, slice};
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn cuGetExportTable(
|
||||
|
@ -11,24 +12,24 @@ pub unsafe extern "C" fn cuGetExportTable(
|
|||
) -> cu::Result {
|
||||
if table == ptr::null_mut() || id == ptr::null_mut() {
|
||||
cu::Result::ERROR_INVALID_VALUE
|
||||
} else if *id == CU_ETID_ToolsRuntimeCallbackHooks {
|
||||
*table = TABLE0.as_ptr() as *const _;
|
||||
} else if *id == TOOLS_RUNTIME_CALLBACK_HOOKS_GUID {
|
||||
*table = TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _;
|
||||
cu::Result::SUCCESS
|
||||
} else if *id == CU_ETID_CudartInterface {
|
||||
*table = TABLE1.as_ptr() as *const _;
|
||||
} else if *id == CUDART_INTERFACE_GUID {
|
||||
*table = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
|
||||
cu::Result::SUCCESS
|
||||
} else if *id == CU_ETID_ToolsTls {
|
||||
} else if *id == TOOLS_TLS_GUID {
|
||||
*table = 1 as _;
|
||||
cu::Result::SUCCESS
|
||||
} else if *id == CU_ETID_ContextLocalStorageInterface_v0301 {
|
||||
*table = ContextLocalStorageInterface_v0301_VTABLE.as_ptr() as *const _;
|
||||
} else if *id == CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID {
|
||||
*table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _;
|
||||
cu::Result::SUCCESS
|
||||
} else {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
}
|
||||
|
||||
const CU_ETID_ToolsRuntimeCallbackHooks: cu::Uuid = cu::Uuid {
|
||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: cu::Uuid = cu::Uuid {
|
||||
x: [
|
||||
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
|
||||
0x66,
|
||||
|
@ -40,97 +41,254 @@ union VTableEntry {
|
|||
length: usize,
|
||||
}
|
||||
unsafe impl Sync for VTableEntry {}
|
||||
const TABLE0_LEN: usize = 7;
|
||||
static TABLE0: [VTableEntry; TABLE0_LEN] = [
|
||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH: usize = 7;
|
||||
static TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE: [VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH] = [
|
||||
VTableEntry {
|
||||
length: mem::size_of::<[VTableEntry; TABLE0_LEN]>(),
|
||||
length: mem::size_of::<[VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH]>(),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry {
|
||||
ptr: table0_fn1 as *const (),
|
||||
ptr: runtime_callback_hooks_fn1 as *const (),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry {
|
||||
ptr: table0_fn5 as *const (),
|
||||
ptr: runtime_callback_hooks_fn5 as *const (),
|
||||
},
|
||||
];
|
||||
static mut TABLE0_FN1_SPACE: [u8; 512] = [0; 512];
|
||||
static mut TABLE0_FN5_SPACE: [u8; 2] = [0; 2];
|
||||
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE: [u8; 512] = [0; 512];
|
||||
|
||||
unsafe extern "C" fn table0_fn1(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
|
||||
*ptr = TABLE0_FN1_SPACE.as_mut_ptr();
|
||||
*size = TABLE0_FN1_SPACE.len();
|
||||
return TABLE0_FN1_SPACE.as_mut_ptr();
|
||||
unsafe extern "C" fn runtime_callback_hooks_fn1(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
|
||||
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.as_mut_ptr();
|
||||
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.len();
|
||||
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.as_mut_ptr();
|
||||
}
|
||||
|
||||
unsafe extern "C" fn table0_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
|
||||
*ptr = TABLE0_FN5_SPACE.as_mut_ptr();
|
||||
*size = TABLE0_FN5_SPACE.len();
|
||||
return TABLE0_FN5_SPACE.as_mut_ptr();
|
||||
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
|
||||
|
||||
unsafe extern "C" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
|
||||
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
||||
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
|
||||
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
||||
}
|
||||
|
||||
const CU_ETID_CudartInterface: cu::Uuid = cu::Uuid {
|
||||
const CUDART_INTERFACE_GUID: cu::Uuid = cu::Uuid {
|
||||
x: [
|
||||
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
|
||||
0xf9
|
||||
0xf9,
|
||||
],
|
||||
};
|
||||
|
||||
const TABLE1_LEN: usize = 10;
|
||||
static TABLE1: [VTableEntry; TABLE1_LEN] = [
|
||||
const CUDART_INTERFACE_LENGTH: usize = 10;
|
||||
static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
|
||||
VTableEntry {
|
||||
length: mem::size_of::<[VTableEntry; TABLE1_LEN]>(),
|
||||
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry {
|
||||
ptr: table1_fn1 as *const (),
|
||||
ptr: cudart_interface_fn1 as *const (),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry {
|
||||
ptr: table1_fn6 as *const (),
|
||||
ptr: get_module_from_cubin as *const (),
|
||||
},
|
||||
VTableEntry {
|
||||
ptr: cudart_interface_fn6 as *const (),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
];
|
||||
|
||||
unsafe extern "C" fn table1_fn1(_: *mut c_ulong, _: c_int) -> c_int {
|
||||
unsafe extern "C" fn cudart_interface_fn1(_: *mut c_ulong, _: c_int) -> c_int {
|
||||
0
|
||||
}
|
||||
|
||||
unsafe extern "C" fn table1_fn6(_: u64) { }
|
||||
/*
|
||||
fat_cubin:
|
||||
typedef struct {
|
||||
int magic;
|
||||
int version;
|
||||
const unsigned long long* data;
|
||||
void *filename_or_fatbins; /* version 1: offline filename,
|
||||
* version 2: array of prelinked fatbins */
|
||||
} __fatBinC_Wrapper_t;
|
||||
|
||||
const CU_ETID_ToolsTls: cu::Uuid = cu::Uuid {
|
||||
x: [0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec, 0xdc],
|
||||
`data` starts with this header:
|
||||
#define FATBIN_MAGIC 0xBA55ED50U
|
||||
#define OLD_STYLE_FATBIN_MAGIC 0x1EE55A01U
|
||||
#define FATBIN_VERSION 0x0001U
|
||||
|
||||
struct fatbinary_ALIGN_(8) fatBinaryHeader
|
||||
{
|
||||
unsigned int magic; // FATBIN_MAGIC
|
||||
unsigned short version; // FATBIN_VERSION
|
||||
unsigned short headerSize;
|
||||
unsigned long long int fatSize; // size of the entire fat binary excluding this header
|
||||
};
|
||||
|
||||
the binary data follows the header
|
||||
|
||||
const CU_ETID_ContextLocalStorageInterface_v0301: cu::Uuid = cu::Uuid {
|
||||
x: [0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95, 0x93],
|
||||
*/
|
||||
|
||||
const FATBINC_MAGIC: c_uint = 0x466243B1;
|
||||
const FATBINC_VERSION: c_uint = 0x1;
|
||||
|
||||
#[repr(C)]
|
||||
struct FatbincWrapper {
|
||||
magic: c_uint,
|
||||
version: c_uint,
|
||||
data: *const FatbinHeader,
|
||||
filename_or_fatbins: *const c_void,
|
||||
}
|
||||
|
||||
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
|
||||
const FATBIN_VERSION: c_ushort = 0x01;
|
||||
|
||||
/// Header at the start of the blob referenced by `FatbincWrapper::data`.
///
/// Mirrors the C `fatBinaryHeader` quoted in the comment above: a magic number, a version,
/// the size of this header and the total size of the file entries that follow it.
#[repr(C, align(8))]
struct FatbinHeader {
    magic: c_uint,
    version: c_ushort,
    header_size: c_ushort,
    // Declared as `unsigned long long` on the C side, i.e. always 64 bits. `c_ulong` is
    // only 32 bits wide on LLP64 targets such as Windows, so a fixed-width type is used.
    files_size: u64, // excluding frame header, size of all blocks framed by this frame
}
|
||||
|
||||
const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
|
||||
const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
|
||||
|
||||
// assembly file header is a bit different, but we don't care
|
||||
/// Header of one file entry inside a fat binary.
///
/// As used in this file: the payload starts `header_size` bytes after the start of the
/// header and is `payload_size` bytes long; the next entry starts
/// `header_size + padded_payload_size` bytes after this one; `uncompressed_payload` is the
/// size handed to the LZ4 decompressor.
///
/// NOTE(review): the last three fields were declared as `c_ulong`, which is 64 bits on
/// LP64 targets but 32 bits on Windows. They are fixed to `u64` here so the field offsets
/// do not depend on the target — confirm against a fatbin produced on Windows.
#[repr(C)]
struct FatbinFileHeader {
    kind: c_ushort,
    version: c_ushort,
    header_size: c_uint,
    padded_payload_size: c_uint,
    unknown0: c_uint, // check if it's written into separately
    payload_size: c_uint,
    unknown1: c_uint,
    unknown2: c_uint,
    sm_version: c_uint,
    bit_width: c_uint,
    unknown3: c_uint,
    unknown4: u64,
    unknown5: u64,
    uncompressed_payload: u64,
}
|
||||
|
||||
unsafe extern "C" fn get_module_from_cubin(
|
||||
result: *mut cu::Module,
|
||||
fatbinc_wrapper: *const FatbincWrapper,
|
||||
_: *mut c_void,
|
||||
_: *mut c_void,
|
||||
) -> cu::Result {
|
||||
if result == ptr::null_mut() || (*fatbinc_wrapper).magic != FATBINC_MAGIC || (*fatbinc_wrapper).version != FATBINC_VERSION {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
let fatbin_header = (*fatbinc_wrapper).data;
|
||||
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
|
||||
let end = file.add((*fatbin_header).files_size as usize);
|
||||
let mut ptx_files = get_ptx_files(file, end);
|
||||
ptx_files.sort_unstable_by_key(|f| c_uint::max_value() - (**f).sm_version);
|
||||
for file in ptx_files {
|
||||
let slice = slice::from_raw_parts(
|
||||
(file as *const u8).add((*file).header_size as usize),
|
||||
(*file).payload_size as usize,
|
||||
);
|
||||
let kernel_text =
|
||||
lz4::block::decompress(slice, Some((*file).uncompressed_payload as i32)).unwrap();
|
||||
let module = ze::Module {
|
||||
ptx_text: kernel_text,
|
||||
};
|
||||
*result = cu::Module::new(module);
|
||||
return cu::Result::SUCCESS
|
||||
}
|
||||
cu::Result::ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
||||
}
|
||||
|
||||
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
|
||||
let mut index = file;
|
||||
let mut result = Vec::new();
|
||||
while index < end {
|
||||
let file = index as *const FatbinFileHeader;
|
||||
if (*file).kind == FATBIN_FILE_HEADER_KIND_PTX
|
||||
&& (*file).version == FATBIN_FILE_HEADER_VERSION_CURRENT
|
||||
{
|
||||
result.push(file)
|
||||
}
|
||||
index = index.add((*file).header_size as usize + (*file).padded_payload_size as usize);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
unsafe extern "C" fn cudart_interface_fn6(_: u64) {}
|
||||
|
||||
const TOOLS_TLS_GUID: cu::Uuid = cu::Uuid {
|
||||
x: [
|
||||
0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
|
||||
0xdc,
|
||||
],
|
||||
};
|
||||
|
||||
// the table is much bigger and start earlier
|
||||
static ContextLocalStorageInterface_v0301_VTABLE: [VTableEntry; 4] = [
|
||||
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn0 as *const () },
|
||||
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn1 as *const () },
|
||||
VTableEntry { ptr: ContextLocalStorageInterface_v0301_VTABLE_fn2 as *const () },
|
||||
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: cu::Uuid = cu::Uuid {
|
||||
x: [
|
||||
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
|
||||
0x93,
|
||||
],
|
||||
};
|
||||
|
||||
// the table is much bigger and starts earlier
|
||||
static CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE: [VTableEntry; 4] = [
|
||||
VTableEntry {
|
||||
ptr: context_local_storage_ctor as *const (),
|
||||
},
|
||||
VTableEntry {
|
||||
ptr: context_local_storage_dtor as *const (),
|
||||
},
|
||||
VTableEntry {
|
||||
ptr: context_local_storage_get_state as *const (),
|
||||
},
|
||||
VTableEntry { ptr: ptr::null() },
|
||||
];
|
||||
|
||||
// some kind of ctor
|
||||
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn0(ms: *mut usize, _: *mut (), _: *mut (), _: *mut ()) -> u32 {
|
||||
0
|
||||
unsafe extern "C" fn context_local_storage_ctor(
|
||||
cu_ctx: cu::Context, // always zero
|
||||
mgr: *mut cuda::rt::ContextStateManager,
|
||||
ctx_state: *mut cuda::rt::ContextState,
|
||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||
dtor_cb: extern "C" fn(
|
||||
cu::Context,
|
||||
*mut cuda::rt::ContextStateManager,
|
||||
*mut cuda::rt::ContextState,
|
||||
),
|
||||
) -> cu::Result {
|
||||
if cu_ctx.0 == ptr::null_mut() {
|
||||
return cu::Result::ERROR_NOT_SUPPORTED;
|
||||
}
|
||||
(*cu_ctx.0).cuda_manager = mgr;
|
||||
(*cu_ctx.0).cuda_state = ctx_state;
|
||||
(*cu_ctx.0).cuda_dtor_cb = dtor_cb;
|
||||
cu::Result::SUCCESS
|
||||
}
|
||||
|
||||
// some kind of dtor
|
||||
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn1(ms: *mut usize, _: *mut ()) -> u32 {
|
||||
unsafe extern "C" fn context_local_storage_dtor(_: *mut usize, _: *mut ()) -> u32 {
|
||||
0
|
||||
}
|
||||
|
||||
unsafe extern "C" fn ContextLocalStorageInterface_v0301_VTABLE_fn2(_: *mut *mut (), _: *mut (), _: *mut ()) -> u32 {
|
||||
0
|
||||
}
|
||||
unsafe extern "C" fn context_local_storage_get_state(
|
||||
ctx_state: *mut *mut cuda::rt::ContextState,
|
||||
cu_ctx: cu::Context,
|
||||
_: *mut cuda::rt::ContextStateManager,
|
||||
) -> cu::Result {
|
||||
if cu_ctx == cu::Context::null() {
|
||||
return cu::Result::ERROR_INVALID_CONTEXT;
|
||||
}
|
||||
*ctx_state = (*cu_ctx.0).cuda_state;
|
||||
cu::Result::SUCCESS
|
||||
}
|
||||
|
|
|
@ -1,28 +1,17 @@
|
|||
extern crate level_zero_sys as l0;
|
||||
extern crate level_zero as l0;
|
||||
extern crate level_zero_sys as l0_sys;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate lz4;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::convert::TryFrom;
|
||||
use std::os::raw::{c_char, c_int, c_uint};
|
||||
use std::ptr;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use ze::Versioned;
|
||||
|
||||
#[macro_use]
|
||||
macro_rules! l0_check_err {
|
||||
($exp:expr) => {
|
||||
{
|
||||
let result = unsafe{ $exp };
|
||||
if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return Err(result);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
use std::{ffi::c_void, sync::Mutex};
|
||||
|
||||
mod cu;
|
||||
mod cuda;
|
||||
mod export_table;
|
||||
mod ze;
|
||||
|
||||
|
@ -31,32 +20,32 @@ thread_local! {
|
|||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref GLOBAL_STATE: Mutex<Option<Driver>> = Mutex::new(None);
|
||||
static ref GLOBAL_STATE: Mutex<Option<Context>> = Mutex::new(None);
|
||||
}
|
||||
|
||||
struct Driver {
|
||||
base: l0::ze_driver_handle_t,
|
||||
devices: Vec::<ze::Device>
|
||||
struct Context {
|
||||
base: l0::Context,
|
||||
devices: Vec<ze::Device>,
|
||||
}
|
||||
unsafe impl Send for Driver {}
|
||||
unsafe impl Sync for Driver {}
|
||||
unsafe impl Send for Context {}
|
||||
unsafe impl Sync for Context {}
|
||||
|
||||
impl Driver {
|
||||
fn new() -> Result<Driver, l0::ze_result_t> {
|
||||
let mut driver_count = 1;
|
||||
let mut handle = ptr::null_mut();
|
||||
l0_check_err!{ l0::zeDriverGet(&mut driver_count, &mut handle) };
|
||||
let mut count = 0;
|
||||
l0_check_err! { l0::zeDeviceGet(handle, &mut count, ptr::null_mut()) }
|
||||
let mut devices = vec![ptr::null_mut(); count as usize];
|
||||
l0_check_err! { l0::zeDeviceGet(handle, &mut count, devices.as_mut_ptr()) }
|
||||
if (count as usize) < devices.len() {
|
||||
devices.truncate(count as usize);
|
||||
}
|
||||
Ok(Driver{ base: handle, devices: ze::Device::new_vec(devices) })
|
||||
impl Context {
|
||||
fn new() -> l0::Result<Context> {
|
||||
let driver = l0::Driver::get()?;
|
||||
let ze_devices = driver[0].devices()?;
|
||||
let mut ctx = l0::Context::new(&driver[0])?;
|
||||
let devices = ze_devices
|
||||
.into_iter()
|
||||
.map(|d| ze::Device::new(&mut ctx, d))
|
||||
.collect::<Result<_, _>>()?;
|
||||
Ok(Context {
|
||||
base: ctx,
|
||||
devices: devices,
|
||||
})
|
||||
}
|
||||
|
||||
fn call<F: FnOnce(&mut Driver) -> l0::ze_result_t>(f: F) -> cu::Result {
|
||||
fn call<F: FnOnce(&mut Context) -> l0_sys::ze_result_t>(f: F) -> cu::Result {
|
||||
let mut lock = GLOBAL_STATE.try_lock();
|
||||
if let Ok(ref mut mutex) = lock {
|
||||
match **mutex {
|
||||
|
@ -70,30 +59,43 @@ impl Driver {
|
|||
}
|
||||
}
|
||||
|
||||
fn call_device<F: FnOnce(&mut ze::Device) -> l0::ze_result_t>(cu::Device(dev): cu::Device, f: F) -> cu::Result {
|
||||
fn call2<F: FnOnce(&mut Context) -> l0::Result<()>>(f: F) -> cu::Result {
|
||||
Context::call(|ctx| match f(ctx) {
|
||||
Ok(()) => l0::sys::ze_result_t::ZE_RESULT_SUCCESS,
|
||||
Err(err) => err.0,
|
||||
})
|
||||
}
|
||||
|
||||
fn call_device<F: FnOnce(&mut ze::Device) -> l0::Result<()>>(
|
||||
cu::Device(dev): cu::Device,
|
||||
f: F,
|
||||
) -> cu::Result {
|
||||
if dev < 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
let dev = dev as usize;
|
||||
Driver::call(|driver| {
|
||||
Context::call(|driver| {
|
||||
if dev >= driver.devices.len() {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
match f(&mut driver.devices[dev]) {
|
||||
Ok(()) => l0::sys::ze_result_t::ZE_RESULT_SUCCESS,
|
||||
Err(err) => err.0,
|
||||
}
|
||||
f(&mut driver.devices[dev])
|
||||
})
|
||||
}
|
||||
|
||||
fn device_get_count(&self, count: *mut i32) -> l0::ze_result_t {
|
||||
fn device_get_count(&self, count: *mut i32) -> l0_sys::ze_result_t {
|
||||
unsafe { *count = self.devices.len() as i32 };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
|
||||
fn device_get(&self, device: *mut cu::Device, ordinal: c_int) -> l0::ze_result_t {
|
||||
fn device_get(&self, device: *mut cu::Device, ordinal: c_int) -> l0_sys::ze_result_t {
|
||||
if (ordinal as usize) >= self.devices.len() {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
unsafe { *device = cu::Device(ordinal) };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -108,16 +110,16 @@ pub unsafe extern "C" fn cuDriverGetVersion(version: *mut c_int) -> cu::Result {
|
|||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn cuInit(_: c_uint) -> cu::Result {
|
||||
let l0_init = l0::zeInit(l0::ze_init_flag_t::ZE_INIT_FLAG_GPU_ONLY);
|
||||
if l0_init != l0::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return cu::Result::from_l0(l0_init);
|
||||
let l0_sys_init = l0_sys::zeInit(l0_sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY);
|
||||
if l0_sys_init != l0_sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return cu::Result::from_l0(l0_sys_init);
|
||||
}
|
||||
let mut lock = GLOBAL_STATE.try_lock();
|
||||
if let Ok(ref mut mutex) = lock {
|
||||
if let None = **mutex {
|
||||
match Driver::new() {
|
||||
match Context::new() {
|
||||
Ok(state) => **mutex = Some(state),
|
||||
Err(err) => return cu::Result::from_l0(err)
|
||||
Err(err) => return cu::Result::from_l0(err.0),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -131,7 +133,7 @@ pub extern "C" fn cuDeviceGetCount(count: *mut c_int) -> cu::Result {
|
|||
if count == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|driver| driver.device_get_count(count))
|
||||
Context::call(|driver| driver.device_get_count(count))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -139,15 +141,19 @@ pub extern "C" fn cuDeviceGet(device: *mut cu::Device, ordinal: c_int) -> cu::Re
|
|||
if ordinal < 0 || device == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|driver| driver.device_get(device, ordinal))
|
||||
Context::call(|driver| driver.device_get(device, ordinal))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev_idx: cu::Device) -> cu::Result {
|
||||
pub extern "C" fn cuDeviceGetName(
|
||||
name: *mut c_char,
|
||||
len: c_int,
|
||||
dev_idx: cu::Device,
|
||||
) -> cu::Result {
|
||||
if name == ptr::null_mut() || len <= 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call_device(dev_idx, |dev| dev.get_name(name, len))
|
||||
Context::call_device(dev_idx, |dev| dev.get_name(name, len))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -155,27 +161,32 @@ pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) ->
|
|||
if bytes == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call_device(dev_idx, |dev| dev.total_mem(bytes))
|
||||
Context::call_device(dev_idx, |dev| dev.total_mem(bytes))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: c_int, dev_idx: cu::Device) -> cu::Result {
|
||||
|
||||
pub extern "C" fn cuDeviceGetAttribute(
|
||||
pi: *mut c_int,
|
||||
attrib: c_int,
|
||||
dev_idx: cu::Device,
|
||||
) -> cu::Result {
|
||||
if pi == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
let attrib = match u8::try_from(attrib) {
|
||||
Ok(a) => a,
|
||||
Err(_) => return cu::Result::ERROR_INVALID_VALUE
|
||||
Err(_) => return cu::Result::ERROR_INVALID_VALUE,
|
||||
};
|
||||
match cu::DeviceAttribute::try_new(attrib) {
|
||||
Some(cu::DeviceAttribute::Static(a)) => {
|
||||
unsafe { *pi = ze::Device::get_attribute_static(a) };
|
||||
cu::Result::SUCCESS
|
||||
},
|
||||
Some(cu::DeviceAttribute::Dynamic(a)) => Driver::call_device(dev_idx, |dev| dev.get_attribute(pi, a)),
|
||||
}
|
||||
Some(cu::DeviceAttribute::Dynamic(a)) => {
|
||||
Context::call_device(dev_idx, |dev| dev.get_attribute(pi, a))
|
||||
}
|
||||
// TODO: add support for more properties
|
||||
None => cu::Result::SUCCESS
|
||||
None => cu::Result::SUCCESS,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,16 +195,14 @@ pub extern "C" fn cuDeviceGetUuid(uuid: *mut cu::Uuid, dev_idx: cu::Device) -> c
|
|||
if uuid == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call_device(dev_idx, |dev| dev.get_uuid(uuid))
|
||||
Context::call_device(dev_idx, |dev| dev.get_uuid(uuid))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuCtxGetCurrent(pctx: *mut cu::Context) -> cu::Result {
|
||||
let ctx = CONTEXT_STACK.with(|stack| {
|
||||
match stack.borrow().last() {
|
||||
Some(ctx) => ctx.clone(),
|
||||
None => cu::Context::null()
|
||||
}
|
||||
let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() {
|
||||
Some(ctx) => ctx.clone(),
|
||||
None => cu::Context::null(),
|
||||
});
|
||||
unsafe { *pctx = ctx };
|
||||
cu::Result::SUCCESS
|
||||
|
@ -206,7 +215,7 @@ pub extern "C" fn cuCtxSetCurrent(ctx: cu::Context) -> cu::Result {
|
|||
stack.pop();
|
||||
if ctx != cu::Context::null() {
|
||||
stack.push(ctx);
|
||||
}
|
||||
}
|
||||
});
|
||||
cu::Result::SUCCESS
|
||||
}
|
||||
|
@ -216,42 +225,67 @@ pub extern "C" fn cuMemAlloc_v2(dptr: *mut cu::DevicePtr, bytesize: usize) -> cu
|
|||
if dptr == ptr::null_mut() || bytesize == 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|drv| {
|
||||
let mut descr = l0::ze_device_mem_alloc_desc_t::new();
|
||||
descr.flags = l0::ze_device_mem_alloc_flag_t::ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT;
|
||||
descr.ordinal = 0;
|
||||
// TODO: check current context for the device
|
||||
unsafe { l0::zeDriverAllocDeviceMem(drv.base, &descr, bytesize, 0, drv.devices[0].0, dptr as *mut _) }
|
||||
Context::call2(|drv| {
|
||||
unsafe {
|
||||
*dptr = cu::DevicePtr(drv.devices[0].base.mem_alloc_device(
|
||||
&mut drv.base,
|
||||
bytesize,
|
||||
0,
|
||||
)? as usize)
|
||||
};
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuCtxDestroy_v2(ctx: cu::Context) -> cu::Result
|
||||
{
|
||||
pub extern "C" fn cuCtxDestroy_v2(ctx: cu::Context) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuMemcpyDtoH_v2(dstHost: *mut (), srcDevice: cu::DevicePtr, byte_count: usize) -> cu::Result
|
||||
{
|
||||
pub extern "C" fn cuMemcpyDtoH_v2(
|
||||
dst_host: *mut c_void,
|
||||
src_device: cu::DevicePtr,
|
||||
byte_count: usize,
|
||||
) -> cu::Result {
|
||||
if dst_host == ptr::null_mut() || src_device.0 == 0 || byte_count == 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
// TODO: choose the right device from device ptr
|
||||
Context::call(|drv| {
|
||||
let cu_dev = &mut drv.devices[0];
|
||||
// Safe, because there's no Drop impl for device
|
||||
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
|
||||
let queue = &mut cu_dev.default_queue;
|
||||
let result = unsafe {
|
||||
memcpy_impl(
|
||||
&mut drv.base,
|
||||
dst_host,
|
||||
src_device.0 as *mut _,
|
||||
byte_count,
|
||||
&dev,
|
||||
queue,
|
||||
)
|
||||
};
|
||||
match result {
|
||||
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
|
||||
Err(e) => e.0,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuMemFree_v2(srcDevice: cu::DevicePtr) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuMemFree_v2(srcDevice: cu::DevicePtr) -> cu::Result
|
||||
{
|
||||
pub extern "C" fn cuModuleLoad(module: *mut cu::Module, fname: *const c_char) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuModuleLoad(module: *mut cu::Module, fname: *const c_char) -> cu::Result
|
||||
{
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuGetErrorString(error: cu::Result, pStr: *mut *const c_char) -> cu::Result
|
||||
{
|
||||
pub extern "C" fn cuGetErrorString(error: cu::Result, pStr: *mut *const c_char) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
|
@ -267,43 +301,97 @@ pub extern "C" fn cuLaunchKernel(
|
|||
sharedMemBytes: c_uint,
|
||||
hStream: cu::Stream,
|
||||
kernelParams: *mut *mut (),
|
||||
extra: *mut *mut ())
|
||||
-> cu::Result
|
||||
{
|
||||
extra: *mut *mut (),
|
||||
) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
#[allow(non_snake_case)]
|
||||
pub extern "C" fn cuModuleLoadDataEx(
|
||||
module: *mut cu::Module,
|
||||
image: *const (),
|
||||
numOptions: c_uint,
|
||||
options: *mut cu::JitOption,
|
||||
optionValues: *mut *mut ()) -> cu::Result
|
||||
{
|
||||
optionValues: *mut *mut (),
|
||||
) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuMemcpyHtoD_v2(
|
||||
dstDevice: cu::DevicePtr,
|
||||
srcHost: *const (),
|
||||
ByteCount: usize) -> cu::Result
|
||||
{
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
dst_device: cu::DevicePtr,
|
||||
src_host: *const c_void,
|
||||
byte_count: usize,
|
||||
) -> cu::Result {
|
||||
if dst_device.0 == 0 || src_host == ptr::null_mut() || byte_count == 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
// TODO: choose the right device from device ptr
|
||||
Context::call(|drv| {
|
||||
let cu_dev = &mut drv.devices[0];
|
||||
// Safe, because there's no Drop impl for device
|
||||
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
|
||||
let queue = &mut cu_dev.default_queue;
|
||||
let result = unsafe {
|
||||
memcpy_impl(
|
||||
&mut drv.base,
|
||||
dst_device.0 as *mut _,
|
||||
src_host,
|
||||
byte_count,
|
||||
&dev,
|
||||
queue,
|
||||
)
|
||||
};
|
||||
match result {
|
||||
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
|
||||
Err(e) => e.0,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
unsafe fn memcpy_impl(
|
||||
ctx: &mut l0::Context,
|
||||
dst: *mut c_void,
|
||||
src: *const c_void,
|
||||
bytes_count: usize,
|
||||
dev: &l0::Device,
|
||||
queue: &mut l0::CommandQueue,
|
||||
) -> l0::Result<()> {
|
||||
let mut cmd_list = l0::CommandList::new(ctx, &dev)?;
|
||||
cmd_list.append_memory_copy_unsafe(dst, src, bytes_count, None, &mut [])?;
|
||||
queue.execute(cmd_list)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuCtxCreate_v2(pctx: *mut cu::Context, flags: c_uint, dev: cu::Device) -> cu::Result
|
||||
{
|
||||
cu::Result::SUCCESS
|
||||
pub extern "C" fn cuCtxCreate_v2(
|
||||
pctx: *mut cu::Context,
|
||||
flags: c_uint,
|
||||
dev: cu::Device,
|
||||
) -> cu::Result {
|
||||
if pctx == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuModuleGetFunction(
|
||||
hfunc: *mut cu::Function,
|
||||
hmod: cu::Module,
|
||||
name: *const c_char) -> cu::Result
|
||||
{
|
||||
name: *const c_char,
|
||||
) -> cu::Result {
|
||||
cu::Result::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut cu::Context, dev: cu::Device) -> cu::Result {
|
||||
cu::Result::SUCCESS
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuCtxGetDevice(dev: *mut cu::Device) -> cu::Result {
|
||||
unsafe { *dev = cu::Device(0) };
|
||||
cu::Result::SUCCESS
|
||||
}
|
||||
|
|
|
@ -1,141 +1,73 @@
|
|||
use level_zero_sys::*;
|
||||
use super::cu;
|
||||
use crate::cuda;
|
||||
|
||||
use std::cmp;
|
||||
use std::mem;
|
||||
use std::os::raw::{c_char, c_int};
|
||||
use std::ptr;
|
||||
|
||||
macro_rules! assert_size_eq {
|
||||
($x:ty, $($xs:ty),+ $(,)?) => {
|
||||
const _: fn() = || {
|
||||
$(let _ = ::std::mem::transmute::<$x, $xs>;)+
|
||||
};
|
||||
};
|
||||
pub struct Device {
|
||||
pub base: l0::Device,
|
||||
pub default_queue: l0::CommandQueue,
|
||||
properties: Option<Box<l0::sys::ze_device_properties_t>>,
|
||||
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
|
||||
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
|
||||
}
|
||||
|
||||
macro_rules! l0_check {
|
||||
($exp:expr) => {
|
||||
{
|
||||
let result = unsafe{ $exp };
|
||||
if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn either<T>(r: Result<T, T>) -> T {
|
||||
match r {
|
||||
Ok(x) => x,
|
||||
Err(x) => x
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Versioned : Sized {
|
||||
type Version;
|
||||
|
||||
fn new() -> Self {
|
||||
let mut result = unsafe { std::mem::zeroed::<Self>() };
|
||||
let ver = result.version();
|
||||
*ver = Self::current();
|
||||
return result;
|
||||
}
|
||||
|
||||
fn current() -> Self::Version;
|
||||
|
||||
fn version(&mut self) -> &mut Self::Version;
|
||||
}
|
||||
|
||||
impl Versioned for ze_device_memory_properties_t {
|
||||
type Version = ze_device_memory_properties_version_t;
|
||||
fn current() -> Self::Version {
|
||||
ze_device_memory_properties_version_t::ZE_DEVICE_MEMORY_PROPERTIES_VERSION_CURRENT
|
||||
}
|
||||
fn version(&mut self) -> &mut Self::Version {
|
||||
&mut self.version
|
||||
}
|
||||
}
|
||||
|
||||
impl Versioned for ze_device_properties_t {
|
||||
type Version = ze_device_properties_version_t;
|
||||
fn current() -> Self::Version {
|
||||
ze_device_properties_version_t::ZE_DEVICE_PROPERTIES_VERSION_CURRENT
|
||||
}
|
||||
fn version(&mut self) -> &mut Self::Version {
|
||||
&mut self.version
|
||||
}
|
||||
}
|
||||
|
||||
impl Versioned for ze_device_image_properties_t {
|
||||
type Version = ze_device_image_properties_version_t;
|
||||
fn current() -> Self::Version {
|
||||
ze_device_image_properties_version_t::ZE_DEVICE_IMAGE_PROPERTIES_VERSION_CURRENT
|
||||
}
|
||||
fn version(&mut self) -> &mut Self::Version {
|
||||
&mut self.version
|
||||
}
|
||||
}
|
||||
|
||||
impl Versioned for ze_device_mem_alloc_desc_t {
|
||||
type Version = ze_device_mem_alloc_desc_version_t;
|
||||
fn current() -> Self::Version {
|
||||
ze_device_mem_alloc_desc_version_t::ZE_DEVICE_MEM_ALLOC_DESC_VERSION_CURRENT
|
||||
}
|
||||
fn version(&mut self) -> &mut Self::Version {
|
||||
&mut self.version
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[repr(transparent)] // required so a Vec<ze_device_handle_t> can be safely transmutted to Vec<Device>
|
||||
pub struct Device(pub ze_device_handle_t);
|
||||
|
||||
impl Device {
|
||||
pub fn new_vec(v: Vec<ze_device_handle_t>) -> Vec<Device> {
|
||||
assert_size_eq!(Device, ze_device_handle_t);
|
||||
unsafe { mem::transmute(v) }
|
||||
pub fn new(ctx: &mut l0::Context, d: l0::Device) -> l0::Result<Self> {
|
||||
let queue = l0::CommandQueue::new(ctx, &d)?;
|
||||
Ok(Self {
|
||||
base: d,
|
||||
default_queue: queue,
|
||||
properties: None,
|
||||
image_properties: None,
|
||||
memory_properties: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn get_device_properties(self) -> Result<Box<ze_device_properties_t>, ze_result_t> {
|
||||
let mut props = Box::new(l0::ze_device_properties_t::new());
|
||||
l0_check_err! { l0::zeDeviceGetProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> {
|
||||
if let Some(ref prop) = self.properties {
|
||||
return Ok(prop);
|
||||
}
|
||||
match self.base.get_properties() {
|
||||
Ok(prop) => Ok(self.properties.get_or_insert(prop)),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_device_image_properties(self) -> Result<Box<ze_device_image_properties_t>, ze_result_t> {
|
||||
let mut props = Box::new(l0::ze_device_image_properties_t::new());
|
||||
l0_check_err! { l0::zeDeviceGetImageProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
fn get_image_properties(&mut self) -> l0::Result<&l0::sys::ze_device_image_properties_t> {
|
||||
if let Some(ref prop) = self.image_properties {
|
||||
return Ok(prop);
|
||||
}
|
||||
match self.base.get_image_properties() {
|
||||
Ok(prop) => Ok(self.image_properties.get_or_insert(prop)),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t {
|
||||
let props = match self.get_device_properties() {
|
||||
Ok(props) => props,
|
||||
Err(res) => return res
|
||||
};
|
||||
fn get_memory_properties(&mut self) -> l0::Result<&[l0::sys::ze_device_memory_properties_t]> {
|
||||
if let Some(ref prop) = self.memory_properties {
|
||||
return Ok(prop);
|
||||
}
|
||||
match self.base.get_memory_properties() {
|
||||
Ok(prop) => Ok(self.memory_properties.get_or_insert(prop)),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_name(&mut self, name: *mut c_char, len: c_int) -> l0::Result<()> {
|
||||
let props = self.get_properties()?;
|
||||
let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0);
|
||||
let dst_null_pos = cmp::min((len - 1) as usize, null_pos);
|
||||
unsafe { *(name.add(dst_null_pos)) = 0 };
|
||||
unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn total_mem(self, bytes: *mut usize) -> l0::ze_result_t {
|
||||
let mut count = 0;
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
|
||||
if count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize];
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
|
||||
let iter_count = cmp::min(count as usize, props.len());
|
||||
if iter_count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap();
|
||||
pub fn total_mem(&mut self, bytes: *mut usize) -> l0::Result<()> {
|
||||
let props = self.get_memory_properties()?;
|
||||
let max_mem = props.iter().map(|p| p.totalSize).max().unwrap();
|
||||
unsafe { *bytes = max_mem as usize };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_attribute_static(attr: cu::DeviceStaticAttribute) -> c_int {
|
||||
|
@ -148,38 +80,61 @@ impl Device {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_attribute_general(attr: cu::DeviceGeneralAttribute, props: &l0::ze_device_properties_t) -> c_int {
|
||||
fn get_attribute_general(
|
||||
attr: cu::DeviceGeneralAttribute,
|
||||
props: &l0_sys::ze_device_properties_t,
|
||||
) -> c_int {
|
||||
match attr {
|
||||
cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => props.unifiedMemorySupported as i32,
|
||||
cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.numAsyncCopyEngines as i32,
|
||||
cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => (props.numSlicesPerTile * props.numSubslicesPerSlice) as i32,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_attribute_texture(attr: cu::DeviceTextureAttribute, props: &l0::ze_device_image_properties_t) -> c_int {
|
||||
match attr {
|
||||
cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_attribute(self, pi: *mut c_int, attr: cu::DeviceDynamicAttribute) -> l0::ze_result_t {
|
||||
let value_or_err = match attr {
|
||||
cu::DeviceDynamicAttribute::General(a) => self.get_device_properties().map(|p| Device::get_attribute_general(a, &p)),
|
||||
cu::DeviceDynamicAttribute::Texture(a) => self.get_device_image_properties().map(|p| Device::get_attribute_texture(a, &p)),
|
||||
};
|
||||
match value_or_err {
|
||||
Ok(value) => {
|
||||
unsafe { *pi = value };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => 1,
|
||||
cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.maxHardwareContexts as i32,
|
||||
cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => {
|
||||
(props.numSlices * props.numSubslicesPerSlice * props.numEUsPerSubslice) as i32
|
||||
}
|
||||
Err(e) => e
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_uuid(self, uuid: *mut cu::Uuid) -> l0::ze_result_t {
|
||||
either(self.get_device_properties().map(|prop| {
|
||||
unsafe { *uuid = cu::Uuid{ x: prop.uuid.id } };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}))
|
||||
fn get_attribute_texture(
|
||||
attr: cu::DeviceTextureAttribute,
|
||||
props: &l0_sys::ze_device_image_properties_t,
|
||||
) -> c_int {
|
||||
match attr {
|
||||
cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => {
|
||||
cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_attribute(
|
||||
&mut self,
|
||||
pi: *mut c_int,
|
||||
attr: cu::DeviceDynamicAttribute,
|
||||
) -> l0::Result<()> {
|
||||
let value = match attr {
|
||||
cu::DeviceDynamicAttribute::General(a) => {
|
||||
Device::get_attribute_general(a, self.get_properties()?)
|
||||
}
|
||||
cu::DeviceDynamicAttribute::Texture(a) => {
|
||||
Device::get_attribute_texture(a, self.get_image_properties()?)
|
||||
}
|
||||
};
|
||||
unsafe { *pi = value };
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_uuid(&mut self, uuid: *mut cu::Uuid) -> l0::Result<()> {
|
||||
let props = self.get_properties()?;
|
||||
unsafe { *uuid = cu::Uuid { x: props.uuid.id } };
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Context {
|
||||
pub cuda_manager: *mut cuda::rt::ContextStateManager,
|
||||
pub cuda_state: *mut cuda::rt::ContextState,
|
||||
pub cuda_dtor_cb:
|
||||
extern "C" fn(cu::Context, *mut cuda::rt::ContextStateManager, *mut cuda::rt::ContextState),
|
||||
}
|
||||
|
||||
pub struct Module {
|
||||
pub ptx_text: Vec<u8>,
|
||||
}
|
||||
|
|
|
@ -100,34 +100,37 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
|
|||
)
|
||||
};
|
||||
let mut result = vec![0u8.into(); output.len()];
|
||||
let mut drivers = ze::Driver::get()?;
|
||||
let drv = drivers.drain(0..1).next().unwrap();
|
||||
let mut devices = drv.devices()?;
|
||||
let dev = devices.drain(0..1).next().unwrap();
|
||||
let queue = ze::CommandQueue::new(&dev)?;
|
||||
let module = ze::Module::new_spirv(&dev, byte_il, None)?;
|
||||
let mut kernel = ze::Kernel::new_resident(&module, name)?;
|
||||
kernel.set_attribute_bool(
|
||||
ze::sys::ze_kernel_attribute_t::ZE_KERNEL_ATTR_INDIRECT_DEVICE_ACCESS,
|
||||
true,
|
||||
)?;
|
||||
let mut inp_b = ze::DeviceBuffer::<T>::new(&drv, &dev, input.len())?;
|
||||
let mut out_b = ze::DeviceBuffer::<T>::new(&drv, &dev, output.len())?;
|
||||
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
|
||||
let event_pool = ze::EventPool::new(&drv, 3, Some(&[&dev]))?;
|
||||
let ev0 = ze::Event::new(&event_pool, 0)?;
|
||||
let ev1 = ze::Event::new(&event_pool, 1)?;
|
||||
let ev2 = ze::Event::new(&event_pool, 2)?;
|
||||
let mut cmd_list = ze::CommandList::new(&dev)?;
|
||||
let out_b_ptr_mut: ze::BufferPtrMut<T> = (&mut out_b).into();
|
||||
cmd_list.append_memory_copy(inp_b_ptr_mut, input, None, Some(&ev0))?;
|
||||
cmd_list.append_memory_fill(out_b_ptr_mut, 0u8.into(), Some(&ev1))?;
|
||||
kernel.set_group_size(1, 1, 1)?;
|
||||
kernel.set_arg_buffer(0, inp_b_ptr_mut)?;
|
||||
kernel.set_arg_buffer(1, out_b_ptr_mut)?;
|
||||
cmd_list.append_launch_kernel(&kernel, &[1, 1, 1], Some(&ev2), &[&ev0, &ev1])?;
|
||||
cmd_list.append_memory_copy(result.as_mut_slice(), out_b_ptr_mut, None, Some(&ev2))?;
|
||||
queue.execute(cmd_list)?;
|
||||
{
|
||||
let mut drivers = ze::Driver::get()?;
|
||||
let drv = drivers.drain(0..1).next().unwrap();
|
||||
let mut ctx = ze::Context::new(&drv)?;
|
||||
let mut devices = drv.devices()?;
|
||||
let dev = devices.drain(0..1).next().unwrap();
|
||||
let queue = ze::CommandQueue::new(&mut ctx, &dev)?;
|
||||
let module = ze::Module::new_spirv(&mut ctx, &dev, byte_il, None)?;
|
||||
let mut kernel = ze::Kernel::new_resident(&module, name)?;
|
||||
kernel.set_indirect_access(
|
||||
ze::sys::ze_kernel_indirect_access_flags_t::ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE,
|
||||
)?;
|
||||
let mut inp_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, input.len())?;
|
||||
let mut out_b = ze::DeviceBuffer::<T>::new(&mut ctx, &dev, output.len())?;
|
||||
let inp_b_ptr_mut: ze::BufferPtrMut<T> = (&mut inp_b).into();
|
||||
let event_pool = ze::EventPool::new(&mut ctx, 3, Some(&[&dev]))?;
|
||||
let ev0 = ze::Event::new(&event_pool, 0)?;
|
||||
let ev1 = ze::Event::new(&event_pool, 1)?;
|
||||
let mut ev2 = ze::Event::new(&event_pool, 2)?;
|
||||
let mut cmd_list = ze::CommandList::new(&mut ctx, &dev)?;
|
||||
let out_b_ptr_mut: ze::BufferPtrMut<T> = (&mut out_b).into();
|
||||
let mut init_evs = [ev0, ev1];
|
||||
cmd_list.append_memory_copy(inp_b_ptr_mut, input, Some(&mut init_evs[0]), &mut [])?;
|
||||
cmd_list.append_memory_fill(out_b_ptr_mut, 0, Some(&mut init_evs[1]), &mut [])?;
|
||||
kernel.set_group_size(1, 1, 1)?;
|
||||
kernel.set_arg_buffer(0, inp_b_ptr_mut)?;
|
||||
kernel.set_arg_buffer(1, out_b_ptr_mut)?;
|
||||
cmd_list.append_launch_kernel(&kernel, &[1, 1, 1], Some(&mut ev2), &mut init_evs)?;
|
||||
cmd_list.append_memory_copy(result.as_mut_slice(), out_b_ptr_mut, None, &mut [ev2])?;
|
||||
queue.execute(cmd_list)?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
|
|
|
@ -121,19 +121,10 @@ impl TypeWordMap {
|
|||
})
|
||||
}
|
||||
|
||||
fn get_or_add_extended(
|
||||
&mut self,
|
||||
b: &mut dr::Builder,
|
||||
t: ast::ExtendedScalarType,
|
||||
) -> spirv::Word {
|
||||
let key: SpirvScalarKey = t.into();
|
||||
self.get_or_add_spirv_scalar(b, key)
|
||||
}
|
||||
|
||||
fn get_or_add(&mut self, b: &mut dr::Builder, t: SpirvType) -> spirv::Word {
|
||||
match t {
|
||||
SpirvType::Base(key) => self.get_or_add_spirv_scalar(b, key),
|
||||
SpirvType::Pointer(typ, mut storage) => {
|
||||
SpirvType::Pointer(typ, storage) => {
|
||||
let base = self.get_or_add_spirv_scalar(b, typ);
|
||||
*self
|
||||
.complex
|
||||
|
@ -728,7 +719,7 @@ fn emit_function_body_ops(
|
|||
builder.shift_left_logical(result_type, Some(a.dst), a.src1, a.src2)?;
|
||||
}
|
||||
ast::Instruction::Cvt(dets, arg) => {
|
||||
emit_cvt(builder, map, opencl, dets, arg)?;
|
||||
emit_cvt(builder, map, dets, arg)?;
|
||||
}
|
||||
ast::Instruction::Cvta(_, arg) => {
|
||||
// This would be only meaningful if const/slm/global pointers
|
||||
|
@ -754,7 +745,6 @@ fn emit_function_body_ops(
|
|||
fn emit_cvt(
|
||||
builder: &mut dr::Builder,
|
||||
map: &mut TypeWordMap,
|
||||
opencl: spirv::Word,
|
||||
dets: &ast::CvtDetails,
|
||||
arg: &ast::Arg2<ExpandedArgParams>,
|
||||
) -> Result<(), dr::Error> {
|
||||
|
@ -841,7 +831,6 @@ fn emit_cvt(
|
|||
builder.bitcast(result_type, Some(arg.dst), src)?;
|
||||
}
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -18,6 +18,10 @@ fn main() -> Result<(), VarError> {
|
|||
"cargo:rustc-link-search=native={}/bin",
|
||||
spirv_tools_dir.display()
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-link-search=native={}/lib",
|
||||
spirv_tools_dir.display()
|
||||
);
|
||||
// dynamic linking to avoid linking to C++ runtime
|
||||
println!("cargo:rustc-link-lib=dylib=SPIRV-Tools-shared");
|
||||
Ok(())
|
||||
|
|
Loading…
Add table
Reference in a new issue