mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-05 15:49:24 +00:00
Refactor main library, implement some more functionality
This commit is contained in:
parent
34dc149be1
commit
2e4cadc2ab
18 changed files with 6024 additions and 856 deletions
|
@ -1,7 +1,7 @@
|
||||||
use crate::sys;
|
use crate::sys;
|
||||||
use std::{
|
use std::{
|
||||||
ffi::{c_void, CStr},
|
ffi::{c_void, CStr},
|
||||||
fmt::{Debug, Display},
|
fmt::Debug,
|
||||||
marker::PhantomData,
|
marker::PhantomData,
|
||||||
mem, ptr,
|
mem, ptr,
|
||||||
};
|
};
|
||||||
|
@ -12,7 +12,7 @@ macro_rules! check {
|
||||||
{
|
{
|
||||||
let err = unsafe { $expr };
|
let err = unsafe { $expr };
|
||||||
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||||
return Result::Err(Error(err));
|
return Result::Err(err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -27,39 +27,24 @@ macro_rules! check_panic {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, sys::ze_result_t>;
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Error(pub sys::ze_result_t);
|
pub struct Error(pub sys::ze_result_t);
|
||||||
|
|
||||||
impl Error {
|
|
||||||
fn new<T>(res: sys::ze_result_t, default: T) -> Result<T> {
|
|
||||||
if res == sys::ze_result_t::ZE_RESULT_SUCCESS {
|
|
||||||
Ok(default)
|
|
||||||
} else {
|
|
||||||
Err(Self(res))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for Error {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
Debug::fmt(self, f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for Error {}
|
|
||||||
|
|
||||||
pub fn init() -> Result<()> {
|
pub fn init() -> Result<()> {
|
||||||
Error::new(
|
match unsafe { sys::zeInit(sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY) } {
|
||||||
unsafe { sys::zeInit(sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY) },
|
sys::ze_result_t::ZE_RESULT_SUCCESS => Ok(()),
|
||||||
(),
|
e => Err(e),
|
||||||
)
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct Driver(sys::ze_driver_handle_t);
|
pub struct Driver(sys::ze_driver_handle_t);
|
||||||
|
|
||||||
|
unsafe impl Send for Driver {}
|
||||||
|
unsafe impl Sync for Driver {}
|
||||||
|
|
||||||
impl Driver {
|
impl Driver {
|
||||||
pub unsafe fn as_ffi(&self) -> sys::ze_driver_handle_t {
|
pub unsafe fn as_ffi(&self) -> sys::ze_driver_handle_t {
|
||||||
self.0
|
self.0
|
||||||
|
@ -184,6 +169,13 @@ impl Context {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Drop for Context {
|
||||||
|
#[allow(unused_must_use)]
|
||||||
|
fn drop(&mut self) {
|
||||||
|
check_panic! { sys::zeContextDestroy(self.0) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct CommandQueue(sys::ze_command_queue_handle_t);
|
pub struct CommandQueue(sys::ze_command_queue_handle_t);
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@ authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
name = "nvcuda"
|
name = "notcuda"
|
||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
@ -15,3 +15,7 @@ level_zero-sys = { path = "../level_zero-sys" }
|
||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
num_enum = "0.4"
|
num_enum = "0.4"
|
||||||
lz4 = "1.23"
|
lz4 = "1.23"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
cuda-driver-sys = "0.3.0"
|
||||||
|
paste = "1.0"
|
3
notcuda/README
Normal file
3
notcuda/README
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
bindgen /usr/local/cuda/include/cuda.h -o cuda.rs --whitelist-function="^cu.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice$|^CUdeviceptr$"
|
||||||
|
sed -i -e 's/extern "C" {//g' -e 's/-> CUresult;/-> CUresult { impl_::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "C" fn /g' cuda.rs
|
||||||
|
rustfmt cuda.rs
|
|
@ -1,220 +0,0 @@
|
||||||
use num_enum::TryFromPrimitive;
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
use std::os::raw::c_int;
|
|
||||||
use std::{mem, ptr};
|
|
||||||
use crate::ze;
|
|
||||||
|
|
||||||
#[repr(u32)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub enum Result {
|
|
||||||
SUCCESS = 0,
|
|
||||||
ERROR_INVALID_VALUE = 1,
|
|
||||||
ERROR_OUT_OF_MEMORY = 2,
|
|
||||||
ERROR_NOT_INITIALIZED = 3,
|
|
||||||
ERROR_DEINITIALIZED = 4,
|
|
||||||
ERROR_PROFILER_DISABLED = 5,
|
|
||||||
ERROR_PROFILER_NOT_INITIALIZED = 6,
|
|
||||||
ERROR_PROFILER_ALREADY_STARTED = 7,
|
|
||||||
ERROR_PROFILER_ALREADY_STOPPED = 8,
|
|
||||||
ERROR_NO_DEVICE = 100,
|
|
||||||
ERROR_INVALID_DEVICE = 101,
|
|
||||||
ERROR_INVALID_IMAGE = 200,
|
|
||||||
ERROR_INVALID_CONTEXT = 201,
|
|
||||||
ERROR_CONTEXT_ALREADY_CURRENT = 202,
|
|
||||||
ERROR_MAP_FAILED = 205,
|
|
||||||
ERROR_UNMAP_FAILED = 206,
|
|
||||||
ERROR_ARRAY_IS_MAPPED = 207,
|
|
||||||
ERROR_ALREADY_MAPPED = 208,
|
|
||||||
ERROR_NO_BINARY_FOR_GPU = 209,
|
|
||||||
ERROR_ALREADY_ACQUIRED = 210,
|
|
||||||
ERROR_NOT_MAPPED = 211,
|
|
||||||
ERROR_NOT_MAPPED_AS_ARRAY = 212,
|
|
||||||
ERROR_NOT_MAPPED_AS_POINTER = 213,
|
|
||||||
ERROR_ECC_UNCORRECTABLE = 214,
|
|
||||||
ERROR_UNSUPPORTED_LIMIT = 215,
|
|
||||||
ERROR_CONTEXT_ALREADY_IN_USE = 216,
|
|
||||||
ERROR_PEER_ACCESS_UNSUPPORTED = 217,
|
|
||||||
ERROR_INVALID_PTX = 218,
|
|
||||||
ERROR_INVALID_GRAPHICS_CONTEXT = 219,
|
|
||||||
ERROR_NVLINK_UNCORRECTABLE = 220,
|
|
||||||
ERROR_JIT_COMPILER_NOT_FOUND = 221,
|
|
||||||
ERROR_INVALID_SOURCE = 300,
|
|
||||||
ERROR_FILE_NOT_FOUND = 301,
|
|
||||||
ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
|
|
||||||
ERROR_SHARED_OBJECT_INIT_FAILED = 303,
|
|
||||||
ERROR_OPERATING_SYSTEM = 304,
|
|
||||||
ERROR_INVALID_HANDLE = 400,
|
|
||||||
ERROR_ILLEGAL_STATE = 401,
|
|
||||||
ERROR_NOT_FOUND = 500,
|
|
||||||
ERROR_NOT_READY = 600,
|
|
||||||
ERROR_ILLEGAL_ADDRESS = 700,
|
|
||||||
ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
|
|
||||||
ERROR_LAUNCH_TIMEOUT = 702,
|
|
||||||
ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
|
|
||||||
ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
|
|
||||||
ERROR_PEER_ACCESS_NOT_ENABLED = 705,
|
|
||||||
ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
|
|
||||||
ERROR_CONTEXT_IS_DESTROYED = 709,
|
|
||||||
ERROR_ASSERT = 710,
|
|
||||||
ERROR_TOO_MANY_PEERS = 711,
|
|
||||||
ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
|
|
||||||
ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
|
|
||||||
ERROR_HARDWARE_STACK_ERROR = 714,
|
|
||||||
ERROR_ILLEGAL_INSTRUCTION = 715,
|
|
||||||
ERROR_MISALIGNED_ADDRESS = 716,
|
|
||||||
ERROR_INVALID_ADDRESS_SPACE = 717,
|
|
||||||
ERROR_INVALID_PC = 718,
|
|
||||||
ERROR_LAUNCH_FAILED = 719,
|
|
||||||
ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
|
|
||||||
ERROR_NOT_PERMITTED = 800,
|
|
||||||
ERROR_NOT_SUPPORTED = 801,
|
|
||||||
ERROR_SYSTEM_NOT_READY = 802,
|
|
||||||
ERROR_SYSTEM_DRIVER_MISMATCH = 803,
|
|
||||||
ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
|
|
||||||
ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,
|
|
||||||
ERROR_STREAM_CAPTURE_INVALIDATED = 901,
|
|
||||||
ERROR_STREAM_CAPTURE_MERGE = 902,
|
|
||||||
ERROR_STREAM_CAPTURE_UNMATCHED = 903,
|
|
||||||
ERROR_STREAM_CAPTURE_UNJOINED = 904,
|
|
||||||
ERROR_STREAM_CAPTURE_ISOLATION = 905,
|
|
||||||
ERROR_STREAM_CAPTURE_IMPLICIT = 906,
|
|
||||||
ERROR_CAPTURED_EVENT = 907,
|
|
||||||
ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
|
|
||||||
ERROR_TIMEOUT = 909,
|
|
||||||
ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,
|
|
||||||
ERROR_UNKNOWN = 999,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum DeviceAttribute {
|
|
||||||
Static(DeviceStaticAttribute),
|
|
||||||
Dynamic(DeviceDynamicAttribute)
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DeviceAttribute {
|
|
||||||
pub fn try_new(e: u8) -> Option<DeviceAttribute> {
|
|
||||||
DeviceStaticAttribute::try_from(e).map(DeviceAttribute::Static)
|
|
||||||
.or_else(|_| DeviceGeneralAttribute::try_from(e).map(DeviceDynamicAttribute::General).map(DeviceAttribute::Dynamic))
|
|
||||||
.or_else(|_| DeviceTextureAttribute::try_from(e).map(DeviceDynamicAttribute::Texture).map(DeviceAttribute::Dynamic))
|
|
||||||
.ok()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(u8)]
|
|
||||||
#[derive(TryFromPrimitive)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub enum DeviceStaticAttribute {
|
|
||||||
GPU_OVERLAP = 15,
|
|
||||||
KERNEL_EXEC_TIMEOUT = 17,
|
|
||||||
INTEGRATED = 18,
|
|
||||||
COMPUTE_CAPABILITY_MAJOR = 75,
|
|
||||||
COMPUTE_CAPABILITY_MINOR = 76,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum DeviceDynamicAttribute {
|
|
||||||
General(DeviceGeneralAttribute),
|
|
||||||
Texture(DeviceTextureAttribute)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#[repr(u8)]
|
|
||||||
#[derive(TryFromPrimitive)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub enum DeviceGeneralAttribute {
|
|
||||||
MULTIPROCESSOR_COUNT = 16,
|
|
||||||
CAN_MAP_HOST_MEMORY = 19,
|
|
||||||
ASYNC_ENGINE_COUNT = 40,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#[repr(u8)]
|
|
||||||
#[derive(TryFromPrimitive)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub enum DeviceTextureAttribute {
|
|
||||||
MAXIMUM_TEXTURE1D_WIDTH = 21
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
impl Result {
|
|
||||||
pub fn from_l0(result: l0_sys::ze_result_t) -> Result {
|
|
||||||
match result {
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS => Result::SUCCESS,
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => Result::ERROR_NOT_INITIALIZED,
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => Result::ERROR_INVALID_VALUE,
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => Result::ERROR_INVALID_VALUE,
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => Result::ERROR_OUT_OF_MEMORY,
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => Result::ERROR_NOT_SUPPORTED,
|
|
||||||
_ => Result::ERROR_UNKNOWN
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(C)]
|
|
||||||
#[derive(PartialEq, Eq)]
|
|
||||||
pub struct Uuid {
|
|
||||||
pub x: [std::os::raw::c_uchar; 16]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
pub struct Device(pub c_int);
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
pub struct DevicePtr(pub usize);
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
|
||||||
pub struct Context(pub *mut ze::Context);
|
|
||||||
impl Context {
|
|
||||||
pub fn null() -> Context {
|
|
||||||
Context(ptr::null_mut())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
|
||||||
pub struct Module(*mut ze::Module);
|
|
||||||
|
|
||||||
impl Module {
|
|
||||||
pub fn null() -> Module {
|
|
||||||
Module(ptr::null_mut())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new(inner: ze::Module) -> Module {
|
|
||||||
let mut boxed = Box::new(inner);
|
|
||||||
let result = Module(boxed.as_mut());
|
|
||||||
mem::forget(boxed);
|
|
||||||
result
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
pub struct Function(*mut ());
|
|
||||||
|
|
||||||
#[repr(transparent)]
|
|
||||||
pub struct Stream(*mut ());
|
|
||||||
|
|
||||||
#[repr(i32)]
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
pub enum JitOption {
|
|
||||||
MAX_REGISTERS = 0,
|
|
||||||
THREADS_PER_BLOCK = 1,
|
|
||||||
WALL_TIME = 2,
|
|
||||||
INFO_LOG_BUFFER = 3,
|
|
||||||
INFO_LOG_BUFFER_SIZE_BYTES = 4,
|
|
||||||
ERROR_LOG_BUFFER = 5,
|
|
||||||
ERROR_LOG_BUFFER_SIZE_BYTES = 6,
|
|
||||||
OPTIMIZATION_LEVEL = 7,
|
|
||||||
TARGET_FROM_CUCONTEXT = 8,
|
|
||||||
TARGET = 9,
|
|
||||||
FALLBACK_STRATEGY = 10,
|
|
||||||
GENERATE_DEBUG_INFO = 11,
|
|
||||||
LOG_VERBOSE = 12,
|
|
||||||
GENERATE_LINE_INFO = 13,
|
|
||||||
CACHE_MODE = 14,
|
|
||||||
NEW_SM3X_OPT = 15,
|
|
||||||
FAST_COMPILE = 16,
|
|
||||||
GLOBAL_SYMBOL_NAMES = 17,
|
|
||||||
GLOBAL_SYMBOL_ADDRESSES = 18,
|
|
||||||
GLOBAL_SYMBOL_COUNT = 19,
|
|
||||||
NUM_OPTIONS = 20,
|
|
||||||
}
|
|
4751
notcuda/src/cuda.rs
Normal file
4751
notcuda/src/cuda.rs
Normal file
File diff suppressed because it is too large
Load diff
339
notcuda/src/impl/context.rs
Normal file
339
notcuda/src/impl/context.rs
Normal file
|
@ -0,0 +1,339 @@
|
||||||
|
use super::CUresult;
|
||||||
|
use super::{device, HasLivenessCookie, LiveCheck};
|
||||||
|
use crate::{cuda::CUcontext, cuda_impl};
|
||||||
|
use l0::sys::ze_result_t;
|
||||||
|
use std::mem::{self, ManuallyDrop};
|
||||||
|
use std::{
|
||||||
|
cell::RefCell,
|
||||||
|
num::NonZeroU32,
|
||||||
|
os::raw::c_uint,
|
||||||
|
ptr,
|
||||||
|
sync::{atomic::AtomicU32, Mutex},
|
||||||
|
};
|
||||||
|
|
||||||
|
thread_local! {
|
||||||
|
pub static CONTEXT_STACK: RefCell<Vec<*const Context>> = RefCell::new(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Context object handed out to API callers: `ContextData` wrapped in `LiveCheck`.
pub type Context = LiveCheck<ContextData>;
|
||||||
|
|
||||||
|
// Pointer-width-specific cookie value for contexts.
// NOTE(review): presumably the marker `LiveCheck` uses to tell a live context
// from a destroyed one -- confirm against the `LiveCheck` implementation.
impl HasLivenessCookie for ContextData {
    #[cfg(target_pointer_width = "64")]
    const COOKIE: usize = 0x5f01_1956_0b64_3ffb;

    #[cfg(target_pointer_width = "32")]
    const COOKIE: usize = 0x0b64_3ffb;
}
|
||||||
|
|
||||||
|
/// Reference-count state of a context.
enum ContextRefCount {
    /// Primary context: carries no counter.
    Primary,
    /// Non-primary context with its (never zero) reference count.
    NonPrimary(NonZeroU32),
}
|
||||||
|
|
||||||
|
impl ContextRefCount {
|
||||||
|
fn new(is_primary: bool) -> Self {
|
||||||
|
if is_primary {
|
||||||
|
ContextRefCount::Primary
|
||||||
|
} else {
|
||||||
|
ContextRefCount::NonPrimary(unsafe { NonZeroU32::new_unchecked(1) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn incr(&mut self) -> Result<(), CUresult> {
|
||||||
|
match self {
|
||||||
|
ContextRefCount::Primary => Ok(()),
|
||||||
|
ContextRefCount::NonPrimary(c) => {
|
||||||
|
let (new_count, overflow) = c.get().overflowing_add(1);
|
||||||
|
if overflow {
|
||||||
|
Err(CUresult::CUDA_ERROR_INVALID_VALUE)
|
||||||
|
} else {
|
||||||
|
*c = unsafe { NonZeroU32::new_unchecked(new_count) };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
fn decr(&mut self) -> bool {
|
||||||
|
match self {
|
||||||
|
ContextRefCount::Primary => false,
|
||||||
|
ContextRefCount::NonPrimary(c) => {
|
||||||
|
if c.get() == 1 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
*c = unsafe { NonZeroU32::new_unchecked(c.get() - 1) };
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_primary(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
ContextRefCount::Primary => true,
|
||||||
|
ContextRefCount::NonPrimary(_) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ContextData {
|
||||||
|
pub flags: AtomicU32,
|
||||||
|
pub device_index: device::Index,
|
||||||
|
// This pointer is null only for a moment when constructing primary context
|
||||||
|
pub device: *const Mutex<device::Device>,
|
||||||
|
// The split between mutable / non-mutable is mainly to avoid recursive locking in cuDevicePrimaryCtxGetState
|
||||||
|
pub mutable: Mutex<ContextDataMutable>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ContextDataMutable {
|
||||||
|
ref_count: ContextRefCount,
|
||||||
|
pub cuda_manager: *mut cuda_impl::rt::ContextStateManager,
|
||||||
|
pub cuda_state: *mut cuda_impl::rt::ContextState,
|
||||||
|
pub cuda_dtor_cb: Option<
|
||||||
|
extern "C" fn(
|
||||||
|
CUcontext,
|
||||||
|
*mut cuda_impl::rt::ContextStateManager,
|
||||||
|
*mut cuda_impl::rt::ContextState,
|
||||||
|
),
|
||||||
|
>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ContextData {
|
||||||
|
pub fn new(
|
||||||
|
flags: c_uint,
|
||||||
|
is_primary: bool,
|
||||||
|
dev_index: device::Index,
|
||||||
|
dev: *const Mutex<device::Device>,
|
||||||
|
) -> Self {
|
||||||
|
ContextData {
|
||||||
|
flags: AtomicU32::new(flags),
|
||||||
|
device_index: dev_index,
|
||||||
|
device: dev,
|
||||||
|
mutable: Mutex::new(ContextDataMutable {
|
||||||
|
ref_count: ContextRefCount::new(is_primary),
|
||||||
|
cuda_manager: ptr::null_mut(),
|
||||||
|
cuda_state: ptr::null_mut(),
|
||||||
|
cuda_dtor_cb: None,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_v2(pctx: *mut *mut Context, flags: u32, dev_idx: device::Index) -> CUresult {
|
||||||
|
if pctx == ptr::null_mut() {
|
||||||
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
|
}
|
||||||
|
let dev = device::get_device_ref(dev_idx);
|
||||||
|
let dev = match dev {
|
||||||
|
Ok(d) => d,
|
||||||
|
Err(e) => return e,
|
||||||
|
};
|
||||||
|
let mut ctx = Box::new(LiveCheck::new(ContextData::new(flags, false, dev_idx, dev)));
|
||||||
|
let ctx_ref = ctx.as_mut() as *mut Context;
|
||||||
|
unsafe { *pctx = ctx_ref };
|
||||||
|
mem::forget(ctx);
|
||||||
|
CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx_ref));
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn destroy_v2(ctx: *mut Context) -> CUresult {
|
||||||
|
if ctx == ptr::null_mut() {
|
||||||
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
|
}
|
||||||
|
CONTEXT_STACK.with(|stack| {
|
||||||
|
let mut stack = stack.borrow_mut();
|
||||||
|
let should_pop = match stack.last() {
|
||||||
|
Some(active_ctx) => *active_ctx == (ctx as *const _),
|
||||||
|
None => false,
|
||||||
|
};
|
||||||
|
if should_pop {
|
||||||
|
stack.pop();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let mut ctx_box = ManuallyDrop::new(unsafe { Box::from_raw(ctx) });
|
||||||
|
if !ctx_box.try_drop() {
|
||||||
|
CUresult::CUDA_ERROR_INVALID_CONTEXT
|
||||||
|
} else {
|
||||||
|
unsafe { ManuallyDrop::drop(&mut ctx_box) };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pop_current_v2(pctx: *mut *mut Context) -> CUresult {
|
||||||
|
if pctx == ptr::null_mut() {
|
||||||
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
|
}
|
||||||
|
let mut ctx = CONTEXT_STACK.with(|stack| stack.borrow_mut().pop());
|
||||||
|
let ctx_ptr = match &mut ctx {
|
||||||
|
Some(ctx) => *ctx as *mut _,
|
||||||
|
None => return CUresult::CUDA_ERROR_INVALID_CONTEXT,
|
||||||
|
};
|
||||||
|
unsafe { *pctx = ctx_ptr };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_current<F: FnOnce(&ContextData) -> R, R>(f: F) -> Result<R, CUresult> {
|
||||||
|
CONTEXT_STACK.with(|stack| {
|
||||||
|
stack
|
||||||
|
.borrow()
|
||||||
|
.last()
|
||||||
|
.and_then(|c| unsafe { &**c }.as_ref())
|
||||||
|
.ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT)
|
||||||
|
.map(f)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_current(pctx: *mut *mut Context) -> l0::Result<()> {
|
||||||
|
if pctx == ptr::null_mut() {
|
||||||
|
return Err(ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT);
|
||||||
|
}
|
||||||
|
let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() {
|
||||||
|
Some(ctx) => *ctx as *mut _,
|
||||||
|
None => ptr::null_mut(),
|
||||||
|
});
|
||||||
|
unsafe { *pctx = ctx };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_current(ctx: *mut Context) -> CUresult {
|
||||||
|
if ctx == ptr::null_mut() {
|
||||||
|
CONTEXT_STACK.with(|stack| stack.borrow_mut().pop());
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
} else {
|
||||||
|
CONTEXT_STACK.with(|stack| stack.borrow_mut().push(ctx));
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_api_version(ctx: *mut Context, version: *mut u32) -> CUresult {
|
||||||
|
let _ctx = match unsafe { ctx.as_mut() } {
|
||||||
|
None => return CUresult::CUDA_ERROR_INVALID_VALUE,
|
||||||
|
Some(ctx) => match ctx.as_mut() {
|
||||||
|
None => return CUresult::CUDA_ERROR_INVALID_CONTEXT,
|
||||||
|
Some(ctx) => ctx,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
//TODO: query device for properties roughly matching CUDA API version
|
||||||
|
unsafe { *version = 1100 };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_device(dev: *mut device::Index) -> CUresult {
|
||||||
|
let dev_idx = with_current(|ctx| ctx.device_index);
|
||||||
|
match dev_idx {
|
||||||
|
Ok(idx) => {
|
||||||
|
unsafe { *dev = idx }
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
Err(err) => err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test helper: `true` when this thread's context stack holds no entries.
#[cfg(test)]
pub fn is_context_stack_empty() -> bool {
    CONTEXT_STACK.with(|stack| {
        let contexts = stack.borrow();
        contexts.is_empty()
    })
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::super::test::CudaDriverFns;
    use super::super::CUresult;
    use std::{ffi::c_void, ptr};

    cuda_driver_test!(destroy_leaves_zombie_context);

    // Destroying a context that is buried in the stack must not remove its
    // entry: it is still popped later, but is no longer a valid context.
    fn destroy_leaves_zombie_context<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut bottom = ptr::null_mut();
        let mut middle = ptr::null_mut();
        let mut top = ptr::null_mut();
        assert_eq!(T::cuCtxCreate_v2(&mut bottom, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxCreate_v2(&mut middle, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxCreate_v2(&mut top, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxDestroy_v2(middle), CUresult::CUDA_SUCCESS);

        let mut first_pop = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut first_pop),
            CUresult::CUDA_SUCCESS
        );
        assert_eq!(first_pop, top);

        let mut second_pop = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut second_pop),
            CUresult::CUDA_SUCCESS
        );
        assert_eq!(second_pop, middle);

        let mut third_pop = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut third_pop),
            CUresult::CUDA_SUCCESS
        );
        assert_eq!(third_pop, bottom);

        // The destroyed context is rejected ...
        let mut api_version = 0;
        assert_eq!(
            T::cuCtxGetApiVersion(middle, &mut api_version),
            CUresult::CUDA_ERROR_INVALID_CONTEXT
        );
        // ... and the stack is empty by now.
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut ptr::null_mut()),
            CUresult::CUDA_ERROR_INVALID_CONTEXT
        );
    }

    cuda_driver_test!(empty_pop_fails);

    // Popping with nothing on the stack reports an invalid context.
    fn empty_pop_fails<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut popped = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut popped),
            CUresult::CUDA_ERROR_INVALID_CONTEXT
        );
    }

    cuda_driver_test!(destroy_pops_top_of_stack);

    // Destroying the current (top) context also removes it from the stack.
    fn destroy_pops_top_of_stack<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut lower = ptr::null_mut();
        let mut upper = ptr::null_mut();
        assert_eq!(T::cuCtxCreate_v2(&mut lower, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxCreate_v2(&mut upper, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxDestroy_v2(upper), CUresult::CUDA_SUCCESS);

        let mut first_pop = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut first_pop),
            CUresult::CUDA_SUCCESS
        );
        assert_eq!(first_pop, lower);

        let mut second_pop = ptr::null_mut();
        assert_eq!(
            T::cuCtxPopCurrent_v2(&mut second_pop),
            CUresult::CUDA_ERROR_INVALID_CONTEXT
        );
    }

    cuda_driver_test!(double_destroy_fails);

    // A second destroy of the same handle must fail.
    fn double_destroy_fails<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut handle = ptr::null_mut();
        assert_eq!(T::cuCtxCreate_v2(&mut handle, 0, 0), CUresult::CUDA_SUCCESS);
        assert_eq!(T::cuCtxDestroy_v2(handle), CUresult::CUDA_SUCCESS);
        let second_destroy = T::cuCtxDestroy_v2(handle);
        // original CUDA impl returns randomly one or the other
        assert!(
            second_destroy == CUresult::CUDA_ERROR_INVALID_CONTEXT
                || second_destroy == CUresult::CUDA_ERROR_CONTEXT_IS_DESTROYED
        );
    }

    cuda_driver_test!(no_current_on_init);

    // Right after init there is no current context: the (non-null) output
    // slot is overwritten with null.
    fn no_current_on_init<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut current = 1 as *mut c_void;
        assert_eq!(T::cuCtxGetCurrent(&mut current), CUresult::CUDA_SUCCESS);
        assert_eq!(current, ptr::null_mut());
    }
}
|
330
notcuda/src/impl/device.rs
Normal file
330
notcuda/src/impl/device.rs
Normal file
|
@ -0,0 +1,330 @@
|
||||||
|
use super::{context, CUresult, Error};
|
||||||
|
use crate::cuda;
|
||||||
|
use cuda::{CUdevice_attribute, CUuuid_st};
|
||||||
|
use std::{
|
||||||
|
cmp, mem,
|
||||||
|
os::raw::{c_char, c_int},
|
||||||
|
ptr,
|
||||||
|
sync::{
|
||||||
|
atomic::{AtomicU32, Ordering},
|
||||||
|
Mutex, MutexGuard,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Global device table: `None` until `init` stores the enumerated devices.
// NOTE(review): `static mut` with no synchronization -- presumably `init` is
// expected to run once before any other access; confirm against callers.
static mut DEVICES: Option<Vec<Mutex<Device>>> = None;
|
||||||
|
|
||||||
|
/// Device index, stored as a C `int` and used to look devices up in the
/// global device table.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct Index(pub c_int);
|
||||||
|
|
||||||
|
pub struct Device {
|
||||||
|
pub base: l0::Device,
|
||||||
|
pub default_queue: l0::CommandQueue,
|
||||||
|
pub l0_context: l0::Context,
|
||||||
|
pub primary_context: context::Context,
|
||||||
|
properties: Option<Box<l0::sys::ze_device_properties_t>>,
|
||||||
|
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
|
||||||
|
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): `Device` holds a raw pointer inside `primary_context`, hence
// the manual impl; presumably sound because devices are only reached through
// their `Mutex` -- confirm.
unsafe impl Send for Device {}
|
||||||
|
|
||||||
|
impl Device {
|
||||||
|
// Unsafe because it does not fully initalize primary_context
|
||||||
|
unsafe fn new(drv: &l0::Driver, d: l0::Device, idx: usize) -> l0::Result<Self> {
|
||||||
|
let mut ctx = l0::Context::new(drv)?;
|
||||||
|
let queue = l0::CommandQueue::new(&mut ctx, &d)?;
|
||||||
|
let primary_context = context::Context::new(context::ContextData::new(
|
||||||
|
0,
|
||||||
|
true,
|
||||||
|
Index(idx as c_int),
|
||||||
|
ptr::null(),
|
||||||
|
));
|
||||||
|
Ok(Self {
|
||||||
|
base: d,
|
||||||
|
default_queue: queue,
|
||||||
|
l0_context: ctx,
|
||||||
|
primary_context: primary_context,
|
||||||
|
properties: None,
|
||||||
|
image_properties: None,
|
||||||
|
memory_properties: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> {
|
||||||
|
if let Some(ref prop) = self.properties {
|
||||||
|
return Ok(prop);
|
||||||
|
}
|
||||||
|
match self.base.get_properties() {
|
||||||
|
Ok(prop) => Ok(self.properties.get_or_insert(prop)),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_image_properties(&mut self) -> l0::Result<&l0::sys::ze_device_image_properties_t> {
|
||||||
|
if let Some(ref prop) = self.image_properties {
|
||||||
|
return Ok(prop);
|
||||||
|
}
|
||||||
|
match self.base.get_image_properties() {
|
||||||
|
Ok(prop) => Ok(self.image_properties.get_or_insert(prop)),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_memory_properties(&mut self) -> l0::Result<&[l0::sys::ze_device_memory_properties_t]> {
|
||||||
|
if let Some(ref prop) = self.memory_properties {
|
||||||
|
return Ok(prop);
|
||||||
|
}
|
||||||
|
match self.base.get_memory_properties() {
|
||||||
|
Ok(prop) => Ok(self.memory_properties.get_or_insert(prop)),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init(driver: &l0::Driver) -> l0::Result<()> {
|
||||||
|
let ze_devices = driver.devices()?;
|
||||||
|
let mut devices = ze_devices
|
||||||
|
.into_iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(idx, d)| unsafe { Device::new(driver, d, idx) }.map(Mutex::new))
|
||||||
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
for d in devices.iter_mut() {
|
||||||
|
d.get_mut()
|
||||||
|
.unwrap()
|
||||||
|
.primary_context
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.device = d;
|
||||||
|
}
|
||||||
|
unsafe { DEVICES = Some(devices) };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn devices() -> Result<&'static Vec<Mutex<Device>>, CUresult> {
|
||||||
|
match unsafe { &DEVICES } {
|
||||||
|
Some(devs) => Ok(devs),
|
||||||
|
None => Err(CUresult::CUDA_ERROR_NOT_INITIALIZED),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_device_ref(Index(dev_idx): Index) -> Result<&'static Mutex<Device>, CUresult> {
|
||||||
|
let devs = devices()?;
|
||||||
|
if dev_idx < 0 || dev_idx >= devs.len() as c_int {
|
||||||
|
return Err(CUresult::CUDA_ERROR_INVALID_DEVICE);
|
||||||
|
}
|
||||||
|
Ok(&devs[dev_idx as usize])
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_device(dev_idx: Index) -> Result<MutexGuard<'static, Device>, CUresult> {
|
||||||
|
let dev = get_device_ref(dev_idx)?;
|
||||||
|
dev.lock().map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_count(count: *mut c_int) -> CUresult {
|
||||||
|
let len = devices().map(|d| d.len());
|
||||||
|
match len {
|
||||||
|
Ok(len) => {
|
||||||
|
unsafe { *count = len as c_int };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
Err(e) => e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(device: *mut Index, ordinal: c_int) -> CUresult {
|
||||||
|
if device == ptr::null_mut() || ordinal < 0 {
|
||||||
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
|
}
|
||||||
|
let len = devices().map(|d| d.len());
|
||||||
|
match len {
|
||||||
|
Ok(len) if ordinal < (len as i32) => {
|
||||||
|
unsafe { *device = Index(ordinal) };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
Ok(_) => CUresult::CUDA_ERROR_INVALID_VALUE,
|
||||||
|
Err(e) => e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_name(name: *mut c_char, len: i32, dev: Index) -> Result<(), CUresult> {
|
||||||
|
if name == ptr::null_mut() || len < 0 {
|
||||||
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
|
}
|
||||||
|
// This is safe because devices are 'static
|
||||||
|
let name_ptr = {
|
||||||
|
let mut dev = get_device(dev)?;
|
||||||
|
let props = dev.get_properties().map_err(Into::<CUresult>::into)?;
|
||||||
|
props.name.as_ptr()
|
||||||
|
};
|
||||||
|
let name_len = (0..256)
|
||||||
|
.position(|i| unsafe { *name_ptr.add(i) } == 0)
|
||||||
|
.unwrap_or(256);
|
||||||
|
let dst_null_pos = cmp::min((len - 1) as usize, name_len);
|
||||||
|
unsafe { std::ptr::copy_nonoverlapping(name_ptr, name, dst_null_pos) };
|
||||||
|
unsafe { *(name.add(dst_null_pos)) = 0 };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Re-labels the lifetime of a shared reference from `'a` to `'b`.
///
/// # Safety
///
/// The caller must guarantee that the referent really lives (and is not
/// mutated) for the whole of `'b`.
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
    &*(t as *const T)
}
|
||||||
|
|
||||||
|
pub fn total_mem_v2(bytes: *mut usize, dev: Index) -> Result<(), CUresult> {
|
||||||
|
if bytes == ptr::null_mut() {
|
||||||
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
|
}
|
||||||
|
// This is safe because devices are 'static
|
||||||
|
let mem_props = {
|
||||||
|
let mut dev = get_device(dev)?;
|
||||||
|
unsafe {
|
||||||
|
transmute_lifetime(
|
||||||
|
dev.get_memory_properties()
|
||||||
|
.map_err(Into::<CUresult>::into)?,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let max_mem = mem_props
|
||||||
|
.iter()
|
||||||
|
.map(|p| p.totalSize)
|
||||||
|
.max()
|
||||||
|
.ok_or(CUresult::CUDA_ERROR_ILLEGAL_STATE)?;
|
||||||
|
unsafe { *bytes = max_mem as usize };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CUdevice_attribute {
|
||||||
|
fn get_static_value(self) -> Option<i32> {
|
||||||
|
match self {
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP => Some(1),
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT => Some(1),
|
||||||
|
// TODO: fix this for DG1
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_INTEGRATED => Some(1),
|
||||||
|
// TODO: go back to this once we have more funcitonality implemented
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => Some(8),
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => Some(0),
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY => Some(1),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev: Index) -> Result<(), Error> {
|
||||||
|
if pi == ptr::null_mut() {
|
||||||
|
return Err(Error::Cuda(CUresult::CUDA_ERROR_INVALID_VALUE));
|
||||||
|
}
|
||||||
|
if let Some(value) = attrib.get_static_value() {
|
||||||
|
unsafe { *pi = value };
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let mut dev = get_device(dev).map_err(Error::Cuda)?;
|
||||||
|
let value = match attrib {
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
|
||||||
|
dev.get_properties().map_err(Error::L0)?.maxHardwareContexts as i32
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT => {
|
||||||
|
let props = dev.get_properties().map_err(Error::L0)?;
|
||||||
|
(props.numSlices * props.numSubslicesPerSlice * props.numEUsPerSubslice) as i32
|
||||||
|
}
|
||||||
|
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH => cmp::min(
|
||||||
|
dev.get_image_properties()
|
||||||
|
.map_err(Error::L0)?
|
||||||
|
.maxImageDims1D,
|
||||||
|
c_int::max_value() as u32,
|
||||||
|
) as c_int,
|
||||||
|
_ => {
|
||||||
|
// TODO: support more attributes for CUDA runtime
|
||||||
|
/*
|
||||||
|
return Err(l0::Error(
|
||||||
|
l0::sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE,
|
||||||
|
))
|
||||||
|
*/
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
unsafe { *pi = value };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_uuid(uuid: *mut CUuuid_st, dev: Index) -> Result<(), Error> {
|
||||||
|
let ze_uuid = {
|
||||||
|
get_device(dev)
|
||||||
|
.map_err(Error::Cuda)?
|
||||||
|
.get_properties()
|
||||||
|
.map_err(Error::L0)?
|
||||||
|
.uuid
|
||||||
|
};
|
||||||
|
unsafe {
|
||||||
|
*uuid = CUuuid_st {
|
||||||
|
bytes: mem::transmute(ze_uuid.id),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_current_exclusive<F: FnOnce(&mut Device) -> R, R>(f: F) -> Result<R, CUresult> {
|
||||||
|
let dev = super::context::with_current(|ctx| ctx.device);
|
||||||
|
dev.and_then(|dev| {
|
||||||
|
unsafe { &*dev }
|
||||||
|
.try_lock()
|
||||||
|
.map(|mut dev| f(&mut dev))
|
||||||
|
.map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_exclusive<F: FnOnce(&mut Device) -> R, R>(dev: Index, f: F) -> Result<R, CUresult> {
|
||||||
|
let dev = get_device_ref(dev)?;
|
||||||
|
dev.try_lock()
|
||||||
|
.map(|mut dev| f(&mut dev))
|
||||||
|
.map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn primary_ctx_get_state(
|
||||||
|
idx: Index,
|
||||||
|
flags: *mut u32,
|
||||||
|
active: *mut i32,
|
||||||
|
) -> Result<(), CUresult> {
|
||||||
|
let (ctx_ptr, flags_ptr) = with_exclusive(idx, |dev| {
|
||||||
|
// This is safe because primary context can't be dropped
|
||||||
|
let ctx_ptr = &dev.primary_context as *const _;
|
||||||
|
let flags_ptr =
|
||||||
|
(&unsafe { dev.primary_context.as_ref_unchecked() }.flags) as *const AtomicU32;
|
||||||
|
(ctx_ptr, flags_ptr)
|
||||||
|
})?;
|
||||||
|
let is_active = context::CONTEXT_STACK
|
||||||
|
.with(|stack| stack.borrow().last().map(|x| *x))
|
||||||
|
.map(|current| current == ctx_ptr)
|
||||||
|
.unwrap_or(false);
|
||||||
|
let flags_value = unsafe { &*flags_ptr }.load(Ordering::Relaxed);
|
||||||
|
unsafe { *flags = flags_value };
|
||||||
|
unsafe { *active = if is_active { 1 } else { 0 } };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn primary_ctx_retain(pctx: *mut *mut context::Context, dev: Index) -> Result<(), CUresult> {
|
||||||
|
let ctx_ptr = with_exclusive(dev, |dev| &mut dev.primary_context as *mut _)?;
|
||||||
|
unsafe { *pctx = ctx_ptr };
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::super::test::CudaDriverFns;
    use super::super::CUresult;
    use crate::cuda::CUuuid;
    use std::{ffi::c_void, mem, ptr};

    cuda_driver_test!(primary_ctx_default_inactive);

    /// After `cuInit` alone, the primary context of device 0 must report
    /// zero flags and be inactive.
    fn primary_ctx_default_inactive<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        // Seed both outputs with sentinels so an unwritten output is detected.
        let (mut flags, mut active) = (u32::max_value(), i32::max_value());
        let status = T::cuDevicePrimaryCtxGetState(0, &mut flags, &mut active);
        assert_eq!(status, CUresult::CUDA_SUCCESS);
        assert_eq!(flags, 0);
        assert_eq!(active, 0);
    }
}
|
|
@ -1,36 +1,42 @@
|
||||||
use crate::cu;
|
use crate::cuda::CUresult;
|
||||||
use crate::{cuda, ze};
|
use crate::{
|
||||||
|
cuda::{CUcontext, CUdevice, CUmodule, CUuuid},
|
||||||
|
cuda_impl,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{context, device, Decuda, Encuda};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::os::raw::{c_int, c_uint, c_ulong, c_ushort};
|
use std::os::raw::{c_uint, c_ulong, c_ushort};
|
||||||
use std::{ffi::c_void, ptr, slice};
|
use std::{ffi::c_void, ptr, slice};
|
||||||
|
|
||||||
#[no_mangle]
|
pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresult {
|
||||||
pub unsafe extern "C" fn cuGetExportTable(
|
|
||||||
table: *mut *const std::os::raw::c_void,
|
|
||||||
id: *const cu::Uuid,
|
|
||||||
) -> cu::Result {
|
|
||||||
if table == ptr::null_mut() || id == ptr::null_mut() {
|
if table == ptr::null_mut() || id == ptr::null_mut() {
|
||||||
cu::Result::ERROR_INVALID_VALUE
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
} else if *id == TOOLS_RUNTIME_CALLBACK_HOOKS_GUID {
|
}
|
||||||
*table = TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _;
|
let id = unsafe { *id };
|
||||||
cu::Result::SUCCESS
|
match id {
|
||||||
} else if *id == CUDART_INTERFACE_GUID {
|
TOOLS_RUNTIME_CALLBACK_HOOKS_GUID => {
|
||||||
*table = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
|
unsafe { *table = TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _ };
|
||||||
cu::Result::SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
} else if *id == TOOLS_TLS_GUID {
|
}
|
||||||
*table = 1 as _;
|
CUDART_INTERFACE_GUID => {
|
||||||
cu::Result::SUCCESS
|
unsafe { *table = CUDART_INTERFACE_VTABLE.as_ptr() as *const _ };
|
||||||
} else if *id == CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID {
|
CUresult::CUDA_SUCCESS
|
||||||
*table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _;
|
}
|
||||||
cu::Result::SUCCESS
|
TOOLS_TLS_GUID => {
|
||||||
} else {
|
unsafe { *table = 1 as _ };
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID => {
|
||||||
|
unsafe { *table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _ };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
_ => CUresult::CUDA_ERROR_NOT_SUPPORTED,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: cu::Uuid = cu::Uuid {
|
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
|
||||||
x: [
|
bytes: [
|
||||||
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
|
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
|
||||||
0x66,
|
0x66,
|
||||||
],
|
],
|
||||||
|
@ -73,8 +79,8 @@ unsafe extern "C" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut us
|
||||||
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
||||||
}
|
}
|
||||||
|
|
||||||
const CUDART_INTERFACE_GUID: cu::Uuid = cu::Uuid {
|
const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
|
||||||
x: [
|
bytes: [
|
||||||
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
|
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
|
||||||
0xf9,
|
0xf9,
|
||||||
],
|
],
|
||||||
|
@ -102,8 +108,17 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
|
||||||
VTableEntry { ptr: ptr::null() },
|
VTableEntry { ptr: ptr::null() },
|
||||||
];
|
];
|
||||||
|
|
||||||
unsafe extern "C" fn cudart_interface_fn1(_: *mut c_ulong, _: c_int) -> c_int {
|
unsafe extern "C" fn cudart_interface_fn1(pctx: *mut CUcontext, dev: CUdevice) -> CUresult {
|
||||||
0
|
cudart_interface_fn1_impl(pctx.decuda(), dev.decuda()).encuda()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cudart_interface_fn1_impl(
|
||||||
|
pctx: *mut *mut context::Context,
|
||||||
|
dev: device::Index,
|
||||||
|
) -> Result<(), CUresult> {
|
||||||
|
let ctx_ptr = device::with_exclusive(dev, |d| &mut d.primary_context as *mut context::Context)?;
|
||||||
|
unsafe { *pctx = ctx_ptr };
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -178,17 +193,20 @@ struct FatbinFileHeader {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "C" fn get_module_from_cubin(
|
unsafe extern "C" fn get_module_from_cubin(
|
||||||
result: *mut cu::Module,
|
result: *mut CUmodule,
|
||||||
fatbinc_wrapper: *const FatbincWrapper,
|
fatbinc_wrapper: *const FatbincWrapper,
|
||||||
_: *mut c_void,
|
_: *mut c_void,
|
||||||
_: *mut c_void,
|
_: *mut c_void,
|
||||||
) -> cu::Result {
|
) -> CUresult {
|
||||||
if result == ptr::null_mut() || (*fatbinc_wrapper).magic != FATBINC_MAGIC || (*fatbinc_wrapper).version != FATBINC_VERSION {
|
if result == ptr::null_mut()
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
|
||||||
|
|| (*fatbinc_wrapper).version != FATBINC_VERSION
|
||||||
|
{
|
||||||
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
}
|
}
|
||||||
let fatbin_header = (*fatbinc_wrapper).data;
|
let fatbin_header = (*fatbinc_wrapper).data;
|
||||||
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
|
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
||||||
}
|
}
|
||||||
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
|
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
|
||||||
let end = file.add((*fatbin_header).files_size as usize);
|
let end = file.add((*fatbin_header).files_size as usize);
|
||||||
|
@ -201,13 +219,9 @@ unsafe extern "C" fn get_module_from_cubin(
|
||||||
);
|
);
|
||||||
let kernel_text =
|
let kernel_text =
|
||||||
lz4::block::decompress(slice, Some((*file).uncompressed_payload as i32)).unwrap();
|
lz4::block::decompress(slice, Some((*file).uncompressed_payload as i32)).unwrap();
|
||||||
let module = ze::Module {
|
return CUresult::CUDA_SUCCESS;
|
||||||
ptx_text: kernel_text,
|
|
||||||
};
|
|
||||||
*result = cu::Module::new(module);
|
|
||||||
return cu::Result::SUCCESS
|
|
||||||
}
|
}
|
||||||
cu::Result::ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
|
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
|
||||||
|
@ -227,15 +241,15 @@ unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFil
|
||||||
|
|
||||||
unsafe extern "C" fn cudart_interface_fn6(_: u64) {}
|
unsafe extern "C" fn cudart_interface_fn6(_: u64) {}
|
||||||
|
|
||||||
const TOOLS_TLS_GUID: cu::Uuid = cu::Uuid {
|
const TOOLS_TLS_GUID: CUuuid = CUuuid {
|
||||||
x: [
|
bytes: [
|
||||||
0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
|
0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
|
||||||
0xdc,
|
0xdc,
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: cu::Uuid = cu::Uuid {
|
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
|
||||||
x: [
|
bytes: [
|
||||||
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
|
0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, 0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95,
|
||||||
0x93,
|
0x93,
|
||||||
],
|
],
|
||||||
|
@ -257,23 +271,50 @@ static CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE: [VTableEntry; 4] = [
|
||||||
|
|
||||||
// some kind of ctor
|
// some kind of ctor
|
||||||
unsafe extern "C" fn context_local_storage_ctor(
|
unsafe extern "C" fn context_local_storage_ctor(
|
||||||
cu_ctx: cu::Context, // always zero
|
cu_ctx: CUcontext, // always zero
|
||||||
mgr: *mut cuda::rt::ContextStateManager,
|
mgr: *mut cuda_impl::rt::ContextStateManager,
|
||||||
ctx_state: *mut cuda::rt::ContextState,
|
ctx_state: *mut cuda_impl::rt::ContextState,
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||||
dtor_cb: extern "C" fn(
|
dtor_cb: Option<
|
||||||
cu::Context,
|
extern "C" fn(
|
||||||
*mut cuda::rt::ContextStateManager,
|
CUcontext,
|
||||||
*mut cuda::rt::ContextState,
|
*mut cuda_impl::rt::ContextStateManager,
|
||||||
|
*mut cuda_impl::rt::ContextState,
|
||||||
),
|
),
|
||||||
) -> cu::Result {
|
>,
|
||||||
if cu_ctx.0 == ptr::null_mut() {
|
) -> CUresult {
|
||||||
return cu::Result::ERROR_NOT_SUPPORTED;
|
context_local_storage_ctor_impl(cu_ctx.decuda(), mgr, ctx_state, dtor_cb).encuda()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn context_local_storage_ctor_impl(
|
||||||
|
cu_ctx: *mut context::Context,
|
||||||
|
mgr: *mut cuda_impl::rt::ContextStateManager,
|
||||||
|
ctx_state: *mut cuda_impl::rt::ContextState,
|
||||||
|
dtor_cb: Option<
|
||||||
|
extern "C" fn(
|
||||||
|
CUcontext,
|
||||||
|
*mut cuda_impl::rt::ContextStateManager,
|
||||||
|
*mut cuda_impl::rt::ContextState,
|
||||||
|
),
|
||||||
|
>,
|
||||||
|
) -> Result<(), CUresult> {
|
||||||
|
if cu_ctx == ptr::null_mut() {
|
||||||
|
return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED);
|
||||||
}
|
}
|
||||||
(*cu_ctx.0).cuda_manager = mgr;
|
unsafe { &*cu_ctx }
|
||||||
(*cu_ctx.0).cuda_state = ctx_state;
|
.as_ref()
|
||||||
(*cu_ctx.0).cuda_dtor_cb = dtor_cb;
|
.ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT)
|
||||||
cu::Result::SUCCESS
|
.and_then(|ctx| {
|
||||||
|
ctx.mutable
|
||||||
|
.try_lock()
|
||||||
|
.map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)
|
||||||
|
.map(|mut mutable| {
|
||||||
|
mutable.cuda_manager = mgr;
|
||||||
|
mutable.cuda_state = ctx_state;
|
||||||
|
mutable.cuda_dtor_cb = dtor_cb;
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// some kind of dtor
|
// some kind of dtor
|
||||||
|
@ -282,13 +323,30 @@ unsafe extern "C" fn context_local_storage_dtor(_: *mut usize, _: *mut ()) -> u3
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "C" fn context_local_storage_get_state(
|
unsafe extern "C" fn context_local_storage_get_state(
|
||||||
ctx_state: *mut *mut cuda::rt::ContextState,
|
ctx_state: *mut *mut cuda_impl::rt::ContextState,
|
||||||
cu_ctx: cu::Context,
|
cu_ctx: CUcontext,
|
||||||
_: *mut cuda::rt::ContextStateManager,
|
state_mgr: *mut cuda_impl::rt::ContextStateManager,
|
||||||
) -> cu::Result {
|
) -> CUresult {
|
||||||
if cu_ctx == cu::Context::null() {
|
context_local_storage_get_state_impl(ctx_state, cu_ctx.decuda(), state_mgr).encuda()
|
||||||
return cu::Result::ERROR_INVALID_CONTEXT;
|
}
|
||||||
}
|
|
||||||
*ctx_state = (*cu_ctx.0).cuda_state;
|
fn context_local_storage_get_state_impl(
|
||||||
cu::Result::SUCCESS
|
ctx_state: *mut *mut cuda_impl::rt::ContextState,
|
||||||
|
cu_ctx: *mut context::Context,
|
||||||
|
_: *mut cuda_impl::rt::ContextStateManager,
|
||||||
|
) -> Result<(), CUresult> {
|
||||||
|
if cu_ctx == ptr::null_mut() {
|
||||||
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
|
}
|
||||||
|
let cuda_state = unsafe { &*cu_ctx }
|
||||||
|
.as_ref()
|
||||||
|
.ok_or(CUresult::CUDA_ERROR_INVALID_CONTEXT)
|
||||||
|
.and_then(|ctx| {
|
||||||
|
ctx.mutable
|
||||||
|
.try_lock()
|
||||||
|
.map_err(|_| CUresult::CUDA_ERROR_ILLEGAL_STATE)
|
||||||
|
.map(|mutable| mutable.cuda_state)
|
||||||
|
})?;
|
||||||
|
unsafe { *ctx_state = cuda_state };
|
||||||
|
Ok(())
|
||||||
}
|
}
|
81
notcuda/src/impl/memory.rs
Normal file
81
notcuda/src/impl/memory.rs
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
use super::CUresult;
|
||||||
|
use std::ffi::c_void;
|
||||||
|
|
||||||
|
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
|
||||||
|
let alloc_result = super::device::with_current_exclusive(|dev| unsafe {
|
||||||
|
dev.base.mem_alloc_device(&mut dev.l0_context, bytesize, 0)
|
||||||
|
});
|
||||||
|
match alloc_result {
|
||||||
|
Ok(Ok(alloc)) => {
|
||||||
|
unsafe { *dptr = alloc };
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => e.into(),
|
||||||
|
Err(e) => e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn copy_v2(
|
||||||
|
dst: *mut c_void,
|
||||||
|
src: *const c_void,
|
||||||
|
bytesize: usize,
|
||||||
|
) -> Result<Result<(), l0::sys::ze_result_t>, CUresult> {
|
||||||
|
super::device::with_current_exclusive(|dev| unsafe {
|
||||||
|
memcpy_impl(
|
||||||
|
&mut dev.l0_context,
|
||||||
|
dst,
|
||||||
|
src,
|
||||||
|
bytesize,
|
||||||
|
&dev.base,
|
||||||
|
&mut dev.default_queue,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn memcpy_impl(
|
||||||
|
ctx: &mut l0::Context,
|
||||||
|
dst: *mut c_void,
|
||||||
|
src: *const c_void,
|
||||||
|
bytes_count: usize,
|
||||||
|
dev: &l0::Device,
|
||||||
|
queue: &mut l0::CommandQueue,
|
||||||
|
) -> l0::Result<()> {
|
||||||
|
let mut cmd_list = l0::CommandList::new(ctx, &dev)?;
|
||||||
|
cmd_list.append_memory_copy_unsafe(dst, src, bytes_count, None, &mut [])?;
|
||||||
|
queue.execute(cmd_list)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::super::test::CudaDriverFns;
    use super::super::CUresult;
    use std::ptr;

    cuda_driver_test!(alloc_without_ctx);

    /// Allocation with no current context must fail with
    /// `CUDA_ERROR_INVALID_CONTEXT` and leave the output pointer untouched.
    fn alloc_without_ctx<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut mem = ptr::null_mut();
        let status = T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>());
        assert_eq!(status, CUresult::CUDA_ERROR_INVALID_CONTEXT);
        assert_eq!(mem, ptr::null_mut());
    }

    cuda_driver_test!(alloc_with_ctx);

    /// Allocation inside a freshly created context must succeed and yield a
    /// non-null pointer.
    fn alloc_with_ctx<T: CudaDriverFns>() {
        assert_eq!(T::cuInit(0), CUresult::CUDA_SUCCESS);
        let mut ctx = ptr::null_mut();
        assert_eq!(T::cuCtxCreate_v2(&mut ctx, 0, 0), CUresult::CUDA_SUCCESS);
        let mut mem = ptr::null_mut();
        let status = T::cuMemAlloc_v2(&mut mem, std::mem::size_of::<usize>());
        assert_eq!(status, CUresult::CUDA_SUCCESS);
        assert_ne!(mem, ptr::null_mut());
        assert_eq!(T::cuCtxDestroy_v2(ctx), CUresult::CUDA_SUCCESS);
    }
}
|
234
notcuda/src/impl/mod.rs
Normal file
234
notcuda/src/impl/mod.rs
Normal file
|
@ -0,0 +1,234 @@
|
||||||
|
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUresult};
|
||||||
|
use std::{ffi::c_void, mem::ManuallyDrop, os::raw::c_int, sync::Mutex};
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
#[macro_use]
|
||||||
|
pub mod test;
|
||||||
|
pub mod context;
|
||||||
|
pub mod device;
|
||||||
|
pub mod export_table;
|
||||||
|
pub mod memory;
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
pub fn unimplemented() -> CUresult {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
pub fn unimplemented() -> CUresult {
|
||||||
|
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Types that can be wrapped in [`LiveCheck`]; `COOKIE` is the marker value
/// stored next to a live instance.
pub trait HasLivenessCookie {
    const COOKIE: usize;
}

// This struct is a best-effort check if wrapped value has been dropped,
// while it's inherently safe, its use coming from FFI is very unsafe
#[repr(C)]
pub struct LiveCheck<T: HasLivenessCookie> {
    // Equals `T::COOKIE` while `data` is live, 0 after it has been dropped.
    cookie: usize,
    // Dropped explicitly by `try_drop`, never automatically.
    data: ManuallyDrop<T>,
}

impl<T: HasLivenessCookie> LiveCheck<T> {
    /// Wraps `data` and marks it as live.
    pub fn new(data: T) -> Self {
        Self {
            cookie: T::COOKIE,
            data: ManuallyDrop::new(data),
        }
    }

    /// Returns the wrapped value without looking at the cookie.
    ///
    /// # Safety
    ///
    /// The caller must know that the value has not been dropped.
    pub unsafe fn as_ref_unchecked(&self) -> &T {
        &self.data
    }

    /// Returns the wrapped value if the cookie still matches.
    pub fn as_ref(&self) -> Option<&T> {
        if self.cookie != T::COOKIE {
            return None;
        }
        Some(&self.data)
    }

    /// Mutable counterpart of [`LiveCheck::as_ref`].
    pub fn as_mut(&mut self) -> Option<&mut T> {
        if self.cookie != T::COOKIE {
            return None;
        }
        Some(&mut self.data)
    }

    /// Drops the wrapped value if it is still live.
    ///
    /// Returns `true` when the value was dropped by this call and `false`
    /// when the cookie did not match (nothing is done in that case).
    #[must_use]
    pub fn try_drop(&mut self) -> bool {
        if self.cookie != T::COOKIE {
            return false;
        }
        // Clear the cookie first so later calls see the value as dead.
        self.cookie = 0;
        unsafe { ManuallyDrop::drop(&mut self.data) };
        true
    }
}

impl<T: HasLivenessCookie> Drop for LiveCheck<T> {
    // Only invalidates the cookie; the payload is `ManuallyDrop` and is not
    // dropped here.
    fn drop(&mut self) {
        self.cookie = 0;
    }
}
|
||||||
|
|
||||||
|
/// Associates a CUDA-facing type with the type that implements it internally.
pub trait CudaRepr: Sized {
    type Impl: Sized;
}

// A pointer to a CUDA-facing type is represented by a pointer to its
// implementation type.
impl<T: CudaRepr> CudaRepr for *mut T {
    type Impl = *mut T::Impl;
}

/// Conversion from a CUDA-facing value into its internal representation `To`.
pub trait Decuda<To> {
    fn decuda(self) -> To;
}

// Pointer conversion is a plain cast: the address is kept, only the pointee
// type changes.
impl<T: CudaRepr> Decuda<*mut T::Impl> for *mut T {
    fn decuda(self) -> *mut T::Impl {
        self as *mut T::Impl
    }
}
|
||||||
|
|
||||||
|
impl From<l0::sys::ze_result_t> for CUresult {
|
||||||
|
fn from(result: l0::sys::ze_result_t) -> Self {
|
||||||
|
match result {
|
||||||
|
l0::sys::ze_result_t::ZE_RESULT_SUCCESS => CUresult::CUDA_SUCCESS,
|
||||||
|
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNINITIALIZED => {
|
||||||
|
CUresult::CUDA_ERROR_NOT_INITIALIZED
|
||||||
|
}
|
||||||
|
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ENUMERATION => {
|
||||||
|
CUresult::CUDA_ERROR_INVALID_VALUE
|
||||||
|
}
|
||||||
|
l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT => {
|
||||||
|
CUresult::CUDA_ERROR_INVALID_VALUE
|
||||||
|
}
|
||||||
|
l0_sys::ze_result_t::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY => {
|
||||||
|
CUresult::CUDA_ERROR_OUT_OF_MEMORY
|
||||||
|
}
|
||||||
|
l0_sys::ze_result_t::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE => {
|
||||||
|
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
||||||
|
}
|
||||||
|
_ => CUresult::CUDA_ERROR_UNKNOWN,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Encuda {
|
||||||
|
type To: Sized;
|
||||||
|
fn encuda(self: Self) -> Self::To;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Encuda for CUresult {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Encuda for l0::sys::ze_result_t {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
self.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Encuda for () {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1, T2> {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
match self {
|
||||||
|
Ok(e) => e.encuda(),
|
||||||
|
Err(e) => e.encuda(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Error {
|
||||||
|
L0(l0::sys::ze_result_t),
|
||||||
|
Cuda(CUresult),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Encuda for Error {
|
||||||
|
type To = CUresult;
|
||||||
|
fn encuda(self: Self) -> Self::To {
|
||||||
|
match self {
|
||||||
|
Error::L0(e) => e.into(),
|
||||||
|
Error::Cuda(e) => e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref GLOBAL_STATE: Mutex<Option<GlobalState>> = Mutex::new(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct GlobalState {
|
||||||
|
driver: l0::Driver,
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl Send for GlobalState {}
|
||||||
|
|
||||||
|
// TODO: implement
|
||||||
|
fn is_intel_gpu_driver(_: &l0::Driver) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init() -> l0::Result<()> {
|
||||||
|
let mut global_state = GLOBAL_STATE
|
||||||
|
.lock()
|
||||||
|
.map_err(|_| l0::sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN)?;
|
||||||
|
if global_state.is_some() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
l0::init()?;
|
||||||
|
let drivers = l0::Driver::get()?;
|
||||||
|
let driver = match drivers.into_iter().find(is_intel_gpu_driver) {
|
||||||
|
None => return Err(l0::sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN),
|
||||||
|
Some(driver) => {
|
||||||
|
device::init(&driver)?;
|
||||||
|
driver
|
||||||
|
}
|
||||||
|
};
|
||||||
|
*global_state = Some(GlobalState { driver });
|
||||||
|
drop(global_state);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports the driver version as the largest representable `i32`.
pub fn driver_get_version() -> c_int {
    let version: c_int = i32::max_value();
    version
}
|
||||||
|
|
||||||
|
impl<'a> CudaRepr for CUctx_st {
|
||||||
|
type Impl = context::Context;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> CudaRepr for CUdevice {
|
||||||
|
type Impl = device::Index;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Decuda<device::Index> for CUdevice {
|
||||||
|
fn decuda(self) -> device::Index {
|
||||||
|
device::Index(self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> CudaRepr for CUdeviceptr {
|
||||||
|
type Impl = *mut c_void;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Decuda<*mut c_void> for CUdeviceptr {
|
||||||
|
fn decuda(self) -> *mut c_void {
|
||||||
|
self.0 as *mut _
|
||||||
|
}
|
||||||
|
}
|
118
notcuda/src/impl/test.rs
Normal file
118
notcuda/src/impl/test.rs
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
|
||||||
|
use crate::r#impl as notcuda;
|
||||||
|
use crate::r#impl::CUresult;
|
||||||
|
use crate::{cuda::CUuuid, r#impl::Encuda};
|
||||||
|
use ::std::{
|
||||||
|
ffi::c_void,
|
||||||
|
os::raw::{c_int, c_uint},
|
||||||
|
};
|
||||||
|
use cuda_driver_sys as cuda;
|
||||||
|
|
||||||
|
/// Generates two `#[test]` functions from one generic test body:
/// `<name>_notcuda` instantiates it with `NotCuda`, and `<name>_cuda` with
/// `Cuda`, so the same assertions run against both driver implementations.
#[macro_export]
macro_rules! cuda_driver_test {
    ($func:ident) => {
        paste! {
            #[test]
            fn [<$func _notcuda>]() {
                $func::<crate::r#impl::test::NotCuda>()
            }

            #[test]
            fn [<$func _cuda>]() {
                $func::<crate::r#impl::test::Cuda>()
            }
        }
    };
}
|
||||||
|
|
||||||
|
pub trait CudaDriverFns {
|
||||||
|
fn cuInit(flags: c_uint) -> CUresult;
|
||||||
|
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult;
|
||||||
|
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult;
|
||||||
|
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult;
|
||||||
|
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult;
|
||||||
|
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult;
|
||||||
|
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult;
|
||||||
|
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult;
|
||||||
|
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct NotCuda();
|
||||||
|
|
||||||
|
impl CudaDriverFns for NotCuda {
|
||||||
|
fn cuInit(_flags: c_uint) -> CUresult {
|
||||||
|
assert!(notcuda::context::is_context_stack_empty());
|
||||||
|
notcuda::init().encuda()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
|
||||||
|
notcuda::context::create_v2(pctx as *mut _, flags, notcuda::device::Index(dev)).encuda()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
|
||||||
|
notcuda::context::destroy_v2(ctx as *mut _)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
|
||||||
|
notcuda::context::pop_current_v2(pctx as *mut _)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
|
||||||
|
notcuda::context::get_api_version(ctx as *mut _, version)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
|
||||||
|
notcuda::context::get_current(pctx as *mut _).encuda()
|
||||||
|
}
|
||||||
|
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
|
||||||
|
notcuda::memory::alloc_v2(dptr as *mut _, bytesize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
|
||||||
|
notcuda::device::get_uuid(uuid, notcuda::device::Index(dev)).encuda()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
|
||||||
|
notcuda::device::primary_ctx_get_state(notcuda::device::Index(dev), flags, active).encuda()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Cuda();
|
||||||
|
|
||||||
|
impl CudaDriverFns for Cuda {
|
||||||
|
fn cuInit(flags: c_uint) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuInit(flags) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuCtxCreate_v2(pctx as *mut _, flags, dev) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuCtxDestroy_v2(ctx as *mut _) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuCtxPopCurrent_v2(pctx as *mut _) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuCtxGetApiVersion(ctx as *mut _, version) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuCtxGetCurrent(pctx as *mut _) as c_uint) }
|
||||||
|
}
|
||||||
|
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuMemAlloc_v2(dptr as *mut _, bytesize) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuDeviceGetUuid(uuid as *mut _, dev) as c_uint) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
|
||||||
|
unsafe { CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) }
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,396 +2,14 @@ extern crate level_zero as l0;
|
||||||
extern crate level_zero_sys as l0_sys;
|
extern crate level_zero_sys as l0_sys;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate lazy_static;
|
extern crate lazy_static;
|
||||||
|
#[cfg(test)]
|
||||||
|
extern crate cuda_driver_sys;
|
||||||
extern crate lz4;
|
extern crate lz4;
|
||||||
|
#[cfg(test)]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate paste;
|
||||||
|
|
||||||
use std::cell::RefCell;
|
#[allow(warnings)]
|
||||||
use std::convert::TryFrom;
|
|
||||||
use std::os::raw::{c_char, c_int, c_uint};
|
|
||||||
use std::ptr;
|
|
||||||
use std::{ffi::c_void, sync::Mutex};
|
|
||||||
|
|
||||||
mod cu;
|
|
||||||
mod cuda;
|
mod cuda;
|
||||||
mod export_table;
|
mod cuda_impl;
|
||||||
mod ze;
|
pub(crate) mod r#impl;
|
||||||
|
|
||||||
thread_local! {
|
|
||||||
static CONTEXT_STACK: RefCell<Vec<cu::Context>> = RefCell::new(Vec::new());
|
|
||||||
}
|
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
static ref GLOBAL_STATE: Mutex<Option<Context>> = Mutex::new(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Context {
|
|
||||||
base: l0::Context,
|
|
||||||
devices: Vec<ze::Device>,
|
|
||||||
}
|
|
||||||
unsafe impl Send for Context {}
|
|
||||||
unsafe impl Sync for Context {}
|
|
||||||
|
|
||||||
impl Context {
|
|
||||||
fn new() -> l0::Result<Context> {
|
|
||||||
let driver = l0::Driver::get()?;
|
|
||||||
let ze_devices = driver[0].devices()?;
|
|
||||||
let mut ctx = l0::Context::new(&driver[0])?;
|
|
||||||
let devices = ze_devices
|
|
||||||
.into_iter()
|
|
||||||
.map(|d| ze::Device::new(&mut ctx, d))
|
|
||||||
.collect::<Result<_, _>>()?;
|
|
||||||
Ok(Context {
|
|
||||||
base: ctx,
|
|
||||||
devices: devices,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn call<F: FnOnce(&mut Context) -> l0_sys::ze_result_t>(f: F) -> cu::Result {
|
|
||||||
let mut lock = GLOBAL_STATE.try_lock();
|
|
||||||
if let Ok(ref mut mutex) = lock {
|
|
||||||
match **mutex {
|
|
||||||
None => return cu::Result::ERROR_NOT_INITIALIZED,
|
|
||||||
Some(ref mut driver) => {
|
|
||||||
return cu::Result::from_l0(f(driver));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return cu::Result::ERROR_UNKNOWN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn call2<F: FnOnce(&mut Context) -> l0::Result<()>>(f: F) -> cu::Result {
|
|
||||||
Context::call(|ctx| match f(ctx) {
|
|
||||||
Ok(()) => l0::sys::ze_result_t::ZE_RESULT_SUCCESS,
|
|
||||||
Err(err) => err.0,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn call_device<F: FnOnce(&mut ze::Device) -> l0::Result<()>>(
|
|
||||||
cu::Device(dev): cu::Device,
|
|
||||||
f: F,
|
|
||||||
) -> cu::Result {
|
|
||||||
if dev < 0 {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
let dev = dev as usize;
|
|
||||||
Context::call(|driver| {
|
|
||||||
if dev >= driver.devices.len() {
|
|
||||||
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
|
||||||
}
|
|
||||||
match f(&mut driver.devices[dev]) {
|
|
||||||
Ok(()) => l0::sys::ze_result_t::ZE_RESULT_SUCCESS,
|
|
||||||
Err(err) => err.0,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn device_get_count(&self, count: *mut i32) -> l0_sys::ze_result_t {
|
|
||||||
unsafe { *count = self.devices.len() as i32 };
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
fn device_get(&self, device: *mut cu::Device, ordinal: c_int) -> l0_sys::ze_result_t {
|
|
||||||
if (ordinal as usize) >= self.devices.len() {
|
|
||||||
return l0_sys::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
|
||||||
}
|
|
||||||
unsafe { *device = cu::Device(ordinal) };
|
|
||||||
l0_sys::ze_result_t::ZE_RESULT_SUCCESS
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub unsafe extern "C" fn cuDriverGetVersion(version: *mut c_int) -> cu::Result {
|
|
||||||
if version == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
*version = i32::max_value();
|
|
||||||
return cu::Result::SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub unsafe extern "C" fn cuInit(_: c_uint) -> cu::Result {
|
|
||||||
let l0_sys_init = l0_sys::zeInit(l0_sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY);
|
|
||||||
if l0_sys_init != l0_sys::ze_result_t::ZE_RESULT_SUCCESS {
|
|
||||||
return cu::Result::from_l0(l0_sys_init);
|
|
||||||
}
|
|
||||||
let mut lock = GLOBAL_STATE.try_lock();
|
|
||||||
if let Ok(ref mut mutex) = lock {
|
|
||||||
if let None = **mutex {
|
|
||||||
match Context::new() {
|
|
||||||
Ok(state) => **mutex = Some(state),
|
|
||||||
Err(err) => return cu::Result::from_l0(err.0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return cu::Result::ERROR_UNKNOWN;
|
|
||||||
}
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceGetCount(count: *mut c_int) -> cu::Result {
|
|
||||||
if count == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call(|driver| driver.device_get_count(count))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceGet(device: *mut cu::Device, ordinal: c_int) -> cu::Result {
|
|
||||||
if ordinal < 0 || device == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call(|driver| driver.device_get(device, ordinal))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceGetName(
|
|
||||||
name: *mut c_char,
|
|
||||||
len: c_int,
|
|
||||||
dev_idx: cu::Device,
|
|
||||||
) -> cu::Result {
|
|
||||||
if name == ptr::null_mut() || len <= 0 {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call_device(dev_idx, |dev| dev.get_name(name, len))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) -> cu::Result {
|
|
||||||
if bytes == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call_device(dev_idx, |dev| dev.total_mem(bytes))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceGetAttribute(
|
|
||||||
pi: *mut c_int,
|
|
||||||
attrib: c_int,
|
|
||||||
dev_idx: cu::Device,
|
|
||||||
) -> cu::Result {
|
|
||||||
if pi == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
let attrib = match u8::try_from(attrib) {
|
|
||||||
Ok(a) => a,
|
|
||||||
Err(_) => return cu::Result::ERROR_INVALID_VALUE,
|
|
||||||
};
|
|
||||||
match cu::DeviceAttribute::try_new(attrib) {
|
|
||||||
Some(cu::DeviceAttribute::Static(a)) => {
|
|
||||||
unsafe { *pi = ze::Device::get_attribute_static(a) };
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
Some(cu::DeviceAttribute::Dynamic(a)) => {
|
|
||||||
Context::call_device(dev_idx, |dev| dev.get_attribute(pi, a))
|
|
||||||
}
|
|
||||||
// TODO: add support for more properties
|
|
||||||
None => cu::Result::SUCCESS,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDeviceGetUuid(uuid: *mut cu::Uuid, dev_idx: cu::Device) -> cu::Result {
|
|
||||||
if uuid == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call_device(dev_idx, |dev| dev.get_uuid(uuid))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuCtxGetCurrent(pctx: *mut cu::Context) -> cu::Result {
|
|
||||||
let ctx = CONTEXT_STACK.with(|stack| match stack.borrow().last() {
|
|
||||||
Some(ctx) => ctx.clone(),
|
|
||||||
None => cu::Context::null(),
|
|
||||||
});
|
|
||||||
unsafe { *pctx = ctx };
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuCtxSetCurrent(ctx: cu::Context) -> cu::Result {
|
|
||||||
CONTEXT_STACK.with(|stack| {
|
|
||||||
let mut stack = stack.borrow_mut();
|
|
||||||
stack.pop();
|
|
||||||
if ctx != cu::Context::null() {
|
|
||||||
stack.push(ctx);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuMemAlloc_v2(dptr: *mut cu::DevicePtr, bytesize: usize) -> cu::Result {
|
|
||||||
if dptr == ptr::null_mut() || bytesize == 0 {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
Context::call2(|drv| {
|
|
||||||
unsafe {
|
|
||||||
*dptr = cu::DevicePtr(drv.devices[0].base.mem_alloc_device(
|
|
||||||
&mut drv.base,
|
|
||||||
bytesize,
|
|
||||||
0,
|
|
||||||
)? as usize)
|
|
||||||
};
|
|
||||||
Ok(())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuCtxDestroy_v2(ctx: cu::Context) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuMemcpyDtoH_v2(
|
|
||||||
dst_host: *mut c_void,
|
|
||||||
src_device: cu::DevicePtr,
|
|
||||||
byte_count: usize,
|
|
||||||
) -> cu::Result {
|
|
||||||
if dst_host == ptr::null_mut() || src_device.0 == 0 || byte_count == 0 {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
// TODO: choose the right device from device ptr
|
|
||||||
Context::call(|drv| {
|
|
||||||
let cu_dev = &mut drv.devices[0];
|
|
||||||
// Safe, because there's no Drop impl for device
|
|
||||||
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
|
|
||||||
let queue = &mut cu_dev.default_queue;
|
|
||||||
let result = unsafe {
|
|
||||||
memcpy_impl(
|
|
||||||
&mut drv.base,
|
|
||||||
dst_host,
|
|
||||||
src_device.0 as *mut _,
|
|
||||||
byte_count,
|
|
||||||
&dev,
|
|
||||||
queue,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
match result {
|
|
||||||
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
|
|
||||||
Err(e) => e.0,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuMemFree_v2(srcDevice: cu::DevicePtr) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuModuleLoad(module: *mut cu::Module, fname: *const c_char) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuGetErrorString(error: cu::Result, pStr: *mut *const c_char) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuLaunchKernel(
|
|
||||||
f: cu::Function,
|
|
||||||
gridDimX: c_uint,
|
|
||||||
gridDimY: c_uint,
|
|
||||||
gridDimZ: c_uint,
|
|
||||||
blockDimX: c_uint,
|
|
||||||
blockDimY: c_uint,
|
|
||||||
blockDimZ: c_uint,
|
|
||||||
sharedMemBytes: c_uint,
|
|
||||||
hStream: cu::Stream,
|
|
||||||
kernelParams: *mut *mut (),
|
|
||||||
extra: *mut *mut (),
|
|
||||||
) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
#[allow(non_snake_case)]
|
|
||||||
pub extern "C" fn cuModuleLoadDataEx(
|
|
||||||
module: *mut cu::Module,
|
|
||||||
image: *const (),
|
|
||||||
numOptions: c_uint,
|
|
||||||
options: *mut cu::JitOption,
|
|
||||||
optionValues: *mut *mut (),
|
|
||||||
) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuMemcpyHtoD_v2(
|
|
||||||
dst_device: cu::DevicePtr,
|
|
||||||
src_host: *const c_void,
|
|
||||||
byte_count: usize,
|
|
||||||
) -> cu::Result {
|
|
||||||
if dst_device.0 == 0 || src_host == ptr::null_mut() || byte_count == 0 {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
// TODO: choose the right device from device ptr
|
|
||||||
Context::call(|drv| {
|
|
||||||
let cu_dev = &mut drv.devices[0];
|
|
||||||
// Safe, because there's no Drop impl for device
|
|
||||||
let dev = unsafe { l0::Device::from_ffi(cu_dev.base.as_ffi()) };
|
|
||||||
let queue = &mut cu_dev.default_queue;
|
|
||||||
let result = unsafe {
|
|
||||||
memcpy_impl(
|
|
||||||
&mut drv.base,
|
|
||||||
dst_device.0 as *mut _,
|
|
||||||
src_host,
|
|
||||||
byte_count,
|
|
||||||
&dev,
|
|
||||||
queue,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
match result {
|
|
||||||
Ok(()) => l0_sys::ze_result_t::ZE_RESULT_SUCCESS,
|
|
||||||
Err(e) => e.0,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn memcpy_impl(
|
|
||||||
ctx: &mut l0::Context,
|
|
||||||
dst: *mut c_void,
|
|
||||||
src: *const c_void,
|
|
||||||
bytes_count: usize,
|
|
||||||
dev: &l0::Device,
|
|
||||||
queue: &mut l0::CommandQueue,
|
|
||||||
) -> l0::Result<()> {
|
|
||||||
let mut cmd_list = l0::CommandList::new(ctx, &dev)?;
|
|
||||||
cmd_list.append_memory_copy_unsafe(dst, src, bytes_count, None, &mut [])?;
|
|
||||||
queue.execute(cmd_list)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuCtxCreate_v2(
|
|
||||||
pctx: *mut cu::Context,
|
|
||||||
flags: c_uint,
|
|
||||||
dev: cu::Device,
|
|
||||||
) -> cu::Result {
|
|
||||||
if pctx == ptr::null_mut() {
|
|
||||||
return cu::Result::ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuModuleGetFunction(
|
|
||||||
hfunc: *mut cu::Function,
|
|
||||||
hmod: cu::Module,
|
|
||||||
name: *const c_char,
|
|
||||||
) -> cu::Result {
|
|
||||||
cu::Result::ERROR_NOT_SUPPORTED
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuDevicePrimaryCtxRetain(pctx: *mut cu::Context, dev: cu::Device) -> cu::Result {
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_mangle]
|
|
||||||
pub extern "C" fn cuCtxGetDevice(dev: *mut cu::Device) -> cu::Result {
|
|
||||||
unsafe { *dev = cu::Device(0) };
|
|
||||||
cu::Result::SUCCESS
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,140 +0,0 @@
|
||||||
use super::cu;
|
|
||||||
use crate::cuda;
|
|
||||||
|
|
||||||
use std::cmp;
|
|
||||||
use std::os::raw::{c_char, c_int};
|
|
||||||
|
|
||||||
pub struct Device {
|
|
||||||
pub base: l0::Device,
|
|
||||||
pub default_queue: l0::CommandQueue,
|
|
||||||
properties: Option<Box<l0::sys::ze_device_properties_t>>,
|
|
||||||
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
|
|
||||||
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Device {
|
|
||||||
pub fn new(ctx: &mut l0::Context, d: l0::Device) -> l0::Result<Self> {
|
|
||||||
let queue = l0::CommandQueue::new(ctx, &d)?;
|
|
||||||
Ok(Self {
|
|
||||||
base: d,
|
|
||||||
default_queue: queue,
|
|
||||||
properties: None,
|
|
||||||
image_properties: None,
|
|
||||||
memory_properties: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> {
|
|
||||||
if let Some(ref prop) = self.properties {
|
|
||||||
return Ok(prop);
|
|
||||||
}
|
|
||||||
match self.base.get_properties() {
|
|
||||||
Ok(prop) => Ok(self.properties.get_or_insert(prop)),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_image_properties(&mut self) -> l0::Result<&l0::sys::ze_device_image_properties_t> {
|
|
||||||
if let Some(ref prop) = self.image_properties {
|
|
||||||
return Ok(prop);
|
|
||||||
}
|
|
||||||
match self.base.get_image_properties() {
|
|
||||||
Ok(prop) => Ok(self.image_properties.get_or_insert(prop)),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_memory_properties(&mut self) -> l0::Result<&[l0::sys::ze_device_memory_properties_t]> {
|
|
||||||
if let Some(ref prop) = self.memory_properties {
|
|
||||||
return Ok(prop);
|
|
||||||
}
|
|
||||||
match self.base.get_memory_properties() {
|
|
||||||
Ok(prop) => Ok(self.memory_properties.get_or_insert(prop)),
|
|
||||||
Err(e) => Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_name(&mut self, name: *mut c_char, len: c_int) -> l0::Result<()> {
|
|
||||||
let props = self.get_properties()?;
|
|
||||||
let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0);
|
|
||||||
let dst_null_pos = cmp::min((len - 1) as usize, null_pos);
|
|
||||||
unsafe { *(name.add(dst_null_pos)) = 0 };
|
|
||||||
unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) };
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn total_mem(&mut self, bytes: *mut usize) -> l0::Result<()> {
|
|
||||||
let props = self.get_memory_properties()?;
|
|
||||||
let max_mem = props.iter().map(|p| p.totalSize).max().unwrap();
|
|
||||||
unsafe { *bytes = max_mem as usize };
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_attribute_static(attr: cu::DeviceStaticAttribute) -> c_int {
|
|
||||||
match attr {
|
|
||||||
cu::DeviceStaticAttribute::GPU_OVERLAP => 1,
|
|
||||||
cu::DeviceStaticAttribute::KERNEL_EXEC_TIMEOUT => 0,
|
|
||||||
cu::DeviceStaticAttribute::INTEGRATED => 1,
|
|
||||||
cu::DeviceStaticAttribute::COMPUTE_CAPABILITY_MAJOR => c_int::max_value(),
|
|
||||||
cu::DeviceStaticAttribute::COMPUTE_CAPABILITY_MINOR => c_int::max_value(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_attribute_general(
|
|
||||||
attr: cu::DeviceGeneralAttribute,
|
|
||||||
props: &l0_sys::ze_device_properties_t,
|
|
||||||
) -> c_int {
|
|
||||||
match attr {
|
|
||||||
cu::DeviceGeneralAttribute::CAN_MAP_HOST_MEMORY => 1,
|
|
||||||
cu::DeviceGeneralAttribute::ASYNC_ENGINE_COUNT => props.maxHardwareContexts as i32,
|
|
||||||
cu::DeviceGeneralAttribute::MULTIPROCESSOR_COUNT => {
|
|
||||||
(props.numSlices * props.numSubslicesPerSlice * props.numEUsPerSubslice) as i32
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_attribute_texture(
|
|
||||||
attr: cu::DeviceTextureAttribute,
|
|
||||||
props: &l0_sys::ze_device_image_properties_t,
|
|
||||||
) -> c_int {
|
|
||||||
match attr {
|
|
||||||
cu::DeviceTextureAttribute::MAXIMUM_TEXTURE1D_WIDTH => {
|
|
||||||
cmp::min(props.maxImageDims1D, c_int::max_value() as u32) as c_int
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_attribute(
|
|
||||||
&mut self,
|
|
||||||
pi: *mut c_int,
|
|
||||||
attr: cu::DeviceDynamicAttribute,
|
|
||||||
) -> l0::Result<()> {
|
|
||||||
let value = match attr {
|
|
||||||
cu::DeviceDynamicAttribute::General(a) => {
|
|
||||||
Device::get_attribute_general(a, self.get_properties()?)
|
|
||||||
}
|
|
||||||
cu::DeviceDynamicAttribute::Texture(a) => {
|
|
||||||
Device::get_attribute_texture(a, self.get_image_properties()?)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
unsafe { *pi = value };
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_uuid(&mut self, uuid: *mut cu::Uuid) -> l0::Result<()> {
|
|
||||||
let props = self.get_properties()?;
|
|
||||||
unsafe { *uuid = cu::Uuid { x: props.uuid.id } };
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Context {
|
|
||||||
pub cuda_manager: *mut cuda::rt::ContextStateManager,
|
|
||||||
pub cuda_state: *mut cuda::rt::ContextState,
|
|
||||||
pub cuda_dtor_cb:
|
|
||||||
extern "C" fn(cu::Context, *mut cuda::rt::ContextStateManager, *mut cuda::rt::ContextState),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A loaded module, stored as raw bytes.
pub struct Module {
    /// Module contents; presumably PTX source text, going by the field
    /// name (TODO confirm).
    pub ptx_text: Vec<u8>,
}
|
|
|
@ -13,7 +13,6 @@ rspirv = "0.6"
|
||||||
spirv_headers = "1.4"
|
spirv_headers = "1.4"
|
||||||
quick-error = "1.2"
|
quick-error = "1.2"
|
||||||
bit-vec = "0.6"
|
bit-vec = "0.6"
|
||||||
paste = "0.1"
|
|
||||||
half ="1.6"
|
half ="1.6"
|
||||||
|
|
||||||
[build-dependencies.lalrpop]
|
[build-dependencies.lalrpop]
|
||||||
|
@ -24,3 +23,4 @@ features = ["lexer"]
|
||||||
level_zero-sys = { path = "../level_zero-sys" }
|
level_zero-sys = { path = "../level_zero-sys" }
|
||||||
level_zero = { path = "../level_zero" }
|
level_zero = { path = "../level_zero" }
|
||||||
spirv_tools-sys = { path = "../spirv_tools-sys" }
|
spirv_tools-sys = { path = "../spirv_tools-sys" }
|
||||||
|
paste = "1.0"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#[cfg_attr(macro_use, test)]
|
#[cfg(test)]
|
||||||
extern crate paste;
|
extern crate paste;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate lalrpop_util;
|
extern crate lalrpop_util;
|
||||||
|
|
|
@ -51,23 +51,23 @@ test_ptx!(shl, [11u64], [44u64]);
|
||||||
test_ptx!(cvt_sat_s_u, [-1i32], [0i32]);
|
test_ptx!(cvt_sat_s_u, [-1i32], [0i32]);
|
||||||
test_ptx!(cvta, [3.0f32], [3.0f32]);
|
test_ptx!(cvta, [3.0f32], [3.0f32]);
|
||||||
|
|
||||||
struct DisplayError<T: Display + Debug> {
|
struct DisplayError<T: Debug> {
|
||||||
err: T,
|
err: T,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Display + Debug> Display for DisplayError<T> {
|
impl<T: Debug> Display for DisplayError<T> {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
|
||||||
Display::fmt(&self.err, f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Display + Debug> Debug for DisplayError<T> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
Debug::fmt(&self.err, f)
|
Debug::fmt(&self.err, f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Display + Debug> error::Error for DisplayError<T> {}
|
impl<T: Debug> Debug for DisplayError<T> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
|
Debug::fmt(&self.err, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Debug> error::Error for DisplayError<T> {}
|
||||||
|
|
||||||
fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
|
fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
|
||||||
name: &str,
|
name: &str,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue