Finish converting to OpenCL

This commit is contained in:
Andrzej Janik 2021-07-22 00:20:33 +02:00
parent 3d2024bf62
commit 9d4f26bd07
9 changed files with 320 additions and 183 deletions

View file

@ -415,7 +415,7 @@ impl Module {
}
pub struct KernelInfo {
pub arguments_sizes: Vec<usize>,
pub arguments_sizes: Vec<(usize, bool)>,
pub uses_shared_mem: bool,
}
@ -1024,7 +1024,12 @@ fn emit_function_header<'a>(
let args_lens = func_decl
.input_arguments
.iter()
.map(|param| param.v_type.size_of())
.map(|param| {
(
param.v_type.size_of(),
matches!(param.v_type, ast::Type::Pointer(..)),
)
})
.collect();
kernel_info.insert(
name.to_string(),

View file

@ -1,5 +1,5 @@
use super::{device, stream::Stream, stream::StreamData, HasLivenessCookie, LiveCheck};
use super::{CUresult, GlobalState};
use super::{transmute_lifetime_mut, CUresult, GlobalState};
use crate::{cuda::CUcontext, cuda_impl};
use l0::sys::ze_result_t;
use std::{cell::RefCell, num::NonZeroU32, os::raw::c_uint, ptr, sync::atomic::AtomicU32};
@ -98,14 +98,11 @@ pub struct ContextData {
impl ContextData {
pub fn new(
l0_ctx: &'static l0::Context,
l0_dev: l0::Device,
flags: c_uint,
is_primary: bool,
host_event: (l0::Event<'static>, u64),
dev: *mut device::Device,
) -> Result<Self, CUresult> {
let default_stream = StreamData::new_unitialized(l0_ctx, l0_dev, host_event)?;
let default_stream = StreamData::new_unitialized()?;
Ok(ContextData {
flags: AtomicU32::new(flags),
device: dev,
@ -121,8 +118,15 @@ impl ContextData {
impl Context {
pub fn late_init(&mut self) {
let ctx_data = self.as_option_mut().unwrap();
ctx_data.default_stream.context = ctx_data as *mut _;
let ctx_data: &'static mut _ = {
let this = self.as_option_mut().unwrap();
let result = { unsafe { transmute_lifetime_mut(this) } };
drop(this);
result
};
{ self.as_option_mut().unwrap() }
.default_stream
.late_init(ctx_data);
}
}
@ -137,11 +141,8 @@ pub fn create_v2(
let mut ctx_box = GlobalState::lock_device(dev_idx, |dev| {
let dev_ptr = dev as *mut _;
let mut ctx_box = Box::new(LiveCheck::new(ContextData::new(
&dev.ocl_context,
dev.base,
flags,
false,
dev.host_event_pool.get(dev.base, &dev.ocl_context)?,
dev_ptr as *mut _,
)?));
ctx_box.late_init();

View file

@ -1,9 +1,11 @@
use super::{context, transmute_lifetime, transmute_lifetime_mut, CUresult, GlobalState};
use crate::cuda;
use cuda::{CUdevice_attribute, CUuuid_st};
use ocl_core::DeviceType;
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
use std::{
cmp, mem,
cmp,
ffi::c_void,
mem,
os::raw::{c_char, c_int, c_uint},
ptr,
sync::atomic::{AtomicU32, Ordering},
@ -22,6 +24,7 @@ pub struct Device {
pub ocl_base: ocl_core::DeviceId,
pub default_queue: ocl_core::CommandQueue,
pub ocl_context: ocl_core::Context,
pub(crate) ocl_ext: OpenCLExtensions,
pub primary_context: context::Context,
properties: Option<Box<l0::sys::ze_device_properties_t>>,
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
@ -29,19 +32,185 @@ pub struct Device {
compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>,
}
type cl_mem_properties_intel = ocl_core::ffi::cl_bitfield;
/// Table of extension function pointers that are resolved at runtime rather
/// than linked statically.
///
/// Every field is named after the symbol string that `OpenCLExtensions::new`
/// looks up for it, and is declared with the `"system"` ABI. The safe-ish
/// wrappers on this type (`device_mem_alloc`, `enqueue_memcpy`,
/// `mem_blocking_free`, `enqueue_memfill`) are the intended way to call them.
pub(crate) struct OpenCLExtensions {
/// Allocation entry point. `device_mem_alloc` calls it with (context,
/// device, properties pointer, size, alignment, out-pointer for the status
/// code) and receives the allocation as a raw pointer.
pub clDeviceMemAllocINTEL: unsafe extern "system" fn(
ocl_core::ffi::cl_context,
ocl_core::ffi::cl_device_id,
*const cl_mem_properties_intel,
usize,
ocl_core::ffi::cl_uint,
*mut ocl_core::ffi::cl_int,
) -> *mut c_void,
/// Copy entry point. `enqueue_memcpy` calls it with (queue, blocking flag,
/// destination, source, byte count, wait-list length, wait list, output
/// event) and receives a status code.
pub clEnqueueMemcpyINTEL: unsafe extern "system" fn(
ocl_core::ffi::cl_command_queue,
ocl_core::ffi::cl_bool,
*mut c_void,
*const c_void,
usize,
ocl_core::ffi::cl_uint,
*const ocl_core::ffi::cl_event,
*mut ocl_core::ffi::cl_event,
) -> ocl_core::ffi::cl_int,
/// Free entry point. `mem_blocking_free` calls it with (context, pointer)
/// and receives a status code.
pub clMemBlockingFreeINTEL:
unsafe extern "system" fn(ocl_core::ffi::cl_context, *mut c_void) -> ocl_core::ffi::cl_int,
/// Fill entry point. `enqueue_memfill` calls it with (queue, destination,
/// pattern pointer, pattern size, total size, wait-list length, wait list,
/// output event) and receives a status code.
pub clEnqueueMemFillINTEL: unsafe extern "system" fn(
ocl_core::ffi::cl_command_queue,
*mut c_void,
*const c_void,
usize,
usize,
ocl_core::ffi::cl_uint,
*const ocl_core::ffi::cl_event,
*mut ocl_core::ffi::cl_event,
) -> ocl_core::ffi::cl_int,
}
impl OpenCLExtensions {
    /// Resolves every extension entry point stored in this table for `plat`.
    ///
    /// # Errors
    /// The first symbol lookup that fails aborts construction; its error is
    /// converted into a `CUresult` by `?`.
    fn new(plat: &ocl_core::PlatformId) -> Result<Self, CUresult> {
        // A single lookup routine shared by all symbols, so the four entry
        // points cannot drift apart in how they are resolved.
        let lookup = |symbol: &str| unsafe {
            ocl_core::get_extension_function_address_for_platform(plat, symbol, None)
        };
        let device_mem_alloc = lookup("clDeviceMemAllocINTEL")?;
        let enqueue_memcpy = lookup("clEnqueueMemcpyINTEL")?;
        let mem_blocking_free = lookup("clMemBlockingFreeINTEL")?;
        let enqueue_memfill = lookup("clEnqueueMemFillINTEL")?;
        // SAFETY: each address is reinterpreted as the function-pointer type of
        // the field it is stored in. This is only sound if the driver exports
        // the symbol with exactly that signature.
        // NOTE(review): assumes the lookup reports a missing symbol as an error
        // rather than handing back a null address - confirm against ocl_core.
        Ok(Self {
            clDeviceMemAllocINTEL: unsafe { mem::transmute(device_mem_alloc) },
            clEnqueueMemcpyINTEL: unsafe { mem::transmute(enqueue_memcpy) },
            clMemBlockingFreeINTEL: unsafe { mem::transmute(mem_blocking_free) },
            clEnqueueMemFillINTEL: unsafe { mem::transmute(enqueue_memfill) },
        })
    }

    /// Maps a raw status code to a `Result`: zero is success, anything else
    /// becomes `CUDA_ERROR_UNKNOWN`.
    fn check_status(status: ocl_core::ffi::cl_int) -> Result<(), CUresult> {
        if status == 0 {
            Ok(())
        } else {
            Err(CUresult::CUDA_ERROR_UNKNOWN)
        }
    }

    /// Calls `clDeviceMemAllocINTEL` for `size` bytes with the requested
    /// `alignment`, passing a null properties list.
    ///
    /// Returns the pointer produced by the call, or `CUDA_ERROR_UNKNOWN` when
    /// the call reports a non-zero status.
    ///
    /// # Safety
    /// `ctx` and `device` must be valid handles that are acceptable to the
    /// entry point this table was loaded with.
    pub unsafe fn device_mem_alloc(
        &self,
        ctx: &ocl_core::Context,
        device: &ocl_core::DeviceId,
        size: usize,
        alignment: ocl_core::ffi::cl_uint,
    ) -> Result<*mut c_void, CUresult> {
        let mut error = 0;
        let result = (self.clDeviceMemAllocINTEL)(
            ctx.as_ptr(),
            device.as_ptr(),
            ptr::null(),
            size,
            alignment,
            &mut error,
        );
        Self::check_status(error)?;
        Ok(result)
    }

    /// Calls `clEnqueueMemcpyINTEL` to copy `size` bytes from `src` to `dst`
    /// on `queue`, with an empty wait list and no output event.
    ///
    /// `blocking` is forwarded as the call's blocking flag.
    ///
    /// # Safety
    /// `dst` and `src` must be valid for a copy of `size` bytes as far as the
    /// underlying entry point is concerned.
    pub unsafe fn enqueue_memcpy(
        &self,
        queue: &ocl_core::CommandQueue,
        blocking: bool,
        dst: *mut c_void,
        src: *const c_void,
        size: usize,
    ) -> Result<(), CUresult> {
        let error = (self.clEnqueueMemcpyINTEL)(
            queue.as_ptr(),
            ocl_core::ffi::cl_bool::from(blocking),
            dst,
            src,
            size,
            0,
            ptr::null(),
            ptr::null_mut(),
        );
        Self::check_status(error)
    }

    /// Calls `clMemBlockingFreeINTEL` on `mem_ptr` within `ctx`.
    ///
    /// # Safety
    /// `mem_ptr` must be a pointer the underlying entry point accepts for
    /// `ctx`, and must not be used after this call succeeds.
    pub unsafe fn mem_blocking_free(
        &self,
        ctx: &ocl_core::Context,
        mem_ptr: *mut c_void,
    ) -> Result<(), CUresult> {
        Self::check_status((self.clMemBlockingFreeINTEL)(ctx.as_ptr(), mem_ptr))
    }

    /// Calls `clEnqueueMemFillINTEL` to fill `size` bytes at `dst` with the
    /// `pattern_size`-byte pattern read from `pattern`, using an empty wait
    /// list.
    ///
    /// On success the event written by the call is returned so the caller can
    /// wait on it.
    ///
    /// # Safety
    /// `pattern` must be readable for `pattern_size` bytes and `dst` must be a
    /// destination the underlying entry point accepts for `size` bytes.
    pub unsafe fn enqueue_memfill(
        &self,
        queue: &ocl_core::CommandQueue,
        dst: *mut c_void,
        pattern: *const c_void,
        pattern_size: usize,
        size: usize,
    ) -> Result<ocl_core::Event, CUresult> {
        let mut signal: ocl_core::ffi::cl_event = ptr::null_mut();
        let error = (self.clEnqueueMemFillINTEL)(
            queue.as_ptr(),
            dst,
            pattern,
            pattern_size,
            size,
            0,
            ptr::null(),
            &mut signal,
        );
        Self::check_status(error)?;
        // Only wrap the handle once the call has reported success.
        Ok(ocl_core::Event::from_raw(signal))
    }
}
// NOTE(review): `Device` owns `primary_context`, and `ContextData` stores a raw
// `*mut device::Device` back-pointer, so `Send` is asserted by hand here.
// Presumably this relies on every access going through the `GlobalState`
// lock - TODO confirm and record the invariant.
unsafe impl Send for Device {}
impl Device {
pub fn new(
drv: &l0::Driver,
l0_dev: l0::Device,
platform: ocl_core::PlatformId,
ocl_dev: ocl_core::DeviceId,
idx: usize,
) -> Result<Self, CUresult> {
let ctx = ocl_core::create_context(None, &[ocl_dev], None, None)?;
let ocl_ext = OpenCLExtensions::new(&platform)?;
let mut props = ocl_core::ContextProperties::new();
props.set_platform(platform);
let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?;
let queue = ocl_core::create_command_queue(&ctx, ocl_dev, None)?;
let primary_context = context::Context::new(context::ContextData::new());
let primary_context =
context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?);
Ok(Self {
ocl_ext,
index: Index(idx as c_int),
base: l0_dev,
ocl_base: ocl_dev,
@ -55,6 +224,10 @@ impl Device {
})
}
pub fn late_init(&mut self) {
self.primary_context.as_option_mut().unwrap().device = self as *mut _;
}
fn get_properties<'a>(&'a mut self) -> l0::Result<&'a l0::sys::ze_device_properties_t> {
if let Some(ref prop) = self.properties {
return Ok(prop);
@ -207,7 +380,7 @@ pub fn get_attribute(
& l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED)
== l0::sys::ze_device_property_flags_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED
{
Ok(1)
Ok::<_, CUresult>(1)
} else {
Ok(0)
}

View file

@ -27,7 +27,7 @@ impl HasLivenessCookie for FunctionData {
pub struct FunctionData {
pub base: ocl_core::Kernel,
pub arg_size: Vec<usize>,
pub arg_size: Vec<(usize, bool)>,
pub use_shared_mem: bool,
pub legacy_args: LegacyArguments,
}
@ -73,14 +73,28 @@ pub fn launch_kernel(
GlobalState::lock_enqueue(hstream, |queue| {
let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?;
if kernel_params != ptr::null_mut() {
for (i, arg_size) in func.arg_size.iter().enumerate() {
unsafe {
ocl_core::set_kernel_arg(
&func.base,
i as u32,
ocl_core::ArgVal::from_raw(*arg_size, *kernel_params.add(i), false),
)?;
};
// Bind every kernel argument. `is_mem` marks arguments recorded as pointers
// when the kernel was compiled; those are bound by address, everything else is
// bound by value with its recorded size.
for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
    if is_mem {
        // `kernel_params[i]` points at the argument value, which for a pointer
        // argument is itself the pointer to bind. Capture the status of the
        // call: previously it was discarded and a constant zero was checked
        // instead, so a failed binding went unnoticed.
        let error = unsafe {
            ocl_core::ffi::clSetKernelArgSVMPointer(
                func.base.as_ptr(),
                i as u32,
                *(*kernel_params.add(i) as *const _),
            )
        };
        if error != 0 {
            // Report the failure to the caller instead of panicking.
            return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
        }
    } else {
        unsafe {
            ocl_core::set_kernel_arg(
                &func.base,
                i as u32,
                ocl_core::ArgVal::from_raw(arg_size, *kernel_params.add(i), is_mem),
            )?;
        };
    }
}
} else {
let mut offset = 0;
@ -102,27 +116,27 @@ pub fn launch_kernel(
match (buffer_size, buffer_ptr) {
(Some(buffer_size), Some(buffer_ptr)) => {
let sum_of_kernel_argument_sizes =
func.arg_size.iter().fold(0, |offset, size_of_arg| {
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
func.arg_size.iter().fold(0, |offset, &(size_of_arg, _)| {
size_of_arg + round_up_to_multiple(offset, size_of_arg)
});
if buffer_size < sum_of_kernel_argument_sizes {
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
}
let mut offset = 0;
for (i, arg_size) in func.arg_size.iter().enumerate() {
let buffer_offset = round_up_to_multiple(offset, *arg_size);
for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
let buffer_offset = round_up_to_multiple(offset, arg_size);
unsafe {
ocl_core::set_kernel_arg(
&func.base,
i as u32,
ocl_core::ArgVal::from_raw(
*arg_size,
arg_size,
buffer_ptr.add(buffer_offset) as *const _,
false,
is_mem,
),
)?;
};
offset = buffer_offset + *arg_size;
offset = buffer_offset + arg_size;
}
}
_ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),

View file

@ -1,60 +1,77 @@
use super::{stream, CUresult, GlobalState};
use std::{ffi::c_void, mem};
use std::{
ffi::c_void,
mem::{self, size_of},
};
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
let ptr = GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
Ok::<_, CUresult>(dev.ocl_context.mem_alloc_device(bytesize, 0, dev.base)?)
Ok::<_, CUresult>(unsafe {
dev.ocl_ext
.device_mem_alloc(&dev.ocl_context, &dev.ocl_base, bytesize, 0)?
})
})??;
unsafe { *dptr = ptr };
Ok(())
}
pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? };
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
unsafe {
dev.ocl_ext
.enqueue_memcpy(queue, true, dst, src, bytesize)?
};
Ok(())
})
})?
}
pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
GlobalState::lock_current_context(|ctx| {
let dev = unsafe { &mut *ctx.device };
Ok::<_, CUresult>(dev.ocl_context.mem_free(ptr)?)
})
.map_err(|_| CUresult::CUDA_ERROR_INVALID_VALUE)?
unsafe { dev.ocl_ext.mem_blocking_free(&dev.ocl_context, ptr)? };
Ok(())
})?
}
pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> {
GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
unsafe {
cmd_list.append_memory_fill_raw(
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
let pattern_size = mem::size_of_val(&ui);
let event = unsafe {
dev.ocl_ext.enqueue_memfill(
queue,
dst,
&mut ui as *mut _ as *mut _,
mem::size_of::<u32>(),
mem::size_of::<u32>() * n,
Some(signal),
wait,
)
}?;
&ui as *const _ as *const _,
pattern_size,
pattern_size * n,
)?
};
ocl_core::wait_for_event(&event)?;
Ok(())
})
})?
}
pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> {
GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
unsafe {
cmd_list.append_memory_fill_raw(
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, move |stream_data| {
let dev = unsafe { &*(*stream_data.context).device };
let queue = stream_data.cmd_list.as_ref().unwrap();
let pattern_size = mem::size_of_val(&uc);
let event = unsafe {
dev.ocl_ext.enqueue_memfill(
queue,
dst,
&mut uc as *mut _ as *mut _,
mem::size_of::<u8>(),
mem::size_of::<u8>() * n,
Some(signal),
wait,
)
}?;
&uc as *const _ as *const _,
pattern_size,
pattern_size * n,
)?
};
ocl_core::wait_for_event(&event)?;
Ok(())
})
})?
}
#[cfg(test)]

View file

@ -290,15 +290,7 @@ impl GlobalState {
let l0_dev = unsafe { (*(*stream_data.context).device).base };
let l0_ctx = unsafe { &mut (*(*stream_data.context).device).ocl_context };
let cmd_list = unsafe { transmute_lifetime(&stream_data.cmd_list) };
// TODO: make new_marker drop-safe
let (new_event, new_marker) = stream_data.get_event(l0_dev, l0_ctx)?;
stream_data.try_reuse_finished_events()?;
let prev_event = stream_data.get_last_event();
let prev_event_array = prev_event.map(|e| [e]);
let empty = [];
let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]);
f(cmd_list, &new_event, prev_event_slice)?;
stream_data.push_event((new_event, new_marker));
f(&stream_data.cmd_list.as_ref().unwrap())?;
Ok(())
})?
}
@ -350,15 +342,19 @@ pub fn init() -> Result<(), CUresult> {
})
.ok_or(CUresult::CUDA_ERROR_UNKNOWN)?;
let drivers = l0::Driver::get()?;
let devices = match drivers.into_iter().find(is_intel_gpu_driver) {
let mut devices = match drivers.into_iter().find(is_intel_gpu_driver) {
None => return Err(CUresult::CUDA_ERROR_UNKNOWN),
Some(driver) => driver
.devices()?
.into_iter()
.enumerate()
.map(|(idx, l0_dev)| device::Device::new(&driver, l0_dev, device, idx).unwrap())
.map(|(idx, l0_dev)| device::Device::new(l0_dev, platform, device, idx).unwrap())
.collect::<Vec<_>>(),
};
for d in devices.iter_mut() {
d.late_init();
d.primary_context.late_init();
}
let global_heap = unsafe { os::heap_create() };
if global_heap == ptr::null_mut() {
return Err(CUresult::CUDA_ERROR_OUT_OF_MEMORY);

View file

@ -100,8 +100,19 @@ impl SpirvModule {
)
};
let main_module = ocl_core::create_program_with_il(ctx, byte_il, None)?;
match self.should_link_ptx_impl {
let main_module = match self.should_link_ptx_impl {
None => {
ocl_core::build_program(
&main_module,
Some(&[dev]),
&self.build_options,
None,
None,
)?;
main_module
}
Some(ptx_impl) => {
let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl, None)?;
ocl_core::compile_program(
&main_module,
Some(&[dev]),
@ -112,20 +123,13 @@ impl SpirvModule {
None,
None,
)?;
}
Some(ptx_impl) => {
let ptx_impl_prog = ocl_core::create_program_with_il(ctx, ptx_impl, None)?;
ocl_core::build_program(
&main_module,
Some(&[dev]),
&self.build_options,
None,
None,
)?;
ocl_core::build_program(
ocl_core::compile_program(
&ptx_impl_prog,
Some(&[dev]),
&self.build_options,
&[],
&[],
None,
None,
None,
)?;
@ -137,7 +141,7 @@ impl SpirvModule {
None,
None,
None,
)?;
)?
}
};
Ok(main_module)

View file

View file

@ -34,118 +34,45 @@ impl HasLivenessCookie for StreamData {
pub struct StreamData {
pub context: *mut ContextData,
// Immediate CommandList
pub cmd_list: l0::CommandList<'static>,
pub busy_events: VecDeque<(l0::Event<'static>, u64)>,
// This could be a Vec, but I'd rather reuse earliest enqueued event not the one recently enqueued
pub free_events: VecDeque<(l0::Event<'static>, u64)>,
pub synchronization_event: (l0::Event<'static>, u64),
pub cmd_list: Option<ocl_core::CommandQueue>,
}
impl StreamData {
pub fn new_unitialized(
ctx: &'static l0::Context,
device: l0::Device,
host_event: (l0::Event<'static>, u64),
) -> Result<Self, CUresult> {
pub fn new_unitialized() -> Result<Self, CUresult> {
Ok(StreamData {
context: ptr::null_mut(),
cmd_list: l0::CommandList::new_immediate(ctx, device)?,
busy_events: VecDeque::new(),
free_events: VecDeque::new(),
synchronization_event: host_event,
cmd_list: None,
})
}
pub fn new(ctx: &mut ContextData) -> Result<Self, CUresult> {
let l0_ctx = &mut unsafe { &mut *ctx.device }.ocl_context;
let device = unsafe { &*ctx.device }.base;
let synchronization_event = unsafe { &mut *ctx.device }
.host_event_pool
.get(device, l0_ctx)?;
let ocl_ctx = &unsafe { &*ctx.device }.ocl_context;
let device = unsafe { &*ctx.device }.ocl_base;
Ok(StreamData {
context: ctx as *mut _,
cmd_list: l0::CommandList::new_immediate(l0_ctx, device)?,
busy_events: VecDeque::new(),
free_events: VecDeque::new(),
synchronization_event,
cmd_list: Some(ocl_core::create_command_queue::<
&ocl_core::Context,
ocl_core::DeviceId,
>(ocl_ctx, device, None)?),
})
}
pub fn try_reuse_finished_events(&mut self) -> l0::Result<()> {
loop {
match self.busy_events.get(0) {
None => return Ok(()),
Some((ev, _)) => {
if ev.is_ready()? {
let (ev, marker) = self.busy_events.pop_front().unwrap();
ev.host_reset()?;
self.free_events.push_back((ev, marker));
} else {
return Ok(());
}
}
}
}
/// Completes a stream built by `new_unitialized`: records `ctx` as the owning
/// context and creates the command queue on that context's device.
///
/// Panics if the command queue cannot be created.
pub fn late_init(&mut self, ctx: &mut ContextData) {
    let dev = unsafe { &*ctx.device };
    // The back-pointer is stored before the fallible queue creation, exactly
    // as before.
    self.context = ctx as *mut _;
    let queue = ocl_core::create_command_queue::<&ocl_core::Context, ocl_core::DeviceId>(
        &dev.ocl_context,
        dev.ocl_base,
        None,
    )
    .unwrap();
    self.cmd_list = Some(queue);
}
pub fn reuse_all_finished_events(&mut self) -> l0::Result<()> {
self.free_events.reserve(self.busy_events.len());
for (ev, marker) in self.busy_events.drain(..) {
ev.host_reset()?;
self.free_events.push_back((ev, marker));
}
/// Blocks until `ocl_core::finish` returns for this stream's command queue.
///
/// Panics if the queue has not been created yet (`cmd_list` is `None`).
pub fn synchronize(&mut self) -> Result<(), CUresult> {
    let queue = self.cmd_list.as_ref().unwrap();
    ocl_core::finish(queue)?;
    Ok(())
}
pub fn get_last_event(&self) -> Option<&l0::Event<'static>> {
self.busy_events.iter().next_back().map(|(ev, _)| ev)
}
pub fn push_event(&mut self, ev: (l0::Event<'static>, u64)) {
self.busy_events.push_back(ev);
}
pub fn synchronize(&mut self) -> l0::Result<()> {
let empty = [];
let busy_event_arr = self.busy_events.back().map(|(ev, _)| [ev]);
let wait_events = busy_event_arr.as_ref().map_or(&empty[..], |arr| &arr[..]);
unsafe {
self.cmd_list
.append_barrier(Some(&self.synchronization_event.0), wait_events)?
};
self.synchronization_event
.0
.host_synchronize(u64::max_value())?;
self.synchronization_event.0.host_reset()?;
self.reuse_all_finished_events()?;
Ok(())
}
pub fn get_event(
&mut self,
l0_dev: l0::Device,
l0_ctx: &'static l0::Context,
) -> l0::Result<(l0::Event<'static>, u64)> {
self.free_events
.pop_front()
.map(|x| Ok(x))
.unwrap_or_else(|| {
let event_pool = unsafe { &mut (*(*self.context).device).device_event_pool };
event_pool.get(l0_dev, l0_ctx)
})
}
}
impl Drop for StreamData {
fn drop(&mut self) {
if self.context == ptr::null_mut() {
return;
}
for (_, marker) in self.busy_events.iter().chain(self.free_events.iter()) {
let device_event_pool = unsafe { &mut (*(*self.context).device).device_event_pool };
device_event_pool.mark_as_free(*marker);
}
unsafe { (&mut *self.context).streams.remove(&(&mut *self as *mut _)) };
}
}
pub(crate) fn get_ctx(hstream: *mut Stream, pctx: *mut *mut Context) -> Result<(), CUresult> {