diff --git a/level_zero-sys/lib/ze_loader.def b/level_zero-sys/lib/ze_loader.def index 71bc4df..d95ffad 100644 Binary files a/level_zero-sys/lib/ze_loader.def and b/level_zero-sys/lib/ze_loader.def differ diff --git a/level_zero-sys/lib/ze_loader.lib b/level_zero-sys/lib/ze_loader.lib index dfb3f84..525496a 100644 Binary files a/level_zero-sys/lib/ze_loader.lib and b/level_zero-sys/lib/ze_loader.lib differ diff --git a/level_zero/src/ze.rs b/level_zero/src/ze.rs index 16a98a0..30146a2 100644 --- a/level_zero/src/ze.rs +++ b/level_zero/src/ze.rs @@ -781,6 +781,26 @@ impl<'a> CommandList<'a> { Ok(unsafe { Self::from_ffi(result) }) } + pub fn new_immediate(ctx: &'a Context, dev: Device) -> Result { + let queue_desc = sys::ze_command_queue_desc_t { + stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + pNext: ptr::null(), + ordinal: 0, + index: 0, + flags: sys::ze_command_queue_flags_t(0), + mode: sys::ze_command_queue_mode_t::ZE_COMMAND_QUEUE_MODE_DEFAULT, + priority: sys::ze_command_queue_priority_t::ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + }; + let mut result: sys::ze_command_list_handle_t = ptr::null_mut(); + check!(sys::zeCommandListCreateImmediate( + ctx.as_ffi(), + dev.as_ffi(), + &queue_desc, + &mut result + )); + Ok(unsafe { Self::from_ffi(result) }) + } + pub unsafe fn append_memory_copy< 'dep, T: 'a + 'dep + Copy + Sized, diff --git a/zluda/src/impl/function.rs b/zluda/src/impl/function.rs index 2aaab22..2a35512 100644 --- a/zluda/src/impl/function.rs +++ b/zluda/src/impl/function.rs @@ -81,7 +81,7 @@ pub fn launch_kernel( { return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - GlobalState::lock_stream(hstream, |stream| { + GlobalState::lock_enqueue(hstream, |cmd_list, signal, wait| { let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; if kernel_params != ptr::null_mut() { for (i, arg_size) in func.arg_size.iter().enumerate() { @@ -144,19 +144,16 @@ pub fn launch_kernel( func.base .set_group_size(block_dim_x, block_dim_y, block_dim_z)?; func.legacy_args.reset(); - let cmd_list = stream.command_list()?; unsafe { cmd_list.append_launch_kernel( &mut func.base, &[grid_dim_x, grid_dim_y, grid_dim_z], - None, - &mut [], + Some(signal), + wait, )?; } - cmd_list.close()?; - stream.queue.execute_and_synchronize(cmd_list)?; - Ok(()) - })? + Ok::<_, CUresult>(()) + }) } fn round_up_to_multiple(x: usize, multiple: usize) -> usize { diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index 81b4f31..56821d1 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -13,7 +13,7 @@ pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> { GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| { unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? }; - Ok::<_, l0::sys::ze_result_t>(()) + Ok(()) }) } @@ -26,41 +26,35 @@ pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> { } pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> { - GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream| { - let cmd_list = stream.command_list()?; + GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| { unsafe { cmd_list.append_memory_fill_raw( dst, &mut ui as *mut _ as *mut _, mem::size_of::(), mem::size_of::() * n, - None, - &mut [], + Some(signal), + wait, ) }?; - cmd_list.close()?; - stream.queue.execute_and_synchronize(cmd_list)?; - Ok::<_, CUresult>(()) - })? + Ok(()) + }) } pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> { - GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream| { - let cmd_list = stream.command_list()?; + GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| { unsafe { cmd_list.append_memory_fill_raw( dst, &mut uc as *mut _ as *mut _, mem::size_of::(), mem::size_of::() * n, - None, - &mut [], + Some(signal), + wait, ) }?; - cmd_list.close()?; - stream.queue.execute_and_synchronize(cmd_list)?; - Ok::<_, CUresult>(()) - })? + Ok(()) + }) } #[cfg(test)] diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index 2bdd613..f12e964 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -275,17 +275,13 @@ impl GlobalState { fn lock_enqueue( stream: *mut stream::Stream, - f: impl FnOnce( - &mut l0::CommandList, - &l0::Event<'static>, - &[&l0::Event<'static>], - ) -> l0::Result<()>, + f: impl FnOnce(&l0::CommandList, &l0::Event<'static>, &[&l0::Event<'static>]) -> Result<(), CUresult>, ) -> Result<(), CUresult> { Self::lock_stream(stream, |stream_data| { let l0_dev = unsafe { (*(*stream_data.context).device).base }; let l0_ctx = unsafe { &mut (*(*stream_data.context).device).l0_context }; let event_pool = unsafe { &mut (*(*stream_data.context).device).event_pool }; - let mut cmd_list = unsafe { mem::transmute(stream_data.command_list()?) }; + let cmd_list = unsafe { mem::transmute(stream_data.command_list()?) }; stream_data .process_finished_events(&mut |(_, marker)| event_pool.mark_as_free(marker))?; let prev_event = stream_data.get_last_event(); @@ -293,7 +289,7 @@ impl GlobalState { let empty = []; let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]); let (new_event, new_marker) = event_pool.get(l0_dev, l0_ctx)?; - f(&mut cmd_list, &new_event, prev_event_slice)?; + f(&cmd_list, &new_event, prev_event_slice)?; cmd_list.close()?; unsafe { stream_data.queue.execute(&cmd_list, None)? }; stream_data.push_event((new_event, new_marker));