diff --git a/level_zero-sys/lib/ze_loader.def b/level_zero-sys/lib/ze_loader.def new file mode 100644 index 0000000..71bc4df Binary files /dev/null and b/level_zero-sys/lib/ze_loader.def differ diff --git a/level_zero-sys/lib/ze_loader.lib b/level_zero-sys/lib/ze_loader.lib index 661240c..dfb3f84 100644 Binary files a/level_zero-sys/lib/ze_loader.lib and b/level_zero-sys/lib/ze_loader.lib differ diff --git a/level_zero/src/ze.rs b/level_zero/src/ze.rs index c56321a..ce675eb 100644 --- a/level_zero/src/ze.rs +++ b/level_zero/src/ze.rs @@ -270,7 +270,7 @@ impl Module { }; match ocl_core::get_program_info(&ocl_program, ocl_core::ProgramInfo::Binaries) { Ok(ocl_core::ProgramInfoResult::Binaries(binaries)) => { - let (module, build_log) = Self::build_native(ctx, d, &binaries[0]); + let (module, build_log) = Self::build_native_logged(ctx, d, &binaries[0]); (module, Some(build_log)) } _ => return (Err(sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN), None), @@ -346,12 +346,21 @@ impl Module { d: &Device, bin: &[u8], opts: Option<&CStr>, - ) -> (Result, BuildLog) { + ) -> Result { Module::new(ctx, true, d, bin, opts) } - pub fn build_native(ctx: &mut Context, d: &Device, bin: &[u8]) -> (Result, BuildLog) { - Module::new(ctx, false, d, bin, None) + pub fn build_spirv_logged( + ctx: &mut Context, + d: &Device, + bin: &[u8], + opts: Option<&CStr>, + ) -> (Result, BuildLog) { + Module::new_logged(ctx, true, d, bin, opts) + } + + pub fn build_native_logged(ctx: &mut Context, d: &Device, bin: &[u8]) -> (Result, BuildLog) { + Module::new_logged(ctx, false, d, bin, None) } fn new( @@ -360,6 +369,35 @@ impl Module { d: &Device, bin: &[u8], opts: Option<&CStr>, + ) -> Result { + let desc = sys::ze_module_desc_t { + stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC, + pNext: ptr::null(), + format: if spirv { + sys::ze_module_format_t::ZE_MODULE_FORMAT_IL_SPIRV + } else { + sys::ze_module_format_t::ZE_MODULE_FORMAT_NATIVE + }, + inputSize: bin.len(), + pInputModule: bin.as_ptr(), + pBuildFlags: opts.map(|s| s.as_ptr() as *const _).unwrap_or(ptr::null()), + pConstants: ptr::null(), + }; + let mut result: sys::ze_module_handle_t = ptr::null_mut(); + let err = unsafe { sys::zeModuleCreate(ctx.0, d.0, &desc, &mut result, ptr::null_mut()) }; + if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS { + Result::Err(err) + } else { + Ok(Module(result)) + } + } + + fn new_logged( + ctx: &mut Context, + spirv: bool, + d: &Device, + bin: &[u8], + opts: Option<&CStr>, ) -> (Result, BuildLog) { let desc = sys::ze_module_desc_t { stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC, diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 3976c76..7c790eb 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -116,7 +116,7 @@ test_ptx!(cos, [std::f32::consts::PI], [-1f32]); test_ptx!(lg2, [512f32], [9f32]); test_ptx!(ex2, [10f32], [1024f32]); test_ptx!(cvt_rni, [9.5f32, 10.5f32], [10f32, 10f32]); -test_ptx!(cvt_rzi, [-13.8f32, 12.9f32], [-13f32, 13f32]); +test_ptx!(cvt_rzi, [-13.8f32, 12.9f32], [-13f32, 12f32]); test_ptx!(cvt_s32_f32, [-13.8f32, 12.9f32], [-13i32, 13i32]); test_ptx!(clz, [0b00000101_00101101_00010011_10101011u32], [5u32]); test_ptx!(popc, [0b10111100_10010010_01001001_10001010u32], [14u32]); @@ -225,7 +225,7 @@ fn run_spirv< Some(module.build_options.as_c_str()), ), None => { - let (module, log) = ze::Module::build_spirv( + let (module, log) = ze::Module::build_spirv_logged( &mut ctx, &dev, byte_il, diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index 469f8f3..1eb08d5 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -2454,6 +2454,7 @@ pub extern "C" fn cuModuleLoadData( r#impl::module::load_data(module.decuda(), image).encuda() } +// TODO: parse jit options #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuModuleLoadDataEx( module: *mut CUmodule, @@ -2462,7 +2463,7 @@ pub extern "C" fn cuModuleLoadDataEx( options: *mut CUjit_option, optionValues: *mut *mut ::std::os::raw::c_void, ) -> CUresult { - r#impl::unimplemented() + r#impl::module::load_data(module.decuda(), image).encuda() } #[cfg_attr(not(test), no_mangle)] @@ -2736,6 +2737,16 @@ pub extern "C" fn cuMemcpyHtoD_v2( r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyHtoD_v2_ptds( + dstDevice: CUdeviceptr, + srcHost: *const ::std::os::raw::c_void, + ByteCount: usize, +) -> CUresult { + r#impl::memory::copy_v2(dstDevice.decuda(), srcHost, ByteCount).encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoH_v2( dstHost: *mut ::std::os::raw::c_void, @@ -2745,6 +2756,16 @@ pub extern "C" fn cuMemcpyDtoH_v2( r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemcpyDtoH_v2_ptds( + dstHost: *mut ::std::os::raw::c_void, + srcDevice: CUdeviceptr, + ByteCount: usize, +) -> CUresult { + r#impl::memory::copy_v2(dstHost, srcDevice.decuda(), ByteCount).encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemcpyDtoD_v2( dstDevice: CUdeviceptr, @@ -2926,6 +2947,16 @@ pub extern "C" fn cuMemsetD8_v2( r#impl::memory::set_d8_v2(dstDevice.decuda(), uc, N).encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD8_v2_ptds( + dstDevice: CUdeviceptr, + uc: ::std::os::raw::c_uchar, + N: usize, +) -> CUresult { + r#impl::memory::set_d8_v2(dstDevice.decuda(), uc, N).encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD16_v2( dstDevice: CUdeviceptr, @@ -2944,6 +2975,16 @@ pub extern "C" fn cuMemsetD32_v2( r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuMemsetD32_v2_ptds( + dstDevice: CUdeviceptr, + ui: ::std::os::raw::c_uint, + N: usize, +) -> CUresult { + r#impl::memory::set_d32_v2(dstDevice.decuda(), ui, N).encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuMemsetD2D8_v2( dstDevice: CUdeviceptr, @@ -3322,6 +3363,12 @@ pub extern "C" fn cuStreamGetCtx(hStream: CUstream, pctx: *mut CUcontext) -> CUr r#impl::stream::get_ctx(hStream.decuda(), pctx.decuda()).encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuStreamGetCtx_ptsz(hStream: CUstream, pctx: *mut CUcontext) -> CUresult { + r#impl::stream::get_ctx(hStream.decuda(), pctx.decuda()).encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuStreamWaitEvent( hStream: CUstream, @@ -3630,6 +3677,37 @@ pub extern "C" fn cuLaunchKernel( .encuda() } +// TODO: implement default stream semantics +#[cfg_attr(not(test), no_mangle)] +pub extern "C" fn cuLaunchKernel_ptsz( + f: CUfunction, + gridDimX: ::std::os::raw::c_uint, + gridDimY: ::std::os::raw::c_uint, + gridDimZ: ::std::os::raw::c_uint, + blockDimX: ::std::os::raw::c_uint, + blockDimY: ::std::os::raw::c_uint, + blockDimZ: ::std::os::raw::c_uint, + sharedMemBytes: ::std::os::raw::c_uint, + hStream: CUstream, + kernelParams: *mut *mut ::std::os::raw::c_void, + extra: *mut *mut ::std::os::raw::c_void, +) -> CUresult { + r#impl::function::launch_kernel( + f.decuda(), + gridDimX, + gridDimY, + gridDimZ, + blockDimX, + blockDimY, + blockDimZ, + sharedMemBytes, + hStream.decuda(), + kernelParams, + extra, + ) + .encuda() +} + #[cfg_attr(not(test), no_mangle)] pub extern "C" fn cuLaunchCooperativeKernel( f: CUfunction, diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index bdfcd86..98580f8 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -87,7 +87,7 @@ impl SpirvModule { }; let l0_module = match self.should_link_ptx_impl { None => { - l0::Module::build_spirv(ctx, dev, byte_il, Some(self.build_options.as_c_str())).0 + l0::Module::build_spirv(ctx, dev, byte_il, Some(self.build_options.as_c_str())) } Some(ptx_impl) => { l0::Module::build_link_spirv(