diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs index abfc3ec..9de0111 100644 --- a/zluda/src/cuda.rs +++ b/zluda/src/cuda.rs @@ -215,7 +215,7 @@ cuda_function_declarations!( cuLinkCreate_v2, cuMipmappedArrayCreate, cuMipmappedArrayDestroy, - cuMipmappedArrayGetLevel, + cuMipmappedArrayGetLevel ] ); @@ -1249,10 +1249,8 @@ mod definitions { pResDesc: *const CUDA_RESOURCE_DESC, pTexDesc: *const HIP_TEXTURE_DESC, pResViewDesc: *const HIP_RESOURCE_VIEW_DESC, - ) -> hipError_t { - let mut tex_desc = *pTexDesc; - tex_desc.maxMipmapLevelClamp = 0f32; - texobj::create(pTexObject, pResDesc, &tex_desc, pResViewDesc) + ) -> Result<(), CUresult> { + texobj::create(pTexObject, pResDesc, pTexDesc, pResViewDesc) } pub(crate) unsafe fn cuTexObjectDestroy(texObject: hipTextureObject_t) -> hipError_t { @@ -1652,24 +1650,24 @@ mod definitions { } pub(crate) unsafe fn cuMipmappedArrayCreate( - pHandle: *mut hipMipmappedArray_t, + pHandle: *mut CUmipmappedArray, pMipmappedArrayDesc: *const HIP_ARRAY3D_DESCRIPTOR, numMipmapLevels: ::std::os::raw::c_uint, - ) -> hipError_t { + ) -> Result<(), CUresult> { array::mipmapped_create(pHandle, pMipmappedArrayDesc, numMipmapLevels) } pub(crate) unsafe fn cuMipmappedArrayDestroy( - hMipmappedArray: hipMipmappedArray_t, + hMipmappedArray: CUmipmappedArray, ) -> hipError_t { - hipMipmappedArrayDestroy(hMipmappedArray) + array::mipmapped_destroy(hMipmappedArray) } pub(crate) unsafe fn cuMipmappedArrayGetLevel( pLevelArray: *mut CUarray, - hMipmappedArray: hipMipmappedArray_t, + hMipmappedArray: CUmipmappedArray, level: ::std::os::raw::c_uint, - ) -> hipError_t { - hipMipmappedArrayGetLevel(pLevelArray.cast(), hMipmappedArray, level) + ) -> Result<(), CUresult> { + array::mipmapped_get_level(pLevelArray, hMipmappedArray, level) } } diff --git a/zluda/src/impl/array.rs b/zluda/src/impl/array.rs index ab63d2c..76031a9 100644 --- a/zluda/src/impl/array.rs +++ b/zluda/src/impl/array.rs @@ -83,13 +83,55 @@ pub(crate) unsafe fn create( } pub(crate) unsafe fn mipmapped_create( - p_handle: *mut hipMipmappedArray_t, - p_mipmapped_array_desc: *const HIP_ARRAY3D_DESCRIPTOR, + mipmapped_array: *mut CUmipmappedArray, + mipmapped_array_desc: *const HIP_ARRAY3D_DESCRIPTOR, num_mipmap_levels: u32, -) -> hipError_t { - hipMipmappedArrayCreate( - p_handle, - p_mipmapped_array_desc.cast_mut(), - num_mipmap_levels, - ) +) -> Result<(), CUresult> { + if let Some(mipmapped_array_desc) = (mipmapped_array_desc).as_ref() { + let mut mipmapped_array_desc = *mipmapped_array_desc; + let (hack_flag, format) = hipfix::get_non_broken_format(mipmapped_array_desc.Format); + mipmapped_array_desc.Format = format; + let mut hip_array = ptr::null_mut(); + hip_call_cuda!(hipMipmappedArrayCreate( + &mut hip_array, + &mut mipmapped_array_desc, + num_mipmap_levels + )); + if (hip_array as usize & 0b11) != 0 { + hip_call_cuda!(hipMipmappedArrayDestroy(hip_array)); + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); + } + hip_array = (hip_array as usize | hack_flag as usize) as _; + *mipmapped_array = hip_array.cast(); + Ok(()) + } else { + Err(CUresult::CUDA_ERROR_INVALID_VALUE) + } +} + +pub(crate) unsafe fn mipmapped_destroy(mipmapped_array: CUmipmappedArray) -> hipError_t { + let mipmapped_array = hipfix::array::get_mipmapped(mipmapped_array).0; + hipMipmappedArrayDestroy(mipmapped_array) +} + +pub(crate) unsafe fn mipmapped_get_level( + level_array: *mut CUarray, + mipmapped_array: CUmipmappedArray, + level: u32, +) -> Result<(), CUresult> { + let (mipmapped_array, hack_flag) = hipfix::array::get_mipmapped(mipmapped_array); + if let Some(mipmapped_array) = mipmapped_array.as_mut() { + let mut hip_array = mem::zeroed(); + hip_call_cuda!(hipMipmappedArrayGetLevel( + &mut hip_array, + mipmapped_array as *mut _, + level + )); + let hip_array_mut = hip_array.as_mut().ok_or(CUresult::CUDA_ERROR_UNKNOWN)?; + hip_array_mut.textureType = hack_flag; + *level_array = mem::transmute(hip_array); + Ok(()) + } else { + Err(CUresult::CUDA_ERROR_INVALID_VALUE) + } } diff --git a/zluda/src/impl/hipfix.rs b/zluda/src/impl/hipfix.rs index 77fec00..4c02e2d 100644 --- a/zluda/src/impl/hipfix.rs +++ b/zluda/src/impl/hipfix.rs @@ -26,8 +26,8 @@ pub(crate) fn get_non_broken_format(format: hipArray_Format) -> (u32, hipArray_F } #[must_use] -pub(crate) fn get_broken_format(broken: u32, format: hipArray_Format) -> hipArray_Format { - match (broken, format) { +pub(crate) fn get_broken_format(array: &hipArray) -> Option { + Some(match (array.textureType, array.Format) { (2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF, (1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => { hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 @@ -35,13 +35,14 @@ pub(crate) fn get_broken_format(broken: u32, format: hipArray_Format) -> hipArra (1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8) => { hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 } - (_, f) => f, - } + (_, _) => return None, + }) } // memcpy3d fails when copying array1d arrays, so we mark all layered arrays by // settings LSB pub(crate) mod array { + use super::get_broken_format; use crate::{ hip_call_cuda, r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda}, @@ -51,23 +52,141 @@ pub(crate) mod array { use std::{mem, ptr}; pub(crate) unsafe fn with_resource_desc( - cuda: *const CUDA_RESOURCE_DESC, - fn_: impl FnOnce(*const HIP_RESOURCE_DESC) -> T, - ) -> T { - let cuda = &*cuda; + res_desc: *const CUDA_RESOURCE_DESC, + res_desc_view: *const HIP_RESOURCE_VIEW_DESC, + fn_: impl FnOnce(*const HIP_RESOURCE_DESC, *const HIP_RESOURCE_VIEW_DESC) -> T, + ) -> Result { + let cuda = &*res_desc; if cuda.resType == CUresourcetype::CU_RESOURCE_TYPE_ARRAY { let mut cuda = *cuda; - cuda.res.array.hArray = mem::transmute(get(cuda.res.array.hArray)); - fn_((&cuda as *const CUDA_RESOURCE_DESC).cast::()) + let hip_array = get(cuda.res.array.hArray); + cuda.res.array.hArray = mem::transmute(hip_array); + if let Some(hip_array) = hip_array.as_ref() { + if let Some(format_) = get_broken_format(hip_array) { + return if res_desc_view == ptr::null() { + let res_desc_view = HIP_RESOURCE_VIEW_DESC { + format: resource_view_format(format_, hip_array.NumChannels)?, + width: hip_array.width as usize, + height: hip_array.height as usize, + depth: hip_array.depth as usize, + firstMipmapLevel: 0, + lastMipmapLevel: 0, + firstLayer: 0, + lastLayer: 0, + reserved: mem::zeroed(), + }; + Ok(fn_( + (&cuda as *const CUDA_RESOURCE_DESC).cast::(), + &res_desc_view, + )) + } else { + Err(CUresult::CUDA_ERROR_NOT_SUPPORTED) + }; + } + } + Ok(fn_( + (&cuda as *const CUDA_RESOURCE_DESC).cast::(), + res_desc_view, + )) } else { - fn_((cuda as *const CUDA_RESOURCE_DESC).cast::()) + Ok(fn_( + (cuda as *const CUDA_RESOURCE_DESC).cast::(), + res_desc_view, + )) } } + fn resource_view_format( + format: hipArray_Format, + num_channels: u32, + ) -> Result { + Ok(match (format, num_channels) { + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X8 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X8 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X8 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X8 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X8 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X8 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X16 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X16 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X16 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X16 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X16 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X16 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X32 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X32 + } + (hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X32 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X32 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X32 + } + (hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X32 + } + (hipArray_Format::HIP_AD_FORMAT_HALF, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_1X16 + } + (hipArray_Format::HIP_AD_FORMAT_HALF, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_2X16 + } + (hipArray_Format::HIP_AD_FORMAT_HALF, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_4X16 + } + (hipArray_Format::HIP_AD_FORMAT_FLOAT, 1) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_1X32 + } + (hipArray_Format::HIP_AD_FORMAT_FLOAT, 2) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_2X32 + } + (hipArray_Format::HIP_AD_FORMAT_FLOAT, 4) => { + HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_4X32 + } + _ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), + }) + } + pub(crate) fn get(cuda: CUarray) -> hipArray_t { (cuda as usize & !3usize) as hipArray_t } + pub(crate) fn get_mipmapped(cuda: CUmipmappedArray) -> (hipMipmappedArray_t, u32) { + let array = (cuda as usize & !3usize) as hipMipmappedArray_t; + let broken_flag = (cuda as usize & 3usize) as u32; + (array, broken_flag) + } + pub(crate) fn to_cuda(array: hipArray_t, layered_dims: usize) -> CUarray { let a1d_layered = layered_dims as usize; ((array as usize) | a1d_layered) as CUarray diff --git a/zluda/src/impl/mod.rs b/zluda/src/impl/mod.rs index a36ffc4..d0e58a2 100644 --- a/zluda/src/impl/mod.rs +++ b/zluda/src/impl/mod.rs @@ -216,6 +216,7 @@ impl FromCuda for CUlibraryOption {} impl FromCuda for CUDA_KERNEL_NODE_PARAMS_v1 {} impl FromCuda for CUjitInputType {} impl FromCuda for CUDA_RESOURCE_DESC {} +impl FromCuda for CUmipmappedArray {} impl FromCuda for *mut context::Context {} impl FromCuda for *mut stream::Stream {} @@ -253,7 +254,6 @@ impl FromCuda for hipGraphExec_t {} impl FromCuda for hipGraphicsResource_t {} impl FromCuda for hipLimit_t {} impl FromCuda for hipSurfaceObject_t {} -impl FromCuda for hipMipmappedArray_t {} impl> FromCuda<*mut From> for *mut Into {} impl> FromCuda<*const From> for *const Into {} diff --git a/zluda/src/impl/surface.rs b/zluda/src/impl/surface.rs index fcf9a52..b07b52f 100644 --- a/zluda/src/impl/surface.rs +++ b/zluda/src/impl/surface.rs @@ -109,9 +109,9 @@ fn channel_format_desc( bits.3 *= bit_width; Ok(hipChannelFormatDesc { x: bits.0 as i32, - y: bits.0 as i32, - z: bits.0 as i32, - w: bits.0 as i32, + y: bits.1 as i32, + z: bits.2 as i32, + w: bits.3 as i32, f: kind, }) } diff --git a/zluda/src/impl/texobj.rs b/zluda/src/impl/texobj.rs index 21eb453..0096c74 100644 --- a/zluda/src/impl/texobj.rs +++ b/zluda/src/impl/texobj.rs @@ -1,19 +1,29 @@ +use super::hipfix; +use crate::hip_call_cuda; use cuda_types::*; use hip_runtime_sys::*; use std::ptr; -use super::hipfix; - pub(crate) unsafe fn create( p_tex_object: *mut hipTextureObject_t, p_res_desc: *const CUDA_RESOURCE_DESC, p_tex_desc: *const HIP_TEXTURE_DESC, p_res_view_desc: *const HIP_RESOURCE_VIEW_DESC, -) -> hipError_t { +) -> Result<(), CUresult> { if p_res_desc == ptr::null() { - return hipError_t::hipErrorInvalidValue; + return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } - hipfix::array::with_resource_desc(p_res_desc, |p_res_desc| { - hipTexObjectCreate(p_tex_object, p_res_desc, p_tex_desc, p_res_view_desc) - }) + hipfix::array::with_resource_desc( + p_res_desc, + p_res_view_desc, + |p_res_desc, p_res_view_desc| { + hip_call_cuda!(hipTexObjectCreate( + p_tex_object, + p_res_desc, + p_tex_desc, + p_res_view_desc + )); + Ok(()) + }, + )? } diff --git a/zluda/src/impl/texref.rs b/zluda/src/impl/texref.rs index 307b5ba..b72de09 100644 --- a/zluda/src/impl/texref.rs +++ b/zluda/src/impl/texref.rs @@ -94,7 +94,7 @@ pub(crate) unsafe fn set_array( if let Some(array) = array.as_ref() { hip_call_cuda!(hipTexRefSetFormat( texref, - hipfix::get_broken_format(array.textureType, array.Format), + hipfix::get_broken_format(array).unwrap_or(array.Format), array.NumChannels as i32, )); hip_call_cuda!(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT)); diff --git a/zluda/tests/kernel_sust.rs b/zluda/tests/kernel_sust.rs index 889332e..6b5ef49 100644 --- a/zluda/tests/kernel_sust.rs +++ b/zluda/tests/kernel_sust.rs @@ -425,7 +425,6 @@ unsafe fn kernel_sust_impl< for value in values.iter_mut() { *value = rng.gen::(); } - dbg!(&values); let mut args = vec![ &x as *const _ as *const c_void, &y as *const _ as *const _, diff --git a/zluda/tests/kernel_tex.rs b/zluda/tests/kernel_tex.rs index 6b2d1d3..88e3c4b 100644 --- a/zluda/tests/kernel_tex.rs +++ b/zluda/tests/kernel_tex.rs @@ -213,6 +213,7 @@ generate_tests!( CU_AD_FORMAT_SIGNED_INT8, CU_AD_FORMAT_SIGNED_INT16, CU_AD_FORMAT_SIGNED_INT32, + // TODO: update half crate //CU_AD_FORMAT_HALF, CU_AD_FORMAT_FLOAT ], @@ -337,13 +338,13 @@ const BYTE_FILLER2: u8 = 0xfe; unsafe fn force_transmute(f: From) -> To { if mem::size_of::() == mem::size_of::() - && mem::size_of::() == mem::size_of::() + && mem::size_of::() == mem::size_of::() { return mem::transmute_copy(&f); } - if mem::size_of::() == mem::size_of::() { + if mem::size_of::() == mem::size_of::() { if let Some(value) = ::downcast_ref::(&f) { - return mem::transmute_copy(&((value.to_f64() / f16::MAX.to_f64()) as f32)); + return mem::transmute_copy(&value.to_f32()); } if let Some(value) = ::downcast_ref::(&f) { return mem::transmute_copy(&((*value as f64 / u8::MAX as f64) as f32)); @@ -359,6 +360,9 @@ unsafe fn force_transmute(f: From) -> To { } } if mem::size_of::() == mem::size_of::() { + if let Some(_) = ::downcast_ref::(&f) { + return mem::transmute_copy(&f); + } if let Some(value) = ::downcast_ref::(&f) { return mem::transmute_copy(&f16::from_f64(*value as f64 / u8::MAX as f64)); } diff --git a/zluda/tests/mipmap_texture_to_surface.rs b/zluda/tests/mipmap_texture_to_surface.rs index 00ba4a6..e968e8c 100644 --- a/zluda/tests/mipmap_texture_to_surface.rs +++ b/zluda/tests/mipmap_texture_to_surface.rs @@ -4,6 +4,10 @@ use std::{mem, ptr}; mod common; +// TODO: These two tests expose various random brokenness of mipmapped array +// and texture objects. This should be turned into extensive tests like +// kernel_sust/kernel_suld/kernel_tex + cuda_driver_test!(mipmap_texture_to_surface); unsafe fn mipmap_texture_to_surface(cuda: T) { @@ -28,7 +32,7 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { Depth: 0, Format: CUarray_format::CU_AD_FORMAT_HALF, NumChannels: 4, - Flags: 0, + Flags: 2, }; assert_eq!( cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8), @@ -163,3 +167,166 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { ); assert_eq!(&pixels, &memcpy_dst); } + +cuda_driver_test!(mipmap_texture_to_surface2); + +unsafe fn mipmap_texture_to_surface2(cuda: T) { + let kernel = include_str!("mipmap_texture_to_surface.ptx"); + let mut kernel = kernel.to_owned(); + kernel.push('\0'); + assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); + let mut ctx = ptr::null_mut(); + assert_eq!( + cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)), + CUresult::CUDA_SUCCESS + ); + let mut module = ptr::null_mut(); + assert_eq!( + cuda.cuModuleLoadData(&mut module, kernel.as_ptr() as _), + CUresult::CUDA_SUCCESS + ); + let mut array_0 = mem::zeroed(); + let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR { + Width: 1368, + Height: 770, + Depth: 0, + Format: CUarray_format::CU_AD_FORMAT_HALF, + NumChannels: 4, + Flags: 2, + }; + assert_eq!( + cuda.cuArray3DCreate_v2(&mut array_0, &mipmap_desc), + CUresult::CUDA_SUCCESS + ); + let mut array_1 = mem::zeroed(); + let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR { + Width: 1368 / 2, + Height: 770 / 2, + Depth: 0, + Format: CUarray_format::CU_AD_FORMAT_HALF, + NumChannels: 4, + Flags: 2, + }; + assert_eq!( + cuda.cuArray3DCreate_v2(&mut array_1, &mipmap_desc), + CUresult::CUDA_SUCCESS + ); + let mut pixels = [0x3C66u16, 0x4066, 0x4299, 4466]; + let memcpy_from_host = CUDA_MEMCPY2D { + srcXInBytes: 0, + srcY: 0, + srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST, + srcHost: pixels.as_mut_ptr() as _, + srcDevice: CUdeviceptr_v2(ptr::null_mut()), + srcArray: ptr::null_mut(), + srcPitch: 4 * mem::size_of::(), + dstXInBytes: 0, + dstY: 0, + dstMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY, + dstHost: ptr::null_mut(), + dstDevice: CUdeviceptr_v2(ptr::null_mut()), + dstArray: array_0, + dstPitch: 0, + WidthInBytes: 4 * mem::size_of::(), + Height: 1, + }; + assert_eq!( + cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host), + CUresult::CUDA_SUCCESS + ); + let mut func = mem::zeroed(); + assert_eq!( + cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()), + CUresult::CUDA_SUCCESS + ); + let mut texture = mem::zeroed(); + let texture_resource_desc = CUDA_RESOURCE_DESC { + resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY, + res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 { + array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_0 }, + }, + flags: 0, + }; + let texture_desc = CUDA_TEXTURE_DESC { + addressMode: [ + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + ], + filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR, + flags: 2, + maxAnisotropy: 0, + mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_POINT, + mipmapLevelBias: 0f32, + minMipmapLevelClamp: 0f32, + maxMipmapLevelClamp: 0f32, + borderColor: [0f32, 0f32, 0f32, 0f32], + reserved: mem::zeroed(), + }; + assert_eq!( + cuda.cuTexObjectCreate( + &mut texture, + &texture_resource_desc, + &texture_desc, + ptr::null() + ), + CUresult::CUDA_SUCCESS + ); + let mut surface = mem::zeroed(); + let surface_resource_desc = CUDA_RESOURCE_DESC { + resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY, + res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 { + array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_1 }, + }, + flags: 0, + }; + assert_eq!( + cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc), + CUresult::CUDA_SUCCESS + ); + let mut params = [&mut texture, &mut surface]; + assert_eq!( + cuda.cuLaunchKernel( + func, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + ptr::null_mut(), + params.as_mut_ptr().cast(), + ptr::null_mut(), + ), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuStreamSynchronize(ptr::null_mut()), + CUresult::CUDA_SUCCESS + ); + let mut memcpy_dst = [u16::MAX; 4]; + let memcpy_to_host = CUDA_MEMCPY2D { + srcXInBytes: 0, + srcY: 0, + srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY, + srcHost: ptr::null(), + srcDevice: CUdeviceptr_v2(ptr::null_mut()), + srcArray: array_1, + srcPitch: 0, + dstXInBytes: 0, + dstY: 0, + dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST, + dstHost: memcpy_dst.as_mut_ptr() as _, + dstDevice: CUdeviceptr_v2(ptr::null_mut()), + dstArray: ptr::null_mut(), + dstPitch: 4 * mem::size_of::(), + WidthInBytes: 4 * mem::size_of::(), + Height: 1, + }; + assert_eq!( + cuda.cuMemcpy2DUnaligned_v2(&memcpy_to_host), + CUresult::CUDA_SUCCESS + ); + assert_eq!(&pixels, &memcpy_dst); +}