diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 7012fd4..1edcbd5 100644 Binary files a/ptx/lib/zluda_ptx_impl.bc and b/ptx/lib/zluda_ptx_impl.bc differ diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index d0d54d5..ecbe691 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -317,6 +317,102 @@ static __device__ void image_store_pck(T value, typename Coordinates::type } } +template +static __device__ T image_load_pck(typename Coordinates::type coord, surface_ptr surface) +{ + if constexpr (sizeof(T) <= sizeof(uint)) + { + uint data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_)) + { + uint2::Native_vec_ data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_)) + { + uint4::Native_vec_ data; + if constexpr (geo == ImageGeometry::_1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::_3D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute(coord)), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A1D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else if constexpr (geo == ImageGeometry::A2D) + { + asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory"); + } + else + { + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); + } + return transmute(data); + } + else + { + static_assert(sizeof(T) == 0, "Invalid vector size"); + } +} + template static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates::type coord, surface_ptr surface) { @@ -327,8 +423,7 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates::typ surface_ptr surface = get_surface_pointer(surf_arg); uint32_t shift_x = x_coordinate_shift(surf_arg); coord.x = coord.x >> shift_x; - uint4::Native_vec_ pixel = image_load_pck_full(coord, surface); - return transmute(pixel); + return image_load_pck(coord, surface); } template diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 4ffe3b0..1085258 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -2934,7 +2934,7 @@ fn replace_instructions_with_builtins_impl<'input>( vector, "_", suld.type_.to_ptx_name(), - "_trap", + "_zero", ] .concat(); statements.push(instruction_to_fn_call( diff --git a/zluda/src/impl/surface.rs b/zluda/src/impl/surface.rs index cb7ec2b..0f24fa3 100644 --- a/zluda/src/impl/surface.rs +++ b/zluda/src/impl/surface.rs @@ -22,10 +22,19 @@ pub(crate) unsafe fn create( } let mut surf_obj = mem::zeroed(); hip_call_cuda!(hipCreateSurfaceObject(&mut surf_obj, &desc)); + let top_reserved_bits = surf_obj as usize >> (usize::BITS - IMAGE_RESERVED_TOP_BITS); + if top_reserved_bits != 0 { + #[allow(unused_must_use)] + { + hipDestroySurfaceObject(surf_obj); + } + return Err(CUresult::CUDA_ERROR_UNKNOWN); + } let format_size = format_size((&*desc.res.array.array).Format)?; let channels = (&*desc.res.array.array).NumChannels; let pixel_size = format_size * channels as usize; - let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - IMAGE_RESERVED_TOP_BITS); + let shift_amount = + (pixel_size.trailing_zeros() as usize) << (usize::BITS - IMAGE_RESERVED_TOP_BITS); surf_obj = (surf_obj as usize | shift_amount) as _; *result = surf_obj; Ok(()) diff --git a/zluda/tests/kernel_suld.rs b/zluda/tests/kernel_suld.rs index ad6e964..07fc560 100644 --- a/zluda/tests/kernel_suld.rs +++ b/zluda/tests/kernel_suld.rs @@ -340,10 +340,6 @@ unsafe fn kernel_suld_impl< if mem::size_of::() * CHANNELS < mem::size_of::() * SULD_N { return; } - // TODO: reenable those tests - if mem::size_of::() != mem::size_of::() || CHANNELS != SULD_N { - return; - } let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(seed); let size = 4usize; let random_size = rand::distributions::Uniform::::new(1, size as u32);