diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 1d13514..c014e58 100644 Binary files a/ptx/lib/zluda_ptx_impl.bc and b/ptx/lib/zluda_ptx_impl.bc differ diff --git a/ptx/lib/zluda_ptx_impl.cpp b/ptx/lib/zluda_ptx_impl.cpp index 0330ba3..d023d30 100644 --- a/ptx/lib/zluda_ptx_impl.cpp +++ b/ptx/lib/zluda_ptx_impl.cpp @@ -11,6 +11,7 @@ // https://llvm.org/docs/AMDGPUUsage.html #include +#include #include #define HIP_NO_HALF #include @@ -162,13 +163,24 @@ typedef uint8 CONSTANT_SPACE *surface_ptr; template static __device__ To transmute(From f) { - union + if constexpr (sizeof(To) == sizeof(From)) { - To t; - From f; - } u = {To{0}}; - u.f = f; - return u.t; + return std::bit_cast(f); + } + else if constexpr (sizeof(To) > sizeof(From)) + { + union + { + To t; + From f; + } u = {To{0}}; + u.f = f; + return u.t; + } + else + { + static_assert(sizeof(To) == 0); + } } enum class ImageGeometry @@ -231,10 +243,10 @@ static __device__ void image_store_pck(T value, typename Coordinates::type } else { - static_assert(!std::is_same::value, "Invalid geometry"); + static_assert(sizeof(T) == 0, "Invalid geometry"); } } - else if constexpr (sizeof(T) == sizeof(uint2)) + else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_)) { uint2::Native_vec_ value_dword2 = transmute(value); if constexpr (geo == ImageGeometry::_1D) @@ -259,10 +271,10 @@ static __device__ void image_store_pck(T value, typename Coordinates::type } else { - static_assert(!std::is_same::value, "Invalid geometry"); + static_assert(sizeof(T) == 0, "Invalid geometry"); } } - else if constexpr (sizeof(T) == sizeof(uint4)) + else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_)) { uint4::Native_vec_ value_dword4 = transmute(value); if constexpr (geo == ImageGeometry::_1D) @@ -287,12 +299,12 @@ static __device__ void image_store_pck(T value, typename Coordinates::type } else { - static_assert(!std::is_same::value, "Invalid geometry"); + static_assert(sizeof(T) == 0, "Invalid geometry"); } } else { - static_assert(!std::is_same::value, "Invalid vector size"); + static_assert(sizeof(T) == 0, "Invalid vector size"); } } @@ -323,7 +335,7 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates::value, "Invalid geometry"); + static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry"); } return data; } @@ -331,14 +343,36 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates static __device__ void image_store_pck_full_with(uint4::Native_vec_ data, T value, typename Coordinates::type coord, surface_ptr surface) { - union + // We avoid unions for types smaller than sizeof(uint32_t), + // because in those cases we get this garbage: + // ds_write_b128 v2, v[5:8] + // ds_write_b16 v2, v9 + // ds_read_b128 v[5:8], v2 + // tested with ROCm 5.7.1 + if constexpr (sizeof(T) == sizeof(uint8_t)) { - uint4::Native_vec_ full_vec; - T value; - } u = {0}; - u.full_vec = data; - u.value = value; - image_store_pck(u.full_vec, coord, surface); + uint32_t x = uint32_t(std::bit_cast(value)); + uint32_t data_0 = ((data[0]) >> 8) << 8; + data[0] = data_0 | x; + } + else if constexpr (sizeof(T) == sizeof(uint16_t)) + { + uint32_t x = uint32_t(std::bit_cast(value)); + uint32_t data_0 = ((data[0]) >> 16) << 16; + data[0] = data_0 | x; + } + else + { + union + { + uint4::Native_vec_ full_vec; + T value; + } u = {0}; + u.full_vec = data; + u.value = value; + data = u.full_vec; + } + image_store_pck(data, coord, surface); } constexpr auto IMAGE_RESERVED_TOP_BITS = 3; @@ -348,13 +382,45 @@ static __device__ surface_ptr get_surface_pointer(uint64_t s) return (surface_ptr)((s << IMAGE_RESERVED_TOP_BITS) >> IMAGE_RESERVED_TOP_BITS); } +static __device__ surface_ptr get_surface_pointer(struct textureReference GLOBAL_SPACE *surf_ref) +{ + return (surface_ptr)(surf_ref->textureObject); +} + static __device__ uint32_t x_coordinate_shift(uint64_t s) { return uint32_t(s >> (64 - IMAGE_RESERVED_TOP_BITS)); } -template -static __device__ void sust_b_indirect_zero(uint64_t surf_arg, typename Coordinates::type coord, T data) +static __device__ uint32_t x_coordinate_shift(struct textureReference GLOBAL_SPACE *ptr) +{ + uint32_t channels = uint32_t(ptr->numChannels); + uint32_t format_width = 0; + hipArray_Format format = ptr->format; + switch (format) + { + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8: + format_width = 1; + break; + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16: + case hipArray_Format::HIP_AD_FORMAT_HALF: + format_width = 2; + break; + case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32: + case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32: + case hipArray_Format::HIP_AD_FORMAT_FLOAT: + format_width = 4; + break; + default: + __builtin_unreachable(); + } + return uint32_t(__builtin_ctz(format_width * channels)); +} + +template +static __device__ void sust_b_indirect_zero(Surface surf_arg, typename Coordinates::type coord, T data) { surface_ptr surface = get_surface_pointer(surf_arg); uint32_t shift_x = x_coordinate_shift(surf_arg); @@ -835,34 +901,6 @@ extern "C" suld_b_a2d_vec(_v4, b32, uint4); // suld_b_a2d_vec(_v4, b64, ulong4); - static __device__ uint64_t repack(struct textureReference GLOBAL_SPACE *ptr) - { - uint32_t channels = uint32_t(ptr->numChannels); - uint32_t format_width = 0; - hipArray_Format format = ptr->format; - switch (format) - { - case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8: - case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8: - format_width = 1; - break; - case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16: - case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16: - case hipArray_Format::HIP_AD_FORMAT_HALF: - format_width = 2; - break; - case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32: - case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32: - case hipArray_Format::HIP_AD_FORMAT_FLOAT: - format_width = 4; - break; - default: - __builtin_unreachable(); - } - uint64_t shift_size = uint64_t(__builtin_ctz(format_width * channels)); - return uint64_t(ptr->textureObject) | (shift_size << (64 - 3)); - } - #define SUST_B_ZERO(TYPE, GEOMETRY, HIP_TYPE) \ void FUNC(sust_b_indirect_##TYPE##_zero)(uint64_t surf_arg, typename Coordinates::type coord, HIP_TYPE::Native_vec_ data) \ { \ @@ -870,8 +908,7 @@ extern "C" } \ void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, typename Coordinates::type coord, HIP_TYPE::Native_vec_ data) \ { \ - FUNC_CALL(sust_b_indirect_##TYPE##_zero) \ - (repack(ptr), coord, data); \ + sust_b_indirect_zero(ptr, coord, data); \ } #define SUST_B_ZERO_ARRAY(TYPE, GEOMETRY, HIP_TYPE) \ @@ -882,8 +919,8 @@ extern "C" } \ void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, uint32_t layer, typename Coordinates::arg_type coord, HIP_TYPE::Native_vec_ data) \ { \ - FUNC_CALL(sust_b_indirect_##TYPE##_zero) \ - (repack(ptr), layer, coord, data); \ + auto coord_array = Coordinates::pack_layer(layer, coord); \ + sust_b_indirect_zero(ptr, coord_array, data); \ } SUST_B_ZERO(1d_b8, ImageGeometry::_1D, uchar1); @@ -942,180 +979,6 @@ extern "C" SUST_B_ZERO_ARRAY(a2d_v4_b16, ImageGeometry::A2D, ushort4); SUST_B_ZERO_ARRAY(a2d_v4_b32, ImageGeometry::A2D, uint4); -#define sust_b_1d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int1::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1D(i, byte_coord, tmp); \ - } \ - void FUNC(sust_b_indirect_1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int1::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - surf1Dwrite(hip_data, surfObj, coord.x); \ - } - - sust_b_1d_vec(, b8, uchar1); - sust_b_1d_vec(, b16, ushort1); - sust_b_1d_vec(, b32, uint1); - // sust_b_1d_vec(, b64, ulong1); - sust_b_1d_vec(_v2, b8, uchar2); - sust_b_1d_vec(_v2, b16, ushort2); - sust_b_1d_vec(_v2, b32, uint2); - // sust_b_1d_vec(_v2, b64, ulong2); - sust_b_1d_vec(_v4, b8, uchar4); - sust_b_1d_vec(_v4, b16, ushort4); - sust_b_1d_vec(_v4, b32, uint4); - // sust_b_1d_vec(_v4, b64, ulong4); - -#define sust_b_2d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_2d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2D(i, int2(byte_coord, coord.y).data, tmp); \ - } \ - void FUNC(sust_b_indirect_2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - surf2Dwrite(hip_data, surfObj, coord.x, coord.y); \ - } - - sust_b_2d_vec(, b8, uchar1); - sust_b_2d_vec(, b16, ushort1); - sust_b_2d_vec(, b32, uint1); - // sust_b_2d_vec(, b64, ulong1); - sust_b_2d_vec(_v2, b8, uchar2); - sust_b_2d_vec(_v2, b16, ushort2); - sust_b_2d_vec(_v2, b32, uint2); - // sust_b_2d_vec(_v2, b64, ulong2); - sust_b_2d_vec(_v4, b8, uchar4); - sust_b_2d_vec(_v4, b16, ushort4); - sust_b_2d_vec(_v4, b32, uint4); - // sust_b_2d_vec(_v4, b64, ulong4); - -#define sust_b_3d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_3d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \ - } \ - void FUNC(sust_b_indirect_3d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \ - } - - sust_b_3d_vec(, b8, uchar1); - sust_b_3d_vec(, b16, ushort1); - sust_b_3d_vec(, b32, uint1); - // sust_b_3d_vec(, b64, ulong1); - sust_b_3d_vec(_v2, b8, uchar2); - sust_b_3d_vec(_v2, b16, ushort2); - sust_b_3d_vec(_v2, b32, uint2); - // sust_b_3d_vec(_v2, b64, ulong2); - sust_b_3d_vec(_v4, b8, uchar4); - sust_b_3d_vec(_v4, b16, ushort4); - sust_b_3d_vec(_v4, b32, uint4); - // sust_b_3d_vec(_v4, b64, ulong4); - -#define sust_b_a1d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_a1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, uint layer, int x, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \ - } \ - void FUNC(sust_b_indirect_a1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \ - } - - sust_b_a1d_vec(, b8, uchar1); - sust_b_a1d_vec(, b16, ushort1); - sust_b_a1d_vec(, b32, uint1); - // sust_b_a1d_vec(, b64, ulong1); - sust_b_a1d_vec(_v2, b8, uchar2); - sust_b_a1d_vec(_v2, b16, ushort2); - sust_b_a1d_vec(_v2, b32, uint2); - // sust_b_a1d_vec(_v2, b64, ulong2); - sust_b_a1d_vec(_v4, b8, uchar4); - sust_b_a1d_vec(_v4, b16, ushort4); - sust_b_a1d_vec(_v4, b32, uint4); - // sust_b_a1d_vec(_v4, b64, ulong4); - -#define sust_b_a2d_vec(VEC, TYPE, HIP_TYPE) \ - void FUNC(sust_b_a2d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \ - { \ - hipTextureObject_t textureObject = ptr->textureObject; \ - TEXTURE_OBJECT_PARAMETERS_INIT; \ - (void)s; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \ - } \ - void FUNC(sust_b_indirect_a2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \ - { \ - hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \ - __HIP_SURFACE_OBJECT_PARAMETERS_INIT; \ - int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \ - HIP_TYPE hip_data; \ - hip_data.data = data; \ - auto tmp = __pack_to_float4(hip_data); \ - __ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \ - } - - sust_b_a2d_vec(, b8, uchar1); - sust_b_a2d_vec(, b16, ushort1); - sust_b_a2d_vec(, b32, uint1); - // sust_b_a2d_vec(, b64, ulong1); - sust_b_a2d_vec(_v2, b8, uchar2); - sust_b_a2d_vec(_v2, b16, ushort2); - sust_b_a2d_vec(_v2, b32, uint2); - // sust_b_a2d_vec(_v2, b64, ulong2); - sust_b_a2d_vec(_v4, b8, uchar4); - sust_b_a2d_vec(_v4, b16, ushort4); - sust_b_a2d_vec(_v4, b32, uint4); - // sust_b_a2d_vec(_v4, b64, ulong4); - __device__ static inline bool is_upper_warp() { return FUNC_CALL(COMPILATION_MODE) == CompilationMode::DoubleWave32OnWave64 && __lane_id() >= 32; diff --git a/zluda/src/impl/surface.rs b/zluda/src/impl/surface.rs index 519e76e..5d11f7b 100644 --- a/zluda/src/impl/surface.rs +++ b/zluda/src/impl/surface.rs @@ -1,10 +1,11 @@ +use super::hipfix; +use crate::hip_call_cuda; use cuda_types::*; use hip_runtime_sys::*; use std::{mem, ptr}; -use crate::hip_call_cuda; - -use super::{hipfix, FromCuda}; +// Same as in zluda_ptx_impl.cpp +const IMAGE_RESERVED_TOP_BITS: u32 = 3; pub(crate) unsafe fn create( result: *mut hipSurfaceObject_t, @@ -14,23 +15,24 @@ pub(crate) unsafe fn create( return Err(CUresult::CUDA_ERROR_INVALID_VALUE); } let desc = to_surface_desc(*p_res_desc)?; + // We need to check array format and channel count to set top bits of the surface object. + // HIP does not support non-Array sources anyway + if desc.resType != hipResourceType::hipResourceTypeArray { + return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED); + } let mut surf_obj = mem::zeroed(); hip_call_cuda!(hipCreateSurfaceObject(&mut surf_obj, &desc)); - if desc.resType != hipResourceType::hipResourceTypeArray { - panic!() - } - let format_size = format_size((&*desc.res.array.array).Format); + let format_size = format_size((&*desc.res.array.array).Format)?; let channels = (&*desc.res.array.array).NumChannels; let pixel_size = format_size * channels as usize; - let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - 3); + let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - IMAGE_RESERVED_TOP_BITS); surf_obj = (surf_obj as usize | shift_amount) as _; - dbg!(surf_obj); *result = surf_obj; Ok(()) } -pub(crate) fn format_size(f: hipArray_Format) -> usize { - match f { +pub(crate) fn format_size(f: hipArray_Format) -> Result { + Ok(match f { hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8 | hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => 1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16 @@ -39,8 +41,8 @@ pub(crate) fn format_size(f: hipArray_Format) -> usize { hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32 | hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32 | hipArray_Format::HIP_AD_FORMAT_FLOAT => 4, - _ => panic!(), - } + _ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), + }) } unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result { @@ -51,92 +53,10 @@ unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result hipResourceDesc__bindgen_ty_1 { - mipmap: hipResourceDesc__bindgen_ty_1__bindgen_ty_2 { - mipmap: mem::transmute(res_desc.res.mipmap.hMipmappedArray), - }, - }, - CUresourcetype::CU_RESOURCE_TYPE_LINEAR => hipResourceDesc__bindgen_ty_1 { - linear: hipResourceDesc__bindgen_ty_1__bindgen_ty_3 { - devPtr: res_desc.res.linear.devPtr.0, - desc: channel_format_desc( - FromCuda::from_cuda(res_desc.res.linear.format), - res_desc.res.linear.numChannels, - )?, - sizeInBytes: res_desc.res.linear.sizeInBytes, - }, - }, - CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => hipResourceDesc__bindgen_ty_1 { - pitch2D: hipResourceDesc__bindgen_ty_1__bindgen_ty_4 { - devPtr: res_desc.res.pitch2D.devPtr.0, - desc: channel_format_desc( - FromCuda::from_cuda(res_desc.res.pitch2D.format), - res_desc.res.pitch2D.numChannels, - )?, - width: res_desc.res.pitch2D.width, - height: res_desc.res.pitch2D.height, - pitchInBytes: res_desc.res.pitch2D.pitchInBytes, - }, - }, - _ => todo!(), + _ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED), }; Ok(hipResourceDesc { resType: res_type, res, }) } - -fn channel_format_desc( - format: hipArray_Format, - num_channels: u32, -) -> Result { - let mut bits = match num_channels { - 1 => (1, 0, 0, 0), - 2 => (1, 1, 0, 0), - 3 => (1, 1, 1, 0), - 4 => (1, 1, 1, 1), - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - }; - let (kind, bit_width) = match format { - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8 => { - (hipChannelFormatKind::hipChannelFormatKindUnsigned, u8::BITS) - } - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16 => ( - hipChannelFormatKind::hipChannelFormatKindUnsigned, - u16::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32 => ( - hipChannelFormatKind::hipChannelFormatKindUnsigned, - u32::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, i8::BITS) - } - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, i16::BITS) - } - hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32 => { - (hipChannelFormatKind::hipChannelFormatKindSigned, i32::BITS) - } - hipArray_Format::HIP_AD_FORMAT_HALF => ( - hipChannelFormatKind::hipChannelFormatKindFloat, - mem::size_of::() as u32 * u8::BITS, - ), - hipArray_Format::HIP_AD_FORMAT_FLOAT => ( - hipChannelFormatKind::hipChannelFormatKindFloat, - mem::size_of::() as u32 * u8::BITS, - ), - _ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE), - }; - bits.0 *= bit_width; - bits.1 *= bit_width; - bits.2 *= bit_width; - bits.3 *= bit_width; - Ok(hipChannelFormatDesc { - x: bits.0 as i32, - y: bits.0 as i32, - z: bits.0 as i32, - w: bits.0 as i32, - f: kind, - }) -} diff --git a/zluda/src/impl/surfref.rs b/zluda/src/impl/surfref.rs index c1526f9..457f9c4 100644 --- a/zluda/src/impl/surfref.rs +++ b/zluda/src/impl/surfref.rs @@ -1,10 +1,7 @@ -use crate::{ - hip_call_cuda, - r#impl::{hipfix, surface}, -}; +use crate::{hip_call_cuda, r#impl::hipfix}; use cuda_types::{CUarray, CUresult}; use hip_runtime_sys::*; -use std::{mem, ptr}; +use std::ptr; pub(crate) unsafe fn set_array( surfref: *mut textureReference, @@ -21,14 +18,6 @@ pub(crate) unsafe fn set_array( array.Format, array.NumChannels as i32, )); - //assert_eq!(hipError_t::hipSuccess, hipHostGetDevicePointer(&mut dev_ptr, surfref.cast(), 0)); - dbg!(surfref); - // TODO: clear bits on the old textureobject hip_call_cuda!(hipTexRefSetArray(surfref, array, HIP_TRSA_OVERRIDE_FORMAT)); - let format_size = surface::format_size(array.Format); - let pixel_size = format_size * array.NumChannels as usize; - let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - 3); - let mut surfref = &mut *surfref; - surfref.textureObject = (surfref.textureObject as usize | shift_amount) as _; Ok(()) }