mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 06:40:21 +00:00
Clean up surface functions
This commit is contained in:
parent
d9d1308800
commit
d23d247ed9
4 changed files with 109 additions and 337 deletions
Binary file not shown.
|
@ -11,6 +11,7 @@
|
|||
// https://llvm.org/docs/AMDGPUUsage.html
|
||||
|
||||
#include <cstdint>
|
||||
#include <bit>
|
||||
#include <hip/hip_runtime.h>
|
||||
#define HIP_NO_HALF
|
||||
#include <hip/amd_detail/amd_hip_fp16.h>
|
||||
|
@ -162,6 +163,12 @@ typedef uint8 CONSTANT_SPACE *surface_ptr;
|
|||
template <typename To, typename From>
|
||||
static __device__ To transmute(From f)
|
||||
{
|
||||
if constexpr (sizeof(To) == sizeof(From))
|
||||
{
|
||||
return std::bit_cast<To>(f);
|
||||
}
|
||||
else if constexpr (sizeof(To) > sizeof(From))
|
||||
{
|
||||
union
|
||||
{
|
||||
To t;
|
||||
|
@ -169,6 +176,11 @@ static __device__ To transmute(From f)
|
|||
} u = {To{0}};
|
||||
u.f = f;
|
||||
return u.t;
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(To) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
enum class ImageGeometry
|
||||
|
@ -231,10 +243,10 @@ static __device__ void image_store_pck(T value, typename Coordinates<geo>::type
|
|||
}
|
||||
else
|
||||
{
|
||||
static_assert(!std::is_same<T, T>::value, "Invalid geometry");
|
||||
static_assert(sizeof(T) == 0, "Invalid geometry");
|
||||
}
|
||||
}
|
||||
else if constexpr (sizeof(T) == sizeof(uint2))
|
||||
else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_))
|
||||
{
|
||||
uint2::Native_vec_ value_dword2 = transmute<uint2::Native_vec_>(value);
|
||||
if constexpr (geo == ImageGeometry::_1D)
|
||||
|
@ -259,10 +271,10 @@ static __device__ void image_store_pck(T value, typename Coordinates<geo>::type
|
|||
}
|
||||
else
|
||||
{
|
||||
static_assert(!std::is_same<T, T>::value, "Invalid geometry");
|
||||
static_assert(sizeof(T) == 0, "Invalid geometry");
|
||||
}
|
||||
}
|
||||
else if constexpr (sizeof(T) == sizeof(uint4))
|
||||
else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_))
|
||||
{
|
||||
uint4::Native_vec_ value_dword4 = transmute<uint4::Native_vec_>(value);
|
||||
if constexpr (geo == ImageGeometry::_1D)
|
||||
|
@ -287,12 +299,12 @@ static __device__ void image_store_pck(T value, typename Coordinates<geo>::type
|
|||
}
|
||||
else
|
||||
{
|
||||
static_assert(!std::is_same<T, T>::value, "Invalid geometry");
|
||||
static_assert(sizeof(T) == 0, "Invalid geometry");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(!std::is_same<T, T>::value, "Invalid vector size");
|
||||
static_assert(sizeof(T) == 0, "Invalid vector size");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -323,7 +335,7 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates<ge
|
|||
}
|
||||
else
|
||||
{
|
||||
static_assert(!std::is_same<void, void>::value, "Invalid geometry");
|
||||
static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry");
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
@ -331,6 +343,26 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates<ge
|
|||
template <typename T, ImageGeometry geo>
|
||||
static __device__ void image_store_pck_full_with(uint4::Native_vec_ data, T value, typename Coordinates<geo>::type coord, surface_ptr surface)
|
||||
{
|
||||
// We avoid unions for types smaller than sizeof(uint32_t),
|
||||
// because in those cases we get this garbage:
|
||||
// ds_write_b128 v2, v[5:8]
|
||||
// ds_write_b16 v2, v9
|
||||
// ds_read_b128 v[5:8], v2
|
||||
// tested with ROCm 5.7.1
|
||||
if constexpr (sizeof(T) == sizeof(uint8_t))
|
||||
{
|
||||
uint32_t x = uint32_t(std::bit_cast<uint8_t>(value));
|
||||
uint32_t data_0 = ((data[0]) >> 8) << 8;
|
||||
data[0] = data_0 | x;
|
||||
}
|
||||
else if constexpr (sizeof(T) == sizeof(uint16_t))
|
||||
{
|
||||
uint32_t x = uint32_t(std::bit_cast<uint16_t>(value));
|
||||
uint32_t data_0 = ((data[0]) >> 16) << 16;
|
||||
data[0] = data_0 | x;
|
||||
}
|
||||
else
|
||||
{
|
||||
union
|
||||
{
|
||||
uint4::Native_vec_ full_vec;
|
||||
|
@ -338,7 +370,9 @@ static __device__ void image_store_pck_full_with(uint4::Native_vec_ data, T valu
|
|||
} u = {0};
|
||||
u.full_vec = data;
|
||||
u.value = value;
|
||||
image_store_pck<uint4::Native_vec_, geo>(u.full_vec, coord, surface);
|
||||
data = u.full_vec;
|
||||
}
|
||||
image_store_pck<uint4::Native_vec_, geo>(data, coord, surface);
|
||||
}
|
||||
|
||||
constexpr auto IMAGE_RESERVED_TOP_BITS = 3;
|
||||
|
@ -348,13 +382,45 @@ static __device__ surface_ptr get_surface_pointer(uint64_t s)
|
|||
return (surface_ptr)((s << IMAGE_RESERVED_TOP_BITS) >> IMAGE_RESERVED_TOP_BITS);
|
||||
}
|
||||
|
||||
static __device__ surface_ptr get_surface_pointer(struct textureReference GLOBAL_SPACE *surf_ref)
|
||||
{
|
||||
return (surface_ptr)(surf_ref->textureObject);
|
||||
}
|
||||
|
||||
static __device__ uint32_t x_coordinate_shift(uint64_t s)
|
||||
{
|
||||
return uint32_t(s >> (64 - IMAGE_RESERVED_TOP_BITS));
|
||||
}
|
||||
|
||||
template <typename T, ImageGeometry geo>
|
||||
static __device__ void sust_b_indirect_zero(uint64_t surf_arg, typename Coordinates<geo>::type coord, T data)
|
||||
static __device__ uint32_t x_coordinate_shift(struct textureReference GLOBAL_SPACE *ptr)
|
||||
{
|
||||
uint32_t channels = uint32_t(ptr->numChannels);
|
||||
uint32_t format_width = 0;
|
||||
hipArray_Format format = ptr->format;
|
||||
switch (format)
|
||||
{
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8:
|
||||
format_width = 1;
|
||||
break;
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16:
|
||||
case hipArray_Format::HIP_AD_FORMAT_HALF:
|
||||
format_width = 2;
|
||||
break;
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32:
|
||||
case hipArray_Format::HIP_AD_FORMAT_FLOAT:
|
||||
format_width = 4;
|
||||
break;
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
return uint32_t(__builtin_ctz(format_width * channels));
|
||||
}
|
||||
|
||||
template <typename T, ImageGeometry geo, typename Surface>
|
||||
static __device__ void sust_b_indirect_zero(Surface surf_arg, typename Coordinates<geo>::type coord, T data)
|
||||
{
|
||||
surface_ptr surface = get_surface_pointer(surf_arg);
|
||||
uint32_t shift_x = x_coordinate_shift(surf_arg);
|
||||
|
@ -835,34 +901,6 @@ extern "C"
|
|||
suld_b_a2d_vec(_v4, b32, uint4);
|
||||
// suld_b_a2d_vec(_v4, b64, ulong4);
|
||||
|
||||
static __device__ uint64_t repack(struct textureReference GLOBAL_SPACE *ptr)
|
||||
{
|
||||
uint32_t channels = uint32_t(ptr->numChannels);
|
||||
uint32_t format_width = 0;
|
||||
hipArray_Format format = ptr->format;
|
||||
switch (format)
|
||||
{
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8:
|
||||
format_width = 1;
|
||||
break;
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16:
|
||||
case hipArray_Format::HIP_AD_FORMAT_HALF:
|
||||
format_width = 2;
|
||||
break;
|
||||
case hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32:
|
||||
case hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32:
|
||||
case hipArray_Format::HIP_AD_FORMAT_FLOAT:
|
||||
format_width = 4;
|
||||
break;
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
uint64_t shift_size = uint64_t(__builtin_ctz(format_width * channels));
|
||||
return uint64_t(ptr->textureObject) | (shift_size << (64 - 3));
|
||||
}
|
||||
|
||||
#define SUST_B_ZERO(TYPE, GEOMETRY, HIP_TYPE) \
|
||||
void FUNC(sust_b_indirect_##TYPE##_zero)(uint64_t surf_arg, typename Coordinates<GEOMETRY>::type coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
|
@ -870,8 +908,7 @@ extern "C"
|
|||
} \
|
||||
void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, typename Coordinates<GEOMETRY>::type coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
FUNC_CALL(sust_b_indirect_##TYPE##_zero) \
|
||||
(repack(ptr), coord, data); \
|
||||
sust_b_indirect_zero<HIP_TYPE::Native_vec_, GEOMETRY>(ptr, coord, data); \
|
||||
}
|
||||
|
||||
#define SUST_B_ZERO_ARRAY(TYPE, GEOMETRY, HIP_TYPE) \
|
||||
|
@ -882,8 +919,8 @@ extern "C"
|
|||
} \
|
||||
void FUNC(sust_b_##TYPE##_zero)(struct textureReference GLOBAL_SPACE * ptr, uint32_t layer, typename Coordinates<GEOMETRY>::arg_type coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
FUNC_CALL(sust_b_indirect_##TYPE##_zero) \
|
||||
(repack(ptr), layer, coord, data); \
|
||||
auto coord_array = Coordinates<GEOMETRY>::pack_layer(layer, coord); \
|
||||
sust_b_indirect_zero<HIP_TYPE::Native_vec_, GEOMETRY>(ptr, coord_array, data); \
|
||||
}
|
||||
|
||||
SUST_B_ZERO(1d_b8, ImageGeometry::_1D, uchar1);
|
||||
|
@ -942,180 +979,6 @@ extern "C"
|
|||
SUST_B_ZERO_ARRAY(a2d_v4_b16, ImageGeometry::A2D, ushort4);
|
||||
SUST_B_ZERO_ARRAY(a2d_v4_b32, ImageGeometry::A2D, uint4);
|
||||
|
||||
#define sust_b_1d_vec(VEC, TYPE, HIP_TYPE) \
|
||||
void FUNC(sust_b_1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int1::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipTextureObject_t textureObject = ptr->textureObject; \
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT; \
|
||||
(void)s; \
|
||||
int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_1D(i), __ockl_image_channel_order_1D(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_1D(i, byte_coord, tmp); \
|
||||
} \
|
||||
void FUNC(sust_b_indirect_1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int1::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
surf1Dwrite(hip_data, surfObj, coord.x); \
|
||||
}
|
||||
|
||||
sust_b_1d_vec(, b8, uchar1);
|
||||
sust_b_1d_vec(, b16, ushort1);
|
||||
sust_b_1d_vec(, b32, uint1);
|
||||
// sust_b_1d_vec(, b64, ulong1);
|
||||
sust_b_1d_vec(_v2, b8, uchar2);
|
||||
sust_b_1d_vec(_v2, b16, ushort2);
|
||||
sust_b_1d_vec(_v2, b32, uint2);
|
||||
// sust_b_1d_vec(_v2, b64, ulong2);
|
||||
sust_b_1d_vec(_v4, b8, uchar4);
|
||||
sust_b_1d_vec(_v4, b16, ushort4);
|
||||
sust_b_1d_vec(_v4, b32, uint4);
|
||||
// sust_b_1d_vec(_v4, b64, ulong4);
|
||||
|
||||
#define sust_b_2d_vec(VEC, TYPE, HIP_TYPE) \
|
||||
void FUNC(sust_b_2d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipTextureObject_t textureObject = ptr->textureObject; \
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT; \
|
||||
(void)s; \
|
||||
int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_2D(i, int2(byte_coord, coord.y).data, tmp); \
|
||||
} \
|
||||
void FUNC(sust_b_indirect_2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int2::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
surf2Dwrite(hip_data, surfObj, coord.x, coord.y); \
|
||||
}
|
||||
|
||||
sust_b_2d_vec(, b8, uchar1);
|
||||
sust_b_2d_vec(, b16, ushort1);
|
||||
sust_b_2d_vec(, b32, uint1);
|
||||
// sust_b_2d_vec(, b64, ulong1);
|
||||
sust_b_2d_vec(_v2, b8, uchar2);
|
||||
sust_b_2d_vec(_v2, b16, ushort2);
|
||||
sust_b_2d_vec(_v2, b32, uint2);
|
||||
// sust_b_2d_vec(_v2, b64, ulong2);
|
||||
sust_b_2d_vec(_v4, b8, uchar4);
|
||||
sust_b_2d_vec(_v4, b16, ushort4);
|
||||
sust_b_2d_vec(_v4, b32, uint4);
|
||||
// sust_b_2d_vec(_v4, b64, ulong4);
|
||||
|
||||
#define sust_b_3d_vec(VEC, TYPE, HIP_TYPE) \
|
||||
void FUNC(sust_b_3d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipTextureObject_t textureObject = ptr->textureObject; \
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT; \
|
||||
(void)s; \
|
||||
int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \
|
||||
} \
|
||||
void FUNC(sust_b_indirect_3d##VEC##_##TYPE##_trap)(uint64_t serf_arg, int4::Native_vec_ coord, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT; \
|
||||
int byte_coord = __hipGetPixelAddr(coord.x, __ockl_image_channel_data_type_3D(i), __ockl_image_channel_order_3D(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_3D(i, int4(byte_coord, coord.y, coord.z, 0).data, tmp); \
|
||||
}
|
||||
|
||||
sust_b_3d_vec(, b8, uchar1);
|
||||
sust_b_3d_vec(, b16, ushort1);
|
||||
sust_b_3d_vec(, b32, uint1);
|
||||
// sust_b_3d_vec(, b64, ulong1);
|
||||
sust_b_3d_vec(_v2, b8, uchar2);
|
||||
sust_b_3d_vec(_v2, b16, ushort2);
|
||||
sust_b_3d_vec(_v2, b32, uint2);
|
||||
// sust_b_3d_vec(_v2, b64, ulong2);
|
||||
sust_b_3d_vec(_v4, b8, uchar4);
|
||||
sust_b_3d_vec(_v4, b16, ushort4);
|
||||
sust_b_3d_vec(_v4, b32, uint4);
|
||||
// sust_b_3d_vec(_v4, b64, ulong4);
|
||||
|
||||
#define sust_b_a1d_vec(VEC, TYPE, HIP_TYPE) \
|
||||
void FUNC(sust_b_a1d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, uint layer, int x, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipTextureObject_t textureObject = ptr->textureObject; \
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT; \
|
||||
(void)s; \
|
||||
int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \
|
||||
} \
|
||||
void FUNC(sust_b_indirect_a1d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT; \
|
||||
int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_1Da(i), __ockl_image_channel_order_1Da(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_1Da(i, int2(byte_coord, int(layer)).data, tmp); \
|
||||
}
|
||||
|
||||
sust_b_a1d_vec(, b8, uchar1);
|
||||
sust_b_a1d_vec(, b16, ushort1);
|
||||
sust_b_a1d_vec(, b32, uint1);
|
||||
// sust_b_a1d_vec(, b64, ulong1);
|
||||
sust_b_a1d_vec(_v2, b8, uchar2);
|
||||
sust_b_a1d_vec(_v2, b16, ushort2);
|
||||
sust_b_a1d_vec(_v2, b32, uint2);
|
||||
// sust_b_a1d_vec(_v2, b64, ulong2);
|
||||
sust_b_a1d_vec(_v4, b8, uchar4);
|
||||
sust_b_a1d_vec(_v4, b16, ushort4);
|
||||
sust_b_a1d_vec(_v4, b32, uint4);
|
||||
// sust_b_a1d_vec(_v4, b64, ulong4);
|
||||
|
||||
#define sust_b_a2d_vec(VEC, TYPE, HIP_TYPE) \
|
||||
void FUNC(sust_b_a2d##VEC##_##TYPE##_trap)(struct textureReference GLOBAL_SPACE * ptr, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipTextureObject_t textureObject = ptr->textureObject; \
|
||||
TEXTURE_OBJECT_PARAMETERS_INIT; \
|
||||
(void)s; \
|
||||
int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \
|
||||
} \
|
||||
void FUNC(sust_b_indirect_a2d##VEC##_##TYPE##_trap)(uint64_t serf_arg, uint layer, int x, int y, HIP_TYPE::Native_vec_ data) \
|
||||
{ \
|
||||
hipSurfaceObject_t surfObj = (hipSurfaceObject_t)serf_arg; \
|
||||
__HIP_SURFACE_OBJECT_PARAMETERS_INIT; \
|
||||
int byte_coord = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2Da(i), __ockl_image_channel_order_2Da(i)); \
|
||||
HIP_TYPE hip_data; \
|
||||
hip_data.data = data; \
|
||||
auto tmp = __pack_to_float4(hip_data); \
|
||||
__ockl_image_store_2Da(i, int4(byte_coord, y, int(layer), 0).data, tmp); \
|
||||
}
|
||||
|
||||
sust_b_a2d_vec(, b8, uchar1);
|
||||
sust_b_a2d_vec(, b16, ushort1);
|
||||
sust_b_a2d_vec(, b32, uint1);
|
||||
// sust_b_a2d_vec(, b64, ulong1);
|
||||
sust_b_a2d_vec(_v2, b8, uchar2);
|
||||
sust_b_a2d_vec(_v2, b16, ushort2);
|
||||
sust_b_a2d_vec(_v2, b32, uint2);
|
||||
// sust_b_a2d_vec(_v2, b64, ulong2);
|
||||
sust_b_a2d_vec(_v4, b8, uchar4);
|
||||
sust_b_a2d_vec(_v4, b16, ushort4);
|
||||
sust_b_a2d_vec(_v4, b32, uint4);
|
||||
// sust_b_a2d_vec(_v4, b64, ulong4);
|
||||
|
||||
__device__ static inline bool is_upper_warp()
|
||||
{
|
||||
return FUNC_CALL(COMPILATION_MODE) == CompilationMode::DoubleWave32OnWave64 && __lane_id() >= 32;
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use super::hipfix;
|
||||
use crate::hip_call_cuda;
|
||||
use cuda_types::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
|
||||
use crate::hip_call_cuda;
|
||||
|
||||
use super::{hipfix, FromCuda};
|
||||
// Same as in zluda_ptx_impl.cpp
|
||||
const IMAGE_RESERVED_TOP_BITS: u32 = 3;
|
||||
|
||||
pub(crate) unsafe fn create(
|
||||
result: *mut hipSurfaceObject_t,
|
||||
|
@ -14,23 +15,24 @@ pub(crate) unsafe fn create(
|
|||
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||
}
|
||||
let desc = to_surface_desc(*p_res_desc)?;
|
||||
// We need to check array format and channel count to set top bits of the surface object.
|
||||
// HIP does not support non-Array sources anyway
|
||||
if desc.resType != hipResourceType::hipResourceTypeArray {
|
||||
return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED);
|
||||
}
|
||||
let mut surf_obj = mem::zeroed();
|
||||
hip_call_cuda!(hipCreateSurfaceObject(&mut surf_obj, &desc));
|
||||
if desc.resType != hipResourceType::hipResourceTypeArray {
|
||||
panic!()
|
||||
}
|
||||
let format_size = format_size((&*desc.res.array.array).Format);
|
||||
let format_size = format_size((&*desc.res.array.array).Format)?;
|
||||
let channels = (&*desc.res.array.array).NumChannels;
|
||||
let pixel_size = format_size * channels as usize;
|
||||
let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - 3);
|
||||
let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - IMAGE_RESERVED_TOP_BITS);
|
||||
surf_obj = (surf_obj as usize | shift_amount) as _;
|
||||
dbg!(surf_obj);
|
||||
*result = surf_obj;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn format_size(f: hipArray_Format) -> usize {
|
||||
match f {
|
||||
pub(crate) fn format_size(f: hipArray_Format) -> Result<usize, CUresult> {
|
||||
Ok(match f {
|
||||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8
|
||||
| hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => 1,
|
||||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16
|
||||
|
@ -39,8 +41,8 @@ pub(crate) fn format_size(f: hipArray_Format) -> usize {
|
|||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32
|
||||
| hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32
|
||||
| hipArray_Format::HIP_AD_FORMAT_FLOAT => 4,
|
||||
_ => panic!(),
|
||||
}
|
||||
_ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED),
|
||||
})
|
||||
}
|
||||
|
||||
unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result<hipResourceDesc, CUresult> {
|
||||
|
@ -51,92 +53,10 @@ unsafe fn to_surface_desc(res_desc: CUDA_RESOURCE_DESC) -> Result<hipResourceDes
|
|||
array: hipfix::array::get(res_desc.res.array.hArray),
|
||||
},
|
||||
},
|
||||
CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY => hipResourceDesc__bindgen_ty_1 {
|
||||
mipmap: hipResourceDesc__bindgen_ty_1__bindgen_ty_2 {
|
||||
mipmap: mem::transmute(res_desc.res.mipmap.hMipmappedArray),
|
||||
},
|
||||
},
|
||||
CUresourcetype::CU_RESOURCE_TYPE_LINEAR => hipResourceDesc__bindgen_ty_1 {
|
||||
linear: hipResourceDesc__bindgen_ty_1__bindgen_ty_3 {
|
||||
devPtr: res_desc.res.linear.devPtr.0,
|
||||
desc: channel_format_desc(
|
||||
FromCuda::from_cuda(res_desc.res.linear.format),
|
||||
res_desc.res.linear.numChannels,
|
||||
)?,
|
||||
sizeInBytes: res_desc.res.linear.sizeInBytes,
|
||||
},
|
||||
},
|
||||
CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => hipResourceDesc__bindgen_ty_1 {
|
||||
pitch2D: hipResourceDesc__bindgen_ty_1__bindgen_ty_4 {
|
||||
devPtr: res_desc.res.pitch2D.devPtr.0,
|
||||
desc: channel_format_desc(
|
||||
FromCuda::from_cuda(res_desc.res.pitch2D.format),
|
||||
res_desc.res.pitch2D.numChannels,
|
||||
)?,
|
||||
width: res_desc.res.pitch2D.width,
|
||||
height: res_desc.res.pitch2D.height,
|
||||
pitchInBytes: res_desc.res.pitch2D.pitchInBytes,
|
||||
},
|
||||
},
|
||||
_ => todo!(),
|
||||
_ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED),
|
||||
};
|
||||
Ok(hipResourceDesc {
|
||||
resType: res_type,
|
||||
res,
|
||||
})
|
||||
}
|
||||
|
||||
fn channel_format_desc(
|
||||
format: hipArray_Format,
|
||||
num_channels: u32,
|
||||
) -> Result<hipChannelFormatDesc, CUresult> {
|
||||
let mut bits = match num_channels {
|
||||
1 => (1, 0, 0, 0),
|
||||
2 => (1, 1, 0, 0),
|
||||
3 => (1, 1, 1, 0),
|
||||
4 => (1, 1, 1, 1),
|
||||
_ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),
|
||||
};
|
||||
let (kind, bit_width) = match format {
|
||||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8 => {
|
||||
(hipChannelFormatKind::hipChannelFormatKindUnsigned, u8::BITS)
|
||||
}
|
||||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16 => (
|
||||
hipChannelFormatKind::hipChannelFormatKindUnsigned,
|
||||
u16::BITS,
|
||||
),
|
||||
hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32 => (
|
||||
hipChannelFormatKind::hipChannelFormatKindUnsigned,
|
||||
u32::BITS,
|
||||
),
|
||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8 => {
|
||||
(hipChannelFormatKind::hipChannelFormatKindSigned, i8::BITS)
|
||||
}
|
||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 => {
|
||||
(hipChannelFormatKind::hipChannelFormatKindSigned, i16::BITS)
|
||||
}
|
||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32 => {
|
||||
(hipChannelFormatKind::hipChannelFormatKindSigned, i32::BITS)
|
||||
}
|
||||
hipArray_Format::HIP_AD_FORMAT_HALF => (
|
||||
hipChannelFormatKind::hipChannelFormatKindFloat,
|
||||
mem::size_of::<u16>() as u32 * u8::BITS,
|
||||
),
|
||||
hipArray_Format::HIP_AD_FORMAT_FLOAT => (
|
||||
hipChannelFormatKind::hipChannelFormatKindFloat,
|
||||
mem::size_of::<f32>() as u32 * u8::BITS,
|
||||
),
|
||||
_ => return Err(CUresult::CUDA_ERROR_INVALID_VALUE),
|
||||
};
|
||||
bits.0 *= bit_width;
|
||||
bits.1 *= bit_width;
|
||||
bits.2 *= bit_width;
|
||||
bits.3 *= bit_width;
|
||||
Ok(hipChannelFormatDesc {
|
||||
x: bits.0 as i32,
|
||||
y: bits.0 as i32,
|
||||
z: bits.0 as i32,
|
||||
w: bits.0 as i32,
|
||||
f: kind,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
use crate::{
|
||||
hip_call_cuda,
|
||||
r#impl::{hipfix, surface},
|
||||
};
|
||||
use crate::{hip_call_cuda, r#impl::hipfix};
|
||||
use cuda_types::{CUarray, CUresult};
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
use std::ptr;
|
||||
|
||||
pub(crate) unsafe fn set_array(
|
||||
surfref: *mut textureReference,
|
||||
|
@ -21,14 +18,6 @@ pub(crate) unsafe fn set_array(
|
|||
array.Format,
|
||||
array.NumChannels as i32,
|
||||
));
|
||||
//assert_eq!(hipError_t::hipSuccess, hipHostGetDevicePointer(&mut dev_ptr, surfref.cast(), 0));
|
||||
dbg!(surfref);
|
||||
// TODO: clear bits on the old textureobject
|
||||
hip_call_cuda!(hipTexRefSetArray(surfref, array, HIP_TRSA_OVERRIDE_FORMAT));
|
||||
let format_size = surface::format_size(array.Format);
|
||||
let pixel_size = format_size * array.NumChannels as usize;
|
||||
let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - 3);
|
||||
let mut surfref = &mut *surfref;
|
||||
surfref.textureObject = (surfref.textureObject as usize | shift_amount) as _;
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue