mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Also implement suld
This commit is contained in:
parent
795fb768b6
commit
6762f98692
5 changed files with 109 additions and 10 deletions
Binary file not shown.
|
@ -317,6 +317,102 @@ static __device__ void image_store_pck(T value, typename Coordinates<geo>::type
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T, ImageGeometry geo>
|
||||
static __device__ T image_load_pck(typename Coordinates<geo>::type coord, surface_ptr surface)
|
||||
{
|
||||
if constexpr (sizeof(T) <= sizeof(uint))
|
||||
{
|
||||
uint data;
|
||||
if constexpr (geo == ImageGeometry::_1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_3D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute<zluda_uint3>(coord)), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x1 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry");
|
||||
}
|
||||
return transmute<T>(data);
|
||||
}
|
||||
else if constexpr (sizeof(T) == sizeof(uint2::Native_vec_))
|
||||
{
|
||||
uint2::Native_vec_ data;
|
||||
if constexpr (geo == ImageGeometry::_1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_3D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute<zluda_uint3>(coord)), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0x3 dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry");
|
||||
}
|
||||
return transmute<T>(data);
|
||||
}
|
||||
else if constexpr (sizeof(T) == sizeof(uint4::Native_vec_))
|
||||
{
|
||||
uint4::Native_vec_ data;
|
||||
if constexpr (geo == ImageGeometry::_1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord.x), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_3D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:3D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(transmute<zluda_uint3>(coord)), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A1D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:1D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::A2D)
|
||||
{
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D_ARRAY unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(ImageGeometry) == 0, "Invalid geometry");
|
||||
}
|
||||
return transmute<T>(data);
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(sizeof(T) == 0, "Invalid vector size");
|
||||
}
|
||||
}
|
||||
|
||||
template <ImageGeometry geo>
|
||||
static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates<geo>::type coord, surface_ptr surface)
|
||||
{
|
||||
|
@ -327,8 +423,7 @@ static __device__ uint4::Native_vec_ image_load_pck_full(typename Coordinates<ge
|
|||
}
|
||||
else if constexpr (geo == ImageGeometry::_2D)
|
||||
{
|
||||
auto s = *surface;
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(s) : "memory");
|
||||
asm volatile("image_load_pck %0, %1, %2 dmask:0xf dim:2D unorm\ns_waitcnt vmcnt(0)" : "=v"(data) : "v"(coord), "s"(*surface) : "memory");
|
||||
}
|
||||
else if constexpr (geo == ImageGeometry::_3D)
|
||||
{
|
||||
|
@ -434,8 +529,7 @@ static __device__ T suld_b_zero(Surface surf_arg, typename Coordinates<geo>::typ
|
|||
surface_ptr surface = get_surface_pointer(surf_arg);
|
||||
uint32_t shift_x = x_coordinate_shift(surf_arg);
|
||||
coord.x = coord.x >> shift_x;
|
||||
uint4::Native_vec_ pixel = image_load_pck_full<geo>(coord, surface);
|
||||
return transmute<T>(pixel);
|
||||
return image_load_pck<T, geo>(coord, surface);
|
||||
}
|
||||
|
||||
template <typename T, ImageGeometry geo, typename Surface>
|
||||
|
|
|
@ -2934,7 +2934,7 @@ fn replace_instructions_with_builtins_impl<'input>(
|
|||
vector,
|
||||
"_",
|
||||
suld.type_.to_ptx_name(),
|
||||
"_trap",
|
||||
"_zero",
|
||||
]
|
||||
.concat();
|
||||
statements.push(instruction_to_fn_call(
|
||||
|
|
|
@ -22,10 +22,19 @@ pub(crate) unsafe fn create(
|
|||
}
|
||||
let mut surf_obj = mem::zeroed();
|
||||
hip_call_cuda!(hipCreateSurfaceObject(&mut surf_obj, &desc));
|
||||
let top_reserved_bits = surf_obj as usize >> (usize::BITS - IMAGE_RESERVED_TOP_BITS);
|
||||
if top_reserved_bits != 0 {
|
||||
#[allow(unused_must_use)]
|
||||
{
|
||||
hipDestroySurfaceObject(surf_obj);
|
||||
}
|
||||
return Err(CUresult::CUDA_ERROR_UNKNOWN);
|
||||
}
|
||||
let format_size = format_size((&*desc.res.array.array).Format)?;
|
||||
let channels = (&*desc.res.array.array).NumChannels;
|
||||
let pixel_size = format_size * channels as usize;
|
||||
let shift_amount = (pixel_size.trailing_zeros() as usize) << (64 - IMAGE_RESERVED_TOP_BITS);
|
||||
let shift_amount =
|
||||
(pixel_size.trailing_zeros() as usize) << (usize::BITS - IMAGE_RESERVED_TOP_BITS);
|
||||
surf_obj = (surf_obj as usize | shift_amount) as _;
|
||||
*result = surf_obj;
|
||||
Ok(())
|
||||
|
|
|
@ -340,10 +340,6 @@ unsafe fn kernel_suld_impl<
|
|||
if mem::size_of::<Format>() * CHANNELS < mem::size_of::<SustType>() * SULD_N {
|
||||
return;
|
||||
}
|
||||
// TODO: reenable those tests
|
||||
if mem::size_of::<Format>() != mem::size_of::<SustType>() || CHANNELS != SULD_N {
|
||||
return;
|
||||
}
|
||||
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(seed);
|
||||
let size = 4usize;
|
||||
let random_size = rand::distributions::Uniform::<u32>::new(1, size as u32);
|
||||
|
|
Loading…
Add table
Reference in a new issue