mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-03 14:50:53 +00:00
More hacks for mipmapped texobjs
This commit is contained in:
parent
93987a2cfe
commit
c64018db89
4 changed files with 195 additions and 18 deletions
|
@ -47,12 +47,13 @@ pub(crate) unsafe fn get_descriptor_3d(
|
||||||
flags |= CUDA_ARRAY3D_SURFACE_LDST;
|
flags |= CUDA_ARRAY3D_SURFACE_LDST;
|
||||||
let array = hipfix::array::get(array);
|
let array = hipfix::array::get(array);
|
||||||
if let (Some(array), Some(array_descriptor)) = (array.as_ref(), array_descriptor.as_mut()) {
|
if let (Some(array), Some(array_descriptor)) = (array.as_ref(), array_descriptor.as_mut()) {
|
||||||
|
let real_format = hipfix::get_broken_format(array).unwrap_or(array.Format);
|
||||||
*array_descriptor = CUDA_ARRAY3D_DESCRIPTOR {
|
*array_descriptor = CUDA_ARRAY3D_DESCRIPTOR {
|
||||||
Width: array.width as usize,
|
Width: array.width as usize,
|
||||||
Height: array.height as usize,
|
Height: array.height as usize,
|
||||||
Depth: array.depth as usize,
|
Depth: array.depth as usize,
|
||||||
NumChannels: array.NumChannels,
|
NumChannels: array.NumChannels,
|
||||||
Format: mem::transmute(array.Format), // compatible
|
Format: mem::transmute(real_format), // compatible
|
||||||
Flags: flags,
|
Flags: flags,
|
||||||
};
|
};
|
||||||
hipError_t::hipSuccess
|
hipError_t::hipSuccess
|
||||||
|
@ -129,6 +130,14 @@ pub(crate) unsafe fn mipmapped_get_level(
|
||||||
));
|
));
|
||||||
let hip_array_mut = hip_array.as_mut().ok_or(CUresult::CUDA_ERROR_UNKNOWN)?;
|
let hip_array_mut = hip_array.as_mut().ok_or(CUresult::CUDA_ERROR_UNKNOWN)?;
|
||||||
hip_array_mut.textureType = hack_flag;
|
hip_array_mut.textureType = hack_flag;
|
||||||
|
if mipmapped_array.height == 0 {
|
||||||
|
// HIP returns 1 here for no good reason
|
||||||
|
hip_array_mut.height = 0;
|
||||||
|
}
|
||||||
|
if mipmapped_array.depth == 0 {
|
||||||
|
// HIP returns 1 here for no good reason
|
||||||
|
hip_array_mut.depth = 0;
|
||||||
|
}
|
||||||
*level_array = mem::transmute(hip_array);
|
*level_array = mem::transmute(hip_array);
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -3,6 +3,8 @@ use cuda_types::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::{env, ptr};
|
use std::{env, ptr};
|
||||||
|
|
||||||
|
use self::array::get_mipmapped;
|
||||||
|
|
||||||
use super::{function::FunctionData, stream, LiveCheck};
|
use super::{function::FunctionData, stream, LiveCheck};
|
||||||
|
|
||||||
// For some reason HIP does not tolerate hipArraySurfaceLoadStore, even though
|
// For some reason HIP does not tolerate hipArraySurfaceLoadStore, even though
|
||||||
|
@ -27,7 +29,23 @@ pub(crate) fn get_non_broken_format(format: hipArray_Format) -> (u32, hipArray_F
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn get_broken_format(array: &hipArray) -> Option<hipArray_Format> {
|
pub(crate) fn get_broken_format(array: &hipArray) -> Option<hipArray_Format> {
|
||||||
Some(match (array.textureType, array.Format) {
|
get_broken_format_impl(array.textureType, array.Format)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) unsafe fn get_broken_format_mipmapped(
|
||||||
|
array: CUmipmappedArray,
|
||||||
|
) -> Result<(&'static hipMipmappedArray, Option<hipArray_Format>), CUresult> {
|
||||||
|
let (hip_array, flag) = get_mipmapped(array);
|
||||||
|
let hip_array_ref = hip_array
|
||||||
|
.as_ref()
|
||||||
|
.ok_or(CUresult::CUDA_ERROR_INVALID_VALUE)?;
|
||||||
|
let format_override = get_broken_format_impl(flag, hip_array_ref.format);
|
||||||
|
Ok((hip_array_ref, format_override))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_broken_format_impl(hack_flag: u32, format: hipArray_Format) -> Option<hipArray_Format> {
|
||||||
|
Some(match (hack_flag, format) {
|
||||||
(2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF,
|
(2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF,
|
||||||
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => {
|
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => {
|
||||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16
|
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16
|
||||||
|
@ -42,7 +60,7 @@ pub(crate) fn get_broken_format(array: &hipArray) -> Option<hipArray_Format> {
|
||||||
// memcpy3d fails when copying array1d arrays, so we mark all layered arrays by
|
// memcpy3d fails when copying array1d arrays, so we mark all layered arrays by
|
||||||
// setting the LSB
|
// setting the LSB
|
||||||
pub(crate) mod array {
|
pub(crate) mod array {
|
||||||
use super::get_broken_format;
|
use super::{get_broken_format, get_broken_format_mipmapped};
|
||||||
use crate::{
|
use crate::{
|
||||||
hip_call_cuda,
|
hip_call_cuda,
|
||||||
r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda},
|
r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda},
|
||||||
|
@ -62,10 +80,10 @@ pub(crate) mod array {
|
||||||
let hip_array = get(cuda.res.array.hArray);
|
let hip_array = get(cuda.res.array.hArray);
|
||||||
cuda.res.array.hArray = mem::transmute(hip_array);
|
cuda.res.array.hArray = mem::transmute(hip_array);
|
||||||
if let Some(hip_array) = hip_array.as_ref() {
|
if let Some(hip_array) = hip_array.as_ref() {
|
||||||
if let Some(format_) = get_broken_format(hip_array) {
|
if let Some(new_format) = get_broken_format(hip_array) {
|
||||||
return if res_desc_view == ptr::null() {
|
return if res_desc_view == ptr::null() {
|
||||||
let res_desc_view = HIP_RESOURCE_VIEW_DESC {
|
let res_desc_view = HIP_RESOURCE_VIEW_DESC {
|
||||||
format: resource_view_format(format_, hip_array.NumChannels)?,
|
format: resource_view_format(new_format, hip_array.NumChannels)?,
|
||||||
width: hip_array.width as usize,
|
width: hip_array.width as usize,
|
||||||
height: hip_array.height as usize,
|
height: hip_array.height as usize,
|
||||||
depth: hip_array.depth as usize,
|
depth: hip_array.depth as usize,
|
||||||
|
@ -88,6 +106,36 @@ pub(crate) mod array {
|
||||||
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
res_desc_view,
|
res_desc_view,
|
||||||
))
|
))
|
||||||
|
} else if cuda.resType == CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY {
|
||||||
|
let (hip_mipmapped_array, format_override) =
|
||||||
|
get_broken_format_mipmapped(cuda.res.mipmap.hMipmappedArray)?;
|
||||||
|
let mut cuda = *cuda;
|
||||||
|
cuda.res.mipmap.hMipmappedArray = mem::transmute(hip_mipmapped_array as *const _);
|
||||||
|
if let Some(new_format) = format_override {
|
||||||
|
return if res_desc_view == ptr::null() {
|
||||||
|
let res_desc_view = HIP_RESOURCE_VIEW_DESC {
|
||||||
|
format: resource_view_format(new_format, hip_mipmapped_array.num_channels)?,
|
||||||
|
width: hip_mipmapped_array.width as usize,
|
||||||
|
height: hip_mipmapped_array.height as usize,
|
||||||
|
depth: hip_mipmapped_array.depth as usize,
|
||||||
|
firstMipmapLevel: hip_mipmapped_array.min_mipmap_level,
|
||||||
|
lastMipmapLevel: hip_mipmapped_array.max_mipmap_level,
|
||||||
|
firstLayer: 0,
|
||||||
|
lastLayer: 0,
|
||||||
|
reserved: mem::zeroed(),
|
||||||
|
};
|
||||||
|
Ok(fn_(
|
||||||
|
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
&res_desc_view,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Err(CUresult::CUDA_ERROR_NOT_SUPPORTED)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(fn_(
|
||||||
|
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
res_desc_view,
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
Ok(fn_(
|
Ok(fn_(
|
||||||
(cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
(cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
|
|
@ -7,9 +7,6 @@
|
||||||
.param .u64 surface_param
|
.param .u64 surface_param
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
.reg .u64 in_addr;
|
|
||||||
.reg .u64 out_addr;
|
|
||||||
|
|
||||||
.reg .u64 texture;
|
.reg .u64 texture;
|
||||||
.reg .u64 surface;
|
.reg .u64 surface;
|
||||||
.reg .f32 f<5>;
|
.reg .f32 f<5>;
|
||||||
|
@ -26,3 +23,27 @@
|
||||||
sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4};
|
sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4};
|
||||||
ret;
|
ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
.entry read_tex_2d_mip(
|
||||||
|
.param .u64 texture_param,
|
||||||
|
.param .u64 output_param
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 texture;
|
||||||
|
.reg .u64 output;
|
||||||
|
.reg .f32 f<5>;
|
||||||
|
.reg .f32 level;
|
||||||
|
.reg .b16 rs<5>;
|
||||||
|
|
||||||
|
ld.param.u64 texture, [texture_param];
|
||||||
|
ld.param.u64 output, [output_param];
|
||||||
|
|
||||||
|
// 3F800000 = 1.0
|
||||||
|
tex.level.2d.v4.f32.f32 {f1, f2, f3, f4}, [texture, {0f00000000, 0f00000000}], 0f3f800000;
|
||||||
|
st.global.f32 [output], f1;
|
||||||
|
st.global.f32 [output+4], f2;
|
||||||
|
st.global.f32 [output+8], f3;
|
||||||
|
st.global.f32 [output+12], f4;
|
||||||
|
ret;
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
use crate::common::CudaDriverFns;
|
use crate::common::CudaDriverFns;
|
||||||
use cuda_types::*;
|
use cuda_types::*;
|
||||||
use std::{mem, ptr};
|
use half::f16;
|
||||||
|
use std::{ffi::c_void, mem, ptr};
|
||||||
|
|
||||||
mod common;
|
mod common;
|
||||||
|
|
||||||
|
@ -11,7 +12,7 @@ mod common;
|
||||||
cuda_driver_test!(mipmap_texture_to_surface);
|
cuda_driver_test!(mipmap_texture_to_surface);
|
||||||
|
|
||||||
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
let kernel = include_str!("mipmap_texture_to_surface.ptx");
|
let kernel = include_str!("mipmap_array.ptx");
|
||||||
let mut kernel = kernel.to_owned();
|
let mut kernel = kernel.to_owned();
|
||||||
kernel.push('\0');
|
kernel.push('\0');
|
||||||
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
||||||
|
@ -32,7 +33,7 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
Depth: 0,
|
Depth: 0,
|
||||||
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
||||||
NumChannels: 4,
|
NumChannels: 4,
|
||||||
Flags: 2,
|
Flags: 0,
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
|
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
|
||||||
|
@ -44,6 +45,12 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0),
|
cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0),
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
);
|
);
|
||||||
|
let mut queried_descriptor = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuArray3DGetDescriptor_v2(&mut queried_descriptor, array_0),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
assert_eq!(mipmap_desc.Depth, queried_descriptor.Depth);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1),
|
cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1),
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
|
@ -71,11 +78,6 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
|
cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
);
|
);
|
||||||
let mut func = mem::zeroed();
|
|
||||||
assert_eq!(
|
|
||||||
cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()),
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
);
|
|
||||||
let mut texture = mem::zeroed();
|
let mut texture = mem::zeroed();
|
||||||
let texture_resource_desc = CUDA_RESOURCE_DESC {
|
let texture_resource_desc = CUDA_RESOURCE_DESC {
|
||||||
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
||||||
|
@ -121,10 +123,19 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
|
cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
);
|
);
|
||||||
|
let mut texture_to_surface = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuModuleGetFunction(
|
||||||
|
&mut texture_to_surface,
|
||||||
|
module,
|
||||||
|
b"texture_to_surface\0".as_ptr().cast()
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
let mut params = [&mut texture, &mut surface];
|
let mut params = [&mut texture, &mut surface];
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
cuda.cuLaunchKernel(
|
cuda.cuLaunchKernel(
|
||||||
func,
|
texture_to_surface,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
|
@ -166,12 +177,100 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
);
|
);
|
||||||
assert_eq!(&pixels, &memcpy_dst);
|
assert_eq!(&pixels, &memcpy_dst);
|
||||||
|
let texture_resource_desc = CUDA_RESOURCE_DESC {
|
||||||
|
resType: CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY,
|
||||||
|
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
|
||||||
|
mipmap: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 {
|
||||||
|
hMipmappedArray: mipmap_array,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
flags: 0,
|
||||||
|
};
|
||||||
|
let texture_desc = CUDA_TEXTURE_DESC {
|
||||||
|
addressMode: [
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
],
|
||||||
|
filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
|
||||||
|
flags: 2,
|
||||||
|
maxAnisotropy: 0,
|
||||||
|
mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
|
||||||
|
mipmapLevelBias: 0f32,
|
||||||
|
minMipmapLevelClamp: 0f32,
|
||||||
|
maxMipmapLevelClamp: 7f32,
|
||||||
|
borderColor: [0f32, 0f32, 0f32, 0f32],
|
||||||
|
reserved: mem::zeroed(),
|
||||||
|
};
|
||||||
|
let mut mipmapped_tex_obj = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuTexObjectCreate(
|
||||||
|
&mut mipmapped_tex_obj,
|
||||||
|
&texture_resource_desc,
|
||||||
|
&texture_desc,
|
||||||
|
ptr::null()
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut read_tex_2d_mip = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuModuleGetFunction(
|
||||||
|
&mut read_tex_2d_mip,
|
||||||
|
module,
|
||||||
|
b"read_tex_2d_mip\0".as_ptr().cast()
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut output_buffer = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuMemAlloc_v2(&mut output_buffer, 4 * mem::size_of::<u32>()),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut params = [
|
||||||
|
&mut mipmapped_tex_obj as *mut _ as *mut c_void,
|
||||||
|
&mut output_buffer as *mut _ as *mut c_void,
|
||||||
|
];
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuLaunchKernel(
|
||||||
|
read_tex_2d_mip,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
ptr::null_mut(),
|
||||||
|
params.as_mut_ptr().cast(),
|
||||||
|
ptr::null_mut(),
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuStreamSynchronize(ptr::null_mut()),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut output = [f32::MAX; 4];
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuMemcpyDtoH_v2(
|
||||||
|
output.as_mut_ptr().cast(),
|
||||||
|
output_buffer,
|
||||||
|
4 * mem::size_of::<u32>()
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let pixels_f32 = pixels
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.map(|x| mem::transmute::<_, f16>(x).to_f32())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
assert_eq!(&output[..], &*pixels_f32);
|
||||||
}
|
}
|
||||||
|
|
||||||
cuda_driver_test!(mipmap_texture_to_surface2);
|
cuda_driver_test!(mipmap_texture_to_surface2);
|
||||||
|
|
||||||
unsafe fn mipmap_texture_to_surface2<T: CudaDriverFns>(cuda: T) {
|
unsafe fn mipmap_texture_to_surface2<T: CudaDriverFns>(cuda: T) {
|
||||||
let kernel = include_str!("mipmap_texture_to_surface.ptx");
|
let kernel = include_str!("mipmap_array.ptx");
|
||||||
let mut kernel = kernel.to_owned();
|
let mut kernel = kernel.to_owned();
|
||||||
kernel.push('\0');
|
kernel.push('\0');
|
||||||
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
Loading…
Add table
Add a link
Reference in a new issue