Add failing test, make tiny fixes

This commit is contained in:
Andrzej Janik 2024-03-22 21:25:10 +01:00
parent f8db1b8c63
commit b7b8502859
5 changed files with 221 additions and 5 deletions

View file

@ -6511,9 +6511,17 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
ast::Instruction::Tex(details, arg)
}
ast::Instruction::Suld(details, arg) => {
let image_type_space = if details.direct {
(ast::Type::Texref, ast::StateSpace::Global)
} else {
(
ast::Type::Scalar(ast::ScalarType::B64),
ast::StateSpace::Reg,
)
};
let arg = arg.map(
visitor,
(ast::Type::Surfref, ast::StateSpace::Global),
image_type_space,
details.geometry,
details.value_type(),
ast::ScalarType::B32,
@ -8148,6 +8156,14 @@ impl<T: ArgParamsEx> ast::Arg4Sust<T> {
visitor: &mut V,
details: &ast::SurfaceDetails,
) -> Result<ast::Arg4Sust<U>, TranslateError> {
let (type_, space) = if details.direct {
(ast::Type::Surfref, ast::StateSpace::Global)
} else {
(
ast::Type::Scalar(ast::ScalarType::B64),
ast::StateSpace::Reg,
)
};
let image = visitor.operand(
ArgumentDescriptor {
op: self.image,
@ -8155,8 +8171,8 @@ impl<T: ArgParamsEx> ast::Arg4Sust<T> {
is_memory_access: false,
non_default_implicit_conversion: None,
},
&ast::Type::Surfref,
ast::StateSpace::Global,
&type_,
space,
)?;
let layer = self
.layer

View file

@ -381,7 +381,10 @@ unsafe fn kernel_suld_impl<
let x = random_size.sample(&mut rng) * sizeof_pixel;
let y = random_size.sample(&mut rng);
let z = random_size.sample(&mut rng);
let values = [rng.gen::<SustType>(); SULD_N];
let mut values = [SustType::default(); SULD_N];
for value in values.iter_mut() {
*value = rng.gen::<SustType>();
}
let converted_values = force_transmute(values, BYTE_FILLER3);
*host_side_data.get_unchecked_mut(geo.address(size, x, y, z, sizeof_pixel)) = converted_values;
assert_eq!(

View file

@ -421,7 +421,11 @@ unsafe fn kernel_sust_impl<
let x = random_size.sample(&mut rng) * sizeof_pixel;
let y = random_size.sample(&mut rng);
let z = random_size.sample(&mut rng);
let values = [rng.gen::<SustType>(); SUST_N];
let mut values = [SustType::default(); SUST_N];
for value in values.iter_mut() {
*value = rng.gen::<SustType>();
}
dbg!(&values);
let mut args = vec![
&x as *const _ as *const c_void,
&y as *const _ as *const _,

View file

@ -0,0 +1,28 @@
.version 6.5
.target sm_30
.address_size 64
// Kernel: samples a single texel from the texture handle passed in
// `texture_param` and writes it, converted to four 16-bit floats, to the
// surface handle passed in `surface_param`.
// Both handles are passed as opaque 64-bit values and kept in .u64 registers
// (they are not referenced as .texref/.surfref globals).
.entry texture_to_surface(
.param .u64 texture_param,
.param .u64 surface_param
)
{
// NOTE(review): in_addr and out_addr are declared but never used below.
.reg .u64 in_addr;
.reg .u64 out_addr;
.reg .u64 texture;
.reg .u64 surface;
// f1..f4 hold the sampled components as f32; rs1..rs4 hold them as f16 bits.
.reg .f32 f<5>;
.reg .b16 rs<5>;
// Load the two 64-bit handles from the kernel parameters.
ld.param.u64 texture, [texture_param];
ld.param.u64 surface, [surface_param];
// 2D fetch of a 4-component f32 vector at f32 coordinates (0.0, 0.0).
tex.2d.v4.f32.f32 {f1, f2, f3, f4}, [texture, {0f00000000, 0f00000000}];
// Narrow every component from f32 to f16 (.rn rounding mode).
{ cvt.rn.f16.f32 rs1, f1;}
{ cvt.rn.f16.f32 rs2, f2;}
{ cvt.rn.f16.f32 rs3, f3;}
{ cvt.rn.f16.f32 rs4, f4;}
// Store the four 16-bit values as one v4.b16 element at surface coordinates
// {0, 0}; .trap selects the trapping out-of-bounds mode (see PTX ISA, sust).
sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4};
ret;
}

View file

@ -0,0 +1,165 @@
use crate::common::CudaDriverFns;
use cuda_types::*;
use std::{mem, ptr};
mod common;
cuda_driver_test!(mipmap_texture_to_surface);
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
let kernel = include_str!("mipmap_texture_to_surface.ptx");
let mut kernel = kernel.to_owned();
kernel.push('\0');
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
let mut ctx = ptr::null_mut();
assert_eq!(
cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)),
CUresult::CUDA_SUCCESS
);
let mut module = ptr::null_mut();
assert_eq!(
cuda.cuModuleLoadData(&mut module, kernel.as_ptr() as _),
CUresult::CUDA_SUCCESS
);
let mut mipmap_array = ptr::null_mut();
let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR {
Width: 1368,
Height: 770,
Depth: 0,
Format: CUarray_format::CU_AD_FORMAT_HALF,
NumChannels: 4,
Flags: 0,
};
assert_eq!(
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
CUresult::CUDA_SUCCESS
);
let mut array_0 = mem::zeroed();
let mut array_1 = mem::zeroed();
assert_eq!(
cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0),
CUresult::CUDA_SUCCESS
);
assert_eq!(
cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1),
CUresult::CUDA_SUCCESS
);
let mut pixels = [0x3C66u16, 0x4066, 0x4299, 4466];
let memcpy_from_host = CUDA_MEMCPY2D {
srcXInBytes: 0,
srcY: 0,
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
srcHost: pixels.as_mut_ptr() as _,
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
srcArray: ptr::null_mut(),
srcPitch: 4 * mem::size_of::<u16>(),
dstXInBytes: 0,
dstY: 0,
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
dstHost: ptr::null_mut(),
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
dstArray: array_0,
dstPitch: 0,
WidthInBytes: 4 * mem::size_of::<u16>(),
Height: 1,
};
assert_eq!(
cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
CUresult::CUDA_SUCCESS
);
let mut func = mem::zeroed();
assert_eq!(
cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()),
CUresult::CUDA_SUCCESS
);
let mut texture = mem::zeroed();
let texture_resource_desc = CUDA_RESOURCE_DESC {
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_0 },
},
flags: 0,
};
let texture_desc = CUDA_TEXTURE_DESC {
addressMode: [
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
],
filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
flags: 2,
maxAnisotropy: 0,
mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_POINT,
mipmapLevelBias: 0f32,
minMipmapLevelClamp: 0f32,
maxMipmapLevelClamp: 0f32,
borderColor: [0f32, 0f32, 0f32, 0f32],
reserved: mem::zeroed(),
};
assert_eq!(
cuda.cuTexObjectCreate(
&mut texture,
&texture_resource_desc,
&texture_desc,
ptr::null()
),
CUresult::CUDA_SUCCESS
);
let mut surface = mem::zeroed();
let surface_resource_desc = CUDA_RESOURCE_DESC {
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_1 },
},
flags: 0,
};
assert_eq!(
cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
CUresult::CUDA_SUCCESS
);
let mut params = [&mut texture, &mut surface];
assert_eq!(
cuda.cuLaunchKernel(
func,
1,
1,
1,
1,
1,
1,
0,
ptr::null_mut(),
params.as_mut_ptr().cast(),
ptr::null_mut(),
),
CUresult::CUDA_SUCCESS
);
assert_eq!(
cuda.cuStreamSynchronize(ptr::null_mut()),
CUresult::CUDA_SUCCESS
);
let mut memcpy_dst = [u16::MAX; 4];
let memcpy_to_host = CUDA_MEMCPY2D {
srcXInBytes: 0,
srcY: 0,
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
srcHost: ptr::null(),
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
srcArray: array_1,
srcPitch: 0,
dstXInBytes: 0,
dstY: 0,
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
dstHost: memcpy_dst.as_mut_ptr() as _,
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
dstArray: ptr::null_mut(),
dstPitch: 4 * mem::size_of::<u16>(),
WidthInBytes: 4 * mem::size_of::<u16>(),
Height: 1,
};
assert_eq!(
cuda.cuMemcpy2DUnaligned_v2(&memcpy_to_host),
CUresult::CUDA_SUCCESS
);
assert_eq!(&pixels, &memcpy_dst);
}