mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:14:45 +00:00
Add failing test, make tiny fixes
This commit is contained in:
parent
f8db1b8c63
commit
b7b8502859
5 changed files with 221 additions and 5 deletions
|
@ -6511,9 +6511,17 @@ impl<T: ArgParamsEx> ast::Instruction<T> {
|
|||
ast::Instruction::Tex(details, arg)
|
||||
}
|
||||
ast::Instruction::Suld(details, arg) => {
|
||||
let image_type_space = if details.direct {
|
||||
(ast::Type::Texref, ast::StateSpace::Global)
|
||||
} else {
|
||||
(
|
||||
ast::Type::Scalar(ast::ScalarType::B64),
|
||||
ast::StateSpace::Reg,
|
||||
)
|
||||
};
|
||||
let arg = arg.map(
|
||||
visitor,
|
||||
(ast::Type::Surfref, ast::StateSpace::Global),
|
||||
image_type_space,
|
||||
details.geometry,
|
||||
details.value_type(),
|
||||
ast::ScalarType::B32,
|
||||
|
@ -8148,6 +8156,14 @@ impl<T: ArgParamsEx> ast::Arg4Sust<T> {
|
|||
visitor: &mut V,
|
||||
details: &ast::SurfaceDetails,
|
||||
) -> Result<ast::Arg4Sust<U>, TranslateError> {
|
||||
let (type_, space) = if details.direct {
|
||||
(ast::Type::Surfref, ast::StateSpace::Global)
|
||||
} else {
|
||||
(
|
||||
ast::Type::Scalar(ast::ScalarType::B64),
|
||||
ast::StateSpace::Reg,
|
||||
)
|
||||
};
|
||||
let image = visitor.operand(
|
||||
ArgumentDescriptor {
|
||||
op: self.image,
|
||||
|
@ -8155,8 +8171,8 @@ impl<T: ArgParamsEx> ast::Arg4Sust<T> {
|
|||
is_memory_access: false,
|
||||
non_default_implicit_conversion: None,
|
||||
},
|
||||
&ast::Type::Surfref,
|
||||
ast::StateSpace::Global,
|
||||
&type_,
|
||||
space,
|
||||
)?;
|
||||
let layer = self
|
||||
.layer
|
||||
|
|
|
@ -381,7 +381,10 @@ unsafe fn kernel_suld_impl<
|
|||
let x = random_size.sample(&mut rng) * sizeof_pixel;
|
||||
let y = random_size.sample(&mut rng);
|
||||
let z = random_size.sample(&mut rng);
|
||||
let values = [rng.gen::<SustType>(); SULD_N];
|
||||
let mut values = [SustType::default(); SULD_N];
|
||||
for value in values.iter_mut() {
|
||||
*value = rng.gen::<SustType>();
|
||||
}
|
||||
let converted_values = force_transmute(values, BYTE_FILLER3);
|
||||
*host_side_data.get_unchecked_mut(geo.address(size, x, y, z, sizeof_pixel)) = converted_values;
|
||||
assert_eq!(
|
||||
|
|
|
@ -421,7 +421,11 @@ unsafe fn kernel_sust_impl<
|
|||
let x = random_size.sample(&mut rng) * sizeof_pixel;
|
||||
let y = random_size.sample(&mut rng);
|
||||
let z = random_size.sample(&mut rng);
|
||||
let values = [rng.gen::<SustType>(); SUST_N];
|
||||
let mut values = [SustType::default(); SUST_N];
|
||||
for value in values.iter_mut() {
|
||||
*value = rng.gen::<SustType>();
|
||||
}
|
||||
dbg!(&values);
|
||||
let mut args = vec![
|
||||
&x as *const _ as *const c_void,
|
||||
&y as *const _ as *const _,
|
||||
|
|
28
zluda/tests/mipmap_texture_to_surface.ptx
Normal file
28
zluda/tests/mipmap_texture_to_surface.ptx
Normal file
|
@ -0,0 +1,28 @@
|
|||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
|
||||
// Kernel: samples one texel from a texture object and stores it, converted to
// half precision, into a surface object. Both handles arrive as 64-bit kernel
// parameters and are used from registers (indirect texture/surface access).
.entry texture_to_surface(
|
||||
.param .u64 texture_param,
|
||||
.param .u64 surface_param
|
||||
)
|
||||
{
|
||||
// in_addr and out_addr are declared but never referenced in this kernel.
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
|
||||
.reg .u64 texture;
|
||||
.reg .u64 surface;
|
||||
// f1..f4 hold the sampled channels as f32; rs1..rs4 hold them as 16-bit values.
.reg .f32 f<5>;
|
||||
.reg .b16 rs<5>;
|
||||
|
||||
// Load the texture and surface handles from the parameter space into registers.
ld.param.u64 texture, [texture_param];
|
||||
ld.param.u64 surface, [surface_param];
|
||||
|
||||
// 2D fetch at coordinates (0.0, 0.0); 0f00000000 is the hex literal for 0.0f.
tex.2d.v4.f32.f32 {f1, f2, f3, f4}, [texture, {0f00000000, 0f00000000}];
|
||||
// Convert each channel from f32 to f16 (round-to-nearest-even).
{ cvt.rn.f16.f32 rs1, f1;}
|
||||
{ cvt.rn.f16.f32 rs2, f2;}
|
||||
{ cvt.rn.f16.f32 rs3, f3;}
|
||||
{ cvt.rn.f16.f32 rs4, f4;}
|
||||
// Store the four 16-bit channels at surface coordinate (0, 0).
sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4};
|
||||
ret;
|
||||
}
|
165
zluda/tests/mipmap_texture_to_surface.rs
Normal file
165
zluda/tests/mipmap_texture_to_surface.rs
Normal file
|
@ -0,0 +1,165 @@
|
|||
use crate::common::CudaDriverFns;
|
||||
use cuda_types::*;
|
||||
use std::{mem, ptr};
|
||||
|
||||
mod common;
|
||||
|
||||
cuda_driver_test!(mipmap_texture_to_surface);
|
||||
|
||||
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||
let kernel = include_str!("mipmap_texture_to_surface.ptx");
|
||||
let mut kernel = kernel.to_owned();
|
||||
kernel.push('\0');
|
||||
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
||||
let mut ctx = ptr::null_mut();
|
||||
assert_eq!(
|
||||
cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut module = ptr::null_mut();
|
||||
assert_eq!(
|
||||
cuda.cuModuleLoadData(&mut module, kernel.as_ptr() as _),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut mipmap_array = ptr::null_mut();
|
||||
let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR {
|
||||
Width: 1368,
|
||||
Height: 770,
|
||||
Depth: 0,
|
||||
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
||||
NumChannels: 4,
|
||||
Flags: 0,
|
||||
};
|
||||
assert_eq!(
|
||||
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut array_0 = mem::zeroed();
|
||||
let mut array_1 = mem::zeroed();
|
||||
assert_eq!(
|
||||
cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
assert_eq!(
|
||||
cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut pixels = [0x3C66u16, 0x4066, 0x4299, 4466];
|
||||
let memcpy_from_host = CUDA_MEMCPY2D {
|
||||
srcXInBytes: 0,
|
||||
srcY: 0,
|
||||
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
|
||||
srcHost: pixels.as_mut_ptr() as _,
|
||||
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||
srcArray: ptr::null_mut(),
|
||||
srcPitch: 4 * mem::size_of::<u16>(),
|
||||
dstXInBytes: 0,
|
||||
dstY: 0,
|
||||
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
|
||||
dstHost: ptr::null_mut(),
|
||||
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||
dstArray: array_0,
|
||||
dstPitch: 0,
|
||||
WidthInBytes: 4 * mem::size_of::<u16>(),
|
||||
Height: 1,
|
||||
};
|
||||
assert_eq!(
|
||||
cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut func = mem::zeroed();
|
||||
assert_eq!(
|
||||
cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut texture = mem::zeroed();
|
||||
let texture_resource_desc = CUDA_RESOURCE_DESC {
|
||||
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
||||
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
|
||||
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_0 },
|
||||
},
|
||||
flags: 0,
|
||||
};
|
||||
let texture_desc = CUDA_TEXTURE_DESC {
|
||||
addressMode: [
|
||||
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||
],
|
||||
filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
|
||||
flags: 2,
|
||||
maxAnisotropy: 0,
|
||||
mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_POINT,
|
||||
mipmapLevelBias: 0f32,
|
||||
minMipmapLevelClamp: 0f32,
|
||||
maxMipmapLevelClamp: 0f32,
|
||||
borderColor: [0f32, 0f32, 0f32, 0f32],
|
||||
reserved: mem::zeroed(),
|
||||
};
|
||||
assert_eq!(
|
||||
cuda.cuTexObjectCreate(
|
||||
&mut texture,
|
||||
&texture_resource_desc,
|
||||
&texture_desc,
|
||||
ptr::null()
|
||||
),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut surface = mem::zeroed();
|
||||
let surface_resource_desc = CUDA_RESOURCE_DESC {
|
||||
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
||||
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
|
||||
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_1 },
|
||||
},
|
||||
flags: 0,
|
||||
};
|
||||
assert_eq!(
|
||||
cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut params = [&mut texture, &mut surface];
|
||||
assert_eq!(
|
||||
cuda.cuLaunchKernel(
|
||||
func,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
ptr::null_mut(),
|
||||
params.as_mut_ptr().cast(),
|
||||
ptr::null_mut(),
|
||||
),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
assert_eq!(
|
||||
cuda.cuStreamSynchronize(ptr::null_mut()),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
let mut memcpy_dst = [u16::MAX; 4];
|
||||
let memcpy_to_host = CUDA_MEMCPY2D {
|
||||
srcXInBytes: 0,
|
||||
srcY: 0,
|
||||
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
|
||||
srcHost: ptr::null(),
|
||||
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||
srcArray: array_1,
|
||||
srcPitch: 0,
|
||||
dstXInBytes: 0,
|
||||
dstY: 0,
|
||||
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
|
||||
dstHost: memcpy_dst.as_mut_ptr() as _,
|
||||
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||
dstArray: ptr::null_mut(),
|
||||
dstPitch: 4 * mem::size_of::<u16>(),
|
||||
WidthInBytes: 4 * mem::size_of::<u16>(),
|
||||
Height: 1,
|
||||
};
|
||||
assert_eq!(
|
||||
cuda.cuMemcpy2DUnaligned_v2(&memcpy_to_host),
|
||||
CUresult::CUDA_SUCCESS
|
||||
);
|
||||
assert_eq!(&pixels, &memcpy_dst);
|
||||
}
|
Loading…
Add table
Reference in a new issue