From b7b85028596a962e338c400248d4aac5118f399d Mon Sep 17 00:00:00 2001
From: Andrzej Janik
Date: Fri, 22 Mar 2024 21:25:10 +0100
Subject: [PATCH] Add failing test, make tiny fixes

---
Review notes (text between this marker and the first `diff --git` is not
part of the commit message and is not applied):
- suld: the direct form keeps using Surfref, exactly as before this change
  and as the sust path does; Texref here was a copy-paste slip from tex.
- kernel_sust: removed the leftover dbg!(&values).
- mipmap test: the last pixel literal is 0x4466 (it was decimal 4466).
- Line breaks and the generic arguments lost in extraction are restored;
  hunk sizes and the diffstat below account for the added comments.

 ptx/src/translate.rs                      |  24 ++-
 zluda/tests/kernel_suld.rs                |   6 +-
 zluda/tests/kernel_sust.rs                |   6 +-
 zluda/tests/mipmap_texture_to_surface.ptx |  28 ++++
 zluda/tests/mipmap_texture_to_surface.rs  | 171 ++++++++++++++++++++++
 5 files changed, 230 insertions(+), 5 deletions(-)
 create mode 100644 zluda/tests/mipmap_texture_to_surface.ptx
 create mode 100644 zluda/tests/mipmap_texture_to_surface.rs

diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs
index db4e3e0..fbf286b 100644
--- a/ptx/src/translate.rs
+++ b/ptx/src/translate.rs
@@ -6511,9 +6511,18 @@ impl ast::Instruction {
                 ast::Instruction::Tex(details, arg)
             }
             ast::Instruction::Suld(details, arg) => {
+                // Direct form: a surfref global. Otherwise: a 64-bit value held in a register.
+                let image_type_space = if details.direct {
+                    (ast::Type::Surfref, ast::StateSpace::Global)
+                } else {
+                    (
+                        ast::Type::Scalar(ast::ScalarType::B64),
+                        ast::StateSpace::Reg,
+                    )
+                };
                 let arg = arg.map(
                     visitor,
-                    (ast::Type::Surfref, ast::StateSpace::Global),
+                    image_type_space,
                     details.geometry,
                     details.value_type(),
                     ast::ScalarType::B32,
@@ -8148,6 +8157,15 @@ impl ast::Arg4Sust {
         visitor: &mut V,
         details: &ast::SurfaceDetails,
     ) -> Result<ast::Arg4Sust<U>, TranslateError> {
+        // Direct form: a surfref global. Otherwise: a 64-bit value held in a register.
+        let (type_, space) = if details.direct {
+            (ast::Type::Surfref, ast::StateSpace::Global)
+        } else {
+            (
+                ast::Type::Scalar(ast::ScalarType::B64),
+                ast::StateSpace::Reg,
+            )
+        };
         let image = visitor.operand(
             ArgumentDescriptor {
                 op: self.image,
@@ -8155,8 +8173,8 @@ impl ast::Arg4Sust {
                 is_memory_access: false,
                 non_default_implicit_conversion: None,
             },
-            &ast::Type::Surfref,
-            ast::StateSpace::Global,
+            &type_,
+            space,
         )?;
         let layer = self
             .layer
diff --git a/zluda/tests/kernel_suld.rs b/zluda/tests/kernel_suld.rs
index ad6e964..8255f3d 100644
--- a/zluda/tests/kernel_suld.rs
+++ b/zluda/tests/kernel_suld.rs
@@ -381,7 +381,11 @@ unsafe fn kernel_suld_impl<
     let x = random_size.sample(&mut rng) * sizeof_pixel;
     let y = random_size.sample(&mut rng);
     let z = random_size.sample(&mut rng);
-    let values = [rng.gen::<SustType>(); SULD_N];
+    // `[expr; N]` evaluates `expr` once and copies it; draw each element separately.
+    let mut values = [SustType::default(); SULD_N];
+    for value in values.iter_mut() {
+        *value = rng.gen::<SustType>();
+    }
     let converted_values = force_transmute(values, BYTE_FILLER3);
     *host_side_data.get_unchecked_mut(geo.address(size, x, y, z, sizeof_pixel)) = converted_values;
     assert_eq!(
diff --git a/zluda/tests/kernel_sust.rs b/zluda/tests/kernel_sust.rs
index 831e467..889332e 100644
--- a/zluda/tests/kernel_sust.rs
+++ b/zluda/tests/kernel_sust.rs
@@ -421,7 +421,11 @@ unsafe fn kernel_sust_impl<
     let x = random_size.sample(&mut rng) * sizeof_pixel;
     let y = random_size.sample(&mut rng);
     let z = random_size.sample(&mut rng);
-    let values = [rng.gen::<SustType>(); SUST_N];
+    // `[expr; N]` evaluates `expr` once and copies it; draw each element separately.
+    let mut values = [SustType::default(); SUST_N];
+    for value in values.iter_mut() {
+        *value = rng.gen::<SustType>();
+    }
     let mut args = vec![
         &x as *const _ as *const c_void,
         &y as *const _ as *const _,
diff --git a/zluda/tests/mipmap_texture_to_surface.ptx b/zluda/tests/mipmap_texture_to_surface.ptx
new file mode 100644
index 0000000..12e0ac5
--- /dev/null
+++ b/zluda/tests/mipmap_texture_to_surface.ptx
@@ -0,0 +1,28 @@
+.version 6.5
+.target sm_30
+.address_size 64
+
+.entry texture_to_surface(
+    .param .u64 texture_param,
+    .param .u64 surface_param
+)
+{
+    .reg .u64 in_addr;
+    .reg .u64 out_addr;
+
+    .reg .u64 texture;
+    .reg .u64 surface;
+    .reg .f32 f<5>;
+    .reg .b16 rs<5>;
+
+    ld.param.u64 texture, [texture_param];
+    ld.param.u64 surface, [surface_param];
+
+    tex.2d.v4.f32.f32 {f1, f2, f3, f4}, [texture, {0f00000000, 0f00000000}];
+    { cvt.rn.f16.f32 rs1, f1;}
+    { cvt.rn.f16.f32 rs2, f2;}
+    { cvt.rn.f16.f32 rs3, f3;}
+    { cvt.rn.f16.f32 rs4, f4;}
+    sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4};
+    ret;
+}
diff --git a/zluda/tests/mipmap_texture_to_surface.rs b/zluda/tests/mipmap_texture_to_surface.rs
new file mode 100644
index 0000000..00ba4a6
--- /dev/null
+++ b/zluda/tests/mipmap_texture_to_surface.rs
@@ -0,0 +1,171 @@
+use crate::common::CudaDriverFns;
+use cuda_types::*;
+use std::{mem, ptr};
+
+mod common;
+
+cuda_driver_test!(mipmap_texture_to_surface);
+
+/// Reads one texel from mip level 0 through a texture object, stores it,
+/// converted to f16, into mip level 1 through a surface object, and checks
+/// that the pixel copied back from level 1 equals the one uploaded to level 0.
+unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
+    let kernel = include_str!("mipmap_texture_to_surface.ptx");
+    let mut kernel = kernel.to_owned();
+    kernel.push('\0');
+    assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
+    let mut ctx = ptr::null_mut();
+    assert_eq!(
+        cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut module = ptr::null_mut();
+    assert_eq!(
+        cuda.cuModuleLoadData(&mut module, kernel.as_ptr() as _),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut mipmap_array = ptr::null_mut();
+    let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR {
+        Width: 1368,
+        Height: 770,
+        Depth: 0,
+        Format: CUarray_format::CU_AD_FORMAT_HALF,
+        NumChannels: 4,
+        Flags: 0,
+    };
+    assert_eq!(
+        cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut array_0 = mem::zeroed();
+    let mut array_1 = mem::zeroed();
+    assert_eq!(
+        cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0),
+        CUresult::CUDA_SUCCESS
+    );
+    assert_eq!(
+        cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1),
+        CUresult::CUDA_SUCCESS
+    );
+    // One four-channel pixel as raw f16 bit patterns: roughly 1.1, 2.2, 3.3 and 4.4.
+    let mut pixels = [0x3C66u16, 0x4066, 0x4299, 0x4466];
+    let memcpy_from_host = CUDA_MEMCPY2D {
+        srcXInBytes: 0,
+        srcY: 0,
+        srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
+        srcHost: pixels.as_mut_ptr() as _,
+        srcDevice: CUdeviceptr_v2(ptr::null_mut()),
+        srcArray: ptr::null_mut(),
+        srcPitch: 4 * mem::size_of::<u16>(),
+        dstXInBytes: 0,
+        dstY: 0,
+        dstMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
+        dstHost: ptr::null_mut(),
+        dstDevice: CUdeviceptr_v2(ptr::null_mut()),
+        dstArray: array_0,
+        dstPitch: 0,
+        WidthInBytes: 4 * mem::size_of::<u16>(),
+        Height: 1,
+    };
+    assert_eq!(
+        cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut func = mem::zeroed();
+    assert_eq!(
+        cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut texture = mem::zeroed();
+    let texture_resource_desc = CUDA_RESOURCE_DESC {
+        resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
+        res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
+            array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_0 },
+        },
+        flags: 0,
+    };
+    let texture_desc = CUDA_TEXTURE_DESC {
+        addressMode: [
+            CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
+            CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
+            CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
+        ],
+        filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
+        flags: 2,
+        maxAnisotropy: 0,
+        mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_POINT,
+        mipmapLevelBias: 0f32,
+        minMipmapLevelClamp: 0f32,
+        maxMipmapLevelClamp: 0f32,
+        borderColor: [0f32, 0f32, 0f32, 0f32],
+        reserved: mem::zeroed(),
+    };
+    assert_eq!(
+        cuda.cuTexObjectCreate(
+            &mut texture,
+            &texture_resource_desc,
+            &texture_desc,
+            ptr::null()
+        ),
+        CUresult::CUDA_SUCCESS
+    );
+    let mut surface = mem::zeroed();
+    let surface_resource_desc = CUDA_RESOURCE_DESC {
+        resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
+        res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
+            array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_1 },
+        },
+        flags: 0,
+    };
+    assert_eq!(
+        cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
+        CUresult::CUDA_SUCCESS
+    );
+    // The kernel takes two `.u64` parameters; each one is passed by address.
+    let mut params = [&mut texture, &mut surface];
+    assert_eq!(
+        cuda.cuLaunchKernel(
+            func,
+            1,
+            1,
+            1,
+            1,
+            1,
+            1,
+            0,
+            ptr::null_mut(),
+            params.as_mut_ptr().cast(),
+            ptr::null_mut(),
+        ),
+        CUresult::CUDA_SUCCESS
+    );
+    assert_eq!(
+        cuda.cuStreamSynchronize(ptr::null_mut()),
+        CUresult::CUDA_SUCCESS
+    );
+    // Start from a sentinel that differs from `pixels`, so an untouched buffer fails the check.
+    let mut memcpy_dst = [u16::MAX; 4];
+    let memcpy_to_host = CUDA_MEMCPY2D {
+        srcXInBytes: 0,
+        srcY: 0,
+        srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
+        srcHost: ptr::null(),
+        srcDevice: CUdeviceptr_v2(ptr::null_mut()),
+        srcArray: array_1,
+        srcPitch: 0,
+        dstXInBytes: 0,
+        dstY: 0,
+        dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
+        dstHost: memcpy_dst.as_mut_ptr() as _,
+        dstDevice: CUdeviceptr_v2(ptr::null_mut()),
+        dstArray: ptr::null_mut(),
+        dstPitch: 4 * mem::size_of::<u16>(),
+        WidthInBytes: 4 * mem::size_of::<u16>(),
+        Height: 1,
+    };
+    assert_eq!(
+        cuda.cuMemcpy2DUnaligned_v2(&memcpy_to_host),
+        CUresult::CUDA_SUCCESS
+    );
+    assert_eq!(&pixels, &memcpy_dst);
+}