From c64018db89b13bfd6ca9f81c8c4bb0db73976f39 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Tue, 26 Mar 2024 01:35:42 +0100 Subject: [PATCH] More hacks for mipmapped texobjs --- zluda/src/impl/array.rs | 11 +- zluda/src/impl/hipfix.rs | 56 ++++++++- ...exture_to_surface.ptx => mipmap_array.ptx} | 27 +++- ..._texture_to_surface.rs => mipmap_array.rs} | 119 ++++++++++++++++-- 4 files changed, 195 insertions(+), 18 deletions(-) rename zluda/tests/{mipmap_texture_to_surface.ptx => mipmap_array.ptx} (51%) rename zluda/tests/{mipmap_texture_to_surface.rs => mipmap_array.rs} (73%) diff --git a/zluda/src/impl/array.rs b/zluda/src/impl/array.rs index 76031a9..4acbb7d 100644 --- a/zluda/src/impl/array.rs +++ b/zluda/src/impl/array.rs @@ -47,12 +47,13 @@ pub(crate) unsafe fn get_descriptor_3d( flags |= CUDA_ARRAY3D_SURFACE_LDST; let array = hipfix::array::get(array); if let (Some(array), Some(array_descriptor)) = (array.as_ref(), array_descriptor.as_mut()) { + let real_format = hipfix::get_broken_format(array).unwrap_or(array.Format); *array_descriptor = CUDA_ARRAY3D_DESCRIPTOR { Width: array.width as usize, Height: array.height as usize, Depth: array.depth as usize, NumChannels: array.NumChannels, - Format: mem::transmute(array.Format), // compatible + Format: mem::transmute(real_format), // compatible Flags: flags, }; hipError_t::hipSuccess @@ -129,6 +130,14 @@ pub(crate) unsafe fn mipmapped_get_level( )); let hip_array_mut = hip_array.as_mut().ok_or(CUresult::CUDA_ERROR_UNKNOWN)?; hip_array_mut.textureType = hack_flag; + if mipmapped_array.height == 0 { + // HIP returns 1 here for no good reason + hip_array_mut.height = 0; + } + if mipmapped_array.depth == 0 { + // HIP returns 1 here for no good reason + hip_array_mut.depth = 0; + } *level_array = mem::transmute(hip_array); Ok(()) } else { diff --git a/zluda/src/impl/hipfix.rs b/zluda/src/impl/hipfix.rs index 4c02e2d..3257d97 100644 --- a/zluda/src/impl/hipfix.rs +++ b/zluda/src/impl/hipfix.rs @@ -3,6 +3,8 @@ use cuda_types::*; use hip_runtime_sys::*; use std::{env, ptr}; +use self::array::get_mipmapped; + use super::{function::FunctionData, stream, LiveCheck}; // For some reason HIP does not tolerate hipArraySurfaceLoadStore, even though @@ -27,7 +29,23 @@ pub(crate) fn get_non_broken_format(format: hipArray_Format) -> (u32, hipArray_F #[must_use] pub(crate) fn get_broken_format(array: &hipArray) -> Option { - Some(match (array.textureType, array.Format) { + get_broken_format_impl(array.textureType, array.Format) +} + +#[must_use] +pub(crate) unsafe fn get_broken_format_mipmapped( + array: CUmipmappedArray, +) -> Result<(&'static hipMipmappedArray, Option), CUresult> { + let (hip_array, flag) = get_mipmapped(array); + let hip_array_ref = hip_array + .as_ref() + .ok_or(CUresult::CUDA_ERROR_INVALID_VALUE)?; + let format_override = get_broken_format_impl(flag, hip_array_ref.format); + Ok((hip_array_ref, format_override)) +} + +fn get_broken_format_impl(hack_flag: u32, format: hipArray_Format) -> Option { + Some(match (hack_flag, format) { (2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF, (1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => { hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16 @@ -42,7 +60,7 @@ pub(crate) fn get_broken_format(array: &hipArray) -> Option { // memcpy3d fails when copying array1d arrays, so we mark all layered arrays by // settings LSB pub(crate) mod array { - use super::get_broken_format; + use super::{get_broken_format, get_broken_format_mipmapped}; use crate::{ hip_call_cuda, r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda}, @@ -62,10 +80,10 @@ pub(crate) mod array { let hip_array = get(cuda.res.array.hArray); cuda.res.array.hArray = mem::transmute(hip_array); if let Some(hip_array) = hip_array.as_ref() { - if let Some(format_) = get_broken_format(hip_array) { + if let Some(new_format) = get_broken_format(hip_array) { return if res_desc_view == ptr::null() { let res_desc_view = HIP_RESOURCE_VIEW_DESC { - format: resource_view_format(format_, hip_array.NumChannels)?, + format: resource_view_format(new_format, hip_array.NumChannels)?, width: hip_array.width as usize, height: hip_array.height as usize, depth: hip_array.depth as usize, @@ -88,6 +106,36 @@ pub(crate) mod array { (&cuda as *const CUDA_RESOURCE_DESC).cast::(), res_desc_view, )) + } else if cuda.resType == CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY { + let (hip_mipmapped_array, format_override) = + get_broken_format_mipmapped(cuda.res.mipmap.hMipmappedArray)?; + let mut cuda = *cuda; + cuda.res.mipmap.hMipmappedArray = mem::transmute(hip_mipmapped_array as *const _); + if let Some(new_format) = format_override { + return if res_desc_view == ptr::null() { + let res_desc_view = HIP_RESOURCE_VIEW_DESC { + format: resource_view_format(new_format, hip_mipmapped_array.num_channels)?, + width: hip_mipmapped_array.width as usize, + height: hip_mipmapped_array.height as usize, + depth: hip_mipmapped_array.depth as usize, + firstMipmapLevel: hip_mipmapped_array.min_mipmap_level, + lastMipmapLevel: hip_mipmapped_array.max_mipmap_level, + firstLayer: 0, + lastLayer: 0, + reserved: mem::zeroed(), + }; + Ok(fn_( + (&cuda as *const CUDA_RESOURCE_DESC).cast::(), + &res_desc_view, + )) + } else { + Err(CUresult::CUDA_ERROR_NOT_SUPPORTED) + }; + } + Ok(fn_( + (&cuda as *const CUDA_RESOURCE_DESC).cast::(), + res_desc_view, + )) } else { Ok(fn_( (cuda as *const CUDA_RESOURCE_DESC).cast::(), diff --git a/zluda/tests/mipmap_texture_to_surface.ptx b/zluda/tests/mipmap_array.ptx similarity index 51% rename from zluda/tests/mipmap_texture_to_surface.ptx rename to zluda/tests/mipmap_array.ptx index 12e0ac5..96f7994 100644 --- a/zluda/tests/mipmap_texture_to_surface.ptx +++ b/zluda/tests/mipmap_array.ptx @@ -7,9 +7,6 @@ .param .u64 surface_param ) { - .reg .u64 in_addr; - .reg .u64 out_addr; - .reg .u64 texture; .reg .u64 surface; .reg .f32 f<5>; @@ -26,3 +23,27 @@ sust.b.2d.v4.b16.trap [surface, {0, 0}], {rs1, rs2, rs3, rs4}; ret; } + + +.entry read_tex_2d_mip( + .param .u64 texture_param, + .param .u64 output_param +) +{ + .reg .u64 texture; + .reg .u64 output; + .reg .f32 f<5>; + .reg .f32 level; + .reg .b16 rs<5>; + + ld.param.u64 texture, [texture_param]; + ld.param.u64 output, [output_param]; + + // 3F800000 = 1.0 + tex.level.2d.v4.f32.f32 {f1, f2, f3, f4}, [texture, {0f00000000, 0f00000000}], 0f3f800000; + st.global.f32 [output], f1; + st.global.f32 [output+4], f2; + st.global.f32 [output+8], f3; + st.global.f32 [output+12], f4; + ret; +} diff --git a/zluda/tests/mipmap_texture_to_surface.rs b/zluda/tests/mipmap_array.rs similarity index 73% rename from zluda/tests/mipmap_texture_to_surface.rs rename to zluda/tests/mipmap_array.rs index e968e8c..998f81e 100644 --- a/zluda/tests/mipmap_texture_to_surface.rs +++ b/zluda/tests/mipmap_array.rs @@ -1,6 +1,7 @@ use crate::common::CudaDriverFns; use cuda_types::*; -use std::{mem, ptr}; +use half::f16; +use std::{ffi::c_void, mem, ptr}; mod common; @@ -11,7 +12,7 @@ mod common; cuda_driver_test!(mipmap_texture_to_surface); unsafe fn mipmap_texture_to_surface(cuda: T) { - let kernel = include_str!("mipmap_texture_to_surface.ptx"); + let kernel = include_str!("mipmap_array.ptx"); let mut kernel = kernel.to_owned(); kernel.push('\0'); assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS); @@ -32,7 +33,7 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { Depth: 0, Format: CUarray_format::CU_AD_FORMAT_HALF, NumChannels: 4, - Flags: 2, + Flags: 0, }; assert_eq!( cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8), @@ -44,6 +45,12 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { cuda.cuMipmappedArrayGetLevel(&mut array_0, mipmap_array, 0), CUresult::CUDA_SUCCESS ); + let mut queried_descriptor = mem::zeroed(); + assert_eq!( + cuda.cuArray3DGetDescriptor_v2(&mut queried_descriptor, array_0), + CUresult::CUDA_SUCCESS + ); + assert_eq!(mipmap_desc.Depth, queried_descriptor.Depth); assert_eq!( cuda.cuMipmappedArrayGetLevel(&mut array_1, mipmap_array, 1), CUresult::CUDA_SUCCESS @@ -71,11 +78,6 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host), CUresult::CUDA_SUCCESS ); - let mut func = mem::zeroed(); - assert_eq!( - cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()), - CUresult::CUDA_SUCCESS - ); let mut texture = mem::zeroed(); let texture_resource_desc = CUDA_RESOURCE_DESC { resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY, @@ -121,10 +123,19 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc), CUresult::CUDA_SUCCESS ); + let mut texture_to_surface = mem::zeroed(); + assert_eq!( + cuda.cuModuleGetFunction( + &mut texture_to_surface, + module, + b"texture_to_surface\0".as_ptr().cast() + ), + CUresult::CUDA_SUCCESS + ); let mut params = [&mut texture, &mut surface]; assert_eq!( cuda.cuLaunchKernel( - func, + texture_to_surface, 1, 1, 1, @@ -166,12 +177,100 @@ unsafe fn mipmap_texture_to_surface(cuda: T) { CUresult::CUDA_SUCCESS ); assert_eq!(&pixels, &memcpy_dst); + let texture_resource_desc = CUDA_RESOURCE_DESC { + resType: CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY, + res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 { + mipmap: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_2 { + hMipmappedArray: mipmap_array, + }, + }, + flags: 0, + }; + let texture_desc = CUDA_TEXTURE_DESC { + addressMode: [ + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP, + ], + filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR, + flags: 2, + maxAnisotropy: 0, + mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR, + mipmapLevelBias: 0f32, + minMipmapLevelClamp: 0f32, + maxMipmapLevelClamp: 7f32, + borderColor: [0f32, 0f32, 0f32, 0f32], + reserved: mem::zeroed(), + }; + let mut mipmapped_tex_obj = mem::zeroed(); + assert_eq!( + cuda.cuTexObjectCreate( + &mut mipmapped_tex_obj, + &texture_resource_desc, + &texture_desc, + ptr::null() + ), + CUresult::CUDA_SUCCESS + ); + let mut read_tex_2d_mip = mem::zeroed(); + assert_eq!( + cuda.cuModuleGetFunction( + &mut read_tex_2d_mip, + module, + b"read_tex_2d_mip\0".as_ptr().cast() + ), + CUresult::CUDA_SUCCESS + ); + let mut output_buffer = mem::zeroed(); + assert_eq!( + cuda.cuMemAlloc_v2(&mut output_buffer, 4 * mem::size_of::()), + CUresult::CUDA_SUCCESS + ); + let mut params = [ + &mut mipmapped_tex_obj as *mut _ as *mut c_void, + &mut output_buffer as *mut _ as *mut c_void, + ]; + assert_eq!( + cuda.cuLaunchKernel( + read_tex_2d_mip, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + ptr::null_mut(), + params.as_mut_ptr().cast(), + ptr::null_mut(), + ), + CUresult::CUDA_SUCCESS + ); + assert_eq!( + cuda.cuStreamSynchronize(ptr::null_mut()), + CUresult::CUDA_SUCCESS + ); + let mut output = [f32::MAX; 4]; + assert_eq!( + cuda.cuMemcpyDtoH_v2( + output.as_mut_ptr().cast(), + output_buffer, + 4 * mem::size_of::() + ), + CUresult::CUDA_SUCCESS + ); + let pixels_f32 = pixels + .iter() + .copied() + .map(|x| mem::transmute::<_, f16>(x).to_f32()) + .collect::>(); + assert_eq!(&output[..], &*pixels_f32); } cuda_driver_test!(mipmap_texture_to_surface2); unsafe fn mipmap_texture_to_surface2(cuda: T) { - let kernel = include_str!("mipmap_texture_to_surface.ptx"); + let kernel = include_str!("mipmap_array.ptx"); let mut kernel = kernel.to_owned(); kernel.push('\0'); assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);