mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-05 07:41:25 +00:00
Apply f16 hacks to texture objects and mipmapped arrays
This commit is contained in:
parent
b7b8502859
commit
93987a2cfe
10 changed files with 387 additions and 48 deletions
|
@ -215,7 +215,7 @@ cuda_function_declarations!(
|
||||||
cuLinkCreate_v2,
|
cuLinkCreate_v2,
|
||||||
cuMipmappedArrayCreate,
|
cuMipmappedArrayCreate,
|
||||||
cuMipmappedArrayDestroy,
|
cuMipmappedArrayDestroy,
|
||||||
cuMipmappedArrayGetLevel,
|
cuMipmappedArrayGetLevel
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1249,10 +1249,8 @@ mod definitions {
|
||||||
pResDesc: *const CUDA_RESOURCE_DESC,
|
pResDesc: *const CUDA_RESOURCE_DESC,
|
||||||
pTexDesc: *const HIP_TEXTURE_DESC,
|
pTexDesc: *const HIP_TEXTURE_DESC,
|
||||||
pResViewDesc: *const HIP_RESOURCE_VIEW_DESC,
|
pResViewDesc: *const HIP_RESOURCE_VIEW_DESC,
|
||||||
) -> hipError_t {
|
) -> Result<(), CUresult> {
|
||||||
let mut tex_desc = *pTexDesc;
|
texobj::create(pTexObject, pResDesc, pTexDesc, pResViewDesc)
|
||||||
tex_desc.maxMipmapLevelClamp = 0f32;
|
|
||||||
texobj::create(pTexObject, pResDesc, &tex_desc, pResViewDesc)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn cuTexObjectDestroy(texObject: hipTextureObject_t) -> hipError_t {
|
pub(crate) unsafe fn cuTexObjectDestroy(texObject: hipTextureObject_t) -> hipError_t {
|
||||||
|
@ -1652,24 +1650,24 @@ mod definitions {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn cuMipmappedArrayCreate(
|
pub(crate) unsafe fn cuMipmappedArrayCreate(
|
||||||
pHandle: *mut hipMipmappedArray_t,
|
pHandle: *mut CUmipmappedArray,
|
||||||
pMipmappedArrayDesc: *const HIP_ARRAY3D_DESCRIPTOR,
|
pMipmappedArrayDesc: *const HIP_ARRAY3D_DESCRIPTOR,
|
||||||
numMipmapLevels: ::std::os::raw::c_uint,
|
numMipmapLevels: ::std::os::raw::c_uint,
|
||||||
) -> hipError_t {
|
) -> Result<(), CUresult> {
|
||||||
array::mipmapped_create(pHandle, pMipmappedArrayDesc, numMipmapLevels)
|
array::mipmapped_create(pHandle, pMipmappedArrayDesc, numMipmapLevels)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn cuMipmappedArrayDestroy(
|
pub(crate) unsafe fn cuMipmappedArrayDestroy(
|
||||||
hMipmappedArray: hipMipmappedArray_t,
|
hMipmappedArray: CUmipmappedArray,
|
||||||
) -> hipError_t {
|
) -> hipError_t {
|
||||||
hipMipmappedArrayDestroy(hMipmappedArray)
|
array::mipmapped_destroy(hMipmappedArray)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn cuMipmappedArrayGetLevel(
|
pub(crate) unsafe fn cuMipmappedArrayGetLevel(
|
||||||
pLevelArray: *mut CUarray,
|
pLevelArray: *mut CUarray,
|
||||||
hMipmappedArray: hipMipmappedArray_t,
|
hMipmappedArray: CUmipmappedArray,
|
||||||
level: ::std::os::raw::c_uint,
|
level: ::std::os::raw::c_uint,
|
||||||
) -> hipError_t {
|
) -> Result<(), CUresult> {
|
||||||
hipMipmappedArrayGetLevel(pLevelArray.cast(), hMipmappedArray, level)
|
array::mipmapped_get_level(pLevelArray, hMipmappedArray, level)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,13 +83,55 @@ pub(crate) unsafe fn create(
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn mipmapped_create(
|
pub(crate) unsafe fn mipmapped_create(
|
||||||
p_handle: *mut hipMipmappedArray_t,
|
mipmapped_array: *mut CUmipmappedArray,
|
||||||
p_mipmapped_array_desc: *const HIP_ARRAY3D_DESCRIPTOR,
|
mipmapped_array_desc: *const HIP_ARRAY3D_DESCRIPTOR,
|
||||||
num_mipmap_levels: u32,
|
num_mipmap_levels: u32,
|
||||||
) -> hipError_t {
|
) -> Result<(), CUresult> {
|
||||||
hipMipmappedArrayCreate(
|
if let Some(mipmapped_array_desc) = (mipmapped_array_desc).as_ref() {
|
||||||
p_handle,
|
let mut mipmapped_array_desc = *mipmapped_array_desc;
|
||||||
p_mipmapped_array_desc.cast_mut(),
|
let (hack_flag, format) = hipfix::get_non_broken_format(mipmapped_array_desc.Format);
|
||||||
num_mipmap_levels,
|
mipmapped_array_desc.Format = format;
|
||||||
)
|
let mut hip_array = ptr::null_mut();
|
||||||
|
hip_call_cuda!(hipMipmappedArrayCreate(
|
||||||
|
&mut hip_array,
|
||||||
|
&mut mipmapped_array_desc,
|
||||||
|
num_mipmap_levels
|
||||||
|
));
|
||||||
|
if (hip_array as usize & 0b11) != 0 {
|
||||||
|
hip_call_cuda!(hipMipmappedArrayDestroy(hip_array));
|
||||||
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
|
}
|
||||||
|
hip_array = (hip_array as usize | hack_flag as usize) as _;
|
||||||
|
*mipmapped_array = hip_array.cast();
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(CUresult::CUDA_ERROR_INVALID_VALUE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn mipmapped_destroy(mipmapped_array: CUmipmappedArray) -> hipError_t {
|
||||||
|
let mipmapped_array = hipfix::array::get_mipmapped(mipmapped_array).0;
|
||||||
|
hipMipmappedArrayDestroy(mipmapped_array)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn mipmapped_get_level(
|
||||||
|
level_array: *mut CUarray,
|
||||||
|
mipmapped_array: CUmipmappedArray,
|
||||||
|
level: u32,
|
||||||
|
) -> Result<(), CUresult> {
|
||||||
|
let (mipmapped_array, hack_flag) = hipfix::array::get_mipmapped(mipmapped_array);
|
||||||
|
if let Some(mipmapped_array) = mipmapped_array.as_mut() {
|
||||||
|
let mut hip_array = mem::zeroed();
|
||||||
|
hip_call_cuda!(hipMipmappedArrayGetLevel(
|
||||||
|
&mut hip_array,
|
||||||
|
mipmapped_array as *mut _,
|
||||||
|
level
|
||||||
|
));
|
||||||
|
let hip_array_mut = hip_array.as_mut().ok_or(CUresult::CUDA_ERROR_UNKNOWN)?;
|
||||||
|
hip_array_mut.textureType = hack_flag;
|
||||||
|
*level_array = mem::transmute(hip_array);
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(CUresult::CUDA_ERROR_INVALID_VALUE)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,8 +26,8 @@ pub(crate) fn get_non_broken_format(format: hipArray_Format) -> (u32, hipArray_F
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn get_broken_format(broken: u32, format: hipArray_Format) -> hipArray_Format {
|
pub(crate) fn get_broken_format(array: &hipArray) -> Option<hipArray_Format> {
|
||||||
match (broken, format) {
|
Some(match (array.textureType, array.Format) {
|
||||||
(2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF,
|
(2, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => hipArray_Format::HIP_AD_FORMAT_HALF,
|
||||||
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => {
|
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16) => {
|
||||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16
|
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16
|
||||||
|
@ -35,13 +35,14 @@ pub(crate) fn get_broken_format(broken: u32, format: hipArray_Format) -> hipArra
|
||||||
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8) => {
|
(1, hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8) => {
|
||||||
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8
|
hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8
|
||||||
}
|
}
|
||||||
(_, f) => f,
|
(_, _) => return None,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// memcpy3d fails when copying array1d arrays, so we mark all layered arrays by
|
// memcpy3d fails when copying array1d arrays, so we mark all layered arrays by
|
||||||
// setting the LSB
|
// setting the LSB
|
||||||
pub(crate) mod array {
|
pub(crate) mod array {
|
||||||
|
use super::get_broken_format;
|
||||||
use crate::{
|
use crate::{
|
||||||
hip_call_cuda,
|
hip_call_cuda,
|
||||||
r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda},
|
r#impl::{memcpy3d_from_cuda, memory_type_from_cuda, FromCuda},
|
||||||
|
@ -51,23 +52,141 @@ pub(crate) mod array {
|
||||||
use std::{mem, ptr};
|
use std::{mem, ptr};
|
||||||
|
|
||||||
pub(crate) unsafe fn with_resource_desc<T>(
|
pub(crate) unsafe fn with_resource_desc<T>(
|
||||||
cuda: *const CUDA_RESOURCE_DESC,
|
res_desc: *const CUDA_RESOURCE_DESC,
|
||||||
fn_: impl FnOnce(*const HIP_RESOURCE_DESC) -> T,
|
res_desc_view: *const HIP_RESOURCE_VIEW_DESC,
|
||||||
) -> T {
|
fn_: impl FnOnce(*const HIP_RESOURCE_DESC, *const HIP_RESOURCE_VIEW_DESC) -> T,
|
||||||
let cuda = &*cuda;
|
) -> Result<T, CUresult> {
|
||||||
|
let cuda = &*res_desc;
|
||||||
if cuda.resType == CUresourcetype::CU_RESOURCE_TYPE_ARRAY {
|
if cuda.resType == CUresourcetype::CU_RESOURCE_TYPE_ARRAY {
|
||||||
let mut cuda = *cuda;
|
let mut cuda = *cuda;
|
||||||
cuda.res.array.hArray = mem::transmute(get(cuda.res.array.hArray));
|
let hip_array = get(cuda.res.array.hArray);
|
||||||
fn_((&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>())
|
cuda.res.array.hArray = mem::transmute(hip_array);
|
||||||
|
if let Some(hip_array) = hip_array.as_ref() {
|
||||||
|
if let Some(format_) = get_broken_format(hip_array) {
|
||||||
|
return if res_desc_view == ptr::null() {
|
||||||
|
let res_desc_view = HIP_RESOURCE_VIEW_DESC {
|
||||||
|
format: resource_view_format(format_, hip_array.NumChannels)?,
|
||||||
|
width: hip_array.width as usize,
|
||||||
|
height: hip_array.height as usize,
|
||||||
|
depth: hip_array.depth as usize,
|
||||||
|
firstMipmapLevel: 0,
|
||||||
|
lastMipmapLevel: 0,
|
||||||
|
firstLayer: 0,
|
||||||
|
lastLayer: 0,
|
||||||
|
reserved: mem::zeroed(),
|
||||||
|
};
|
||||||
|
Ok(fn_(
|
||||||
|
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
&res_desc_view,
|
||||||
|
))
|
||||||
} else {
|
} else {
|
||||||
fn_((cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>())
|
Err(CUresult::CUDA_ERROR_NOT_SUPPORTED)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(fn_(
|
||||||
|
(&cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
res_desc_view,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(fn_(
|
||||||
|
(cuda as *const CUDA_RESOURCE_DESC).cast::<HIP_RESOURCE_DESC>(),
|
||||||
|
res_desc_view,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resource_view_format(
|
||||||
|
format: hipArray_Format,
|
||||||
|
num_channels: u32,
|
||||||
|
) -> Result<HIPresourceViewFormat, CUresult> {
|
||||||
|
Ok(match (format, num_channels) {
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X8
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_1X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_2X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_UINT_4X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_1X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_2X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_SINT_4X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_HALF, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_1X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_HALF, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_2X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_HALF, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_4X16
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_FLOAT, 1) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_1X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_FLOAT, 2) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_2X32
|
||||||
|
}
|
||||||
|
(hipArray_Format::HIP_AD_FORMAT_FLOAT, 4) => {
|
||||||
|
HIPresourceViewFormat::HIP_RES_VIEW_FORMAT_FLOAT_4X32
|
||||||
|
}
|
||||||
|
_ => return Err(CUresult::CUDA_ERROR_NOT_SUPPORTED),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn get(cuda: CUarray) -> hipArray_t {
|
pub(crate) fn get(cuda: CUarray) -> hipArray_t {
|
||||||
(cuda as usize & !3usize) as hipArray_t
|
(cuda as usize & !3usize) as hipArray_t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_mipmapped(cuda: CUmipmappedArray) -> (hipMipmappedArray_t, u32) {
|
||||||
|
let array = (cuda as usize & !3usize) as hipMipmappedArray_t;
|
||||||
|
let broken_flag = (cuda as usize & 3usize) as u32;
|
||||||
|
(array, broken_flag)
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn to_cuda(array: hipArray_t, layered_dims: usize) -> CUarray {
|
pub(crate) fn to_cuda(array: hipArray_t, layered_dims: usize) -> CUarray {
|
||||||
let a1d_layered = layered_dims as usize;
|
let a1d_layered = layered_dims as usize;
|
||||||
((array as usize) | a1d_layered) as CUarray
|
((array as usize) | a1d_layered) as CUarray
|
||||||
|
|
|
@ -216,6 +216,7 @@ impl FromCuda<CUlibraryOption> for CUlibraryOption {}
|
||||||
impl FromCuda<CUDA_KERNEL_NODE_PARAMS_v1> for CUDA_KERNEL_NODE_PARAMS_v1 {}
|
impl FromCuda<CUDA_KERNEL_NODE_PARAMS_v1> for CUDA_KERNEL_NODE_PARAMS_v1 {}
|
||||||
impl FromCuda<CUjitInputType> for CUjitInputType {}
|
impl FromCuda<CUjitInputType> for CUjitInputType {}
|
||||||
impl FromCuda<CUDA_RESOURCE_DESC> for CUDA_RESOURCE_DESC {}
|
impl FromCuda<CUDA_RESOURCE_DESC> for CUDA_RESOURCE_DESC {}
|
||||||
|
impl FromCuda<CUmipmappedArray> for CUmipmappedArray {}
|
||||||
|
|
||||||
impl FromCuda<CUcontext> for *mut context::Context {}
|
impl FromCuda<CUcontext> for *mut context::Context {}
|
||||||
impl FromCuda<CUstream> for *mut stream::Stream {}
|
impl FromCuda<CUstream> for *mut stream::Stream {}
|
||||||
|
@ -253,7 +254,6 @@ impl FromCuda<CUgraphExec> for hipGraphExec_t {}
|
||||||
impl FromCuda<CUgraphicsResource> for hipGraphicsResource_t {}
|
impl FromCuda<CUgraphicsResource> for hipGraphicsResource_t {}
|
||||||
impl FromCuda<CUlimit> for hipLimit_t {}
|
impl FromCuda<CUlimit> for hipLimit_t {}
|
||||||
impl FromCuda<CUsurfObject> for hipSurfaceObject_t {}
|
impl FromCuda<CUsurfObject> for hipSurfaceObject_t {}
|
||||||
impl FromCuda<CUmipmappedArray> for hipMipmappedArray_t {}
|
|
||||||
|
|
||||||
impl<From, Into: FromCuda<From>> FromCuda<*mut From> for *mut Into {}
|
impl<From, Into: FromCuda<From>> FromCuda<*mut From> for *mut Into {}
|
||||||
impl<From, Into: FromCuda<From>> FromCuda<*const From> for *const Into {}
|
impl<From, Into: FromCuda<From>> FromCuda<*const From> for *const Into {}
|
||||||
|
|
|
@ -109,9 +109,9 @@ fn channel_format_desc(
|
||||||
bits.3 *= bit_width;
|
bits.3 *= bit_width;
|
||||||
Ok(hipChannelFormatDesc {
|
Ok(hipChannelFormatDesc {
|
||||||
x: bits.0 as i32,
|
x: bits.0 as i32,
|
||||||
y: bits.0 as i32,
|
y: bits.1 as i32,
|
||||||
z: bits.0 as i32,
|
z: bits.2 as i32,
|
||||||
w: bits.0 as i32,
|
w: bits.3 as i32,
|
||||||
f: kind,
|
f: kind,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,29 @@
|
||||||
|
use super::hipfix;
|
||||||
|
use crate::hip_call_cuda;
|
||||||
use cuda_types::*;
|
use cuda_types::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
|
|
||||||
use super::hipfix;
|
|
||||||
|
|
||||||
pub(crate) unsafe fn create(
|
pub(crate) unsafe fn create(
|
||||||
p_tex_object: *mut hipTextureObject_t,
|
p_tex_object: *mut hipTextureObject_t,
|
||||||
p_res_desc: *const CUDA_RESOURCE_DESC,
|
p_res_desc: *const CUDA_RESOURCE_DESC,
|
||||||
p_tex_desc: *const HIP_TEXTURE_DESC,
|
p_tex_desc: *const HIP_TEXTURE_DESC,
|
||||||
p_res_view_desc: *const HIP_RESOURCE_VIEW_DESC,
|
p_res_view_desc: *const HIP_RESOURCE_VIEW_DESC,
|
||||||
) -> hipError_t {
|
) -> Result<(), CUresult> {
|
||||||
if p_res_desc == ptr::null() {
|
if p_res_desc == ptr::null() {
|
||||||
return hipError_t::hipErrorInvalidValue;
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
}
|
}
|
||||||
hipfix::array::with_resource_desc(p_res_desc, |p_res_desc| {
|
hipfix::array::with_resource_desc(
|
||||||
hipTexObjectCreate(p_tex_object, p_res_desc, p_tex_desc, p_res_view_desc)
|
p_res_desc,
|
||||||
})
|
p_res_view_desc,
|
||||||
|
|p_res_desc, p_res_view_desc| {
|
||||||
|
hip_call_cuda!(hipTexObjectCreate(
|
||||||
|
p_tex_object,
|
||||||
|
p_res_desc,
|
||||||
|
p_tex_desc,
|
||||||
|
p_res_view_desc
|
||||||
|
));
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
)?
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,7 +94,7 @@ pub(crate) unsafe fn set_array(
|
||||||
if let Some(array) = array.as_ref() {
|
if let Some(array) = array.as_ref() {
|
||||||
hip_call_cuda!(hipTexRefSetFormat(
|
hip_call_cuda!(hipTexRefSetFormat(
|
||||||
texref,
|
texref,
|
||||||
hipfix::get_broken_format(array.textureType, array.Format),
|
hipfix::get_broken_format(array).unwrap_or(array.Format),
|
||||||
array.NumChannels as i32,
|
array.NumChannels as i32,
|
||||||
));
|
));
|
||||||
hip_call_cuda!(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT));
|
hip_call_cuda!(hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT));
|
||||||
|
|
|
@ -425,7 +425,6 @@ unsafe fn kernel_sust_impl<
|
||||||
for value in values.iter_mut() {
|
for value in values.iter_mut() {
|
||||||
*value = rng.gen::<SustType>();
|
*value = rng.gen::<SustType>();
|
||||||
}
|
}
|
||||||
dbg!(&values);
|
|
||||||
let mut args = vec![
|
let mut args = vec![
|
||||||
&x as *const _ as *const c_void,
|
&x as *const _ as *const c_void,
|
||||||
&y as *const _ as *const _,
|
&y as *const _ as *const _,
|
||||||
|
|
|
@ -213,6 +213,7 @@ generate_tests!(
|
||||||
CU_AD_FORMAT_SIGNED_INT8,
|
CU_AD_FORMAT_SIGNED_INT8,
|
||||||
CU_AD_FORMAT_SIGNED_INT16,
|
CU_AD_FORMAT_SIGNED_INT16,
|
||||||
CU_AD_FORMAT_SIGNED_INT32,
|
CU_AD_FORMAT_SIGNED_INT32,
|
||||||
|
// TODO: update half crate
|
||||||
//CU_AD_FORMAT_HALF,
|
//CU_AD_FORMAT_HALF,
|
||||||
CU_AD_FORMAT_FLOAT
|
CU_AD_FORMAT_FLOAT
|
||||||
],
|
],
|
||||||
|
@ -337,13 +338,13 @@ const BYTE_FILLER2: u8 = 0xfe;
|
||||||
|
|
||||||
unsafe fn force_transmute<From: SustValue, To: SustValue>(f: From) -> To {
|
unsafe fn force_transmute<From: SustValue, To: SustValue>(f: From) -> To {
|
||||||
if mem::size_of::<From>() == mem::size_of::<To>()
|
if mem::size_of::<From>() == mem::size_of::<To>()
|
||||||
&& mem::size_of::<To>() == mem::size_of::<u32>()
|
&& mem::size_of::<To>() == mem::size_of::<f32>()
|
||||||
{
|
{
|
||||||
return mem::transmute_copy(&f);
|
return mem::transmute_copy(&f);
|
||||||
}
|
}
|
||||||
if mem::size_of::<To>() == mem::size_of::<u32>() {
|
if mem::size_of::<To>() == mem::size_of::<f32>() {
|
||||||
if let Some(value) = <dyn Any>::downcast_ref::<f16>(&f) {
|
if let Some(value) = <dyn Any>::downcast_ref::<f16>(&f) {
|
||||||
return mem::transmute_copy(&((value.to_f64() / f16::MAX.to_f64()) as f32));
|
return mem::transmute_copy(&value.to_f32());
|
||||||
}
|
}
|
||||||
if let Some(value) = <dyn Any>::downcast_ref::<u8>(&f) {
|
if let Some(value) = <dyn Any>::downcast_ref::<u8>(&f) {
|
||||||
return mem::transmute_copy(&((*value as f64 / u8::MAX as f64) as f32));
|
return mem::transmute_copy(&((*value as f64 / u8::MAX as f64) as f32));
|
||||||
|
@ -359,6 +360,9 @@ unsafe fn force_transmute<From: SustValue, To: SustValue>(f: From) -> To {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if mem::size_of::<To>() == mem::size_of::<f16>() {
|
if mem::size_of::<To>() == mem::size_of::<f16>() {
|
||||||
|
if let Some(_) = <dyn Any>::downcast_ref::<f16>(&f) {
|
||||||
|
return mem::transmute_copy(&f);
|
||||||
|
}
|
||||||
if let Some(value) = <dyn Any>::downcast_ref::<u8>(&f) {
|
if let Some(value) = <dyn Any>::downcast_ref::<u8>(&f) {
|
||||||
return mem::transmute_copy(&f16::from_f64(*value as f64 / u8::MAX as f64));
|
return mem::transmute_copy(&f16::from_f64(*value as f64 / u8::MAX as f64));
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,10 @@ use std::{mem, ptr};
|
||||||
|
|
||||||
mod common;
|
mod common;
|
||||||
|
|
||||||
|
// TODO: These two tests expose various random brokenness of mipmapped arrays
|
||||||
|
// and texture objects. This should be turned into extensive tests like
|
||||||
|
// kernel_sust/kernel_suld/kernel_tex
|
||||||
|
|
||||||
cuda_driver_test!(mipmap_texture_to_surface);
|
cuda_driver_test!(mipmap_texture_to_surface);
|
||||||
|
|
||||||
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
|
@ -28,7 +32,7 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
Depth: 0,
|
Depth: 0,
|
||||||
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
||||||
NumChannels: 4,
|
NumChannels: 4,
|
||||||
Flags: 0,
|
Flags: 2,
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
|
cuda.cuMipmappedArrayCreate(&mut mipmap_array, &mipmap_desc, 8),
|
||||||
|
@ -163,3 +167,166 @@ unsafe fn mipmap_texture_to_surface<T: CudaDriverFns>(cuda: T) {
|
||||||
);
|
);
|
||||||
assert_eq!(&pixels, &memcpy_dst);
|
assert_eq!(&pixels, &memcpy_dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cuda_driver_test!(mipmap_texture_to_surface2);
|
||||||
|
|
||||||
|
unsafe fn mipmap_texture_to_surface2<T: CudaDriverFns>(cuda: T) {
|
||||||
|
let kernel = include_str!("mipmap_texture_to_surface.ptx");
|
||||||
|
let mut kernel = kernel.to_owned();
|
||||||
|
kernel.push('\0');
|
||||||
|
assert_eq!(cuda.cuInit(0), CUresult::CUDA_SUCCESS);
|
||||||
|
let mut ctx = ptr::null_mut();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuCtxCreate_v2(&mut ctx, 0, CUdevice_v1(0)),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut module = ptr::null_mut();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuModuleLoadData(&mut module, kernel.as_ptr() as _),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut array_0 = mem::zeroed();
|
||||||
|
let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR {
|
||||||
|
Width: 1368,
|
||||||
|
Height: 770,
|
||||||
|
Depth: 0,
|
||||||
|
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
||||||
|
NumChannels: 4,
|
||||||
|
Flags: 2,
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuArray3DCreate_v2(&mut array_0, &mipmap_desc),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut array_1 = mem::zeroed();
|
||||||
|
let mipmap_desc = CUDA_ARRAY3D_DESCRIPTOR {
|
||||||
|
Width: 1368 / 2,
|
||||||
|
Height: 770 / 2,
|
||||||
|
Depth: 0,
|
||||||
|
Format: CUarray_format::CU_AD_FORMAT_HALF,
|
||||||
|
NumChannels: 4,
|
||||||
|
Flags: 2,
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuArray3DCreate_v2(&mut array_1, &mipmap_desc),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut pixels = [0x3C66u16, 0x4066, 0x4299, 4466];
|
||||||
|
let memcpy_from_host = CUDA_MEMCPY2D {
|
||||||
|
srcXInBytes: 0,
|
||||||
|
srcY: 0,
|
||||||
|
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
|
||||||
|
srcHost: pixels.as_mut_ptr() as _,
|
||||||
|
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||||
|
srcArray: ptr::null_mut(),
|
||||||
|
srcPitch: 4 * mem::size_of::<u16>(),
|
||||||
|
dstXInBytes: 0,
|
||||||
|
dstY: 0,
|
||||||
|
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
|
||||||
|
dstHost: ptr::null_mut(),
|
||||||
|
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||||
|
dstArray: array_0,
|
||||||
|
dstPitch: 0,
|
||||||
|
WidthInBytes: 4 * mem::size_of::<u16>(),
|
||||||
|
Height: 1,
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuMemcpy2DUnaligned_v2(&memcpy_from_host),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut func = mem::zeroed();
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuModuleGetFunction(&mut func, module, b"texture_to_surface\0".as_ptr().cast()),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut texture = mem::zeroed();
|
||||||
|
let texture_resource_desc = CUDA_RESOURCE_DESC {
|
||||||
|
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
||||||
|
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
|
||||||
|
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_0 },
|
||||||
|
},
|
||||||
|
flags: 0,
|
||||||
|
};
|
||||||
|
let texture_desc = CUDA_TEXTURE_DESC {
|
||||||
|
addressMode: [
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
CUaddress_mode::CU_TR_ADDRESS_MODE_CLAMP,
|
||||||
|
],
|
||||||
|
filterMode: CUfilter_mode::CU_TR_FILTER_MODE_LINEAR,
|
||||||
|
flags: 2,
|
||||||
|
maxAnisotropy: 0,
|
||||||
|
mipmapFilterMode: CUfilter_mode::CU_TR_FILTER_MODE_POINT,
|
||||||
|
mipmapLevelBias: 0f32,
|
||||||
|
minMipmapLevelClamp: 0f32,
|
||||||
|
maxMipmapLevelClamp: 0f32,
|
||||||
|
borderColor: [0f32, 0f32, 0f32, 0f32],
|
||||||
|
reserved: mem::zeroed(),
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuTexObjectCreate(
|
||||||
|
&mut texture,
|
||||||
|
&texture_resource_desc,
|
||||||
|
&texture_desc,
|
||||||
|
ptr::null()
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut surface = mem::zeroed();
|
||||||
|
let surface_resource_desc = CUDA_RESOURCE_DESC {
|
||||||
|
resType: CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
|
||||||
|
res: CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
|
||||||
|
array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 { hArray: array_1 },
|
||||||
|
},
|
||||||
|
flags: 0,
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuSurfObjectCreate(&mut surface, &surface_resource_desc),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut params = [&mut texture, &mut surface];
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuLaunchKernel(
|
||||||
|
func,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
ptr::null_mut(),
|
||||||
|
params.as_mut_ptr().cast(),
|
||||||
|
ptr::null_mut(),
|
||||||
|
),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuStreamSynchronize(ptr::null_mut()),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
let mut memcpy_dst = [u16::MAX; 4];
|
||||||
|
let memcpy_to_host = CUDA_MEMCPY2D {
|
||||||
|
srcXInBytes: 0,
|
||||||
|
srcY: 0,
|
||||||
|
srcMemoryType: CUmemorytype::CU_MEMORYTYPE_ARRAY,
|
||||||
|
srcHost: ptr::null(),
|
||||||
|
srcDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||||
|
srcArray: array_1,
|
||||||
|
srcPitch: 0,
|
||||||
|
dstXInBytes: 0,
|
||||||
|
dstY: 0,
|
||||||
|
dstMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
|
||||||
|
dstHost: memcpy_dst.as_mut_ptr() as _,
|
||||||
|
dstDevice: CUdeviceptr_v2(ptr::null_mut()),
|
||||||
|
dstArray: ptr::null_mut(),
|
||||||
|
dstPitch: 4 * mem::size_of::<u16>(),
|
||||||
|
WidthInBytes: 4 * mem::size_of::<u16>(),
|
||||||
|
Height: 1,
|
||||||
|
};
|
||||||
|
assert_eq!(
|
||||||
|
cuda.cuMemcpy2DUnaligned_v2(&memcpy_to_host),
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
);
|
||||||
|
assert_eq!(&pixels, &memcpy_dst);
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue