From 53f2533a081afbe68d5d849d0f0da80847c9ea87 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 1 Apr 2018 02:47:43 +0300 Subject: [PATCH] rsx: Implement proper Z-order curve in 3 dimensions - Should fix garbage palette textures getting uploaded (LSD graphics) --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 24 +++++++++--- rpcs3/Emu/RSX/rsx_utils.h | 54 ++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 2f3631bfb6..7c8efe1100 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -42,13 +42,25 @@ struct copy_unmodified_block_swizzled template static void copy_mipmap_level(gsl::span dst, gsl::span src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block) { - std::unique_ptr temp_swizzled(new U[width_in_block * row_count]); - for (int d = 0; d < depth; ++d) + if (std::is_same::value && dst_pitch_in_block == width_in_block) { - rsx::convert_linear_swizzle((void*)src.subspan(d * width_in_block * row_count).data(), temp_swizzled.get(), width_in_block, row_count, true); - gsl::span swizzled_src{ temp_swizzled.get(), ::narrow(width_in_block * row_count) }; - for (int row = 0; row < row_count; ++row) - copy(dst.subspan((row + d * row_count) * dst_pitch_in_block, width_in_block), swizzled_src.subspan(row * width_in_block, width_in_block)); + rsx::convert_linear_swizzle_3d((void*)src.data(), (void*)dst.data(), width_in_block, row_count, depth); + } + else + { + std::vector tmp(width_in_block * row_count * depth); + rsx::convert_linear_swizzle_3d((void*)src.data(), tmp.data(), width_in_block, row_count, depth); + + gsl::span src_span = tmp; + u32 src_offset = 0; + u32 dst_offset = 0; + + for (int n = 0; n < row_count * depth; ++n) + { + copy(dst.subspan(dst_offset, width_in_block), src_span.subspan(src_offset, width_in_block)); + dst_offset += dst_pitch_in_block; + src_offset += width_in_block; + } } 
} }; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index b1633abef5..a34cf5ae54 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -96,10 +96,32 @@ namespace rsx return static_cast((1ULL << 32) >> ::cntlz32(x - 1, true)); } + // Returns interleaved bits of X|Y|Z used as Z-order curve indices + static inline u32 calculate_z_index(u32 x, u32 y, u32 z) + { + //Result = X' | Y' | Z' which are x,y,z bits interleaved + u32 shift_size = 0; + u32 result = 0; + + while (x | y | z) + { + result |= (x & 0x1) << shift_size++; + result |= (y & 0x1) << shift_size++; + result |= (z & 0x1) << shift_size++; + + x >>= 1; + y >>= 1; + z >>= 1; + } + + return result; + } + /* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels * - Input can be swizzled or linear, bool flag handles conversion to and from * - It will handle any width and height that are a power of 2, square or non square - * Restriction: It has mixed results if the height or width is not a power of 2 + * Restriction: It has mixed results if the height or width is not a power of 2 + * Restriction: Only works with 2D surfaces */ template void convert_linear_swizzle(void* input_pixels, void* output_pixels, u16 width, u16 height, bool input_is_swizzled) @@ -172,6 +194,36 @@ namespace rsx } } + /** + * Write swizzled data to linear memory with support for 3 dimensions + * Z ordering is done in all 3 planes independently with a unit being a 2x2 block per-plane + * A unit in 3d textures is a group of 2x2x2 texels advancing towards depth in units of 2x2x1 blocks + * i.e. 8 texels (2 * 2 * 2) per "unit". Restriction: the x, y and z bits are always interleaved in full, so the source index can exceed width * height * depth when the dimensions differ (e.g. 2x4x2 reaches index 23 of 16) + */ + template + void convert_linear_swizzle_3d(void *input_pixels, void *output_pixels, u16 width, u16 height, u16 depth) + { + if (depth == 1) + { + convert_linear_swizzle(input_pixels, output_pixels, width, height, true); + return; + } + + T *src = static_cast(input_pixels); + T *dst = static_cast(output_pixels); + + for (u32 z = 0; z < depth; ++z) 
+ { + for (u32 y = 0; y < height; ++y) + { + for (u32 x = 0; x < width; ++x) + { + *dst++ = src[calculate_z_index(x, y, z)]; + } + } + } + } + void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes = false); void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,