diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h
index 1d01d09878..b68899ac40 100644
--- a/rpcs3/Emu/RSX/rsx_decode.h
+++ b/rpcs3/Emu/RSX/rsx_decode.h
@@ -1,4 +1,4 @@
-#pragma once
+#pragma once
 #include "Utilities/types.h"
 #include "Utilities/BitField.h"
 #include "Utilities/StrFmt.h"
@@ -2243,9 +2243,22 @@ struct registers_decoder
 	public:
 		decoded_type(u32 raw_value) { m_data.raw_value = raw_value; }
 
-		u32 ds_dx() const
+		// Decodes the raw register as a signed fixed-point value with 20 fractional
+		// bits and returns its reciprocal, 1048576 / (s32)raw, i.e. the x scale factor.
+		// Returns 0 when the low 31 bits are all zero (no usable scaling factor).
+		// The sign is kept: the blit code treats a negative result as a mirrored axis.
+		f32 ds_dx() const
 		{
-			return m_data.raw_value;
+			const u32 val = m_data.raw_value;
+
+			if ((val & 0x7FFFFFFFu) == 0)
+			{
+				return 0;
+			}
+
+			// Divide by the signed integer directly. Converting the masked 31 bits to float
+			// first rounds raw values near 0xFFFFFFFF up to 2^31 and ends in a division by zero.
+			return 1048576.f / static_cast<f32>(static_cast<s32>(val));
 		}
 	};
 
@@ -2268,9 +2281,22 @@ struct registers_decoder
 	public:
 		decoded_type(u32 raw_value) { m_data.raw_value = raw_value; }
 
-		u32 dt_dy() const
+		// Decodes the raw register as a signed fixed-point value with 20 fractional
+		// bits and returns its reciprocal, 1048576 / (s32)raw, i.e. the y scale factor.
+		// Returns 0 when the low 31 bits are all zero (no usable scaling factor).
+		// The sign is kept: the blit code treats a negative result as a mirrored axis.
+		f32 dt_dy() const
 		{
-			return m_data.raw_value;
+			const u32 val = m_data.raw_value;
+
+			if ((val & 0x7FFFFFFFu) == 0)
+			{
+				return 0;
+			}
+
+			// Divide by the signed integer directly. Converting the masked 31 bits to float
+			// first rounds raw values near 0xFFFFFFFF up to 2^31 and ends in a division by zero.
+			return 1048576.f / static_cast<f32>(static_cast<s32>(val));
 		}
 	};
 
diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp
index 40279132bc..ece75cf614 100644
--- a/rpcs3/Emu/RSX/rsx_methods.cpp
+++ b/rpcs3/Emu/RSX/rsx_methods.cpp
@@ -908,15 +908,15 @@ namespace rsx
 				fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)" HERE, (u8)src_color_format);
 			}
 
-			f32 scale_x = 1048576.f / method_registers.blit_engine_ds_dx();
-			f32 scale_y = 1048576.f / method_registers.blit_engine_dt_dy();
+			f32 scale_x = method_registers.blit_engine_ds_dx();
+			f32 scale_y = method_registers.blit_engine_dt_dy();
 
-			u32 convert_w = (u32)(scale_x * in_w);
-			u32 convert_h = (u32)(scale_y * in_h);
+			u32 convert_w = (u32)(std::abs(scale_x) * in_w);
+			u32 convert_h = (u32)(std::abs(scale_y) * in_h);
 
 			if (convert_w == 0 || convert_h == 0)
 			{
-				LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%d, dt_dy=%d)",
+				LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%f, dt_dy=%f)",
 					method_registers.blit_engine_ds_dx(), method_registers.blit_engine_dt_dy());
 				return;
 			}
@@ -960,7 +960,7 @@ namespace rsx
 				dst_info.max_tile_h = static_cast((dst_region.tile->size - dst_region.base) / out_pitch);
 			}
 
-			if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER))
+			if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) && scale_x > 0 && scale_y > 0)
 			{
 				//For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer
 				//TODO: Figure out more instances where we can use this without problems
@@ -996,7 +996,38 @@ namespace rsx
 				return;
 			}
 
-			std::unique_ptr temp1, temp2, sw_temp;
+			std::unique_ptr temp1, temp2, temp3, sw_temp;
+
+			if (scale_y < 0 || scale_x < 0)
+			{
+				temp1.reset(new u8[in_pitch * (in_h - 1) + (in_bpp * in_w)]);
+
+				const s32 stride_y = (scale_y < 0 ? -1 : 1) * in_pitch;
+
+				for (u32 y = 0; y < in_h; ++y)
+				{
+					u8 *dst = temp1.get() + (in_pitch * y);
+					u8 *src = pixels_src + (y * stride_y);
+
+					if (scale_x < 0)
+					{
+						if (in_bpp == 2)
+						{
+							rsx::memcpy_r(dst, src, in_w);
+						}
+						else
+						{
+							rsx::memcpy_r(dst, src, in_w);
+						}
+					}
+					else
+					{
+						std::memcpy(dst, src, in_bpp * in_w);
+					}
+				}
+
+				pixels_src = temp1.get();
+			}
 
 			const AVPixelFormat in_format = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
 			const AVPixelFormat out_format = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
@@ -1007,7 +1038,7 @@ namespace rsx
 				clip_x > 0 || clip_y > 0 ||
 				convert_w != out_w || convert_h != out_h;
 
-			const bool need_convert = out_format != in_format || scale_x != 1.0 || scale_y != 1.0;
+			const bool need_convert = out_format != in_format || std::abs(scale_x) != 1.0 || std::abs(scale_y) != 1.0;
 
 			if (method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d)
 			{
@@ -1017,10 +1048,10 @@ namespace rsx
 				{
 					if (need_convert)
 					{
-						convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
+						convert_scale_image(temp2, out_format, convert_w, convert_h, out_pitch,
 							pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter == blit_engine::transfer_interpolator::foh);
 
-						clip_image(pixels_dst, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+						clip_image(pixels_dst, temp2.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
 					}
 					else
 					{
@@ -1059,23 +1090,23 @@ namespace rsx
 					{
 						if (need_convert)
 						{
-							convert_scale_image(temp1, out_format, convert_w, convert_h, out_pitch,
+							convert_scale_image(temp2, out_format, convert_w, convert_h, out_pitch,
 								pixels_src, in_format, in_w, in_h, in_pitch, slice_h, in_inter == blit_engine::transfer_interpolator::foh);
 
-							clip_image(temp2, temp1.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
+							clip_image(temp3, temp2.get(), clip_x, clip_y, clip_w, clip_h, out_bpp, out_pitch, out_pitch);
 						}
 						else
 						{
-							clip_image(temp2, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
+							clip_image(temp3, pixels_src, clip_x, clip_y, clip_w, clip_h, out_bpp, in_pitch, out_pitch);
 						}
 					}
 					else
 					{
-						convert_scale_image(temp2, out_format, out_w, out_h, out_pitch,
+						convert_scale_image(temp3, out_format, out_w, out_h, out_pitch,
 							pixels_src, in_format, in_w, in_h, in_pitch, clip_h, in_inter == blit_engine::transfer_interpolator::foh);
 					}
 
-					pixels_src = temp2.get();
+					pixels_src = temp3.get();
 				}
 
 				// It looks like rsx may ignore the requested swizzle size and just always
@@ -1094,10 +1125,10 @@ namespace rsx
 				u32 sw_width = next_pow2(out_w);
 				u32 sw_height = next_pow2(out_h);
 
-				temp2.reset(new u8[out_bpp * sw_width * sw_height]);
+				temp3.reset(new u8[out_bpp * sw_width * sw_height]);
 
 				u8* linear_pixels = pixels_src;
-				u8* swizzled_pixels = temp2.get();
+				u8* swizzled_pixels = temp3.get();
 
 				// Check and pad texture out if we are given non power of 2 output
 				if (sw_width != out_w || sw_height != out_h)
diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h
index f36ab54337..2bf92931ac 100644
--- a/rpcs3/Emu/RSX/rsx_methods.h
+++ b/rpcs3/Emu/RSX/rsx_methods.h
@@ -1469,12 +1469,12 @@ namespace rsx
 			return decode().format();
 		}
 
-		u32 blit_engine_ds_dx() const
+		f32 blit_engine_ds_dx() const
 		{
 			return decode().ds_dx();
 		}
 
-		u32 blit_engine_dt_dy() const
+		f32 blit_engine_dt_dy() const
 		{
 			return decode().dt_dy();
 		}
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index fbe7b48f02..bc6e695315 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -161,6 +161,19 @@ namespace rsx
 		return static_cast((1ULL << 32) >> utils::cntlz32(x - 1, true));
 	}
 
+	// Copies `size` elements from src to dst while walking the source backwards:
+	// dst[i] = src[-i], so src must point at the highest-addressed source element.
+	// Elements are read as Ts and stored as Td; `size` counts elements, not bytes.
+	// Used to mirror a row when transferring image data with a negative x scale.
+	template
+	static void memcpy_r(void* dst, void* src, std::size_t size)
+	{
+		for (std::size_t i = 0; i < size; i++)
+		{
+			*((Td*)dst + i) = *((Ts*)src - i);
+		}
+	}
+
 	// Returns interleaved bits of X|Y|Z used as Z-order curve indices
 	static inline u32 calculate_z_index(u32 x, u32 y, u32 z)
 	{