From 82af17beb16c10152e4ac96a4c33bd2408ef4dca Mon Sep 17 00:00:00 2001
From: kd-11
Date: Thu, 16 Jan 2020 17:24:04 +0300
Subject: [PATCH] gl: Optimize image operations

- Avoid double transfers where a transfer to a temp image is done without
  scaling and then a secondary transfer follows.
  Combines the two steps into one whenever possible which can significantly
  alleviate bandwidth problems at higher resolutions.
  Significant speedup, up to 90% in some cases (PDF, PDF2)
---
 rpcs3/Emu/RSX/GL/GLHelpers.cpp    | 29 +++++++++++++++++++++++++++++
 rpcs3/Emu/RSX/GL/GLTextureCache.h | 11 +++++++++++
 2 files changed, 40 insertions(+)

diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp
index 6dc8bbebb6..9c8ba3fd08 100644
--- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp
+++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp
@@ -382,6 +382,35 @@ namespace gl
 		const gl::texture* real_src = src;
 		const gl::texture* real_dst = dst;
 
+		// Optimization pass; check for pass-through data transfer (no flip, equal heights, equal effective widths)
+		if (!xfer_info.flip_horizontal && !xfer_info.flip_vertical && src_rect.height() == dst_rect.height())
+		{
+			auto src_w = src_rect.width();
+			auto dst_w = dst_rect.width();
+
+			if (xfer_info.src_is_typeless) src_w *= xfer_info.src_scaling_hint;
+			if (xfer_info.dst_is_typeless) dst_w *= xfer_info.dst_scaling_hint;
+
+			if (src_w == dst_w)
+			{
+				// Final dimensions are a match; src_w/dst_w are the hint-scaled widths and are only used for this comparison
+				if (xfer_info.src_is_typeless || xfer_info.dst_is_typeless)
+				{
+					const coord3i src_region = { { src_rect.x1, src_rect.y1, 0 }, { src_rect.width(), src_rect.height(), 1 } }; // extent in src's own texels, like the origin
+					const coord3i dst_region = { { dst_rect.x1, dst_rect.y1, 0 }, { dst_rect.width(), dst_rect.height(), 1 } }; // extent in dst's own texels, like the origin
+					gl::copy_typeless(dst, src, dst_region, src_region);
+				}
+				else
+				{
+					glCopyImageSubData(src->id(), GL_TEXTURE_2D, 0, src_rect.x1, src_rect.y1, 0, // 2nd argument is the source target enum, not a texture name
+						dst->id(), GL_TEXTURE_2D, 0, dst_rect.x1, dst_rect.y1, 0,
+						src_w, src_rect.height(), 1);
+				}
+
+				return;
+			}
+		}
+
 		if (xfer_info.src_is_typeless)
 		{
 			const auto internal_fmt = xfer_info.src_native_format_override ?
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index ab28008155..3e7a033679 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -609,6 +609,17 @@ namespace gl
 					const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp;
 					const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp;
 
+					if (slice.level == 0 && slice.dst_w == src_w2 && slice.dst_h == src_h)
+					{
+						// Fast path: the bpp-adjusted source extent already equals the destination extent,
+						// so do one typeless copy straight into dst_image instead of tmp copy + data copy
+						const coord3u read_area = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
+						const coord3u write_area = { { slice.dst_x, slice.dst_y, slice.dst_z }, { slice.dst_w, slice.dst_h, 1 } };
+						gl::copy_typeless(dst_image, slice.src, write_area, read_area);
+
+						continue;
+					}
+
 					const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
 					const coord3u dst_region = { { src_x2, src_y, 0 }, { src_w2, src_h, 1 } };
 					gl::copy_typeless(src_image, slice.src, dst_region, src_region);