From 9d4d3d94439d6da3a089ac29f2c3cf27b5de8e7d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 4 Mar 2019 14:24:15 +0300 Subject: [PATCH] rsx: Reimplement render target intersection tests when using hw accelerated blit engine - Properly collapse memory tree when scanning in case of overlaps! --- rpcs3/Emu/RSX/Common/surface_store.h | 261 +++------------------------ rpcs3/Emu/RSX/Common/texture_cache.h | 73 +++++--- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 2 +- 4 files changed, 77 insertions(+), 261 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 3de6b35e42..e76321afc8 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -24,33 +24,13 @@ namespace rsx size_t get_packed_pitch(surface_color_format format, u32 width); } - template - struct surface_subresource_storage - { - surface_type surface = nullptr; - u32 base_address = 0; - - u16 x = 0; - u16 y = 0; - u16 w = 0; - u16 h = 0; - - bool is_bound = false; - bool is_depth_surface = false; - bool is_clipped = false; - - surface_subresource_storage() {} - - surface_subresource_storage(u32 addr, surface_type src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth, bool _Clipped = false) - : base_address(addr), surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth), is_clipped(_Clipped) - {} - }; - template struct surface_overlap_info_t { surface_type surface = nullptr; + u32 base_address = 0; bool is_depth = false; + bool is_clipped = false; u16 src_x = 0; u16 src_y = 0; @@ -222,14 +202,14 @@ namespace rsx } } - protected: + public: using surface_storage_type = typename Traits::surface_storage_type; using surface_type = typename Traits::surface_type; using command_list_type = typename Traits::command_list_type; using download_buffer_object = typename Traits::download_buffer_object; - using surface_subresource = surface_subresource_storage; using surface_overlap_info = surface_overlap_info_t; + protected: std::unordered_map m_render_targets_storage = {}; std::unordered_map m_depth_stencil_storage = {}; @@ -829,85 +809,6 @@ namespace rsx } } - /** - * Clipping and fitting lookup functions - * surface_overlaps - returns true if surface overlaps a given surface address and returns the relative x and y position of the surface address within the surface - * address_is_bound - returns true if the surface at a given address is actively bound - * get_surface_subresource_if_available - returns a section descriptor that allows to crop surfaces stored in memory - */ - bool surface_overlaps_address(surface_type surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y) - { - bool is_subslice = false; - u16 x_offset = 0; - u16 y_offset = 0; - - if (surface_address > texaddr) - return false; - - const u32 offset = texaddr - surface_address; - if (offset == 0) - { - *x = 0; - *y = 0; - return true; - } - else - { - surface_format_info info{}; - Traits::get_surface_info(surface, &info); - - bool doubled_x = false; - bool doubled_y = false; - - switch (surface->read_aa_mode) - { - case rsx::surface_antialiasing::square_rotated_4_samples: - case rsx::surface_antialiasing::square_centered_4_samples: - doubled_y = true; - //fall through - case rsx::surface_antialiasing::diagonal_centered_2_samples: - doubled_x = true; - break; - } - - u32 range = info.rsx_pitch * info.surface_height; - if (doubled_y) range <<= 1; - - if (offset < range) - { - y_offset = (offset / info.rsx_pitch); - x_offset = (offset % info.rsx_pitch) / info.bpp; - - if (doubled_x) x_offset /= 2; - if (doubled_y) y_offset /= 2; - - is_subslice = true; - } - } - - if (is_subslice) - { - *x = x_offset; - *y = y_offset; - - return true; - } - - return false; - } - - //Fast hit test - inline bool surface_overlaps_address_fast(surface_type surface, u32 surface_address, u32 texaddr) - { - if (surface_address > texaddr) - return false; - - const u32 offset = texaddr - surface_address; - const u32 range = surface->get_rsx_pitch() * surface->get_surface_height(); - - return (offset < range); - } - bool address_is_bound(u32 address) const { for (auto &surface : m_bound_render_targets) @@ -923,127 +824,8 @@ namespace rsx return false; } - inline bool region_fits(u16 region_width, u16 region_height, u16 x_offset, u16 y_offset, u16 width, u16 height) const - { - if ((x_offset + width) > region_width) return false; - if ((y_offset + height) > region_height) return false; - - return true; - } - - surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, - bool crop = false, bool ignore_depth_formats = false, bool ignore_color_formats = false) - { - auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped) - { - if (surface_overlaps_address(surface, this_address, texaddr, &x_offset, &y_offset)) - { - surface_format_info info{}; - Traits::get_surface_info(surface, &info); - - u16 real_width = requested_width; - u16 real_height = requested_height; - - switch (surface->read_aa_mode) - { - case rsx::surface_antialiasing::diagonal_centered_2_samples: - real_width /= 2; - break; - case rsx::surface_antialiasing::square_centered_4_samples: - case rsx::surface_antialiasing::square_rotated_4_samples: - real_width /= 2; - real_height /= 2; - break; - } - - if (region_fits(info.surface_width, info.surface_height, x_offset, y_offset, real_width, real_height)) - { - w = real_width; - h = real_height; - clipped = false; - - return true; - } - else if (crop && info.surface_width > x_offset && info.surface_height > y_offset) - { - //Forcefully fit the requested region by clipping and scaling - u16 remaining_width = info.surface_width - x_offset; - u16 remaining_height = info.surface_height - y_offset; - - w = std::min(real_width, remaining_width); - h = std::min(real_height, remaining_height); - clipped = true; - - return true; - } - } - - return false; - }; - - surface_type surface = nullptr; - bool clipped = false; - u16 x_offset = 0; - u16 y_offset = 0; - u16 w; - u16 h; - - if (!ignore_color_formats) - { - for (auto &tex_info : m_render_targets_storage) - { - const u32 this_address = std::get<0>(tex_info); - if (texaddr < this_address) - continue; - - surface = std::get<1>(tex_info).get(); - if (!rsx::pitch_compatible(surface, requested_pitch, requested_height)) - continue; - - if (requested_width == 0 || requested_height == 0) - { - if (!surface_overlaps_address_fast(surface, this_address, texaddr)) - continue; - else - return{ this_address, surface, 0, 0, 0, 0, false, false, false }; - } - - if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) - return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), false, clipped }; - } - } - - if (!ignore_depth_formats) - { - //Check depth surfaces for overlap - for (auto &tex_info : m_depth_stencil_storage) - { - const u32 this_address = std::get<0>(tex_info); - if (texaddr < this_address) - continue; - - surface = std::get<1>(tex_info).get(); - if (!rsx::pitch_compatible(surface, requested_pitch, requested_height)) - continue; - - if (requested_width == 0 || requested_height == 0) - { - if (!surface_overlaps_address_fast(surface, this_address, texaddr)) - continue; - else - return{ this_address, surface, 0, 0, 0, 0, false, true, false }; - } - - if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) - return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), true, clipped }; - } - } - - return{}; - } - template - std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp) + std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch) { std::vector result; std::vector> dirty; @@ -1062,7 +844,10 @@ namespace rsx if (!rsx::pitch_compatible(surface, required_pitch, required_height)) continue; - const auto texture_size = pitch * surface->get_surface_height(); + const u16 scale_x = surface->read_aa_mode > rsx::surface_antialiasing::center_1_sample? 2 : 1; + const u16 scale_y = surface->read_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples? 2 : 1; + const auto texture_size = pitch * surface->get_surface_height() * scale_y; + if ((this_address + texture_size) <= texaddr) continue; @@ -1074,27 +859,41 @@ namespace rsx surface_overlap_info info; info.surface = surface; + info.base_address = this_address; info.is_depth = is_depth; + surface_format_info surface_info{}; + Traits::get_surface_info(surface, &surface_info); + if (this_address < texaddr) { + const auto int_required_width = required_width / scale_x; + const auto int_required_height = required_height / scale_y; + auto offset = texaddr - this_address; - info.src_y = (offset / required_pitch); - info.src_x = (offset % required_pitch) / bpp; + info.src_y = (offset / required_pitch) / scale_y; + info.src_x = (offset % required_pitch) / surface_info.bpp / scale_x; info.dst_x = 0; info.dst_y = 0; - info.width = std::min(required_width, surface->get_surface_width() - info.src_x); - info.height = std::min(required_height, surface->get_surface_height() - info.src_y); + info.width = std::min(int_required_width, surface_info.surface_width - info.src_x); + info.height = std::min(int_required_height, surface_info.surface_height - info.src_y); + info.is_clipped = (info.width < int_required_width || info.height < int_required_height); } else { + const auto int_surface_width = surface_info.surface_width * scale_x; + const auto int_surface_height = surface_info.surface_height * scale_y; + auto offset = this_address - texaddr; info.src_x = 0; info.src_y = 0; info.dst_y = (offset / required_pitch); - info.dst_x = (offset % required_pitch) / bpp; - info.width = std::min(surface->get_surface_width(), required_width - info.dst_x); - info.height = std::min(surface->get_surface_height(), required_height - info.dst_y); + info.dst_x = (offset % required_pitch) / surface_info.bpp; + info.width = std::min(int_surface_width, required_width - info.dst_x); + info.height = std::min(int_surface_height, required_height - info.dst_y); + info.is_clipped = (info.width < required_width || info.height < required_height); + info.width /= scale_x; + info.height /= scale_y; } result.push_back(info); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 42e87c4d27..d36cb84124 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2034,8 +2034,7 @@ namespace rsx break; } - auto bpp = get_format_block_size_in_bytes(format); - const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp); + const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch); if (!overlapping_fbos.empty() || !overlapping_locals.empty()) { @@ -2150,8 +2149,19 @@ namespace rsx u16 dst_w = dst.clip_width; u16 dst_h = dst.clip_height; - //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, false, false, false); + auto rtt_lookup = [&m_rtts, &cmd](u32 address, u32 width, u32 height, u32 pitch, bool allow_clipped) -> typename surface_store_type::surface_overlap_info + { + const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch); + if (list.empty() || (list.back().is_clipped && !allow_clipped)) + { + return {}; + } + + return list.back(); + }; + + // Check if src/dst are parts of render targets + auto dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false); dst_is_render_target = dst_subres.surface != nullptr; if (dst_is_render_target && dst_subres.surface->get_native_pitch() != dst.pitch) @@ -2163,7 +2173,7 @@ namespace rsx } //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src_w, src_h, src.pitch, true, false, false); + auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true); src_is_render_target = src_subres.surface != nullptr; if (src_is_render_target && src_subres.surface->get_native_pitch() != src.pitch) @@ -2176,14 +2186,14 @@ namespace rsx if (src_is_render_target && !src_subres.surface->test() && !m_rtts.address_is_bound(src_subres.base_address)) { - m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); + m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth); invalidate_address(cmd, src_subres.base_address, invalidation_cause::read, std::forward(extras)...); src_is_render_target = false; } if (dst_is_render_target && !dst_subres.surface->test() && !m_rtts.address_is_bound(dst_subres.base_address)) { - m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); + m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth); invalidate_address(cmd, dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); dst_is_render_target = false; } @@ -2203,15 +2213,15 @@ namespace rsx { //Enable type scaling in src typeless_info.src_is_typeless = true; - typeless_info.src_is_depth = src_subres.is_depth_surface; + typeless_info.src_is_depth = src_subres.is_depth; typeless_info.src_scaling_hint = (f32)src_bpp / expected_bpp; typeless_info.src_gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; src_w = (u16)(src_w / typeless_info.src_scaling_hint); if (!src_subres.is_clipped) - src_subres.w = (u16)(src_subres.w / typeless_info.src_scaling_hint); + src_subres.width = (u16)(src_subres.width / typeless_info.src_scaling_hint); else - src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src_w, src_h, src.pitch, true, false, false); + src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, true); verify(HERE), src_subres.surface != nullptr; } @@ -2228,15 +2238,15 @@ namespace rsx { //Enable type scaling in dst typeless_info.dst_is_typeless = true; - typeless_info.dst_is_depth = dst_subres.is_depth_surface; + typeless_info.dst_is_depth = dst_subres.is_depth; typeless_info.dst_scaling_hint = (f32)dst_bpp / expected_bpp; typeless_info.dst_gcm_format = dst_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; dst_w = (u16)(dst_w / typeless_info.dst_scaling_hint); if (!dst_subres.is_clipped) - dst_subres.w = (u16)(dst_subres.w / typeless_info.dst_scaling_hint); + dst_subres.width = (u16)(dst_subres.width / typeless_info.dst_scaling_hint); else - dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst_w, dst_h, dst.pitch, true, false, false); + dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, false); verify(HERE), dst_subres.surface != nullptr; } @@ -2325,12 +2335,19 @@ namespace rsx } else { - //TODO: Investigate effects of tile compression + // Destination dimensions are relaxed (true) + dst_area.x1 = dst_subres.src_x; + dst_area.y1 = dst_subres.src_y; + dst_area.x2 += dst_subres.src_x; + dst_area.y2 += dst_subres.src_y; - dst_area.x1 = dst_subres.x; - dst_area.y1 = dst_subres.y; - dst_area.x2 += dst_subres.x; - dst_area.y2 += dst_subres.y; + f32 scale_x = get_internal_scaling_x(dst_subres.surface); + f32 scale_y = get_internal_scaling_y(dst_subres.surface); + + dst_area.x1 = s32(scale_x * dst_area.x1); + dst_area.x2 = s32(scale_x * dst_area.x2); + dst_area.y1 = s32(scale_y * dst_area.y1); + dst_area.y2 = s32(scale_y * dst_area.y2); dest_texture = dst_subres.surface->get_surface(); typeless_info.dst_context = texture_upload_context::framebuffer_storage; @@ -2400,8 +2417,8 @@ namespace rsx { if (!dst_is_render_target) { - u16 src_subres_w = src_subres.w; - u16 src_subres_h = src_subres.h; + u16 src_subres_w = src_subres.width; + u16 src_subres_h = src_subres.height; get_rsx_dimensions(src_subres_w, src_subres_h, src_subres.surface); const int dst_width = (int)(src_subres_w * scale_x * typeless_info.src_scaling_hint); @@ -2411,20 +2428,20 @@ namespace rsx dst_area.y2 = dst_area.y1 + dst_height; } - src_area.x2 = src_subres.w; - src_area.y2 = src_subres.h; + src_area.x2 = src_subres.width; + src_area.y2 = src_subres.height; - src_area.x1 = src_subres.x; - src_area.y1 = src_subres.y; - src_area.x2 += src_subres.x; - src_area.y2 += src_subres.y; + src_area.x1 = src_subres.src_x; + src_area.y1 = src_subres.src_y; + src_area.x2 += src_subres.src_x; + src_area.y2 += src_subres.src_y; vram_texture = src_subres.surface->get_surface(); typeless_info.src_context = texture_upload_context::framebuffer_storage; } - const bool src_is_depth = src_subres.is_depth_surface; - const bool dst_is_depth = dst_is_render_target? dst_subres.is_depth_surface : + const bool src_is_depth = src_subres.is_depth; + const bool dst_is_depth = dst_is_render_target? dst_subres.is_depth : dest_texture ? cached_dest->is_depth_texture() : src_is_depth; //Type of blit decided by the source, destination use should adapt on the fly diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index d18a2d148a..a01351ae3e 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -1645,7 +1645,7 @@ void GLGSRender::flip(int buffer) else { gl::command_context cmd = { gl_state }; - const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch); verify(HERE), !overlap_info.empty(); if (overlap_info.back().surface == render_target_texture) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 50019c77da..de18f08098 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -3313,7 +3313,7 @@ void VKGSRender::flip(int buffer) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch); verify(HERE), !overlap_info.empty(); if (overlap_info.back().surface == render_target_texture)