From 15d5507154d396d8f714a63b59404b9cbf958859 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 16 Dec 2018 14:57:22 +0300 Subject: [PATCH] rsx: Rewrite memory inheritance transfers - Implicitly invoke a memory barrier if actively reading from an unsynchronized texture - Simplify memory transfer operations - Should allow more games to work without strict mode --- rpcs3/Emu/RSX/Common/TextureUtils.h | 32 ++++++ rpcs3/Emu/RSX/Common/texture_cache.h | 16 ++- rpcs3/Emu/RSX/Common/texture_cache_utils.h | 30 ----- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 61 ++++------ rpcs3/Emu/RSX/GL/GLHelpers.cpp | 98 ++++++++++++++++ rpcs3/Emu/RSX/GL/GLHelpers.h | 24 ++++ rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 65 +++++++++++ rpcs3/Emu/RSX/GL/GLRenderTargets.h | 2 + rpcs3/Emu/RSX/GL/GLTextureCache.h | 112 ------------------ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 26 +---- rpcs3/Emu/RSX/VK/VKHelpers.h | 10 ++ rpcs3/Emu/RSX/VK/VKRenderTargets.h | 49 ++++++++ rpcs3/Emu/RSX/VK/VKTexture.cpp | 119 ++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 125 +-------------------- 14 files changed, 434 insertions(+), 335 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index f95f1b659a..54c3b415d1 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -33,6 +33,38 @@ namespace rsx virtual ~sampled_image_descriptor_base() {} virtual u32 encoded_component_map() const = 0; }; + + struct typeless_xfer + { + bool src_is_typeless = false; + bool dst_is_typeless = false; + bool src_is_depth = false; + bool dst_is_depth = false; + u32 src_gcm_format = 0; + u32 dst_gcm_format = 0; + u32 src_native_format_override = 0; + u32 dst_native_format_override = 0; + f32 src_scaling_hint = 1.f; + f32 dst_scaling_hint = 1.f; + texture_upload_context src_context = texture_upload_context::blit_engine_src; + texture_upload_context dst_context = texture_upload_context::blit_engine_dst; + + void analyse() + { + if (src_is_typeless && 
dst_is_typeless) + { + if (src_scaling_hint == dst_scaling_hint && + src_scaling_hint != 1.f) + { + if (src_is_depth == dst_is_depth) + { + src_is_typeless = dst_is_typeless = false; + src_scaling_hint = dst_scaling_hint = 1.f; + } + } + } + } + }; } struct rsx_subresource_layout diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 5011db294f..52d229a06b 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1444,7 +1444,8 @@ namespace rsx } template - std::vector gather_texture_slices_from_framebuffers(u32 texaddr, u16 slice_w, u16 slice_h, u16 pitch, u16 count, u8 bpp, surface_store_type& m_rtts) + std::vector gather_texture_slices_from_framebuffers(commandbuffer_type& cmd, + u32 texaddr, u16 slice_w, u16 slice_h, u16 pitch, u16 count, u8 bpp, surface_store_type& m_rtts) { std::vector surfaces; u32 current_address = texaddr; @@ -1465,6 +1466,8 @@ namespace rsx { for (auto &section : overlapping) { + section.surface->memory_barrier(cmd); + surfaces.push_back ({ section.surface->get_surface(), @@ -1501,6 +1504,8 @@ namespace rsx u32 internal_height = tex_height; get_native_dimensions(internal_width, internal_height, texptr); + texptr->memory_barrier(cmd); + if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) { @@ -1521,7 +1526,7 @@ namespace rsx rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; auto bpp = get_format_block_size_in_bytes(format); - desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(texaddr, tex_width, tex_height, tex_pitch, 6, bpp, m_rtts)); + desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, 6, bpp, m_rtts)); return desc; } else if (extended_dimension == 
rsx::texture_dimension_extended::texture_dimension_3d && tex_depth > 1) @@ -1543,7 +1548,7 @@ namespace rsx rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; const auto bpp = get_format_block_size_in_bytes(format); - desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(texaddr, tex_width, tex_height, tex_pitch, tex_depth, bpp, m_rtts)); + desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, tex_depth, bpp, m_rtts)); return desc; } } @@ -1562,8 +1567,7 @@ namespace rsx auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); bool requires_merging = false; - // TODO ruipin: This AUDIT fails due to a bug that kd will have to fix - //AUDIT( !overlapping.empty() ); + AUDIT(!overlapping.empty()); if (overlapping.size() > 1) { // The returned values are sorted with oldest first and newest last @@ -1593,6 +1597,8 @@ namespace rsx for (auto &section : overlapping) { + section.surface->memory_barrier(cmd); + result.external_subresource_desc.sections_to_copy.push_back ({ section.surface->get_surface(), diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 161a490a92..76ca912b06 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -105,36 +105,6 @@ namespace rsx constexpr operator enum_type() const { return cause; } }; - struct typeless_xfer - { - bool src_is_typeless = false; - bool dst_is_typeless = false; - bool src_is_depth = false; - bool dst_is_depth = false; - u32 src_gcm_format = 0; - u32 dst_gcm_format = 0; - f32 src_scaling_hint = 1.f; - f32 dst_scaling_hint = 1.f; - texture_upload_context src_context = texture_upload_context::blit_engine_src; - texture_upload_context dst_context = texture_upload_context::blit_engine_dst; - - void analyse() - { - if 
(src_is_typeless && dst_is_typeless) - { - if (src_scaling_hint == dst_scaling_hint && - src_scaling_hint != 1.f) - { - if (src_is_depth == dst_is_depth) - { - src_is_typeless = dst_is_typeless = false; - src_scaling_hint = dst_scaling_hint = 1.f; - } - } - } - } - }; - /** diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index a5ebd0838f..408f6b060d 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -210,26 +210,6 @@ void GLGSRender::end() } }; - //Check if depth buffer is bound and valid - //If ds is not initialized clear it; it seems new depth textures should have depth cleared - auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth) - { - if (surface->get_internal_format() == surface->old_contents->get_internal_format()) - { - // Disable stencil test to avoid switching off and back on later - gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); - - // Copy data from old contents onto this one - const auto region = rsx::get_transferable_region(surface); - gl::g_hw_blitter->scale_image(surface->old_contents, surface, { 0, 0, std::get<0>(region), std::get<1>(region) }, { 0, 0, std::get<2>(region) , std::get<3>(region) }, !is_depth, is_depth, {}); - - // Memory has been transferred, discard old contents and update memory flags - // TODO: Preserve memory outside surface clip region - surface->on_write(); - } - //TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible - }; - //Check if we have any 'recycled' surfaces in memory and if so, clear them std::vector buffers_to_clear; bool clear_all_color = true; @@ -291,24 +271,6 @@ void GLGSRender::end() ds->on_write(); } - if (g_cfg.video.strict_rendering_mode) - { - if (ds && ds->old_contents != nullptr) - copy_rtt_contents(ds, true); - - for (auto &rtt : m_rtts.m_bound_render_targets) - { - if (auto surface = std::get<1>(rtt)) - { - if (surface->old_contents != nullptr) - 
copy_rtt_contents(surface, false); - } - } - } - - // Unconditionally enable stencil test if it was disabled before - gl_state.enable(GL_TRUE, GL_SCISSOR_TEST); - // Load textures { std::chrono::time_point textures_start = steady_clock::now(); @@ -473,9 +435,28 @@ void GLGSRender::end() std::chrono::time_point textures_end = steady_clock::now(); m_textures_upload_time += (u32)std::chrono::duration_cast(textures_end - textures_start).count(); - update_draw_state(); + std::chrono::time_point draw_start = textures_end; - std::chrono::time_point draw_start = steady_clock::now(); + // Optionally do memory synchronization if the texture stage has not yet triggered this + if (g_cfg.video.strict_rendering_mode) + { + gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); + + if (ds) ds->memory_barrier(); + + for (auto &rtt : m_rtts.m_bound_render_targets) + { + if (auto surface = std::get<1>(rtt)) + { + surface->memory_barrier(); + } + } + } + + // Unconditionally enable stencil test if it was disabled before + gl_state.enable(GL_TRUE, GL_SCISSOR_TEST); + + update_draw_state(); if (g_cfg.video.debug_output) { diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.cpp b/rpcs3/Emu/RSX/GL/GLHelpers.cpp index 29a47d3b8d..d6a8c52fe7 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.cpp +++ b/rpcs3/Emu/RSX/GL/GLHelpers.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "GLHelpers.h" +#include "GLTexture.h" #include "Utilities/Log.h" namespace gl @@ -360,4 +361,101 @@ namespace gl { return attrib_t(index); } + + void blitter::scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, + bool is_depth_copy, const rsx::typeless_xfer& xfer_info) + { + std::unique_ptr typeless_src; + std::unique_ptr typeless_dst; + u32 src_id = src->id(); + u32 dst_id = dst->id(); + + if (xfer_info.src_is_typeless) + { + const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint); + const auto internal_fmt = xfer_info.src_native_format_override ? 
+ GLenum(xfer_info.src_native_format_override) : + get_sized_internal_format(xfer_info.src_gcm_format); + + typeless_src = std::make_unique(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt); + copy_typeless(typeless_src.get(), src); + + src_id = typeless_src->id(); + src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint); + src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint); + } + + if (xfer_info.dst_is_typeless) + { + const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint); + const auto internal_fmt = xfer_info.dst_native_format_override ? + GLenum(xfer_info.dst_native_format_override) : + get_sized_internal_format(xfer_info.dst_gcm_format); + + typeless_dst = std::make_unique(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt); + copy_typeless(typeless_dst.get(), dst); + + dst_id = typeless_dst->id(); + dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint); + dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint); + } + + s32 old_fbo = 0; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); + + filter interp = (linear_interpolation && !is_depth_copy) ? 
filter::linear : filter::nearest; + GLenum attachment; + gl::buffers target; + + if (is_depth_copy) + { + if (src->get_internal_format() == gl::texture::internal_format::depth16 || + dst->get_internal_format() == gl::texture::internal_format::depth16) + { + attachment = GL_DEPTH_ATTACHMENT; + target = gl::buffers::depth; + } + else + { + attachment = GL_DEPTH_STENCIL_ATTACHMENT; + target = gl::buffers::depth_stencil; + } + } + else + { + attachment = GL_COLOR_ATTACHMENT0; + target = gl::buffers::color; + } + + blit_src.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0); + blit_src.check(); + + blit_dst.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0); + blit_dst.check(); + + GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); + if (scissor_test_enabled) + glDisable(GL_SCISSOR_TEST); + + blit_src.blit(blit_dst, src_rect, dst_rect, target, interp); + + if (xfer_info.dst_is_typeless) + { + //Transfer contents from typeless dst back to original dst + copy_typeless(dst, typeless_dst.get()); + } + + blit_src.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); + + blit_dst.bind(); + glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); + + if (scissor_test_enabled) + glEnable(GL_SCISSOR_TEST); + + glBindFramebuffer(GL_FRAMEBUFFER, old_fbo); + } } diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 9ce6f44565..65d2c384c9 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -10,6 +10,7 @@ #include "OpenGL.h" #include "../GCM.h" +#include "../Common/TextureUtils.h" #include "Utilities/geometry.h" @@ -2792,4 +2793,27 @@ public: } }; } + + class blitter + { + fbo blit_src; + fbo blit_dst; + + public: + + void init() + { + blit_src.create(); + blit_dst.create(); + } + + void destroy() + { + blit_dst.remove(); + blit_src.remove(); + } + + void scale_image(const texture* src, 
texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, + bool is_depth_copy, const rsx::typeless_xfer& xfer_info); + }; } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index ffb8f63ac2..45bdec3aaf 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -553,3 +553,68 @@ void GLGSRender::read_buffers() std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type); } } + +void gl::render_target::memory_barrier(void*) +{ + if (!old_contents) + { + // No memory to inherit + return; + } + + auto src_texture = static_cast(old_contents); + if (src_texture->get_rsx_pitch() != get_rsx_pitch()) + { + LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory"); + return; + } + + auto is_depth = [](gl::texture::internal_format format) + { + // TODO: Change this to image aspect semantics + switch (format) + { + case gl::texture::internal_format::depth16: + case gl::texture::internal_format::depth24_stencil8: + case gl::texture::internal_format::depth32f_stencil8: + return true; + default: + return false; + } + }; + + auto src_bpp = src_texture->get_native_pitch() / src_texture->width(); + auto dst_bpp = get_native_pitch() / width(); + rsx::typeless_xfer typeless_info{}; + + const bool dst_is_depth = is_depth(get_internal_format()); + const auto region = rsx::get_transferable_region(this); + + if (get_internal_format() == src_texture->get_internal_format()) + { + // Copy data from old contents onto this one + verify(HERE), src_bpp == dst_bpp; + } + else + { + // Mem cast, generate typeless xfer info + const bool src_is_depth = is_depth(src_texture->get_internal_format()); + if (src_bpp != dst_bpp || dst_is_depth || src_is_depth) + { + typeless_info.src_is_typeless = true; + typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; + typeless_info.src_native_format_override = (u32)get_internal_format(); + 
typeless_info.src_is_depth = src_is_depth; + typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; + } + } + + gl::g_hw_blitter->scale_image(old_contents, this, + { 0, 0, std::get<0>(region), std::get<1>(region) }, + { 0, 0, std::get<2>(region) , std::get<3>(region) }, + !dst_is_depth, dst_is_depth, typeless_info); + + // Memory has been transferred, discard old contents and update memory flags + // TODO: Preserve memory outside surface clip region + on_write(); +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index f238745606..b40b812285 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -130,6 +130,8 @@ namespace gl //Use forward scaling to account for rounding and clamping errors return (rsx::apply_resolution_scale(_width, true) == internal_width) && (rsx::apply_resolution_scale(_height, true) == internal_height); } + + void memory_barrier(void* = nullptr); }; struct framebuffer_holder : public gl::fbo, public rsx::ref_counted diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 24469dfe56..11398610df 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -30,118 +30,6 @@ namespace gl extern void copy_typeless(texture*, const texture*); extern blitter *g_hw_blitter; - class blitter - { - fbo blit_src; - fbo blit_dst; - - public: - - void init() - { - blit_src.create(); - blit_dst.create(); - } - - void destroy() - { - blit_dst.remove(); - blit_src.remove(); - } - - void scale_image(const texture* src, texture* dst, areai src_rect, areai dst_rect, bool linear_interpolation, - bool is_depth_copy, const rsx::typeless_xfer& xfer_info) - { - std::unique_ptr typeless_src; - std::unique_ptr typeless_dst; - u32 src_id = src->id(); - u32 dst_id = dst->id(); - - if (xfer_info.src_is_typeless) - { - const auto internal_width = (u16)(src->width() * xfer_info.src_scaling_hint); - const auto 
internal_fmt = get_sized_internal_format(xfer_info.src_gcm_format); - typeless_src = std::make_unique(GL_TEXTURE_2D, internal_width, src->height(), 1, 1, internal_fmt); - copy_typeless(typeless_src.get(), src); - - src_id = typeless_src->id(); - src_rect.x1 = (u16)(src_rect.x1 * xfer_info.src_scaling_hint); - src_rect.x2 = (u16)(src_rect.x2 * xfer_info.src_scaling_hint); - } - - if (xfer_info.dst_is_typeless) - { - const auto internal_width = (u16)(dst->width() * xfer_info.dst_scaling_hint); - const auto internal_fmt = get_sized_internal_format(xfer_info.dst_gcm_format); - typeless_dst = std::make_unique(GL_TEXTURE_2D, internal_width, dst->height(), 1, 1, internal_fmt); - copy_typeless(typeless_dst.get(), dst); - - dst_id = typeless_dst->id(); - dst_rect.x1 = (u16)(dst_rect.x1 * xfer_info.dst_scaling_hint); - dst_rect.x2 = (u16)(dst_rect.x2 * xfer_info.dst_scaling_hint); - } - - s32 old_fbo = 0; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); - - filter interp = (linear_interpolation && !is_depth_copy) ? 
filter::linear : filter::nearest; - GLenum attachment; - gl::buffers target; - - if (is_depth_copy) - { - if (src->get_internal_format() == gl::texture::internal_format::depth16 || - dst->get_internal_format() == gl::texture::internal_format::depth16) - { - attachment = GL_DEPTH_ATTACHMENT; - target = gl::buffers::depth; - } - else - { - attachment = GL_DEPTH_STENCIL_ATTACHMENT; - target = gl::buffers::depth_stencil; - } - } - else - { - attachment = GL_COLOR_ATTACHMENT0; - target = gl::buffers::color; - } - - blit_src.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, src_id, 0); - blit_src.check(); - - blit_dst.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_id, 0); - blit_dst.check(); - - GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); - if (scissor_test_enabled) - glDisable(GL_SCISSOR_TEST); - - blit_src.blit(blit_dst, src_rect, dst_rect, target, interp); - - if (xfer_info.dst_is_typeless) - { - //Transfer contents from typeless dst back to original dst - copy_typeless(dst, typeless_dst.get()); - } - - blit_src.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); - - blit_dst.bind(); - glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, GL_NONE, 0); - - if (scissor_test_enabled) - glEnable(GL_SCISSOR_TEST); - - glBindFramebuffer(GL_FRAMEBUFFER, old_fbo); - } - }; - - class cached_texture_section; class texture_cache; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index fec90ddcd1..fe384e4b1d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1387,41 +1387,19 @@ void VKGSRender::end() if (g_cfg.video.strict_rendering_mode) { - auto copy_rtt_contents = [&](vk::render_target* surface, bool is_depth) - { - if (LIKELY(surface->info.format == surface->old_contents->info.format)) - { - const auto region = rsx::get_transferable_region(surface); - const auto src_w = 
std::get<0>(region); - const auto src_h = std::get<1>(region); - const auto dst_w = std::get<2>(region); - const auto dst_h = std::get<3>(region); - - const VkImageAspectFlags aspect = surface->attachment_aspect_flag; - - vk::copy_scaled_image(*m_current_command_buffer, surface->old_contents->value, surface->value, - surface->old_contents->current_layout, surface->current_layout, 0, 0, src_w, src_h, - 0, 0, dst_w, dst_h, 1, aspect, true, VK_FILTER_LINEAR, surface->info.format, surface->old_contents->info.format); - - // Memory has been transferred, discard old contents and update memory flags - // TODO: Preserve memory outside surface clip region - surface->on_write(); - } - }; - //Prepare surfaces if needed for (auto &rtt : m_rtts.m_bound_render_targets) { if (auto surface = std::get<1>(rtt)) { if (surface->old_contents != nullptr) - copy_rtt_contents(surface, false); + surface->memory_barrier(*m_current_command_buffer); } } if (ds && ds->old_contents) { - copy_rtt_contents(ds, true); + ds->memory_barrier(*m_current_command_buffer); } } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index ecab3f612b..ca779f03af 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -2980,4 +2980,14 @@ public: } } }; + + class blitter + { + vk::command_buffer* commands; + + public: + blitter(vk::command_buffer *c) : commands(c) {} + + void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info); + }; } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 75bce80931..9ac9ac7f2b 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -5,6 +5,7 @@ #include "../GCM.h" #include "../Common/surface_store.h" #include "../Common/TextureUtils.h" +#include "../Common/texture_cache_utils.h" #include "VKFormats.h" #include "../rsx_utils.h" @@ -55,6 +56,54 @@ namespace vk //Use 
forward scaling to account for rounding and clamping errors return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); } + + void memory_barrier(vk::command_buffer& cmd) + { + if (!old_contents) + { + return; + } + + auto src_texture = static_cast(old_contents); + if (src_texture->get_rsx_pitch() != get_rsx_pitch()) + { + LOG_TODO(RSX, "Pitch mismatch, could not transfer inherited memory"); + return; + } + + auto src_bpp = src_texture->get_native_pitch() / src_texture->width(); + auto dst_bpp = get_native_pitch() / width(); + rsx::typeless_xfer typeless_info{}; + + const auto region = rsx::get_transferable_region(this); + + if (src_texture->info.format == info.format) + { + verify(HERE), src_bpp == dst_bpp; + } + else + { + const bool src_is_depth = !!(vk::get_aspect_flags(src_texture->info.format) & VK_IMAGE_ASPECT_DEPTH_BIT); + const bool dst_is_depth = !!(vk::get_aspect_flags(info.format) & VK_IMAGE_ASPECT_DEPTH_BIT); + + if (src_bpp != dst_bpp || src_is_depth || dst_is_depth) + { + typeless_info.src_is_typeless = true; + typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; + typeless_info.src_native_format_override = (u32)info.format; + typeless_info.src_is_depth = src_is_depth; + typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; + } + } + + vk::blitter hw_blitter(&cmd); + hw_blitter.scale_image(old_contents, this, + { 0, 0, std::get<0>(region), std::get<1>(region) }, + { 0, 0, std::get<2>(region) , std::get<3>(region) }, + /*linear?*/false, /*depth?(unused)*/false, typeless_info); + + on_write(); + } }; struct framebuffer_holder: public vk::framebuffer, public rsx::ref_counted diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index f77cfc70bf..e2bdf998d1 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -529,4 +529,123 @@ namespace vk return{ final_mapping[1], final_mapping[2], final_mapping[3], 
final_mapping[0] }; } + + void blitter::scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info) + { + const auto src_aspect = vk::get_aspect_flags(src->info.format); + const auto dst_aspect = vk::get_aspect_flags(dst->info.format); + + vk::image* real_src = src; + vk::image* real_dst = dst; + + if (xfer_info.src_is_typeless) + { + auto internal_width = src->width() * xfer_info.src_scaling_hint; + auto format = xfer_info.src_native_format_override ? + VkFormat(xfer_info.src_native_format_override) : + vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format); + + // Transfer bits from src to typeless src + real_src = vk::get_typeless_helper(format); + src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint); + src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint); + + vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), (s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1, + vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format)); + } + + if (xfer_info.dst_is_typeless) + { + auto internal_width = dst->width() * xfer_info.dst_scaling_hint; + auto format = xfer_info.dst_native_format_override ? 
+ VkFormat(xfer_info.dst_native_format_override) : + vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format); + + // Transfer bits from dst to typeless dst + real_dst = vk::get_typeless_helper(format); + dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint); + dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint); + + vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, + vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format)); + } + else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage) + { + if (xfer_info.src_context != rsx::texture_upload_context::blit_engine_dst && + xfer_info.src_context != rsx::texture_upload_context::framebuffer_storage) + { + // Data moving to rendertarget, where byte ordering has to be preserved + // NOTE: This is a workaround, true accuracy would require all RTT<->cache transfers to invoke this step but thats too slow + // Sampling is ok; image view swizzle will work around it + if (dst->info.format == VK_FORMAT_B8G8R8A8_UNORM) + { + // For this specific format, channel ordering is faked via custom remap, undo this before transfer + VkBufferImageCopy copy{}; + copy.imageExtent = src->info.extent; + copy.imageOffset = { 0, 0, 0 }; + copy.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; + + const auto scratch_buf = vk::get_scratch_buffer(); + const auto data_length = src->info.extent.width * src->info.extent.height * 4; + + const auto current_layout = src->current_layout; + vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy); + vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + + 
vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + + vk::get_compute_task()->run(*commands, scratch_buf, data_length); + + vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); + + real_src = vk::get_typeless_helper(src->info.format); + vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + + vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); + } + } + } + + // Checks + if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1) + { + LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!"); + return; + } + + if (src_area.x1 < 0 || src_area.x2 >(s32)real_src->width() || src_area.y1 < 0 || src_area.y2 >(s32)real_src->height()) + { + LOG_ERROR(RSX, "Blit request denied because the source region does not fit!"); + return; + } + + if (dst_area.x1 < 0 || dst_area.x2 >(s32)real_dst->width() || dst_area.y1 < 0 || dst_area.y2 >(s32)real_dst->height()) + { + LOG_ERROR(RSX, "Blit request denied because the destination region does not fit!"); + return; + } + + const auto src_width = src_area.x2 - src_area.x1; + const auto src_height = src_area.y2 - src_area.y1; + const auto dst_width = dst_area.x2 - dst_area.x1; + const auto dst_height = dst_area.y2 - dst_area.y1; + + copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout, real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, + dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format, + interpolate ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format); + + if (real_dst != dst) + { + auto internal_width = dst->width() * xfer_info.dst_scaling_hint; + vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1, + vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format)); + } + + change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { (VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers }); + } } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 401c4e16c7..903d0a238f 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -1097,130 +1097,7 @@ namespace vk bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) { - struct blit_helper - { - vk::command_buffer* commands; - VkFormat format; - blit_helper(vk::command_buffer *c) : commands(c) {} - - void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool /*is_depth*/, const rsx::typeless_xfer& xfer_info) - { - const auto src_aspect = vk::get_aspect_flags(src->info.format); - const auto dst_aspect = vk::get_aspect_flags(dst->info.format); - - vk::image* real_src = src; - vk::image* real_dst = dst; - - if (xfer_info.src_is_typeless) - { - auto internal_width = src->width() * xfer_info.src_scaling_hint; - auto format = vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.src_gcm_format); - - // Transfer bits from src to typeless src - real_src = vk::get_typeless_helper(format); - src_area.x1 = (u16)(src_area.x1 * xfer_info.src_scaling_hint); - src_area.x2 = (u16)(src_area.x2 * xfer_info.src_scaling_hint); - - vk::copy_image_typeless(*commands, src, real_src, { 0, 0, (s32)src->width(), 
(s32)src->height() }, { 0, 0, (s32)internal_width, (s32)src->height() }, 1, - vk::get_aspect_flags(src->info.format), vk::get_aspect_flags(format)); - } - - if (xfer_info.dst_is_typeless) - { - auto internal_width = dst->width() * xfer_info.dst_scaling_hint; - auto format = vk::get_compatible_sampler_format(vk::get_current_renderer()->get_formats_support(), xfer_info.dst_gcm_format); - - // Transfer bits from dst to typeless dst - real_dst = vk::get_typeless_helper(format); - dst_area.x1 = (u16)(dst_area.x1 * xfer_info.dst_scaling_hint); - dst_area.x2 = (u16)(dst_area.x2 * xfer_info.dst_scaling_hint); - - vk::copy_image_typeless(*commands, dst, real_dst, { 0, 0, (s32)dst->width(), (s32)dst->height() }, { 0, 0, (s32)internal_width, (s32)dst->height() }, 1, - vk::get_aspect_flags(dst->info.format), vk::get_aspect_flags(format)); - } - else if (xfer_info.dst_context == rsx::texture_upload_context::framebuffer_storage) - { - if (xfer_info.src_context != rsx::texture_upload_context::blit_engine_dst && - xfer_info.src_context != rsx::texture_upload_context::framebuffer_storage) - { - // Data moving to rendertarget, where byte ordering has to be preserved - // NOTE: This is a workaround, true accuracy would require all RTT<->cache transfers to invoke this step but thats too slow - // Sampling is ok; image view swizzle will work around it - if (dst->info.format == VK_FORMAT_B8G8R8A8_UNORM) - { - // For this specific format, channel ordering is faked via custom remap, undo this before transfer - VkBufferImageCopy copy{}; - copy.imageExtent = src->info.extent; - copy.imageOffset = { 0, 0, 0 }; - copy.imageSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 }; - - const auto scratch_buf = vk::get_scratch_buffer(); - const auto data_length = src->info.extent.width * src->info.extent.height * 4; - - const auto current_layout = src->current_layout; - vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - 
vkCmdCopyImageToBuffer(*commands, src->value, src->current_layout, scratch_buf->value, 1, &copy); - vk::change_image_layout(*commands, real_src, current_layout, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - - vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - - vk::get_compute_task()->run(*commands, scratch_buf, data_length); - - vk::insert_buffer_memory_barrier(*commands, scratch_buf->value, 0, data_length, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); - - real_src = vk::get_typeless_helper(src->info.format); - vk::change_image_layout(*commands, real_src, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}); - - vkCmdCopyBufferToImage(*commands, scratch_buf->value, real_src->value, real_src->current_layout, 1, &copy); - } - } - } - - // Checks - if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1) - { - LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!"); - return; - } - - if (src_area.x1 < 0 || src_area.x2 >(s32)real_src->width() || src_area.y1 < 0 || src_area.y2 >(s32)real_src->height()) - { - LOG_ERROR(RSX, "Blit request denied because the source region does not fit!"); - return; - } - - if (dst_area.x1 < 0 || dst_area.x2 >(s32)real_dst->width() || dst_area.y1 < 0 || dst_area.y2 >(s32)real_dst->height()) - { - LOG_ERROR(RSX, "Blit request denied because the destination region does not fit!"); - return; - } - - const auto src_width = src_area.x2 - src_area.x1; - const auto src_height = src_area.y2 - src_area.y1; - const auto dst_width = dst_area.x2 - dst_area.x1; - const auto dst_height = dst_area.y2 - dst_area.y1; - - copy_scaled_image(*commands, real_src->value, real_dst->value, real_src->current_layout,
real_dst->current_layout, src_area.x1, src_area.y1, src_width, src_height, - dst_area.x1, dst_area.y1, dst_width, dst_height, 1, dst_aspect, real_src->info.format == real_dst->info.format, - interpolate ? VK_FILTER_LINEAR : VK_FILTER_NEAREST, real_src->info.format, real_dst->info.format); - - if (real_dst != dst) - { - auto internal_width = dst->width() * xfer_info.dst_scaling_hint; - vk::copy_image_typeless(*commands, real_dst, dst, { 0, 0, (s32)internal_width, (s32)dst->height() }, { 0, 0, (s32)dst->width(), (s32)dst->height() }, 1, - vk::get_aspect_flags(real_dst->info.format), vk::get_aspect_flags(dst->info.format)); - } - - change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)dst_aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers}); - format = dst->info.format; - } - } - helper(&cmd); - + blitter helper(&cmd); auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, cmd, const_cast(m_submit_queue)); if (reply.succeeded)