From 2526626646dd5440730b19b74f92f9c76916237f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 18 Jul 2017 13:44:36 +0300 Subject: [PATCH] rsx: Surface cache bug fixes - Properly handle data 'transfer' when recycling frame buffer images - Clear 'recycled' surfaces before use --- rpcs3/Emu/RSX/Common/surface_store.h | 12 ++-- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 7 +-- rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h | 8 +-- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 65 ++++++++++++++++----- rpcs3/Emu/RSX/GL/GLProcTable.h | 2 + rpcs3/Emu/RSX/GL/GLRenderTargets.h | 15 +++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 46 ++++++++++----- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 30 +++++----- 8 files changed, 121 insertions(+), 64 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index ed23883647..eddd1d57f6 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -117,7 +117,7 @@ namespace rsx for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &rtt = *It; - if (Traits::rtt_has_format_width_height(rtt, color_format, width, height)) + if (Traits::rtt_has_format_width_height(rtt, color_format, width, height, true)) { new_surface_storage = std::move(rtt); @@ -129,7 +129,7 @@ namespace rsx invalidated_resources.erase(It); new_surface = Traits::get(new_surface_storage); - Traits::invalidate_rtt_surface_contents(command_list, new_surface, true); + Traits::invalidate_rtt_surface_contents(command_list, new_surface, old_surface, true); Traits::prepare_rtt_for_drawing(command_list, new_surface); break; } @@ -181,7 +181,7 @@ namespace rsx for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &ds = *It; - if (Traits::ds_has_format_width_height(ds, depth_format, width, height)) + if (Traits::ds_has_format_width_height(ds, depth_format, width, height, true)) { new_surface_storage = std::move(ds); @@ -193,7 +193,7 @@ namespace rsx new_surface = Traits::get(new_surface_storage); Traits::prepare_ds_for_drawing(command_list, new_surface); - Traits::invalidate_depth_surface_contents(command_list, new_surface, true); + Traits::invalidate_depth_surface_contents(command_list, new_surface, old_surface, true); break; } } @@ -432,10 +432,10 @@ namespace rsx void invalidate_surface_cache_data(command_list_type command_list) { for (auto &rtt : m_render_targets_storage) - Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt)), false); + Traits::invalidate_rtt_surface_contents(command_list, Traits::get(std::get<1>(rtt)), nullptr, false); for (auto &ds : m_depth_stencil_storage) - Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), true); + Traits::invalidate_depth_surface_contents(command_list, Traits::get(std::get<1>(ds)), nullptr, true); } }; } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index ecda416bde..8868b9a219 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -223,9 +223,8 @@ namespace const std::vector>& vertex_ranges, d3d12_data_heap& m_buffer_data) { size_t index_count = std::accumulate( - vertex_ranges.begin(), vertex_ranges.end(), 0, [](size_t acc, const auto& pair) { - return acc + get_index_count( - rsx::method_registers.current_draw_clause.primitive, pair.second); + vertex_ranges.begin(), vertex_ranges.end(), 0ll, [](size_t acc, const auto& pair) { + return acc + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second); }); // Alloc @@ -236,7 +235,7 @@ namespace void* mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - size_t vertex_count = 0; + u32 vertex_count = 0; for (const auto& pair : vertex_ranges) vertex_count += pair.second; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 2c92edf87c..c6a9fbaedb 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -119,27 +119,27 @@ struct render_target_traits static void invalidate_rtt_surface_contents( gsl::not_null, - ID3D12Resource*, bool) + ID3D12Resource*, ID3D12Resource*, bool) {} static void invalidate_depth_surface_contents( gsl::not_null, - ID3D12Resource*, bool) + ID3D12Resource*, ID3D12Resource*, bool) { //TODO } static - bool rtt_has_format_width_height(const ComPtr &rtt, surface_color_format surface_color_format, size_t width, size_t height) + bool rtt_has_format_width_height(const ComPtr &rtt, surface_color_format surface_color_format, size_t width, size_t height, bool=false) { DXGI_FORMAT dxgi_format = get_color_surface_format(surface_color_format); return rtt->GetDesc().Format == dxgi_format && rtt->GetDesc().Width == width && rtt->GetDesc().Height == height; } static - bool ds_has_format_width_height(const ComPtr &rtt, surface_depth_format, size_t width, size_t height) + bool ds_has_format_width_height(const ComPtr &rtt, surface_depth_format, size_t width, size_t height, bool=false) { //TODO: Check format return rtt->GetDesc().Width == width && rtt->GetDesc().Height == height; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 58843627c2..1e687c4081 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -362,44 +362,80 @@ void GLGSRender::end() surface->old_contents = nullptr; }; + //Check if we have any 'recycled' surfaces in memory and if so, clear them + std::vector buffers_to_clear; + bool clear_all_color = true; + bool clear_depth = false; + + for (int index = 0; index < 4; index++) + { + if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0) + { + if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared()) + clear_all_color = false; + else + buffers_to_clear.push_back(index); + } + } + gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && !ds->cleared()) { - //Temporarily disable pixel tests - glDisable(GL_SCISSOR_TEST); - glDepthMask(GL_TRUE); - - glClearDepth(1.0); - glClearStencil(255); + clear_depth = true; + } - glClear(GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + //Temporarily disable pixel tests + glDisable(GL_SCISSOR_TEST); - if (g_cfg.video.strict_rendering_mode) + if (clear_depth || buffers_to_clear.size() > 0) + { + GLenum mask = 0; + + if (clear_depth) { - //Copy previous data if any - if (ds->old_contents != nullptr) - copy_rtt_contents(ds); + glDepthMask(GL_TRUE); + glClearDepth(1.0); + glClearStencil(255); + mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } - glDepthMask(rsx::method_registers.depth_write_enabled()); - glEnable(GL_SCISSOR_TEST); + if (clear_all_color) + mask |= GL_COLOR_BUFFER_BIT; + + glClear(mask); + + if (buffers_to_clear.size() > 0 && !clear_all_color) + { + GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f }; + //It is impossible for the render target to be typa A or B here (clear all would have been flagged) + for (auto &i: buffers_to_clear) + glClearBufferfv(draw_fbo.id(), i, colors); + } + + if (clear_depth) + glDepthMask(rsx::method_registers.depth_write_enabled()); ds->set_cleared(); } if (g_cfg.video.strict_rendering_mode) { + if (ds->old_contents != nullptr) + copy_rtt_contents(ds); + for (auto &rtt : m_rtts.m_bound_render_targets) { if (std::get<0>(rtt) != 0) { auto surface = std::get<1>(rtt); - if (!surface->cleared() && surface->old_contents != nullptr) + if (surface->old_contents != nullptr) copy_rtt_contents(surface); } } } + glEnable(GL_SCISSOR_TEST); + std::chrono::time_point textures_start = steady_clock::now(); //Setup textures @@ -461,7 +497,6 @@ void GLGSRender::end() } else { - //LOG_ERROR(RSX, "No work is needed for this draw call! Muhahahahahahaha"); skip_upload = true; } diff --git a/rpcs3/Emu/RSX/GL/GLProcTable.h b/rpcs3/Emu/RSX/GL/GLProcTable.h index f7873d505c..2eb4c399bd 100644 --- a/rpcs3/Emu/RSX/GL/GLProcTable.h +++ b/rpcs3/Emu/RSX/GL/GLProcTable.h @@ -173,6 +173,8 @@ OPENGL_PROC(PFNGLMULTIDRAWARRAYSPROC, MultiDrawArrays); OPENGL_PROC(PFNGLGETTEXTUREIMAGEEXTPROC, GetTextureImageEXT); OPENGL_PROC(PFNGLGETTEXTUREIMAGEPROC, GetTextureImage); +OPENGL_PROC(PFNGLCLEARBUFFERFVPROC, ClearBufferfv); + //Sampler Objects OPENGL_PROC(PFNGLGENSAMPLERSPROC, GenSamplers); OPENGL_PROC(PFNGLDELETESAMPLERSPROC, DeleteSamplers); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index bf2010e9dd..e88879e308 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -192,6 +192,7 @@ struct gl_render_target_traits if (old_surface != nullptr && old_surface->get_compatible_internal_format() == internal_fmt) result->old_contents = old_surface; + result->set_cleared(); return result; } @@ -240,19 +241,25 @@ struct gl_render_target_traits static void prepare_ds_for_drawing(void *, gl::render_target*) {} static void prepare_ds_for_sampling(void *, gl::render_target*) {} - static void invalidate_rtt_surface_contents(void *, gl::render_target*, bool) {} - static void invalidate_depth_surface_contents(void *, gl::render_target *ds, bool) { ds->set_cleared(false); } + static void invalidate_rtt_surface_contents(void *, gl::render_target *rtt, gl::render_target* /*old*/, bool forced) { if (forced) rtt->set_cleared(false); } + static void invalidate_depth_surface_contents(void *, gl::render_target *ds, gl::render_target* /*old*/, bool) { ds->set_cleared(false); } static - bool rtt_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_color_format format, size_t width, size_t height) + bool rtt_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_color_format format, size_t width, size_t height, bool check_refs=false) { + if (check_refs) //TODO + return false; + auto internal_fmt = rsx::internals::sized_internal_format(format); return rtt->get_compatible_internal_format() == internal_fmt && rtt->width() == width && rtt->height() == height; } static - bool ds_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_depth_format, size_t width, size_t height) + bool ds_has_format_width_height(const std::unique_ptr &rtt, rsx::surface_depth_format, size_t width, size_t height, bool check_refs=false) { + if (check_refs) //TODO + return false; + // TODO: check format return rtt->width() == width && rtt->height() == height; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 38b13db484..f3e1397423 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -875,7 +875,7 @@ void VKGSRender::begin_render_pass() size_t idx = vk::get_render_pass_location( vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), - (u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size()); + (u8)m_draw_buffers_count); VkRenderPass current_render_pass = m_render_passes[idx]; VkRenderPassBeginInfo rp_begin = {}; @@ -1005,14 +1005,14 @@ void VKGSRender::end() { auto surface = std::get<1>(rtt); - if (surface->dirty && surface->old_contents != nullptr) + if (surface->old_contents != nullptr) copy_rtt_contents(surface); } } if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) { - if (ds->dirty && ds->old_contents != nullptr) + if (ds->old_contents != nullptr) copy_rtt_contents(ds); } } @@ -1120,6 +1120,11 @@ void VKGSRender::end() vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr); + //Clear any 'dirty' surfaces - possible is a recycled cache surface is used + std::vector buffers_to_clear; + buffers_to_clear.reserve(4); + const auto targets = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); + if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil)) { if (ds->dirty) @@ -1129,14 +1134,30 @@ void VKGSRender::end() depth_clear_value.depthStencil.depth = 1.f; depth_clear_value.depthStencil.stencil = 255; - VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 }; VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value }; - vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect); + buffers_to_clear.push_back(clear_desc); ds->dirty = false; } } + for (int index = 0; index < targets.size(); ++index) + { + if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0 && std::get<1>(m_rtts.m_bound_render_targets[index])->dirty) + { + const u32 real_index = (index == 1 && targets.size() == 1) ? 0 : static_cast(index); + buffers_to_clear.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, real_index, {} }); + + std::get<1>(m_rtts.m_bound_render_targets[index])->dirty = false; + } + } + + if (buffers_to_clear.size() > 0) + { + VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 }; + vkCmdClearAttachments(*m_current_command_buffer, static_cast(buffers_to_clear.size()), buffers_to_clear.data(), 1, &clear_rect); + } + std::optional > index_info = std::get<2>(upload_info); std::chrono::time_point vertex_end = steady_clock::now(); @@ -1270,8 +1291,6 @@ void VKGSRender::clear_surface(u32 mask) u32 depth_stencil_mask = 0; std::vector clear_descriptors; - std::vector clear_regions; - VkClearValue depth_stencil_clear_values, color_clear_values; u16 scissor_x = rsx::method_registers.scissor_origin_x(); @@ -1328,8 +1347,8 @@ void VKGSRender::clear_surface(u32 mask) for (int index = 0; index < targets.size(); ++index) { - clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, (uint32_t)index, color_clear_values }); - clear_regions.push_back(region); + const u32 real_index = (index == 1 && targets.size() == 1) ? 0 : static_cast(index); + clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, real_index, color_clear_values }); } for (auto &rtt : m_rtts.m_bound_render_targets) @@ -1343,13 +1362,10 @@ void VKGSRender::clear_surface(u32 mask) } if (mask & 0x3) - { clear_descriptors.push_back({ (VkImageAspectFlags)depth_stencil_mask, 0, depth_stencil_clear_values }); - clear_regions.push_back(region); - } begin_render_pass(); - vkCmdClearAttachments(*m_current_command_buffer, (u32)clear_descriptors.size(), clear_descriptors.data(), (u32)clear_regions.size(), clear_regions.data()); + vkCmdClearAttachments(*m_current_command_buffer, (u32)clear_descriptors.size(), clear_descriptors.data(), 1, ®ion); if (mask & 0x3) { @@ -1724,7 +1740,7 @@ bool VKGSRender::load_program(bool fast_update) size_t idx = vk::get_render_pass_location( vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), - (u8)vk::get_draw_buffers(rsx::method_registers.surface_color_target()).size()); + (u8)m_draw_buffers_count); properties.render_pass = m_render_passes[idx]; @@ -1982,7 +1998,7 @@ void VKGSRender::prepare_rtts() m_depth_surface_info.pitch = 0; } - m_draw_buffers_count = static_cast(bound_images.size()); + m_draw_buffers_count = static_cast(draw_buffers.size()); if (g_cfg.video.write_color_buffers) { diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 97270ad71a..bd367fc97d 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -191,31 +191,26 @@ namespace rsx change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); } - static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, bool /*forced*/) + static void invalidate_rtt_surface_contents(vk::command_buffer* pcmd, vk::render_target *rtt, vk::render_target *old_surface, bool forced) { - if (0)//forced) + if (forced) { - VkClearColorValue clear_color; - VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - - clear_color.float32[0] = 0.f; - clear_color.float32[1] = 0.f; - clear_color.float32[2] = 0.f; - clear_color.float32[3] = 0.f; - - change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_GENERAL, range); - vkCmdClearColorImage(*pcmd, rtt->value, VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &range); - change_image_layout(*pcmd, rtt, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + rtt->old_contents = old_surface; + rtt->dirty = true; } } - static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds, bool /*forced*/) + static void invalidate_depth_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *ds, vk::render_target *old_surface, bool /*forced*/) { ds->dirty = true; + ds->old_contents = old_surface; } - static bool rtt_has_format_width_height(const std::unique_ptr &rtt, surface_color_format format, size_t width, size_t height) + static bool rtt_has_format_width_height(const std::unique_ptr &rtt, surface_color_format format, size_t width, size_t height, bool check_refs=false) { + if (check_refs && rtt->deref_count == 0) //Surface may still have read refs from data 'copy' + return false; + VkFormat fmt = vk::get_compatible_surface_format(format).first; if (rtt->info.format == fmt && @@ -226,8 +221,11 @@ namespace rsx return false; } - static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format format, size_t width, size_t height) + static bool ds_has_format_width_height(const std::unique_ptr &ds, surface_depth_format format, size_t width, size_t height, bool check_refs=false) { + if (check_refs && ds->deref_count == 0) //Surface may still have read refs from data 'copy' + return false; + if (ds->info.extent.width == width && ds->info.extent.height == height) {