From 7080305d828e31e51e2f435accc3b967af19feed Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 20 May 2020 22:51:15 +0300 Subject: [PATCH] vk: Implement masked stencil buffer clears - Partial stencil buffer clears were not implemented. This is for example where a game can choose to clear only some bits from the stencil buffer. - Vulkan does not support masked stencil clears natively, it has to be implemented as a graphics operation. - Also refactors vulkan overlay passes to use global resource system instead of forcing the render backend to own all of them and manage lifetimes. --- rpcs3/Emu/RSX/VK/VKDraw.cpp | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 60 ++++++++++---------- rpcs3/Emu/RSX/VK/VKGSRender.h | 4 -- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 17 +++++- rpcs3/Emu/RSX/VK/VKOverlays.h | 98 ++++++++++++++++++++++++++++++--- rpcs3/Emu/RSX/VK/VKPresent.cpp | 19 +++---- 6 files changed, 145 insertions(+), 55 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 13f69aa157..486d36fd05 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -944,7 +944,7 @@ void VKGSRender::end() // TODO: Stencil transfer ds->old_contents[0].init_transfer(ds); - m_depth_converter->run(*m_current_command_buffer, + vk::get_overlay_pass()->run(*m_current_command_buffer, ds->old_contents[0].src_rect(), ds->old_contents[0].dst_rect(), src->get_view(0xAAE4, rsx::default_remap_vector), diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 551dc873fb..8030b95cfc 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -502,15 +502,6 @@ VKGSRender::VKGSRender() : GSRender() m_text_writer->init(*m_device, vk::get_renderpass(*m_device, key)); } - m_depth_converter = std::make_unique(); - m_depth_converter->create(*m_device); - - m_attachment_clear_pass = std::make_unique(); - m_attachment_clear_pass->create(*m_device); - - m_video_output_pass = std::make_unique(); - 
m_video_output_pass->create(*m_device); - m_prog_buffer = std::make_unique ( [this](const vk::pipeline_props& props, const RSXVertexProgram& vp, const RSXFragmentProgram& fp) @@ -547,8 +538,7 @@ VKGSRender::VKGSRender() : GSRender() m_texture_cache.initialize((*m_device), m_swapchain->get_graphics_queue(), m_texture_upload_buffer_ring_info); - m_ui_renderer = std::make_unique(); - m_ui_renderer->create(*m_current_command_buffer, m_texture_upload_buffer_ring_info); + vk::get_overlay_pass()->init(*m_current_command_buffer, m_texture_upload_buffer_ring_info); m_occlusion_query_pool.initialize(*m_current_command_buffer); @@ -650,22 +640,6 @@ VKGSRender::~VKGSRender() //Overlay text handler m_text_writer.reset(); - //Overlay UI renderer - m_ui_renderer->destroy(); - m_ui_renderer.reset(); - - //RGBA->depth cast helper - m_depth_converter->destroy(); - m_depth_converter.reset(); - - //Attachment clear helper - m_attachment_clear_pass->destroy(); - m_attachment_clear_pass.reset(); - - // Video-out calibration (gamma, colorspace, etc) - m_video_output_pass->destroy(); - m_video_output_pass.reset(); - //Pipeline descriptors vkDestroyPipelineLayout(*m_device, pipeline_layout, nullptr); vkDestroyDescriptorSetLayout(*m_device, descriptor_layouts, nullptr); @@ -1192,7 +1166,8 @@ void VKGSRender::clear_surface(u32 mask) }; VkRenderPass renderpass = VK_NULL_HANDLE; - m_attachment_clear_pass->update_config(colormask, clear_color); + auto attachment_clear_pass = vk::get_overlay_pass(); + attachment_clear_pass->update_config(colormask, clear_color); for (const auto &index : m_draw_buffers) { @@ -1211,7 +1186,7 @@ void VKGSRender::clear_surface(u32 mask) renderpass = vk::get_renderpass(*m_device, key); } - m_attachment_clear_pass->run(*m_current_command_buffer, rtt, region.rect, renderpass); + attachment_clear_pass->run(*m_current_command_buffer, rtt, region.rect, renderpass); rtt->change_layout(*m_current_command_buffer, old_layout); } @@ -1237,9 +1212,32 @@ void 
VKGSRender::clear_surface(u32 mask) { if (m_rtts.m_bound_depth_stencil.first) { - if (require_mem_load) m_rtts.m_bound_depth_stencil.second->write_barrier(*m_current_command_buffer); + if (require_mem_load) + { + m_rtts.m_bound_depth_stencil.second->write_barrier(*m_current_command_buffer); + } + + if ((depth_stencil_mask & VK_IMAGE_ASPECT_STENCIL_BIT) && + rsx::method_registers.stencil_mask() != 0xff) + { + // Partial stencil clear. Disables fast stencil clear + auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); + auto key = vk::get_renderpass_key({ ds }); + auto renderpass = vk::get_renderpass(*m_device, key); + + vk::get_overlay_pass()->run( + *m_current_command_buffer, ds, region.rect, + depth_stencil_clear_values.depthStencil.stencil, + rsx::method_registers.stencil_mask(), renderpass); + + depth_stencil_mask &= ~VK_IMAGE_ASPECT_STENCIL_BIT; + } + + if (depth_stencil_mask) + { + clear_descriptors.push_back({ static_cast(depth_stencil_mask), 0, depth_stencil_clear_values }); + } - clear_descriptors.push_back({ static_cast(depth_stencil_mask), 0, depth_stencil_clear_values }); update_z = true; } } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 96c9a1d17c..169caf3ff9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -360,10 +360,6 @@ private: std::unique_ptr null_buffer_view; std::unique_ptr m_text_writer; - std::unique_ptr m_depth_converter; - std::unique_ptr m_ui_renderer; - std::unique_ptr m_attachment_clear_pass; - std::unique_ptr m_video_output_pass; std::unique_ptr m_cond_render_buffer; u64 m_cond_render_sync_tag = 0; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 53a7fa68e8..aa6b796f21 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -72,6 +72,7 @@ namespace vk std::unordered_map> g_null_image_views; std::unordered_map> g_typeless_textures; std::unordered_map> g_compute_tasks; + std::unordered_map> g_overlay_passes; // 
General purpose upload heap // TODO: Clean this up and integrate cleanly with VKGSRender @@ -384,10 +385,19 @@ namespace vk } } + void reset_overlay_passes() + { + for (const auto& p : g_overlay_passes) + { + p.second->free_resources(); + } + } + void reset_global_resources() { vk::reset_compute_tasks(); vk::reset_resolve_resources(); + vk::reset_overlay_passes(); g_upload_heap.reset_allocation_stats(); } @@ -420,8 +430,13 @@ namespace vk { p.second->destroy(); } - g_compute_tasks.clear(); + + for (const auto& p : g_overlay_passes) + { + p.second->destroy(); + } + g_overlay_passes.clear(); } vk::mem_allocator_base* get_current_mem_allocator() diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 8eed9eb072..29e8231acb 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -33,7 +33,7 @@ namespace vk std::unique_ptr m_draw_fbo; vk::data_heap m_vao; vk::data_heap m_ubo; - vk::render_device* m_device = nullptr; + const vk::render_device* m_device = nullptr; std::string vs_src; std::string fs_src; @@ -305,7 +305,7 @@ namespace vk vkCmdBindVertexBuffers(cmd, 0, 1, &buffers, &offsets); } - void create(vk::render_device &dev) + virtual void create(const vk::render_device &dev) { if (!initialized) { @@ -316,7 +316,7 @@ namespace vk } } - void destroy() + virtual void destroy() { if (initialized) { @@ -682,15 +682,14 @@ namespace vk return result; } - void create(vk::command_buffer &cmd, vk::data_heap &upload_heap) + void init(vk::command_buffer &cmd, vk::data_heap &upload_heap) { - auto& dev = cmd.get_command_pool().get_owner(); - overlay_pass::create(dev); - rsx::overlays::resource_config configuration; configuration.load_files(); + auto& dev = cmd.get_command_pool().get_owner(); u64 storage_key = 1; + for (const auto &res : configuration.texture_raw_data) { upload_simple_texture(dev, cmd, upload_heap, storage_key++, res->w, res->h, 1, false, false, res->data, UINT32_MAX); @@ -699,7 +698,7 @@ namespace vk 
configuration.free_resources(); } - void destroy() + void destroy() override { temp_image_cache.clear(); temp_view_cache.clear(); @@ -1032,6 +1031,69 @@ namespace vk } }; + struct stencil_clear_pass : public overlay_pass + { + VkRect2D region = {}; + + stencil_clear_pass() + { + vs_src = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "\n" + "void main()\n" + "{\n" + " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" + " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" + "}\n"; + + fs_src = + "#version 420\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "layout(location=0) out vec4 out_color;\n" + "\n" + "void main()\n" + "{\n" + " out_color = vec4(0.);\n" + "}\n"; + } + + void set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) override + { + VkViewport vp{}; + vp.x = static_cast(x); + vp.y = static_cast(y); + vp.width = static_cast(w); + vp.height = static_cast(h); + vp.minDepth = 0.f; + vp.maxDepth = 1.f; + vkCmdSetViewport(cmd, 0, 1, &vp); + + vkCmdSetScissor(cmd, 0, 1, &region); + } + + void run(vk::command_buffer& cmd, vk::render_target* target, VkRect2D rect, uint32_t stencil_clear, uint32_t stencil_write_mask, VkRenderPass render_pass) + { + region = rect; + + // Stencil setup. Replace all pixels in the scissor region with stencil_clear with the correct write mask.
+ renderpass_config.enable_stencil_test( + VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace + VK_COMPARE_OP_ALWAYS, // Always pass + 0xFF, // Full write-through + stencil_clear); // Write active bit + + renderpass_config.set_stencil_mask(stencil_write_mask); + renderpass_config.set_depth_mask(false); + + // Coverage sampling disabled, but actually report correct number of samples + renderpass_config.set_multisample_state(target->samples(), 0xFFFF, false, false, false); + + overlay_pass::run(cmd, { 0, 0, target->width(), target->height() }, target, + target->get_view(0xAAE4, rsx::default_remap_vector), render_pass); + } + }; + struct video_out_calibration_pass : public overlay_pass { union config_t @@ -1158,4 +1220,24 @@ namespace vk overlay_pass::run(cmd, viewport, target, views, render_pass); } }; + + // TODO: Replace with a proper manager + extern std::unordered_map> g_overlay_passes; + + template + T* get_overlay_pass() + { + u32 index = id_manager::typeinfo::get_index(); + auto& e = g_overlay_passes[index]; + + if (!e) + { + e = std::make_unique(); + e->create(*vk::get_current_renderer()); + } + + return static_cast(e.get()); + } + + void reset_overlay_passes(); } diff --git a/rpcs3/Emu/RSX/VK/VKPresent.cpp b/rpcs3/Emu/RSX/VK/VKPresent.cpp index 0da357e677..42f691fe2b 100644 --- a/rpcs3/Emu/RSX/VK/VKPresent.cpp +++ b/rpcs3/Emu/RSX/VK/VKPresent.cpp @@ -193,6 +193,7 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou if (m_overlay_manager && m_overlay_manager->has_dirty()) { + auto ui_renderer = vk::get_overlay_pass(); m_overlay_manager->lock(); std::vector uids_to_dispose; @@ -200,7 +201,7 @@ void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou for (const auto& view : m_overlay_manager->get_dirty()) { - m_ui_renderer->remove_temp_resources(view->uid); + ui_renderer->remove_temp_resources(view->uid); uids_to_dispose.push_back(view->uid); } @@ -210,11 +211,6 @@ void 
VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resou vk::reset_global_resources(); - m_attachment_clear_pass->free_resources(); - m_depth_converter->free_resources(); - m_ui_renderer->free_resources(); - m_video_output_pass->free_resources(); - ctx->buffer_views_to_clean.clear(); const auto shadermode = g_cfg.video.shadermode.get(); @@ -601,11 +597,13 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image); direct_fbo->add_ref(); - image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !use_full_rgb_range_output, avconfig->_3d, single_target_pass); - image_to_flip->pop_layout(*m_current_command_buffer); + vk::get_overlay_pass()->run( + *m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, + avconfig->gamma, !use_full_rgb_range_output, avconfig->_3d, single_target_pass); + + image_to_flip->pop_layout(*m_current_command_buffer); direct_fbo->release(); } @@ -682,11 +680,12 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info) if (has_overlay) { // Lock to avoid modification during run-update chain + auto ui_renderer = vk::get_overlay_pass(); std::lock_guard lock(*m_overlay_manager); for (const auto& view : m_overlay_manager->get_views()) { - m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get()); + ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get()); } }