diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 506b78f08e..d44567d3ca 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -172,7 +172,7 @@ bool Passes(const std::array& modules, return true; } -using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool, bool&); template ConfigureFuncPtr FindSpec(const std::array& modules, @@ -296,7 +296,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { } template -void GraphicsPipeline::ConfigureImpl(bool is_indexed) { +void GraphicsPipeline::ConfigureImpl(bool is_indexed, bool& out_has_feedback_loop) { std::array views; std::array samplers; size_t sampler_index{}; @@ -482,7 +482,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { prepare_stage(4); } texture_cache.UpdateRenderTargets(false); - texture_cache.CheckFeedbackLoop(views); + out_has_feedback_loop = texture_cache.CheckFeedbackLoop(views); ConfigureDraw(rescaling, render_area); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 99e56e9ad8..8338cdd879 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -86,8 +86,8 @@ public: void AddTransition(GraphicsPipeline* transition); - void Configure(bool is_indexed) { - configure_func(this, is_indexed); + void Configure(bool is_indexed, bool& out_has_feedback_loop) { + configure_func(this, is_indexed, out_has_feedback_loop); } [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { @@ -105,7 +105,9 @@ public: template static auto MakeConfigureSpecFunc() { - return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl(is_indexed); }; + return [](GraphicsPipeline* pl, bool 
is_indexed, bool& out_has_feedback_loop) { + pl->ConfigureImpl(is_indexed, out_has_feedback_loop); + }; } void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) { @@ -115,7 +117,7 @@ public: private: template - void ConfigureImpl(bool is_indexed); + void ConfigureImpl(bool is_indexed, bool& out_has_feedback_loop); void ConfigureDraw(const RescalingPushConstant& rescaling, const RenderAreaPushConstant& render_are); @@ -134,7 +136,7 @@ private: Scheduler& scheduler; GuestDescriptorQueue& guest_descriptor_queue; - void (*configure_func)(GraphicsPipeline*, bool){}; + void (*configure_func)(GraphicsPipeline*, bool, bool&){}; std::vector transition_keys; std::vector transitions; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8d3a9736b3..b9bbe677eb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -151,6 +151,17 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, } return params; } + +u32 FeedbackLoopVerticesPerPrimitive(const MaxwellDrawState& draw_state, const DrawParams& params) { + switch (draw_state.topology) { + case Maxwell::PrimitiveTopology::Triangles: + return 3; + default: + ASSERT(false); + return params.num_vertices; + } +} + } // Anonymous namespace RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -195,24 +206,58 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { return; } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + bool has_feedback_loop{}; + // update engine as channel may be different. 
pipeline->SetEngine(maxwell3d, gpu_memory); - pipeline->Configure(is_indexed); + pipeline->Configure(is_indexed, has_feedback_loop); BeginTransformFeedback(); UpdateDynamicStates(); - draw_func(); + draw_func(has_feedback_loop); EndTransformFeedback(); } void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { - PrepareDraw(is_indexed, [this, is_indexed, instance_count] { + PrepareDraw(is_indexed, [this, is_indexed, instance_count](bool has_feedback_loop) { const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const u32 num_instances{instance_count}; const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; + + if (has_feedback_loop && draw_params.num_vertices > 6 && draw_params.num_instances == 1) { + u32 vertices_per_primitive = FeedbackLoopVerticesPerPrimitive(draw_state, draw_params); + + std::array barriers{}; + u32 num_barriers{}; + scheduler.GetFeedbackLoopBarrier(barriers, num_barriers); + scheduler.Record([draw_params, barriers, num_barriers, + vertices_per_primitive](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + for (u32 i = 0; i < draw_params.num_vertices; i += vertices_per_primitive) { + cmdbuf.DrawIndexed(vertices_per_primitive, draw_params.num_instances, i, + draw_params.base_vertex, draw_params.base_instance); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_DEPENDENCY_BY_REGION_BIT, {}, {}, + vk::Span(barriers.data(), num_barriers)); + } + } else { + for (u32 i = 0; i < draw_params.num_vertices; i += vertices_per_primitive) { + cmdbuf.Draw(vertices_per_primitive, draw_params.num_instances, i, + draw_params.base_instance); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_DEPENDENCY_BY_REGION_BIT, {}, {}, + vk::Span(barriers.data(), num_barriers)); + } + } + }); + return; + } + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if 
(draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, @@ -229,7 +274,7 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { void RasterizerVulkan::DrawIndirect() { const auto& params = maxwell3d->draw_manager->GetIndirectParams(); buffer_cache.SetDrawIndirect(&params); - PrepareDraw(params.is_indexed, [this, &params] { + PrepareDraw(params.is_indexed, [this, &params](bool) { const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); const auto& buffer = indirect_buffer.first; const auto& offset = indirect_buffer.second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index ae9f1de642..27ca18c121 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -78,6 +78,15 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; + constexpr VkSubpassDependency self_dependency{ + .srcSubpass = 0, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + .dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + }; pair->second = device->GetLogical().CreateRenderPass({ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .pNext = nullptr, @@ -86,8 +95,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pAttachments = descriptions.empty() ? 
nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, + .dependencyCount = 1, + .pDependencies = &self_dependency, }); return *pair->second; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 80455ec080..cf61136717 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -304,4 +304,23 @@ void Scheduler::AcquireNewChunk() { } } +void Scheduler::GetFeedbackLoopBarrier(std::array& out_barriers, + u32& out_num_barriers) { + out_num_barriers = num_renderpass_images; + for (u32 i = 0; i < num_renderpass_images; i++) { + out_barriers[i] = VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = renderpass_images[i], + .subresourceRange = renderpass_image_ranges[i], + }; + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 475c682eb2..0afafd4f50 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -16,6 +16,7 @@ #include "common/polyfill_thread.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "vulkan/vulkan_core.h" namespace Vulkan { @@ -108,6 +109,9 @@ public: std::mutex submit_mutex; + void GetFeedbackLoopBarrier(std::array& out_barriers, + u32& out_num_barriers); + private: class Command { public: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 
2cf082c5dd..8760c8920e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -185,7 +185,7 @@ void TextureCache<P>::FillComputeImageViews(std::span views) { } template -void TextureCache<P>::CheckFeedbackLoop(std::span views) { +bool TextureCache<P>::CheckFeedbackLoop(std::span views) { const bool requires_barrier = [&] { for (const auto& view : views) { if (!view.id) { @@ -218,6 +218,8 @@ void TextureCache<P>::CheckFeedbackLoop(std::span views) { if (requires_barrier) { runtime.BarrierFeedbackLoop(); } + + return requires_barrier; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 3bfa921549..c74fe2a0f9 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -151,7 +151,7 @@ public: void FillComputeImageViews(std::span views); /// Handle feedback loops during draws. - void CheckFeedbackLoop(std::span views); + bool CheckFeedbackLoop(std::span views); /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index);