renderer_vulkan: emulate rasterization order attachment access

This commit is contained in:
Liam 2023-06-01 22:46:46 -04:00
commit 4e57f3e385
8 changed files with 97 additions and 16 deletions

View file

@ -172,7 +172,7 @@ bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
return true; return true;
} }
using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool, bool&);
template <typename Spec, typename... Specs> template <typename Spec, typename... Specs>
ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules, ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
@ -296,7 +296,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
} }
template <typename Spec> template <typename Spec>
void GraphicsPipeline::ConfigureImpl(bool is_indexed) { void GraphicsPipeline::ConfigureImpl(bool is_indexed, bool& out_has_feedback_loop) {
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
size_t sampler_index{}; size_t sampler_index{};
@ -482,7 +482,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
prepare_stage(4); prepare_stage(4);
} }
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views); out_has_feedback_loop = texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area); ConfigureDraw(rescaling, render_area);
} }

View file

@ -86,8 +86,8 @@ public:
void AddTransition(GraphicsPipeline* transition); void AddTransition(GraphicsPipeline* transition);
void Configure(bool is_indexed) { void Configure(bool is_indexed, bool& out_has_feedback_loop) {
configure_func(this, is_indexed); configure_func(this, is_indexed, out_has_feedback_loop);
} }
[[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
@ -105,7 +105,9 @@ public:
template <typename Spec> template <typename Spec>
static auto MakeConfigureSpecFunc() { static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; return [](GraphicsPipeline* pl, bool is_indexed, bool& out_has_feedback_loop) {
pl->ConfigureImpl<Spec>(is_indexed, out_has_feedback_loop);
};
} }
void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) { void SetEngine(Tegra::Engines::Maxwell3D* maxwell3d_, Tegra::MemoryManager* gpu_memory_) {
@ -115,7 +117,7 @@ public:
private: private:
template <typename Spec> template <typename Spec>
void ConfigureImpl(bool is_indexed); void ConfigureImpl(bool is_indexed, bool& out_has_feedback_loop);
void ConfigureDraw(const RescalingPushConstant& rescaling, void ConfigureDraw(const RescalingPushConstant& rescaling,
const RenderAreaPushConstant& render_are); const RenderAreaPushConstant& render_are);
@ -134,7 +136,7 @@ private:
Scheduler& scheduler; Scheduler& scheduler;
GuestDescriptorQueue& guest_descriptor_queue; GuestDescriptorQueue& guest_descriptor_queue;
void (*configure_func)(GraphicsPipeline*, bool){}; void (*configure_func)(GraphicsPipeline*, bool, bool&){};
std::vector<GraphicsPipelineCacheKey> transition_keys; std::vector<GraphicsPipelineCacheKey> transition_keys;
std::vector<GraphicsPipeline*> transitions; std::vector<GraphicsPipeline*> transitions;

View file

@ -151,6 +151,17 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
} }
return params; return params;
} }
/// Returns the number of vertices that form a single primitive for the draw's topology.
///
/// Only Maxwell::PrimitiveTopology::Triangles is recognised (3 vertices). Every other
/// topology trips ASSERT(false) and the draw's total vertex count is returned instead.
///
/// @param draw_state Draw state whose topology is inspected.
/// @param params     Draw parameters; only num_vertices is read, as the fallback value.
/// @return 3 for Triangles, otherwise params.num_vertices.
u32 FeedbackLoopVerticesPerPrimitive(const MaxwellDrawState& draw_state, const DrawParams& params) {
    const bool is_triangles = draw_state.topology == Maxwell::PrimitiveTopology::Triangles;
    if (is_triangles) {
        return 3;
    }

    // Unhandled topology: flag it, then fall back to the whole vertex count.
    ASSERT(false);
    return params.num_vertices;
}
} // Anonymous namespace } // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@ -195,24 +206,58 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
return; return;
} }
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
bool has_feedback_loop{};
// update engine as channel may be different. // update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed); pipeline->Configure(is_indexed, has_feedback_loop);
BeginTransformFeedback(); BeginTransformFeedback();
UpdateDynamicStates(); UpdateDynamicStates();
draw_func(); draw_func(has_feedback_loop);
EndTransformFeedback(); EndTransformFeedback();
} }
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count] { PrepareDraw(is_indexed, [this, is_indexed, instance_count](bool has_feedback_loop) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count}; const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
if (has_feedback_loop && draw_params.num_vertices > 6 && draw_params.num_instances == 1) {
u32 vertices_per_primitive = FeedbackLoopVerticesPerPrimitive(draw_state, draw_params);
std::array<VkImageMemoryBarrier, 9> barriers{};
u32 num_barriers{};
scheduler.GetFeedbackLoopBarrier(barriers, num_barriers);
scheduler.Record([draw_params, barriers, num_barriers,
vertices_per_primitive](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
for (u32 i = 0; i < draw_params.num_vertices; i += vertices_per_primitive) {
cmdbuf.DrawIndexed(vertices_per_primitive, draw_params.num_instances, i,
draw_params.base_vertex, draw_params.base_instance);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_DEPENDENCY_BY_REGION_BIT, {}, {},
vk::Span(barriers.data(), num_barriers));
}
} else {
for (u32 i = 0; i < draw_params.num_vertices; i += vertices_per_primitive) {
cmdbuf.Draw(vertices_per_primitive, draw_params.num_instances, i,
draw_params.base_instance);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_DEPENDENCY_BY_REGION_BIT, {}, {},
vk::Span(barriers.data(), num_barriers));
}
}
});
return;
}
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) { if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
@ -229,7 +274,7 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
void RasterizerVulkan::DrawIndirect() { void RasterizerVulkan::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams(); const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(&params); buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params] { PrepareDraw(params.is_indexed, [this, &params](bool) {
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
const auto& buffer = indirect_buffer.first; const auto& buffer = indirect_buffer.first;
const auto& offset = indirect_buffer.second; const auto& offset = indirect_buffer.second;

View file

@ -78,6 +78,15 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.preserveAttachmentCount = 0, .preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr, .pPreserveAttachments = nullptr,
}; };
constexpr VkSubpassDependency self_dependency{
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
};
pair->second = device->GetLogical().CreateRenderPass({ pair->second = device->GetLogical().CreateRenderPass({
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
@ -86,8 +95,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
.subpassCount = 1, .subpassCount = 1,
.pSubpasses = &subpass, .pSubpasses = &subpass,
.dependencyCount = 0, .dependencyCount = 1,
.pDependencies = nullptr, .pDependencies = &self_dependency,
}); });
return *pair->second; return *pair->second;
} }

View file

@ -304,4 +304,23 @@ void Scheduler::AcquireNewChunk() {
} }
} }
void Scheduler::GetFeedbackLoopBarrier(std::array<VkImageMemoryBarrier, 9>& out_barriers,
u32& out_num_barriers) {
out_num_barriers = num_renderpass_images;
for (u32 i = 0; i < num_renderpass_images; i++) {
out_barriers[i] = VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = renderpass_images[i],
.subresourceRange = renderpass_image_ranges[i],
};
}
}
} // namespace Vulkan } // namespace Vulkan

View file

@ -16,6 +16,7 @@
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"
#include "vulkan/vulkan_core.h"
namespace Vulkan { namespace Vulkan {
@ -108,6 +109,9 @@ public:
std::mutex submit_mutex; std::mutex submit_mutex;
void GetFeedbackLoopBarrier(std::array<VkImageMemoryBarrier, 9>& out_barriers,
u32& out_num_barriers);
private: private:
class Command { class Command {
public: public:

View file

@ -185,7 +185,7 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
} }
template <class P> template <class P>
void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { bool TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
const bool requires_barrier = [&] { const bool requires_barrier = [&] {
for (const auto& view : views) { for (const auto& view : views) {
if (!view.id) { if (!view.id) {
@ -218,6 +218,8 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
if (requires_barrier) { if (requires_barrier) {
runtime.BarrierFeedbackLoop(); runtime.BarrierFeedbackLoop();
} }
return requires_barrier;
} }
template <class P> template <class P>

View file

@ -151,7 +151,7 @@ public:
void FillComputeImageViews(std::span<ImageViewInOut> views); void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Handle feedback loops during draws. /// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views); bool CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index /// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index); Sampler* GetGraphicsSampler(u32 index);