From c5f065b3ae732571c5c629628669ba2a7412d076 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 16 Dec 2024 02:05:29 +0200 Subject: [PATCH] ir: Add heuristic based LDS barrier pass * Attempts to insert barriers after zero-depth divergant conditional blocks in shaders that use shared memory --- CMakeLists.txt | 1 + src/shader_recompiler/ir/passes/ir_passes.h | 1 + .../ir/passes/shared_memory_barrier_pass.cpp | 45 +++++++++++++++++++ src/shader_recompiler/recompiler.cpp | 1 + 4 files changed, 48 insertions(+) create mode 100644 src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 78d8421a3..af003195e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -671,6 +671,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp + src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 61f43e7e4..ab5eb0fd7 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -21,5 +21,6 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info); +void SharedMemoryBarrierPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp new file mode 100644 index 000000000..fe0847187 --- /dev/null +++ b/src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void SharedMemoryBarrierPass(IR::Program& program) { + if (!program.info.uses_shared) { + return; + } + using Type = IR::AbstractSyntaxNode::Type; + u32 branch_depth{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + if (node.type == Type::EndIf) { + --branch_depth; + continue; + } + if (node.type != Type::If) { + continue; + } + u32 curr_depth = branch_depth++; + if (curr_depth != 0) { + continue; + } + const IR::U1 cond = node.data.if_node.cond; + const auto insert_barrier = IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 && + inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) { + return true; + } + return std::nullopt; + }); + if (insert_barrier) { + IR::Block* const merge = node.data.if_node.merge; + auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi); + IR::IREmitter ir{*merge, insert_point}; + ir.Barrier(); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index ad57adb6a..60e0b7df2 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -91,6 +91,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::DeadCodeEliminationPass(program); Shader::Optimization::CollectShaderInfoPass(program); + Shader::Optimization::SharedMemoryBarrierPass(program); return program; }