diff --git a/CMakeLists.txt b/CMakeLists.txt
index c192dd7bc..bd37b5555 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,6 +419,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
                       src/shader_recompiler/profile.h
                       src/shader_recompiler/recompiler.cpp
                       src/shader_recompiler/recompiler.h
+                      src/shader_recompiler/info.h
+                      src/shader_recompiler/params.h
                       src/shader_recompiler/runtime_info.h
                       src/shader_recompiler/specialization.h
                       src/shader_recompiler/backend/spirv/emit_spirv.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 98eac0819..c681be97c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -210,7 +210,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
     }
     switch (program.info.stage) {
    case Stage::Compute: {
-        const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
+        const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
         execution_model = spv::ExecutionModel::GLCompute;
         ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
                              workgroup_size[1], workgroup_size[2]);
@@ -258,8 +258,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
 }
 } // Anonymous namespace
 
-std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) {
-    EmitContext ctx{profile, program.info, binding};
+std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+                           const IR::Program& program, u32& binding) {
+    EmitContext ctx{profile, runtime_info, program.info, binding};
     const Id main{DefineMain(ctx, program)};
     DefineEntryPoint(program, ctx, main);
     if (program.info.stage == Stage::Vertex) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 4c862185f..aada0ff67 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -9,7 +9,7 @@
 
 namespace Shader::Backend::SPIRV {
 
-[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program,
-                                         u32& binding);
+[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
+                                         const IR::Program& program, u32& binding);
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 7bdc98de9..39a214fa0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -59,7 +59,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
     case IR::Attribute::Position2:
     case IR::Attribute::Position3: {
         const u32 index = u32(attr) - u32(IR::Attribute::Position1);
-        return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
+        return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
     }
     case IR::Attribute::RenderTarget0:
     case IR::Attribute::RenderTarget1:
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 51315139f..9e4c58621 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -41,9 +41,10 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&...
ar } // Anonymous namespace -EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_) - : Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage}, - binding{binding_} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + const Shader::Info& info_, u32& binding_) + : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, + profile{profile_}, stage{info.stage}, binding{binding_} { AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); DefineInterfaces(); @@ -247,7 +248,7 @@ void EmitContext::DefineInputs() { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); - for (const auto& input : info.ps_inputs) { + for (const auto& input : runtime_info.fs_info.inputs) { const u32 semantic = input.param_index; if (input.is_default && !input.is_flat) { input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1], @@ -554,7 +555,7 @@ void EmitContext::DefineSharedMemory() { if (!info.uses_shared) { return; } - u32 shared_memory_size = info.shared_memory_size; + u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; if (shared_memory_size == 0) { shared_memory_size = DefaultSharedMemSize; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index d3646382f..782343f22 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -6,9 +6,9 @@ #include #include +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::SPIRV { @@ -36,7 +36,8 @@ struct VectorIds { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + const Shader::Info& info, u32& binding); ~EmitContext(); Id Def(const IR::Value& value); @@ -125,6 +126,7 @@ public: } const Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; Stage stage{}; diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index fefc623fc..bf5ba6bce 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -602,13 +602,14 @@ public: Common::ObjectPool& block_pool_, Common::ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Info& info_, const Profile& profile_) + Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, + runtime_info{runtime_info_}, profile{profile_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - Translator{&first_block, info, profile}.EmitPrologue(); + Translator{&first_block, info, runtime_info, 
profile}.EmitPrologue(); } private: @@ -637,7 +638,7 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stmt.block->begin, inst_list.subspan(start, size), - info, profile); + info, runtime_info, profile); } break; } @@ -817,19 +818,20 @@ private: const Block dummy_flow_block{.is_dummy = true}; std::span inst_list; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { Common::ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, - syntax_list, cfg.inst_list, info, profile}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, + cfg.inst_list, info, runtime_info, profile}; ASSERT_MSG(!info.translation_failed, "Shader translation has failed"); return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index f5a540518..2119484e3 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -11,12 +11,14 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool& inst_pool, Common::ObjectPool& block_pool, CFG& cfg, - Info& info, const Profile& profile); + Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 821923e90..d4db09a64 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" -#include "shader_recompiler/specialization.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) { return comp; } const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); - switch (info.mrt_swizzles[index]) { + switch (runtime_info.fs_info.mrt_swizzles[index]) { case MrtSwizzle::Identity: return comp; case MrtSwizzle::Alt: diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 8600dbd39..b33746c7b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" @@ -16,8 +17,9 @@ namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_) - : ir{*block_, block_->begin()}, info{info_}, profile{profile_} {} +Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_, + const Profile& profile_) + : ir{*block_, 
block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {} void Translator::EmitPrologue() { ir.Prologue(); @@ -25,7 +27,7 @@ void Translator::EmitPrologue() { // Initialize user data. IR::ScalarReg dst_sreg = IR::ScalarReg::S0; - for (u32 i = 0; i < info.num_user_data; i++) { + for (u32 i = 0; i < runtime_info.num_user_data; i++) { ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg)); ++dst_sreg; } @@ -36,15 +38,15 @@ void Translator::EmitPrologue() { // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); // v1: instance ID, step rate 0 - if (info.num_input_vgprs > 0) { + if (runtime_info.num_input_vgprs > 0) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0)); } // v2: instance ID, step rate 1 - if (info.num_input_vgprs > 1) { + if (runtime_info.num_input_vgprs > 1) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1)); } // v3: instance ID, plain - if (info.num_input_vgprs > 2) { + if (runtime_info.num_input_vgprs > 2) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } break; @@ -64,13 +66,13 @@ void Translator::EmitPrologue() { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); - if (info.tgid_enable[0]) { + if (runtime_info.cs_info.tgid_enable[0]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); } - if (info.tgid_enable[1]) { + if (runtime_info.cs_info.tgid_enable[1]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); } - if (info.tgid_enable[2]) { + if (runtime_info.cs_info.tgid_enable[2]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; @@ -452,11 +454,11 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { } void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info, - const Profile& profile) { + const RuntimeInfo& runtime_info, const Profile& profile) { if (inst_list.empty()) { return; } - Translator translator{block, info, profile}; + Translator translator{block, info, runtime_info, profile}; for (const auto& inst : inst_list) { pc += inst.length; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f1619e810..0c1f3a587 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -5,9 +5,9 @@ #include #include "shader_recompiler/frontend/instruction.h" +#include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" -#include "shader_recompiler/runtime_info.h" namespace Shader { struct Info; @@ -55,7 +55,8 @@ enum class NegateMode : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Info& info, const Profile& profile); + explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, + const Profile& profile); // Instruction categories void EmitPrologue(); @@ -237,12 +238,13 @@ private: private: IR::IREmitter ir; Info& info; + const RuntimeInfo& runtime_info; const Profile& profile; IR::U32 m0_value; bool opcode_missing = false; }; void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info, - const Profile& profile); + const RuntimeInfo& runtime_info, const Profile& profile); } // namespace Shader::Gcn diff --git 
a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 4ff846cf8..c12ae8f57 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,14 +7,14 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h new file mode 100644 index 000000000..cdc17304c --- /dev/null +++ b/src/shader_recompiler/info.h @@ -0,0 +1,232 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/type.h" +#include "shader_recompiler/params.h" +#include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader { + +static constexpr size_t NumUserDataRegs = 16; + +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + Buffer, +}; +constexpr u32 NUM_TEXTURE_TYPES = 7; + +struct Info; + +struct BufferResource { + u32 sgpr_base; + u32 dword_offset; + IR::Type used_types; + AmdGpu::Buffer inline_cbuf; + bool is_instance_data{}; + bool is_written{}; + + bool IsStorage(AmdGpu::Buffer buffer) const noexcept { + static constexpr size_t MaxUboSize = 65536; + return buffer.GetSize() > MaxUboSize || is_written; + } + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using BufferResourceList = boost::container::small_vector; + +struct TextureBufferResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::NumberFormat nfmt; + bool is_written{}; + + constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; +}; +using TextureBufferResourceList = boost::container::small_vector; + +struct ImageResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::ImageType type; + AmdGpu::NumberFormat nfmt; + bool is_storage; + bool is_depth; + bool is_atomic{}; + + constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; +}; +using ImageResourceList = boost::container::small_vector; + +struct SamplerResource { + u32 sgpr_base; + u32 dword_offset; + AmdGpu::Sampler inline_sampler{}; + u32 associated_image : 4; + u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; +}; +using SamplerResourceList = boost::container::small_vector; + +struct PushData { + static constexpr size_t BufOffsetIndex = 2; + + u32 step0; + u32 step1; + std::array buf_offsets; + + void AddOffset(u32 binding, u32 offset) { + ASSERT(offset < 256 && 
binding < buf_offsets.size());
+        buf_offsets[binding] = offset;
+    }
+};
+
+/**
+ * Contains general information generated by the shader recompiler for an input program.
+ */
+struct Info {
+    struct VsInput {
+        enum InstanceIdType : u8 {
+            None = 0,
+            OverStepRate0 = 1,
+            OverStepRate1 = 2,
+            Plain = 3,
+        };
+
+        AmdGpu::NumberFormat fmt;
+        u16 binding;
+        u16 num_components;
+        u8 sgpr_base;
+        u8 dword_offset;
+        InstanceIdType instance_step_rate;
+        s32 instance_data_buf;
+    };
+    boost::container::static_vector vs_inputs{};
+
+    struct AttributeFlags {
+        bool Get(IR::Attribute attrib, u32 comp = 0) const {
+            return flags[Index(attrib)] & (1 << comp);
+        }
+
+        bool GetAny(IR::Attribute attrib) const {
+            return flags[Index(attrib)];
+        }
+
+        void Set(IR::Attribute attrib, u32 comp = 0) {
+            flags[Index(attrib)] |= (1 << comp);
+        }
+
+        u32 NumComponents(IR::Attribute attrib) const {
+            return 4;
+        }
+
+        static size_t Index(IR::Attribute attrib) {
+            return static_cast<size_t>(attrib);
+        }
+
+        std::array flags;
+    };
+    AttributeFlags loads{};
+    AttributeFlags stores{};
+
+    s8 vertex_offset_sgpr = -1;
+    s8 instance_offset_sgpr = -1;
+
+    BufferResourceList buffers;
+    TextureBufferResourceList texture_buffers;
+    ImageResourceList images;
+    SamplerResourceList samplers;
+
+    std::span<const u32> user_data;
+    Stage stage;
+
+    u64 pgm_hash{};
+    VAddr pgm_base;
+    bool has_storage_images{};
+    bool has_image_buffers{};
+    bool has_texel_buffers{};
+    bool has_discard{};
+    bool has_image_gather{};
+    bool has_image_query{};
+    bool uses_lane_id{};
+    bool uses_group_quad{};
+    bool uses_shared{};
+    bool uses_fp16{};
+    bool uses_step_rates{};
+    bool translation_failed{}; // indicates that shader has unsupported instructions
+
+    explicit Info(Stage stage_, ShaderParams params)
+        : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
+          user_data{params.user_data} {}
+
+    template <typename T>
+    T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
+        T data;
+        const u32* base = user_data.data();
+        if (ptr_index != IR::NumScalarRegs) {
+            std::memcpy(&base, &user_data[ptr_index], sizeof(base));
+        }
+        std::memcpy(&data, base + dword_offset, sizeof(T));
+        return data;
+    }
+
+    size_t NumBindings() const noexcept {
+        return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
+        u32 vertex_offset = 0;
+        u32 instance_offset = 0;
+        if (vertex_offset_sgpr != -1) {
+            vertex_offset = user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
+};
+
+constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
+    return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
+}
+
+constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
+    return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
+}
+
+constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
+    return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
+}
+
+constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
+    return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
+}
+
+} // namespace Shader
+
+template <>
+struct fmt::formatter<Shader::Stage> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    auto format(const Shader::Stage stage, format_context& ctx) const {
+        constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
+        return fmt::format_to(ctx.out(), "{}", names[static_cast<u32>(stage)]);
+    }
+};
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 3bf3ad35e..025bb98c8 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -4,11 +4,11 @@
 #include
 #include
 #include "common/alignment.h"
+#include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/breadth_first_search.h"
 #include "shader_recompiler/ir/ir_emitter.h"
 #include "shader_recompiler/ir/program.h"
-#include "shader_recompiler/runtime_info.h"
 #include "video_core/amdgpu/resource.h"
 
 namespace Shader::Optimization {
diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h
index f7abba641..84a1a2d40 100644
--- a/src/shader_recompiler/ir/program.h
+++ b/src/shader_recompiler/ir/program.h
@@ -5,9 +5,9 @@
 
 #include
 #include "shader_recompiler/frontend/instruction.h"
+#include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/abstract_syntax_list.h"
 #include "shader_recompiler/ir/basic_block.h"
-#include "shader_recompiler/runtime_info.h"
 
 namespace Shader::IR {
 
diff --git a/src/shader_recompiler/params.h b/src/shader_recompiler/params.h
new file mode 100644
index 000000000..0dce9a0f3
--- /dev/null
+++ b/src/shader_recompiler/params.h
@@ -0,0 +1,26 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <span>
+#include "common/types.h"
+
+namespace Shader {
+
+/**
+ * Compilation parameters used to identify and locate a guest shader program.
+ */
+struct ShaderParams {
+    static constexpr u32 NumShaderUserData = 16;
+
+    std::span<const u32, NumShaderUserData> user_data;
+    std::span<const u32> code;
+    u64 hash;
+
+    VAddr Base() const noexcept {
+        return reinterpret_cast<VAddr>(code.data());
+    }
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index bde93ec1b..12dbc6c1b 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -28,29 +28,32 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
     return blocks;
 }
 
-IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
-                             Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
-                             Info& info, const Profile& profile) {
+IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
+                             const RuntimeInfo& runtime_info, const Profile& profile) {
     // Ensure first instruction is expected.
     constexpr u32 token_mov_vcchi = 0xBEEB03FF;
-    ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
+    ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
 
-    Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
+    Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size());
     Gcn::GcnDecodeContext decoder;
 
     // Decode and save instructions
     IR::Program program{info};
-    program.ins_list.reserve(token.size());
+    program.ins_list.reserve(code.size());
     while (!slice.atEnd()) {
         program.ins_list.emplace_back(decoder.decodeInstruction(slice));
     }
 
+    // Clear any previous pooled data.
+    pools.ReleaseContents();
+
     // Create control flow graph
     Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
     Gcn::CFG cfg{gcn_block_pool, program.ins_list};
 
     // Structurize control flow graph and create program.
-    program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
+    program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg,
+                                                program.info, runtime_info, profile);
     program.blocks = GenerateBlocks(program.syntax_list);
     program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
 
@@ -64,7 +67,6 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
     Shader::Optimization::IdentityRemovalPass(program.blocks);
     Shader::Optimization::DeadCodeEliminationPass(program);
     Shader::Optimization::CollectShaderInfoPass(program);
-    LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
 
     return program;
 }
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
index 3a2295189..f8acf6c9e 100644
--- a/src/shader_recompiler/recompiler.h
+++ b/src/shader_recompiler/recompiler.h
@@ -10,10 +10,24 @@
 namespace Shader {
 
 struct Profile;
+struct RuntimeInfo;
 
-[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
-                                           Common::ObjectPool<IR::Block>& block_pool,
-                                           std::span<const u32> code, Info& info,
-                                           const Profile& profile);
+struct Pools {
+    static constexpr u32 InstPoolSize = 8192;
+    static constexpr u32 BlockPoolSize = 32;
+
+    Common::ObjectPool<IR::Inst> inst_pool;
+    Common::ObjectPool<IR::Block> block_pool;
+
+    explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}
+
+    void ReleaseContents() {
+        inst_pool.ReleaseContents();
+        block_pool.ReleaseContents();
+    }
+};
+
+[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
+                                           const RuntimeInfo& runtime_info, const Profile& profile);
 
 } // namespace Shader
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 0c0985aa9..92b205360 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -3,28 +3,14 @@
 
 #pragma once
 
-#include
-#include
+#include
 #include
+
 #include "common/assert.h"
 #include "common/types.h"
-#include "shader_recompiler/ir/attribute.h"
-#include "shader_recompiler/ir/reg.h"
-#include "shader_recompiler/ir/type.h"
-#include "video_core/amdgpu/resource.h"
 
 namespace Shader {
 
-static constexpr size_t NumUserDataRegs = 16;
-static constexpr size_t MaxColorBuffers = 8;
-
-enum class MrtSwizzle : u32 {
-    Identity = 0,
-    Alt = 1,
-    Reverse = 2,
-    ReverseAlt = 3,
-};
-
 enum class Stage : u32 {
     Fragment,
     Vertex,
@@ -37,21 +23,18 @@ enum class Stage : u32 {
 constexpr u32 MaxStageTypes = 6;
 
 [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
-    return static_cast<Stage>(static_cast<size_t>(Stage::Vertex) + index);
+    return static_cast<Stage>(index);
 }
 
-enum class TextureType : u32 {
-    Color1D,
-
ColorArray1D, - Color2D, - ColorArray2D, - Color3D, - ColorCube, - Buffer, +enum class MrtSwizzle : u8 { + Identity = 0, + Alt = 1, + Reverse = 2, + ReverseAlt = 3, }; -constexpr u32 NUM_TEXTURE_TYPES = 7; +static constexpr u32 MaxColorBuffers = 8; -enum class VsOutput : u32 { +enum class VsOutput : u8 { None, PointSprite, EdgeFlag, @@ -78,212 +61,67 @@ enum class VsOutput : u32 { }; using VsOutputMap = std::array; -struct Info; +struct VertexRuntimeInfo { + boost::container::static_vector outputs; -struct BufferResource { - u32 sgpr_base; - u32 dword_offset; - IR::Type used_types; - AmdGpu::Buffer inline_cbuf; - bool is_instance_data{}; - bool is_written{}; - - bool IsStorage(AmdGpu::Buffer buffer) const noexcept { - static constexpr size_t MaxUboSize = 65536; - return buffer.GetSize() > MaxUboSize || is_written; - } - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using BufferResourceList = boost::container::small_vector; - -struct TextureBufferResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::NumberFormat nfmt; - bool is_written{}; - - constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; -}; -using TextureBufferResourceList = boost::container::small_vector; - -struct ImageResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::ImageType type; - AmdGpu::NumberFormat nfmt; - bool is_storage; - bool is_depth; - bool is_atomic{}; - - constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; -}; -using ImageResourceList = boost::container::small_vector; - -struct SamplerResource { - u32 sgpr_base; - u32 dword_offset; - AmdGpu::Sampler inline_sampler{}; - u32 associated_image : 4; - u32 disable_aniso : 1; - - constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; -}; -using SamplerResourceList = boost::container::small_vector; - -struct PushData { - static constexpr size_t BufOffsetIndex = 2; - - u32 step0; - u32 step1; - std::array buf_offsets; - - void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 256 && binding < buf_offsets.size()); - buf_offsets[binding] = offset; + bool operator==(const VertexRuntimeInfo& other) const noexcept { + return true; } }; -struct Info { - struct VsInput { - enum InstanceIdType : u8 { - None = 0, - OverStepRate0 = 1, - OverStepRate1 = 2, - Plain = 3, - }; - - AmdGpu::NumberFormat fmt; - u16 binding; - u16 num_components; - u8 sgpr_base; - u8 dword_offset; - InstanceIdType instance_step_rate; - s32 instance_data_buf; - }; - boost::container::static_vector vs_inputs{}; - +struct FragmentRuntimeInfo { struct PsInput { - u32 param_index; + u8 param_index; bool is_default; bool is_flat; - u32 default_value; + u8 default_value; + + auto operator<=>(const PsInput&) const noexcept = default; }; - boost::container::static_vector ps_inputs{}; + boost::container::static_vector inputs; std::array mrt_swizzles; - struct AttributeFlags { - bool Get(IR::Attribute attrib, u32 comp = 0) const { - return flags[Index(attrib)] & (1 << comp); - } + bool operator==(const FragmentRuntimeInfo& other) const noexcept { + return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) && + std::ranges::equal(inputs, other.inputs); + } +}; - bool GetAny(IR::Attribute attrib) const { - return flags[Index(attrib)]; - } - - void Set(IR::Attribute attrib, u32 comp = 0) { - flags[Index(attrib)] |= (1 << comp); - } - - u32 NumComponents(IR::Attribute attrib) const { - return 4; - } - - static size_t Index(IR::Attribute attrib) { - return static_cast(attrib); - } - - std::array flags; - }; - AttributeFlags loads{}; 
- AttributeFlags stores{}; - boost::container::static_vector vs_outputs; - - s8 vertex_offset_sgpr = -1; - s8 instance_offset_sgpr = -1; - - BufferResourceList buffers; - TextureBufferResourceList texture_buffers; - ImageResourceList images; - SamplerResourceList samplers; - - std::array workgroup_size{}; +struct ComputeRuntimeInfo { + u32 shared_memory_size; + std::array workgroup_size; std::array tgid_enable; + bool operator==(const ComputeRuntimeInfo& other) const noexcept { + return workgroup_size == other.workgroup_size && tgid_enable == other.tgid_enable; + } +}; + +/** + * Stores information relevant to shader compilation sourced from liverpool registers. + * It may potentially differ with the same shader module so must be checked. + * It's also possible to store any other custom information that needs to be part of shader key. + */ +struct RuntimeInfo { + Stage stage; u32 num_user_data; u32 num_input_vgprs; - std::span user_data; - Stage stage; + VertexRuntimeInfo vs_info; + FragmentRuntimeInfo fs_info; + ComputeRuntimeInfo cs_info; - uintptr_t pgm_base{}; - u64 pgm_hash{}; - u32 shared_memory_size{}; - bool has_storage_images{}; - bool has_image_buffers{}; - bool has_texel_buffers{}; - bool has_discard{}; - bool has_image_gather{}; - bool has_image_query{}; - bool uses_lane_id{}; - bool uses_group_quad{}; - bool uses_shared{}; - bool uses_fp16{}; - bool uses_step_rates{}; - bool translation_failed{}; // indicates that shader has unsupported instructions + RuntimeInfo(Stage stage_) : stage{stage_} {} - template - T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { - T data; - const u32* base = user_data.data(); - if (ptr_index != IR::NumScalarRegs) { - std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + bool operator==(const RuntimeInfo& other) const noexcept { + if (stage == Stage::Fragment) { + return fs_info == other.fs_info; + } else if (stage == Stage::Vertex) { + return vs_info == other.vs_info; + } else if (stage == Stage::Compute) { + return cs_info == other.cs_info; } - std::memcpy(&data, base + dword_offset, sizeof(T)); - return data; - } - - size_t NumBindings() const noexcept { - return buffers.size() + texture_buffers.size() + images.size() + samplers.size(); - } - - [[nodiscard]] std::pair GetDrawOffsets() const noexcept { - u32 vertex_offset = 0; - u32 instance_offset = 0; - if (vertex_offset_sgpr != -1) { - vertex_offset = user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; + UNREACHABLE(); } }; -constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept { - return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - return info.ReadUd(sgpr_base, dword_offset); -} - -constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { - return inline_sampler ? 
inline_sampler : info.ReadUd(sgpr_base, dword_offset); -} - } // namespace Shader - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - auto format(const Shader::Stage stage, format_context& ctx) const { - constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"}; - return fmt::format_to(ctx.out(), "{}", names[static_cast(stage)]); - } -}; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index b29892afd..3dd75dbd7 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -6,7 +6,7 @@ #include #include "common/types.h" -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" namespace Shader { @@ -30,18 +30,42 @@ struct ImageSpecialization { auto operator<=>(const ImageSpecialization&) const = default; }; +/** + * Alongside runtime information, this structure also checks bound resources + * for compatibility. Can be used as a key for storing shader permutations. + * Is separate from runtime information, because resource layout can only be deduced + * after the first compilation of a module. + */ struct StageSpecialization { static constexpr size_t MaxStageResources = 32; const Shader::Info* info; + RuntimeInfo runtime_info; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; boost::container::small_vector images; - std::array mrt_swizzles; u32 start_binding{}; - explicit StageSpecialization(const Shader::Info& info_) : info{&info_} {} + explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, + u32 start_binding_) + : info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} { + u32 binding{}; + ForEachSharp(binding, buffers, info->buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.stride = sharp.GetStride(); + spec.is_storage = desc.IsStorage(sharp); + }); + ForEachSharp(binding, tex_buffers, info->texture_buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + ForEachSharp(binding, images, info->images, + [](auto& spec, const auto& desc, AmdGpu::Image sharp) { + spec.type = sharp.GetType(); + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + } void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { for (const auto& desc : desc_list) { @@ -60,8 +84,7 @@ struct StageSpecialization { if (start_binding != other.start_binding) { return false; } - if (info->stage == Shader::Stage::Fragment && - !std::ranges::equal(mrt_swizzles, other.mrt_swizzles)) { + if (runtime_info != other.runtime_info) { return false; } u32 binding{}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 7f262e1f4..37720168a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -18,6 +18,7 @@ #include "common/polyfill_thread.h" #include "common/types.h" #include "common/unique_function.h" +#include "shader_recompiler/params.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" @@ -171,6 +172,15 @@ struct Liverpool { return bininfo; } + static constexpr Shader::ShaderParams GetParams(const auto& sh) { + auto* bininfo = GetBinaryInfo(sh); + return { + .user_data = sh.user_data, + .code = sh.Code(), + .hash = bininfo->shader_hash, + }; + } + union PsInputControl { u32 raw; BitField<0, 5, 
u32> input_offset; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 71228786e..93e05085d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -4,7 +4,7 @@ #include #include "common/alignment.h" #include "common/scope_exit.h" -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0132066c5..54eaf6532 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,7 @@ #pragma once #include -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" namespace VideoCore { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 183961ac3..b7c70f4b9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -7,8 +7,7 @@ #include "common/io_file.h" #include "common/path_util.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/recompiler.h" -#include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -21,11 +20,16 @@ namespace Vulkan { using Shader::VsOutput; -void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + +void GatherVertexOutputs(Shader::VertexRuntimeInfo& info, + const AmdGpu::Liverpool::VsOutputControl& ctl) { const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || w != VsOutput::None) { - info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); } }; // VS_OUT_MISC_VEC @@ -65,14 +69,51 @@ void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl : (ctl.IsCullDistEnabled(7) ? 
VsOutput::CullDist7 : VsOutput::None)); } -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, const GraphicsPipelineKey& key, + const AmdGpu::Liverpool::Regs& regs) { + auto info = Shader::RuntimeInfo{stage}; + switch (stage) { + case Shader::Stage::Vertex: { + info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + GatherVertexOutputs(info.vs_info, regs.vs_output_control); + break; + } + case Shader::Stage::Fragment: { + info.num_user_data = regs.ps_program.settings.num_user_regs; + std::ranges::transform(key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), + [](Liverpool::ColorBuffer::SwapMode mode) { + return static_cast(mode); + }); + for (u32 i = 0; i < regs.num_interp; i++) { + info.fs_info.inputs.push_back({ + .param_index = u8(regs.ps_inputs[i].input_offset.Value()), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = u8(regs.ps_inputs[i].default_value), + }); + } + break; + } + case Shader::Stage::Compute: { + const auto& cs_pgm = regs.cs_program; + info.num_user_data = cs_pgm.settings.num_user_regs; + info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, + cs_pgm.num_thread_z.full}; + info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; + info.cs_info.shared_memory_size = cs_pgm.SharedMemSize(); + break; + } + default: + UNREACHABLE(); + } + return info; } PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, - block_pool{512} { + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} { profile = Shader::Profile{ .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 
0x00010600U : 0x00010500U, .subgroup_size = instance.SubgroupSize(), @@ -243,9 +284,9 @@ bool PipelineCache::RefreshGraphicsKey() { if (ShouldSkipShader(bininfo->shader_hash, "graphics")) { return false; } - const auto stage = Shader::Stage{i}; - const auto guest_pgm = GuestProgram{pgm, stage}; - std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(guest_pgm, binding); + const auto stage = Shader::StageFromIndex(i); + const auto params = Liverpool::GetParams(*pgm); + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } return true; } @@ -253,149 +294,68 @@ bool PipelineCache::RefreshGraphicsKey() { bool PipelineCache::RefreshComputeKey() { u32 binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; - const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute}; - if (ShouldSkipShader(guest_pgm.hash, "compute")) { + const auto cs_params = Liverpool::GetParams(*cs_pgm); + if (ShouldSkipShader(cs_params.hash, "compute")) { return false; } - std::tie(infos[0], modules[0], compute_key) = GetProgram(guest_pgm, binding); + std::tie(infos[0], modules[0], compute_key) = + GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } -vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span code, - size_t perm_idx, u32& binding) { +vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, + const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, + u32& binding) { LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, perm_idx != 0 ? "(permutation)" : ""); - if (Config::dumpShaders()) { DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); } - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); - - // Compile IR to SPIR-V - const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); + const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile); + const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding); if (Config::dumpShaders()) { DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); } - // Create module and set name to hash in renderdoc const auto module = CompileSPV(spv, instance.GetDevice()); - ASSERT(module != VK_NULL_HANDLE); const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); Vulkan::SetObjectName(instance.GetDevice(), module, name); return module; } std::tuple PipelineCache::GetProgram( - const GuestProgram& pgm, u32& binding) { - auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash); + Shader::Stage stage, Shader::ShaderParams params, u32& binding) { + const auto runtime_info = BuildRuntimeInfo(stage, graphics_key, liverpool->regs); + auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { - Program* program = program_pool.Create(BuildShaderInfo(pgm, liverpool->regs)); + Program* program = program_pool.Create(stage, params); u32 start_binding = binding; - const auto module = CompileModule(program->info, pgm.code, 0, binding); - const auto spec = BuildStageSpec(program->info, start_binding); - program->modules.emplace_back(module, spec); + const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding); + program->AddPermut(module, std::move(spec)); 
it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0)); + return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); } Program* program = it_pgm->second; const auto& info = program->info; - const auto spec = BuildStageSpec(info, binding); + const auto spec = Shader::StageSpecialization(info, runtime_info, binding); size_t perm_idx = program->modules.size(); vk::ShaderModule module{}; const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); if (it == program->modules.end()) { - auto new_info = BuildShaderInfo(pgm, liverpool->regs); - module = CompileModule(new_info, pgm.code, perm_idx, binding); - program->modules.emplace_back(module, std::move(spec)); + auto new_info = Shader::Info(stage, params); + module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding); + program->AddPermut(module, std::move(spec)); } else { binding += info.NumBindings(); module = it->module; perm_idx = std::distance(program->modules.begin(), it); } - return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx)); -} - -Shader::Info PipelineCache::BuildShaderInfo(const GuestProgram& pgm, - const AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{}; - info.user_data = pgm.user_data; - info.pgm_base = VAddr(pgm.code.data()); - info.pgm_hash = pgm.hash; - info.stage = pgm.stage; - switch (pgm.stage) { - case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - BuildVsOutputs(info, regs.vs_output_control); - break; - } - case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - std::ranges::transform(graphics_key.mrt_swizzles, info.mrt_swizzles.begin(), - [](Liverpool::ColorBuffer::SwapMode mode) { - return static_cast(mode); - }); - for (u32 i = 0; i < regs.num_interp; i++) { - info.ps_inputs.push_back({ - .param_index = regs.ps_inputs[i].input_offset.Value(), - .is_default = bool(regs.ps_inputs[i].use_default), - .is_flat = bool(regs.ps_inputs[i].flat_shade), - .default_value = regs.ps_inputs[i].default_value, - }); - } - break; - } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; - info.num_user_data = cs_pgm.settings.num_user_regs; - info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, - cs_pgm.num_thread_z.full}; - info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), - cs_pgm.IsTgidEnabled(2)}; - info.shared_memory_size = cs_pgm.SharedMemSize(); - break; - } - default: - break; - } - return info; -} - -Shader::StageSpecialization PipelineCache::BuildStageSpec(const Shader::Info& info, - u32 start_binding) { - u32 binding{}; - auto spec = Shader::StageSpecialization{info}; - spec.start_binding = start_binding; - spec.ForEachSharp(binding, spec.buffers, info.buffers, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.stride = sharp.GetStride(); - spec.is_storage = desc.IsStorage(sharp); - }); - spec.ForEachSharp(binding, spec.tex_buffers, info.texture_buffers, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); - }); - spec.ForEachSharp(binding, spec.images, info.images, - [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetType(); - spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); - }); - switch (info.stage) { - case Shader::Stage::Fragment: - 
std::ranges::transform(graphics_key.mrt_swizzles, spec.mrt_swizzles.begin(), - [](Liverpool::ColorBuffer::SwapMode mode) { - return static_cast(mode); - }); - default: - break; - } - - return spec; + return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); } void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 545ddaf27..26130994c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,9 +4,8 @@ #pragma once #include -#include "common/object_pool.h" -#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/recompiler.h" #include "shader_recompiler/specialization.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -30,19 +29,10 @@ struct Program { Shader::Info info; boost::container::small_vector modules; - explicit Program(const Shader::Info& info_) : info{info_} {} -}; + explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {} -struct GuestProgram { - Shader::Stage stage; - std::span user_data; - std::span code; - u64 hash; - - explicit GuestProgram(const auto* pgm, Shader::Stage stage_) - : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} { - const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); - hash = bininfo->shader_hash; + void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) { + modules.emplace_back(module, std::move(spec)); } }; @@ -58,7 +48,8 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple GetProgram(const GuestProgram& pgm, + std::tuple GetProgram(Shader::Stage stage, + Shader::ShaderParams params, u32& binding); private: @@ -67,11 +58,8 @@ private: void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, - u32& binding); - - Shader::Info BuildShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs); - Shader::StageSpecialization BuildStageSpec(const Shader::Info& info, u32 binding); + vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info, + std::span code, size_t perm_idx, u32& binding); private: const Instance& instance; @@ -80,9 +68,8 @@ private: vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; Shader::Profile profile{}; + Shader::Pools pools; tsl::robin_map program_cache; - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; Common::ObjectPool program_pool; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines;
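The sketch below is not part of the patch; it is a minimal, illustrative walk-through of how the refactored entry points compose after this change, based only on the signatures introduced above (ShaderParams, RuntimeInfo, Pools, TranslateProgram, EmitSPIRV, StageSpecialization). Names such as `regs`, `graphics_key`, and the free-standing call to BuildRuntimeInfo stand in for state owned by PipelineCache in vk_pipeline_cache.cpp; treat them as assumptions rather than the actual call sites.

// Usage sketch (assumed caller state: `regs`, `graphics_key`, a persistent `pools`/`profile`).
Shader::Pools pools;        // object pools reused across compilations
Shader::Profile profile{};  // filled from the Vulkan instance in practice

// Static identity of the guest program: user-data registers, code span, hash.
const Shader::ShaderParams params = AmdGpu::Liverpool::GetParams(regs.cs_program);

// Register-derived state that can vary per dispatch without the module changing.
const Shader::RuntimeInfo runtime_info =
    BuildRuntimeInfo(Shader::Stage::Compute, graphics_key, regs);

// GCN -> IR -> SPIR-V; RuntimeInfo is now threaded through both stages instead of
// being baked into Shader::Info.
Shader::Info info{Shader::Stage::Compute, params};
u32 binding{};
const auto program = Shader::TranslateProgram(params.code, pools, info, runtime_info, profile);
const std::vector<u32> spv =
    Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, binding);

// Permutation key: bound-resource layout plus the RuntimeInfo it was compiled against.
const Shader::StageSpecialization spec{info, runtime_info, /*start_binding=*/0};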